Old question removing and updating

This commit is contained in:
mrfry 2022-05-16 16:22:09 +02:00
parent 4305fe2023
commit 5c2b46f2a3
7 changed files with 440 additions and 36 deletions

View file

@ -53,6 +53,7 @@ import {
dataToString, dataToString,
getSubjNameWithoutYear, getSubjNameWithoutYear,
WorkerResult, WorkerResult,
SearchResultQuestion,
// compareQuestionObj, // compareQuestionObj,
} from '../../../utils/classes' } from '../../../utils/classes'
import { import {
@ -194,7 +195,7 @@ function searchInDbs(
logger.DebugLog(taskResult, 'ask', 2) logger.DebugLog(taskResult, 'ask', 2)
resolve({ resolve({
question: question, question: question,
result: taskResult.result, result: taskResult.result as SearchResultQuestion[],
success: true, success: true,
}) })
} catch (err) { } catch (err) {

View file

@ -0,0 +1,197 @@
import { updateQuestionsInArray } from '../utils/actions'
import { createQuestion } from '../utils/classes'
import { cleanDb } from '../utils/classes'
import { QuestionDb, Subject, Question } from '../types/basicTypes'
// Current time in milliseconds, shifted by an optional (possibly negative) offset.
const date = (x?: number) => {
  const offset = x || 0
  return new Date().getTime() + offset
}

// Offsets used by the fixtures: setupTest() uses a cut-off of "now - 100 ms",
// so -1000 lands before the cut-off and +1000 lands after it.
const BEFORE_CUTOFF = -1000
const AFTER_CUTOFF = 1000

// q1..q3 share this question text and differ only in their answers.
const duplicatedQuestionText =
  'A kötvény és a részvény közös tulajdonsága, hogy TOREMOVE'
// q5 and q6 share this answer text.
const sharedAnswerText = 'A vállalati profit egy része..'

const q1 = createQuestion(
  duplicatedQuestionText,
  'piaci áruk eltérhet a névértéktől.',
  { type: 'simple', date: date(BEFORE_CUTOFF) }
)

const q2 = createQuestion(duplicatedQuestionText, 'afjléa gféda gfdjs légf', {
  type: 'simple',
  date: date(BEFORE_CUTOFF),
})

const q3 = createQuestion(
  duplicatedQuestionText,
  'afjlsd gfds dgfs gf sdgf d',
  { type: 'simple', date: date(BEFORE_CUTOFF) }
)

// A distinct question dated before the cut-off.
const q4 = createQuestion(
  'A kötvény névértéke',
  'A kötvényen feltüntetett meghatározott nagyságú összeg.',
  { type: 'simple', date: date(BEFORE_CUTOFF) }
)

// Two distinct questions dated after the cut-off.
const q5 = createQuestion('Mi az osztalék? asd asd', sharedAnswerText, {
  type: 'simple',
  date: date(AFTER_CUTOFF),
})

const q6 = createQuestion(
  'valaim nagyon értelmes kérdés asd asd',
  sharedAnswerText,
  { type: 'simple', date: date(AFTER_CUTOFF) }
)
/**
 * Runs one clean-up pass over an in-memory, single-entry question database.
 *
 * The incoming questions are re-stamped with the current time, `cleanDb` is
 * asked which stored questions they supersede, and `updateQuestionsInArray`
 * is applied to the matched subject's question list.
 *
 * @param newQuestions questions treated as freshly received
 * @param data subjects that make up the test database
 * @param subjToClean subject name fragment to clean; defaults to 'subject'
 * @returns the index groups from `cleanDb`, the updated question list, the
 *   cut-off timestamp that was used and the index of the matched subject
 */
function setupTest({
  newQuestions,
  data,
  subjToClean,
}: {
  newQuestions: Question[]
  data: Subject[]
  subjToClean?: string
}) {
  // Copy every incoming question, replacing only its date with "now".
  const recievedQuestions: Question[] = newQuestions.map((incoming) => ({
    ...incoming,
    data: { ...incoming.data, date: date() },
  }))

  const subjName = subjToClean || 'subject'
  // Cut-off: 100 ms before now.
  const overwriteFromDate = date(-100)
  const qdbIndex = 0

  const testDb: QuestionDb = {
    name: 'test',
    data: data,
    index: 0,
    path: '',
    shouldSearch: 'asd',
    shouldSave: {},
  }
  const qdbs: QuestionDb[] = [testDb]

  // Case-insensitive substring match on the subject name.
  const wantedName = subjName.toLowerCase()
  const subjIndex = qdbs[qdbIndex].data.findIndex((subject) =>
    subject.Name.toLowerCase().includes(wantedName)
  )

  const questionIndexesToRemove = cleanDb(
    {
      questions: recievedQuestions,
      subjToClean: subjName,
      overwriteFromDate,
      qdbIndex,
    },
    qdbs
  )

  const updatedQuestions = updateQuestionsInArray(
    questionIndexesToRemove,
    qdbs[qdbIndex].data[subjIndex].Questions,
    recievedQuestions
  )

  return {
    questionIndexesToRemove,
    updatedQuestions,
    overwriteFromDate,
    subjIndex,
  }
}
// Subject holding two copies of the TOREMOVE question (q1, q2) plus q4 and q5.
const s1: Subject = { Name: 'test subject', Questions: [q1, q2, q4, q5] }

test('Old and duplicate questions should be removed from the database', () => {
  const outcome = setupTest({ newQuestions: [q1, q4, q5], data: [s1] })
  const { questionIndexesToRemove, updatedQuestions, overwriteFromDate } =
    outcome

  // One index group per received question; the first one matched q1 and q2.
  expect(questionIndexesToRemove.length).toBe(3)
  expect(questionIndexesToRemove[0].length).toBe(2)
  expect(updatedQuestions.length).toBe(3)

  // Exactly one TOREMOVE question is left, and it is dated after the cut-off.
  const remaining = updatedQuestions.filter((question) =>
    question.Q.includes('TOREMOVE')
  )
  expect(remaining.length).toBe(1)
  expect(remaining[0].data.date > overwriteFromDate).toBeTruthy()
})
// Subject holding three copies of the TOREMOVE question (q1, q2, q3) plus q4..q6.
const s2: Subject = {
  Name: 'test subject',
  Questions: [q1, q2, q3, q4, q5, q6],
}

test('Old and duplicate questions should be removed from the database round 2', () => {
  const outcome = setupTest({ newQuestions: [q1, q4, q5], data: [s2] })
  const { questionIndexesToRemove, updatedQuestions, overwriteFromDate } =
    outcome

  // One index group per received question; the first one matched q1, q2 and q3.
  expect(questionIndexesToRemove.length).toBe(3)
  expect(questionIndexesToRemove[0].length).toBe(3)
  expect(updatedQuestions.length).toBe(4)

  // Exactly one TOREMOVE question is left, and it is dated after the cut-off.
  const remaining = updatedQuestions.filter((question) =>
    question.Q.includes('TOREMOVE')
  )
  expect(remaining.length).toBe(1)
  expect(remaining[0].data.date > overwriteFromDate).toBeTruthy()
})
// Copies of q5 and q6 whose dates are pushed 50 seconds past the current time.
const s3: Subject = {
  Name: 'test subject',
  Questions: [q5, q6].map((original) => {
    const data = { ...original.data, date: date(+50000) }
    return { ...original, data }
  }),
}

test('Old and duplicate questions should be removed from the database: questions should be left alone when they are newer', () => {
  const outcome = setupTest({ newQuestions: [q5, q6], data: [s3] })
  const { questionIndexesToRemove, updatedQuestions } = outcome

  // Each received question gets an index group, and every group is empty.
  expect(questionIndexesToRemove.length).toBe(2)
  for (const indexGroup of questionIndexesToRemove) {
    expect(indexGroup.length).toBe(0)
  }
  expect(updatedQuestions.length).toBe(2)
})
// The only subject whose name contains 'else'.
const s4: Subject = { Name: 'something else', Questions: [q5, q6] }

test('Old and duplicate questions should be removed from the database:other subjects should be left alone', () => {
  const subjects = [s2, s1, s4, s3]
  const outcome = setupTest({
    newQuestions: [q5, q6],
    data: subjects,
    subjToClean: 'else',
  })

  // s4 sits at position 2 of the subject list passed in above.
  expect(outcome.subjIndex).toBe(2)
})

View file

@ -56,6 +56,10 @@ export interface DataFile {
path: string path: string
name: string name: string
locked?: Boolean locked?: Boolean
overwrites?: Array<{
subjName: string
overwriteFromDate: number
}>
shouldSearch: shouldSearch:
| string | string
| { | {

View file

@ -27,7 +27,7 @@ import {
WorkerResult, WorkerResult,
SearchResultQuestion, SearchResultQuestion,
} from '../utils/classes' } from '../utils/classes'
import { doALongTask } from './workerPool' import { doALongTask, msgAllWorker } from './workerPool'
import idStats from '../utils/ids' import idStats from '../utils/ids'
import utils from '../utils/utils' import utils from '../utils/utils'
import { addQuestion, getSubjNameWithoutYear } from './classes' import { addQuestion, getSubjNameWithoutYear } from './classes'
@ -201,32 +201,20 @@ function processIncomingRequestUsingDb(
.then((results: Array<WorkerResult>) => { .then((results: Array<WorkerResult>) => {
const allQuestions: Question[] = [] // all new questions here that do not have result const allQuestions: Question[] = [] // all new questions here that do not have result
results.forEach((result: WorkerResult, i) => { results.forEach((result: WorkerResult, i) => {
const add = result.result.every((res: SearchResultQuestion) => { const add = (result.result as SearchResultQuestion[]).every(
(res: SearchResultQuestion) => {
return res.match < minMatchAmmountToAdd return res.match < minMatchAmmountToAdd
}) }
)
if (add && !result.error) { if (add && !result.error) {
allQuestions.push(recievedQuestions[i]) allQuestions.push(recievedQuestions[i])
} }
}) })
try { try {
const subjName = getSubjNameWithoutYear(recievedData.subj)
if (allQuestions.length > 0) { if (allQuestions.length > 0) {
allQuestions.forEach((currentQuestion) => { addQuestionsToDb(allQuestions, subjName, qdb)
const sName = getSubjNameWithoutYear(recievedData.subj)
logger.DebugLog(
'Adding question with subjName: ' + sName + ' :',
'isadding',
3
)
logger.DebugLog(currentQuestion, 'isadding', 3)
addQuestion(qdb.data, sName, {
...currentQuestion,
data: {
...currentQuestion.data,
date: new Date().getTime(),
},
})
})
currWrites++ currWrites++
logger.DebugLog( logger.DebugLog(
@ -258,6 +246,7 @@ function processIncomingRequestUsingDb(
qdbIndex: qdb.index, qdbIndex: qdb.index,
qdbName: qdb.name, qdbName: qdb.name,
}) })
runCleanWorker(recievedData.quiz, subjName, qdb)
} catch (error) { } catch (error) {
console.error(error) console.error(error)
logger.Log( logger.Log(
@ -287,6 +276,124 @@ function processIncomingRequestUsingDb(
}) })
} }
/**
 * Adds every question in `allQuestions` to `qdb` under `subjName`.
 *
 * Each question is passed to `addQuestion` as a copy whose `data.date` is the
 * time at which that individual question was added (milliseconds).
 *
 * @param allQuestions questions to add
 * @param subjName subject name handed to `addQuestion`
 * @param qdb database whose `data` receives the questions
 */
function addQuestionsToDb(
  allQuestions: Question[],
  subjName: string,
  qdb: QuestionDb
) {
  for (const currentQuestion of allQuestions) {
    logger.DebugLog(
      `Adding question with subjName: ${subjName} :`,
      'isadding',
      3
    )
    logger.DebugLog(currentQuestion, 'isadding', 3)

    // Copy the question so the received object keeps its original data.
    const datedQuestion = {
      ...currentQuestion,
      data: { ...currentQuestion.data, date: new Date().getTime() },
    }
    addQuestion(qdb.data, subjName, datedQuestion)
  }
}
/**
 * Starts a 'dbClean' task for `subjName` when `qdb` has an overwrite rule
 * whose `subjName` is contained (case-insensitively) in the given subject name.
 *
 * When the task finishes, the resulting index groups are broadcast to all
 * workers as an 'rmQuestions' message, applied to the main-thread copy of the
 * subject via `updateQuestionsInArray`, and the database is written to
 * `qdb.path`.
 *
 * Nothing happens when the database has no overwrite rules, when no rule
 * matches, or when the subject cannot be found in `qdb.data` once the task has
 * finished. Failures inside the task or its follow-up are logged instead of
 * surfacing as unhandled promise rejections.
 *
 * @param recievedQuesitons questions received in the current request
 * @param subjName name of the subject the questions belong to
 * @param qdb database to clean
 */
function runCleanWorker(
  recievedQuesitons: Question[],
  subjName: string,
  qdb: QuestionDb
) {
  if (!qdb.overwrites || !qdb.overwrites.length) {
    return
  }

  // Check whether this subject has an overwrite rule (and thus a cut-off date).
  const loweredSubjName = subjName.toLowerCase()
  const overwrite = qdb.overwrites.find((x) => {
    return loweredSubjName.includes(x.subjName.toLowerCase())
  })
  if (!overwrite) {
    return
  }

  // Pass the received questions to a worker.
  doALongTask({
    type: 'dbClean',
    data: {
      questions: recievedQuesitons,
      subjToClean: subjName,
      overwriteFromDate: overwrite.overwriteFromDate,
      qdbIndex: qdb.index,
    },
  })
    .then(({ result }) => {
      const questionIndexesToRemove = result as number[][]

      const subjIndex = qdb.data.findIndex((x) => {
        return x.Name.toLowerCase().includes(loweredSubjName)
      })
      if (subjIndex === -1) {
        // The subject is not in this database, so there is nothing to
        // clean. Indexing with -1 below would throw.
        return
      }

      // Tell every worker to apply the same removal to its own copy.
      msgAllWorker({
        type: 'rmQuestions',
        data: {
          questionIndexesToRemove: questionIndexesToRemove,
          subjIndex: subjIndex,
          qdbIndex: qdb.index,
          recievedQuestions: recievedQuesitons,
        },
      })

      // The received questions carry no date; updateQuestionsInArray stamps
      // the ones it adds back.
      qdb.data[subjIndex].Questions = updateQuestionsInArray(
        questionIndexesToRemove,
        qdb.data[subjIndex].Questions,
        recievedQuesitons
      )

      // Persist the cleaned database.
      writeData(qdb.data, qdb.path)

      // Counts the received questions that matched more than one stored one.
      const replacedCount = questionIndexesToRemove.filter(
        (x: number[]) => x.length > 1
      ).length
      logger.Log(
        `\tRemoved ${logger.C('green')}${replacedCount}${logger.C()} old questions from ${logger.C(
          'green'
        )}${subjName}${logger.C()}`
      )
    })
    .catch((err) => {
      // Without this handler a failure would be an unhandled rejection.
      console.error(err)
      logger.Log(
        `Error while cleaning old questions from ${subjName}!`,
        logger.GetColor('redbg')
      )
    })
}
/**
 * Replaces stored questions with their freshly received counterparts.
 *
 * `questionIndexesToRemove[i]` lists the indexes in `questions` that matched
 * `newQuestions[i]`. Only groups holding more than one index are acted on:
 * all indexes of such a group are dropped from `questions`, and the matching
 * new question is appended with `data.date` set to the current time in
 * milliseconds.
 * NOTE(review): groups with exactly one index are left untouched, so a single
 * old match is not replaced. Confirm this is intended.
 *
 * Neither input array nor any of the question objects is modified; appended
 * questions are copies.
 *
 * @param questionIndexesToRemove index groups, parallel to `newQuestions`;
 *   missing entries are treated as "no match"
 * @param questions the currently stored questions
 * @param newQuestions the received questions
 * @returns the kept stored questions followed by the re-dated new questions
 */
export function updateQuestionsInArray(
  questionIndexesToRemove: number[][],
  questions: Question[],
  newQuestions: Question[]
): Question[] {
  const hasDuplicates = (matches: number[] | undefined): boolean =>
    matches !== undefined && matches.length > 1

  // A Set gives constant-time lookups while filtering the stored questions.
  const indexesToRemove = new Set<number>()
  questionIndexesToRemove.forEach((matches) => {
    if (hasDuplicates(matches)) {
      matches.forEach((index) => indexesToRemove.add(index))
    }
  })

  const keptQuestions = questions.filter((_x, i) => !indexesToRemove.has(i))

  // Dates are stored as millisecond numbers everywhere else; a Date object
  // would turn into a string once the database is serialized to JSON.
  const now = new Date().getTime()
  const questionsToAdd: Question[] = newQuestions
    .filter((_q, i) => hasDuplicates(questionIndexesToRemove[i]))
    .map((x) => ({
      ...x,
      data: {
        ...x.data,
        date: now,
      },
    }))

  return [...keptQuestions, ...questionsToAdd]
}
export function isQuestionValid(question: Question): boolean { export function isQuestionValid(question: Question): boolean {
if (!question.Q) { if (!question.Q) {
return false return false
@ -435,12 +542,12 @@ export function backupData(questionDbs: Array<QuestionDb>): void {
const path = './publicDirs/qminingPublic/backs/' const path = './publicDirs/qminingPublic/backs/'
utils.CreatePath(path) utils.CreatePath(path)
try { try {
logger.Log(`Backing up ${data.name}...`) // logger.Log(`Backing up ${data.name}...`)
writeData( writeData(
data.data, data.data,
`${path}${data.name}_${utils.GetDateString(undefined, true)}.json` `${path}${data.name}_${utils.GetDateString(undefined, true)}.json`
) )
logger.Log('Done') // logger.Log('Done')
} catch (err) { } catch (err) {
logger.Log( logger.Log(
`Error backing up data file ${data.name}!`, `Error backing up data file ${data.name}!`,

View file

@ -17,6 +17,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>. along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
// FIXME: this should be renamed to worker.ts or something
import { isMainThread, parentPort, workerData } from 'worker_threads' import { isMainThread, parentPort, workerData } from 'worker_threads'
@ -27,12 +28,13 @@ import {
QuestionDb, QuestionDb,
Subject, Subject,
} from '../types/basicTypes' } from '../types/basicTypes'
import { editDb, Edits } from './actions' import { editDb, Edits, updateQuestionsInArray } from './actions'
// import { TaskObject } from './workerPool'
export interface WorkerResult { export interface WorkerResult {
msg: string msg: string
workerIndex: number workerIndex: number
result?: SearchResultQuestion[] result?: SearchResultQuestion[] | number[][]
error?: boolean error?: boolean
} }
@ -115,12 +117,15 @@ function removeUnnecesarySpaces(toremove: string): string {
return normalizeSpaces(toremove).replace(/\s+/g, ' ') return normalizeSpaces(toremove).replace(/\s+/g, ' ')
} }
export function compareString( function compareString(
s1: string, s1: string,
s1a: Array<string>,
s2: string, s2: string,
s2a: Array<string> s1cache?: Array<string>,
s2cache?: Array<string>
): number { ): number {
const s1a = s1cache || s1.split(' ')
const s2a = s2cache || s2.split(' ')
if (s1 === s2) { if (s1 === s2) {
return 100 return 100
} }
@ -277,16 +282,16 @@ function compareImage(data: QuestionData, data2: QuestionData): number {
if (data.hashedImages && data2.hashedImages) { if (data.hashedImages && data2.hashedImages) {
return compareString( return compareString(
data.hashedImages.join(' '), data.hashedImages.join(' '),
data.hashedImages,
data2.hashedImages.join(' '), data2.hashedImages.join(' '),
data.hashedImages,
data2.hashedImages data2.hashedImages
) )
} else if (data.images && data2.images) { } else if (data.images && data2.images) {
return ( return (
compareString( compareString(
data.images.join(' '), data.images.join(' '),
data.images,
data2.images.join(' '), data2.images.join(' '),
data.images,
data2.images data2.images
) - 10 ) - 10
) )
@ -324,7 +329,7 @@ function compareData(q1: Question, q2: Question): number {
} }
function compareQuestion(q1: Question, q2: Question): number { function compareQuestion(q1: Question, q2: Question): number {
return compareString(q1.Q, q1.cache.Q, q2.Q, q2.cache.Q) return compareString(q1.Q, q2.Q, q1.cache.Q, q2.cache.Q)
// return compareString( // return compareString(
// q1.Q, // q1.Q,
// q1.Q ? q1.Q.split(' ') : [], // q1.Q ? q1.Q.split(' ') : [],
@ -334,7 +339,7 @@ function compareQuestion(q1: Question, q2: Question): number {
} }
function compareAnswer(q1: Question, q2: Question): number { function compareAnswer(q1: Question, q2: Question): number {
return compareString(q1.A, q1.cache.A, q2.A, q2.cache.A) return compareString(q1.A, q2.A, q1.cache.A, q2.cache.A)
// return compareString( // return compareString(
// q1.A, // q1.A,
// q1.A ? q1.A.split(' ') : [], // q1.A ? q1.A.split(' ') : [],
@ -648,7 +653,7 @@ if (!isMainThread) {
`[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty` `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty`
) )
parentPort.on('message', (msg) => { parentPort.on('message', (msg /*: TaskObject */) => {
if (msg.type === 'work') { if (msg.type === 'work') {
const { const {
subjName, subjName,
@ -814,15 +819,93 @@ if (!isMainThread) {
workerIndex: workerIndex, workerIndex: workerIndex,
}) })
// console.log(`[THREAD #${workerIndex}]: newdb`) // console.log(`[THREAD #${workerIndex}]: newdb`)
} else if (msg.type === 'dbClean') {
const removedIndexes = cleanDb(msg.data, qdbs)
const workerResult: WorkerResult = {
msg: `From thread #${workerIndex}: db clean done`,
workerIndex: workerIndex,
result: removedIndexes,
}
parentPort.postMessage(workerResult)
} else if (msg.type === 'rmQuestions') {
const {
questionIndexesToRemove,
subjIndex,
qdbIndex,
recievedQuestions,
} = msg.data
qdbs[qdbIndex].data[subjIndex].Questions = updateQuestionsInArray(
questionIndexesToRemove,
qdbs[qdbIndex].data[subjIndex].Questions,
recievedQuestions
)
parentPort.postMessage({
msg: `From thread #${workerIndex}: rm question done`,
workerIndex: workerIndex,
})
} else { } else {
logger.Log(`Invalid msg type!`, logger.GetColor('redbg')) logger.Log(`Invalid msg type!`, logger.GetColor('redbg'))
console.error(msg) console.error(msg)
parentPort.postMessage({
msg: `From thread #${workerIndex}: Invalid message type (${msg.type})!`,
workerIndex: workerIndex,
})
} }
}) })
} else { } else {
// console.log('[THREAD]: Main thread!') // console.log('[THREAD]: Main thread!')
} }
/**
 * Finds, for every received question, the stored questions it supersedes.
 *
 * The subject is the first entry of `qdbs[qdbIndex].data` whose name contains
 * `subjToClean` (case-insensitive). A stored question is selected when its
 * simplified text compared with the simplified received text scores above
 * `minMatchToNotSearchOtherSubjects`, and it either has no date or a date
 * earlier than `overwriteFromDate`.
 *
 * @param questions the received questions
 * @param subjToClean name fragment of the subject to inspect
 * @param overwriteFromDate cut-off timestamp; only older stored questions are selected
 * @param qdbIndex index of the database inside `qdbs`
 * @param qdbs all question databases
 * @returns one array of stored-question indexes per received question, in
 *   the same order; every array is empty when the subject is not found
 */
export function cleanDb(
  {
    questions: recievedQuestions,
    subjToClean,
    overwriteFromDate,
    qdbIndex,
  }: {
    questions: Question[]
    subjToClean: string
    overwriteFromDate: number
    qdbIndex: number
  },
  qdbs: QuestionDb[]
): number[][] {
  const subject = qdbs[qdbIndex].data.find((candidate) =>
    candidate.Name.toLowerCase().includes(subjToClean.toLowerCase())
  )

  if (!subject) {
    return recievedQuestions.map(() => [])
  }

  return recievedQuestions.map((recievedQuestion) => {
    const matchedIndexes: number[] = []

    subject.Questions.forEach((storedQuestion, i) => {
      const similarity = compareString(
        simplifyQuestion(recievedQuestion.Q),
        simplifyQuestion(storedQuestion.Q)
      )

      // The date is only looked at for questions that are similar enough.
      if (
        similarity > minMatchToNotSearchOtherSubjects &&
        (!storedQuestion.data.date ||
          storedQuestion.data.date < overwriteFromDate)
      ) {
        matchedIndexes.push(i)
      }
    })

    return matchedIndexes
  })
}
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
export { export {

View file

@ -34,8 +34,8 @@ interface WorkerObj {
free: Boolean free: Boolean
} }
interface TaskObject { export interface TaskObject {
type: 'work' | 'dbEdit' | 'newQuestions' | 'newdb' type: 'work' | 'dbEdit' | 'newQuestions' | 'newdb' | 'dbClean' | 'rmQuestions'
data: data:
| { | {
searchIn: number[] searchIn: number[]
@ -50,6 +50,18 @@ interface TaskObject {
| { dbIndex: number; edits: Edits } | { dbIndex: number; edits: Edits }
| QuestionDb | QuestionDb
| Result | Result
| {
questions: Question[]
subjToClean: string
overwriteFromDate: number
qdbIndex: number
}
| {
questionIndexesToRemove: number[][]
subjIndex: number
qdbIndex: number
recievedQuestions: Question[]
}
} }
interface PendingJob { interface PendingJob {

@ -1 +1 @@
Subproject commit ce63911b68e5604c02f5761dd4b5388272b76c6f Subproject commit 9c333008c7049b032e4e3d0dbf5fd5893542ffb7