From 5c2b46f2a3d46927993b1cb9dc1bbe0f554d1148 Mon Sep 17 00:00:00 2001 From: mrfry Date: Mon, 16 May 2022 16:22:09 +0200 Subject: [PATCH] Old question removing and updating --- src/modules/api/submodules/qminingapi.ts | 3 +- src/tests/oldQuestionRemoving.test.ts | 197 +++++++++++++++++++++++ src/types/basicTypes.ts | 4 + src/utils/actions.ts | 151 ++++++++++++++--- src/utils/classes.ts | 103 ++++++++++-- src/utils/workerPool.ts | 16 +- submodules/qmining-page | 2 +- 7 files changed, 440 insertions(+), 36 deletions(-) diff --git a/src/modules/api/submodules/qminingapi.ts b/src/modules/api/submodules/qminingapi.ts index 60805f8..610606b 100644 --- a/src/modules/api/submodules/qminingapi.ts +++ b/src/modules/api/submodules/qminingapi.ts @@ -53,6 +53,7 @@ import { dataToString, getSubjNameWithoutYear, WorkerResult, + SearchResultQuestion, // compareQuestionObj, } from '../../../utils/classes' import { @@ -194,7 +195,7 @@ function searchInDbs( logger.DebugLog(taskResult, 'ask', 2) resolve({ question: question, - result: taskResult.result, + result: taskResult.result as SearchResultQuestion[], success: true, }) } catch (err) { diff --git a/src/tests/oldQuestionRemoving.test.ts b/src/tests/oldQuestionRemoving.test.ts index e69de29..1445aa6 100644 --- a/src/tests/oldQuestionRemoving.test.ts +++ b/src/tests/oldQuestionRemoving.test.ts @@ -0,0 +1,197 @@ +import { updateQuestionsInArray } from '../utils/actions' +import { createQuestion } from '../utils/classes' +import { cleanDb } from '../utils/classes' +import { QuestionDb, Subject, Question } from '../types/basicTypes' + +const date = (x?: number) => new Date().getTime() + (x || 0) + +const q1 = createQuestion( + 'A kötvény és a részvény közös tulajdonsága, hogy TOREMOVE', + 'piaci áruk eltérhet a névértéktől.', + { + type: 'simple', + date: date(-1000), + } +) +const q2 = createQuestion( + 'A kötvény és a részvény közös tulajdonsága, hogy TOREMOVE', + 'afjléa gféda gfdjs légf', + { + type: 'simple', + date: date(-1000), + } +) +const q3 = createQuestion( + 'A kötvény és a részvény közös tulajdonsága, hogy TOREMOVE', + 'afjlsd gfds dgfs gf sdgf d', + { + type: 'simple', + date: date(-1000), + } +) +const q4 = createQuestion( + 'A kötvény névértéke', + 'A kötvényen feltüntetett meghatározott nagyságú összeg.', + { + type: 'simple', + date: date(-1000), + } +) +const q5 = createQuestion( + 'Mi az osztalék? asd asd', + 'A vállalati profit egy része..', + { + type: 'simple', + date: date(1000), + } +) +const q6 = createQuestion( + 'valaim nagyon értelmes kérdés asd asd', + 'A vállalati profit egy része..', + { + type: 'simple', + date: date(1000), + } +) + +function setupTest({ + newQuestions, + data, + subjToClean, +}: { + newQuestions: Question[] + data: Subject[] + subjToClean?: string +}) { + const recievedQuestions: Question[] = newQuestions.map((x) => { + return { + ...x, + data: { + ...x.data, + date: date(), + }, + } + }) + const subjName = subjToClean || 'subject' + const overwriteFromDate = date(-100) + const qdbIndex = 0 + const qdbs: QuestionDb[] = [ + { + name: 'test', + data: data, + index: 0, + path: '', + shouldSearch: 'asd', + shouldSave: {}, + }, + ] + const subjIndex = qdbs[qdbIndex].data.findIndex((x) => { + return x.Name.toLowerCase().includes(subjName.toLowerCase()) + }) + + const questionIndexesToRemove = cleanDb( + { + questions: recievedQuestions, + subjToClean: subjName, + overwriteFromDate: overwriteFromDate, + qdbIndex: qdbIndex, + }, + qdbs + ) + + const updatedQuestions = updateQuestionsInArray( + questionIndexesToRemove, + qdbs[qdbIndex].data[subjIndex].Questions, + recievedQuestions + ) + + return { + questionIndexesToRemove: questionIndexesToRemove, + updatedQuestions: updatedQuestions, + overwriteFromDate: overwriteFromDate, + subjIndex: subjIndex, + } +} + +const s1: Subject = { Name: 'test subject', Questions: [q1, q2, q4, q5] } + +test('Old and duplicate questions should be removed from the database', () => { + const { questionIndexesToRemove, updatedQuestions, overwriteFromDate } = + setupTest({ newQuestions: [q1, q4, q5], data: [s1] }) + + expect(questionIndexesToRemove.length).toBe(3) + expect(questionIndexesToRemove[0].length).toBe(2) + + expect(updatedQuestions.length).toBe(3) + const toremoveCount = updatedQuestions.filter((question) => { + return question.Q.includes('TOREMOVE') + }).length + expect(toremoveCount).toBe(1) + const newQuestion = updatedQuestions.find((question) => { + return question.Q.includes('TOREMOVE') + }) + expect(newQuestion.data.date > overwriteFromDate).toBeTruthy() +}) + +const s2: Subject = { + Name: 'test subject', + Questions: [q1, q2, q3, q4, q5, q6], +} + +test('Old and duplicate questions should be removed from the database round 2', () => { + const { questionIndexesToRemove, updatedQuestions, overwriteFromDate } = + setupTest({ newQuestions: [q1, q4, q5], data: [s2] }) + + expect(questionIndexesToRemove.length).toBe(3) + expect(questionIndexesToRemove[0].length).toBe(3) + + expect(updatedQuestions.length).toBe(4) + const toremoveCount = updatedQuestions.filter((question) => { + return question.Q.includes('TOREMOVE') + }).length + expect(toremoveCount).toBe(1) + const newQuestion = updatedQuestions.find((question) => { + return question.Q.includes('TOREMOVE') + }) + expect(newQuestion.data.date > overwriteFromDate).toBeTruthy() +}) + +const s3: Subject = { + Name: 'test subject', + Questions: [q5, q6].map((x) => ({ + ...x, + data: { + ...x.data, + date: date(+50000), + }, + })), +} + +test('Old and duplicate questions should be removed from the database: questions should be left alone when they are newer', () => { + const { questionIndexesToRemove, updatedQuestions } = setupTest({ + newQuestions: [q5, q6], + data: [s3], + }) + + expect(questionIndexesToRemove.length).toBe(2) + questionIndexesToRemove.forEach((x) => { + expect(x.length).toBe(0) + }) + + expect(updatedQuestions.length).toBe(2) +}) + +const s4: Subject = { + Name: 'something else', + Questions: [q5, q6], +} + +test('Old and duplicate questions should be removed from the database:other subjects should be left alone', () => { + const { subjIndex } = setupTest({ + newQuestions: [q5, q6], + data: [s2, s1, s4, s3], + subjToClean: 'else', + }) + + expect(subjIndex).toBe(2) +}) diff --git a/src/types/basicTypes.ts b/src/types/basicTypes.ts index fb170af..f446c8c 100644 --- a/src/types/basicTypes.ts +++ b/src/types/basicTypes.ts @@ -56,6 +56,10 @@ export interface DataFile { path: string name: string locked?: Boolean + overwrites?: Array<{ + subjName: string + overwriteFromDate: number + }> shouldSearch: | string | { diff --git a/src/utils/actions.ts b/src/utils/actions.ts index 01fb822..48cdeca 100755 --- a/src/utils/actions.ts +++ b/src/utils/actions.ts @@ -27,7 +27,7 @@ import { WorkerResult, SearchResultQuestion, } from '../utils/classes' -import { doALongTask } from './workerPool' +import { doALongTask, msgAllWorker } from './workerPool' import idStats from '../utils/ids' import utils from '../utils/utils' import { addQuestion, getSubjNameWithoutYear } from './classes' @@ -201,32 +201,20 @@ function processIncomingRequestUsingDb( .then((results: Array) => { const allQuestions: Question[] = [] // all new questions here that do not have result results.forEach((result: WorkerResult, i) => { - const add = result.result.every((res: SearchResultQuestion) => { - return res.match < minMatchAmmountToAdd - }) + const add = (result.result as SearchResultQuestion[]).every( + (res: SearchResultQuestion) => { + return res.match < minMatchAmmountToAdd + } + ) if (add && !result.error) { allQuestions.push(recievedQuestions[i]) } }) try { + const subjName = getSubjNameWithoutYear(recievedData.subj) if (allQuestions.length > 0) { - allQuestions.forEach((currentQuestion) => { - const sName = getSubjNameWithoutYear(recievedData.subj) - logger.DebugLog( - 'Adding question with subjName: ' + sName + ' :', - 'isadding', - 3 - ) - logger.DebugLog(currentQuestion, 'isadding', 3) - addQuestion(qdb.data, sName, { - ...currentQuestion, - data: { - ...currentQuestion.data, - date: new Date().getTime(), - }, - }) - }) + addQuestionsToDb(allQuestions, subjName, qdb) currWrites++ logger.DebugLog( @@ -258,6 +246,7 @@ function processIncomingRequestUsingDb( qdbIndex: qdb.index, qdbName: qdb.name, }) + runCleanWorker(recievedData.quiz, subjName, qdb) } catch (error) { console.error(error) logger.Log( @@ -287,6 +276,124 @@ function processIncomingRequestUsingDb( }) } +function addQuestionsToDb( + allQuestions: Question[], + subjName: string, + qdb: QuestionDb +) { + allQuestions.forEach((currentQuestion) => { + logger.DebugLog( + 'Adding question with subjName: ' + subjName + ' :', + 'isadding', + 3 + ) + logger.DebugLog(currentQuestion, 'isadding', 3) + addQuestion(qdb.data, subjName, { + ...currentQuestion, + data: { + ...currentQuestion.data, + date: new Date().getTime(), + }, + }) + }) +} + +function runCleanWorker( + recievedQuesitons: Question[], + subjName: string, + qdb: QuestionDb +) { + if (qdb.overwrites && qdb.overwrites.length) { + // check if subject needs to be updated, and qdb has overwriteFromDate + const overwrite = qdb.overwrites.find((x) => { + return subjName.toLowerCase().includes(x.subjName.toLowerCase()) + }) + + if (!overwrite) { + return + } + // logger.Log( + // `\tStarting cleaning in subject "${logger.C( + // 'green' + // )}${subjName}${logger.C('')}" (matched: "${logger.C('green')}${ + // overwrite.subjName + // }${logger.C('')}")` + // ) + // pass recieved questions to a worker + doALongTask({ + type: 'dbClean', + data: { + questions: recievedQuesitons, + subjToClean: subjName, + overwriteFromDate: overwrite.overwriteFromDate, + qdbIndex: qdb.index, + }, + }).then(({ result: questionIndexesToRemove }) => { + const subjIndex = qdb.data.findIndex((x) => { + return x.Name.toLowerCase().includes(subjName.toLowerCase()) + }) + // sends msgs to all workers to remove it too + + msgAllWorker({ + type: 'rmQuestions', + data: { + questionIndexesToRemove: questionIndexesToRemove as number[][], + subjIndex: subjIndex, + qdbIndex: qdb.index, + recievedQuestions: recievedQuesitons, + }, + }) + + // it adds the recieved question WITH DATE! + // recievedQuestions doesnt have date-s + qdb.data[subjIndex].Questions = updateQuestionsInArray( + questionIndexesToRemove as number[][], + qdb.data[subjIndex].Questions, + recievedQuesitons + ) + + // saves the file + writeData(qdb.data, qdb.path) + logger.Log( + `\tRemoved ${logger.C('green')}${ + (questionIndexesToRemove as number[][]).filter( + (x: number[]) => x.length > 1 + ).length + }${logger.C()} old questions from ${logger.C( + 'green' + )}${subjName}${logger.C()}` + ) + }) + } +} + +export function updateQuestionsInArray( + questionIndexesToRemove: number[][], + questions: Question[], + newQuestions: Question[] +): Question[] { + const indexesToRemove = questionIndexesToRemove.reduce((acc, x) => { + if (x.length > 1) { + return [...acc, ...x] + } + return acc + }, []) + + const newQuestionsToAdd: Question[] = newQuestions.filter((_q, i) => { + return questionIndexesToRemove[i].length > 1 + }) + + return [ + ...questions.filter((_x, i) => { + return !indexesToRemove.includes(i) + }), + ...newQuestionsToAdd.map((x) => { + x.data.date = new Date() + return x + }), + ] +} + export function isQuestionValid(question: Question): boolean { if (!question.Q) { return false @@ -435,12 +542,12 @@ export function backupData(questionDbs: Array): void { const path = './publicDirs/qminingPublic/backs/' utils.CreatePath(path) try { - logger.Log(`Backing up ${data.name}...`) + // logger.Log(`Backing up ${data.name}...`) writeData( data.data, `${path}${data.name}_${utils.GetDateString(undefined, true)}.json` ) - logger.Log('Done') + // logger.Log('Done') } catch (err) { logger.Log( `Error backing up data file ${data.name}!`, diff --git a/src/utils/classes.ts b/src/utils/classes.ts index d5eb488..306f8af 100755 --- a/src/utils/classes.ts +++ b/src/utils/classes.ts @@ -17,6 +17,7 @@ along with this program. If not, see . ------------------------------------------------------------------------- */ +// FIXME: this should be renamed to worker.ts or something import { isMainThread, parentPort, workerData } from 'worker_threads' @@ -27,12 +28,13 @@ import { QuestionDb, Subject, } from '../types/basicTypes' -import { editDb, Edits } from './actions' +import { editDb, Edits, updateQuestionsInArray } from './actions' +// import { TaskObject } from './workerPool' export interface WorkerResult { msg: string workerIndex: number - result?: SearchResultQuestion[] + result?: SearchResultQuestion[] | number[][] error?: boolean } @@ -115,12 +117,15 @@ function removeUnnecesarySpaces(toremove: string): string { return normalizeSpaces(toremove).replace(/\s+/g, ' ') } -export function compareString( +function compareString( s1: string, - s1a: Array, s2: string, - s2a: Array + s1cache?: Array, + s2cache?: Array ): number { + const s1a = s1cache || s1.split(' ') + const s2a = s2cache || s2.split(' ') + if (s1 === s2) { return 100 } @@ -277,16 +282,16 @@ function compareImage(data: QuestionData, data2: QuestionData): number { if (data.hashedImages && data2.hashedImages) { return compareString( data.hashedImages.join(' '), - data.hashedImages, data2.hashedImages.join(' '), + data.hashedImages, data2.hashedImages ) } else if (data.images && data2.images) { return ( compareString( data.images.join(' '), - data.images, data2.images.join(' '), + data.images, data2.images ) - 10 ) @@ -324,7 +329,7 @@ function compareData(q1: Question, q2: Question): number { } function compareQuestion(q1: Question, q2: Question): number { - return compareString(q1.Q, q1.cache.Q, q2.Q, q2.cache.Q) + return compareString(q1.Q, q2.Q, q1.cache.Q, q2.cache.Q) // return compareString( // q1.Q, // q1.Q ? q1.Q.split(' ') : [], @@ -334,7 +339,7 @@ function compareQuestion(q1: Question, q2: Question): number { } function compareAnswer(q1: Question, q2: Question): number { - return compareString(q1.A, q1.cache.A, q2.A, q2.cache.A) + return compareString(q1.A, q2.A, q1.cache.A, q2.cache.A) // return compareString( // q1.A, // q1.A ? q1.A.split(' ') : [], @@ -648,7 +653,7 @@ if (!isMainThread) { `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty` ) - parentPort.on('message', (msg) => { + parentPort.on('message', (msg /*: TaskObject */) => { if (msg.type === 'work') { const { subjName, @@ -814,15 +819,93 @@ if (!isMainThread) { workerIndex: workerIndex, }) // console.log(`[THREAD #${workerIndex}]: newdb`) + } else if (msg.type === 'dbClean') { + const removedIndexes = cleanDb(msg.data, qdbs) + + const workerResult: WorkerResult = { + msg: `From thread #${workerIndex}: db clean done`, + workerIndex: workerIndex, + result: removedIndexes, + } + + parentPort.postMessage(workerResult) + } else if (msg.type === 'rmQuestions') { + const { + questionIndexesToRemove, + subjIndex, + qdbIndex, + recievedQuestions, + } = msg.data + + qdbs[qdbIndex].data[subjIndex].Questions = updateQuestionsInArray( + questionIndexesToRemove, + qdbs[qdbIndex].data[subjIndex].Questions, + recievedQuestions + ) + + parentPort.postMessage({ + msg: `From thread #${workerIndex}: rm question done`, + workerIndex: workerIndex, + }) } else { logger.Log(`Invalid msg type!`, logger.GetColor('redbg')) console.error(msg) + + parentPort.postMessage({ + msg: `From thread #${workerIndex}: Invalid message type (${msg.type})!`, + workerIndex: workerIndex, + }) } }) } else { // console.log('[THREAD]: Main thread!') } +export function cleanDb( + { + questions: recievedQuestions, + subjToClean, + overwriteFromDate, + qdbIndex, + }: { + questions: Question[] + subjToClean: string + overwriteFromDate: number + qdbIndex: number + }, + qdbs: QuestionDb[] +): number[][] { + const subjIndex = qdbs[qdbIndex].data.findIndex((x) => { + return x.Name.toLowerCase().includes(subjToClean.toLowerCase()) + }) + + if (!qdbs[qdbIndex].data[subjIndex]) { + return recievedQuestions.map(() => []) + } + + const questionIndexesToRemove = recievedQuestions.map((recievedQuestion) => + qdbs[qdbIndex].data[subjIndex].Questions.reduce( + (acc, question, i) => { + const res = compareString( + simplifyQuestion(recievedQuestion.Q), + simplifyQuestion(question.Q) + ) + + if ( + res > minMatchToNotSearchOtherSubjects && + (!question.data.date || question.data.date < overwriteFromDate) + ) { + return [...acc, i] + } + return acc + }, + [] + ) + ) + + return questionIndexesToRemove +} + // ------------------------------------------------------------------------ export { diff --git a/src/utils/workerPool.ts b/src/utils/workerPool.ts index 6e12c65..ce1aab7 100644 --- a/src/utils/workerPool.ts +++ b/src/utils/workerPool.ts @@ -34,8 +34,8 @@ interface WorkerObj { free: Boolean } -interface TaskObject { - type: 'work' | 'dbEdit' | 'newQuestions' | 'newdb' +export interface TaskObject { + type: 'work' | 'dbEdit' | 'newQuestions' | 'newdb' | 'dbClean' | 'rmQuestions' data: | { searchIn: number[] @@ -50,6 +50,18 @@ interface TaskObject { | { dbIndex: number; edits: Edits } | QuestionDb | Result + | { + questions: Question[] + subjToClean: string + overwriteFromDate: number + qdbIndex: number + } + | { + questionIndexesToRemove: number[][] + subjIndex: number + qdbIndex: number + recievedQuestions: Question[] + } } interface PendingJob { diff --git a/submodules/qmining-page b/submodules/qmining-page index ce63911..9c33300 160000 --- a/submodules/qmining-page +++ b/submodules/qmining-page @@ -1 +1 @@ -Subproject commit ce63911b68e5604c02f5761dd4b5388272b76c6f +Subproject commit 9c333008c7049b032e4e3d0dbf5fd5893542ffb7