/* ----------------------------------------------------------------------------

 Question Server
 GitLab:

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <https://www.gnu.org/licenses/>.

 ------------------------------------------------------------------------- */
// FIXME: this should be renamed to worker.ts or something

import { isMainThread, parentPort, workerData } from 'worker_threads'

import { recognizeTextFromBase64, tesseractLoaded } from './tesseract'
import logger from './logger'
import {
  Question,
  QuestionData,
  QuestionDb,
  Subject,
} from '../types/basicTypes'
import { editDb, Edits, updateQuestionsInArray } from './actions'

// import { TaskObject } from './workerPool'

/** Message posted back to the parent thread after a task is handled. */
export interface WorkerResult {
  msg: string
  workerIndex: number
  result?: SearchResultQuestion[] | number[][]
  error?: boolean
}

/** Per-aspect match percentages produced by compareQuestionObj. */
interface DetailedMatch {
  qMatch: number
  aMatch: number
  dMatch: number
  matchedSubjName: string
  avg: number
}

/** A stored question together with how well it matched the searched one. */
export interface SearchResultQuestion {
  q: Question
  match: number
  detailedMatch: DetailedMatch
}

// Each entry is passed to `new RegExp(...)` by removeStuff, so entries are
// regex sources, not plain strings.
const commonUselessAnswerParts = [
  'A helyes válasz az ',
  'A helyes válasz a ',
  'A helyes válaszok: ',
  'A helyes válaszok:',
  'A helyes válasz: ',
  'A helyes válasz:',
  'The correct answer is:',
  "'",
]

// const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']

/* Percent minus for length difference */
const lengthDiffMultiplier = 10
export const noPossibleAnswerMatchPenalty = 5
/* Minimum amount to consider that two questions match during answering */
const minMatchAmmount = 75
/* Early-exit ratio used by compareString: comparing stops as soon as the ratio
 * of matched words to inspected words drops below this value.
 * NOTE(review): this is 0.7, not minMatchAmmount / 100 (0.75) - confirm which
 * one is intended. */
const magicNumber = 0.7
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90

// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------

// Exported
// ---------------------------------------------------------------------------------------------------------

/**
 * Strips a leading `YYYY/YY/N - ` prefix from a subject name.
 *
 * @param subjName - Subject name, e.g. `2020/21/1 - Math`.
 * @returns The part after the first ` - ` when the first part looks like a
 * year/semester marker and a second part exists; otherwise `subjName` as is.
 */
function getSubjNameWithoutYear(subjName: string): string {
  const t = subjName.split(' - ')
  if (t[0].match(/^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i)) {
    return t[1] || subjName
  } else {
    return subjName
  }
}

// Not exported
// ---------------------------------------------------------------------------------------------------------

/** Collapses whitespace into single spaces and lowercases the string. */
function simplifyString(toremove: string): string {
  return toremove.replace(/\s/g, ' ').replace(/\s+/g, ' ').toLowerCase()
}

/**
 * Replaces every occurrence of each removable pattern in `value`.
 *
 * @param value - Text to clean.
 * @param removableStrings - Regex sources; each one is compiled with the `g` flag.
 * @param toReplace - Replacement text, defaults to the empty string.
 */
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
): string {
  removableStrings.forEach((removableString) => {
    const regex = new RegExp(removableString, 'g')
    value = value.replace(regex, toReplace || '')
  })
  return value
}

// damn nonbreaking space
/** Turns every whitespace character (including non-breaking spaces) into a plain space. */
function normalizeSpaces(input: string): string {
  return input.replace(/\s/g, ' ')
}

/** Normalizes whitespace, collapses runs of spaces and trims both ends. */
function removeUnnecesarySpaces(toremove: string): string {
  return normalizeSpaces(toremove)
    .replace(/\s+/g, ' ')
    .replace(/(\r\n|\n|\r)/gm, '')
    .trim()
}

/**
 * Word based similarity of two strings, as a percentage.
 *
 * Words of `s1` are looked up in `s2`; a word only counts when its first
 * occurrence in `s2` lies after the previously matched position. Every word of
 * length difference between the two inputs costs `lengthDiffMultiplier`
 * percent.
 *
 * @param s1 - First string.
 * @param s2 - Second string.
 * @param s1cache - Pre-split words of `s1`; used instead of splitting `s1`.
 * @param s2cache - Pre-split words of `s2`; used instead of splitting `s2`.
 * @returns A number between 0 and 100. Two empty / missing inputs give 100,
 * exactly one empty / missing input gives 0.
 */
function compareString(
  s1: string,
  s2: string,
  s1cache?: Array<string>,
  s2cache?: Array<string>
): number {
  if (s1 === s2) {
    return 100
  }

  // A missing string is treated as having no words instead of throwing on .split
  const s1a = s1cache || (s1 ? s1.split(' ') : [])
  const s2a = s2cache || (s2 ? s2.split(' ') : [])

  // Without this guard an empty s1a would divide by zero below and yield NaN
  if (s1a.length === 0 || s2a.length === 0) {
    return s1a.length === 0 && s2a.length === 0 ? 100 : 0
  }

  let match = 0
  let lastMatchIndex = -2
  let i = 0
  while (i < s1a.length) {
    // On the first iteration this is 0 / 0 = NaN, which never compares as
    // smaller, so the loop always inspects at least one word
    if (match / i < magicNumber) {
      break
    }
    const currMatchIndex = s2a.indexOf(s1a[i])

    if (currMatchIndex !== -1 && lastMatchIndex < currMatchIndex) {
      match++
      lastMatchIndex = currMatchIndex
    }

    i++
  }

  let percent = Math.round(
    parseFloat(((match / s1a.length) * 100).toFixed(2))
  )
  const lengthDifference = Math.abs(s2a.length - s1a.length)
  percent -= lengthDifference * lengthDiffMultiplier
  if (percent < 0) {
    percent = 0
  }
  return percent
}

/** Removes the phrases listed in commonUselessAnswerParts from an answer. */
function answerPreProcessor(value: string): string {
  if (!value) {
    return value
  }

  return removeStuff(value, commonUselessAnswerParts)
}

// 'a. pécsi sör' -> 'pécsi sör'
/**
 * Drops a leading answer marker: when the text before the first `. ` is
 * shorter than two characters it is removed, and the remaining parts are
 * joined with single spaces.
 */
function removeAnswerLetters(value: string): string {
  if (!value) {
    return value
  }

  const val = value.split('. ')
  if (val[0].length < 2 && val.length > 1) {
    val.shift()
    return val.join(' ')
  } else {
    return value
  }
}

/**
 * Runs `value` through the given modifiers in order.
 *
 * @param value - Text to simplify; falsy values are returned untouched.
 * @param mods - String transformers, applied left to right.
 */
function simplifyQA(
  value: string,
  mods: Array<(value: string) => string>
): string {
  if (!value) {
    return value
  }

  return mods.reduce((res, fn) => {
    return fn(res)
  }, value)
}

/** Cleans whitespace, known filler phrases and a leading answer letter from an answer. */
function simplifyAnswer(value: string): string {
  if (!value) {
    return value
  }
  return simplifyQA(value, [
    removeUnnecesarySpaces,
    answerPreProcessor,
    removeAnswerLetters,
  ])
}

/** Cleans whitespace and a leading answer letter from a question text. */
function simplifyQuestion(question: string): string {
  if (!question) {
    return question
  }
  return simplifyQA(question, [removeUnnecesarySpaces, removeAnswerLetters])
}

/**
 * Simplifies the `Q` and `A` fields of a question IN PLACE and returns the
 * same object. Unlike simplifyAnswer, `A` is not run through
 * answerPreProcessor here.
 */
function simplifyQuestionObj(question: Question): Question {
  if (!question) {
    return question
  }
  if (question.Q) {
    question.Q = simplifyQA(question.Q, [
      removeUnnecesarySpaces,
      removeAnswerLetters,
    ])
  }
  if (question.A) {
    question.A = simplifyQA(question.A, [
      removeUnnecesarySpaces,
      removeAnswerLetters,
    ])
  }
  return question
}

// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------

/**
 * Builds a question object with a word cache for fast comparing.
 *
 * @param question - Either the question text, or an existing question object
 * that gets copied and extended with a cache.
 * @param answer - Answer text; only used when `question` is a string.
 * @param data - Question data; only used when `question` is a string.
 * @returns The created question, or `null` when creating it threw (the error
 * is logged).
 */
function createQuestion(
  question: Question | string,
  answer?: string,
  data?: QuestionData
): Question {
  try {
    if (typeof question === 'string') {
      return {
        Q: simplifyQuestion(question),
        A: answer ? simplifyAnswer(answer) : undefined,
        data: data,
        cache: {
          Q: question ? simplifyString(question).split(' ') : [],
          A: answer ? simplifyString(answer).split(' ') : [],
        },
      }
    } else {
      return {
        ...question,
        cache: {
          Q: question.Q ? simplifyString(question.Q).split(' ') : [],
          A: question.A ? simplifyString(question.A).split(' ') : [],
        },
      }
    }
  } catch (err) {
    logger.Log('Error creating question', logger.GetColor('redbg'))
    console.error(question, answer, data)
    console.error(err)
    return null
  }
}

/**
 * Tries to turn a question that carries base64 images into a text question.
 *
 * Every entry of `question.data.base64` is passed to recognizeTextFromBase64.
 * When at least one of them yields non-blank text, a copy of the question is
 * returned with `Q` set to the recognized texts joined by `' '` and
 * `data.type` set to `'simple'`. In every other case the question is returned
 * unchanged.
 */
async function recognizeQuestionImage(question: Question): Promise<Question> {
  const base64Data =
    question && question.data ? question.data.base64 : undefined
  if (Array.isArray(base64Data) && base64Data.length) {
    const res: string[] = []
    for (let i = 0; i < base64Data.length; i++) {
      const base64 = base64Data[i]
      const text = await recognizeTextFromBase64(base64)
      if (text && text.trim()) {
        res.push(text)
      }
    }

    if (res.length) {
      return {
        ...question,
        Q: res.join(' '),
        data: {
          ...question.data,
          type: 'simple',
        },
      }
    }
  }

  return question
}

/**
 * Compares the images of two questions with compareString.
 *
 * Hashed images are preferred; when only plain `images` are available on both
 * sides the result is lowered by 10. Returns 0 when neither pair exists.
 */
function compareImage(data: QuestionData, data2: QuestionData): number {
  if (data.hashedImages && data2.hashedImages) {
    return compareString(
      data.hashedImages.join(' '),
      data2.hashedImages.join(' '),
      data.hashedImages,
      data2.hashedImages
    )
  } else if (data.images && data2.images) {
    return (
      compareString(
        data.images.join(' '),
        data2.images.join(' '),
        data.images,
        data2.images
      ) - 10
    )
  } else {
    return 0
  }
}

/**
 * Compares the `data` part of two questions.
 *
 * @returns `-1` when both are of type `'simple'` (meaning: nothing to compare),
 * the image match when both are of type `'image'`, and `0` when the types
 * differ, the type is not handled, or comparing threw.
 */
function compareData(q1: Question, q2: Question): number {
  try {
    if (q1.data.type === q2.data.type) {
      const dataType = q1.data.type
      if (dataType === 'simple') {
        return -1
      } else if (dataType === 'image') {
        return compareImage(q1.data, q2.data)
      } else {
        logger.DebugLog(
          `Unhandled data type ${dataType}`,
          'Compare question data',
          1
        )
        logger.DebugLog(q1, 'Compare question data', 2)
      }
    } else {
      return 0
    }
  } catch (error) {
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(error.message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
    console.error(error)
  }
  return 0
}

/** Compares the question texts of two questions using their word caches. */
function compareQuestion(q1: Question, q2: Question): number {
  return compareString(q1.Q, q2.Q, q1.cache.Q, q2.cache.Q)
  // return compareString(
  //   q1.Q,
  //   q1.Q ? q1.Q.split(' ') : [],
  //   q2.Q,
  //   q2.Q ? q2.Q.split(' ') : []
  // )
}

/** Compares the answers of two questions using their word caches. */
function compareAnswer(q1: Question, q2: Question): number {
  return compareString(q1.A, q2.A, q1.cache.A, q2.cache.A)
  // return compareString(
  //   q1.A,
  //   q1.A ? q1.A.split(' ') : [],
  //   q2.A,
  //   q2.A ? q2.A.split(' ') : []
  // )
}

/**
 * Compares two questions by text, answer and data.
 *
 * The answer is only compared when `q2` has one, and the data match is left
 * out when compareData reports `-1`. `avg` is the minimum of the aspects that
 * took part.
 *
 * @param q1 - First question.
 * @param _q1subjName - Unused.
 * @param q2 - Second question.
 * @param q2subjName - Copied into `matchedSubjName` of the result.
 */
function compareQuestionObj(
  q1: Question,
  _q1subjName: string,
  q2: Question,
  q2subjName: string
): DetailedMatch {
  const qMatch = compareQuestion(q1, q2)
  const aMatch = q2.A ? compareAnswer(q1, q2) : 0
  // -1 if both questions are simple
  const dMatch = compareData(q1, q2)

  let avg = -1
  if (q2.A) {
    if (dMatch === -1) {
      avg = Math.min(qMatch, aMatch)
    } else {
      avg = Math.min(qMatch, aMatch, dMatch)
    }
  } else {
    if (dMatch === -1) {
      avg = qMatch
    } else {
      avg = Math.min(qMatch, dMatch)
    }
  }

  return {
    qMatch: qMatch,
    aMatch: aMatch,
    dMatch: dMatch,
    matchedSubjName: q2subjName,
    avg: avg,
  }
}

/**
 * Serializes a question as `?<Q>\n!<A>`, followed by `\n><data as JSON>` when
 * the data type is not `'simple'`.
 */
function questionToString(question: Question): string {
  const { Q, A, data } = question

  if (data.type !== 'simple') {
    return '?' + Q + '\n!' + A + '\n>' + JSON.stringify(data)
  } else {
    return '?' + Q + '\n!' + A
  }
}

// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------

/** Sort comparator that orders search results by `match`, best first. */
function byMatchDescending(
  q1: SearchResultQuestion,
  q2: SearchResultQuestion
): number {
  if (q1.match < q2.match) {
    return 1
  } else if (q1.match > q2.match) {
    return -1
  } else {
    return 0
  }
}

/**
 * Tells whether a stored subject belongs to the searched subject name: the
 * stored name (without its year prefix) has to be contained in the searched
 * name, ignoring case. Subjects without a name never match.
 */
function isSubjectNameMatch(searchedName: string, dbSubjName: string): boolean {
  if (!dbSubjName) {
    return false
  }
  return searchedName
    .toLowerCase()
    .includes(getSubjNameWithoutYear(dbSubjName).toLowerCase())
}

/**
 * Searches one subject for questions matching `question`.
 *
 * Questions are inspected from last to first. Everything with a match of at
 * least `minMatchAmmount` is collected.
 *
 * @param subj - Subject to search in.
 * @param question - Prepared question to look for.
 * @param subjName - Searched subject name, forwarded to compareQuestionObj.
 * @param searchTillMatchPercent - When set, searching stops at the first
 * question that reaches this match.
 * @returns The matches, best first.
 */
function searchSubject(
  subj: Subject,
  question: Question,
  subjName: string,
  searchTillMatchPercent?: number
): SearchResultQuestion[] {
  let result: SearchResultQuestion[] = []

  let stopSearch = false
  let i = subj.Questions.length - 1
  while (i >= 0 && !stopSearch) {
    const currentQuestion = subj.Questions[i]
    const percent = compareQuestionObj(
      currentQuestion,
      subjName,
      question,
      subj.Name
    )

    if (percent.avg >= minMatchAmmount) {
      result.push({
        q: currentQuestion,
        match: percent.avg,
        detailedMatch: percent,
      })
    }

    if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
      stopSearch = true
    }

    i--
  }

  result = result.sort(byMatchDescending)

  return result
}

/** Serializes a subject as `+<Name>` followed by its questions, one per line group. */
function subjectToString(subj: Subject): string {
  const { Questions, Name } = subj

  const result: string[] = []
  Questions.forEach((question) => {
    result.push(questionToString(question))
  })

  return '+' + Name + '\n' + result.join('\n')
}

// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------

/**
 * Adds a question to the first subject whose name matches `subj`, or creates
 * a new subject named `subj` when none matches. Mutates `data`.
 */
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)

  const i = data.findIndex((subject) => {
    return isSubjectNameMatch(subj, subject.Name)
  })

  if (i !== -1) {
    logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
    data[i].Questions.push(question)
  } else {
    logger.Log(`Creating new subject: ${subj}`)
    data.push({
      Name: subj,
      Questions: [question],
    })
  }
}

/** Creates a cached copy of the question and simplifies its texts. */
function prepareQuestion(question: Question): Question {
  return simplifyQuestionObj(createQuestion(question))
}

/** Serializes every subject, separated by blank lines. */
function dataToString(data: Array<Subject>): string {
  const result: string[] = []
  data.forEach((subj) => {
    result.push(subjectToString(subj))
  })
  return result.join('\n\n')
}

/**
 * Searches a question database for a question.
 *
 * First only the subjects matching `subjName` are searched. When
 * `searchInAllIfNoResult` is set and none of the results reaches
 * `minMatchToNotSearchOtherSubjects`, the remaining subjects are searched too.
 *
 * @param data - Subjects of the database.
 * @param subjName - Name of the subject the question belongs to.
 * @param question - Question to look for.
 * @param searchTillMatchPercent - When set, searching stops as soon as a
 * subject yields a result with at least this match.
 * @param searchInAllIfNoResult - Enables the fallback search in all subjects.
 * @returns Matches with possible-answer penalties applied, best first.
 */
function doSearch(
  data: Array<Subject>,
  subjName: string,
  question: Question,
  searchTillMatchPercent?: number,
  searchInAllIfNoResult?: boolean
): SearchResultQuestion[] {
  let result: SearchResultQuestion[] = []

  const questionToSearch = prepareQuestion(question)
  // Subjects handled by the name based pass, so the fallback pass does not
  // search them again and produce duplicate results
  const searchedSubjects = new Set<Subject>()

  // Searches one subject; the return value tells Array.every whether to go on
  const searchIn = (subj: Subject): boolean => {
    searchedSubjects.add(subj)
    const subjRes = searchSubject(
      subj,
      questionToSearch,
      subjName,
      searchTillMatchPercent
    )
    result = result.concat(subjRes)

    if (searchTillMatchPercent) {
      return !subjRes.some((sr) => {
        return sr.match >= searchTillMatchPercent
      })
    }
    return true
  }

  data.every((subj) => {
    if (!isSubjectNameMatch(subjName, subj.Name)) {
      return true
    }
    logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
    return searchIn(subj)
  })

  if (searchInAllIfNoResult) {
    // Results of several subjects are concatenated unsorted, so every result
    // has to be checked, not just the first one
    const hasGoodEnoughResult = result.some((sr) => {
      return sr.match >= minMatchToNotSearchOtherSubjects
    })

    if (!hasGoodEnoughResult) {
      logger.DebugLog(
        'Reqults length is zero when comparing names, trying all subjects',
        'searchworker',
        1
      )
      data.every((subj) => {
        if (searchedSubjects.has(subj)) {
          return true
        }
        return searchIn(subj)
      })
    }
  }

  result = setNoPossibleAnswersPenalties(
    questionToSearch && questionToSearch.data
      ? questionToSearch.data.possibleAnswers
      : undefined,
    result
  )

  result = result.sort(byMatchDescending)

  return result
}

/**
 * Lowers the match of results whose possible answers differ from the possible
 * answers of the searched question.
 *
 * A result counts as matching when the number of its possible answers whose
 * `val` is contained in the `val` of some searched possible answer equals the
 * number of searched possible answers. Every other result loses
 * `noPossibleAnswerMatchPenalty` from `match` and `detailedMatch.qMatch`.
 *
 * The input is returned untouched when the searched question has no possible
 * answers, when no result has possible answers, or when no result matches.
 */
function setNoPossibleAnswersPenalties(
  questionPossibleAnswers: QuestionData['possibleAnswers'],
  results: SearchResultQuestion[]
): SearchResultQuestion[] {
  if (!Array.isArray(questionPossibleAnswers)) {
    return results
  }
  const noneHasPossibleAnswers = results.every((x) => {
    return !Array.isArray(x.q.data.possibleAnswers)
  })
  if (noneHasPossibleAnswers) return results

  let possibleAnswerMatch = false
  const updated = results.map((result) => {
    const matchCount = Array.isArray(result.q.data.possibleAnswers)
      ? result.q.data.possibleAnswers.filter((resultPossibleAnswer) => {
          return questionPossibleAnswers.some(
            (questionPossibleAnswer) => {
              if (
                questionPossibleAnswer.val &&
                resultPossibleAnswer.val
              ) {
                return questionPossibleAnswer.val.includes(
                  resultPossibleAnswer.val
                )
              } else {
                return false
              }
            }
          )
        }).length
      : 0

    if (matchCount === questionPossibleAnswers.length) {
      possibleAnswerMatch = true
      return result
    } else {
      return {
        ...result,
        match: result.match - noPossibleAnswerMatchPenalty,
        detailedMatch: {
          ...result.detailedMatch,
          qMatch:
            result.detailedMatch.qMatch - noPossibleAnswerMatchPenalty,
        },
      }
    }
  })

  if (possibleAnswerMatch) {
    return updated
  } else {
    return results
  }
}

// ---------------------------------------------------------------------------------------------------------
// Multi threaded stuff
// ---------------------------------------------------------------------------------------------------------

/** Payload of a `work` message. */
interface WorkData {
  subjName: string
  question: Question
  searchTillMatchPercent: number
  searchInAllIfNoResult: boolean
  searchIn: number[]
  index: number
}

if (!isMainThread) {
  handleWorkerData()
}

/**
 * Worker thread entry point.
 *
 * Takes the initial question databases from `workerData`, logs a summary and
 * then handles messages from the parent thread. Handled message types are
 * `work`, `dbEdit`, `newQuestions`, `newdb`, `dbClean` and `rmQuestions`;
 * every handled message - including one of an unknown type - is answered
 * with a `postMessage`.
 */
function handleWorkerData() {
  const {
    workerIndex,
    initData,
  }: { workerIndex: number; initData: Array<QuestionDb> } = workerData
  let qdbs: Array<QuestionDb> = initData

  const qdbCount = initData.length
  const subjCount = initData.reduce((sCount, qdb) => {
    return sCount + qdb.data.length
  }, 0)
  const questionCount = initData.reduce((qCount, qdb) => {
    return (
      qCount +
      qdb.data.reduce((sCount, subject) => {
        return sCount + subject.Questions.length
      }, 0)
    )
  }, 0)

  logger.Log(
    `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty! qdbs: ${qdbCount}, subjects: ${subjCount}, questions: ${questionCount}`
  )

  parentPort.on('message', async (msg /*: TaskObject */) => {
    await tesseractLoaded

    if (msg.type === 'work') {
      const {
        subjName,
        question: originalQuestion,
        searchTillMatchPercent,
        searchInAllIfNoResult,
        searchIn,
        index,
      }: WorkData = msg.data

      let searchResult: SearchResultQuestion[] = []
      let error = false
      // Stays the original question when image recognition throws, so the
      // error report below still has something to print
      let question = originalQuestion

      try {
        // Inside the try block: a recognition failure must still produce an
        // answer for the parent thread instead of rejecting the handler
        question = await recognizeQuestionImage(originalQuestion)

        qdbs.forEach((qdb) => {
          if (searchIn.includes(qdb.index)) {
            const res = doSearch(
              qdb.data,
              subjName,
              question,
              searchTillMatchPercent,
              searchInAllIfNoResult
            )
            searchResult = [
              ...searchResult,
              ...res.map((x) => {
                return {
                  ...x,
                  detailedMatch: {
                    ...x.detailedMatch,
                    qdb: qdb.name,
                  },
                }
              }),
            ]
          }
        })
      } catch (err) {
        logger.Log('Error in worker thread!', logger.GetColor('redbg'))
        console.error(err)
        console.error(
          JSON.stringify(
            {
              subjName: subjName,
              question: question,
              searchTillMatchPercent: searchTillMatchPercent,
              searchInAllIfNoResult: searchInAllIfNoResult,
              searchIn: searchIn,
              index: index,
            },
            null,
            2
          )
        )
        error = true
      }

      // sorting
      const sortedResult: SearchResultQuestion[] =
        searchResult.sort(byMatchDescending)

      const workerResult: WorkerResult = {
        msg: `From thread #${workerIndex}: job ${
          !isNaN(index) ? `#${index}` : ''
        }done`,
        workerIndex: workerIndex,
        result: sortedResult,
        error: error,
      }

      // ONDONE:
      parentPort.postMessage(workerResult)

      // console.log(
      //   `[THREAD #${workerIndex}]: Work ${
      //     !isNaN(index) ? `#${index}` : ''
      //   }done!`
      // )
    } else if (msg.type === 'dbEdit') {
      const { dbIndex, edits }: { dbIndex: number; edits: Edits } = msg.data
      // NOTE(review): dbIndex is used as an array position here, while the
      // 'work' and 'newQuestions' branches match on qdb.index - confirm that
      // the two are always the same
      const { resultDb } = editDb(qdbs[dbIndex], edits)
      qdbs[dbIndex] = resultDb
      logger.DebugLog(`Worker db edit ${workerIndex}`, 'worker update', 1)

      parentPort.postMessage({
        msg: `From thread #${workerIndex}: db edit`,
        workerIndex: workerIndex,
      })
    } else if (msg.type === 'newQuestions') {
      const {
        subjName,
        qdbIndex,
        newQuestions,
      }: {
        subjName: string
        qdbIndex: number
        newQuestions: Question[]
      } = msg.data

      let added = false
      qdbs = qdbs.map((qdb) => {
        if (qdb.index === qdbIndex) {
          return {
            ...qdb,
            data: qdb.data.map((subj) => {
              if (subj.Name === subjName) {
                added = true
                return {
                  Name: subj.Name,
                  Questions: [
                    ...subj.Questions,
                    ...newQuestions,
                  ],
                }
              } else {
                return subj
              }
            }),
          }
        } else {
          return qdb
        }
      })

      // No subject with this exact name: append it as a new subject
      if (!added) {
        qdbs = qdbs.map((qdb) => {
          if (qdb.index === qdbIndex) {
            return {
              ...qdb,
              data: [
                ...qdb.data,
                {
                  Name: subjName,
                  Questions: [...newQuestions],
                },
              ],
            }
          } else {
            return qdb
          }
        })
      }
      logger.DebugLog(
        `Worker new question ${workerIndex}`,
        'worker update',
        1
      )

      parentPort.postMessage({
        msg: `From thread #${workerIndex}: update done`,
        workerIndex: workerIndex,
      })

      // console.log(`[THREAD #${workerIndex}]: update`)
    } else if (msg.type === 'newdb') {
      const { data }: { data: QuestionDb } = msg
      qdbs.push(data)

      parentPort.postMessage({
        msg: `From thread #${workerIndex}: new db add done`,
        workerIndex: workerIndex,
      })
      // console.log(`[THREAD #${workerIndex}]: newdb`)
    } else if (msg.type === 'dbClean') {
      const removedIndexes = cleanDb(msg.data, qdbs)

      const workerResult: WorkerResult = {
        msg: `From thread #${workerIndex}: db clean done`,
        workerIndex: workerIndex,
        result: removedIndexes,
      }

      parentPort.postMessage(workerResult)
    } else if (msg.type === 'rmQuestions') {
      const {
        questionIndexesToRemove,
        subjIndex,
        qdbIndex,
        recievedQuestions,
      } = msg.data

      qdbs[qdbIndex].data[subjIndex].Questions = updateQuestionsInArray(
        questionIndexesToRemove,
        qdbs[qdbIndex].data[subjIndex].Questions,
        recievedQuestions
      )

      parentPort.postMessage({
        msg: `From thread #${workerIndex}: rm question done`,
        workerIndex: workerIndex,
      })
    } else {
      logger.Log(`Invalid msg type!`, logger.GetColor('redbg'))
      console.error(msg)

      parentPort.postMessage({
        msg: `From thread #${workerIndex}: Invalid message type (${msg.type})!`,
        workerIndex: workerIndex,
      })
    }
  })
}

/**
 * Finds the stored questions that the received questions should replace.
 *
 * The subject is the first one in `qdbs[qdbIndex]` whose name contains
 * `subjToClean` (ignoring case). A stored question is selected when its text
 * matches the received one by more than `minMatchToNotSearchOtherSubjects`
 * percent and it either has no `data.date` or that date is before
 * `overwriteFromDate`.
 *
 * @returns For each received question, the indexes of the selected stored
 * questions within the subject. Every list is empty when the database or the
 * subject is not found.
 */
export function cleanDb(
  {
    questions: recievedQuestions,
    subjToClean,
    overwriteFromDate,
    qdbIndex,
  }: {
    questions: Question[]
    subjToClean: string
    overwriteFromDate: number
    qdbIndex: number
  },
  qdbs: QuestionDb[]
): number[][] {
  const qdb = qdbs[qdbIndex]
  if (!qdb) {
    return recievedQuestions.map(() => [])
  }

  const subjIndex = qdb.data.findIndex((x) => {
    return x.Name.toLowerCase().includes(subjToClean.toLowerCase())
  })
  const subj = qdb.data[subjIndex]

  if (!subj) {
    return recievedQuestions.map(() => [])
  }

  // Simplified once up front instead of once per received question
  const simplifiedStoredQuestions = subj.Questions.map((question) => {
    return simplifyQuestion(question.Q)
  })

  // FIXME: compare images & data too!
  const questionIndexesToRemove = recievedQuestions.map((recievedQuestion) => {
    const simplifiedRecieved = simplifyQuestion(recievedQuestion.Q)
    const indexes: number[] = []

    subj.Questions.forEach((question, i) => {
      const res = compareString(
        simplifiedRecieved,
        simplifiedStoredQuestions[i]
      )

      if (
        res > minMatchToNotSearchOtherSubjects &&
        (!question.data.date || question.data.date < overwriteFromDate)
      ) {
        indexes.push(i)
      }
    })

    return indexes
  })

  return questionIndexesToRemove
}

// ------------------------------------------------------------------------

export {
  compareQuestionObj,
  minMatchAmmount,
  getSubjNameWithoutYear,
  createQuestion,
  addQuestion,
  dataToString,
  doSearch,
  setNoPossibleAnswersPenalties,
}