import {
  Worker,
  WorkerOptions,
  isMainThread,
  parentPort,
  workerData,
} from 'worker_threads'
import logger from './logger'
import {
  Question,
  QuestionDb,
  QuestionData,
  Subject,
} from '../types/basicTypes'

interface SearchResultQuestion extends Question {
  match: number
}

/**
 * Result of searching one question database.
 *
 * NOTE(review): the search worker in this file posts items shaped like
 * `{ q, match, detailedMatch }` (see `searchQuestion`), which `searchData`
 * forwards untouched as `result`. Confirm that the declared element type
 * matches what callers actually read.
 */
export interface SearchResult {
  result: Array<SearchResultQuestion>
  dbName: string
}

/** A pure string -> string step used by the question / answer simplifiers. */
type StringTransform = (value: string) => string

/** Per-aspect match percentages produced by `compareQuestionObj`. */
interface MatchDetails {
  qMatch: number
  aMatch: number
  /** -1 when both questions carry 'simple' data (nothing to compare). */
  dMatch: number
  matchedSubjName: string
  avg: number
}

/** One stored question that matched a searched question well enough. */
interface QuestionMatch {
  q: Question
  match: number
  detailedMatch: MatchDetails
}

/* File that is loaded (through ts-node) inside the search worker thread */
const searchDataWorkerFile = './src/utils/classes.ts'

/**
 * Throws when `val` is falsy. Deliberately not named `assert` so it cannot be
 * confused with (or collide with) Node's built-in `assert` module.
 */
function assertTruthy(val: unknown): void {
  if (!val) {
    throw new Error('Assertion failed')
  }
}

/* Regex sources removed from answers. Order matters: longer variants come first */
const commonUselessAnswerParts = [
  'A helyes válasz az ',
  'A helyes válasz a ',
  'A helyes válaszok: ',
  'A helyes válaszok:',
  'A helyes válasz: ',
  'A helyes válasz:',
  'The correct answer is:',
  "'",
]
/* Regex sources removed from strings before they are compared */
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
/* Regex sources replaced with a space in questions and answers */
const specialChars = ['&', '\\+']
/* Percentage points subtracted for every word of length difference */
const lengthDiffMultiplier = 10
/* Minimum amount to consider that two questions match during answering */
const minMatchAmmount = 60
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90

// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------

// Exported
// ---------------------------------------------------------------------------------------------------------

/**
 * Strips a leading `YYYY/YY/N - ` prefix from a subject name.
 *
 * @param subjName - full subject name, e.g. `2019/20/1 - Math`
 * @returns the part after the first ' - ' when the part before it has the
 *   `dddd/dd/d` shape and the second part is not empty; otherwise `subjName`
 */
function getSubjNameWithoutYear(subjName: string): string {
  const parts = subjName.split(' - ')
  if (/^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i.test(parts[0])) {
    return parts[1] || subjName
  }
  return subjName
}

// Not exported
// ---------------------------------------------------------------------------------------------------------

/**
 * Replaces every occurrence of each pattern, applied in array order.
 *
 * @param value - string to clean
 * @param removableStrings - regular expression *sources* (not literal text)
 * @param toReplace - replacement text, defaults to the empty string
 */
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
): string {
  const replacement = toReplace || ''
  return removableStrings.reduce(
    (current, pattern) =>
      current.replace(new RegExp(pattern, 'g'), replacement),
    value
  )
}

/**
 * Removes whitespace from the beginning and the end, and replaces runs of
 * spaces with a single space.
 *
 * A single `/ {2,}/g` pass is used on purpose: a loop that keeps going while
 * the string still contains a space never terminates for inputs like 'a b'.
 */
function removeUnnecesarySpaces(toremove: string): string {
  assertTruthy(toremove)
  return normalizeSpaces(toremove)
    .replace(/ {2,}/g, ' ')
    .trim()
}

/** Simplifies a string for easier comparison (spacing, case, punctuation). */
function simplifyStringForComparison(value: string): string {
  assertTruthy(value)
  const lowered = removeUnnecesarySpaces(value).toLowerCase()
  return removeStuff(lowered, commonUselessStringParts)
}

/** Replaces every `specialChars` match with a space. */
function removeSpecialChars(value: string): string {
  assertTruthy(value)
  return removeStuff(value, specialChars, ' ')
}

/** Turns every whitespace character (including non-breaking space) into ' '. */
function normalizeSpaces(input: string): string {
  assertTruthy(input)
  return input.replace(/\s/g, ' ')
}

/**
 * Word based similarity of two strings.
 *
 * @returns 100 when both inputs are falsy and 0 when only one of them is.
 *   Otherwise the rounded percentage of words of `string1` that also occur in
 *   `string2`, minus `lengthDiffMultiplier` for every word of length
 *   difference, never below 0.
 */
function compareString(string1: string, string2: string): number {
  if (!string1 || !string2) {
    return !string1 && !string2 ? 100 : 0
  }

  const words1 = simplifyStringForComparison(string1).split(' ')
  const words2 = simplifyStringForComparison(string2).split(' ')
  const knownWords = new Set(words2)
  const matched = words1.filter((word) => knownWords.has(word)).length

  let percent = Math.round(
    parseFloat(((matched / words1.length) * 100).toFixed(2))
  )
  percent -= Math.abs(words2.length - words1.length) * lengthDiffMultiplier
  return Math.max(percent, 0)
}

/** Removes the `commonUselessAnswerParts` patterns from an answer. */
function answerPreProcessor(value: string): string {
  assertTruthy(value)
  return removeStuff(value, commonUselessAnswerParts)
}

/**
 * 'a. pécsi sör' -> 'pécsi sör'
 *
 * When the text before the first '. ' is at most one character long it is
 * dropped, and the remaining '. ' separated parts are joined with spaces.
 * Falsy input is returned unchanged.
 */
function removeAnswerLetters(value: string): string {
  if (!value) {
    return value
  }
  const parts = value.split('. ')
  if (parts[0].length < 2 && parts.length > 1) {
    parts.shift()
    return parts.join(' ')
  }
  return value
}

/* Steps applied to answers, in order */
const answerSimplifiers: Array<StringTransform> = [
  removeSpecialChars,
  removeUnnecesarySpaces,
  answerPreProcessor,
  removeAnswerLetters,
]
/* Steps applied to questions (and to the answer of a whole question object), in order */
const questionSimplifiers: Array<StringTransform> = [
  removeSpecialChars,
  removeUnnecesarySpaces,
  removeAnswerLetters,
]

/**
 * Runs `value` through `mods` from left to right.
 *
 * The pipeline stops as soon as an intermediate result becomes empty: the
 * individual steps assert on non-empty input, so an input that simplifies to
 * nothing (for example '&') yields '' instead of throwing.
 */
function simplifyQA(value: string, mods: Array<StringTransform>): string {
  if (!value) {
    return value
  }
  return mods.reduce((res, fn) => (res ? fn(res) : res), value)
}

/** Simplifies an answer; falsy input is returned unchanged. */
function simplifyAnswer(value: string): string {
  if (!value) {
    return value
  }
  return simplifyQA(value, answerSimplifiers)
}

/**
 * Simplifies a question given either as plain text or as a question object.
 *
 * - falsy input: returns undefined
 * - string: returns the simplified string
 * - object: simplifies `Q` and `A` IN PLACE (both with the question steps)
 *   and returns the same object
 *
 * The return type is intentionally loose: `createQuestion` stores the result
 * in `Q` and `searchData` forwards it to the worker, whichever shape it has.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function simplifyQuestion(question: Question | string): any {
  if (!question) {
    return undefined
  }
  if (typeof question === 'string') {
    return simplifyQA(question, questionSimplifiers)
  }
  if (question.Q) {
    question.Q = simplifyQA(question.Q, questionSimplifiers)
  }
  if (question.A) {
    question.A = simplifyQA(question.A, questionSimplifiers)
  }
  return question
}

// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------

/**
 * Builds a question object from a raw question and answer.
 *
 * NOTE(review): when `question` is an object, `Q` receives that (simplified)
 * object rather than its text. Confirm whether any caller passes an object.
 */
function createQuestion(
  question: Question | string,
  answer: string,
  data: QuestionData
): Question {
  return {
    Q: simplifyQuestion(question),
    A: simplifyAnswer(answer),
    data,
  }
}

/**
 * Compares the images of two questions as space separated strings, preferring
 * `hashedImages` over `images`. Throws when a question has neither list;
 * `compareData` catches that.
 */
function compareImage(data: QuestionData, data2: QuestionData): number {
  // TODO: img comparing (hashed images vs images)
  const imgs1 = data.hashedImages ? data.hashedImages : data.images
  const imgs2 = data2.hashedImages ? data2.hashedImages : data2.images
  return compareString(imgs1.join(' '), imgs2.join(' '))
}

/**
 * Compares the extra data of two questions.
 *
 * @returns -1 when both are 'simple' (nothing to compare), the image match
 *   percent when both are 'image', and 0 when the types differ, the type is
 *   not handled, or the comparison throws.
 */
function compareData(q1: Question, q2: Question): number {
  try {
    if (q1.data.type !== q2.data.type) {
      return 0
    }
    const dataType = q1.data.type
    if (dataType === 'simple') {
      return -1
    }
    if (dataType === 'image') {
      return compareImage(q1.data, q2.data)
    }
    logger.DebugLog(
      `Unhandled data type ${dataType}`,
      'Compare question data',
      1
    )
    logger.DebugLog(q1, 'Compare question data', 2)
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error)
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
  }
  return 0
}

/** Match percent of the question texts. */
function compareQuestion(q1: Question, q2: Question): number {
  return compareString(q1.Q, q2.Q)
}

/** Match percent of the answer texts. */
function compareAnswer(q1: Question, q2: Question): number {
  return compareString(q1.A, q2.A)
}

/**
 * Compares a stored question (`q1`) with a searched one (`q2`).
 *
 * @param q1 - stored question object
 * @param q1subjName - not used by the comparison
 * @param q2 - searched question; a plain string is wrapped as `{ Q, data }`
 * @param q2subjName - reported back as `matchedSubjName`
 * @param data - data used for `q2` when it is a plain string
 * @returns the individual match percents and their average. The answer is
 *   part of the average only when `q2` has one, the data only when it was
 *   actually compared (`dMatch !== -1`).
 */
function compareQuestionObj(
  q1: Question,
  q1subjName: string,
  q2: Question | string,
  q2subjName: string,
  data: QuestionData
): MatchDetails {
  assertTruthy(data)
  assertTruthy(q1)
  assertTruthy(typeof q1 === 'object')
  assertTruthy(q2)

  // A bare string has no answer: leaving `A` out keeps it out of the average.
  const qObj: Question =
    typeof q2 === 'string' ? ({ Q: q2, data: data } as Question) : q2

  const qMatch = compareQuestion(q1, qObj)
  const aMatch = compareAnswer(q1, qObj)
  // -1 if both questions are simple
  const dMatch = compareData(q1, qObj)

  const considered = [qMatch]
  if (qObj.A) {
    considered.push(aMatch)
  }
  if (dMatch !== -1) {
    considered.push(dMatch)
  }
  const avg =
    considered.reduce((sum, current) => sum + current, 0) / considered.length

  return {
    qMatch: qMatch,
    aMatch: aMatch,
    dMatch: dMatch,
    matchedSubjName: q2subjName,
    avg: avg,
  }
}

/**
 * Serializes a question as '?question', '!answer' and, for non-simple
 * questions, '>' followed by the JSON of its data - one item per line.
 */
function questionToString(question: Question): string {
  const { Q, A, data } = question
  const text = '?' + Q + '\n!' + A
  return data.type !== 'simple' ? text + '\n>' + JSON.stringify(data) : text
}

// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------

/** Sort comparator: highest match first. */
function byMatchDescending(m1: QuestionMatch, m2: QuestionMatch): number {
  return m2.match - m1.match
}

/**
 * Collects the questions of `subj` whose average match with `question` is
 * above `minMatchAmmount`, best match first.
 */
function searchQuestion(
  subj: Subject,
  question: Question | string,
  questionData: QuestionData,
  subjName: string
): Array<QuestionMatch> {
  assertTruthy(question)

  const matches: Array<QuestionMatch> = []
  subj.Questions.forEach((currentQuestion) => {
    const percent = compareQuestionObj(
      currentQuestion,
      subjName,
      question,
      subj.Name,
      questionData
    )
    if (percent.avg > minMatchAmmount) {
      matches.push({
        q: currentQuestion,
        match: percent.avg,
        detailedMatch: percent,
      })
    }
  })

  return matches.sort(byMatchDescending)
}

/** Serializes a subject: '+name' followed by its questions, one per line. */
function subjectToString(subj: Subject): string {
  const { Questions, Name } = subj
  const lines = Questions.map((question) => questionToString(question))
  return '+' + Name + '\n' + lines.join('\n')
}

// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------

/**
 * Adds `question` to the first subject whose name (without the year prefix)
 * is contained in `subj`, ignoring case. When there is no such subject a new
 * one named `subj` is appended. `data` is modified in place.
 *
 * @throws when any argument is falsy or `question` is not an object
 */
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)
  assertTruthy(data)
  assertTruthy(subj)
  assertTruthy(question)
  assertTruthy(typeof question === 'object')

  const lowerSubj = subj.toLowerCase()
  const existing = data.find((current) =>
    lowerSubj.includes(getSubjNameWithoutYear(current.Name).toLowerCase())
  )

  if (existing) {
    logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
    existing.Questions.push(question)
  } else {
    logger.DebugLog('Creating new subject for question', 'qdb add', 1)
    data.push({
      Name: subj,
      Questions: [question],
    })
  }
}

/**
 * Searches every database, one worker per database.
 *
 * @returns one `SearchResult` per database, in the order of `data`; rejects
 *   as soon as any of the searches rejects
 */
function searchDatas(
  data: Array<QuestionDb>,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<Array<SearchResult>> {
  return Promise.all(
    data.map((db: QuestionDb) =>
      searchData(db, question, subjName, questionData)
    )
  )
}

// FIXME: remove questionData, make question only Question type
/**
 * Searches one database for `question` in a worker thread.
 *
 * @param qdb - database to search; its `data` is handed to the worker
 * @param question - question text or question object; an object is
 *   simplified in place
 * @param subjName - subject to look in first, '' is used when falsy
 * @param questionData - defaults to `question.data`, then `{ type: 'simple' }`
 * @returns resolves with whatever the worker posts; rejects on a falsy
 *   question, on a worker error, or on a non-zero worker exit code
 */
function searchData(
  qdb: QuestionDb,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<SearchResult> {
  return new Promise<SearchResult>((resolve, reject) => {
    assertTruthy(question)
    logger.DebugLog('Searching for question', 'qdb search', 1)
    logger.DebugLog('Question:', 'qdb search', 2)
    logger.DebugLog(question, 'qdb search', 2)
    logger.DebugLog(`Subject name: ${subjName}`, 'qdb search', 2)
    logger.DebugLog('Data:', 'qdb search', 2)
    logger.DebugLog(questionData || question.data, 'qdb search', 2)

    const searchedData = questionData || question.data || { type: 'simple' }
    if (!subjName) {
      logger.DebugLog('No subject name as param!', 'qdb search', 1)
    }
    const searchedSubjName = subjName || ''
    const simplified = simplifyQuestion(question)

    const worker = workerTs(searchDataWorkerFile, {
      workerData: {
        data: qdb.data,
        subjName: searchedSubjName,
        question: simplified,
        questionData: searchedData,
      },
    })

    worker.on('error', (err) => {
      logger.Log('Search Data Worker error!', logger.GetColor('redbg'))
      console.error(err)
      reject(err)
    })

    worker.on('exit', (code) => {
      logger.DebugLog('Search Data exit, code: ' + code, 'actions', 1)
      if (code !== 0) {
        logger.Log(
          'Search Data Worker error! Exit code is not 0',
          logger.GetColor('redbg')
        )
        reject(new Error('Search Data Worker error! Exit code is not 0'))
      }
    })

    worker.on('message', (result) => {
      logger.DebugLog(`Worker message arrived`, 'worker', 2)
      logger.DebugLog(result, 'worker', 3)
      logger.DebugLog(`Question result length: ${result.length}`, 'ask', 1)
      logger.DebugLog(result, 'ask', 2)
      logger.DebugLog(
        `QDB search result length: ${result.length}`,
        'qdb search',
        1
      )
      resolve({
        result: result,
        dbName: qdb.name,
      })
    })
  })
}

/** Serializes every subject, separated by an empty line. */
function dataToString(data: Array<Subject>): string {
  return data.map((subj) => subjectToString(subj)).join('\n\n')
}

// ------------------------------------------------------------------------

/**
 * Worker thread entry point: searches `data`, posts the matches (best first)
 * to the parent thread, then exits the worker with code 0.
 *
 * Subjects whose name (without the year prefix) is contained in `subjName`
 * are searched first. When none of those matches reaches
 * `minMatchToNotSearchOtherSubjects`, the remaining subjects are searched
 * too. Every subject is searched at most once, so no match is reported twice.
 *
 * @throws when called outside of a worker thread
 */
function searchWorker(
  data: Array<Subject>,
  subjName: string,
  question: Question | string,
  questionData?: QuestionData
): void {
  if (!parentPort) {
    throw new Error('searchWorker can only run inside a worker thread')
  }

  const lowerSubjName = subjName.toLowerCase()
  const searchedSubjects = new Set<Subject>()
  let result: Array<QuestionMatch> = []

  data.forEach((subj) => {
    if (
      lowerSubjName.includes(getSubjNameWithoutYear(subj.Name).toLowerCase())
    ) {
      logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
      searchedSubjects.add(subj)
      result = result.concat(
        searchQuestion(subj, question, questionData, subjName)
      )
    }
  })

  // FIXME: try to remove this? but this is also a good backup plan so idk
  // Every match has to be looked at: `result` is a concatenation of per
  // subject lists, so its first element is not necessarily the best one.
  const nameMatchCount = result.length
  const hasConfidentMatch = result.some(
    (current) => current.match >= minMatchToNotSearchOtherSubjects
  )

  if (!hasConfidentMatch) {
    logger.DebugLog(
      'Reqults length is zero when comparing names, trying all subjects',
      'searchworker',
      1
    )
    data.forEach((subj) => {
      // subjects matched by name were already searched above
      if (!searchedSubjects.has(subj)) {
        result = result.concat(
          searchQuestion(subj, question, questionData, subjName)
        )
      }
    })
    // only report when the subject name really gave nothing, as the message says
    if (nameMatchCount === 0 && result.length > 0) {
      logger.DebugLog(
        `FIXME: '${subjName}' gave no result but '' did!`,
        'searchworker',
        1
      )
      console.error(`FIXME: '${subjName}' gave no result but '' did!`)
    }
  }

  result.sort(byMatchDescending)

  parentPort.postMessage(result)
  process.exit(0)
}

/**
 * Starts a worker thread that registers ts-node and then requires `file`, so
 * a TypeScript source file can serve as the worker script.
 *
 * `wkOpts` is not modified: a copy with `eval: true` and the file name added
 * to `workerData` (as `__filename`, removed again by the bootstrap script) is
 * passed to the `Worker` constructor.
 */
function workerTs(file: string, wkOpts: WorkerOptions): Worker {
  const options: WorkerOptions = {
    ...wkOpts,
    eval: true,
    workerData: { ...(wkOpts.workerData || {}), __filename: file },
  }
  return new Worker(
    `
      const wk = require('worker_threads');
      require('ts-node').register();
      let file = wk.workerData.__filename;
      delete wk.workerData.__filename;
      require(file);
    `,
    options
  )
}

// When this file is loaded inside a worker thread it runs the search right away
if (!isMainThread) {
  logger.DebugLog(`Starting search worker ...`, 'searchworker', 1)
  const { data, subjName, question, questionData } = workerData
  searchWorker(data, subjName, question, questionData)
}

// ------------------------------------------------------------------------

export {
  minMatchAmmount,
  getSubjNameWithoutYear,
  createQuestion,
  addQuestion,
  searchData,
  searchDatas,
  dataToString,
}