import { isMainThread, parentPort, workerData } from 'worker_threads'

import logger from './logger'
import { Question, QuestionData, Subject } from '../types/basicTypes'

interface SearchResultQuestion extends Question {
  match: number
}

export interface SearchResult {
  // NOTE(review): the element type was missing in the reviewed source;
  // SearchResultQuestion is assumed here - confirm against the consumers.
  result: Array<SearchResultQuestion>
  dbName: string
}

/** Match percentages calculated by compareQuestionObj for one stored question. */
interface DetailedMatch {
  qMatch: number
  aMatch: number
  dMatch: number
  matchedSubjName: string
  avg: number
  /** Name of the question DB the hit came from; only set by the worker thread. */
  qdb?: string
}

/** One search hit: the stored question and how well it matched the searched one. */
interface SearchHit {
  q: Question
  match: number
  detailedMatch: DetailedMatch
}

/**
 * Question DB as read by the worker thread. Only the fields this file reads
 * are listed.
 * NOTE(review): the field types of `index` and `name` are assumptions - confirm
 * against the code that builds `workerData.initData`.
 */
interface WorkerQdb {
  index: number
  name: string
  data: Array<Subject>
}

/** A single step of the question / answer simplification pipeline. */
type StringModifier = (value: string) => string | undefined

/** Throws if `val` is falsy. */
const assert = (val: unknown): void => {
  if (!val) {
    throw new Error('Assertion failed')
  }
}

/** Sort comparator: highest `match` first. */
const byMatchDescending = (
  first: { match: number },
  second: { match: number }
): number => {
  return second.match - first.match
}

const commonUselessAnswerParts = [
  'A helyes válasz az ',
  'A helyes válasz a ',
  'A helyes válaszok: ',
  'A helyes válaszok:',
  'A helyes válasz: ',
  'A helyes válasz:',
  'The correct answer is:',
  "'",
]
// These are used as regular expression sources, hence the escaping
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
const specialChars = ['&', '\\+']
/* Percent minus for length difference */
const lengthDiffMultiplier = 10
/* Minimum amount to consider that two questions match during answering */
const minMatchAmmount = 70
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90

// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------

// Exported
// ---------------------------------------------------------------------------------------------------------

const yearPrefixPattern = /^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i

/**
 * Strips a leading `YYYY/YY/N - ` prefix from a subject name.
 *
 * @param subjName subject name, optionally prefixed with a year part
 * @returns the part right after the first ' - ' if the name starts with a
 *   year part, otherwise the name unchanged
 */
function getSubjNameWithoutYear(subjName: string): string {
  const parts = subjName.split(' - ')
  if (yearPrefixPattern.test(parts[0])) {
    return parts[1] || subjName
  }
  return subjName
}

// Not exported
// ---------------------------------------------------------------------------------------------------------

/**
 * Replaces every occurrence of each pattern in `value`.
 *
 * @param value string to clean
 * @param removableStrings regular expression sources, applied in order
 * @param toReplace replacement text, defaults to the empty string
 */
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
): string {
  return removableStrings.reduce((cleaned, pattern) => {
    return cleaned.replace(new RegExp(pattern, 'g'), toReplace || '')
  }, value)
}

/**
 * Removes whitespace from the beginning and the end, and replaces multiple
 * spaces with one space.
 */
function removeUnnecesarySpaces(toremove: string): string {
  assert(toremove)
  // A single pass over runs of 2+ spaces; the previous loop could never
  // finish on a string that contained a lone space.
  return normalizeSpaces(toremove)
    .replace(/ {2,}/g, ' ')
    .trim()
}

/** Simplifies a string for easier comparison. */
function simplifyStringForComparison(value: string): string {
  assert(value)
  const lowered = removeUnnecesarySpaces(value).toLowerCase()
  return removeStuff(lowered, commonUselessStringParts)
}

/** Replaces the characters listed in `specialChars` with a space. */
function removeSpecialChars(value: string): string {
  assert(value)
  return removeStuff(value, specialChars, ' ')
}

/** Turns every whitespace character (non-breaking space included) into a plain space. */
function normalizeSpaces(input: string): string {
  assert(input)
  return input.replace(/\s/g, ' ')
}

/**
 * Compares two strings word by word.
 *
 * @returns match percent: the share of words of `string1` found in `string2`,
 *   minus `lengthDiffMultiplier` for every word of length difference, never
 *   below 0. Two empty values give 100, one empty value gives 0.
 */
function compareString(
  string1: string | null | undefined,
  string2: string | null | undefined
): number {
  if (!string1 || !string2) {
    return !string1 && !string2 ? 100 : 0
  }

  const words1 = simplifyStringForComparison(string1).split(' ')
  const words2 = simplifyStringForComparison(string2).split(' ')
  const wordsToFind = new Set(words2)
  const matchCount = words1.filter((word) => {
    return wordsToFind.has(word)
  }).length

  const matchPercent = Math.round(
    parseFloat(((matchCount / words1.length) * 100).toFixed(2))
  )
  const lengthDifference = Math.abs(words2.length - words1.length)
  return Math.max(matchPercent - lengthDifference * lengthDiffMultiplier, 0)
}

/** Removes the phrases listed in `commonUselessAnswerParts` from an answer. */
function answerPreProcessor(value: string): string {
  if (!value) {
    return value
  }
  return removeStuff(value, commonUselessAnswerParts)
}

/**
 * 'a. pécsi sör' -> 'pécsi sör'
 *
 * @returns `undefined` for an empty value
 */
function removeAnswerLetters(value: string): string | undefined {
  if (!value) {
    return undefined
  }

  const parts = value.split('. ')
  if (parts[0].length < 2 && parts.length > 1) {
    parts.shift()
    return parts.join(' ')
  }
  return value
}

/**
 * Runs `value` through the modifiers in order.
 *
 * @returns `undefined` for an empty value, otherwise the result of the last modifier
 */
function simplifyQA(
  value: string,
  mods: Array<StringModifier>
): string | undefined {
  if (!value) {
    return undefined
  }
  return mods.reduce<string | undefined>((res, fn) => {
    // The intermediate result is handed over as is; the modifiers check
    // their own input.
    return fn(res as string)
  }, value)
}

/** Simplifies an answer string; an empty value is returned unchanged. */
function simplifyAnswer(value: string): string | undefined {
  if (!value) {
    return value
  }
  return simplifyQA(value, [
    removeSpecialChars,
    removeUnnecesarySpaces,
    answerPreProcessor,
    removeAnswerLetters,
  ])
}

/* Modifiers applied to question strings, and to the Q and A fields of question objects */
const questionModifiers: Array<StringModifier> = [
  removeSpecialChars,
  removeUnnecesarySpaces,
  removeAnswerLetters,
]

/**
 * Simplifies a question. A string is returned simplified; for a question
 * object the `Q` and `A` fields are simplified in place and the same object
 * is returned.
 *
 * @returns `undefined` for an empty value
 */
function simplifyQuestion(question: string): string | undefined
function simplifyQuestion(question: Question): Question
function simplifyQuestion(
  question: Question | string
): Question | string | undefined
function simplifyQuestion(
  question: Question | string
): Question | string | undefined {
  if (!question) {
    return undefined
  }
  if (typeof question === 'string') {
    return simplifyQA(question, questionModifiers)
  }

  if (question.Q) {
    question.Q = simplifyQA(question.Q, questionModifiers)
  }
  if (question.A) {
    question.A = simplifyQA(question.A, questionModifiers)
  }
  return question
}

// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------

/**
 * Creates a question object from a question, an answer and the question data.
 *
 * @param question question to store, simplified with simplifyQuestion
 *   NOTE(review): when an object is passed, `Q` becomes that object, not its
 *   `Q` field - confirm that callers only pass strings
 * @param answer answer, simplified with simplifyAnswer; `A` is `undefined` when empty
 * @param data stored unchanged
 */
function createQuestion(
  question: Question | string,
  answer: string | null | undefined,
  data: QuestionData
): Question {
  return {
    Q: simplifyQuestion(question),
    A: answer ? simplifyAnswer(answer) : undefined,
    data: data,
  }
}

/**
 * Compares the images of two questions: the hashed images when both have
 * them, otherwise the `images` entries with a 10 percent penalty.
 *
 * @returns match percent, never below 0, so it can not be mistaken for the -1
 *   that compareData returns for simple questions
 */
function compareImage(data: QuestionData, data2: QuestionData): number {
  if (data.hashedImages && data2.hashedImages) {
    return compareString(
      data.hashedImages.join(' '),
      data2.hashedImages.join(' ')
    )
  }
  return Math.max(
    compareString(data.images.join(' '), data2.images.join(' ')) - 10,
    0
  )
}

/**
 * Compares the data of two questions.
 *
 * @returns -1 if both questions are simple, the image match percent if both
 *   are image questions, 0 if the types differ, the type is not handled or
 *   the comparison threw
 */
function compareData(q1: Question, q2: Question): number {
  try {
    if (q1.data.type !== q2.data.type) {
      return 0
    }

    const dataType = q1.data.type
    if (dataType === 'simple') {
      return -1
    }
    if (dataType === 'image') {
      return compareImage(q1.data, q2.data)
    }

    logger.DebugLog(
      `Unhandled data type ${dataType}`,
      'Compare question data',
      1
    )
    logger.DebugLog(q1, 'Compare question data', 2)
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error)
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
    console.error(error)
  }
  return 0
}

/** Match percent of the question texts. */
function compareQuestion(q1: Question, q2: Question): number {
  return compareString(q1.Q, q2.Q)
}

/** Match percent of the answer texts. */
function compareAnswer(q1: Question, q2: Question): number {
  return compareString(q1.A, q2.A)
}

/**
 * Compares a stored question (`q1`) with a searched one (`q2`).
 *
 * @param q1subjName not used by the comparison
 * @param q2subjName returned as `matchedSubjName`
 * @param data only checked for being set
 * @returns the question, answer and data match percents and their average.
 *   The answer is only part of the average when `q2` has an answer, the data
 *   only when the questions are not simple.
 */
function compareQuestionObj(
  q1: Question,
  q1subjName: string,
  q2: Question,
  q2subjName: string,
  data: QuestionData
): DetailedMatch {
  assert(data !== undefined && data !== null)
  assert(q1)
  assert(typeof q1 === 'object')
  assert(q2)
  assert(typeof q2 === 'object')

  const qMatch = compareQuestion(q1, q2)
  const aMatch = q2.A ? compareAnswer(q1, q2) : 0
  // -1 if both questions are simple
  const dMatch = compareData(q1, q2)

  const averaged = [qMatch]
  if (q2.A) {
    averaged.push(aMatch)
  }
  if (dMatch !== -1) {
    averaged.push(dMatch)
  }
  const sum = averaged.reduce((total, current) => {
    return total + current
  }, 0)

  return {
    qMatch: qMatch,
    aMatch: aMatch,
    dMatch: dMatch,
    matchedSubjName: q2subjName,
    avg: sum / averaged.length,
  }
}

/** Serializes a question as '?Q', '!A' and - if it is not simple - '>data as JSON' lines. */
function questionToString(question: Question): string {
  const { Q, A, data } = question

  const lines = '?' + Q + '\n!' + A
  if (data.type !== 'simple') {
    return lines + '\n>' + JSON.stringify(data)
  }
  return lines
}

// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------

/**
 * Searches for a question in one subject.
 *
 * @param searchTillMatchPercent if set, the search stops at the first
 *   question with at least this average match
 * @returns questions with an average match above `minMatchAmmount`, best first
 */
function searchSubject(
  subj: Subject,
  question: Question,
  subjName: string,
  searchTillMatchPercent?: number
): Array<SearchHit> {
  assert(question)

  const hits: Array<SearchHit> = []
  for (const currentQuestion of subj.Questions) {
    const percent = compareQuestionObj(
      currentQuestion,
      subjName,
      question,
      subj.Name,
      question.data
    )

    if (percent.avg > minMatchAmmount) {
      hits.push({
        q: currentQuestion,
        match: percent.avg,
        detailedMatch: percent,
      })
    }

    if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
      break
    }
  }

  return hits.sort(byMatchDescending)
}

/** Serializes a subject: a '+Name' line followed by its questions. */
function subjectToString(subj: Subject): string {
  const { Questions, Name } = subj

  const lines = Questions.map((question: Question) => {
    return questionToString(question)
  })
  return '+' + Name + '\n' + lines.join('\n')
}

// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------

/**
 * Adds a question to the first subject whose name (without the year part) is
 * contained in `subj`, ignoring case. Creates a new subject named `subj` if
 * there is none.
 *
 * @param data question db, modified in place
 */
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)
  assert(data)
  assert(subj)
  assert(question)
  assert(typeof question === 'object')

  // FIXME: this only adds to the first matched subject name. Check if this doesnt cause any bugs
  const subjIndex = data.findIndex((existing) => {
    return subj
      .toLowerCase()
      .includes(getSubjNameWithoutYear(existing.Name).toLowerCase())
  })

  if (subjIndex !== -1) {
    logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
    data[subjIndex].Questions.push(question)
  } else {
    logger.DebugLog('Creating new subject for question', 'qdb add', 1)
    data.push({
      Name: subj,
      Questions: [question],
    })
  }
}

/**
 * Makes a simplified question object from the search input.
 *
 * @param question a question object (simplified in place) or a question string
 * @param data question data for a question string, either as an object or as
 *   a JSON string. If the JSON can not be parsed the question is created
 *   without data.
 */
function prepareQuestion(
  question: string | Question,
  data?: string | QuestionData
): Question {
  if (typeof question === 'object') {
    return simplifyQuestion(question)
  }

  let parsedData: QuestionData | undefined
  if (typeof data === 'string') {
    try {
      parsedData = JSON.parse(data)
    } catch (err) {
      // Best effort: the caller decides what to do with a question without data
      logger.DebugLog(
        'Unable to parse question data as JSON',
        'searchworker',
        1
      )
    }
  } else if (typeof data === 'object') {
    parsedData = data
  }

  return simplifyQuestion(createQuestion(question, null, parsedData))
}

/** Serializes the subjects of a question db, separated by an empty line. */
function dataToString(data: Array<Subject>): string {
  return data
    .map((subj) => {
      return subjectToString(subj)
    })
    .join('\n\n')
}

/**
 * Searches for a question in a question db.
 *
 * First the subjects whose name (without the year part) is contained in
 * `subjName` are searched. If `searchInAllIfNoResult` is set and every result
 * is below `minMatchToNotSearchOtherSubjects`, the rest of the subjects are
 * searched too.
 *
 * @param searchTillMatchPercent if set, a search pass stops after the first
 *   subject that has a result with at least this match
 * @returns the results of all searched subjects, best first
 * @throws if the prepared question has no data
 */
function doSearch(
  data: Array<Subject>,
  subjName: string,
  question: Question | string,
  questionData?: QuestionData,
  searchTillMatchPercent?: number,
  searchInAllIfNoResult?: boolean
): Array<SearchHit> {
  const questionToSearch = prepareQuestion(question, questionData)
  assert(questionToSearch.data)

  const reachedWantedMatch = (hits: Array<SearchHit>): boolean => {
    if (!searchTillMatchPercent) {
      return false
    }
    return hits.some((hit) => {
      return hit.match >= searchTillMatchPercent
    })
  }

  let result: Array<SearchHit> = []
  // Subjects searched by name, so the fallback below does not search them
  // again and does not return their results twice
  const searchedSubjects = new Set<Subject>()
  const wantedSubjName = subjName.toLowerCase()

  for (const subj of data) {
    const nameMatches = wantedSubjName.includes(
      getSubjNameWithoutYear(subj.Name).toLowerCase()
    )
    if (!nameMatches) {
      continue
    }

    logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
    const subjRes = searchSubject(
      subj,
      questionToSearch,
      subjName,
      searchTillMatchPercent
    )
    searchedSubjects.add(subj)
    result = result.concat(subjRes)

    if (reachedWantedMatch(subjRes)) {
      break
    }
  }

  if (searchInAllIfNoResult) {
    // Also true when there are no results at all
    const allResultsAreWeak = result.every((hit) => {
      return hit.match < minMatchToNotSearchOtherSubjects
    })

    if (allResultsAreWeak) {
      logger.DebugLog(
        'Reqults length is zero when comparing names, trying all subjects',
        'searchworker',
        1
      )
      for (const subj of data) {
        if (searchedSubjects.has(subj)) {
          continue
        }

        const subjRes = searchSubject(
          subj,
          questionToSearch,
          subjName,
          searchTillMatchPercent
        )
        result = result.concat(subjRes)

        if (reachedWantedMatch(subjRes)) {
          break
        }
      }
    }
  }

  return result.sort(byMatchDescending)
}

// ---------------------------------------------------------------------------------------------------------
// Multi threaded stuff
// ---------------------------------------------------------------------------------------------------------

// Only worker threads listen for messages, the main thread just uses the exports
if (!isMainThread && parentPort) {
  const port = parentPort
  const { workerIndex } = workerData
  let qdbs: Array<WorkerQdb> = workerData.initData

  logger.Log(
    `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty`
  )

  port.on('message', (msg) => {
    if (msg.type === 'work') {
      const {
        subjName,
        question,
        questionData,
        searchTillMatchPercent,
        searchInAllIfNoResult,
        searchIn,
        index,
      } = msg.data

      const searchResult: Array<SearchHit> = []
      try {
        qdbs.forEach((qdb) => {
          if (searchIn === 'all' || searchIn.includes(qdb.index)) {
            const res = doSearch(
              qdb.data,
              subjName,
              question,
              questionData,
              searchTillMatchPercent,
              searchInAllIfNoResult
            )
            // Every result is tagged with the name of the db it was found in
            res.forEach((hit) => {
              searchResult.push({
                ...hit,
                detailedMatch: {
                  ...hit.detailedMatch,
                  qdb: qdb.name,
                },
              })
            })
          }
        })
      } catch (err) {
        // The results collected before the error are still sent back
        logger.Log('Error in worker thread!', logger.GetColor('redbg'))
        console.error(err)
      }

      // sorting
      const sortedResult = searchResult.sort(byMatchDescending)

      // ONDONE:
      port.postMessage({
        msg: `From thread #${workerIndex}: job ${
          !isNaN(index) ? `#${index}` : ''
        }done`,
        workerIndex: workerIndex,
        result: sortedResult,
      })
    } else if (msg.type === 'update') {
      qdbs = msg.qdbs
    } else if (msg.type === 'newdb') {
      qdbs.push(msg.newdb)
    }
  })
}

// ------------------------------------------------------------------------

export {
  minMatchAmmount,
  getSubjNameWithoutYear,
  createQuestion,
  addQuestion,
  dataToString,
}