// Mirror of https://gitlab.com/MrFry/mrfrys-node-server
// Synced 2026-04-28 03:07:38 +02:00
// 601 lines, 14 KiB, TypeScript, executable file
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'
|
|
import logger from './logger'
|
|
import {
|
|
Question,
|
|
QuestionDb,
|
|
QuestionData,
|
|
Subject,
|
|
} from '../types/basicTypes'
|
|
|
|
/**
 * A question extended with a numeric match score.
 * NOTE(review): presumably `match` is the percentage computed while searching;
 * confirm against the objects actually posted by the search worker.
 */
interface SearchResultQuestion extends Question {
  /** Match score of this result. */
  match: number
}
|
|
|
|
/**
 * What `searchData` resolves with: the hits found in one question database
 * together with the name of that database.
 */
export interface SearchResult {
  /** Hits received from the search worker. */
  result: SearchResultQuestion[]
  /** Name of the database that was searched. */
  dbName: string
}
|
|
|
|
// Path of the TypeScript file that the search worker thread `require`s on start-up
// (handed to `workerTs` by `searchData`).
// NOTE(review): the path is relative, so it presumably depends on the process's
// working directory - confirm how the server is launched.
const searchDataWorkerFile = './src/utils/classes.ts'
|
|
|
|
/**
 * Minimal runtime assertion helper.
 *
 * @param val - Any value; the check passes when it is truthy.
 * @throws Error with the message 'Assertion failed' when `val` is falsy.
 */
const assert = (val: unknown): void => {
  if (!val) {
    throw new Error('Assertion failed')
  }
}
|
|
|
|
/**
 * Boilerplate fragments stripped from answers by `answerPreProcessor`
 * (Hungarian and English "the correct answer is" prefixes, plus apostrophes).
 * Every entry is used as a regular expression source by `removeStuff`,
 * and the entries are applied in this order.
 */
const commonUselessAnswerParts = [
  // Hungarian prefixes, with and without the trailing space
  'A helyes válasz az ',
  'A helyes válasz a ',
  'A helyes válaszok: ',
  'A helyes válaszok:',
  'A helyes válasz: ',
  'A helyes válasz:',
  // English prefix
  'The correct answer is:',
  // stray apostrophes
  "'",
]
|
|
|
|
/**
 * Regular expression sources removed from strings before they are compared
 * (see `simplifyStringForComparison`). Entries are already regex-escaped.
 */
const commonUselessStringParts = [
  ',',
  '\\.',
  ':',
  '!',
  '\\+',
  '\\s*\\.',
]

/**
 * Regular expression sources that `removeSpecialChars` replaces with a space.
 */
const specialChars = [
  '&',
  '\\+',
]
|
|
/**
 * Percentage points subtracted from a string match for every word of
 * difference in length between the two compared strings.
 */
const lengthDiffMultiplier = 10

/**
 * Minimum match percentage required to consider that two questions match
 * during answering.
 */
const minMatchAmmount = 60

/**
 * If all of the results are below this match percentage (when only one subject
 * was searched due to subject name matching), then all subjects are searched
 * for an answer.
 */
const minMatchToNotSearchOtherSubjects = 90
|
|
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
// String Utils
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
|
|
// Exported
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
/**
 * Strips a leading `YYYY/YY/N` semester prefix from a subject name.
 *
 * The name is split on ' - '. When the first segment looks like a semester
 * identifier (four digits / two digits / one digit), the second segment is
 * returned; any further ' - ' separated segments are not included.
 *
 * @param subjName - Subject name, optionally prefixed with a semester.
 * @returns The second segment when a semester prefix is present and the second
 *   segment is non-empty, otherwise `subjName` unchanged.
 */
function getSubjNameWithoutYear(subjName: string): string {
  const [prefix, name] = subjName.split(' - ')
  const hasYearPrefix = /^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i.test(prefix)

  if (!hasYearPrefix) {
    return subjName
  }
  return name || subjName
}
|
|
|
|
// Not exported
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
/**
 * Replaces every occurrence of each given pattern in a string.
 *
 * @param value - The string to clean.
 * @param removableStrings - Regular expression sources; each one is compiled
 *   with the global flag and applied in order.
 * @param toReplace - Replacement text; an empty string is used when omitted.
 * @returns The string after all replacements.
 */
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
) {
  return removableStrings.reduce(
    (current, pattern) =>
      current.replace(new RegExp(pattern, 'g'), toReplace || ''),
    value
  )
}
|
|
|
|
/**
 * Removes whitespace from the beginning and the end of a string, and replaces
 * every run of whitespace inside it with a single space.
 *
 * `\s` also matches non-breaking spaces, tabs and line breaks, so those are
 * normalised to plain spaces as well.
 *
 * The collapse is done in one regex pass. The previous loop repeated a
 * two-space replacement for as long as the string still contained a space,
 * which can never finish once only single spaces are left.
 *
 * @param toremove - The string to clean; must be truthy.
 * @returns The trimmed string with single spaces between words.
 * @throws When `toremove` is falsy (assertion failure).
 */
function removeUnnecesarySpaces(toremove: string) {
  assert(toremove)

  return toremove.replace(/\s+/g, ' ').trim()
}
|
|
|
|
/**
 * Simplifies a string for easier comparison: whitespace is normalised, the
 * text is lower-cased and the patterns in `commonUselessStringParts` are
 * removed.
 *
 * @param value - The string to simplify; must be truthy.
 * @returns The simplified string.
 * @throws When `value` is falsy (assertion failure).
 */
function simplifyStringForComparison(value: string) {
  assert(value)

  const lowered = removeUnnecesarySpaces(value).toLowerCase()
  return removeStuff(lowered, commonUselessStringParts)
}
|
|
|
|
/**
 * Replaces every character listed in `specialChars` with a single space.
 *
 * @param value - The string to clean; must be truthy.
 * @returns The string with the special characters replaced by spaces.
 * @throws When `value` is falsy (assertion failure).
 */
function removeSpecialChars(value: string) {
  assert(value)

  const replacement = ' '
  return removeStuff(value, specialChars, replacement)
}
|
|
|
|
/**
 * Turns every whitespace character (non-breaking spaces included) into a
 * plain space. Runs of whitespace are not collapsed here.
 *
 * @param input - The string to normalise; must be truthy.
 * @returns The string with each whitespace character replaced by ' '.
 * @throws When `input` is falsy (assertion failure).
 */
function normalizeSpaces(input: string) {
  assert(input)

  const anyWhitespace = /\s/g
  const normalized = input.replace(anyWhitespace, ' ')
  return normalized
}
|
|
|
|
/**
 * Computes a word-based match percentage between two strings.
 *
 * Both strings are simplified and split on spaces. The score is the share of
 * words of the first string that also occur in the second one, rounded to a
 * whole percent, minus `lengthDiffMultiplier` points for every word of
 * difference in length. The result never goes below 0.
 *
 * @param string1 - First string.
 * @param string2 - Second string.
 * @returns 100 when both strings are falsy, 0 when exactly one of them is,
 *   otherwise the match percentage described above.
 */
function compareString(string1: string, string2: string) {
  const firstMissing = !string1
  const secondMissing = !string2
  if (firstMissing && secondMissing) {
    return 100
  }
  if (firstMissing || secondMissing) {
    return 0
  }

  const words1 = simplifyStringForComparison(string1).split(' ')
  const words2 = simplifyStringForComparison(string2).split(' ')

  const matched = words1.filter((word) => words2.includes(word)).length
  const rawPercent = Math.round(
    parseFloat(((matched / words1.length) * 100).toFixed(2))
  )
  const penalty =
    Math.abs(words2.length - words1.length) * lengthDiffMultiplier

  // clamp at zero, the penalty may exceed the raw percentage
  return Math.max(rawPercent - penalty, 0)
}
|
|
|
|
/**
 * Strips the boilerplate fragments listed in `commonUselessAnswerParts`
 * from an answer.
 *
 * @param value - The answer text; must be truthy.
 * @returns The answer without the boilerplate fragments.
 * @throws When `value` is falsy (assertion failure).
 */
function answerPreProcessor(value: string) {
  assert(value)

  const cleaned = removeStuff(value, commonUselessAnswerParts)
  return cleaned
}
|
|
|
|
/**
 * Drops a leading answer letter: 'a. pécsi sör' -> 'pécsi sör'.
 *
 * The value is split on '. '. When the first piece is shorter than two
 * characters and there is more than one piece, the first piece is discarded
 * and the rest is joined back with single spaces (so later '. ' separators
 * become ' ').
 *
 * @param value - The answer or question text.
 * @returns `undefined` for a falsy input, the text without its leading letter
 *   when one was found, otherwise the input unchanged.
 */
function removeAnswerLetters(value: string) {
  if (!value) {
    return
  }

  const [head, ...tail] = value.split('. ')
  const hasLetterPrefix = head.length < 2 && tail.length > 0

  return hasLetterPrefix ? tail.join(' ') : value
}
|
|
|
|
/**
 * Runs a string through a list of transformer functions, in order.
 *
 * @param value - The text to transform.
 * @param mods - Functions applied one after another; each receives the
 *   previous function's result.
 * @returns `undefined` for a falsy input, otherwise the result of the last
 *   transformer (or `value` itself when `mods` is empty).
 */
function simplifyQA(value: string, mods: Array<Function>) {
  if (!value) {
    return
  }

  let current = value
  for (const mod of mods) {
    current = mod(current)
  }
  return current
}
|
|
|
|
/**
 * Normalises an answer: special characters are replaced, whitespace is
 * collapsed, "the correct answer is" boilerplate is stripped and a leading
 * answer letter is removed, in that order.
 *
 * @param value - The raw answer.
 * @returns The simplified answer; a falsy input is returned as is.
 */
function simplifyAnswer(value: string) {
  return value
    ? simplifyQA(value, [
        removeSpecialChars,
        removeUnnecesarySpaces,
        answerPreProcessor,
        removeAnswerLetters,
      ])
    : value
}
|
|
|
|
/**
 * Normalises a question given either as plain text or as a question object.
 *
 * A string is cleaned and returned. For an object the `Q` and `A` fields are
 * cleaned in place (when truthy) and the same object is returned, so the
 * caller's object is modified.
 *
 * @param question - Question text or question object.
 * @returns `undefined` for a falsy input, the simplified string for a string
 *   input, otherwise the (mutated) question object.
 */
function simplifyQuestion(question: Question | string) {
  if (!question) {
    return
  }

  // the same clean-up steps are used for every text handled here
  const steps = [removeSpecialChars, removeUnnecesarySpaces, removeAnswerLetters]

  if (typeof question === 'string') {
    return simplifyQA(question, steps)
  }

  if (question.Q) {
    question.Q = simplifyQA(question.Q, steps)
  }
  if (question.A) {
    question.A = simplifyQA(question.A, steps)
  }
  return question
}
|
|
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
// Question
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
|
|
/**
 * Builds a question object from a raw question, a raw answer and its data.
 *
 * @param question - Raw question (text or object), passed through
 *   `simplifyQuestion`.
 * @param answer - Raw answer text, passed through `simplifyAnswer`.
 * @param data - Question data stored unchanged on the result.
 * @returns The new question object.
 */
function createQuestion(
  question: Question | string,
  answer: string,
  data: QuestionData
): Question {
  const Q = simplifyQuestion(question)
  const A = simplifyAnswer(answer)

  return { Q, A, data }
}
|
|
|
|
/**
 * Compares the images of two questions as space-joined strings.
 *
 * For each side `hashedImages` is used when present, otherwise `images`.
 * TODO: img comparing (hashed images vs images)
 *
 * @param data - Data of the first question.
 * @param data2 - Data of the second question.
 * @returns The `compareString` score of the two joined image lists.
 */
function compareImage(data: QuestionData, data2: QuestionData) {
  const imagesOf = (d: QuestionData) => d.hashedImages || d.images

  const first = imagesOf(data)
  const second = imagesOf(data2)
  return compareString(first.join(' '), second.join(' '))
}
|
|
|
|
/**
 * Compares the `data` part of two questions.
 *
 * @param q1 - First question.
 * @param q2 - Second question.
 * @returns -1 when both questions are of type 'simple' (nothing to compare),
 *   the image match score when both are of type 'image', and 0 when the types
 *   differ, the type is not handled, or an error occurs (errors are logged,
 *   not thrown).
 */
function compareData(q1: Question, q2: Question) {
  try {
    const dataType = q1.data.type
    if (dataType !== q2.data.type) {
      return 0
    }

    switch (dataType) {
      case 'simple':
        return -1
      case 'image':
        return compareImage(q1.data, q2.data)
      default:
        // unknown type: log it and fall through to the final `return 0`
        logger.DebugLog(
          `Unhandled data type ${dataType}`,
          'Compare question data',
          1
        )
        logger.DebugLog(q1, 'Compare question data', 2)
    }
  } catch (error) {
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(error.message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
  }
  return 0
}
|
|
|
|
/**
 * Compares the question texts (`Q`) of two questions.
 *
 * @returns The `compareString` score of the two texts.
 */
function compareQuestion(q1: Question, q2: Question) {
  const { Q: first } = q1
  const { Q: second } = q2
  return compareString(first, second)
}
|
|
|
|
/**
 * Compares the answer texts (`A`) of two questions.
 *
 * @returns The `compareString` score of the two texts.
 */
function compareAnswer(q1: Question, q2: Question) {
  const { A: first } = q1
  const { A: second } = q2
  return compareString(first, second)
}
|
|
|
|
/**
 * Compares a stored question with a searched one and returns the individual
 * and averaged match scores.
 *
 * The average only includes the parts that can be compared: the answer score
 * is counted when the searched question has an answer, and the data score is
 * counted unless `compareData` returned -1.
 *
 * @param q1 - Stored question; must be an object.
 * @param q1subjName - Not used by this function.
 * @param q2 - Searched question; a plain string is wrapped into
 *   `{ Q: q2, data }`.
 * @param q2subjName - Returned unchanged as `matchedSubjName`.
 * @param data - Question data used when `q2` is a string; must be truthy.
 * @returns `{ qMatch, aMatch, dMatch, matchedSubjName, avg }`.
 * @throws When one of the assertions on the arguments fails.
 */
function compareQuestionObj(
  q1: Question,
  q1subjName: string,
  q2: Question,
  q2subjName: string,
  data: QuestionData
) {
  assert(data)
  assert(q1)
  assert(typeof q1 === 'object')
  assert(q2)

  const qObj = typeof q2 === 'string' ? { Q: q2, data: data } : q2

  const qMatch = compareQuestion(q1, qObj)
  const aMatch = compareAnswer(q1, qObj)
  // -1 if both questions are simple
  const dMatch = compareData(q1, qObj)

  // collect only the scores that take part in the average
  const scores = [qMatch]
  if (qObj.A) {
    scores.push(aMatch)
  }
  if (dMatch !== -1) {
    scores.push(dMatch)
  }
  const avg = scores.reduce((sum, score) => sum + score, 0) / scores.length

  return {
    qMatch,
    aMatch,
    dMatch,
    matchedSubjName: q2subjName,
    avg,
  }
}
|
|
|
|
/**
 * Serialises a question to text: '?' + question, a new line, '!' + answer,
 * and for non-'simple' questions a further line with '>' + the JSON of `data`.
 *
 * @param question - The question to serialise.
 * @returns The text form of the question.
 */
function questionToString(question: Question) {
  const base = '?' + question.Q + '\n!' + question.A

  if (question.data.type === 'simple') {
    return base
  }
  return base + '\n>' + JSON.stringify(question.data)
}
|
|
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
// Subject
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
/**
 * Searches one subject for questions matching the given question.
 *
 * Every question of the subject is compared with `question`; the ones whose
 * average match is above `minMatchAmmount` are kept.
 *
 * @param subj - Subject whose questions are searched.
 * @param question - The question to look for; must be truthy.
 * @param questionData - Data forwarded to `compareQuestionObj`.
 * @param subjName - Subject name forwarded to `compareQuestionObj`.
 * @returns `{ q, match, detailedMatch }` entries, best match first.
 * @throws When `question` is falsy (assertion failure).
 */
function searchQuestion(
  subj: Subject,
  question: Question,
  questionData: QuestionData,
  subjName: string
) {
  assert(question)

  const hits = subj.Questions.map((candidate) => {
    const detailedMatch = compareQuestionObj(
      candidate,
      subjName,
      question,
      subj.Name,
      questionData
    )
    return { q: candidate, match: detailedMatch.avg, detailedMatch }
  }).filter((hit) => hit.match > minMatchAmmount)

  // descending by match
  return hits.sort((a, b) => b.match - a.match)
}
|
|
|
|
/**
 * Serialises a subject to text: a '+' followed by its name, then one
 * serialised question per entry, separated by new lines.
 *
 * @param subj - The subject to serialise.
 * @returns The text form of the subject.
 */
function subjectToString(subj: Subject) {
  const lines = subj.Questions.map((question) => questionToString(question))

  return '+' + subj.Name + '\n' + lines.join('\n')
}
|
|
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
// QuestionDB
|
|
// ---------------------------------------------------------------------------------------------------------
|
|
/**
 * Adds a question to a list of subjects, modifying the list in place.
 *
 * The question goes into the first subject whose name (without its year
 * prefix) is contained, case-insensitively, in `subj`. When there is no such
 * subject, a new one named `subj` is appended with this single question.
 *
 * @param data - The subjects; modified in place. Must be truthy.
 * @param subj - Name of the subject the question belongs to. Must be truthy.
 * @param question - The question object to add.
 * @throws When one of the assertions on the arguments fails.
 */
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)
  assert(data)
  assert(subj)
  assert(question)
  assert(typeof question === 'object')

  const subjIndex = data.findIndex((existing) =>
    subj
      .toLowerCase()
      .includes(getSubjNameWithoutYear(existing.Name).toLowerCase())
  )

  if (subjIndex === -1) {
    logger.DebugLog('Creating new subject for question', 'qdb add', 1)
    data.push({
      Name: subj,
      Questions: [question],
    })
    return
  }

  logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
  data[subjIndex].Questions.push(question)
}
|
|
|
|
/**
 * Runs `searchData` on every given question database at the same time.
 *
 * @param data - The databases to search.
 * @param question - The question to look for (text or object).
 * @param subjName - Subject name used to narrow the search.
 * @param questionData - Optional question data.
 * @returns A promise of one search result per database, in the order of
 *   `data`; it rejects as soon as any of the searches rejects.
 */
function searchDatas(
  data: Array<QuestionDb>,
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<Array<SearchResult>> {
  const searches = data.map((db: QuestionDb) =>
    searchData(db, question, subjName, questionData)
  )
  return Promise.all(searches)
}
|
|
|
|
// FIXME: remove questionData, make question only Question type
/**
 * Searches one question database in a worker thread.
 *
 * The question is simplified (a question object is modified in place by
 * `simplifyQuestion`), then a worker is started through `workerTs` with the
 * database's data, the subject name, the question and the question data.
 *
 * @param qdb - The database to search; `qdb.data` is sent to the worker and
 *   `qdb.name` is returned as `dbName`.
 * @param question - The question to look for (text or object); must be truthy.
 * @param subjName - Subject name; a falsy value is replaced with ''.
 * @param questionData - Optional question data; defaults to `question.data`,
 *   or `{ type: 'simple' }` when that is missing too.
 * @returns A promise resolved with the worker's message and the database
 *   name. It is rejected when the assertion fails, when the worker emits
 *   'error', or when the worker exits with a non-zero code.
 */
function searchData(
  qdb: QuestionDb,
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<SearchResult> {
  return new Promise((resolve, reject) => {
    assert(question)
    logger.DebugLog('Searching for question', 'qdb search', 1)
    logger.DebugLog('Question:', 'qdb search', 2)
    logger.DebugLog(question, 'qdb search', 2)
    logger.DebugLog(`Subject name: ${subjName}`, 'qdb search', 2)
    logger.DebugLog('Data:', 'qdb search', 2)
    logger.DebugLog(questionData || question.data, 'qdb search', 2)

    // fill in the defaults for the optional inputs
    questionData = questionData || question.data || { type: 'simple' }
    if (!subjName) {
      subjName = ''
      logger.DebugLog('No subject name as param!', 'qdb search', 1)
    }
    question = simplifyQuestion(question)

    const searchThread = workerTs(searchDataWorkerFile, {
      workerData: { data: qdb.data, subjName, question, questionData },
    })

    searchThread.on('error', (err) => {
      logger.Log('Search Data Worker error!', logger.GetColor('redbg'))
      console.error(err)
      reject(err)
    })

    searchThread.on('exit', (code) => {
      logger.DebugLog('Search Data exit, code: ' + code, 'actions', 1)
      if (code === 0) {
        return
      }
      logger.Log(
        'Search Data Worker error! Exit code is not 0',
        logger.GetColor('redbg')
      )
      reject(new Error('Search Data Worker error! Exit code is not 0'))
    })

    searchThread.on('message', (result) => {
      logger.DebugLog(`Worker message arrived`, 'worker', 2)
      logger.DebugLog(result, 'worker', 3)
      logger.DebugLog(`Question result length: ${result.length}`, 'ask', 1)
      logger.DebugLog(result, 'ask', 2)

      logger.DebugLog(
        `QDB search result length: ${result.length}`,
        'qdb search',
        1
      )
      resolve({ result, dbName: qdb.name })
    })
  })
}
|
|
|
|
/**
 * Serialises a list of subjects to text, with an empty line between subjects.
 *
 * @param data - The subjects to serialise.
 * @returns The text form of all subjects.
 */
function dataToString(data: Array<Subject>): string {
  return data.map((subj) => subjectToString(subj)).join('\n\n')
}
|
|
|
|
// ------------------------------------------------------------------------
|
|
|
|
/**
 * Body of the search worker thread: searches the subjects for a question,
 * posts the sorted result to the parent thread and exits the worker.
 *
 * First only the subjects whose name (without its year prefix) is contained,
 * case-insensitively, in `subjName` are searched. When none of those hits
 * reaches `minMatchToNotSearchOtherSubjects`, the remaining subjects are
 * searched too.
 *
 * Two defects of the previous version are fixed here:
 * - the fallback searched every subject again, so hits from the subjects
 *   matched by name appeared twice in the result;
 * - only `result[0]` was checked against the threshold, but with several
 *   name-matched subjects the concatenated list is not sorted yet, so the
 *   first entry is not necessarily the best one.
 *
 * @param data - The subjects of the database.
 * @param subjName - Subject name used to narrow the search.
 * @param question - The question to look for.
 * @param questionData - Optional question data forwarded to `searchQuestion`.
 * @returns Nothing useful; the result is sent with `parentPort.postMessage`.
 */
function searchWorker(
  data: Array<Subject>,
  subjName: string,
  question: Question,
  questionData?: QuestionData
): any {
  const loweredSubjName = subjName.toLowerCase()

  // split the subjects into those matched by name and the rest
  const nameMatched: Array<Subject> = []
  const others: Array<Subject> = []
  data.forEach((subj) => {
    const nameMatches = loweredSubjName.includes(
      getSubjNameWithoutYear(subj.Name).toLowerCase()
    )
    if (nameMatches) {
      nameMatched.push(subj)
    } else {
      others.push(subj)
    }
  })

  let result: ReturnType<typeof searchQuestion> = []
  nameMatched.forEach((subj) => {
    logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
    result = result.concat(
      searchQuestion(subj, question, questionData, subjName)
    )
  })

  // FIXME: try to remove this? but this is also a good backup plan so idk
  // every hit has to be checked: the per-subject lists are sorted, but the
  // concatenated list is not
  const hasConfidentMatch = result.some(
    (hit) => hit.match >= minMatchToNotSearchOtherSubjects
  )
  if (!hasConfidentMatch) {
    logger.DebugLog(
      'Reqults length is zero when comparing names, trying all subjects',
      'searchworker',
      1
    )

    // only the subjects not searched above, otherwise hits would be duplicated
    let fallback: ReturnType<typeof searchQuestion> = []
    others.forEach((subj) => {
      fallback = fallback.concat(
        searchQuestion(subj, question, questionData, subjName)
      )
    })

    if (fallback.length > 0) {
      logger.DebugLog(
        `FIXME: '${subjName}' gave no result but '' did!`,
        'searchworker',
        1
      )
      console.error(`FIXME: '${subjName}' gave no result but '' did!`)
    }
    result = result.concat(fallback)
  }

  // best match first
  result.sort((a, b) => b.match - a.match)

  parentPort.postMessage(result)
  process.exit(0)
}
|
|
|
|
/**
 * Starts a worker thread that runs a TypeScript file.
 *
 * The worker is created from an inline script (`eval: true`) that registers
 * `ts-node` and then `require`s the file. The file path travels to the worker
 * in `workerData.__filename` and is deleted from `workerData` before the file
 * is loaded. The passed options object is modified in place.
 *
 * @param file - Path of the TypeScript file the worker should load.
 * @param wkOpts - Worker options; `eval` and `workerData.__filename` are set
 *   on it.
 * @returns The created `Worker`.
 */
const workerTs = (file: string, wkOpts: any) => {
  const bootstrap = `
    const wk = require('worker_threads');
    require('ts-node').register();
    let file = wk.workerData.__filename;
    delete wk.workerData.__filename;
    require(file);
  `

  wkOpts.eval = true
  wkOpts.workerData = wkOpts.workerData || {}
  wkOpts.workerData.__filename = file

  return new Worker(bootstrap, wkOpts)
}
|
|
|
|
// Worker entry point: when this file is loaded inside a worker thread, run the
// search with the parameters received through `workerData`.
if (!isMainThread) {
  logger.DebugLog(`Starting search worker ...`, 'searchworker', 1)
  searchWorker(
    workerData.data,
    workerData.subjName,
    workerData.question,
    workerData.questionData
  )
}
|
|
// ------------------------------------------------------------------------
|
|
|
|
// Named exports of this module (`SearchResult` is exported at its declaration).
export {
  // constants
  minMatchAmmount,
  // string helpers
  getSubjNameWithoutYear,
  // question database operations
  createQuestion,
  addQuestion,
  searchData,
  searchDatas,
  dataToString,
}
|