Files
mrfrys-node-server/src/utils/classes.ts
T
2020-11-26 09:16:12 +01:00

601 lines
14 KiB
TypeScript
Executable File

import { Worker, isMainThread, parentPort, workerData } from 'worker_threads'
import logger from './logger'
import {
Question,
QuestionDb,
QuestionData,
Subject,
} from '../types/basicTypes'
// A Question extended with a numeric match score.
// NOTE(review): presumably the score is a 0-100 percent like the values produced by the
// compare functions in this file — confirm against the code that builds search results.
interface SearchResultQuestion extends Question {
match: number
}
// Outcome of searching a single question database; this is the shape searchData() resolves with.
export interface SearchResult {
// Matches found in the database
result: Array<SearchResultQuestion>
// Filled from the searched database's `name` field
dbName: string
}
// Module path handed to workerTs() as the file the search worker thread has to load.
// NOTE(review): the path is relative — presumably resolved against the process working
// directory and meant to point back at this very file; confirm.
const searchDataWorkerFile = './src/utils/classes.ts'
// Minimal runtime guard: does nothing for a truthy value, throws an Error for a falsy one.
const assert = (val) => {
  if (val) {
    return
  }
  throw new Error('Assertion failed')
}
// Answer prefixes (Hungarian and English forms of "the correct answer is ...") plus the
// apostrophe character. answerPreProcessor() removes every occurrence of these from an answer.
// removeStuff() compiles each entry into a RegExp, so the entries are regex sources.
const commonUselessAnswerParts = [
'A helyes válasz az ',
'A helyes válasz a ',
'A helyes válaszok: ',
'A helyes válaszok:',
'A helyes válasz: ',
'A helyes válasz:',
'The correct answer is:',
"'",
]
// Regex sources that simplifyStringForComparison() deletes before two strings are compared.
// NOTE(review): the patterns are applied in order, so '\\.' has already removed every dot by
// the time '\\s*\\.' runs — the last entry looks redundant; confirm.
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
// Regex sources that removeSpecialChars() replaces with a single space
const specialChars = ['&', '\\+']
/* Percent points subtracted from a match for every word of length difference (see compareString) */
const lengthDiffMultiplier = 10
/* Minimum amount to consider that two questions match during answering: searchQuestion() only
 * keeps candidates whose average match is above this value */
const minMatchAmmount = 60
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90
// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------
// Exported
// ---------------------------------------------------------------------------------------------------------
// Drops a leading "YYYY/YY/N" term marker from a subject name written as "<marker> - <name>".
// Only the part between the first and the second ' - ' separator is returned; when there is no
// marker, or nothing follows it, the name is handed back unchanged.
function getSubjNameWithoutYear(subjName: string): string {
  const [prefix, nameAfterPrefix] = subjName.split(' - ')
  const hasYearPrefix = /^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i.test(prefix)
  if (!hasYearPrefix) {
    return subjName
  }
  return nameAfterPrefix || subjName
}
// Not exported
// ---------------------------------------------------------------------------------------------------------
// Replaces every match of each pattern in `removableStrings` (regex sources, applied globally
// and in order) with `toReplace`; matches are deleted when no replacement is given.
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
) {
  const replacement = toReplace || ''
  return removableStrings.reduce(
    (current, pattern) =>
      current.replace(new RegExp(pattern, 'g'), replacement),
    value
  )
}
// Removes whitespace from the beginning and the end, and collapses every run of whitespace
// (including tabs, line breaks and non-breaking spaces) into one regular space.
// Throws when `toremove` is falsy.
function removeUnnecesarySpaces(toremove: string) {
  assert(toremove)
  // A single regex pass instead of a "repeat while a space is present" loop: the pass always
  // terminates, even for input that contains only single spaces such as 'a b'.
  return toremove.replace(/\s+/g, ' ').trim()
}
// Prepares a string for comparison: tidies its whitespace, lowercases it and then deletes the
// patterns listed in commonUselessStringParts. Throws when `value` is falsy.
function simplifyStringForComparison(value: string) {
  assert(value)
  const tidied = removeUnnecesarySpaces(value)
  const lowered = tidied.toLowerCase()
  return removeStuff(lowered, commonUselessStringParts)
}
// Swaps every character matched by `specialChars` for a single space.
// Throws when `value` is falsy.
function removeSpecialChars(value: string) {
  assert(value)
  const replacement = ' '
  return removeStuff(value, specialChars, replacement)
}
// Turns every whitespace character (non-breaking spaces, tabs, line breaks, ...) into a plain
// space, one for one; runs are not collapsed. Throws when `input` is falsy.
function normalizeSpaces(input: string) {
  assert(input)
  const anyWhitespace = /\s/g
  return input.replace(anyWhitespace, ' ')
}
// Scores how well two strings match, as a percent between 0 and 100.
// - both strings missing/empty -> 100, exactly one missing/empty -> 0
// - otherwise: the rounded share of string1's words that also occur in string2, reduced by
//   lengthDiffMultiplier for every word the two word counts differ by, never below 0
function compareString(string1: string, string2: string) {
  const firstMissing = !string1
  const secondMissing = !string2
  if (firstMissing || secondMissing) {
    return firstMissing && secondMissing ? 100 : 0
  }

  const words1 = simplifyStringForComparison(string1).split(' ')
  const words2 = simplifyStringForComparison(string2).split(' ')

  const matchedWords = words1.filter((word) => words2.includes(word)).length
  const rawPercent = Math.round(
    parseFloat(((matchedWords / words1.length) * 100).toFixed(2))
  )
  const lengthPenalty =
    Math.abs(words2.length - words1.length) * lengthDiffMultiplier

  return Math.max(0, rawPercent - lengthPenalty)
}
// Deletes the boilerplate listed in commonUselessAnswerParts from an answer.
// Throws when `value` is falsy.
function answerPreProcessor(value: string) {
  assert(value)
  const withoutBoilerplate = removeStuff(value, commonUselessAnswerParts)
  return withoutBoilerplate
}
// Strips a leading answer label: 'a. pécsi sör' -> 'pécsi sör'
// The text is split on '. '; when the first piece is shorter than two characters and more
// pieces follow, that first piece is dropped and the remaining pieces are joined with a space.
// Any other non-empty input is returned as is; a falsy input yields undefined.
function removeAnswerLetters(value: string) {
  if (!value) {
    return
  }
  const [label, ...rest] = value.split('. ')
  const hasLetterPrefix = rest.length > 0 && label.length < 2
  return hasLetterPrefix ? rest.join(' ') : value
}
// Runs `value` through the functions in `mods` from first to last, feeding each one the result
// of the previous, and returns the final result. A falsy `value` yields undefined.
function simplifyQA(value: string, mods: Array<Function>) {
  if (!value) {
    return
  }
  let current = value
  for (const mod of mods) {
    current = mod(current)
  }
  return current
}
// Normalises an answer: special characters, surplus whitespace, answer boilerplate and a leading
// answer label are removed, in that order. A falsy `value` is returned untouched.
function simplifyAnswer(value: string) {
  if (!value) {
    return value
  }
  const answerMods = [
    removeSpecialChars,
    removeUnnecesarySpaces,
    answerPreProcessor,
    removeAnswerLetters,
  ]
  return simplifyQA(value, answerMods)
}
// Normalises a question given either as plain text or as a Question object.
// - string: returns the simplified text
// - object: simplifies its non-empty Q and A fields IN PLACE and returns the same object
// - falsy input: returns undefined
// The same three steps are used for text, Q and A: special characters, surplus whitespace and a
// leading answer label are removed.
function simplifyQuestion(question: Question | string) {
  if (!question) {
    return
  }
  const mods = [removeSpecialChars, removeUnnecesarySpaces, removeAnswerLetters]

  if (typeof question === 'string') {
    return simplifyQA(question, mods)
  }

  if (question.Q) {
    question.Q = simplifyQA(question.Q, mods)
  }
  if (question.A) {
    question.A = simplifyQA(question.A, mods)
  }
  return question
}
// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------
// Builds a Question from a raw question, a raw answer and the accompanying data; the question
// goes through simplifyQuestion() and the answer through simplifyAnswer().
// NOTE(review): when `question` is a Question object, simplifyQuestion() returns that object, so
// `Q` ends up holding an object rather than text — confirm callers only pass strings.
function createQuestion(
  question: Question | string,
  answer: string,
  data: QuestionData
): Question {
  const Q = simplifyQuestion(question)
  const A = simplifyAnswer(answer)
  return { Q, A, data }
}
// Compares the image lists of two question data objects as space-joined strings, preferring
// `hashedImages` over `images` on each side.
function compareImage(data: QuestionData, data2: QuestionData) {
  // TODO: img comparing (hashed images vs images)
  const [firstImages, secondImages] = [data, data2].map((d) =>
    d.hashedImages ? d.hashedImages : d.images
  )
  return compareString(firstImages.join(' '), secondImages.join(' '))
}
// Compares the `data` part of two questions.
// Returns -1 when both are of type 'simple' (nothing to compare), the compareImage() score when
// both are of type 'image', and 0 when the types differ, the type is not handled, or anything
// throws while comparing (the error is only logged).
function compareData(q1: Question, q2: Question) {
  try {
    const dataType = q1.data.type
    if (dataType !== q2.data.type) {
      return 0
    }
    switch (dataType) {
      case 'simple':
        return -1
      case 'image':
        return compareImage(q1.data, q2.data)
      default:
        logger.DebugLog(
          `Unhandled data type ${dataType}`,
          'Compare question data',
          1
        )
        logger.DebugLog(q1, 'Compare question data', 2)
    }
  } catch (error) {
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(error.message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
  }
  return 0
}
// Match percent between the question texts (Q) of two questions.
function compareQuestion(q1: Question, q2: Question) {
  const { Q: firstText } = q1
  const { Q: secondText } = q2
  return compareString(firstText, secondText)
}
// Match percent between the answers (A) of two questions.
function compareAnswer(q1: Question, q2: Question) {
  const { A: firstAnswer } = q1
  const { A: secondAnswer } = q2
  return compareString(firstAnswer, secondAnswer)
}
// Compares a stored question (q1) with a searched one (q2) and reports the individual scores
// together with their average.
// - q2 may also arrive as plain text; it is then wrapped into an object using `data`
// - the answer score only counts towards the average when the searched question has an answer
// - the data score only counts when compareData() did not return -1
// - `q2subjName` is passed through as `matchedSubjName`; `q1subjName` is not used here
// Throws when `data`, `q1` or `q2` is falsy, or when `q1` is not an object.
function compareQuestionObj(
  q1: Question,
  q1subjName: string,
  q2: Question,
  q2subjName: string,
  data: QuestionData
) {
  assert(data)
  assert(q1)
  assert(typeof q1 === 'object')
  assert(q2)

  let qObj
  if (typeof q2 !== 'string') {
    qObj = q2
  } else {
    qObj = {
      Q: q2,
      data: data,
    }
  }

  const qMatch = compareQuestion(q1, qObj)
  const aMatch = compareAnswer(q1, qObj)
  // -1 if both questions are simple
  const dMatch = compareData(q1, qObj)

  // Collect only the scores that apply, then average them
  const scores = [qMatch]
  if (qObj.A) {
    scores.push(aMatch)
  }
  if (dMatch !== -1) {
    scores.push(dMatch)
  }
  const avg = scores.reduce((sum, score) => sum + score, 0) / scores.length

  return {
    qMatch,
    aMatch,
    dMatch,
    matchedSubjName: q2subjName,
    avg,
  }
}
// Serialises a question as "?<Q>" and "!<A>" lines; questions whose data type is not 'simple'
// get a third ">" line holding the JSON form of their data.
function questionToString(question: Question) {
  const { Q, A, data } = question
  const base = '?' + Q + '\n!' + A
  return data.type === 'simple' ? base : base + '\n>' + JSON.stringify(data)
}
// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------
// Compares `question` with every question of a subject and returns the candidates whose average
// match is above minMatchAmmount, best match first. Each entry carries the stored question (q),
// its average score (match) and the full score breakdown (detailedMatch).
// Throws when `question` is falsy.
function searchQuestion(
  subj: Subject,
  question: Question,
  questionData: QuestionData,
  subjName: string
) {
  assert(question)
  const matches = []
  for (const currentQuestion of subj.Questions) {
    const detailedMatch = compareQuestionObj(
      currentQuestion,
      subjName,
      question,
      subj.Name,
      questionData
    )
    if (!(detailedMatch.avg > minMatchAmmount)) {
      continue
    }
    matches.push({
      q: currentQuestion,
      match: detailedMatch.avg,
      detailedMatch,
    })
  }
  // Highest score first
  return matches.sort((first, second) => second.match - first.match)
}
// Serialises a subject: a "+<Name>" header line followed by its questions, one
// questionToString() block per question, separated by line breaks.
function subjectToString(subj: Subject) {
  const { Questions, Name } = subj
  const lines = Questions.map((question) => questionToString(question))
  return '+' + Name + '\n' + lines.join('\n')
}
// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------
// Adds a question to the subject list, mutating `data`.
// The question goes into the first subject whose name (without its year prefix, compared
// case-insensitively) is contained in `subj`; when there is none, a new subject called `subj`
// is appended with the question as its only entry.
// Throws when `data`, `subj` or `question` is falsy, or when `question` is not an object.
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)
  assert(data)
  assert(subj)
  assert(question)
  assert(typeof question === 'object')

  const subjectIndex = data.findIndex((existing) =>
    subj
      .toLowerCase()
      .includes(getSubjNameWithoutYear(existing.Name).toLowerCase())
  )

  if (subjectIndex === -1) {
    logger.DebugLog('Creating new subject for question', 'qdb add', 1)
    data.push({
      Name: subj,
      Questions: [question],
    })
    return
  }

  logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
  data[subjectIndex].Questions.push(question)
}
// Runs searchData() on every database and resolves with one SearchResult per database, in the
// order of `data`; rejects as soon as any of the searches rejects.
function searchDatas(
  data: Array<QuestionDb>,
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<Array<SearchResult>> {
  const pendingSearches = data.map((db: QuestionDb) =>
    searchData(db, question, subjName, questionData)
  )
  return Promise.all(pendingSearches)
}
// FIXME: remove questionData, make question only Question type
// Searches one question database on a worker thread.
// - `questionData` falls back to `question.data`, then to `{ type: 'simple' }`
// - a missing `subjName` becomes ''
// - the question is passed through simplifyQuestion() before it is handed to the worker
// Resolves with the worker's message and the database name. Rejects when `question` is falsy,
// when the worker reports an error, or when it exits with a non-zero code. Nothing settles the
// promise if the worker exits with code 0 without having posted a message.
function searchData(
  qdb: QuestionDb,
  question: any,
  subjName: string,
  questionData?: QuestionData
): Promise<SearchResult> {
  return new Promise((resolve, reject) => {
    assert(question)
    const searchTag = 'qdb search'
    logger.DebugLog('Searching for question', searchTag, 1)
    logger.DebugLog('Question:', searchTag, 2)
    logger.DebugLog(question, searchTag, 2)
    logger.DebugLog(`Subject name: ${subjName}`, searchTag, 2)
    logger.DebugLog('Data:', searchTag, 2)
    logger.DebugLog(questionData || question.data, searchTag, 2)

    const effectiveData = questionData || question.data || { type: 'simple' }
    let effectiveSubjName = subjName
    if (!effectiveSubjName) {
      effectiveSubjName = ''
      logger.DebugLog('No subject name as param!', searchTag, 1)
    }
    const simplifiedQuestion = simplifyQuestion(question)

    const worker = workerTs(searchDataWorkerFile, {
      workerData: {
        data: qdb.data,
        subjName: effectiveSubjName,
        question: simplifiedQuestion,
        questionData: effectiveData,
      },
    })

    worker.on('error', (err) => {
      logger.Log('Search Data Worker error!', logger.GetColor('redbg'))
      console.error(err)
      reject(err)
    })

    worker.on('exit', (code) => {
      logger.DebugLog('Search Data exit, code: ' + code, 'actions', 1)
      if (code === 0) {
        return
      }
      const exitMessage = 'Search Data Worker error! Exit code is not 0'
      logger.Log(exitMessage, logger.GetColor('redbg'))
      reject(new Error(exitMessage))
    })

    worker.on('message', (result) => {
      logger.DebugLog(`Worker message arrived`, 'worker', 2)
      logger.DebugLog(result, 'worker', 3)
      logger.DebugLog(`Question result length: ${result.length}`, 'ask', 1)
      logger.DebugLog(result, 'ask', 2)
      logger.DebugLog(
        `QDB search result length: ${result.length}`,
        searchTag,
        1
      )
      resolve({
        result,
        dbName: qdb.name,
      })
    })
  })
}
// Serialises a list of subjects: one subjectToString() block per subject, separated by an
// empty line.
function dataToString(data: Array<Subject>): string {
  return data.map((subj) => subjectToString(subj)).join('\n\n')
}
// ------------------------------------------------------------------------
// Worker-thread side of a search.
// Pass 1 searches the subjects whose name (without year prefix, case-insensitive) is contained
// in `subjName`. Pass 2 runs when pass 1 produced no hit reaching
// minMatchToNotSearchOtherSubjects and searches every subject that pass 1 did not cover.
// The combined hits are sorted best first and posted to the parent thread, after which the
// worker exits with code 0 — so this function does not return to its caller.
function searchWorker(
  data: Array<Subject>,
  subjName: string,
  question: Question,
  questionData?: QuestionData
): any {
  let result = []
  // Subjects handled in pass 1; pass 2 skips them so that no hit is reported twice
  const alreadySearched = new Set<Subject>()

  data.forEach((subj) => {
    if (
      subjName
        .toLowerCase()
        .includes(getSubjNameWithoutYear(subj.Name).toLowerCase())
    ) {
      logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
      alreadySearched.add(subj)
      result = result.concat(
        searchQuestion(subj, question, questionData, subjName)
      )
    }
  })

  // FIXME: try to remove this? but this is also a good backup plan so idk
  // Every hit has to be inspected here: each searchQuestion() result is sorted on its own, but
  // the concatenation of several of them is not, so the first element is not necessarily the
  // best one.
  const hasGoodEnoughMatch = result.some(
    (hit) => hit.match >= minMatchToNotSearchOtherSubjects
  )
  if (!hasGoodEnoughMatch) {
    logger.DebugLog(
      'Reqults length is zero when comparing names, trying all subjects',
      'searchworker',
      1
    )
    data.forEach((subj) => {
      if (alreadySearched.has(subj)) {
        return
      }
      result = result.concat(
        searchQuestion(subj, question, questionData, subjName)
      )
    })
    if (result.length > 0) {
      logger.DebugLog(
        `FIXME: '${subjName}' gave no result but '' did!`,
        'searchworker',
        1
      )
      console.error(`FIXME: '${subjName}' gave no result but '' did!`)
    }
  }

  // Best match first across all searched subjects
  result = result.sort((q1, q2) => {
    if (q1.match < q2.match) {
      return 1
    } else if (q1.match > q2.match) {
      return -1
    } else {
      return 0
    }
  })

  parentPort.postMessage(result)
  process.exit(0)
}
// Starts a worker thread that can run a TypeScript file: the worker evaluates a small bootstrap
// script which registers ts-node and then requires `file`.
// The file path travels inside workerData as `__filename` and is deleted from it by the
// bootstrap before the file is loaded.
// Note: `wkOpts` is modified in place (`eval`, `workerData`, `workerData.__filename`).
const workerTs = (file: string, wkOpts: any) => {
  const bootstrapScript = `
const wk = require('worker_threads');
require('ts-node').register();
let file = wk.workerData.__filename;
delete wk.workerData.__filename;
require(file);
`
  // The first Worker argument is source code, not a path
  wkOpts.eval = true
  if (!wkOpts.workerData) {
    wkOpts.workerData = {}
  }
  wkOpts.workerData.__filename = file
  return new Worker(bootstrapScript, wkOpts)
}
// Worker entry point: when this module is loaded inside a worker thread, run the search with
// the parameters received through workerData.
if (!isMainThread) {
  logger.DebugLog(`Starting search worker ...`, 'searchworker', 1)
  searchWorker(
    workerData.data,
    workerData.subjName,
    workerData.question,
    workerData.questionData
  )
}
// ------------------------------------------------------------------------
// Values exported from this module (the SearchResult interface is exported at its declaration)
export {
minMatchAmmount,
getSubjNameWithoutYear,
createQuestion,
addQuestion,
searchData,
searchDatas,
dataToString,
}