mrfrys-node-server/src/utils/classes.ts

638 lines
15 KiB
TypeScript
Executable file

import { isMainThread, parentPort, workerData } from 'worker_threads'
import logger from './logger'
import { Question, QuestionData, Subject } from '../types/basicTypes'
// A Question extended with a numeric match score.
interface SearchResultQuestion extends Question {
// NOTE(review): presumably the match percentage computed for this question during a
// search - confirm against the code that builds SearchResult objects
match: number
}
// A list of matched questions paired with a database name.
// NOTE(review): nothing in the reviewed part of this file constructs this type; presumably
// `dbName` names the question database the results came from - confirm against the callers
export interface SearchResult {
result: Array<SearchResultQuestion>
dbName: string
}
// Minimal runtime assertion: does nothing for truthy values, throws a generic Error for
// falsy ones (undefined, null, '', 0, false, NaN)
const assert = (val) => {
  if (val) {
    return
  }
  throw new Error('Assertion failed')
}
// Boilerplate fragments stripped from answers by answerPreProcessor. Every entry is used as a
// regular expression source by removeStuff, and all of its occurrences are removed.
const commonUselessAnswerParts = [
'A helyes válasz az ',
'A helyes válasz a ',
'A helyes válaszok: ',
'A helyes válaszok:',
'A helyes válasz: ',
'A helyes válasz:',
'The correct answer is:',
"'",
]
// Regular expression sources removed from strings by simplifyStringForComparison
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
// Regular expression sources that removeSpecialChars replaces with a single space
const specialChars = ['&', '\\+']
/* Percent subtracted from a match for every word of length difference between the two
* compared strings (see compareString) */
const lengthDiffMultiplier = 10
/* Minimum amount to consider that two questions match during answering. searchSubject only
* keeps results whose average match is strictly above this value */
const minMatchAmmount = 70
/* If all of the results are below this match percent (when only one subject is searched due to
* subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90
// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------
// Exported
// ---------------------------------------------------------------------------------------------------------
// Strips a leading 'YYYY/YY/N' semester prefix from a subject name.
// The name is split on ' - '; when the first part is such a prefix, the second part is
// returned (any further ' - ' separated parts are dropped). In every other case, including
// a prefix with nothing after it, the name is returned unchanged.
function getSubjNameWithoutYear(subjName: string): string {
  const [prefix, rest] = subjName.split(' - ')
  const hasYearPrefix = /^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i.test(prefix)
  return hasYearPrefix && rest ? rest : subjName
}
// Not exported
// ---------------------------------------------------------------------------------------------------------
// Replaces every occurrence of each pattern in `removableStrings` inside `value`.
// The patterns are regular expression sources (so special characters must arrive already
// escaped) and are applied one after the other, in order. Matches are replaced with
// `toReplace`, or removed when it is omitted or empty.
function removeStuff(
  value: string,
  removableStrings: Array<string>,
  toReplace?: string
) {
  const replacement = toReplace || ''
  return removableStrings.reduce((current, pattern) => {
    return current.replace(new RegExp(pattern, 'g'), replacement)
  }, value)
}
// Removes whitespace from the beginning and the end, and collapses every run of whitespace
// (including tabs, newlines and non-breaking spaces) into a single regular space.
// Throws 'Assertion failed' when the input is empty or otherwise falsy.
function removeUnnecesarySpaces(toremove: string) {
  assert(toremove)
  // One pass does both the whitespace normalization and the collapsing. The previous
  // loop-until-clean form depended on its loop condition and its replacement agreeing on
  // what still has to be removed, and could spin forever when they did not.
  return toremove.replace(/\s+/g, ' ').trim()
}
// Prepares a string for word-by-word comparison: whitespace is normalized, the text is
// lower-cased, and the punctuation listed in commonUselessStringParts is removed.
// Throws when the input is falsy.
function simplifyStringForComparison(value: string) {
  assert(value)
  const normalized = removeUnnecesarySpaces(value).toLowerCase()
  return removeStuff(normalized, commonUselessStringParts)
}
// Replaces every character listed in specialChars with a single space.
// Throws when the input is falsy.
function removeSpecialChars(value: string) {
  assert(value)
  const replacement = ' '
  return removeStuff(value, specialChars, replacement)
}
// Turns every single whitespace character (tab, newline, non-breaking space, ...) into a
// regular space. Runs of whitespace keep their length. Throws when the input is falsy.
function normalizeSpaces(input: string) {
  assert(input)
  return input.split(/\s/).join(' ')
}
// Scores how similar two strings are, as a percentage.
// Two missing (falsy) strings count as a perfect match (100), exactly one missing string as
// no match (0). Otherwise both strings are simplified and split into words; the score is the
// rounded percentage of words of the first string that also appear in the second, minus
// lengthDiffMultiplier for every word of difference in their word counts, never below 0.
function compareString(string1: string, string2: string) {
  const firstMissing = !string1
  const secondMissing = !string2
  if (firstMissing && secondMissing) {
    return 100
  }
  if (firstMissing || secondMissing) {
    return 0
  }

  const words1 = simplifyStringForComparison(string1).split(' ')
  const words2 = simplifyStringForComparison(string2).split(' ')

  const matchedWords = words1.filter((word) => words2.includes(word)).length
  const rawPercent = Math.round(
    parseFloat(((matchedWords / words1.length) * 100).toFixed(2))
  )
  const lengthPenalty =
    Math.abs(words2.length - words1.length) * lengthDiffMultiplier

  return Math.max(rawPercent - lengthPenalty, 0)
}
// Strips the boilerplate listed in commonUselessAnswerParts from an answer.
// Falsy input is handed back untouched.
function answerPreProcessor(value: string) {
  return value ? removeStuff(value, commonUselessAnswerParts) : value
}
// Drops a leading answer letter: 'a. pécsi sör' -> 'pécsi sör'
// The value is split on '. '; when the first part is at most one character long and
// something follows it, that first part is dropped and the rest is joined with single
// spaces (so later '. ' separators become ' '). Otherwise the value is returned unchanged.
// Returns undefined for falsy input.
function removeAnswerLetters(value: string) {
  if (!value) {
    return
  }
  const [prefix, ...rest] = value.split('. ')
  const hasLetterPrefix = prefix.length < 2 && rest.length > 0
  return hasLetterPrefix ? rest.join(' ') : value
}
// Pipes `value` through every function in `mods`, in order: each one receives the output of
// the previous one. Returns undefined for falsy input, and the input itself when `mods` is
// empty.
function simplifyQA(value: string, mods: Array<Function>) {
  if (!value) {
    return
  }
  let current = value
  mods.forEach((mod) => {
    current = mod(current)
  })
  return current
}
// Normalizes an answer. In order: special characters become spaces, whitespace is tidied,
// answer boilerplate is stripped and a leading answer letter is dropped.
// Falsy input is handed back untouched.
function simplifyAnswer(value: string) {
  if (!value) {
    return value
  }
  const steps = [
    removeSpecialChars,
    removeUnnecesarySpaces,
    answerPreProcessor,
    removeAnswerLetters,
  ]
  return simplifyQA(value, steps)
}
// Normalizes question text: special characters become spaces, whitespace is tidied and a
// leading answer letter is dropped.
// - string input: returns the normalized string
// - Question input: normalizes its Q and A fields IN PLACE (only when they are truthy) and
//   returns the very same object
// - falsy input: returns undefined
function simplifyQuestion(question: Question | string) {
  if (!question) {
    return
  }

  const steps = [removeSpecialChars, removeUnnecesarySpaces, removeAnswerLetters]
  const simplify = (text: string) => simplifyQA(text, steps)

  if (typeof question === 'string') {
    return simplify(question)
  }

  if (question.Q) {
    question.Q = simplify(question.Q)
  }
  if (question.A) {
    question.A = simplify(question.A)
  }
  return question
}
// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------
// Builds a Question from raw parts.
// `question` is run through simplifyQuestion, `answer` through simplifyAnswer (a falsy
// answer is stored as undefined), and `data` is attached as is.
function createQuestion(
  question: Question | string,
  answer: string,
  data: QuestionData
): Question {
  // The question is simplified before the answer, same as the order of the fields below
  const simplifiedQuestion = simplifyQuestion(question)
  const simplifiedAnswer = answer ? simplifyAnswer(answer) : undefined
  return {
    Q: simplifiedQuestion,
    A: simplifiedAnswer,
    data,
  }
}
// Scores how well the images attached to two questions match.
// When both questions carry hashed images, the space-joined hashes are compared with
// compareString. Otherwise the space-joined `images` entries are compared and 10 is
// subtracted from the score, so this fallback can return a value as low as -10.
// Throws when the fallback is taken and either `images` list is missing; compareData catches
// that and treats it as no match.
function compareImage(data: QuestionData, data2: QuestionData) {
  if (data.hashedImages && data2.hashedImages) {
    // Compare the hashes of the FIRST question against the hashes of the SECOND one.
    // Comparing a list with itself would always produce a perfect score.
    return compareString(
      data.hashedImages.join(' '),
      data2.hashedImages.join(' ')
    )
  }
  return compareString(data.images.join(' '), data2.images.join(' ')) - 10
}
// Compares the `data` part of two questions.
// Returns:
//   -1  when both are 'simple' questions (there is no data to compare)
//   the compareImage score when both are 'image' questions
//   0   when the types differ, the type is not handled, or anything throws along the way
//       (the error is logged, never rethrown)
function compareData(q1: Question, q2: Question) {
  try {
    const dataType = q1.data.type
    if (dataType !== q2.data.type) {
      return 0
    }

    switch (dataType) {
      case 'simple':
        return -1
      case 'image':
        return compareImage(q1.data, q2.data)
      default:
        logger.DebugLog(
          `Unhandled data type ${dataType}`,
          'Compare question data',
          1
        )
        logger.DebugLog(q1, 'Compare question data', 2)
    }
  } catch (error) {
    logger.DebugLog('Error comparing data', 'Compare question data', 1)
    logger.DebugLog(error.message, 'Compare question data', 1)
    logger.DebugLog(error, 'Compare question data', 2)
    console.error(error)
  }
  // Unhandled type or error
  return 0
}
// Match percentage between the question texts (Q) of two questions
function compareQuestion(q1: Question, q2: Question) {
  const firstText = q1.Q
  const secondText = q2.Q
  return compareString(firstText, secondText)
}
// Match percentage between the answers (A) of two questions
function compareAnswer(q1: Question, q2: Question) {
  const firstAnswer = q1.A
  const secondAnswer = q2.A
  return compareString(firstAnswer, secondAnswer)
}
// Compares two questions and reports how well they match.
//
// q1, q2: the questions to compare; both must be objects
// q1subjName: accepted for call-site symmetry, not used by the comparison
// q2subjName: echoed back as `matchedSubjName` in the result
// data: must be neither undefined nor null; only validated, never read
//
// Returns { qMatch, aMatch, dMatch, matchedSubjName, avg } where
//   qMatch: question text match percent
//   aMatch: answer match percent, 0 when q2 has no answer
//   dMatch: compareData result (-1 when both questions are simple)
//   avg:    average of the scores that apply: qMatch always, aMatch only when q2 has an
//           answer, dMatch only when it is not -1
// Throws 'Assertion failed' when any precondition above is violated.
function compareQuestionObj(
  q1: Question,
  q1subjName: string,
  q2: Question,
  q2subjName: string,
  data: QuestionData
) {
  // Both conditions must hold. Joining them with || is always true, which made this check
  // a no-op.
  assert(data !== undefined && data !== null)
  assert(q1)
  assert(typeof q1 === 'object')
  assert(q2)
  assert(typeof q2 === 'object')

  const qMatch = compareQuestion(q1, q2)
  const aMatch = q2.A ? compareAnswer(q1, q2) : 0
  // -1 if both questions are simple
  const dMatch = compareData(q1, q2)

  // Only the scores that are meaningful for this pair take part in the average
  const scores = [qMatch]
  if (q2.A) {
    scores.push(aMatch)
  }
  if (dMatch !== -1) {
    scores.push(dMatch)
  }
  const avg = scores.reduce((sum, score) => sum + score, 0) / scores.length

  return {
    qMatch: qMatch,
    aMatch: aMatch,
    dMatch: dMatch,
    matchedSubjName: q2subjName,
    avg: avg,
  }
}
// Serializes a question as text:
//   ?<question>
//   !<answer>
//   ><data as JSON>      (this line is only present when data.type is not 'simple')
function questionToString(question: Question) {
  const { Q, A, data } = question
  const questionAndAnswer = '?' + Q + '\n!' + A
  if (data.type === 'simple') {
    return questionAndAnswer
  }
  return questionAndAnswer + '\n>' + JSON.stringify(data)
}
// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------
// Searches one subject for questions matching `question`.
//
// Every stored question is compared with compareQuestionObj; those whose average match is
// strictly above minMatchAmmount are collected as { q, match, detailedMatch }. When
// `searchTillMatchPercent` is given, the scan stops right after the first question whose
// average match reaches it.
//
// Returns the collected matches, best match first. Throws when `question` is falsy.
function searchSubject(
  subj: Subject,
  question: Question,
  subjName: string,
  searchTillMatchPercent?: number
) {
  assert(question)
  const matches = []

  // `some` stops iterating as soon as the callback reports that the target was reached
  subj.Questions.some((currentQuestion) => {
    const detailedMatch = compareQuestionObj(
      currentQuestion,
      subjName,
      question,
      subj.Name,
      question.data
    )
    const { avg } = detailedMatch

    if (avg > minMatchAmmount) {
      matches.push({
        q: currentQuestion,
        match: avg,
        detailedMatch: detailedMatch,
      })
    }

    return Boolean(searchTillMatchPercent && avg >= searchTillMatchPercent)
  })

  // Highest match first
  return matches.sort((q1, q2) => q2.match - q1.match)
}
// Serializes a subject as text: a '+<name>' header line followed by every question (see
// questionToString), separated by newlines.
function subjectToString(subj: Subject) {
  const { Name, Questions } = subj
  const questionLines = Questions.map((question) => questionToString(question))
  return '+' + Name + '\n' + questionLines.join('\n')
}
// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------
// Adds `question` to the question database `data`, mutating it in place.
//
// The question goes into the first subject whose name (without its year prefix) is contained
// in `subj`, compared case-insensitively. When no subject qualifies, a new subject named
// `subj` is appended holding only this question.
// Throws when `data`, `subj` or `question` is falsy, or `question` is not an object.
function addQuestion(
  data: Array<Subject>,
  subj: string,
  question: Question
): void {
  logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
  logger.DebugLog(question, 'qdb add', 3)
  assert(data)
  assert(subj)
  assert(question)
  assert(typeof question === 'object')

  // FIXME: this only adds to the first matched subject name. Check if this doesnt cause any bugs
  const existingSubject = data.find((candidate) => {
    const candidateName = getSubjNameWithoutYear(candidate.Name).toLowerCase()
    return subj.toLowerCase().includes(candidateName)
  })

  if (existingSubject) {
    logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
    existingSubject.Questions.push(question)
    return
  }

  logger.DebugLog('Creating new subject for question', 'qdb add', 1)
  data.push({
    Name: subj,
    Questions: [question],
  })
}
// Turns search input into a simplified Question.
//
// question: a Question object (used as is, `data` is then ignored), or the question text
// data: the question data, either as an object or as a JSON string; only used when
//       `question` is text
//
// When `data` is a string that is not valid JSON, the failure is logged and the question is
// created without data. The question is passed through simplifyQuestion before it is
// returned, which modifies a passed-in Question object in place.
function prepareQuestion(
  question: string | Question,
  data: string | QuestionData
): Question {
  let preparedQuestion: Question
  if (typeof question === 'object') {
    preparedQuestion = question
  } else {
    let parsedData
    if (typeof data === 'string') {
      try {
        parsedData = JSON.parse(data)
      } catch (err) {
        // Keep going without data, as before, but leave a trace: callers that require
        // data would otherwise fail later without any hint about the cause
        logger.DebugLog(
          'Failed to parse question data as JSON: ' +
            (err instanceof Error ? err.message : String(err)),
          'prepare question',
          1
        )
      }
    } else if (typeof data === 'object') {
      parsedData = data
    }
    preparedQuestion = createQuestion(question, null, parsedData)
  }
  return simplifyQuestion(preparedQuestion)
}
// Serializes a whole question database as text: every subject rendered with
// subjectToString, with an empty line between subjects.
function dataToString(data: Array<Subject>): string {
  return data.map((subj) => subjectToString(subj)).join('\n\n')
}
// Searches a question database for matches of a question.
//
// data: the subjects to search in
// subjName: subject name of the searched question. Subjects whose name (without the year
//           prefix) is contained in it, case-insensitively, are searched first
// question: the searched question, as text or as a Question object
// questionData: data of the searched question, see prepareQuestion
// searchTillMatchPercent: when given, searching stops as soon as a subject yields a match of
//                         at least this percent
// searchInAllIfNoResult: when truthy, and the name-matched subjects gave no result or only
//                        results below minMatchToNotSearchOtherSubjects, the remaining
//                        subjects are searched too
//
// Returns the matches ({ q, match, detailedMatch }) of every searched subject, best match
// first. Throws when the prepared question has no data.
function doSearch(
  data: Array<Subject>,
  subjName: string,
  question: Question | string,
  questionData?: QuestionData,
  searchTillMatchPercent?: number,
  searchInAllIfNoResult?: boolean
): any {
  const questionToSearch = prepareQuestion(question, questionData)
  assert(questionToSearch.data)

  let result = []
  // Subjects searched in the first pass, mapped to whether they produced a match that
  // reached searchTillMatchPercent. The second pass uses this to skip them: searching a
  // subject again would only add every one of its matches to the result a second time.
  const searchedSubjects = new Map<Subject, boolean>()

  // First pass: only the subjects whose name matches
  for (const subj of data) {
    const nameMatches = subjName
      .toLowerCase()
      .includes(getSubjNameWithoutYear(subj.Name).toLowerCase())
    if (!nameMatches) {
      continue
    }

    logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
    const subjRes = searchSubject(
      subj,
      questionToSearch,
      subjName,
      searchTillMatchPercent
    )
    result = result.concat(subjRes)

    const reachedTarget =
      Boolean(searchTillMatchPercent) &&
      subjRes.some((sr) => sr.match >= searchTillMatchPercent)
    searchedSubjects.set(subj, reachedTarget)
    if (reachedTarget) {
      break
    }
  }

  if (searchInAllIfNoResult) {
    // `result` is a concatenation of per-subject lists, so its first element is only the
    // best match of the first searched subject. Look at every result instead.
    const bestMatch = result.reduce(
      (best, current) => Math.max(best, current.match),
      -Infinity
    )

    if (
      result.length === 0 ||
      bestMatch < minMatchToNotSearchOtherSubjects
    ) {
      logger.DebugLog(
        'Reqults length is zero when comparing names, trying all subjects',
        'searchworker',
        1
      )

      // Second pass: all subjects, in order. Already searched ones are not searched again,
      // but still end the pass where searching them again would have ended it.
      for (const subj of data) {
        let reachedTarget = searchedSubjects.get(subj)
        if (reachedTarget === undefined) {
          const subjRes = searchSubject(
            subj,
            questionToSearch,
            subjName,
            searchTillMatchPercent
          )
          result = result.concat(subjRes)
          reachedTarget =
            Boolean(searchTillMatchPercent) &&
            subjRes.some((sr) => sr.match >= searchTillMatchPercent)
        }
        if (reachedTarget) {
          break
        }
      }
    }
  }

  // Highest match first
  return result.sort((q1, q2) => q2.match - q1.match)
}
// ---------------------------------------------------------------------------------------------------------
// Multi threaded stuff
// ---------------------------------------------------------------------------------------------------------
// When this module is loaded inside a worker thread it serves search requests coming from
// the parent thread. On the main thread nothing happens here.
//
// Handled messages:
//   { type: 'work', data }    search the selected question databases, post the result back
//   { type: 'update', qdbs }  replace the list of question databases
//   { type: 'newdb', newdb }  append one question database to the list
if (isMainThread) {
  // console.log('[THREAD]: Main thread!')
} else {
  const { workerIndex } = workerData
  let qdbs: Array<any> = workerData.initData

  logger.Log(
    `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty`
  )

  parentPort.on('message', (msg) => {
    switch (msg.type) {
      case 'work': {
        const {
          subjName,
          question,
          questionData,
          searchTillMatchPercent,
          searchInAllIfNoResult,
          searchIn,
          index,
        } = msg.data

        let searchResult = []
        try {
          qdbs.forEach((qdb) => {
            const isSelected =
              searchIn === 'all' || searchIn.includes(qdb.index)
            if (!isSelected) {
              return
            }

            const res = doSearch(
              qdb.data,
              subjName,
              question,
              questionData,
              searchTillMatchPercent,
              searchInAllIfNoResult
            )
            // Record on every match which question database it came from
            const taggedRes = res.map((x) => ({
              ...x,
              detailedMatch: {
                ...x.detailedMatch,
                qdb: qdb.name,
              },
            }))
            searchResult = searchResult.concat(taggedRes)
          })
        } catch (err) {
          // Whatever was collected before the error is still sent back below
          logger.Log('Error in worker thread!', logger.GetColor('redbg'))
          console.error(err)
        }

        // Highest match first
        const sortedResult = searchResult.sort(
          (q1, q2) => q2.match - q1.match
        )

        // ONDONE:
        const jobLabel = !isNaN(index) ? `#${index}` : ''
        parentPort.postMessage({
          msg: `From thread #${workerIndex}: job ${jobLabel}done`,
          workerIndex: workerIndex,
          result: sortedResult,
        })
        break
      }
      case 'update':
        qdbs = msg.qdbs
        break
      case 'newdb':
        qdbs.push(msg.newdb)
        break
    }
  })
}
// ------------------------------------------------------------------------
// Values exported from this module, next to the SearchResult interface exported at its
// declaration. The remaining functions are only used inside this file.
export {
minMatchAmmount,
getSubjNameWithoutYear,
createQuestion,
addQuestion,
dataToString,
}