Files
mrfrys-node-server/src/utils/classes.ts
T

992 lines
29 KiB
TypeScript
Executable File

/* ----------------------------------------------------------------------------
Question Server
GitLab: <https://gitlab.com/MrFry/mrfrys-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
// FIXME: this should be renamed to worker.ts or something
import { isMainThread, parentPort, workerData } from 'worker_threads'
import { recognizeTextFromBase64, tesseractLoaded } from './tesseract'
import logger from './logger'
import {
Question,
QuestionData,
QuestionDb,
Subject,
} from '../types/basicTypes'
import { editDb, Edits, updateQuestionsInArray } from './actions'
// import { TaskObject } from './workerPool'
/**
 * Message posted from a worker thread back to the main thread once a task
 * has been handled.
 */
export interface WorkerResult {
// Human readable status text naming the thread and the finished task
msg: string
// Index of the worker that produced the message
workerIndex: number
// Search hits for 'work' tasks, removed question indexes for 'dbClean' tasks;
// the other task types post no result
result?: SearchResultQuestion[] | number[][]
// Filled by 'work' tasks: true when the search threw inside the worker
error?: boolean
}
/** Per-aspect scores produced by compareQuestionObj for a pair of questions. */
interface DetailedMatch {
// Score of comparing the question texts
qMatch: number
// Score of comparing the answers; 0 when the second question has no answer
aMatch: number
// Score of comparing the question data; -1 when both questions are 'simple'
dMatch: number
// Subject name that was passed as q2subjName to compareQuestionObj
matchedSubjName: string
// Despite the name this is the minimum of the applicable scores, not their mean
avg: number
}
/** A single search hit. */
export interface SearchResultQuestion {
// The stored question that matched
q: Question
// Overall score of the hit: starts as detailedMatch.avg and may be lowered
// by setNoPossibleAnswersPenalties
match: number
// Score breakdown behind match
detailedMatch: DetailedMatch
}
// Answer fragments that are stripped from answers by answerPreProcessor.
// Each entry is handed to `new RegExp(entry, 'g')` in removeStuff, so the
// entries are regex sources rather than plain literals.
// NOTE(review): the entries look like Hungarian / English "the correct answer
// is" prefixes — confirm where they originate from.
const commonUselessAnswerParts = [
'A helyes válasz az ',
'A helyes válasz a ',
'A helyes válaszok: ',
'A helyes válaszok:',
'A helyes válasz: ',
'A helyes válasz:',
'The correct answer is:',
"'",
]
// const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
/* Percent subtracted from a compareString result for every word of length
 * difference between the two compared strings */
const lengthDiffMultiplier = 10
/* Amount subtracted from a hit's match and qMatch by setNoPossibleAnswersPenalties
 * when the hit does not cover every possible answer of the searched question */
export const noPossibleAnswerMatchPenalty = 5
/* Minimum amount to consider that two questions match during answering */
const minMatchAmmount = 75
// Ratio of matched words below which compareString stops comparing early.
// NOTE(review): this was documented as "minMatchAmmount / 100", but 0.7 is not
// 75 / 100 — confirm which value is intended.
const magicNumber = 0.7
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer.
 * cleanDb also uses it as the threshold above which a stored question counts as a duplicate */
const minMatchToNotSearchOtherSubjects = 90
// ---------------------------------------------------------------------------------------------------------
// String Utils
// ---------------------------------------------------------------------------------------------------------
// Exported
// ---------------------------------------------------------------------------------------------------------
/**
 * Strips a leading semester prefix from a subject name.
 *
 * Names shaped like '2020/21/1 - Subject' yield the part between the first
 * and second ' - ' separator; every other name is returned unchanged.
 */
function getSubjNameWithoutYear(subjName: string): string {
    const [prefix, name] = subjName.split(' - ')
    const hasYearPrefix = /^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i.test(prefix)
    if (!hasYearPrefix) {
        return subjName
    }
    // An empty or missing name part falls back to the full input
    return name || subjName
}
// Not exported
// ---------------------------------------------------------------------------------------------------------
/**
 * Lowercases a string after turning every whitespace character into a plain
 * space and collapsing runs of spaces. Leading/trailing spaces are kept.
 */
function simplifyString(toremove: string): string {
    const withPlainSpaces = toremove.replace(/\s/g, ' ')
    const collapsed = withPlainSpaces.replace(/\s+/g, ' ')
    return collapsed.toLowerCase()
}
/**
 * Replaces every occurrence of the given patterns in a string.
 *
 * @param value - text to clean
 * @param removableStrings - regex sources; each one is compiled with the 'g' flag
 * @param toReplace - replacement text, defaults to the empty string
 * @returns the text after all patterns were applied in order
 */
function removeStuff(
    value: string,
    removableStrings: Array<string>,
    toReplace?: string
): string {
    const replacement = toReplace || ''
    return removableStrings.reduce((current, pattern) => {
        return current.replace(new RegExp(pattern, 'g'), replacement)
    }, value)
}
// Turns every whitespace character (non-breaking spaces included) into a plain space
function normalizeSpaces(input: string): string {
    return input.split(/\s/).join(' ')
}
/**
 * Normalizes whitespace: every whitespace character becomes a plain space,
 * runs of spaces collapse into one and the result is trimmed.
 */
function removeUnnecesarySpaces(toremove: string): string {
    const spaced = normalizeSpaces(toremove)
    const collapsed = spaced.replace(/\s+/g, ' ')
    const withoutLineBreaks = collapsed.replace(/(\r\n|\n|\r)/gm, '')
    return withoutLineBreaks.trim()
}
/**
 * Compares two strings word by word and returns a match percent (0-100).
 *
 * Words of the first string are looked up in the second one; a word only
 * counts when it is found after the previously matched word. The percent of
 * matched words is then reduced by `lengthDiffMultiplier` for every word of
 * length difference and clamped at 0.
 *
 * @param s1 - first string
 * @param s2 - second string
 * @param s1cache - pre-split words of `s1`; used instead of splitting `s1`
 * @param s2cache - pre-split words of `s2`; used instead of splitting `s2`
 * @returns 100 for identical strings (or when neither side has any word),
 *          0 when only one side has words, otherwise the computed percent
 */
function compareString(
    s1: string,
    s2: string,
    s1cache?: Array<string>,
    s2cache?: Array<string>
): number {
    if (s1 === s2) {
        return 100
    }

    // A missing string without a cache counts as "no words" instead of
    // throwing on split
    const s1a = s1cache || (typeof s1 === 'string' ? s1.split(' ') : [])
    const s2a = s2cache || (typeof s2 === 'string' ? s2.split(' ') : [])

    // Without this guard an empty word list would lead to 0 / 0 = NaN below
    if (s1a.length === 0 || s2a.length === 0) {
        return s1a.length === 0 && s2a.length === 0 ? 100 : 0
    }

    let match = 0
    let lastMatchIndex = -2
    let i = 0
    while (i < s1a.length) {
        // Give up early once the ratio of matched words drops too low.
        // On the first iteration this is NaN < magicNumber, which is false.
        if (match / i < magicNumber) {
            break
        }
        const currMatchIndex = s2a.indexOf(s1a[i])
        if (currMatchIndex !== -1 && lastMatchIndex < currMatchIndex) {
            match++
            lastMatchIndex = currMatchIndex
        }
        i++
    }

    let percent = Math.round(
        parseFloat(((match / s1a.length) * 100).toFixed(2))
    )
    const lengthDifference = Math.abs(s2a.length - s1a.length)
    percent -= lengthDifference * lengthDiffMultiplier
    if (percent < 0) {
        percent = 0
    }
    return percent
}
/**
 * Strips the patterns listed in commonUselessAnswerParts from an answer.
 * Falsy input is returned as is.
 */
function answerPreProcessor(value: string): string {
    return value ? removeStuff(value, commonUselessAnswerParts) : value
}
/**
 * Drops a leading answer letter: 'a. pécsi sör' -> 'pécsi sör'.
 *
 * The text is split at '. '; when the first part is shorter than two
 * characters it is removed and the remaining parts are joined with a space.
 * Anything else (including falsy input) is returned unchanged.
 */
function removeAnswerLetters(value: string): string {
    if (!value) {
        return value
    }
    const [prefix, ...rest] = value.split('. ')
    const hasLetterPrefix = rest.length > 0 && prefix.length < 2
    return hasLetterPrefix ? rest.join(' ') : value
}
/**
 * Runs a string through a list of string transformations, in order.
 *
 * @param value - text to transform; falsy input is returned as is
 * @param mods - transformations, each receiving the previous one's output
 * @returns the output of the last transformation
 */
function simplifyQA(
    value: string,
    mods: Array<(input: string) => string>
): string {
    if (!value) {
        return value
    }
    return mods.reduce((res, fn) => fn(res), value)
}
/**
 * Cleans an answer: normalizes spaces, strips the common useless answer
 * parts, then removes a leading answer letter. Falsy input is returned as is.
 */
function simplifyAnswer(value: string): string {
    if (value) {
        const steps = [
            removeUnnecesarySpaces,
            answerPreProcessor,
            removeAnswerLetters,
        ]
        return simplifyQA(value, steps)
    }
    return value
}
/**
 * Cleans a question text: normalizes spaces, then removes a leading answer
 * letter. Falsy input is returned as is.
 */
function simplifyQuestion(question: string): string {
    if (question) {
        const steps = [removeUnnecesarySpaces, removeAnswerLetters]
        return simplifyQA(question, steps)
    }
    return question
}
/**
 * Cleans the Q and A texts of a question object IN PLACE (spaces normalized,
 * leading answer letter removed) and returns the same object.
 * Empty fields and falsy input are left untouched.
 */
function simplifyQuestionObj(question: Question): Question {
    if (!question) {
        return question
    }
    const fields: Array<'Q' | 'A'> = ['Q', 'A']
    fields.forEach((field) => {
        if (question[field]) {
            question[field] = simplifyQA(question[field], [
                removeUnnecesarySpaces,
                removeAnswerLetters,
            ])
        }
    })
    return question
}
// ---------------------------------------------------------------------------------------------------------
// Question
// ---------------------------------------------------------------------------------------------------------
/**
 * Builds a Question with a word cache used by the comparison functions.
 *
 * @param question - either the question text, or an existing question object
 *                   that is copied and extended with a fresh cache
 * @param answer - answer text, only used when `question` is a string
 * @param data - question data, only used when `question` is a string
 * @returns the new question, or null when building it threw (the error is logged)
 */
function createQuestion(
    question: Question | string,
    answer?: string,
    data?: QuestionData
): Question {
    // Cache entries are the lowercased, space-normalized words of a text
    const toWords = (text?: string): string[] => {
        return text ? simplifyString(text).split(' ') : []
    }

    try {
        if (typeof question !== 'string') {
            return {
                ...question,
                cache: {
                    Q: toWords(question.Q),
                    A: toWords(question.A),
                },
            }
        }
        return {
            Q: simplifyQuestion(question),
            A: answer ? simplifyAnswer(answer) : undefined,
            data: data,
            cache: {
                Q: toWords(question),
                A: toWords(answer),
            },
        }
    } catch (err) {
        logger.Log('Error creating question', logger.GetColor('redbg'))
        console.error(question, answer, data)
        console.error(err)
        return null
    }
}
/**
 * Runs text recognition on the base64 images attached to a question.
 *
 * The images are recognized one after the other. When at least one yields
 * non-blank text, a copy of the question is returned whose Q is the
 * recognized texts joined with spaces and whose data type is 'simple'.
 * Otherwise the original question object is returned.
 */
async function recognizeQuestionImage(question: Question): Promise<Question> {
    const base64Data = question.data.base64
    if (!Array.isArray(base64Data) || !base64Data.length) {
        return question
    }

    const recognized: string[] = []
    for (const base64 of base64Data) {
        const text = await recognizeTextFromBase64(base64)
        if (text && text.trim()) {
            recognized.push(text)
        }
    }
    if (!recognized.length) {
        return question
    }

    return {
        ...question,
        Q: recognized.join(' '),
        data: {
            ...question.data,
            type: 'simple',
        },
    }
}
/**
 * Compares the images of two question data objects.
 *
 * Image hashes are compared when both sides have them. Otherwise the image
 * names are compared and 10 is subtracted from that result. When neither
 * pair is available the result is 0.
 */
function compareImage(data: QuestionData, data2: QuestionData): number {
    if (data.hashedImages && data2.hashedImages) {
        const hashes1 = data.hashedImages
        const hashes2 = data2.hashedImages
        return compareString(
            hashes1.join(' '),
            hashes2.join(' '),
            hashes1,
            hashes2
        )
    }

    if (data.images && data2.images) {
        const names1 = data.images
        const names2 = data2.images
        const nameMatch = compareString(
            names1.join(' '),
            names2.join(' '),
            names1,
            names2
        )
        return nameMatch - 10
    }

    return 0
}
/**
 * Compares the data of two questions.
 *
 * @returns -1 when both questions are 'simple' (there is no data to compare),
 *          the image comparison result when both are 'image',
 *          0 when the types differ, the type is unhandled or comparing threw
 */
function compareData(q1: Question, q2: Question): number {
    try {
        const dataType = q1.data.type
        if (dataType !== q2.data.type) {
            return 0
        }
        switch (dataType) {
            case 'simple':
                return -1
            case 'image':
                return compareImage(q1.data, q2.data)
            default:
                logger.DebugLog(
                    `Unhandled data type ${dataType}`,
                    'Compare question data',
                    1
                )
                logger.DebugLog(q1, 'Compare question data', 2)
        }
    } catch (error) {
        logger.DebugLog('Error comparing data', 'Compare question data', 1)
        logger.DebugLog(error.message, 'Compare question data', 1)
        logger.DebugLog(error, 'Compare question data', 2)
        console.error(error)
    }
    return 0
}
/**
 * Compares the question texts of two questions. The cached word lists are
 * handed to compareString so the texts do not get split again.
 */
function compareQuestion(q1: Question, q2: Question): number {
    const { Q: text1, cache: cache1 } = q1
    const { Q: text2, cache: cache2 } = q2
    return compareString(text1, text2, cache1.Q, cache2.Q)
}
/**
 * Compares the answers of two questions. The cached word lists are handed
 * to compareString so the texts do not get split again.
 */
function compareAnswer(q1: Question, q2: Question): number {
    const { A: text1, cache: cache1 } = q1
    const { A: text2, cache: cache2 } = q2
    return compareString(text1, text2, cache1.A, cache2.A)
}
/**
 * Compares two questions and returns the score of every compared aspect.
 *
 * @param q1 - first question
 * @param _q1subjName - unused
 * @param q2 - second question; its answer is only compared when it has one
 * @param q2subjName - returned as matchedSubjName
 * @returns the detailed scores; `avg` is the lowest of the question score,
 *          the answer score (when q2 has an answer) and the data score
 *          (unless it is -1)
 */
function compareQuestionObj(
    q1: Question,
    _q1subjName: string,
    q2: Question,
    q2subjName: string
): DetailedMatch {
    const qMatch = compareQuestion(q1, q2)
    const hasAnswer = Boolean(q2.A)
    const aMatch = hasAnswer ? compareAnswer(q1, q2) : 0
    // -1 if both questions are simple
    const dMatch = compareData(q1, q2)

    // Only the aspects that were really compared take part in the result
    const scores = [qMatch]
    if (hasAnswer) {
        scores.push(aMatch)
    }
    if (dMatch !== -1) {
        scores.push(dMatch)
    }

    return {
        qMatch,
        aMatch,
        dMatch,
        matchedSubjName: q2subjName,
        avg: Math.min(...scores),
    }
}
/**
 * Serializes a question as '?<Q>' and '!<A>' lines. Questions whose data
 * type is not 'simple' get a third '><data as JSON>' line.
 */
function questionToString(question: Question): string {
    const { Q, A, data } = question
    const isSimple = data.type === 'simple'
    const lines = ['?' + Q, '!' + A]
    if (!isSimple) {
        lines.push('>' + JSON.stringify(data))
    }
    return lines.join('\n')
}
// ---------------------------------------------------------------------------------------------------------
// Subject
// ---------------------------------------------------------------------------------------------------------
/**
 * Searches a question among the questions of one subject.
 *
 * The questions are visited from the last one to the first one. Every
 * question scoring at least `minMatchAmmount` becomes a hit.
 *
 * @param subj - subject to search in
 * @param question - question to look for
 * @param subjName - subject name of the searched question
 * @param searchTillMatchPercent - when set, the search stops at the first
 *        question scoring at least this much
 * @returns the hits, best match first
 */
function searchSubject(
    subj: Subject,
    question: Question,
    subjName: string,
    searchTillMatchPercent?: number
): SearchResultQuestion[] {
    const hits: SearchResultQuestion[] = []

    for (let i = subj.Questions.length - 1; i >= 0; i--) {
        const candidate = subj.Questions[i]
        const detailedMatch = compareQuestionObj(
            candidate,
            subjName,
            question,
            subj.Name
        )
        if (detailedMatch.avg >= minMatchAmmount) {
            hits.push({
                q: candidate,
                match: detailedMatch.avg,
                detailedMatch: detailedMatch,
            })
        }
        if (
            searchTillMatchPercent &&
            detailedMatch.avg >= searchTillMatchPercent
        ) {
            break
        }
    }

    return hits.sort((a, b) => {
        return a.match < b.match ? 1 : a.match > b.match ? -1 : 0
    })
}
/**
 * Serializes a subject: a '+<Name>' line followed by its serialized
 * questions, one per line group.
 */
function subjectToString(subj: Subject): string {
    const { Questions, Name } = subj
    const serialized = Questions.map((question) => questionToString(question))
    return '+' + Name + '\n' + serialized.join('\n')
}
// ---------------------------------------------------------------------------------------------------------
// QuestionDB
// ---------------------------------------------------------------------------------------------------------
/**
 * Adds a question to the subject list IN PLACE.
 *
 * The question goes into the first subject whose name (without its year
 * prefix, case-insensitively) is contained in `subj`. When there is no such
 * subject a new one named `subj` is appended.
 *
 * @param data - subjects to add the question to
 * @param subj - subject name of the new question
 * @param question - question to add
 */
function addQuestion(
    data: Array<Subject>,
    subj: string,
    question: Question
): void {
    logger.DebugLog('Adding new question with subjName: ' + subj, 'qdb add', 1)
    logger.DebugLog(question, 'qdb add', 3)

    const existing = data.find((subject) => {
        if (!subject.Name) {
            return false
        }
        const wanted = subj.toLowerCase()
        return wanted.includes(
            getSubjNameWithoutYear(subject.Name).toLowerCase()
        )
    })

    if (existing) {
        logger.DebugLog('Adding new question to existing subject', 'qdb add', 1)
        existing.Questions.push(question)
        return
    }

    logger.Log(`Creating new subject: ${subj}`)
    data.push({
        Name: subj,
        Questions: [question],
    })
}
/**
 * Prepares a question for searching: builds a copy with a word cache, then
 * cleans the Q and A texts of that copy.
 */
function prepareQuestion(question: Question): Question {
    const withCache = createQuestion(question)
    return simplifyQuestionObj(withCache)
}
/**
 * Serializes a list of subjects; the subjects are separated by an empty line.
 */
function dataToString(data: Array<Subject>): string {
    return data.map((subj) => subjectToString(subj)).join('\n\n')
}
/**
 * Searches a question in a list of subjects.
 *
 * First only the subjects whose name (without year prefix) is contained in
 * `subjName` are searched. When `searchInAllIfNoResult` is set and none of
 * the hits reaches `minMatchToNotSearchOtherSubjects`, the remaining
 * subjects are searched too. Subjects searched in the first pass are not
 * searched again, so their hits are not duplicated.
 *
 * @param data - subjects to search in
 * @param subjName - subject name of the searched question
 * @param question - question to search for; it is prepared with prepareQuestion
 * @param searchTillMatchPercent - when set, a pass stops after the first
 *        subject that produced a hit scoring at least this much
 * @param searchInAllIfNoResult - enables the fallback pass described above
 * @returns the hits with the possible-answer penalties applied, best match first
 */
function doSearch(
    data: Array<Subject>,
    subjName: string,
    question: Question,
    searchTillMatchPercent?: number,
    searchInAllIfNoResult?: boolean
): SearchResultQuestion[] {
    let result: SearchResultQuestion[] = []
    const questionToSearch = prepareQuestion(question)

    // Subjects searched so far, mapped to whether the pass should continue
    // after them. The fallback pass replays this flag instead of searching
    // the subject again.
    const searched = new Map<Subject, boolean>()

    // Searches one subject, collects its hits and tells if the pass goes on
    const searchIn = (subj: Subject): boolean => {
        const subjRes = searchSubject(
            subj,
            questionToSearch,
            subjName,
            searchTillMatchPercent
        )
        result = result.concat(subjRes)
        const continueSearching = searchTillMatchPercent
            ? !subjRes.some((sr) => sr.match >= searchTillMatchPercent)
            : true
        searched.set(subj, continueSearching)
        return continueSearching
    }

    data.every((subj) => {
        const nameMatches = subjName
            .toLowerCase()
            .includes(getSubjNameWithoutYear(subj.Name).toLowerCase())
        if (!nameMatches) {
            return true
        }
        logger.DebugLog(`Searching in ${subj.Name} `, 'searchworker', 2)
        return searchIn(subj)
    })

    if (searchInAllIfNoResult) {
        // The hits of several subjects are concatenated unsorted, so every
        // hit has to be checked, not just the first one. This is also true
        // when there are no hits at all.
        const noGoodEnoughMatch = result.every((res) => {
            return res.match < minMatchToNotSearchOtherSubjects
        })
        if (noGoodEnoughMatch) {
            logger.DebugLog(
                'Reqults length is zero when comparing names, trying all subjects',
                'searchworker',
                1
            )
            data.every((subj) => {
                if (searched.has(subj)) {
                    return searched.get(subj)
                }
                return searchIn(subj)
            })
        }
    }

    result = setNoPossibleAnswersPenalties(
        questionToSearch.data.possibleAnswers,
        result
    )

    return result.sort((q1, q2) => {
        return q1.match < q2.match ? 1 : q1.match > q2.match ? -1 : 0
    })
}
/**
 * Lowers the score of hits whose possible answers do not cover the possible
 * answers of the searched question.
 *
 * A hit is a full match when the number of its possible answers whose `val`
 * is contained in some `val` of the searched question equals the number of
 * the searched question's possible answers. Every other hit gets
 * `noPossibleAnswerMatchPenalty` subtracted from `match` and
 * `detailedMatch.qMatch`.
 *
 * The input array itself is returned, without any penalty, when the searched
 * question has no possible answer list, when no hit has one, or when no hit
 * is a full match.
 */
function setNoPossibleAnswersPenalties(
    questionPossibleAnswers: QuestionData['possibleAnswers'],
    results: SearchResultQuestion[]
): SearchResultQuestion[] {
    if (!Array.isArray(questionPossibleAnswers)) {
        return results
    }
    const someHasPossibleAnswers = results.some((x) => {
        return Array.isArray(x.q.data.possibleAnswers)
    })
    if (!someHasPossibleAnswers) {
        return results
    }

    // True when the candidate's value is part of any searched possible answer
    const isKnownAnswer = (candidate: { val?: string }): boolean => {
        return questionPossibleAnswers.some((asked) => {
            if (!asked.val || !candidate.val) {
                return false
            }
            return asked.val.includes(candidate.val)
        })
    }

    const countMatches = (hit: SearchResultQuestion): number => {
        const own = hit.q.data.possibleAnswers
        return Array.isArray(own) ? own.filter(isKnownAnswer).length : 0
    }

    let anyFullMatch = false
    const penalized = results.map((hit) => {
        if (countMatches(hit) === questionPossibleAnswers.length) {
            anyFullMatch = true
            return hit
        }
        return {
            ...hit,
            match: hit.match - noPossibleAnswerMatchPenalty,
            detailedMatch: {
                ...hit.detailedMatch,
                qMatch: hit.detailedMatch.qMatch - noPossibleAnswerMatchPenalty,
            },
        }
    })

    return anyFullMatch ? penalized : results
}
// ---------------------------------------------------------------------------------------------------------
// Multi threaded stuff
// ---------------------------------------------------------------------------------------------------------
/** Payload (`msg.data`) of a 'work' message sent to a worker thread. */
interface WorkData {
// Subject name of the searched question
subjName: string
// Question to search for
question: Question
// Passed on to doSearch: stop once a hit scores at least this much
searchTillMatchPercent: number
// Passed on to doSearch: fall back to searching every subject
searchInAllIfNoResult: boolean
// Only question dbs whose `index` is listed here are searched
searchIn: number[]
// Job number, only used in the status message of the result
index: number
}
// Start handling messages only when this module runs inside a worker thread;
// on the main thread the module just provides its exports
if (!isMainThread) {
handleWorkerData()
}
/**
 * Entry point of a worker thread.
 *
 * Reads the worker index and the initial question dbs from `workerData`,
 * then answers the messages of the parent thread. Handled message types:
 *  - 'work': search a question in the selected dbs, post the sorted hits
 *  - 'dbEdit': apply edits to one db with editDb
 *  - 'newQuestions': append questions to a subject (created when missing)
 *  - 'newdb': register a new question db
 *  - 'dbClean': post the indexes found by cleanDb
 *  - 'rmQuestions': replace the questions of a subject with the result of
 *    updateQuestionsInArray
 * Every message, unknown types included, is answered with a posted message.
 */
function handleWorkerData() {
    const {
        workerIndex,
        initData,
    }: { workerIndex: number; initData: Array<QuestionDb> } = workerData
    let qdbs: Array<QuestionDb> = initData

    const qdbCount = initData.length
    const subjCount = initData.reduce((sCount, qdb) => {
        return sCount + qdb.data.length
    }, 0)
    const questionCount = initData.reduce((qCount, qdb) => {
        return (
            qCount +
            qdb.data.reduce((sCount, subject) => {
                return sCount + subject.Questions.length
            }, 0)
        )
    }, 0)

    logger.Log(
        `[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty! qdbs: ${qdbCount}, subjects: ${subjCount}, questions: ${questionCount}`
    )

    parentPort.on('message', async (msg /*: TaskObject */) => {
        await tesseractLoaded

        if (msg.type === 'work') {
            const {
                subjName,
                question: originalQuestion,
                searchTillMatchPercent,
                searchInAllIfNoResult,
                searchIn,
                index,
            }: WorkData = msg.data

            let searchResult: SearchResultQuestion[] = []
            let error = false
            // Stays the unprocessed question when the recognition below fails,
            // so the error log still shows what was searched
            let question = originalQuestion

            try {
                // The recognition is inside the try block on purpose: if it
                // throws, the job is still answered (with error: true)
                // instead of never posting a result
                question = await recognizeQuestionImage(originalQuestion)

                qdbs.forEach((qdb) => {
                    if (searchIn.includes(qdb.index)) {
                        const res = doSearch(
                            qdb.data,
                            subjName,
                            question,
                            searchTillMatchPercent,
                            searchInAllIfNoResult
                        )
                        searchResult = [
                            ...searchResult,
                            ...res.map((x) => {
                                return {
                                    ...x,
                                    detailedMatch: {
                                        ...x.detailedMatch,
                                        qdb: qdb.name,
                                    },
                                }
                            }),
                        ]
                    }
                })
            } catch (err) {
                logger.Log('Error in worker thread!', logger.GetColor('redbg'))
                console.error(err)
                console.error(
                    JSON.stringify(
                        {
                            subjName: subjName,
                            question: question,
                            searchTillMatchPercent: searchTillMatchPercent,
                            searchInAllIfNoResult: searchInAllIfNoResult,
                            searchIn: searchIn,
                            index: index,
                        },
                        null,
                        2
                    )
                )
                error = true
            }

            // sorting: best match first
            const sortedResult: SearchResultQuestion[] = searchResult.sort(
                (q1, q2) => {
                    if (q1.match < q2.match) {
                        return 1
                    } else if (q1.match > q2.match) {
                        return -1
                    } else {
                        return 0
                    }
                }
            )

            const workerResult: WorkerResult = {
                msg: `From thread #${workerIndex}: job ${
                    !isNaN(index) ? `#${index}` : ''
                }done`,
                workerIndex: workerIndex,
                result: sortedResult,
                error: error,
            }

            // ONDONE:
            parentPort.postMessage(workerResult)
        } else if (msg.type === 'dbEdit') {
            const { dbIndex, edits }: { dbIndex: number; edits: Edits } =
                msg.data
            const { resultDb } = editDb(qdbs[dbIndex], edits)
            qdbs[dbIndex] = resultDb
            logger.DebugLog(`Worker db edit ${workerIndex}`, 'worker update', 1)

            parentPort.postMessage({
                msg: `From thread #${workerIndex}: db edit`,
                workerIndex: workerIndex,
            })
        } else if (msg.type === 'newQuestions') {
            const {
                subjName,
                qdbIndex,
                newQuestions,
            }: {
                subjName: string
                qdbIndex: number
                newQuestions: Question[]
            } = msg.data

            // First try to append to an existing subject of the target db
            let added = false
            qdbs = qdbs.map((qdb) => {
                if (qdb.index === qdbIndex) {
                    return {
                        ...qdb,
                        data: qdb.data.map((subj) => {
                            if (subj.Name === subjName) {
                                added = true
                                return {
                                    Name: subj.Name,
                                    Questions: [
                                        ...subj.Questions,
                                        ...newQuestions,
                                    ],
                                }
                            } else {
                                return subj
                            }
                        }),
                    }
                } else {
                    return qdb
                }
            })

            // No subject with that exact name: create it in the target db
            if (!added) {
                qdbs = qdbs.map((qdb) => {
                    if (qdb.index === qdbIndex) {
                        return {
                            ...qdb,
                            data: [
                                ...qdb.data,
                                {
                                    Name: subjName,
                                    Questions: [...newQuestions],
                                },
                            ],
                        }
                    } else {
                        return qdb
                    }
                })
            }
            logger.DebugLog(
                `Worker new question ${workerIndex}`,
                'worker update',
                1
            )

            parentPort.postMessage({
                msg: `From thread #${workerIndex}: update done`,
                workerIndex: workerIndex,
            })
        } else if (msg.type === 'newdb') {
            // Unlike the other message types the db is read from `msg.data`
            // directly, not from a field inside it
            const { data }: { data: QuestionDb } = msg
            qdbs.push(data)

            parentPort.postMessage({
                msg: `From thread #${workerIndex}: new db add done`,
                workerIndex: workerIndex,
            })
        } else if (msg.type === 'dbClean') {
            const removedIndexes = cleanDb(msg.data, qdbs)

            const workerResult: WorkerResult = {
                msg: `From thread #${workerIndex}: db clean done`,
                workerIndex: workerIndex,
                result: removedIndexes,
            }

            parentPort.postMessage(workerResult)
        } else if (msg.type === 'rmQuestions') {
            const {
                questionIndexesToRemove,
                subjIndex,
                qdbIndex,
                recievedQuestions,
            } = msg.data

            qdbs[qdbIndex].data[subjIndex].Questions = updateQuestionsInArray(
                questionIndexesToRemove,
                qdbs[qdbIndex].data[subjIndex].Questions,
                recievedQuestions
            )

            parentPort.postMessage({
                msg: `From thread #${workerIndex}: rm question done`,
                workerIndex: workerIndex,
            })
        } else {
            logger.Log(`Invalid msg type!`, logger.GetColor('redbg'))
            console.error(msg)

            parentPort.postMessage({
                msg: `From thread #${workerIndex}: Invalid message type (${msg.type})!`,
                workerIndex: workerIndex,
            })
        }
    })
}
/**
 * Finds the stored questions that the received questions would replace.
 *
 * The first subject of `qdbs[qdbIndex]` whose name contains `subjToClean`
 * (case-insensitively) is inspected. A stored question is selected when its
 * simplified text matches the simplified received text above
 * `minMatchToNotSearchOtherSubjects` percent and it has no date or is older
 * than `overwriteFromDate`.
 *
 * @returns for every received question (same order) the indexes of the
 *          selected stored questions; only empty lists when the db or the
 *          subject does not exist
 */
export function cleanDb(
    {
        questions: recievedQuestions,
        subjToClean,
        overwriteFromDate,
        qdbIndex,
    }: {
        questions: Question[]
        subjToClean: string
        overwriteFromDate: number
        qdbIndex: number
    },
    qdbs: QuestionDb[]
): number[][] {
    // A missing db is handled like a missing subject instead of throwing
    const qdb = qdbs[qdbIndex]
    const subject = qdb
        ? qdb.data.find((x) => {
              return (
                  x.Name &&
                  x.Name.toLowerCase().includes(subjToClean.toLowerCase())
              )
          })
        : undefined

    if (!subject) {
        return recievedQuestions.map(() => [])
    }

    // FIXME: compare images & data too!
    return recievedQuestions.map((recievedQuestion) => {
        // Simplified once per received question, not once per comparison
        const simplifiedRecieved = simplifyQuestion(recievedQuestion.Q)
        const indexes: number[] = []

        subject.Questions.forEach((question, i) => {
            const res = compareString(
                simplifiedRecieved,
                simplifyQuestion(question.Q)
            )
            if (
                res > minMatchToNotSearchOtherSubjects &&
                (!question.data.date ||
                    question.data.date < overwriteFromDate)
            ) {
                indexes.push(i)
            }
        })

        return indexes
    })
}
// ------------------------------------------------------------------------
export {
compareQuestionObj,
minMatchAmmount,
getSubjNameWithoutYear,
createQuestion,
addQuestion,
dataToString,
doSearch,
setNoPossibleAnswersPenalties,
}