Search speedup by caching split questions/answers and refactoring the string comparison algorithm

This commit is contained in:
mrfry 2021-03-17 12:24:50 +01:00
parent 043e825302
commit 8fdc62349b
6 changed files with 152 additions and 86 deletions

View file

@ -28,12 +28,12 @@ const commonUselessAnswerParts = [
"'",
]
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
const specialChars = ['&', '\\+']
// const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
/* Percentage points subtracted from the match score for every unit of length
 * difference between the two compared inputs */
const lengthDiffMultiplier = 10
/* Minimum match percent for two questions to be considered matching during answering */
const minMatchAmmount = 70
/* Early-exit threshold used while comparing strings: the running match ratio
 * (0..1) is checked against this value. Keep in sync with minMatchAmmount. */
const magicNumber = 0.7 // same as minMatchAmmount, but /100
/* If all of the results are below this match percent (when only one subject is searched due to
 * subject name matching) then all subjects are searched for answer */
const minMatchToNotSearchOtherSubjects = 90
@ -55,6 +55,14 @@ function getSubjNameWithoutYear(subjName: string): string {
// Not exported
// ---------------------------------------------------------------------------------------------------------
/**
 * Normalizes a string for token-based comparison: every run of whitespace
 * (spaces, tabs, newlines, non-breaking spaces, ...) is collapsed into a single
 * regular space and the result is lower-cased.
 *
 * Leading and trailing whitespace is collapsed to one space but NOT removed.
 *
 * @param toremove - the raw text to normalize
 * @returns the normalized, lower-cased text
 */
function simplifyString(toremove: string): string {
  // `\s+` already matches every single whitespace character, so one pass is
  // enough; a separate `/\s/g` pre-pass would only scan the string twice.
  return toremove.replace(/\s+/g, ' ').toLowerCase()
}
function removeStuff(
value: string,
removableStrings: Array<string>,
@ -67,55 +75,49 @@ function removeStuff(
return value
}
/**
 * Removes whitespace from the beginning and the end of a string and collapses
 * every run of whitespace inside it (including non-breaking spaces, tabs and
 * newlines) into a single regular space.
 *
 * @param toremove - the text to clean up; must be a non-empty string
 * @returns the trimmed text with single spaces between words
 * @throws AssertionError when `toremove` is falsy (for example an empty string)
 */
function removeUnnecesarySpaces(toremove: string): string {
  assert(toremove)
  // One regex pass replaces the former "normalize, then loop while a space is
  // present" approach, which could never terminate for input that contained a
  // single space: the loop condition matched it but the replacement did not.
  return toremove.replace(/\s+/g, ' ').trim()
}
/**
 * Prepares a string for comparison: whitespace is cleaned up with
 * removeUnnecesarySpaces, the text is lower-cased, and the result is passed
 * through removeStuff together with the commonUselessStringParts list.
 *
 * @param value - the text to simplify; asserted to be truthy
 * @returns whatever removeStuff returns for the cleaned, lower-cased text
 */
function simplifyStringForComparison(value: string) {
  assert(value)
  const cleaned = removeUnnecesarySpaces(value)
  const lowered = cleaned.toLowerCase()
  return removeStuff(lowered, commonUselessStringParts)
}
/**
 * Runs removeStuff over `value` with the specialChars list, passing a single
 * space as the third argument (presumably the replacement text — confirm
 * against removeStuff).
 *
 * @param value - the text to process; asserted to be truthy
 * @returns the result of removeStuff
 */
function removeSpecialChars(value: string) {
  assert(value)
  const withoutSpecials = removeStuff(value, specialChars, ' ')
  return withoutSpecials
}
// damn nonbreaking space
function normalizeSpaces(input: string) {
assert(input)
function normalizeSpaces(input) {
return input.replace(/\s/g, ' ')
}
function compareString(string1: string, string2: string) {
if (!string1 || !string2) {
if (!string1 && !string2) {
/**
 * Collapses every run of whitespace in `toremove` (including non-breaking
 * spaces, tabs and newlines) into a single regular space.
 *
 * Leading and trailing whitespace is reduced to one space, not removed.
 *
 * @param toremove - the text to clean up
 * @returns the text with single spaces in place of whitespace runs
 */
function removeUnnecesarySpaces(toremove: string): string {
  // `\s+` covers what a per-character `\s` -> ' ' normalization would do, so a
  // single pass yields the same result without scanning the string twice.
  return toremove.replace(/\s+/g, ' ')
}
function compareString(s1, s2) {
if (!s1 || !s2) {
if (!s1 && !s2) {
return 100
} else {
return 0
}
}
if (s1.length < 0 || s2.length < 0) {
if (s1.length === 0 && s2.length === 0) {
return 100
} else {
return 0
}
}
const s1 = simplifyStringForComparison(string1).split(' ')
const s2 = simplifyStringForComparison(string2).split(' ')
let match = 0
for (let i = 0; i < s1.length; i++) {
if (s2.includes(s1[i])) {
match++
let lastMatchIndex = -1
let i = 0
while (i < s1.length) {
if (match / i < magicNumber) {
break
}
const currMatchIndex = s2.indexOf(s1[i])
if (lastMatchIndex < currMatchIndex) {
match++
lastMatchIndex = currMatchIndex
}
i++
}
let percent = Math.round(parseFloat(((match / s1.length) * 100).toFixed(2)))
const lengthDifference = Math.abs(s2.length - s1.length)
percent -= lengthDifference * lengthDiffMultiplier
@ -163,7 +165,6 @@ function simplifyAnswer(value: string) {
return value
}
return simplifyQA(value, [
removeSpecialChars,
removeUnnecesarySpaces,
answerPreProcessor,
removeAnswerLetters,
@ -175,22 +176,16 @@ function simplifyQuestion(question: Question | string) {
return
}
if (typeof question === 'string') {
return simplifyQA(question, [
removeSpecialChars,
removeUnnecesarySpaces,
removeAnswerLetters,
])
return simplifyQA(question, [removeUnnecesarySpaces, removeAnswerLetters])
} else {
if (question.Q) {
question.Q = simplifyQA(question.Q, [
removeSpecialChars,
removeUnnecesarySpaces,
removeAnswerLetters,
])
}
if (question.A) {
question.A = simplifyQA(question.A, [
removeSpecialChars,
removeUnnecesarySpaces,
removeAnswerLetters,
])
@ -205,13 +200,29 @@ function simplifyQuestion(question: Question | string) {
function createQuestion(
question: Question | string,
answer: string,
data: QuestionData
answer?: string,
data?: QuestionData
): Question {
return {
Q: simplifyQuestion(question),
A: answer ? simplifyAnswer(answer) : undefined,
data: data,
try {
if (typeof question === 'string') {
return {
Q: simplifyQuestion(question),
A: answer ? simplifyAnswer(answer) : undefined,
data: data,
}
} else {
return {
...question,
cache: {
Q: question.Q ? simplifyString(question.Q).split(' ') : [],
A: question.A ? simplifyString(question.A).split(' ') : [],
},
}
}
} catch (err) {
logger.Log('Error creating question', logger.GetColor('redbg'))
console.error(question, answer, data)
console.error(err)
}
}
@ -257,11 +268,11 @@ function compareData(q1: Question, q2: Question) {
}
/**
 * Compares the question text of two questions using their cached token lists
 * (`cache.Q`) instead of the raw `Q` strings.
 *
 * @param q1 - first question; its `cache.Q` is read
 * @param q2 - second question; its `cache.Q` is read
 * @returns the result of compareString for the two cached token lists
 */
function compareQuestion(q1: Question, q2: Question) {
  // A stale `return compareString(q1.Q, q2.Q)` used to precede this line and
  // made the cached comparison unreachable.
  return compareString(q1.cache.Q, q2.cache.Q)
}
/**
 * Compares the answer text of two questions using their cached token lists
 * (`cache.A`) instead of the raw `A` strings.
 *
 * @param q1 - first question; its `cache.A` is read
 * @param q2 - second question; its `cache.A` is read
 * @returns the result of compareString for the two cached token lists
 */
function compareAnswer(q1: Question, q2: Question) {
  // A stale `return compareString(q1.A, q2.A)` used to precede this line and
  // made the cached comparison unreachable.
  return compareString(q1.cache.A, q2.cache.A)
}
function compareQuestionObj(
@ -328,7 +339,11 @@ function searchSubject(
assert(question)
let result = []
subj.Questions.every((currentQuestion) => {
let stopSearch = false
let i = subj.Questions.length - 1
while (i >= 0 && !stopSearch) {
const currentQuestion = subj.Questions[i]
const percent = compareQuestionObj(
currentQuestion,
subjName,
@ -337,7 +352,7 @@ function searchSubject(
question.data
)
if (percent.avg > minMatchAmmount) {
if (percent.avg >= minMatchAmmount) {
result.push({
q: currentQuestion,
match: percent.avg,
@ -346,11 +361,11 @@ function searchSubject(
}
if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
return false
stopSearch = true
}
return true
})
i--
}
result = result.sort((q1, q2) => {
if (q1.match < q2.match) {
@ -421,9 +436,9 @@ function prepareQuestion(
let preparedQuestion: Question
if (typeof question === 'object') {
preparedQuestion = question
preparedQuestion = createQuestion(question)
} else {
let parsedData
let parsedData: any
if (typeof data === 'string') {
try {
parsedData = JSON.parse(data)