mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
Search speedup by: caching split questions/answers, and refactoring string compare algorithm
This commit is contained in:
parent
043e825302
commit
8fdc62349b
6 changed files with 152 additions and 86 deletions
|
@ -28,12 +28,12 @@ const commonUselessAnswerParts = [
|
|||
"'",
|
||||
]
|
||||
|
||||
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
|
||||
const specialChars = ['&', '\\+']
|
||||
// const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
|
||||
/* Percent minus for length difference */
|
||||
const lengthDiffMultiplier = 10
|
||||
/* Minimum amount to consider that two questions match during answering */
|
||||
const minMatchAmmount = 70
|
||||
const magicNumber = 0.7 // same as minMatchAmmount, but /100
|
||||
/* If all of the results are below this match percent (when only one subject is searched due to
|
||||
* subject name matching) then all subjects are searched for answer */
|
||||
const minMatchToNotSearchOtherSubjects = 90
|
||||
|
@ -55,6 +55,14 @@ function getSubjNameWithoutYear(subjName: string): string {
|
|||
|
||||
// Not exported
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Normalizes a string for comparison: every run of whitespace (spaces, tabs,
// newlines, non-breaking spaces, ...) is collapsed into a single ' ' and the
// result is lower-cased.
//
// Leading/trailing whitespace is NOT trimmed, it is only collapsed to one space.
//
// A single /\s+/g pass is enough: \s already matches every whitespace character
// (including the non-breaking space), so first mapping each one to ' ' and then
// collapsing the runs gives exactly the same output with an extra walk over the
// whole string.
function simplifyString(toremove: string): string {
  return toremove.replace(/\s+/g, ' ').toLowerCase()
}
|
||||
|
||||
function removeStuff(
|
||||
value: string,
|
||||
removableStrings: Array<string>,
|
||||
|
@ -67,55 +75,49 @@ function removeStuff(
|
|||
return value
|
||||
}
|
||||
|
||||
// removes whitespace from beginning and end, and replaces multiple spaces with one space
|
||||
function removeUnnecesarySpaces(toremove: string) {
|
||||
assert(toremove)
|
||||
|
||||
toremove = normalizeSpaces(toremove)
|
||||
while (toremove.includes(' ')) {
|
||||
toremove = toremove.replace(/ {2}/g, ' ')
|
||||
}
|
||||
return toremove.trim()
|
||||
}
|
||||
|
||||
// simplifies a string for easier comparison
|
||||
function simplifyStringForComparison(value: string) {
|
||||
assert(value)
|
||||
|
||||
value = removeUnnecesarySpaces(value).toLowerCase()
|
||||
return removeStuff(value, commonUselessStringParts)
|
||||
}
|
||||
|
||||
function removeSpecialChars(value: string) {
|
||||
assert(value)
|
||||
|
||||
return removeStuff(value, specialChars, ' ')
|
||||
}
|
||||
|
||||
// damn nonbreaking space
|
||||
function normalizeSpaces(input: string) {
|
||||
assert(input)
|
||||
|
||||
function normalizeSpaces(input) {
|
||||
return input.replace(/\s/g, ' ')
|
||||
}
|
||||
|
||||
function compareString(string1: string, string2: string) {
|
||||
if (!string1 || !string2) {
|
||||
if (!string1 && !string2) {
|
||||
// Removes whitespace from the beginning and the end of the string, and replaces
// every run of whitespace inside it with exactly one ' '.
//
// \s matches all whitespace characters (tabs, newlines, non-breaking spaces, ...),
// so one /\s+/g pass both normalizes exotic spaces and collapses repeated ones.
//
// The trailing trim() matters: without it a string that starts or ends with
// whitespace keeps a leading/trailing ' ', which turns into an empty token as
// soon as the result is split on ' ' for comparison.
function removeUnnecesarySpaces(toremove: string): string {
  return toremove.replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
function compareString(s1, s2) {
|
||||
if (!s1 || !s2) {
|
||||
if (!s1 && !s2) {
|
||||
return 100
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
if (s1.length < 0 || s2.length < 0) {
|
||||
if (s1.length === 0 && s2.length === 0) {
|
||||
return 100
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
const s1 = simplifyStringForComparison(string1).split(' ')
|
||||
const s2 = simplifyStringForComparison(string2).split(' ')
|
||||
let match = 0
|
||||
for (let i = 0; i < s1.length; i++) {
|
||||
if (s2.includes(s1[i])) {
|
||||
match++
|
||||
let lastMatchIndex = -1
|
||||
let i = 0
|
||||
|
||||
while (i < s1.length) {
|
||||
if (match / i < magicNumber) {
|
||||
break
|
||||
}
|
||||
|
||||
const currMatchIndex = s2.indexOf(s1[i])
|
||||
if (lastMatchIndex < currMatchIndex) {
|
||||
match++
|
||||
lastMatchIndex = currMatchIndex
|
||||
}
|
||||
|
||||
i++
|
||||
}
|
||||
|
||||
let percent = Math.round(parseFloat(((match / s1.length) * 100).toFixed(2)))
|
||||
const lengthDifference = Math.abs(s2.length - s1.length)
|
||||
percent -= lengthDifference * lengthDiffMultiplier
|
||||
|
@ -163,7 +165,6 @@ function simplifyAnswer(value: string) {
|
|||
return value
|
||||
}
|
||||
return simplifyQA(value, [
|
||||
removeSpecialChars,
|
||||
removeUnnecesarySpaces,
|
||||
answerPreProcessor,
|
||||
removeAnswerLetters,
|
||||
|
@ -175,22 +176,16 @@ function simplifyQuestion(question: Question | string) {
|
|||
return
|
||||
}
|
||||
if (typeof question === 'string') {
|
||||
return simplifyQA(question, [
|
||||
removeSpecialChars,
|
||||
removeUnnecesarySpaces,
|
||||
removeAnswerLetters,
|
||||
])
|
||||
return simplifyQA(question, [removeUnnecesarySpaces, removeAnswerLetters])
|
||||
} else {
|
||||
if (question.Q) {
|
||||
question.Q = simplifyQA(question.Q, [
|
||||
removeSpecialChars,
|
||||
removeUnnecesarySpaces,
|
||||
removeAnswerLetters,
|
||||
])
|
||||
}
|
||||
if (question.A) {
|
||||
question.A = simplifyQA(question.A, [
|
||||
removeSpecialChars,
|
||||
removeUnnecesarySpaces,
|
||||
removeAnswerLetters,
|
||||
])
|
||||
|
@ -205,13 +200,29 @@ function simplifyQuestion(question: Question | string) {
|
|||
|
||||
function createQuestion(
|
||||
question: Question | string,
|
||||
answer: string,
|
||||
data: QuestionData
|
||||
answer?: string,
|
||||
data?: QuestionData
|
||||
): Question {
|
||||
return {
|
||||
Q: simplifyQuestion(question),
|
||||
A: answer ? simplifyAnswer(answer) : undefined,
|
||||
data: data,
|
||||
try {
|
||||
if (typeof question === 'string') {
|
||||
return {
|
||||
Q: simplifyQuestion(question),
|
||||
A: answer ? simplifyAnswer(answer) : undefined,
|
||||
data: data,
|
||||
}
|
||||
} else {
|
||||
return {
|
||||
...question,
|
||||
cache: {
|
||||
Q: question.Q ? simplifyString(question.Q).split(' ') : [],
|
||||
A: question.A ? simplifyString(question.A).split(' ') : [],
|
||||
},
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.Log('Error creating question', logger.GetColor('redbg'))
|
||||
console.error(question, answer, data)
|
||||
console.error(err)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,11 +268,11 @@ function compareData(q1: Question, q2: Question) {
|
|||
}
|
||||
|
||||
function compareQuestion(q1: Question, q2: Question) {
|
||||
return compareString(q1.Q, q2.Q)
|
||||
return compareString(q1.cache.Q, q2.cache.Q)
|
||||
}
|
||||
|
||||
function compareAnswer(q1: Question, q2: Question) {
|
||||
return compareString(q1.A, q2.A)
|
||||
return compareString(q1.cache.A, q2.cache.A)
|
||||
}
|
||||
|
||||
function compareQuestionObj(
|
||||
|
@ -328,7 +339,11 @@ function searchSubject(
|
|||
assert(question)
|
||||
|
||||
let result = []
|
||||
subj.Questions.every((currentQuestion) => {
|
||||
|
||||
let stopSearch = false
|
||||
let i = subj.Questions.length - 1
|
||||
while (i >= 0 && !stopSearch) {
|
||||
const currentQuestion = subj.Questions[i]
|
||||
const percent = compareQuestionObj(
|
||||
currentQuestion,
|
||||
subjName,
|
||||
|
@ -337,7 +352,7 @@ function searchSubject(
|
|||
question.data
|
||||
)
|
||||
|
||||
if (percent.avg > minMatchAmmount) {
|
||||
if (percent.avg >= minMatchAmmount) {
|
||||
result.push({
|
||||
q: currentQuestion,
|
||||
match: percent.avg,
|
||||
|
@ -346,11 +361,11 @@ function searchSubject(
|
|||
}
|
||||
|
||||
if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
|
||||
return false
|
||||
stopSearch = true
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
i--
|
||||
}
|
||||
|
||||
result = result.sort((q1, q2) => {
|
||||
if (q1.match < q2.match) {
|
||||
|
@ -421,9 +436,9 @@ function prepareQuestion(
|
|||
let preparedQuestion: Question
|
||||
|
||||
if (typeof question === 'object') {
|
||||
preparedQuestion = question
|
||||
preparedQuestion = createQuestion(question)
|
||||
} else {
|
||||
let parsedData
|
||||
let parsedData: any
|
||||
if (typeof data === 'string') {
|
||||
try {
|
||||
parsedData = JSON.parse(data)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue