mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
Search speedup by caching split questions/answers and refactoring the string comparison algorithm
This commit is contained in:
parent
043e825302
commit
8fdc62349b
6 changed files with 152 additions and 86 deletions
|
@ -33,6 +33,7 @@ import {
|
||||||
processIncomingRequest,
|
processIncomingRequest,
|
||||||
logResult,
|
logResult,
|
||||||
backupData,
|
backupData,
|
||||||
|
writeData,
|
||||||
shouldSaveDataFile,
|
shouldSaveDataFile,
|
||||||
shouldSearchDataFile,
|
shouldSearchDataFile,
|
||||||
loadJSON,
|
loadJSON,
|
||||||
|
@ -1364,6 +1365,7 @@ function GetApp(): ModuleType {
|
||||||
|
|
||||||
function deleteComment(obj, path) {
|
function deleteComment(obj, path) {
|
||||||
if (path.length === 1) {
|
if (path.length === 1) {
|
||||||
|
// TODO: check if its actually deleteable by user (deleting other users comments)
|
||||||
obj.splice(path[0], 1)
|
obj.splice(path[0], 1)
|
||||||
} else {
|
} else {
|
||||||
const i = path.pop()
|
const i = path.pop()
|
||||||
|
@ -1888,7 +1890,7 @@ function GetApp(): ModuleType {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (saveDb) {
|
if (saveDb) {
|
||||||
utils.WriteFile(JSON.stringify(currDb.data), currDb.path)
|
writeData(currDb.data, currDb.path)
|
||||||
msgAllWorker({
|
msgAllWorker({
|
||||||
qdbs: questionDbs,
|
qdbs: questionDbs,
|
||||||
type: 'update',
|
type: 'update',
|
||||||
|
|
|
@ -1,37 +1,41 @@
|
||||||
const minpercent = 97
|
|
||||||
const resultDbFileName = 'res.json'
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
||||||
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
||||||
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
||||||
|
const { loadData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
||||||
|
|
||||||
// TODO: merge 2 dbs
|
const minpercent = 95
|
||||||
// TODO: filter questions out from a db based on another, producing a new one
|
const resultDbFileName = 'res.json'
|
||||||
|
|
||||||
const params = process.argv.splice(2)
|
|
||||||
|
|
||||||
console.log('Params', params)
|
|
||||||
|
|
||||||
const fileA = params[0]
|
|
||||||
const fileB = params[1]
|
|
||||||
|
|
||||||
const dbA = utils.ReadJSON(fileA)
|
|
||||||
const dbB = fileB ? utils.ReadJSON(fileB) : null
|
|
||||||
|
|
||||||
const line =
|
const line =
|
||||||
'===================================================================='
|
'===================================================================='
|
||||||
const logPath = './duplicateRemovingLog/'
|
const logPath = './duplicateRemovingLog/'
|
||||||
utils.CreatePath(logPath)
|
utils.CreatePath(logPath)
|
||||||
|
|
||||||
|
const params = process.argv.splice(2)
|
||||||
|
|
||||||
|
const fileA = params[0]
|
||||||
|
const fileB = params[1]
|
||||||
|
|
||||||
|
console.time('load')
|
||||||
|
const dbA = loadData(fileA)
|
||||||
|
const dbB = fileB ? loadData(fileB) : null
|
||||||
|
console.timeEnd('load')
|
||||||
|
|
||||||
|
console.time('rmduplicates')
|
||||||
if (!dbB) {
|
if (!dbB) {
|
||||||
|
console.log(`Removing duplicate questions from ${fileA}`)
|
||||||
rmDuplicates(dbA).then((res) => {
|
rmDuplicates(dbA).then((res) => {
|
||||||
|
console.timeEnd('rmduplicates')
|
||||||
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
||||||
console.log('File written')
|
console.log('File written')
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
|
console.log(
|
||||||
|
`Removing questions found in ${C('green')}${fileB}${C()} from ${C(
|
||||||
|
'green'
|
||||||
|
)}${fileA}${C()}`
|
||||||
|
)
|
||||||
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
||||||
|
console.timeEnd('rmduplicates')
|
||||||
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
||||||
console.log('File written')
|
console.log('File written')
|
||||||
})
|
})
|
||||||
|
@ -85,6 +89,7 @@ async function difference({ dbA, dbB }) {
|
||||||
subjName: subj.Name,
|
subjName: subj.Name,
|
||||||
question: question,
|
question: question,
|
||||||
searchInAllIfNoResult: doingDifference,
|
searchInAllIfNoResult: doingDifference,
|
||||||
|
searchTillMatchPercent: minpercent,
|
||||||
})
|
})
|
||||||
|
|
||||||
printProgressBar(j + 1, subj.Questions.length)
|
printProgressBar(j + 1, subj.Questions.length)
|
||||||
|
|
|
@ -10,6 +10,10 @@ export interface Question {
|
||||||
Q: string
|
Q: string
|
||||||
A: string
|
A: string
|
||||||
data: QuestionData
|
data: QuestionData
|
||||||
|
cache?: {
|
||||||
|
Q: string
|
||||||
|
A: string
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Subject {
|
export interface Subject {
|
||||||
|
|
|
@ -28,7 +28,13 @@ import utils from '../utils/utils'
|
||||||
import { SearchResult, addQuestion, getSubjNameWithoutYear } from './classes'
|
import { SearchResult, addQuestion, getSubjNameWithoutYear } from './classes'
|
||||||
|
|
||||||
// types
|
// types
|
||||||
import { QuestionDb, Question, User, DataFile } from '../types/basicTypes'
|
import {
|
||||||
|
QuestionDb,
|
||||||
|
Subject,
|
||||||
|
Question,
|
||||||
|
User,
|
||||||
|
DataFile,
|
||||||
|
} from '../types/basicTypes'
|
||||||
|
|
||||||
// if a recievend question doesnt match at least this % to any other question in the db it gets
|
// if a recievend question doesnt match at least this % to any other question in the db it gets
|
||||||
// added to db
|
// added to db
|
||||||
|
@ -219,7 +225,7 @@ function processIncomingRequestUsingDb(
|
||||||
if (currWrites >= writeAfter && !dryRun) {
|
if (currWrites >= writeAfter && !dryRun) {
|
||||||
currWrites = 0
|
currWrites = 0
|
||||||
logger.DebugLog('Writing data.json', 'isadding', 1)
|
logger.DebugLog('Writing data.json', 'isadding', 1)
|
||||||
utils.WriteFile(JSON.stringify(qdb.data), qdb.path)
|
writeData(qdb.data, qdb.path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,6 +341,20 @@ export function shouldSaveDataFile(
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function loadData(path: string): Array<Subject> {
|
||||||
|
return JSON.parse(utils.ReadFile(path)).reduce((acc, subj) => {
|
||||||
|
return [
|
||||||
|
...acc,
|
||||||
|
{
|
||||||
|
Name: subj.Name,
|
||||||
|
Questions: subj.Questions.map((question) => {
|
||||||
|
return createQuestion(question)
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}, [])
|
||||||
|
}
|
||||||
|
|
||||||
export function loadJSON(
|
export function loadJSON(
|
||||||
dataFiles: Array<DataFile>,
|
dataFiles: Array<DataFile>,
|
||||||
dataDir: string
|
dataDir: string
|
||||||
|
@ -351,7 +371,7 @@ export function loadJSON(
|
||||||
...dataFile,
|
...dataFile,
|
||||||
path: dataPath,
|
path: dataPath,
|
||||||
index: index,
|
index: index,
|
||||||
data: JSON.parse(utils.ReadFile(dataPath)),
|
data: loadData(dataPath),
|
||||||
})
|
})
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(err)
|
console.error(err)
|
||||||
|
@ -364,14 +384,34 @@ export function loadJSON(
|
||||||
}, [])
|
}, [])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function writeData(data: Array<Subject>, path: string): void {
|
||||||
|
utils.WriteFile(
|
||||||
|
JSON.stringify(
|
||||||
|
data.map((subj) => {
|
||||||
|
return {
|
||||||
|
Name: subj.Name,
|
||||||
|
Questions: subj.Questions.map((question) => {
|
||||||
|
return {
|
||||||
|
Q: question.Q,
|
||||||
|
A: question.A,
|
||||||
|
data: question.data,
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
),
|
||||||
|
path
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
export function backupData(questionDbs: Array<QuestionDb>): void {
|
export function backupData(questionDbs: Array<QuestionDb>): void {
|
||||||
questionDbs.forEach((data) => {
|
questionDbs.forEach((data) => {
|
||||||
const path = './publicDirs/qminingPublic/backs/'
|
const path = './publicDirs/qminingPublic/backs/'
|
||||||
utils.CreatePath(path)
|
utils.CreatePath(path)
|
||||||
try {
|
try {
|
||||||
logger.Log(`Backing up ${data.name}...`)
|
logger.Log(`Backing up ${data.name}...`)
|
||||||
utils.WriteFile(
|
writeData(
|
||||||
JSON.stringify(data.data),
|
data.data,
|
||||||
`${path}${data.name}_${utils.GetDateString(true)}.json`
|
`${path}${data.name}_${utils.GetDateString(true)}.json`
|
||||||
)
|
)
|
||||||
logger.Log('Done')
|
logger.Log('Done')
|
||||||
|
|
|
@ -28,12 +28,12 @@ const commonUselessAnswerParts = [
|
||||||
"'",
|
"'",
|
||||||
]
|
]
|
||||||
|
|
||||||
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
|
// const commonUselessStringParts = [',', '\\.', ':', '!', '\\+', '\\s*\\.']
|
||||||
const specialChars = ['&', '\\+']
|
|
||||||
/* Percent minus for length difference */
|
/* Percent minus for length difference */
|
||||||
const lengthDiffMultiplier = 10
|
const lengthDiffMultiplier = 10
|
||||||
/* Minimum ammount to consider that two questions match during answering */
|
/* Minimum ammount to consider that two questions match during answering */
|
||||||
const minMatchAmmount = 70
|
const minMatchAmmount = 70
|
||||||
|
const magicNumber = 0.7 // same as minMatchAmmount, but /100
|
||||||
/* If all of the results are below this match percent (when only one subject is searched due to
|
/* If all of the results are below this match percent (when only one subject is searched due to
|
||||||
* subject name matching) then all subjects are searched for answer */
|
* subject name matching) then all subjects are searched for answer */
|
||||||
const minMatchToNotSearchOtherSubjects = 90
|
const minMatchToNotSearchOtherSubjects = 90
|
||||||
|
@ -55,6 +55,14 @@ function getSubjNameWithoutYear(subjName: string): string {
|
||||||
|
|
||||||
// Not exported
|
// Not exported
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
function simplifyString(toremove) {
|
||||||
|
return toremove
|
||||||
|
.replace(/\s/g, ' ')
|
||||||
|
.replace(/\s+/g, ' ')
|
||||||
|
.toLowerCase()
|
||||||
|
}
|
||||||
|
|
||||||
function removeStuff(
|
function removeStuff(
|
||||||
value: string,
|
value: string,
|
||||||
removableStrings: Array<string>,
|
removableStrings: Array<string>,
|
||||||
|
@ -67,55 +75,49 @@ function removeStuff(
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
// removes whitespace from begining and and, and replaces multiple spaces with one space
|
|
||||||
function removeUnnecesarySpaces(toremove: string) {
|
|
||||||
assert(toremove)
|
|
||||||
|
|
||||||
toremove = normalizeSpaces(toremove)
|
|
||||||
while (toremove.includes(' ')) {
|
|
||||||
toremove = toremove.replace(/ {2}/g, ' ')
|
|
||||||
}
|
|
||||||
return toremove.trim()
|
|
||||||
}
|
|
||||||
|
|
||||||
// simplifies a string for easier comparison
|
|
||||||
function simplifyStringForComparison(value: string) {
|
|
||||||
assert(value)
|
|
||||||
|
|
||||||
value = removeUnnecesarySpaces(value).toLowerCase()
|
|
||||||
return removeStuff(value, commonUselessStringParts)
|
|
||||||
}
|
|
||||||
|
|
||||||
function removeSpecialChars(value: string) {
|
|
||||||
assert(value)
|
|
||||||
|
|
||||||
return removeStuff(value, specialChars, ' ')
|
|
||||||
}
|
|
||||||
|
|
||||||
// damn nonbreaking space
|
// damn nonbreaking space
|
||||||
function normalizeSpaces(input: string) {
|
function normalizeSpaces(input) {
|
||||||
assert(input)
|
|
||||||
|
|
||||||
return input.replace(/\s/g, ' ')
|
return input.replace(/\s/g, ' ')
|
||||||
}
|
}
|
||||||
|
|
||||||
function compareString(string1: string, string2: string) {
|
function removeUnnecesarySpaces(toremove: string) {
|
||||||
if (!string1 || !string2) {
|
return normalizeSpaces(toremove).replace(/\s+/g, ' ')
|
||||||
if (!string1 && !string2) {
|
}
|
||||||
|
|
||||||
|
function compareString(s1, s2) {
|
||||||
|
if (!s1 || !s2) {
|
||||||
|
if (!s1 && !s2) {
|
||||||
|
return 100
|
||||||
|
} else {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (s1.length < 0 || s2.length < 0) {
|
||||||
|
if (s1.length === 0 && s2.length === 0) {
|
||||||
return 100
|
return 100
|
||||||
} else {
|
} else {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const s1 = simplifyStringForComparison(string1).split(' ')
|
|
||||||
const s2 = simplifyStringForComparison(string2).split(' ')
|
|
||||||
let match = 0
|
let match = 0
|
||||||
for (let i = 0; i < s1.length; i++) {
|
let lastMatchIndex = -1
|
||||||
if (s2.includes(s1[i])) {
|
let i = 0
|
||||||
match++
|
|
||||||
|
while (i < s1.length) {
|
||||||
|
if (match / i < magicNumber) {
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const currMatchIndex = s2.indexOf(s1[i])
|
||||||
|
if (lastMatchIndex < currMatchIndex) {
|
||||||
|
match++
|
||||||
|
lastMatchIndex = currMatchIndex
|
||||||
|
}
|
||||||
|
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
let percent = Math.round(parseFloat(((match / s1.length) * 100).toFixed(2)))
|
let percent = Math.round(parseFloat(((match / s1.length) * 100).toFixed(2)))
|
||||||
const lengthDifference = Math.abs(s2.length - s1.length)
|
const lengthDifference = Math.abs(s2.length - s1.length)
|
||||||
percent -= lengthDifference * lengthDiffMultiplier
|
percent -= lengthDifference * lengthDiffMultiplier
|
||||||
|
@ -163,7 +165,6 @@ function simplifyAnswer(value: string) {
|
||||||
return value
|
return value
|
||||||
}
|
}
|
||||||
return simplifyQA(value, [
|
return simplifyQA(value, [
|
||||||
removeSpecialChars,
|
|
||||||
removeUnnecesarySpaces,
|
removeUnnecesarySpaces,
|
||||||
answerPreProcessor,
|
answerPreProcessor,
|
||||||
removeAnswerLetters,
|
removeAnswerLetters,
|
||||||
|
@ -175,22 +176,16 @@ function simplifyQuestion(question: Question | string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if (typeof question === 'string') {
|
if (typeof question === 'string') {
|
||||||
return simplifyQA(question, [
|
return simplifyQA(question, [removeUnnecesarySpaces, removeAnswerLetters])
|
||||||
removeSpecialChars,
|
|
||||||
removeUnnecesarySpaces,
|
|
||||||
removeAnswerLetters,
|
|
||||||
])
|
|
||||||
} else {
|
} else {
|
||||||
if (question.Q) {
|
if (question.Q) {
|
||||||
question.Q = simplifyQA(question.Q, [
|
question.Q = simplifyQA(question.Q, [
|
||||||
removeSpecialChars,
|
|
||||||
removeUnnecesarySpaces,
|
removeUnnecesarySpaces,
|
||||||
removeAnswerLetters,
|
removeAnswerLetters,
|
||||||
])
|
])
|
||||||
}
|
}
|
||||||
if (question.A) {
|
if (question.A) {
|
||||||
question.A = simplifyQA(question.A, [
|
question.A = simplifyQA(question.A, [
|
||||||
removeSpecialChars,
|
|
||||||
removeUnnecesarySpaces,
|
removeUnnecesarySpaces,
|
||||||
removeAnswerLetters,
|
removeAnswerLetters,
|
||||||
])
|
])
|
||||||
|
@ -205,13 +200,29 @@ function simplifyQuestion(question: Question | string) {
|
||||||
|
|
||||||
function createQuestion(
|
function createQuestion(
|
||||||
question: Question | string,
|
question: Question | string,
|
||||||
answer: string,
|
answer?: string,
|
||||||
data: QuestionData
|
data?: QuestionData
|
||||||
): Question {
|
): Question {
|
||||||
return {
|
try {
|
||||||
Q: simplifyQuestion(question),
|
if (typeof question === 'string') {
|
||||||
A: answer ? simplifyAnswer(answer) : undefined,
|
return {
|
||||||
data: data,
|
Q: simplifyQuestion(question),
|
||||||
|
A: answer ? simplifyAnswer(answer) : undefined,
|
||||||
|
data: data,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return {
|
||||||
|
...question,
|
||||||
|
cache: {
|
||||||
|
Q: question.Q ? simplifyString(question.Q).split(' ') : [],
|
||||||
|
A: question.A ? simplifyString(question.A).split(' ') : [],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
logger.Log('Error creating question', logger.GetColor('redbg'))
|
||||||
|
console.error(question, answer, data)
|
||||||
|
console.error(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,11 +268,11 @@ function compareData(q1: Question, q2: Question) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function compareQuestion(q1: Question, q2: Question) {
|
function compareQuestion(q1: Question, q2: Question) {
|
||||||
return compareString(q1.Q, q2.Q)
|
return compareString(q1.cache.Q, q2.cache.Q)
|
||||||
}
|
}
|
||||||
|
|
||||||
function compareAnswer(q1: Question, q2: Question) {
|
function compareAnswer(q1: Question, q2: Question) {
|
||||||
return compareString(q1.A, q2.A)
|
return compareString(q1.cache.A, q2.cache.A)
|
||||||
}
|
}
|
||||||
|
|
||||||
function compareQuestionObj(
|
function compareQuestionObj(
|
||||||
|
@ -328,7 +339,11 @@ function searchSubject(
|
||||||
assert(question)
|
assert(question)
|
||||||
|
|
||||||
let result = []
|
let result = []
|
||||||
subj.Questions.every((currentQuestion) => {
|
|
||||||
|
let stopSearch = false
|
||||||
|
let i = subj.Questions.length - 1
|
||||||
|
while (i >= 0 && !stopSearch) {
|
||||||
|
const currentQuestion = subj.Questions[i]
|
||||||
const percent = compareQuestionObj(
|
const percent = compareQuestionObj(
|
||||||
currentQuestion,
|
currentQuestion,
|
||||||
subjName,
|
subjName,
|
||||||
|
@ -337,7 +352,7 @@ function searchSubject(
|
||||||
question.data
|
question.data
|
||||||
)
|
)
|
||||||
|
|
||||||
if (percent.avg > minMatchAmmount) {
|
if (percent.avg >= minMatchAmmount) {
|
||||||
result.push({
|
result.push({
|
||||||
q: currentQuestion,
|
q: currentQuestion,
|
||||||
match: percent.avg,
|
match: percent.avg,
|
||||||
|
@ -346,11 +361,11 @@ function searchSubject(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
|
if (searchTillMatchPercent && percent.avg >= searchTillMatchPercent) {
|
||||||
return false
|
stopSearch = true
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
i--
|
||||||
})
|
}
|
||||||
|
|
||||||
result = result.sort((q1, q2) => {
|
result = result.sort((q1, q2) => {
|
||||||
if (q1.match < q2.match) {
|
if (q1.match < q2.match) {
|
||||||
|
@ -421,9 +436,9 @@ function prepareQuestion(
|
||||||
let preparedQuestion: Question
|
let preparedQuestion: Question
|
||||||
|
|
||||||
if (typeof question === 'object') {
|
if (typeof question === 'object') {
|
||||||
preparedQuestion = question
|
preparedQuestion = createQuestion(question)
|
||||||
} else {
|
} else {
|
||||||
let parsedData
|
let parsedData: any
|
||||||
if (typeof data === 'string') {
|
if (typeof data === 'string') {
|
||||||
try {
|
try {
|
||||||
parsedData = JSON.parse(data)
|
parsedData = JSON.parse(data)
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 49eae83f8194ab9585939b93119f82f7c0da16bb
|
Subproject commit 7f4163736cc0bfed3259f39f7bc0063ca191da21
|
Loading…
Add table
Add a link
Reference in a new issue