Search speedup by caching split questions/answers and refactoring the string-compare algorithm

This commit is contained in:
mrfry 2021-03-17 12:24:50 +01:00
parent 043e825302
commit 8fdc62349b
6 changed files with 152 additions and 86 deletions

View file

@ -1,37 +1,41 @@
// NOTE(review): this span looks like a diff hunk whose +/- markers were lost, so
// removed and added lines appear interleaved (minpercent, resultDbFileName,
// params, fileA, fileB, dbA and dbB are each declared twice). Confirm against
// the real file which declaration of each name is the current one.

// Match percentage handed to the search as `searchTillMatchPercent`.
const minpercent = 97
// Name of the file the resulting database is written to.
const resultDbFileName = 'res.json'
// ---------------------------------------------------------------------------------------------------
// Project helpers, loaded from the ../../dist directory.
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
const { loadData } = require('../../dist/utils/actions.js') // eslint-disable-line
// TODO: merge 2 dbs
// TODO: filter questions out from a db based on another, producing a new one
// Command-line arguments with the first two process.argv entries dropped.
const params = process.argv.splice(2)
console.log('Params', params)
// First argument: path of database A. Second argument (optional): path of database B.
const fileA = params[0]
const fileB = params[1]
const dbA = utils.ReadJSON(fileA)
// dbB is null when no second file was given.
const dbB = fileB ? utils.ReadJSON(fileB) : null
const minpercent = 95
const resultDbFileName = 'res.json'
// Separator string; where it is used is outside this view.
const line =
'===================================================================='
// Directory for duplicate-removal logs.
const logPath = './duplicateRemovingLog/'
// presumably creates logPath if it does not exist yet — verify against utils.CreatePath
utils.CreatePath(logPath)
const params = process.argv.splice(2)
const fileA = params[0]
const fileB = params[1]
// Time how long loading the database file(s) takes.
console.time('load')
const dbA = loadData(fileA)
const dbB = fileB ? loadData(fileB) : null
console.timeEnd('load')
// Started here; the matching console.timeEnd runs once the removal promise resolves.
console.time('rmduplicates')
if (!dbB) {
console.log(`Removing duplicate questions from ${fileA}`)
rmDuplicates(dbA).then((res) => {
console.timeEnd('rmduplicates')
utils.WriteFile(JSON.stringify(res), resultDbFileName)
console.log('File written')
})
} else {
console.log(
`Removing questions found in ${C('green')}${fileB}${C()} from ${C(
'green'
)}${fileA}${C()}`
)
difference({ dbA: dbA, dbB: dbB }).then((res) => {
console.timeEnd('rmduplicates')
utils.WriteFile(JSON.stringify(res), resultDbFileName)
console.log('File written')
})
@ -85,6 +89,7 @@ async function difference({ dbA, dbB }) {
subjName: subj.Name,
question: question,
searchInAllIfNoResult: doingDifference,
searchTillMatchPercent: minpercent,
})
printProgressBar(j + 1, subj.Questions.length)