mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
Duplicate remover: managing possible questions
This commit is contained in:
parent
934319f984
commit
43b8d939c1
3 changed files with 174 additions and 35 deletions
|
@ -1,7 +1,13 @@
|
|||
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
||||
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
||||
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
||||
const {
|
||||
addQuestion,
|
||||
doSearch,
|
||||
compareQuestionObj,
|
||||
createQuestion,
|
||||
} = require('../../dist/utils/classes.js') // eslint-disable-line
|
||||
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
||||
const fs = require('fs') // eslint-disable-line
|
||||
|
||||
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
|
||||
// load: 2.767ms
|
||||
|
@ -38,17 +44,33 @@ utils.WriteFile('', globalLog)
|
|||
|
||||
const params = process.argv.splice(2)
|
||||
|
||||
const fileA = params[0]
|
||||
const fileB = params[1]
|
||||
const pathA = params[0]
|
||||
const pathB = params[1]
|
||||
|
||||
const stat = fs.lstatSync(pathA)
|
||||
if (stat.isDirectory()) {
|
||||
if (pathB) {
|
||||
log(
|
||||
`Clearing possible questions from ${C(
|
||||
'green'
|
||||
)}${pathA}${C()} based on ${C('green')}${pathB}${C()} db`
|
||||
)
|
||||
const db = pathB ? loadData(pathB) : null
|
||||
|
||||
clearPossibleAnswers(pathA, db)
|
||||
} else {
|
||||
removePossibleAnswersDuplicates(pathA)
|
||||
}
|
||||
} else {
|
||||
console.time('load')
|
||||
const dbA = loadData(fileA)
|
||||
const dbB = fileB ? loadData(fileB) : null
|
||||
const dbA = loadData(pathA)
|
||||
const dbB = pathB ? loadData(pathB) : null
|
||||
console.timeEnd('load')
|
||||
|
||||
console.time('rmduplicates')
|
||||
|
||||
if (!dbB) {
|
||||
log(`Removing duplicate questions from ${fileA}`)
|
||||
log(`Removing duplicate questions from ${C('green')}${pathA}${C()}`)
|
||||
rmDuplicates(dbA).then((res) => {
|
||||
console.timeEnd('rmduplicates')
|
||||
writeData(res, resultDbFileName)
|
||||
|
@ -56,9 +78,9 @@ if (!dbB) {
|
|||
})
|
||||
} else {
|
||||
log(
|
||||
`Removing questions found in ${C('green')}${fileB}${C()} from ${C(
|
||||
`Removing questions found in ${C('green')}${pathB}${C()} from ${C(
|
||||
'green'
|
||||
)}${fileA}${C()}`
|
||||
)}${pathA}${C()}`
|
||||
)
|
||||
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
||||
console.timeEnd('rmduplicates')
|
||||
|
@ -66,12 +88,105 @@ if (!dbB) {
|
|||
log('File written')
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Removes duplicate questions from a single db.
 *
 * Delegates to `difference` with only `dbA` set, so there is no second db to
 * compare against.
 *
 * @param {*} db - the loaded question db to deduplicate
 * @returns {Promise<*>} whatever `difference` resolves to
 */
async function rmDuplicates(db) {
  const deduplicated = await difference({ dbA: db })
  return deduplicated
}
|
||||
|
||||
async function difference({ dbA, dbB }) {
|
||||
// ---------------------------------------------------------------------------------
|
||||
// removing duplicate possible answers
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Deletes possible-answer files under `path` that contain a question which is
 * also present in another file of the same tree.
 *
 * Every file visited by `iterateDir` is read as JSON, except those whose path
 * contains 'savedQuestions.json'. Each of its `questions` is compared with the
 * questions of every other file. The other file is deleted once, as soon as
 * one of its questions compares with `avg === 100`.
 *
 * @param {string} path - root directory (or single file) to scan
 */
function removePossibleAnswersDuplicates(path) {
  const skippedFileName = 'savedQuestions.json'
  let count = 0
  let currIndex = 1
  let deletedCount = 0

  // First pass only counts the files, so the progress bar has a total
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes(skippedFileName)) {
      return
    }
    // The file may have been deleted while an earlier file was processed
    if (!utils.FileExists(currPath)) {
      return
    }

    const currData = utils.ReadJSON(currPath)
    currData.questions.forEach((q1) => {
      iterateDir(path, (currPath2) => {
        if (currPath === currPath2) {
          return
        }
        if (currPath2.includes(skippedFileName)) {
          return
        }
        // Check the file that is about to be read, not the outer one
        if (!utils.FileExists(currPath2)) {
          return
        }

        const dataB = utils.ReadJSON(currPath2)

        // Stop at the first exact match: the file only needs deleting (and
        // counting) once, however many of its questions match
        const isDuplicate = dataB.questions.some((q2) => {
          const percent = compareQuestionObj(
            createQuestion(q1),
            '',
            createQuestion(q2),
            ''
          )
          return percent.avg === 100
        })

        if (isDuplicate) {
          utils.deleteFile(currPath2)
          count--
          deletedCount++
        }
      })
    })

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${deletedCount}${C()} files`)
}
|
||||
|
||||
/**
 * Deletes possible-answer files under `path` that contain at least one
 * question for which `search` finds a result in `db`.
 *
 * Files whose path contains 'savedQuestions.json' are left untouched. Each
 * matching file is deleted and counted exactly once.
 *
 * @param {string} path - root directory (or single file) to scan
 * @param {*} db - loaded question db handed to `search` as `qdb`
 */
function clearPossibleAnswers(path, db) {
  let count = 0
  let currIndex = 1
  let deletedCount = 0

  // First pass only counts the files, so the progress bar has a total
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }

    const { subj, questions } = utils.ReadJSON(currPath)

    // One hit is enough to drop the file, so stop searching after the first
    const isAlreadyInDb = questions.some((question) => {
      const searchRes = search({
        qdb: db,
        subjName: subj,
        question: question,
        searchTillMatchPercent: 80,
      })
      return searchRes.length > 0
    })

    if (isAlreadyInDb) {
      utils.deleteFile(currPath)
      deletedCount++
    }

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${deletedCount}${C()} files`)
}
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// difference
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Removes duplicate questions from a single db by running `difference` with
 * only `dbA` set.
 *
 * The result is always wrapped in a Promise because the caller chains
 * `.then()` on it. `Promise.resolve` keeps that working whether `difference`
 * returns a plain value or a Promise.
 *
 * @param {*} db - the loaded question db to deduplicate
 * @returns {Promise<*>} resolves with the result of `difference`
 */
function rmDuplicates(db) {
  return Promise.resolve(difference({ dbA: db }))
}
|
||||
|
||||
function difference({ dbA, dbB }) {
|
||||
const doingDifference = !!dbB
|
||||
// Stuff only from A
|
||||
const resultDb = []
|
||||
|
@ -105,7 +220,7 @@ async function difference({ dbA, dbB }) {
|
|||
|
||||
for (let j = 0; j < subj.Questions.length; j++) {
|
||||
const question = subj.Questions[j]
|
||||
const searchRes = await search({
|
||||
const searchRes = search({
|
||||
qdb: doingDifference ? dbB : resultDb,
|
||||
subjName: subj.Name,
|
||||
question: question,
|
||||
|
@ -167,11 +282,14 @@ function hasRequiredPercent(result, minpercent) {
|
|||
// ---------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Thin wrapper around `doSearch` that takes named options.
 *
 * @param {object} options
 * @param {*} options.qdb - db to search in
 * @param {string} options.subjName - subject name of the question
 * @param {*} options.question - question to look for
 * @param {boolean} [options.searchInAllIfNoResult] - forwarded to `doSearch`
 * @param {number} [options.searchTillMatchPercent] - percent forwarded to
 *   `doSearch` in place of the module-level `minpercent`; callers that omit
 *   it get `minpercent`
 * @returns {*} the result of `doSearch`
 */
function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  // NOTE(review): assumes the fifth `doSearch` argument is the match-percent
  // threshold (it is where `minpercent` is passed) - confirm against classes.js
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
|
||||
|
||||
function iterateSubjects(db, fn) {
|
||||
|
@ -182,6 +300,26 @@ function iterateSubjects(db, fn) {
|
|||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// possible answers tools
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
/**
 * Recursively walks `path` and calls `action` with the path of every entry
 * that is not a directory.
 *
 * Paths that do not exist are ignored. Directory entries are joined to the
 * parent path with '/'.
 *
 * @param {string} path - file or directory to start from
 * @param {function(string): void} action - called once per non-directory path
 */
function iterateDir(path, action) {
  if (!utils.FileExists(path)) {
    return
  }

  const isFolder = fs.lstatSync(path).isDirectory()
  if (!isFolder) {
    action(path)
    return
  }

  // Descend into each entry of the directory in the order readdirSync lists them
  for (const entryName of fs.readdirSync(path)) {
    iterateDir(`${path}/${entryName}`, action)
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// logging and tools
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
|
|
@ -302,7 +302,7 @@ function compareQuestionObj(
|
|||
q2: Question,
|
||||
q2subjName: string,
|
||||
data: QuestionData
|
||||
) {
|
||||
): any {
|
||||
assert(data !== undefined || data !== null)
|
||||
assert(q1)
|
||||
assert(typeof q1 === 'object')
|
||||
|
@ -678,6 +678,7 @@ if (!isMainThread) {
|
|||
// ------------------------------------------------------------------------
|
||||
|
||||
export {
|
||||
compareQuestionObj,
|
||||
minMatchAmmount,
|
||||
getSubjNameWithoutYear,
|
||||
createQuestion,
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 33e8b3a49e7ddbf5c52721c51e655dc28b6ff877
|
||||
Subproject commit 1446a57c28072384b471fcad71e8c637c98ff207
|
Loading…
Add table
Add a link
Reference in a new issue