Duplicate remover: managing possible questions

This commit is contained in:
mrfry 2021-03-29 18:19:02 +02:00
parent 934319f984
commit 43b8d939c1
3 changed files with 174 additions and 35 deletions

View file

@ -1,7 +1,13 @@
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line const {
addQuestion,
doSearch,
compareQuestionObj,
createQuestion,
} = require('../../dist/utils/classes.js') // eslint-disable-line
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
const fs = require('fs') // eslint-disable-line
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ] // Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
// load: 2.767ms // load: 2.767ms
@ -38,40 +44,149 @@ utils.WriteFile('', globalLog)
// ---------------------------------------------------------------------------------
// entry point
// ---------------------------------------------------------------------------------
// Usage: <script> pathA [pathB]
//  - pathA is a directory, pathB given: delete the files under pathA that
//    contain a question found in the db loaded from pathB
//  - pathA is a directory, no pathB: delete the files under pathA that
//    duplicate each other
//  - pathA is a file, no pathB: remove duplicate questions from the db
//  - pathA is a file, pathB given: remove the questions found in pathB from pathA
const params = process.argv.splice(2)
const pathA = params[0]
const pathB = params[1]

const stat = fs.lstatSync(pathA)
if (stat.isDirectory()) {
  if (pathB) {
    log(
      `Clearing possible questions from ${C(
        'green'
      )}${pathA}${C()} based on ${C('green')}${pathB}${C()} db`
    )
    // pathB is known to be set inside this branch, so load it unconditionally
    const db = loadData(pathB)
    clearPossibleAnswers(pathA, db)
  } else {
    removePossibleAnswersDuplicates(pathA)
  }
} else {
  console.time('load')
  const dbA = loadData(pathA)
  const dbB = pathB ? loadData(pathB) : null
  console.timeEnd('load')

  console.time('rmduplicates')

  // Shared completion handler of both db modes
  const writeResult = (res) => {
    console.timeEnd('rmduplicates')
    writeData(res, resultDbFileName)
    log('File written')
  }

  // rmDuplicates / difference are not declared async, so their result may be
  // a plain value instead of a promise. Promise.resolve accepts both; calling
  // .then directly on a plain value would throw.
  if (!dbB) {
    log(`Removing duplicate questions from ${C('green')}${pathA}${C()}`)
    Promise.resolve(rmDuplicates(dbA)).then(writeResult)
  } else {
    log(
      `Removing questions found in ${C('green')}${pathB}${C()} from ${C(
        'green'
      )}${pathA}${C()}`
    )
    Promise.resolve(difference({ dbA: dbA, dbB: dbB })).then(writeResult)
  }
}
// ---------------------------------------------------------------------------------
// possible answers duplicate removing
// ---------------------------------------------------------------------------------
/**
 * Deletes files under `path` that duplicate another file under `path`.
 *
 * Every file reported by iterateDir is compared against every other reported
 * file. Paths containing 'savedQuestions.json' are skipped on both sides.
 * When any question of the current file and any question of the other file
 * compare with an `avg` of exactly 100, the other file is deleted.
 * The number of deleted files is logged at the end.
 *
 * @param {string} path - root path handed to iterateDir
 */
function removePossibleAnswersDuplicates(path) {
  let count = 0
  let currIndex = 1
  let delets = 0

  // First pass only counts the entries, so the progress bar has a total
  iterateDir(path, () => {
    count++
  })

  // True when compareQuestionObj reports an avg of exactly 100 for the pair
  const isFullMatch = (q1, q2) => {
    const percent = compareQuestionObj(
      createQuestion(q1),
      '',
      createQuestion(q2),
      ''
    )
    return percent.avg === 100
  }

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }
    // The file may already have been deleted as a duplicate of an earlier one
    if (!utils.FileExists(currPath)) {
      return
    }
    const currData = utils.ReadJSON(currPath)

    iterateDir(path, (currPath2) => {
      if (currPath === currPath2) {
        return
      }
      if (currPath2.includes('savedQuestions.json')) {
        return
      }
      // Check the file that is about to be read, not the outer one
      if (!utils.FileExists(currPath2)) {
        return
      }

      // Each other file is read once per current file, not once per question
      const dataB = utils.ReadJSON(currPath2)
      const isDuplicate = currData.questions.some((q1) => {
        return dataB.questions.some((q2) => isFullMatch(q1, q2))
      })

      // Delete and count the file once, however many question pairs matched
      if (isDuplicate) {
        utils.deleteFile(currPath2)
        count--
        delets++
      }
    })

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${delets}${C()} files`)
}
/**
 * Deletes files under `path` that contain a question already present in `db`.
 *
 * Every file reported by iterateDir is read, except paths containing
 * 'savedQuestions.json'. A file is deleted when `search` returns a non-empty
 * result for at least one of its questions. The number of deleted files is
 * logged at the end.
 *
 * @param {string} path - root path handed to iterateDir
 * @param {*} db - question db passed to `search` as `qdb`
 */
function clearPossibleAnswers(path, db) {
  let count = 0
  let currIndex = 1
  let delets = 0

  // First pass only counts the entries, so the progress bar has a total
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }
    const { subj, questions } = utils.ReadJSON(currPath)

    // Stop at the first question found in the db: one hit is enough
    const isKnown = questions.some((question) => {
      const searchRes = search({
        qdb: db,
        subjName: subj,
        question: question,
        searchTillMatchPercent: 80,
      })
      return searchRes.length > 0
    })

    // Delete and count the file once, even when several questions matched
    if (isKnown) {
      utils.deleteFile(currPath)
      delets++
    }

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${delets}${C()} files`)
}
async function difference({ dbA, dbB }) { // ---------------------------------------------------------------------------------
// difference
// ---------------------------------------------------------------------------------
/**
 * Calls `difference` with only `dbA` set (no `dbB`) and returns its result
 * unchanged.
 *
 * @param {*} db - db passed to `difference` as `dbA`
 * @returns {*} whatever `difference` returns
 */
function rmDuplicates(db) {
  const differenceArgs = { dbA: db }
  return difference(differenceArgs)
}
function difference({ dbA, dbB }) {
const doingDifference = !!dbB const doingDifference = !!dbB
// Stuff only from A // Stuff only from A
const resultDb = [] const resultDb = []
@ -105,7 +220,7 @@ async function difference({ dbA, dbB }) {
for (let j = 0; j < subj.Questions.length; j++) { for (let j = 0; j < subj.Questions.length; j++) {
const question = subj.Questions[j] const question = subj.Questions[j]
const searchRes = await search({ const searchRes = search({
qdb: doingDifference ? dbB : resultDb, qdb: doingDifference ? dbB : resultDb,
subjName: subj.Name, subjName: subj.Name,
question: question, question: question,
@ -167,11 +282,14 @@ function hasRequiredPercent(result, minpercent) {
// --------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------
/**
 * Thin synchronous wrapper around doSearch.
 *
 * @param {Object} args
 * @param {*} args.qdb - db to search in
 * @param {string} args.subjName - subject name of the question
 * @param {*} args.question - question to look for
 * @param {boolean} [args.searchInAllIfNoResult] - forwarded to doSearch as-is
 * @param {number} [args.searchTillMatchPercent] - value for the fifth doSearch
 *   argument; defaults to the module-level `minpercent`, which is what was
 *   always passed before. clearPossibleAnswers already supplies this key, but
 *   it used to be dropped here.
 *   NOTE(review): presumably the percent at which doSearch stops looking
 *   further - confirm against doSearch.
 * @returns {*} whatever doSearch returns
 */
function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
function iterateSubjects(db, fn) { function iterateSubjects(db, fn) {
@ -182,6 +300,26 @@ function iterateSubjects(db, fn) {
}) })
} }
// ---------------------------------------------------------------------------------
// possible answers tools
// ---------------------------------------------------------------------------------
/**
 * Recursively walks `path` and calls `action` for every entry that is not a
 * directory.
 *
 * Paths that do not exist (per utils.FileExists) are ignored. Directories are
 * detected with lstat, and child paths are built as `${path}/${name}`.
 *
 * @param {string} path - file or directory to start from
 * @param {function(string): void} action - called with each non-directory path
 */
function iterateDir(path, action) {
  if (!utils.FileExists(path)) {
    return
  }

  const isDir = fs.lstatSync(path).isDirectory()
  if (!isDir) {
    action(path)
    return
  }

  for (const entryName of fs.readdirSync(path)) {
    iterateDir(`${path}/${entryName}`, action)
  }
}
// --------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------
// logging and tools // logging and tools
// --------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------

View file

@ -302,7 +302,7 @@ function compareQuestionObj(
q2: Question, q2: Question,
q2subjName: string, q2subjName: string,
data: QuestionData data: QuestionData
) { ): any {
assert(data !== undefined || data !== null) assert(data !== undefined || data !== null)
assert(q1) assert(q1)
assert(typeof q1 === 'object') assert(typeof q1 === 'object')
@ -678,6 +678,7 @@ if (!isMainThread) {
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
export { export {
compareQuestionObj,
minMatchAmmount, minMatchAmmount,
getSubjNameWithoutYear, getSubjNameWithoutYear,
createQuestion, createQuestion,

@ -1 +1 @@
Subproject commit 33e8b3a49e7ddbf5c52721c51e655dc28b6ff877 Subproject commit 1446a57c28072384b471fcad71e8c637c98ff207