mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
Duplicate remover: managing possible questions
This commit is contained in:
parent
934319f984
commit
43b8d939c1
3 changed files with 174 additions and 35 deletions
|
@ -1,7 +1,13 @@
|
||||||
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
||||||
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
||||||
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
const {
|
||||||
|
addQuestion,
|
||||||
|
doSearch,
|
||||||
|
compareQuestionObj,
|
||||||
|
createQuestion,
|
||||||
|
} = require('../../dist/utils/classes.js') // eslint-disable-line
|
||||||
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
||||||
|
const fs = require('fs') // eslint-disable-line
|
||||||
|
|
||||||
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
|
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
|
||||||
// load: 2.767ms
|
// load: 2.767ms
|
||||||
|
@ -38,40 +44,149 @@ utils.WriteFile('', globalLog)
|
||||||
|
|
||||||
const params = process.argv.splice(2)
|
const params = process.argv.splice(2)
|
||||||
|
|
||||||
const fileA = params[0]
|
const pathA = params[0]
|
||||||
const fileB = params[1]
|
const pathB = params[1]
|
||||||
|
|
||||||
console.time('load')
|
const stat = fs.lstatSync(pathA)
|
||||||
const dbA = loadData(fileA)
|
if (stat.isDirectory()) {
|
||||||
const dbB = fileB ? loadData(fileB) : null
|
if (pathB) {
|
||||||
console.timeEnd('load')
|
log(
|
||||||
|
`Clearing possible questions from ${C(
|
||||||
|
'green'
|
||||||
|
)}${pathA}${C()} based on ${C('green')}${pathB}${C()} db`
|
||||||
|
)
|
||||||
|
const db = pathB ? loadData(pathB) : null
|
||||||
|
|
||||||
console.time('rmduplicates')
|
clearPossibleAnswers(pathA, db)
|
||||||
if (!dbB) {
|
} else {
|
||||||
log(`Removing duplicate questions from ${fileA}`)
|
removePossibleAnswersDuplicates(pathA)
|
||||||
rmDuplicates(dbA).then((res) => {
|
}
|
||||||
console.timeEnd('rmduplicates')
|
|
||||||
writeData(res, resultDbFileName)
|
|
||||||
log('File written')
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
log(
|
console.time('load')
|
||||||
`Removing questions found in ${C('green')}${fileB}${C()} from ${C(
|
const dbA = loadData(pathA)
|
||||||
'green'
|
const dbB = pathB ? loadData(pathB) : null
|
||||||
)}${fileA}${C()}`
|
console.timeEnd('load')
|
||||||
)
|
|
||||||
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
console.time('rmduplicates')
|
||||||
console.timeEnd('rmduplicates')
|
|
||||||
writeData(res, resultDbFileName)
|
if (!dbB) {
|
||||||
log('File written')
|
log(`Removing duplicate questions from ${C('green')}${pathA}${C()}`)
|
||||||
|
rmDuplicates(dbA).then((res) => {
|
||||||
|
console.timeEnd('rmduplicates')
|
||||||
|
writeData(res, resultDbFileName)
|
||||||
|
log('File written')
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
log(
|
||||||
|
`Removing questions found in ${C('green')}${pathB}${C()} from ${C(
|
||||||
|
'green'
|
||||||
|
)}${pathA}${C()}`
|
||||||
|
)
|
||||||
|
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
||||||
|
console.timeEnd('rmduplicates')
|
||||||
|
writeData(res, resultDbFileName)
|
||||||
|
log('File written')
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
// possible answers duplicate removing
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Removes duplicate "possible answer" files found under `path`.
 *
 * Every file visited by `iterateDir` (except paths containing
 * 'savedQuestions.json') is read, and each of its questions is compared with
 * every question of every other file. If `compareQuestionObj` reports an
 * `avg` of exactly 100 for any pair, the other file is deleted.
 *
 * @param {string} path - Root path handed to `iterateDir`.
 */
function removePossibleAnswersDuplicates(path) {
  let count = 0
  let currIndex = 1
  let delets = 0

  // First pass only counts the files so the progress bar has a total.
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }
    if (!utils.FileExists(currPath)) {
      return
    }
    const currData = utils.ReadJSON(currPath)

    currData.questions.forEach((q1) => {
      iterateDir(path, (currPath2) => {
        if (currPath === currPath2) {
          return
        }
        if (currPath2.includes('savedQuestions.json')) {
          return
        }
        // Guard the file that is about to be read (the candidate duplicate),
        // not the file currently being processed.
        if (!utils.FileExists(currPath2)) {
          return
        }
        const dataB = utils.ReadJSON(currPath2)

        // Decide once per file: a single full match is enough, and deleting
        // inside the per-question loop would remove and count the same file
        // multiple times.
        const isDuplicate = dataB.questions.some((q2) => {
          const percent = compareQuestionObj(
            createQuestion(q1),
            '',
            createQuestion(q2),
            ''
          )
          return percent.avg === 100
        })

        if (isDuplicate) {
          utils.deleteFile(currPath2)
          count--
          delets++
        }
      })
    })

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${delets}${C()} files`)
}
|
||||||
|
|
||||||
async function rmDuplicates(db) {
|
/**
 * Deletes "possible answer" files under `path` whose questions are already
 * present in `db`.
 *
 * Every file visited by `iterateDir` (except paths containing
 * 'savedQuestions.json') is read as `{ subj, questions }`. If `search` returns
 * a non-empty result for at least one of its questions, the file is deleted.
 *
 * @param {string} path - Root path handed to `iterateDir`.
 * @param {*} db - Question database passed to `search` as `qdb`.
 */
function clearPossibleAnswers(path, db) {
  let count = 0
  let currIndex = 1
  let delets = 0

  // First pass only counts the files so the progress bar has a total.
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }
    const { subj, questions } = utils.ReadJSON(currPath)

    // One hit is enough to drop the file; checking with `some` guarantees the
    // file is deleted (and counted) at most once, however many of its
    // questions are found in the db.
    const isAlreadyKnown = questions.some((question) => {
      const searchRes = search({
        qdb: db,
        subjName: subj,
        question: question,
        searchTillMatchPercent: 80,
      })
      return searchRes.length > 0
    })

    if (isAlreadyKnown) {
      utils.deleteFile(currPath)
      delets++
    }

    printProgressBar(currIndex, count)
  })

  log(`Deleted ${C('green')}${delets}${C()} files`)
}
|
||||||
|
|
||||||
async function difference({ dbA, dbB }) {
|
// ---------------------------------------------------------------------------------
|
||||||
|
// difference
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Removes duplicate questions from a single database by running `difference`
 * with only `dbA` set.
 *
 * @param {*} db - Database forwarded to `difference` as `dbA`.
 * @returns {*} Whatever `difference` returns for `{ dbA: db }`.
 */
function rmDuplicates(db) {
  const onlyA = { dbA: db }
  return difference(onlyA)
}
|
||||||
|
|
||||||
|
function difference({ dbA, dbB }) {
|
||||||
const doingDifference = !!dbB
|
const doingDifference = !!dbB
|
||||||
// Stuff only from A
|
// Stuff only from A
|
||||||
const resultDb = []
|
const resultDb = []
|
||||||
|
@ -105,7 +220,7 @@ async function difference({ dbA, dbB }) {
|
||||||
|
|
||||||
for (let j = 0; j < subj.Questions.length; j++) {
|
for (let j = 0; j < subj.Questions.length; j++) {
|
||||||
const question = subj.Questions[j]
|
const question = subj.Questions[j]
|
||||||
const searchRes = await search({
|
const searchRes = search({
|
||||||
qdb: doingDifference ? dbB : resultDb,
|
qdb: doingDifference ? dbB : resultDb,
|
||||||
subjName: subj.Name,
|
subjName: subj.Name,
|
||||||
question: question,
|
question: question,
|
||||||
|
@ -167,11 +282,14 @@ function hasRequiredPercent(result, minpercent) {
|
||||||
// ---------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
 * Thin wrapper around `doSearch` taking its arguments as a single options
 * object.
 *
 * @param {object} options
 * @param {*} options.qdb - Database to search in.
 * @param {string} options.subjName - Subject name forwarded to `doSearch`.
 * @param {*} options.question - Question forwarded to `doSearch`.
 * @param {boolean} [options.searchInAllIfNoResult] - Forwarded as the last
 *   `doSearch` argument.
 * @param {number} [options.searchTillMatchPercent] - Value forwarded in the
 *   position that previously always received the module-level `minpercent`;
 *   defaults to `minpercent`, so existing callers are unaffected.
 *   NOTE(review): presumably this is doSearch's match-percent threshold —
 *   confirm against its signature in classes.
 * @returns {*} The value returned by `doSearch`.
 */
function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
|
||||||
|
|
||||||
function iterateSubjects(db, fn) {
|
function iterateSubjects(db, fn) {
|
||||||
|
@ -182,6 +300,26 @@ function iterateSubjects(db, fn) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
// possible answers tools
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Recursively walks `path` and invokes `action` with the path of every entry
 * that `fs.lstatSync` does not report as a directory.
 *
 * Paths for which `utils.FileExists` is false are silently skipped.
 * Child paths are built as `${path}/${entryName}`.
 *
 * @param {string} path - File or directory to start from.
 * @param {(filePath: string) => void} action - Called once per non-directory
 *   entry.
 */
function iterateDir(path, action) {
  if (!utils.FileExists(path)) {
    return
  }

  const isDir = fs.lstatSync(path).isDirectory()
  if (!isDir) {
    action(path)
    return
  }

  for (const entryName of fs.readdirSync(path)) {
    iterateDir(`${path}/${entryName}`, action)
  }
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------
|
||||||
// logging and tools
|
// logging and tools
|
||||||
// ---------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
|
@ -302,7 +302,7 @@ function compareQuestionObj(
|
||||||
q2: Question,
|
q2: Question,
|
||||||
q2subjName: string,
|
q2subjName: string,
|
||||||
data: QuestionData
|
data: QuestionData
|
||||||
) {
|
): any {
|
||||||
assert(data !== undefined || data !== null)
|
assert(data !== undefined || data !== null)
|
||||||
assert(q1)
|
assert(q1)
|
||||||
assert(typeof q1 === 'object')
|
assert(typeof q1 === 'object')
|
||||||
|
@ -678,6 +678,7 @@ if (!isMainThread) {
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
export {
|
export {
|
||||||
|
compareQuestionObj,
|
||||||
minMatchAmmount,
|
minMatchAmmount,
|
||||||
getSubjNameWithoutYear,
|
getSubjNameWithoutYear,
|
||||||
createQuestion,
|
createQuestion,
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 33e8b3a49e7ddbf5c52721c51e655dc28b6ff877
|
Subproject commit 1446a57c28072384b471fcad71e8c637c98ff207
|
Loading…
Add table
Add a link
Reference in a new issue