Files
mrfrys-node-server/src/standaloneUtils/rmDuplicates.js
T

215 lines
5.4 KiB
JavaScript

const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
const { loadData } = require('../../dist/utils/actions.js') // eslint-disable-line
// ---------------------------------------------------------------------------------
// Script entry point.
//
// Usage: node rmDuplicates.js <fileA> [fileB]
//   - only fileA:      duplicate questions inside fileA are removed
//   - fileA and fileB: questions of fileA that are also found in fileB are removed
// The resulting db is written to `resultDbFileName`; one log file per subject
// is written under `logPath`.
// ---------------------------------------------------------------------------------

// Minimum match value a search result needs for a question to count as a duplicate
const minpercent = 95
const resultDbFileName = 'res.json'
// Separator used between entries of the per-subject log files
const line =
  '===================================================================='
const logPath = './duplicateRemovingLog/'

utils.CreatePath(logPath)

// slice() instead of splice(): same result, but process.argv is not mutated
const params = process.argv.slice(2)
const fileA = params[0]
const fileB = params[1]

// Fail fast with a usage hint instead of handing `undefined` to loadData
if (!fileA) {
  console.error('Usage: node rmDuplicates.js <fileA> [fileB]')
  process.exit(1)
}

console.time('load')
const dbA = loadData(fileA)
const dbB = fileB ? loadData(fileB) : null
console.timeEnd('load')

console.time('rmduplicates')
let resultPromise
if (!dbB) {
  console.log(`Removing duplicate questions from ${fileA}`)
  resultPromise = rmDuplicates(dbA)
} else {
  console.log(
    `Removing questions found in ${C('green')}${fileB}${C()} from ${C(
      'green'
    )}${fileA}${C()}`
  )
  resultPromise = difference({ dbA: dbA, dbB: dbB })
}

resultPromise
  .then((res) => {
    console.timeEnd('rmduplicates')
    utils.WriteFile(JSON.stringify(res), resultDbFileName)
    console.log('File written')
  })
  .catch((err) => {
    // Report the failure explicitly and signal it through the exit code
    // instead of leaving the rejection unhandled.
    console.error(err)
    process.exitCode = 1
  })
/**
 * Removes duplicate questions from a single db by running `difference`
 * with only `dbA` set.
 *
 * @param {Array} db - db to deduplicate
 * @returns {Promise<Array>} whatever `difference` resolves with
 */
async function rmDuplicates(db) {
  const deduplicated = await difference({ dbA: db })
  return deduplicated
}
/**
 * Collects the questions of `dbA` that have no sufficiently similar match.
 *
 * - With `dbB` given, every question of `dbA` is searched for in `dbB`.
 * - Without `dbB`, every question is searched for in the result built so far,
 *   so later duplicates inside `dbA` are dropped.
 *
 * A question counts as matched when `hasRequiredPercent` keeps at least one of
 * its search results for `minpercent`. Matched questions are not added to the
 * result; they are appended, together with their matches, to a log file named
 * after the subject under `logPath`. Progress is printed to the console.
 *
 * @param {Object} dbs
 * @param {Array} dbs.dbA - subjects (`{ Name, Questions }`) to filter
 * @param {Array} [dbs.dbB] - subjects to compare against; omit to deduplicate dbA
 * @returns {Promise<Array>} the db holding the kept questions
 */
async function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Only the questions that survive the filtering end up here
  const resultDb = []
  let removedTotal = 0

  const countQuestions = (db) => {
    let count = 0
    iterateSubjects(db, () => {
      count++
    })
    return count
  }
  const dbLength = countQuestions(dbA)
  const getResultDbLength = () => countQuestions(resultDb)

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    // Start every subject with an empty log file
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    console.log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
        'blue'
      )}${subj.Questions.length}${C('green')} questions${C()}`
    )
    console.log(
      `${C('green')}Result length: ${C('blue')}${getResultDbLength()}${C(
        'green'
      )}, original length: ${C('blue')}${dbLength}${C()}`
    )

    // Searches must run one after the other: when deduplicating, each search
    // runs against resultDb, which addQuestion is expected to grow in place
    // (its return value is not used here).
    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = await search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })
      printProgressBar(j + 1, subj.Questions.length)

      const res = hasRequiredPercent(searchRes, minpercent)
      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
        continue
      }

      // has result, not adding to difference; log the question and its matches.
      // Every part sits on its own line (the separator before the matches used
      // to be glued to the JSON that follows it).
      const logEntry = [
        line,
        line,
        JSON.stringify(question, null, 2),
        line,
        JSON.stringify(res, null, 2),
        '',
      ].join('\n')
      utils.AppendToFile(logEntry, subjLogPath)
      removedCount++
      removedTotal++
    }

    console.log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  console.log(
    `Result length: ${getResultDbLength()}, original length: ${dbLength}, removed ${removedTotal} questions`
  )
  return resultDb
}
/**
 * Keeps only the search results that reach the required match value.
 *
 * @param {Array<{match: number}>} result - search results to filter
 * @param {number} minpercent - lowest `match` value that is still kept
 * @returns {Array} new array with the entries whose `match >= minpercent`
 */
function hasRequiredPercent(result, minpercent) {
  const reachesMinimum = (entry) => entry.match >= minpercent
  return result.filter(reachesMinimum)
}
// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------
/**
 * Promise-returning wrapper around `doSearch`.
 *
 * @param {Object} args
 * @param {Array} args.qdb - db to search in
 * @param {string} args.subjName - subject name handed to `doSearch`
 * @param {Object} args.question - question to look for
 * @param {boolean} args.searchInAllIfNoResult - forwarded as the last `doSearch` argument
 * @param {number} [args.searchTillMatchPercent=minpercent] - forwarded in the
 *   position where the module-level `minpercent` used to be hard-wired, so
 *   callers that pass it are no longer silently ignored.
 *   NOTE(review): assumes this is the doSearch parameter the name refers to - confirm.
 * @returns {Promise<*>} resolves with the `doSearch` result; rejects if `doSearch` throws
 */
async function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
/**
 * Calls `fn(subj, question)` once for every question of every subject,
 * in db order.
 *
 * @param {Array<{Questions: Array}>} db - subjects to walk
 * @param {Function} fn - callback receiving the subject and one of its questions
 */
function iterateSubjects(db, fn) {
  for (const subject of db) {
    for (const entry of subject.Questions) {
      fn(subject, entry)
    }
  }
}
// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------
/**
 * Prints a cyan horizontal rule of `=` characters, `process.stdout.columns`
 * wide. Nothing but the colour codes is printed when the column count is
 * not a positive number.
 */
function hr() {
  const columns = process.stdout.columns
  const rule = columns > 0 ? '='.repeat(Math.ceil(columns)) : ''
  console.log(`${C('cyan')}${rule}${C()}`)
}
/**
 * Writes `text` to stdout, followed by a carriage return (so the next write
 * starts at the beginning of the same line) or by a newline.
 *
 * @param {string} text - text to write
 * @param {boolean} returnToLineStart - truthy: end with '\r', otherwise with '\n'
 */
function writeInSameLine(text, returnToLineStart) {
  const terminator = returnToLineStart ? '\r' : '\n'
  process.stdout.write(text)
  process.stdout.write(terminator)
}
/**
 * Draws a one-line progress bar, e.g. ` [=====     ] 5 / 10`.
 *
 * The line ends with a carriage return while `current !== total`, so the next
 * call overwrites it, and with a newline once `current === total`.
 *
 * Nothing is drawn when there is no usable width: the terminal is 25 columns
 * wide or narrower, or `process.stdout.columns` is undefined (stdout is not a
 * TTY, e.g. piped or redirected), which used to slip past the guard as NaN.
 *
 * @param {number} current - number of finished items
 * @param {number} total - number of items overall
 */
function printProgressBar(current, total) {
  // 25 columns are kept free, presumably for the brackets and the counter
  const width = process.stdout.columns - 25
  // `!(width > 0)` also rejects NaN, unlike `width <= 0`
  if (!(width > 0)) {
    return
  }

  // Multiply before dividing so that current === total fills exactly `width`
  // cells; clamp so out-of-range values can neither overflow nor go negative.
  const filled =
    total > 0
      ? Math.min(width, Math.max(0, Math.floor((current * width) / total)))
      : width
  const bar = '='.repeat(filled) + ' '.repeat(width - filled)

  const numbers = `${current} / ${total}`
  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}
/**
 * Shorthand for `logger.C`.
 *
 * @param {string} [color] - forwarded unchanged to `logger.C`
 * @returns {*} whatever `logger.C(color)` returns
 */
function C(color) {
  const colorCode = logger.C(color)
  return colorCode
}