mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2026-04-28 11:17:38 +02:00
215 lines
5.4 KiB
JavaScript
215 lines
5.4 KiB
JavaScript
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
|
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
|
const { loadData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
|
|
|
// Minimum match value a search result needs for a question to count as a duplicate
const minpercent = 95
// File the resulting question db is written to
const resultDbFileName = 'res.json'
// Separator used inside the per-subject removal logs
const line =
  '===================================================================='
// Directory that receives one removal log per subject
const logPath = './duplicateRemovingLog/'
utils.CreatePath(logPath)

// slice() instead of splice(): read the CLI arguments without mutating process.argv
const params = process.argv.slice(2)

const fileA = params[0]
const fileB = params[1]

// Without a first db file there is nothing to process, so stop with a usage hint
if (!fileA) {
  console.error(`Usage: node ${process.argv[1]} <fileA> [fileB]`)
  process.exit(1)
}

console.time('load')
const dbA = loadData(fileA)
const dbB = fileB ? loadData(fileB) : null
console.timeEnd('load')

console.time('rmduplicates')
// With a second db the questions found in it are removed from the first one;
// with a single db the duplicates inside that db are removed
const removingFromOtherDb = Boolean(dbB)
console.log(
  removingFromOtherDb
    ? `Removing questions found in ${C('green')}${fileB}${C()} from ${C(
        'green'
      )}${fileA}${C()}`
    : `Removing duplicate questions from ${fileA}`
)

const job = removingFromOtherDb
  ? difference({ dbA: dbA, dbB: dbB })
  : rmDuplicates(dbA)

job
  .then((res) => {
    console.timeEnd('rmduplicates')
    utils.WriteFile(JSON.stringify(res), resultDbFileName)
    console.log('File written')
  })
  .catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the rejection unhandled
    console.error(err)
    process.exitCode = 1
  })
|
|
|
|
/**
 * Removes duplicate questions inside a single question db.
 *
 * Calls difference() with only `dbA` set, so no second db is involved.
 *
 * @param {Array} db - question db to deduplicate
 * @returns {Promise<Array>} whatever difference() resolves with
 */
async function rmDuplicates(db) {
  const deduplicated = await difference({ dbA: db })
  return deduplicated
}
|
|
|
|
/**
 * Builds a db from the questions of `dbA` for which the search yields no
 * result with a match of at least `minpercent`.
 *
 * - With `dbB` given, every question of `dbA` is searched for in `dbB`.
 * - Without `dbB`, every question is searched for in the result built so far,
 *   which removes duplicates inside `dbA` itself.
 *
 * Each dropped question is appended, together with the results that caused
 * the drop, to a per-subject log file at `logPath + subj.Name`. Progress is
 * printed to the console while running.
 *
 * @param {Object} param
 * @param {Array} param.dbA - subjects (objects with `Name` and `Questions`) to filter
 * @param {Array} [param.dbB] - optional db whose questions are removed from `dbA`
 * @returns {Promise<Array>} the filtered db, filled through addQuestion()
 */
async function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Only questions coming from dbA are ever added here
  const resultDb = []
  let removedTotal = 0

  // Counts the questions of every subject in a db
  const countQuestions = (db) => {
    let count = 0
    iterateSubjects(db, () => {
      count++
    })
    return count
  }
  const dbLength = countQuestions(dbA)

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    // Write empty content to the subject's log before entries get appended
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    console.log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
        'blue'
      )}${subj.Questions.length}${C('green')} questions${C()}`
    )
    console.log(
      `${C('green')}Result length: ${C('blue')}${countQuestions(resultDb)}${C(
        'green'
      )}, original length: ${C('blue')}${dbLength}${C()}`
    )

    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = await search({
        // difference mode looks in dbB, duplicate removal in what was kept so far
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })

      printProgressBar(j + 1, subj.Questions.length)

      const res = hasRequiredPercent(searchRes, minpercent)

      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
        continue
      }

      // has result, not adding to difference; log the question and its matches.
      // Every part sits on its own line, including the matches JSON after the
      // last separator (it used to be glued onto the separator line).
      const logEntry = [
        line,
        line,
        JSON.stringify(question, null, 2),
        line,
        JSON.stringify(res, null, 2),
        '',
      ].join('\n')
      utils.AppendToFile(logEntry, subjLogPath)
      removedCount++
      removedTotal++
    }

    console.log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  console.log(
    `Result length: ${countQuestions(
      resultDb
    )}, original length: ${dbLength}, removed ${removedTotal} questions`
  )
  return resultDb
}
|
|
|
|
/**
 * Keeps only the search results whose `match` value reaches the threshold.
 *
 * @param {Array} result - search results, each carrying a `match` property
 * @param {number} minpercent - lowest `match` value that is still accepted
 * @returns {Array} new array with the accepted results in their original order
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((candidate) => candidate.match >= minpercent)
}
|
|
|
|
// ---------------------------------------------------------------------------------
|
|
// db editing tools
|
|
// ---------------------------------------------------------------------------------
|
|
|
|
/**
 * Promise-returning wrapper around doSearch().
 *
 * The arguments are handed to doSearch() positionally as
 * (qdb, subjName, question, null, searchTillMatchPercent, searchInAllIfNoResult).
 *
 * @param {Object} param
 * @param {Array} param.qdb - question db to search in
 * @param {string} param.subjName - name of the subject the question belongs to
 * @param {Object} param.question - question to look for
 * @param {boolean} param.searchInAllIfNoResult - forwarded as doSearch's last argument
 * @param {number} [param.searchTillMatchPercent=minpercent] - forwarded as
 *   doSearch's fifth argument; callers already passed it, but it used to be
 *   ignored in favour of the module-level `minpercent`
 * @returns {Promise<*>} resolves with whatever doSearch() returns
 */
async function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
|
|
|
|
/**
 * Visits every question of every subject in a db.
 *
 * @param {Array} db - subjects, each with a `Questions` array
 * @param {Function} fn - called as fn(subject, question) once per question
 */
function iterateSubjects(db, fn) {
  for (const subject of db) {
    for (const entry of subject.Questions) {
      fn(subject, entry)
    }
  }
}
|
|
|
|
// ---------------------------------------------------------------------------------
|
|
// logging and tools
|
|
// ---------------------------------------------------------------------------------
|
|
|
|
/**
 * Prints a horizontal rule of '=' characters, `process.stdout.columns` wide,
 * wrapped in C('cyan') and C().
 * When `columns` is not set, the rule is empty.
 */
function hr() {
  const rule = '='.repeat(process.stdout.columns || 0)
  console.log(`${C('cyan')}${rule}${C()}`)
}
|
|
|
|
/**
 * Writes text to stdout, followed by either a carriage return or a newline.
 *
 * @param {string} text - text to write
 * @param {boolean} returnToLineStart - truthy: write '\r' after the text;
 *   falsy: write '\n' after the text
 */
function writeInSameLine(text, returnToLineStart) {
  const terminator = returnToLineStart ? '\r' : '\n'
  process.stdout.write(text)
  process.stdout.write(terminator)
}
|
|
|
|
/**
 * Draws a textual progress bar followed by "current / total".
 *
 * The bar is `process.stdout.columns - 25` characters wide; nothing is
 * printed when that width is zero or negative. The line ends with '\r' until
 * `current` equals `total`, and with '\n' once it does (see writeInSameLine).
 *
 * @param {number} current - number of finished items
 * @param {number} total - number of all items
 */
function printProgressBar(current, total) {
  const width = process.stdout.columns - 25
  if (width <= 0) {
    return
  }

  const charsPerItem = width / total
  const filled = Math.floor(current * charsPerItem)
  const barLength = Math.floor(total * charsPerItem)

  // Mirrors a counting loop: anything that is not a positive count yields ''
  const repeatChar = (char, count) => (count > 0 ? char.repeat(count) : '')
  const bar = repeatChar('=', filled) + repeatChar(' ', barLength - filled)

  const isFinished = current === total
  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${current} / ${total}${C()}`,
    !isFinished
  )
}
|
|
|
|
/**
 * Shorthand for logger.C().
 *
 * In this file the return value is interpolated into console output: with a
 * color name in front of text, and with no argument after it.
 * NOTE(review): presumably returns terminal color codes, with the no-argument
 * form resetting the color — confirm in the logger module.
 *
 * @param {string} [color] - color name handed to logger.C()
 * @returns {*} whatever logger.C() returns
 */
function C(color) {
  const colorCode = logger.C(color)
  return colorCode
}
|