// Mirror of https://gitlab.com/MrFry/mrfrys-node-server
// (synced 2025-04-01 20:24:18 +02:00; 239 lines, 6.4 KiB, JavaScript)
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
|
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
|
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
|
|
|
// Sample console output of two runs on the same question db, kept for
// reference (the removed counts differ, so presumably the runs used different
// versions or settings of this script):
//
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
// load: 2.767ms
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 19 questions
// ==============================================================================================
// Result length: 807, original length: 826, removed 19 questions
// rmduplicates: 9.527s
// File written
//
// load: 15.91ms
// Removing duplicate questions from publicDirs/qminingPublic/questionDbs/elektro.json
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 10 questions
// ==============================================================================================
// Result length: 816, original length: 826, removed 10 questions
// rmduplicates: 488.853ms
// File written
|
|
|
|
// Match percent a search hit must reach for a question to count as already
// present (see hasRequiredPercent / difference).
const minpercent = 95
// File name handed to writeData() for the resulting question db.
const resultDbFileName = 'res.json'
// Separator written between the parts of an entry in the per-subject logs.
const line =
  '===================================================================='
// Per-subject log files are created under logPath (logPath + subject name);
// everything printed through log() is also appended to globalLog.
const logPath = './duplicateRemovingLog/'
const globalLog = './duplicateRemovingLog/log'
// Prepare the log location before anything is logged. Presumably CreatePath
// creates the directory and WriteFile('') starts the global log empty -
// confirm against utils.
utils.CreatePath(logPath)
utils.WriteFile('', globalLog)
|
|
|
|
// Command line: <fileA> [fileB]
//   fileA only      -> remove duplicate questions inside fileA
//   fileA and fileB -> remove from fileA the questions that are found in fileB
// slice() instead of splice(): the arguments are read without mutating
// process.argv.
const params = process.argv.slice(2)

const fileA = params[0]
const fileB = params[1]

// Without a first file there is nothing to process; fail with a clear message
// instead of passing undefined to loadData().
if (!fileA) {
  console.error('Usage: node <script> <fileA> [fileB]')
  process.exit(1)
}

console.time('load')
const dbA = loadData(fileA)
const dbB = fileB ? loadData(fileB) : null
console.timeEnd('load')

console.time('rmduplicates')
if (!dbB) {
  log(`Removing duplicate questions from ${fileA}`)
  rmDuplicates(dbA).then(saveResult).catch(reportFailure)
} else {
  log(
    `Removing questions found in ${C('green')}${fileB}${C()} from ${C(
      'green'
    )}${fileA}${C()}`
  )
  difference({ dbA: dbA, dbB: dbB }).then(saveResult).catch(reportFailure)
}

// Stops the 'rmduplicates' timer and writes the resulting db to resultDbFileName.
function saveResult(res) {
  console.timeEnd('rmduplicates')
  writeData(res, resultDbFileName)
  log('File written')
}

// Reports a failed run (rejection of the processing promise or an error while
// saving) instead of leaving the rejection unhandled, and marks the process as
// failed.
function reportFailure(err) {
  console.error(err)
  process.exitCode = 1
}
|
|
|
|
/**
 * Removes duplicate questions from a single question db by calling
 * difference() with only `dbA` set.
 *
 * @param {Array} db - subjects to de-duplicate
 * @returns {Promise<Array>} whatever difference() resolves to
 */
async function rmDuplicates(db) {
  return difference({ dbA: db })
}
|
|
|
|
/**
 * Collects the questions of `dbA` for which no search hit reaches
 * `minpercent`.
 *
 * - With `dbB`: every question of `dbA` is searched in `dbB`
 *   (with `searchInAllIfNoResult` enabled).
 * - Without `dbB`: every question is searched in the result built so far, so
 *   later questions matching an already kept one are dropped.
 *
 * Questions are processed strictly one after another, because in the second
 * mode each search depends on the questions kept before it.
 *
 * Side effects: per subject, a log file at `logPath + subj.Name` is reset and
 * receives one entry per dropped question (the question and its matches);
 * progress and summaries are printed through hr() / log() /
 * printProgressBar().
 *
 * @param {Object} param
 * @param {Array} param.dbA - subjects (`{ Name, Questions }`) to filter
 * @param {Array} [param.dbB] - subjects to compare against
 * @returns {Promise<Array>} the db filled through addQuestion()
 */
async function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Stuff only from A
  const resultDb = []
  let removedTotal = 0

  let dbLength = 0
  iterateSubjects(dbA, () => {
    dbLength++
  })

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
        'blue'
      )}${subj.Questions.length}${C('green')} questions${C()}`
    )

    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = await search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })

      printProgressBar(j + 1, subj.Questions.length)

      const matches = hasRequiredPercent(searchRes, minpercent)

      if (matches.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
        continue
      }

      // has result, not adding to difference; record what was dropped and why.
      // Every part sits on its own line (the separator before the matches used
      // to run straight into the JSON).
      const entry = [
        line,
        line,
        JSON.stringify(question, null, 2),
        line,
        JSON.stringify(matches, null, 2),
        '',
      ].join('\n')
      utils.AppendToFile(entry, subjLogPath)
      removedCount++
      removedTotal++
    }

    log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  let resultDbLength = 0
  iterateSubjects(resultDb, () => {
    resultDbLength++
  })
  log(
    `Result length: ${resultDbLength}, original length: ${dbLength}, removed ${removedTotal} questions`
  )
  return resultDb
}
|
|
|
|
/**
 * Keeps only the search hits whose `match` value reaches the threshold.
 *
 * @param {Array<{match: number}>} result - search hits
 * @param {number} minpercent - lowest `match` value that is still kept
 * @returns {Array} new array with the hits where `match >= minpercent`
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((hit) => hit.match >= minpercent)
}
|
|
|
|
// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------
|
|
|
|
/**
 * Promise-returning wrapper around doSearch().
 *
 * @param {Object} param
 * @param {Array} param.qdb - db to search in
 * @param {string} param.subjName - name of the subject the question belongs to
 * @param {Object} param.question - question to look for
 * @param {boolean} param.searchInAllIfNoResult - forwarded to doSearch()
 * @param {number} [param.searchTillMatchPercent] - match percent forwarded to
 *   doSearch(); defaults to the module-level `minpercent`. Callers already
 *   passed this option, but it used to be ignored.
 * @returns {Promise} resolves with the value doSearch() produces; rejects if
 *   doSearch() throws
 */
async function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
|
|
|
|
/**
 * Calls `fn(subj, question)` for every question of every subject, in order.
 *
 * @param {Array<{Questions: Array}>} db - subjects to walk through
 * @param {Function} fn - callback receiving the subject and the question
 */
function iterateSubjects(db, fn) {
  db.forEach((subj) => {
    subj.Questions.forEach((question) => fn(subj, question))
  })
}
|
|
|
|
// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------
|
|
|
|
/**
 * Logs a cyan horizontal rule of '=' characters spanning the terminal width.
 *
 * process.stdout.columns is undefined when stdout is not a TTY (for example
 * when the output is piped), which used to produce an empty rule; 80 columns
 * are assumed in that case.
 */
function hr() {
  const width = process.stdout.columns || 80
  log(`${C('cyan')}${'='.repeat(width)}${C()}`)
}
|
|
|
|
/**
 * Prints `text` to the console and appends the same text to the global log
 * file (`globalLog`).
 *
 * @param {string} text - message to print and record
 */
function log(text) {
  console.log(text)
  utils.AppendToFile(text, globalLog)
}
|
|
|
|
/**
 * Writes `text` to stdout without the implicit newline of console.log.
 *
 * @param {string} text - text to write
 * @param {boolean} returnToLineStart - when truthy, a carriage return follows
 *   so the next write overwrites this line; otherwise a newline ends the line
 */
function writeInSameLine(text, returnToLineStart) {
  process.stdout.write(text)
  process.stdout.write(returnToLineStart ? '\r' : '\n')
}
|
|
|
|
/**
 * Draws a one-line progress bar such as ` [=====     ] 5 / 10` on stdout.
 *
 * While `current !== total` the line ends with a carriage return so the next
 * call redraws it in place; the final call ends the line with a newline.
 *
 * Nothing is printed when the terminal is too narrow (25 columns are reserved
 * for the brackets and the counter) or when `total` is not a positive number,
 * which would otherwise lead to a division by zero.
 *
 * @param {number} current - number of finished items
 * @param {number} total - number of all items
 */
function printProgressBar(current, total) {
  // columns is undefined when stdout is not a TTY; assume 80 so the bar does
  // not degrade to NaN arithmetic and an empty `[]`.
  const columns = process.stdout.columns || 80
  const width = columns - 25

  if (width <= 0 || !(total > 0)) {
    return
  }

  const step = width / total
  const barWidth = Math.floor(total * step)
  const filled = Math.max(0, Math.floor(current * step))
  const empty = Math.max(0, barWidth - filled)

  const bar = '='.repeat(filled) + ' '.repeat(empty)
  const numbers = `${current} / ${total}`
  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}
|
|
|
|
/**
 * Shorthand for logger.C(): returns whatever the logger produces for `color`.
 * The callers in this file embed the result in log strings, calling C() with
 * no argument after the coloured text (presumably a reset sequence - confirm
 * against logger).
 *
 * @param {string} [color] - colour name forwarded to logger.C()
 * @returns {*} the value returned by logger.C(color)
 */
function C(color) {
  return logger.C(color)
}
|