// Command line tool that removes duplicate questions from a question database.
//
// With one argument every question of the database is compared against the
// questions already kept, and only the first occurrence is kept. With two
// arguments every question of the first database that can also be found in the
// second one is dropped. The result is passed to `writeData` under the name
// `res.json`; removed questions are logged per subject in
// `./duplicateRemovingLog/`.
//
// Usage: node <script> <dbA.json> [dbB.json]

const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line

// Example output of earlier runs, kept for reference:
//
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
// load: 2.767ms
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 19 questions
// ==============================================================================================
// Result length: 807, original length: 826, removed 19 questions
// rmduplicates: 9.527s
// File written

// load: 15.91ms
// Removing duplicate questions from publicDirs/qminingPublic/questionDbs/elektro.json
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 10 questions
// ==============================================================================================
// Result length: 816, original length: 826, removed 10 questions
// rmduplicates: 488.853ms
// File written

// A search result whose `match` is at least this value counts as a duplicate
const minpercent = 95
const resultDbFileName = 'res.json'
// Separator written between entries of the per-subject log files
const line =
  '===================================================================='
const logPath = './duplicateRemovingLog/'
const globalLog = './duplicateRemovingLog/log'

utils.CreatePath(logPath)
// Writes an empty string to the global log (presumably resets it for this
// run - confirm against utils.WriteFile)
utils.WriteFile('', globalLog)

// slice() instead of splice(): read the arguments without mutating process.argv
const params = process.argv.slice(2)
const fileA = params[0]
const fileB = params[1]

if (!fileA) {
  console.error('Usage: node <script> <dbA.json> [dbB.json]')
  process.exit(1)
}

console.time('load')
const dbA = loadData(fileA)
const dbB = fileB ? loadData(fileB) : null
console.timeEnd('load')

console.time('rmduplicates')

let job
if (!dbB) {
  log(`Removing duplicate questions from ${fileA}`)
  job = rmDuplicates(dbA)
} else {
  log(
    `Removing questions found in ${C('green')}${fileB}${C()} from ${C(
      'green'
    )}${fileA}${C()}`
  )
  job = difference({ dbA: dbA, dbB: dbB })
}

job
  .then((res) => {
    console.timeEnd('rmduplicates')
    writeData(res, resultDbFileName)
    log('File written')
  })
  .catch((err) => {
    // Report the failure explicitly instead of leaving an unhandled rejection
    console.error(err)
    process.exitCode = 1
  })

/**
 * Removes duplicate questions from a single database.
 *
 * @param {Array} db - array of subjects, each with `Name` and `Questions`
 * @returns {Promise<Array>} the database built by `difference` without `dbB`
 */
async function rmDuplicates(db) {
  return difference({ dbA: db })
}

/**
 * Builds a new database from the questions of `dbA` for which the search
 * yields no result with at least `minpercent` match.
 *
 * When `dbB` is given the questions are searched in `dbB`; otherwise they are
 * searched in the result built so far, which removes duplicates inside `dbA`.
 * Every removed question is appended, together with its matches, to the log
 * file of its subject.
 *
 * @param {Object} param
 * @param {Array} param.dbA - array of subjects (`Name`, `Questions`) to filter
 * @param {Array} [param.dbB] - optional database to search the questions in
 * @returns {Promise<Array>} the database filled through `addQuestion`
 */
async function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Stuff only from A
  const resultDb = []
  let dbLength = 0
  let removedTotal = 0

  iterateSubjects(dbA, () => {
    dbLength++
  })

  const getResultDbLength = () => {
    let resultDbLength = 0
    iterateSubjects(resultDb, () => {
      resultDbLength++
    })
    return resultDbLength
  }

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
        'blue'
      )}${subj.Questions.length}${C('green')} questions${C()}`
    )

    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = await search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })

      printProgressBar(j + 1, subj.Questions.length)

      const res = hasRequiredPercent(searchRes, minpercent)

      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
      } else {
        // has result, not adding to difference.
        // Every part goes on its own line; the separator used to be glued
        // directly in front of the matches JSON.
        const logEntry = [
          line,
          line,
          JSON.stringify(question, null, 2),
          line,
          JSON.stringify(res, null, 2),
        ].join('\n')
        utils.AppendToFile(logEntry + '\n', subjLogPath)
        removedCount++
        removedTotal++
      }
    }

    log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  log(
    `Result length: ${getResultDbLength()}, original length: ${dbLength}, removed ${removedTotal} questions`
  )

  return resultDb
}

/**
 * Keeps the search results whose `match` is at least `minpercent`.
 *
 * @param {Array<{match: number}>} result - search results
 * @param {number} minpercent - lowest accepted `match` value
 * @returns {Array} new array with the accepted results, in the original order
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((res) => res.match >= minpercent)
}

// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------

/**
 * Promise returning wrapper around `doSearch`.
 *
 * @param {Object} param
 * @param {Array} param.qdb - database to search in
 * @param {string} param.subjName - name of the subject of the question
 * @param {Object} param.question - question to look for
 * @param {boolean} param.searchInAllIfNoResult - forwarded to `doSearch` as is
 *   (presumably widens the search to every subject - confirm in classes.js)
 * @param {number} [param.searchTillMatchPercent] - forwarded to `doSearch`;
 *   defaults to `minpercent`, the value that was always used before
 * @returns {Promise<Array>} resolves with whatever `doSearch` returns
 */
async function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  // The fourth argument is passed as null, exactly as before; its meaning is
  // defined by doSearch and not visible in this file.
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}

/**
 * Calls `fn(subj, question)` for every question of every subject in `db`.
 *
 * @param {Array} db - array of subjects, each with a `Questions` array
 * @param {Function} fn - callback receiving the subject and the question
 */
function iterateSubjects(db, fn) {
  db.forEach((subj) => {
    subj.Questions.forEach((question) => {
      fn(subj, question)
    })
  })
}

// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------

/**
 * Logs a cyan row of `=` characters, `process.stdout.columns` wide. When
 * `columns` is not set the row is empty.
 */
function hr() {
  const res = '='.repeat(process.stdout.columns || 0)
  log(`${C('cyan')}${res}${C()}`)
}

/**
 * Prints `text` to the console and appends it to the global log file.
 *
 * @param {string} text - message to log
 */
function log(text) {
  console.log(text)
  utils.AppendToFile(text, globalLog)
}

/**
 * Writes `text` to stdout, followed by a carriage return when the next write
 * should overwrite it, or by a newline otherwise.
 *
 * @param {string} text - text to write
 * @param {boolean} returnToLineStart - true: end with '\r', false: end with '\n'
 */
function writeInSameLine(text, returnToLineStart) {
  process.stdout.write(text)
  process.stdout.write(returnToLineStart ? '\r' : '\n')
}

/**
 * Draws a progress bar in the current terminal line. Nothing is printed when
 * the terminal width is unknown or too small, or when `total` is not positive.
 *
 * @param {number} current - number of finished items
 * @param {number} total - number of all items
 */
function printProgressBar(current, total) {
  // 25 columns are left free for the brackets and the "current / total" text
  const width = process.stdout.columns - 25
  // `columns` is undefined when stdout is not a TTY, which makes `width` NaN;
  // `!(width > 0)` rejects that as well. A non-positive `total` would make
  // the scale below infinite.
  if (!(width > 0) || !(total > 0)) {
    return
  }

  const x = width / total
  const xCurrent = Math.floor(current * x)
  const xTotal = Math.floor(total * x)

  // xCurrent '=' characters, padded with spaces up to xTotal characters
  const bar = '='.repeat(Math.max(0, xCurrent)).padEnd(xTotal, ' ')

  const numbers = `${current} / ${total}`
  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}

/**
 * Shorthand for `logger.C`.
 *
 * @param {string} [color] - color name; called without argument after colored
 *   text (presumably resets the color - confirm in logger.js)
 * @returns {string} value returned by `logger.C`, used inside template strings
 */
function C(color) {
  return logger.C(color)
}