const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const {
  addQuestion,
  doSearch,
  compareQuestionObj,
  createQuestion,
} = require('../../dist/utils/classes.js') // eslint-disable-line
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
const fs = require('fs') // eslint-disable-line

// Usage: <script> [-s] <pathA> [pathB]
//
//   pathA is a file, no pathB:      remove duplicate questions inside the db
//   pathA is a file, pathB given:   remove questions from A that are found in B
//   pathA is a directory, no pathB: delete duplicate possible-answer files
//   pathA is a directory, pathB:    delete possible-answer files found in db B
//
// For the two file modes the result is written to '<file name of pathA>.res'.
// '-s' silences console logging (the global log file is still written).
//
// Example outputs:
//
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
// load: 2.767ms
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 19 questions
// ==============================================================================================
// Result length: 807, original length: 826, removed 19 questions
// rmduplicates: 9.527s
// File written

// load: 15.91ms
// Removing duplicate questions from publicDirs/qminingPublic/questionDbs/elektro.json
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 10 questions
// ==============================================================================================
// Result length: 816, original length: 826, removed 10 questions
// rmduplicates: 488.853ms
// File written

// Minimum match percent for a search result to count as a duplicate
const minpercent = 95
const line =
  '===================================================================='
const logPath = './duplicateRemovingLog/'
const globalLog = './duplicateRemovingLog/log'
utils.CreatePath(logPath)
// Start every run with an empty global log
utils.WriteFile('', globalLog)

// ----------------------------------------------
// Progress reporting: any incoming process message is answered with the
// current progress counters (these are updated by difference()).
let currentMaxIndex = -1
let currentIndex = -1
process.on('message', function () {
  process.send({
    currentMaxIndex: currentMaxIndex,
    currentIndex: currentIndex,
  })
})
// ----------------------------------------------

// slice() instead of splice(): process.argv itself is left untouched
let params = process.argv.slice(2)
const silenced = params.includes('-s')
params = params.filter((x) => {
  return !x.startsWith('-')
})
console.log(params)
if (params.length === 0) {
  console.log('At least 1 parameter required (path to DB)')
  process.exit(1)
}

const pathA = params[0]
const pathB = params[1]

// Fail with a readable message instead of an ENOENT stack trace from lstatSync
if (!utils.FileExists(pathA)) {
  console.log(`${pathA} does not exist`)
  process.exit(1)
}

const stat = fs.lstatSync(pathA)
if (stat.isDirectory()) {
  if (pathB) {
    log(
      `Clearing possible questions from ${C(
        'green'
      )}${pathA}${C()} based on ${C('green')}${pathB}${C()} db`
    )
    const db = loadData(pathB)

    clearPossibleAnswers(pathA, db)

    log(
      `Cleared possible questions from ${C(
        'green'
      )}${pathA}${C()} based on ${C('green')}${pathB}${C()} db`
    )
  } else {
    log(
      `Removing possible question duplicates from ${C(
        'green'
      )}${pathA}${C()}`
    )
    removePossibleAnswersDuplicates(pathA)
    log(
      `Removed possible question duplicates from ${C(
        'green'
      )}${pathA}${C()}`
    )
  }
} else {
  console.time('load')
  const dbA = loadData(pathA)
  const dbB = pathB ? loadData(pathB) : null
  console.timeEnd('load')

  // Only the last path segment is used, so the result is written under that
  // bare file name rather than next to the source db
  const resultDbFileName = pathA.split('/').pop()

  console.time('rmduplicates')

  if (!dbB) {
    log(`Removing duplicate questions from ${C('green')}${pathA}${C()}`)
    const res = rmDuplicates(dbA)
    console.timeEnd('rmduplicates')
    writeData(res, resultDbFileName + '.res')
    log('File written')
    log(`Removed duplicate questions from ${C('green')}${pathA}${C()}`)
  } else {
    log(
      `Removing questions found in ${C('green')}${pathB}${C()} from ${C(
        'green'
      )}${pathA}${C()}`
    )
    const res = difference({ dbA: dbA, dbB: dbB })
    console.timeEnd('rmduplicates')
    writeData(res, resultDbFileName + '.res')
    log('File written')
    log(
      `Removed questions found in ${C('green')}${pathB}${C()} from ${C(
        'green'
      )}${pathA}${C()}`
    )
  }
}

// ---------------------------------------------------------------------------------
// possible answers duplicate removing
// ---------------------------------------------------------------------------------

// TODO: dont check every file, only check per directories
// only compare questions of same subjects
/**
 * Deletes duplicate possible-answer files inside every sub-directory of
 * `path`, then removes the directories left empty and refreshes the
 * savedQuestions.json of the remaining ones.
 *
 * Two files of the same directory are duplicates when any question of one
 * compares with an average of exactly 100 to any question of the other; the
 * second file of the pair is the one deleted.
 *
 * @param {string} path - root directory containing one directory per subject
 */
function removePossibleAnswersDuplicates(path) {
  // Only directories can be listed below; stray files in the root are skipped
  const dirs = fs.readdirSync(path).filter((entry) => {
    return fs.lstatSync(path + '/' + entry).isDirectory()
  })
  let count = 0
  let currIndex = 1
  let delets = 0

  iterateDir(path, () => {
    count++
  })

  dirs.forEach((currDir) => {
    const contents = fs.readdirSync(path + '/' + currDir)

    contents.forEach((currFile) => {
      const currPath = path + '/' + currDir + '/' + currFile
      if (currPath.includes('savedQuestions.json')) {
        return
      }
      // The file may already have been deleted as a duplicate of an earlier one
      if (!utils.FileExists(currPath)) {
        return
      }
      const dataA = utils.ReadJSON(currPath)

      currIndex++
      printProgressBar(currIndex, count - 1)

      contents.forEach((currFile2) => {
        const currPath2 = path + '/' + currDir + '/' + currFile2
        if (currPath2.includes('savedQuestions.json')) {
          return
        }
        if (!utils.FileExists(currPath2)) {
          return
        }
        if (currPath === currPath2) {
          return
        }
        const dataB = utils.ReadJSON(currPath2)

        // Decide once per file pair, so a file is deleted (and counted) a
        // single time even when several of its questions match
        const isDuplicate = dataA.questions.some((q1) => {
          return dataB.questions.some((q2) => {
            const percent = compareQuestionObj(
              createQuestion(q1),
              '',
              createQuestion(q2),
              ''
            )
            return percent.avg === 100
          })
        })

        if (isDuplicate) {
          utils.deleteFile(currPath2)
          count--
          delets++
        }
      })
    })
  })

  log(`${C('green')}Deleting empty directories ...${C()}`)
  count = dirs.length
  currIndex = 0
  let deletedDirCount = 0
  const remainingDirs = []
  dirs.forEach((dir) => {
    currIndex++
    const dirPath = path + '/' + dir
    if (fs.readdirSync(dirPath).length === 0) {
      fs.rmdirSync(dirPath)
      deletedDirCount++
    } else {
      remainingDirs.push(dir)
    }
    printProgressBar(currIndex, count)
  })

  log(`${C('green')}Updating savedQuestions.json ...${C()}`)
  // Directories removed above have no savedQuestions.json left to update
  count = remainingDirs.length
  currIndex = 0
  remainingDirs.forEach((dir) => {
    currIndex++
    updateSavedQuestionsFile(path + '/' + dir)
    printProgressBar(currIndex, count)
  })

  log(
    `Deleted ${C('green')}${delets}${C()} files, and ${C(
      'green'
    )}${deletedDirCount}${C()} directories`
  )
}

/**
 * Deletes every possible-answer file under `path` (recursively) that has at
 * least one question for which a search in `db` returns any result.
 * savedQuestions.json files are skipped.
 *
 * @param {string} path - root directory of the possible-answer files
 * @param {Array} db - question db as returned by loadData()
 */
function clearPossibleAnswers(path, db) {
  let count = 0
  let currIndex = 0
  let delets = 0
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (currPath.includes('savedQuestions.json')) {
      return
    }
    const { subj, questions } = utils.ReadJSON(currPath)

    // some(): stop at the first hit so the file is deleted and counted once
    const foundInDb = questions.some((question) => {
      const searchRes = search({
        qdb: db,
        subjName: subj,
        question: question,
        searchTillMatchPercent: 80,
      })
      return searchRes.length > 0
    })

    if (foundInDb) {
      utils.deleteFile(currPath)
      delets++
    }
    printProgressBar(currIndex, count)
  })
  log(`Deleted ${C('green')}${delets}${C()} files`)
}

/**
 * Drops the entries of `<path>/savedQuestions.json` whose `fname` file no
 * longer exists in `path`. The file is rewritten only if something changed.
 *
 * @param {string} path - directory containing savedQuestions.json
 */
function updateSavedQuestionsFile(path) {
  const filePath = path + '/' + 'savedQuestions.json'
  if (!utils.FileExists(filePath)) {
    log(`${filePath} does not exists!`)
    return
  }

  const savedQuestions = utils.ReadJSON(filePath)
  const filtered = savedQuestions.filter((sq) => {
    return utils.FileExists(path + '/' + sq.fname)
  })

  if (savedQuestions.length !== filtered.length) {
    utils.WriteFile(JSON.stringify(filtered), filePath)
  }
}

// ---------------------------------------------------------------------------------
// difference
// ---------------------------------------------------------------------------------

/**
 * Removes duplicate questions from a single db.
 *
 * @param {Array} db - array of subjects ({ Name, Questions })
 * @returns {Array} new db without the duplicates
 */
function rmDuplicates(db) {
  return difference({ dbA: db })
}

/**
 * Builds a new db from the questions of `dbA`.
 *
 * With `dbB`, a question is kept only when no match of at least `minpercent`
 * is found in `dbB`. Without `dbB`, each question is searched in the result
 * built so far, which removes duplicates inside `dbA` itself.
 *
 * Every removed question is appended, together with its matches, to a
 * per-subject file under `logPath`. `currentMaxIndex` and `currentIndex` are
 * updated along the way for the process-message progress reporter.
 *
 * @param {{ dbA: Array, dbB?: Array }} dbs
 * @returns {Array} the resulting db
 */
function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Stuff only from A
  const resultDb = []
  let dbLength = 0
  let removedTotal = 0
  let processedQuestions = 0

  iterateSubjects(dbA, () => {
    dbLength++
  })
  currentMaxIndex = dbLength

  const getResultDbLength = () => {
    let resultDbLength = 0
    iterateSubjects(resultDb, () => {
      resultDbLength++
    })
    return resultDbLength
  }

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${
        subj.Name
      }, ${C('blue')}${subj.Questions.length}${C(
        'green'
      )} questions${C()}`
    )

    printProgressBar(i + 1, dbA.length)
    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })

      // Count first, then draw, so the last question shows total / total
      processedQuestions++
      currentIndex = processedQuestions
      printProgressBar(processedQuestions, dbLength)

      const res = hasRequiredPercent(searchRes, minpercent)

      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
      } else {
        // has result, not adding to difference
        utils.AppendToFile(
          line +
            '\n' +
            line +
            '\n' +
            JSON.stringify(question, null, 2) +
            '\n' +
            line +
            JSON.stringify(res, null, 2) +
            '\n',
          subjLogPath
        )
        removedCount++
        removedTotal++
      }
    }

    log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  log(
    `Result length: ${getResultDbLength()}, original length: ${dbLength}, removed ${removedTotal} questions`
  )

  return resultDb
}

/**
 * Keeps the search results whose `match` is at least `minpercent`.
 *
 * @param {Array<{ match: number }>} result - search results
 * @param {number} minpercent - inclusive lower bound for `match`
 * @returns {Array} the results that reach the bound
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((res) => {
    return res.match >= minpercent
  })
}

// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------

/**
 * Named-parameter wrapper around doSearch().
 *
 * `searchTillMatchPercent` used to be accepted by callers but silently
 * replaced with the global `minpercent`; it is now forwarded, and falls back
 * to `minpercent` when omitted.
 *
 * @param {object} args
 * @param {Array} args.qdb - db to search in
 * @param {string} args.subjName - subject name of the question
 * @param {object} args.question - question to search for
 * @param {boolean} [args.searchInAllIfNoResult] - forwarded to doSearch()
 * @param {number} [args.searchTillMatchPercent] - forwarded to doSearch()
 * @returns {Array} whatever doSearch() returns
 */
function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}

/**
 * Calls `fn(subj, question)` for every question of every subject in `db`.
 *
 * @param {Array<{ Questions: Array }>} db
 * @param {Function} fn
 */
function iterateSubjects(db, fn) {
  db.forEach((subj) => {
    subj.Questions.forEach((question) => {
      fn(subj, question)
    })
  })
}

// ---------------------------------------------------------------------------------
// possible answers tools
// ---------------------------------------------------------------------------------

/**
 * Recursively walks `path` and calls `action(filePath)` for every
 * non-directory entry. Paths that do not exist are ignored.
 *
 * @param {string} path - file or directory to walk
 * @param {Function} action - called with the path of each file found
 */
function iterateDir(path, action) {
  if (!utils.FileExists(path)) {
    return
  }

  const stat = fs.lstatSync(path)
  if (stat.isDirectory()) {
    const content = fs.readdirSync(path)
    content.forEach((currContent) => {
      iterateDir(`${path}/${currContent}`, action)
    })
  } else {
    action(path)
  }
}

// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------

/**
 * Logs a cyan horizontal rule as wide as the terminal.
 * `columns` is undefined when stdout is not a TTY; the rule is empty then.
 */
function hr() {
  const res = '='.repeat(process.stdout.columns || 0)
  log(`${C('cyan')}${res}${C()}`)
}

/**
 * Appends `text` to the global log file and, unless silenced, prints it to
 * the console. On a TTY the current line is cleared first so the text
 * replaces a progress bar that may be drawn there.
 *
 * @param {string} text
 */
function log(text) {
  utils.AppendToFile(text, globalLog)
  if (silenced) return
  if (process.stdout.isTTY) {
    process.stdout.clearLine(0)
    process.stdout.cursorTo(0)
  }

  console.log(text)
}

/**
 * Replaces the current terminal line with `text`. Does nothing when stdout
 * is not a TTY.
 *
 * @param {string} text
 * @param {boolean} returnToLineStart - true: end with '\r', so the next
 *   write overdraws this line; false: end with '\n'
 */
function writeInSameLine(text, returnToLineStart) {
  if (!process.stdout.isTTY) {
    return
  }
  process.stdout.clearLine(0)
  process.stdout.cursorTo(0)
  process.stdout.write(text)
  if (returnToLineStart) {
    process.stdout.write('\r')
  } else {
    process.stdout.write('\n')
  }
}

/**
 * Draws a `[====    ] current / total` progress bar on the current line.
 * Nothing is drawn when silenced, when stdout is not a TTY, when the
 * terminal is too narrow, or when `total` is not positive.
 *
 * @param {number} current - number of processed items
 * @param {number} total - number of all items
 */
function printProgressBar(current, total) {
  if (!process.stdout.isTTY || silenced) {
    return
  }
  const width = process.stdout.columns - 30
  if (width <= 0) {
    return
  }
  // With total 0 the per-item width below would be Infinity, and filling the
  // bar would never finish
  if (!(total > 0)) {
    return
  }

  const x = width / total
  const xTotal = Math.floor(total * x)
  // Clamp, so an out-of-range counter cannot overflow the bar
  const xCurrent = Math.min(Math.max(Math.floor(current * x), 0), xTotal)

  const bar = '='.repeat(xCurrent) + ' '.repeat(xTotal - xCurrent)

  const numbers = `${current} / ${total}`

  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}

/**
 * Shorthand for logger.C().
 *
 * @param {string} [color] - color name; called without one to close a
 *   colored section
 * @returns {string} whatever logger.C() returns
 */
function C(color) {
  return logger.C(color)
}

// The 'message' listener registered above would otherwise keep the process alive
process.exit()