// Standalone question-database tool.
//
// Usage: node rmDuplicates.js <dbA.json> [dbB.json]
//
//   * One argument:  removes duplicate questions from dbA.
//   * Two arguments: keeps only the questions of dbA that have no match in dbB.
//
// The resulting database is written to `resultDbFileName`; every question that
// was dropped is logged (together with its matches) to a per-subject file
// under `logPath`.

// Minimum `match` value a search result needs to count as "the same question".
const minpercent = 97
const resultDbFileName = 'res.json'
// Used when stdout is not a TTY (piped / redirected), where
// `process.stdout.columns` is undefined.
const fallbackColumns = 80
// Columns reserved next to the progress bar for the "current / total" text.
const progressBarReservedColumns = 25

// ---------------------------------------------------------------------------------------------------

const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line

// TODO: merge 2 dbs
// TODO: filter questions out from a db based on another, producing a new one

const line =
  '===================================================================='
const logPath = './duplicateRemovingLog/'

main().catch((err) => {
  // Without this handler a failure would surface as an unhandled rejection
  // and the process could still exit with status 0.
  console.error(err)
  process.exitCode = 1
})

/**
 * Entry point: reads the CLI parameters, loads the database(s), runs the
 * duplicate removal / difference and writes the result file.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // slice (not splice) so process.argv itself is left untouched
  const params = process.argv.slice(2)
  console.log('Params', params)

  const [fileA, fileB] = params
  if (!fileA) {
    console.error('No params! Need a path to a question database!')
    process.exitCode = 1
    return
  }

  const dbA = utils.ReadJSON(fileA)
  const dbB = fileB ? utils.ReadJSON(fileB) : null

  utils.CreatePath(logPath)

  const res = dbB ? await difference({ dbA, dbB }) : await rmDuplicates(dbA)

  utils.WriteFile(JSON.stringify(res), resultDbFileName)
  console.log('File written')
}

/**
 * Removes duplicate questions from a single database.
 *
 * @param {Array} db - list of subjects, each with `Name` and `Questions`
 * @returns {Promise<Array>} new database without the duplicates
 */
function rmDuplicates(db) {
  return difference({ dbA: db })
}

/**
 * Builds a new database from the questions of `dbA`.
 *
 * With `dbB` given, a question is kept when searching `dbB` yields no result
 * with at least `minpercent` match. Without `dbB`, each question is searched
 * in the result built so far, so later duplicates of an already kept question
 * are dropped.
 *
 * @param {Object} dbs
 * @param {Array} dbs.dbA - database whose questions are filtered
 * @param {Array} [dbs.dbB] - database to subtract; omit to only deduplicate
 * @returns {Promise<Array>} the filtered database
 */
async function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Stuff only from A
  const resultDb = []
  const dbLength = countQuestions(dbA)
  let removedTotal = 0

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + toLogFileName(subj.Name)
    // start every run with an empty per-subject log
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    console.log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
        'blue'
      )}${subj.Questions.length}${C('green')} questions${C()}`
    )
    console.log(
      `${C('green')}Result length: ${C('blue')}${countQuestions(resultDb)}${C(
        'green'
      )}, original length: ${C('blue')}${dbLength}${C()}`
    )

    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      // Awaited one by one on purpose: when deduplicating, each search has to
      // see the questions added to resultDb by the previous iterations.
      const searchRes = await search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
      })

      printProgressBar(j + 1, subj.Questions.length)

      const res = hasRequiredPercent(searchRes, minpercent)

      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
      } else {
        // has result, not adding to difference
        utils.AppendToFile(
          [
            line,
            line,
            JSON.stringify(question, null, 2),
            line,
            JSON.stringify(res, null, 2),
            '',
          ].join('\n'),
          subjLogPath
        )
        removedCount++
        removedTotal++
      }
    }

    console.log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  console.log(
    `Result length: ${countQuestions(
      resultDb
    )}, original length: ${dbLength}, removed ${removedTotal} questions`
  )
  return resultDb
}

/**
 * Keeps only the search results whose `match` reaches the given threshold.
 *
 * @param {Array<{match: number}>} result - search results
 * @param {number} minpercent - minimum `match` value (inclusive)
 * @returns {Array<{match: number}>} the results that reach the threshold
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((res) => res.match >= minpercent)
}

// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------

/**
 * Promise-returning wrapper around `doSearch`; a synchronous throw inside
 * `doSearch` becomes a rejection.
 *
 * @param {Object} options
 * @param {Array} options.qdb - database to search in
 * @param {string} options.subjName - subject name of the question
 * @param {Object} options.question - question to look for
 * @param {boolean} options.searchInAllIfNoResult - passed through to
 *   `doSearch` unchanged (presumably widens the search beyond the subject
 *   when nothing is found; verify against `doSearch`)
 * @returns {Promise<Array>} whatever `doSearch` yields
 */
async function search({ qdb, subjName, question, searchInAllIfNoResult }) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    minpercent,
    searchInAllIfNoResult
  )
}

/**
 * Calls `fn(subj, question)` for every question of every subject.
 *
 * @param {Array} db - list of subjects, each with a `Questions` array
 * @param {Function} fn - callback receiving the subject and the question
 */
function iterateSubjects(db, fn) {
  db.forEach((subj) => {
    subj.Questions.forEach((question) => {
      fn(subj, question)
    })
  })
}

/**
 * Counts all questions of a database.
 *
 * @param {Array} db - list of subjects, each with a `Questions` array
 * @returns {number} total number of questions
 */
function countQuestions(db) {
  let count = 0
  iterateSubjects(db, () => {
    count++
  })
  return count
}

/**
 * Turns a subject name into a single file name: path separators in the name
 * would otherwise point the log file into a non-existing sub-directory.
 *
 * @param {string} subjName - subject name
 * @returns {string} name that is safe to append to `logPath`
 */
function toLogFileName(subjName) {
  return String(subjName).replace(/[\\/]/g, '-')
}

// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------

/**
 * Width of the terminal in columns, or `fallbackColumns` when stdout is not
 * a TTY and reports no width.
 *
 * @returns {number} usable column count
 */
function getTerminalWidth() {
  return process.stdout.columns || fallbackColumns
}

/** Prints a horizontal rule spanning the terminal width. */
function hr() {
  const res = '='.repeat(getTerminalWidth())
  console.log(`${C('cyan')}${res}${C()}`)
}

/**
 * Writes `text` to stdout, then either returns the cursor to the start of
 * the line (so the next write overwrites it) or moves to the next line.
 *
 * @param {string} text - text to print
 * @param {boolean} returnToLineStart - true: end with '\r', false: end with '\n'
 */
function writeInSameLine(text, returnToLineStart) {
  process.stdout.write(text)
  process.stdout.write(returnToLineStart ? '\r' : '\n')
}

/**
 * Draws a progress bar that is redrawn on the same line until
 * `current === total`.
 *
 * @param {number} current - number of finished items
 * @param {number} total - number of all items
 */
function printProgressBar(current, total) {
  const width = getTerminalWidth() - progressBarReservedColumns

  // nothing sensible to draw on a too narrow terminal or for an empty job
  if (width <= 0 || total <= 0) {
    return
  }

  // clamped so that out-of-range `current` can not produce a negative repeat
  const filled = Math.max(
    0,
    Math.min(width, Math.floor((current / total) * width))
  )
  const bar = '='.repeat(filled) + ' '.repeat(width - filled)
  const numbers = `${current} / ${total}`

  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}

/**
 * Shorthand for `logger.C`.
 *
 * @param {string} [color] - color name; call sites here omit it to end a colored span
 * @returns {string} whatever `logger.C` returns, used here inside template strings
 */
function C(color) {
  return logger.C(color)
}