From 6f142e07331b706b172ae60447ca89cb7bce285a Mon Sep 17 00:00:00 2001 From: mrfry Date: Wed, 21 Oct 2020 16:25:53 +0200 Subject: [PATCH] Updated duplicate remover script --- src/utils/rmDuplicates.js | 90 +++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 36 deletions(-) diff --git a/src/utils/rmDuplicates.js b/src/utils/rmDuplicates.js index 9c8e93f..e06acdc 100644 --- a/src/utils/rmDuplicates.js +++ b/src/utils/rmDuplicates.js @@ -23,6 +23,7 @@ const { searchData, addQuestion, getSubjNameWithoutYear, + createQuestion, } = require('./classes.js') const actions = require('./actions.js') const logger = require('./logger.js') @@ -34,7 +35,7 @@ const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_') Main() -function Main() { +async function Main() { const params = GetParams() console.log(params) if (params.length === 0) { @@ -46,7 +47,7 @@ function Main() { PrintDB(data) console.log(hr('=')) - const { res, stats } = RemoveDuplicates(data) + const { res, stats } = await RemoveDuplicates(data) console.log(hr('=')) LogStats(stats, data, res) @@ -159,56 +160,73 @@ function MaxLengthOf(prop, key) { }, 0) } -function RemoveDuplicates(data) { +async function RemoveDuplicates(data) { console.log(C('yellow') + 'Removing duplicates' + C()) let res = [] const stats = [] - data.forEach((subj, i) => { + for (let i = 0; i < data.length; i++) { + const subj = data[i] const logFile = logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-') let addedQuestions = 0 let removedQuestions = 0 - subj.Questions.forEach((question, j) => { - // Searching for same question in result database - let result = searchData(res, question).reduce((acc, res) => { - if (res.match >= minMatchAmmount) { - acc.push(res) - } - return acc - }, []) + for (let j = 0; j < subj.Questions.length; j++) { + const question = subj.Questions[j] + try { + // Searching for same question in result database + const tempQuestion = createQuestion( + question.Q, + question.A, + question.data + ) + let result = await searchData(res, tempQuestion) + result = result.reduce((acc, res) => { + if (res.match >= minMatchAmmount) { + acc.push(res) + } + return acc + }, []) - // if htere are more that one same questions in the new database - if (result.length > 0) { - utils.AppendToFile(hr('#'), logFile) - utils.AppendToFile('QUESTION', logFile) - utils.AppendToFile(JSON.stringify(question, null, 2), logFile) - utils.AppendToFile(hr(), logFile) - utils.AppendToFile('SAMES', logFile) - utils.AppendToFile(JSON.stringify(result, null, 2), logFile) - removedQuestions++ - } else { - // if no same questions are fount then adding it to then new db - res = addQuestion(res, getSubjNameWithoutYear(subj.Name), question) - addedQuestions++ + // if htere are more that one same questions in the new database + if (result.length > 0) { + utils.AppendToFile(hr('#'), logFile) + utils.AppendToFile('QUESTION', logFile) + utils.AppendToFile(JSON.stringify(tempQuestion, null, 2), logFile) + utils.AppendToFile(hr(), logFile) + utils.AppendToFile('SAMES', logFile) + utils.AppendToFile(JSON.stringify(result, null, 2), logFile) + removedQuestions++ + } else { + // if no same questions are fount then adding it to then new db + addQuestion(res, getSubjNameWithoutYear(subj.Name), tempQuestion) + addedQuestions++ + } + LogResultProgress( + subj, + i, + j, + subj.Questions.length, + addedQuestions, + removedQuestions, + data.length + ) + } catch (err) { + console.log() + console.log('ERROR') + console.log(err) + console.log('QUESTION') + console.log(question) + console.log() } - LogResultProgress( - subj, - i, - j, - subj.Questions.length, - addedQuestions, - removedQuestions, - data.length - ) - }) + } stats.push({ name: subj.Name, prevQuestions: subj.Questions.length, addedQuestions: addedQuestions, removedQuestions: removedQuestions, }) - }) + } return { res, stats } }