From e6db0f9175af6b3e09c54da915a29dfc82517ff1 Mon Sep 17 00:00:00 2001 From: MrFry Date: Thu, 26 Mar 2020 14:33:52 +0100 Subject: [PATCH] Added duplicate remover ( #5 ), saving subjects without year, file append now sync, minor logging improvements --- utils/actions.js | 13 ++- utils/logger.js | 6 +- utils/rmDuplicates.js | 263 ++++++++++++++++++++++++++++++++++++++++++ utils/utils.js | 10 +- 4 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 utils/rmDuplicates.js diff --git a/utils/actions.js b/utils/actions.js index 8b460f6..0fd2818 100755 --- a/utils/actions.js +++ b/utils/actions.js @@ -65,6 +65,7 @@ function ProcessIncomingRequest (recievedData, qdb, infos) { } logger.DebugLog('recievedData JSON parsed', 'actions', 1) + logger.DebugLog(d, 'actions', 3) let allQLength = d.quiz.length let allQuestions = [] @@ -72,6 +73,9 @@ function ProcessIncomingRequest (recievedData, qdb, infos) { logger.DebugLog('Question:', 'actions', 2) logger.DebugLog(question, 'actions', 2) let q = new classes.Question(question.Q, question.A, question.data) + logger.DebugLog('Searching for question in subj ' + d.subj, 'actions', 3) + logger.DebugLog(q, 'actions', 3) + let sames = qdb.Search(q, d.subj) logger.DebugLog('Same questions:', 'actions', 2) logger.DebugLog('Length: ' + sames.length, 'actions', 2) @@ -89,10 +93,13 @@ function ProcessIncomingRequest (recievedData, qdb, infos) { let color = logger.GetColor('green') let msg = '' if (allQuestions.length > 0) { - color = logger.GetColor('blue') + color = logger.GetColor('cyan') msg += `New questions: ${allQuestions.length} ( All: ${allQLength} )` allQuestions.forEach((q) => { - qdb.AddQuestion(d.subj, q) + const sName = classes.SUtils.GetSubjNameWithoutYear(d.subj) + logger.DebugLog('Adding question with subjName: ' + sName + ' :', 'actions', 3) + logger.DebugLog(q, 'actions', 3) + qdb.AddQuestion(sName, q) }) currWrites++ @@ -139,7 +146,7 @@ function ProcessIncomingRequest (recievedData, qdb, infos) { function LoadJSON (dataFile) { try { var d = JSON.parse(utils.ReadFile(dataFile)) - var r = new classes.QuestionDB((x) => true, (x, y) => console.log(x, y)) + var r = new classes.QuestionDB() var rt = [] for (var i = 0; i < d.Subjects.length; i++) { diff --git a/utils/logger.js b/utils/logger.js index 35a6f09..0299a6c 100755 --- a/utils/logger.js +++ b/utils/logger.js @@ -29,7 +29,8 @@ module.exports = { LogStat: LogStat, Load: Load, logHashed: logHashed, - hr: hr + hr: hr, + C: C } const DELIM = C('green') + '|' + C() @@ -82,6 +83,7 @@ function DebugLog (msg, name, lvl) { s = header + msg } else { Log(header + 'OBJECT:', 'yellow') + s = msg } Log(s, 'yellow') } @@ -91,7 +93,7 @@ function Log (s, c) { let log = s if (typeof s !== 'object') { let dl = DELIM + C(c) - log = C(c) + GetDateString() + dl + s + log = C(c) + GetDateString() + dl + s + C() } console.log(log) diff --git a/utils/rmDuplicates.js b/utils/rmDuplicates.js new file mode 100644 index 0000000..3e47ce2 --- /dev/null +++ b/utils/rmDuplicates.js @@ -0,0 +1,263 @@ +/* ---------------------------------------------------------------------------- + + Question Server question file merger + GitLab: + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + ------------------------------------------------------------------------- */ + +const utils = require('./utils.js') +const classes = require('./question-classes/classes.js') +const actions = require('./actions.js') +const logger = require('./logger.js') + +const minMatchAmmount = 100 + +const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_') + +Main() + +function Main () { + const params = GetParams() + console.log(params) + if (params.length === 0) { + console.error('No params! Need a path to a question database!') + process.exit() + } + const data = actions.LoadJSON(params[0]) + + PrintDB(data) + console.log(hr('=')) + + const { res, stats } = RemoveDuplicates(data) + console.log(hr('=')) + + LogStats(stats, data, res) + console.log(hr('=')) + + console.log('Result database:') + PrintDB(res) + console.log(hr('=')) + + utils.WriteFile(JSON.stringify(res), 'res.json') + console.log(C('green') + 'res.json written!' + C()) + console.log(hr('=')) + + console.log(C('green') + 'Done' + C()) +} + +function LogStats (stats, oldData, newData) { + const maxSubjNameLength = MaxLengthOf(stats, 'name') + const maxPrevLength = MaxLengthOf(stats, 'prevQuestions') + const maxAddedLength = MaxLengthOf(stats, 'addedQuestions') + const maxRemovedLength = MaxLengthOf(stats, 'removedQuestions') + + stats.forEach((currStat) => { + const { name, prevQuestions, addedQuestions, removedQuestions } = currStat + let toLog = '' + + toLog += C('green') + toLog += GetExactLength(name, maxSubjNameLength) + toLog += C() + toLog += ' ' + toLog += C('magenta') + toLog += GetExactLength(prevQuestions, maxPrevLength) + toLog += C() + toLog += C('cyan') + toLog += ' -> ' + toLog += C() + toLog += C('green') + toLog += GetExactLength(addedQuestions, maxAddedLength) + toLog += C() + toLog += ' [ ' + toLog += C('red') + toLog += GetExactLength(removedQuestions, maxRemovedLength) + toLog += C() + toLog += ' ]' + + console.log(toLog) + }) + console.log(hr()) + console.log('Old data:') + LogDataCount(oldData) + console.log('New data:') + LogDataCount(newData) +} + +function LogDataCount (data) { + const subjLength = data.Subjects.length + const qLength = data.Subjects.reduce((acc, subj) => { + return acc + subj.Questions.length + }, 0) + + console.log('Subjects: ' + C('green') + subjLength + C() + ', Questions: ' + C('green') + qLength + C()) +} + +function PrintDB (data) { + const maxSubjNameLength = MaxLengthOf(data.Subjects, 'Name') + + data.Subjects.forEach((subj, i) => { + let toLog = '' + toLog += C('magenta') + toLog += (i + 1) + toLog += C() + toLog += ': ' + toLog += C('green') + toLog += GetExactLength(subj.Name, maxSubjNameLength) + toLog += C() + toLog += ' [ ' + toLog += C('cyan') + toLog += subj.Questions.length + toLog += C() + toLog += ' ]' + + console.log(toLog) + }) + console.log(hr()) + LogDataCount(data) + console.log(hr()) +} + +function GetExactLength (s, length) { + let toLog = s.toString() + const lengthDiff = length - toLog.length + for (let i = 0; i < lengthDiff; i++) { + toLog += ' ' + } + + return toLog +} + +function MaxLengthOf (prop, key) { + return prop.reduce((acc, currStat) => { + if (acc < currStat[key].toString().length) { + acc = currStat[key].toString().length + } + return acc + }, 0) +} + +function RemoveDuplicates (data) { + console.log(C('yellow') + 'Removing duplicates' + C()) + const res = new classes.QuestionDB() + const stats = [] + + data.Subjects.forEach((subj, i) => { + const logFile = logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-') + LogSubjProgress(i, subj, data.Subjects.length) + let addedQuestions = 0 + let removedQuestions = 0 + subj.Questions.forEach((question) => { + // Searching for same question in result database + let r = res.Search(question).reduce((acc, r) => { + if (r.match >= minMatchAmmount) { + acc.push(r) + } + return acc + }, []) + + // if htere are more that one same questions in the new database + if (r.length > 0) { + utils.AppendToFile(hr('#'), logFile) + utils.AppendToFile('QUESTION', logFile) + utils.AppendToFile(JSON.stringify(question, null, 2), logFile) + utils.AppendToFile(hr(), logFile) + utils.AppendToFile('SAMES', logFile) + utils.AppendToFile(JSON.stringify(r, null, 2), logFile) + removedQuestions++ + } else { + // if no same questions are fount then adding it to then new db + res.AddQuestion(subj.getSubjNameWithoutYear(), question) + addedQuestions++ + } + }) + LogResultProgress(subj, addedQuestions, removedQuestions) + stats.push({ + name: subj.Name, + prevQuestions: subj.Questions.length, + addedQuestions: addedQuestions, + removedQuestions: removedQuestions + }) + }) + return { res, stats } +} + +function LogSubjProgress (i, subj, subjCount) { + log( + '[ ' + + C('cyan') + + (i + 1) + + C() + + ' / ' + + C('green') + + subjCount + + C() + + ' ] ' + + C('yellow') + + subj.Name + + C() + + ': ' + + C('green') + + subj.Questions.length + ) +} + +function LogResultProgress (subj, addedQuestions, removedQuestions) { + log( + ' ' + + C('cyan') + + '-> ' + + C('green') + + addedQuestions + + C() + + ', removed: ' + + C('red') + + removedQuestions + + C() + + '\n' + ) +} + +function log (msg) { + process.stdout.write(msg) +} + +function hr (char) { + let h = '' + const cols = process.stdout.columns || 20 + for (let i = 0; i < cols; i++) { + h += char || '-' + } + return h +} + +function C (color) { + return logger.C(color) +} + +function GetParams () { + return process.argv.splice(2) +} + +function GetDateString () { + const m = new Date() + const d = m.getFullYear() + '-' + + ('0' + (m.getMonth() + 1)).slice(-2) + '-' + + ('0' + m.getDate()).slice(-2) + ' ' + + ('0' + m.getHours()).slice(-2) + ':' + + ('0' + m.getMinutes()).slice(-2) + ':' + + ('0' + m.getSeconds()).slice(-2) + return d +} diff --git a/utils/utils.js b/utils/utils.js index c376773..31cf9c7 100755 --- a/utils/utils.js +++ b/utils/utils.js @@ -87,9 +87,13 @@ function WriteFileAsync (content, path) { function AppendToFile (data, file) { CreatePath(file) - fs.appendFile(file, '\n' + data, function (err) { - if (err) { logger.Log('Error writing log file: ' + file + ' (sync)', logger.GetColor('redbg')) } - }) + try { + fs.appendFileSync(file, '\n' + data) + } catch (e) { + logger.Log('Error appendig to file log file: ' + file + ' (sync)', logger.GetColor('redbg')) + logger.Log(data) + console.log(e) + } } function Beep () {