Files
mrfrys-node-server/utils/rmDuplicates.js

265 lines
6.7 KiB
JavaScript

/* ----------------------------------------------------------------------------
Question Server question file merger
GitLab: <https://gitlab.com/MrFry/mrfrys-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
const utils = require('./utils.js')
const classes = require('./question-classes/classes.js')
const actions = require('./actions.js')
const logger = require('./logger.js')
// Name of the file the deduplicated database is written to (see utils.WriteFile in Main).
const resultFileName = 'res.json'
// Lowest `match` value of a search result that still counts as "the same question";
// RemoveDuplicates compares search results against it with `>=`.
const minMatchAmmount = 100
// Path prefix under which RemoveDuplicates writes one log file per subject for this run.
// The current date/time is embedded, with spaces replaced by underscores.
// GetDateString is a function declaration, so it is hoisted and can be called here
// even though it is defined at the bottom of the file.
// NOTE(review): the value still contains the ':' characters of the time part — confirm
// that is acceptable in a path on every platform this script runs on.
const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_')
// Start the script as soon as the module is evaluated (Main is hoisted as well).
Main()
/**
 * Script entry point.
 *
 * Loads the question database whose path is the first command line parameter,
 * removes duplicate questions from it, prints the databases and the statistics
 * to the console, and writes the resulting database as JSON to `resultFileName`.
 *
 * When no parameter is given, an error is printed and the process exits with
 * status 1.
 */
function Main () {
  const params = GetParams()
  console.log(params)
  if (params.length === 0) {
    console.error('No params! Need a path to a question database!')
    // Non-zero status, so shells and calling scripts can tell that the run failed.
    process.exit(1)
  }

  const data = actions.LoadJSON(params[0])
  PrintDB(data)
  console.log(hr('='))

  const { res, stats } = RemoveDuplicates(data)
  console.log(hr('='))

  LogStats(stats, data, res)
  console.log(hr('='))

  console.log('Result database:')
  PrintDB(res)
  console.log(hr('='))

  utils.WriteFile(JSON.stringify(res), resultFileName)
  console.log(C('green') + resultFileName + ' written!' + C())
  console.log(hr('='))
  console.log(C('green') + 'Done' + C())
}
/**
 * Prints the per-subject statistics as aligned, colored lines of the form
 * `name previous -> added [ removed ]`, followed by the subject / question
 * totals of the old and of the new database.
 *
 * @param {Array} stats - entries with `name`, `prevQuestions`, `addedQuestions`
 *   and `removedQuestions`
 * @param {Object} oldData - database before duplicate removal
 * @param {Object} newData - database after duplicate removal
 */
function LogStats (stats, oldData, newData) {
  // Column widths: the longest printed value of each field.
  const nameWidth = MaxLengthOf(stats, 'name')
  const prevWidth = MaxLengthOf(stats, 'prevQuestions')
  const addedWidth = MaxLengthOf(stats, 'addedQuestions')
  const removedWidth = MaxLengthOf(stats, 'removedQuestions')

  stats.forEach(({ name, prevQuestions, addedQuestions, removedQuestions }) => {
    const line =
      `${C('green')}${GetExactLength(name, nameWidth)}${C()} ` +
      `${C('magenta')}${GetExactLength(prevQuestions, prevWidth)}${C()}` +
      `${C('cyan')} -> ${C()}` +
      `${C('green')}${GetExactLength(addedQuestions, addedWidth)}${C()} [ ` +
      `${C('red')}${GetExactLength(removedQuestions, removedWidth)}${C()} ]`
    console.log(line)
  })

  console.log(hr())
  console.log('Old data:')
  LogDataCount(oldData)
  console.log('New data:')
  LogDataCount(newData)
}
/**
 * Prints how many subjects the database has and how many questions they
 * contain in total.
 *
 * @param {Object} data - database with a `Subjects` array; every subject has a
 *   `Questions` array
 */
function LogDataCount (data) {
  const subjects = data.Subjects

  let questionCount = 0
  subjects.forEach((subj) => {
    questionCount += subj.Questions.length
  })

  console.log(
    `Subjects: ${C('green')}${subjects.length}${C()}, Questions: ${C('green')}${questionCount}${C()}`
  )
}
/**
 * Prints an overview of the database: one numbered line per subject with its
 * name (padded to a common width) and its question count, followed by the
 * overall subject / question totals.
 *
 * @param {Object} data - database with a `Subjects` array; every subject has a
 *   `Name` and a `Questions` array
 */
function PrintDB (data) {
  const nameWidth = MaxLengthOf(data.Subjects, 'Name')

  data.Subjects.forEach((subj, index) => {
    const line =
      `${C('magenta')}${index + 1}${C()}: ` +
      `${C('green')}${GetExactLength(subj.Name, nameWidth)}${C()} [ ` +
      `${C('cyan')}${subj.Questions.length}${C()} ]`
    console.log(line)
  })

  console.log(hr())
  LogDataCount(data)
  console.log(hr())
}
/**
 * Converts `s` to a string and pads it on the right with spaces until it is
 * `length` characters long. Values that are already long enough are returned
 * unchanged (they are never truncated).
 *
 * @param {*} s - value to print; must have a `toString` method
 * @param {number} length - wanted minimum length
 * @returns {string} the padded text
 */
function GetExactLength (s, length) {
  const text = s.toString()
  const missing = length - text.length
  if (!(missing > 0)) {
    return text
  }
  // Round up, so a fractional difference pads as many spaces as a counting loop would.
  return text + ' '.repeat(Math.ceil(missing))
}
/**
 * Returns the length of the longest string form of `item[key]` among the
 * items of `prop`, or 0 for an empty array.
 *
 * @param {Array<Object>} prop - items to inspect
 * @param {string} key - name of the property to measure on every item
 * @returns {number} the longest length found
 */
function MaxLengthOf (prop, key) {
  const lengths = prop.map((item) => item[key].toString().length)
  return lengths.reduce((longest, len) => Math.max(longest, len), 0)
}
/**
 * Builds a new question database that contains every question of `data` only
 * once.
 *
 * Each question is searched for in the result database built so far. If a
 * search result reaches `minMatchAmmount`, the question counts as a duplicate:
 * it is left out, and the question together with its matches is appended to
 * the log file of the subject. Otherwise the question is added to the result
 * under the subject's name without the year.
 *
 * @param {Object} data - database with a `Subjects` array
 * @returns {{res: Object, stats: Array}} the new database and, per subject,
 *   its name and its previous / added / removed question counts
 */
function RemoveDuplicates (data) {
  console.log(C('yellow') + 'Removing duplicates' + C())
  const res = new classes.QuestionDB()
  const stats = []

  data.Subjects.forEach((subj, subjIndex) => {
    // Spaces and slashes in the subject name are replaced to get the log file name
    const safeName = subj.Name.replace(/ /g, '_').replace(/\//g, '-')
    const logFile = logPath + '/' + safeName
    const appendToLog = (text) => utils.AppendToFile(text, logFile)

    LogSubjProgress(subjIndex, subj, data.Subjects.length)

    let addedQuestions = 0
    let removedQuestions = 0

    subj.Questions.forEach((question) => {
      // Search for the same question in the result database
      const sames = res
        .Search(question)
        .filter((found) => found.match >= minMatchAmmount)

      if (sames.length === 0) {
        // No same question was found, so it is added to the new database
        res.AddQuestion(subj.getSubjNameWithoutYear(), question)
        addedQuestions++
        return
      }

      // The question is already in the new database at least once:
      // log it together with its matches and leave it out
      appendToLog(hr('#'))
      appendToLog('QUESTION')
      appendToLog(JSON.stringify(question, null, 2))
      appendToLog(hr())
      appendToLog('SAMES')
      appendToLog(JSON.stringify(sames, null, 2))
      removedQuestions++
    })

    LogResultProgress(subj, addedQuestions, removedQuestions)
    stats.push({
      name: subj.Name,
      prevQuestions: subj.Questions.length,
      addedQuestions,
      removedQuestions
    })
  })

  return { res, stats }
}
/**
 * Writes the first half of a subject's progress line:
 * `[ current / total ] name: question count`. No newline is written and the
 * last color is not reset.
 *
 * @param {number} i - zero based index of the subject
 * @param {Object} subj - subject with `Name` and a `Questions` array
 * @param {number} subjCount - total number of subjects
 */
function LogSubjProgress (i, subj, subjCount) {
  const position = `[ ${C('cyan')}${i + 1}${C()} / ${C('green')}${subjCount}${C()} ] `
  const subject = `${C('yellow')}${subj.Name}${C()}: ${C('green')}${subj.Questions.length}`
  log(position + subject)
}
/**
 * Writes the second half of a subject's progress line: the number of added
 * and of removed questions, terminated by a newline.
 *
 * @param {Object} subj - the processed subject (not used for the output)
 * @param {number} addedQuestions - questions added to the new database
 * @param {number} removedQuestions - questions left out as duplicates
 */
function LogResultProgress (subj, addedQuestions, removedQuestions) {
  const added = ` ${C('cyan')}-> ${C('green')}${addedQuestions}${C()}`
  const removed = `, removed: ${C('red')}${removedQuestions}${C()}\n`
  log(added + removed)
}
// Writes msg to standard output exactly as given. process.stdout.write does not
// append a newline, which lets LogSubjProgress and LogResultProgress build up
// one console line in two steps.
function log (msg) {
process.stdout.write(msg)
}
/**
 * Builds a horizontal rule for the console.
 *
 * @param {string} [char] - text to repeat; '-' is used when it is missing or empty
 * @returns {string} `char` repeated once per column of the terminal, or 20
 *   times when the column count of standard output is not available
 */
function hr (char) {
  const width = process.stdout.columns || 20
  const unit = char || '-'
  return String(unit).repeat(width)
}
// Shorthand for logger.C: passes `color` on unchanged and returns whatever logger.C returns.
// NOTE(review): judging by the call sites the result is a terminal color code, and calling
// C() without an argument resets the color — confirm in logger.js.
function C (color) {
return logger.C(color)
}
/**
 * Returns the command line parameters of the script: every element of
 * `process.argv` after the first two (the node executable and the script
 * path).
 *
 * @returns {string[]} a new array; `process.argv` itself is left untouched
 */
function GetParams () {
  // slice instead of splice: splice would remove the parameters from the
  // global process.argv, so any later reader (or a second call) would find none.
  return process.argv.slice(2)
}
/**
 * Formats the current local date and time as `YYYY-MM-DD HH:MM:SS`.
 *
 * @returns {string} the formatted timestamp; month, day, hours, minutes and
 *   seconds are always two digits long
 */
function GetDateString () {
  const now = new Date()
  // Prefix a zero and keep the last two characters: 7 -> '07', 12 -> '12'
  const twoDigits = (value) => ('0' + value).slice(-2)

  const datePart = [
    now.getFullYear(),
    twoDigits(now.getMonth() + 1), // getMonth is zero based
    twoDigits(now.getDate())
  ].join('-')
  const timePart = [
    twoDigits(now.getHours()),
    twoDigits(now.getMinutes()),
    twoDigits(now.getSeconds())
  ].join(':')

  return datePart + ' ' + timePart
}