Added duplicate remover ( #5 ), saving subjects without year, file append now sync, minor logging improvements

This commit is contained in:
MrFry 2020-03-26 14:33:52 +01:00
parent be34113969
commit e6db0f9175
4 changed files with 284 additions and 8 deletions

View file

@ -65,6 +65,7 @@ function ProcessIncomingRequest (recievedData, qdb, infos) {
} }
logger.DebugLog('recievedData JSON parsed', 'actions', 1) logger.DebugLog('recievedData JSON parsed', 'actions', 1)
logger.DebugLog(d, 'actions', 3)
let allQLength = d.quiz.length let allQLength = d.quiz.length
let allQuestions = [] let allQuestions = []
@ -72,6 +73,9 @@ function ProcessIncomingRequest (recievedData, qdb, infos) {
logger.DebugLog('Question:', 'actions', 2) logger.DebugLog('Question:', 'actions', 2)
logger.DebugLog(question, 'actions', 2) logger.DebugLog(question, 'actions', 2)
let q = new classes.Question(question.Q, question.A, question.data) let q = new classes.Question(question.Q, question.A, question.data)
logger.DebugLog('Searching for question in subj ' + d.subj, 'actions', 3)
logger.DebugLog(q, 'actions', 3)
let sames = qdb.Search(q, d.subj) let sames = qdb.Search(q, d.subj)
logger.DebugLog('Same questions:', 'actions', 2) logger.DebugLog('Same questions:', 'actions', 2)
logger.DebugLog('Length: ' + sames.length, 'actions', 2) logger.DebugLog('Length: ' + sames.length, 'actions', 2)
@ -89,10 +93,13 @@ function ProcessIncomingRequest (recievedData, qdb, infos) {
let color = logger.GetColor('green') let color = logger.GetColor('green')
let msg = '' let msg = ''
if (allQuestions.length > 0) { if (allQuestions.length > 0) {
color = logger.GetColor('blue') color = logger.GetColor('cyan')
msg += `New questions: ${allQuestions.length} ( All: ${allQLength} )` msg += `New questions: ${allQuestions.length} ( All: ${allQLength} )`
allQuestions.forEach((q) => { allQuestions.forEach((q) => {
qdb.AddQuestion(d.subj, q) const sName = classes.SUtils.GetSubjNameWithoutYear(d.subj)
logger.DebugLog('Adding question with subjName: ' + sName + ' :', 'actions', 3)
logger.DebugLog(q, 'actions', 3)
qdb.AddQuestion(sName, q)
}) })
currWrites++ currWrites++
@ -139,7 +146,7 @@ function ProcessIncomingRequest (recievedData, qdb, infos) {
function LoadJSON (dataFile) { function LoadJSON (dataFile) {
try { try {
var d = JSON.parse(utils.ReadFile(dataFile)) var d = JSON.parse(utils.ReadFile(dataFile))
var r = new classes.QuestionDB((x) => true, (x, y) => console.log(x, y)) var r = new classes.QuestionDB()
var rt = [] var rt = []
for (var i = 0; i < d.Subjects.length; i++) { for (var i = 0; i < d.Subjects.length; i++) {

View file

@ -29,7 +29,8 @@ module.exports = {
LogStat: LogStat, LogStat: LogStat,
Load: Load, Load: Load,
logHashed: logHashed, logHashed: logHashed,
hr: hr hr: hr,
C: C
} }
const DELIM = C('green') + '|' + C() const DELIM = C('green') + '|' + C()
@ -82,6 +83,7 @@ function DebugLog (msg, name, lvl) {
s = header + msg s = header + msg
} else { } else {
Log(header + 'OBJECT:', 'yellow') Log(header + 'OBJECT:', 'yellow')
s = msg
} }
Log(s, 'yellow') Log(s, 'yellow')
} }
@ -91,7 +93,7 @@ function Log (s, c) {
let log = s let log = s
if (typeof s !== 'object') { if (typeof s !== 'object') {
let dl = DELIM + C(c) let dl = DELIM + C(c)
log = C(c) + GetDateString() + dl + s log = C(c) + GetDateString() + dl + s + C()
} }
console.log(log) console.log(log)

263
utils/rmDuplicates.js Normal file
View file

@ -0,0 +1,263 @@
/* ----------------------------------------------------------------------------
Question Server question file merger
GitLab: <https://gitlab.com/MrFry/mrfrys-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
const utils = require('./utils.js')
const classes = require('./question-classes/classes.js')
const actions = require('./actions.js')
const logger = require('./logger.js')
// Minimum `match` value a search result needs to be treated as a duplicate
// (see RemoveDuplicates). Presumably a percentage, so 100 would mean an exact
// match - TODO confirm against QuestionDB.Search.
const minMatchAmmount = 100
// Directory prefix for the per-subject merge logs; the start time is embedded
// in the name with spaces replaced by underscores.
const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_')
// Runs the tool as soon as the module is loaded. The functions used above and
// below are declarations, so they are hoisted and already callable here.
Main()
/**
 * Entry point of the duplicate remover.
 *
 * Reads the question database whose path is the first command line parameter,
 * prints it, removes the duplicate questions, prints per-subject statistics
 * and the resulting database, then writes the result to `res.json`.
 *
 * Terminates the process with exit status 1 when no path was passed, so that
 * shells and scripts can detect the failure.
 */
function Main () {
  const params = GetParams()
  console.log(params)
  if (params.length === 0) {
    console.error('No params! Need a path to a question database!')
    // A bare process.exit() would report success (status 0) for this error.
    process.exit(1)
  }

  const data = actions.LoadJSON(params[0])

  PrintDB(data)
  console.log(hr('='))

  const { res, stats } = RemoveDuplicates(data)
  console.log(hr('='))

  LogStats(stats, data, res)
  console.log(hr('='))

  console.log('Result database:')
  PrintDB(res)
  console.log(hr('='))

  utils.WriteFile(JSON.stringify(res), 'res.json')
  console.log(C('green') + 'res.json written!' + C())
  console.log(hr('='))
  console.log(C('green') + 'Done' + C())
}
/**
 * Prints one aligned, colored line per subject statistic, followed by the
 * subject / question totals of the old and the new database.
 *
 * @param {Array} stats - entries with `name`, `prevQuestions`,
 *   `addedQuestions` and `removedQuestions`, as produced by RemoveDuplicates
 * @param {Object} oldData - database before the duplicate removal
 * @param {Object} newData - database after the duplicate removal
 */
function LogStats (stats, oldData, newData) {
  // Column widths: the longest value of each column decides its width.
  const nameWidth = MaxLengthOf(stats, 'name')
  const prevWidth = MaxLengthOf(stats, 'prevQuestions')
  const addedWidth = MaxLengthOf(stats, 'addedQuestions')
  const removedWidth = MaxLengthOf(stats, 'removedQuestions')

  stats.forEach((entry) => {
    const nameCol = `${C('green')}${GetExactLength(entry.name, nameWidth)}${C()}`
    const prevCol = `${C('magenta')}${GetExactLength(entry.prevQuestions, prevWidth)}${C()}`
    const arrow = `${C('cyan')} -> ${C()}`
    const addedCol = `${C('green')}${GetExactLength(entry.addedQuestions, addedWidth)}${C()}`
    const removedCol = `${C('red')}${GetExactLength(entry.removedQuestions, removedWidth)}${C()}`

    console.log(`${nameCol} ${prevCol}${arrow}${addedCol} [ ${removedCol} ]`)
  })

  console.log(hr())
  console.log('Old data:')
  LogDataCount(oldData)
  console.log('New data:')
  LogDataCount(newData)
}
/**
 * Prints how many subjects the database has and how many questions they hold
 * in total.
 *
 * @param {Object} data - database with a `Subjects` array whose items each
 *   have a `Questions` array
 */
function LogDataCount (data) {
  const subjectCount = data.Subjects.length

  let questionCount = 0
  for (const subject of data.Subjects) {
    questionCount += subject.Questions.length
  }

  console.log(`Subjects: ${C('green')}${subjectCount}${C()}, Questions: ${C('green')}${questionCount}${C()}`)
}
/**
 * Prints a numbered, colored list of the subjects of a database together with
 * their question counts, followed by the database totals.
 *
 * @param {Object} data - database with a `Subjects` array whose items have
 *   `Name` and `Questions`
 */
function PrintDB (data) {
  // All names are padded to the longest one so the counts line up.
  const nameWidth = MaxLengthOf(data.Subjects, 'Name')

  data.Subjects.forEach((subject, index) => {
    const position = `${C('magenta')}${index + 1}${C()}`
    const paddedName = `${C('green')}${GetExactLength(subject.Name, nameWidth)}${C()}`
    const questionCount = `${C('cyan')}${subject.Questions.length}${C()}`

    console.log(`${position}: ${paddedName} [ ${questionCount} ]`)
  })

  console.log(hr())
  LogDataCount(data)
  console.log(hr())
}
/**
 * Converts a value to a string and right-pads it with spaces up to `length`.
 * Values already at least that long are returned unpadded (never truncated).
 *
 * @param {*} s - value to print; must have a `toString` method
 * @param {number} length - wanted minimum width
 * @returns {string} the padded text
 */
function GetExactLength (s, length) {
  const text = s.toString()
  const missing = length - text.length

  // Covers "already wide enough" as well as a non-numeric width.
  if (!(missing > 0)) {
    return text
  }

  return text + ' '.repeat(Math.ceil(missing))
}
/**
 * Finds the length of the longest string form of `item[key]` in a list.
 *
 * @param {Array<Object>} prop - items to inspect
 * @param {string} key - name of the property to measure on every item
 * @returns {number} the largest length, or 0 for an empty list
 */
function MaxLengthOf (prop, key) {
  return prop.reduce(
    (longest, item) => Math.max(longest, item[key].toString().length),
    0
  )
}
/**
 * Builds a new question database that only contains the first occurrence of
 * every question.
 *
 * Each question is searched for in the database built so far. When at least
 * one result reaches `minMatchAmmount`, the question is dropped and both the
 * question and its matches are appended to the subject's merge log file.
 * Otherwise it is added to the new database under the subject's name without
 * year.
 *
 * @param {Object} data - source database with a `Subjects` array
 * @returns {{res: Object, stats: Array}} the new database and one statistics
 *   entry per source subject
 */
function RemoveDuplicates (data) {
  console.log(C('yellow') + 'Removing duplicates' + C())
  const res = new classes.QuestionDB()
  const stats = []

  data.Subjects.forEach((subj, i) => {
    // One log file per subject; spaces and slashes are replaced in its name.
    const logFile = logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-')
    const appendToLog = (text) => utils.AppendToFile(text, logFile)

    LogSubjProgress(i, subj, data.Subjects.length)

    let keptCount = 0
    let droppedCount = 0

    for (const question of subj.Questions) {
      // Search the result database for questions matching this one
      const sames = res.Search(question).filter((found) => found.match >= minMatchAmmount)

      if (sames.length > 0) {
        // At least one matching question is already in the new database
        appendToLog(hr('#'))
        appendToLog('QUESTION')
        appendToLog(JSON.stringify(question, null, 2))
        appendToLog(hr())
        appendToLog('SAMES')
        appendToLog(JSON.stringify(sames, null, 2))
        droppedCount++
      } else {
        // No matching question was found, so it goes into the new database
        res.AddQuestion(subj.getSubjNameWithoutYear(), question)
        keptCount++
      }
    }

    LogResultProgress(subj, keptCount, droppedCount)
    stats.push({
      name: subj.Name,
      prevQuestions: subj.Questions.length,
      addedQuestions: keptCount,
      removedQuestions: droppedCount
    })
  })

  return { res, stats }
}
/**
 * Writes the "[ current / total ] name: question count" progress prefix of a
 * subject to stdout. No newline is written and the last color is left active.
 *
 * @param {number} i - zero-based index of the subject
 * @param {Object} subj - subject with `Name` and `Questions`
 * @param {number} subjCount - total number of subjects
 */
function LogSubjProgress (i, subj, subjCount) {
  const position = `[ ${C('cyan')}${i + 1}${C()} / ${C('green')}${subjCount}${C()} ] `
  const subject = `${C('yellow')}${subj.Name}${C()}: `
  const questionCount = `${C('green')}${subj.Questions.length}`

  log(position + subject + questionCount)
}
/**
 * Writes the added and removed question counts to stdout and ends the line.
 *
 * @param {Object} subj - processed subject (not used by this function)
 * @param {number} addedQuestions - questions kept in the new database
 * @param {number} removedQuestions - questions dropped as duplicates
 */
function LogResultProgress (subj, addedQuestions, removedQuestions) {
  const added = `${C('cyan')}-> ${C('green')}${addedQuestions}${C()}`
  const removed = `${C('red')}${removedQuestions}${C()}`

  log(` ${added}, removed: ${removed}\n`)
}
/**
 * Writes a message to stdout exactly as given, without a trailing newline.
 *
 * @param {string} msg - text to write
 */
function log (msg) {
  const { stdout } = process
  stdout.write(msg)
}
/**
 * Builds a horizontal rule as wide as the terminal.
 *
 * @param {string} [char] - text repeated once per column; '-' when omitted or
 *   empty
 * @returns {string} the rule; 20 repetitions when the terminal width is not
 *   known
 */
function hr (char) {
  const width = process.stdout.columns || 20
  return String(char || '-').repeat(width)
}
// Local shorthand that forwards `color` to logger.C and returns its result.
// Call sites in this file concatenate that result into the text they print;
// the argument-less call is placed after colored text (presumably it yields a
// color reset - confirm in logger.js).
function C (color) {
return logger.C(color)
}
/**
 * Returns the command line parameters passed to the script, i.e. everything
 * after the node executable and the script path.
 *
 * A copy is returned: `process.argv` itself is left untouched, so the
 * function can be called repeatedly and other code still sees all arguments.
 *
 * @returns {string[]} the parameters; empty when none were given
 */
function GetParams () {
  // slice copies; splice would cut the parameters out of process.argv.
  return process.argv.slice(2)
}
/**
 * Formats the current local date and time as `YYYY-MM-DD HH:MM:SS`.
 *
 * @returns {string} the timestamp; every part except the year is two digits
 */
function GetDateString () {
  const now = new Date()
  const twoDigits = (value) => String(value).padStart(2, '0')

  const datePart = [
    now.getFullYear(),
    twoDigits(now.getMonth() + 1), // getMonth() is zero-based
    twoDigits(now.getDate())
  ].join('-')
  const timePart = [
    twoDigits(now.getHours()),
    twoDigits(now.getMinutes()),
    twoDigits(now.getSeconds())
  ].join(':')

  return `${datePart} ${timePart}`
}

View file

@ -87,9 +87,13 @@ function WriteFileAsync (content, path) {
function AppendToFile (data, file) { function AppendToFile (data, file) {
CreatePath(file) CreatePath(file)
fs.appendFile(file, '\n' + data, function (err) { try {
if (err) { logger.Log('Error writing log file: ' + file + ' (sync)', logger.GetColor('redbg')) } fs.appendFileSync(file, '\n' + data)
}) } catch (e) {
logger.Log('Error appendig to file log file: ' + file + ' (sync)', logger.GetColor('redbg'))
logger.Log(data)
console.log(e)
}
} }
function Beep () { function Beep () {