Updated duplicate remover script

This commit is contained in:
mrfry 2020-10-21 16:25:53 +02:00
parent 113120c00a
commit 6f142e0733

View file

@ -23,6 +23,7 @@ const {
searchData,
addQuestion,
getSubjNameWithoutYear,
createQuestion,
} = require('./classes.js')
const actions = require('./actions.js')
const logger = require('./logger.js')
@ -34,7 +35,7 @@ const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_')
Main()
function Main() {
async function Main() {
const params = GetParams()
console.log(params)
if (params.length === 0) {
@ -46,7 +47,7 @@ function Main() {
PrintDB(data)
console.log(hr('='))
const { res, stats } = RemoveDuplicates(data)
const { res, stats } = await RemoveDuplicates(data)
console.log(hr('='))
LogStats(stats, data, res)
@ -159,56 +160,73 @@ function MaxLengthOf(prop, key) {
}, 0)
}
/**
 * Builds a de-duplicated question database out of `data`.
 *
 * Subjects and their questions are processed strictly in order, because every
 * question is compared against the result database built so far: a question
 * is dropped when `searchData` reports at least one hit whose `match` is
 * `>= minMatchAmmount`, otherwise it is added to the result under the
 * subject's name with the year stripped (`getSubjNameWithoutYear`).
 *
 * Dropped questions, together with the hits that caused the drop, are
 * appended to a per-subject log file under `logPath`.
 *
 * A question whose processing throws is logged to the console and skipped;
 * the run continues with the next question.
 *
 * @param {Array<{Name: string, Questions: Array<{Q: *, A: *, data: *}>}>} data
 *   Subjects to de-duplicate.
 * @returns {Promise<{res: Array, stats: Array<{name: string,
 *   prevQuestions: number, addedQuestions: number,
 *   removedQuestions: number}>}>} The new database and one stats entry per
 *   input subject.
 */
async function RemoveDuplicates(data) {
  console.log(C('yellow') + 'Removing duplicates' + C())
  // Filled in place by addQuestion(); the binding itself is never reassigned.
  const res = []
  const stats = []

  for (let i = 0; i < data.length; i++) {
    const subj = data[i]
    // Spaces and slashes in the subject name are replaced so it is usable as
    // a single file name.
    const logFile =
      logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-')
    let addedQuestions = 0
    let removedQuestions = 0

    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      try {
        // Searching for the same question in the result database
        const tempQuestion = createQuestion(
          question.Q,
          question.A,
          question.data
        )
        const matches = await searchData(res, tempQuestion)
        const result = matches.filter(
          (found) => found.match >= minMatchAmmount
        )

        if (result.length > 0) {
          // The new database already holds at least one matching question:
          // log the duplicate and what it matched, then drop it
          utils.AppendToFile(hr('#'), logFile)
          utils.AppendToFile('QUESTION', logFile)
          utils.AppendToFile(JSON.stringify(tempQuestion, null, 2), logFile)
          utils.AppendToFile(hr(), logFile)
          utils.AppendToFile('SAMES', logFile)
          utils.AppendToFile(JSON.stringify(result, null, 2), logFile)
          removedQuestions++
        } else {
          // No matching question was found, so add it to the new database
          addQuestion(res, getSubjNameWithoutYear(subj.Name), tempQuestion)
          addedQuestions++
        }

        LogResultProgress(
          subj,
          i,
          j,
          subj.Questions.length,
          addedQuestions,
          removedQuestions,
          data.length
        )
      } catch (err) {
        // Best effort: report the failing question and keep going
        console.log()
        console.log('ERROR')
        console.log(err)
        console.log('QUESTION')
        console.log(question)
        console.log()
      }
    }

    stats.push({
      name: subj.Name,
      prevQuestions: subj.Questions.length,
      addedQuestions: addedQuestions,
      removedQuestions: removedQuestions,
    })
  }

  return { res, stats }
}