Updated duplicate remover script

mrfry 2020-10-21 16:25:53 +02:00
parent 113120c00a
commit 6f142e0733


@@ -23,6 +23,7 @@ const {
   searchData,
   addQuestion,
   getSubjNameWithoutYear,
+  createQuestion,
 } = require('./classes.js')
 const actions = require('./actions.js')
 const logger = require('./logger.js')
@@ -34,7 +35,7 @@ const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_')
 Main()
-function Main() {
+async function Main() {
   const params = GetParams()
   console.log(params)
   if (params.length === 0) {
@@ -46,7 +47,7 @@ function Main() {
   PrintDB(data)
   console.log(hr('='))
-  const { res, stats } = RemoveDuplicates(data)
+  const { res, stats } = await RemoveDuplicates(data)
   console.log(hr('='))
   LogStats(stats, data, res)
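With Main() now async but still invoked as a bare Main() call, a rejected promise would only surface as an unhandled-rejection warning. A minimal sketch, not part of the commit, of how the existing entry point could attach an error handler:

Main().catch((err) => {
  // hypothetical top-level handler; the commit itself keeps the bare Main() call
  console.log('Main() failed:')
  console.log(err)
  process.exit(1)
})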
@@ -159,56 +160,73 @@ function MaxLengthOf(prop, key) {
   }, 0)
 }
-function RemoveDuplicates(data) {
+async function RemoveDuplicates(data) {
   console.log(C('yellow') + 'Removing duplicates' + C())
   let res = []
   const stats = []
-  data.forEach((subj, i) => {
+  for (let i = 0; i < data.length; i++) {
+    const subj = data[i]
     const logFile =
       logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-')
     let addedQuestions = 0
     let removedQuestions = 0
-    subj.Questions.forEach((question, j) => {
-      // Search for the same question in the result database
-      let result = searchData(res, question).reduce((acc, res) => {
-        if (res.match >= minMatchAmmount) {
-          acc.push(res)
-        }
-        return acc
-      }, [])
-      // if the same question already exists in the new database
-      if (result.length > 0) {
-        utils.AppendToFile(hr('#'), logFile)
-        utils.AppendToFile('QUESTION', logFile)
-        utils.AppendToFile(JSON.stringify(question, null, 2), logFile)
-        utils.AppendToFile(hr(), logFile)
-        utils.AppendToFile('SAMES', logFile)
-        utils.AppendToFile(JSON.stringify(result, null, 2), logFile)
-        removedQuestions++
-      } else {
-        // if no matching question was found, add it to the new db
-        res = addQuestion(res, getSubjNameWithoutYear(subj.Name), question)
-        addedQuestions++
-      }
-      LogResultProgress(
-        subj,
-        i,
-        j,
-        subj.Questions.length,
-        addedQuestions,
-        removedQuestions,
-        data.length
-      )
-    })
+    for (let j = 0; j < subj.Questions.length; j++) {
+      const question = subj.Questions[j]
+      try {
+        // Search for the same question in the result database
+        const tempQuestion = createQuestion(
+          question.Q,
+          question.A,
+          question.data
+        )
+        let result = await searchData(res, tempQuestion)
+        result = result.reduce((acc, res) => {
+          if (res.match >= minMatchAmmount) {
+            acc.push(res)
+          }
+          return acc
+        }, [])
+        // if the same question already exists in the new database
+        if (result.length > 0) {
+          utils.AppendToFile(hr('#'), logFile)
+          utils.AppendToFile('QUESTION', logFile)
+          utils.AppendToFile(JSON.stringify(tempQuestion, null, 2), logFile)
+          utils.AppendToFile(hr(), logFile)
+          utils.AppendToFile('SAMES', logFile)
+          utils.AppendToFile(JSON.stringify(result, null, 2), logFile)
+          removedQuestions++
+        } else {
+          // if no matching question was found, add it to the new db
+          addQuestion(res, getSubjNameWithoutYear(subj.Name), tempQuestion)
+          addedQuestions++
+        }
+        LogResultProgress(
+          subj,
+          i,
+          j,
+          subj.Questions.length,
+          addedQuestions,
+          removedQuestions,
+          data.length
+        )
+      } catch (err) {
+        console.log()
+        console.log('ERROR')
+        console.log(err)
+        console.log('QUESTION')
+        console.log(question)
+        console.log()
+      }
+    }
     stats.push({
       name: subj.Name,
       prevQuestions: subj.Questions.length,
      addedQuestions: addedQuestions,
       removedQuestions: removedQuestions,
     })
-  })
+  }
   return { res, stats }
 }
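The switch from forEach to indexed for loops is what makes the new await on searchData effective: forEach never waits for async callbacks, so the old structure would have moved on before any search resolved. A minimal standalone sketch, not taken from the repository, illustrating the difference (the delay helper stands in for searchData):

// Why `await` inside a forEach callback does not pause the caller,
// while a plain for loop does.
async function demo() {
  const items = [1, 2, 3]
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

  let sumForEach = 0
  items.forEach(async (n) => {
    await delay(10)
    sumForEach += n
  })
  console.log('after forEach:', sumForEach) // 0, the callbacks are still pending

  let sumForLoop = 0
  for (let i = 0; i < items.length; i++) {
    await delay(10)
    sumForLoop += items[i]
  }
  console.log('after for loop:', sumForLoop) // 6, every await completed in order
}

demo()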