// Mirror of https://gitlab.com/MrFry/mrfrys-node-server
// (synced 2025-04-01 20:24:18 +02:00; 500 lines, 14 KiB, JavaScript)
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
|
const {
|
|
addQuestion,
|
|
doSearch,
|
|
compareQuestionObj,
|
|
createQuestion,
|
|
} = require('../../dist/utils/classes.js') // eslint-disable-line
|
|
const { loadData, writeData } = require('../../dist/utils/actions.js') // eslint-disable-line
|
|
const fs = require('fs') // eslint-disable-line
|
|
|
|
// Sample console output of two earlier runs, kept for reference:
//
// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ]
// load: 2.767ms
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 19 questions
// ==============================================================================================
// Result length: 807, original length: 826, removed 19 questions
// rmduplicates: 9.527s
// File written

// load: 15.91ms
// Removing duplicate questions from publicDirs/qminingPublic/questionDbs/elektro.json
// ==============================================================================================
// 1 / 1: Elektronika, 826 questions
// Result length: 0, original length: 826
// [=====================================================================] 826 / 826
// Removed 10 questions
// ==============================================================================================
// Result length: 816, original length: 826, removed 10 questions
// rmduplicates: 488.853ms
// File written
|
|
|
|
// Threshold (in percent) handed to the search and used to decide whether a hit counts as a duplicate
const minpercent = 95
// Separator placed between the entries of the per-subject log files
const line =
  '===================================================================='
// Per-subject logs are written under logPath; every log() message is also appended to globalLog
const logPath = './duplicateRemovingLog/'
const globalLog = `${logPath}log`

// Presumably makes sure the log directory exists and starts the run with an empty global log
// (depends on utils.CreatePath / utils.WriteFile semantics — confirm)
utils.CreatePath(logPath)
utils.WriteFile('', globalLog)
|
|
|
|
// ----------------------------------------------
// Progress reporting over IPC: every message received on `process` is answered with the
// current progress counters. Both stay at -1 until difference() starts updating them.
let currentMaxIndex = -1
let currentIndex = -1
process.on('message', () => {
  process.send({ currentMaxIndex, currentIndex })
})
// ----------------------------------------------
|
|
|
|
// Command line arguments after `node <script>`; splice() also removes them from process.argv
const cliArgs = process.argv.splice(2)
// '-s' silences console output (log() and printProgressBar() check this flag)
const silenced = cliArgs.includes('-s')
// Everything that does not start with '-' is treated as a positional path argument
const params = cliArgs.filter((arg) => !arg.startsWith('-'))

console.log(params)
if (params.length === 0) {
  console.log('At least 1 parameter required (path to DB)')
  process.exit(1)
}
|
|
|
|
const pathA = params[0]
const pathB = params[1]

// Wraps a value in the green colour code followed by a colour reset
const green = (value) => `${C('green')}${value}${C()}`

// pathA selects the mode: a directory is handled as a "possible answers" tree,
// anything else is loaded as a question db
const inputIsDirectory = fs.lstatSync(pathA).isDirectory()

if (inputIsDirectory && pathB) {
  // Directory + db: delete the files whose questions are found in the db at pathB
  log(
    `Clearing possible questions from ${green(pathA)} based on ${green(pathB)} db`
  )
  clearPossibleAnswers(pathA, loadData(pathB))
  log(
    `Cleared possible questions from ${green(pathA)} based on ${green(pathB)} db`
  )
} else if (inputIsDirectory) {
  // Directory only: remove duplicate files inside the tree itself
  log(`Removing possible question duplicates from ${green(pathA)}`)
  removePossibleAnswersDuplicates(pathA)
  log(`Removed possible question duplicates from ${green(pathA)}`)
} else {
  console.time('load')
  const dbA = loadData(pathA)
  const dbB = pathB ? loadData(pathB) : null
  console.timeEnd('load')

  console.time('rmduplicates')

  // Last '/'-separated segment of pathA; the result is written to '<segment>.res'
  const resultDbFileName = pathA.split('/').pop()
  let res
  let doneMessage
  if (dbB) {
    // Two dbs: keep only the questions of A that are not found in B
    log(`Removing questions found in ${green(pathB)} from ${green(pathA)}`)
    res = difference({ dbA: dbA, dbB: dbB })
    doneMessage = `Removed questions found in ${green(pathB)} from ${green(
      pathA
    )}`
  } else {
    // Single db: remove duplicates inside A
    log(`Removing duplicate questions from ${green(pathA)}`)
    res = rmDuplicates(dbA)
    doneMessage = `Removed duplicate questions from ${green(pathA)}`
  }
  console.timeEnd('rmduplicates')

  writeData(res, `${resultDbFileName}.res`)
  log('File written')
  log(doneMessage)
}
|
|
|
|
// ---------------------------------------------------------------------------------
// possible answers duplicate removing
// ---------------------------------------------------------------------------------

// TODO: don't check every file, only check per directories
// only compare questions of same subjects
/**
 * Deletes duplicate "possible answers" files below `path`.
 *
 * Every sub-directory of `path` is handled on its own: each file in it is compared with
 * every other file of the same directory, and as soon as the two files share a question
 * pair that compareQuestionObj rates at `avg === 100`, the second file is deleted (once).
 * Afterwards sub-directories that are empty are removed and updateSavedQuestionsFile() is
 * called for every sub-directory.
 *
 * @param {string} path directory whose sub-directories are scanned
 */
function removePossibleAnswersDuplicates(path) {
  // True for paths that still exist and are not the savedQuestions.json index
  const isQuestionFile = (filePath) =>
    !filePath.includes('savedQuestions.json') && utils.FileExists(filePath)

  // True when at least one question of dataA fully matches a question of dataB.
  // some() stops at the first hit, so a file is never deleted or counted twice.
  const sharesQuestion = (dataA, dataB) =>
    dataA.questions.some((q1) =>
      dataB.questions.some((q2) => {
        const percent = compareQuestionObj(
          createQuestion(q1),
          '',
          createQuestion(q2),
          ''
        )
        return percent.avg === 100
      })
    )

  // Plain files directly under `path` are skipped: readdirSync would throw on them
  const dirs = fs
    .readdirSync(path)
    .filter((entry) => fs.statSync(`${path}/${entry}`).isDirectory())

  // Progress total: the number of files that will actually be compared
  let count = 0
  dirs.forEach((dir) => {
    fs.readdirSync(`${path}/${dir}`).forEach((file) => {
      if (isQuestionFile(`${path}/${dir}/${file}`)) {
        count++
      }
    })
  })
  let currIndex = 0
  let delets = 0

  dirs.forEach((currDir) => {
    const dirPath = `${path}/${currDir}`
    const contents = fs.readdirSync(dirPath)

    contents.forEach((currFile) => {
      const currPath = `${dirPath}/${currFile}`
      // Also skips files that were deleted earlier in this run
      if (!isQuestionFile(currPath)) {
        return
      }
      const dataA = utils.ReadJSON(currPath)

      currIndex++
      printProgressBar(currIndex, count)

      contents.forEach((currFile2) => {
        const currPath2 = `${dirPath}/${currFile2}`
        if (currPath2 === currPath || !isQuestionFile(currPath2)) {
          return
        }
        const dataB = utils.ReadJSON(currPath2)

        if (sharesQuestion(dataA, dataB)) {
          utils.deleteFile(currPath2)
          // The deleted file will not be processed any more, shrink the progress total
          count--
          delets++
        }
      })
    })
  })

  log(`${C('green')}Deleting empty directories ...${C()}`)
  count = dirs.length
  currIndex = 0
  let deletedDirCount = 0
  dirs.forEach((dir) => {
    currIndex++
    const currDirContent = fs.readdirSync(path + '/' + dir)
    if (currDirContent.length === 0) {
      fs.rmdirSync(path + '/' + dir)
      deletedDirCount++
    }
    printProgressBar(currIndex, count)
  })

  log(`${C('green')}Updating savedQuestions.json ...${C()}`)
  count = dirs.length
  currIndex = 0
  dirs.forEach((dir) => {
    currIndex++
    updateSavedQuestionsFile(path + '/' + dir)
    printProgressBar(currIndex, count)
  })

  log(
    `Deleted ${C('green')}${delets}${C()} files, and ${C(
      'green'
    )}${deletedDirCount}${C()} directories`
  )
}
|
|
|
|
/**
 * Deletes every file below `path` that contains at least one question for which
 * search() returns a result in `db`. Files whose path includes 'savedQuestions.json'
 * are never read or deleted.
 *
 * @param {string} path directory (walked recursively through iterateDir)
 * @param {*} db question db handed to search() as `qdb`
 */
function clearPossibleAnswers(path, db) {
  let count = 0
  let currIndex = 0
  let delets = 0
  // First pass only counts the files so the progress bar has a total
  iterateDir(path, () => {
    count++
  })

  iterateDir(path, (currPath) => {
    currIndex++
    if (!currPath.includes('savedQuestions.json')) {
      const { subj, questions } = utils.ReadJSON(currPath)

      // some() stops at the first hit: the file is deleted and counted exactly once
      const foundInDb = questions.some((question) => {
        const searchRes = search({
          qdb: db,
          subjName: subj,
          question: question,
          searchTillMatchPercent: 80,
        })
        return searchRes.length > 0
      })
      if (foundInDb) {
        utils.deleteFile(currPath)
        delets++
      }
    }
    // Skipped files advance the bar too, so it ends at count / count
    printProgressBar(currIndex, count)
  })
  log(`Deleted ${C('green')}${delets}${C()} files`)
}
|
|
|
|
/**
 * Drops the entries of `<path>/savedQuestions.json` whose `fname` file no longer exists
 * in `path`. The index is only rewritten when at least one entry was dropped; a missing
 * index file is logged and otherwise ignored.
 *
 * @param {string} path directory holding savedQuestions.json
 */
function updateSavedQuestionsFile(path) {
  const indexFile = `${path}/savedQuestions.json`

  if (utils.FileExists(indexFile)) {
    const entries = utils.ReadJSON(indexFile)
    const stillPresent = entries.filter((entry) =>
      utils.FileExists(`${path}/${entry.fname}`)
    )

    if (stillPresent.length !== entries.length) {
      utils.WriteFile(JSON.stringify(stillPresent), indexFile)
    }
    return
  }

  log(`${indexFile} does not exists!`)
}
|
|
|
|
// ---------------------------------------------------------------------------------
// difference
// ---------------------------------------------------------------------------------

/**
 * Removes duplicate questions inside a single db by running difference() without a
 * second db.
 *
 * @param {*} db question db, passed on as `dbA`
 * @returns {*} whatever difference() returns
 */
function rmDuplicates(db) {
  const onlySource = { dbA: db }
  return difference(onlySource)
}
|
|
|
|
/**
 * Builds a new db from the questions of `dbA` for which no sufficiently matching question
 * is found.
 *
 * - With `dbB`: every question of `dbA` is searched in `dbB`.
 * - Without `dbB`: every question is searched in the result built so far, which removes
 *   duplicates inside `dbA`.
 *
 * A question counts as found when hasRequiredPercent() keeps at least one search hit at
 * `minpercent`. Found questions are written to a per-subject log file under `logPath`.
 * The module-level `currentMaxIndex` / `currentIndex` counters are updated while running.
 *
 * @param {object} dbs
 * @param {Array} dbs.dbA subjects ({ Name, Questions }) to filter
 * @param {Array} [dbs.dbB] subjects to compare against
 * @returns {Array} db filled through addQuestion() with the questions that were kept
 */
function difference({ dbA, dbB }) {
  const doingDifference = !!dbB
  // Stuff only from A
  const resultDb = []
  let removedTotal = 0
  let processedQuestions = 0

  // Counts every question of every subject in a db
  const countQuestions = (db) => {
    let total = 0
    iterateSubjects(db, () => {
      total++
    })
    return total
  }

  const dbLength = countQuestions(dbA)
  currentMaxIndex = dbLength

  for (let i = 0; i < dbA.length; i++) {
    const subj = dbA[i]
    const subjLogPath = logPath + subj.Name
    // Start every subject with an empty log file
    utils.WriteFile('', subjLogPath)
    let removedCount = 0

    hr()
    log(
      `${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${
        subj.Name
      }, ${C('blue')}${subj.Questions.length}${C(
        'green'
      )} questions${C()}`
    )

    printProgressBar(i + 1, dbA.length)
    for (let j = 0; j < subj.Questions.length; j++) {
      const question = subj.Questions[j]
      const searchRes = search({
        qdb: doingDifference ? dbB : resultDb,
        subjName: subj.Name,
        question: question,
        searchInAllIfNoResult: doingDifference,
        searchTillMatchPercent: minpercent,
      })

      // Count first, then report, so the bar finishes at dbLength / dbLength
      processedQuestions++
      currentIndex = processedQuestions
      printProgressBar(processedQuestions, dbLength)

      const res = hasRequiredPercent(searchRes, minpercent)

      if (res.length === 0) {
        // no result: adding to difference
        addQuestion(resultDb, subj.Name, question)
      } else {
        // has result, not adding to difference
        utils.AppendToFile(
          line +
            '\n' +
            line +
            '\n' +
            JSON.stringify(question, null, 2) +
            '\n' +
            line +
            '\n' +
            JSON.stringify(res, null, 2) +
            '\n',
          subjLogPath
        )
        removedCount++
        removedTotal++
      }
    }
    log(
      `${C('yellow')}Removed ${C('red')}${removedCount}${C(
        'yellow'
      )} questions${C()}`
    )
  }

  hr()
  log(
    `Result length: ${countQuestions(
      resultDb
    )}, original length: ${dbLength}, removed ${removedTotal} questions`
  )
  return resultDb
}
|
|
|
|
/**
 * Keeps the search results whose `match` reaches the given threshold.
 *
 * @param {Array<{match: number}>} result search results
 * @param {number} minpercent lowest accepted `match` value (inclusive)
 * @returns {Array} new array with the accepted results, in their original order
 */
function hasRequiredPercent(result, minpercent) {
  return result.filter((hit) => hit.match >= minpercent)
}
|
|
|
|
// ---------------------------------------------------------------------------------
// db editing tools
// ---------------------------------------------------------------------------------

/**
 * Named-argument wrapper around doSearch().
 *
 * @param {object} options
 * @param {*} options.qdb db to search in
 * @param {string} options.subjName subject name handed to doSearch
 * @param {*} options.question question to look for
 * @param {boolean} [options.searchInAllIfNoResult] forwarded unchanged to doSearch
 * @param {number} [options.searchTillMatchPercent] forwarded as doSearch's fifth argument;
 *   defaults to the module-level `minpercent`. Previously this option was accepted from
 *   callers but silently replaced by `minpercent`.
 *   NOTE(review): assumes doSearch's fifth parameter is the "search till match percent"
 *   value — confirm against classes.js.
 * @returns {*} whatever doSearch returns (callers treat it as an array of hits)
 */
function search({
  qdb,
  subjName,
  question,
  searchInAllIfNoResult,
  searchTillMatchPercent = minpercent,
}) {
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    searchTillMatchPercent,
    searchInAllIfNoResult
  )
}
|
|
|
|
/**
 * Calls `fn(subject, question)` for every question of every subject, in db order.
 *
 * @param {Array<{Questions: Array}>} db list of subjects
 * @param {function} fn callback receiving the subject and one of its questions
 */
function iterateSubjects(db, fn) {
  for (const subject of db) {
    for (const question of subject.Questions) {
      fn(subject, question)
    }
  }
}
|
|
|
|
// ---------------------------------------------------------------------------------
// possible answers tools
// ---------------------------------------------------------------------------------

/**
 * Walks `path` recursively and calls `action(filePath)` for every entry that is not a
 * directory. Paths that utils.FileExists() reports as missing are skipped silently.
 *
 * @param {string} path file or directory to start from
 * @param {function(string)} action callback receiving each non-directory path
 */
function iterateDir(path, action) {
  if (!utils.FileExists(path)) {
    return
  }

  if (!fs.lstatSync(path).isDirectory()) {
    action(path)
    return
  }

  for (const entry of fs.readdirSync(path)) {
    iterateDir(`${path}/${entry}`, action)
  }
}
|
|
|
|
// ---------------------------------------------------------------------------------
// logging and tools
// ---------------------------------------------------------------------------------

/**
 * Logs a cyan horizontal rule made of '=' characters, one per terminal column.
 * When process.stdout.columns is not set the rule is empty.
 */
function hr() {
  const rule = '='.repeat(process.stdout.columns || 0)
  log(`${C('cyan')}${rule}${C()}`)
}
|
|
|
|
/**
 * Appends `text` to the global log file and, unless the run is silenced, prints it to
 * the console. On a TTY the current line is cleared first (the progress bar may still
 * occupy it).
 *
 * @param {string} text message to log
 */
function log(text) {
  utils.AppendToFile(text, globalLog)

  if (!silenced) {
    const out = process.stdout
    if (out.isTTY) {
      out.clearLine()
      out.cursorTo(0)
    }

    console.log(text)
  }
}
|
|
|
|
/**
 * Replaces the content of the current terminal line with `text`. Does nothing when
 * stdout is not a TTY.
 *
 * @param {string} text text to write
 * @param {boolean} returnToLineStart true: finish with '\r' so the next write overwrites
 *   this line; false: finish with '\n'
 */
function writeInSameLine(text, returnToLineStart) {
  const out = process.stdout
  if (!out.isTTY) {
    return
  }

  out.clearLine()
  out.cursorTo(0)
  out.write(text)
  out.write(returnToLineStart ? '\r' : '\n')
}
|
|
|
|
/**
 * Draws a one-line progress bar ("[====    ] current / total") through writeInSameLine().
 *
 * Nothing is printed when stdout is not a TTY, when the run is silenced, when the
 * terminal is too narrow (30 columns are reserved for the brackets and numbers), or when
 * `total` is not a positive number. The last case previously produced an Infinity fill
 * length and an unbounded loop. The filled part is clamped to the bar width, so
 * `current > total` can no longer overflow the bar.
 *
 * @param {number} current number of processed items
 * @param {number} total number of items overall
 */
function printProgressBar(current, total) {
  if (!process.stdout.isTTY || silenced) {
    return
  }

  const width = process.stdout.columns - 30
  // Written as negations so NaN (columns not set, non-numeric total) is rejected too
  if (!(width > 0) || !(total > 0)) {
    return
  }

  const ratio = Math.min(Math.max(current / total, 0), 1)
  const filled = Math.floor(ratio * width)
  const bar = '='.repeat(filled) + ' '.repeat(width - filled)

  const numbers = `${current} / ${total}`
  // Stay on the same line until the last item, then finish the line
  writeInSameLine(
    `${C('magenta')} [${bar}]${C('green')} ${numbers}${C()}`,
    current !== total
  )
}
|
|
|
|
/**
 * Shorthand for logger.C().
 *
 * @param {string} [color] colour name; callers omit it after coloured text
 *   (presumably to reset the colour — confirm against logger.js)
 * @returns {*} whatever logger.C returns; callers embed it in template strings
 */
function C(color) {
  const colorCode = logger.C(color)
  return colorCode
}
|
|
|
|
// Terminate explicitly once the synchronous work above is done.
// NOTE(review): presumably required because the 'message' listener registered on `process`
// would otherwise keep a forked child alive — confirm.
process.exit()
|