mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
Remove duplicate tool redo
This commit is contained in:
parent
87df6638ed
commit
14c9df4a62
1 changed files with 172 additions and 272 deletions
|
@ -1,309 +1,209 @@
|
||||||
/* ----------------------------------------------------------------------------
|
const minpercent = 97
|
||||||
|
const resultDbFileName = 'res.json'
|
||||||
|
|
||||||
Question Server question file merger
|
// ---------------------------------------------------------------------------------------------------
|
||||||
GitLab: <https://gitlab.com/MrFry/mrfrys-node-server>
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
const utils = require('../../dist/utils/utils.js').default // eslint-disable-line
|
||||||
it under the terms of the GNU General Public License as published by
|
const logger = require('../../dist/utils/logger.js').default // eslint-disable-line
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
// TODO: merge 2 dbs
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
// TODO: filter questions out from a db based on another, producing a new one
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
const params = process.argv.splice(2)
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
------------------------------------------------------------------------- */
|
console.log('Params', params)
|
||||||
|
|
||||||
const utils = require('./utils.js')
|
const fileA = params[0]
|
||||||
const {
|
const fileB = params[1]
|
||||||
searchData,
|
|
||||||
addQuestion,
|
|
||||||
getSubjNameWithoutYear,
|
|
||||||
createQuestion,
|
|
||||||
} = require('./classes.js')
|
|
||||||
const actions = require('./actions.js')
|
|
||||||
const logger = require('./logger.js')
|
|
||||||
|
|
||||||
const resultFileName = 'res.json'
|
const dbA = utils.ReadJSON(fileA)
|
||||||
const minMatchAmmount = 100
|
const dbB = fileB ? utils.ReadJSON(fileB) : null
|
||||||
|
|
||||||
const logPath = './mergeLogs/mergelog_' + GetDateString().replace(/ /g, '_')
|
const line =
|
||||||
|
'===================================================================='
|
||||||
|
const logPath = './duplicateRemovingLog/'
|
||||||
|
utils.CreatePath(logPath)
|
||||||
|
|
||||||
Main()
|
if (!dbB) {
|
||||||
|
rmDuplicates(dbA).then((res) => {
|
||||||
async function Main() {
|
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
||||||
const params = GetParams()
|
console.log('File written')
|
||||||
console.log(params)
|
|
||||||
if (params.length === 0) {
|
|
||||||
console.error('No params! Need a path to a question database!')
|
|
||||||
process.exit()
|
|
||||||
}
|
|
||||||
const data = actions.LoadJSON(params[0])
|
|
||||||
|
|
||||||
PrintDB(data)
|
|
||||||
console.log(hr('='))
|
|
||||||
|
|
||||||
const { res, stats } = await RemoveDuplicates(data)
|
|
||||||
console.log(hr('='))
|
|
||||||
|
|
||||||
LogStats(stats, data, res)
|
|
||||||
console.log(hr('='))
|
|
||||||
|
|
||||||
console.log('Result database:')
|
|
||||||
PrintDB(res)
|
|
||||||
console.log(hr('='))
|
|
||||||
|
|
||||||
utils.WriteFile(JSON.stringify(res), resultFileName)
|
|
||||||
console.log(C('green') + resultFileName + ' written!' + C())
|
|
||||||
console.log(hr('='))
|
|
||||||
|
|
||||||
console.log(C('green') + 'Done' + C())
|
|
||||||
}
|
|
||||||
|
|
||||||
function LogStats(stats, oldData, newData) {
|
|
||||||
const maxSubjNameLength = MaxLengthOf(stats, 'name')
|
|
||||||
const maxPrevLength = MaxLengthOf(stats, 'prevQuestions')
|
|
||||||
const maxAddedLength = MaxLengthOf(stats, 'addedQuestions')
|
|
||||||
const maxRemovedLength = MaxLengthOf(stats, 'removedQuestions')
|
|
||||||
|
|
||||||
stats.forEach((currStat) => {
|
|
||||||
const { name, prevQuestions, addedQuestions, removedQuestions } = currStat
|
|
||||||
let toLog = ''
|
|
||||||
|
|
||||||
toLog += C('green')
|
|
||||||
toLog += GetExactLength(name, maxSubjNameLength)
|
|
||||||
toLog += C()
|
|
||||||
toLog += ' '
|
|
||||||
toLog += C('magenta')
|
|
||||||
toLog += GetExactLength(prevQuestions, maxPrevLength)
|
|
||||||
toLog += C()
|
|
||||||
toLog += C('cyan')
|
|
||||||
toLog += ' -> '
|
|
||||||
toLog += C()
|
|
||||||
toLog += C('green')
|
|
||||||
toLog += GetExactLength(addedQuestions, maxAddedLength)
|
|
||||||
toLog += C()
|
|
||||||
toLog += ' [ '
|
|
||||||
toLog += C('red')
|
|
||||||
toLog += GetExactLength(removedQuestions, maxRemovedLength)
|
|
||||||
toLog += C()
|
|
||||||
toLog += ' ]'
|
|
||||||
|
|
||||||
console.log(toLog)
|
|
||||||
})
|
})
|
||||||
console.log(hr())
|
} else {
|
||||||
console.log('Old data:')
|
difference({ dbA: dbA, dbB: dbB }).then((res) => {
|
||||||
LogDataCount(oldData)
|
utils.WriteFile(JSON.stringify(res), resultDbFileName)
|
||||||
console.log('New data:')
|
console.log('File written')
|
||||||
LogDataCount(newData)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Prints a one-line summary of a question database: the number of subjects
 * and the total number of questions over all subjects.
 *
 * @param {Array<{Questions: Array}>} data - list of subjects; every entry is
 *   read for the length of its `Questions` array
 */
function LogDataCount(data) {
  // Sum the per-subject question counts.
  let questionCount = 0
  for (const { Questions } of data) {
    questionCount += Questions.length
  }
  const subjectCount = data.length

  // C(name) / C() come from the surrounding file and wrap the numbers in
  // colour codes (presumably "start colour" / "reset" - confirm in logger).
  console.log(
    `Subjects: ${C('green')}${subjectCount}${C()}, Questions: ${C(
      'green'
    )}${questionCount}${C()}`
  )
}
|
|
||||||
|
|
||||||
function PrintDB(data) {
|
|
||||||
const maxSubjNameLength = MaxLengthOf(data, 'Name')
|
|
||||||
|
|
||||||
data.forEach((subj) => {
|
|
||||||
let toLog = ''
|
|
||||||
toLog += C('green')
|
|
||||||
toLog += GetExactLength(subj.Name, maxSubjNameLength)
|
|
||||||
toLog += C()
|
|
||||||
toLog += ' [ '
|
|
||||||
toLog += C('cyan')
|
|
||||||
toLog += subj.Questions.length
|
|
||||||
toLog += C()
|
|
||||||
toLog += ' ]'
|
|
||||||
|
|
||||||
console.log(toLog)
|
|
||||||
})
|
})
|
||||||
console.log(hr())
|
|
||||||
LogDataCount(data)
|
|
||||||
console.log(hr())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function GetExactLength(string, length) {
|
async function rmDuplicates(db) {
|
||||||
let toLog = string.toString()
|
return await difference({ dbA: db })
|
||||||
const lengthDiff = length - toLog.length
|
}
|
||||||
for (let i = 0; i < lengthDiff; i++) {
|
|
||||||
toLog += ' '
|
async function difference({ dbA, dbB }) {
|
||||||
|
const doingDifference = !!dbB
|
||||||
|
// Stuff only from A
|
||||||
|
const resultDb = []
|
||||||
|
let dbLength = 0
|
||||||
|
let removedTotal = 0
|
||||||
|
|
||||||
|
iterateSubjects(dbA, () => {
|
||||||
|
dbLength++
|
||||||
|
})
|
||||||
|
|
||||||
|
const getResultDbLength = () => {
|
||||||
|
let resultDbLength = 0
|
||||||
|
iterateSubjects(resultDb, () => {
|
||||||
|
resultDbLength++
|
||||||
|
})
|
||||||
|
return resultDbLength
|
||||||
}
|
}
|
||||||
|
|
||||||
return toLog
|
for (let i = 0; i < dbA.length; i++) {
|
||||||
}
|
const subj = dbA[i]
|
||||||
|
const subjLogPath = logPath + subj.Name
|
||||||
|
utils.WriteFile('', subjLogPath)
|
||||||
|
let removedCount = 0
|
||||||
|
|
||||||
function MaxLengthOf(prop, key) {
|
hr()
|
||||||
return prop.reduce((acc, currStat) => {
|
console.log(
|
||||||
if (acc < currStat[key].toString().length) {
|
`${C('blue')}${i + 1} / ${dbA.length}: ${C('green')}${subj.Name}, ${C(
|
||||||
acc = currStat[key].toString().length
|
'blue'
|
||||||
}
|
)}${subj.Questions.length}${C('green')} questions${C()}`
|
||||||
return acc
|
)
|
||||||
}, 0)
|
console.log(
|
||||||
}
|
`${C('green')}Result length: ${C('blue')}${getResultDbLength()}${C(
|
||||||
|
'green'
|
||||||
|
)}, original length: ${C('blue')}${dbLength}${C()}`
|
||||||
|
)
|
||||||
|
|
||||||
async function RemoveDuplicates(data) {
|
|
||||||
console.log(C('yellow') + 'Removing duplicates' + C())
|
|
||||||
let res = []
|
|
||||||
const stats = []
|
|
||||||
|
|
||||||
for (let i = 0; i < data.length; i++) {
|
|
||||||
const subj = data[i]
|
|
||||||
const logFile =
|
|
||||||
logPath + '/' + subj.Name.replace(/ /g, '_').replace(/\//g, '-')
|
|
||||||
let addedQuestions = 0
|
|
||||||
let removedQuestions = 0
|
|
||||||
for (let j = 0; j < subj.Questions.length; j++) {
|
for (let j = 0; j < subj.Questions.length; j++) {
|
||||||
const question = subj.Questions[j]
|
const question = subj.Questions[j]
|
||||||
try {
|
const searchRes = await search({
|
||||||
// Searching for same question in result database
|
qdb: doingDifference ? dbB : resultDb,
|
||||||
const tempQuestion = createQuestion(
|
subjName: subj.Name,
|
||||||
question.Q,
|
question: question,
|
||||||
question.A,
|
searchInAllIfNoResult: doingDifference,
|
||||||
question.data
|
})
|
||||||
)
|
|
||||||
let result = await searchData(res, tempQuestion)
|
|
||||||
result = result.reduce((acc, res) => {
|
|
||||||
if (res.match >= minMatchAmmount) {
|
|
||||||
acc.push(res)
|
|
||||||
}
|
|
||||||
return acc
|
|
||||||
}, [])
|
|
||||||
|
|
||||||
// if the new database already contains one or more identical questions
|
printProgressBar(j + 1, subj.Questions.length)
|
||||||
if (result.length > 0) {
|
|
||||||
utils.AppendToFile(hr('#'), logFile)
|
const res = hasRequiredPercent(searchRes, minpercent)
|
||||||
utils.AppendToFile('QUESTION', logFile)
|
|
||||||
utils.AppendToFile(JSON.stringify(tempQuestion, null, 2), logFile)
|
// no result: adding to difference
|
||||||
utils.AppendToFile(hr(), logFile)
|
if (res.length === 0) {
|
||||||
utils.AppendToFile('SAMES', logFile)
|
// no result: adding to difference
|
||||||
utils.AppendToFile(JSON.stringify(result, null, 2), logFile)
|
addQuestion(resultDb, subj.Name, question)
|
||||||
removedQuestions++
|
} else {
|
||||||
} else {
|
// has result, not adding to difference
|
||||||
// if no identical questions are found, then add it to the new db
|
utils.AppendToFile(
|
||||||
addQuestion(res, getSubjNameWithoutYear(subj.Name), tempQuestion)
|
line +
|
||||||
addedQuestions++
|
'\n' +
|
||||||
}
|
line +
|
||||||
LogResultProgress(
|
'\n' +
|
||||||
subj,
|
JSON.stringify(question, null, 2) +
|
||||||
i,
|
'\n' +
|
||||||
j,
|
line +
|
||||||
subj.Questions.length,
|
JSON.stringify(res, null, 2) +
|
||||||
addedQuestions,
|
'\n',
|
||||||
removedQuestions,
|
subjLogPath
|
||||||
data.length
|
|
||||||
)
|
)
|
||||||
} catch (err) {
|
removedCount++
|
||||||
console.log()
|
removedTotal++
|
||||||
console.log('ERROR')
|
|
||||||
console.log(err)
|
|
||||||
console.log('QUESTION')
|
|
||||||
console.log(question)
|
|
||||||
console.log()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats.push({
|
console.log(
|
||||||
name: subj.Name,
|
`${C('yellow')}Removed ${C('red')}${removedCount}${C(
|
||||||
prevQuestions: subj.Questions.length,
|
'yellow'
|
||||||
addedQuestions: addedQuestions,
|
)} questions${C()}`
|
||||||
removedQuestions: removedQuestions,
|
)
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return { res, stats }
|
|
||||||
}
|
|
||||||
|
|
||||||
function LogResultProgress(
|
hr()
|
||||||
subj,
|
// console.log('RESTUL', JSON.stringify(resultDb, null, 2))
|
||||||
i,
|
console.log(
|
||||||
j,
|
`Result length: ${getResultDbLength()}, original length: ${dbLength}, removed ${removedTotal} questions`
|
||||||
length,
|
|
||||||
addedQuestions,
|
|
||||||
removedQuestions,
|
|
||||||
subjCount
|
|
||||||
) {
|
|
||||||
process.stdout.write(
|
|
||||||
'[ ' +
|
|
||||||
C('cyan') +
|
|
||||||
(i + 1) +
|
|
||||||
C() +
|
|
||||||
' / ' +
|
|
||||||
C('green') +
|
|
||||||
subjCount +
|
|
||||||
C() +
|
|
||||||
' ] ' +
|
|
||||||
C('yellow') +
|
|
||||||
subj.Name +
|
|
||||||
C() +
|
|
||||||
': ' +
|
|
||||||
C('green') +
|
|
||||||
subj.Questions.length +
|
|
||||||
' ' +
|
|
||||||
C('cyan') +
|
|
||||||
'-> ' +
|
|
||||||
C('green') +
|
|
||||||
addedQuestions +
|
|
||||||
C() +
|
|
||||||
', removed: ' +
|
|
||||||
C('red') +
|
|
||||||
removedQuestions +
|
|
||||||
C()
|
|
||||||
)
|
)
|
||||||
|
return resultDb
|
||||||
|
}
|
||||||
|
|
||||||
if (j === length - 1) {
|
function hasRequiredPercent(result, minpercent) {
|
||||||
process.stdout.write('\n')
|
return result.reduce((acc, res) => {
|
||||||
} else {
|
if (res.match >= minpercent) {
|
||||||
|
acc.push(res)
|
||||||
|
}
|
||||||
|
return acc
|
||||||
|
}, [])
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
// db editing tools
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Asynchronous wrapper around `doSearch` for a single question.
 *
 * Declared `async` instead of hand-building a Promise around a plain call:
 * the return value of `doSearch` becomes the resolution value and anything it
 * throws becomes a rejection, which is what the explicit `new Promise`
 * version did as well.
 *
 * @param {object} args
 * @param {*} args.qdb - database handed to `doSearch` as its first argument
 * @param {*} args.subjName - subject name handed to `doSearch`
 * @param {*} args.question - question handed to `doSearch`
 * @param {*} args.searchInAllIfNoResult - forwarded to `doSearch` unchanged
 * @returns {Promise<*>} resolves with whatever `doSearch` returns
 */
async function search({ qdb, subjName, question, searchInAllIfNoResult }) {
  // The fourth doSearch argument is always null here; `minpercent` is the
  // module-level match threshold.
  return doSearch(
    qdb,
    subjName,
    question,
    null,
    minpercent,
    searchInAllIfNoResult
  )
}
|
||||||
|
|
||||||
|
/**
 * Walks every question of every subject in a database and hands each
 * (subject, question) pair to a callback, in database order.
 *
 * @param {Array<{Questions: Array}>} db - list of subjects
 * @param {function(object, object): void} fn - called once per question with
 *   the owning subject and the question itself; its return value is ignored
 */
function iterateSubjects(db, fn) {
  const visitQuestionsOf = (subject) => {
    // Only the question is forwarded; index/array arguments are dropped.
    const forward = (entry) => {
      fn(subject, entry)
    }
    subject.Questions.forEach(forward)
  }

  db.forEach(visitQuestionsOf)
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
// logging and tools
|
||||||
|
// ---------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Prints a horizontal rule made of '=' characters, wrapped in C('cyan') / C().
 *
 * The rule is as wide as the terminal. `process.stdout.columns` is only set
 * when stdout is a TTY, so when output is piped or redirected the width
 * falls back to 80 columns instead of producing an empty line.
 */
function hr() {
  const fallbackWidth = 80
  const columns = process.stdout.columns
  const width = Number.isInteger(columns) && columns > 0 ? columns : fallbackWidth

  console.log(`${C('cyan')}${'='.repeat(width)}${C()}`)
}
|
||||||
|
|
||||||
|
function writeInSameLine(text, returnToLineStart) {
|
||||||
|
process.stdout.write(text)
|
||||||
|
if (returnToLineStart) {
|
||||||
process.stdout.write('\r')
|
process.stdout.write('\r')
|
||||||
|
} else {
|
||||||
|
process.stdout.write('\n')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function hr(char) {
|
function printProgressBar(current, total) {
|
||||||
let hr = ''
|
const width = process.stdout.columns - 25
|
||||||
const cols = process.stdout.columns || 20
|
|
||||||
for (let i = 0; i < cols; i++) {
|
if (width <= 0) {
|
||||||
hr += char || '-'
|
return
|
||||||
}
|
}
|
||||||
return hr
|
|
||||||
|
const x = width / total
|
||||||
|
const xCurrent = Math.floor(current * x)
|
||||||
|
const xTotal = Math.floor(total * x)
|
||||||
|
|
||||||
|
let line = ''
|
||||||
|
for (let i = 0; i < xCurrent; i++) {
|
||||||
|
line += '='
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let i = 0; i < xTotal - xCurrent; i++) {
|
||||||
|
line += ' '
|
||||||
|
}
|
||||||
|
const numbers = `${current} / ${total}`
|
||||||
|
writeInSameLine(
|
||||||
|
`${C('magenta')} [${line}]${C('green')} ${numbers}${C()}`,
|
||||||
|
current !== total
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
function C(color) {
|
function C(color) {
|
||||||
return logger.C(color)
|
return logger.C(color)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Returns the command line arguments given to the script, i.e. everything
 * after the first two entries of `process.argv`.
 *
 * Uses `slice` rather than `splice` so `process.argv` is left intact:
 * `splice` removed the arguments from the shared array, so a second call
 * (or any other reader of `process.argv`) would find none.
 *
 * @returns {string[]} a new array holding the script's arguments
 */
function GetParams() {
  return process.argv.slice(2)
}
|
|
||||||
|
|
||||||
/**
 * Formats a date as `YYYY-MM-DD HH:MM:SS` using the local-time getters of
 * `Date`. Month, day, hours, minutes and seconds are zero-padded to two
 * digits; the year is written as returned by `getFullYear()`.
 *
 * @param {Date} [date=new Date()] - the moment to format; defaults to the
 *   current time, which is what the parameterless version always used
 * @returns {string} the formatted timestamp
 */
function GetDateString(date = new Date()) {
  const pad = (value) => String(value).padStart(2, '0')

  const day = [
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is zero-based
    pad(date.getDate()),
  ].join('-')
  const time = [
    pad(date.getHours()),
    pad(date.getMinutes()),
    pad(date.getSeconds()),
  ].join(':')

  return `${day} ${time}`
}
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue