mrfrys-node-server/utils/merger.js
2019-10-05 15:46:36 +02:00

420 lines
11 KiB
JavaScript

/* ----------------------------------------------------------------------------
Question Server question file merger
GitLab: <https://gitlab.com/MrFry/question-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
// TODO: handle command-line flags, e.g.:
// join JSON data files, or raw (old-format) data files,
// or something else
// Not referenced anywhere in the visible part of this file.
const minMatchAmmount = 55
// Not referenced anywhere in the visible part of this file.
const minResultMatchPercent = 99
// Points subtracted from a match percentage for every word of length
// difference between the two compared strings (see Question.CompareString).
const lengthDiffMultiplier = 10
/**
 * A single question with its answer and optional image references.
 *
 * Fields:
 *   Q - question text
 *   A - answer text
 *   I - image references (normally an array of strings), or undefined
 */
class Question {
  constructor (q, a, i) {
    this.Q = q
    this.A = a
    this.I = i
  }

  /**
   * Serializes the question in the line-based text format:
   * '?question', '!answer' and, when images are present, '>images'.
   */
  toString () {
    let r = '?' + this.Q + '\n!' + this.A
    if (this.I) { r += '\n>' + this.I }
    return r
  }

  HasQuestion () {
    return this.Q != undefined
  }

  HasAnswer () {
    return this.A != undefined
  }

  HasImage () {
    return this.I != undefined
  }

  /** A question is complete once it has both question and answer text. */
  IsComplete () {
    return this.HasQuestion() && this.HasAnswer()
  }

  /**
   * Computes a similarity score between 0 and 100.
   *
   * Two call shapes are supported:
   *   Compare(text, images) - `text` is a question string, `images` an
   *     optional list of image references. Only question text (and images,
   *     when given) are compared.
   *   Compare(otherQuestion) - question text, answer text and, when this
   *     question has images, the images are compared.
   *
   * @param {string|Question} q2 question text or another Question
   * @param {Array} [i] image references belonging to the text in `q2`
   * @returns {number} averaged match percentage
   */
  Compare (q2, i) {
    if (typeof q2 === 'string') {
      const questionMatch = Question.CompareString(this.Q, q2)
      if (i == undefined || i.length == 0) { return questionMatch }

      if (this.HasImage()) {
        const imageMatch = Question.CompareString(
          Question.ImagesToString(this.I),
          Question.ImagesToString(i)
        )
        return (questionMatch + imageMatch) / 2
      }

      // The caller has images but this question has none: apply a fixed
      // penalty, never going below zero.
      return Math.max(0, questionMatch - 30)
    }

    const questionMatch = Question.CompareString(this.Q, q2.Q)
    const answerMatch = Question.CompareString(this.A, q2.A)

    // When this question has no images, only text is compared, even if the
    // other question carries images.
    if (!this.HasImage()) {
      return (questionMatch + answerMatch) / 2
    }

    // Images only match when both sides have them. (The previous condition
    // was inverted, so this score was always 0 and identical questions with
    // images could never reach 100.)
    const imageMatch = q2.I != undefined
      ? Question.CompareString(Question.ImagesToString(this.I), Question.ImagesToString(q2.I))
      : 0
    return (questionMatch + answerMatch + imageMatch) / 3
  }

  /**
   * Turns an image reference list into one space separated string.
   * Non-array values are converted with String() instead of throwing.
   */
  static ImagesToString (images) {
    return Array.isArray(images) ? images.join(' ') : String(images)
  }

  /**
   * Word-based similarity of two strings.
   *
   * The score is the percentage of words of `s1` that also occur in `s2`,
   * minus `lengthDiffMultiplier` points for every word of difference in
   * length, clamped so it never goes below 0.
   *
   * @param {string} s1
   * @param {string} s2
   * @returns {number} match percentage
   */
  static CompareString (s1, s2) {
    const words1 = SimplifyStringForComparison(s1).split(' ')
    const words2 = SimplifyStringForComparison(s2).split(' ')
    const knownWords = new Set(words2)

    let match = 0
    for (const word of words1) {
      if (knownWords.has(word)) { match++ }
    }

    // matched words percent
    let percent = Math.round(Number(((match / words1.length) * 100).toFixed(2)))
    percent -= Math.abs(words2.length - words1.length) * lengthDiffMultiplier
    return percent < 0 ? 0 : percent
  }
}
/**
 * A named collection of questions.
 *
 * Fields:
 *   Name      - the subject's name
 *   Questions - the questions stored under this subject, in insertion order
 */
class Subject {
  constructor (n) {
    this.Name = n
    this.Questions = []
  }

  /** Number of questions stored in this subject. */
  get length () {
    return this.Questions.length
  }

  /** Appends a question to the end of the subject. */
  AddQuestion (q) {
    this.Questions.push(q)
  }

  /**
   * Serializes the subject: a '+name' header line followed by every
   * question's own string form, one after another.
   */
  toString () {
    const serialized = this.Questions.map((question) => question.toString())
    return '+' + this.Name + '\n' + serialized.join('\n')
  }
}
/**
 * A question database: an ordered list of subjects, each holding questions.
 */
class QuestionDB {
  constructor () {
    this.Subjects = []
  }

  /** Number of subjects in the database. */
  get length () {
    return this.Subjects.length
  }

  /**
   * Returns the stored subject with the given name, or undefined.
   * Uses loose equality, matching how names were compared before.
   */
  FindSubject (name) {
    return this.Subjects.find((stored) => stored.Name == name)
  }

  /**
   * Adds a question under the subject called `subj`, creating the subject
   * when it does not exist yet.
   *
   * @param {string} subj subject name
   * @param {Question} q question to store
   */
  AddQuestion (subj, q) {
    const existing = this.FindSubject(subj)
    if (existing) {
      existing.AddQuestion(q)
      return
    }
    const created = new Subject(subj)
    created.AddQuestion(q)
    this.Subjects.push(created)
  }

  /**
   * Adds a whole subject. When a subject with the same name is already
   * stored, the incoming questions are appended to the stored subject
   * (previously they were silently dropped because the result of
   * Array.prototype.concat was discarded). Otherwise the subject object
   * itself is stored.
   *
   * Note: the stored subject is modified in place when merging.
   *
   * @param {Subject} subj subject to add or merge
   */
  AddSubject (subj) {
    const existing = this.FindSubject(subj.Name)
    if (!existing) {
      this.Subjects.push(subj)
      return
    }
    // Adding the very same object again must not duplicate its questions
    // (and iterating an array while appending to it would never finish).
    if (existing === subj) { return }
    for (const question of subj.Questions) {
      existing.Questions.push(question)
    }
  }

  /** Serializes every subject, separated by a blank line. */
  toString () {
    return this.Subjects.map((stored) => stored.toString()).join('\n\n')
  }
}
// Project-local helpers; the visible code uses utils.ReadFile and utils.WriteFile.
var utils = require('./utils.js')
// NOTE(review): `actions` is not referenced in the visible code - confirm
// whether this require is still needed (e.g. for side effects).
var actions = require('./actions.js')
// Script entry point. Main is a function declaration, so it is hoisted and
// can be called before its definition below.
Main()
/**
 * Entry point of the merger script.
 *
 * Reads every file named on the command line (first as JSON, falling back
 * to the old line-based format when JSON parsing throws), prints a summary
 * of each parsed database, merges them, removes duplicate questions and
 * writes the result as JSON through utils.WriteFile under the name 'newData'.
 */
function Main () {
  console.clear()
  const params = GetParams()
  console.log(params)

  const dbs = []
  for (const path of params) {
    PrintLN()
    console.log(path + ': ')
    try {
      dbs.push(ParseJSONData(utils.ReadFile(path)))
      console.log('JSON data added')
    } catch (e) {
      console.log(e)
      console.log('Trying with old format...')
      dbs.push(ReadData(utils.ReadFile(path)).result)
    }
  }

  PrintLN()
  for (const parsed of dbs) {
    PrintDB(parsed)
  }

  // With a single input the per-subject question counts are remembered
  // before deduplication so the result table can show "before -> after".
  const olds = []
  if (dbs.length == 1) {
    const only = dbs[0]
    for (let idx = 0; idx < only.length; idx += 1) {
      olds.push(only.Subjects[idx].length)
    }
  }

  console.log('Parsed data count: ' + dbs.length)
  PrintLN()

  console.log('Merging databases...')
  const merged = MergeDatabases(dbs)

  console.log('Removing duplicates...')
  const deduplicated = RemoveDuplicates(merged)

  console.log('RESULT:')
  PrintDB(deduplicated, olds)

  utils.WriteFile(JSON.stringify(deduplicated), 'newData')
  console.log('File written!')
}
/** Prints a horizontal separator line to the console. */
function PrintLN () {
  const separator = '------------------------------------------------------'
  console.log(separator)
}
/**
 * Prints a table with one row per subject of the database `r`:
 * row index, subject name (padded to the widest name) and question count.
 * When `olds` is a non-empty array, each row additionally shows
 * `olds[row] -> count`. Ends with the total question count and a separator.
 *
 * @param {QuestionDB} r database to print
 * @param {Array<number>} [olds] earlier per-subject counts, indexed like r.Subjects
 */
function PrintDB (r, olds) {
  console.log('Data subject count: ' + r.length)

  // Width of the longest subject name, used to align the count columns.
  let widestName = 0
  for (let idx = 0; idx < r.length; idx += 1) {
    const nameLength = r.Subjects[idx].Name.length
    if (nameLength > widestName) { widestName = nameLength }
  }

  // Left-pads a count with spaces to three characters.
  const padCount = (count) => (count < 10 ? ' ' : '') + (count < 100 ? ' ' : '') + count

  let total = 0
  for (let idx = 0; idx < r.length; idx += 1) {
    const subject = r.Subjects[idx]
    const prefix = (idx < 10 ? idx + ' ' : '' + idx) + ': '
    // Every fourth row is filled with dots to guide the eye across the row.
    const filler = idx % 4 == 0 ? '.' : ' '

    let row = (prefix + subject.Name).padEnd(prefix.length + widestName + 4, filler)

    if (olds && olds.length > 0) {
      // TODO: check that olds[idx] really belongs to this row
      row += padCount(olds[idx]) + ' -> '
    }

    row += padCount(subject.length)
    total += subject.length
    row += ' db'
    console.log(row)
  }

  console.log('Total questions: ' + total)
  PrintLN()
}
/**
 * Returns the command-line arguments that follow the first two entries of
 * process.argv.
 *
 * Uses slice instead of splice so process.argv is left intact and repeated
 * calls return the same list (splice removed the arguments from process.argv,
 * so a second call returned an empty array).
 *
 * @returns {string[]} a new array with the arguments
 */
function GetParams () {
  return process.argv.slice(2)
}
/**
 * Builds a QuestionDB from JSON text shaped like
 * { "Subjects": [ { "Name": ..., "Questions": [ { "Q", "A", "I" } ] } ] }.
 *
 * Throws when `data` is not valid JSON or does not have that shape; Main
 * catches any exception from here and retries with the old text format.
 *
 * @param {string} data JSON text
 * @returns {QuestionDB} the parsed database
 */
function ParseJSONData (data) {
  const parsed = JSON.parse(data)
  const db = new QuestionDB()

  for (const rawSubject of parsed.Subjects) {
    const subject = new Subject(rawSubject.Name)
    for (const rawQuestion of rawSubject.Questions) {
      subject.AddQuestion(new Question(rawQuestion.Q, rawQuestion.A, rawQuestion.I))
    }
    // AddSubject merges subjects that share a name.
    db.AddSubject(subject)
  }

  return db
}
/**
 * Combines several databases into a new QuestionDB by adding every subject
 * of every input database, in order, through QuestionDB.AddSubject.
 *
 * @param {QuestionDB[]} dbs databases to merge
 * @returns {QuestionDB} the merged database
 */
function MergeDatabases (dbs) {
  const merged = new QuestionDB()
  dbs.forEach((source) => {
    for (let idx = 0; idx < source.length; idx += 1) {
      merged.AddSubject(source.Subjects[idx])
    }
  })
  return merged
}
/*
 * Returns a question database built from data in the old text format.
 * The parameter is the raw file content as one string with "\n" line breaks.
 *
 * Every line starts with a one character marker:
 *   '+' subject name, '?' question text, '!' answer text, '>' image list
 * Lines whose marker is not allowed at the current position are skipped and
 * recorded in `logs` as "<line index>: <line>".
 *
 * Returns { result: QuestionDB, logs: string[] }.
 *
 * TODO: repeated '?' lines are not listed in the logs, although they are
 * handled (the later question text overwrites the earlier one)
 * */
function ReadData (data) {
  const lines = data.split('\n')
  const db = new QuestionDB()
  const logs = []
  let subjectName = '' // the current subject's name
  let expected = ['+', '?'] // markers accepted on the next line
  let pending = new Question()

  // Stores the question being built, but only once it has text and answer.
  const storePendingIfComplete = () => {
    if (pending.IsComplete()) { db.AddQuestion(subjectName, pending) }
  }

  for (let lineNo = 0; lineNo < lines.length; lineNo += 1) {
    const line = lines[lineNo]
    const marker = line[0]

    if (!expected.includes(marker)) {
      logs.push(lineNo + ': ' + line)
      continue
    }

    let payload = line.substring(1).trim()

    switch (marker) {
      case '+':
        storePendingIfComplete()
        pending = new Question()
        subjectName = payload
        expected = ['?']
        break

      case '?':
        if (pending.IsComplete()) {
          db.AddQuestion(subjectName, pending)
          pending = new Question()
        }
        // overwriting an unanswered question is allowed here,
        // e.g. for the sequence: ?????!>
        pending.Q = payload
        expected = ['!', '?']
        break

      case '!':
        if (!pending.HasQuestion()) { throw 'No question! (A)' }
        // only the first answer counts, e.g. for the sequence: ?!!!!
        if (!pending.HasAnswer()) {
          payload = payload.replace('A helyes válaszok: ', '')
          payload = payload.replace('A helyes válasz: ', '')
          pending.A = payload
        }
        expected = ['?', '>', '+']
        break

      case '>':
        if (!pending.HasQuestion()) { throw 'No question! (I)' }
        if (!pending.HasAnswer()) { throw 'No asnwer! (I)' }
        // only the first image line counts, e.g. for the sequence: ?!>>>
        if (!pending.HasImage()) {
          try {
            pending.I = JSON.parse(payload)
          } catch (e) {
            // not JSON: treat the line as a comma separated list
            pending.I = payload.split(',')
          }
        }
        expected = ['?', '+']
        break
    }
  }

  // End of input: keep the last question if it is complete.
  storePendingIfComplete()
  return {
    result: db,
    logs: logs
  }
}
/**
 * Removes duplicate questions from every subject of the database, in place.
 *
 * @param {QuestionDB} dataObj database to clean
 * @returns {QuestionDB} the same database object
 */
function RemoveDuplicates (dataObj) {
  let idx = 0
  while (idx < dataObj.length) {
    RemoveDuplFromSubject(dataObj.Subjects[idx])
    idx += 1
  }
  return dataObj
}
/**
 * Rebuilds the subject's question list without duplicates, in place.
 * Questions are re-added in their original order; a question is skipped when
 * its Compare against an already kept question is exactly 100.
 *
 * @param {Subject} subj subject to clean
 */
function RemoveDuplFromSubject (subj) {
  const candidates = subj.Questions
  subj.Questions = []

  // Only a 100% match counts as a duplicate.
  const isAlreadyKept = (candidate) => {
    for (let k = 0; k < subj.length; k += 1) {
      if (candidate.Compare(subj.Questions[k]) == 100) { return true }
    }
    return false
  }

  for (let idx = 0; idx < candidates.length; idx += 1) {
    const candidate = candidates[idx]
    if (!isAlreadyKept(candidate)) {
      subj.AddQuestion(candidate)
    }
  }
}
/**
 * Normalizes a string for word-by-word comparison: removes the characters
 * , . : ! then collapses whitespace and lower-cases the text.
 *
 * Previously the result of String.prototype.replace was discarded, so no
 * character was ever removed; the per-character RegExp also used an
 * unescaped '.', which matches any character.
 *
 * @param {string} value text to simplify
 * @returns {string} simplified text
 */
function SimplifyStringForComparison (value) {
  // Strip punctuation before collapsing spaces, so removing a character
  // that stood between two spaces cannot leave a double space behind.
  const withoutPunctuation = value.replace(/[,.:!]/g, '')
  return RemoveUnnecesarySpaces(withoutPunctuation).toLowerCase()
}
/**
 * Turns every whitespace character into a plain space, collapses runs of
 * spaces into a single space and trims both ends.
 *
 * The collapse is done in one pass; the previous loop tested for a single
 * space while only replacing double spaces, so it never terminated for
 * text that contained a space.
 *
 * @param {string} toremove text to clean
 * @returns {string} text with single spaces only
 */
function RemoveUnnecesarySpaces (toremove) {
  return NormalizeSpaces(toremove).replace(/ {2,}/g, ' ').trim()
}
/**
 * Replaces every single whitespace character (tab, newline, ...) with a
 * plain space; the number of characters stays the same.
 *
 * @param {string} input text to normalize
 * @returns {string} text containing spaces as its only whitespace
 */
function NormalizeSpaces (input) {
  const pieces = input.split(/\s/)
  return pieces.join(' ')
}