mrfrys-node-server/utils/merger.js
YourFriendlyNeighborhoodDealer 0d8c1c154a Moving stuff around
2019-08-14 10:27:31 +02:00

452 lines
10 KiB
JavaScript

/* ----------------------------------------------------------------------------
Question Server question file merger
GitLab: <https://gitlab.com/YourFriendlyNeighborhoodDealer/question-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
// TODO: handle flags
// join json datas, or raw datas
// or something else
// Tuning constants for question matching.
// NOTE(review): minMatchAmmount and minResultMatchPercent are not referenced
// anywhere in the visible part of this file - confirm before relying on or
// removing them.
const minMatchAmmount = 55;
const minResultMatchPercent = 99;
// Percentage points Question.CompareString subtracts from the match score for
// every word of difference between the two compared word counts.
const lengthDiffMultiplier = 10;
/**
 * One question record: question text (Q), answer text (A) and an optional
 * list of image identifiers (I).
 */
class Question {
  /**
   * @param {string} [q] question text
   * @param {string} [a] answer text
   * @param {Array} [i] image identifiers; Compare joins them with " "
   */
  constructor(q, a, i) {
    this.Q = q;
    this.A = a;
    this.I = i;
  }

  /**
   * Serializes to the text format: "?question", "!answer" and, only when I is
   * truthy, ">images", each on its own line.
   */
  toString() {
    let r = "?" + this.Q + "\n!" + this.A;
    if (this.I)
      r += "\n>" + this.I;
    return r;
  }

  // The loose comparisons below treat both null and undefined as "missing".
  HasQuestion() {
    return this.Q != undefined;
  }

  HasAnswer() {
    return this.A != undefined;
  }

  HasImage() {
    return this.I != undefined;
  }

  /** A question is complete once it has both a question and an answer. */
  IsComplete() {
    return this.HasQuestion() && this.HasAnswer();
  }

  /**
   * Scores how similar this question is to another one (0..100).
   *
   * Two call shapes are supported:
   *  - Compare(questionText, images): compares only the question text and,
   *    when `images` is a non-empty array, the images as well.
   *  - Compare(otherQuestion): compares question, answer and (when this
   *    question has images) images, and returns the average.
   *
   * @param {string|Question} q2 question text or another Question
   * @param {Array} [i] images belonging to q2 when q2 is a string
   * @returns {number} match percentage
   */
  Compare(q2, i) {
    if (typeof q2 == 'string') {
      const qmatchpercent = Question.CompareString(this.Q, q2);
      if (i == undefined || i.length == 0)
        return qmatchpercent;
      if (this.HasImage()) {
        const imatchpercent = Question.CompareString(this.I.join(" "), i.join(" "));
        return (qmatchpercent + imatchpercent) / 2;
      }
      // The caller supplied images but this question has none: apply a flat
      // 30 point penalty, never going below zero.
      return Math.max(qmatchpercent - 30, 0);
    }

    const qmatchpercent = Question.CompareString(this.Q, q2.Q);
    const amatchpercent = Question.CompareString(this.A, q2.A);
    if (!this.HasImage())
      return (qmatchpercent + amatchpercent) / 2;

    // The image score used to be computed behind an inverted condition and was
    // therefore always 0, so identical questions with images never reached
    // 100. A missing image on the other side still counts as a 0 match.
    const imatchpercent = q2.I != undefined ?
      Question.CompareString(this.I.join(" "), q2.I.join(" ")) :
      0;
    return (qmatchpercent + amatchpercent + imatchpercent) / 3;
  }

  /**
   * Word-based similarity of two strings: the percentage of s1's words that
   * also occur in s2, minus lengthDiffMultiplier points for every word of
   * difference in word count, clamped at 0.
   *
   * @param {string} s1
   * @param {string} s2
   * @returns {number} match percentage (never negative)
   */
  static CompareString(s1, s2) {
    const words1 = SimplifyStringForComparison(s1).split(" ");
    const words2 = SimplifyStringForComparison(s2).split(" ");
    const lookup = new Set(words2);

    let match = 0;
    for (const word of words1)
      if (lookup.has(word))
        match++;

    // matched words percent
    let percent = Math.round(Number(((match / words1.length) * 100).toFixed(2)));
    const lengthDifference = Math.abs(words2.length - words1.length);
    percent -= lengthDifference * lengthDiffMultiplier;
    return percent < 0 ? 0 : percent;
  }
}
/**
 * A named collection of questions.
 */
class Subject {
  /**
   * @param {string} n subject name
   */
  constructor(n) {
    this.Name = n;
    this.Questions = [];
  }

  /** Number of questions currently stored in this subject. */
  get length() {
    return this.Questions.length;
  }

  /** Appends a question to the end of the list. */
  AddQuestion(q) {
    this.Questions.push(q);
  }

  /**
   * Serializes to the text format: a "+name" header line followed by every
   * question's own toString() output, joined by newlines.
   */
  toString() {
    const body = this.Questions
      .map((question) => question.toString())
      .join("\n");
    return "+" + this.Name + "\n" + body;
  }
}
/**
 * A question database: an ordered list of Subject objects, each with a
 * distinct name.
 */
class QuestionDB {
  constructor() {
    this.Subjects = [];
  }

  /** Number of subjects in the database. */
  get length() {
    return this.Subjects.length;
  }

  /**
   * Adds a question to the subject called `subj`, creating the subject first
   * when no subject with that name exists yet.
   *
   * @param {string} subj subject name
   * @param {Question} q question to add
   */
  AddQuestion(subj, q) {
    // Loose equality is kept on purpose to match names exactly as before.
    const existing = this.Subjects.find((s) => s.Name == subj);
    if (existing) {
      existing.AddQuestion(q);
      return;
    }
    const n = new Subject(subj);
    n.AddQuestion(q);
    this.Subjects.push(n);
  }

  /**
   * Adds a whole subject. When a subject with the same name is already
   * stored, the incoming subject's questions are appended to the stored one
   * (the stored Subject object is mutated); otherwise the incoming subject
   * object itself is stored.
   *
   * @param {Subject} subj subject to add or merge
   */
  AddSubject(subj) {
    const existing = this.Subjects.find((s) => s.Name == subj.Name);
    if (!existing) {
      this.Subjects.push(subj);
      return;
    }
    // Previously the result of Array.prototype.concat was discarded here, so
    // the questions of a same-named subject were silently lost. Iterate over
    // a snapshot so that merging a subject into itself still terminates.
    for (const q of [...subj.Questions])
      existing.AddQuestion(q);
  }

  /** Serializes every subject, separated by a blank line. */
  toString() {
    return this.Subjects.map((s) => s.toString()).join("\n\n");
  }
}
// Project-local helper module; this file calls utils.ReadFile and
// utils.WriteFile.
var utils = require('./utils.js');
// NOTE(review): `actions` is not referenced in the visible part of this file -
// confirm whether this require is still needed (e.g. for load-time side
// effects) before removing it.
var actions = require('./actions.js');
// Run the merge as soon as the script is loaded. Main is a function
// declaration and is therefore hoisted, so calling it above its definition
// works.
Main();
/**
 * Script entry point.
 *
 * Reads every file named on the command line (JSON first, falling back to the
 * old text format when JSON parsing throws), prints a summary of each parsed
 * database, merges them, removes duplicates, prints the result and writes it
 * as JSON through utils.WriteFile under the name "newData".
 */
function Main() {
  console.clear();
  const files = GetParams();
  console.log(files);

  const dbs = [];
  for (const file of files) {
    PrintLN();
    console.log(file + ": ");
    try {
      const parsed = ParseJSONData(utils.ReadFile(file));
      dbs.push(parsed);
      console.log("JSON data added");
    } catch (e) {
      console.log(e);
      console.log("Trying with old format...");
      const fallback = ReadData(utils.ReadFile(file));
      dbs.push(fallback.result);
    }
  }

  PrintLN();
  for (const parsedDb of dbs) {
    PrintDB(parsedDb);
  }

  // Per-subject question counts before de-duplication; only collected when a
  // single database was given, so PrintDB can show "old -> new" columns.
  const olds = [];
  if (dbs.length == 1) {
    const only = dbs[0];
    for (let k = 0; k < only.length; k++) {
      olds.push(only.Subjects[k].length);
    }
  }

  console.log("Parsed data count: " + dbs.length);
  PrintLN();

  console.log("Merging databases...");
  const merged = MergeDatabases(dbs);

  console.log("Removing duplicates...");
  const deduped = RemoveDuplicates(merged);

  console.log("RESULT:");
  PrintDB(deduped, olds);

  utils.WriteFile(JSON.stringify(deduped), "newData");
  console.log("File written!");
}
/**
 * Prints a horizontal separator line to the console.
 */
function PrintLN() {
  const separator = "------------------------------------------------------";
  console.log(separator);
}
/**
 * Prints a per-subject summary table of a database to the console.
 *
 * Each row shows the subject index, its name padded to a common width (every
 * fourth row is padded with dots instead of spaces), optionally the old
 * question count followed by " -> ", and the current question count. A total
 * and a separator line follow the table.
 *
 * @param {QuestionDB} r database to print
 * @param {number[]} [olds] previous per-subject question counts, by index
 */
function PrintDB(r, olds) {
  console.log("Data subject count: " + r.length);

  // Width of the longest subject name, used to align the count columns.
  let widestName = 0;
  for (let k = 0; k < r.length; k++) {
    const nameLength = r.Subjects[k].Name.length;
    if (widestName < nameLength) {
      widestName = nameLength;
    }
  }

  // Left-pads a count with spaces so values below 1000 are three wide.
  const alignCount = (n) => (n < 10 ? " " : "") + (n < 100 ? " " : "") + n;

  let total = 0;
  for (let k = 0; k < r.length; k++) {
    const subject = r.Subjects[k];
    const filler = k % 4 == 0 ? "." : " ";

    let row = (k < 10 ? k + ' ' : String(k)) + ": ";
    const targetWidth = row.length + widestName + 4;
    row += subject.Name;
    while (row.length < targetWidth) {
      row += filler;
    }

    if (olds && olds.length > 0) {
      // TODO: check if correct row! should be now, but well...
      row += alignCount(olds[k]) + " -> ";
    }

    row += alignCount(subject.length);
    total += subject.length;
    row += " db";
    console.log(row);
  }

  console.log("Total questions: " + total);
  PrintLN();
}
/**
 * Returns the command-line arguments that follow the first two entries of
 * process.argv.
 *
 * Uses slice rather than splice so process.argv is left untouched and the
 * function returns the same result on every call.
 *
 * @returns {string[]} a new array with the arguments from index 2 onwards
 */
function GetParams() {
  return process.argv.slice(2);
}
/**
 * Builds a QuestionDB from JSON text of the shape
 * { Subjects: [ { Name, Questions: [ { Q, A, I }, ... ] }, ... ] }.
 *
 * Throws when the text is not valid JSON or does not have that shape; Main
 * relies on that to fall back to the old text format.
 *
 * @param {string} data JSON text
 * @returns {QuestionDB} the parsed database
 */
function ParseJSONData(data) {
  const parsed = JSON.parse(data);
  const db = new QuestionDB();
  for (const rawSubject of parsed.Subjects) {
    const subject = new Subject(rawSubject.Name);
    for (const rawQuestion of rawSubject.Questions) {
      subject.AddQuestion(new Question(rawQuestion.Q, rawQuestion.A, rawQuestion.I));
    }
    db.AddSubject(subject);
  }
  return db;
}
/**
 * Combines several databases into one new QuestionDB by adding every subject
 * of every input database, in order, through QuestionDB.AddSubject.
 *
 * @param {QuestionDB[]} dbs databases to merge
 * @returns {QuestionDB} the merged database
 */
function MergeDatabases(dbs) {
  const merged = new QuestionDB();
  for (const source of dbs) {
    for (let k = 0; k < source.length; k++) {
      merged.AddSubject(source.Subjects[k]);
    }
  }
  return merged;
}
/*
 * Parses the old line-based text format into a question database.
 *
 * The parameter is the raw file content as one string with "\n" line breaks.
 * The first character of each line is its identifier:
 *   "+" subject name, "?" question, "!" answer, ">" image list.
 * Which identifiers are accepted depends on the previously accepted line;
 * lines that do not start with an accepted identifier are skipped and recorded
 * in the returned logs as "lineIndex: lineText".
 *
 * Returns { result: QuestionDB, logs: string[] }.
 * TODO: ??? -s are not listed as errors, tho works correctly
 * */
function ReadData(data) {
  const lines = data.split("\n");
  const db = new QuestionDB();
  const logs = [];
  let subjectName = ""; // name of the subject currently being filled
  let expected = ['+', '?'];
  let pending = new Question();
  let pos = -1;

  const outcome = () => ({
    result: db,
    logs: logs
  });
  // Stores the question being built, but only once it has both Q and A.
  const storeIfComplete = () => {
    if (pending.IsComplete())
      db.AddQuestion(subjectName, pending);
  };

  while (pos < lines.length) {
    // Advance to the next line that starts with an accepted identifier,
    // logging every line skipped on the way.
    let marker;
    for (let skipped = 0; ; skipped++) {
      if (skipped >= 1)
        logs.push(pos + ": " + lines[pos]);
      pos++;
      if (pos >= lines.length) {
        storeIfComplete();
        return outcome();
      }
      marker = lines[pos][0];
      if (expected.includes(marker))
        break;
    }

    const text = lines[pos].substring(1).trim();
    switch (marker) {
      case '+':
        // A new subject closes the question in progress (under the old name).
        storeIfComplete();
        pending = new Question();
        subjectName = text;
        expected = ['?'];
        break;

      case '?':
        if (pending.IsComplete()) {
          db.AddQuestion(subjectName, pending);
          pending = new Question();
        }
        // overwriting is allowed here, bcus:
        // ?????!>
        pending.Q = text;
        expected = ['!', '?'];
        break;

      case '!':
        // an answer needs a question first
        if (!pending.HasQuestion())
          throw "No question! (A)";
        // dont allow overwriting
        // ?!!!!
        if (!pending.HasAnswer()) {
          pending.A = text
            .replace("A helyes válaszok: ", "")
            .replace("A helyes válasz: ", "");
        }
        expected = ['?', '>', '+'];
        break;

      case '>':
        // an image list needs both a question and an answer first
        if (!pending.HasQuestion())
          throw "No question! (I)";
        if (!pending.HasAnswer())
          throw "No asnwer! (I)";
        // dont allow overwriting
        // ?!>>>
        if (!pending.HasImage()) {
          try {
            pending.I = JSON.parse(text);
          } catch (e) {
            // not JSON: treat the text as a comma separated list
            pending.I = text.split(',');
          }
        }
        expected = ['?', '+'];
        break;
    }
  }
  return outcome();
}
/**
 * Removes duplicate questions from every subject of the database, in place.
 *
 * @param {QuestionDB} dataObj database to clean
 * @returns {QuestionDB} the same database object
 */
function RemoveDuplicates(dataObj) {
  let index = 0;
  while (index < dataObj.length) {
    RemoveDuplFromSubject(dataObj.Subjects[index]);
    index++;
  }
  return dataObj;
}
/**
 * Rebuilds a subject's question list without duplicates, in place.
 *
 * Questions are re-added in their original order; a question is skipped when
 * its Compare score against any already kept question is exactly 100, so the
 * first occurrence wins. Only removes 100% matches.
 *
 * @param {Subject} subj subject whose Questions list is rebuilt
 */
function RemoveDuplFromSubject(subj) {
  const candidates = subj.Questions;
  subj.Questions = [];
  candidates.forEach((candidate) => {
    const alreadyKept = subj.Questions.some((kept) => candidate.Compare(kept) == 100);
    if (!alreadyKept) {
      subj.AddQuestion(candidate);
    }
  });
}
/**
 * Normalizes a string for word-by-word comparison: strips the punctuation
 * characters , . : ! then collapses whitespace, trims and lower-cases.
 *
 * Punctuation is removed before whitespace is collapsed so that removing a
 * free-standing punctuation mark cannot leave a double space behind.
 *
 * @param {string} value text to normalize
 * @returns {string} the normalized text
 */
function SimplifyStringForComparison(value) {
  // A character class treats "." literally; the earlier per-character RegExp
  // would have matched any character, and its replace result was discarded.
  const withoutPunctuation = value.replace(/[,.:!]/g, "");
  return RemoveUnnecesarySpaces(withoutPunctuation).toLowerCase();
}
/**
 * Turns every whitespace character into a plain space (via NormalizeSpaces),
 * collapses each run of two or more spaces into a single space and trims the
 * ends.
 *
 * Done with one regular expression instead of a loop: the loop as written
 * tested for a single space and replaced it with a single space, so it could
 * never finish for text containing a space.
 *
 * @param {string} toremove text to clean up
 * @returns {string} text with single spaces only and no leading/trailing space
 */
function RemoveUnnecesarySpaces(toremove) {
  return NormalizeSpaces(toremove).replace(/ {2,}/g, " ").trim();
}
/**
 * Replaces every single whitespace character (tab, newline, ...) with one
 * plain space. Runs of whitespace are not collapsed.
 *
 * @param {string} input text to normalize
 * @returns {string} text of the same length containing only plain spaces as
 *   whitespace
 */
function NormalizeSpaces(input) {
  const pieces = input.split(/\s/);
  return pieces.join(' ');
}