mrfrys-node-server/utils/dataUpdater.js
2020-01-22 17:16:11 +01:00

637 lines
15 KiB
JavaScript
Executable file

/* ----------------------------------------------------------------------------
Question Server question file merger
GitLab: <https://gitlab.com/MrFry/mrfrys-node-server>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
------------------------------------------------------------------------- */
// Prefixes (Hungarian and English variants of "The correct answer is") and the
// apostrophe character that StringUtils.AnswerPreProcessor strips from answers.
// Every entry is used as a regular expression source by StringUtils.RemoveStuff.
const commonUselessAnswerParts = [
'A helyes válasz az ',
'A helyes válasz a ',
'A helyes válaszok: ',
'A helyes válaszok:',
'A helyes válasz: ',
'A helyes válasz:',
'The correct answer is:',
'\''
]
// Punctuation removed by StringUtils.SimplifyStringForComparison before two strings are compared.
// These are regular expression sources, hence the escaped '.' and '+'.
const commonUselessStringParts = [',', '\\.', ':', '!', '\\+']
// Characters that StringUtils.RemoveSpecialChars replaces with a space (regular expression sources).
const specialChars = [ '&', '\\+' ]
const lengthDiffMultiplier = 10 /* Percentage points StringUtils.CompareString subtracts per word of length difference */
const minMatchAmmount = 60 /* Match percentage a question has to exceed to be returned by Subject.Search */
/**
 * Minimal truthiness assertion used by the classes in this file.
 *
 * @param {*} val - value that is expected to be truthy
 * @throws {Error} with the message 'Assertion failed' when `val` is falsy
 */
const assert = (val) => {
  if (val) { return }
  throw new Error('Assertion failed')
}
/**
 * String helpers used to normalise questions and answers before they are
 * stored or compared with each other.
 *
 * Most methods assert that their input is truthy and throw otherwise.
 */
class StringUtils {
  /**
   * Replaces every match of each pattern in `removableStrings`.
   *
   * @param {string} value - text to clean
   * @param {string[]} removableStrings - regular expression sources; regex
   *   metacharacters have to be escaped by the caller
   * @param {string} [toReplace] - replacement text, defaults to ''
   * @returns {string} the cleaned text
   */
  RemoveStuff (value, removableStrings, toReplace) {
    const replacement = toReplace || ''
    return removableStrings.reduce(
      (text, pattern) => text.replace(new RegExp(pattern, 'g'), replacement),
      value
    )
  }

  /**
   * Turns every whitespace character (including line breaks) into a space,
   * collapses runs of spaces and trims the result.
   *
   * @param {string} q - query text, must be truthy
   * @returns {string}
   */
  SimplifyQuery (q) {
    assert(q)
    // RemoveUnnecesarySpaces already converts every whitespace character to a space
    return this.RemoveUnnecesarySpaces(q)
  }

  /**
   * Returns the leading characters of `toShorten`.
   *
   * @param {string} toShorten - text to cut, must be truthy
   * @param {number} ammount - maximum number of characters to keep
   * @returns {string} '' when `ammount` is not a positive number
   */
  ShortenString (toShorten, ammount) {
    assert(toShorten)
    return ammount > 0 ? toShorten.substring(0, Math.ceil(ammount)) : ''
  }

  /**
   * Replaces every match of `char` with a space, then tidies up the spacing.
   *
   * @param {string} val - text to process, must be truthy
   * @param {string} char - regular expression source of what to replace, must be truthy
   * @returns {string}
   */
  ReplaceCharsWithSpace (val, char) {
    assert(val)
    assert(char)
    const replaced = this.NormalizeSpaces(val).replace(new RegExp(char, 'g'), ' ')
    return this.RemoveUnnecesarySpaces(replaced)
  }

  /**
   * Removes whitespace from the beginning and the end, and replaces runs of
   * whitespace with a single space.
   *
   * @param {string} toremove - text to tidy up, must be truthy
   * @returns {string}
   */
  RemoveUnnecesarySpaces (toremove) {
    assert(toremove)
    // A single pass over runs of two or more spaces; the earlier loop tested for
    // a single space and therefore never terminated on inputs like 'a b'.
    return this.NormalizeSpaces(toremove).replace(/ {2,}/g, ' ').trim()
  }

  /**
   * Simplifies a string for easier comparison: tidy spacing, lower case,
   * punctuation from `commonUselessStringParts` removed.
   *
   * @param {string} value - text to simplify, must be truthy
   * @returns {string}
   */
  SimplifyStringForComparison (value) {
    assert(value)
    const lowered = this.RemoveUnnecesarySpaces(value).toLowerCase()
    return this.RemoveStuff(lowered, commonUselessStringParts)
  }

  /**
   * Replaces the characters listed in `specialChars` with a space.
   *
   * @param {string} value - text to process, must be truthy
   * @returns {string}
   */
  RemoveSpecialChars (value) {
    assert(value)
    return this.RemoveStuff(value, specialChars, ' ')
  }

  /**
   * Tells whether the value is missing, empty, or consists of whitespace only.
   *
   * @param {string} [value]
   * @returns {boolean}
   */
  EmptyOrWhiteSpace (value) {
    if (value === undefined || value === null) { return true }
    // \s also covers tabs, line breaks and non-breaking spaces
    return value.replace(/\s/g, '') === ''
  }

  /**
   * Converts every whitespace character (non-breaking spaces included) to a
   * regular space.
   *
   * @param {string} input - text to normalise, must be truthy
   * @returns {string}
   */
  NormalizeSpaces (input) {
    assert(input)
    return input.replace(/\s/g, ' ')
  }

  /**
   * Computes how well `s1` is covered by `s2`, as a percentage.
   *
   * The score is the rounded share of words of `s1` that also occur in `s2`,
   * minus `lengthDiffMultiplier` for every word of difference between the two
   * word counts, and never less than 0.
   *
   * @param {string} s1
   * @param {string} s2
   * @returns {number} 0 when either string is falsy
   */
  CompareString (s1, s2) {
    if (!s1 || !s2) {
      return 0
    }
    const words1 = this.SimplifyStringForComparison(s1).split(' ')
    const words2 = this.SimplifyStringForComparison(s2).split(' ')
    const known = new Set(words2)
    const match = words1.filter((word) => known.has(word)).length
    // matched words percent
    const matchedPercent = Math.round(Number(((match / words1.length) * 100).toFixed(2)))
    const lengthDifference = Math.abs(words2.length - words1.length)
    return Math.max(0, matchedPercent - lengthDifference * lengthDiffMultiplier)
  }

  /**
   * Removes the phrases listed in `commonUselessAnswerParts` from an answer.
   *
   * @param {string} value - answer text, must be truthy
   * @returns {string}
   */
  AnswerPreProcessor (value) {
    assert(value)
    return this.RemoveStuff(value, commonUselessAnswerParts)
  }

  /**
   * Drops a leading answer letter: 'a. pécsi sör' -> 'pécsi sör'.
   * The remaining '. ' separated parts are joined with a single space.
   *
   * @param {string} value - answer text, must be truthy
   * @returns {string} `value` unchanged when it has no single-character prefix
   */
  RemoveAnswerLetters (value) {
    assert(value)
    const parts = value.split('. ')
    if (parts.length > 1 && parts[0].length < 2) {
      return parts.slice(1).join(' ')
    }
    return value
  }

  /**
   * Runs `value` through the given modifier functions, in order.
   *
   * @param {string} [value]
   * @param {Array<function(string): string>} mods
   * @returns {string|undefined} undefined when `value` is falsy
   */
  SimplifyQA (value, mods) {
    if (!value) { return }
    // Stop transforming once a step produced an empty result: the remaining
    // steps assert a truthy input and would throw on it.
    return mods.reduce((res, fn) => (res ? fn(res) : res), value)
  }

  /**
   * Normalises an answer: special characters, spacing, useless prefixes and
   * the leading answer letter are removed.
   *
   * @param {string} [value]
   * @returns {string|undefined}
   */
  SimplifyAnswer (value) {
    return this.SimplifyQA(value, [
      this.RemoveSpecialChars.bind(this),
      this.RemoveUnnecesarySpaces.bind(this),
      this.AnswerPreProcessor.bind(this),
      this.RemoveAnswerLetters.bind(this)
    ])
  }

  /**
   * Normalises a question: special characters are replaced and spacing is tidied.
   *
   * @param {string} [value]
   * @returns {string|undefined}
   */
  SimplifyQuestion (value) {
    return this.SimplifyQA(value, [
      this.RemoveSpecialChars.bind(this),
      this.RemoveUnnecesarySpaces.bind(this)
    ])
  }

  /**
   * Same normalisation as SimplifyQuery.
   *
   * @param {string} stack - must be truthy
   * @returns {string}
   */
  SimplifyStack (stack) {
    return this.SimplifyQuery(stack)
  }
}
// Shared StringUtils instance; the Question class uses it to simplify and compare its texts.
const SUtils = new StringUtils()
/**
 * A question with its answer and optional image references.
 *
 * Q and A are stored in simplified form (see SUtils.SimplifyQuestion and
 * SUtils.SimplifyAnswer); I is stored as given.
 */
class Question {
  /**
   * @param {string} [q] - question text
   * @param {string} [a] - answer text
   * @param {string|string[]} [i] - image reference(s)
   */
  constructor (q, a, i) {
    this.Q = SUtils.SimplifyQuestion(q)
    this.A = SUtils.SimplifyAnswer(a)
    this.I = i
  }

  /**
   * Serialises the question as '?question', '!answer' and, when images are
   * present, '>images' lines.
   *
   * @returns {string}
   */
  toString () {
    const lines = ['?' + this.Q, '!' + this.A]
    if (this.I) { lines.push('>' + this.I) }
    return lines.join('\n')
  }

  /** @returns {boolean} true when a question text is set */
  HasQuestion () {
    return this.Q !== undefined
  }

  /** @returns {boolean} true when an answer text is set */
  HasAnswer () {
    return this.A !== undefined
  }

  /** @returns {boolean} true when the image data is a string or an array */
  HasImage () {
    return this.I !== undefined && (typeof this.I === 'string' || Array.isArray(this.I))
  }

  /** @returns {boolean} true when both question and answer are set */
  IsComplete () {
    return this.HasQuestion() && this.HasAnswer()
  }

  /**
   * Compares the images of this question with another image list.
   *
   * @param {string|string[]} [otherImages]
   * @returns {number} match percentage; 0 when either side has no usable images
   */
  CompareImages (otherImages) {
    // Images may be stored as an array or as a plain string, accept both
    const toText = (images) => {
      if (Array.isArray(images)) { return images.join(' ') }
      return typeof images === 'string' ? images : ''
    }
    const mine = toText(this.I)
    const theirs = toText(otherImages)
    if (!mine || !theirs) { return 0 }
    return SUtils.CompareString(mine, theirs)
  }

  /**
   * Computes how well this question matches another one.
   *
   * - `q2` is a string: it is compared with the question text. When images `i`
   *   are given, the result is averaged with the image match; a question
   *   without images is penalised by 30 percentage points instead.
   * - `q2` is a Question: the question and answer matches are averaged; when
   *   this question has image data, the image match is part of the average.
   *
   * @param {string|Question} q2 - what to compare with, must be truthy
   * @param {string|string[]} [i] - images belonging to a string `q2`
   * @returns {number} match percentage
   */
  Compare (q2, i) {
    assert(q2)
    if (typeof q2 === 'string') {
      const questionMatch = SUtils.CompareString(this.Q, q2)
      if (i === undefined || i === null || i.length === 0) {
        return questionMatch
      }
      if (this.HasImage()) {
        return (questionMatch + this.CompareImages(i)) / 2
      }
      return Math.max(0, questionMatch - 30)
    }

    const questionMatch = SUtils.CompareString(this.Q, q2.Q)
    const answerMatch = SUtils.CompareString(this.A, q2.A)
    if (this.I !== undefined) {
      // The image score used to be hard-wired to 0 by an inverted condition
      return (questionMatch + answerMatch + this.CompareImages(q2.I)) / 3
    }
    return (questionMatch + answerMatch) / 2
  }
}
/**
 * A named collection of questions.
 */
class Subject {
  /**
   * @param {string} n - subject name, must be truthy
   */
  constructor (n) {
    assert(n)
    this.Name = n
    this.Questions = []
    this.active = false
  }

  /** @param {number} i - index to remember for this subject */
  setIndex (i) {
    this.index = i
  }

  /**
   * @returns {number} the index set with setIndex, or -1 when none was set
   */
  getIndex () {
    // Explicit check instead of `||`, so that index 0 is reported as 0
    return this.index === undefined || this.index === null ? -1 : this.index
  }

  /** @returns {number} number of questions in the subject */
  get length () {
    return this.Questions.length
  }

  markActive () {
    this.active = true
  }

  /** @returns {boolean} true after markActive was called */
  getIfActive () {
    return this.active
  }

  /** @param {Question} q - question to append, must be truthy */
  AddQuestion (q) {
    assert(q)
    this.Questions.push(q)
  }

  /**
   * For names shaped like '2019/20/1 - Name' returns the part after the first
   * ' - ' separator.
   *
   * @returns {string} '' when the name has no such year prefix
   */
  getSubjNameWithoutYear () {
    const parts = this.Name.split(' - ')
    if (parts[0].match(/^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i)) {
      return parts[1] || ''
    }
    return ''
  }

  /**
   * For names shaped like '2019/20/1 - Name' returns the year prefix.
   *
   * @returns {string} '' when the name has no such year prefix
   */
  getYear () {
    const prefix = this.Name.split(' - ')[0]
    return prefix.match(/^[0-9]{4}\/[0-9]{2}\/[0-9]{1}$/i) ? prefix : ''
  }

  /**
   * Finds the questions that match better than `minMatchAmmount` percent.
   *
   * @param {string|Question} q - what to look for, must be truthy
   * @param {string|string[]} [img] - images belonging to `q`
   * @returns {Array<{q: Question, match: number}>} best match first
   */
  Search (q, img) {
    assert(q)
    const results = []
    for (const question of this.Questions) {
      const match = question.Compare(q, img)
      if (match > minMatchAmmount) {
        results.push({ q: question, match: match })
      }
    }
    return results.sort((a, b) => b.match - a.match)
  }

  /**
   * Serialises the subject as a '+name' line followed by its questions.
   *
   * @returns {string}
   */
  toString () {
    const questions = this.Questions.map((question) => question.toString())
    return '+' + this.Name + '\n' + questions.join('\n')
  }
}
/**
 * Collection of subjects. Whether a subject is active is not stored here but
 * read and written through the `getVal` / `setVal` callbacks, using the key
 * 'Is<index>Active'.
 */
class QuestionDB {
  /**
   * @param {function(string): *} getVal - reads a stored value by key
   * @param {function(string, *): void} setVal - stores a value under a key
   */
  constructor (getVal, setVal) {
    this.Subjects = []
    this.getVal = getVal
    this.setVal = setVal
  }

  /** @returns {number} number of subjects */
  get length () {
    return this.Subjects.length
  }

  /** @returns {number[]} indexes of the subjects that are marked active */
  get activeIndexes () {
    const active = []
    for (let i = 0; i < this.length; i++) {
      if (this.GetIfActive(i)) {
        active.push(i)
      }
    }
    return active
  }

  /**
   * @param {number} ind - subject index
   * @returns {*} the stored activity flag of the subject
   */
  GetIfActive (ind) {
    return this.getVal('Is' + ind + 'Active')
  }

  /**
   * @param {number} i - subject index
   * @param {*} value - stored as a boolean
   */
  ChangeActive (i, value) {
    this.setVal('Is' + i + 'Active', !!value)
  }

  /**
   * Adds a question to the subject called `subj`, creating the subject when
   * it does not exist yet.
   *
   * @param {string} subj - subject name, must be truthy
   * @param {Question} q - question to add
   */
  AddQuestion (subj, q) {
    assert(subj)
    let subject = this.Subjects.find((candidate) => candidate.Name === subj)
    if (!subject) {
      subject = new Subject(subj)
      this.Subjects.push(subject)
    }
    subject.AddQuestion(q)
  }

  /**
   * Searches every active subject.
   *
   * @param {string|Question} q - what to look for, must be truthy
   * @param {string|string[]} [img] - images belonging to `q`
   * @returns {Array<{q: Question, match: number}>} best match first
   */
  Search (q, img) {
    assert(q)
    let results = []
    for (let i = 0; i < this.length; i++) {
      if (this.GetIfActive(i)) {
        results = results.concat(this.Subjects[i].Search(q, img))
      }
    }
    return results.sort((a, b) => b.match - a.match)
  }

  /**
   * Adds a subject. When a subject with the same name already exists, the
   * questions of `subj` are appended to the existing one.
   *
   * @param {Subject} subj - must be truthy
   */
  AddSubject (subj) {
    assert(subj)
    const existing = this.Subjects.find((candidate) => candidate.Name === subj.Name)
    if (existing) {
      // concat returns a new array, so the result has to be stored
      existing.Questions = existing.Questions.concat(subj.Questions)
    } else {
      this.Subjects.push(subj)
    }
  }

  /**
   * Serialises every subject, separated by an empty line.
   *
   * @returns {string}
   */
  toString () {
    return this.Subjects.map((subject) => subject.toString()).join('\n\n')
  }
}
// Classes made available to other modules
module.exports.StringUtils = StringUtils
module.exports.Question = Question
module.exports.Subject = Subject
module.exports.QuestionDB = QuestionDB
// Project-local helper module; Main uses its ReadFile and WriteFile functions
const utils = require('./utils.js')
// The conversion runs as soon as this file is loaded
Main()
/**
 * Entry point of the script.
 *
 * Loads the question data file named by the first command line argument,
 * prints a summary of it, converts every question to the `data` layout (see
 * RefactorDb) and hands the JSON serialised result to utils.WriteFile together
 * with 'newData'.
 *
 * The file is parsed as JSON first; when that fails, the error is logged and
 * the old line based format is tried (see ReadData).
 */
function Main () {
  console.clear()
  const params = GetParams()
  console.log(params)

  const inputPath = params[0]
  if (!inputPath) {
    // Without this guard the script failed later with an unrelated looking error
    console.log('No input file given, expected the path of a question data file as the first argument')
    process.exitCode = 1
    return
  }

  PrintLN()
  console.log(inputPath + ': ')
  // Read once; both parsers work on the same content
  const rawData = utils.ReadFile(inputPath)
  let db
  try {
    db = ParseJSONData(rawData)
    console.log('JSON data added')
  } catch (e) {
    console.log(e)
    console.log('Trying with old format...')
    db = ReadData(rawData).result
  }
  PrintLN()
  PrintDB(db)
  PrintLN()
  RefactorDb(db)
  utils.WriteFile(JSON.stringify(db), 'newData')
}
/**
 * Converts every question of the database, in place, to the `data` layout:
 * questions with a truthy `I` get `data = { type: 'image', images }` and lose
 * their `I` property, every other question gets `data = { type: 'simple' }`.
 *
 * A string `I` is parsed as JSON; when it is not valid JSON it is split at
 * commas, the same fallback ReadData uses for image lines.
 *
 * @param {{Subjects: Array<{Questions: Object[]}>}} db - database to convert
 */
function RefactorDb (db) {
  const toImageList = (images) => {
    if (typeof images !== 'string') { return images }
    try {
      return JSON.parse(images)
    } catch (e) {
      // not JSON, treat it as a comma separated list
      return images.split(',')
    }
  }

  db.Subjects.forEach((subj) => {
    subj.Questions.forEach((question) => {
      if (!question.I) {
        question.data = { type: 'simple' }
        return
      }
      question.data = {
        type: 'image',
        images: toImageList(question.I)
      }
      delete question.I
    })
  })
}
// -----------------------------------------------------------------------------------------
/**
 * Writes a horizontal separator line to the console.
 */
function PrintLN () {
  const separator = '------------------------------------------------------'
  console.log(separator)
}
/**
 * Prints an overview of the database to the console: the subject count, one
 * line per subject with its question count, and the total question count,
 * followed by a separator line.
 *
 * @param {{length: number, Subjects: Array<{Name: string, length: number}>}} r - database to print
 * @param {number[]} [olds] - earlier question counts by subject index; when
 *   given and not empty, each line shows 'old -> new'
 */
function PrintDB (r, olds) {
  // right-aligns a count in a column that is three characters wide
  const padCount = (count) => (count < 10 ? ' ' : '') + (count < 100 ? ' ' : '') + count

  console.log('Data subject count: ' + r.length)

  let longestName = 0
  for (let idx = 0; idx < r.length; idx++) {
    const nameLength = r.Subjects[idx].Name.length
    if (longestName < nameLength) { longestName = nameLength }
  }

  let total = 0
  for (let idx = 0; idx < r.length; idx++) {
    const subject = r.Subjects[idx]
    const prefix = (idx < 10 ? idx + ' ' : idx) + ': '
    // every fourth row is filled with dots to make the columns easier to follow
    const fill = idx % 4 === 0 ? '.' : ' '
    const fillCount = Math.max(0, longestName + 4 - subject.Name.length)

    let line = prefix + subject.Name + fill.repeat(fillCount)
    if (olds && olds.length > 0) {
      // TODO: check if correct row! should be now, but well...
      line += padCount(olds[idx]) + ' -> '
    }
    line += padCount(subject.length) + ' db'
    total += subject.length
    console.log(line)
  }

  console.log('Total questions: ' + total)
  PrintLN()
}
/**
 * Returns the command line arguments that follow the executable and the
 * script path.
 *
 * Uses slice so that process.argv itself is left untouched and repeated calls
 * return the same arguments.
 *
 * @returns {string[]}
 */
function GetParams () {
  return process.argv.slice(2)
}
/**
 * Builds a QuestionDB from JSON text shaped like
 * `{ Subjects: [{ Name, Questions: [{ Q, A, I }] }] }`.
 *
 * The created database reports every subject as active and logs activity
 * changes to the console.
 *
 * @param {string} data - JSON text
 * @returns {QuestionDB}
 * @throws {SyntaxError} when `data` is not valid JSON
 * @throws {Error} when the parsed value has no `Subjects` array
 */
function ParseJSONData (data) {
  const parsed = JSON.parse(data)
  if (!parsed || !Array.isArray(parsed.Subjects)) {
    throw new Error('Invalid question data: no Subjects array found')
  }

  const db = new QuestionDB((x) => true, (x, y) => console.log(x, y))
  parsed.Subjects.forEach((rawSubject) => {
    const subject = new Subject(rawSubject.Name)
    rawSubject.Questions.forEach((rawQuestion) => {
      subject.AddQuestion(new Question(rawQuestion.Q, rawQuestion.A, rawQuestion.I))
    })
    db.AddSubject(subject)
  })
  return db
}
/**
 * Parses the old, line based question format into a QuestionDB.
 *
 * The first character of every line identifies its content:
 *   '+' subject name, '?' question, '!' answer, '>' image data.
 * Only the identifiers in `ExpectedIdentifier` are accepted at any point;
 * every other line is skipped and recorded in the returned `logs` as
 * '<line index>: <line>'.
 *
 * Q, A and I are assigned directly to the question object, so the
 * simplification done by the Question constructor is not applied to them.
 *
 * @param {string} data - file content in the old format
 * @returns {{result: QuestionDB, logs: string[]}}
 * @throws {Error} when an answer or image line is processed while the current
 *   question has no question text, or an image line while it has no answer
 */
function ReadData (data) {
const d = data.split('\n')
// the created database reports every subject as active and logs activity changes
const r = new QuestionDB((x) => true, (x, y) => console.log(x, y))
var logs = []
var currSubj = '' // the current subjects name
// identifiers that are accepted for the next processed line
var ExpectedIdentifier = ['+', '?']
let currQuestion = new Question()
var i = -1
while (i < d.length) {
let currIdentifier
let skipped = 0
// advance to the next line that starts with an expected identifier
do {
// from the second pass on, the line rejected by the previous pass is logged
if (skipped >= 1) { logs.push(i + ': ' + d[i]) }
i++
if (i >= d.length) {
// end of input: store the pending question if it is complete, then finish
if (currQuestion.IsComplete()) { r.AddQuestion(currSubj, currQuestion) }
return {
result: r,
logs: logs
}
}
currIdentifier = d[i][0]
skipped++
} while (!ExpectedIdentifier.includes(currIdentifier) && i < d.length)
// line content without the identifier character
let currData = d[i].substring(1).trim()
if (currIdentifier === '+') {
// a new subject starts: store the pending question under the previous subject
if (currQuestion.IsComplete()) { r.AddQuestion(currSubj, currQuestion) }
currQuestion = new Question()
currSubj = currData
ExpectedIdentifier = ['?']
continue
}
if (currIdentifier === '?') {
if (currQuestion.IsComplete()) {
r.AddQuestion(currSubj, currQuestion)
currQuestion = new Question()
}
// overwriting the question text is allowed here, because several '?' lines
// may follow each other before the answer: ?????!>
currQuestion.Q = currData
ExpectedIdentifier = ['!', '?']
continue
}
if (currIdentifier === '!') {
// an answer without a question is an error
if (!currQuestion.HasQuestion()) { throw new Error('No question! (A)') }
// do not allow overwriting, only the first answer line is kept: ?!!!!
if (!currQuestion.HasAnswer()) {
currData = currData.replace('A helyes válaszok: ', '')
currData = currData.replace('A helyes válasz: ', '')
currQuestion.A = currData
}
ExpectedIdentifier = ['?', '>', '+']
continue
}
if (currIdentifier === '>') {
// image data without a question or without an answer is an error
if (!currQuestion.HasQuestion()) { throw new Error('No question! (I)') }
if (!currQuestion.HasAnswer()) { throw new Error('No asnwer! (I)') }
// do not allow overwriting, only the first image line is kept: ?!>>>
if (!currQuestion.HasImage()) {
try {
currQuestion.I = JSON.parse(currData)
} catch (e) {
// not JSON, treat the line as a comma separated list
currQuestion.I = currData.split(',')
}
}
ExpectedIdentifier = ['?', '+']
continue
}
}
return {
result: r,
logs: logs
}
}