diff --git a/src/standaloneUtils/rmDuplicates.js b/src/standaloneUtils/rmDuplicates.js index 6258993..7631204 100644 --- a/src/standaloneUtils/rmDuplicates.js +++ b/src/standaloneUtils/rmDuplicates.js @@ -3,6 +3,28 @@ const logger = require('../../dist/utils/logger.js').default // eslint-disable-l const { addQuestion, doSearch } = require('../../dist/utils/classes.js') // eslint-disable-line const { loadData } = require('../../dist/utils/actions.js') // eslint-disable-line +// Params [ 'publicDirs/qminingPublic/questionDbs/elektro.json' ] +// ============================================================================================== +// 1 / 1: Elektronika, 826 questions +// Result length: 0, original length: 826 +// [=====================================================================] 826 / 826 +// Removed 11 questions +// ============================================================================================== +// Result length: 815, original length: 826, removed 11 questions +// File written + +// load: 15.605ms +// Removing duplicate questions from publicDirs/qminingPublic/questionDbs/elektro.json +// ============================================================================================== +// 1 / 1: Elektronika, 826 questions +// Result length: 0, original length: 826 +// [=====================================================================] 826 / 826 +// Removed 160 questions +// ============================================================================================== +// Result length: 666, original length: 826, removed 160 questions +// rmduplicates: 569.828ms +// File written + const minpercent = 95 const resultDbFileName = 'res.json' const line = diff --git a/src/types/basicTypes.ts b/src/types/basicTypes.ts index 5346df2..585db75 100644 --- a/src/types/basicTypes.ts +++ b/src/types/basicTypes.ts @@ -11,8 +11,8 @@ export interface Question { A: string data: QuestionData cache?: { - Q: string - A: string + Q: Array + A: Array } } diff --git a/src/utils/classes.ts b/src/utils/classes.ts index 6ca9edd..a637034 100755 --- a/src/utils/classes.ts +++ b/src/utils/classes.ts @@ -84,16 +84,24 @@ function removeUnnecesarySpaces(toremove: string) { return normalizeSpaces(toremove).replace(/\s+/g, ' ') } -function compareString(s1, s2) { - if (!s1 || !s2) { - if (!s1 && !s2) { +function compareString( + s1: string, + s1a: Array, + s2: string, + s2a: Array +): number { + if (s1 === s2) { + return 100 + } + if (!s1a || !s2a) { + if (!s1a && !s2a) { return 100 } else { return 0 } } - if (s1.length < 0 || s2.length < 0) { - if (s1.length === 0 && s2.length === 0) { + if (s1a.length < 0 || s2a.length < 0) { + if (s1a.length === 0 && s2a.length === 0) { return 100 } else { return 0 @@ -101,15 +109,15 @@ function compareString(s1, s2) { } let match = 0 - let lastMatchIndex = -1 + let lastMatchIndex = -2 let i = 0 - while (i < s1.length) { + while (i < s1a.length) { if (match / i < magicNumber) { break } - const currMatchIndex = s2.indexOf(s1[i]) + const currMatchIndex = s2a.indexOf(s1a[i]) if (lastMatchIndex < currMatchIndex) { match++ lastMatchIndex = currMatchIndex @@ -118,8 +126,8 @@ function compareString(s1, s2) { i++ } - let percent = Math.round(parseFloat(((match / s1.length) * 100).toFixed(2))) - const lengthDifference = Math.abs(s2.length - s1.length) + let percent = Math.round(parseFloat(((match / s1a.length) * 100).toFixed(2))) + const lengthDifference = Math.abs(s2a.length - s1a.length) percent -= lengthDifference * lengthDiffMultiplier if (percent < 0) { percent = 0 @@ -230,10 +238,19 @@ function compareImage(data: QuestionData, data2: QuestionData) { if (data.hashedImages && data2.hashedImages) { return compareString( data.hashedImages.join(' '), - data.hashedImages.join(' ') + data.hashedImages, + data2.hashedImages.join(' '), + data2.hashedImages ) } else if (data.images && data2.images) { - return compareString(data.images.join(' '), data2.images.join(' ')) - 10 + return ( + compareString( + data.images.join(' '), + data.images, + data2.images.join(' '), + data2.images + ) - 10 + ) } else { return 0 } @@ -268,11 +285,11 @@ function compareData(q1: Question, q2: Question) { } function compareQuestion(q1: Question, q2: Question) { - return compareString(q1.cache.Q, q2.cache.Q) + return compareString(q1.Q, q1.cache.Q, q2.Q, q2.cache.Q) } function compareAnswer(q1: Question, q2: Question) { - return compareString(q1.cache.A, q2.cache.A) + return compareString(q1.A, q1.cache.A, q2.A, q2.cache.A) } function compareQuestionObj(