added tesseract package, trying to recognize text from base64 image

This commit is contained in:
mrfry 2022-11-23 21:47:07 +01:00
parent 70e7af0ba0
commit 00ec614f1d
9 changed files with 484 additions and 284 deletions

View file

@ -21,6 +21,7 @@
import { isMainThread, parentPort, workerData } from 'worker_threads'
import { recognizeTextFromBase64, tesseractLoaded } from './tesseract'
import logger from './logger'
import {
Question,
@ -114,7 +115,10 @@ function normalizeSpaces(input: string): string {
}
/**
 * Normalizes a string with `normalizeSpaces` (defined elsewhere in this file),
 * collapses every run of whitespace into a single space and strips leading
 * and trailing whitespace.
 *
 * @param toremove - text to clean up
 * @returns the cleaned, single-line text
 */
function removeUnnecesarySpaces(toremove: string): string {
  // `\s` also matches `\r` and `\n`, so line breaks are collapsed by this one
  // pass; a separate newline-stripping replace afterwards would never match.
  return normalizeSpaces(toremove).replace(/\s+/g, ' ').trim()
}
function compareString(
@ -278,6 +282,34 @@ function createQuestion(
}
}
/**
 * Turns an image question into a text question by running text recognition
 * on every base64 image stored in `question.data.base64`.
 *
 * @param question - question whose `data.base64` may hold an array of base64
 *   encoded images
 * @returns a copy of the question with `Q` set to the recognized texts joined
 *   by a single space and `data.type` set to `'simple'`. The original question
 *   is returned unchanged when it has no images, when recognition throws, or
 *   when no text could be recognized at all.
 */
async function recognizeQuestionImage(question: Question): Promise<Question> {
  // Optional chaining: callers await this function outside of their own
  // try/catch, so a question without `data` must not make it reject.
  const base64Data = question?.data?.base64
  if (!Array.isArray(base64Data) || base64Data.length === 0) {
    return question
  }

  try {
    const texts: string[] = []
    // Images are recognized one at a time and in order, so the joined text
    // follows the order of the images.
    for (const base64 of base64Data) {
      texts.push(await recognizeTextFromBase64(base64))
    }

    const recognized = texts.join(' ')
    if (recognized.trim() === '') {
      // Nothing readable was found: keep the image question as it is instead
      // of replacing it with an empty 'simple' question.
      return question
    }

    return {
      ...question,
      Q: recognized,
      data: {
        ...question.data,
        type: 'simple',
      },
    }
  } catch (e) {
    // Best effort: recognition failures are logged and the unmodified
    // question is used for the search.
    console.error('Error happened in recognizeQuestionImage!')
    console.error(e)
    return question
  }
}
function compareImage(data: QuestionData, data2: QuestionData): number {
if (data.hashedImages && data2.hashedImages) {
return compareString(
@ -643,6 +675,10 @@ interface WorkData {
}
// Start handling worker messages only when this module is running inside a
// worker thread; on the main thread this guard does nothing.
if (!isMainThread) {
handleWorkerData()
}
function handleWorkerData() {
const {
workerIndex,
initData,
@ -653,11 +689,12 @@ if (!isMainThread) {
`[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty`
)
parentPort.on('message', (msg /*: TaskObject */) => {
parentPort.on('message', async (msg /*: TaskObject */) => {
await tesseractLoaded
if (msg.type === 'work') {
const {
subjName,
question,
question: originalQuestion,
searchTillMatchPercent,
searchInAllIfNoResult,
searchIn,
@ -667,6 +704,8 @@ if (!isMainThread) {
let searchResult: SearchResultQuestion[] = []
let error = false
const question = await recognizeQuestionImage(originalQuestion)
try {
qdbs.forEach((qdb) => {
if (searchIn.includes(qdb.index)) {
@ -857,8 +896,6 @@ if (!isMainThread) {
})
}
})
} else {
// console.log('[THREAD]: Main thread!')
}
export function cleanDb(
@ -917,4 +954,5 @@ export {
dataToString,
doSearch,
setNoPossibleAnswersPenalties,
recognizeQuestionImage,
}