mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
added tesseract package, trying to recognize text from base64 image
This commit is contained in:
parent
70e7af0ba0
commit
00ec614f1d
9 changed files with 484 additions and 284 deletions
|
@ -21,6 +21,7 @@
|
|||
|
||||
import { isMainThread, parentPort, workerData } from 'worker_threads'
|
||||
|
||||
import { recognizeTextFromBase64, tesseractLoaded } from './tesseract'
|
||||
import logger from './logger'
|
||||
import {
|
||||
Question,
|
||||
|
@ -114,7 +115,10 @@ function normalizeSpaces(input: string): string {
|
|||
}
|
||||
|
||||
/**
 * Collapses whitespace in a string so that it can be compared reliably.
 *
 * The input is first passed through `normalizeSpaces`, then every run of
 * whitespace characters is replaced by a single space and leading/trailing
 * whitespace is removed.
 *
 * @param toremove - The text to clean up.
 * @returns The text with single-space separators and no surrounding whitespace.
 */
function removeUnnecesarySpaces(toremove: string): string {
  // `\s` already matches `\r` and `\n`, so once whitespace runs are collapsed
  // no line break can remain; a separate newline-stripping pass would never
  // match anything and is therefore not needed.
  return normalizeSpaces(toremove).replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
function compareString(
|
||||
|
@ -278,6 +282,34 @@ function createQuestion(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Replaces the text of an image-based question with the text recognized from
 * its base64 images.
 *
 * When `question.data.base64` is a non-empty array, every entry is passed to
 * `recognizeTextFromBase64`; the recognized texts are joined with a single
 * space into `Q`, and `data.type` is set to `'simple'`. All other fields of
 * the question and of its `data` are copied unchanged.
 *
 * @param question - The question to process. It is never mutated.
 * @returns A new question carrying the recognized text, or the original
 *   `question` object when it has no base64 images or recognition fails.
 */
async function recognizeQuestionImage(question: Question): Promise<Question> {
  // Guarded lookup: a question without `data` must fall through to the
  // "nothing to recognize" path instead of rejecting the returned promise.
  const base64Data =
    question && question.data ? question.data.base64 : undefined

  if (!Array.isArray(base64Data) || base64Data.length === 0) {
    return question
  }

  try {
    const recognizedTexts: string[] = []
    // Images are processed one after another, as in the original code.
    // NOTE(review): whether the recognizer tolerates concurrent calls is not
    // visible from here, so this is deliberately not parallelized.
    for (const base64 of base64Data) {
      recognizedTexts.push(await recognizeTextFromBase64(base64))
    }

    return {
      ...question,
      Q: recognizedTexts.join(' '),
      data: {
        ...question.data,
        type: 'simple',
      },
    }
  } catch (e) {
    // Best effort: log the failure and fall back to the unmodified question.
    console.error('Error happened in recognizeQuestionImage!')
    console.error(e)
    return question
  }
}
|
||||
|
||||
function compareImage(data: QuestionData, data2: QuestionData): number {
|
||||
if (data.hashedImages && data2.hashedImages) {
|
||||
return compareString(
|
||||
|
@ -643,6 +675,10 @@ interface WorkData {
|
|||
}
|
||||
|
||||
// Start the worker-side handling only when this module is running inside a
// worker thread; on the main thread this statement does nothing.
// `handleWorkerData` is a function declaration further down in this file, so
// it is hoisted and can be called from here.
if (!isMainThread) {
|
||||
handleWorkerData()
|
||||
}
|
||||
|
||||
function handleWorkerData() {
|
||||
const {
|
||||
workerIndex,
|
||||
initData,
|
||||
|
@ -653,11 +689,12 @@ if (!isMainThread) {
|
|||
`[THREAD #${workerIndex}]: Worker ${workerIndex} reporting for duty`
|
||||
)
|
||||
|
||||
parentPort.on('message', (msg /*: TaskObject */) => {
|
||||
parentPort.on('message', async (msg /*: TaskObject */) => {
|
||||
await tesseractLoaded
|
||||
if (msg.type === 'work') {
|
||||
const {
|
||||
subjName,
|
||||
question,
|
||||
question: originalQuestion,
|
||||
searchTillMatchPercent,
|
||||
searchInAllIfNoResult,
|
||||
searchIn,
|
||||
|
@ -667,6 +704,8 @@ if (!isMainThread) {
|
|||
let searchResult: SearchResultQuestion[] = []
|
||||
let error = false
|
||||
|
||||
const question = await recognizeQuestionImage(originalQuestion)
|
||||
|
||||
try {
|
||||
qdbs.forEach((qdb) => {
|
||||
if (searchIn.includes(qdb.index)) {
|
||||
|
@ -857,8 +896,6 @@ if (!isMainThread) {
|
|||
})
|
||||
}
|
||||
})
|
||||
} else {
|
||||
// console.log('[THREAD]: Main thread!')
|
||||
}
|
||||
|
||||
export function cleanDb(
|
||||
|
@ -917,4 +954,5 @@ export {
|
|||
dataToString,
|
||||
doSearch,
|
||||
setNoPossibleAnswersPenalties,
|
||||
recognizeQuestionImage,
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue