added tesseract package, trying to recognize text from base64 image

This commit is contained in:
mrfry 2022-11-23 21:47:07 +01:00
parent 70e7af0ba0
commit 00ec614f1d
9 changed files with 484 additions and 284 deletions

53
src/utils/tesseract.ts Normal file
View file

@ -0,0 +1,53 @@
import {
createWorker,
Worker as TesseractWorker,
ConfigResult,
} from 'tesseract.js'
import logger from './logger'
import { isMainThread, workerData } from 'worker_threads'
// tesseract.js API reference:
// https://github.com/naptha/tesseract.js/blob/master/docs/api.md

// Worker shared by every function in this module. It is `null` until the
// start-up call to `initTesseractWorker()` resolves and assigns it, so the
// type spells that out instead of pretending a worker is always present.
let tesseractWorker: TesseractWorker | null = null
/**
 * Creates a tesseract.js worker and prepares it for recognition.
 *
 * The worker is loaded, the requested language data is fetched and the OCR
 * engine is initialised with it, in that order.
 *
 * @param langs - `+`-separated language codes handed to both `loadLanguage`
 * and `initialize`. Defaults to `'hun+eng'`.
 * @returns The ready-to-use worker. The caller owns it and is responsible for
 * calling `terminate()` on it.
 * @throws Re-throws whatever error any initialisation step rejected with; the
 * half-initialised worker is terminated first so it does not leak.
 */
export async function initTesseractWorker(
    langs = 'hun+eng'
): Promise<TesseractWorker> {
    const worker = createWorker({
        // NOTE(review): per the tesseract.js API docs 'refresh' ignores any
        // cached language data and writes a fresh copy — confirm this is
        // still wanted outside of debugging.
        cacheMethod: 'refresh',
        // logger: (m) => console.log(m),
    })

    try {
        await worker.load()
        await worker.loadLanguage(langs)
        await worker.initialize(langs)
    } catch (err) {
        // Best-effort cleanup: a failure while terminating must not mask
        // the original initialisation error.
        await worker.terminate().catch(() => undefined)
        throw err
    }

    return worker
}
/**
 * Settles once the start-up initialisation of the shared worker has finished.
 *
 * - Resolves after the worker has been stored in `tesseractWorker` and the
 *   "loaded" message has been logged.
 * - Rejects with the initialisation error if the worker could not be set up,
 *   so code awaiting this promise fails fast instead of waiting forever.
 *
 * Initialisation is kicked off as a side effect of importing this module.
 */
export const tesseractLoaded: Promise<void> = initTesseractWorker().then(
    (worker) => {
        tesseractWorker = worker
        if (isMainThread) {
            logger.Log('Tesseract loaded on main thread')
        } else {
            // Worker threads are expected to receive their index through
            // `workerData`.
            const { workerIndex }: { workerIndex: number } = workerData
            logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
        }
    },
    (err: unknown) => {
        const reason = err instanceof Error ? err.message : String(err)
        logger.Log(`Tesseract failed to load: ${reason}`)
        // Propagate, so awaiting callers observe the failure.
        throw err
    }
)
/**
 * Runs OCR on an image using the shared worker and returns the recognised
 * text.
 *
 * @param base64 - The image, passed straight through to `worker.recognize`.
 * NOTE(review): tesseract.js documents base64 input as a data URI
 * (`data:image/...;base64,...`) — confirm callers send that form.
 * @returns The `data.text` field of the recognition result.
 * @throws Error if the shared worker has not been assigned yet (await
 * `tesseractLoaded` first) or is otherwise unset; also rejects with whatever
 * `worker.recognize` rejects with.
 */
export async function recognizeTextFromBase64(base64: string): Promise<string> {
    const worker = tesseractWorker
    if (!worker) {
        // Without this guard the call below would fail with an opaque
        // "cannot read properties of null" TypeError.
        throw new Error(
            'Tesseract worker is not initialized; await tesseractLoaded before recognizing text'
        )
    }

    const result = await worker.recognize(base64)
    return result.data.text
}
/**
 * Terminates the shared worker, if there is one.
 *
 * The module-level reference is cleared before termination starts, so the
 * function is idempotent and a dead worker is never reused or terminated a
 * second time.
 *
 * @returns Whatever `worker.terminate()` resolves with, or `undefined` when
 * no worker was set.
 */
export async function terminateWorker(): Promise<void | ConfigResult> {
    const worker = tesseractWorker
    if (!worker) {
        return
    }

    // Drop the reference first: later calls must not see a terminated worker.
    tesseractWorker = null
    return worker.terminate()
}