import {
    createWorker,
    Worker as TesseractWorker,
    ConfigResult,
} from 'tesseract.js'

import logger from './logger'
import utils from './utils'
import { isMainThread, workerData } from 'worker_threads'

/** Successful recognitions since the counter was last reset. */
let recognizeCount = 0
/** Recognitions allowed before the worker is recycled. */
const MAX_ALLOWED_RECOGNIZE_COUNT = 3000 // ~ 500 MB

// https://github.com/naptha/tesseract.js/blob/master/docs/api.md
/** The live worker, or `null` while it is being (re)loaded or after a failed load. */
let tesseractWorker: TesseractWorker | null = null

/**
 * The (re)load currently running, or `null` when none is. It is cleared at
 * the same moment the `tesseractLoaded` gate is opened.
 */
let pendingReload: Promise<void> | null = null

/** Logs a failure in the module's usual style: highlighted message, then the raw error. */
function logFailure(message: string, error: unknown): void {
    logger.Log(message, logger.GetColor('redbg'))
    console.error(error)
}

/**
 * Creates a Tesseract worker and loads + initializes the `hun+eng` languages.
 *
 * Logs a confirmation once loaded; inside a worker thread the message is
 * prefixed with the `workerIndex` read from `workerData`.
 *
 * @returns the initialized worker
 */
export async function initTesseractWorker(): Promise<TesseractWorker> {
    const worker = await createWorker({
        cacheMethod: 'refresh',
        // logger: (m) => console.log(m),
    })
    await worker.loadLanguage('hun+eng')
    await worker.initialize('hun+eng')

    if (isMainThread) {
        logger.Log('Tesseract loaded on main thread')
    } else {
        const { workerIndex }: { workerIndex: number } = workerData
        logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
    }
    return worker
}

/** Opens the gate currently stored in `tesseractLoaded`. */
let resolveLoaded: () => void = () => undefined

/**
 * Gate that resolves when the current (re)load attempt has finished.
 *
 * It is replaced by a fresh pending promise whenever a restart begins. The
 * gate is opened even when loading fails (so waiters never hang); in that
 * case `tesseractWorker` is `null`.
 */
export let tesseractLoaded: Promise<void> = new Promise((resolve) => {
    resolveLoaded = resolve
})

/**
 * Terminates the current worker (if any), creates a new one, then opens the
 * current gate - whether or not loading succeeded.
 *
 * NOTE(review): recognitions still running on the old worker when it is
 * terminated are not waited for; how tesseract.js settles them - confirm.
 */
async function swapWorker(): Promise<void> {
    const previous = tesseractWorker
    tesseractWorker = null
    try {
        if (previous) {
            try {
                await previous.terminate()
            } catch (e) {
                // The old worker is being discarded anyway; a failed
                // terminate must not prevent creating its replacement.
                logFailure('Failed to terminate Tesseract worker!', e)
            }
        }
        tesseractWorker = await initTesseractWorker()
    } finally {
        pendingReload = null
        resolveLoaded()
    }
}

/** Starts a (re)load and records it so that concurrent requests can join it. */
function startReload(): Promise<void> {
    const reload = swapWorker()
    pendingReload = reload
    return reload
}

/**
 * Replaces the Tesseract worker with a freshly initialized one.
 *
 * While the restart runs, `tesseractLoaded` is a new pending gate. Calls made
 * while a (re)load is already in progress join that one instead of starting
 * another, so a gate is never orphaned and a worker is never terminated twice.
 *
 * @throws whatever `initTesseractWorker` rejects with; the gate is still
 * opened and `tesseractWorker` is left `null`
 */
export async function restartTesseractWorker(): Promise<void> {
    if (pendingReload !== null) {
        return pendingReload
    }
    tesseractLoaded = new Promise((resolve) => {
        resolveLoaded = resolve
    })
    return startReload()
}

// Initial load. A failure is logged instead of surfacing as an unhandled
// rejection; the next recognition attempt will try to load the worker again.
startReload().catch((e: unknown) => {
    logFailure('Failed to load Tesseract worker!', e)
})

/**
 * Runs OCR on a base64 encoded image.
 *
 * @param base64 the image, passed unchanged to the worker's `recognize`
 * @returns the recognized text, or `null` when recognition failed (the error
 * is logged and a worker restart is attempted)
 */
export async function recognizeTextFromBase64(
    base64: string
): Promise<string | null> {
    // A restart may begin between the gate opening and this call resuming,
    // so wait again until no (re)load is in progress.
    do {
        await tesseractLoaded
    } while (pendingReload !== null)

    try {
        const worker = tesseractWorker
        if (!worker) {
            throw new Error('Tesseract worker is not available')
        }

        // TODO: somehow integrate confidence
        const {
            data: { text /*, confidence */ },
        } = await worker.recognize(base64)

        recognizeCount += 1
        // Deliberately not awaited: the caller gets its text without waiting
        // for a possible restart. restartIfNecesarry never rejects.
        void restartIfNecesarry()
        return text
    } catch (e) {
        logFailure('Error happened during recognizing base64 text!', e)
        try {
            await restartTesseractWorker()
        } catch (restartError) {
            logFailure('Failed to restart Tesseract worker!', restartError)
        }
        return null
    }
}

/**
 * Restarts the worker once more than `MAX_ALLOWED_RECOGNIZE_COUNT`
 * recognitions have been counted, and logs the change in resident set size.
 * Never rejects: a failed restart is logged.
 */
async function restartIfNecesarry(): Promise<void> {
    if (recognizeCount <= MAX_ALLOWED_RECOGNIZE_COUNT) {
        return
    }
    // Reset before awaiting, so recognitions that complete while the restart
    // is running do not each trigger another one.
    recognizeCount = 0

    logger.Log('Restarting Tesseract worker')
    const memBefore = process.memoryUsage().rss
    try {
        await restartTesseractWorker()
    } catch (e) {
        logFailure('Failed to restart Tesseract worker!', e)
        return
    }
    const memAfter = process.memoryUsage().rss
    const freed = utils.formatBytes(memBefore - memAfter)
    logger.Log(`Restarted tesseract worker, freed up ${freed} memory`)
}

/**
 * Terminates the current worker, if there is one, and forgets it so that it
 * cannot be terminated a second time.
 *
 * @returns the result of the worker's `terminate`, or nothing when there is
 * no worker
 */
export async function terminateWorker(): Promise<void | ConfigResult> {
    const worker = tesseractWorker
    if (!worker) {
        return
    }
    tesseractWorker = null
    return worker.terminate()
}