mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
tesseract worker restart to avoid memory leak
This commit is contained in:
parent
96b413a365
commit
0259cfe1a7
3 changed files with 85 additions and 25 deletions
|
@ -5,8 +5,12 @@ import {
|
|||
} from 'tesseract.js'
|
||||
|
||||
import logger from './logger'
|
||||
import utils from './utils'
|
||||
import { isMainThread, workerData } from 'worker_threads'
|
||||
|
||||
let recognizeCount = 0
|
||||
const MAX_ALLOWED_RECOGNIZE_COUNT = 100
|
||||
|
||||
// https://github.com/naptha/tesseract.js/blob/master/docs/api.md
|
||||
let tesseractWorker: TesseractWorker = null
|
||||
export async function initTesseractWorker(): Promise<TesseractWorker> {
|
||||
|
@ -17,17 +21,6 @@ export async function initTesseractWorker(): Promise<TesseractWorker> {
|
|||
await worker.load()
|
||||
await worker.loadLanguage('hun+eng')
|
||||
await worker.initialize('hun+eng')
|
||||
return worker
|
||||
// await worker.terminate();
|
||||
}
|
||||
|
||||
let resolveLoaded: () => void = null
|
||||
export const tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
||||
resolveLoaded = resolve
|
||||
})
|
||||
|
||||
initTesseractWorker().then((worker) => {
|
||||
tesseractWorker = worker
|
||||
|
||||
if (isMainThread) {
|
||||
logger.Log('Tesseract loaded on main thread')
|
||||
|
@ -35,14 +28,70 @@ initTesseractWorker().then((worker) => {
|
|||
const { workerIndex }: { workerIndex: number } = workerData
|
||||
logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
|
||||
}
|
||||
|
||||
return worker
|
||||
}
|
||||
|
||||
let resolveLoaded: () => void = null
|
||||
export let tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
||||
resolveLoaded = resolve
|
||||
})
|
||||
|
||||
/**
 * Terminates the current Tesseract worker and replaces it with a freshly
 * initialized one.
 *
 * For the duration of the restart `tesseractLoaded` is swapped for a pending
 * promise, so code that awaits it (e.g. `recognizeTextFromBase64`) waits until
 * the restart has finished.
 *
 * @returns a promise that settles once the restart attempt is over
 * @throws whatever `terminate()` or `initTesseractWorker()` rejects with; the
 *   pending `tesseractLoaded` promise is still released in that case
 */
export async function restartTesseractWorker(): Promise<void> {
  // Keep a resolver that belongs to THIS call. The module-level `resolveLoaded`
  // is shared, so an overlapping restart would overwrite it and the promise
  // created here could never be resolved through it.
  let markLoaded: () => void = () => undefined
  tesseractLoaded = new Promise((resolve) => {
    markLoaded = resolve
    // Still published for the rest of the module, which calls `resolveLoaded()`
    // after the initial load. Resolving a promise twice is harmless.
    resolveLoaded = resolve
  })

  try {
    // The worker is null until the first load has finished; nothing to stop then.
    if (tesseractWorker) {
      await tesseractWorker.terminate()
    }
    tesseractWorker = await initTesseractWorker()
  } finally {
    // Always release waiters, even when the restart failed. Otherwise every
    // later `await tesseractLoaded` would hang forever instead of surfacing
    // an error from the worker.
    markLoaded()
  }
}
|
||||
|
||||
// Kick off the first worker load as soon as the module is evaluated. Once the
// worker exists it is stored in the module-level slot and the pending
// `tesseractLoaded` promise is resolved.
void (async (): Promise<void> => {
  const freshWorker = await initTesseractWorker()
  tesseractWorker = freshWorker
  resolveLoaded()
})()
|
||||
|
||||
export async function recognizeTextFromBase64(base64: string): Promise<string> {
|
||||
const {
|
||||
data: { text },
|
||||
} = await tesseractWorker.recognize(base64)
|
||||
return text
|
||||
/**
 * Runs OCR on a base64 encoded image using the shared Tesseract worker.
 *
 * The call first waits for `tesseractLoaded`, so requests arriving while the
 * worker is loading or restarting are delayed rather than failing.
 *
 * @param base64 - image data handed to `tesseractWorker.recognize` unchanged
 * @returns the recognized text, or `null` when recognition failed
 */
export async function recognizeTextFromBase64(
  base64: string
): Promise<string | null> {
  await tesseractLoaded

  try {
    // TODO: somehow integrate confidence
    const {
      data: { text /*, confidence */ },
    } = await tesseractWorker.recognize(base64)

    recognizeCount += 1
    // Not awaited on purpose, so the recognized text is returned without
    // waiting for a possible worker restart. The rejection handler keeps a
    // failed restart from turning into an unhandled promise rejection.
    restartIfNecesarry().catch((restartError) => {
      logger.Log(
        'Error happened during restarting Tesseract worker!',
        logger.GetColor('redbg')
      )
      console.error(restartError)
    })

    return text
  } catch (e) {
    logger.Log(
      'Error happened during recognizing base64 text!',
      logger.GetColor('redbg')
    )
    console.error(e)

    try {
      await restartTesseractWorker()
    } catch (restartError) {
      // A failed recovery must not change the contract: callers get `null`
      // for a failed recognition, not a rejected promise.
      logger.Log(
        'Error happened during restarting Tesseract worker!',
        logger.GetColor('redbg')
      )
      console.error(restartError)
    }

    return null
  }
}
|
||||
|
||||
/**
 * Restarts the Tesseract worker once more than `MAX_ALLOWED_RECOGNIZE_COUNT`
 * recognitions have been counted since the last restart, and logs how much the
 * process RSS shrank across the restart.
 *
 * Does nothing while the counter is at or below the limit.
 */
async function restartIfNecesarry(): Promise<void> {
  if (recognizeCount <= MAX_ALLOWED_RECOGNIZE_COUNT) {
    return
  }

  // Reset BEFORE the first await. The restart is asynchronous, and a caller
  // that does not await this function can finish further recognitions while
  // it is in flight. With the counter still above the limit each of those
  // would start another, overlapping restart.
  recognizeCount = 0

  logger.Log('Restarting Tesseract worker')

  const memBefore = process.memoryUsage().rss

  await restartTesseractWorker()

  const memAfter = process.memoryUsage().rss
  // RSS may also grow across a restart, so the difference is clamped at zero.
  // NOTE(review): assumes utils.formatBytes expects a non-negative byte count — confirm.
  const freed = utils.formatBytes(Math.max(0, memBefore - memAfter))
  logger.Log(`Restarted tesseract worker, freed up ${freed} memory`)
}
|
||||
|
||||
export async function terminateWorker(): Promise<void | ConfigResult> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue