mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
tesseract worker restart to avoid memory leak
This commit is contained in:
parent
96b413a365
commit
0259cfe1a7
3 changed files with 85 additions and 25 deletions
|
@ -287,14 +287,16 @@ function createQuestion(
|
|||
async function recognizeQuestionImage(question: Question): Promise<Question> {
|
||||
const base64Data = question.data.base64
|
||||
if (Array.isArray(base64Data) && base64Data.length) {
|
||||
try {
|
||||
const res: string[] = []
|
||||
for (let i = 0; i < base64Data.length; i++) {
|
||||
const base64 = base64Data[i]
|
||||
const text = await recognizeTextFromBase64(base64)
|
||||
const res: string[] = []
|
||||
for (let i = 0; i < base64Data.length; i++) {
|
||||
const base64 = base64Data[i]
|
||||
const text = await recognizeTextFromBase64(base64)
|
||||
if (text && text.trim()) {
|
||||
res.push(text)
|
||||
}
|
||||
}
|
||||
|
||||
if (res.length) {
|
||||
return {
|
||||
...question,
|
||||
Q: res.join(' '),
|
||||
|
@ -303,9 +305,6 @@ async function recognizeQuestionImage(question: Question): Promise<Question> {
|
|||
type: 'simple',
|
||||
},
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error happened in recognizeQuestionImage!')
|
||||
console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -972,5 +971,4 @@ export {
|
|||
dataToString,
|
||||
doSearch,
|
||||
setNoPossibleAnswersPenalties,
|
||||
recognizeQuestionImage,
|
||||
}
|
||||
|
|
|
@ -5,8 +5,12 @@ import {
|
|||
} from 'tesseract.js'
|
||||
|
||||
import logger from './logger'
|
||||
import utils from './utils'
|
||||
import { isMainThread, workerData } from 'worker_threads'
|
||||
|
||||
let recognizeCount = 0
|
||||
const MAX_ALLOWED_RECOGNIZE_COUNT = 100
|
||||
|
||||
// https://github.com/naptha/tesseract.js/blob/master/docs/api.md
|
||||
let tesseractWorker: TesseractWorker = null
|
||||
export async function initTesseractWorker(): Promise<TesseractWorker> {
|
||||
|
@ -17,17 +21,6 @@ export async function initTesseractWorker(): Promise<TesseractWorker> {
|
|||
await worker.load()
|
||||
await worker.loadLanguage('hun+eng')
|
||||
await worker.initialize('hun+eng')
|
||||
return worker
|
||||
// await worker.terminate();
|
||||
}
|
||||
|
||||
let resolveLoaded: () => void = null
|
||||
export const tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
||||
resolveLoaded = resolve
|
||||
})
|
||||
|
||||
initTesseractWorker().then((worker) => {
|
||||
tesseractWorker = worker
|
||||
|
||||
if (isMainThread) {
|
||||
logger.Log('Tesseract loaded on main thread')
|
||||
|
@ -35,14 +28,70 @@ initTesseractWorker().then((worker) => {
|
|||
const { workerIndex }: { workerIndex: number } = workerData
|
||||
logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
|
||||
}
|
||||
|
||||
return worker
|
||||
}
|
||||
|
||||
let resolveLoaded: () => void = null
|
||||
export let tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
||||
resolveLoaded = resolve
|
||||
})
|
||||
|
||||
// Restart that is currently running, if any. Overlapping callers share it
// instead of terminating the worker a second time.
let restartInProgress: Promise<void> | null = null

/**
 * Terminates the current Tesseract worker and replaces it with a new one.
 *
 * While the restart runs, `tesseractLoaded` is swapped for a pending promise,
 * so callers awaiting it are held back until the restart has finished.
 *
 * Calls made while a restart is already running join that restart. Starting a
 * second one would overwrite `resolveLoaded`, leaving the first pending
 * `tesseractLoaded` promise unresolved forever, and would terminate the
 * worker twice.
 *
 * @returns a promise that settles when the restart has finished; it rejects
 *   if terminating the old worker or creating the new one fails
 */
export async function restartTesseractWorker(): Promise<void> {
  if (restartInProgress) {
    return restartInProgress
  }

  tesseractLoaded = new Promise((resolve) => {
    resolveLoaded = resolve
  })

  restartInProgress = (async () => {
    try {
      // The worker is still null until the initial load has finished
      if (tesseractWorker) {
        await tesseractWorker.terminate()
      }
      tesseractWorker = await initTesseractWorker()
    } finally {
      // Always release the waiters, even on failure: a promise that never
      // settles would block every later recognition for good
      resolveLoaded()
      restartInProgress = null
    }
  })()

  return restartInProgress
}
|
||||
|
||||
// Initial load: create the first worker at module load time, store it in the
// module-level `tesseractWorker` and release everyone awaiting
// `tesseractLoaded`.
initTesseractWorker().then((loadedWorker) => {
  tesseractWorker = loadedWorker
  return resolveLoaded()
})
|
||||
|
||||
export async function recognizeTextFromBase64(base64: string): Promise<string> {
|
||||
const {
|
||||
data: { text },
|
||||
} = await tesseractWorker.recognize(base64)
|
||||
return text
|
||||
/**
 * Runs text recognition on a base64 image with the shared Tesseract worker.
 *
 * Waits for `tesseractLoaded` first, so calls made while the worker is loading
 * or restarting are delayed until a worker is available.
 *
 * Every successful recognition increases `recognizeCount` and gives
 * restartIfNecesarry() a chance to replace the worker. When recognition fails,
 * the worker is restarted right away.
 *
 * @param base64 - image data, handed unchanged to `tesseractWorker.recognize`
 * @returns the recognized text, or `null` if the recognition failed
 */
export async function recognizeTextFromBase64(
  base64: string
): Promise<string | null> {
  await tesseractLoaded

  try {
    // TODO: somehow integrate confidence
    const {
      data: { text /*, confidence */ },
    } = await tesseractWorker.recognize(base64)

    recognizeCount += 1
    // Deliberately not awaited: the caller should get its text without
    // waiting for a worker restart. The rejection still has to be handled,
    // otherwise a failed restart becomes an unhandled promise rejection.
    restartIfNecesarry().catch((restartError) => {
      logger.Log(
        'Error happened during restarting Tesseract worker!',
        logger.GetColor('redbg')
      )
      console.error(restartError)
    })

    return text
  } catch (e) {
    logger.Log(
      'Error happened during recognizing base64 text!',
      logger.GetColor('redbg')
    )
    console.error(e)

    try {
      await restartTesseractWorker()
    } catch (restartError) {
      // Keep the documented contract: a failure is reported as `null`, even
      // if the recovery attempt itself fails
      logger.Log(
        'Error happened during restarting Tesseract worker!',
        logger.GetColor('redbg')
      )
      console.error(restartError)
    }

    return null
  }
}
|
||||
|
||||
/**
 * Restarts the Tesseract worker once more than MAX_ALLOWED_RECOGNIZE_COUNT
 * recognitions were done since the last restart, then logs the change in
 * resident set size.
 *
 * Never rejects: a failed restart is logged instead, because the function is
 * called without `await` and a rejection would be unhandled.
 */
async function restartIfNecesarry(): Promise<void> {
  if (recognizeCount <= MAX_ALLOWED_RECOGNIZE_COUNT) {
    return
  }

  // Reset before awaiting anything. Recognitions that finish while the restart
  // is still running would otherwise see the old count and start another
  // restart each.
  recognizeCount = 0

  logger.Log('Restarting Tesseract worker')

  const memBefore = process.memoryUsage().rss

  try {
    await restartTesseractWorker()
  } catch (e) {
    logger.Log(
      'Error happened during restarting Tesseract worker!',
      logger.GetColor('redbg')
    )
    console.error(e)
    return
  }

  const memAfter = process.memoryUsage().rss
  // Can be negative if the process uses more memory after the restart
  const freed = utils.formatBytes(memBefore - memAfter)
  logger.Log(`Restarted tesseract worker, freed up ${freed} memory`)
}
|
||||
|
||||
export async function terminateWorker(): Promise<void | ConfigResult> {
|
||||
|
|
|
@ -36,6 +36,7 @@ export default {
|
|||
statFile: statFile,
|
||||
renameFile: renameFile,
|
||||
deleteDir: deleteDir,
|
||||
formatBytes: formatBytes,
|
||||
}
|
||||
|
||||
import fs from 'fs'
|
||||
|
@ -296,3 +297,15 @@ function renameFile(oldPath: string, newPath: string): string {
|
|||
return null
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Converts a byte count to a string in the requested unit.
 *
 * The value is divided by 1024 per step and is not rounded.
 *
 * @param number - amount of bytes
 * @param unit - target unit, 'MB' by default
 * @returns `<value> MB` or `<value> GB`; for any other unit value the
 *   unconverted input as `<number> byte`
 */
function formatBytes(number: number, unit: 'MB' | 'GB' = 'MB'): string {
  const megabytes = number / 1024 / 1024

  switch (unit) {
    case 'MB':
      return `${megabytes} MB`
    case 'GB':
      return `${megabytes / 1024} GB`
    default:
      // Only reachable if a caller bypasses the type of `unit`
      return `${number} byte`
  }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue