mirror of
https://gitlab.com/MrFry/mrfrys-node-server
synced 2025-04-01 20:24:18 +02:00
tesseract worker restart to avoid memory leak
This commit is contained in:
parent
96b413a365
commit
0259cfe1a7
3 changed files with 85 additions and 25 deletions
|
@ -287,14 +287,16 @@ function createQuestion(
|
||||||
async function recognizeQuestionImage(question: Question): Promise<Question> {
|
async function recognizeQuestionImage(question: Question): Promise<Question> {
|
||||||
const base64Data = question.data.base64
|
const base64Data = question.data.base64
|
||||||
if (Array.isArray(base64Data) && base64Data.length) {
|
if (Array.isArray(base64Data) && base64Data.length) {
|
||||||
try {
|
|
||||||
const res: string[] = []
|
const res: string[] = []
|
||||||
for (let i = 0; i < base64Data.length; i++) {
|
for (let i = 0; i < base64Data.length; i++) {
|
||||||
const base64 = base64Data[i]
|
const base64 = base64Data[i]
|
||||||
const text = await recognizeTextFromBase64(base64)
|
const text = await recognizeTextFromBase64(base64)
|
||||||
|
if (text && text.trim()) {
|
||||||
res.push(text)
|
res.push(text)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res.length) {
|
||||||
return {
|
return {
|
||||||
...question,
|
...question,
|
||||||
Q: res.join(' '),
|
Q: res.join(' '),
|
||||||
|
@ -303,9 +305,6 @@ async function recognizeQuestionImage(question: Question): Promise<Question> {
|
||||||
type: 'simple',
|
type: 'simple',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
console.error('Error happened in recognizeQuestionImage!')
|
|
||||||
console.error(e)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -972,5 +971,4 @@ export {
|
||||||
dataToString,
|
dataToString,
|
||||||
doSearch,
|
doSearch,
|
||||||
setNoPossibleAnswersPenalties,
|
setNoPossibleAnswersPenalties,
|
||||||
recognizeQuestionImage,
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,12 @@ import {
|
||||||
} from 'tesseract.js'
|
} from 'tesseract.js'
|
||||||
|
|
||||||
import logger from './logger'
|
import logger from './logger'
|
||||||
|
import utils from './utils'
|
||||||
import { isMainThread, workerData } from 'worker_threads'
|
import { isMainThread, workerData } from 'worker_threads'
|
||||||
|
|
||||||
|
let recognizeCount = 0
|
||||||
|
const MAX_ALLOWED_RECOGNIZE_COUNT = 100
|
||||||
|
|
||||||
// https://github.com/naptha/tesseract.js/blob/master/docs/api.md
|
// https://github.com/naptha/tesseract.js/blob/master/docs/api.md
|
||||||
let tesseractWorker: TesseractWorker = null
|
let tesseractWorker: TesseractWorker = null
|
||||||
export async function initTesseractWorker(): Promise<TesseractWorker> {
|
export async function initTesseractWorker(): Promise<TesseractWorker> {
|
||||||
|
@ -17,17 +21,6 @@ export async function initTesseractWorker(): Promise<TesseractWorker> {
|
||||||
await worker.load()
|
await worker.load()
|
||||||
await worker.loadLanguage('hun+eng')
|
await worker.loadLanguage('hun+eng')
|
||||||
await worker.initialize('hun+eng')
|
await worker.initialize('hun+eng')
|
||||||
return worker
|
|
||||||
// await worker.terminate();
|
|
||||||
}
|
|
||||||
|
|
||||||
let resolveLoaded: () => void = null
|
|
||||||
export const tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
|
||||||
resolveLoaded = resolve
|
|
||||||
})
|
|
||||||
|
|
||||||
initTesseractWorker().then((worker) => {
|
|
||||||
tesseractWorker = worker
|
|
||||||
|
|
||||||
if (isMainThread) {
|
if (isMainThread) {
|
||||||
logger.Log('Tesseract loaded on main thread')
|
logger.Log('Tesseract loaded on main thread')
|
||||||
|
@ -35,14 +28,70 @@ initTesseractWorker().then((worker) => {
|
||||||
const { workerIndex }: { workerIndex: number } = workerData
|
const { workerIndex }: { workerIndex: number } = workerData
|
||||||
logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
|
logger.Log(`[THREAD #${workerIndex}]: Tesseract loaded`)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return worker
|
||||||
|
}
|
||||||
|
|
||||||
|
let resolveLoaded: () => void = null
|
||||||
|
export let tesseractLoaded: Promise<void> = new Promise((resolve) => {
|
||||||
|
resolveLoaded = resolve
|
||||||
|
})
|
||||||
|
|
||||||
|
/**
 * Terminates the current Tesseract worker and replaces it with a freshly
 * initialized one.
 *
 * While the restart is running, `tesseractLoaded` is swapped for a new pending
 * promise so that code awaiting it waits for the replacement worker.
 *
 * @returns A promise that settles once the new worker is assigned.
 * @throws Re-throws whatever `terminate()` or `initTesseractWorker()` rejects
 * with. The loaded-gate is still released in that case, so callers awaiting
 * `tesseractLoaded` fail on the worker call instead of hanging forever.
 */
export async function restartTesseractWorker(): Promise<void> {
    // The Promise executor runs synchronously, so right after construction
    // `resolveLoaded` is the resolver of exactly this promise. Keep a local
    // copy: an overlapping restart may overwrite the shared variable before
    // this one finishes.
    const loaded = new Promise<void>((resolve) => {
        resolveLoaded = resolve
    })
    const release = resolveLoaded
    tesseractLoaded = loaded

    try {
        // The worker is still null if the very first initialization has not
        // completed yet; there is nothing to terminate in that case.
        if (tesseractWorker) {
            await tesseractWorker.terminate()
        }
        tesseractWorker = await initTesseractWorker()
    } finally {
        // Always open the gate, even on failure, otherwise every later
        // `await tesseractLoaded` would block indefinitely.
        release()
    }
}
|
||||||
|
|
||||||
|
initTesseractWorker().then((worker) => {
|
||||||
|
tesseractWorker = worker
|
||||||
resolveLoaded()
|
resolveLoaded()
|
||||||
})
|
})
|
||||||
|
|
||||||
/**
 * Runs OCR on a single base64 encoded image with the shared Tesseract worker.
 *
 * Waits for `tesseractLoaded` first, so it is safe to call while the worker is
 * still starting up or being restarted.
 *
 * @param base64 - The image to recognize, passed as-is to `worker.recognize`.
 * @returns The recognized text, or `null` if recognition failed. After a
 * failure a worker restart is attempted before returning.
 */
export async function recognizeTextFromBase64(
    base64: string
): Promise<string | null> {
    await tesseractLoaded
    try {
        // TODO: somehow integrate confidence
        const {
            data: { text /*, confidence */ },
        } = await tesseractWorker.recognize(base64)

        recognizeCount += 1
        // Intentionally not awaited: the recognized text is returned without
        // waiting for a possible restart. `restartIfNecesarry` handles its own
        // errors, so this cannot surface as an unhandled rejection.
        void restartIfNecesarry()
        return text
    } catch (e) {
        logger.Log(
            'Error happened during recognizing base64 text!',
            logger.GetColor('redbg')
        )
        console.error(e)

        try {
            await restartTesseractWorker()
        } catch (restartError) {
            // Keep the "null on failure" contract even if the restart itself
            // fails; the next call will fail and retry the restart.
            console.error(restartError)
        }

        return null
    }
}

/**
 * Restarts the Tesseract worker once more than `MAX_ALLOWED_RECOGNIZE_COUNT`
 * recognitions have been done since the last restart, and logs the change in
 * the process RSS around the restart.
 *
 * Never rejects: restart errors are logged here because the caller does not
 * await this function.
 */
async function restartIfNecesarry(): Promise<void> {
    if (recognizeCount <= MAX_ALLOWED_RECOGNIZE_COUNT) {
        return
    }

    // Reset before awaiting so that a recognition finishing while the restart
    // is still in flight does not see the stale count and restart again.
    recognizeCount = 0

    logger.Log('Restarting Tesseract worker')

    const memBefore = process.memoryUsage().rss

    try {
        await restartTesseractWorker()
    } catch (e) {
        logger.Log(
            'Error happened during restarting Tesseract worker!',
            logger.GetColor('redbg')
        )
        console.error(e)
        return
    }

    const memAfter = process.memoryUsage().rss
    const freed = utils.formatBytes(memBefore - memAfter)
    logger.Log(`Restarted tesseract worker, freed up ${freed} memory`)
}
|
||||||
|
|
||||||
export async function terminateWorker(): Promise<void | ConfigResult> {
|
export async function terminateWorker(): Promise<void | ConfigResult> {
|
||||||
|
|
|
@ -36,6 +36,7 @@ export default {
|
||||||
statFile: statFile,
|
statFile: statFile,
|
||||||
renameFile: renameFile,
|
renameFile: renameFile,
|
||||||
deleteDir: deleteDir,
|
deleteDir: deleteDir,
|
||||||
|
formatBytes: formatBytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
import fs from 'fs'
|
import fs from 'fs'
|
||||||
|
@ -296,3 +297,15 @@ function renameFile(oldPath: string, newPath: string): string {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Formats a byte count as a string in the requested unit.
 *
 * The value is divided by 1024 per step (bytes -> KB -> MB -> GB) and is not
 * rounded, so fractional results are printed in full.
 *
 * @param number - The amount in bytes; negative values are passed through.
 * @param unit - Target unit, `'MB'` by default.
 * @returns `"<value> MB"` or `"<value> GB"`; for any other unit value that
 * slips past the type system, the untouched input as `"<number> byte"`.
 */
function formatBytes(number: number, unit: 'MB' | 'GB' = 'MB'): string {
    const megabytes = number / 1024 / 1024

    switch (unit) {
        case 'MB':
            return `${megabytes} MB`
        case 'GB':
            return `${megabytes / 1024} GB`
        default:
            // Only reachable when the type annotation is bypassed at runtime.
            return `${number} byte`
    }
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue