diff --git a/src/core/operations/OpticalCharacterRecognition.mjs b/src/core/operations/OpticalCharacterRecognition.mjs index fd028e4b..f2766914 100644 --- a/src/core/operations/OpticalCharacterRecognition.mjs +++ b/src/core/operations/OpticalCharacterRecognition.mjs @@ -15,6 +15,7 @@ import { isWorkerEnvironment } from "../Utils.mjs"; import { createWorker } from "tesseract.js"; const OEM_MODES = ["Tesseract only", "LSTM only", "Tesseract/LSTM Combined"]; +const OCR_DEFAULT_WHITELIST = ""; /** * Optical Character Recognition operation @@ -34,6 +35,11 @@ class OpticalCharacterRecognition extends Operation { this.inputType = "ArrayBuffer"; this.outputType = "string"; this.args = [ + { + name: "Character whitelist (optional)", + type: "string", + value: OCR_DEFAULT_WHITELIST + }, { name: "Show confidence", type: "boolean", @@ -61,7 +67,7 @@ class OpticalCharacterRecognition extends Operation { * @returns {string} */ async run(input, args) { - const [showConfidence, oemChoice] = args; + const [whitelist, showConfidence, oemChoice] = args; if (!isWorkerEnvironment()) throw new OperationError("This operation only works in a browser"); @@ -86,6 +92,10 @@ class OpticalCharacterRecognition extends Operation { } } }); + self.sendStatusMessage("Configuring OCR parameters..."); + if (whitelist && whitelist.length) { + await worker.setParameters({ /* eslint-disable camelcase */ tessedit_char_whitelist: whitelist /* eslint-enable camelcase */ }); + } self.sendStatusMessage("Finding text..."); const result = await worker.recognize(image); diff --git a/src/core/operations/RenderImage.mjs b/src/core/operations/RenderImage.mjs index 5dee6d3c..61b57de6 100644 --- a/src/core/operations/RenderImage.mjs +++ b/src/core/operations/RenderImage.mjs @@ -8,7 +8,7 @@ import { fromBase64, toBase64 } from "../lib/Base64.mjs"; import { fromHex } from "../lib/Hex.mjs"; import Operation from "../Operation.mjs"; import OperationError from "../errors/OperationError.mjs"; -import Utils from "../Utils.mjs"; +import Utils, { isWorkerEnvironment } from "../Utils.mjs"; import {isImage} from "../lib/FileType.mjs"; /** @@ -104,7 +104,12 @@ class RenderImage extends Operation { // Add image data to URI dataURI += "base64," + toBase64(data); - return ""; + let html = ""; + if (isWorkerEnvironment()) { + const ocrLink = "#recipe=Optical_Character_Recognition('Show confidence',true,'OCR Engine Mode','LSTM only')"; + html = "
" + html + "
"; + } + return html; } }