mirror of
https://github.com/gchq/CyberChef.git
synced 2026-01-30 12:20:33 -08:00
feat(image+ocr): add OCR quick-action on Render Image and OCR whitelist option
- Render Image (browser only): overlay a small ✎ button on the rendered image that links to an OCR recipe for quick access.
- OCR: add an optional "Character whitelist" argument and apply it via the Tesseract `tessedit_char_whitelist` parameter to improve accuracy on restricted alphabets.

Authored-by: Izai Alejandro Zalles Merino <zallesrene@gmail.com>
This commit is contained in:
parent
de2a83c0a0
commit
d3c512e878
2 changed files with 18 additions and 3 deletions
|
|
@ -15,6 +15,7 @@ import { isWorkerEnvironment } from "../Utils.mjs";
|
|||
import { createWorker } from "tesseract.js";
|
||||
|
||||
const OEM_MODES = ["Tesseract only", "LSTM only", "Tesseract/LSTM Combined"];
|
||||
const OCR_DEFAULT_WHITELIST = "";
|
||||
|
||||
/**
|
||||
* Optical Character Recognition operation
|
||||
|
|
@ -34,6 +35,11 @@ class OpticalCharacterRecognition extends Operation {
|
|||
this.inputType = "ArrayBuffer";
|
||||
this.outputType = "string";
|
||||
this.args = [
|
||||
{
|
||||
name: "Character whitelist (optional)",
|
||||
type: "string",
|
||||
value: OCR_DEFAULT_WHITELIST
|
||||
},
|
||||
{
|
||||
name: "Show confidence",
|
||||
type: "boolean",
|
||||
|
|
@ -61,7 +67,7 @@ class OpticalCharacterRecognition extends Operation {
|
|||
* @returns {string}
|
||||
*/
|
||||
async run(input, args) {
|
||||
const [showConfidence, oemChoice] = args;
|
||||
const [whitelist, showConfidence, oemChoice] = args;
|
||||
|
||||
if (!isWorkerEnvironment()) throw new OperationError("This operation only works in a browser");
|
||||
|
||||
|
|
@ -86,6 +92,10 @@ class OpticalCharacterRecognition extends Operation {
|
|||
}
|
||||
}
|
||||
});
|
||||
self.sendStatusMessage("Configuring OCR parameters...");
|
||||
if (whitelist && whitelist.length) {
|
||||
await worker.setParameters({ /* eslint-disable camelcase */ tessedit_char_whitelist: whitelist /* eslint-enable camelcase */ });
|
||||
}
|
||||
self.sendStatusMessage("Finding text...");
|
||||
const result = await worker.recognize(image);
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import { fromBase64, toBase64 } from "../lib/Base64.mjs";
|
|||
import { fromHex } from "../lib/Hex.mjs";
|
||||
import Operation from "../Operation.mjs";
|
||||
import OperationError from "../errors/OperationError.mjs";
|
||||
import Utils from "../Utils.mjs";
|
||||
import Utils, { isWorkerEnvironment } from "../Utils.mjs";
|
||||
import {isImage} from "../lib/FileType.mjs";
|
||||
|
||||
/**
|
||||
|
|
@ -104,7 +104,12 @@ class RenderImage extends Operation {
|
|||
// Add image data to URI
|
||||
dataURI += "base64," + toBase64(data);
|
||||
|
||||
return "<img src='" + dataURI + "'>";
|
||||
let html = "<img src='" + dataURI + "'>";
|
||||
if (isWorkerEnvironment()) {
|
||||
const ocrLink = "#recipe=Optical_Character_Recognition('Show confidence',true,'OCR Engine Mode','LSTM only')";
|
||||
html = "<div style=\"position:relative; display:inline-block;\">" + html + "<a href=\"" + ocrLink + "\" title=\"Run OCR\" style=\"position:absolute; top:8px; right:8px; background:rgba(0,0,0,.6); color:#fff; padding:4px 6px; border-radius:4px; text-decoration:none; font-weight:bold;\">✎</a></div>";
|
||||
}
|
||||
return html;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue