/**
 * N-gram operation.
 *
 * Slides a fixed-width window over the input one position at a time and
 * joins every window with the chosen delimiter. Windows are measured in
 * UTF-16 code units because they are cut with String.prototype.slice.
 */
class Ngram extends Operation {

    /**
     * Ngram constructor
     */
    constructor() {
        super();

        this.name = "N-gram";
        this.module = "Default";
        this.description = "Extracts n-grams from the input text. N-grams are contiguous sequences of n characters from a given text sample.";
        this.infoURL = "https://wikipedia.org/wiki/N-gram";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "N-gram size",
                type: "number",
                value: 3
            },
            {
                name: "Join delimiter",
                type: "editableOptionShort",
                value: JOIN_DELIM_OPTIONS
            }
        ];
    }

    /**
     * Extracts every contiguous n-gram from the input.
     *
     * @param {string} input - text to split into n-grams
     * @param {Object[]} args - [n-gram size, join delimiter]
     * @returns {string} the n-grams joined by the delimiter, or an empty
     *     string when the size is not at least 1 or exceeds the input length
     */
    run(input, args) {
        const [rawSize, joinDelim] = args;

        // Coerce once: a size that arrives as a string would otherwise turn
        // `i + size` into string concatenation. Truncating matches how slice()
        // treats fractional indices, so the loop bound and the window agree.
        const size = Math.trunc(Number(rawSize));

        // `!(size >= 1)` also rejects NaN. A zero size used to emit nothing but
        // delimiters and a negative size made slice() count from the end.
        if (!(size >= 1) || size > input.length) {
            return "";
        }

        const ngrams = [];
        for (let start = 0; start + size <= input.length; start++) {
            ngrams.push(input.slice(start, start + size));
        }

        return ngrams.join(joinDelim);
    }

}
/**
 * Builds one single-operation recipe test for "N-gram".
 *
 * Every case feeds the input "hello" and joins with a comma; only the test
 * name, the n-gram size and the expected output differ between cases.
 *
 * @param {string} name - test name
 * @param {string} size - n-gram size argument passed to the operation
 * @param {string} expectedOutput - output the recipe is expected to produce
 * @returns {Object} test definition in the shape passed to TestRegister.addTests
 */
const ngramTest = (name, size, expectedOutput) => ({
    name,
    input: "hello",
    expectedOutput,
    recipeConfig: [
        {
            op: "N-gram",
            args: [size, ","],
        },
    ],
});

TestRegister.addTests([
    // Trigrams of a five-character input: three overlapping windows.
    ngramTest("Ngram 3", "3", "hel,ell,llo"),
    // A window wider than the input produces no n-grams at all.
    ngramTest("Ngram longer than input", "6", ""),
]);