diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 8b62046e3..4c47d658f 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -49,6 +49,7 @@
             "Escape Unicode Characters",
             "Unescape Unicode Characters",
             "Normalise Unicode",
+            "To Fullwidth",
             "To Quoted Printable",
             "From Quoted Printable",
             "To Punycode",
diff --git a/src/core/operations/ToFullwidth.mjs b/src/core/operations/ToFullwidth.mjs
new file mode 100644
index 000000000..b93fc61cb
--- /dev/null
+++ b/src/core/operations/ToFullwidth.mjs
@@ -0,0 +1,63 @@
+/**
+ * @author jyeu [chen@jyeu.xyz]
+ * @copyright Crown Copyright 2026
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+
+/**
+ * To Fullwidth operation
+ */
+class ToFullwidth extends Operation {
+    /**
+     * ToFullwidth constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "To Fullwidth";
+        this.module = "Encodings";
+        // The fullwidth example is written with \u escapes so it survives any
+        // Unicode normalisation (NFKC) applied to this source file.
+        this.description =
+            "Converts ASCII (halfwidth) characters to their fullwidth Unicode equivalents (U+FF01–U+FF5E). Commonly used in security testing to bypass WAF keyword filters, evade regex-based blocklists, and exploit Unicode normalization (NFKC/NFKD) vulnerabilities in web applications and path parsers. " +
+            "For example, \uFF0F\uFF41\uFF44\uFF4D\uFF49\uFF4E may bypass a WAF rule matching /admin if the backend normalises Unicode before routing.";
+        this.infoURL =
+            "https://wikipedia.org/wiki/Halfwidth_and_fullwidth_forms_(Unicode_block)";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [];
+    }
+
+    /**
+     * Maps the space and printable ASCII to their fullwidth forms; everything else is left as-is.
+     *
+     * @param {string} input
+     * @param {Object[]} _args - Unused; this operation takes no arguments
+     * @returns {string}
+     */
+    run(input, _args) {
+        let output = "";
+
+        // for...of walks the string by code point, so surrogate pairs stay intact
+        for (const char of input) {
+            const code = char.codePointAt(0);
+
+            if (code === 0x20) {
+                // Regular space -> Ideographic space (U+3000)
+                output += "\u3000";
+            } else if (code >= 0x21 && code <= 0x7e) {
+                // Printable ASCII -> Fullwidth equivalents (U+FF01–U+FF5E), a fixed offset of 0xFEE0
+                output += String.fromCodePoint(code + 0xfee0);
+            } else {
+                // Control characters (e.g. newlines) and non-ASCII characters pass through unchanged
+                output += char;
+            }
+        }
+
+        return output;
+    }
+}
+
+export default ToFullwidth;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index fb03a5f71..bd08a7c7e 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -167,6 +167,7 @@ import "./tests/TakeNthBytes.mjs";
 import "./tests/Template.mjs";
 import "./tests/TextEncodingBruteForce.mjs";
 import "./tests/TextIntegerConverter.mjs";
 import "./tests/ToFromInsensitiveRegex.mjs";
+import "./tests/ToFullwidth.mjs";
 import "./tests/TranslateDateTimeFormat.mjs";
 import "./tests/Typex.mjs";
diff --git a/tests/operations/tests/ToFullwidth.mjs b/tests/operations/tests/ToFullwidth.mjs
new file mode 100644
index 000000000..0d1c52fd9
--- /dev/null
+++ b/tests/operations/tests/ToFullwidth.mjs
@@ -0,0 +1,155 @@
+/**
+ * To Fullwidth tests.
+ *
+ * @author jyeu [chen@jyeu.xyz]
+ *
+ * @copyright Crown Copyright 2026
+ * @license Apache-2.0
+ */
+import TestRegister from "../../lib/TestRegister.mjs";
+
+TestRegister.addTests([
+    {
+        name: "To Fullwidth: empty string",
+        input: "",
+        expectedOutput: "",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: lowercase letters",
+        input: "admin",
+        expectedOutput: "\uFF41\uFF44\uFF4D\uFF49\uFF4E",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: uppercase letters",
+        input: "ADMIN",
+        expectedOutput: "\uFF21\uFF24\uFF2D\uFF29\uFF2E",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: digits",
+        input: "0123456789",
+        expectedOutput: "\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: space becomes ideographic space (U+3000)",
+        input: "hello world",
+        expectedOutput: "\uFF48\uFF45\uFF4C\uFF4C\uFF4F\u3000\uFF57\uFF4F\uFF52\uFF4C\uFF44",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: common punctuation and symbols",
+        input: "!@#$%^&*()_+-=[]{}|;':\",./<>?",
+        expectedOutput: "\uFF01\uFF20\uFF03\uFF04\uFF05\uFF3E\uFF06\uFF0A\uFF08\uFF09\uFF3F\uFF0B\uFF0D\uFF1D\uFF3B\uFF3D\uFF5B\uFF5D\uFF5C\uFF1B\uFF07\uFF1A\uFF02\uFF0C\uFF0E\uFF0F\uFF1C\uFF1E\uFF1F",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: slash for WAF bypass simulation",
+        input: "/admin/secret",
+        expectedOutput: "\uFF0F\uFF41\uFF44\uFF4D\uFF49\uFF4E\uFF0F\uFF53\uFF45\uFF43\uFF52\uFF45\uFF54",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: non-ASCII characters pass through unchanged",
+        input: "你好世界",
+        expectedOutput: "你好世界",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: characters outside the BMP pass through unchanged",
+        input: "a\uD83D\uDE00b",
+        expectedOutput: "\uFF41\uD83D\uDE00\uFF42",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: newline passes through unchanged",
+        input: "line1\nline2",
+        expectedOutput: "\uFF4C\uFF49\uFF4E\uFF45\uFF11\n\uFF4C\uFF49\uFF4E\uFF45\uFF12",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: mixed ASCII and non-ASCII",
+        input: "hello,世界!",
+        expectedOutput: "\uFF48\uFF45\uFF4C\uFF4C\uFF4F\uFF0C世界\uFF01",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: boundary character 0x21 (!)",
+        input: "!",
+        expectedOutput: "\uFF01",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+    {
+        name: "To Fullwidth: boundary character 0x7E (~)",
+        input: "~",
+        expectedOutput: "\uFF5E",
+        recipeConfig: [
+            {
+                op: "To Fullwidth",
+                args: [],
+            },
+        ],
+    },
+]);