This commit is contained in:
s793016 2025-12-02 14:33:58 +01:00 committed by GitHub
commit 2d141526c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 86 additions and 39 deletions

View file

@ -0,0 +1,17 @@
title: $:/changenotes/5.4.0/#9397
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
release: 5.4.0
tags: $:/tags/ChangeNote
change-type: bugfix
change-category: plugin
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397
github-contributors: s793016
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
Fixes:
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by filtering out matches for the current tiddler's own title during match validation, before overlap detection, rather than while the output parse tree is being built, ensuring proper substring handling.
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by no longer merging outputs along failure links while the Aho-Corasick automaton is built (matches are now collected by walking the failure-link chain during search), by eliminating spurious matches from intermediate nodes, and by adding cycle detection.
Impact:
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.

View file

@ -3,8 +3,7 @@ title: $:/core/modules/utils/aho-corasick.js
type: application/javascript
module-type: utils
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
and error handling for TiddlyWiki freelinking functionality.
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance and error handling for TiddlyWiki freelinking functionality.
Usage:
@ -39,7 +38,7 @@ Notes
Word Boundary: Enabling useWordBoundary ensures more precise matches, ideal for link detection scenarios.
Compatibility: Ensure compatibility with other TiddlyWiki modules (e.g., wikiparser.js) when processing WikiText.
Debugging: Use getStats() to inspect the trie structure's size and ensure it does not overload browser memory.
\*/
"use strict";
@ -111,14 +110,9 @@ AhoCorasick.prototype.buildFailureLinks = function() {
var failureLink = (fail && fail[char]) ? fail[char] : root;
this.failure[child] = failureLink;
var failureOutput = this.failure[child];
if(failureOutput && failureOutput.$) {
if(!child.$) {
child.$ = [];
}
child.$.push.apply(child.$, failureOutput.$);
}
// Do not merge outputs from failure links during build
// Instead, collect matches dynamically by traversing failure links during search
queue.push(child);
}
}
@ -143,6 +137,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
var char = text[i];
var transitionCount = 0;
// Follow failure links to find a valid transition
while(node && !node[char] && node !== this.trie && transitionCount < this.maxFailureDepth) {
node = this.failure[node] || this.trie;
transitionCount++;
@ -157,9 +152,19 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
}
}
// Traverse the current node and its failure link chain to gather all patterns
var currentNode = node;
var collectCount = 0;
var visitedNodes = new Set();
while(currentNode && collectCount < 10) {
// Prevent infinite loops
if(visitedNodes.has(currentNode)) {
break;
}
visitedNodes.add(currentNode);
// Only collect outputs from the current node (not merged ones)
if(currentNode.$) {
var outputs = currentNode.$;
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
@ -167,6 +172,11 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
var matchStart = i - output.length + 1;
var matchEnd = i + 1;
var matchedText = text.substring(matchStart, matchEnd);
if(matchedText !== output.pattern) {
continue;
}
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
continue;
}
@ -179,6 +189,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
});
}
}
currentNode = this.failure[currentNode];
if(currentNode === this.trie) break;
collectCount++;

View file

@ -84,7 +84,8 @@ TextNodeWidget.prototype.execute = function() {
if(this.tiddlerTitleInfo.titles.length > 0) {
var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary);
if(newParseTree.length > 1 || newParseTree[0].type !== "plain-text") {
if(newParseTree && newParseTree.length > 0 &&
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
childParseTree = newParseTree;
}
}
@ -94,6 +95,10 @@ TextNodeWidget.prototype.execute = function() {
};
TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerTitle, ignoreCase, useWordBoundary) {
if(!text || text.length === 0) {
return [{type: "plain-text", text: text}];
}
var searchText = ignoreCase ? text.toLowerCase() : text;
var matches;
@ -108,8 +113,10 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
}
matches.sort(function(a, b) {
var posDiff = a.index - b.index;
return posDiff !== 0 ? posDiff : b.length - a.length;
if(a.index !== b.index) {
return a.index - b.index;
}
return b.length - a.length;
});
var processedPositions = new FastPositionSet();
@ -120,6 +127,23 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
var matchStart = match.index;
var matchEnd = matchStart + match.length;
if(matchStart < 0 || matchEnd > text.length) {
continue;
}
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
var titleToCompare = ignoreCase ?
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
currentTiddlerTitle;
var matchedTitleToCompare = ignoreCase ?
(matchedTitle ? matchedTitle.toLowerCase() : "") :
matchedTitle;
if(titleToCompare && matchedTitleToCompare === titleToCompare) {
continue;
}
var hasOverlap = false;
for(var pos = matchStart; pos < matchEnd && !hasOverlap; pos++) {
if(processedPositions.has(pos)) {
@ -148,39 +172,36 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
var matchEnd = matchStart + match.length;
if(matchStart > currentPos) {
var beforeText = text.substring(currentPos, matchStart);
newParseTree.push({
type: "plain-text",
text: text.slice(currentPos, matchStart)
text: beforeText
});
}
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
var matchedText = text.substring(matchStart, matchEnd);
if(matchedTitle === currentTiddlerTitle) {
newParseTree.push({
newParseTree.push({
type: "link",
attributes: {
to: {type: "string", value: matchedTitle},
"class": {type: "string", value: "tc-freelink"}
},
children: [{
type: "plain-text",
text: text.slice(matchStart, matchEnd)
});
} else {
newParseTree.push({
type: "link",
attributes: {
to: {type: "string", value: matchedTitle},
"class": {type: "string", value: "tc-freelink"}
},
children: [{
type: "plain-text",
text: text.slice(matchStart, matchEnd)
}]
});
}
text: matchedText
}]
});
currentPos = matchEnd;
}
if(currentPos < text.length) {
var remainingText = text.substring(currentPos);
newParseTree.push({
type: "plain-text",
text: text.slice(currentPos)
text: remainingText
});
}
@ -203,7 +224,6 @@ function computeTiddlerTitleInfo(self, ignoreCase) {
var validTitles = [];
var ac = new AhoCorasick();
// Process titles in a single pass to avoid duplication
for(var i = 0; i < titles.length; i++) {
var title = titles[i];
if(title && title.length > 0 && title.substring(0,3) !== "$:/") {
@ -214,17 +234,16 @@ function computeTiddlerTitleInfo(self, ignoreCase) {
}
}
// Sort by length (descending) then alphabetically
// Longer titles are prioritized to avoid partial matches (e.g., "JavaScript" before "Java")
var sortedTitles = validTitles.sort(function(a,b) {
var lenDiff = b.length - a.length;
return lenDiff !== 0 ? lenDiff : (a < b ? -1 : a > b ? 1 : 0);
if(lenDiff !== 0) return lenDiff;
return a < b ? -1 : a > b ? 1 : 0;
});
// Build Aho-Corasick automaton
for(var i = 0; i < sortedTitles.length; i++) {
var title = sortedTitles[i];
ac.addPattern(ignoreCase ? title.toLowerCase() : title, i);
var pattern = ignoreCase ? title.toLowerCase() : title;
ac.addPattern(pattern, i);
}
try {