mirror of
https://github.com/Jermolene/TiddlyWiki5.git
synced 2025-12-06 02:30:46 -08:00
Merge 2522705ee4 into 5cd3084298
This commit is contained in:
commit
2d141526c0
3 changed files with 86 additions and 39 deletions
17
editions/tw5.com/tiddlers/releasenotes/5.4.0/#9397.tid
Normal file
17
editions/tw5.com/tiddlers/releasenotes/5.4.0/#9397.tid
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
title: $:/changenotes/5.4.0/#9397
|
||||
description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0
|
||||
release: 5.4.0
|
||||
tags: $:/tags/ChangeNote
|
||||
change-type: bugfix
|
||||
change-category: plugin
|
||||
github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397
|
||||
github-contributors: s793016
|
||||
|
||||
This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release:
|
||||
|
||||
Fixes:
|
||||
* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by filtering out the current tiddler's own title during match validation, before overlaps are resolved, rather than while the output is being built, so that the surrounding text is sliced at the correct positions.
|
||||
* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by no longer merging failure-link outputs into nodes while the Aho-Corasick automaton is built (matches are instead collected by walking the failure-link chain during the search), which eliminates spurious matches from intermediate nodes, and by adding cycle detection to that traversal.
|
||||
|
||||
Impact:
|
||||
* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis.
|
||||
|
|
@ -3,8 +3,7 @@ title: $:/core/modules/utils/aho-corasick.js
|
|||
type: application/javascript
|
||||
module-type: utils
|
||||
|
||||
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance
|
||||
and error handling for TiddlyWiki freelinking functionality.
|
||||
Optimized Aho-Corasick string matching algorithm implementation with enhanced performance and error handling for TiddlyWiki freelinking functionality.
|
||||
|
||||
Usage:
|
||||
|
||||
|
|
@ -39,7 +38,7 @@ Notes
|
|||
Word Boundary: Enabling useWordBoundary ensures more precise matches, ideal for link detection scenarios.
|
||||
Compatibility: Ensure compatibility with other TiddlyWiki modules (e.g., wikiparser.js) when processing WikiText.
|
||||
Debugging: Use getStats() to inspect the trie structure's size and ensure it does not overload browser memory.
|
||||
|
||||
|
||||
\*/
|
||||
|
||||
"use strict";
|
||||
|
|
@ -111,14 +110,9 @@ AhoCorasick.prototype.buildFailureLinks = function() {
|
|||
var failureLink = (fail && fail[char]) ? fail[char] : root;
|
||||
this.failure[child] = failureLink;
|
||||
|
||||
var failureOutput = this.failure[child];
|
||||
if(failureOutput && failureOutput.$) {
|
||||
if(!child.$) {
|
||||
child.$ = [];
|
||||
}
|
||||
child.$.push.apply(child.$, failureOutput.$);
|
||||
}
|
||||
|
||||
// Do not merge outputs from failure links during build
|
||||
// Instead, collect matches dynamically by traversing failure links during search
|
||||
|
||||
queue.push(child);
|
||||
}
|
||||
}
|
||||
|
|
@ -143,6 +137,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
|
|||
var char = text[i];
|
||||
var transitionCount = 0;
|
||||
|
||||
// Follow failure links to find a valid transition
|
||||
while(node && !node[char] && node !== this.trie && transitionCount < this.maxFailureDepth) {
|
||||
node = this.failure[node] || this.trie;
|
||||
transitionCount++;
|
||||
|
|
@ -157,9 +152,19 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
|
|||
}
|
||||
}
|
||||
|
||||
// Traverse the current node and its failure link chain to gather all patterns
|
||||
var currentNode = node;
|
||||
var collectCount = 0;
|
||||
var visitedNodes = new Set();
|
||||
|
||||
while(currentNode && collectCount < 10) {
|
||||
// Prevent infinite loops
|
||||
if(visitedNodes.has(currentNode)) {
|
||||
break;
|
||||
}
|
||||
visitedNodes.add(currentNode);
|
||||
|
||||
// Only collect outputs from the current node (not merged ones)
|
||||
if(currentNode.$) {
|
||||
var outputs = currentNode.$;
|
||||
for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) {
|
||||
|
|
@ -167,6 +172,11 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
|
|||
var matchStart = i - output.length + 1;
|
||||
var matchEnd = i + 1;
|
||||
|
||||
var matchedText = text.substring(matchStart, matchEnd);
|
||||
if(matchedText !== output.pattern) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -179,6 +189,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
currentNode = this.failure[currentNode];
|
||||
if(currentNode === this.trie) break;
|
||||
collectCount++;
|
||||
|
|
|
|||
|
|
@ -84,7 +84,8 @@ TextNodeWidget.prototype.execute = function() {
|
|||
|
||||
if(this.tiddlerTitleInfo.titles.length > 0) {
|
||||
var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary);
|
||||
if(newParseTree.length > 1 || newParseTree[0].type !== "plain-text") {
|
||||
if(newParseTree && newParseTree.length > 0 &&
|
||||
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
|
||||
childParseTree = newParseTree;
|
||||
}
|
||||
}
|
||||
|
|
@ -94,6 +95,10 @@ TextNodeWidget.prototype.execute = function() {
|
|||
};
|
||||
|
||||
TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerTitle, ignoreCase, useWordBoundary) {
|
||||
if(!text || text.length === 0) {
|
||||
return [{type: "plain-text", text: text}];
|
||||
}
|
||||
|
||||
var searchText = ignoreCase ? text.toLowerCase() : text;
|
||||
var matches;
|
||||
|
||||
|
|
@ -108,8 +113,10 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
|
|||
}
|
||||
|
||||
matches.sort(function(a, b) {
|
||||
var posDiff = a.index - b.index;
|
||||
return posDiff !== 0 ? posDiff : b.length - a.length;
|
||||
if(a.index !== b.index) {
|
||||
return a.index - b.index;
|
||||
}
|
||||
return b.length - a.length;
|
||||
});
|
||||
|
||||
var processedPositions = new FastPositionSet();
|
||||
|
|
@ -120,6 +127,23 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
|
|||
var matchStart = match.index;
|
||||
var matchEnd = matchStart + match.length;
|
||||
|
||||
if(matchStart < 0 || matchEnd > text.length) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
|
||||
|
||||
var titleToCompare = ignoreCase ?
|
||||
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
|
||||
currentTiddlerTitle;
|
||||
var matchedTitleToCompare = ignoreCase ?
|
||||
(matchedTitle ? matchedTitle.toLowerCase() : "") :
|
||||
matchedTitle;
|
||||
|
||||
if(titleToCompare && matchedTitleToCompare === titleToCompare) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var hasOverlap = false;
|
||||
for(var pos = matchStart; pos < matchEnd && !hasOverlap; pos++) {
|
||||
if(processedPositions.has(pos)) {
|
||||
|
|
@ -148,39 +172,36 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
|
|||
var matchEnd = matchStart + match.length;
|
||||
|
||||
if(matchStart > currentPos) {
|
||||
var beforeText = text.substring(currentPos, matchStart);
|
||||
newParseTree.push({
|
||||
type: "plain-text",
|
||||
text: text.slice(currentPos, matchStart)
|
||||
text: beforeText
|
||||
});
|
||||
}
|
||||
|
||||
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
|
||||
var matchedText = text.substring(matchStart, matchEnd);
|
||||
|
||||
if(matchedTitle === currentTiddlerTitle) {
|
||||
newParseTree.push({
|
||||
newParseTree.push({
|
||||
type: "link",
|
||||
attributes: {
|
||||
to: {type: "string", value: matchedTitle},
|
||||
"class": {type: "string", value: "tc-freelink"}
|
||||
},
|
||||
children: [{
|
||||
type: "plain-text",
|
||||
text: text.slice(matchStart, matchEnd)
|
||||
});
|
||||
} else {
|
||||
newParseTree.push({
|
||||
type: "link",
|
||||
attributes: {
|
||||
to: {type: "string", value: matchedTitle},
|
||||
"class": {type: "string", value: "tc-freelink"}
|
||||
},
|
||||
children: [{
|
||||
type: "plain-text",
|
||||
text: text.slice(matchStart, matchEnd)
|
||||
}]
|
||||
});
|
||||
}
|
||||
text: matchedText
|
||||
}]
|
||||
});
|
||||
|
||||
currentPos = matchEnd;
|
||||
}
|
||||
|
||||
if(currentPos < text.length) {
|
||||
var remainingText = text.substring(currentPos);
|
||||
newParseTree.push({
|
||||
type: "plain-text",
|
||||
text: text.slice(currentPos)
|
||||
text: remainingText
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -203,7 +224,6 @@ function computeTiddlerTitleInfo(self, ignoreCase) {
|
|||
var validTitles = [];
|
||||
var ac = new AhoCorasick();
|
||||
|
||||
// Process titles in a single pass to avoid duplication
|
||||
for(var i = 0; i < titles.length; i++) {
|
||||
var title = titles[i];
|
||||
if(title && title.length > 0 && title.substring(0,3) !== "$:/") {
|
||||
|
|
@ -214,17 +234,16 @@ function computeTiddlerTitleInfo(self, ignoreCase) {
|
|||
}
|
||||
}
|
||||
|
||||
// Sort by length (descending) then alphabetically
|
||||
// Longer titles are prioritized to avoid partial matches (e.g., "JavaScript" before "Java")
|
||||
var sortedTitles = validTitles.sort(function(a,b) {
|
||||
var lenDiff = b.length - a.length;
|
||||
return lenDiff !== 0 ? lenDiff : (a < b ? -1 : a > b ? 1 : 0);
|
||||
if(lenDiff !== 0) return lenDiff;
|
||||
return a < b ? -1 : a > b ? 1 : 0;
|
||||
});
|
||||
|
||||
// Build Aho-Corasick automaton
|
||||
for(var i = 0; i < sortedTitles.length; i++) {
|
||||
var title = sortedTitles[i];
|
||||
ac.addPattern(ignoreCase ? title.toLowerCase() : title, i);
|
||||
var pattern = ignoreCase ? title.toLowerCase() : title;
|
||||
ac.addPattern(pattern, i);
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue