1
Fork 0
mirror of git://git.sv.gnu.org/emacs.git synced 2026-03-06 05:52:32 -08:00

New regex tests imported from glibc 2.21

* test/src/regex-resources/BOOST.tests:
* test/src/regex-resources/PCRE.tests:
* test/src/regex-resources/PTESTS:
* test/src/regex-resources/TESTS:
New test data files

[mina86@mina86.com: Moved files from test/src/regex/* to test/src/*.]
This commit is contained in:
Dima Kogan 2016-03-11 18:18:14 -08:00 committed by Michal Nazarewicz
parent 53917616b0
commit 6db72771cc
4 changed files with 3723 additions and 0 deletions

View file

@ -0,0 +1,829 @@
;
;
; this file contains a script of tests to run through regress.exe
;
; comments start with a semicolon and proceed to the end of the line
;
; changes to regular expression compile flags start with a "-" as the first
; non-whitespace character and consist of a list of the printable names
; of the flags, for example "match_default"
;
; Other lines contain a test to perform using the current flag status
; the first token contains the expression to compile, the second the string
; to match it against. If the second string is "!" then the expression should
; not compile, that is the first string is an invalid regular expression.
; This is then followed by a list of integers that specify what should match,
; each pair represents the starting and ending positions of a subexpression
; starting with the zeroth subexpression (the whole match).
; A value of -1 indicates that the subexpression should not take part in the
; match at all, if the first value is -1 then no part of the expression should
; match the string.
;
; Tests taken from BOOST testsuite and adapted to glibc regex.
;
; Boost Software License - Version 1.0 - August 17th, 2003
;
; Permission is hereby granted, free of charge, to any person or organization
; obtaining a copy of the software and accompanying documentation covered by
; this license (the "Software") to use, reproduce, display, distribute,
; execute, and transmit the Software, and to prepare derivative works of the
; Software, and to permit third-parties to whom the Software is furnished to
; do so, all subject to the following:
;
; The copyright notices in the Software and this entire statement, including
; the above license grant, this restriction and the following disclaimer,
; must be included in all copies of the Software, in whole or in part, and
; all derivative works of the Software, unless such copies or derivative
; works are solely in the form of machine-executable object code generated by
; a source language processor.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
; DEALINGS IN THE SOFTWARE.
;
- match_default normal REG_EXTENDED
;
; try some really simple literals:
a a 0 1
Z Z 0 1
Z aaa -1 -1
Z xxxxZZxxx 4 5
; and some simple brackets:
(a) zzzaazz 3 4 3 4
() zzz 0 0 0 0
() "" 0 0 0 0
( !
) ) 0 1
(aa !
aa) baa)b 1 4
a b -1 -1
\(\) () 0 2
\(a\) (a) 0 3
\() () 0 2
(\) !
p(a)rameter ABCparameterXYZ 3 12 4 5
[pq](a)rameter ABCparameterXYZ 3 12 4 5
; now try escaped brackets:
- match_default bk_parens REG_BASIC
\(a\) zzzaazz 3 4 3 4
\(\) zzz 0 0 0 0
\(\) "" 0 0 0 0
\( !
\) !
\(aa !
aa\) !
() () 0 2
(a) (a) 0 3
(\) !
\() !
; now move on to "." wildcards
- match_default normal REG_EXTENDED REG_STARTEND
. a 0 1
. \n 0 1
. \r 0 1
. \0 0 1
;
; now move on to the repetion ops,
; starting with operator *
- match_default normal REG_EXTENDED
a* b 0 0
ab* a 0 1
ab* ab 0 2
ab* sssabbbbbbsss 3 10
ab*c* a 0 1
ab*c* abbb 0 4
ab*c* accc 0 4
ab*c* abbcc 0 5
*a !
\<* !
\>* !
\n* \n\n 0 2
\** ** 0 2
\* * 0 1
; now try operator +
ab+ a -1 -1
ab+ ab 0 2
ab+ sssabbbbbbsss 3 10
ab+c+ a -1 -1
ab+c+ abbb -1 -1
ab+c+ accc -1 -1
ab+c+ abbcc 0 5
+a !
\<+ !
\>+ !
\n+ \n\n 0 2
\+ + 0 1
\+ ++ 0 1
\++ ++ 0 2
; now try operator ?
- match_default normal REG_EXTENDED
a? b 0 0
ab? a 0 1
ab? ab 0 2
ab? sssabbbbbbsss 3 5
ab?c? a 0 1
ab?c? abbb 0 2
ab?c? accc 0 2
ab?c? abcc 0 3
?a !
\<? !
\>? !
\n? \n\n 0 1
\? ? 0 1
\? ?? 0 1
\?? ?? 0 1
; now try operator {}
- match_default normal REG_EXTENDED
a{2} a -1 -1
a{2} aa 0 2
a{2} aaa 0 2
a{2,} a -1 -1
a{2,} aa 0 2
a{2,} aaaaa 0 5
a{2,4} a -1 -1
a{2,4} aa 0 2
a{2,4} aaa 0 3
a{2,4} aaaa 0 4
a{2,4} aaaaa 0 4
a{} !
a{2 !
a} a} 0 2
\{\} {} 0 2
- match_default normal REG_BASIC
a\{2\} a -1 -1
a\{2\} aa 0 2
a\{2\} aaa 0 2
a\{2,\} a -1 -1
a\{2,\} aa 0 2
a\{2,\} aaaaa 0 5
a\{2,4\} a -1 -1
a\{2,4\} aa 0 2
a\{2,4\} aaa 0 3
a\{2,4\} aaaa 0 4
a\{2,4\} aaaaa 0 4
{} {} 0 2
; now test the alternation operator |
- match_default normal REG_EXTENDED
a|b a 0 1
a|b b 0 1
a(b|c) ab 0 2 1 2
a(b|c) ac 0 2 1 2
a(b|c) ad -1 -1 -1 -1
a\| a| 0 2
; now test the set operator []
- match_default normal REG_EXTENDED
; try some literals first
[abc] a 0 1
[abc] b 0 1
[abc] c 0 1
[abc] d -1 -1
[^bcd] a 0 1
[^bcd] b -1 -1
[^bcd] d -1 -1
[^bcd] e 0 1
a[b]c abc 0 3
a[ab]c abc 0 3
a[^ab]c adc 0 3
a[]b]c a]c 0 3
a[[b]c a[c 0 3
a[-b]c a-c 0 3
a[^]b]c adc 0 3
a[^-b]c adc 0 3
a[b-]c a-c 0 3
a[b !
a[] !
; then some ranges
[b-e] a -1 -1
[b-e] b 0 1
[b-e] e 0 1
[b-e] f -1 -1
[^b-e] a 0 1
[^b-e] b -1 -1
[^b-e] e -1 -1
[^b-e] f 0 1
a[1-3]c a2c 0 3
a[3-1]c !
a[1-3-5]c !
a[1- !
; and some classes
a[[:alpha:]]c abc 0 3
a[[:unknown:]]c !
a[[: !
a[[:alpha !
a[[:alpha:] !
a[[:alpha,:] !
a[[:]:]]b !
a[[:-:]]b !
a[[:alph:]] !
a[[:alphabet:]] !
[[:alnum:]]+ -%@a0X_- 3 6
[[:alpha:]]+ -%@aX_0- 3 5
[[:blank:]]+ "a \tb" 1 4
[[:cntrl:]]+ a\n\tb 1 3
[[:digit:]]+ a019b 1 4
[[:graph:]]+ " a%b " 1 4
[[:lower:]]+ AabC 1 3
; This test fails with STLPort, disable for now as this is a corner case anyway...
;[[:print:]]+ "\na b\n" 1 4
[[:punct:]]+ " %-&\t" 1 4
[[:space:]]+ "a \n\t\rb" 1 5
[[:upper:]]+ aBCd 1 3
[[:xdigit:]]+ p0f3Cx 1 5
; now test flag settings:
- escape_in_lists REG_NO_POSIX_TEST
[\n] \n 0 1
- REG_NO_POSIX_TEST
; line anchors
- match_default normal REG_EXTENDED
^ab ab 0 2
^ab xxabxx -1 -1
ab$ ab 0 2
ab$ abxx -1 -1
- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
^ab ab -1 -1
^ab xxabxx -1 -1
ab$ ab -1 -1
ab$ abxx -1 -1
; back references
- match_default normal REG_PERL
a(b)\2c !
a(b\1)c !
a(b*)c\1d abbcbbd 0 7 1 3
a(b*)c\1d abbcbd -1 -1
a(b*)c\1d abbcbbbd -1 -1
^(.)\1 abc -1 -1
a([bc])\1d abcdabbd 4 8 5 6
; strictly speaking this is at best ambiguous, at worst wrong, this is what most
; re implimentations will match though.
a(([bc])\2)*d abbccd 0 6 3 5 3 4
a(([bc])\2)*d abbcbd -1 -1
a((b)*\2)*d abbbd 0 5 1 4 2 3
; perl only:
(ab*)[ab]*\1 ababaaa 0 7 0 1
(a)\1bcd aabcd 0 5 0 1
(a)\1bc*d aabcd 0 5 0 1
(a)\1bc*d aabd 0 4 0 1
(a)\1bc*d aabcccd 0 7 0 1
(a)\1bc*[ce]d aabcccd 0 7 0 1
^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
; posix only:
- match_default extended REG_EXTENDED
(ab*)[ab]*\1 ababaaa 0 7 0 1
;
; word operators:
\w a 0 1
\w z 0 1
\w A 0 1
\w Z 0 1
\w _ 0 1
\w } -1 -1
\w ` -1 -1
\w [ -1 -1
\w @ -1 -1
; non-word:
\W a -1 -1
\W z -1 -1
\W A -1 -1
\W Z -1 -1
\W _ -1 -1
\W } 0 1
\W ` 0 1
\W [ 0 1
\W @ 0 1
; word start:
\<abcd " abcd" 2 6
\<ab cab -1 -1
\<ab "\nab" 1 3
\<tag ::tag 2 5
;word end:
abc\> abc 0 3
abc\> abcd -1 -1
abc\> abc\n 0 3
abc\> abc:: 0 3
; word boundary:
\babcd " abcd" 2 6
\bab cab -1 -1
\bab "\nab" 1 3
\btag ::tag 2 5
abc\b abc 0 3
abc\b abcd -1 -1
abc\b abc\n 0 3
abc\b abc:: 0 3
; within word:
\B ab 1 1
a\Bb ab 0 2
a\B ab 0 1
a\B a -1 -1
a\B "a " -1 -1
;
; buffer operators:
\`abc abc 0 3
\`abc \nabc -1 -1
\`abc " abc" -1 -1
abc\' abc 0 3
abc\' abc\n -1 -1
abc\' "abc " -1 -1
;
; now follows various complex expressions designed to try and bust the matcher:
a(((b)))c abc 0 3 1 2 1 2 1 2
a(b|(c))d abd 0 3 1 2 -1 -1
a(b|(c))d acd 0 3 1 2 1 2
a(b*|c)d abbd 0 4 1 3
; just gotta have one DFA-buster, of course
a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
; and an inline expansion in case somebody gets tricky
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
; and in case somebody just slips in an NFA...
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
; one really big one
1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
; fish for problems as brackets go past 8
[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
; and as parenthesis go past 9:
(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
(a)d|(b)c abc 1 3 -1 -1 1 2
_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
; subtleties of matching
;a(b)?c\1d acd 0 3 -1 -1
; POSIX is about the following test:
a(b)?c\1d acd -1 -1 -1 -1
a(b?c)+d accd 0 4 2 3
(wee|week)(knights|night) weeknights 0 10 0 3 3 10
.* abc 0 3
a(b|(c))d abd 0 3 1 2 -1 -1
a(b|(c))d acd 0 3 1 2 1 2
a(b*|c|e)d abbd 0 4 1 3
a(b*|c|e)d acd 0 3 1 2
a(b*|c|e)d ad 0 2 1 1
a(b?)c abc 0 3 1 2
a(b?)c ac 0 2 1 1
a(b+)c abc 0 3 1 2
a(b+)c abbbc 0 5 1 4
a(b*)c ac 0 2 1 1
(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
a([bc]?)c abc 0 3 1 2
a([bc]?)c ac 0 2 1 1
a([bc]+)c abc 0 3 1 2
a([bc]+)c abcc 0 4 1 3
a([bc]+)bc abcbc 0 5 1 3
a(bb+|b)b abb 0 3 1 2
a(bbb+|bb+|b)b abb 0 3 1 2
a(bbb+|bb+|b)b abbb 0 4 1 3
a(bbb+|bb+|b)bb abbb 0 4 1 2
(.*).* abcdef 0 6 0 6
(a*)* bc 0 0 0 0
xyx*xz xyxxxxyxxxz 5 11
; do we get the right subexpression when it is used more than once?
a(b|c)*d ad 0 2 -1 -1
a(b|c)*d abcd 0 4 2 3
a(b|c)+d abd 0 3 1 2
a(b|c)+d abcd 0 4 2 3
a(b|c?)+d ad 0 2 1 1
a(b|c){0,0}d ad 0 2 -1 -1
a(b|c){0,1}d ad 0 2 -1 -1
a(b|c){0,1}d abd 0 3 1 2
a(b|c){0,2}d ad 0 2 -1 -1
a(b|c){0,2}d abcd 0 4 2 3
a(b|c){0,}d ad 0 2 -1 -1
a(b|c){0,}d abcd 0 4 2 3
a(b|c){1,1}d abd 0 3 1 2
a(b|c){1,2}d abd 0 3 1 2
a(b|c){1,2}d abcd 0 4 2 3
a(b|c){1,}d abd 0 3 1 2
a(b|c){1,}d abcd 0 4 2 3
a(b|c){2,2}d acbd 0 4 2 3
a(b|c){2,2}d abcd 0 4 2 3
a(b|c){2,4}d abcd 0 4 2 3
a(b|c){2,4}d abcbd 0 5 3 4
a(b|c){2,4}d abcbcd 0 6 4 5
a(b|c){2,}d abcd 0 4 2 3
a(b|c){2,}d abcbd 0 5 3 4
; perl only: these conflict with the POSIX test below
;a(b|c?)+d abcd 0 4 3 3
;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
; posix only:
- match_default extended REG_EXTENDED REG_STARTEND
a(b|c?)+d abcd 0 4 2 3
a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
- match_default normal REG_PERL
; try to match C++ syntax elements:
; line comment:
//[^\n]* "++i //here is a line comment\n" 4 28
; block comment:
/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
; preprossor directives:
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
; perl only:
^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
; literals:
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
; strings:
'([^\\']|\\.)*' '\\x3A' 0 6 4 5
'([^\\']|\\.)*' '\\'' 0 4 1 3
'([^\\']|\\.)*' '\\n' 0 4 1 3
; finally try some case insensitive matches:
- match_default normal REG_EXTENDED REG_ICASE
; upper and lower have no meaning here so they fail, however these
; may compile with other libraries...
;[[:lower:]] !
;[[:upper:]] !
0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
; known and suspected bugs:
- match_default normal REG_EXTENDED
\( ( 0 1
\) ) 0 1
\$ $ 0 1
\^ ^ 0 1
\. . 0 1
\* * 0 1
\+ + 0 1
\? ? 0 1
\[ [ 0 1
\] ] 0 1
\| | 0 1
\\ \\ 0 1
# # 0 1
\# # 0 1
a- a- 0 2
\- - 0 1
\{ { 0 1
\} } 0 1
0 0 0 1
1 1 0 1
9 9 0 1
b b 0 1
B B 0 1
< < 0 1
> > 0 1
w w 0 1
W W 0 1
` ` 0 1
' ' 0 1
\n \n 0 1
, , 0 1
a a 0 1
f f 0 1
n n 0 1
r r 0 1
t t 0 1
v v 0 1
c c 0 1
x x 0 1
: : 0 1
(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
- match_default normal REG_EXTENDED REG_ICASE
a A 0 1
A a 0 1
[abc]+ abcABC 0 6
[ABC]+ abcABC 0 6
[a-z]+ abcABC 0 6
[A-Z]+ abzANZ 0 6
[a-Z]+ abzABZ 0 6
[A-z]+ abzABZ 0 6
[[:lower:]]+ abyzABYZ 0 8
[[:upper:]]+ abzABZ 0 6
[[:alpha:]]+ abyzABYZ 0 8
[[:alnum:]]+ 09abyzABYZ 0 10
; word start:
\<abcd " abcd" 2 6
\<ab cab -1 -1
\<ab "\nab" 1 3
\<tag ::tag 2 5
;word end:
abc\> abc 0 3
abc\> abcd -1 -1
abc\> abc\n 0 3
abc\> abc:: 0 3
; collating elements and rewritten set code:
- match_default normal REG_EXTENDED REG_STARTEND
;[[.zero.]] 0 0 1
;[[.one.]] 1 0 1
;[[.two.]] 2 0 1
;[[.three.]] 3 0 1
[[.a.]] baa 1 2
;[[.right-curly-bracket.]] } 0 1
;[[.NUL.]] \0 0 1
[[:<:]z] !
[a[:>:]] !
[[=a=]] a 0 1
;[[=right-curly-bracket=]] } 0 1
- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
[[.A.]] A 0 1
[[.A.]] a 0 1
[[.A.]-b]+ AaBb 0 4
[A-[.b.]]+ AaBb 0 4
[[.a.]-B]+ AaBb 0 4
[a-[.B.]]+ AaBb 0 4
- match_default normal REG_EXTENDED REG_STARTEND
[[.a.]-c]+ abcd 0 3
[a-[.c.]]+ abcd 0 3
[[:alpha:]-a] !
[a-[:alpha:]] !
; try mutli-character ligatures:
;[[.ae.]] ae 0 2
;[[.ae.]] aE -1 -1
;[[.AE.]] AE 0 2
;[[.Ae.]] Ae 0 2
;[[.ae.]-b] a -1 -1
;[[.ae.]-b] b 0 1
;[[.ae.]-b] ae 0 2
;[a-[.ae.]] a 0 1
;[a-[.ae.]] b -1 -1
;[a-[.ae.]] ae 0 2
- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
;[[.ae.]] AE 0 2
;[[.ae.]] Ae 0 2
;[[.AE.]] Ae 0 2
;[[.Ae.]] aE 0 2
;[[.AE.]-B] a -1 -1
;[[.Ae.]-b] b 0 1
;[[.Ae.]-b] B 0 1
;[[.ae.]-b] AE 0 2
- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
\s+ "ab ab" 2 5
\S+ " abc " 2 5
- match_default normal REG_EXTENDED REG_STARTEND
\`abc abc 0 3
\`abc aabc -1 -1
abc\' abc 0 3
abc\' abcd -1 -1
abc\' abc\n\n -1 -1
abc\' abc 0 3
; extended repeat checking to exercise new algorithms:
ab.*xy abxy_ 0 4
ab.*xy ab_xy_ 0 5
ab.*xy abxy 0 4
ab.*xy ab_xy 0 5
ab.* ab 0 2
ab.* ab__ 0 4
ab.{2,5}xy ab__xy_ 0 6
ab.{2,5}xy ab____xy_ 0 8
ab.{2,5}xy ab_____xy_ 0 9
ab.{2,5}xy ab__xy 0 6
ab.{2,5}xy ab_____xy 0 9
ab.{2,5} ab__ 0 4
ab.{2,5} ab_______ 0 7
ab.{2,5}xy ab______xy -1 -1
ab.{2,5}xy ab_xy -1 -1
ab.*?xy abxy_ 0 4
ab.*?xy ab_xy_ 0 5
ab.*?xy abxy 0 4
ab.*?xy ab_xy 0 5
ab.*? ab 0 2
ab.*? ab__ 0 4
ab.{2,5}?xy ab__xy_ 0 6
ab.{2,5}?xy ab____xy_ 0 8
ab.{2,5}?xy ab_____xy_ 0 9
ab.{2,5}?xy ab__xy 0 6
ab.{2,5}?xy ab_____xy 0 9
ab.{2,5}? ab__ 0 4
ab.{2,5}? ab_______ 0 7
ab.{2,5}?xy ab______xy -1 -1
ab.{2,5}xy ab_xy -1 -1
; again but with slower algorithm variant:
- match_default REG_EXTENDED
; now again for single character repeats:
ab_*xy abxy_ 0 4
ab_*xy ab_xy_ 0 5
ab_*xy abxy 0 4
ab_*xy ab_xy 0 5
ab_* ab 0 2
ab_* ab__ 0 4
ab_{2,5}xy ab__xy_ 0 6
ab_{2,5}xy ab____xy_ 0 8
ab_{2,5}xy ab_____xy_ 0 9
ab_{2,5}xy ab__xy 0 6
ab_{2,5}xy ab_____xy 0 9
ab_{2,5} ab__ 0 4
ab_{2,5} ab_______ 0 7
ab_{2,5}xy ab______xy -1 -1
ab_{2,5}xy ab_xy -1 -1
ab_*?xy abxy_ 0 4
ab_*?xy ab_xy_ 0 5
ab_*?xy abxy 0 4
ab_*?xy ab_xy 0 5
ab_*? ab 0 2
ab_*? ab__ 0 4
ab_{2,5}?xy ab__xy_ 0 6
ab_{2,5}?xy ab____xy_ 0 8
ab_{2,5}?xy ab_____xy_ 0 9
ab_{2,5}?xy ab__xy 0 6
ab_{2,5}?xy ab_____xy 0 9
ab_{2,5}? ab__ 0 4
ab_{2,5}? ab_______ 0 7
ab_{2,5}?xy ab______xy -1 -1
ab_{2,5}xy ab_xy -1 -1
; and again for sets:
ab[_,;]*xy abxy_ 0 4
ab[_,;]*xy ab_xy_ 0 5
ab[_,;]*xy abxy 0 4
ab[_,;]*xy ab_xy 0 5
ab[_,;]* ab 0 2
ab[_,;]* ab__ 0 4
ab[_,;]{2,5}xy ab__xy_ 0 6
ab[_,;]{2,5}xy ab____xy_ 0 8
ab[_,;]{2,5}xy ab_____xy_ 0 9
ab[_,;]{2,5}xy ab__xy 0 6
ab[_,;]{2,5}xy ab_____xy 0 9
ab[_,;]{2,5} ab__ 0 4
ab[_,;]{2,5} ab_______ 0 7
ab[_,;]{2,5}xy ab______xy -1 -1
ab[_,;]{2,5}xy ab_xy -1 -1
ab[_,;]*?xy abxy_ 0 4
ab[_,;]*?xy ab_xy_ 0 5
ab[_,;]*?xy abxy 0 4
ab[_,;]*?xy ab_xy 0 5
ab[_,;]*? ab 0 2
ab[_,;]*? ab__ 0 4
ab[_,;]{2,5}?xy ab__xy_ 0 6
ab[_,;]{2,5}?xy ab____xy_ 0 8
ab[_,;]{2,5}?xy ab_____xy_ 0 9
ab[_,;]{2,5}?xy ab__xy 0 6
ab[_,;]{2,5}?xy ab_____xy 0 9
ab[_,;]{2,5}? ab__ 0 4
ab[_,;]{2,5}? ab_______ 0 7
ab[_,;]{2,5}?xy ab______xy -1 -1
ab[_,;]{2,5}xy ab_xy -1 -1
; and again for tricky sets with digraphs:
;ab[_[.ae.]]*xy abxy_ 0 4
;ab[_[.ae.]]*xy ab_xy_ 0 5
;ab[_[.ae.]]*xy abxy 0 4
;ab[_[.ae.]]*xy ab_xy 0 5
;ab[_[.ae.]]* ab 0 2
;ab[_[.ae.]]* ab__ 0 4
;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
;ab[_[.ae.]]{2,5}xy ab__xy 0 6
;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
;ab[_[.ae.]]{2,5} ab__ 0 4
;ab[_[.ae.]]{2,5} ab_______ 0 7
;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
;ab[_[.ae.]]*?xy abxy_ 0 4
;ab[_[.ae.]]*?xy ab_xy_ 0 5
;ab[_[.ae.]]*?xy abxy 0 4
;ab[_[.ae.]]*?xy ab_xy 0 5
;ab[_[.ae.]]*? ab 0 2
;ab[_[.ae.]]*? ab__ 0 2
;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
;ab[_[.ae.]]{2,5}? ab__ 0 4
;ab[_[.ae.]]{2,5}? ab_______ 0 4
;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
; new bugs detected in spring 2003:
- normal match_continuous REG_NO_POSIX_TEST
b abc 1 2
() abc 0 0 0 0
^() abc 0 0 0 0
^()+ abc 0 0 0 0
^(){1} abc 0 0 0 0
^(){2} abc 0 0 0 0
^((){2}) abc 0 0 0 0 0 0
() "" 0 0 0 0
()\1 "" 0 0 0 0
()\1 a 0 0 0 0
a()\1b ab 0 2 1 1
a()b\1 ab 0 2 1 1
; subtleties of matching with no sub-expressions marked
- normal match_nosubs REG_NO_POSIX_TEST
a(b?c)+d accd 0 4
(wee|week)(knights|night) weeknights 0 10
.* abc 0 3
a(b|(c))d abd 0 3
a(b|(c))d acd 0 3
a(b*|c|e)d abbd 0 4
a(b*|c|e)d acd 0 3
a(b*|c|e)d ad 0 2
a(b?)c abc 0 3
a(b?)c ac 0 2
a(b+)c abc 0 3
a(b+)c abbbc 0 5
a(b*)c ac 0 2
(a|ab)(bc([de]+)f|cde) abcdef 0 6
a([bc]?)c abc 0 3
a([bc]?)c ac 0 2
a([bc]+)c abc 0 3
a([bc]+)c abcc 0 4
a([bc]+)bc abcbc 0 5
a(bb+|b)b abb 0 3
a(bbb+|bb+|b)b abb 0 3
a(bbb+|bb+|b)b abbb 0 4
a(bbb+|bb+|b)bb abbb 0 4
(.*).* abcdef 0 6
(a*)* bc 0 0
- normal nosubs REG_NO_POSIX_TEST
a(b?c)+d accd 0 4
(wee|week)(knights|night) weeknights 0 10
.* abc 0 3
a(b|(c))d abd 0 3
a(b|(c))d acd 0 3
a(b*|c|e)d abbd 0 4
a(b*|c|e)d acd 0 3
a(b*|c|e)d ad 0 2
a(b?)c abc 0 3
a(b?)c ac 0 2
a(b+)c abc 0 3
a(b+)c abbbc 0 5
a(b*)c ac 0 2
(a|ab)(bc([de]+)f|cde) abcdef 0 6
a([bc]?)c abc 0 3
a([bc]?)c ac 0 2
a([bc]+)c abc 0 3
a([bc]+)c abcc 0 4
a([bc]+)bc abcbc 0 5
a(bb+|b)b abb 0 3
a(bbb+|bb+|b)b abb 0 3
a(bbb+|bb+|b)b abbb 0 4
a(bbb+|bb+|b)bb abbb 0 4
(.*).* abcdef 0 6
(a*)* bc 0 0

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,341 @@
# 2.8.2 Regular Expression General Requirement
2¦4¦bb*¦abbbc¦
2¦2¦bb*¦ababbbc¦
7¦9¦A#*::¦A:A#:qA::qA#::qA##::q¦
1¦5¦A#*::¦A##::A#::qA::qA#:q¦
# 2.8.3.1.2 BRE Special Characters
# GA108
2¦2¦\.¦a.c¦
2¦2¦\[¦a[c¦
2¦2¦\\¦a\c¦
2¦2¦\*¦a*c¦
2¦2¦\^¦a^c¦
2¦2¦\$¦a$c¦
7¦11¦X\*Y\*8¦Y*8X*8X*Y*8¦
# GA109
2¦2¦[.]¦a.c¦
2¦2¦[[]¦a[c¦
-1¦-1¦[[]¦ac¦
2¦2¦[\]¦a\c¦
1¦1¦[\a]¦abc¦
2¦2¦[\.]¦a\.c¦
2¦2¦[\.]¦a.\c¦
2¦2¦[*]¦a*c¦
2¦2¦[$]¦a$c¦
2¦2¦[X*Y8]¦7*8YX¦
# GA110
2¦2¦*¦a*c¦
3¦4¦*a¦*b*a*c¦
1¦5¦**9=¦***9=9¦
# GA111
1¦1¦^*¦*bc¦
-1¦-1¦^*¦a*c¦
-1¦-1¦^*¦^*ab¦
1¦5¦^**9=¦***9=¦
-1¦-1¦^*5<*9¦5<9*5<*9¦
# GA112
2¦3¦\(*b\)¦a*b¦
-1¦-1¦\(*b\)¦ac¦
1¦6¦A\(**9\)=¦A***9=79¦
# GA113(1)
1¦3¦\(^*ab\)¦*ab¦
-1¦-1¦\(^*ab\)¦^*ab¦
-1¦-1¦\(^*b\)¦a*b¦
-1¦-1¦\(^*b\)¦^*b¦
### GA113(2) GNU regex implements GA113(1)
##-1¦-1¦\(^*ab\)¦*ab¦
##-1¦-1¦\(^*ab\)¦^*ab¦
##1¦1¦\(^*b\)¦b¦
##1¦3¦\(^*b\)¦^^b¦
# GA114
1¦3¦a^b¦a^b¦
1¦3¦a\^b¦a^b¦
1¦1¦^^¦^bc¦
2¦2¦\^¦a^c¦
1¦1¦[c^b]¦^abc¦
1¦1¦[\^ab]¦^ab¦
2¦2¦[\^ab]¦c\d¦
-1¦-1¦[^^]¦^¦
1¦3¦\(a^b\)¦a^b¦
1¦3¦\(a\^b\)¦a^b¦
2¦2¦\(\^\)¦a^b¦
# GA115
3¦3¦$$¦ab$¦
-1¦-1¦$$¦$ab¦
2¦3¦$c¦a$c¦
2¦2¦[$]¦a$c¦
1¦2¦\$a¦$a¦
3¦3¦\$$¦ab$¦
2¦6¦A\([34]$[34]\)B¦XA4$3BY¦
# 2.8.3.1.3 Periods in BREs
# GA116
1¦1¦.¦abc¦
-1¦-1¦.ab¦abc¦
1¦3¦ab.¦abc¦
1¦3¦a.b¦a,b¦
-1¦-1¦.......¦PqRs6¦
1¦7¦.......¦PqRs6T8¦
# 2.8.3.2 RE Bracket Expression
# GA118
2¦2¦[abc]¦xbyz¦
-1¦-1¦[abc]¦xyz¦
2¦2¦[abc]¦xbay¦
# GA119
2¦2¦[^a]¦abc¦
4¦4¦[^]cd]¦cd]ef¦
2¦2¦[^abc]¦axyz¦
-1¦-1¦[^abc]¦abc¦
3¦3¦[^[.a.]b]¦abc¦
3¦3¦[^[=a=]b]¦abc¦
2¦2¦[^-ac]¦abcde-¦
2¦2¦[^ac-]¦abcde-¦
3¦3¦[^a-b]¦abcde¦
3¦3¦[^a-bd-e]¦dec¦
2¦2¦[^---]¦-ab¦
16¦16¦[^a-zA-Z0-9]¦pqrstVWXYZ23579#¦
# GA120(1)
3¦3¦[]a]¦cd]ef¦
1¦1¦[]-a]¦a_b¦
3¦3¦[][.-.]-0]¦ab0-]¦
1¦1¦[]^a-z]¦string¦
# GA120(2)
4¦4¦[^]cd]¦cd]ef¦
0¦0¦[^]]*¦]]]]]]]]X¦
0¦0¦[^]]*¦]]]]]]]]¦
9¦9¦[^]]\{1,\}¦]]]]]]]]X¦
-1¦-1¦[^]]\{1,\}¦]]]]]]]]¦
# GA120(3)
3¦3¦[c[.].]d]¦ab]cd¦
2¦8¦[a-z]*[[.].]][A-Z]*¦Abcd]DEFg¦
# GA121
2¦2¦[[.a.]b]¦Abc¦
1¦1¦[[.a.]b]¦aBc¦
-1¦-1¦[[.a.]b]¦ABc¦
3¦3¦[^[.a.]b]¦abc¦
3¦3¦[][.-.]-0]¦ab0-]¦
3¦3¦[A-[.].]c]¦ab]!¦
# GA122
-2¦-2¦[[.ch.]]¦abc¦
-2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦
# GA125
2¦2¦[[=a=]b]¦Abc¦
1¦1¦[[=a=]b]¦aBc¦
-1¦-1¦[[=a=]b]¦ABc¦
3¦3¦[^[=a=]b]¦abc¦
# GA126
#W the expected result for [[:alnum:]]* is 2-7 which is wrong
0¦0¦[[:alnum:]]*¦ aB28gH¦
2¦7¦[[:alnum:]][[:alnum:]]*¦ aB28gH¦
#W the expected result for [^[:alnum:]]* is 2-5 which is wrong
0¦0¦[^[:alnum:]]*¦2 ,
2¦5¦[^[:alnum:]][^[:alnum:]]*¦2 ,
#W the expected result for [[:alpha:]]* is 2-5 which is wrong
0¦0¦[[:alpha:]]*¦ aBgH2¦
2¦5¦[[:alpha:]][[:alpha:]]*¦ aBgH2¦
1¦6¦[^[:alpha:]]*¦2 8,
1¦2¦[[:blank:]]*¦ ¦
1¦8¦[^[:blank:]]*¦aB28gH, ¦
1¦2¦[[:cntrl:]]*¦  ¦
1¦8¦[^[:cntrl:]]*¦aB2 8gh,¦
#W the expected result for [[:digit:]]* is 2-3 which is wrong
0¦0¦[[:digit:]]*¦a28¦
2¦3¦[[:digit:]][[:digit:]]*¦a28¦
1¦8¦[^[:digit:]]*¦aB gH,¦
1¦7¦[[:graph:]]*¦aB28gH, ¦
1¦3¦[^[:graph:]]*¦ 
1¦2¦[[:lower:]]*¦agB¦
1¦8¦[^[:lower:]]*¦B2 8H,
1¦8¦[[:print:]]*¦aB2 8gH, ¦
1¦2¦[^[:print:]]*¦  ¦
#W the expected result for [[:punct:]]* is 2-2 which is wrong
0¦0¦[[:punct:]]*¦a,2¦
2¦3¦[[:punct:]][[:punct:]]*¦a,,2¦
1¦9¦[^[:punct:]]*¦aB2 8gH¦
1¦3¦[[:space:]]*¦ ¦
#W the expected result for [^[:space:]]* is 2-9 which is wrong
0¦0¦[^[:space:]]*¦ aB28gH, ¦
2¦9¦[^[:space:]][^[:space:]]*¦ aB28gH, ¦
#W the expected result for [[:upper:]]* is 2-3 which is wrong
0¦0¦[[:upper:]]*¦aBH2¦
2¦3¦[[:upper:]][[:upper:]]*¦aBH2¦
1¦8¦[^[:upper:]]*¦a2 8g,
#W the expected result for [[:xdigit:]]* is 2-5 which is wrong
0¦0¦[[:xdigit:]]*¦gaB28h¦
2¦5¦[[:xdigit:]][[:xdigit:]]*¦gaB28h¦
#W the expected result for [^[:xdigit:]]* is 2-7 which is wrong
2¦7¦[^[:xdigit:]][^[:xdigit:]]*¦a gH,
# GA127
-2¦-2¦[b-a]¦abc¦
1¦1¦[a-c]¦bbccde¦
2¦2¦[a-b]¦-bc¦
3¦3¦[a-z0-9]¦AB0¦
3¦3¦[^a-b]¦abcde¦
3¦3¦[^a-bd-e]¦dec¦
1¦1¦[]-a]¦a_b¦
2¦2¦[+--]¦a,b¦
2¦2¦[--/]¦a.b¦
2¦2¦[^---]¦-ab¦
3¦3¦[][.-.]-0]¦ab0-]¦
3¦3¦[A-[.].]c]¦ab]!¦
2¦6¦bc[d-w]xy¦abchxyz¦
# GA129
1¦1¦[a-cd-f]¦dbccde¦
-1¦-1¦[a-ce-f]¦dBCCdE¦
2¦4¦b[n-zA-M]Y¦absY9Z¦
2¦4¦b[n-zA-M]Y¦abGY9Z¦
# GA130
3¦3¦[-xy]¦ac-¦
2¦4¦c[-xy]D¦ac-D+¦
2¦2¦[--/]¦a.b¦
2¦4¦c[--/]D¦ac.D+b¦
2¦2¦[^-ac]¦abcde-¦
1¦3¦a[^-ac]c¦abcde-¦
3¦3¦[xy-]¦zc-¦
2¦4¦c[xy-]7¦zc-786¦
2¦2¦[^ac-]¦abcde-¦
2¦4¦a[^ac-]c¦5abcde-¦
2¦2¦[+--]¦a,b¦
2¦4¦a[+--]B¦Xa,By¦
2¦2¦[^---]¦-ab¦
4¦6¦X[^---]Y¦X-YXaYXbY¦
# 2.8.3.3 BREs Matching Multiple Characters
# GA131
3¦4¦cd¦abcdeabcde¦
1¦2¦ag*b¦abcde¦
-1¦-1¦[a-c][e-f]¦abcdef¦
3¦4¦[a-c][e-f]¦acbedf¦
4¦8¦abc*XYZ¦890abXYZ#*¦
4¦9¦abc*XYZ¦890abcXYZ#*¦
4¦15¦abc*XYZ¦890abcccccccXYZ#*¦
-1¦-1¦abc*XYZ¦890abc*XYZ#*¦
# GA132
2¦4¦\(*bc\)¦a*bc¦
1¦2¦\(ab\)¦abcde¦
1¦10¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)¦abcdefghijk¦
3¦8¦43\(2\(6\)*0\)AB¦654320ABCD¦
3¦9¦43\(2\(7\)*0\)AB¦6543270ABCD¦
3¦12¦43\(2\(7\)*0\)AB¦6543277770ABCD¦
# GA133
1¦10¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)¦abcdefghijk¦
-1¦-1¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(k\)\)\)\)\)\)\)\)¦abcdefghijk¦
# GA134
2¦4¦\(bb*\)¦abbbc¦
2¦2¦\(bb*\)¦ababbbc¦
1¦6¦a\(.*b\)¦ababbbc¦
1¦2¦a\(b*\)¦ababbbc¦
1¦20¦a\(.*b\)c¦axcaxbbbcsxbbbbbbbbc¦
# GA135
1¦7¦\(a\(b\(c\(d\(e\)\)\)\)\)\4¦abcdededede¦
#W POSIX does not really specify whether a\(b\)*c\1 matches acb.
#W back references are supposed to expand to the last match, but what
#W if there never was a match as in this case?
-1¦-1¦a\(b\)*c\1¦acb¦
1¦11¦\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)\9¦abcdefghijjk¦
# GA136
#W These two tests have the same problem as the test in GA135. No match
#W of a subexpression, why should the back reference be usable?
#W 1 2 a\(b\)*c\1 acb
#W 4 7 a\(b\(c\(d\(f\)*\)\)\)\4¦xYzabcdePQRST
-1¦-1¦a\(b\)*c\1¦acb¦
-1¦-1¦a\(b\(c\(d\(f\)*\)\)\)\4¦xYzabcdePQRST¦
# GA137
-2¦-2¦\(a\(b\)\)\3¦foo¦
-2¦-2¦\(a\(b\)\)\(a\(b\)\)\5¦foo¦
# GA138
1¦2¦ag*b¦abcde¦
1¦10¦a.*b¦abababvbabc¦
2¦5¦b*c¦abbbcdeabbbbbbcde¦
2¦5¦bbb*c¦abbbcdeabbbbbbcde¦
1¦5¦a\(b\)*c\1¦abbcbbb¦
-1¦-1¦a\(b\)*c\1¦abbdbd¦
0¦0¦\([a-c]*\)\1¦abcacdef¦
1¦6¦\([a-c]*\)\1¦abcabcabcd¦
1¦2¦a^*b¦ab¦
1¦5¦a^*b¦a^^^b¦
# GA139
1¦2¦a\{2\}¦aaaa¦
1¦7¦\([a-c]*\)\{0,\}¦aabcaab¦
1¦2¦\(a\)\1\{1,2\}¦aabc¦
1¦3¦\(a\)\1\{1,2\}¦aaaabc¦
#W the expression \(\(a\)\1\)\{1,2\} is ill-formed, using \2
1¦4¦\(\(a\)\2\)\{1,2\}¦aaaabc¦
# GA140
1¦2¦a\{2\}¦aaaa¦
-1¦-1¦a\{2\}¦abcd¦
0¦0¦a\{0\}¦aaaa¦
1¦64¦a\{64\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦
# GA141
1¦7¦\([a-c]*\)\{0,\}¦aabcaab¦
#W the expected result for \([a-c]*\)\{2,\} is failure which isn't correct
1¦3¦\([a-c]*\)\{2,\}¦abcdefg¦
1¦3¦\([a-c]*\)\{1,\}¦abcdefg¦
-1¦-1¦a\{64,\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦
# GA142
1¦3¦a\{2,3\}¦aaaa¦
-1¦-1¦a\{2,3\}¦abcd¦
0¦0¦\([a-c]*\)\{0,0\}¦foo¦
1¦63¦a\{1,63\}¦aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa¦
# 2.8.3.4 BRE Precedence
# GA143
#W There are numerous bugs in the original version.
2¦19¦\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$¦a^[]\(\1\)*\{1,2\}$b¦
1¦6¦[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]¦*\]=.;¦
1¦6¦[$\(*\)^]*¦$\()*^¦
1¦1¦[\1]¦1¦
1¦1¦[\{1,2\}]¦{¦
#W the expected result for \(*\)*\1* is 2-2 which isn't correct
0¦0¦\(*\)*\1*¦a*b*11¦
2¦3¦\(*\)*\1*b¦a*b*11¦
#W the expected result for \(a\(b\{1,2\}\)\{1,2\}\) is 1-5 which isn't correct
1¦3¦\(a\(b\{1,2\}\)\{1,2\}\)¦abbab¦
1¦5¦\(a\(b\{1,2\}\)\)\{1,2\}¦abbab¦
1¦1¦^\(^\(^a$\)$\)$¦a¦
1¦2¦\(a\)\1$¦aa¦
1¦3¦ab*¦abb¦
1¦4¦ab\{2,4\}¦abbbc¦
# 2.8.3.5 BRE Expression Anchoring
# GA144
1¦1¦^a¦abc¦
-1¦-1¦^b¦abc¦
-1¦-1¦^[a-zA-Z]¦99Nine¦
1¦4¦^[a-zA-Z]*¦Nine99¦
# GA145(1)
1¦2¦\(^a\)\1¦aabc¦
-1¦-1¦\(^a\)\1¦^a^abc¦
1¦2¦\(^^a\)¦^a¦
1¦1¦\(^^\)¦^^¦
1¦3¦\(^abc\)¦abcdef¦
-1¦-1¦\(^def\)¦abcdef¦
### GA145(2) GNU regex implements GA145(1)
##-1¦-1¦\(^a\)\1¦aabc¦
##1¦4¦\(^a\)\1¦^a^abc¦
##-1¦-1¦\(^^a\)¦^a¦
##1¦2¦\(^^\)¦^^¦
# GA146
3¦3¦a$¦cba¦
-1¦-1¦a$¦abc¦
5¦7¦[a-z]*$¦99ZZxyz¦
#W the expected result for [a-z]*$ is failure which isn't correct
10¦9¦[a-z]*$¦99ZZxyz99¦
3¦3¦$$¦ab$¦
-1¦-1¦$$¦$ab¦
3¦3¦\$$¦ab$¦
# GA147(1)
-1¦-1¦\(a$\)\1¦bcaa¦
-1¦-1¦\(a$\)\1¦ba$¦
-1¦-1¦\(ab$\)¦ab$¦
1¦2¦\(ab$\)¦ab¦
4¦6¦\(def$\)¦abcdef¦
-1¦-1¦\(abc$\)¦abcdef¦
### GA147(2) GNU regex implements GA147(1)
##-1¦-1¦\(a$\)\1¦bcaa¦
##2¦5¦\(a$\)\1¦ba$a$¦
##-1¦-1¦\(ab$\)¦ab¦
##1¦3¦\(ab$\)¦ab$¦
# GA148
0¦0¦^$¦¦
1¦3¦^abc$¦abc¦
-1¦-1¦^xyz$¦^xyz^¦
-1¦-1¦^234$¦^234$¦
1¦9¦^[a-zA-Z0-9]*$¦2aA3bB9zZ¦
-1¦-1¦^[a-z0-9]*$¦2aA3b#B9zZ¦

View file

@ -0,0 +1,167 @@
0:(.*)*\1:xx
0:^:
0:$:
0:^$:
0:^a$:a
0:abc:abc
1:abc:xbc
1:abc:axc
1:abc:abx
0:abc:xabcy
0:abc:ababc
0:ab*c:abc
0:ab*bc:abc
0:ab*bc:abbc
0:ab*bc:abbbbc
0:ab+bc:abbc
1:ab+bc:abc
1:ab+bc:abq
0:ab+bc:abbbbc
0:ab?bc:abbc
0:ab?bc:abc
1:ab?bc:abbbbc
0:ab?c:abc
0:^abc$:abc
1:^abc$:abcc
0:^abc:abcc
1:^abc$:aabc
0:abc$:aabc
0:^:abc
0:$:abc
0:a.c:abc
0:a.c:axc
0:a.*c:axyzc
1:a.*c:axyzd
1:a[bc]d:abc
0:a[bc]d:abd
1:a[b-d]e:abd
0:a[b-d]e:ace
0:a[b-d]:aac
0:a[-b]:a-
0:a[b-]:a-
2:a[b-a]:-
2:a[]b:-
2:a[:-
0:a]:a]
0:a[]]b:a]b
0:a[^bc]d:aed
1:a[^bc]d:abd
0:a[^-b]c:adc
1:a[^-b]c:a-c
1:a[^]b]c:a]c
0:a[^]b]c:adc
0:ab|cd:abc
0:ab|cd:abcd
0:()ef:def
0:()*:-
2:*a:-
2:^*:-
2:$*:-
2:(*)b:-
1:$b:b
2:a\:-
0:a\(b:a(b
0:a\(*b:ab
0:a\(*b:a((b
1:a\x:a\x
1:abc):-
2:(abc:-
0:((a)):abc
0:(a)b(c):abc
0:a+b+c:aabbabc
0:a**:-
0:a*?:-
0:(a*)*:-
0:(a*)+:-
0:(a|)*:-
0:(a*|b)*:-
0:(a+|b)*:ab
0:(a+|b)+:ab
0:(a+|b)?:ab
0:[^ab]*:cde
0:(^)*:-
0:(ab|)*:-
2:)(:-
1:abc:
1:abc:
0:a*:
0:([abc])*d:abbbcd
0:([abc])*bcd:abcd
0:a|b|c|d|e:e
0:(a|b|c|d|e)f:ef
0:((a*|b))*:-
0:abcd*efg:abcdefg
0:ab*:xabyabbbz
0:ab*:xayabbbz
0:(ab|cd)e:abcde
0:[abhgefdc]ij:hij
1:^(ab|cd)e:abcde
0:(abc|)ef:abcdef
0:(a|b)c*d:abcd
0:(ab|ab*)bc:abc
0:a([bc]*)c*:abc
0:a([bc]*)(c*d):abcd
0:a([bc]+)(c*d):abcd
0:a([bc]*)(c+d):abcd
0:a[bcd]*dcdcde:adcdcde
1:a[bcd]+dcdcde:adcdcde
0:(ab|a)b*c:abc
0:((a)(b)c)(d):abcd
0:[A-Za-z_][A-Za-z0-9_]*:alpha
0:^a(bc+|b[eh])g|.h$:abh
0:(bc+d$|ef*g.|h?i(j|k)):effgz
0:(bc+d$|ef*g.|h?i(j|k)):ij
1:(bc+d$|ef*g.|h?i(j|k)):effg
1:(bc+d$|ef*g.|h?i(j|k)):bcdd
0:(bc+d$|ef*g.|h?i(j|k)):reffgz
1:((((((((((a)))))))))):-
0:(((((((((a))))))))):a
1:multiple words of text:uh-uh
0:multiple words:multiple words, yeah
0:(.*)c(.*):abcde
1:\((.*),:(.*)\)
1:[k]:ab
0:abcd:abcd
0:a(bc)d:abcd
0:a[-]?c:ac
0:(....).*\1:beriberi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Qaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mo'ammar Gadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Kaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Qadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar El Kadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Gadafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qadafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamer El Kazzafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar al-Gaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Al Qathafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Al Qathafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mo'ammar el-Gadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar El Kadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar al-Qadhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qadhdhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Qadafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moamar Gaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar Qadhdhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Khaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar al-Khaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'amar al-Kadafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghaddafy
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghadafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Ghaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muamar Kaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Quathafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muammar Gheddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Muamar Al-Kaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Khadafy
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Moammar Qudhafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mu'ammar al-Qaddafi
0:M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]:Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi
0:[[:digit:]]+:01234
1:[[:alpha:]]+:01234
0:^[[:digit:]]*$:01234
1:^[[:digit:]]*$:01234a
0:^[[:alnum:]]*$:01234a
0:^[[:xdigit:]]*$:01234a
1:^[[:xdigit:]]*$:01234g
0:^[[:alnum:][:space:]]*$:Hello world