/-- These tests are for the Perl >= 5.10 features that PCRE supports. --/ /\H\h\V\v/ X X\x0a 0: X X\x0a X\x09X\x0b 0: X\x09X\x0b ** Failers No match \xa0 X\x0a No match /\H*\h+\V?\v{3,4}/ \x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a 0: \x09 \xa0X\x0a\x0b\x0c\x0d \x09\x20\xa0\x0a\x0b\x0c\x0d\x0a 0: \x09 \xa0\x0a\x0b\x0c\x0d \x09\x20\xa0\x0a\x0b\x0c 0: \x09 \xa0\x0a\x0b\x0c ** Failers No match \x09\x20\xa0\x0a\x0b No match /\H{3,4}/ XY ABCDE 0: ABCD XY PQR ST 0: PQR /.\h{3,4}./ XY AB PQRS 0: B P /\h*X\h?\H+Y\H?Z/ >XNNNYZ 0: XNNNYZ > X NYQZ 0: X NYQZ ** Failers No match >XYZ No match > X NY Z No match /\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/ >XY\x0aZ\x0aA\x0bNN\x0c 0: XY\x0aZ\x0aA\x0bNN\x0c >\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c 0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c /(foo)\Kbar/ foobar 0: bar 1: foo /(foo)(\Kbar|baz)/ foobar 0: bar 1: foo 2: bar foobaz 0: foobaz 1: foo 2: baz /(foo\Kbar)baz/ foobarbaz 0: barbaz 1: foobar /abc\K|def\K/g+ Xabcdefghi 0: 0+ defghi 0: 0+ ghi /ab\Kc|de\Kf/g+ Xabcdefghi 0: c 0+ defghi 0: f 0+ ghi /(?=C)/g+ ABCDECBA 0: 0+ CDECBA 0: 0+ CBA /^abc\K/+ abcdef 0: 0+ def ** Failers No match defabcxyz No match /^(a(b))\1\g1\g{1}\g-1\g{-1}\g{-02}Z/ ababababbbabZXXXX 0: ababababbbabZ 1: ab 2: b /(?tom|bon)-\g{A}/ tom-tom 0: tom-tom 1: tom bon-bon 0: bon-bon 1: bon /(^(a|b\g{-1}))/ bacxxx No match /(?|(abc)|(xyz))\1/ abcabc 0: abcabc 1: abc xyzxyz 0: xyzxyz 1: xyz ** Failers No match abcxyz No match xyzabc No match /(?|(abc)|(xyz))(?1)/ abcabc 0: abcabc 1: abc xyzabc 0: xyzabc 1: xyz ** Failers No match xyzxyz No match /^X(?5)(a)(?|(b)|(q))(c)(d)(Y)/ XYabcdY 0: XYabcdY 1: a 2: b 3: c 4: d 5: Y /^X(?7)(a)(?|(b|(r)(s))|(q))(c)(d)(Y)/ XYabcdY 0: XYabcdY 1: a 2: b 3: 4: 5: c 6: d 7: Y /^X(?7)(a)(?|(b|(?|(r)|(t))(s))|(q))(c)(d)(Y)/ XYabcdY 0: XYabcdY 1: a 2: b 3: 4: 5: c 6: d 7: Y /(?'abc'\w+):\k{2}/ a:aaxyz 0: a:aa 1: a ab:ababxyz 0: ab:abab 1: ab ** Failers No match a:axyz No match ab:abxyz No match /(?'abc'\w+):\g{abc}{2}/ a:aaxyz 0: a:aa 1: a ab:ababxyz 0: ab:abab 1: ab ** Failers No match a:axyz No match ab:abxyz No match /^(?a)? (?()b|c) (?('ab')d|e)/x abd 0: abd 1: a ce 0: ce /^(a.)\g-1Z/ aXaXZ 0: aXaXZ 1: aX /^(a.)\g{-1}Z/ aXaXZ 0: aXaXZ 1: aX /^(?(DEFINE) (? a) (? b) ) (?&A) (?&B) /x abcd 0: ab /(?(?&NAME_PAT))\s+(?(?&ADDRESS_PAT)) (?(DEFINE) (?[a-z]+) (?\d+) )/x metcalfe 33 0: metcalfe 33 1: metcalfe 2: 33 /(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))\b(?&byte)(\.(?&byte)){3}/ 1.2.3.4 0: 1.2.3.4 1: 2: .4 131.111.10.206 0: 131.111.10.206 1: 2: .206 10.0.0.0 0: 10.0.0.0 1: 2: .0 ** Failers No match 10.6 No match 455.3.4.5 No match /\b(?&byte)(\.(?&byte)){3}(?(DEFINE)(?2[0-4]\d|25[0-5]|1\d\d|[1-9]?\d))/ 1.2.3.4 0: 1.2.3.4 1: .4 131.111.10.206 0: 131.111.10.206 1: .206 10.0.0.0 0: 10.0.0.0 1: .0 ** Failers No match 10.6 No match 455.3.4.5 No match /^(\w++|\s++)*$/ now is the time for all good men to come to the aid of the party 0: now is the time for all good men to come to the aid of the party 1: party *** Failers No match this is not a line with only words and spaces! No match /(\d++)(\w)/ 12345a 0: 12345a 1: 12345 2: a *** Failers No match 12345+ No match /a++b/ aaab 0: aaab /(a++b)/ aaab 0: aaab 1: aaab /(a++)b/ aaab 0: aaab 1: aaa /([^()]++|\([^()]*\))+/ ((abc(ade)ufh()()x 0: abc(ade)ufh()()x 1: x /\(([^()]++|\([^()]+\))+\)/ (abc) 0: (abc) 1: abc (abc(def)xyz) 0: (abc(def)xyz) 1: xyz *** Failers No match ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa No match /^([^()]|\((?1)*\))*$/ abc 0: abc 1: c a(b)c 0: a(b)c 1: c a(b(c))d 0: a(b(c))d 1: d *** Failers) No match a(b(c)d No match /^>abc>([^()]|\((?1)*\))*abc>123abc>123abc>1(2)3abc>1(2)3abc>(1(2)3)abc>(1(2)3) 2: 3: Satanoscillatemymetallicsonatas 4: S AmanaplanacanalPanama 0: AmanaplanacanalPanama 1: 2: 3: AmanaplanacanalPanama 4: A AblewasIereIsawElba 0: AblewasIereIsawElba 1: 2: 3: AblewasIereIsawElba 4: A *** Failers No match Thequickbrownfox No match /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/ 12 0: 12 1: 12 (((2+2)*-3)-7) 0: (((2+2)*-3)-7) 1: (((2+2)*-3)-7) 2: - -12 0: -12 1: -12 *** Failers No match ((2+2)*-3)-7) No match /^(x(y|(?1){2})z)/ xyz 0: xyz 1: xyz 2: y xxyzxyzz 0: xxyzxyzz 1: xxyzxyzz 2: xyzxyz *** Failers No match xxyzz No match xxyzxyzxyzz No match /((< (?: (?(R) \d++ | [^<>]*+) | (?2)) * >))/x <> 0: <> 1: <> 2: <> 0: 1: 2: hij> 0: hij> 1: hij> 2: hij> hij> 0: 1: 2: def> 0: def> 1: def> 2: def> 0: <> 1: <> 2: <> *** Failers No match 2: 3: Satan, oscillate my metallic sonatas 4: S A man, a plan, a canal: Panama! 0: A man, a plan, a canal: Panama! 1: 2: 3: A man, a plan, a canal: Panama 4: A Able was I ere I saw Elba. 0: Able was I ere I saw Elba. 1: 2: 3: Able was I ere I saw Elba 4: A *** Failers No match The quick brown fox No match /^((.)(?1)\2|.)$/ a 0: a 1: a aba 0: aba 1: aba 2: a aabaa 0: aabaa 1: aabaa 2: a abcdcba 0: abcdcba 1: abcdcba 2: a pqaabaaqp 0: pqaabaaqp 1: pqaabaaqp 2: p ablewasiereisawelba 0: ablewasiereisawelba 1: ablewasiereisawelba 2: a rhubarb No match the quick brown fox No match /(a)(?<=b(?1))/ baz 0: a 1: a ** Failers No match caz No match /(?<=b(?1))(a)/ zbaaz 0: a 1: a ** Failers No match aaa No match /(?a)(?<=b(?&X))/ baz 0: a 1: a /^(?|(abc)|(def))\1/ abcabc 0: abcabc 1: abc defdef 0: defdef 1: def ** Failers No match abcdef No match defabc No match /^(?|(abc)|(def))(?1)/ abcabc 0: abcabc 1: abc defabc 0: defabc 1: def ** Failers No match defdef No match abcdef No match /(?:a(? (?')|(?")) |b(? (?')|(?")) ) (?('quote')[a-z]+|[0-9]+)/xJ a\"aaaaa 0: a"aaaaa 1: " 2: 3: " b\"aaaaa 0: b"aaaaa 1: 2: 3: 4: " 5: 6: " ** Failers No match b\"11111 No match /(?:(?1)|B)(A(*F)|C)/ ABCD 0: BC 1: C CCD 0: CC 1: C ** Failers No match CAD No match /^(?:(?1)|B)(A(*F)|C)/ CCD 0: CC 1: C BCD 0: BC 1: C ** Failers No match ABCD No match CAD No match BAD No match /(?:(?1)|B)(A(*ACCEPT)XX|C)D/ AAD 0: AA 1: A ACD 0: ACD 1: C BAD 0: BA 1: A BCD 0: BCD 1: C BAX 0: BA 1: A ** Failers No match ACX No match ABC No match /(?(DEFINE)(A))B(?1)C/ BAC 0: BAC /(?(DEFINE)((A)\2))B(?1)C/ BAAC 0: BAAC /(? \( ( [^()]++ | (?&pn) )* \) )/x (ab(cd)ef) 0: (ab(cd)ef) 1: (ab(cd)ef) 2: ef /^(?!a(*SKIP)b)/ ac 0: /^(?=a(*SKIP)b|ac)/ ** Failers No match ac No match /^(?=a(*THEN)b|ac)/ ac 0: /^(?=a(*PRUNE)b)/ ab 0: ** Failers No match ac No match /^(?=a(*ACCEPT)b)/ ac 0: /^(?(?!a(*SKIP)b))/ ac 0: /(?>a\Kb)/ ab 0: b /((?>a\Kb))/ ab 0: b 1: ab /(a\Kb)/ ab 0: b 1: ab /^a\Kcz|ac/ ac 0: ac /(?>a\Kbz|ab)/ ab 0: ab /^(?&t)(?(DEFINE)(?a\Kb))$/ ab 0: b /^([^()]|\((?1)*\))*$/ a(b)c 0: a(b)c 1: c a(b(c)d)e 0: a(b(c)d)e 1: e /(?P(?P0)(?P>L1)|(?P>L2))/ 0 0: 0 1: 0 00 0: 00 1: 00 2: 0 0000 0: 0000 1: 0000 2: 0 /(?P(?P0)|(?P>L2)(?P>L1))/ 0 0: 0 1: 0 2: 0 00 0: 0 1: 0 2: 0 0000 0: 0 1: 0 2: 0 /--- This one does fail, as expected, in Perl. It needs the complex item at the end of the pattern. A single letter instead of (B|D) makes it not fail, which I think is a Perl bug. --- / /A(*COMMIT)(B|D)/ ACABX No match /--- Check the use of names for failure ---/ /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/K ** Failers No match AC No match, mark = A CB No match, mark = B /--- Force no study, otherwise mark is not seen. The studied version is in test 2 because it isn't Perl-compatible. ---/ /(*MARK:A)(*SKIP:B)(C|X)/KSS C 0: C 1: C MK: A D No match, mark = A /^(A(*THEN:A)B|C(*THEN:B)D)/K ** Failers No match CB No match, mark = B /^(?:A(*THEN:A)B|C(*THEN:B)D)/K CB No match, mark = B /^(?>A(*THEN:A)B|C(*THEN:B)D)/K CB No match, mark = B /--- This should succeed, as the skip causes bump to offset 1 (the mark). Note that we have to have something complicated such as (B|Z) at the end because, for Perl, a simple character somehow causes an unwanted optimization to mess with the handling of backtracking verbs. ---/ /A(*MARK:A)A+(*SKIP:A)(B|Z) | AC/xK AAAC 0: AC /--- Test skipping over a non-matching mark. ---/ /A(*MARK:A)A+(*MARK:B)(*SKIP:A)(B|Z) | AC/xK AAAC 0: AC /--- Check shorthand for MARK ---/ /A(*:A)A+(*SKIP:A)(B|Z) | AC/xK AAAC 0: AC /--- Don't loop! Force no study, otherwise mark is not seen. ---/ /(*:A)A+(*SKIP:A)(B|Z)/KSS AAAC No match, mark = A /--- This should succeed, as a non-existent skip name disables the skip ---/ /A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK AAAC 0: AC /A(*MARK:A)A+(*SKIP:B)(B|Z) | AC(*:B)/xK AAAC 0: AC MK: B /--- COMMIT at the start of a pattern should act like an anchor. Again, however, we need the complication for Perl. ---/ /(*COMMIT)(A|P)(B|P)(C|P)/ ABCDEFG 0: ABC 1: A 2: B 3: C ** Failers No match DEFGABC No match /--- COMMIT inside an atomic group can't stop backtracking over the group. ---/ /(\w+)(?>b(*COMMIT))\w{2}/ abbb 0: abbb 1: a /(\w+)b(*COMMIT)\w{2}/ abbb No match /--- Check opening parens in comment when seeking forward reference. ---/ /(?&t)(?#()(?(DEFINE)(?a))/ bac 0: a /--- COMMIT should override THEN ---/ /(?>(*COMMIT)(?>yes|no)(*THEN)(*F))?/ yes No match /(?>(*COMMIT)(yes|no)(*THEN)(*F))?/ yes No match /b?(*SKIP)c/ bc 0: bc abc 0: bc /(*SKIP)bc/ a No match /(*SKIP)b/ a No match /(?P(?P=abn)xxx|)+/ xxx 0: 1: /(?i:([^b]))(?1)/ aa 0: aa 1: a aA 0: aA 1: a ** Failers 0: ** 1: * ab No match aB No match Ba No match ba No match /^(?&t)*+(?(DEFINE)(?a))\w$/ aaaaaaX 0: aaaaaaX ** Failers No match aaaaaa No match /^(?&t)*(?(DEFINE)(?a))\w$/ aaaaaaX 0: aaaaaaX aaaaaa 0: aaaaaa /^(a)*+(\w)/ aaaaX 0: aaaaX 1: a 2: X YZ 0: Y 1: 2: Y ** Failers No match aaaa No match /^(?:a)*+(\w)/ aaaaX 0: aaaaX 1: X YZ 0: Y 1: Y ** Failers No match aaaa No match /^(a)++(\w)/ aaaaX 0: aaaaX 1: a 2: X ** Failers No match aaaa No match YZ No match /^(?:a)++(\w)/ aaaaX 0: aaaaX 1: X ** Failers No match aaaa No match YZ No match /^(a)?+(\w)/ aaaaX 0: aa 1: a 2: a YZ 0: Y 1: 2: Y /^(?:a)?+(\w)/ aaaaX 0: aa 1: a YZ 0: Y 1: Y /^(a){2,}+(\w)/ aaaaX 0: aaaaX 1: a 2: X ** Failers No match aaa No match YZ No match /^(?:a){2,}+(\w)/ aaaaX 0: aaaaX 1: X ** Failers No match aaa No match YZ No match /(a|)*(?1)b/ b 0: b 1: ab 0: ab 1: aab 0: aab 1: /(a)++(?1)b/ ** Failers No match ab No match aab No match /(a)*+(?1)b/ ** Failers No match ab No match aab No match /(?1)(?:(b)){0}/ b 0: b /(foo ( \( ((?:(?> [^()]+ )|(?2))*) \) ) )/x foo(bar(baz)+baz(bop)) 0: foo(bar(baz)+baz(bop)) 1: foo(bar(baz)+baz(bop)) 2: (bar(baz)+baz(bop)) 3: bar(baz)+baz(bop) /(A (A|B(*ACCEPT)|C) D)(E)/x AB 0: AB 1: AB 2: B /\A.*?(?:a|b(*THEN)c)/ ba 0: ba /\A.*?(?:a|bc)/ ba 0: ba /\A.*?(a|b(*THEN)c)/ ba 0: ba 1: a /\A.*?(a|bc)/ ba 0: ba 1: a /\A.*?(?:a|b(*THEN)c)++/ ba 0: ba /\A.*?(?:a|bc)++/ ba 0: ba /\A.*?(a|b(*THEN)c)++/ ba 0: ba 1: a /\A.*?(a|bc)++/ ba 0: ba 1: a /\A.*?(?:a|b(*THEN)c|d)/ ba 0: ba /\A.*?(?:a|bc|d)/ ba 0: ba /(?:(b))++/ beetle 0: b 1: b /(?(?=(a(*ACCEPT)z))a)/ a 0: a 1: a /^(a)(?1)+ab/ aaaab 0: aaaab 1: a /^(a)(?1)++ab/ aaaab No match /^(?=a(*:M))aZ/K aZbc 0: aZ MK: M /^(?!(*:M)b)aZ/K aZbc 0: aZ /(?(DEFINE)(a))?b(?1)/ backgammon 0: ba /^\N+/ abc\ndef 0: abc /^\N{1,}/ abc\ndef 0: abc /(?(R)a+|(?R)b)/ aaaabcde 0: aaaab /(?(R)a+|((?R))b)/ aaaabcde 0: aaaab 1: aaaa /((?(R)a+|(?1)b))/ aaaabcde 0: aaaab 1: aaaab /((?(R1)a+|(?1)b))/ aaaabcde 0: aaaab 1: aaaab /a(*:any name)/K abc 0: a MK: any name /(?>(?&t)c|(?&t))(?(DEFINE)(?a|b(*PRUNE)c))/ a 0: a ba 0: a bba 0: a /--- Checking revised (*THEN) handling ---/ /--- Capture ---/ /^.*? (a(*THEN)b) c/x aabc No match /^.*? (a(*THEN)b|(*F)) c/x aabc 0: aabc 1: ab /^.*? ( (a(*THEN)b) | (*F) ) c/x aabc 0: aabc 1: ab 2: ab /^.*? ( (a(*THEN)b) ) c/x aabc No match /--- Non-capture ---/ /^.*? (?:a(*THEN)b) c/x aabc No match /^.*? (?:a(*THEN)b|(*F)) c/x aabc 0: aabc /^.*? (?: (?:a(*THEN)b) | (*F) ) c/x aabc 0: aabc /^.*? (?: (?:a(*THEN)b) ) c/x aabc No match /--- Atomic ---/ /^.*? (?>a(*THEN)b) c/x aabc No match /^.*? (?>a(*THEN)b|(*F)) c/x aabc 0: aabc /^.*? (?> (?>a(*THEN)b) | (*F) ) c/x aabc 0: aabc /^.*? (?> (?>a(*THEN)b) ) c/x aabc No match /--- Possessive capture ---/ /^.*? (a(*THEN)b)++ c/x aabc No match /^.*? (a(*THEN)b|(*F))++ c/x aabc 0: aabc 1: ab /^.*? ( (a(*THEN)b)++ | (*F) )++ c/x aabc 0: aabc 1: ab 2: ab /^.*? ( (a(*THEN)b)++ )++ c/x aabc No match /--- Possessive non-capture ---/ /^.*? (?:a(*THEN)b)++ c/x aabc No match /^.*? (?:a(*THEN)b|(*F))++ c/x aabc 0: aabc /^.*? (?: (?:a(*THEN)b)++ | (*F) )++ c/x aabc 0: aabc /^.*? (?: (?:a(*THEN)b)++ )++ c/x aabc No match /--- Condition assertion ---/ /^(?(?=a(*THEN)b)ab|ac)/ ac 0: ac /--- Condition ---/ /^.*?(?(?=a)a|b(*THEN)c)/ ba No match /^.*?(?:(?(?=a)a|b(*THEN)c)|d)/ ba 0: ba /^.*?(?(?=a)a(*THEN)b|c)/ ac No match /--- Assertion ---/ /^.*(?=a(*THEN)b)/ aabc 0: a /------------------------------/ /(?>a(*:m))/imsxSK a 0: a MK: m /(?>(a)(*:m))/imsxSK a 0: a 1: a MK: m /(?<=a(*ACCEPT)b)c/ xacd 0: c /(?<=(a(*ACCEPT)b))c/ xacd 0: c 1: a /(?<=(a(*COMMIT)b))c/ xabcd 0: c 1: ab ** Failers No match xacd No match /(?