--- embedaddon/pcre/testdata/testinput10 2012/02/21 23:05:52 1.1 +++ embedaddon/pcre/testdata/testinput10 2014/06/15 19:46:05 1.1.1.5 @@ -1,137 +1,1419 @@ -/-- These are a few representative patterns whose lengths and offsets are to be -shown when the link size is 2. This is just a doublecheck test to ensure the -sizes don't go horribly wrong when something is changed. The pattern contents -are all themselves checked in other tests. Unicode, including property support, -is required for these tests. --/ +/-- This set of tests check Unicode property support with the DFA matching + functionality of pcre_dfa_exec(). The -dfa flag must be used with pcretest + when running it. --/ -/((?i)b)/BM +/\pL\P{Nd}/8 + AB + *** Failers + A0 + 00 -/(?s)(.*X|^B)/BM +/\X./8 + AB + A\x{300}BC + A\x{300}\x{301}\x{302}BC + *** Failers + \x{300} -/(?s:.*X|^B)/BM +/\X\X/8 + ABC + A\x{300}B\x{300}\x{301}C + A\x{300}\x{301}\x{302}BC + *** Failers + \x{300} -/^[[:alnum:]]/BM +/^\pL+/8 + abcd + a + *** Failers -/#/IxMD +/^\PL+/8 + 1234 + = + *** Failers + abcd -/a#/IxMD +/^\X+/8 + abcdA\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302} + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302} + a + *** Failers + \x{300}\x{301}\x{302} -/x?+/BM +/\X?abc/8 + abc + A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc + *** Failers -/x++/BM +/^\X?abc/8 + abc + A\x{300}abc + *** Failers + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc -/x{1,3}+/BM +/\X*abc/8 + abc + A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + \x{300}abc + *** Failers -/(x)*+/BM +/^\X*abc/8 + abc + A\x{300}abc + A\x{300}\x{301}\x{302}A\x{300}A\x{300}A\x{300}abcxyz + *** Failers + \x{300}abc -/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/BM +/^\pL?=./8 + A=b + =c + *** Failers + 1=2 + AAAA=b -|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM +/^\pL*=./8 + AAAA=b + =c + *** Failers + 1=2 -|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM +/^\X{2,3}X/8 + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X + *** Failers + X + A\x{300}\x{301}\x{302}X + A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}A\x{300}\x{301}\x{302}X -/(a(?1)b)/BM +/^\pC\pL\pM\pN\pP\pS\pZb|c)d(?Pe)/BM +/\p{^Lu}/8i + 1234 + ** Failers + ABC -/(?:a(?Pc(?Pd)))(?Pa)/BM +/\P{Lu}/8i + 1234 + ** Failers + ABC -/(?Pa)...(?P=a)bbb(?P>a)d/BM +/(?<=A\p{Nd})XYZ/8 + A2XYZ + 123A5XYZPQR + ABA\x{660}XYZpqr + ** Failers + AXYZ + XYZ + +/(?\p{Xsp}/8 + >\x{1680}\x{2028}\x{0b} + ** Failers + \x{0b} + +/^>\p{Xsp}+/8O + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/8O + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/8O + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/8O + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/8O + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/8 + >\x{1680}\x{2028}\x{0b} + >\x{a0} + ** Failers + \x{0b} + +/^>\p{Xps}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/8 + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/8 + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/8 + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers + [] + +/^\p{Xwd}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/8 + A_12\x{6ca}\x{a6c}\x{10a7} + +/^[\p{Xwd}]/8 + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers + [] + +/^[\p{Xwd}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/-- Unicode properties for \b abd \B --/ + +/\b...\B/8W + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ + +/\b...\B/8 + abc_ + ** Failers + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ + +/\b...\B/W + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- Caseless single negated characters > 127 need UCP support --/ + +/[^\x{100}]/8i + \x{100}\x{101}X + +/[^\x{100}]+/8i + \x{100}\x{101}XX + +/^\X/8 + A\P + A\P\P + A\x{300}\x{301}\P + A\x{300}\x{301}\P\P + A\x{301}\P + A\x{301}\P\P + +/^\X{2,3}/8 + A\P + A\P\P + AA\P + AA\P\P + A\x{300}\x{301}\P + A\x{300}\x{301}\P\P + A\x{300}\x{301}A\x{300}\x{301}\P + A\x{300}\x{301}A\x{300}\x{301}\P\P + +/^\X{2}/8 + AA\P + AA\P\P + A\x{300}\x{301}A\x{300}\x{301}\P + A\x{300}\x{301}A\x{300}\x{301}\P\P + +/^\X+/8 + AA\P + AA\P\P + +/^\X+?Z/8 + AA\P + AA\P\P + +/-- These are tests for extended grapheme clusters --/ + +/^\X/8+ + G\x{34e}\x{34e}X + \x{34e}\x{34e}X + \x04X + \x{1100}X + \x{1100}\x{34e}X + \x{1b04}\x{1b04}X + *These match up to the roman letters + \x{1111}\x{1111}L,L + \x{1111}\x{1111}\x{1169}L,L,V + \x{1111}\x{ae4c}L, LV + \x{1111}\x{ad89}L, LVT + \x{1111}\x{ae4c}\x{1169}L, LV, V + \x{1111}\x{ae4c}\x{1169}\x{1169}L, LV, V, V + \x{1111}\x{ae4c}\x{1169}\x{11fe}L, LV, V, T + \x{1111}\x{ad89}\x{11fe}L, LVT, T + \x{1111}\x{ad89}\x{11fe}\x{11fe}L, LVT, T, T + \x{ad89}\x{11fe}\x{11fe}LVT, T, T + *These match just the first codepoint (invalid sequence) + \x{1111}\x{11fe}L, T + \x{ae4c}\x{1111}LV, L + \x{ae4c}\x{ae4c}LV, LV + \x{ae4c}\x{ad89}LV, LVT + \x{1169}\x{1111}V, L + \x{1169}\x{ae4c}V, LV + \x{1169}\x{ad89}V, LVT + \x{ad89}\x{1111}LVT, L + \x{ad89}\x{1169}LVT, V + \x{ad89}\x{ae4c}LVT, LV + \x{ad89}\x{ad89}LVT, LVT + \x{11fe}\x{1111}T, L + \x{11fe}\x{1169}T, V + \x{11fe}\x{ae4c}T, LV + \x{11fe}\x{ad89}T, LVT + *Test extend and spacing mark + \x{1111}\x{ae4c}\x{0711}L, LV, extend + \x{1111}\x{ae4c}\x{1b04}L, LV, spacing mark + \x{1111}\x{ae4c}\x{1b04}\x{0711}\x{1b04}L, LV, spacing mark, extend, spacing mark + *Test CR, LF, and control + \x0d\x{0711}CR, extend + \x0d\x{1b04}CR, spacingmark + \x0a\x{0711}LF, extend + \x0a\x{1b04}LF, spacingmark + \x0b\x{0711}Control, extend + \x09\x{1b04}Control, spacingmark + *There are no Prepend characters, so we can't test Prepend, CR + +/^(?>\X{2})X/8+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}X/8+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/^\X{2,4}?X/8+ + \x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + \x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}\x{1111}\x{ae4c}X + +/-- --/ + +/\x{1e9e}+/8i + \x{1e9e}\x{00df} + +/[z\x{1e9e}]+/8i + \x{1e9e}\x{00df} + +/\x{00df}+/8i + \x{1e9e}\x{00df} + +/[z\x{00df}]+/8i + \x{1e9e}\x{00df} + +/\x{1f88}+/8i + \x{1f88}\x{1f80} + +/[z\x{1f88}]+/8i + \x{1f88}\x{1f80} + +/-- Perl matches these --/ + +/\x{00b5}+/8i + \x{00b5}\x{039c}\x{03bc} + +/\x{039c}+/8i + \x{00b5}\x{039c}\x{03bc} + +/\x{03bc}+/8i + \x{00b5}\x{039c}\x{03bc} + + +/\x{00c5}+/8i + \x{00c5}\x{00e5}\x{212b} + +/\x{00e5}+/8i + \x{00c5}\x{00e5}\x{212b} + +/\x{212b}+/8i + \x{00c5}\x{00e5}\x{212b} + + +/\x{01c4}+/8i + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c5}+/8i + \x{01c4}\x{01c5}\x{01c6} + +/\x{01c6}+/8i + \x{01c4}\x{01c5}\x{01c6} + + +/\x{01c7}+/8i + \x{01c7}\x{01c8}\x{01c9} + +/\x{01c8}+/8i + \x{01c7}\x{01c8}\x{01c9} + +/\x{01c9}+/8i + \x{01c7}\x{01c8}\x{01c9} + + +/\x{01ca}+/8i + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cb}+/8i + \x{01ca}\x{01cb}\x{01cc} + +/\x{01cc}+/8i + \x{01ca}\x{01cb}\x{01cc} + + +/\x{01f1}+/8i + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f2}+/8i + \x{01f1}\x{01f2}\x{01f3} + +/\x{01f3}+/8i + \x{01f1}\x{01f2}\x{01f3} + + +/\x{0345}+/8i + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{0399}+/8i + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{03b9}+/8i + \x{0345}\x{0399}\x{03b9}\x{1fbe} + +/\x{1fbe}+/8i + \x{0345}\x{0399}\x{03b9}\x{1fbe} + + +/\x{0392}+/8i + \x{0392}\x{03b2}\x{03d0} + +/\x{03b2}+/8i + \x{0392}\x{03b2}\x{03d0} + +/\x{03d0}+/8i + \x{0392}\x{03b2}\x{03d0} + + +/\x{0395}+/8i + \x{0395}\x{03b5}\x{03f5} + +/\x{03b5}+/8i + \x{0395}\x{03b5}\x{03f5} + +/\x{03f5}+/8i + \x{0395}\x{03b5}\x{03f5} + + +/\x{0398}+/8i + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03b8}+/8i + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03d1}+/8i + \x{0398}\x{03b8}\x{03d1}\x{03f4} + +/\x{03f4}+/8i + \x{0398}\x{03b8}\x{03d1}\x{03f4} + + +/\x{039a}+/8i + \x{039a}\x{03ba}\x{03f0} + +/\x{03ba}+/8i + \x{039a}\x{03ba}\x{03f0} + +/\x{03f0}+/8i + \x{039a}\x{03ba}\x{03f0} + + +/\x{03a0}+/8i + \x{03a0}\x{03c0}\x{03d6} + +/\x{03c0}+/8i + \x{03a0}\x{03c0}\x{03d6} + +/\x{03d6}+/8i + \x{03a0}\x{03c0}\x{03d6} + + +/\x{03a1}+/8i + \x{03a1}\x{03c1}\x{03f1} + +/\x{03c1}+/8i + \x{03a1}\x{03c1}\x{03f1} + +/\x{03f1}+/8i + \x{03a1}\x{03c1}\x{03f1} + + +/\x{03a3}+/8i + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c2}+/8i + \x{03A3}\x{03C2}\x{03C3} + +/\x{03c3}+/8i + \x{03A3}\x{03C2}\x{03C3} + + +/\x{03a6}+/8i + \x{03a6}\x{03c6}\x{03d5} + +/\x{03c6}+/8i + \x{03a6}\x{03c6}\x{03d5} + +/\x{03d5}+/8i + \x{03a6}\x{03c6}\x{03d5} + + +/\x{03c9}+/8i + \x{03c9}\x{03a9}\x{2126} + +/\x{03a9}+/8i + \x{03c9}\x{03a9}\x{2126} + +/\x{2126}+/8i + \x{03c9}\x{03a9}\x{2126} + + +/\x{1e60}+/8i + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e61}+/8i + \x{1e60}\x{1e61}\x{1e9b} + +/\x{1e9b}+/8i + \x{1e60}\x{1e61}\x{1e9b} + + +/\x{1e9e}+/8i + \x{1e9e}\x{00df} + +/\x{00df}+/8i + \x{1e9e}\x{00df} + + +/\x{1f88}+/8i + \x{1f88}\x{1f80} + +/\x{1f80}+/8i + \x{1f88}\x{1f80} + +/\x{004b}+/8i + \x{004b}\x{006b}\x{212a} + +/\x{006b}+/8i + \x{004b}\x{006b}\x{212a} + +/\x{212a}+/8i + \x{004b}\x{006b}\x{212a} + + +/\x{0053}+/8i + \x{0053}\x{0073}\x{017f} + +/\x{0073}+/8i + \x{0053}\x{0073}\x{017f} + +/\x{017f}+/8i + \x{0053}\x{0073}\x{017f} + +/ist/8i + ikt + +/is+t/8i + iSs\x{17f}t + ikt + +/is+?t/8i + ikt + +/is?t/8i + ikt + +/is{2}t/8i + iskt + +/^\p{Xuc}/8 + $abc + @abc + `abc + \x{1234}abc + ** Failers + abc + +/^\p{Xuc}+/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}+?/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}+?\*/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}++/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}{3,5}/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}{3,5}?/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^[\p{Xuc}]/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^[\p{Xuc}]+/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\P{Xuc}/8 + abc + ** Failers + $abc + @abc + `abc + \x{1234}abc + +/^[\P{Xuc}]/8 + abc + ** Failers + $abc + @abc + `abc + \x{1234}abc + +/^A\s+Z/8W + A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/8W + A\x{2005}Z + A\x{85}\x{180e}\x{2005}Z + +/-- End of testinput10 --/