--- embedaddon/pcre/testdata/testoutput15 2012/02/21 23:50:25 1.1.1.2 +++ embedaddon/pcre/testdata/testoutput15 2014/06/15 19:46:05 1.1.1.5 @@ -1,6 +1,8 @@ -/-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit - library. --/ +/-- This set of tests is for UTF-8 support but not Unicode property support, + and is relevant only to the 8-bit library. --/ +< forbid W + /X(\C{3})/8 X\x{1234} 0: X\x{1234} @@ -72,7 +74,7 @@ Failed: invalid UTF-8 string at offset 0 /ÃÃÃxxx/8 Failed: invalid UTF-8 string at offset 0 -/ÃÃÃxxx/8?DZSS +/ÃÃÃxxx/8?DZSSO ------------------------------------------------------------------ Bra \X{c0}\X{c0}\X{c0}xxx @@ -80,51 +82,129 @@ Failed: invalid UTF-8 string at offset 0 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf no_utf_check +Options: no_auto_possessify utf no_utf_check First char = \x{c3} Need char = 'x' -/abc/8 - Ã] -Error -10 (bad UTF-8 string) offset=0 reason=6 - Ã +/badutf/8 + \xdf Error -10 (bad UTF-8 string) offset=0 reason=1 - ÃÃÃ -Error -10 (bad UTF-8 string) offset=0 reason=6 - ÃÃÃ\? -No match - \xe1\x88 + \xef +Error -10 (bad UTF-8 string) offset=0 reason=2 + \xef\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 - \P\xe1\x88 + \xf7 +Error -10 (bad UTF-8 string) offset=0 reason=3 + \xf7\x80 +Error -10 (bad UTF-8 string) offset=0 reason=2 + \xf7\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 - \P\P\xe1\x88 + \xfb +Error -10 (bad UTF-8 string) offset=0 reason=4 + \xfb\x80 +Error -10 (bad UTF-8 string) offset=0 reason=3 + \xfb\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=2 + \xfb\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=1 + \xfd +Error -10 (bad UTF-8 string) offset=0 reason=5 + \xfd\x80 +Error -10 (bad UTF-8 string) offset=0 reason=4 + \xfd\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=3 + \xfd\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=2 + \xfd\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=1 + \xdf\x7f +Error -10 (bad UTF-8 string) offset=0 reason=6 + \xef\x7f\x80 +Error -10 (bad UTF-8 string) offset=0 reason=6 + \xef\x80\x7f +Error -10 (bad UTF-8 string) offset=0 reason=7 + \xf7\x7f\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=6 + \xf7\x80\x7f\x80 +Error -10 (bad UTF-8 string) offset=0 reason=7 + \xf7\x80\x80\x7f +Error -10 (bad UTF-8 string) offset=0 reason=8 + \xfb\x7f\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=6 + \xfb\x80\x7f\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=7 + \xfb\x80\x80\x7f\x80 +Error -10 (bad UTF-8 string) offset=0 reason=8 + \xfb\x80\x80\x80\x7f +Error -10 (bad UTF-8 string) offset=0 reason=9 + \xfd\x7f\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=6 + \xfd\x80\x7f\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=7 + \xfd\x80\x80\x7f\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=8 + \xfd\x80\x80\x80\x7f\x80 +Error -10 (bad UTF-8 string) offset=0 reason=9 + \xfd\x80\x80\x80\x80\x7f +Error -10 (bad UTF-8 string) offset=0 reason=10 + \xed\xa0\x80 +Error -10 (bad UTF-8 string) offset=0 reason=14 + \xc0\x8f +Error -10 (bad UTF-8 string) offset=0 reason=15 + \xe0\x80\x8f +Error -10 (bad UTF-8 string) offset=0 reason=16 + \xf0\x80\x80\x8f +Error -10 (bad UTF-8 string) offset=0 reason=17 + \xf8\x80\x80\x80\x8f +Error -10 (bad UTF-8 string) offset=0 reason=18 + \xfc\x80\x80\x80\x80\x8f +Error -10 (bad UTF-8 string) offset=0 reason=19 + \x80 +Error -10 (bad UTF-8 string) offset=0 reason=20 + \xfe +Error -10 (bad UTF-8 string) offset=0 reason=21 + \xff +Error -10 (bad UTF-8 string) offset=0 reason=21 + +/badutf/8 + \xfb\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=11 + \xfd\x80\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=12 + \xf7\xbf\xbf\xbf +Error -10 (bad UTF-8 string) offset=0 reason=13 + +/shortutf/8 + \P\P\xdf Error -25 (short UTF-8 string) offset=0 reason=1 - XX\xea -Error -10 (bad UTF-8 string) offset=2 reason=2 - \O0XX\xea -Error -10 (bad UTF-8 string) - \O1XX\xea -Error -10 (bad UTF-8 string) - \O2XX\xea -Error -10 (bad UTF-8 string) offset=2 reason=2 - XX\xf1 -Error -10 (bad UTF-8 string) offset=2 reason=3 - XX\xf8 -Error -10 (bad UTF-8 string) offset=2 reason=4 - XX\xfc -Error -10 (bad UTF-8 string) offset=2 reason=5 - ZZ\xea\xaf\x20YY -Error -10 (bad UTF-8 string) offset=2 reason=7 - ZZ\xfd\xbf\xbf\x2f\xbf\xbfYY -Error -10 (bad UTF-8 string) offset=2 reason=8 - ZZ\xfd\xbf\xbf\xbf\x2f\xbfYY -Error -10 (bad UTF-8 string) offset=2 reason=9 - ZZ\xfd\xbf\xbf\xbf\xbf\x2fYY -Error -10 (bad UTF-8 string) offset=2 reason=10 - ZZ\xffYY -Error -10 (bad UTF-8 string) offset=2 reason=21 - ZZ\xfeYY -Error -10 (bad UTF-8 string) offset=2 reason=21 + \P\P\xef +Error -25 (short UTF-8 string) offset=0 reason=2 + \P\P\xef\x80 +Error -25 (short UTF-8 string) offset=0 reason=1 + \P\P\xf7 +Error -25 (short UTF-8 string) offset=0 reason=3 + \P\P\xf7\x80 +Error -25 (short UTF-8 string) offset=0 reason=2 + \P\P\xf7\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=1 + \P\P\xfb +Error -25 (short UTF-8 string) offset=0 reason=4 + \P\P\xfb\x80 +Error -25 (short UTF-8 string) offset=0 reason=3 + \P\P\xfb\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=2 + \P\P\xfb\x80\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=1 + \P\P\xfd +Error -25 (short UTF-8 string) offset=0 reason=5 + \P\P\xfd\x80 +Error -25 (short UTF-8 string) offset=0 reason=4 + \P\P\xfd\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=3 + \P\P\xfd\x80\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=2 + \P\P\xfd\x80\x80\x80\x80 +Error -25 (short UTF-8 string) offset=0 reason=1 /anything/8 \xc0\x80 @@ -430,7 +510,7 @@ No match ------------------------------------------------------------------ Bra \x{100}{3} - \x{100}? + \x{100}?+ Ket End ------------------------------------------------------------------ @@ -447,7 +527,7 @@ No set of starting bytes ------------------------------------------------------------------ Bra CBra 1 - \x{100}+ + \x{100}++ Alt x Ket @@ -484,7 +564,7 @@ Starting byte set: a x \xc4 ------------------------------------------------------------------ Bra CBra 1 - \x{100}{0,2} + \x{100}{0,2}+ a Alt x @@ -504,7 +584,7 @@ Starting byte set: a x \xc4 Bra CBra 1 \x{100} - \x{100}{0,1} + \x{100}{0,1}+ a Alt x @@ -535,7 +615,7 @@ Need char = \x{80} ------------------------------------------------------------------ Bra a\x{100} - \x{101}* + \x{101}*+ Ket End ------------------------------------------------------------------ @@ -548,7 +628,7 @@ Need char = \x{80} ------------------------------------------------------------------ Bra a\x{100} - \x{101}+ + \x{101}++ Ket End ------------------------------------------------------------------ @@ -560,7 +640,7 @@ Need char = \x{81} /[^\x{c4}]/DZ ------------------------------------------------------------------ Bra - [^\xc4] + [^\x{c4}] Ket End ------------------------------------------------------------------ @@ -606,7 +686,7 @@ Need char = \x{bf} /[^\xff]/8DZ ------------------------------------------------------------------ Bra - [\x00-\xfe] (neg) + [^\x{ff}] Ket End ------------------------------------------------------------------ @@ -677,8 +757,6 @@ Failed: missing terminating ] for character class at o /-- This tests the stricter UTF-8 check according to RFC 3629. --/ /X/8 - \x{0}\x{d7ff}\x{e000}\x{10ffff} -No match \x{d800} Error -10 (bad UTF-8 string) offset=0 reason=14 \x{d800}\? @@ -708,7 +786,7 @@ No match abcd\x{1234}pqr 0: \x{1234} -/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I +/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 Options: bsr_unicode utf Forced newline sequence: CRLF @@ -785,7 +863,7 @@ Options: utf No first char Need char = 'x' Subject length lower bound = 4 -Starting byte set: \x09 \x0a \x0c \x0d \x20 x +Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 x /\sxxx\s/I8ST1 Capturing subpattern count = 0 @@ -868,7 +946,7 @@ Starting byte set: \xe1 /[^\x{c4}]/8DZ ------------------------------------------------------------------ Bra - [\x00-\xc3\xc5-\xff] (neg) + [^\x{c4}] Ket End ------------------------------------------------------------------ @@ -909,5 +987,153 @@ Capturing subpattern count = 0 Options: utf First char = \x{c7} Need char = \x{bf} + +/\w+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/8BZT1 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/8BZ +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/8BZT1 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/A/8 + \x{ff000041} +** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 + \x{7f000041} +Error -10 (bad UTF-8 string) offset=0 reason=12 + +/(*UTF8)abc/9 +Failed: setting UTF is disabled by the application at offset 0 + +/abc/89 +Failed: setting UTF is disabled by the application at offset 0 /-- End of testinput15 --/