--- embedaddon/pcre/testdata/testoutput15 2012/02/21 23:05:52 1.1.1.1 +++ embedaddon/pcre/testdata/testoutput15 2012/02/21 23:50:25 1.1.1.2 @@ -1,21 +1,913 @@ -/-- This test is run only when JIT support is not available. It checks that an -attempt to use it has the expected behaviour. It also tests things that -are different without JIT. --/ - -/abc/S+I +/-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit + library. --/ + +/X(\C{3})/8 + X\x{1234} + 0: X\x{1234} + 1: \x{1234} + +/X(\C{4})/8 + X\x{1234}YZ + 0: X\x{1234}Y + 1: \x{1234}Y + +/X\C*/8 + XYZabcdce + 0: XYZabcdce + +/X\C*?/8 + XYZabcde + 0: X + +/X\C{3,5}/8 + Xabcdefg + 0: Xabcde + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234}YZ + X\x{1234}\x{512} + 0: X\x{1234}\x{512} + X\x{1234}\x{512}YZ + 0: X\x{1234}\x{512} + +/X\C{3,5}?/8 + Xabcdefg + 0: Xabc + X\x{1234} + 0: X\x{1234} + X\x{1234}YZ + 0: X\x{1234} + X\x{1234}\x{512} + 0: X\x{1234} + +/a\Cb/8 + aXb + 0: aXb + a\nb + 0: a\x{0a}b + +/a\C\Cb/8 + a\x{100}b + 0: a\x{100}b + +/ab\Cde/8 + abXde + 0: abXde + +/a\C\Cb/8 + a\x{100}b + 0: a\x{100}b + ** Failers +No match + a\x{12257}b +No match + +/[Ã]/8 +Failed: invalid UTF-8 string at offset 1 + +/Ã/8 +Failed: invalid UTF-8 string at offset 0 + +/ÃÃÃxxx/8 +Failed: invalid UTF-8 string at offset 0 + +/ÃÃÃxxx/8?DZSS +------------------------------------------------------------------ + Bra + \X{c0}\X{c0}\X{c0}xxx + Ket + End +------------------------------------------------------------------ Capturing subpattern count = 0 -No options -First char = 'a' -Need char = 'c' +Options: utf no_utf_check +First char = \x{c3} +Need char = 'x' + +/abc/8 + Ã] +Error -10 (bad UTF-8 string) offset=0 reason=6 + Ã +Error -10 (bad UTF-8 string) offset=0 reason=1 + ÃÃÃ +Error -10 (bad UTF-8 string) offset=0 reason=6 + ÃÃÃ\? +No match + \xe1\x88 +Error -10 (bad UTF-8 string) offset=0 reason=1 + \P\xe1\x88 +Error -10 (bad UTF-8 string) offset=0 reason=1 + \P\P\xe1\x88 +Error -25 (short UTF-8 string) offset=0 reason=1 + XX\xea +Error -10 (bad UTF-8 string) offset=2 reason=2 + \O0XX\xea +Error -10 (bad UTF-8 string) + \O1XX\xea +Error -10 (bad UTF-8 string) + \O2XX\xea +Error -10 (bad UTF-8 string) offset=2 reason=2 + XX\xf1 +Error -10 (bad UTF-8 string) offset=2 reason=3 + XX\xf8 +Error -10 (bad UTF-8 string) offset=2 reason=4 + XX\xfc +Error -10 (bad UTF-8 string) offset=2 reason=5 + ZZ\xea\xaf\x20YY +Error -10 (bad UTF-8 string) offset=2 reason=7 + ZZ\xfd\xbf\xbf\x2f\xbf\xbfYY +Error -10 (bad UTF-8 string) offset=2 reason=8 + ZZ\xfd\xbf\xbf\xbf\x2f\xbfYY +Error -10 (bad UTF-8 string) offset=2 reason=9 + ZZ\xfd\xbf\xbf\xbf\xbf\x2fYY +Error -10 (bad UTF-8 string) offset=2 reason=10 + ZZ\xffYY +Error -10 (bad UTF-8 string) offset=2 reason=21 + ZZ\xfeYY +Error -10 (bad UTF-8 string) offset=2 reason=21 + +/anything/8 + \xc0\x80 +Error -10 (bad UTF-8 string) offset=0 reason=15 + \xc1\x8f +Error -10 (bad UTF-8 string) offset=0 reason=15 + \xe0\x9f\x80 +Error -10 (bad UTF-8 string) offset=0 reason=16 + \xf0\x8f\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=17 + \xf8\x87\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=18 + \xfc\x83\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=19 + \xfe\x80\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=21 + \xff\x80\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=21 + \xc3\x8f +No match + \xe0\xaf\x80 +No match + \xe1\x80\x80 +No match + \xf0\x9f\x80\x80 +No match + \xf1\x8f\x80\x80 +No match + \xf8\x88\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=11 + \xf9\x87\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=11 + \xfc\x84\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=12 + \xfd\x83\x80\x80\x80\x80 +Error -10 (bad UTF-8 string) offset=0 reason=12 + \?\xf8\x88\x80\x80\x80 +No match + \?\xf9\x87\x80\x80\x80 +No match + \?\xfc\x84\x80\x80\x80\x80 +No match + \?\xfd\x83\x80\x80\x80\x80 +No match + +/\x{100}/8DZ +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} + +/\x{1000}/8DZ +------------------------------------------------------------------ + Bra + \x{1000} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{e1} +Need char = \x{80} + +/\x{10000}/8DZ +------------------------------------------------------------------ + Bra + \x{10000} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{f0} +Need char = \x{80} + +/\x{100000}/8DZ +------------------------------------------------------------------ + Bra + \x{100000} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{f4} +Need char = \x{80} + +/\x{10ffff}/8DZ +------------------------------------------------------------------ + Bra + \x{10ffff} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{f4} +Need char = \x{bf} + +/[\x{ff}]/8DZ +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c3} +Need char = \x{bf} + +/[\x{100}]/8DZ +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} + +/\x80/8DZ +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c2} +Need char = \x{80} + +/\xff/8DZ +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c3} +Need char = \x{bf} + +/\x{D55c}\x{ad6d}\x{C5B4}/DZ8 +------------------------------------------------------------------ + Bra + \x{d55c}\x{ad6d}\x{c5b4} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{ed} +Need char = \x{b4} + \x{D55c}\x{ad6d}\x{C5B4} + 0: \x{d55c}\x{ad6d}\x{c5b4} + +/\x{65e5}\x{672c}\x{8a9e}/DZ8 +------------------------------------------------------------------ + Bra + \x{65e5}\x{672c}\x{8a9e} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{e6} +Need char = \x{9e} + \x{65e5}\x{672c}\x{8a9e} + 0: \x{65e5}\x{672c}\x{8a9e} + +/\x{80}/DZ8 +------------------------------------------------------------------ + Bra + \x{80} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c2} +Need char = \x{80} + +/\x{084}/DZ8 +------------------------------------------------------------------ + Bra + \x{84} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c2} +Need char = \x{84} + +/\x{104}/DZ8 +------------------------------------------------------------------ + Bra + \x{104} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{84} + +/\x{861}/DZ8 +------------------------------------------------------------------ + Bra + \x{861} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{e0} +Need char = \x{a1} + +/\x{212ab}/DZ8 +------------------------------------------------------------------ + Bra + \x{212ab} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{f0} +Need char = \x{ab} + +/-- This one is here not because it's different to Perl, but because the way +the captured single-byte is displayed. (In Perl it becomes a character, and you +can't tell the difference.) --/ + +/X(\C)(.*)/8 + X\x{1234} + 0: X\x{1234} + 1: \x{e1} + 2: \x{88}\x{b4} + X\nabc + 0: X\x{0a}abc + 1: \x{0a} + 2: abc + +/-- This one is here because Perl gives out a grumbly error message (quite +correctly, but that messes up comparisons). --/ + +/a\Cb/8 + *** Failers +No match + a\x{100}b +No match + +/[^ab\xC0-\xF0]/8SDZ +------------------------------------------------------------------ + Bra + [\x00-`c-\xbf\xf1-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 + 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y + Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f + \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 + \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf + \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee + \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd + \xfe \xff + \x{f1} + 0: \x{f1} + \x{bf} + 0: \x{bf} + \x{100} + 0: \x{100} + \x{1000} + 0: \x{1000} + *** Failers + 0: * + \x{c0} +No match + \x{f0} +No match + +/Ä€{3,4}/8SDZ +------------------------------------------------------------------ + Bra + \x{100}{3} + \x{100}? + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} Subject length lower bound = 3 No set of starting bytes -JIT support is not available in this version of PCRE + \x{100}\x{100}\x{100}\x{100\x{100} + 0: \x{100}\x{100}\x{100} -/a*/SI +/(\x{100}+|x)/8SDZ +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}+ + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: x \xc4 + +/(\x{100}*a|x)/8SDZ +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}*+ + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: a x \xc4 + +/(\x{100}{0,2}a|x)/8SDZ +------------------------------------------------------------------ + Bra + CBra 1 + \x{100}{0,2} + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: a x \xc4 + +/(\x{100}{1,2}a|x)/8SDZ +------------------------------------------------------------------ + Bra + CBra 1 + \x{100} + \x{100}{0,1} + a + Alt + x + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: x \xc4 + +/\x{100}/8DZ +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} + +/a\x{100}\x{101}*/8DZ +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}* + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = 'a' +Need char = \x{80} + +/a\x{100}\x{101}+/8DZ +------------------------------------------------------------------ + Bra + a\x{100} + \x{101}+ + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = 'a' +Need char = \x{81} + +/[^\x{c4}]/DZ +------------------------------------------------------------------ + Bra + [^\xc4] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 No options No first char No need char -Study returned NULL + +/[\x{100}]/8DZ +------------------------------------------------------------------ + Bra + \x{100} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} + \x{100} + 0: \x{100} + Z\x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[\xff]/DZ8 +------------------------------------------------------------------ + Bra + \x{ff} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c3} +Need char = \x{bf} + >\x{ff}< + 0: \x{ff} + +/[^\xff]/8DZ +------------------------------------------------------------------ + Bra + [\x00-\xfe] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +No first char +No need char + +/\x{100}abc(xyz(?1))/8DZ +------------------------------------------------------------------ + Bra + \x{100}abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +First char = \x{c4} +Need char = 'z' + +/a\x{1234}b/P8 + a\x{1234}b + 0: a\x{1234}b + +/\777/8I +Capturing subpattern count = 0 +Options: utf +First char = \x{c7} +Need char = \x{bf} + \x{1ff} + 0: \x{1ff} + \777 + 0: \x{1ff} + +/\x{100}+\x{200}/8DZ +------------------------------------------------------------------ + Bra + \x{100}++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = \x{80} + +/\x{100}+X/8DZ +------------------------------------------------------------------ + Bra + \x{100}++ + X + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c4} +Need char = 'X' + +/^[\QÄ€\E-\QÅ\E/BZ8 +Failed: missing terminating ] for character class at offset 15 + +/-- This tests the stricter UTF-8 check according to RFC 3629. --/ + +/X/8 + \x{0}\x{d7ff}\x{e000}\x{10ffff} +No match + \x{d800} +Error -10 (bad UTF-8 string) offset=0 reason=14 + \x{d800}\? +No match + \x{da00} +Error -10 (bad UTF-8 string) offset=0 reason=14 + \x{da00}\? +No match + \x{dfff} +Error -10 (bad UTF-8 string) offset=0 reason=14 + \x{dfff}\? +No match + \x{110000} +Error -10 (bad UTF-8 string) offset=0 reason=13 + \x{110000}\? +No match + \x{2000000} +Error -10 (bad UTF-8 string) offset=0 reason=11 + \x{2000000}\? +No match + \x{7fffffff} +Error -10 (bad UTF-8 string) offset=0 reason=12 + \x{7fffffff}\? +No match + +/(*UTF8)\x{1234}/ + abcd\x{1234}pqr + 0: \x{1234} + +/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I +Capturing subpattern count = 0 +Options: bsr_unicode utf +Forced newline sequence: CRLF +First char = 'a' +Need char = 'b' + +/\h/SI8 +Capturing subpattern count = 0 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 + ABC\x{09} + 0: \x{09} + ABC\x{20} + 0: + ABC\x{a0} + 0: \x{a0} + ABC\x{1680} + 0: \x{1680} + ABC\x{180e} + 0: \x{180e} + ABC\x{2000} + 0: \x{2000} + ABC\x{202f} + 0: \x{202f} + ABC\x{205f} + 0: \x{205f} + ABC\x{3000} + 0: \x{3000} + +/\v/SI8 +Capturing subpattern count = 0 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 + ABC\x{0a} + 0: \x{0a} + ABC\x{0b} + 0: \x{0b} + ABC\x{0c} + 0: \x{0c} + ABC\x{0d} + 0: \x{0d} + ABC\x{85} + 0: \x{85} + ABC\x{2028} + 0: \x{2028} + +/\h*A/SI8 +Capturing subpattern count = 0 +Options: utf +No first char +Need char = 'A' +Subject length lower bound = 1 +Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 + CDBABC + 0: A + +/\v+A/SI8 +Capturing subpattern count = 0 +Options: utf +No first char +Need char = 'A' +Subject length lower bound = 2 +Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 + +/\s?xxx\s/8SI +Capturing subpattern count = 0 +Options: utf +No first char +Need char = 'x' +Subject length lower bound = 4 +Starting byte set: \x09 \x0a \x0c \x0d \x20 x + +/\sxxx\s/I8ST1 +Capturing subpattern count = 0 +Options: utf +No first char +Need char = 'x' +Subject length lower bound = 5 +Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 + AB\x{85}xxx\x{a0}XYZ + 0: \x{85}xxx\x{a0} + AB\x{a0}xxx\x{85}XYZ + 0: \x{a0}xxx\x{85} + +/\S \S/I8ST1 +Capturing subpattern count = 0 +Options: utf +No first char +Need char = ' ' +Subject length lower bound = 3 +Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e + \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d + \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e + f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff + \x{a2} \x{84} + 0: \x{a2} \x{84} + A Z + 0: A Z + +/a+/8 + a\x{123}aa\>1 + 0: aa + a\x{123}aa\>2 +Error -11 (bad UTF-8 offset) + a\x{123}aa\>3 + 0: aa + a\x{123}aa\>4 + 0: a + a\x{123}aa\>5 +No match + a\x{123}aa\>6 +Error -24 (bad offset value) + +/\x{1234}+/iS8I +Capturing subpattern count = 0 +Options: caseless utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \xe1 + +/\x{1234}+?/iS8I +Capturing subpattern count = 0 +Options: caseless utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \xe1 + +/\x{1234}++/iS8I +Capturing subpattern count = 0 +Options: caseless utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \xe1 + +/\x{1234}{2}/iS8I +Capturing subpattern count = 0 +Options: caseless utf +No first char +No need char +Subject length lower bound = 2 +Starting byte set: \xe1 + +/[^\x{c4}]/8DZ +------------------------------------------------------------------ + Bra + [\x00-\xc3\xc5-\xff] (neg) + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +No first char +No need char + +/X+\x{200}/8DZ +------------------------------------------------------------------ + Bra + X++ + \x{200} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = 'X' +Need char = \x{80} + +/\R/SI8 +Capturing subpattern count = 0 +Options: utf +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 + +/\777/8DZ +------------------------------------------------------------------ + Bra + \x{1ff} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +First char = \x{c7} +Need char = \x{bf} /-- End of testinput15 --/