--- embedaddon/pcre/testdata/testoutput5 2012/02/21 23:05:52 1.1.1.1 +++ embedaddon/pcre/testdata/testoutput5 2012/02/21 23:50:25 1.1.1.2 @@ -1,148 +1,30 @@ -/-- This set of tests checks the API, internals, and non-Perl stuff for UTF-8 - support, excluding Unicode properties. --/ +/-- This set of tests checks the API, internals, and non-Perl stuff for UTF + support, excluding Unicode properties. However, tests that give different + results in 8-bit and 16-bit modes are excluded (see tests 16 and 17). --/ -/\x{100}/8DZ ------------------------------------------------------------------- - Bra - \x{100} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 128 +/\x{110000}/8DZ +Failed: character value in \x{...} sequence is too large at offset 9 -/\x{1000}/8DZ ------------------------------------------------------------------- - Bra - \x{1000} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 225 -Need char = 128 - -/\x{10000}/8DZ ------------------------------------------------------------------- - Bra - \x{10000} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 240 -Need char = 128 - -/\x{100000}/8DZ ------------------------------------------------------------------- - Bra - \x{100000} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 244 -Need char = 128 - -/\x{1000000}/8DZ ------------------------------------------------------------------- - Bra - \x{1000000} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 249 -Need char = 128 - -/\x{4000000}/8DZ ------------------------------------------------------------------- - Bra - \x{4000000} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 252 -Need char = 128 - -/\x{7fffFFFF}/8DZ ------------------------------------------------------------------- - Bra - \x{7fffffff} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 253 -Need char = 191 - -/[\x{ff}]/8DZ ------------------------------------------------------------------- - Bra - \x{ff} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 195 -Need char = 191 - -/[\x{100}]/8DZ ------------------------------------------------------------------- - Bra - [\x{100}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - /\x{ffffffff}/8 Failed: character value in \x{...} sequence is too large at offset 11 /\x{100000000}/8 Failed: character value in \x{...} sequence is too large at offset 12 +/\x{d800}/8 +Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + +/\x{dfff}/8 +Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + +/\x{d7ff}/8 + +/\x{e000}/8 + /^\x{100}a\x{1234}/8 \x{100}a\x{1234}bcd 0: \x{100}a\x{1234} -/\x80/8DZ ------------------------------------------------------------------- - Bra - \x{80} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 194 -Need char = 128 - -/\xff/8DZ ------------------------------------------------------------------- - Bra - \x{ff} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 195 -Need char = 191 - /\x{0041}\x{2262}\x{0391}\x{002e}/DZ8 ------------------------------------------------------------------ Bra @@ -151,100 +33,12 @@ Need char = 191 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf First char = 'A' Need char = '.' \x{0041}\x{2262}\x{0391}\x{002e} 0: A\x{2262}\x{391}. -/\x{D55c}\x{ad6d}\x{C5B4}/DZ8 ------------------------------------------------------------------- - Bra - \x{d55c}\x{ad6d}\x{c5b4} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 237 -Need char = 180 - \x{D55c}\x{ad6d}\x{C5B4} - 0: \x{d55c}\x{ad6d}\x{c5b4} - -/\x{65e5}\x{672c}\x{8a9e}/DZ8 ------------------------------------------------------------------- - Bra - \x{65e5}\x{672c}\x{8a9e} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 230 -Need char = 158 - \x{65e5}\x{672c}\x{8a9e} - 0: \x{65e5}\x{672c}\x{8a9e} - -/\x{80}/DZ8 ------------------------------------------------------------------- - Bra - \x{80} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 194 -Need char = 128 - -/\x{084}/DZ8 ------------------------------------------------------------------- - Bra - \x{84} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 194 -Need char = 132 - -/\x{104}/DZ8 ------------------------------------------------------------------- - Bra - \x{104} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 132 - -/\x{861}/DZ8 ------------------------------------------------------------------- - Bra - \x{861} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 224 -Need char = 161 - -/\x{212ab}/DZ8 ------------------------------------------------------------------- - Bra - \x{212ab} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 240 -Need char = 171 - /.{3,5}X/DZ8 ------------------------------------------------------------------ Bra @@ -255,13 +49,12 @@ Need char = 171 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char Need char = 'X' \x{212ab}\x{212ab}\x{212ab}\x{861}X 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X - /.{3,5}?/DZ8 ------------------------------------------------------------------ Bra @@ -271,7 +64,7 @@ Need char = 'X' End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char \x{212ab}\x{212ab}\x{212ab}\x{861} @@ -280,29 +73,6 @@ No need char /(?<=\C)X/8 Failed: \C not allowed in lookbehind assertion at offset 6 -/-- This one is here not because it's different to Perl, but because the way -the captured single-byte is displayed. (In Perl it becomes a character, and you -can't tell the difference.) --/ - -/X(\C)(.*)/8 - X\x{1234} - 0: X\x{1234} - 1: \xe1 - 2: \x88\xb4 - X\nabc - 0: X\x{0a}abc - 1: \x{0a} - 2: abc - -/-- This one is here because Perl gives out a grumbly error message (quite -correctly, but that messes up comparisons). --/ - -/a\Cb/8 - *** Failers -No match - a\x{100}b -No match - /^[ab]/8DZ ------------------------------------------------------------------ Bra @@ -312,7 +82,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: anchored utf8 +Options: anchored utf No first char No need char bar @@ -335,7 +105,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: anchored utf8 +Options: anchored utf No first char No need char c @@ -349,136 +119,6 @@ No need char aaa No match -/[^ab\xC0-\xF0]/8SDZ ------------------------------------------------------------------- - Bra - [\x00-`c-\xbf\xf1-\xff] (neg) - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a - \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 - \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 - 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y - Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f - \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 - \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf - \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee - \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd - \xfe \xff - \x{f1} - 0: \x{f1} - \x{bf} - 0: \x{bf} - \x{100} - 0: \x{100} - \x{1000} - 0: \x{1000} - *** Failers - 0: * - \x{c0} -No match - \x{f0} -No match - -/Ā{3,4}/8SDZ ------------------------------------------------------------------- - Bra - \x{100}{3} - \x{100}? - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 128 -Subject length lower bound = 3 -No set of starting bytes - \x{100}\x{100}\x{100}\x{100\x{100} - 0: \x{100}\x{100}\x{100} - -/(\x{100}+|x)/8SDZ ------------------------------------------------------------------- - Bra - CBra 1 - \x{100}+ - Alt - x - Ket - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 1 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: x \xc4 - -/(\x{100}*a|x)/8SDZ ------------------------------------------------------------------- - Bra - CBra 1 - \x{100}*+ - a - Alt - x - Ket - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 1 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: a x \xc4 - -/(\x{100}{0,2}a|x)/8SDZ ------------------------------------------------------------------- - Bra - CBra 1 - \x{100}{0,2} - a - Alt - x - Ket - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 1 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: a x \xc4 - -/(\x{100}{1,2}a|x)/8SDZ ------------------------------------------------------------------- - Bra - CBra 1 - \x{100} - \x{100}{0,1} - a - Alt - x - Ket - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 1 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: x \xc4 - /\x{100}*(\d+|"(?1)")/8 1234 0: 1234 @@ -503,18 +143,6 @@ No match \x{100}\x{100}abcd No match -/\x{100}/8DZ ------------------------------------------------------------------- - Bra - \x{100} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 128 - /\x{100}*/8DZ ------------------------------------------------------------------ Bra @@ -523,7 +151,7 @@ Need char = 128 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -536,7 +164,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf First char = 'a' No need char @@ -549,36 +177,10 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf First char = 'a' Need char = 'b' -/a\x{100}\x{101}*/8DZ ------------------------------------------------------------------- - Bra - a\x{100} - \x{101}* - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 'a' -Need char = 128 - -/a\x{100}\x{101}+/8DZ ------------------------------------------------------------------- - Bra - a\x{100} - \x{101}+ - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 'a' -Need char = 129 - /\x{100}*A/8DZ ------------------------------------------------------------------ Bra @@ -588,7 +190,7 @@ Need char = 129 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char Need char = 'A' A @@ -604,54 +206,10 @@ Need char = 'A' End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char -/[^\x{c4}]/DZ ------------------------------------------------------------------- - Bra - [^\xc4] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -No options -No first char -No need char - -/[^\x{c4}]/8DZ ------------------------------------------------------------------- - Bra - [\x00-\xc3\xc5-\xff] (neg) - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - -/[\x{100}]/8DZ ------------------------------------------------------------------- - Bra - [\x{100}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - \x{100} - 0: \x{100} - Z\x{100} - 0: \x{100} - \x{100}Z - 0: \x{100} - *** Failers -No match - /[Z\x{100}]/8DZ ------------------------------------------------------------------ Bra @@ -660,7 +218,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char Z\x{100} @@ -695,7 +253,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -707,7 +265,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char \x{100} @@ -724,25 +282,11 @@ No need char ------------------------------------------------------------------ Capturing subpattern count = 0 No options -First char = 255 +First char = \xff No need char >\xff< 0: \xff -/[\xff]/DZ8 ------------------------------------------------------------------- - Bra - \x{ff} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 195 -Need char = 191 - >\x{ff}< - 0: \x{ff} - /[^\xFF]/DZ ------------------------------------------------------------------ Bra @@ -755,18 +299,6 @@ No options No first char No need char -/[^\xff]/8DZ ------------------------------------------------------------------- - Bra - [\x00-\xfe] (neg) - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - /[Ä-Ü]/8 Ö # Matches without Study 0: \x{d6} @@ -791,129 +323,6 @@ No need char \x{d6} 0: \x{d6} -/[]/8 -Failed: invalid UTF-8 string at offset 1 - -//8 -Failed: invalid UTF-8 string at offset 0 - -/xxx/8 -Failed: invalid UTF-8 string at offset 0 - -/xxx/8?DZSS ------------------------------------------------------------------- - Bra - \X{c0}\X{c0}\X{c0}xxx - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 no_utf8_check -First char = 195 -Need char = 'x' - -/abc/8 - ] -Error -10 (bad UTF-8 string) offset=0 reason=6 - -Error -10 (bad UTF-8 string) offset=0 reason=1 - -Error -10 (bad UTF-8 string) offset=0 reason=6 - \? -No match - \xe1\x88 -Error -10 (bad UTF-8 string) offset=0 reason=1 - \P\xe1\x88 -Error -10 (bad UTF-8 string) offset=0 reason=1 - \P\P\xe1\x88 -Error -25 (short UTF-8 string) offset=0 reason=1 - XX\xea -Error -10 (bad UTF-8 string) offset=2 reason=2 - \O0XX\xea -Error -10 (bad UTF-8 string) - \O1XX\xea -Error -10 (bad UTF-8 string) - \O2XX\xea -Error -10 (bad UTF-8 string) offset=2 reason=2 - XX\xf1 -Error -10 (bad UTF-8 string) offset=2 reason=3 - XX\xf8 -Error -10 (bad UTF-8 string) offset=2 reason=4 - XX\xfc -Error -10 (bad UTF-8 string) offset=2 reason=5 - ZZ\xea\xaf\x20YY -Error -10 (bad UTF-8 string) offset=2 reason=7 - ZZ\xfd\xbf\xbf\x2f\xbf\xbfYY -Error -10 (bad UTF-8 string) offset=2 reason=8 - ZZ\xfd\xbf\xbf\xbf\x2f\xbfYY -Error -10 (bad UTF-8 string) offset=2 reason=9 - ZZ\xfd\xbf\xbf\xbf\xbf\x2fYY -Error -10 (bad UTF-8 string) offset=2 reason=10 - ZZ\xffYY -Error -10 (bad UTF-8 string) offset=2 reason=21 - ZZ\xfeYY -Error -10 (bad UTF-8 string) offset=2 reason=21 - -/anything/8 - \xc0\x80 -Error -10 (bad UTF-8 string) offset=0 reason=15 - \xc1\x8f -Error -10 (bad UTF-8 string) offset=0 reason=15 - \xe0\x9f\x80 -Error -10 (bad UTF-8 string) offset=0 reason=16 - \xf0\x8f\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=17 - \xf8\x87\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=18 - \xfc\x83\x80\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=19 - \xfe\x80\x80\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=21 - \xff\x80\x80\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=21 - \xc3\x8f -No match - \xe0\xaf\x80 -No match - \xe1\x80\x80 -No match - \xf0\x9f\x80\x80 -No match - \xf1\x8f\x80\x80 -No match - \xf8\x88\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=11 - \xf9\x87\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=11 - \xfc\x84\x80\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=12 - \xfd\x83\x80\x80\x80\x80 -Error -10 (bad UTF-8 string) offset=0 reason=12 - \?\xf8\x88\x80\x80\x80 -No match - \?\xf9\x87\x80\x80\x80 -No match - \?\xfc\x84\x80\x80\x80\x80 -No match - \?\xfd\x83\x80\x80\x80\x80 -No match - -/\x{100}abc(xyz(?1))/8DZ ------------------------------------------------------------------- - Bra - \x{100}abc - CBra 1 - xyz - Recurse - Ket - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 1 -Options: utf8 -First char = 196 -Need char = 'z' - /[^\x{100}]abc(xyz(?1))/8DZ ------------------------------------------------------------------ Bra @@ -927,7 +336,7 @@ Need char = 'z' End ------------------------------------------------------------------ Capturing subpattern count = 1 -Options: utf8 +Options: utf No first char Need char = 'z' @@ -944,7 +353,7 @@ Need char = 'z' End ------------------------------------------------------------------ Capturing subpattern count = 1 -Options: utf8 +Options: utf No first char Need char = 'z' @@ -964,7 +373,7 @@ Need char = 'z' End ------------------------------------------------------------------ Capturing subpattern count = 2 -Options: utf8 +Options: utf No first char No need char @@ -995,7 +404,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 2 -Options: utf8 +Options: utf No first char No need char @@ -1015,7 +424,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 2 -Options: utf8 +Options: utf No first char No need char @@ -1046,7 +455,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 2 -Options: utf8 +Options: utf No first char No need char @@ -1060,10 +469,6 @@ No need char \x{100}X 0: X -/a\x{1234}b/P8 - a\x{1234}b - 0: a\x{1234}b - /^\ሴ/8DZ ------------------------------------------------------------------ Bra @@ -1073,23 +478,10 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: anchored utf8 +Options: anchored utf No first char No need char -/\777/I -Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3 - -/\777/8I -Capturing subpattern count = 0 -Options: utf8 -First char = 199 -Need char = 191 - \x{1ff} - 0: \x{1ff} - \777 - 0: \x{1ff} - /\x{100}*\d/8DZ ------------------------------------------------------------------ Bra @@ -1099,7 +491,7 @@ Need char = 191 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -1112,7 +504,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -1125,7 +517,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -1138,7 +530,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -1151,7 +543,7 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char @@ -1164,49 +556,10 @@ No need char End ------------------------------------------------------------------ Capturing subpattern count = 0 -Options: utf8 +Options: utf No first char No need char -/\x{100}+\x{200}/8DZ ------------------------------------------------------------------- - Bra - \x{100}++ - \x{200} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 128 - -/\x{100}+X/8DZ ------------------------------------------------------------------- - Bra - \x{100}++ - X - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 196 -Need char = 'X' - -/X+\x{200}/8DZ ------------------------------------------------------------------- - Bra - X++ - \x{200} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 'X' -Need char = 128 - /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() @@ -1248,9 +601,6 @@ Matched, but too many substrings End ------------------------------------------------------------------ -/^[\QĀ\E-\QŐ\E/BZ8 -Failed: missing terminating ] for character class at offset 15 - /^abc./mgx8 abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK 0: abc1 @@ -1436,7 +786,7 @@ No match /[\H]/8BZ ------------------------------------------------------------------ Bra - [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{7fffffff}] + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------ @@ -1444,7 +794,7 @@ No match /[\V]/8BZ ------------------------------------------------------------------ Bra - [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{7fffffff}] + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] Ket End ------------------------------------------------------------------ @@ -1453,39 +803,9 @@ No match \x{1ec5} 0: \x{1ec5} -/-- This tests the stricter UTF-8 check according to RFC 3629. --/ - -/X/8 - \x{0}\x{d7ff}\x{e000}\x{10ffff} -No match - \x{d800} -Error -10 (bad UTF-8 string) offset=0 reason=14 - \x{d800}\? -No match - \x{da00} -Error -10 (bad UTF-8 string) offset=0 reason=14 - \x{da00}\? -No match - \x{dfff} -Error -10 (bad UTF-8 string) offset=0 reason=14 - \x{dfff}\? -No match - \x{110000} -Error -10 (bad UTF-8 string) offset=0 reason=13 - \x{110000}\? -No match - \x{2000000} -Error -10 (bad UTF-8 string) offset=0 reason=11 - \x{2000000}\? -No match - \x{7fffffff} -Error -10 (bad UTF-8 string) offset=0 reason=12 - \x{7fffffff}\? -No match - /a\Rb/I8 Capturing subpattern count = 0 -Options: bsr_anycrlf utf8 +Options: bsr_anycrlf utf First char = 'a' Need char = 'b' a\rb @@ -1503,7 +823,7 @@ No match /a\Rb/I8 Capturing subpattern count = 0 -Options: bsr_unicode utf8 +Options: bsr_unicode utf First char = 'a' Need char = 'b' a\rb @@ -1525,7 +845,7 @@ No match /a\R?b/I8 Capturing subpattern count = 0 -Options: bsr_anycrlf utf8 +Options: bsr_anycrlf utf First char = 'a' Need char = 'b' a\rb @@ -1543,7 +863,7 @@ No match /a\R?b/I8 Capturing subpattern count = 0 -Options: bsr_unicode utf8 +Options: bsr_unicode utf First char = 'a' Need char = 'b' a\rb @@ -1600,26 +920,11 @@ No match \x{de}\x{de} 0: \xde\xde 1: \xde - \x{123} -** Character \x{123} is greater than 255 and UTF-8 mode is not enabled. -** Truncation will probably give the wrong result. -No match /X/8f A\x{1ec5}ABCXYZ 0: X -/(*UTF8)\x{1234}/ - abcd\x{1234}pqr - 0: \x{1234} - -/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I -Capturing subpattern count = 0 -Options: bsr_unicode utf8 -Forced newline sequence: CRLF -First char = 'a' -Need char = 'b' - /Xa{2,4}b/8 X\P Partial match: X @@ -2097,152 +1402,16 @@ Partial match: abcde \PX Partial match: X -/\h/SI -Capturing subpattern count = 0 -No options -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x09 \x20 \xa0 - -/\h/SI8 -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 - ABC\x{09} - 0: \x{09} - ABC\x{20} - 0: - ABC\x{a0} - 0: \x{a0} - ABC\x{1680} - 0: \x{1680} - ABC\x{180e} - 0: \x{180e} - ABC\x{2000} - 0: \x{2000} - ABC\x{202f} - 0: \x{202f} - ABC\x{205f} - 0: \x{205f} - ABC\x{3000} - 0: \x{3000} - -/\v/SI -Capturing subpattern count = 0 -No options -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x0a \x0b \x0c \x0d \x85 - -/\v/SI8 -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 - ABC\x{0a} - 0: \x{0a} - ABC\x{0b} - 0: \x{0b} - ABC\x{0c} - 0: \x{0c} - ABC\x{0d} - 0: \x{0d} - ABC\x{85} - 0: \x{85} - ABC\x{2028} - 0: \x{2028} - -/\R/SI -Capturing subpattern count = 0 -No options -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x0a \x0b \x0c \x0d \x85 - -/\R/SI8 -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 - -/\h*A/SI8 -Capturing subpattern count = 0 -Options: utf8 -No first char -Need char = 'A' -Subject length lower bound = 1 -Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 - CDBABC - 0: A - -/\v+A/SI8 -Capturing subpattern count = 0 -Options: utf8 -No first char -Need char = 'A' -Subject length lower bound = 2 -Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 - -/\s?xxx\s/8SI -Capturing subpattern count = 0 -Options: utf8 -No first char -Need char = 'x' -Subject length lower bound = 4 -Starting byte set: \x09 \x0a \x0c \x0d \x20 x - /\sxxx\s/8T1 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ 0: \x{a0}xxx\x{85} -/\sxxx\s/I8ST1 -Capturing subpattern count = 0 -Options: utf8 -No first char -Need char = 'x' -Subject length lower bound = 5 -Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 - AB\x{85}xxx\x{a0}XYZ - 0: \x{85}xxx\x{a0} - AB\x{a0}xxx\x{85}XYZ - 0: \x{a0}xxx\x{85} - /\S \S/8T1 \x{a2} \x{84} 0: \x{a2} \x{84} -/\S \S/I8ST1 -Capturing subpattern count = 0 -Options: utf8 -No first char -Need char = ' ' -Subject length lower bound = 3 -Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ - A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 - \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 - \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 - \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 - \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff - \x{a2} \x{84} - 0: \x{a2} \x{84} - A Z - 0: A Z - 'A#хц'8xBZ ------------------------------------------------------------------ Bra @@ -2304,20 +1473,6 @@ Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 End ------------------------------------------------------------------ -/a+/8 - a\x{123}aa\>1 - 0: aa - a\x{123}aa\>2 -Error -11 (bad UTF-8 offset) - a\x{123}aa\>3 - 0: aa - a\x{123}aa\>4 - 0: a - a\x{123}aa\>5 -No match - a\x{123}aa\>6 -Error -24 (bad offset value) - /^\cģ/8 Failed: \c must be followed by an ASCII character at offset 3 @@ -2349,41 +1504,9 @@ Failed: \c must be followed by an ASCII character at o 1: \x{0a} 2: \x{0d} -/\x{1234}+/iS8I -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \xe1 - -/\x{1234}+?/iS8I -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \xe1 - -/\x{1234}++/iS8I -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char -Subject length lower bound = 1 -Starting byte set: \xe1 - -/\x{1234}{2}/iS8I -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char -Subject length lower bound = 2 -Starting byte set: \xe1 - /[^\x{1234}]+/iS8I Capturing subpattern count = 0 -Options: caseless utf8 +Options: caseless utf No first char No need char Subject length lower bound = 1 @@ -2391,7 +1514,7 @@ No set of starting bytes /[^\x{1234}]+?/iS8I Capturing subpattern count = 0 -Options: caseless utf8 +Options: caseless utf No first char No need char Subject length lower bound = 1 @@ -2399,7 +1522,7 @@ No set of starting bytes /[^\x{1234}]++/iS8I Capturing subpattern count = 0 -Options: caseless utf8 +Options: caseless utf No first char No need char Subject length lower bound = 1 @@ -2407,7 +1530,7 @@ No set of starting bytes /[^\x{1234}]{2}/iS8I Capturing subpattern count = 0 -Options: caseless utf8 +Options: caseless utf No first char No need char Subject length lower bound = 2 @@ -2431,5 +1554,99 @@ Partial match: for /f.*/8s \P\Pfor Partial match: for + +/\x{d7ff}\x{e000}/8 + +/\x{d800}/8 +Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + +/\x{dfff}/8 +Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 7 + +/\h+/8 + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + 0: \x{200a}\x{a0}\x{2000} + +/[\h\x{e000}]+/8BZ +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]+ + Ket + End +------------------------------------------------------------------ + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + 0: \x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + 0: \x{200a}\x{a0}\x{2000} + +/\H+/8 + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + 0: \x{9f}\x{a1}\x{2fff}\x{3001} + +/[\H\x{d7ff}]+/8BZ +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]+ + Ket + End +------------------------------------------------------------------ + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + 0: \x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + 0: \x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + 0: \x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + 0: \x{9f}\x{a1}\x{2fff}\x{3001} + +/\v+/8 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} + +/[\v\x{e000}]+/8BZ +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]+ + Ket + End +------------------------------------------------------------------ + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} + +/\V+/8 + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + 0: \x{09}\x{0e}\x{84}\x{86} + +/[\V\x{d7ff}]+/8BZ +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]+ + Ket + End +------------------------------------------------------------------ + \x{2028}\x{2029}\x{2027}\x{2030} + 0: \x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + 0: \x{09}\x{0e}\x{84}\x{86} + +/\R+/8 + \x{2027}\x{2030}\x{2028}\x{2029} + 0: \x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} /-- End of testinput5 --/