Annotation of embedaddon/pcre/testdata/testoutput15, revision 1.1.1.4

1.1.1.2   misho       1: /-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit 
                      2:     library. --/
                      3: 
                      4: /X(\C{3})/8
                      5:     X\x{1234}
                      6:  0: X\x{1234}
                      7:  1: \x{1234}
                      8: 
                      9: /X(\C{4})/8
                     10:     X\x{1234}YZ
                     11:  0: X\x{1234}Y
                     12:  1: \x{1234}Y
                     13:     
                     14: /X\C*/8
                     15:     XYZabcdce
                     16:  0: XYZabcdce
                     17:     
                     18: /X\C*?/8
                     19:     XYZabcde
                     20:  0: X
                     21:     
                     22: /X\C{3,5}/8
                     23:     Xabcdefg   
                     24:  0: Xabcde
                     25:     X\x{1234} 
                     26:  0: X\x{1234}
                     27:     X\x{1234}YZ
                     28:  0: X\x{1234}YZ
                     29:     X\x{1234}\x{512}  
                     30:  0: X\x{1234}\x{512}
                     31:     X\x{1234}\x{512}YZ
                     32:  0: X\x{1234}\x{512}
                     33: 
                     34: /X\C{3,5}?/8
                     35:     Xabcdefg   
                     36:  0: Xabc
                     37:     X\x{1234} 
                     38:  0: X\x{1234}
                     39:     X\x{1234}YZ
                     40:  0: X\x{1234}
                     41:     X\x{1234}\x{512}  
                     42:  0: X\x{1234}
                     43: 
                     44: /a\Cb/8
                     45:     aXb
                     46:  0: aXb
                     47:     a\nb
                     48:  0: a\x{0a}b
                     49:     
                     50: /a\C\Cb/8 
                     51:     a\x{100}b 
                     52:  0: a\x{100}b
                     53: 
                     54: /ab\Cde/8
                     55:     abXde
                     56:  0: abXde
                     57: 
                     58: /a\C\Cb/8 
                     59:     a\x{100}b
                     60:  0: a\x{100}b
                     61:     ** Failers 
                     62: No match
                     63:     a\x{12257}b
                     64: No match
                     65: 
                     66: /[]/8
                     67: Failed: invalid UTF-8 string at offset 1
                     68: 
                     69: //8
                     70: Failed: invalid UTF-8 string at offset 0
                     71: 
                     72: /xxx/8
                     73: Failed: invalid UTF-8 string at offset 0
                     74: 
                     75: /xxx/8?DZSS
                     76: ------------------------------------------------------------------
                     77:         Bra
                     78:         \X{c0}\X{c0}\X{c0}xxx
                     79:         Ket
                     80:         End
                     81: ------------------------------------------------------------------
1.1       misho      82: Capturing subpattern count = 0
1.1.1.2   misho      83: Options: utf no_utf_check
                     84: First char = \x{c3}
                     85: Need char = 'x'
                     86: 
1.1.1.4 ! misho      87: /badutf/8
        !            88:     \xdf
1.1.1.2   misho      89: Error -10 (bad UTF-8 string) offset=0 reason=1
1.1.1.4 ! misho      90:     \xef
        !            91: Error -10 (bad UTF-8 string) offset=0 reason=2
        !            92:     \xef\x80
        !            93: Error -10 (bad UTF-8 string) offset=0 reason=1
        !            94:     \xf7
        !            95: Error -10 (bad UTF-8 string) offset=0 reason=3
        !            96:     \xf7\x80
        !            97: Error -10 (bad UTF-8 string) offset=0 reason=2
        !            98:     \xf7\x80\x80
        !            99: Error -10 (bad UTF-8 string) offset=0 reason=1
        !           100:     \xfb
        !           101: Error -10 (bad UTF-8 string) offset=0 reason=4
        !           102:     \xfb\x80
        !           103: Error -10 (bad UTF-8 string) offset=0 reason=3
        !           104:     \xfb\x80\x80
        !           105: Error -10 (bad UTF-8 string) offset=0 reason=2
        !           106:     \xfb\x80\x80\x80
1.1.1.2   misho     107: Error -10 (bad UTF-8 string) offset=0 reason=1
1.1.1.4 ! misho     108:     \xfd
        !           109: Error -10 (bad UTF-8 string) offset=0 reason=5
        !           110:     \xfd\x80
        !           111: Error -10 (bad UTF-8 string) offset=0 reason=4
        !           112:     \xfd\x80\x80
        !           113: Error -10 (bad UTF-8 string) offset=0 reason=3
        !           114:     \xfd\x80\x80\x80
        !           115: Error -10 (bad UTF-8 string) offset=0 reason=2
        !           116:     \xfd\x80\x80\x80\x80
1.1.1.2   misho     117: Error -10 (bad UTF-8 string) offset=0 reason=1
1.1.1.4 ! misho     118:     \xdf\x7f
        !           119: Error -10 (bad UTF-8 string) offset=0 reason=6
        !           120:     \xef\x7f\x80
        !           121: Error -10 (bad UTF-8 string) offset=0 reason=6
        !           122:     \xef\x80\x7f
        !           123: Error -10 (bad UTF-8 string) offset=0 reason=7
        !           124:     \xf7\x7f\x80\x80
        !           125: Error -10 (bad UTF-8 string) offset=0 reason=6
        !           126:     \xf7\x80\x7f\x80
        !           127: Error -10 (bad UTF-8 string) offset=0 reason=7
        !           128:     \xf7\x80\x80\x7f
        !           129: Error -10 (bad UTF-8 string) offset=0 reason=8
        !           130:     \xfb\x7f\x80\x80\x80
        !           131: Error -10 (bad UTF-8 string) offset=0 reason=6
        !           132:     \xfb\x80\x7f\x80\x80
        !           133: Error -10 (bad UTF-8 string) offset=0 reason=7
        !           134:     \xfb\x80\x80\x7f\x80
        !           135: Error -10 (bad UTF-8 string) offset=0 reason=8
        !           136:     \xfb\x80\x80\x80\x7f
        !           137: Error -10 (bad UTF-8 string) offset=0 reason=9
        !           138:     \xfd\x7f\x80\x80\x80\x80
        !           139: Error -10 (bad UTF-8 string) offset=0 reason=6
        !           140:     \xfd\x80\x7f\x80\x80\x80
        !           141: Error -10 (bad UTF-8 string) offset=0 reason=7
        !           142:     \xfd\x80\x80\x7f\x80\x80
        !           143: Error -10 (bad UTF-8 string) offset=0 reason=8
        !           144:     \xfd\x80\x80\x80\x7f\x80
        !           145: Error -10 (bad UTF-8 string) offset=0 reason=9
        !           146:     \xfd\x80\x80\x80\x80\x7f
        !           147: Error -10 (bad UTF-8 string) offset=0 reason=10
        !           148:     \xed\xa0\x80
        !           149: Error -10 (bad UTF-8 string) offset=0 reason=14
        !           150:     \xc0\x8f
        !           151: Error -10 (bad UTF-8 string) offset=0 reason=15
        !           152:     \xe0\x80\x8f
        !           153: Error -10 (bad UTF-8 string) offset=0 reason=16
        !           154:     \xf0\x80\x80\x8f
        !           155: Error -10 (bad UTF-8 string) offset=0 reason=17
        !           156:     \xf8\x80\x80\x80\x8f
        !           157: Error -10 (bad UTF-8 string) offset=0 reason=18
        !           158:     \xfc\x80\x80\x80\x80\x8f
        !           159: Error -10 (bad UTF-8 string) offset=0 reason=19
        !           160:     \x80
        !           161: Error -10 (bad UTF-8 string) offset=0 reason=20
        !           162:     \xfe
        !           163: Error -10 (bad UTF-8 string) offset=0 reason=21
        !           164:     \xff
        !           165: Error -10 (bad UTF-8 string) offset=0 reason=21
        !           166: 
        !           167: /badutf/8
        !           168:     \xfb\x80\x80\x80\x80
        !           169: Error -10 (bad UTF-8 string) offset=0 reason=11
        !           170:     \xfd\x80\x80\x80\x80\x80
        !           171: Error -10 (bad UTF-8 string) offset=0 reason=12
        !           172:     \xf7\xbf\xbf\xbf
        !           173: Error -10 (bad UTF-8 string) offset=0 reason=13
        !           174: 
        !           175: /shortutf/8
        !           176:     \P\P\xdf
        !           177: Error -25 (short UTF-8 string) offset=0 reason=1
        !           178:     \P\P\xef
        !           179: Error -25 (short UTF-8 string) offset=0 reason=2
        !           180:     \P\P\xef\x80
        !           181: Error -25 (short UTF-8 string) offset=0 reason=1
        !           182:     \P\P\xf7
        !           183: Error -25 (short UTF-8 string) offset=0 reason=3
        !           184:     \P\P\xf7\x80
        !           185: Error -25 (short UTF-8 string) offset=0 reason=2
        !           186:     \P\P\xf7\x80\x80
        !           187: Error -25 (short UTF-8 string) offset=0 reason=1
        !           188:     \P\P\xfb
        !           189: Error -25 (short UTF-8 string) offset=0 reason=4
        !           190:     \P\P\xfb\x80
        !           191: Error -25 (short UTF-8 string) offset=0 reason=3
        !           192:     \P\P\xfb\x80\x80
        !           193: Error -25 (short UTF-8 string) offset=0 reason=2
        !           194:     \P\P\xfb\x80\x80\x80
        !           195: Error -25 (short UTF-8 string) offset=0 reason=1
        !           196:     \P\P\xfd
        !           197: Error -25 (short UTF-8 string) offset=0 reason=5
        !           198:     \P\P\xfd\x80
        !           199: Error -25 (short UTF-8 string) offset=0 reason=4
        !           200:     \P\P\xfd\x80\x80
        !           201: Error -25 (short UTF-8 string) offset=0 reason=3
        !           202:     \P\P\xfd\x80\x80\x80
        !           203: Error -25 (short UTF-8 string) offset=0 reason=2
        !           204:     \P\P\xfd\x80\x80\x80\x80
1.1.1.2   misho     205: Error -25 (short UTF-8 string) offset=0 reason=1
                    206: 
                    207: /anything/8
                    208:     \xc0\x80
                    209: Error -10 (bad UTF-8 string) offset=0 reason=15
                    210:     \xc1\x8f 
                    211: Error -10 (bad UTF-8 string) offset=0 reason=15
                    212:     \xe0\x9f\x80
                    213: Error -10 (bad UTF-8 string) offset=0 reason=16
                    214:     \xf0\x8f\x80\x80 
                    215: Error -10 (bad UTF-8 string) offset=0 reason=17
                    216:     \xf8\x87\x80\x80\x80  
                    217: Error -10 (bad UTF-8 string) offset=0 reason=18
                    218:     \xfc\x83\x80\x80\x80\x80
                    219: Error -10 (bad UTF-8 string) offset=0 reason=19
                    220:     \xfe\x80\x80\x80\x80\x80  
                    221: Error -10 (bad UTF-8 string) offset=0 reason=21
                    222:     \xff\x80\x80\x80\x80\x80  
                    223: Error -10 (bad UTF-8 string) offset=0 reason=21
                    224:     \xc3\x8f
                    225: No match
                    226:     \xe0\xaf\x80
                    227: No match
                    228:     \xe1\x80\x80
                    229: No match
                    230:     \xf0\x9f\x80\x80 
                    231: No match
                    232:     \xf1\x8f\x80\x80 
                    233: No match
                    234:     \xf8\x88\x80\x80\x80  
                    235: Error -10 (bad UTF-8 string) offset=0 reason=11
                    236:     \xf9\x87\x80\x80\x80  
                    237: Error -10 (bad UTF-8 string) offset=0 reason=11
                    238:     \xfc\x84\x80\x80\x80\x80
                    239: Error -10 (bad UTF-8 string) offset=0 reason=12
                    240:     \xfd\x83\x80\x80\x80\x80
                    241: Error -10 (bad UTF-8 string) offset=0 reason=12
                    242:     \?\xf8\x88\x80\x80\x80  
                    243: No match
                    244:     \?\xf9\x87\x80\x80\x80  
                    245: No match
                    246:     \?\xfc\x84\x80\x80\x80\x80
                    247: No match
                    248:     \?\xfd\x83\x80\x80\x80\x80
                    249: No match
                    250: 
                    251: /\x{100}/8DZ
                    252: ------------------------------------------------------------------
                    253:         Bra
                    254:         \x{100}
                    255:         Ket
                    256:         End
                    257: ------------------------------------------------------------------
                    258: Capturing subpattern count = 0
                    259: Options: utf
                    260: First char = \x{c4}
                    261: Need char = \x{80}
                    262: 
                    263: /\x{1000}/8DZ
                    264: ------------------------------------------------------------------
                    265:         Bra
                    266:         \x{1000}
                    267:         Ket
                    268:         End
                    269: ------------------------------------------------------------------
                    270: Capturing subpattern count = 0
                    271: Options: utf
                    272: First char = \x{e1}
                    273: Need char = \x{80}
                    274: 
                    275: /\x{10000}/8DZ
                    276: ------------------------------------------------------------------
                    277:         Bra
                    278:         \x{10000}
                    279:         Ket
                    280:         End
                    281: ------------------------------------------------------------------
                    282: Capturing subpattern count = 0
                    283: Options: utf
                    284: First char = \x{f0}
                    285: Need char = \x{80}
                    286: 
                    287: /\x{100000}/8DZ
                    288: ------------------------------------------------------------------
                    289:         Bra
                    290:         \x{100000}
                    291:         Ket
                    292:         End
                    293: ------------------------------------------------------------------
                    294: Capturing subpattern count = 0
                    295: Options: utf
                    296: First char = \x{f4}
                    297: Need char = \x{80}
                    298: 
                    299: /\x{10ffff}/8DZ
                    300: ------------------------------------------------------------------
                    301:         Bra
                    302:         \x{10ffff}
                    303:         Ket
                    304:         End
                    305: ------------------------------------------------------------------
                    306: Capturing subpattern count = 0
                    307: Options: utf
                    308: First char = \x{f4}
                    309: Need char = \x{bf}
                    310: 
                    311: /[\x{ff}]/8DZ
                    312: ------------------------------------------------------------------
                    313:         Bra
                    314:         \x{ff}
                    315:         Ket
                    316:         End
                    317: ------------------------------------------------------------------
                    318: Capturing subpattern count = 0
                    319: Options: utf
                    320: First char = \x{c3}
                    321: Need char = \x{bf}
                    322: 
                    323: /[\x{100}]/8DZ
                    324: ------------------------------------------------------------------
                    325:         Bra
                    326:         \x{100}
                    327:         Ket
                    328:         End
                    329: ------------------------------------------------------------------
                    330: Capturing subpattern count = 0
                    331: Options: utf
                    332: First char = \x{c4}
                    333: Need char = \x{80}
                    334: 
                    335: /\x80/8DZ
                    336: ------------------------------------------------------------------
                    337:         Bra
                    338:         \x{80}
                    339:         Ket
                    340:         End
                    341: ------------------------------------------------------------------
                    342: Capturing subpattern count = 0
                    343: Options: utf
                    344: First char = \x{c2}
                    345: Need char = \x{80}
                    346: 
                    347: /\xff/8DZ
                    348: ------------------------------------------------------------------
                    349:         Bra
                    350:         \x{ff}
                    351:         Ket
                    352:         End
                    353: ------------------------------------------------------------------
                    354: Capturing subpattern count = 0
                    355: Options: utf
                    356: First char = \x{c3}
                    357: Need char = \x{bf}
                    358: 
                    359: /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
                    360: ------------------------------------------------------------------
                    361:         Bra
                    362:         \x{d55c}\x{ad6d}\x{c5b4}
                    363:         Ket
                    364:         End
                    365: ------------------------------------------------------------------
                    366: Capturing subpattern count = 0
                    367: Options: utf
                    368: First char = \x{ed}
                    369: Need char = \x{b4}
                    370:     \x{D55c}\x{ad6d}\x{C5B4} 
                    371:  0: \x{d55c}\x{ad6d}\x{c5b4}
                    372: 
                    373: /\x{65e5}\x{672c}\x{8a9e}/DZ8
                    374: ------------------------------------------------------------------
                    375:         Bra
                    376:         \x{65e5}\x{672c}\x{8a9e}
                    377:         Ket
                    378:         End
                    379: ------------------------------------------------------------------
                    380: Capturing subpattern count = 0
                    381: Options: utf
                    382: First char = \x{e6}
                    383: Need char = \x{9e}
                    384:     \x{65e5}\x{672c}\x{8a9e}
                    385:  0: \x{65e5}\x{672c}\x{8a9e}
                    386: 
                    387: /\x{80}/DZ8
                    388: ------------------------------------------------------------------
                    389:         Bra
                    390:         \x{80}
                    391:         Ket
                    392:         End
                    393: ------------------------------------------------------------------
                    394: Capturing subpattern count = 0
                    395: Options: utf
                    396: First char = \x{c2}
                    397: Need char = \x{80}
                    398: 
                    399: /\x{084}/DZ8
                    400: ------------------------------------------------------------------
                    401:         Bra
                    402:         \x{84}
                    403:         Ket
                    404:         End
                    405: ------------------------------------------------------------------
                    406: Capturing subpattern count = 0
                    407: Options: utf
                    408: First char = \x{c2}
                    409: Need char = \x{84}
                    410: 
                    411: /\x{104}/DZ8
                    412: ------------------------------------------------------------------
                    413:         Bra
                    414:         \x{104}
                    415:         Ket
                    416:         End
                    417: ------------------------------------------------------------------
                    418: Capturing subpattern count = 0
                    419: Options: utf
                    420: First char = \x{c4}
                    421: Need char = \x{84}
                    422: 
                    423: /\x{861}/DZ8
                    424: ------------------------------------------------------------------
                    425:         Bra
                    426:         \x{861}
                    427:         Ket
                    428:         End
                    429: ------------------------------------------------------------------
                    430: Capturing subpattern count = 0
                    431: Options: utf
                    432: First char = \x{e0}
                    433: Need char = \x{a1}
                    434: 
                    435: /\x{212ab}/DZ8
                    436: ------------------------------------------------------------------
                    437:         Bra
                    438:         \x{212ab}
                    439:         Ket
                    440:         End
                    441: ------------------------------------------------------------------
                    442: Capturing subpattern count = 0
                    443: Options: utf
                    444: First char = \x{f0}
                    445: Need char = \x{ab}
                    446: 
                    447: /-- This one is here not because it's different to Perl, but because the way
                    448: the captured single-byte is displayed. (In Perl it becomes a character, and you
                    449: can't tell the difference.) --/
                    450:     
                    451: /X(\C)(.*)/8
                    452:     X\x{1234}
                    453:  0: X\x{1234}
                    454:  1: \x{e1}
                    455:  2: \x{88}\x{b4}
                    456:     X\nabc 
                    457:  0: X\x{0a}abc
                    458:  1: \x{0a}
                    459:  2: abc
                    460: 
                    461: /-- This one is here because Perl gives out a grumbly error message (quite 
                    462: correctly, but that messes up comparisons). --/
                    463:     
                    464: /a\Cb/8
                    465:     *** Failers 
                    466: No match
                    467:     a\x{100}b 
                    468: No match
                    469:     
                    470: /[^ab\xC0-\xF0]/8SDZ
                    471: ------------------------------------------------------------------
                    472:         Bra
                    473:         [\x00-`c-\xbf\xf1-\xff] (neg)
                    474:         Ket
                    475:         End
                    476: ------------------------------------------------------------------
                    477: Capturing subpattern count = 0
                    478: Options: utf
                    479: No first char
                    480: No need char
                    481: Subject length lower bound = 1
                    482: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
                    483:   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
                    484:   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
                    485:   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
                    486:   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
                    487:   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
                    488:   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
                    489:   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
                    490:   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
                    491:   \xfe \xff 
                    492:     \x{f1}
                    493:  0: \x{f1}
                    494:     \x{bf}
                    495:  0: \x{bf}
                    496:     \x{100}
                    497:  0: \x{100}
                    498:     \x{1000}   
                    499:  0: \x{1000}
                    500:     *** Failers
                    501:  0: *
                    502:     \x{c0} 
                    503: No match
                    504:     \x{f0} 
                    505: No match
                    506: 
                    507: /Ā{3,4}/8SDZ
                    508: ------------------------------------------------------------------
                    509:         Bra
                    510:         \x{100}{3}
                    511:         \x{100}?
                    512:         Ket
                    513:         End
                    514: ------------------------------------------------------------------
                    515: Capturing subpattern count = 0
                    516: Options: utf
                    517: First char = \x{c4}
                    518: Need char = \x{80}
1.1       misho     519: Subject length lower bound = 3
                    520: No set of starting bytes
1.1.1.2   misho     521:   \x{100}\x{100}\x{100}\x{100\x{100}
                    522:  0: \x{100}\x{100}\x{100}
                    523: 
                    524: /(\x{100}+|x)/8SDZ
                    525: ------------------------------------------------------------------
                    526:         Bra
                    527:         CBra 1
                    528:         \x{100}+
                    529:         Alt
                    530:         x
                    531:         Ket
                    532:         Ket
                    533:         End
                    534: ------------------------------------------------------------------
                    535: Capturing subpattern count = 1
                    536: Options: utf
                    537: No first char
                    538: No need char
                    539: Subject length lower bound = 1
                    540: Starting byte set: x \xc4 
                    541: 
                    542: /(\x{100}*a|x)/8SDZ
                    543: ------------------------------------------------------------------
                    544:         Bra
                    545:         CBra 1
                    546:         \x{100}*+
                    547:         a
                    548:         Alt
                    549:         x
                    550:         Ket
                    551:         Ket
                    552:         End
                    553: ------------------------------------------------------------------
                    554: Capturing subpattern count = 1
                    555: Options: utf
                    556: No first char
                    557: No need char
                    558: Subject length lower bound = 1
                    559: Starting byte set: a x \xc4 
                    560: 
                    561: /(\x{100}{0,2}a|x)/8SDZ
                    562: ------------------------------------------------------------------
                    563:         Bra
                    564:         CBra 1
                    565:         \x{100}{0,2}
                    566:         a
                    567:         Alt
                    568:         x
                    569:         Ket
                    570:         Ket
                    571:         End
                    572: ------------------------------------------------------------------
                    573: Capturing subpattern count = 1
                    574: Options: utf
                    575: No first char
                    576: No need char
                    577: Subject length lower bound = 1
                    578: Starting byte set: a x \xc4 
                    579: 
                    580: /(\x{100}{1,2}a|x)/8SDZ
                    581: ------------------------------------------------------------------
                    582:         Bra
                    583:         CBra 1
                    584:         \x{100}
                    585:         \x{100}{0,1}
                    586:         a
                    587:         Alt
                    588:         x
                    589:         Ket
                    590:         Ket
                    591:         End
                    592: ------------------------------------------------------------------
                    593: Capturing subpattern count = 1
                    594: Options: utf
                    595: No first char
                    596: No need char
                    597: Subject length lower bound = 1
                    598: Starting byte set: x \xc4 
                    599: 
                    600: /\x{100}/8DZ
                    601: ------------------------------------------------------------------
                    602:         Bra
                    603:         \x{100}
                    604:         Ket
                    605:         End
                    606: ------------------------------------------------------------------
                    607: Capturing subpattern count = 0
                    608: Options: utf
                    609: First char = \x{c4}
                    610: Need char = \x{80}
                    611: 
                    612: /a\x{100}\x{101}*/8DZ
                    613: ------------------------------------------------------------------
                    614:         Bra
                    615:         a\x{100}
                    616:         \x{101}*
                    617:         Ket
                    618:         End
                    619: ------------------------------------------------------------------
                    620: Capturing subpattern count = 0
                    621: Options: utf
                    622: First char = 'a'
                    623: Need char = \x{80}
                    624: 
                    625: /a\x{100}\x{101}+/8DZ
                    626: ------------------------------------------------------------------
                    627:         Bra
                    628:         a\x{100}
                    629:         \x{101}+
                    630:         Ket
                    631:         End
                    632: ------------------------------------------------------------------
                    633: Capturing subpattern count = 0
                    634: Options: utf
                    635: First char = 'a'
                    636: Need char = \x{81}
1.1       misho     637: 
1.1.1.2   misho     638: /[^\x{c4}]/DZ
                    639: ------------------------------------------------------------------
                    640:         Bra
1.1.1.4 ! misho     641:         [^\x{c4}]
1.1.1.2   misho     642:         Ket
                    643:         End
                    644: ------------------------------------------------------------------
1.1       misho     645: Capturing subpattern count = 0
                    646: No options
                    647: No first char
                    648: No need char
1.1.1.2   misho     649: 
                    650: /[\x{100}]/8DZ
                    651: ------------------------------------------------------------------
                    652:         Bra
                    653:         \x{100}
                    654:         Ket
                    655:         End
                    656: ------------------------------------------------------------------
                    657: Capturing subpattern count = 0
                    658: Options: utf
                    659: First char = \x{c4}
                    660: Need char = \x{80}
                    661:     \x{100}
                    662:  0: \x{100}
                    663:     Z\x{100}
                    664:  0: \x{100}
                    665:     \x{100}Z
                    666:  0: \x{100}
                    667:     *** Failers 
                    668: No match
                    669: 
                    670: /[\xff]/DZ8
                    671: ------------------------------------------------------------------
                    672:         Bra
                    673:         \x{ff}
                    674:         Ket
                    675:         End
                    676: ------------------------------------------------------------------
                    677: Capturing subpattern count = 0
                    678: Options: utf
                    679: First char = \x{c3}
                    680: Need char = \x{bf}
                    681:     >\x{ff}<
                    682:  0: \x{ff}
                    683: 
                    684: /[^\xff]/8DZ
                    685: ------------------------------------------------------------------
                    686:         Bra
1.1.1.3   misho     687:         [^\x{ff}]
1.1.1.2   misho     688:         Ket
                    689:         End
                    690: ------------------------------------------------------------------
                    691: Capturing subpattern count = 0
                    692: Options: utf
                    693: No first char
                    694: No need char
                    695: 
                    696: /\x{100}abc(xyz(?1))/8DZ
                    697: ------------------------------------------------------------------
                    698:         Bra
                    699:         \x{100}abc
                    700:         CBra 1
                    701:         xyz
                    702:         Recurse
                    703:         Ket
                    704:         Ket
                    705:         End
                    706: ------------------------------------------------------------------
                    707: Capturing subpattern count = 1
                    708: Options: utf
                    709: First char = \x{c4}
                    710: Need char = 'z'
                    711: 
                    712: /a\x{1234}b/P8
                    713:     a\x{1234}b
                    714:  0: a\x{1234}b
                    715: 
                    716: /\777/8I
                    717: Capturing subpattern count = 0
                    718: Options: utf
                    719: First char = \x{c7}
                    720: Need char = \x{bf}
                    721:   \x{1ff}
                    722:  0: \x{1ff}
                    723:   \777 
                    724:  0: \x{1ff}
                    725:   
                    726: /\x{100}+\x{200}/8DZ
                    727: ------------------------------------------------------------------
                    728:         Bra
                    729:         \x{100}++
                    730:         \x{200}
                    731:         Ket
                    732:         End
                    733: ------------------------------------------------------------------
                    734: Capturing subpattern count = 0
                    735: Options: utf
                    736: First char = \x{c4}
                    737: Need char = \x{80}
                    738: 
                    739: /\x{100}+X/8DZ
                    740: ------------------------------------------------------------------
                    741:         Bra
                    742:         \x{100}++
                    743:         X
                    744:         Ket
                    745:         End
                    746: ------------------------------------------------------------------
                    747: Capturing subpattern count = 0
                    748: Options: utf
                    749: First char = \x{c4}
                    750: Need char = 'X'
                    751: 
                    752: /^[\QĀ\E-\QŐ\E/BZ8
                    753: Failed: missing terminating ] for character class at offset 15
                    754: 
                    755: /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
                    756:     
                    757: /X/8
                    758:     \x{d800}
                    759: Error -10 (bad UTF-8 string) offset=0 reason=14
                    760:     \x{d800}\?
                    761: No match
                    762:     \x{da00}
                    763: Error -10 (bad UTF-8 string) offset=0 reason=14
                    764:     \x{da00}\?
                    765: No match
                    766:     \x{dfff}
                    767: Error -10 (bad UTF-8 string) offset=0 reason=14
                    768:     \x{dfff}\?
                    769: No match
                    770:     \x{110000}    
                    771: Error -10 (bad UTF-8 string) offset=0 reason=13
                    772:     \x{110000}\?    
                    773: No match
                    774:     \x{2000000} 
                    775: Error -10 (bad UTF-8 string) offset=0 reason=11
                    776:     \x{2000000}\? 
                    777: No match
                    778:     \x{7fffffff} 
                    779: Error -10 (bad UTF-8 string) offset=0 reason=12
                    780:     \x{7fffffff}\? 
                    781: No match
                    782: 
                    783: /(*UTF8)\x{1234}/
                    784:   abcd\x{1234}pqr
                    785:  0: \x{1234}
                    786: 
1.1.1.4 ! misho     787: /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
1.1.1.2   misho     788: Capturing subpattern count = 0
                    789: Options: bsr_unicode utf
                    790: Forced newline sequence: CRLF
                    791: First char = 'a'
                    792: Need char = 'b'
                    793: 
                    794: /\h/SI8
                    795: Capturing subpattern count = 0
                    796: Options: utf
                    797: No first char
                    798: No need char
                    799: Subject length lower bound = 1
                    800: Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
                    801:     ABC\x{09}
                    802:  0: \x{09}
                    803:     ABC\x{20}
                    804:  0:  
                    805:     ABC\x{a0}
                    806:  0: \x{a0}
                    807:     ABC\x{1680}
                    808:  0: \x{1680}
                    809:     ABC\x{180e}
                    810:  0: \x{180e}
                    811:     ABC\x{2000}
                    812:  0: \x{2000}
                    813:     ABC\x{202f} 
                    814:  0: \x{202f}
                    815:     ABC\x{205f} 
                    816:  0: \x{205f}
                    817:     ABC\x{3000} 
                    818:  0: \x{3000}
                    819: 
                    820: /\v/SI8
                    821: Capturing subpattern count = 0
                    822: Options: utf
                    823: No first char
                    824: No need char
                    825: Subject length lower bound = 1
                    826: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
                    827:     ABC\x{0a}
                    828:  0: \x{0a}
                    829:     ABC\x{0b}
                    830:  0: \x{0b}
                    831:     ABC\x{0c}
                    832:  0: \x{0c}
                    833:     ABC\x{0d}
                    834:  0: \x{0d}
                    835:     ABC\x{85}
                    836:  0: \x{85}
                    837:     ABC\x{2028}
                    838:  0: \x{2028}
                    839: 
                    840: /\h*A/SI8
                    841: Capturing subpattern count = 0
                    842: Options: utf
                    843: No first char
                    844: Need char = 'A'
                    845: Subject length lower bound = 1
                    846: Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
                    847:     CDBABC
                    848:  0: A
                    849:     
                    850: /\v+A/SI8
                    851: Capturing subpattern count = 0
                    852: Options: utf
                    853: No first char
                    854: Need char = 'A'
                    855: Subject length lower bound = 2
                    856: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
                    857: 
                    858: /\s?xxx\s/8SI
                    859: Capturing subpattern count = 0
                    860: Options: utf
                    861: No first char
                    862: Need char = 'x'
                    863: Subject length lower bound = 4
                    864: Starting byte set: \x09 \x0a \x0c \x0d \x20 x 
                    865: 
                    866: /\sxxx\s/I8ST1
                    867: Capturing subpattern count = 0
                    868: Options: utf
                    869: No first char
                    870: Need char = 'x'
                    871: Subject length lower bound = 5
                    872: Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 
                    873:     AB\x{85}xxx\x{a0}XYZ
                    874:  0: \x{85}xxx\x{a0}
                    875:     AB\x{a0}xxx\x{85}XYZ
                    876:  0: \x{a0}xxx\x{85}
                    877: 
                    878: /\S \S/I8ST1
                    879: Capturing subpattern count = 0
                    880: Options: utf
                    881: No first char
                    882: Need char = ' '
                    883: Subject length lower bound = 3
                    884: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
                    885:   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
                    886:   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
                    887:   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
                    888:   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 
                    889:   \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 
                    890:   \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 
                    891:   \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 
                    892:   \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
                    893:     \x{a2} \x{84} 
                    894:  0: \x{a2} \x{84}
                    895:     A Z 
                    896:  0: A Z
                    897: 
                    898: /a+/8
                    899:     a\x{123}aa\>1
                    900:  0: aa
                    901:     a\x{123}aa\>2
                    902: Error -11 (bad UTF-8 offset)
                    903:     a\x{123}aa\>3
                    904:  0: aa
                    905:     a\x{123}aa\>4
                    906:  0: a
                    907:     a\x{123}aa\>5
                    908: No match
                    909:     a\x{123}aa\>6
                    910: Error -24 (bad offset value)
                    911: 
                    912: /\x{1234}+/iS8I
                    913: Capturing subpattern count = 0
                    914: Options: caseless utf
                    915: No first char
                    916: No need char
                    917: Subject length lower bound = 1
                    918: Starting byte set: \xe1 
                    919: 
                    920: /\x{1234}+?/iS8I
                    921: Capturing subpattern count = 0
                    922: Options: caseless utf
                    923: No first char
                    924: No need char
                    925: Subject length lower bound = 1
                    926: Starting byte set: \xe1 
                    927: 
                    928: /\x{1234}++/iS8I
                    929: Capturing subpattern count = 0
                    930: Options: caseless utf
                    931: No first char
                    932: No need char
                    933: Subject length lower bound = 1
                    934: Starting byte set: \xe1 
                    935: 
                    936: /\x{1234}{2}/iS8I
                    937: Capturing subpattern count = 0
                    938: Options: caseless utf
                    939: No first char
                    940: No need char
                    941: Subject length lower bound = 2
                    942: Starting byte set: \xe1 
                    943: 
                    944: /[^\x{c4}]/8DZ
                    945: ------------------------------------------------------------------
                    946:         Bra
1.1.1.3   misho     947:         [^\x{c4}]
1.1.1.2   misho     948:         Ket
                    949:         End
                    950: ------------------------------------------------------------------
                    951: Capturing subpattern count = 0
                    952: Options: utf
                    953: No first char
                    954: No need char
                    955: 
                    956: /X+\x{200}/8DZ
                    957: ------------------------------------------------------------------
                    958:         Bra
                    959:         X++
                    960:         \x{200}
                    961:         Ket
                    962:         End
                    963: ------------------------------------------------------------------
                    964: Capturing subpattern count = 0
                    965: Options: utf
                    966: First char = 'X'
                    967: Need char = \x{80}
                    968: 
                    969: /\R/SI8
                    970: Capturing subpattern count = 0
                    971: Options: utf
                    972: No first char
                    973: No need char
                    974: Subject length lower bound = 1
                    975: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
                    976: 
                    977: /\777/8DZ
                    978: ------------------------------------------------------------------
                    979:         Bra
                    980:         \x{1ff}
                    981:         Ket
                    982:         End
                    983: ------------------------------------------------------------------
                    984: Capturing subpattern count = 0
                    985: Options: utf
                    986: First char = \x{c7}
                    987: Need char = \x{bf}
1.1       misho     988: 
1.1.1.3   misho     989: /\w+\x{C4}/8BZ
                    990: ------------------------------------------------------------------
                    991:         Bra
                    992:         \w++
                    993:         \x{c4}
                    994:         Ket
                    995:         End
                    996: ------------------------------------------------------------------
                    997:     a\x{C4}\x{C4}
                    998:  0: a\x{c4}
                    999: 
                   1000: /\w+\x{C4}/8BZT1
                   1001: ------------------------------------------------------------------
                   1002:         Bra
                   1003:         \w+
                   1004:         \x{c4}
                   1005:         Ket
                   1006:         End
                   1007: ------------------------------------------------------------------
                   1008:     a\x{C4}\x{C4}
                   1009:  0: a\x{c4}\x{c4}
                   1010:     
                   1011: /\W+\x{C4}/8BZ
                   1012: ------------------------------------------------------------------
                   1013:         Bra
                   1014:         \W+
                   1015:         \x{c4}
                   1016:         Ket
                   1017:         End
                   1018: ------------------------------------------------------------------
                   1019:     !\x{C4}
                   1020:  0: !\x{c4}
                   1021:  
                   1022: /\W+\x{C4}/8BZT1
                   1023: ------------------------------------------------------------------
                   1024:         Bra
                   1025:         \W++
                   1026:         \x{c4}
                   1027:         Ket
                   1028:         End
                   1029: ------------------------------------------------------------------
                   1030:     !\x{C4}
                   1031:  0: !\x{c4}
                   1032: 
                   1033: /\W+\x{A1}/8BZ
                   1034: ------------------------------------------------------------------
                   1035:         Bra
                   1036:         \W+
                   1037:         \x{a1}
                   1038:         Ket
                   1039:         End
                   1040: ------------------------------------------------------------------
                   1041:     !\x{A1}
                   1042:  0: !\x{a1}
                   1043:  
                   1044: /\W+\x{A1}/8BZT1
                   1045: ------------------------------------------------------------------
                   1046:         Bra
                   1047:         \W+
                   1048:         \x{a1}
                   1049:         Ket
                   1050:         End
                   1051: ------------------------------------------------------------------
                   1052:     !\x{A1}
                   1053:  0: !\x{a1}
                   1054: 
                   1055: /X\s+\x{A0}/8BZ
                   1056: ------------------------------------------------------------------
                   1057:         Bra
                   1058:         X
                   1059:         \s++
                   1060:         \x{a0}
                   1061:         Ket
                   1062:         End
                   1063: ------------------------------------------------------------------
                   1064:     X\x20\x{A0}\x{A0}
                   1065:  0: X \x{a0}
                   1066: 
                   1067: /X\s+\x{A0}/8BZT1
                   1068: ------------------------------------------------------------------
                   1069:         Bra
                   1070:         X
                   1071:         \s+
                   1072:         \x{a0}
                   1073:         Ket
                   1074:         End
                   1075: ------------------------------------------------------------------
                   1076:     X\x20\x{A0}\x{A0}
                   1077:  0: X \x{a0}\x{a0}
                   1078: 
                   1079: /\S+\x{A0}/8BZ
                   1080: ------------------------------------------------------------------
                   1081:         Bra
                   1082:         \S+
                   1083:         \x{a0}
                   1084:         Ket
                   1085:         End
                   1086: ------------------------------------------------------------------
                   1087:     X\x{A0}\x{A0}
                   1088:  0: X\x{a0}\x{a0}
                   1089: 
                   1090: /\S+\x{A0}/8BZT1
                   1091: ------------------------------------------------------------------
                   1092:         Bra
                   1093:         \S++
                   1094:         \x{a0}
                   1095:         Ket
                   1096:         End
                   1097: ------------------------------------------------------------------
                   1098:     X\x{A0}\x{A0}
                   1099:  0: X\x{a0}
                   1100: 
                   1101: /\x{a0}+\s!/8BZ
                   1102: ------------------------------------------------------------------
                   1103:         Bra
                   1104:         \x{a0}++
                   1105:         \s
                   1106:         !
                   1107:         Ket
                   1108:         End
                   1109: ------------------------------------------------------------------
                   1110:     \x{a0}\x20!
                   1111:  0: \x{a0} !
                   1112: 
                   1113: /\x{a0}+\s!/8BZT1
                   1114: ------------------------------------------------------------------
                   1115:         Bra
                   1116:         \x{a0}+
                   1117:         \s
                   1118:         !
                   1119:         Ket
                   1120:         End
                   1121: ------------------------------------------------------------------
                   1122:     \x{a0}\x20!
                   1123:  0: \x{a0} !
                   1124: 
1.1.1.4 ! misho    1125: /A/8
        !          1126:   \x{ff000041}
        !          1127: ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
        !          1128:   \x{7f000041} 
        !          1129: Error -10 (bad UTF-8 string) offset=0 reason=12
        !          1130: 
        !          1131: /(*UTF8)abc/9
        !          1132: Failed: setting UTF is disabled by the application at offset 0
        !          1133: 
        !          1134: /abc/89
        !          1135: Failed: setting UTF is disabled by the application at offset 0
        !          1136: 
1.1       misho    1137: /-- End of testinput15 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>