File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / testdata / testoutput15
Revision 1.1.1.4 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:57 2013 UTC (11 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_33, HEAD
8.33

    1: /-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit 
    2:     library. --/
    3: 
    4: /X(\C{3})/8
    5:     X\x{1234}
    6:  0: X\x{1234}
    7:  1: \x{1234}
    8: 
    9: /X(\C{4})/8
   10:     X\x{1234}YZ
   11:  0: X\x{1234}Y
   12:  1: \x{1234}Y
   13:     
   14: /X\C*/8
   15:     XYZabcdce
   16:  0: XYZabcdce
   17:     
   18: /X\C*?/8
   19:     XYZabcde
   20:  0: X
   21:     
   22: /X\C{3,5}/8
   23:     Xabcdefg   
   24:  0: Xabcde
   25:     X\x{1234} 
   26:  0: X\x{1234}
   27:     X\x{1234}YZ
   28:  0: X\x{1234}YZ
   29:     X\x{1234}\x{512}  
   30:  0: X\x{1234}\x{512}
   31:     X\x{1234}\x{512}YZ
   32:  0: X\x{1234}\x{512}
   33: 
   34: /X\C{3,5}?/8
   35:     Xabcdefg   
   36:  0: Xabc
   37:     X\x{1234} 
   38:  0: X\x{1234}
   39:     X\x{1234}YZ
   40:  0: X\x{1234}
   41:     X\x{1234}\x{512}  
   42:  0: X\x{1234}
   43: 
   44: /a\Cb/8
   45:     aXb
   46:  0: aXb
   47:     a\nb
   48:  0: a\x{0a}b
   49:     
   50: /a\C\Cb/8 
   51:     a\x{100}b 
   52:  0: a\x{100}b
   53: 
   54: /ab\Cde/8
   55:     abXde
   56:  0: abXde
   57: 
   58: /a\C\Cb/8 
   59:     a\x{100}b
   60:  0: a\x{100}b
   61:     ** Failers 
   62: No match
   63:     a\x{12257}b
   64: No match
   65: 
   66: /[]/8
   67: Failed: invalid UTF-8 string at offset 1
   68: 
   69: //8
   70: Failed: invalid UTF-8 string at offset 0
   71: 
   72: /xxx/8
   73: Failed: invalid UTF-8 string at offset 0
   74: 
   75: /xxx/8?DZSS
   76: ------------------------------------------------------------------
   77:         Bra
   78:         \X{c0}\X{c0}\X{c0}xxx
   79:         Ket
   80:         End
   81: ------------------------------------------------------------------
   82: Capturing subpattern count = 0
   83: Options: utf no_utf_check
   84: First char = \x{c3}
   85: Need char = 'x'
   86: 
   87: /badutf/8
   88:     \xdf
   89: Error -10 (bad UTF-8 string) offset=0 reason=1
   90:     \xef
   91: Error -10 (bad UTF-8 string) offset=0 reason=2
   92:     \xef\x80
   93: Error -10 (bad UTF-8 string) offset=0 reason=1
   94:     \xf7
   95: Error -10 (bad UTF-8 string) offset=0 reason=3
   96:     \xf7\x80
   97: Error -10 (bad UTF-8 string) offset=0 reason=2
   98:     \xf7\x80\x80
   99: Error -10 (bad UTF-8 string) offset=0 reason=1
  100:     \xfb
  101: Error -10 (bad UTF-8 string) offset=0 reason=4
  102:     \xfb\x80
  103: Error -10 (bad UTF-8 string) offset=0 reason=3
  104:     \xfb\x80\x80
  105: Error -10 (bad UTF-8 string) offset=0 reason=2
  106:     \xfb\x80\x80\x80
  107: Error -10 (bad UTF-8 string) offset=0 reason=1
  108:     \xfd
  109: Error -10 (bad UTF-8 string) offset=0 reason=5
  110:     \xfd\x80
  111: Error -10 (bad UTF-8 string) offset=0 reason=4
  112:     \xfd\x80\x80
  113: Error -10 (bad UTF-8 string) offset=0 reason=3
  114:     \xfd\x80\x80\x80
  115: Error -10 (bad UTF-8 string) offset=0 reason=2
  116:     \xfd\x80\x80\x80\x80
  117: Error -10 (bad UTF-8 string) offset=0 reason=1
  118:     \xdf\x7f
  119: Error -10 (bad UTF-8 string) offset=0 reason=6
  120:     \xef\x7f\x80
  121: Error -10 (bad UTF-8 string) offset=0 reason=6
  122:     \xef\x80\x7f
  123: Error -10 (bad UTF-8 string) offset=0 reason=7
  124:     \xf7\x7f\x80\x80
  125: Error -10 (bad UTF-8 string) offset=0 reason=6
  126:     \xf7\x80\x7f\x80
  127: Error -10 (bad UTF-8 string) offset=0 reason=7
  128:     \xf7\x80\x80\x7f
  129: Error -10 (bad UTF-8 string) offset=0 reason=8
  130:     \xfb\x7f\x80\x80\x80
  131: Error -10 (bad UTF-8 string) offset=0 reason=6
  132:     \xfb\x80\x7f\x80\x80
  133: Error -10 (bad UTF-8 string) offset=0 reason=7
  134:     \xfb\x80\x80\x7f\x80
  135: Error -10 (bad UTF-8 string) offset=0 reason=8
  136:     \xfb\x80\x80\x80\x7f
  137: Error -10 (bad UTF-8 string) offset=0 reason=9
  138:     \xfd\x7f\x80\x80\x80\x80
  139: Error -10 (bad UTF-8 string) offset=0 reason=6
  140:     \xfd\x80\x7f\x80\x80\x80
  141: Error -10 (bad UTF-8 string) offset=0 reason=7
  142:     \xfd\x80\x80\x7f\x80\x80
  143: Error -10 (bad UTF-8 string) offset=0 reason=8
  144:     \xfd\x80\x80\x80\x7f\x80
  145: Error -10 (bad UTF-8 string) offset=0 reason=9
  146:     \xfd\x80\x80\x80\x80\x7f
  147: Error -10 (bad UTF-8 string) offset=0 reason=10
  148:     \xed\xa0\x80
  149: Error -10 (bad UTF-8 string) offset=0 reason=14
  150:     \xc0\x8f
  151: Error -10 (bad UTF-8 string) offset=0 reason=15
  152:     \xe0\x80\x8f
  153: Error -10 (bad UTF-8 string) offset=0 reason=16
  154:     \xf0\x80\x80\x8f
  155: Error -10 (bad UTF-8 string) offset=0 reason=17
  156:     \xf8\x80\x80\x80\x8f
  157: Error -10 (bad UTF-8 string) offset=0 reason=18
  158:     \xfc\x80\x80\x80\x80\x8f
  159: Error -10 (bad UTF-8 string) offset=0 reason=19
  160:     \x80
  161: Error -10 (bad UTF-8 string) offset=0 reason=20
  162:     \xfe
  163: Error -10 (bad UTF-8 string) offset=0 reason=21
  164:     \xff
  165: Error -10 (bad UTF-8 string) offset=0 reason=21
  166: 
  167: /badutf/8
  168:     \xfb\x80\x80\x80\x80
  169: Error -10 (bad UTF-8 string) offset=0 reason=11
  170:     \xfd\x80\x80\x80\x80\x80
  171: Error -10 (bad UTF-8 string) offset=0 reason=12
  172:     \xf7\xbf\xbf\xbf
  173: Error -10 (bad UTF-8 string) offset=0 reason=13
  174: 
  175: /shortutf/8
  176:     \P\P\xdf
  177: Error -25 (short UTF-8 string) offset=0 reason=1
  178:     \P\P\xef
  179: Error -25 (short UTF-8 string) offset=0 reason=2
  180:     \P\P\xef\x80
  181: Error -25 (short UTF-8 string) offset=0 reason=1
  182:     \P\P\xf7
  183: Error -25 (short UTF-8 string) offset=0 reason=3
  184:     \P\P\xf7\x80
  185: Error -25 (short UTF-8 string) offset=0 reason=2
  186:     \P\P\xf7\x80\x80
  187: Error -25 (short UTF-8 string) offset=0 reason=1
  188:     \P\P\xfb
  189: Error -25 (short UTF-8 string) offset=0 reason=4
  190:     \P\P\xfb\x80
  191: Error -25 (short UTF-8 string) offset=0 reason=3
  192:     \P\P\xfb\x80\x80
  193: Error -25 (short UTF-8 string) offset=0 reason=2
  194:     \P\P\xfb\x80\x80\x80
  195: Error -25 (short UTF-8 string) offset=0 reason=1
  196:     \P\P\xfd
  197: Error -25 (short UTF-8 string) offset=0 reason=5
  198:     \P\P\xfd\x80
  199: Error -25 (short UTF-8 string) offset=0 reason=4
  200:     \P\P\xfd\x80\x80
  201: Error -25 (short UTF-8 string) offset=0 reason=3
  202:     \P\P\xfd\x80\x80\x80
  203: Error -25 (short UTF-8 string) offset=0 reason=2
  204:     \P\P\xfd\x80\x80\x80\x80
  205: Error -25 (short UTF-8 string) offset=0 reason=1
  206: 
  207: /anything/8
  208:     \xc0\x80
  209: Error -10 (bad UTF-8 string) offset=0 reason=15
  210:     \xc1\x8f 
  211: Error -10 (bad UTF-8 string) offset=0 reason=15
  212:     \xe0\x9f\x80
  213: Error -10 (bad UTF-8 string) offset=0 reason=16
  214:     \xf0\x8f\x80\x80 
  215: Error -10 (bad UTF-8 string) offset=0 reason=17
  216:     \xf8\x87\x80\x80\x80  
  217: Error -10 (bad UTF-8 string) offset=0 reason=18
  218:     \xfc\x83\x80\x80\x80\x80
  219: Error -10 (bad UTF-8 string) offset=0 reason=19
  220:     \xfe\x80\x80\x80\x80\x80  
  221: Error -10 (bad UTF-8 string) offset=0 reason=21
  222:     \xff\x80\x80\x80\x80\x80  
  223: Error -10 (bad UTF-8 string) offset=0 reason=21
  224:     \xc3\x8f
  225: No match
  226:     \xe0\xaf\x80
  227: No match
  228:     \xe1\x80\x80
  229: No match
  230:     \xf0\x9f\x80\x80 
  231: No match
  232:     \xf1\x8f\x80\x80 
  233: No match
  234:     \xf8\x88\x80\x80\x80  
  235: Error -10 (bad UTF-8 string) offset=0 reason=11
  236:     \xf9\x87\x80\x80\x80  
  237: Error -10 (bad UTF-8 string) offset=0 reason=11
  238:     \xfc\x84\x80\x80\x80\x80
  239: Error -10 (bad UTF-8 string) offset=0 reason=12
  240:     \xfd\x83\x80\x80\x80\x80
  241: Error -10 (bad UTF-8 string) offset=0 reason=12
  242:     \?\xf8\x88\x80\x80\x80  
  243: No match
  244:     \?\xf9\x87\x80\x80\x80  
  245: No match
  246:     \?\xfc\x84\x80\x80\x80\x80
  247: No match
  248:     \?\xfd\x83\x80\x80\x80\x80
  249: No match
  250: 
  251: /\x{100}/8DZ
  252: ------------------------------------------------------------------
  253:         Bra
  254:         \x{100}
  255:         Ket
  256:         End
  257: ------------------------------------------------------------------
  258: Capturing subpattern count = 0
  259: Options: utf
  260: First char = \x{c4}
  261: Need char = \x{80}
  262: 
  263: /\x{1000}/8DZ
  264: ------------------------------------------------------------------
  265:         Bra
  266:         \x{1000}
  267:         Ket
  268:         End
  269: ------------------------------------------------------------------
  270: Capturing subpattern count = 0
  271: Options: utf
  272: First char = \x{e1}
  273: Need char = \x{80}
  274: 
  275: /\x{10000}/8DZ
  276: ------------------------------------------------------------------
  277:         Bra
  278:         \x{10000}
  279:         Ket
  280:         End
  281: ------------------------------------------------------------------
  282: Capturing subpattern count = 0
  283: Options: utf
  284: First char = \x{f0}
  285: Need char = \x{80}
  286: 
  287: /\x{100000}/8DZ
  288: ------------------------------------------------------------------
  289:         Bra
  290:         \x{100000}
  291:         Ket
  292:         End
  293: ------------------------------------------------------------------
  294: Capturing subpattern count = 0
  295: Options: utf
  296: First char = \x{f4}
  297: Need char = \x{80}
  298: 
  299: /\x{10ffff}/8DZ
  300: ------------------------------------------------------------------
  301:         Bra
  302:         \x{10ffff}
  303:         Ket
  304:         End
  305: ------------------------------------------------------------------
  306: Capturing subpattern count = 0
  307: Options: utf
  308: First char = \x{f4}
  309: Need char = \x{bf}
  310: 
  311: /[\x{ff}]/8DZ
  312: ------------------------------------------------------------------
  313:         Bra
  314:         \x{ff}
  315:         Ket
  316:         End
  317: ------------------------------------------------------------------
  318: Capturing subpattern count = 0
  319: Options: utf
  320: First char = \x{c3}
  321: Need char = \x{bf}
  322: 
  323: /[\x{100}]/8DZ
  324: ------------------------------------------------------------------
  325:         Bra
  326:         \x{100}
  327:         Ket
  328:         End
  329: ------------------------------------------------------------------
  330: Capturing subpattern count = 0
  331: Options: utf
  332: First char = \x{c4}
  333: Need char = \x{80}
  334: 
  335: /\x80/8DZ
  336: ------------------------------------------------------------------
  337:         Bra
  338:         \x{80}
  339:         Ket
  340:         End
  341: ------------------------------------------------------------------
  342: Capturing subpattern count = 0
  343: Options: utf
  344: First char = \x{c2}
  345: Need char = \x{80}
  346: 
  347: /\xff/8DZ
  348: ------------------------------------------------------------------
  349:         Bra
  350:         \x{ff}
  351:         Ket
  352:         End
  353: ------------------------------------------------------------------
  354: Capturing subpattern count = 0
  355: Options: utf
  356: First char = \x{c3}
  357: Need char = \x{bf}
  358: 
  359: /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
  360: ------------------------------------------------------------------
  361:         Bra
  362:         \x{d55c}\x{ad6d}\x{c5b4}
  363:         Ket
  364:         End
  365: ------------------------------------------------------------------
  366: Capturing subpattern count = 0
  367: Options: utf
  368: First char = \x{ed}
  369: Need char = \x{b4}
  370:     \x{D55c}\x{ad6d}\x{C5B4} 
  371:  0: \x{d55c}\x{ad6d}\x{c5b4}
  372: 
  373: /\x{65e5}\x{672c}\x{8a9e}/DZ8
  374: ------------------------------------------------------------------
  375:         Bra
  376:         \x{65e5}\x{672c}\x{8a9e}
  377:         Ket
  378:         End
  379: ------------------------------------------------------------------
  380: Capturing subpattern count = 0
  381: Options: utf
  382: First char = \x{e6}
  383: Need char = \x{9e}
  384:     \x{65e5}\x{672c}\x{8a9e}
  385:  0: \x{65e5}\x{672c}\x{8a9e}
  386: 
  387: /\x{80}/DZ8
  388: ------------------------------------------------------------------
  389:         Bra
  390:         \x{80}
  391:         Ket
  392:         End
  393: ------------------------------------------------------------------
  394: Capturing subpattern count = 0
  395: Options: utf
  396: First char = \x{c2}
  397: Need char = \x{80}
  398: 
  399: /\x{084}/DZ8
  400: ------------------------------------------------------------------
  401:         Bra
  402:         \x{84}
  403:         Ket
  404:         End
  405: ------------------------------------------------------------------
  406: Capturing subpattern count = 0
  407: Options: utf
  408: First char = \x{c2}
  409: Need char = \x{84}
  410: 
  411: /\x{104}/DZ8
  412: ------------------------------------------------------------------
  413:         Bra
  414:         \x{104}
  415:         Ket
  416:         End
  417: ------------------------------------------------------------------
  418: Capturing subpattern count = 0
  419: Options: utf
  420: First char = \x{c4}
  421: Need char = \x{84}
  422: 
  423: /\x{861}/DZ8
  424: ------------------------------------------------------------------
  425:         Bra
  426:         \x{861}
  427:         Ket
  428:         End
  429: ------------------------------------------------------------------
  430: Capturing subpattern count = 0
  431: Options: utf
  432: First char = \x{e0}
  433: Need char = \x{a1}
  434: 
  435: /\x{212ab}/DZ8
  436: ------------------------------------------------------------------
  437:         Bra
  438:         \x{212ab}
  439:         Ket
  440:         End
  441: ------------------------------------------------------------------
  442: Capturing subpattern count = 0
  443: Options: utf
  444: First char = \x{f0}
  445: Need char = \x{ab}
  446: 
  447: /-- This one is here not because it's different to Perl, but because the way
  448: the captured single-byte is displayed. (In Perl it becomes a character, and you
  449: can't tell the difference.) --/
  450:     
  451: /X(\C)(.*)/8
  452:     X\x{1234}
  453:  0: X\x{1234}
  454:  1: \x{e1}
  455:  2: \x{88}\x{b4}
  456:     X\nabc 
  457:  0: X\x{0a}abc
  458:  1: \x{0a}
  459:  2: abc
  460: 
  461: /-- This one is here because Perl gives out a grumbly error message (quite 
  462: correctly, but that messes up comparisons). --/
  463:     
  464: /a\Cb/8
  465:     *** Failers 
  466: No match
  467:     a\x{100}b 
  468: No match
  469:     
  470: /[^ab\xC0-\xF0]/8SDZ
  471: ------------------------------------------------------------------
  472:         Bra
  473:         [\x00-`c-\xbf\xf1-\xff] (neg)
  474:         Ket
  475:         End
  476: ------------------------------------------------------------------
  477: Capturing subpattern count = 0
  478: Options: utf
  479: No first char
  480: No need char
  481: Subject length lower bound = 1
  482: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
  483:   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
  484:   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
  485:   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
  486:   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
  487:   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
  488:   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
  489:   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
  490:   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
  491:   \xfe \xff 
  492:     \x{f1}
  493:  0: \x{f1}
  494:     \x{bf}
  495:  0: \x{bf}
  496:     \x{100}
  497:  0: \x{100}
  498:     \x{1000}   
  499:  0: \x{1000}
  500:     *** Failers
  501:  0: *
  502:     \x{c0} 
  503: No match
  504:     \x{f0} 
  505: No match
  506: 
  507: /Ā{3,4}/8SDZ
  508: ------------------------------------------------------------------
  509:         Bra
  510:         \x{100}{3}
  511:         \x{100}?
  512:         Ket
  513:         End
  514: ------------------------------------------------------------------
  515: Capturing subpattern count = 0
  516: Options: utf
  517: First char = \x{c4}
  518: Need char = \x{80}
  519: Subject length lower bound = 3
  520: No set of starting bytes
  521:   \x{100}\x{100}\x{100}\x{100\x{100}
  522:  0: \x{100}\x{100}\x{100}
  523: 
  524: /(\x{100}+|x)/8SDZ
  525: ------------------------------------------------------------------
  526:         Bra
  527:         CBra 1
  528:         \x{100}+
  529:         Alt
  530:         x
  531:         Ket
  532:         Ket
  533:         End
  534: ------------------------------------------------------------------
  535: Capturing subpattern count = 1
  536: Options: utf
  537: No first char
  538: No need char
  539: Subject length lower bound = 1
  540: Starting byte set: x \xc4 
  541: 
  542: /(\x{100}*a|x)/8SDZ
  543: ------------------------------------------------------------------
  544:         Bra
  545:         CBra 1
  546:         \x{100}*+
  547:         a
  548:         Alt
  549:         x
  550:         Ket
  551:         Ket
  552:         End
  553: ------------------------------------------------------------------
  554: Capturing subpattern count = 1
  555: Options: utf
  556: No first char
  557: No need char
  558: Subject length lower bound = 1
  559: Starting byte set: a x \xc4 
  560: 
  561: /(\x{100}{0,2}a|x)/8SDZ
  562: ------------------------------------------------------------------
  563:         Bra
  564:         CBra 1
  565:         \x{100}{0,2}
  566:         a
  567:         Alt
  568:         x
  569:         Ket
  570:         Ket
  571:         End
  572: ------------------------------------------------------------------
  573: Capturing subpattern count = 1
  574: Options: utf
  575: No first char
  576: No need char
  577: Subject length lower bound = 1
  578: Starting byte set: a x \xc4 
  579: 
  580: /(\x{100}{1,2}a|x)/8SDZ
  581: ------------------------------------------------------------------
  582:         Bra
  583:         CBra 1
  584:         \x{100}
  585:         \x{100}{0,1}
  586:         a
  587:         Alt
  588:         x
  589:         Ket
  590:         Ket
  591:         End
  592: ------------------------------------------------------------------
  593: Capturing subpattern count = 1
  594: Options: utf
  595: No first char
  596: No need char
  597: Subject length lower bound = 1
  598: Starting byte set: x \xc4 
  599: 
  600: /\x{100}/8DZ
  601: ------------------------------------------------------------------
  602:         Bra
  603:         \x{100}
  604:         Ket
  605:         End
  606: ------------------------------------------------------------------
  607: Capturing subpattern count = 0
  608: Options: utf
  609: First char = \x{c4}
  610: Need char = \x{80}
  611: 
  612: /a\x{100}\x{101}*/8DZ
  613: ------------------------------------------------------------------
  614:         Bra
  615:         a\x{100}
  616:         \x{101}*
  617:         Ket
  618:         End
  619: ------------------------------------------------------------------
  620: Capturing subpattern count = 0
  621: Options: utf
  622: First char = 'a'
  623: Need char = \x{80}
  624: 
  625: /a\x{100}\x{101}+/8DZ
  626: ------------------------------------------------------------------
  627:         Bra
  628:         a\x{100}
  629:         \x{101}+
  630:         Ket
  631:         End
  632: ------------------------------------------------------------------
  633: Capturing subpattern count = 0
  634: Options: utf
  635: First char = 'a'
  636: Need char = \x{81}
  637: 
  638: /[^\x{c4}]/DZ
  639: ------------------------------------------------------------------
  640:         Bra
  641:         [^\x{c4}]
  642:         Ket
  643:         End
  644: ------------------------------------------------------------------
  645: Capturing subpattern count = 0
  646: No options
  647: No first char
  648: No need char
  649: 
  650: /[\x{100}]/8DZ
  651: ------------------------------------------------------------------
  652:         Bra
  653:         \x{100}
  654:         Ket
  655:         End
  656: ------------------------------------------------------------------
  657: Capturing subpattern count = 0
  658: Options: utf
  659: First char = \x{c4}
  660: Need char = \x{80}
  661:     \x{100}
  662:  0: \x{100}
  663:     Z\x{100}
  664:  0: \x{100}
  665:     \x{100}Z
  666:  0: \x{100}
  667:     *** Failers 
  668: No match
  669: 
  670: /[\xff]/DZ8
  671: ------------------------------------------------------------------
  672:         Bra
  673:         \x{ff}
  674:         Ket
  675:         End
  676: ------------------------------------------------------------------
  677: Capturing subpattern count = 0
  678: Options: utf
  679: First char = \x{c3}
  680: Need char = \x{bf}
  681:     >\x{ff}<
  682:  0: \x{ff}
  683: 
  684: /[^\xff]/8DZ
  685: ------------------------------------------------------------------
  686:         Bra
  687:         [^\x{ff}]
  688:         Ket
  689:         End
  690: ------------------------------------------------------------------
  691: Capturing subpattern count = 0
  692: Options: utf
  693: No first char
  694: No need char
  695: 
  696: /\x{100}abc(xyz(?1))/8DZ
  697: ------------------------------------------------------------------
  698:         Bra
  699:         \x{100}abc
  700:         CBra 1
  701:         xyz
  702:         Recurse
  703:         Ket
  704:         Ket
  705:         End
  706: ------------------------------------------------------------------
  707: Capturing subpattern count = 1
  708: Options: utf
  709: First char = \x{c4}
  710: Need char = 'z'
  711: 
  712: /a\x{1234}b/P8
  713:     a\x{1234}b
  714:  0: a\x{1234}b
  715: 
  716: /\777/8I
  717: Capturing subpattern count = 0
  718: Options: utf
  719: First char = \x{c7}
  720: Need char = \x{bf}
  721:   \x{1ff}
  722:  0: \x{1ff}
  723:   \777 
  724:  0: \x{1ff}
  725:   
  726: /\x{100}+\x{200}/8DZ
  727: ------------------------------------------------------------------
  728:         Bra
  729:         \x{100}++
  730:         \x{200}
  731:         Ket
  732:         End
  733: ------------------------------------------------------------------
  734: Capturing subpattern count = 0
  735: Options: utf
  736: First char = \x{c4}
  737: Need char = \x{80}
  738: 
  739: /\x{100}+X/8DZ
  740: ------------------------------------------------------------------
  741:         Bra
  742:         \x{100}++
  743:         X
  744:         Ket
  745:         End
  746: ------------------------------------------------------------------
  747: Capturing subpattern count = 0
  748: Options: utf
  749: First char = \x{c4}
  750: Need char = 'X'
  751: 
  752: /^[\QĀ\E-\QŐ\E/BZ8
  753: Failed: missing terminating ] for character class at offset 15
  754: 
  755: /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
  756:     
  757: /X/8
  758:     \x{d800}
  759: Error -10 (bad UTF-8 string) offset=0 reason=14
  760:     \x{d800}\?
  761: No match
  762:     \x{da00}
  763: Error -10 (bad UTF-8 string) offset=0 reason=14
  764:     \x{da00}\?
  765: No match
  766:     \x{dfff}
  767: Error -10 (bad UTF-8 string) offset=0 reason=14
  768:     \x{dfff}\?
  769: No match
  770:     \x{110000}    
  771: Error -10 (bad UTF-8 string) offset=0 reason=13
  772:     \x{110000}\?    
  773: No match
  774:     \x{2000000} 
  775: Error -10 (bad UTF-8 string) offset=0 reason=11
  776:     \x{2000000}\? 
  777: No match
  778:     \x{7fffffff} 
  779: Error -10 (bad UTF-8 string) offset=0 reason=12
  780:     \x{7fffffff}\? 
  781: No match
  782: 
  783: /(*UTF8)\x{1234}/
  784:   abcd\x{1234}pqr
  785:  0: \x{1234}
  786: 
  787: /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
  788: Capturing subpattern count = 0
  789: Options: bsr_unicode utf
  790: Forced newline sequence: CRLF
  791: First char = 'a'
  792: Need char = 'b'
  793: 
  794: /\h/SI8
  795: Capturing subpattern count = 0
  796: Options: utf
  797: No first char
  798: No need char
  799: Subject length lower bound = 1
  800: Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
  801:     ABC\x{09}
  802:  0: \x{09}
  803:     ABC\x{20}
  804:  0:  
  805:     ABC\x{a0}
  806:  0: \x{a0}
  807:     ABC\x{1680}
  808:  0: \x{1680}
  809:     ABC\x{180e}
  810:  0: \x{180e}
  811:     ABC\x{2000}
  812:  0: \x{2000}
  813:     ABC\x{202f} 
  814:  0: \x{202f}
  815:     ABC\x{205f} 
  816:  0: \x{205f}
  817:     ABC\x{3000} 
  818:  0: \x{3000}
  819: 
  820: /\v/SI8
  821: Capturing subpattern count = 0
  822: Options: utf
  823: No first char
  824: No need char
  825: Subject length lower bound = 1
  826: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  827:     ABC\x{0a}
  828:  0: \x{0a}
  829:     ABC\x{0b}
  830:  0: \x{0b}
  831:     ABC\x{0c}
  832:  0: \x{0c}
  833:     ABC\x{0d}
  834:  0: \x{0d}
  835:     ABC\x{85}
  836:  0: \x{85}
  837:     ABC\x{2028}
  838:  0: \x{2028}
  839: 
  840: /\h*A/SI8
  841: Capturing subpattern count = 0
  842: Options: utf
  843: No first char
  844: Need char = 'A'
  845: Subject length lower bound = 1
  846: Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
  847:     CDBABC
  848:  0: A
  849:     
  850: /\v+A/SI8
  851: Capturing subpattern count = 0
  852: Options: utf
  853: No first char
  854: Need char = 'A'
  855: Subject length lower bound = 2
  856: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  857: 
  858: /\s?xxx\s/8SI
  859: Capturing subpattern count = 0
  860: Options: utf
  861: No first char
  862: Need char = 'x'
  863: Subject length lower bound = 4
  864: Starting byte set: \x09 \x0a \x0c \x0d \x20 x 
  865: 
  866: /\sxxx\s/I8ST1
  867: Capturing subpattern count = 0
  868: Options: utf
  869: No first char
  870: Need char = 'x'
  871: Subject length lower bound = 5
  872: Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 
  873:     AB\x{85}xxx\x{a0}XYZ
  874:  0: \x{85}xxx\x{a0}
  875:     AB\x{a0}xxx\x{85}XYZ
  876:  0: \x{a0}xxx\x{85}
  877: 
  878: /\S \S/I8ST1
  879: Capturing subpattern count = 0
  880: Options: utf
  881: No first char
  882: Need char = ' '
  883: Subject length lower bound = 3
  884: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
  885:   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
  886:   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
  887:   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
  888:   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 
  889:   \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 
  890:   \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 
  891:   \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 
  892:   \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
  893:     \x{a2} \x{84} 
  894:  0: \x{a2} \x{84}
  895:     A Z 
  896:  0: A Z
  897: 
  898: /a+/8
  899:     a\x{123}aa\>1
  900:  0: aa
  901:     a\x{123}aa\>2
  902: Error -11 (bad UTF-8 offset)
  903:     a\x{123}aa\>3
  904:  0: aa
  905:     a\x{123}aa\>4
  906:  0: a
  907:     a\x{123}aa\>5
  908: No match
  909:     a\x{123}aa\>6
  910: Error -24 (bad offset value)
  911: 
  912: /\x{1234}+/iS8I
  913: Capturing subpattern count = 0
  914: Options: caseless utf
  915: No first char
  916: No need char
  917: Subject length lower bound = 1
  918: Starting byte set: \xe1 
  919: 
  920: /\x{1234}+?/iS8I
  921: Capturing subpattern count = 0
  922: Options: caseless utf
  923: No first char
  924: No need char
  925: Subject length lower bound = 1
  926: Starting byte set: \xe1 
  927: 
  928: /\x{1234}++/iS8I
  929: Capturing subpattern count = 0
  930: Options: caseless utf
  931: No first char
  932: No need char
  933: Subject length lower bound = 1
  934: Starting byte set: \xe1 
  935: 
  936: /\x{1234}{2}/iS8I
  937: Capturing subpattern count = 0
  938: Options: caseless utf
  939: No first char
  940: No need char
  941: Subject length lower bound = 2
  942: Starting byte set: \xe1 
  943: 
  944: /[^\x{c4}]/8DZ
  945: ------------------------------------------------------------------
  946:         Bra
  947:         [^\x{c4}]
  948:         Ket
  949:         End
  950: ------------------------------------------------------------------
  951: Capturing subpattern count = 0
  952: Options: utf
  953: No first char
  954: No need char
  955: 
  956: /X+\x{200}/8DZ
  957: ------------------------------------------------------------------
  958:         Bra
  959:         X++
  960:         \x{200}
  961:         Ket
  962:         End
  963: ------------------------------------------------------------------
  964: Capturing subpattern count = 0
  965: Options: utf
  966: First char = 'X'
  967: Need char = \x{80}
  968: 
  969: /\R/SI8
  970: Capturing subpattern count = 0
  971: Options: utf
  972: No first char
  973: No need char
  974: Subject length lower bound = 1
  975: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  976: 
  977: /\777/8DZ
  978: ------------------------------------------------------------------
  979:         Bra
  980:         \x{1ff}
  981:         Ket
  982:         End
  983: ------------------------------------------------------------------
  984: Capturing subpattern count = 0
  985: Options: utf
  986: First char = \x{c7}
  987: Need char = \x{bf}
  988: 
  989: /\w+\x{C4}/8BZ
  990: ------------------------------------------------------------------
  991:         Bra
  992:         \w++
  993:         \x{c4}
  994:         Ket
  995:         End
  996: ------------------------------------------------------------------
  997:     a\x{C4}\x{C4}
  998:  0: a\x{c4}
  999: 
 1000: /\w+\x{C4}/8BZT1
 1001: ------------------------------------------------------------------
 1002:         Bra
 1003:         \w+
 1004:         \x{c4}
 1005:         Ket
 1006:         End
 1007: ------------------------------------------------------------------
 1008:     a\x{C4}\x{C4}
 1009:  0: a\x{c4}\x{c4}
 1010:     
 1011: /\W+\x{C4}/8BZ
 1012: ------------------------------------------------------------------
 1013:         Bra
 1014:         \W+
 1015:         \x{c4}
 1016:         Ket
 1017:         End
 1018: ------------------------------------------------------------------
 1019:     !\x{C4}
 1020:  0: !\x{c4}
 1021:  
 1022: /\W+\x{C4}/8BZT1
 1023: ------------------------------------------------------------------
 1024:         Bra
 1025:         \W++
 1026:         \x{c4}
 1027:         Ket
 1028:         End
 1029: ------------------------------------------------------------------
 1030:     !\x{C4}
 1031:  0: !\x{c4}
 1032: 
 1033: /\W+\x{A1}/8BZ
 1034: ------------------------------------------------------------------
 1035:         Bra
 1036:         \W+
 1037:         \x{a1}
 1038:         Ket
 1039:         End
 1040: ------------------------------------------------------------------
 1041:     !\x{A1}
 1042:  0: !\x{a1}
 1043:  
 1044: /\W+\x{A1}/8BZT1
 1045: ------------------------------------------------------------------
 1046:         Bra
 1047:         \W+
 1048:         \x{a1}
 1049:         Ket
 1050:         End
 1051: ------------------------------------------------------------------
 1052:     !\x{A1}
 1053:  0: !\x{a1}
 1054: 
 1055: /X\s+\x{A0}/8BZ
 1056: ------------------------------------------------------------------
 1057:         Bra
 1058:         X
 1059:         \s++
 1060:         \x{a0}
 1061:         Ket
 1062:         End
 1063: ------------------------------------------------------------------
 1064:     X\x20\x{A0}\x{A0}
 1065:  0: X \x{a0}
 1066: 
 1067: /X\s+\x{A0}/8BZT1
 1068: ------------------------------------------------------------------
 1069:         Bra
 1070:         X
 1071:         \s+
 1072:         \x{a0}
 1073:         Ket
 1074:         End
 1075: ------------------------------------------------------------------
 1076:     X\x20\x{A0}\x{A0}
 1077:  0: X \x{a0}\x{a0}
 1078: 
 1079: /\S+\x{A0}/8BZ
 1080: ------------------------------------------------------------------
 1081:         Bra
 1082:         \S+
 1083:         \x{a0}
 1084:         Ket
 1085:         End
 1086: ------------------------------------------------------------------
 1087:     X\x{A0}\x{A0}
 1088:  0: X\x{a0}\x{a0}
 1089: 
 1090: /\S+\x{A0}/8BZT1
 1091: ------------------------------------------------------------------
 1092:         Bra
 1093:         \S++
 1094:         \x{a0}
 1095:         Ket
 1096:         End
 1097: ------------------------------------------------------------------
 1098:     X\x{A0}\x{A0}
 1099:  0: X\x{a0}
 1100: 
 1101: /\x{a0}+\s!/8BZ
 1102: ------------------------------------------------------------------
 1103:         Bra
 1104:         \x{a0}++
 1105:         \s
 1106:         !
 1107:         Ket
 1108:         End
 1109: ------------------------------------------------------------------
 1110:     \x{a0}\x20!
 1111:  0: \x{a0} !
 1112: 
 1113: /\x{a0}+\s!/8BZT1
 1114: ------------------------------------------------------------------
 1115:         Bra
 1116:         \x{a0}+
 1117:         \s
 1118:         !
 1119:         Ket
 1120:         End
 1121: ------------------------------------------------------------------
 1122:     \x{a0}\x20!
 1123:  0: \x{a0} !
 1124: 
 1125: /A/8
 1126:   \x{ff000041}
 1127: ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
 1128:   \x{7f000041} 
 1129: Error -10 (bad UTF-8 string) offset=0 reason=12
 1130: 
 1131: /(*UTF8)abc/9
 1132: Failed: setting UTF is disabled by the application at offset 0
 1133: 
 1134: /abc/89
 1135: Failed: setting UTF is disabled by the application at offset 0
 1136: 
 1137: /-- End of testinput15 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>