File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / testdata / testoutput15
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:05 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /-- This set of tests is for UTF-8 support but not Unicode property support,
    2:     and is relevant only to the 8-bit library. --/
    3: 
    4: < forbid W
    5: 
    6: /X(\C{3})/8
    7:     X\x{1234}
    8:  0: X\x{1234}
    9:  1: \x{1234}
   10: 
   11: /X(\C{4})/8
   12:     X\x{1234}YZ
   13:  0: X\x{1234}Y
   14:  1: \x{1234}Y
   15:     
   16: /X\C*/8
   17:     XYZabcdce
   18:  0: XYZabcdce
   19:     
   20: /X\C*?/8
   21:     XYZabcde
   22:  0: X
   23:     
   24: /X\C{3,5}/8
   25:     Xabcdefg   
   26:  0: Xabcde
   27:     X\x{1234} 
   28:  0: X\x{1234}
   29:     X\x{1234}YZ
   30:  0: X\x{1234}YZ
   31:     X\x{1234}\x{512}  
   32:  0: X\x{1234}\x{512}
   33:     X\x{1234}\x{512}YZ
   34:  0: X\x{1234}\x{512}
   35: 
   36: /X\C{3,5}?/8
   37:     Xabcdefg   
   38:  0: Xabc
   39:     X\x{1234} 
   40:  0: X\x{1234}
   41:     X\x{1234}YZ
   42:  0: X\x{1234}
   43:     X\x{1234}\x{512}  
   44:  0: X\x{1234}
   45: 
   46: /a\Cb/8
   47:     aXb
   48:  0: aXb
   49:     a\nb
   50:  0: a\x{0a}b
   51:     
   52: /a\C\Cb/8 
   53:     a\x{100}b 
   54:  0: a\x{100}b
   55: 
   56: /ab\Cde/8
   57:     abXde
   58:  0: abXde
   59: 
   60: /a\C\Cb/8 
   61:     a\x{100}b
   62:  0: a\x{100}b
   63:     ** Failers 
   64: No match
   65:     a\x{12257}b
   66: No match
   67: 
   68: /[]/8
   69: Failed: invalid UTF-8 string at offset 1
   70: 
   71: //8
   72: Failed: invalid UTF-8 string at offset 0
   73: 
   74: /xxx/8
   75: Failed: invalid UTF-8 string at offset 0
   76: 
   77: /xxx/8?DZSSO
   78: ------------------------------------------------------------------
   79:         Bra
   80:         \X{c0}\X{c0}\X{c0}xxx
   81:         Ket
   82:         End
   83: ------------------------------------------------------------------
   84: Capturing subpattern count = 0
   85: Options: no_auto_possessify utf no_utf_check
   86: First char = \x{c3}
   87: Need char = 'x'
   88: 
   89: /badutf/8
   90:     \xdf
   91: Error -10 (bad UTF-8 string) offset=0 reason=1
   92:     \xef
   93: Error -10 (bad UTF-8 string) offset=0 reason=2
   94:     \xef\x80
   95: Error -10 (bad UTF-8 string) offset=0 reason=1
   96:     \xf7
   97: Error -10 (bad UTF-8 string) offset=0 reason=3
   98:     \xf7\x80
   99: Error -10 (bad UTF-8 string) offset=0 reason=2
  100:     \xf7\x80\x80
  101: Error -10 (bad UTF-8 string) offset=0 reason=1
  102:     \xfb
  103: Error -10 (bad UTF-8 string) offset=0 reason=4
  104:     \xfb\x80
  105: Error -10 (bad UTF-8 string) offset=0 reason=3
  106:     \xfb\x80\x80
  107: Error -10 (bad UTF-8 string) offset=0 reason=2
  108:     \xfb\x80\x80\x80
  109: Error -10 (bad UTF-8 string) offset=0 reason=1
  110:     \xfd
  111: Error -10 (bad UTF-8 string) offset=0 reason=5
  112:     \xfd\x80
  113: Error -10 (bad UTF-8 string) offset=0 reason=4
  114:     \xfd\x80\x80
  115: Error -10 (bad UTF-8 string) offset=0 reason=3
  116:     \xfd\x80\x80\x80
  117: Error -10 (bad UTF-8 string) offset=0 reason=2
  118:     \xfd\x80\x80\x80\x80
  119: Error -10 (bad UTF-8 string) offset=0 reason=1
  120:     \xdf\x7f
  121: Error -10 (bad UTF-8 string) offset=0 reason=6
  122:     \xef\x7f\x80
  123: Error -10 (bad UTF-8 string) offset=0 reason=6
  124:     \xef\x80\x7f
  125: Error -10 (bad UTF-8 string) offset=0 reason=7
  126:     \xf7\x7f\x80\x80
  127: Error -10 (bad UTF-8 string) offset=0 reason=6
  128:     \xf7\x80\x7f\x80
  129: Error -10 (bad UTF-8 string) offset=0 reason=7
  130:     \xf7\x80\x80\x7f
  131: Error -10 (bad UTF-8 string) offset=0 reason=8
  132:     \xfb\x7f\x80\x80\x80
  133: Error -10 (bad UTF-8 string) offset=0 reason=6
  134:     \xfb\x80\x7f\x80\x80
  135: Error -10 (bad UTF-8 string) offset=0 reason=7
  136:     \xfb\x80\x80\x7f\x80
  137: Error -10 (bad UTF-8 string) offset=0 reason=8
  138:     \xfb\x80\x80\x80\x7f
  139: Error -10 (bad UTF-8 string) offset=0 reason=9
  140:     \xfd\x7f\x80\x80\x80\x80
  141: Error -10 (bad UTF-8 string) offset=0 reason=6
  142:     \xfd\x80\x7f\x80\x80\x80
  143: Error -10 (bad UTF-8 string) offset=0 reason=7
  144:     \xfd\x80\x80\x7f\x80\x80
  145: Error -10 (bad UTF-8 string) offset=0 reason=8
  146:     \xfd\x80\x80\x80\x7f\x80
  147: Error -10 (bad UTF-8 string) offset=0 reason=9
  148:     \xfd\x80\x80\x80\x80\x7f
  149: Error -10 (bad UTF-8 string) offset=0 reason=10
  150:     \xed\xa0\x80
  151: Error -10 (bad UTF-8 string) offset=0 reason=14
  152:     \xc0\x8f
  153: Error -10 (bad UTF-8 string) offset=0 reason=15
  154:     \xe0\x80\x8f
  155: Error -10 (bad UTF-8 string) offset=0 reason=16
  156:     \xf0\x80\x80\x8f
  157: Error -10 (bad UTF-8 string) offset=0 reason=17
  158:     \xf8\x80\x80\x80\x8f
  159: Error -10 (bad UTF-8 string) offset=0 reason=18
  160:     \xfc\x80\x80\x80\x80\x8f
  161: Error -10 (bad UTF-8 string) offset=0 reason=19
  162:     \x80
  163: Error -10 (bad UTF-8 string) offset=0 reason=20
  164:     \xfe
  165: Error -10 (bad UTF-8 string) offset=0 reason=21
  166:     \xff
  167: Error -10 (bad UTF-8 string) offset=0 reason=21
  168: 
  169: /badutf/8
  170:     \xfb\x80\x80\x80\x80
  171: Error -10 (bad UTF-8 string) offset=0 reason=11
  172:     \xfd\x80\x80\x80\x80\x80
  173: Error -10 (bad UTF-8 string) offset=0 reason=12
  174:     \xf7\xbf\xbf\xbf
  175: Error -10 (bad UTF-8 string) offset=0 reason=13
  176: 
  177: /shortutf/8
  178:     \P\P\xdf
  179: Error -25 (short UTF-8 string) offset=0 reason=1
  180:     \P\P\xef
  181: Error -25 (short UTF-8 string) offset=0 reason=2
  182:     \P\P\xef\x80
  183: Error -25 (short UTF-8 string) offset=0 reason=1
  184:     \P\P\xf7
  185: Error -25 (short UTF-8 string) offset=0 reason=3
  186:     \P\P\xf7\x80
  187: Error -25 (short UTF-8 string) offset=0 reason=2
  188:     \P\P\xf7\x80\x80
  189: Error -25 (short UTF-8 string) offset=0 reason=1
  190:     \P\P\xfb
  191: Error -25 (short UTF-8 string) offset=0 reason=4
  192:     \P\P\xfb\x80
  193: Error -25 (short UTF-8 string) offset=0 reason=3
  194:     \P\P\xfb\x80\x80
  195: Error -25 (short UTF-8 string) offset=0 reason=2
  196:     \P\P\xfb\x80\x80\x80
  197: Error -25 (short UTF-8 string) offset=0 reason=1
  198:     \P\P\xfd
  199: Error -25 (short UTF-8 string) offset=0 reason=5
  200:     \P\P\xfd\x80
  201: Error -25 (short UTF-8 string) offset=0 reason=4
  202:     \P\P\xfd\x80\x80
  203: Error -25 (short UTF-8 string) offset=0 reason=3
  204:     \P\P\xfd\x80\x80\x80
  205: Error -25 (short UTF-8 string) offset=0 reason=2
  206:     \P\P\xfd\x80\x80\x80\x80
  207: Error -25 (short UTF-8 string) offset=0 reason=1
  208: 
  209: /anything/8
  210:     \xc0\x80
  211: Error -10 (bad UTF-8 string) offset=0 reason=15
  212:     \xc1\x8f 
  213: Error -10 (bad UTF-8 string) offset=0 reason=15
  214:     \xe0\x9f\x80
  215: Error -10 (bad UTF-8 string) offset=0 reason=16
  216:     \xf0\x8f\x80\x80 
  217: Error -10 (bad UTF-8 string) offset=0 reason=17
  218:     \xf8\x87\x80\x80\x80  
  219: Error -10 (bad UTF-8 string) offset=0 reason=18
  220:     \xfc\x83\x80\x80\x80\x80
  221: Error -10 (bad UTF-8 string) offset=0 reason=19
  222:     \xfe\x80\x80\x80\x80\x80  
  223: Error -10 (bad UTF-8 string) offset=0 reason=21
  224:     \xff\x80\x80\x80\x80\x80  
  225: Error -10 (bad UTF-8 string) offset=0 reason=21
  226:     \xc3\x8f
  227: No match
  228:     \xe0\xaf\x80
  229: No match
  230:     \xe1\x80\x80
  231: No match
  232:     \xf0\x9f\x80\x80 
  233: No match
  234:     \xf1\x8f\x80\x80 
  235: No match
  236:     \xf8\x88\x80\x80\x80  
  237: Error -10 (bad UTF-8 string) offset=0 reason=11
  238:     \xf9\x87\x80\x80\x80  
  239: Error -10 (bad UTF-8 string) offset=0 reason=11
  240:     \xfc\x84\x80\x80\x80\x80
  241: Error -10 (bad UTF-8 string) offset=0 reason=12
  242:     \xfd\x83\x80\x80\x80\x80
  243: Error -10 (bad UTF-8 string) offset=0 reason=12
  244:     \?\xf8\x88\x80\x80\x80  
  245: No match
  246:     \?\xf9\x87\x80\x80\x80  
  247: No match
  248:     \?\xfc\x84\x80\x80\x80\x80
  249: No match
  250:     \?\xfd\x83\x80\x80\x80\x80
  251: No match
  252: 
  253: /\x{100}/8DZ
  254: ------------------------------------------------------------------
  255:         Bra
  256:         \x{100}
  257:         Ket
  258:         End
  259: ------------------------------------------------------------------
  260: Capturing subpattern count = 0
  261: Options: utf
  262: First char = \x{c4}
  263: Need char = \x{80}
  264: 
  265: /\x{1000}/8DZ
  266: ------------------------------------------------------------------
  267:         Bra
  268:         \x{1000}
  269:         Ket
  270:         End
  271: ------------------------------------------------------------------
  272: Capturing subpattern count = 0
  273: Options: utf
  274: First char = \x{e1}
  275: Need char = \x{80}
  276: 
  277: /\x{10000}/8DZ
  278: ------------------------------------------------------------------
  279:         Bra
  280:         \x{10000}
  281:         Ket
  282:         End
  283: ------------------------------------------------------------------
  284: Capturing subpattern count = 0
  285: Options: utf
  286: First char = \x{f0}
  287: Need char = \x{80}
  288: 
  289: /\x{100000}/8DZ
  290: ------------------------------------------------------------------
  291:         Bra
  292:         \x{100000}
  293:         Ket
  294:         End
  295: ------------------------------------------------------------------
  296: Capturing subpattern count = 0
  297: Options: utf
  298: First char = \x{f4}
  299: Need char = \x{80}
  300: 
  301: /\x{10ffff}/8DZ
  302: ------------------------------------------------------------------
  303:         Bra
  304:         \x{10ffff}
  305:         Ket
  306:         End
  307: ------------------------------------------------------------------
  308: Capturing subpattern count = 0
  309: Options: utf
  310: First char = \x{f4}
  311: Need char = \x{bf}
  312: 
  313: /[\x{ff}]/8DZ
  314: ------------------------------------------------------------------
  315:         Bra
  316:         \x{ff}
  317:         Ket
  318:         End
  319: ------------------------------------------------------------------
  320: Capturing subpattern count = 0
  321: Options: utf
  322: First char = \x{c3}
  323: Need char = \x{bf}
  324: 
  325: /[\x{100}]/8DZ
  326: ------------------------------------------------------------------
  327:         Bra
  328:         \x{100}
  329:         Ket
  330:         End
  331: ------------------------------------------------------------------
  332: Capturing subpattern count = 0
  333: Options: utf
  334: First char = \x{c4}
  335: Need char = \x{80}
  336: 
  337: /\x80/8DZ
  338: ------------------------------------------------------------------
  339:         Bra
  340:         \x{80}
  341:         Ket
  342:         End
  343: ------------------------------------------------------------------
  344: Capturing subpattern count = 0
  345: Options: utf
  346: First char = \x{c2}
  347: Need char = \x{80}
  348: 
  349: /\xff/8DZ
  350: ------------------------------------------------------------------
  351:         Bra
  352:         \x{ff}
  353:         Ket
  354:         End
  355: ------------------------------------------------------------------
  356: Capturing subpattern count = 0
  357: Options: utf
  358: First char = \x{c3}
  359: Need char = \x{bf}
  360: 
  361: /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
  362: ------------------------------------------------------------------
  363:         Bra
  364:         \x{d55c}\x{ad6d}\x{c5b4}
  365:         Ket
  366:         End
  367: ------------------------------------------------------------------
  368: Capturing subpattern count = 0
  369: Options: utf
  370: First char = \x{ed}
  371: Need char = \x{b4}
  372:     \x{D55c}\x{ad6d}\x{C5B4} 
  373:  0: \x{d55c}\x{ad6d}\x{c5b4}
  374: 
  375: /\x{65e5}\x{672c}\x{8a9e}/DZ8
  376: ------------------------------------------------------------------
  377:         Bra
  378:         \x{65e5}\x{672c}\x{8a9e}
  379:         Ket
  380:         End
  381: ------------------------------------------------------------------
  382: Capturing subpattern count = 0
  383: Options: utf
  384: First char = \x{e6}
  385: Need char = \x{9e}
  386:     \x{65e5}\x{672c}\x{8a9e}
  387:  0: \x{65e5}\x{672c}\x{8a9e}
  388: 
  389: /\x{80}/DZ8
  390: ------------------------------------------------------------------
  391:         Bra
  392:         \x{80}
  393:         Ket
  394:         End
  395: ------------------------------------------------------------------
  396: Capturing subpattern count = 0
  397: Options: utf
  398: First char = \x{c2}
  399: Need char = \x{80}
  400: 
  401: /\x{084}/DZ8
  402: ------------------------------------------------------------------
  403:         Bra
  404:         \x{84}
  405:         Ket
  406:         End
  407: ------------------------------------------------------------------
  408: Capturing subpattern count = 0
  409: Options: utf
  410: First char = \x{c2}
  411: Need char = \x{84}
  412: 
  413: /\x{104}/DZ8
  414: ------------------------------------------------------------------
  415:         Bra
  416:         \x{104}
  417:         Ket
  418:         End
  419: ------------------------------------------------------------------
  420: Capturing subpattern count = 0
  421: Options: utf
  422: First char = \x{c4}
  423: Need char = \x{84}
  424: 
  425: /\x{861}/DZ8
  426: ------------------------------------------------------------------
  427:         Bra
  428:         \x{861}
  429:         Ket
  430:         End
  431: ------------------------------------------------------------------
  432: Capturing subpattern count = 0
  433: Options: utf
  434: First char = \x{e0}
  435: Need char = \x{a1}
  436: 
  437: /\x{212ab}/DZ8
  438: ------------------------------------------------------------------
  439:         Bra
  440:         \x{212ab}
  441:         Ket
  442:         End
  443: ------------------------------------------------------------------
  444: Capturing subpattern count = 0
  445: Options: utf
  446: First char = \x{f0}
  447: Need char = \x{ab}
  448: 
  449: /-- This one is here not because it's different to Perl, but because the way
  450: the captured single-byte is displayed. (In Perl it becomes a character, and you
  451: can't tell the difference.) --/
  452:     
  453: /X(\C)(.*)/8
  454:     X\x{1234}
  455:  0: X\x{1234}
  456:  1: \x{e1}
  457:  2: \x{88}\x{b4}
  458:     X\nabc 
  459:  0: X\x{0a}abc
  460:  1: \x{0a}
  461:  2: abc
  462: 
  463: /-- This one is here because Perl gives out a grumbly error message (quite 
  464: correctly, but that messes up comparisons). --/
  465:     
  466: /a\Cb/8
  467:     *** Failers 
  468: No match
  469:     a\x{100}b 
  470: No match
  471:     
  472: /[^ab\xC0-\xF0]/8SDZ
  473: ------------------------------------------------------------------
  474:         Bra
  475:         [\x00-`c-\xbf\xf1-\xff] (neg)
  476:         Ket
  477:         End
  478: ------------------------------------------------------------------
  479: Capturing subpattern count = 0
  480: Options: utf
  481: No first char
  482: No need char
  483: Subject length lower bound = 1
  484: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
  485:   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
  486:   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
  487:   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
  488:   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
  489:   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
  490:   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
  491:   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
  492:   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
  493:   \xfe \xff 
  494:     \x{f1}
  495:  0: \x{f1}
  496:     \x{bf}
  497:  0: \x{bf}
  498:     \x{100}
  499:  0: \x{100}
  500:     \x{1000}   
  501:  0: \x{1000}
  502:     *** Failers
  503:  0: *
  504:     \x{c0} 
  505: No match
  506:     \x{f0} 
  507: No match
  508: 
  509: /Ā{3,4}/8SDZ
  510: ------------------------------------------------------------------
  511:         Bra
  512:         \x{100}{3}
  513:         \x{100}?+
  514:         Ket
  515:         End
  516: ------------------------------------------------------------------
  517: Capturing subpattern count = 0
  518: Options: utf
  519: First char = \x{c4}
  520: Need char = \x{80}
  521: Subject length lower bound = 3
  522: No set of starting bytes
  523:   \x{100}\x{100}\x{100}\x{100\x{100}
  524:  0: \x{100}\x{100}\x{100}
  525: 
  526: /(\x{100}+|x)/8SDZ
  527: ------------------------------------------------------------------
  528:         Bra
  529:         CBra 1
  530:         \x{100}++
  531:         Alt
  532:         x
  533:         Ket
  534:         Ket
  535:         End
  536: ------------------------------------------------------------------
  537: Capturing subpattern count = 1
  538: Options: utf
  539: No first char
  540: No need char
  541: Subject length lower bound = 1
  542: Starting byte set: x \xc4 
  543: 
  544: /(\x{100}*a|x)/8SDZ
  545: ------------------------------------------------------------------
  546:         Bra
  547:         CBra 1
  548:         \x{100}*+
  549:         a
  550:         Alt
  551:         x
  552:         Ket
  553:         Ket
  554:         End
  555: ------------------------------------------------------------------
  556: Capturing subpattern count = 1
  557: Options: utf
  558: No first char
  559: No need char
  560: Subject length lower bound = 1
  561: Starting byte set: a x \xc4 
  562: 
  563: /(\x{100}{0,2}a|x)/8SDZ
  564: ------------------------------------------------------------------
  565:         Bra
  566:         CBra 1
  567:         \x{100}{0,2}+
  568:         a
  569:         Alt
  570:         x
  571:         Ket
  572:         Ket
  573:         End
  574: ------------------------------------------------------------------
  575: Capturing subpattern count = 1
  576: Options: utf
  577: No first char
  578: No need char
  579: Subject length lower bound = 1
  580: Starting byte set: a x \xc4 
  581: 
  582: /(\x{100}{1,2}a|x)/8SDZ
  583: ------------------------------------------------------------------
  584:         Bra
  585:         CBra 1
  586:         \x{100}
  587:         \x{100}{0,1}+
  588:         a
  589:         Alt
  590:         x
  591:         Ket
  592:         Ket
  593:         End
  594: ------------------------------------------------------------------
  595: Capturing subpattern count = 1
  596: Options: utf
  597: No first char
  598: No need char
  599: Subject length lower bound = 1
  600: Starting byte set: x \xc4 
  601: 
  602: /\x{100}/8DZ
  603: ------------------------------------------------------------------
  604:         Bra
  605:         \x{100}
  606:         Ket
  607:         End
  608: ------------------------------------------------------------------
  609: Capturing subpattern count = 0
  610: Options: utf
  611: First char = \x{c4}
  612: Need char = \x{80}
  613: 
  614: /a\x{100}\x{101}*/8DZ
  615: ------------------------------------------------------------------
  616:         Bra
  617:         a\x{100}
  618:         \x{101}*+
  619:         Ket
  620:         End
  621: ------------------------------------------------------------------
  622: Capturing subpattern count = 0
  623: Options: utf
  624: First char = 'a'
  625: Need char = \x{80}
  626: 
  627: /a\x{100}\x{101}+/8DZ
  628: ------------------------------------------------------------------
  629:         Bra
  630:         a\x{100}
  631:         \x{101}++
  632:         Ket
  633:         End
  634: ------------------------------------------------------------------
  635: Capturing subpattern count = 0
  636: Options: utf
  637: First char = 'a'
  638: Need char = \x{81}
  639: 
  640: /[^\x{c4}]/DZ
  641: ------------------------------------------------------------------
  642:         Bra
  643:         [^\x{c4}]
  644:         Ket
  645:         End
  646: ------------------------------------------------------------------
  647: Capturing subpattern count = 0
  648: No options
  649: No first char
  650: No need char
  651: 
  652: /[\x{100}]/8DZ
  653: ------------------------------------------------------------------
  654:         Bra
  655:         \x{100}
  656:         Ket
  657:         End
  658: ------------------------------------------------------------------
  659: Capturing subpattern count = 0
  660: Options: utf
  661: First char = \x{c4}
  662: Need char = \x{80}
  663:     \x{100}
  664:  0: \x{100}
  665:     Z\x{100}
  666:  0: \x{100}
  667:     \x{100}Z
  668:  0: \x{100}
  669:     *** Failers 
  670: No match
  671: 
  672: /[\xff]/DZ8
  673: ------------------------------------------------------------------
  674:         Bra
  675:         \x{ff}
  676:         Ket
  677:         End
  678: ------------------------------------------------------------------
  679: Capturing subpattern count = 0
  680: Options: utf
  681: First char = \x{c3}
  682: Need char = \x{bf}
  683:     >\x{ff}<
  684:  0: \x{ff}
  685: 
  686: /[^\xff]/8DZ
  687: ------------------------------------------------------------------
  688:         Bra
  689:         [^\x{ff}]
  690:         Ket
  691:         End
  692: ------------------------------------------------------------------
  693: Capturing subpattern count = 0
  694: Options: utf
  695: No first char
  696: No need char
  697: 
  698: /\x{100}abc(xyz(?1))/8DZ
  699: ------------------------------------------------------------------
  700:         Bra
  701:         \x{100}abc
  702:         CBra 1
  703:         xyz
  704:         Recurse
  705:         Ket
  706:         Ket
  707:         End
  708: ------------------------------------------------------------------
  709: Capturing subpattern count = 1
  710: Options: utf
  711: First char = \x{c4}
  712: Need char = 'z'
  713: 
  714: /a\x{1234}b/P8
  715:     a\x{1234}b
  716:  0: a\x{1234}b
  717: 
  718: /\777/8I
  719: Capturing subpattern count = 0
  720: Options: utf
  721: First char = \x{c7}
  722: Need char = \x{bf}
  723:   \x{1ff}
  724:  0: \x{1ff}
  725:   \777 
  726:  0: \x{1ff}
  727:   
  728: /\x{100}+\x{200}/8DZ
  729: ------------------------------------------------------------------
  730:         Bra
  731:         \x{100}++
  732:         \x{200}
  733:         Ket
  734:         End
  735: ------------------------------------------------------------------
  736: Capturing subpattern count = 0
  737: Options: utf
  738: First char = \x{c4}
  739: Need char = \x{80}
  740: 
  741: /\x{100}+X/8DZ
  742: ------------------------------------------------------------------
  743:         Bra
  744:         \x{100}++
  745:         X
  746:         Ket
  747:         End
  748: ------------------------------------------------------------------
  749: Capturing subpattern count = 0
  750: Options: utf
  751: First char = \x{c4}
  752: Need char = 'X'
  753: 
  754: /^[\QĀ\E-\QŐ\E/BZ8
  755: Failed: missing terminating ] for character class at offset 15
  756: 
  757: /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
  758:     
  759: /X/8
  760:     \x{d800}
  761: Error -10 (bad UTF-8 string) offset=0 reason=14
  762:     \x{d800}\?
  763: No match
  764:     \x{da00}
  765: Error -10 (bad UTF-8 string) offset=0 reason=14
  766:     \x{da00}\?
  767: No match
  768:     \x{dfff}
  769: Error -10 (bad UTF-8 string) offset=0 reason=14
  770:     \x{dfff}\?
  771: No match
  772:     \x{110000}    
  773: Error -10 (bad UTF-8 string) offset=0 reason=13
  774:     \x{110000}\?    
  775: No match
  776:     \x{2000000} 
  777: Error -10 (bad UTF-8 string) offset=0 reason=11
  778:     \x{2000000}\? 
  779: No match
  780:     \x{7fffffff} 
  781: Error -10 (bad UTF-8 string) offset=0 reason=12
  782:     \x{7fffffff}\? 
  783: No match
  784: 
  785: /(*UTF8)\x{1234}/
  786:   abcd\x{1234}pqr
  787:  0: \x{1234}
  788: 
  789: /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
  790: Capturing subpattern count = 0
  791: Options: bsr_unicode utf
  792: Forced newline sequence: CRLF
  793: First char = 'a'
  794: Need char = 'b'
  795: 
  796: /\h/SI8
  797: Capturing subpattern count = 0
  798: Options: utf
  799: No first char
  800: No need char
  801: Subject length lower bound = 1
  802: Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
  803:     ABC\x{09}
  804:  0: \x{09}
  805:     ABC\x{20}
  806:  0:  
  807:     ABC\x{a0}
  808:  0: \x{a0}
  809:     ABC\x{1680}
  810:  0: \x{1680}
  811:     ABC\x{180e}
  812:  0: \x{180e}
  813:     ABC\x{2000}
  814:  0: \x{2000}
  815:     ABC\x{202f} 
  816:  0: \x{202f}
  817:     ABC\x{205f} 
  818:  0: \x{205f}
  819:     ABC\x{3000} 
  820:  0: \x{3000}
  821: 
  822: /\v/SI8
  823: Capturing subpattern count = 0
  824: Options: utf
  825: No first char
  826: No need char
  827: Subject length lower bound = 1
  828: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  829:     ABC\x{0a}
  830:  0: \x{0a}
  831:     ABC\x{0b}
  832:  0: \x{0b}
  833:     ABC\x{0c}
  834:  0: \x{0c}
  835:     ABC\x{0d}
  836:  0: \x{0d}
  837:     ABC\x{85}
  838:  0: \x{85}
  839:     ABC\x{2028}
  840:  0: \x{2028}
  841: 
  842: /\h*A/SI8
  843: Capturing subpattern count = 0
  844: Options: utf
  845: No first char
  846: Need char = 'A'
  847: Subject length lower bound = 1
  848: Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
  849:     CDBABC
  850:  0: A
  851:     
  852: /\v+A/SI8
  853: Capturing subpattern count = 0
  854: Options: utf
  855: No first char
  856: Need char = 'A'
  857: Subject length lower bound = 2
  858: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  859: 
  860: /\s?xxx\s/8SI
  861: Capturing subpattern count = 0
  862: Options: utf
  863: No first char
  864: Need char = 'x'
  865: Subject length lower bound = 4
  866: Starting byte set: \x09 \x0a \x0b \x0c \x0d \x20 x 
  867: 
  868: /\sxxx\s/I8ST1
  869: Capturing subpattern count = 0
  870: Options: utf
  871: No first char
  872: Need char = 'x'
  873: Subject length lower bound = 5
  874: Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 
  875:     AB\x{85}xxx\x{a0}XYZ
  876:  0: \x{85}xxx\x{a0}
  877:     AB\x{a0}xxx\x{85}XYZ
  878:  0: \x{a0}xxx\x{85}
  879: 
  880: /\S \S/I8ST1
  881: Capturing subpattern count = 0
  882: Options: utf
  883: No first char
  884: Need char = ' '
  885: Subject length lower bound = 3
  886: Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e 
  887:   \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d 
  888:   \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ 
  889:   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e 
  890:   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 
  891:   \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 
  892:   \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 
  893:   \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 
  894:   \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
  895:     \x{a2} \x{84} 
  896:  0: \x{a2} \x{84}
  897:     A Z 
  898:  0: A Z
  899: 
  900: /a+/8
  901:     a\x{123}aa\>1
  902:  0: aa
  903:     a\x{123}aa\>2
  904: Error -11 (bad UTF-8 offset)
  905:     a\x{123}aa\>3
  906:  0: aa
  907:     a\x{123}aa\>4
  908:  0: a
  909:     a\x{123}aa\>5
  910: No match
  911:     a\x{123}aa\>6
  912: Error -24 (bad offset value)
  913: 
  914: /\x{1234}+/iS8I
  915: Capturing subpattern count = 0
  916: Options: caseless utf
  917: No first char
  918: No need char
  919: Subject length lower bound = 1
  920: Starting byte set: \xe1 
  921: 
  922: /\x{1234}+?/iS8I
  923: Capturing subpattern count = 0
  924: Options: caseless utf
  925: No first char
  926: No need char
  927: Subject length lower bound = 1
  928: Starting byte set: \xe1 
  929: 
  930: /\x{1234}++/iS8I
  931: Capturing subpattern count = 0
  932: Options: caseless utf
  933: No first char
  934: No need char
  935: Subject length lower bound = 1
  936: Starting byte set: \xe1 
  937: 
  938: /\x{1234}{2}/iS8I
  939: Capturing subpattern count = 0
  940: Options: caseless utf
  941: No first char
  942: No need char
  943: Subject length lower bound = 2
  944: Starting byte set: \xe1 
  945: 
  946: /[^\x{c4}]/8DZ
  947: ------------------------------------------------------------------
  948:         Bra
  949:         [^\x{c4}]
  950:         Ket
  951:         End
  952: ------------------------------------------------------------------
  953: Capturing subpattern count = 0
  954: Options: utf
  955: No first char
  956: No need char
  957: 
  958: /X+\x{200}/8DZ
  959: ------------------------------------------------------------------
  960:         Bra
  961:         X++
  962:         \x{200}
  963:         Ket
  964:         End
  965: ------------------------------------------------------------------
  966: Capturing subpattern count = 0
  967: Options: utf
  968: First char = 'X'
  969: Need char = \x{80}
  970: 
  971: /\R/SI8
  972: Capturing subpattern count = 0
  973: Options: utf
  974: No first char
  975: No need char
  976: Subject length lower bound = 1
  977: Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 
  978: 
  979: /\777/8DZ
  980: ------------------------------------------------------------------
  981:         Bra
  982:         \x{1ff}
  983:         Ket
  984:         End
  985: ------------------------------------------------------------------
  986: Capturing subpattern count = 0
  987: Options: utf
  988: First char = \x{c7}
  989: Need char = \x{bf}
  990: 
  991: /\w+\x{C4}/8BZ
  992: ------------------------------------------------------------------
  993:         Bra
  994:         \w++
  995:         \x{c4}
  996:         Ket
  997:         End
  998: ------------------------------------------------------------------
  999:     a\x{C4}\x{C4}
 1000:  0: a\x{c4}
 1001: 
 1002: /\w+\x{C4}/8BZT1
 1003: ------------------------------------------------------------------
 1004:         Bra
 1005:         \w+
 1006:         \x{c4}
 1007:         Ket
 1008:         End
 1009: ------------------------------------------------------------------
 1010:     a\x{C4}\x{C4}
 1011:  0: a\x{c4}\x{c4}
 1012:     
 1013: /\W+\x{C4}/8BZ
 1014: ------------------------------------------------------------------
 1015:         Bra
 1016:         \W+
 1017:         \x{c4}
 1018:         Ket
 1019:         End
 1020: ------------------------------------------------------------------
 1021:     !\x{C4}
 1022:  0: !\x{c4}
 1023:  
 1024: /\W+\x{C4}/8BZT1
 1025: ------------------------------------------------------------------
 1026:         Bra
 1027:         \W++
 1028:         \x{c4}
 1029:         Ket
 1030:         End
 1031: ------------------------------------------------------------------
 1032:     !\x{C4}
 1033:  0: !\x{c4}
 1034: 
 1035: /\W+\x{A1}/8BZ
 1036: ------------------------------------------------------------------
 1037:         Bra
 1038:         \W+
 1039:         \x{a1}
 1040:         Ket
 1041:         End
 1042: ------------------------------------------------------------------
 1043:     !\x{A1}
 1044:  0: !\x{a1}
 1045:  
 1046: /\W+\x{A1}/8BZT1
 1047: ------------------------------------------------------------------
 1048:         Bra
 1049:         \W+
 1050:         \x{a1}
 1051:         Ket
 1052:         End
 1053: ------------------------------------------------------------------
 1054:     !\x{A1}
 1055:  0: !\x{a1}
 1056: 
 1057: /X\s+\x{A0}/8BZ
 1058: ------------------------------------------------------------------
 1059:         Bra
 1060:         X
 1061:         \s++
 1062:         \x{a0}
 1063:         Ket
 1064:         End
 1065: ------------------------------------------------------------------
 1066:     X\x20\x{A0}\x{A0}
 1067:  0: X \x{a0}
 1068: 
 1069: /X\s+\x{A0}/8BZT1
 1070: ------------------------------------------------------------------
 1071:         Bra
 1072:         X
 1073:         \s+
 1074:         \x{a0}
 1075:         Ket
 1076:         End
 1077: ------------------------------------------------------------------
 1078:     X\x20\x{A0}\x{A0}
 1079:  0: X \x{a0}\x{a0}
 1080: 
 1081: /\S+\x{A0}/8BZ
 1082: ------------------------------------------------------------------
 1083:         Bra
 1084:         \S+
 1085:         \x{a0}
 1086:         Ket
 1087:         End
 1088: ------------------------------------------------------------------
 1089:     X\x{A0}\x{A0}
 1090:  0: X\x{a0}\x{a0}
 1091: 
 1092: /\S+\x{A0}/8BZT1
 1093: ------------------------------------------------------------------
 1094:         Bra
 1095:         \S++
 1096:         \x{a0}
 1097:         Ket
 1098:         End
 1099: ------------------------------------------------------------------
 1100:     X\x{A0}\x{A0}
 1101:  0: X\x{a0}
 1102: 
 1103: /\x{a0}+\s!/8BZ
 1104: ------------------------------------------------------------------
 1105:         Bra
 1106:         \x{a0}++
 1107:         \s
 1108:         !
 1109:         Ket
 1110:         End
 1111: ------------------------------------------------------------------
 1112:     \x{a0}\x20!
 1113:  0: \x{a0} !
 1114: 
 1115: /\x{a0}+\s!/8BZT1
 1116: ------------------------------------------------------------------
 1117:         Bra
 1118:         \x{a0}+
 1119:         \s
 1120:         !
 1121:         Ket
 1122:         End
 1123: ------------------------------------------------------------------
 1124:     \x{a0}\x20!
 1125:  0: \x{a0} !
 1126: 
 1127: /A/8
 1128:   \x{ff000041}
 1129: ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
 1130:   \x{7f000041} 
 1131: Error -10 (bad UTF-8 string) offset=0 reason=12
 1132: 
 1133: /(*UTF8)abc/9
 1134: Failed: setting UTF is disabled by the application at offset 0
 1135: 
 1136: /abc/89
 1137: Failed: setting UTF is disabled by the application at offset 0
 1138: 
 1139: /-- End of testinput15 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>