File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / testdata / testinput15
Revision 1.1.1.5 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:46:05 2014 UTC (10 years ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, HEAD
pcre 8.34

    1: /-- This set of tests is for UTF-8 support but not Unicode property support,
    2:     and is relevant only to the 8-bit library. --/
    3: 
    4: < forbid W
    5: 
    6: /X(\C{3})/8
    7:     X\x{1234}
    8: 
    9: /X(\C{4})/8
   10:     X\x{1234}YZ
   11:     
   12: /X\C*/8
   13:     XYZabcdce
   14:     
   15: /X\C*?/8
   16:     XYZabcde
   17:     
   18: /X\C{3,5}/8
   19:     Xabcdefg   
   20:     X\x{1234} 
   21:     X\x{1234}YZ
   22:     X\x{1234}\x{512}  
   23:     X\x{1234}\x{512}YZ
   24: 
   25: /X\C{3,5}?/8
   26:     Xabcdefg   
   27:     X\x{1234} 
   28:     X\x{1234}YZ
   29:     X\x{1234}\x{512}  
   30: 
   31: /a\Cb/8
   32:     aXb
   33:     a\nb
   34:     
   35: /a\C\Cb/8 
   36:     a\x{100}b 
   37: 
   38: /ab\Cde/8
   39:     abXde
   40: 
   41: /a\C\Cb/8 
   42:     a\x{100}b
   43:     ** Failers 
   44:     a\x{12257}b
   45: 
   46: /[]/8
   47: 
   48: //8
   49: 
   50: /xxx/8
   51: 
   52: /xxx/8?DZSSO
   53: 
   54: /badutf/8
   55:     \xdf
   56:     \xef
   57:     \xef\x80
   58:     \xf7
   59:     \xf7\x80
   60:     \xf7\x80\x80
   61:     \xfb
   62:     \xfb\x80
   63:     \xfb\x80\x80
   64:     \xfb\x80\x80\x80
   65:     \xfd
   66:     \xfd\x80
   67:     \xfd\x80\x80
   68:     \xfd\x80\x80\x80
   69:     \xfd\x80\x80\x80\x80
   70:     \xdf\x7f
   71:     \xef\x7f\x80
   72:     \xef\x80\x7f
   73:     \xf7\x7f\x80\x80
   74:     \xf7\x80\x7f\x80
   75:     \xf7\x80\x80\x7f
   76:     \xfb\x7f\x80\x80\x80
   77:     \xfb\x80\x7f\x80\x80
   78:     \xfb\x80\x80\x7f\x80
   79:     \xfb\x80\x80\x80\x7f
   80:     \xfd\x7f\x80\x80\x80\x80
   81:     \xfd\x80\x7f\x80\x80\x80
   82:     \xfd\x80\x80\x7f\x80\x80
   83:     \xfd\x80\x80\x80\x7f\x80
   84:     \xfd\x80\x80\x80\x80\x7f
   85:     \xed\xa0\x80
   86:     \xc0\x8f
   87:     \xe0\x80\x8f
   88:     \xf0\x80\x80\x8f
   89:     \xf8\x80\x80\x80\x8f
   90:     \xfc\x80\x80\x80\x80\x8f
   91:     \x80
   92:     \xfe
   93:     \xff
   94: 
   95: /badutf/8
   96:     \xfb\x80\x80\x80\x80
   97:     \xfd\x80\x80\x80\x80\x80
   98:     \xf7\xbf\xbf\xbf
   99: 
  100: /shortutf/8
  101:     \P\P\xdf
  102:     \P\P\xef
  103:     \P\P\xef\x80
  104:     \P\P\xf7
  105:     \P\P\xf7\x80
  106:     \P\P\xf7\x80\x80
  107:     \P\P\xfb
  108:     \P\P\xfb\x80
  109:     \P\P\xfb\x80\x80
  110:     \P\P\xfb\x80\x80\x80
  111:     \P\P\xfd
  112:     \P\P\xfd\x80
  113:     \P\P\xfd\x80\x80
  114:     \P\P\xfd\x80\x80\x80
  115:     \P\P\xfd\x80\x80\x80\x80
  116: 
  117: /anything/8
  118:     \xc0\x80
  119:     \xc1\x8f 
  120:     \xe0\x9f\x80
  121:     \xf0\x8f\x80\x80 
  122:     \xf8\x87\x80\x80\x80  
  123:     \xfc\x83\x80\x80\x80\x80
  124:     \xfe\x80\x80\x80\x80\x80  
  125:     \xff\x80\x80\x80\x80\x80  
  126:     \xc3\x8f
  127:     \xe0\xaf\x80
  128:     \xe1\x80\x80
  129:     \xf0\x9f\x80\x80 
  130:     \xf1\x8f\x80\x80 
  131:     \xf8\x88\x80\x80\x80  
  132:     \xf9\x87\x80\x80\x80  
  133:     \xfc\x84\x80\x80\x80\x80
  134:     \xfd\x83\x80\x80\x80\x80
  135:     \?\xf8\x88\x80\x80\x80  
  136:     \?\xf9\x87\x80\x80\x80  
  137:     \?\xfc\x84\x80\x80\x80\x80
  138:     \?\xfd\x83\x80\x80\x80\x80
  139: 
  140: /\x{100}/8DZ
  141: 
  142: /\x{1000}/8DZ
  143: 
  144: /\x{10000}/8DZ
  145: 
  146: /\x{100000}/8DZ
  147: 
  148: /\x{10ffff}/8DZ
  149: 
  150: /[\x{ff}]/8DZ
  151: 
  152: /[\x{100}]/8DZ
  153: 
  154: /\x80/8DZ
  155: 
  156: /\xff/8DZ
  157: 
  158: /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
  159:     \x{D55c}\x{ad6d}\x{C5B4} 
  160: 
  161: /\x{65e5}\x{672c}\x{8a9e}/DZ8
  162:     \x{65e5}\x{672c}\x{8a9e}
  163: 
  164: /\x{80}/DZ8
  165: 
  166: /\x{084}/DZ8
  167: 
  168: /\x{104}/DZ8
  169: 
  170: /\x{861}/DZ8
  171: 
  172: /\x{212ab}/DZ8
  173: 
  174: /-- This one is here not because it's different to Perl, but because the way
  175: the captured single-byte is displayed. (In Perl it becomes a character, and you
  176: can't tell the difference.) --/
  177:     
  178: /X(\C)(.*)/8
  179:     X\x{1234}
  180:     X\nabc 
  181: 
  182: /-- This one is here because Perl gives out a grumbly error message (quite 
  183: correctly, but that messes up comparisons). --/
  184:     
  185: /a\Cb/8
  186:     *** Failers 
  187:     a\x{100}b 
  188:     
  189: /[^ab\xC0-\xF0]/8SDZ
  190:     \x{f1}
  191:     \x{bf}
  192:     \x{100}
  193:     \x{1000}   
  194:     *** Failers
  195:     \x{c0} 
  196:     \x{f0} 
  197: 
  198: /Ā{3,4}/8SDZ
  199:   \x{100}\x{100}\x{100}\x{100\x{100}
  200: 
  201: /(\x{100}+|x)/8SDZ
  202: 
  203: /(\x{100}*a|x)/8SDZ
  204: 
  205: /(\x{100}{0,2}a|x)/8SDZ
  206: 
  207: /(\x{100}{1,2}a|x)/8SDZ
  208: 
  209: /\x{100}/8DZ
  210: 
  211: /a\x{100}\x{101}*/8DZ
  212: 
  213: /a\x{100}\x{101}+/8DZ
  214: 
  215: /[^\x{c4}]/DZ
  216: 
  217: /[\x{100}]/8DZ
  218:     \x{100}
  219:     Z\x{100}
  220:     \x{100}Z
  221:     *** Failers 
  222: 
  223: /[\xff]/DZ8
  224:     >\x{ff}<
  225: 
  226: /[^\xff]/8DZ
  227: 
  228: /\x{100}abc(xyz(?1))/8DZ
  229: 
  230: /a\x{1234}b/P8
  231:     a\x{1234}b
  232: 
  233: /\777/8I
  234:   \x{1ff}
  235:   \777 
  236:   
  237: /\x{100}+\x{200}/8DZ
  238: 
  239: /\x{100}+X/8DZ
  240: 
  241: /^[\QĀ\E-\QŐ\E/BZ8
  242: 
  243: /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
  244:     
  245: /X/8
  246:     \x{d800}
  247:     \x{d800}\?
  248:     \x{da00}
  249:     \x{da00}\?
  250:     \x{dfff}
  251:     \x{dfff}\?
  252:     \x{110000}    
  253:     \x{110000}\?    
  254:     \x{2000000} 
  255:     \x{2000000}\? 
  256:     \x{7fffffff} 
  257:     \x{7fffffff}\? 
  258: 
  259: /(*UTF8)\x{1234}/
  260:   abcd\x{1234}pqr
  261: 
  262: /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
  263: 
  264: /\h/SI8
  265:     ABC\x{09}
  266:     ABC\x{20}
  267:     ABC\x{a0}
  268:     ABC\x{1680}
  269:     ABC\x{180e}
  270:     ABC\x{2000}
  271:     ABC\x{202f} 
  272:     ABC\x{205f} 
  273:     ABC\x{3000} 
  274: 
  275: /\v/SI8
  276:     ABC\x{0a}
  277:     ABC\x{0b}
  278:     ABC\x{0c}
  279:     ABC\x{0d}
  280:     ABC\x{85}
  281:     ABC\x{2028}
  282: 
  283: /\h*A/SI8
  284:     CDBABC
  285:     
  286: /\v+A/SI8
  287: 
  288: /\s?xxx\s/8SI
  289: 
  290: /\sxxx\s/I8ST1
  291:     AB\x{85}xxx\x{a0}XYZ
  292:     AB\x{a0}xxx\x{85}XYZ
  293: 
  294: /\S \S/I8ST1
  295:     \x{a2} \x{84} 
  296:     A Z 
  297: 
  298: /a+/8
  299:     a\x{123}aa\>1
  300:     a\x{123}aa\>2
  301:     a\x{123}aa\>3
  302:     a\x{123}aa\>4
  303:     a\x{123}aa\>5
  304:     a\x{123}aa\>6
  305: 
  306: /\x{1234}+/iS8I
  307: 
  308: /\x{1234}+?/iS8I
  309: 
  310: /\x{1234}++/iS8I
  311: 
  312: /\x{1234}{2}/iS8I
  313: 
  314: /[^\x{c4}]/8DZ
  315: 
  316: /X+\x{200}/8DZ
  317: 
  318: /\R/SI8
  319: 
  320: /\777/8DZ
  321: 
  322: /\w+\x{C4}/8BZ
  323:     a\x{C4}\x{C4}
  324: 
  325: /\w+\x{C4}/8BZT1
  326:     a\x{C4}\x{C4}
  327:     
  328: /\W+\x{C4}/8BZ
  329:     !\x{C4}
  330:  
  331: /\W+\x{C4}/8BZT1
  332:     !\x{C4}
  333: 
  334: /\W+\x{A1}/8BZ
  335:     !\x{A1}
  336:  
  337: /\W+\x{A1}/8BZT1
  338:     !\x{A1}
  339: 
  340: /X\s+\x{A0}/8BZ
  341:     X\x20\x{A0}\x{A0}
  342: 
  343: /X\s+\x{A0}/8BZT1
  344:     X\x20\x{A0}\x{A0}
  345: 
  346: /\S+\x{A0}/8BZ
  347:     X\x{A0}\x{A0}
  348: 
  349: /\S+\x{A0}/8BZT1
  350:     X\x{A0}\x{A0}
  351: 
  352: /\x{a0}+\s!/8BZ
  353:     \x{a0}\x20!
  354: 
  355: /\x{a0}+\s!/8BZT1
  356:     \x{a0}\x20!
  357: 
  358: /A/8
  359:   \x{ff000041}
  360:   \x{7f000041} 
  361: 
  362: /(*UTF8)abc/9
  363: 
  364: /abc/89
  365: 
  366: /-- End of testinput15 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>