Annotation of embedaddon/pcre/testdata/testinput13, revision 1.1.1.1
1.1 misho 1: /-- These tests for Unicode property support test PCRE's API and show some of
2: the compiled code. They are not Perl-compatible. --/
3:
4: /[\p{L}]/DZ
5:
6: /[\p{^L}]/DZ
7:
8: /[\P{L}]/DZ
9:
10: /[\P{^L}]/DZ
11:
12: /[abc\p{L}\x{0660}]/8DZ
13:
14: /[\p{Nd}]/8DZ
15: 1234
16:
17: /[\p{Nd}+-]+/8DZ
18: 1234
19: 12-34
20: 12+\x{661}-34
21: ** Failers
22: abcd
23:
24: /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
25:
26: /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
27:
28: /AB\x{1fb0}/8DZ
29:
30: /AB\x{1fb0}/8DZi
31:
32: /[\x{105}-\x{109}]/8iDZ
33: \x{104}
34: \x{105}
35: \x{109}
36: ** Failers
37: \x{100}
38: \x{10a}
39:
40: /[z-\x{100}]/8iDZ
41: Z
42: z
43: \x{39c}
44: \x{178}
45: |
46: \x{80}
47: \x{ff}
48: \x{100}
49: \x{101}
50: ** Failers
51: \x{102}
52: Y
53: y
54:
55: /[z-\x{100}]/8DZi
56:
57: /(?:[\PPa*]*){8,}/
58:
59: /[\P{Any}]/BZ
60:
61: /[\P{Any}\E]/BZ
62:
63: /(\P{Yi}+\277)/
64:
65: /(\P{Yi}+\277)?/
66:
67: /(?<=\P{Yi}{3}A)X/
68:
69: /\p{Yi}+(\P{Yi}+)(?1)/
70:
71: /(\P{Yi}{2}\277)?/
72:
73: /[\P{Yi}A]/
74:
75: /[\P{Yi}\P{Yi}\P{Yi}A]/
76:
77: /[^\P{Yi}A]/
78:
79: /[^\P{Yi}\P{Yi}\P{Yi}A]/
80:
81: /(\P{Yi}*\277)*/
82:
83: /(\P{Yi}*?\277)*/
84:
85: /(\p{Yi}*+\277)*/
86:
87: /(\P{Yi}?\277)*/
88:
89: /(\P{Yi}??\277)*/
90:
91: /(\p{Yi}?+\277)*/
92:
93: /(\P{Yi}{0,3}\277)*/
94:
95: /(\P{Yi}{0,3}?\277)*/
96:
97: /(\p{Yi}{0,3}+\277)*/
98:
99: /\p{Zl}{2,3}+/8BZ
100: \xe2\x80\xa8\xe2\x80\xa8
101: \x{2028}\x{2028}\x{2028}
102:
103: /\p{Zl}/8BZ
104:
105: /\p{Lu}{3}+/8BZ
106:
107: /\pL{2}+/8BZ
108:
109: /\p{Cc}{2}+/8BZ
110:
111: /^\p{Cs}/8
112: \?\x{dfff}
113: ** Failers
114: \x{09f}
115:
116: /^\p{Sc}+/8
117: $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
118: \x{9f2}
119: ** Failers
120: X
121: \x{2c2}
122:
123: /^\p{Zs}/8
124: \ \
125: \x{a0}
126: \x{1680}
127: \x{180e}
128: \x{2000}
129: \x{2001}
130: ** Failers
131: \x{2028}
132: \x{200d}
133:
134: /-- These four are here rather than in test 6 because Perl has problems with
135: the negative versions of the properties. --/
136:
137: /\p{^Lu}/8i
138: 1234
139: ** Failers
140: ABC
141:
142: /\P{Lu}/8i
143: 1234
144: ** Failers
145: ABC
146:
147: /\p{Ll}/8i
148: a
149: Az
150: ** Failers
151: ABC
152:
153: /\p{Lu}/8i
154: A
155: a\x{10a0}B
156: ** Failers
157: a
158: \x{1d00}
159:
160: /[\x{c0}\x{391}]/8i
161: \x{c0}
162: \x{e0}
163:
164: /-- The next two are special cases where the lengths of the different cases of
165: the same character differ. The first went wrong with heap frame storage; the
166: second was broken in all cases. --/
167:
168: /^\x{023a}+?(\x{0130}+)/8i
169: \x{023a}\x{2c65}\x{0130}
170:
171: /^\x{023a}+([^X])/8i
172: \x{023a}\x{2c65}X
173:
174: /\x{c0}+\x{116}+/8i
175: \x{c0}\x{e0}\x{116}\x{117}
176:
177: /[\x{c0}\x{116}]+/8i
178: \x{c0}\x{e0}\x{116}\x{117}
179:
180: /(\x{de})\1/8i
181: \x{de}\x{de}
182: \x{de}\x{fe}
183: \x{fe}\x{fe}
184: \x{fe}\x{de}
185:
186: /^\x{c0}$/8i
187: \x{c0}
188: \x{e0}
189:
190: /^\x{e0}$/8i
191: \x{c0}
192: \x{e0}
193:
194: /-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
195: will match it only with UCP support, because without that it has no notion
196: of case for anything other than the ASCII letters. --/
197:
198: /((?i)[\x{c0}])/8
199: \x{c0}
200: \x{e0}
201:
202: /(?i:[\x{c0}])/8
203: \x{c0}
204: \x{e0}
205:
206: /-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8
207:
208: /^\X/8
209: A
210: A\x{300}BC
211: A\x{300}\x{301}\x{302}BC
212: *** Failers
213: \x{300}
214:
215: /-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
216:
217: /^\p{Xan}/8
218: ABCD
219: 1234
220: \x{6ca}
221: \x{a6c}
222: \x{10a7}
223: ** Failers
224: _ABC
225:
226: /^\p{Xan}+/8
227: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
228: ** Failers
229: _ABC
230:
231: /^\p{Xan}+?/8
232: \x{6ca}\x{a6c}\x{10a7}_
233:
234: /^\p{Xan}*/8
235: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
236:
237: /^\p{Xan}{2,9}/8
238: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
239:
240: /^\p{Xan}{2,9}?/8
241: \x{6ca}\x{a6c}\x{10a7}_
242:
243: /^[\p{Xan}]/8
244: ABCD1234_
245: 1234abcd_
246: \x{6ca}
247: \x{a6c}
248: \x{10a7}
249: ** Failers
250: _ABC
251:
252: /^[\p{Xan}]+/8
253: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
254: ** Failers
255: _ABC
256:
257: /^>\p{Xsp}/8
258: >\x{1680}\x{2028}\x{0b}
259: >\x{a0}
260: ** Failers
261: \x{0b}
262:
263: /^>\p{Xsp}+/8
264: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
265:
266: /^>\p{Xsp}+?/8
267: >\x{1680}\x{2028}\x{0b}
268:
269: /^>\p{Xsp}*/8
270: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
271:
272: /^>\p{Xsp}{2,9}/8
273: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
274:
275: /^>\p{Xsp}{2,9}?/8
276: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
277:
278: /^>[\p{Xsp}]/8
279: >\x{2028}\x{0b}
280:
281: /^>[\p{Xsp}]+/8
282: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
283:
284: /^>\p{Xps}/8
285: >\x{1680}\x{2028}\x{0b}
286: >\x{a0}
287: ** Failers
288: \x{0b}
289:
290: /^>\p{Xps}+/8
291: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
292:
293: /^>\p{Xps}+?/8
294: >\x{1680}\x{2028}\x{0b}
295:
296: /^>\p{Xps}*/8
297: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
298:
299: /^>\p{Xps}{2,9}/8
300: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
301:
302: /^>\p{Xps}{2,9}?/8
303: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
304:
305: /^>[\p{Xps}]/8
306: >\x{2028}\x{0b}
307:
308: /^>[\p{Xps}]+/8
309: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
310:
311: /^\p{Xwd}/8
312: ABCD
313: 1234
314: \x{6ca}
315: \x{a6c}
316: \x{10a7}
317: _ABC
318: ** Failers
319: []
320:
321: /^\p{Xwd}+/8
322: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
323:
324: /^\p{Xwd}+?/8
325: \x{6ca}\x{a6c}\x{10a7}_
326:
327: /^\p{Xwd}*/8
328: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
329:
330: /^\p{Xwd}{2,9}/8
331: A_B12\x{6ca}\x{a6c}\x{10a7}
332:
333: /^\p{Xwd}{2,9}?/8
334: \x{6ca}\x{a6c}\x{10a7}_
335:
336: /^[\p{Xwd}]/8
337: ABCD1234_
338: 1234abcd_
339: \x{6ca}
340: \x{a6c}
341: \x{10a7}
342: _ABC
343: ** Failers
344: []
345:
346: /^[\p{Xwd}]+/8
347: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
348:
349: /-- A check not in UTF-8 mode --/
350:
351: /^[\p{Xwd}]+/
352: ABCD1234_
353:
354: /-- Some negative checks --/
355:
356: /^[\P{Xwd}]+/8
357: !.+\x{019}\x{35a}AB
358:
359: /^[\p{^Xwd}]+/8
360: !.+\x{019}\x{35a}AB
361:
362: /[\D]/WBZ8
363: 1\x{3c8}2
364:
365: /[\d]/WBZ8
366: >\x{6f4}<
367:
368: /[\S]/WBZ8
369: \x{1680}\x{6f4}\x{1680}
370:
371: /[\s]/WBZ8
372: >\x{1680}<
373:
374: /[\W]/WBZ8
375: A\x{1712}B
376:
377: /[\w]/WBZ8
378: >\x{1723}<
379:
380: /\D/WBZ8
381: 1\x{3c8}2
382:
383: /\d/WBZ8
384: >\x{6f4}<
385:
386: /\S/WBZ8
387: \x{1680}\x{6f4}\x{1680}
388:
389: /\s/WBZ8
390: >\x{1680}>
391:
392: /\W/WBZ8
393: A\x{1712}B
394:
395: /\w/WBZ8
396: >\x{1723}<
397:
398: /[[:alpha:]]/WBZ
399:
400: /[[:lower:]]/WBZ
401:
402: /[[:upper:]]/WBZ
403:
404: /[[:alnum:]]/WBZ
405:
406: /[[:ascii:]]/WBZ
407:
408: /[[:blank:]]/WBZ
409:
410: /[[:cntrl:]]/WBZ
411:
412: /[[:digit:]]/WBZ
413:
414: /[[:graph:]]/WBZ
415:
416: /[[:print:]]/WBZ
417:
418: /[[:punct:]]/WBZ
419:
420: /[[:space:]]/WBZ
421:
422: /[[:word:]]/WBZ
423:
424: /[[:xdigit:]]/WBZ
425:
426: /-- Unicode properties for \b and \B --/
427:
428: /\b...\B/8W
429: abc_
430: \x{37e}abc\x{376}
431: \x{37e}\x{376}\x{371}\x{393}\x{394}
432: !\x{c0}++\x{c1}\x{c2}
433: !\x{c0}+++++
434:
435: /-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/
436:
437: /\b...\B/8
438: abc_
439: ** Failers
440: \x{37e}abc\x{376}
441: \x{37e}\x{376}\x{371}\x{393}\x{394}
442: !\x{c0}++\x{c1}\x{c2}
443: !\x{c0}+++++
444:
445: /-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/
446:
447: /\b...\B/W
448: abc_
449: !\x{c0}++\x{c1}\x{c2}
450: !\x{c0}+++++
451:
452: /-- POSIX interface --/
453:
454: /\w/P
455: +++\x{c2}
456:
457: /\w/WP
458: +++\x{c2}
459:
460: /-- Some of these are silly, but they check various combinations --/
461:
462: /[[:^alpha:][:^cntrl:]]+/8WBZ
463: 123
464: abc
465:
466: /[[:^cntrl:][:^alpha:]]+/8WBZ
467: 123
468: abc
469:
470: /[[:alpha:]]+/8WBZ
471: abc
472:
473: /[[:^alpha:]\S]+/8WBZ
474: 123
475: abc
476:
477: /[^\d]+/8WBZ
478: abc123
479: abc\x{123}
480: \x{660}abc
481:
482: /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI
483: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
484: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
485:
486: /\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
487:
488: /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
489:
490: /\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
491:
492: /\p{Han}+X\p{Greek}+\x{370}/BZ8
493:
494: /\p{Xan}+!\p{Xan}+A/BZ
495:
496: /\p{Xsp}+!\p{Xsp}\t/BZ
497:
498: /\p{Xps}+!\p{Xps}\t/BZ
499:
500: /\p{Xwd}+!\p{Xwd}_/BZ
501:
502: /A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
503:
504: /-- These behaved oddly in Perl, so they are kept in this test --/
505:
506: /(\x{23a}\x{23a}\x{23a})?\1/8i
507: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
508:
509: /(ȺȺȺ)?\1/8i
510: ȺȺȺⱥⱥ
511:
512: /(\x{23a}\x{23a}\x{23a})?\1/8i
513: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
514:
515: /(ȺȺȺ)?\1/8i
516: ȺȺȺⱥⱥⱥ
517:
518: /(\x{23a}\x{23a}\x{23a})\1/8i
519: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
520:
521: /(ȺȺȺ)\1/8i
522: ȺȺȺⱥⱥ
523:
524: /(\x{23a}\x{23a}\x{23a})\1/8i
525: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
526:
527: /(ȺȺȺ)\1/8i
528: ȺȺȺⱥⱥⱥ
529:
530: /(\x{2c65}\x{2c65})\1/8i
531: \x{2c65}\x{2c65}\x{23a}\x{23a}
532:
533: /(ⱥⱥ)\1/8i
534: ⱥⱥȺȺ
535:
536: /(\x{23a}\x{23a}\x{23a})\1Y/8i
537: X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
538:
539: /(\x{2c65}\x{2c65})\1Y/8i
540: X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
541:
542: /-- --/
543:
544: /-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
545:
546: /^[\p{Batak}]/8
547: \x{1bc0}
548: \x{1bff}
549: ** Failers
550: \x{1bf4}
551:
552: /^[\p{Brahmi}]/8
553: \x{11000}
554: \x{1106f}
555: ** Failers
556: \x{1104e}
557:
558: /^[\p{Mandaic}]/8
559: \x{840}
560: \x{85e}
561: ** Failers
562: \x{85c}
563: \x{85d}
564:
565: /-- --/
566:
567: /(\X*)(.)/s8
568: A\x{300}
569:
570: /^S(\X*)e(\X*)$/8
571: Stéréo
572:
573: /^\X/8
574: ́réo
575:
576: /^a\X41z/<JS>
577: aX41z
578: *** Failers
579: aAz
580:
581: /(?<=ab\Cde)X/8
582:
583: /-- End of testinput13 --/
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>