1: /-- These tests for Unicode property support test PCRE's API and show some of
2: the compiled code. They are not Perl-compatible. --/
3:
4: /[\p{L}]/DZ
5: ------------------------------------------------------------------
6: Bra
7: [\p{L}]
8: Ket
9: End
10: ------------------------------------------------------------------
11: Capturing subpattern count = 0
12: No options
13: No first char
14: No need char
15:
16: /[\p{^L}]/DZ
17: ------------------------------------------------------------------
18: Bra
19: [\P{L}]
20: Ket
21: End
22: ------------------------------------------------------------------
23: Capturing subpattern count = 0
24: No options
25: No first char
26: No need char
27:
28: /[\P{L}]/DZ
29: ------------------------------------------------------------------
30: Bra
31: [\P{L}]
32: Ket
33: End
34: ------------------------------------------------------------------
35: Capturing subpattern count = 0
36: No options
37: No first char
38: No need char
39:
40: /[\P{^L}]/DZ
41: ------------------------------------------------------------------
42: Bra
43: [\p{L}]
44: Ket
45: End
46: ------------------------------------------------------------------
47: Capturing subpattern count = 0
48: No options
49: No first char
50: No need char
51:
52: /[abc\p{L}\x{0660}]/8DZ
53: ------------------------------------------------------------------
54: Bra
55: [a-c\p{L}\x{660}]
56: Ket
57: End
58: ------------------------------------------------------------------
59: Capturing subpattern count = 0
60: Options: utf
61: No first char
62: No need char
63:
64: /[\p{Nd}]/8DZ
65: ------------------------------------------------------------------
66: Bra
67: [\p{Nd}]
68: Ket
69: End
70: ------------------------------------------------------------------
71: Capturing subpattern count = 0
72: Options: utf
73: No first char
74: No need char
75: 1234
76: 0: 1
77:
78: /[\p{Nd}+-]+/8DZ
79: ------------------------------------------------------------------
80: Bra
81: [+\-\p{Nd}]+
82: Ket
83: End
84: ------------------------------------------------------------------
85: Capturing subpattern count = 0
86: Options: utf
87: No first char
88: No need char
89: 1234
90: 0: 1234
91: 12-34
92: 0: 12-34
93: 12+\x{661}-34
94: 0: 12+\x{661}-34
95: ** Failers
96: No match
97: abcd
98: No match
99:
100: /[\x{105}-\x{109}]/8iDZ
101: ------------------------------------------------------------------
102: Bra
103: [\x{104}-\x{109}]
104: Ket
105: End
106: ------------------------------------------------------------------
107: Capturing subpattern count = 0
108: Options: caseless utf
109: No first char
110: No need char
111: \x{104}
112: 0: \x{104}
113: \x{105}
114: 0: \x{105}
115: \x{109}
116: 0: \x{109}
117: ** Failers
118: No match
119: \x{100}
120: No match
121: \x{10a}
122: No match
123:
124: /[z-\x{100}]/8iDZ
125: ------------------------------------------------------------------
126: Bra
127: [Z\x{39c}\x{178}z-\x{101}]
128: Ket
129: End
130: ------------------------------------------------------------------
131: Capturing subpattern count = 0
132: Options: caseless utf
133: No first char
134: No need char
135: Z
136: 0: Z
137: z
138: 0: z
139: \x{39c}
140: 0: \x{39c}
141: \x{178}
142: 0: \x{178}
143: |
144: 0: |
145: \x{80}
146: 0: \x{80}
147: \x{ff}
148: 0: \x{ff}
149: \x{100}
150: 0: \x{100}
151: \x{101}
152: 0: \x{101}
153: ** Failers
154: No match
155: \x{102}
156: No match
157: Y
158: No match
159: y
160: No match
161:
162: /[z-\x{100}]/8DZi
163: ------------------------------------------------------------------
164: Bra
165: [Z\x{39c}\x{178}z-\x{101}]
166: Ket
167: End
168: ------------------------------------------------------------------
169: Capturing subpattern count = 0
170: Options: caseless utf
171: No first char
172: No need char
173:
174: /(?:[\PPa*]*){8,}/
175:
176: /[\P{Any}]/BZ
177: ------------------------------------------------------------------
178: Bra
179: [\P{Any}]
180: Ket
181: End
182: ------------------------------------------------------------------
183:
184: /[\P{Any}\E]/BZ
185: ------------------------------------------------------------------
186: Bra
187: [\P{Any}]
188: Ket
189: End
190: ------------------------------------------------------------------
191:
192: /(\P{Yi}+\277)/
193:
194: /(\P{Yi}+\277)?/
195:
196: /(?<=\P{Yi}{3}A)X/
197:
198: /\p{Yi}+(\P{Yi}+)(?1)/
199:
200: /(\P{Yi}{2}\277)?/
201:
202: /[\P{Yi}A]/
203:
204: /[\P{Yi}\P{Yi}\P{Yi}A]/
205:
206: /[^\P{Yi}A]/
207:
208: /[^\P{Yi}\P{Yi}\P{Yi}A]/
209:
210: /(\P{Yi}*\277)*/
211:
212: /(\P{Yi}*?\277)*/
213:
214: /(\p{Yi}*+\277)*/
215:
216: /(\P{Yi}?\277)*/
217:
218: /(\P{Yi}??\277)*/
219:
220: /(\p{Yi}?+\277)*/
221:
222: /(\P{Yi}{0,3}\277)*/
223:
224: /(\P{Yi}{0,3}?\277)*/
225:
226: /(\p{Yi}{0,3}+\277)*/
227:
228: /\p{Zl}{2,3}+/8BZ
229: ------------------------------------------------------------------
230: Bra
231: prop Zl {2}
232: prop Zl ?+
233: Ket
234: End
235: ------------------------------------------------------------------
236: \xe2\x80\xa8\xe2\x80\xa8
237: 0: \x{2028}\x{2028}
238: \x{2028}\x{2028}\x{2028}
239: 0: \x{2028}\x{2028}\x{2028}
240:
241: /\p{Zl}/8BZ
242: ------------------------------------------------------------------
243: Bra
244: prop Zl
245: Ket
246: End
247: ------------------------------------------------------------------
248:
249: /\p{Lu}{3}+/8BZ
250: ------------------------------------------------------------------
251: Bra
252: prop Lu {3}
253: Ket
254: End
255: ------------------------------------------------------------------
256:
257: /\pL{2}+/8BZ
258: ------------------------------------------------------------------
259: Bra
260: prop L {2}
261: Ket
262: End
263: ------------------------------------------------------------------
264:
265: /\p{Cc}{2}+/8BZ
266: ------------------------------------------------------------------
267: Bra
268: prop Cc {2}
269: Ket
270: End
271: ------------------------------------------------------------------
272:
273: /^\p{Cs}/8
274: \?\x{dfff}
275: 0: \x{dfff}
276: ** Failers
277: No match
278: \x{09f}
279: No match
280:
281: /^\p{Sc}+/8
282: $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
283: 0: $\x{a2}\x{a3}\x{a4}\x{a5}
284: \x{9f2}
285: 0: \x{9f2}
286: ** Failers
287: No match
288: X
289: No match
290: \x{2c2}
291: No match
292:
293: /^\p{Zs}/8
294: \ \
295: 0:
296: \x{a0}
297: 0: \x{a0}
298: \x{1680}
299: 0: \x{1680}
300: \x{180e}
301: 0: \x{180e}
302: \x{2000}
303: 0: \x{2000}
304: \x{2001}
305: 0: \x{2001}
306: ** Failers
307: No match
308: \x{2028}
309: No match
310: \x{200d}
311: No match
312:
313: /-- These four are here rather than in test 6 because Perl has problems with
314: the negative versions of the properties. --/
315:
316: /\p{^Lu}/8i
317: 1234
318: 0: 1
319: ** Failers
320: 0: *
321: ABC
322: No match
323:
324: /\P{Lu}/8i
325: 1234
326: 0: 1
327: ** Failers
328: 0: *
329: ABC
330: No match
331:
332: /\p{Ll}/8i
333: a
334: 0: a
335: Az
336: 0: z
337: ** Failers
338: 0: a
339: ABC
340: No match
341:
342: /\p{Lu}/8i
343: A
344: 0: A
345: a\x{10a0}B
346: 0: \x{10a0}
347: ** Failers
348: 0: F
349: a
350: No match
351: \x{1d00}
352: No match
353:
354: /[\x{c0}\x{391}]/8i
355: \x{c0}
356: 0: \x{c0}
357: \x{e0}
358: 0: \x{e0}
359:
360: /-- The next two are special cases where the lengths of the different cases of
361: the same character differ. The first went wrong with heap frame storage; the
362: second was broken in all cases. --/
363:
364: /^\x{023a}+?(\x{0130}+)/8i
365: \x{023a}\x{2c65}\x{0130}
366: 0: \x{23a}\x{2c65}\x{130}
367: 1: \x{130}
368:
369: /^\x{023a}+([^X])/8i
370: \x{023a}\x{2c65}X
371: 0: \x{23a}\x{2c65}
372: 1: \x{2c65}
373:
374: /\x{c0}+\x{116}+/8i
375: \x{c0}\x{e0}\x{116}\x{117}
376: 0: \x{c0}\x{e0}\x{116}\x{117}
377:
378: /[\x{c0}\x{116}]+/8i
379: \x{c0}\x{e0}\x{116}\x{117}
380: 0: \x{c0}\x{e0}\x{116}\x{117}
381:
382: /(\x{de})\1/8i
383: \x{de}\x{de}
384: 0: \x{de}\x{de}
385: 1: \x{de}
386: \x{de}\x{fe}
387: 0: \x{de}\x{fe}
388: 1: \x{de}
389: \x{fe}\x{fe}
390: 0: \x{fe}\x{fe}
391: 1: \x{fe}
392: \x{fe}\x{de}
393: 0: \x{fe}\x{de}
394: 1: \x{fe}
395:
396: /^\x{c0}$/8i
397: \x{c0}
398: 0: \x{c0}
399: \x{e0}
400: 0: \x{e0}
401:
402: /^\x{e0}$/8i
403: \x{c0}
404: 0: \x{c0}
405: \x{e0}
406: 0: \x{e0}
407:
408: /-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
409: will match it only with UCP support, because without that it has no notion
410: of case for anything other than the ASCII letters. --/
411:
412: /((?i)[\x{c0}])/8
413: \x{c0}
414: 0: \x{c0}
415: 1: \x{c0}
416: \x{e0}
417: 0: \x{e0}
418: 1: \x{e0}
419:
420: /(?i:[\x{c0}])/8
421: \x{c0}
422: 0: \x{c0}
423: \x{e0}
424: 0: \x{e0}
425:
426: /-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8
427:
428: /^\X/8
429: A
430: 0: A
431: A\x{300}BC
432: 0: A\x{300}
433: A\x{300}\x{301}\x{302}BC
434: 0: A\x{300}\x{301}\x{302}
435: *** Failers
436: 0: *
437: \x{300}
438: No match
439:
440: /-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
441:
442: /^\p{Xan}/8
443: ABCD
444: 0: A
445: 1234
446: 0: 1
447: \x{6ca}
448: 0: \x{6ca}
449: \x{a6c}
450: 0: \x{a6c}
451: \x{10a7}
452: 0: \x{10a7}
453: ** Failers
454: No match
455: _ABC
456: No match
457:
458: /^\p{Xan}+/8
459: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
460: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
461: ** Failers
462: No match
463: _ABC
464: No match
465:
466: /^\p{Xan}+?/8
467: \x{6ca}\x{a6c}\x{10a7}_
468: 0: \x{6ca}
469:
470: /^\p{Xan}*/8
471: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
472: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
473:
474: /^\p{Xan}{2,9}/8
475: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
476: 0: ABCD1234\x{6ca}
477:
478: /^\p{Xan}{2,9}?/8
479: \x{6ca}\x{a6c}\x{10a7}_
480: 0: \x{6ca}\x{a6c}
481:
482: /^[\p{Xan}]/8
483: ABCD1234_
484: 0: A
485: 1234abcd_
486: 0: 1
487: \x{6ca}
488: 0: \x{6ca}
489: \x{a6c}
490: 0: \x{a6c}
491: \x{10a7}
492: 0: \x{10a7}
493: ** Failers
494: No match
495: _ABC
496: No match
497:
498: /^[\p{Xan}]+/8
499: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
500: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
501: ** Failers
502: No match
503: _ABC
504: No match
505:
506: /^>\p{Xsp}/8
507: >\x{1680}\x{2028}\x{0b}
508: 0: >\x{1680}
509: >\x{a0}
510: 0: >\x{a0}
511: ** Failers
512: No match
513: \x{0b}
514: No match
515:
516: /^>\p{Xsp}+/8
517: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
518: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
519:
520: /^>\p{Xsp}+?/8
521: >\x{1680}\x{2028}\x{0b}
522: 0: >\x{1680}
523:
524: /^>\p{Xsp}*/8
525: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
526: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
527:
528: /^>\p{Xsp}{2,9}/8
529: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
530: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
531:
532: /^>\p{Xsp}{2,9}?/8
533: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
534: 0: > \x{09}
535:
536: /^>[\p{Xsp}]/8
537: >\x{2028}\x{0b}
538: 0: >\x{2028}
539:
540: /^>[\p{Xsp}]+/8
541: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
542: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
543:
544: /^>\p{Xps}/8
545: >\x{1680}\x{2028}\x{0b}
546: 0: >\x{1680}
547: >\x{a0}
548: 0: >\x{a0}
549: ** Failers
550: No match
551: \x{0b}
552: No match
553:
554: /^>\p{Xps}+/8
555: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
556: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
557:
558: /^>\p{Xps}+?/8
559: >\x{1680}\x{2028}\x{0b}
560: 0: >\x{1680}
561:
562: /^>\p{Xps}*/8
563: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
564: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
565:
566: /^>\p{Xps}{2,9}/8
567: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
568: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
569:
570: /^>\p{Xps}{2,9}?/8
571: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
572: 0: > \x{09}
573:
574: /^>[\p{Xps}]/8
575: >\x{2028}\x{0b}
576: 0: >\x{2028}
577:
578: /^>[\p{Xps}]+/8
579: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
580: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
581:
582: /^\p{Xwd}/8
583: ABCD
584: 0: A
585: 1234
586: 0: 1
587: \x{6ca}
588: 0: \x{6ca}
589: \x{a6c}
590: 0: \x{a6c}
591: \x{10a7}
592: 0: \x{10a7}
593: _ABC
594: 0: _
595: ** Failers
596: No match
597: []
598: No match
599:
600: /^\p{Xwd}+/8
601: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
602: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
603:
604: /^\p{Xwd}+?/8
605: \x{6ca}\x{a6c}\x{10a7}_
606: 0: \x{6ca}
607:
608: /^\p{Xwd}*/8
609: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
610: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
611:
612: /^\p{Xwd}{2,9}/8
613: A_B12\x{6ca}\x{a6c}\x{10a7}
614: 0: A_B12\x{6ca}\x{a6c}\x{10a7}
615:
616: /^\p{Xwd}{2,9}?/8
617: \x{6ca}\x{a6c}\x{10a7}_
618: 0: \x{6ca}\x{a6c}
619:
620: /^[\p{Xwd}]/8
621: ABCD1234_
622: 0: A
623: 1234abcd_
624: 0: 1
625: \x{6ca}
626: 0: \x{6ca}
627: \x{a6c}
628: 0: \x{a6c}
629: \x{10a7}
630: 0: \x{10a7}
631: _ABC
632: 0: _
633: ** Failers
634: No match
635: []
636: No match
637:
638: /^[\p{Xwd}]+/8
639: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
640: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
641:
642: /-- A check not in UTF-8 mode --/
643:
644: /^[\p{Xwd}]+/
645: ABCD1234_
646: 0: ABCD1234_
647:
648: /-- Some negative checks --/
649:
650: /^[\P{Xwd}]+/8
651: !.+\x{019}\x{35a}AB
652: 0: !.+\x{19}\x{35a}
653:
654: /^[\p{^Xwd}]+/8
655: !.+\x{019}\x{35a}AB
656: 0: !.+\x{19}\x{35a}
657:
658: /[\D]/WBZ8
659: ------------------------------------------------------------------
660: Bra
661: [\P{Nd}]
662: Ket
663: End
664: ------------------------------------------------------------------
665: 1\x{3c8}2
666: 0: \x{3c8}
667:
668: /[\d]/WBZ8
669: ------------------------------------------------------------------
670: Bra
671: [\p{Nd}]
672: Ket
673: End
674: ------------------------------------------------------------------
675: >\x{6f4}<
676: 0: \x{6f4}
677:
678: /[\S]/WBZ8
679: ------------------------------------------------------------------
680: Bra
681: [\P{Xsp}]
682: Ket
683: End
684: ------------------------------------------------------------------
685: \x{1680}\x{6f4}\x{1680}
686: 0: \x{6f4}
687:
688: /[\s]/WBZ8
689: ------------------------------------------------------------------
690: Bra
691: [\p{Xsp}]
692: Ket
693: End
694: ------------------------------------------------------------------
695: >\x{1680}<
696: 0: \x{1680}
697:
698: /[\W]/WBZ8
699: ------------------------------------------------------------------
700: Bra
701: [\P{Xwd}]
702: Ket
703: End
704: ------------------------------------------------------------------
705: A\x{1712}B
706: 0: \x{1712}
707:
708: /[\w]/WBZ8
709: ------------------------------------------------------------------
710: Bra
711: [\p{Xwd}]
712: Ket
713: End
714: ------------------------------------------------------------------
715: >\x{1723}<
716: 0: \x{1723}
717:
718: /\D/WBZ8
719: ------------------------------------------------------------------
720: Bra
721: notprop Nd
722: Ket
723: End
724: ------------------------------------------------------------------
725: 1\x{3c8}2
726: 0: \x{3c8}
727:
728: /\d/WBZ8
729: ------------------------------------------------------------------
730: Bra
731: prop Nd
732: Ket
733: End
734: ------------------------------------------------------------------
735: >\x{6f4}<
736: 0: \x{6f4}
737:
738: /\S/WBZ8
739: ------------------------------------------------------------------
740: Bra
741: notprop Xsp
742: Ket
743: End
744: ------------------------------------------------------------------
745: \x{1680}\x{6f4}\x{1680}
746: 0: \x{6f4}
747:
748: /\s/WBZ8
749: ------------------------------------------------------------------
750: Bra
751: prop Xsp
752: Ket
753: End
754: ------------------------------------------------------------------
755: >\x{1680}>
756: 0: \x{1680}
757:
758: /\W/WBZ8
759: ------------------------------------------------------------------
760: Bra
761: notprop Xwd
762: Ket
763: End
764: ------------------------------------------------------------------
765: A\x{1712}B
766: 0: \x{1712}
767:
768: /\w/WBZ8
769: ------------------------------------------------------------------
770: Bra
771: prop Xwd
772: Ket
773: End
774: ------------------------------------------------------------------
775: >\x{1723}<
776: 0: \x{1723}
777:
778: /[[:alpha:]]/WBZ
779: ------------------------------------------------------------------
780: Bra
781: [\p{L}]
782: Ket
783: End
784: ------------------------------------------------------------------
785:
786: /[[:lower:]]/WBZ
787: ------------------------------------------------------------------
788: Bra
789: [\p{Ll}]
790: Ket
791: End
792: ------------------------------------------------------------------
793:
794: /[[:upper:]]/WBZ
795: ------------------------------------------------------------------
796: Bra
797: [\p{Lu}]
798: Ket
799: End
800: ------------------------------------------------------------------
801:
802: /[[:alnum:]]/WBZ
803: ------------------------------------------------------------------
804: Bra
805: [\p{Xan}]
806: Ket
807: End
808: ------------------------------------------------------------------
809:
810: /[[:ascii:]]/WBZ
811: ------------------------------------------------------------------
812: Bra
813: [\x00-\x7f]
814: Ket
815: End
816: ------------------------------------------------------------------
817:
818: /[[:cntrl:]]/WBZ
819: ------------------------------------------------------------------
820: Bra
821: [\x00-\x1f\x7f]
822: Ket
823: End
824: ------------------------------------------------------------------
825:
826: /[[:digit:]]/WBZ
827: ------------------------------------------------------------------
828: Bra
829: [\p{Nd}]
830: Ket
831: End
832: ------------------------------------------------------------------
833:
834: /[[:graph:]]/WBZ
835: ------------------------------------------------------------------
836: Bra
837: [!-~]
838: Ket
839: End
840: ------------------------------------------------------------------
841:
842: /[[:print:]]/WBZ
843: ------------------------------------------------------------------
844: Bra
845: [ -~]
846: Ket
847: End
848: ------------------------------------------------------------------
849:
850: /[[:punct:]]/WBZ
851: ------------------------------------------------------------------
852: Bra
853: [!-/:-@[-`{-~]
854: Ket
855: End
856: ------------------------------------------------------------------
857:
858: /[[:space:]]/WBZ
859: ------------------------------------------------------------------
860: Bra
861: [\p{Xps}]
862: Ket
863: End
864: ------------------------------------------------------------------
865:
866: /[[:word:]]/WBZ
867: ------------------------------------------------------------------
868: Bra
869: [\p{Xwd}]
870: Ket
871: End
872: ------------------------------------------------------------------
873:
874: /[[:xdigit:]]/WBZ
875: ------------------------------------------------------------------
876: Bra
877: [0-9A-Fa-f]
878: Ket
879: End
880: ------------------------------------------------------------------
881:
882: /-- Unicode properties for \b abd \B --/
883:
884: /\b...\B/8W
885: abc_
886: 0: abc
887: \x{37e}abc\x{376}
888: 0: abc
889: \x{37e}\x{376}\x{371}\x{393}\x{394}
890: 0: \x{376}\x{371}\x{393}
891: !\x{c0}++\x{c1}\x{c2}
892: 0: ++\x{c1}
893: !\x{c0}+++++
894: 0: \x{c0}++
895:
896: /-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/
897:
898: /\b...\B/8
899: abc_
900: 0: abc
901: ** Failers
902: 0: Fai
903: \x{37e}abc\x{376}
904: No match
905: \x{37e}\x{376}\x{371}\x{393}\x{394}
906: No match
907: !\x{c0}++\x{c1}\x{c2}
908: No match
909: !\x{c0}+++++
910: No match
911:
912: /-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/
913:
914: /\b...\B/W
915: abc_
916: 0: abc
917: !\x{c0}++\x{c1}\x{c2}
918: 0: ++\xc1
919: !\x{c0}+++++
920: 0: \xc0++
921:
922: /-- Some of these are silly, but they check various combinations --/
923:
924: /[[:^alpha:][:^cntrl:]]+/8WBZ
925: ------------------------------------------------------------------
926: Bra
927: [ -~\x80-\xff\P{L}]+
928: Ket
929: End
930: ------------------------------------------------------------------
931: 123
932: 0: 123
933: abc
934: 0: abc
935:
936: /[[:^cntrl:][:^alpha:]]+/8WBZ
937: ------------------------------------------------------------------
938: Bra
939: [ -~\x80-\xff\P{L}]+
940: Ket
941: End
942: ------------------------------------------------------------------
943: 123
944: 0: 123
945: abc
946: 0: abc
947:
948: /[[:alpha:]]+/8WBZ
949: ------------------------------------------------------------------
950: Bra
951: [\p{L}]+
952: Ket
953: End
954: ------------------------------------------------------------------
955: abc
956: 0: abc
957:
958: /[[:^alpha:]\S]+/8WBZ
959: ------------------------------------------------------------------
960: Bra
961: [\P{L}\P{Xsp}]+
962: Ket
963: End
964: ------------------------------------------------------------------
965: 123
966: 0: 123
967: abc
968: 0: abc
969:
970: /[^\d]+/8WBZ
971: ------------------------------------------------------------------
972: Bra
973: [^\p{Nd}]+
974: Ket
975: End
976: ------------------------------------------------------------------
977: abc123
978: 0: abc
979: abc\x{123}
980: 0: abc\x{123}
981: \x{660}abc
982: 0: abc
983:
984: /\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
985: ------------------------------------------------------------------
986: Bra
987: prop Lu ++
988: 9
989: prop Lu +
990: B
991: prop Lu ++
992: b
993: Ket
994: End
995: ------------------------------------------------------------------
996:
997: /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
998: ------------------------------------------------------------------
999: Bra
1000: notprop Lu +
1001: 9
1002: notprop Lu ++
1003: B
1004: notprop Lu +
1005: b
1006: Ket
1007: End
1008: ------------------------------------------------------------------
1009:
1010: /\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
1011: ------------------------------------------------------------------
1012: Bra
1013: notprop Lu +
1014: 9
1015: notprop Lu ++
1016: B
1017: notprop Lu +
1018: b
1019: Ket
1020: End
1021: ------------------------------------------------------------------
1022:
1023: /\p{Han}+X\p{Greek}+\x{370}/BZ8
1024: ------------------------------------------------------------------
1025: Bra
1026: prop Han ++
1027: X
1028: prop Greek +
1029: \x{370}
1030: Ket
1031: End
1032: ------------------------------------------------------------------
1033:
1034: /\p{Xan}+!\p{Xan}+A/BZ
1035: ------------------------------------------------------------------
1036: Bra
1037: prop Xan ++
1038: !
1039: prop Xan +
1040: A
1041: Ket
1042: End
1043: ------------------------------------------------------------------
1044:
1045: /\p{Xsp}+!\p{Xsp}\t/BZ
1046: ------------------------------------------------------------------
1047: Bra
1048: prop Xsp ++
1049: !
1050: prop Xsp
1051: \x09
1052: Ket
1053: End
1054: ------------------------------------------------------------------
1055:
1056: /\p{Xps}+!\p{Xps}\t/BZ
1057: ------------------------------------------------------------------
1058: Bra
1059: prop Xps ++
1060: !
1061: prop Xps
1062: \x09
1063: Ket
1064: End
1065: ------------------------------------------------------------------
1066:
1067: /\p{Xwd}+!\p{Xwd}_/BZ
1068: ------------------------------------------------------------------
1069: Bra
1070: prop Xwd ++
1071: !
1072: prop Xwd
1073: _
1074: Ket
1075: End
1076: ------------------------------------------------------------------
1077:
1078: /A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
1079: ------------------------------------------------------------------
1080: Bra
1081: A++
1082: prop N
1083: A++
1084: prop Nd
1085: B+
1086: prop N *+
1087: B+
1088: prop Nd *
1089: Ket
1090: End
1091: ------------------------------------------------------------------
1092:
1093: /-- These behaved oddly in Perl, so they are kept in this test --/
1094:
1095: /(\x{23a}\x{23a}\x{23a})?\1/8i
1096: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1097: No match
1098:
1099: /(ȺȺȺ)?\1/8i
1100: ȺȺȺⱥⱥ
1101: No match
1102:
1103: /(\x{23a}\x{23a}\x{23a})?\1/8i
1104: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1105: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1106: 1: \x{23a}\x{23a}\x{23a}
1107:
1108: /(ȺȺȺ)?\1/8i
1109: ȺȺȺⱥⱥⱥ
1110: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1111: 1: \x{23a}\x{23a}\x{23a}
1112:
1113: /(\x{23a}\x{23a}\x{23a})\1/8i
1114: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1115: No match
1116:
1117: /(ȺȺȺ)\1/8i
1118: ȺȺȺⱥⱥ
1119: No match
1120:
1121: /(\x{23a}\x{23a}\x{23a})\1/8i
1122: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1123: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1124: 1: \x{23a}\x{23a}\x{23a}
1125:
1126: /(ȺȺȺ)\1/8i
1127: ȺȺȺⱥⱥⱥ
1128: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1129: 1: \x{23a}\x{23a}\x{23a}
1130:
1131: /(\x{2c65}\x{2c65})\1/8i
1132: \x{2c65}\x{2c65}\x{23a}\x{23a}
1133: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
1134: 1: \x{2c65}\x{2c65}
1135:
1136: /(ⱥⱥ)\1/8i
1137: ⱥⱥȺȺ
1138: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
1139: 1: \x{2c65}\x{2c65}
1140:
1141: /(\x{23a}\x{23a}\x{23a})\1Y/8i
1142: X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
1143: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y
1144: 1: \x{23a}\x{23a}\x{23a}
1145:
1146: /(\x{2c65}\x{2c65})\1Y/8i
1147: X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
1148: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y
1149: 1: \x{2c65}\x{2c65}
1150:
1151: /-- --/
1152:
1153: /-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
1154:
1155: /^[\p{Batak}]/8
1156: \x{1bc0}
1157: 0: \x{1bc0}
1158: \x{1bff}
1159: 0: \x{1bff}
1160: ** Failers
1161: No match
1162: \x{1bf4}
1163: No match
1164:
1165: /^[\p{Brahmi}]/8
1166: \x{11000}
1167: 0: \x{11000}
1168: \x{1106f}
1169: 0: \x{1106f}
1170: ** Failers
1171: No match
1172: \x{1104e}
1173: No match
1174:
1175: /^[\p{Mandaic}]/8
1176: \x{840}
1177: 0: \x{840}
1178: \x{85e}
1179: 0: \x{85e}
1180: ** Failers
1181: No match
1182: \x{85c}
1183: No match
1184: \x{85d}
1185: No match
1186:
1187: /-- --/
1188:
1189: /(\X*)(.)/s8
1190: A\x{300}
1191: 0: A
1192: 1:
1193: 2: A
1194:
1195: /^S(\X*)e(\X*)$/8
1196: Stéréo
1197: No match
1198:
1199: /^\X/8
1200: ́réo
1201: No match
1202:
1203: /^a\X41z/<JS>
1204: aX41z
1205: 0: aX41z
1206: *** Failers
1207: No match
1208: aAz
1209: No match
1210:
1211: /(?<=ab\Cde)X/8
1212: Failed: \C not allowed in lookbehind assertion at offset 10
1213:
1214: /\X/
1215: a\P
1216: 0: a
1217: a\P\P
1218: Partial match: a
1219:
1220: /\Xa/
1221: aa\P
1222: 0: aa
1223: aa\P\P
1224: 0: aa
1225:
1226: /\X{2}/
1227: aa\P
1228: 0: aa
1229: aa\P\P
1230: Partial match: aa
1231:
1232: /\X+a/
1233: a\P
1234: Partial match: a
1235: aa\P
1236: 0: aa
1237: aa\P\P
1238: Partial match: aa
1239:
1240: /\X+?a/
1241: a\P
1242: Partial match: a
1243: ab\P
1244: Partial match: ab
1245: aa\P
1246: 0: aa
1247: aa\P\P
1248: 0: aa
1249: aba\P
1250: 0: aba
1251:
1252: /-- These Unicode 6.1.0 scripts are not known to Perl. --/
1253:
1254: /\p{Chakma}\d/8W
1255: \x{11100}\x{1113c}
1256: 0: \x{11100}\x{1113c}
1257:
1258: /\p{Takri}\d/8W
1259: \x{11680}\x{116c0}
1260: 0: \x{11680}\x{116c0}
1261:
1262: /^\X/8
1263: A\P
1264: 0: A
1265: A\P\P
1266: Partial match: A
1267: A\x{300}\x{301}\P
1268: 0: A\x{300}\x{301}
1269: A\x{300}\x{301}\P\P
1270: Partial match: A\x{300}\x{301}
1271: A\x{301}\P
1272: 0: A\x{301}
1273: A\x{301}\P\P
1274: Partial match: A\x{301}
1275:
1276: /^\X{2,3}/8
1277: A\P
1278: Partial match: A
1279: A\P\P
1280: Partial match: A
1281: AA\P
1282: 0: AA
1283: AA\P\P
1284: Partial match: AA
1285: A\x{300}\x{301}\P
1286: Partial match: A\x{300}\x{301}
1287: A\x{300}\x{301}\P\P
1288: Partial match: A\x{300}\x{301}
1289: A\x{300}\x{301}A\x{300}\x{301}\P
1290: 0: A\x{300}\x{301}A\x{300}\x{301}
1291: A\x{300}\x{301}A\x{300}\x{301}\P\P
1292: Partial match: A\x{300}\x{301}A\x{300}\x{301}
1293:
1294: /^\X{2}/8
1295: AA\P
1296: 0: AA
1297: AA\P\P
1298: Partial match: AA
1299: A\x{300}\x{301}A\x{300}\x{301}\P
1300: 0: A\x{300}\x{301}A\x{300}\x{301}
1301: A\x{300}\x{301}A\x{300}\x{301}\P\P
1302: Partial match: A\x{300}\x{301}A\x{300}\x{301}
1303:
1304: /^\X+/8
1305: AA\P
1306: 0: AA
1307: AA\P\P
1308: Partial match: AA
1309:
1310: /^\X+?Z/8
1311: AA\P
1312: Partial match: AA
1313: AA\P\P
1314: Partial match: AA
1315:
1316: /-- End of testinput7 --/
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>