1: /-- These tests for Unicode property support test PCRE's API and show some of
2: the compiled code. They are not Perl-compatible. --/
3:
4: /[\p{L}]/DZ
5: ------------------------------------------------------------------
6: Bra
7: [\p{L}]
8: Ket
9: End
10: ------------------------------------------------------------------
11: Capturing subpattern count = 0
12: No options
13: No first char
14: No need char
15:
16: /[\p{^L}]/DZ
17: ------------------------------------------------------------------
18: Bra
19: [\P{L}]
20: Ket
21: End
22: ------------------------------------------------------------------
23: Capturing subpattern count = 0
24: No options
25: No first char
26: No need char
27:
28: /[\P{L}]/DZ
29: ------------------------------------------------------------------
30: Bra
31: [\P{L}]
32: Ket
33: End
34: ------------------------------------------------------------------
35: Capturing subpattern count = 0
36: No options
37: No first char
38: No need char
39:
40: /[\P{^L}]/DZ
41: ------------------------------------------------------------------
42: Bra
43: [\p{L}]
44: Ket
45: End
46: ------------------------------------------------------------------
47: Capturing subpattern count = 0
48: No options
49: No first char
50: No need char
51:
52: /[abc\p{L}\x{0660}]/8DZ
53: ------------------------------------------------------------------
54: Bra
55: [a-c\p{L}\x{660}]
56: Ket
57: End
58: ------------------------------------------------------------------
59: Capturing subpattern count = 0
60: Options: utf
61: No first char
62: No need char
63:
64: /[\p{Nd}]/8DZ
65: ------------------------------------------------------------------
66: Bra
67: [\p{Nd}]
68: Ket
69: End
70: ------------------------------------------------------------------
71: Capturing subpattern count = 0
72: Options: utf
73: No first char
74: No need char
75: 1234
76: 0: 1
77:
78: /[\p{Nd}+-]+/8DZ
79: ------------------------------------------------------------------
80: Bra
81: [+\-\p{Nd}]+
82: Ket
83: End
84: ------------------------------------------------------------------
85: Capturing subpattern count = 0
86: Options: utf
87: No first char
88: No need char
89: 1234
90: 0: 1234
91: 12-34
92: 0: 12-34
93: 12+\x{661}-34
94: 0: 12+\x{661}-34
95: ** Failers
96: No match
97: abcd
98: No match
99:
100: /[\x{105}-\x{109}]/8iDZ
101: ------------------------------------------------------------------
102: Bra
103: [\x{104}-\x{109}]
104: Ket
105: End
106: ------------------------------------------------------------------
107: Capturing subpattern count = 0
108: Options: caseless utf
109: No first char
110: No need char
111: \x{104}
112: 0: \x{104}
113: \x{105}
114: 0: \x{105}
115: \x{109}
116: 0: \x{109}
117: ** Failers
118: No match
119: \x{100}
120: No match
121: \x{10a}
122: No match
123:
124: /[z-\x{100}]/8iDZ
125: ------------------------------------------------------------------
126: Bra
127: [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
128: Ket
129: End
130: ------------------------------------------------------------------
131: Capturing subpattern count = 0
132: Options: caseless utf
133: No first char
134: No need char
135: Z
136: 0: Z
137: z
138: 0: z
139: \x{39c}
140: 0: \x{39c}
141: \x{178}
142: 0: \x{178}
143: |
144: 0: |
145: \x{80}
146: 0: \x{80}
147: \x{ff}
148: 0: \x{ff}
149: \x{100}
150: 0: \x{100}
151: \x{101}
152: 0: \x{101}
153: ** Failers
154: No match
155: \x{102}
156: No match
157: Y
158: No match
159: y
160: No match
161:
162: /[z-\x{100}]/8DZi
163: ------------------------------------------------------------------
164: Bra
165: [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
166: Ket
167: End
168: ------------------------------------------------------------------
169: Capturing subpattern count = 0
170: Options: caseless utf
171: No first char
172: No need char
173:
174: /(?:[\PPa*]*){8,}/
175:
176: /[\P{Any}]/BZ
177: ------------------------------------------------------------------
178: Bra
179: [\P{Any}]
180: Ket
181: End
182: ------------------------------------------------------------------
183:
184: /[\P{Any}\E]/BZ
185: ------------------------------------------------------------------
186: Bra
187: [\P{Any}]
188: Ket
189: End
190: ------------------------------------------------------------------
191:
192: /(\P{Yi}+\277)/
193:
194: /(\P{Yi}+\277)?/
195:
196: /(?<=\P{Yi}{3}A)X/
197:
198: /\p{Yi}+(\P{Yi}+)(?1)/
199:
200: /(\P{Yi}{2}\277)?/
201:
202: /[\P{Yi}A]/
203:
204: /[\P{Yi}\P{Yi}\P{Yi}A]/
205:
206: /[^\P{Yi}A]/
207:
208: /[^\P{Yi}\P{Yi}\P{Yi}A]/
209:
210: /(\P{Yi}*\277)*/
211:
212: /(\P{Yi}*?\277)*/
213:
214: /(\p{Yi}*+\277)*/
215:
216: /(\P{Yi}?\277)*/
217:
218: /(\P{Yi}??\277)*/
219:
220: /(\p{Yi}?+\277)*/
221:
222: /(\P{Yi}{0,3}\277)*/
223:
224: /(\P{Yi}{0,3}?\277)*/
225:
226: /(\p{Yi}{0,3}+\277)*/
227:
228: /\p{Zl}{2,3}+/8BZ
229: ------------------------------------------------------------------
230: Bra
231: prop Zl {2}
232: prop Zl ?+
233: Ket
234: End
235: ------------------------------------------------------------------
236:
237: 0: \x{2028}\x{2028}
238: \x{2028}\x{2028}\x{2028}
239: 0: \x{2028}\x{2028}\x{2028}
240:
241: /\p{Zl}/8BZ
242: ------------------------------------------------------------------
243: Bra
244: prop Zl
245: Ket
246: End
247: ------------------------------------------------------------------
248:
249: /\p{Lu}{3}+/8BZ
250: ------------------------------------------------------------------
251: Bra
252: prop Lu {3}
253: Ket
254: End
255: ------------------------------------------------------------------
256:
257: /\pL{2}+/8BZ
258: ------------------------------------------------------------------
259: Bra
260: prop L {2}
261: Ket
262: End
263: ------------------------------------------------------------------
264:
265: /\p{Cc}{2}+/8BZ
266: ------------------------------------------------------------------
267: Bra
268: prop Cc {2}
269: Ket
270: End
271: ------------------------------------------------------------------
272:
273: /^\p{Cs}/8
274: \?\x{dfff}
275: 0: \x{dfff}
276: ** Failers
277: No match
278: \x{09f}
279: No match
280:
281: /^\p{Sc}+/8
282: $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
283: 0: $\x{a2}\x{a3}\x{a4}\x{a5}
284: \x{9f2}
285: 0: \x{9f2}
286: ** Failers
287: No match
288: X
289: No match
290: \x{2c2}
291: No match
292:
293: /^\p{Zs}/8
294: \ \
295: 0:
296: \x{a0}
297: 0: \x{a0}
298: \x{1680}
299: 0: \x{1680}
300: \x{180e}
301: 0: \x{180e}
302: \x{2000}
303: 0: \x{2000}
304: \x{2001}
305: 0: \x{2001}
306: ** Failers
307: No match
308: \x{2028}
309: No match
310: \x{200d}
311: No match
312:
313: /-- These four are here rather than in test 6 because Perl has problems with
314: the negative versions of the properties. --/
315:
316: /\p{^Lu}/8i
317: 1234
318: 0: 1
319: ** Failers
320: 0: *
321: ABC
322: No match
323:
324: /\P{Lu}/8i
325: 1234
326: 0: 1
327: ** Failers
328: 0: *
329: ABC
330: No match
331:
332: /\p{Ll}/8i
333: a
334: 0: a
335: Az
336: 0: z
337: ** Failers
338: 0: a
339: ABC
340: No match
341:
342: /\p{Lu}/8i
343: A
344: 0: A
345: a\x{10a0}B
346: 0: \x{10a0}
347: ** Failers
348: 0: F
349: a
350: No match
351: \x{1d00}
352: No match
353:
354: /[\x{c0}\x{391}]/8i
355: \x{c0}
356: 0: \x{c0}
357: \x{e0}
358: 0: \x{e0}
359:
360: /-- The next two are special cases where the lengths of the different cases of
361: the same character differ. The first went wrong with heap frame storage; the
362: second was broken in all cases. --/
363:
364: /^\x{023a}+?(\x{0130}+)/8i
365: \x{023a}\x{2c65}\x{0130}
366: 0: \x{23a}\x{2c65}\x{130}
367: 1: \x{130}
368:
369: /^\x{023a}+([^X])/8i
370: \x{023a}\x{2c65}X
371: 0: \x{23a}\x{2c65}
372: 1: \x{2c65}
373:
374: /\x{c0}+\x{116}+/8i
375: \x{c0}\x{e0}\x{116}\x{117}
376: 0: \x{c0}\x{e0}\x{116}\x{117}
377:
378: /[\x{c0}\x{116}]+/8i
379: \x{c0}\x{e0}\x{116}\x{117}
380: 0: \x{c0}\x{e0}\x{116}\x{117}
381:
382: /(\x{de})\1/8i
383: \x{de}\x{de}
384: 0: \x{de}\x{de}
385: 1: \x{de}
386: \x{de}\x{fe}
387: 0: \x{de}\x{fe}
388: 1: \x{de}
389: \x{fe}\x{fe}
390: 0: \x{fe}\x{fe}
391: 1: \x{fe}
392: \x{fe}\x{de}
393: 0: \x{fe}\x{de}
394: 1: \x{fe}
395:
396: /^\x{c0}$/8i
397: \x{c0}
398: 0: \x{c0}
399: \x{e0}
400: 0: \x{e0}
401:
402: /^\x{e0}$/8i
403: \x{c0}
404: 0: \x{c0}
405: \x{e0}
406: 0: \x{e0}
407:
408: /-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
409: will match it only with UCP support, because without that it has no notion
410: of case for anything other than the ASCII letters. --/
411:
412: /((?i)[\x{c0}])/8
413: \x{c0}
414: 0: \x{c0}
415: 1: \x{c0}
416: \x{e0}
417: 0: \x{e0}
418: 1: \x{e0}
419:
420: /(?i:[\x{c0}])/8
421: \x{c0}
422: 0: \x{c0}
423: \x{e0}
424: 0: \x{e0}
425:
426: /-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
427:
428: /^\p{Xan}/8
429: ABCD
430: 0: A
431: 1234
432: 0: 1
433: \x{6ca}
434: 0: \x{6ca}
435: \x{a6c}
436: 0: \x{a6c}
437: \x{10a7}
438: 0: \x{10a7}
439: ** Failers
440: No match
441: _ABC
442: No match
443:
444: /^\p{Xan}+/8
445: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
446: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
447: ** Failers
448: No match
449: _ABC
450: No match
451:
452: /^\p{Xan}+?/8
453: \x{6ca}\x{a6c}\x{10a7}_
454: 0: \x{6ca}
455:
456: /^\p{Xan}*/8
457: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
458: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
459:
460: /^\p{Xan}{2,9}/8
461: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
462: 0: ABCD1234\x{6ca}
463:
464: /^\p{Xan}{2,9}?/8
465: \x{6ca}\x{a6c}\x{10a7}_
466: 0: \x{6ca}\x{a6c}
467:
468: /^[\p{Xan}]/8
469: ABCD1234_
470: 0: A
471: 1234abcd_
472: 0: 1
473: \x{6ca}
474: 0: \x{6ca}
475: \x{a6c}
476: 0: \x{a6c}
477: \x{10a7}
478: 0: \x{10a7}
479: ** Failers
480: No match
481: _ABC
482: No match
483:
484: /^[\p{Xan}]+/8
485: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
486: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}
487: ** Failers
488: No match
489: _ABC
490: No match
491:
492: /^>\p{Xsp}/8
493: >\x{1680}\x{2028}\x{0b}
494: 0: >\x{1680}
495: >\x{a0}
496: 0: >\x{a0}
497: ** Failers
498: No match
499: \x{0b}
500: No match
501:
502: /^>\p{Xsp}+/8
503: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
504: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
505:
506: /^>\p{Xsp}+?/8
507: >\x{1680}\x{2028}\x{0b}
508: 0: >\x{1680}
509:
510: /^>\p{Xsp}*/8
511: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
512: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
513:
514: /^>\p{Xsp}{2,9}/8
515: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
516: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
517:
518: /^>\p{Xsp}{2,9}?/8
519: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
520: 0: > \x{09}
521:
522: /^>[\p{Xsp}]/8
523: >\x{2028}\x{0b}
524: 0: >\x{2028}
525:
526: /^>[\p{Xsp}]+/8
527: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
528: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}
529:
530: /^>\p{Xps}/8
531: >\x{1680}\x{2028}\x{0b}
532: 0: >\x{1680}
533: >\x{a0}
534: 0: >\x{a0}
535: ** Failers
536: No match
537: \x{0b}
538: No match
539:
540: /^>\p{Xps}+/8
541: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
542: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
543:
544: /^>\p{Xps}+?/8
545: >\x{1680}\x{2028}\x{0b}
546: 0: >\x{1680}
547:
548: /^>\p{Xps}*/8
549: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
550: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
551:
552: /^>\p{Xps}{2,9}/8
553: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
554: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
555:
556: /^>\p{Xps}{2,9}?/8
557: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
558: 0: > \x{09}
559:
560: /^>[\p{Xps}]/8
561: >\x{2028}\x{0b}
562: 0: >\x{2028}
563:
564: /^>[\p{Xps}]+/8
565: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
566: 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
567:
568: /^\p{Xwd}/8
569: ABCD
570: 0: A
571: 1234
572: 0: 1
573: \x{6ca}
574: 0: \x{6ca}
575: \x{a6c}
576: 0: \x{a6c}
577: \x{10a7}
578: 0: \x{10a7}
579: _ABC
580: 0: _
581: ** Failers
582: No match
583: []
584: No match
585:
586: /^\p{Xwd}+/8
587: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
588: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
589:
590: /^\p{Xwd}+?/8
591: \x{6ca}\x{a6c}\x{10a7}_
592: 0: \x{6ca}
593:
594: /^\p{Xwd}*/8
595: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
596: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
597:
598: /^\p{Xwd}{2,9}/8
599: A_B12\x{6ca}\x{a6c}\x{10a7}
600: 0: A_B12\x{6ca}\x{a6c}\x{10a7}
601:
602: /^\p{Xwd}{2,9}?/8
603: \x{6ca}\x{a6c}\x{10a7}_
604: 0: \x{6ca}\x{a6c}
605:
606: /^[\p{Xwd}]/8
607: ABCD1234_
608: 0: A
609: 1234abcd_
610: 0: 1
611: \x{6ca}
612: 0: \x{6ca}
613: \x{a6c}
614: 0: \x{a6c}
615: \x{10a7}
616: 0: \x{10a7}
617: _ABC
618: 0: _
619: ** Failers
620: No match
621: []
622: No match
623:
624: /^[\p{Xwd}]+/8
625: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
626: 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
627:
628: /-- A check not in UTF-8 mode --/
629:
630: /^[\p{Xwd}]+/
631: ABCD1234_
632: 0: ABCD1234_
633:
634: /-- Some negative checks --/
635:
636: /^[\P{Xwd}]+/8
637: !.+\x{019}\x{35a}AB
638: 0: !.+\x{19}\x{35a}
639:
640: /^[\p{^Xwd}]+/8
641: !.+\x{019}\x{35a}AB
642: 0: !.+\x{19}\x{35a}
643:
644: /[\D]/WBZ8
645: ------------------------------------------------------------------
646: Bra
647: [\P{Nd}]
648: Ket
649: End
650: ------------------------------------------------------------------
651: 1\x{3c8}2
652: 0: \x{3c8}
653:
654: /[\d]/WBZ8
655: ------------------------------------------------------------------
656: Bra
657: [\p{Nd}]
658: Ket
659: End
660: ------------------------------------------------------------------
661: >\x{6f4}<
662: 0: \x{6f4}
663:
664: /[\S]/WBZ8
665: ------------------------------------------------------------------
666: Bra
667: [\P{Xsp}]
668: Ket
669: End
670: ------------------------------------------------------------------
671: \x{1680}\x{6f4}\x{1680}
672: 0: \x{6f4}
673:
674: /[\s]/WBZ8
675: ------------------------------------------------------------------
676: Bra
677: [\p{Xsp}]
678: Ket
679: End
680: ------------------------------------------------------------------
681: >\x{1680}<
682: 0: \x{1680}
683:
684: /[\W]/WBZ8
685: ------------------------------------------------------------------
686: Bra
687: [\P{Xwd}]
688: Ket
689: End
690: ------------------------------------------------------------------
691: A\x{1712}B
692: 0: \x{1712}
693:
694: /[\w]/WBZ8
695: ------------------------------------------------------------------
696: Bra
697: [\p{Xwd}]
698: Ket
699: End
700: ------------------------------------------------------------------
701: >\x{1723}<
702: 0: \x{1723}
703:
704: /\D/WBZ8
705: ------------------------------------------------------------------
706: Bra
707: notprop Nd
708: Ket
709: End
710: ------------------------------------------------------------------
711: 1\x{3c8}2
712: 0: \x{3c8}
713:
714: /\d/WBZ8
715: ------------------------------------------------------------------
716: Bra
717: prop Nd
718: Ket
719: End
720: ------------------------------------------------------------------
721: >\x{6f4}<
722: 0: \x{6f4}
723:
724: /\S/WBZ8
725: ------------------------------------------------------------------
726: Bra
727: notprop Xsp
728: Ket
729: End
730: ------------------------------------------------------------------
731: \x{1680}\x{6f4}\x{1680}
732: 0: \x{6f4}
733:
734: /\s/WBZ8
735: ------------------------------------------------------------------
736: Bra
737: prop Xsp
738: Ket
739: End
740: ------------------------------------------------------------------
741: >\x{1680}>
742: 0: \x{1680}
743:
744: /\W/WBZ8
745: ------------------------------------------------------------------
746: Bra
747: notprop Xwd
748: Ket
749: End
750: ------------------------------------------------------------------
751: A\x{1712}B
752: 0: \x{1712}
753:
754: /\w/WBZ8
755: ------------------------------------------------------------------
756: Bra
757: prop Xwd
758: Ket
759: End
760: ------------------------------------------------------------------
761: >\x{1723}<
762: 0: \x{1723}
763:
764: /[[:alpha:]]/WBZ
765: ------------------------------------------------------------------
766: Bra
767: [\p{L}]
768: Ket
769: End
770: ------------------------------------------------------------------
771:
772: /[[:lower:]]/WBZ
773: ------------------------------------------------------------------
774: Bra
775: [\p{Ll}]
776: Ket
777: End
778: ------------------------------------------------------------------
779:
780: /[[:upper:]]/WBZ
781: ------------------------------------------------------------------
782: Bra
783: [\p{Lu}]
784: Ket
785: End
786: ------------------------------------------------------------------
787:
788: /[[:alnum:]]/WBZ
789: ------------------------------------------------------------------
790: Bra
791: [\p{Xan}]
792: Ket
793: End
794: ------------------------------------------------------------------
795:
796: /[[:ascii:]]/WBZ
797: ------------------------------------------------------------------
798: Bra
799: [\x00-\x7f]
800: Ket
801: End
802: ------------------------------------------------------------------
803:
804: /[[:cntrl:]]/WBZ
805: ------------------------------------------------------------------
806: Bra
807: [\x00-\x1f\x7f]
808: Ket
809: End
810: ------------------------------------------------------------------
811:
812: /[[:digit:]]/WBZ
813: ------------------------------------------------------------------
814: Bra
815: [\p{Nd}]
816: Ket
817: End
818: ------------------------------------------------------------------
819:
820: /[[:graph:]]/WBZ
821: ------------------------------------------------------------------
822: Bra
823: [!-~]
824: Ket
825: End
826: ------------------------------------------------------------------
827:
828: /[[:print:]]/WBZ
829: ------------------------------------------------------------------
830: Bra
831: [ -~]
832: Ket
833: End
834: ------------------------------------------------------------------
835:
836: /[[:punct:]]/WBZ
837: ------------------------------------------------------------------
838: Bra
839: [!-/:-@[-`{-~]
840: Ket
841: End
842: ------------------------------------------------------------------
843:
844: /[[:space:]]/WBZ
845: ------------------------------------------------------------------
846: Bra
847: [\p{Xps}]
848: Ket
849: End
850: ------------------------------------------------------------------
851:
852: /[[:word:]]/WBZ
853: ------------------------------------------------------------------
854: Bra
855: [\p{Xwd}]
856: Ket
857: End
858: ------------------------------------------------------------------
859:
860: /[[:xdigit:]]/WBZ
861: ------------------------------------------------------------------
862: Bra
863: [0-9A-Fa-f]
864: Ket
865: End
866: ------------------------------------------------------------------
867:
868: /-- Unicode properties for \b and \B --/
869:
870: /\b...\B/8W
871: abc_
872: 0: abc
873: \x{37e}abc\x{376}
874: 0: abc
875: \x{37e}\x{376}\x{371}\x{393}\x{394}
876: 0: \x{376}\x{371}\x{393}
877: !\x{c0}++\x{c1}\x{c2}
878: 0: ++\x{c1}
879: !\x{c0}+++++
880: 0: \x{c0}++
881:
882: /-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/
883:
884: /\b...\B/8
885: abc_
886: 0: abc
887: ** Failers
888: 0: Fai
889: \x{37e}abc\x{376}
890: No match
891: \x{37e}\x{376}\x{371}\x{393}\x{394}
892: No match
893: !\x{c0}++\x{c1}\x{c2}
894: No match
895: !\x{c0}+++++
896: No match
897:
898: /-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/
899:
900: /\b...\B/W
901: abc_
902: 0: abc
903: !\x{c0}++\x{c1}\x{c2}
904: 0: ++\xc1
905: !\x{c0}+++++
906: 0: \xc0++
907:
908: /-- Some of these are silly, but they check various combinations --/
909:
910: /[[:^alpha:][:^cntrl:]]+/8WBZ
911: ------------------------------------------------------------------
912: Bra
913: [ -~\x80-\xff\P{L}]+
914: Ket
915: End
916: ------------------------------------------------------------------
917: 123
918: 0: 123
919: abc
920: 0: abc
921:
922: /[[:^cntrl:][:^alpha:]]+/8WBZ
923: ------------------------------------------------------------------
924: Bra
925: [ -~\x80-\xff\P{L}]+
926: Ket
927: End
928: ------------------------------------------------------------------
929: 123
930: 0: 123
931: abc
932: 0: abc
933:
934: /[[:alpha:]]+/8WBZ
935: ------------------------------------------------------------------
936: Bra
937: [\p{L}]+
938: Ket
939: End
940: ------------------------------------------------------------------
941: abc
942: 0: abc
943:
944: /[[:^alpha:]\S]+/8WBZ
945: ------------------------------------------------------------------
946: Bra
947: [\P{L}\P{Xsp}]+
948: Ket
949: End
950: ------------------------------------------------------------------
951: 123
952: 0: 123
953: abc
954: 0: abc
955:
956: /[^\d]+/8WBZ
957: ------------------------------------------------------------------
958: Bra
959: [^\p{Nd}]+
960: Ket
961: End
962: ------------------------------------------------------------------
963: abc123
964: 0: abc
965: abc\x{123}
966: 0: abc\x{123}
967: \x{660}abc
968: 0: abc
969:
970: /\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
971: ------------------------------------------------------------------
972: Bra
973: prop Lu ++
974: 9
975: prop Lu +
976: B
977: prop Lu ++
978: b
979: Ket
980: End
981: ------------------------------------------------------------------
982:
983: /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
984: ------------------------------------------------------------------
985: Bra
986: notprop Lu +
987: 9
988: notprop Lu ++
989: B
990: notprop Lu +
991: b
992: Ket
993: End
994: ------------------------------------------------------------------
995:
996: /\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
997: ------------------------------------------------------------------
998: Bra
999: notprop Lu +
1000: 9
1001: notprop Lu ++
1002: B
1003: notprop Lu +
1004: b
1005: Ket
1006: End
1007: ------------------------------------------------------------------
1008:
1009: /\p{Han}+X\p{Greek}+\x{370}/BZ8
1010: ------------------------------------------------------------------
1011: Bra
1012: prop Han ++
1013: X
1014: prop Greek +
1015: \x{370}
1016: Ket
1017: End
1018: ------------------------------------------------------------------
1019:
1020: /\p{Xan}+!\p{Xan}+A/BZ
1021: ------------------------------------------------------------------
1022: Bra
1023: prop Xan ++
1024: !
1025: prop Xan +
1026: A
1027: Ket
1028: End
1029: ------------------------------------------------------------------
1030:
1031: /\p{Xsp}+!\p{Xsp}\t/BZ
1032: ------------------------------------------------------------------
1033: Bra
1034: prop Xsp ++
1035: !
1036: prop Xsp
1037: \x09
1038: Ket
1039: End
1040: ------------------------------------------------------------------
1041:
1042: /\p{Xps}+!\p{Xps}\t/BZ
1043: ------------------------------------------------------------------
1044: Bra
1045: prop Xps ++
1046: !
1047: prop Xps
1048: \x09
1049: Ket
1050: End
1051: ------------------------------------------------------------------
1052:
1053: /\p{Xwd}+!\p{Xwd}_/BZ
1054: ------------------------------------------------------------------
1055: Bra
1056: prop Xwd ++
1057: !
1058: prop Xwd
1059: _
1060: Ket
1061: End
1062: ------------------------------------------------------------------
1063:
1064: /A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
1065: ------------------------------------------------------------------
1066: Bra
1067: A++
1068: prop N
1069: A++
1070: prop Nd
1071: B+
1072: prop N *+
1073: B+
1074: prop Nd *
1075: Ket
1076: End
1077: ------------------------------------------------------------------
1078:
1079: /-- These behaved oddly in Perl, so they are kept in this test --/
1080:
1081: /(\x{23a}\x{23a}\x{23a})?\1/8i
1082: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1083: No match
1084:
1085: /(ȺȺȺ)?\1/8i
1086: ȺȺȺⱥⱥ
1087: No match
1088:
1089: /(\x{23a}\x{23a}\x{23a})?\1/8i
1090: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1091: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1092: 1: \x{23a}\x{23a}\x{23a}
1093:
1094: /(ȺȺȺ)?\1/8i
1095: ȺȺȺⱥⱥⱥ
1096: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1097: 1: \x{23a}\x{23a}\x{23a}
1098:
1099: /(\x{23a}\x{23a}\x{23a})\1/8i
1100: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
1101: No match
1102:
1103: /(ȺȺȺ)\1/8i
1104: ȺȺȺⱥⱥ
1105: No match
1106:
1107: /(\x{23a}\x{23a}\x{23a})\1/8i
1108: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1109: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1110: 1: \x{23a}\x{23a}\x{23a}
1111:
1112: /(ȺȺȺ)\1/8i
1113: ȺȺȺⱥⱥⱥ
1114: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
1115: 1: \x{23a}\x{23a}\x{23a}
1116:
1117: /(\x{2c65}\x{2c65})\1/8i
1118: \x{2c65}\x{2c65}\x{23a}\x{23a}
1119: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
1120: 1: \x{2c65}\x{2c65}
1121:
1122: /(ⱥⱥ)\1/8i
1123: ⱥⱥȺȺ
1124: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}
1125: 1: \x{2c65}\x{2c65}
1126:
1127: /(\x{23a}\x{23a}\x{23a})\1Y/8i
1128: X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
1129: 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y
1130: 1: \x{23a}\x{23a}\x{23a}
1131:
1132: /(\x{2c65}\x{2c65})\1Y/8i
1133: X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
1134: 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y
1135: 1: \x{2c65}\x{2c65}
1136:
1137: /-- --/
1138:
1139: /-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
1140:
1141: /^[\p{Batak}]/8
1142: \x{1bc0}
1143: 0: \x{1bc0}
1144: \x{1bff}
1145: 0: \x{1bff}
1146: ** Failers
1147: No match
1148: \x{1bf4}
1149: No match
1150:
1151: /^[\p{Brahmi}]/8
1152: \x{11000}
1153: 0: \x{11000}
1154: \x{1106f}
1155: 0: \x{1106f}
1156: ** Failers
1157: No match
1158: \x{1104e}
1159: No match
1160:
1161: /^[\p{Mandaic}]/8
1162: \x{840}
1163: 0: \x{840}
1164: \x{85e}
1165: 0: \x{85e}
1166: ** Failers
1167: No match
1168: \x{85c}
1169: No match
1170: \x{85d}
1171: No match
1172:
1173: /-- --/
1174:
1175: /(\X*)(.)/s8
1176: A\x{300}
1177: 0: A
1178: 1:
1179: 2: A
1180:
1181: /^S(\X*)e(\X*)$/8
1182: Stéréo
1183: 0: Ste\x{301}re\x{301}o
1184: 1: te\x{301}r
1185: 2: \x{301}o
1186:
1187: /^\X/8
1188: ́réo
1189: 0: \x{301}
1190:
1191: /^a\X41z/<JS>
1192: aX41z
1193: 0: aX41z
1194: *** Failers
1195: No match
1196: aAz
1197: No match
1198:
1199: /(?<=ab\Cde)X/8
1200: Failed: \C not allowed in lookbehind assertion at offset 10
1201:
1202: /\X/
1203: a\P
1204: 0: a
1205: a\P\P
1206: Partial match: a
1207:
1208: /\Xa/
1209: aa\P
1210: 0: aa
1211: aa\P\P
1212: 0: aa
1213:
1214: /\X{2}/
1215: aa\P
1216: 0: aa
1217: aa\P\P
1218: Partial match: aa
1219:
1220: /\X+a/
1221: a\P
1222: Partial match: a
1223: aa\P
1224: 0: aa
1225: aa\P\P
1226: Partial match: aa
1227:
1228: /\X+?a/
1229: a\P
1230: Partial match: a
1231: ab\P
1232: Partial match: ab
1233: aa\P
1234: 0: aa
1235: aa\P\P
1236: 0: aa
1237: aba\P
1238: 0: aba
1239:
1240: /-- These Unicode 6.1.0 scripts are not known to Perl. --/
1241:
1242: /\p{Chakma}\d/8W
1243: \x{11100}\x{1113c}
1244: 0: \x{11100}\x{1113c}
1245:
1246: /\p{Takri}\d/8W
1247: \x{11680}\x{116c0}
1248: 0: \x{11680}\x{116c0}
1249:
1250: /^\X/8
1251: A\P
1252: 0: A
1253: A\P\P
1254: Partial match: A
1255: A\x{300}\x{301}\P
1256: 0: A\x{300}\x{301}
1257: A\x{300}\x{301}\P\P
1258: Partial match: A\x{300}\x{301}
1259: A\x{301}\P
1260: 0: A\x{301}
1261: A\x{301}\P\P
1262: Partial match: A\x{301}
1263:
1264: /^\X{2,3}/8
1265: A\P
1266: Partial match: A
1267: A\P\P
1268: Partial match: A
1269: AA\P
1270: 0: AA
1271: AA\P\P
1272: Partial match: AA
1273: A\x{300}\x{301}\P
1274: Partial match: A\x{300}\x{301}
1275: A\x{300}\x{301}\P\P
1276: Partial match: A\x{300}\x{301}
1277: A\x{300}\x{301}A\x{300}\x{301}\P
1278: 0: A\x{300}\x{301}A\x{300}\x{301}
1279: A\x{300}\x{301}A\x{300}\x{301}\P\P
1280: Partial match: A\x{300}\x{301}A\x{300}\x{301}
1281:
1282: /^\X{2}/8
1283: AA\P
1284: 0: AA
1285: AA\P\P
1286: Partial match: AA
1287: A\x{300}\x{301}A\x{300}\x{301}\P
1288: 0: A\x{300}\x{301}A\x{300}\x{301}
1289: A\x{300}\x{301}A\x{300}\x{301}\P\P
1290: Partial match: A\x{300}\x{301}A\x{300}\x{301}
1291:
1292: /^\X+/8
1293: AA\P
1294: 0: AA
1295: AA\P\P
1296: Partial match: AA
1297:
1298: /^\X+?Z/8
1299: AA\P
1300: Partial match: AA
1301: AA\P\P
1302: Partial match: AA
1303:
1304: /A\x{3a3}B/8iDZ
1305: ------------------------------------------------------------------
1306: Bra
1307: /i A
1308: clist 03a3 03c2 03c3
1309: /i B
1310: Ket
1311: End
1312: ------------------------------------------------------------------
1313: Capturing subpattern count = 0
1314: Options: caseless utf
1315: First char = 'A' (caseless)
1316: Need char = 'B' (caseless)
1317:
1318: /\x{3a3}B/8iDZ
1319: ------------------------------------------------------------------
1320: Bra
1321: clist 03a3 03c2 03c3
1322: /i B
1323: Ket
1324: End
1325: ------------------------------------------------------------------
1326: Capturing subpattern count = 0
1327: Options: caseless utf
1328: No first char
1329: Need char = 'B' (caseless)
1330:
1331: /[\x{3a3}]/8iBZ
1332: ------------------------------------------------------------------
1333: Bra
1334: clist 03a3 03c2 03c3
1335: Ket
1336: End
1337: ------------------------------------------------------------------
1338:
1339: /[^\x{3a3}]/8iBZ
1340: ------------------------------------------------------------------
1341: Bra
1342: not clist 03a3 03c2 03c3
1343: Ket
1344: End
1345: ------------------------------------------------------------------
1346:
1347: /[\x{3a3}]+/8iBZ
1348: ------------------------------------------------------------------
1349: Bra
1350: clist 03a3 03c2 03c3 +
1351: Ket
1352: End
1353: ------------------------------------------------------------------
1354:
1355: /[^\x{3a3}]+/8iBZ
1356: ------------------------------------------------------------------
1357: Bra
1358: not clist 03a3 03c2 03c3 +
1359: Ket
1360: End
1361: ------------------------------------------------------------------
1362:
1363: /a*\x{3a3}/8iBZ
1364: ------------------------------------------------------------------
1365: Bra
1366: /i a*+
1367: clist 03a3 03c2 03c3
1368: Ket
1369: End
1370: ------------------------------------------------------------------
1371:
1372: /\x{3a3}+a/8iBZ
1373: ------------------------------------------------------------------
1374: Bra
1375: clist 03a3 03c2 03c3 ++
1376: /i a
1377: Ket
1378: End
1379: ------------------------------------------------------------------
1380:
1381: /\x{3a3}*\x{3c2}/8iBZ
1382: ------------------------------------------------------------------
1383: Bra
1384: clist 03a3 03c2 03c3 *
1385: clist 03a3 03c2 03c3
1386: Ket
1387: End
1388: ------------------------------------------------------------------
1389:
1390: /\x{3a3}{3}/8i+
1391: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1392: 0: \x{3a3}\x{3c3}\x{3c2}
1393: 0+ \x{3a3}\x{3c3}\x{3c2}
1394:
1395: /\x{3a3}{2,4}/8i+
1396: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1397: 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}
1398: 0+ \x{3c3}\x{3c2}
1399:
1400: /\x{3a3}{2,4}?/8i+
1401: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1402: 0: \x{3a3}\x{3c3}
1403: 0+ \x{3c2}\x{3a3}\x{3c3}\x{3c2}
1404:
1405: /\x{3a3}+./8i+
1406: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1407: 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1408: 0+
1409:
1410: /\x{3a3}++./8i+
1411: ** Failers
1412: No match
1413: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
1414: No match
1415:
1416: /\x{3a3}*\x{3c2}/8iBZ
1417: ------------------------------------------------------------------
1418: Bra
1419: clist 03a3 03c2 03c3 *
1420: clist 03a3 03c2 03c3
1421: Ket
1422: End
1423: ------------------------------------------------------------------
1424:
1425: /[^\x{3a3}]*\x{3c2}/8iBZ
1426: ------------------------------------------------------------------
1427: Bra
1428: not clist 03a3 03c2 03c3 *+
1429: clist 03a3 03c2 03c3
1430: Ket
1431: End
1432: ------------------------------------------------------------------
1433:
1434: /[^a]*\x{3c2}/8iBZ
1435: ------------------------------------------------------------------
1436: Bra
1437: /i [^a]*
1438: clist 03a3 03c2 03c3
1439: Ket
1440: End
1441: ------------------------------------------------------------------
1442:
1443: /ist/8iBZ
1444: ------------------------------------------------------------------
1445: Bra
1446: /i i
1447: clist 0053 0073 017f
1448: /i t
1449: Ket
1450: End
1451: ------------------------------------------------------------------
1452: ikt
1453: No match
1454:
1455: /is+t/8i
1456: iSs\x{17f}t
1457: 0: iSs\x{17f}t
1458: ikt
1459: No match
1460:
1461: /is+?t/8i
1462: ikt
1463: No match
1464:
1465: /is?t/8i
1466: ikt
1467: No match
1468:
1469: /is{2}t/8i
1470: iskt
1471: No match
1472:
1473: /-- This property is a PCRE special --/
1474:
1475: /^\p{Xuc}/8
1476: $abc
1477: 0: $
1478: @abc
1479: 0: @
1480: `abc
1481: 0: `
1482: \x{1234}abc
1483: 0: \x{1234}
1484: ** Failers
1485: No match
1486: abc
1487: No match
1488:
1489: /^\p{Xuc}+/8
1490: $@`\x{a0}\x{1234}\x{e000}**
1491: 0: $@`\x{a0}\x{1234}\x{e000}
1492: ** Failers
1493: No match
1494: \x{9f}
1495: No match
1496:
1497: /^\p{Xuc}+?/8
1498: $@`\x{a0}\x{1234}\x{e000}**
1499: 0: $
1500: ** Failers
1501: No match
1502: \x{9f}
1503: No match
1504:
1505: /^\p{Xuc}+?\*/8
1506: $@`\x{a0}\x{1234}\x{e000}**
1507: 0: $@`\x{a0}\x{1234}\x{e000}*
1508: ** Failers
1509: No match
1510: \x{9f}
1511: No match
1512:
1513: /^\p{Xuc}++/8
1514: $@`\x{a0}\x{1234}\x{e000}**
1515: 0: $@`\x{a0}\x{1234}\x{e000}
1516: ** Failers
1517: No match
1518: \x{9f}
1519: No match
1520:
1521: /^\p{Xuc}{3,5}/8
1522: $@`\x{a0}\x{1234}\x{e000}**
1523: 0: $@`\x{a0}\x{1234}
1524: ** Failers
1525: No match
1526: \x{9f}
1527: No match
1528:
1529: /^\p{Xuc}{3,5}?/8
1530: $@`\x{a0}\x{1234}\x{e000}**
1531: 0: $@`
1532: ** Failers
1533: No match
1534: \x{9f}
1535: No match
1536:
1537: /^[\p{Xuc}]/8
1538: $@`\x{a0}\x{1234}\x{e000}**
1539: 0: $
1540: ** Failers
1541: No match
1542: \x{9f}
1543: No match
1544:
1545: /^[\p{Xuc}]+/8
1546: $@`\x{a0}\x{1234}\x{e000}**
1547: 0: $@`\x{a0}\x{1234}\x{e000}
1548: ** Failers
1549: No match
1550: \x{9f}
1551: No match
1552:
1553: /^\P{Xuc}/8
1554: abc
1555: 0: a
1556: ** Failers
1557: 0: *
1558: $abc
1559: No match
1560: @abc
1561: No match
1562: `abc
1563: No match
1564: \x{1234}abc
1565: No match
1566:
1567: /^[\P{Xuc}]/8
1568: abc
1569: 0: a
1570: ** Failers
1571: 0: *
1572: $abc
1573: No match
1574: @abc
1575: No match
1576: `abc
1577: No match
1578: \x{1234}abc
1579: No match
1580:
1581: /-- End of testinput7 --/
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>