version 1.1, 2012/02/21 23:05:52
|
version 1.1.1.2, 2013/07/22 08:25:57
|
Line 1
|
Line 1
|
/-- These tests for Unicode property support test PCRE's API and show some of | /-- This test is run only when JIT support is not available. It checks that an |
the compiled code. They are not Perl-compatible. --/ | attempt to use it has the expected behaviour. It also tests things that |
| are different without JIT. --/ |
| |
| /abc/S+I |
|
|
/[\p{L}]/DZ | /a*/SI |
| |
/[\p{^L}]/DZ | |
| |
/[\P{L}]/DZ | |
| |
/[\P{^L}]/DZ | |
| |
/[abc\p{L}\x{0660}]/8DZ | |
| |
/[\p{Nd}]/8DZ | |
1234 | |
| |
/[\p{Nd}+-]+/8DZ | |
1234 | |
12-34 | |
12+\x{661}-34 | |
** Failers | |
abcd | |
| |
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ | |
| |
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ | |
| |
/AB\x{1fb0}/8DZ | |
| |
/AB\x{1fb0}/8DZi | |
| |
/[\x{105}-\x{109}]/8iDZ | |
\x{104} | |
\x{105} | |
\x{109} | |
** Failers | |
\x{100} | |
\x{10a} | |
| |
/[z-\x{100}]/8iDZ | |
Z | |
z | |
\x{39c} | |
\x{178} | |
| | |
\x{80} | |
\x{ff} | |
\x{100} | |
\x{101} | |
** Failers | |
\x{102} | |
Y | |
y | |
| |
/[z-\x{100}]/8DZi | |
| |
/(?:[\PPa*]*){8,}/ | |
| |
/[\P{Any}]/BZ | |
| |
/[\P{Any}\E]/BZ | |
| |
/(\P{Yi}+\277)/ | |
| |
/(\P{Yi}+\277)?/ | |
| |
/(?<=\P{Yi}{3}A)X/ | |
| |
/\p{Yi}+(\P{Yi}+)(?1)/ | |
| |
/(\P{Yi}{2}\277)?/ | |
| |
/[\P{Yi}A]/ | |
| |
/[\P{Yi}\P{Yi}\P{Yi}A]/ | |
| |
/[^\P{Yi}A]/ | |
| |
/[^\P{Yi}\P{Yi}\P{Yi}A]/ | |
| |
/(\P{Yi}*\277)*/ | |
| |
/(\P{Yi}*?\277)*/ | |
| |
/(\p{Yi}*+\277)*/ | |
| |
/(\P{Yi}?\277)*/ | |
| |
/(\P{Yi}??\277)*/ | |
| |
/(\p{Yi}?+\277)*/ | |
| |
/(\P{Yi}{0,3}\277)*/ | |
| |
/(\P{Yi}{0,3}?\277)*/ | |
| |
/(\p{Yi}{0,3}+\277)*/ | |
| |
/\p{Zl}{2,3}+/8BZ | |
\xe2\x80\xa8\xe2\x80\xa8 | |
\x{2028}\x{2028}\x{2028} | |
| |
/\p{Zl}/8BZ | |
| |
/\p{Lu}{3}+/8BZ | |
| |
/\pL{2}+/8BZ | |
| |
/\p{Cc}{2}+/8BZ | |
| |
/^\p{Cs}/8 | |
\?\x{dfff} | |
** Failers | |
\x{09f} | |
| |
/^\p{Sc}+/8 | |
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} | |
\x{9f2} | |
** Failers | |
X | |
\x{2c2} | |
| |
/^\p{Zs}/8 | |
\ \ | |
\x{a0} | |
\x{1680} | |
\x{180e} | |
\x{2000} | |
\x{2001} | |
** Failers | |
\x{2028} | |
\x{200d} | |
| |
/-- These four are here rather than in test 6 because Perl has problems with | |
the negative versions of the properties. --/ | |
| |
/\p{^Lu}/8i | |
1234 | |
** Failers | |
ABC | |
| |
/\P{Lu}/8i | |
1234 | |
** Failers | |
ABC | |
| |
/\p{Ll}/8i | |
a | |
Az | |
** Failers | |
ABC | |
| |
/\p{Lu}/8i | |
A | |
a\x{10a0}B | |
** Failers | |
a | |
\x{1d00} | |
| |
/[\x{c0}\x{391}]/8i | |
\x{c0} | |
\x{e0} | |
| |
/-- The next two are special cases where the lengths of the different cases of | |
the same character differ. The first went wrong with heap frame storage; the | |
second was broken in all cases. --/ | |
| |
/^\x{023a}+?(\x{0130}+)/8i | |
\x{023a}\x{2c65}\x{0130} | |
| |
/^\x{023a}+([^X])/8i | |
\x{023a}\x{2c65}X | |
| |
/\x{c0}+\x{116}+/8i | |
\x{c0}\x{e0}\x{116}\x{117} | |
| |
/[\x{c0}\x{116}]+/8i | |
\x{c0}\x{e0}\x{116}\x{117} | |
| |
/(\x{de})\1/8i | |
\x{de}\x{de} | |
\x{de}\x{fe} | |
\x{fe}\x{fe} | |
\x{fe}\x{de} | |
| |
/^\x{c0}$/8i | |
\x{c0} | |
\x{e0} | |
| |
/^\x{e0}$/8i | |
\x{c0} | |
\x{e0} | |
| |
/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE | |
will match it only with UCP support, because without that it has no notion | |
of case for anything other than the ASCII letters. --/ | |
| |
/((?i)[\x{c0}])/8 | |
\x{c0} | |
\x{e0} | |
| |
/(?i:[\x{c0}])/8 | |
\x{c0} | |
\x{e0} | |
| |
/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 | |
| |
/^\X/8 | |
A | |
A\x{300}BC | |
A\x{300}\x{301}\x{302}BC | |
*** Failers | |
\x{300} | |
| |
/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/ | |
| |
/^\p{Xan}/8 | |
ABCD | |
1234 | |
\x{6ca} | |
\x{a6c} | |
\x{10a7} | |
** Failers | |
_ABC | |
| |
/^\p{Xan}+/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
** Failers | |
_ABC | |
| |
/^\p{Xan}+?/8 | |
\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xan}*/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xan}{2,9}/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xan}{2,9}?/8 | |
\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^[\p{Xan}]/8 | |
ABCD1234_ | |
1234abcd_ | |
\x{6ca} | |
\x{a6c} | |
\x{10a7} | |
** Failers | |
_ABC | |
| |
/^[\p{Xan}]+/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
** Failers | |
_ABC | |
| |
/^>\p{Xsp}/8 | |
>\x{1680}\x{2028}\x{0b} | |
>\x{a0} | |
** Failers | |
\x{0b} | |
| |
/^>\p{Xsp}+/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xsp}+?/8 | |
>\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xsp}*/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xsp}{2,9}/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xsp}{2,9}?/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>[\p{Xsp}]/8 | |
>\x{2028}\x{0b} | |
| |
/^>[\p{Xsp}]+/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xps}/8 | |
>\x{1680}\x{2028}\x{0b} | |
>\x{a0} | |
** Failers | |
\x{0b} | |
| |
/^>\p{Xps}+/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xps}+?/8 | |
>\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xps}*/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xps}{2,9}/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>\p{Xps}{2,9}?/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^>[\p{Xps}]/8 | |
>\x{2028}\x{0b} | |
| |
/^>[\p{Xps}]+/8 | |
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} | |
| |
/^\p{Xwd}/8 | |
ABCD | |
1234 | |
\x{6ca} | |
\x{a6c} | |
\x{10a7} | |
_ABC | |
** Failers | |
[] | |
| |
/^\p{Xwd}+/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xwd}+?/8 | |
\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xwd}*/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^\p{Xwd}{2,9}/8 | |
A_B12\x{6ca}\x{a6c}\x{10a7} | |
| |
/^\p{Xwd}{2,9}?/8 | |
\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/^[\p{Xwd}]/8 | |
ABCD1234_ | |
1234abcd_ | |
\x{6ca} | |
\x{a6c} | |
\x{10a7} | |
_ABC | |
** Failers | |
[] | |
| |
/^[\p{Xwd}]+/8 | |
ABCD1234\x{6ca}\x{a6c}\x{10a7}_ | |
| |
/-- A check not in UTF-8 mode --/ | |
| |
/^[\p{Xwd}]+/ | |
ABCD1234_ | |
| |
/-- Some negative checks --/ | |
| |
/^[\P{Xwd}]+/8 | |
!.+\x{019}\x{35a}AB | |
| |
/^[\p{^Xwd}]+/8 | |
!.+\x{019}\x{35a}AB | |
| |
/[\D]/WBZ8 | |
1\x{3c8}2 | |
| |
/[\d]/WBZ8 | |
>\x{6f4}< | |
| |
/[\S]/WBZ8 | |
\x{1680}\x{6f4}\x{1680} | |
| |
/[\s]/WBZ8 | |
>\x{1680}< | |
| |
/[\W]/WBZ8 | |
A\x{1712}B | |
| |
/[\w]/WBZ8 | |
>\x{1723}< | |
| |
/\D/WBZ8 | |
1\x{3c8}2 | |
| |
/\d/WBZ8 | |
>\x{6f4}< | |
| |
/\S/WBZ8 | |
\x{1680}\x{6f4}\x{1680} | |
| |
/\s/WBZ8 | |
>\x{1680}> | |
| |
/\W/WBZ8 | |
A\x{1712}B | |
| |
/\w/WBZ8 | |
>\x{1723}< | |
| |
/[[:alpha:]]/WBZ | |
| |
/[[:lower:]]/WBZ | |
| |
/[[:upper:]]/WBZ | |
| |
/[[:alnum:]]/WBZ | |
| |
/[[:ascii:]]/WBZ | |
| |
/[[:blank:]]/WBZ | |
| |
/[[:cntrl:]]/WBZ | |
| |
/[[:digit:]]/WBZ | |
| |
/[[:graph:]]/WBZ | |
| |
/[[:print:]]/WBZ | |
| |
/[[:punct:]]/WBZ | |
| |
/[[:space:]]/WBZ | |
| |
/[[:word:]]/WBZ | |
| |
/[[:xdigit:]]/WBZ | |
| |
/-- Unicode properties for \b abd \B --/ | |
| |
/\b...\B/8W | |
abc_ | |
\x{37e}abc\x{376} | |
\x{37e}\x{376}\x{371}\x{393}\x{394} | |
!\x{c0}++\x{c1}\x{c2} | |
!\x{c0}+++++ | |
| |
/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ | |
| |
/\b...\B/8 | |
abc_ | |
** Failers | |
\x{37e}abc\x{376} | |
\x{37e}\x{376}\x{371}\x{393}\x{394} | |
!\x{c0}++\x{c1}\x{c2} | |
!\x{c0}+++++ | |
| |
/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ | |
| |
/\b...\B/W | |
abc_ | |
!\x{c0}++\x{c1}\x{c2} | |
!\x{c0}+++++ | |
| |
/-- POSIX interface --/ | |
| |
/\w/P | |
+++\x{c2} | |
| |
/\w/WP | |
+++\x{c2} | |
| |
/-- Some of these are silly, but they check various combinations --/ | |
| |
/[[:^alpha:][:^cntrl:]]+/8WBZ | |
123 | |
abc | |
| |
/[[:^cntrl:][:^alpha:]]+/8WBZ | |
123 | |
abc | |
| |
/[[:alpha:]]+/8WBZ | |
abc | |
| |
/[[:^alpha:]\S]+/8WBZ | |
123 | |
abc | |
| |
/[^\d]+/8WBZ | |
abc123 | |
abc\x{123} | |
\x{660}abc | |
| |
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI | |
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} | |
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} | |
| |
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ | |
| |
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ | |
| |
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ | |
| |
/\p{Han}+X\p{Greek}+\x{370}/BZ8 | |
| |
/\p{Xan}+!\p{Xan}+A/BZ | |
| |
/\p{Xsp}+!\p{Xsp}\t/BZ | |
| |
/\p{Xps}+!\p{Xps}\t/BZ | |
| |
/\p{Xwd}+!\p{Xwd}_/BZ | |
| |
/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ | |
| |
/-- These behaved oddly in Perl, so they are kept in this test --/ | |
| |
/(\x{23a}\x{23a}\x{23a})?\1/8i | |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} | |
| |
/(ȺȺȺ)?\1/8i | |
ȺȺȺⱥⱥ | |
| |
/(\x{23a}\x{23a}\x{23a})?\1/8i | |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} | |
| |
/(ȺȺȺ)?\1/8i | |
ȺȺȺⱥⱥⱥ | |
| |
/(\x{23a}\x{23a}\x{23a})\1/8i | |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} | |
| |
/(ȺȺȺ)\1/8i | |
ȺȺȺⱥⱥ | |
| |
/(\x{23a}\x{23a}\x{23a})\1/8i | |
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} | |
| |
/(ȺȺȺ)\1/8i | |
ȺȺȺⱥⱥⱥ | |
| |
/(\x{2c65}\x{2c65})\1/8i | |
\x{2c65}\x{2c65}\x{23a}\x{23a} | |
| |
/(ⱥⱥ)\1/8i | |
ⱥⱥȺȺ | |
| |
/(\x{23a}\x{23a}\x{23a})\1Y/8i | |
X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ | |
| |
/(\x{2c65}\x{2c65})\1Y/8i | |
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ | |
| |
/-- --/ | |
| |
/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ | |
| |
/^[\p{Batak}]/8 | |
\x{1bc0} | |
\x{1bff} | |
** Failers | |
\x{1bf4} | |
| |
/^[\p{Brahmi}]/8 | |
\x{11000} | |
\x{1106f} | |
** Failers | |
\x{1104e} | |
| |
/^[\p{Mandaic}]/8 | |
\x{840} | |
\x{85e} | |
** Failers | |
\x{85c} | |
\x{85d} | |
| |
/-- --/ | |
| |
/(\X*)(.)/s8 | |
A\x{300} | |
| |
/^S(\X*)e(\X*)$/8 | |
Stéréo | |
| |
/^\X/8 | |
́réo | |
| |
/^a\X41z/<JS> | |
aX41z | |
*** Failers | |
aAz | |
| |
/(?<=ab\Cde)X/8 | |
|
|
/-- End of testinput13 --/ |
/-- End of testinput13 --/ |