File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / testdata / testinput7
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:50:25 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_30, HEAD
pcre

/-- These tests for Unicode property support test PCRE's API and show some of
    the compiled code. They are not Perl-compatible. --/

/[\p{L}]/DZ

/[\p{^L}]/DZ

/[\P{L}]/DZ

/[\P{^L}]/DZ

/[abc\p{L}\x{0660}]/8DZ

/[\p{Nd}]/8DZ
    1234

/[\p{Nd}+-]+/8DZ
    1234
    12-34
    12+\x{661}-34  
    ** Failers
    abcd  

/[\x{105}-\x{109}]/8iDZ
    \x{104}
    \x{105}
    \x{109}  
    ** Failers
    \x{100}
    \x{10a} 
    
/[z-\x{100}]/8iDZ 
    Z
    z
    \x{39c}
    \x{178}
    |
    \x{80}
    \x{ff}
    \x{100}
    \x{101} 
    ** Failers
    \x{102}
    Y
    y           

/[z-\x{100}]/8DZi

/(?:[\PPa*]*){8,}/

/[\P{Any}]/BZ

/[\P{Any}\E]/BZ

/(\P{Yi}+\277)/

/(\P{Yi}+\277)?/

/(?<=\P{Yi}{3}A)X/

/\p{Yi}+(\P{Yi}+)(?1)/

/(\P{Yi}{2}\277)?/

/[\P{Yi}A]/

/[\P{Yi}\P{Yi}\P{Yi}A]/

/[^\P{Yi}A]/

/[^\P{Yi}\P{Yi}\P{Yi}A]/

/(\P{Yi}*\277)*/

/(\P{Yi}*?\277)*/

/(\p{Yi}*+\277)*/

/(\P{Yi}?\277)*/

/(\P{Yi}??\277)*/

/(\p{Yi}?+\277)*/

/(\P{Yi}{0,3}\277)*/

/(\P{Yi}{0,3}?\277)*/

/(\p{Yi}{0,3}+\277)*/

/\p{Zl}{2,3}+/8BZ
    \xe2\x80\xa8\xe2\x80\xa8
    \x{2028}\x{2028}\x{2028}
    
/\p{Zl}/8BZ

/\p{Lu}{3}+/8BZ

/\pL{2}+/8BZ

/\p{Cc}{2}+/8BZ

/^\p{Cs}/8
    \?\x{dfff}
    ** Failers
    \x{09f} 
  
/^\p{Sc}+/8
    $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
    \x{9f2}
    ** Failers
    X
    \x{2c2}
  
/^\p{Zs}/8
    \ \
    \x{a0}
    \x{1680}
    \x{180e}
    \x{2000}
    \x{2001}     
    ** Failers
    \x{2028}
    \x{200d} 
  
/-- These four are here rather than in test 6 because Perl has problems with
    the negative versions of the properties. --/
      
/\p{^Lu}/8i
    1234
    ** Failers
    ABC 

/\P{Lu}/8i
    1234
    ** Failers
    ABC 

/\p{Ll}/8i 
    a
    Az
    ** Failers
    ABC   

/\p{Lu}/8i
    A
    a\x{10a0}B 
    ** Failers 
    a
    \x{1d00}  

/[\x{c0}\x{391}]/8i
    \x{c0}
    \x{e0} 

/-- The next two are special cases where the lengths of the different cases of
the same character differ. The first went wrong with heap frame storage; the
second was broken in all cases. --/

/^\x{023a}+?(\x{0130}+)/8i
  \x{023a}\x{2c65}\x{0130}
  
/^\x{023a}+([^X])/8i
  \x{023a}\x{2c65}X

/\x{c0}+\x{116}+/8i
    \x{c0}\x{e0}\x{116}\x{117}

/[\x{c0}\x{116}]+/8i
    \x{c0}\x{e0}\x{116}\x{117}

/(\x{de})\1/8i
    \x{de}\x{de}
    \x{de}\x{fe}
    \x{fe}\x{fe}
    \x{fe}\x{de}

/^\x{c0}$/8i
    \x{c0}
    \x{e0} 

/^\x{e0}$/8i
    \x{c0}
    \x{e0} 

/-- The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
will match it only with UCP support, because without that it has no notion
of case for anything other than the ASCII letters. --/ 

/((?i)[\x{c0}])/8
    \x{c0}
    \x{e0} 

/(?i:[\x{c0}])/8
    \x{c0}
    \x{e0} 

/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8
    
/^\X/8
    A
    A\x{300}BC 
    A\x{300}\x{301}\x{302}BC 
    *** Failers
    \x{300}  
    
/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/

/^\p{Xan}/8
    ABCD
    1234
    \x{6ca}
    \x{a6c}
    \x{10a7}   
    ** Failers
    _ABC   

/^\p{Xan}+/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    ** Failers
    _ABC   

/^\p{Xan}+?/8
    \x{6ca}\x{a6c}\x{10a7}_

/^\p{Xan}*/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    
/^\p{Xan}{2,9}/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    
/^\p{Xan}{2,9}?/8
    \x{6ca}\x{a6c}\x{10a7}_
    
/^[\p{Xan}]/8
    ABCD1234_
    1234abcd_
    \x{6ca}
    \x{a6c}
    \x{10a7}   
    ** Failers
    _ABC   
 
/^[\p{Xan}]+/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    ** Failers
    _ABC   

/^>\p{Xsp}/8
    >\x{1680}\x{2028}\x{0b}
    >\x{a0} 
    ** Failers
    \x{0b} 

/^>\p{Xsp}+/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}

/^>\p{Xsp}+?/8
    >\x{1680}\x{2028}\x{0b}

/^>\p{Xsp}*/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>\p{Xsp}{2,9}/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>\p{Xsp}{2,9}?/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>[\p{Xsp}]/8
    >\x{2028}\x{0b}
 
/^>[\p{Xsp}]+/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}

/^>\p{Xps}/8
    >\x{1680}\x{2028}\x{0b}
    >\x{a0} 
    ** Failers
    \x{0b} 

/^>\p{Xps}+/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}

/^>\p{Xps}+?/8
    >\x{1680}\x{2028}\x{0b}

/^>\p{Xps}*/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>\p{Xps}{2,9}/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>\p{Xps}{2,9}?/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    
/^>[\p{Xps}]/8
    >\x{2028}\x{0b}
 
/^>[\p{Xps}]+/8
    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}

/^\p{Xwd}/8
    ABCD
    1234
    \x{6ca}
    \x{a6c}
    \x{10a7}
    _ABC    
    ** Failers
    [] 

/^\p{Xwd}+/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_

/^\p{Xwd}+?/8
    \x{6ca}\x{a6c}\x{10a7}_

/^\p{Xwd}*/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    
/^\p{Xwd}{2,9}/8
    A_B12\x{6ca}\x{a6c}\x{10a7}
    
/^\p{Xwd}{2,9}?/8
    \x{6ca}\x{a6c}\x{10a7}_
    
/^[\p{Xwd}]/8
    ABCD1234_
    1234abcd_
    \x{6ca}
    \x{a6c}
    \x{10a7}   
    _ABC 
    ** Failers
    []   
 
/^[\p{Xwd}]+/8
    ABCD1234\x{6ca}\x{a6c}\x{10a7}_

/-- A check not in UTF-8 mode --/

/^[\p{Xwd}]+/
    ABCD1234_
    
/-- Some negative checks --/

/^[\P{Xwd}]+/8
    !.+\x{019}\x{35a}AB

/^[\p{^Xwd}]+/8
    !.+\x{019}\x{35a}AB

/[\D]/WBZ8
    1\x{3c8}2

/[\d]/WBZ8
    >\x{6f4}<

/[\S]/WBZ8
    \x{1680}\x{6f4}\x{1680}

/[\s]/WBZ8
    >\x{1680}<

/[\W]/WBZ8
    A\x{1712}B

/[\w]/WBZ8
    >\x{1723}<

/\D/WBZ8
    1\x{3c8}2

/\d/WBZ8
    >\x{6f4}<

/\S/WBZ8
    \x{1680}\x{6f4}\x{1680}

/\s/WBZ8
    >\x{1680}>

/\W/WBZ8
    A\x{1712}B

/\w/WBZ8
    >\x{1723}<

/[[:alpha:]]/WBZ

/[[:lower:]]/WBZ

/[[:upper:]]/WBZ

/[[:alnum:]]/WBZ

/[[:ascii:]]/WBZ

/[[:cntrl:]]/WBZ

/[[:digit:]]/WBZ

/[[:graph:]]/WBZ

/[[:print:]]/WBZ

/[[:punct:]]/WBZ

/[[:space:]]/WBZ

/[[:word:]]/WBZ

/[[:xdigit:]]/WBZ

/-- Unicode properties for \b and \B --/

/\b...\B/8W
    abc_
    \x{37e}abc\x{376} 
    \x{37e}\x{376}\x{371}\x{393}\x{394} 
    !\x{c0}++\x{c1}\x{c2} 
    !\x{c0}+++++ 

/-- Without PCRE_UCP, non-ASCII always fail, even if < 256  --/

/\b...\B/8
    abc_
    ** Failers 
    \x{37e}abc\x{376} 
    \x{37e}\x{376}\x{371}\x{393}\x{394} 
    !\x{c0}++\x{c1}\x{c2} 
    !\x{c0}+++++ 

/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties  --/

/\b...\B/W
    abc_
    !\x{c0}++\x{c1}\x{c2} 
    !\x{c0}+++++ 

/-- Some of these are silly, but they check various combinations --/

/[[:^alpha:][:^cntrl:]]+/8WBZ
    123
    abc 

/[[:^cntrl:][:^alpha:]]+/8WBZ
    123
    abc 

/[[:alpha:]]+/8WBZ
    abc

/[[:^alpha:]\S]+/8WBZ
    123
    abc 

/[^\d]+/8WBZ
    abc123
    abc\x{123}
    \x{660}abc   

/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ

/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ

/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ

/\p{Han}+X\p{Greek}+\x{370}/BZ8

/\p{Xan}+!\p{Xan}+A/BZ

/\p{Xsp}+!\p{Xsp}\t/BZ

/\p{Xps}+!\p{Xps}\t/BZ

/\p{Xwd}+!\p{Xwd}_/BZ

/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ

/-- These behaved oddly in Perl, so they are kept in this test --/

/(\x{23a}\x{23a}\x{23a})?\1/8i
    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}

/(ȺȺȺ)?\1/8i
    ȺȺȺⱥⱥ

/(\x{23a}\x{23a}\x{23a})?\1/8i
    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}

/(ȺȺȺ)?\1/8i
    ȺȺȺⱥⱥⱥ

/(\x{23a}\x{23a}\x{23a})\1/8i
    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}

/(ȺȺȺ)\1/8i
    ȺȺȺⱥⱥ

/(\x{23a}\x{23a}\x{23a})\1/8i
    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}

/(ȺȺȺ)\1/8i
    ȺȺȺⱥⱥⱥ

/(\x{2c65}\x{2c65})\1/8i
    \x{2c65}\x{2c65}\x{23a}\x{23a}
    
/(ⱥⱥ)\1/8i
    ⱥⱥȺȺ 
    
/(\x{23a}\x{23a}\x{23a})\1Y/8i
    X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ

/(\x{2c65}\x{2c65})\1Y/8i
    X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ

/-- --/ 

/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/

/^[\p{Batak}]/8
    \x{1bc0}
    \x{1bff}
    ** Failers
    \x{1bf4}
    
/^[\p{Brahmi}]/8
    \x{11000}
    \x{1106f}
    ** Failers
    \x{1104e}
    
/^[\p{Mandaic}]/8
    \x{840}
    \x{85e}
    ** Failers
    \x{85c}
    \x{85d}    

/-- --/ 

/(\X*)(.)/s8
    A\x{300}

/^S(\X*)e(\X*)$/8
    Stéréo
    
/^\X/8 
    ́réo

/^a\X41z/<JS>
    aX41z
    *** Failers
    aAz

/(?<=ab\Cde)X/8

/-- End of testinput7 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>