File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / testdata / testinput17
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:50:25 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_30, HEAD
pcre

/-- This set of tests is for the 16-bit library's basic (non-UTF-16) features 
    that are not compatible with the 8-bit library, or which give different 
    output in 16-bit mode. --/

/a\Cb/
    aXb
    a\nb
  
/-- Check maximum non-UTF character size --/

/\x{ffff}/
    A\x{ffff}B

/\x{10000}/ 

/[^\x{c4}]/DZ

  
/\x{100}/I

/  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*                          # optional leading comment
(?:    (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
)                    # initial word
(?:  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
)  )* # further okay, if led by a period
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  @  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*    (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                           # initial subdomain
(?:                                  #
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.                        # if led by a period...
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                     #   ...further okay
)*
# address
|                     #  or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
)             # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
\(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)       |  # comments, or...

" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
# quoted strings
)*
<  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*                     # leading <
(?:  @  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*    (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                           # initial subdomain
(?:                                  #
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.                        # if led by a period...
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                     #   ...further okay
)*

(?:  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  ,  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  @  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*    (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                           # initial subdomain
(?:                                  #
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.                        # if led by a period...
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                     #   ...further okay
)*
)* # further okay, if led by comma
:                                # closing colon
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  )? #       optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
)                    # initial word
(?:  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?:                      # opening quote...
[^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
|                     #    or
\\ [^\x80-\xff]           #   Escaped something (something != CR)
)* "  # closing quote
)  )* # further okay, if led by a period
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  @  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*    (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                           # initial subdomain
(?:                                  #
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  \.                        # if led by a period...
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*   (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|   \[                         # [
(?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
\]                        #           ]
)                     #   ...further okay
)*
#       address spec
(?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*  > #                  trailing >
# name and address
)  (?: [\040\t] |  \(
(?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
\)  )*                       # optional trailing comment
/xSI

/[\h]/BZ
    >\x09<

/[\h]+/BZ
    >\x09\x20\xa0<

/[\v]/BZ

/[\H]/BZ

/[^\h]/BZ

/[\V]/BZ

/[\x0a\V]/BZ

/\h+/SI
    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
    \x{3001}\x{2fff}\x{200a}\xa0\x{2000}

/[\h\x{dc00}]+/BZSI
    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
    \x{3001}\x{2fff}\x{200a}\xa0\x{2000}

/\H+/SI
    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
    \x{2000}\x{200a}\x{1fff}\x{200b}
    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
    \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}

/[\H\x{d800}]+/BZSI
    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
    \x{2000}\x{200a}\x{1fff}\x{200b}
    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
    \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}

/\v+/SI
    \x{2027}\x{2030}\x{2028}\x{2029}
    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d

/[\v\x{dc00}]+/BZSI
    \x{2027}\x{2030}\x{2028}\x{2029}
    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d

/\V+/SI
    \x{2028}\x{2029}\x{2027}\x{2030}
    \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86

/[\V\x{d800}]+/BZSI
    \x{2028}\x{2029}\x{2027}\x{2030}
    \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86

/\R+/SI<bsr_unicode>
    \x{2027}\x{2030}\x{2028}\x{2029}
    \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d

/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
    \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}

/-- End of testinput17 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>