Annotation of embedaddon/pcre/testdata/testinput17, revision 1.1.1.2

1.1       misho       1: /-- This set of tests is for the 16-bit library's basic (non-UTF-16) features 
                      2:     that are not compatible with the 8-bit library, or which give different 
                      3:     output in 16-bit mode. --/
                      4: 
                      5: /a\Cb/
                      6:     aXb
                      7:     a\nb
                      8:   
                      9: /-- Check maximum non-UTF character size --/
                     10: 
                     11: /\x{ffff}/
                     12:     A\x{ffff}B
                     13: 
                     14: /\x{10000}/ 
                     15: 
                     16: /[^\x{c4}]/DZ
                     17: 
                     18:   
                     19: /\x{100}/I
                     20: 
                     21: /  (?: [\040\t] |  \(
                     22: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     23: \)  )*                          # optional leading comment
                     24: (?:    (?:
                     25: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                     26: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                     27: |
                     28: " (?:                      # opening quote...
                     29: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                     30: |                     #    or
                     31: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                     32: )* "  # closing quote
                     33: )                    # initial word
                     34: (?:  (?: [\040\t] |  \(
                     35: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     36: \)  )*  \.  (?: [\040\t] |  \(
                     37: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     38: \)  )*   (?:
                     39: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                     40: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                     41: |
                     42: " (?:                      # opening quote...
                     43: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                     44: |                     #    or
                     45: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                     46: )* "  # closing quote
                     47: )  )* # further okay, if led by a period
                     48: (?: [\040\t] |  \(
                     49: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     50: \)  )*  @  (?: [\040\t] |  \(
                     51: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     52: \)  )*    (?:
                     53: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                     54: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                     55: |   \[                         # [
                     56: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                     57: \]                        #           ]
                     58: )                           # initial subdomain
                     59: (?:                                  #
                     60: (?: [\040\t] |  \(
                     61: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     62: \)  )*  \.                        # if led by a period...
                     63: (?: [\040\t] |  \(
                     64: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     65: \)  )*   (?:
                     66: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                     67: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                     68: |   \[                         # [
                     69: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                     70: \]                        #           ]
                     71: )                     #   ...further okay
                     72: )*
                     73: # address
                     74: |                     #  or
                     75: (?:
                     76: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                     77: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                     78: |
                     79: " (?:                      # opening quote...
                     80: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                     81: |                     #    or
                     82: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                     83: )* "  # closing quote
                     84: )             # one word, optionally followed by....
                     85: (?:
                     86: [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
                     87: \(
                     88: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                     89: \)       |  # comments, or...
                     90: 
                     91: " (?:                      # opening quote...
                     92: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                     93: |                     #    or
                     94: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                     95: )* "  # closing quote
                     96: # quoted strings
                     97: )*
                     98: <  (?: [\040\t] |  \(
                     99: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    100: \)  )*                     # leading <
                    101: (?:  @  (?: [\040\t] |  \(
                    102: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    103: \)  )*    (?:
                    104: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    105: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    106: |   \[                         # [
                    107: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    108: \]                        #           ]
                    109: )                           # initial subdomain
                    110: (?:                                  #
                    111: (?: [\040\t] |  \(
                    112: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    113: \)  )*  \.                        # if led by a period...
                    114: (?: [\040\t] |  \(
                    115: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    116: \)  )*   (?:
                    117: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    118: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    119: |   \[                         # [
                    120: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    121: \]                        #           ]
                    122: )                     #   ...further okay
                    123: )*
                    124: 
                    125: (?:  (?: [\040\t] |  \(
                    126: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    127: \)  )*  ,  (?: [\040\t] |  \(
                    128: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    129: \)  )*  @  (?: [\040\t] |  \(
                    130: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    131: \)  )*    (?:
                    132: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    133: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    134: |   \[                         # [
                    135: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    136: \]                        #           ]
                    137: )                           # initial subdomain
                    138: (?:                                  #
                    139: (?: [\040\t] |  \(
                    140: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    141: \)  )*  \.                        # if led by a period...
                    142: (?: [\040\t] |  \(
                    143: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    144: \)  )*   (?:
                    145: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    146: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    147: |   \[                         # [
                    148: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    149: \]                        #           ]
                    150: )                     #   ...further okay
                    151: )*
                    152: )* # further okay, if led by comma
                    153: :                                # closing colon
                    154: (?: [\040\t] |  \(
                    155: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    156: \)  )*  )? #       optional route
                    157: (?:
                    158: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    159: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    160: |
                    161: " (?:                      # opening quote...
                    162: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                    163: |                     #    or
                    164: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                    165: )* "  # closing quote
                    166: )                    # initial word
                    167: (?:  (?: [\040\t] |  \(
                    168: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    169: \)  )*  \.  (?: [\040\t] |  \(
                    170: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    171: \)  )*   (?:
                    172: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    173: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    174: |
                    175: " (?:                      # opening quote...
                    176: [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
                    177: |                     #    or
                    178: \\ [^\x80-\xff]           #   Escaped something (something != CR)
                    179: )* "  # closing quote
                    180: )  )* # further okay, if led by a period
                    181: (?: [\040\t] |  \(
                    182: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    183: \)  )*  @  (?: [\040\t] |  \(
                    184: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    185: \)  )*    (?:
                    186: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    187: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    188: |   \[                         # [
                    189: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    190: \]                        #           ]
                    191: )                           # initial subdomain
                    192: (?:                                  #
                    193: (?: [\040\t] |  \(
                    194: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    195: \)  )*  \.                        # if led by a period...
                    196: (?: [\040\t] |  \(
                    197: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    198: \)  )*   (?:
                    199: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
                    200: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
                    201: |   \[                         # [
                    202: (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
                    203: \]                        #           ]
                    204: )                     #   ...further okay
                    205: )*
                    206: #       address spec
                    207: (?: [\040\t] |  \(
                    208: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    209: \)  )*  > #                  trailing >
                    210: # name and address
                    211: )  (?: [\040\t] |  \(
                    212: (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
                    213: \)  )*                       # optional trailing comment
                    214: /xSI
                    215: 
                    216: /[\h]/BZ
                    217:     >\x09<
                    218: 
                    219: /[\h]+/BZ
                    220:     >\x09\x20\xa0<
                    221: 
                    222: /[\v]/BZ
                    223: 
                    224: /[\H]/BZ
                    225: 
                    226: /[^\h]/BZ
                    227: 
                    228: /[\V]/BZ
                    229: 
                    230: /[\x0a\V]/BZ
                    231: 
                    232: /\h+/SI
                    233:     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
                    234:     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
                    235: 
                    236: /[\h\x{dc00}]+/BZSI
                    237:     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
                    238:     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
                    239: 
                    240: /\H+/SI
                    241:     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
                    242:     \x{2000}\x{200a}\x{1fff}\x{200b}
                    243:     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
                    244:     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
                    245: 
                    246: /[\H\x{d800}]+/BZSI
                    247:     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
                    248:     \x{2000}\x{200a}\x{1fff}\x{200b}
                    249:     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
                    250:     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
                    251: 
                    252: /\v+/SI
                    253:     \x{2027}\x{2030}\x{2028}\x{2029}
                    254:     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
                    255: 
                    256: /[\v\x{dc00}]+/BZSI
                    257:     \x{2027}\x{2030}\x{2028}\x{2029}
                    258:     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
                    259: 
                    260: /\V+/SI
                    261:     \x{2028}\x{2029}\x{2027}\x{2030}
                    262:     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
                    263: 
                    264: /[\V\x{d800}]+/BZSI
                    265:     \x{2028}\x{2029}\x{2027}\x{2030}
                    266:     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
                    267: 
                    268: /\R+/SI<bsr_unicode>
                    269:     \x{2027}\x{2030}\x{2028}\x{2029}
                    270:     \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
                    271: 
                    272: /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
                    273:     \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
                    274: 
1.1.1.2 ! misho     275: /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
        !           276: 
        !           277: /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
        !           278: 
        !           279: /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
        !           280: 
        !           281: /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
        !           282: 
        !           283: /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
        !           284:     XX
        !           285:      
        !           286: /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
        !           287:     XX
        !           288: 
        !           289: /\u0100/<JS>BZ
        !           290: 
        !           291: /[\u0100-\u0200]/<JS>BZ
        !           292: 
        !           293: /\ud800/<JS>BZ
        !           294: 
1.1       misho     295: /-- End of testinput17 --/

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>