Annotation of embedaddon/pcre/testdata/testinput17, revision 1.1.1.2
1.1 misho 1: /-- This set of tests is for the 16-bit library's basic (non-UTF-16) features
2: that are not compatible with the 8-bit library, or which give different
3: output in 16-bit mode. --/
4:
5: /a\Cb/
6: aXb
7: a\nb
8:
9: /-- Check maximum non-UTF character size --/
10:
11: /\x{ffff}/
12: A\x{ffff}B
13:
14: /\x{10000}/
15:
16: /[^\x{c4}]/DZ
17:
18:
19: /\x{100}/I
20:
21: / (?: [\040\t] | \(
22: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
23: \) )* # optional leading comment
24: (?: (?:
25: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
26: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
27: |
28: " (?: # opening quote...
29: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
30: | # or
31: \\ [^\x80-\xff] # Escaped something (something != CR)
32: )* " # closing quote
33: ) # initial word
34: (?: (?: [\040\t] | \(
35: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
36: \) )* \. (?: [\040\t] | \(
37: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
38: \) )* (?:
39: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
40: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
41: |
42: " (?: # opening quote...
43: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
44: | # or
45: \\ [^\x80-\xff] # Escaped something (something != CR)
46: )* " # closing quote
47: ) )* # further okay, if led by a period
48: (?: [\040\t] | \(
49: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
50: \) )* @ (?: [\040\t] | \(
51: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
52: \) )* (?:
53: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
54: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
55: | \[ # [
56: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
57: \] # ]
58: ) # initial subdomain
59: (?: #
60: (?: [\040\t] | \(
61: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
62: \) )* \. # if led by a period...
63: (?: [\040\t] | \(
64: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
65: \) )* (?:
66: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
67: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
68: | \[ # [
69: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
70: \] # ]
71: ) # ...further okay
72: )*
73: # address
74: | # or
75: (?:
76: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
77: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
78: |
79: " (?: # opening quote...
80: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
81: | # or
82: \\ [^\x80-\xff] # Escaped something (something != CR)
83: )* " # closing quote
84: ) # one word, optionally followed by....
85: (?:
86: [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
87: \(
88: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
89: \) | # comments, or...
90:
91: " (?: # opening quote...
92: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
93: | # or
94: \\ [^\x80-\xff] # Escaped something (something != CR)
95: )* " # closing quote
96: # quoted strings
97: )*
98: < (?: [\040\t] | \(
99: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
100: \) )* # leading <
101: (?: @ (?: [\040\t] | \(
102: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
103: \) )* (?:
104: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
105: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
106: | \[ # [
107: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
108: \] # ]
109: ) # initial subdomain
110: (?: #
111: (?: [\040\t] | \(
112: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
113: \) )* \. # if led by a period...
114: (?: [\040\t] | \(
115: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
116: \) )* (?:
117: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
118: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
119: | \[ # [
120: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
121: \] # ]
122: ) # ...further okay
123: )*
124:
125: (?: (?: [\040\t] | \(
126: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
127: \) )* , (?: [\040\t] | \(
128: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
129: \) )* @ (?: [\040\t] | \(
130: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
131: \) )* (?:
132: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
133: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
134: | \[ # [
135: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
136: \] # ]
137: ) # initial subdomain
138: (?: #
139: (?: [\040\t] | \(
140: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
141: \) )* \. # if led by a period...
142: (?: [\040\t] | \(
143: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
144: \) )* (?:
145: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
146: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
147: | \[ # [
148: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
149: \] # ]
150: ) # ...further okay
151: )*
152: )* # further okay, if led by comma
153: : # closing colon
154: (?: [\040\t] | \(
155: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
156: \) )* )? # optional route
157: (?:
158: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
159: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
160: |
161: " (?: # opening quote...
162: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
163: | # or
164: \\ [^\x80-\xff] # Escaped something (something != CR)
165: )* " # closing quote
166: ) # initial word
167: (?: (?: [\040\t] | \(
168: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
169: \) )* \. (?: [\040\t] | \(
170: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
171: \) )* (?:
172: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
173: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
174: |
175: " (?: # opening quote...
176: [^\\\x80-\xff\n\015"] # Anything except backslash and quote
177: | # or
178: \\ [^\x80-\xff] # Escaped something (something != CR)
179: )* " # closing quote
180: ) )* # further okay, if led by a period
181: (?: [\040\t] | \(
182: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
183: \) )* @ (?: [\040\t] | \(
184: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
185: \) )* (?:
186: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
187: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
188: | \[ # [
189: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
190: \] # ]
191: ) # initial subdomain
192: (?: #
193: (?: [\040\t] | \(
194: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
195: \) )* \. # if led by a period...
196: (?: [\040\t] | \(
197: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
198: \) )* (?:
199: [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
200: (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
201: | \[ # [
202: (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
203: \] # ]
204: ) # ...further okay
205: )*
206: # address spec
207: (?: [\040\t] | \(
208: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
209: \) )* > # trailing >
210: # name and address
211: ) (?: [\040\t] | \(
212: (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
213: \) )* # optional trailing comment
214: /xSI
215:
216: /[\h]/BZ
217: >\x09<
218:
219: /[\h]+/BZ
220: >\x09\x20\xa0<
221:
222: /[\v]/BZ
223:
224: /[\H]/BZ
225:
226: /[^\h]/BZ
227:
228: /[\V]/BZ
229:
230: /[\x0a\V]/BZ
231:
232: /\h+/SI
233: \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
234: \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
235:
236: /[\h\x{dc00}]+/BZSI
237: \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
238: \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
239:
240: /\H+/SI
241: \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
242: \x{2000}\x{200a}\x{1fff}\x{200b}
243: \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
244: \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
245:
246: /[\H\x{d800}]+/BZSI
247: \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
248: \x{2000}\x{200a}\x{1fff}\x{200b}
249: \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
250: \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
251:
252: /\v+/SI
253: \x{2027}\x{2030}\x{2028}\x{2029}
254: \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
255:
256: /[\v\x{dc00}]+/BZSI
257: \x{2027}\x{2030}\x{2028}\x{2029}
258: \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
259:
260: /\V+/SI
261: \x{2028}\x{2029}\x{2027}\x{2030}
262: \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
263:
264: /[\V\x{d800}]+/BZSI
265: \x{2028}\x{2029}\x{2027}\x{2030}
266: \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
267:
268: /\R+/SI<bsr_unicode>
269: \x{2027}\x{2030}\x{2028}\x{2029}
270: \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
271:
272: /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
273: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
274:
1.1.1.2 ! misho 275: /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
! 276:
! 277: /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
! 278:
! 279: /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
! 280:
! 281: /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
! 282:
! 283: /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
! 284: XX
! 285:
! 286: /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
! 287: XX
! 288:
! 289: /\u0100/<JS>BZ
! 290:
! 291: /[\u0100-\u0200]/<JS>BZ
! 292:
! 293: /\ud800/<JS>BZ
! 294:
1.1 misho 295: /-- End of testinput17 --/
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>