1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2012 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains internal functions for testing newlines when more than
42: one kind of newline is to be recognized. When a newline is found, its length is
43: returned. In principle, we could implement several newline "types", each
44: referring to a different set of newline characters. At present, PCRE supports
45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
47: http://unicode.org/unicode/reports/tr18/. */
48:
49:
50: #ifdef HAVE_CONFIG_H
51: #include "config.h"
52: #endif
53:
54: #include "pcre_internal.h"
55:
56:
57:
58: /*************************************************
59: * Check for newline at given position *
60: *************************************************/
61:
62: /* It is guaranteed that the initial value of ptr is less than the end of the
63: string that is being processed.
64:
65: Arguments:
66: ptr pointer to possible newline
67: type the newline type
68: endptr pointer to the end of the string
69: lenptr where to return the length
70: utf TRUE if in utf mode
71:
72: Returns: TRUE or FALSE
73: */
74:
75: BOOL
76: PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
77: BOOL utf)
78: {
79: pcre_uint32 c;
80: (void)utf;
81: #ifdef SUPPORT_UTF
82: if (utf)
83: {
84: GETCHAR(c, ptr);
85: }
86: else
87: #endif /* SUPPORT_UTF */
88: c = *ptr;
89:
90: /* Note that this function is called only for ANY or ANYCRLF. */
91:
92: if (type == NLTYPE_ANYCRLF) switch(c)
93: {
94: case CHAR_LF: *lenptr = 1; return TRUE;
95: case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
96: return TRUE;
97: default: return FALSE;
98: }
99:
100: /* NLTYPE_ANY */
101:
102: else switch(c)
103: {
104: #ifdef EBCDIC
105: case CHAR_NEL:
106: #endif
107: case CHAR_LF:
108: case CHAR_VT:
109: case CHAR_FF: *lenptr = 1; return TRUE;
110:
111: case CHAR_CR:
112: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
113: return TRUE;
114:
115: #ifndef EBCDIC
116: #ifdef COMPILE_PCRE8
117: case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
118: case 0x2028: /* LS */
119: case 0x2029: *lenptr = 3; return TRUE; /* PS */
120: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
121: case CHAR_NEL:
122: case 0x2028: /* LS */
123: case 0x2029: *lenptr = 1; return TRUE; /* PS */
124: #endif /* COMPILE_PCRE8 */
125: #endif /* Not EBCDIC */
126:
127: default: return FALSE;
128: }
129: }
130:
131:
132:
133: /*************************************************
134: * Check for newline at previous position *
135: *************************************************/
136:
137: /* It is guaranteed that the initial value of ptr is greater than the start of
138: the string that is being processed.
139:
140: Arguments:
141: ptr pointer to possible newline
142: type the newline type
143: startptr pointer to the start of the string
144: lenptr where to return the length
145: utf TRUE if in utf mode
146:
147: Returns: TRUE or FALSE
148: */
149:
150: BOOL
151: PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
152: BOOL utf)
153: {
154: pcre_uint32 c;
155: (void)utf;
156: ptr--;
157: #ifdef SUPPORT_UTF
158: if (utf)
159: {
160: BACKCHAR(ptr);
161: GETCHAR(c, ptr);
162: }
163: else
164: #endif /* SUPPORT_UTF */
165: c = *ptr;
166:
167: /* Note that this function is called only for ANY or ANYCRLF. */
168:
169: if (type == NLTYPE_ANYCRLF) switch(c)
170: {
171: case CHAR_LF:
172: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
173: return TRUE;
174:
175: case CHAR_CR: *lenptr = 1; return TRUE;
176: default: return FALSE;
177: }
178:
179: /* NLTYPE_ANY */
180:
181: else switch(c)
182: {
183: case CHAR_LF:
184: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
185: return TRUE;
186:
187: #ifdef EBCDIC
188: case CHAR_NEL:
189: #endif
190: case CHAR_VT:
191: case CHAR_FF:
192: case CHAR_CR: *lenptr = 1; return TRUE;
193:
194: #ifndef EBCDIC
195: #ifdef COMPILE_PCRE8
196: case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
197: case 0x2028: /* LS */
198: case 0x2029: *lenptr = 3; return TRUE; /* PS */
199: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
200: case CHAR_NEL:
201: case 0x2028: /* LS */
202: case 0x2029: *lenptr = 1; return TRUE; /* PS */
203: #endif /* COMPILE_PCRE8 */
204: #endif /* NotEBCDIC */
205:
206: default: return FALSE;
207: }
208: }
209:
210: /* End of pcre_newline.c */
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */