Annotation of embedaddon/pcre/pcre_newline.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2009 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains internal functions for testing newlines when more than
42: one kind of newline is to be recognized. When a newline is found, its length is
43: returned. In principle, we could implement several newline "types", each
44: referring to a different set of newline characters. At present, PCRE supports
45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
47: http://unicode.org/unicode/reports/tr18/. */
48:
49:
50: #ifdef HAVE_CONFIG_H
51: #include "config.h"
52: #endif
53:
54: #include "pcre_internal.h"
55:
56:
57:
58: /*************************************************
59: * Check for newline at given position *
60: *************************************************/
61:
62: /* It is guaranteed that the initial value of ptr is less than the end of the
63: string that is being processed.
64:
65: Arguments:
66: ptr pointer to possible newline
67: type the newline type
68: endptr pointer to the end of the string
69: lenptr where to return the length
70: utf8 TRUE if in utf8 mode
71:
72: Returns: TRUE or FALSE
73: */
74:
75: BOOL
76: _pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
77: {
78: int c;
79: if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
80:
81: if (type == NLTYPE_ANYCRLF) switch(c)
82: {
83: case 0x000a: *lenptr = 1; return TRUE; /* LF */
84: case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
85: return TRUE; /* CR */
86: default: return FALSE;
87: }
88:
89: /* NLTYPE_ANY */
90:
91: else switch(c)
92: {
93: case 0x000a: /* LF */
94: case 0x000b: /* VT */
95: case 0x000c: *lenptr = 1; return TRUE; /* FF */
96: case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
97: return TRUE; /* CR */
98: case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
99: case 0x2028: /* LS */
100: case 0x2029: *lenptr = 3; return TRUE; /* PS */
101: default: return FALSE;
102: }
103: }
104:
105:
106:
107: /*************************************************
108: * Check for newline at previous position *
109: *************************************************/
110:
111: /* It is guaranteed that the initial value of ptr is greater than the start of
112: the string that is being processed.
113:
114: Arguments:
115: ptr pointer to possible newline
116: type the newline type
117: startptr pointer to the start of the string
118: lenptr where to return the length
119: utf8 TRUE if in utf8 mode
120:
121: Returns: TRUE or FALSE
122: */
123:
124: BOOL
125: _pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
126: {
127: int c;
128: ptr--;
129: #ifdef SUPPORT_UTF8
130: if (utf8)
131: {
132: BACKCHAR(ptr);
133: GETCHAR(c, ptr);
134: }
135: else c = *ptr;
136: #else /* no UTF-8 support */
137: c = *ptr;
138: #endif /* SUPPORT_UTF8 */
139:
140: if (type == NLTYPE_ANYCRLF) switch(c)
141: {
142: case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
143: return TRUE; /* LF */
144: case 0x000d: *lenptr = 1; return TRUE; /* CR */
145: default: return FALSE;
146: }
147:
148: else switch(c)
149: {
150: case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
151: return TRUE; /* LF */
152: case 0x000b: /* VT */
153: case 0x000c: /* FF */
154: case 0x000d: *lenptr = 1; return TRUE; /* CR */
155: case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
156: case 0x2028: /* LS */
157: case 0x2029: *lenptr = 3; return TRUE; /* PS */
158: default: return FALSE;
159: }
160: }
161:
162: /* End of pcre_newline.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>