Annotation of embedaddon/pcre/pcre_newline.c, revision 1.1.1.3
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
1.1.1.2 misho 9: Copyright (c) 1997-2012 University of Cambridge
1.1 misho 10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains internal functions for testing newlines when more than
42: one kind of newline is to be recognized. When a newline is found, its length is
43: returned. In principle, we could implement several newline "types", each
44: referring to a different set of newline characters. At present, PCRE supports
45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
47: http://unicode.org/unicode/reports/tr18/. */
48:
49:
50: #ifdef HAVE_CONFIG_H
51: #include "config.h"
52: #endif
53:
54: #include "pcre_internal.h"
55:
56:
57:
58: /*************************************************
59: * Check for newline at given position *
60: *************************************************/
61:
62: /* It is guaranteed that the initial value of ptr is less than the end of the
63: string that is being processed.
64:
65: Arguments:
66: ptr pointer to possible newline
67: type the newline type
68: endptr pointer to the end of the string
69: lenptr where to return the length
1.1.1.2 misho 70: utf TRUE if in utf mode
1.1 misho 71:
72: Returns: TRUE or FALSE
73: */
74:
75: BOOL
1.1.1.2 misho 76: PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
77: BOOL utf)
1.1 misho 78: {
1.1.1.3 ! misho 79: pcre_uint32 c;
1.1.1.2 misho 80: (void)utf;
81: #ifdef SUPPORT_UTF
82: if (utf)
83: {
84: GETCHAR(c, ptr);
85: }
86: else
87: #endif /* SUPPORT_UTF */
88: c = *ptr;
1.1 misho 89:
1.1.1.3 ! misho 90: /* Note that this function is called only for ANY or ANYCRLF. */
! 91:
1.1 misho 92: if (type == NLTYPE_ANYCRLF) switch(c)
93: {
1.1.1.3 ! misho 94: case CHAR_LF: *lenptr = 1; return TRUE;
! 95: case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
! 96: return TRUE;
1.1 misho 97: default: return FALSE;
98: }
99:
100: /* NLTYPE_ANY */
101:
102: else switch(c)
103: {
1.1.1.3 ! misho 104: #ifdef EBCDIC
! 105: case CHAR_NEL:
! 106: #endif
! 107: case CHAR_LF:
! 108: case CHAR_VT:
! 109: case CHAR_FF: *lenptr = 1; return TRUE;
! 110:
! 111: case CHAR_CR:
! 112: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
! 113: return TRUE;
! 114:
! 115: #ifndef EBCDIC
1.1.1.2 misho 116: #ifdef COMPILE_PCRE8
1.1.1.3 ! misho 117: case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
1.1 misho 118: case 0x2028: /* LS */
119: case 0x2029: *lenptr = 3; return TRUE; /* PS */
1.1.1.3 ! misho 120: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
! 121: case CHAR_NEL:
1.1.1.2 misho 122: case 0x2028: /* LS */
123: case 0x2029: *lenptr = 1; return TRUE; /* PS */
1.1.1.3 ! misho 124: #endif /* COMPILE_PCRE8 */
! 125: #endif /* Not EBCDIC */
! 126:
1.1 misho 127: default: return FALSE;
128: }
129: }
130:
131:
132:
133: /*************************************************
134: * Check for newline at previous position *
135: *************************************************/
136:
137: /* It is guaranteed that the initial value of ptr is greater than the start of
138: the string that is being processed.
139:
140: Arguments:
141: ptr pointer to just after the possible newline
142: type the newline type
143: startptr pointer to the start of the string
144: lenptr where to return the length
1.1.1.2 misho 145: utf TRUE if in utf mode
1.1 misho 146:
147: Returns: TRUE or FALSE
148: */
149:
150: BOOL
1.1.1.2 misho 151: PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
152: BOOL utf)
1.1 misho 153: {
1.1.1.3 ! misho 154: pcre_uint32 c;
1.1.1.2 misho 155: (void)utf;
1.1 misho 156: ptr--;
1.1.1.2 misho 157: #ifdef SUPPORT_UTF
158: if (utf)
1.1 misho 159: {
160: BACKCHAR(ptr);
161: GETCHAR(c, ptr);
162: }
1.1.1.2 misho 163: else
164: #endif /* SUPPORT_UTF */
165: c = *ptr;
1.1 misho 166:
1.1.1.3 ! misho 167: /* Note that this function is called only for ANY or ANYCRLF. */
! 168:
1.1 misho 169: if (type == NLTYPE_ANYCRLF) switch(c)
170: {
1.1.1.3 ! misho 171: case CHAR_LF:
! 172: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
! 173: return TRUE;
! 174:
! 175: case CHAR_CR: *lenptr = 1; return TRUE;
1.1 misho 176: default: return FALSE;
177: }
178:
1.1.1.3 ! misho 179: /* NLTYPE_ANY */
! 180:
1.1 misho 181: else switch(c)
182: {
1.1.1.3 ! misho 183: case CHAR_LF:
! 184: *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
! 185: return TRUE;
! 186:
! 187: #ifdef EBCDIC
! 188: case CHAR_NEL:
! 189: #endif
! 190: case CHAR_VT:
! 191: case CHAR_FF:
! 192: case CHAR_CR: *lenptr = 1; return TRUE;
! 193:
! 194: #ifndef EBCDIC
1.1.1.2 misho 195: #ifdef COMPILE_PCRE8
1.1.1.3 ! misho 196: case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
! 197: case 0x2028: /* LS */
! 198: case 0x2029: *lenptr = 3; return TRUE; /* PS */
! 199: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
! 200: case CHAR_NEL:
1.1.1.2 misho 201: case 0x2028: /* LS */
202: case 0x2029: *lenptr = 1; return TRUE; /* PS */
1.1.1.3 ! misho 203: #endif /* COMPILE_PCRE8 */
! 204: #endif /* NotEBCDIC */
! 205:
1.1 misho 206: default: return FALSE;
207: }
208: }
209:
210: /* End of pcre_newline.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>