Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_xclass.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2010 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains an internal function that is used to match an extended
42: class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43:
44:
45: #include "config.h"
46:
47: #include "pcre_internal.h"
48:
49:
50: /*************************************************
51: * Match character against an XCLASS *
52: *************************************************/
53:
54: /* This function is called to match a character against an extended class that
55: might contain values > 255 and/or Unicode properties.
56:
57: Arguments:
58: c the character
59: data points to the flag byte of the XCLASS data
60:
61: Returns: TRUE if character matches, else FALSE
62: */
63:
64: BOOL
65: _pcre_xclass(int c, const uschar *data)
66: {
67: int t;
68: BOOL negated = (*data & XCL_NOT) != 0;
69:
70: /* Character values < 256 are matched against a bitmap, if one is present. If
71: not, we still carry on, because there may be ranges that start below 256 in the
72: additional data. */
73:
74: if (c < 256)
75: {
76: if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
77: return !negated; /* char found */
78: }
79:
80: /* First skip the bit map if present. Then match against the list of Unicode
81: properties or large chars or ranges that end with a large char. We won't ever
82: encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
83:
84: if ((*data++ & XCL_MAP) != 0) data += 32;
85:
86: while ((t = *data++) != XCL_END)
87: {
88: int x, y;
89: if (t == XCL_SINGLE)
90: {
91: GETCHARINC(x, data);
92: if (c == x) return !negated;
93: }
94: else if (t == XCL_RANGE)
95: {
96: GETCHARINC(x, data);
97: GETCHARINC(y, data);
98: if (c >= x && c <= y) return !negated;
99: }
100:
101: #ifdef SUPPORT_UCP
102: else /* XCL_PROP & XCL_NOTPROP */
103: {
104: const ucd_record *prop = GET_UCD(c);
105:
106: switch(*data)
107: {
108: case PT_ANY:
109: if (t == XCL_PROP) return !negated;
110: break;
111:
112: case PT_LAMP:
113: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
114: prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
115: break;
116:
117: case PT_GC:
118: if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
119: return !negated;
120: break;
121:
122: case PT_PC:
123: if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
124: break;
125:
126: case PT_SC:
127: if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
128: break;
129:
130: case PT_ALNUM:
131: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
132: _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
133: return !negated;
134: break;
135:
136: case PT_SPACE: /* Perl space */
137: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
138: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
139: == (t == XCL_PROP))
140: return !negated;
141: break;
142:
143: case PT_PXSPACE: /* POSIX space */
144: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
145: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
146: c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
147: return !negated;
148: break;
149:
150: case PT_WORD:
151: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
152: _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
153: == (t == XCL_PROP))
154: return !negated;
155: break;
156:
157: /* This should never occur, but compilers may mutter if there is no
158: default. */
159:
160: default:
161: return FALSE;
162: }
163:
164: data += 2;
165: }
166: #endif /* SUPPORT_UCP */
167: }
168:
169: return negated; /* char did not match */
170: }
171:
172: /* End of pcre_xclass.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>