Annotation of embedaddon/pcre/pcre_xclass.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2010 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains an internal function that is used to match an extended
42: class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43:
44:
45: #ifdef HAVE_CONFIG_H
46: #include "config.h"
47: #endif
48:
49: #include "pcre_internal.h"
50:
51:
52: /*************************************************
53: * Match character against an XCLASS *
54: *************************************************/
55:
56: /* This function is called to match a character against an extended class that
57: might contain values > 255 and/or Unicode properties.
58:
59: Arguments:
60: c the character
61: data points to the flag byte of the XCLASS data
62:
63: Returns: TRUE if character matches, else FALSE
64: */
65:
66: BOOL
67: _pcre_xclass(int c, const uschar *data)
68: {
69: int t;
70: BOOL negated = (*data & XCL_NOT) != 0;
71:
72: /* Character values < 256 are matched against a bitmap, if one is present. If
73: not, we still carry on, because there may be ranges that start below 256 in the
74: additional data. */
75:
76: if (c < 256)
77: {
78: if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
79: return !negated; /* char found */
80: }
81:
82: /* First skip the bit map if present. Then match against the list of Unicode
83: properties or large chars or ranges that end with a large char. We won't ever
84: encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
85:
86: if ((*data++ & XCL_MAP) != 0) data += 32;
87:
88: while ((t = *data++) != XCL_END)
89: {
90: int x, y;
91: if (t == XCL_SINGLE)
92: {
93: GETCHARINC(x, data);
94: if (c == x) return !negated;
95: }
96: else if (t == XCL_RANGE)
97: {
98: GETCHARINC(x, data);
99: GETCHARINC(y, data);
100: if (c >= x && c <= y) return !negated;
101: }
102:
103: #ifdef SUPPORT_UCP
104: else /* XCL_PROP & XCL_NOTPROP */
105: {
106: const ucd_record *prop = GET_UCD(c);
107:
108: switch(*data)
109: {
110: case PT_ANY:
111: if (t == XCL_PROP) return !negated;
112: break;
113:
114: case PT_LAMP:
115: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
116: prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
117: break;
118:
119: case PT_GC:
120: if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
121: return !negated;
122: break;
123:
124: case PT_PC:
125: if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
126: break;
127:
128: case PT_SC:
129: if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
130: break;
131:
132: case PT_ALNUM:
133: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
134: _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
135: return !negated;
136: break;
137:
138: case PT_SPACE: /* Perl space */
139: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
140: c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
141: == (t == XCL_PROP))
142: return !negated;
143: break;
144:
145: case PT_PXSPACE: /* POSIX space */
146: if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
147: c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
148: c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
149: return !negated;
150: break;
151:
152: case PT_WORD:
153: if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
154: _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
155: == (t == XCL_PROP))
156: return !negated;
157: break;
158:
159: /* This should never occur, but compilers may mutter if there is no
160: default. */
161:
162: default:
163: return FALSE;
164: }
165:
166: data += 2;
167: }
168: #endif /* SUPPORT_UCP */
169: }
170:
171: return negated; /* char did not match */
172: }
173:
174: /* End of pcre_xclass.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>