File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_xclass.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:51 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_21, HEAD
pcre

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language.
    7: 
    8:                        Written by Philip Hazel
    9:            Copyright (c) 1997-2010 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: 
   41: /* This module contains an internal function that is used to match an extended
   42: class. It is used by both pcre_exec() and pcre_dfa_exec(). */
   43: 
   44: 
   45: #ifdef HAVE_CONFIG_H
   46: #include "config.h"
   47: #endif
   48: 
   49: #include "pcre_internal.h"
   50: 
   51: 
   52: /*************************************************
   53: *       Match character against an XCLASS        *
   54: *************************************************/
   55: 
   56: /* This function is called to match a character against an extended class that
   57: might contain values > 255 and/or Unicode properties.
   58: 
   59: Arguments:
   60:   c           the character
   61:   data        points to the flag byte of the XCLASS data
   62: 
   63: Returns:      TRUE if character matches, else FALSE
   64: */
   65: 
   66: BOOL
   67: _pcre_xclass(int c, const uschar *data)
   68: {
   69: int t;
   70: BOOL negated = (*data & XCL_NOT) != 0;
   71: 
   72: /* Character values < 256 are matched against a bitmap, if one is present. If
   73: not, we still carry on, because there may be ranges that start below 256 in the
   74: additional data. */
   75: 
   76: if (c < 256)
   77:   {
   78:   if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
   79:     return !negated;   /* char found */
   80:   }
   81: 
   82: /* First skip the bit map if present. Then match against the list of Unicode
   83: properties or large chars or ranges that end with a large char. We won't ever
   84: encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
   85: 
   86: if ((*data++ & XCL_MAP) != 0) data += 32;
   87: 
   88: while ((t = *data++) != XCL_END)
   89:   {
   90:   int x, y;
   91:   if (t == XCL_SINGLE)
   92:     {
   93:     GETCHARINC(x, data);
   94:     if (c == x) return !negated;
   95:     }
   96:   else if (t == XCL_RANGE)
   97:     {
   98:     GETCHARINC(x, data);
   99:     GETCHARINC(y, data);
  100:     if (c >= x && c <= y) return !negated;
  101:     }
  102: 
  103: #ifdef SUPPORT_UCP
  104:   else  /* XCL_PROP & XCL_NOTPROP */
  105:     {
  106:     const ucd_record *prop = GET_UCD(c);
  107: 
  108:     switch(*data)
  109:       {
  110:       case PT_ANY:
  111:       if (t == XCL_PROP) return !negated;
  112:       break;
  113: 
  114:       case PT_LAMP:
  115:       if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
  116:            prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
  117:       break;
  118: 
  119:       case PT_GC:
  120:       if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
  121:         return !negated;
  122:       break;
  123: 
  124:       case PT_PC:
  125:       if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
  126:       break;
  127: 
  128:       case PT_SC:
  129:       if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
  130:       break;
  131: 
  132:       case PT_ALNUM:
  133:       if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
  134:            _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
  135:         return !negated;
  136:       break;
  137: 
  138:       case PT_SPACE:    /* Perl space */
  139:       if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
  140:            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
  141:              == (t == XCL_PROP))
  142:         return !negated;
  143:       break;
  144: 
  145:       case PT_PXSPACE:  /* POSIX space */
  146:       if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
  147:            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
  148:            c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
  149:         return !negated;
  150:       break;
  151: 
  152:       case PT_WORD:
  153:       if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
  154:            _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
  155:              == (t == XCL_PROP))
  156:         return !negated;
  157:       break;
  158: 
  159:       /* This should never occur, but compilers may mutter if there is no
  160:       default. */
  161: 
  162:       default:
  163:       return FALSE;
  164:       }
  165: 
  166:     data += 2;
  167:     }
  168: #endif  /* SUPPORT_UCP */
  169:   }
  170: 
  171: return negated;   /* char did not match */
  172: }
  173: 
  174: /* End of pcre_xclass.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>