Diff for /embedaddon/pcre/pcre_xclass.c between versions 1.1.1.2 and 1.1.1.4

version 1.1.1.2, 2012/02/21 23:50:25 version 1.1.1.4, 2014/06/15 19:46:03
Line 6 Line 6
 and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
   
                        Written by Philip Hazel                         Written by Philip Hazel
           Copyright (c) 1997-2012 University of Cambridge           Copyright (c) 1997-2013 University of Cambridge
   
 -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
Line 64  Returns:      TRUE if character matches, else FALSE Line 64  Returns:      TRUE if character matches, else FALSE
 */  */
   
 BOOL  BOOL
PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)  PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
 {  {
int t;  pcre_uchar t;
 BOOL negated = (*data & XCL_NOT) != 0;  BOOL negated = (*data & XCL_NOT) != 0;
   
 (void)utf;  (void)utf;
Line 94  if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre Line 94  if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre
   
 while ((t = *data++) != XCL_END)  while ((t = *data++) != XCL_END)
   {    {
  int x, y;  pcre_uint32 x, y;
   if (t == XCL_SINGLE)    if (t == XCL_SINGLE)
     {      {
 #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
Line 128  while ((t = *data++) != XCL_END) Line 128  while ((t = *data++) != XCL_END)
   else  /* XCL_PROP & XCL_NOTPROP */    else  /* XCL_PROP & XCL_NOTPROP */
     {      {
     const ucd_record *prop = GET_UCD(c);      const ucd_record *prop = GET_UCD(c);
       BOOL isprop = t == XCL_PROP;
   
     switch(*data)      switch(*data)
       {        {
       case PT_ANY:        case PT_ANY:
      if (t == XCL_PROP) return !negated;      if (isprop) return !negated;
       break;        break;
   
       case PT_LAMP:        case PT_LAMP:
       if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||        if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
           prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;           prop->chartype == ucp_Lt) == isprop) return !negated;
       break;        break;
   
       case PT_GC:        case PT_GC:
      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
         return !negated;          return !negated;
       break;        break;
   
       case PT_PC:        case PT_PC:
      if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;      if ((data[1] == prop->chartype) == isprop) return !negated;
       break;        break;
   
       case PT_SC:        case PT_SC:
      if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;      if ((data[1] == prop->script) == isprop) return !negated;
       break;        break;
   
       case PT_ALNUM:        case PT_ALNUM:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
         return !negated;          return !negated;
       break;        break;
   
      case PT_SPACE:    /* Perl space */      /* Perl space used to exclude VT, but from Perl 5.18 it is included,
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||      which means that Perl space and POSIX space are now identical. PCRE
           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)      was changed at release 8.34. */
             == (t == XCL_PROP)) 
        return !negated; 
      break; 
   
         case PT_SPACE:    /* Perl space */
       case PT_PXSPACE:  /* POSIX space */        case PT_PXSPACE:  /* POSIX space */
      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||      switch(c)
           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||        {
           c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))        HSPACE_CASES:
        return !negated;        VSPACE_CASES:
         if (isprop) return !negated;
         break;
 
         default:
         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
           return !negated;
         break;
         }
       break;        break;
   
       case PT_WORD:        case PT_WORD:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
            PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)             PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
             == (t == XCL_PROP))             == isprop)
         return !negated;
       break;
 
       case PT_UCNC:
       if (c < 0xa0)
         {
         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
              c == CHAR_GRAVE_ACCENT) == isprop)
           return !negated;
         }
       else
         {
         if ((c < 0xd800 || c > 0xdfff) == isprop)
           return !negated;
         }
       break;
 
       /* The following three properties can occur only in an XCLASS, as there
       is no \p or \P coding for them. */
 
       /* Graphic character. Implement this as not Z (space or separator) and
       not C (other), except for Cf (format) with a few exceptions. This seems
       to be what Perl does. The exceptional characters are:
 
       U+061C           Arabic Letter Mark
       U+180E           Mongolian Vowel Separator
       U+2066 - U+2069  Various "isolate"s
       */
 
       case PT_PXGRAPH:
       if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
             (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
               (prop->chartype == ucp_Cf &&
                 c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
          )) == isprop)
         return !negated;
       break;
 
       /* Printable character: same as graphic, with the addition of Zs, i.e.
       not Zl and not Zp, and U+180E. */
 
       case PT_PXPRINT:
       if ((prop->chartype != ucp_Zl &&
            prop->chartype != ucp_Zp &&
             (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
               (prop->chartype == ucp_Cf &&
                 c != 0x061c && (c < 0x2066 || c > 0x2069))
          )) == isprop)
         return !negated;
       break;
 
       /* Punctuation: all Unicode punctuation, plus ASCII characters that
       Unicode treats as symbols rather than punctuation, for Perl
       compatibility (these are $+<=>^`|~). */
 
       case PT_PXPUNCT:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
             (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
         return !negated;          return !negated;
       break;        break;
   

Removed from v.1.1.1.2  
changed lines
  Added in v.1.1.1.4


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>