File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_newline.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:55 2013 UTC (10 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, v8_33, HEAD
8.33

    1: /*************************************************
    2: *      Perl-Compatible Regular Expressions       *
    3: *************************************************/
    4: 
    5: /* PCRE is a library of functions to support regular expressions whose syntax
    6: and semantics are as close as possible to those of the Perl 5 language.
    7: 
    8:                        Written by Philip Hazel
    9:            Copyright (c) 1997-2012 University of Cambridge
   10: 
   11: -----------------------------------------------------------------------------
   12: Redistribution and use in source and binary forms, with or without
   13: modification, are permitted provided that the following conditions are met:
   14: 
   15:     * Redistributions of source code must retain the above copyright notice,
   16:       this list of conditions and the following disclaimer.
   17: 
   18:     * Redistributions in binary form must reproduce the above copyright
   19:       notice, this list of conditions and the following disclaimer in the
   20:       documentation and/or other materials provided with the distribution.
   21: 
   22:     * Neither the name of the University of Cambridge nor the names of its
   23:       contributors may be used to endorse or promote products derived from
   24:       this software without specific prior written permission.
   25: 
   26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36: POSSIBILITY OF SUCH DAMAGE.
   37: -----------------------------------------------------------------------------
   38: */
   39: 
   40: 
   41: /* This module contains internal functions for testing newlines when more than
   42: one kind of newline is to be recognized. When a newline is found, its length is
   43: returned. In principle, we could implement several newline "types", each
   44: referring to a different set of newline characters. At present, PCRE supports
   45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
   46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
   47: http://unicode.org/unicode/reports/tr18/. */
   48: 
   49: 
   50: #ifdef HAVE_CONFIG_H
   51: #include "config.h"
   52: #endif
   53: 
   54: #include "pcre_internal.h"
   55: 
   56: 
   57: 
   58: /*************************************************
   59: *      Check for newline at given position       *
   60: *************************************************/
   61: 
   62: /* It is guaranteed that the initial value of ptr is less than the end of the
   63: string that is being processed.
   64: 
   65: Arguments:
   66:   ptr          pointer to possible newline
   67:   type         the newline type
   68:   endptr       pointer to the end of the string
   69:   lenptr       where to return the length
   70:   utf          TRUE if in utf mode
   71: 
   72: Returns:       TRUE or FALSE
   73: */
   74: 
   75: BOOL
   76: PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
   77:   BOOL utf)
   78: {
   79: pcre_uint32 c;
   80: (void)utf;
   81: #ifdef SUPPORT_UTF
   82: if (utf)
   83:   {
   84:   GETCHAR(c, ptr);
   85:   }
   86: else
   87: #endif  /* SUPPORT_UTF */
   88:   c = *ptr;
   89: 
   90: /* Note that this function is called only for ANY or ANYCRLF. */
   91: 
   92: if (type == NLTYPE_ANYCRLF) switch(c)
   93:   {
   94:   case CHAR_LF: *lenptr = 1; return TRUE;
   95:   case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
   96:                return TRUE;
   97:   default: return FALSE;
   98:   }
   99: 
  100: /* NLTYPE_ANY */
  101: 
  102: else switch(c)
  103:   {
  104: #ifdef EBCDIC
  105:   case CHAR_NEL:
  106: #endif
  107:   case CHAR_LF:
  108:   case CHAR_VT:
  109:   case CHAR_FF: *lenptr = 1; return TRUE;
  110: 
  111:   case CHAR_CR:
  112:   *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
  113:   return TRUE;
  114: 
  115: #ifndef EBCDIC
  116: #ifdef COMPILE_PCRE8
  117:   case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
  118:   case 0x2028:                                       /* LS */
  119:   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
  120: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
  121:   case CHAR_NEL:
  122:   case 0x2028:                                       /* LS */
  123:   case 0x2029: *lenptr = 1; return TRUE;             /* PS */
  124: #endif  /* COMPILE_PCRE8 */
  125: #endif  /* Not EBCDIC */
  126: 
  127:   default: return FALSE;
  128:   }
  129: }
  130: 
  131: 
  132: 
  133: /*************************************************
  134: *     Check for newline at previous position     *
  135: *************************************************/
  136: 
  137: /* It is guaranteed that the initial value of ptr is greater than the start of
  138: the string that is being processed.
  139: 
  140: Arguments:
  141:   ptr          pointer to possible newline
  142:   type         the newline type
  143:   startptr     pointer to the start of the string
  144:   lenptr       where to return the length
  145:   utf          TRUE if in utf mode
  146: 
  147: Returns:       TRUE or FALSE
  148: */
  149: 
  150: BOOL
  151: PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
  152:   BOOL utf)
  153: {
  154: pcre_uint32 c;
  155: (void)utf;
  156: ptr--;
  157: #ifdef SUPPORT_UTF
  158: if (utf)
  159:   {
  160:   BACKCHAR(ptr);
  161:   GETCHAR(c, ptr);
  162:   }
  163: else
  164: #endif  /* SUPPORT_UTF */
  165:   c = *ptr;
  166: 
  167: /* Note that this function is called only for ANY or ANYCRLF. */
  168: 
  169: if (type == NLTYPE_ANYCRLF) switch(c)
  170:   {
  171:   case CHAR_LF:
  172:   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
  173:   return TRUE;
  174: 
  175:   case CHAR_CR: *lenptr = 1; return TRUE;
  176:   default: return FALSE;
  177:   }
  178: 
  179: /* NLTYPE_ANY */
  180: 
  181: else switch(c)
  182:   {
  183:   case CHAR_LF:
  184:   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
  185:   return TRUE;
  186: 
  187: #ifdef EBCDIC
  188:   case CHAR_NEL:
  189: #endif
  190:   case CHAR_VT:
  191:   case CHAR_FF:
  192:   case CHAR_CR: *lenptr = 1; return TRUE;
  193: 
  194: #ifndef EBCDIC
  195: #ifdef COMPILE_PCRE8
  196:   case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
  197:   case 0x2028:                                       /* LS */
  198:   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
  199: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
  200:   case CHAR_NEL:
  201:   case 0x2028:                                       /* LS */
  202:   case 0x2029: *lenptr = 1; return TRUE;             /* PS */
  203: #endif  /* COMPILE_PCRE8 */
  204: #endif  /* NotEBCDIC */
  205: 
  206:   default: return FALSE;
  207:   }
  208: }
  209: 
  210: /* End of pcre_newline.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>