Annotation of embedaddon/php/ext/pcre/pcrelib/pcre_newline.c, revision 1.1.1.1

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2009 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains internal functions for testing newlines when more than
                     42: one kind of newline is to be recognized. When a newline is found, its length is
                     43: returned. In principle, we could implement several newline "types", each
                     44: referring to a different set of newline characters. At present, PCRE supports
                     45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
                     46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
                     47: http://unicode.org/unicode/reports/tr18/. */
                     48: 
                     49: 
                     50: #include "config.h"
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: 
                     55: 
                     56: /*************************************************
                     57: *      Check for newline at given position       *
                     58: *************************************************/
                     59: 
                     60: /* It is guaranteed that the initial value of ptr is less than the end of the
                     61: string that is being processed.
                     62: 
                     63: Arguments:
                     64:   ptr          pointer to possible newline
                     65:   type         the newline type
                     66:   endptr       pointer to the end of the string
                     67:   lenptr       where to return the length
                     68:   utf8         TRUE if in utf8 mode
                     69: 
                     70: Returns:       TRUE or FALSE
                     71: */
                     72: 
                     73: BOOL
                     74: _pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
                     75: {
                     76: int c;
                     77: if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
                     78: 
                     79: if (type == NLTYPE_ANYCRLF) switch(c)
                     80:   {
                     81:   case 0x000a: *lenptr = 1; return TRUE;             /* LF */
                     82:   case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
                     83:                return TRUE;                          /* CR */
                     84:   default: return FALSE;
                     85:   }
                     86: 
                     87: /* NLTYPE_ANY */
                     88: 
                     89: else switch(c)
                     90:   {
                     91:   case 0x000a:                                       /* LF */
                     92:   case 0x000b:                                       /* VT */
                     93:   case 0x000c: *lenptr = 1; return TRUE;             /* FF */
                     94:   case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
                     95:                return TRUE;                          /* CR */
                     96:   case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */
                     97:   case 0x2028:                                       /* LS */
                     98:   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
                     99:   default: return FALSE;
                    100:   }
                    101: }
                    102: 
                    103: 
                    104: 
                    105: /*************************************************
                    106: *     Check for newline at previous position     *
                    107: *************************************************/
                    108: 
                    109: /* It is guaranteed that the initial value of ptr is greater than the start of
                    110: the string that is being processed.
                    111: 
                    112: Arguments:
                    113:   ptr          pointer to possible newline
                    114:   type         the newline type
                    115:   startptr     pointer to the start of the string
                    116:   lenptr       where to return the length
                    117:   utf8         TRUE if in utf8 mode
                    118: 
                    119: Returns:       TRUE or FALSE
                    120: */
                    121: 
                    122: BOOL
                    123: _pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
                    124: {
                    125: int c;
                    126: ptr--;
                    127: #ifdef SUPPORT_UTF8
                    128: if (utf8)
                    129:   {
                    130:   BACKCHAR(ptr);
                    131:   GETCHAR(c, ptr);
                    132:   }
                    133: else c = *ptr;
                    134: #else   /* no UTF-8 support */
                    135: c = *ptr;
                    136: #endif  /* SUPPORT_UTF8 */
                    137: 
                    138: if (type == NLTYPE_ANYCRLF) switch(c)
                    139:   {
                    140:   case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
                    141:                return TRUE;                         /* LF */
                    142:   case 0x000d: *lenptr = 1; return TRUE;            /* CR */
                    143:   default: return FALSE;
                    144:   }
                    145: 
                    146: else switch(c)
                    147:   {
                    148:   case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
                    149:                return TRUE;                         /* LF */
                    150:   case 0x000b:                                      /* VT */
                    151:   case 0x000c:                                      /* FF */
                    152:   case 0x000d: *lenptr = 1; return TRUE;            /* CR */
                    153:   case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */
                    154:   case 0x2028:                                      /* LS */
                    155:   case 0x2029: *lenptr = 3; return TRUE;            /* PS */
                    156:   default: return FALSE;
                    157:   }
                    158: }
                    159: 
                    160: /* End of pcre_newline.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>