Annotation of embedaddon/pcre/pcre_newline.c, revision 1.1.1.3

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
1.1.1.2   misho       9:            Copyright (c) 1997-2012 University of Cambridge
1.1       misho      10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains internal functions for testing newlines when more than
                     42: one kind of newline is to be recognized. When a newline is found, its length is
                     43: returned. In principle, we could implement several newline "types", each
                     44: referring to a different set of newline characters. At present, PCRE supports
                     45: only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
                     46: and NLTYPE_ANY. The full list of Unicode newline characters is taken from
                     47: http://unicode.org/unicode/reports/tr18/. */
                     48: 
                     49: 
                     50: #ifdef HAVE_CONFIG_H
                     51: #include "config.h"
                     52: #endif
                     53: 
                     54: #include "pcre_internal.h"
                     55: 
                     56: 
                     57: 
                     58: /*************************************************
                     59: *      Check for newline at given position       *
                     60: *************************************************/
                     61: 
                     62: /* It is guaranteed that the initial value of ptr is less than the end of the
                     63: string that is being processed.
                     64: 
                     65: Arguments:
                     66:   ptr          pointer to possible newline
                     67:   type         the newline type
                     68:   endptr       pointer to the end of the string
                     69:   lenptr       where to return the length
1.1.1.2   misho      70:   utf          TRUE if in utf mode
1.1       misho      71: 
                     72: Returns:       TRUE or FALSE
                     73: */
                     74: 
                     75: BOOL
1.1.1.2   misho      76: PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
                     77:   BOOL utf)
1.1       misho      78: {
1.1.1.3 ! misho      79: pcre_uint32 c;
1.1.1.2   misho      80: (void)utf;
                     81: #ifdef SUPPORT_UTF
                     82: if (utf)
                     83:   {
                     84:   GETCHAR(c, ptr);
                     85:   }
                     86: else
                     87: #endif  /* SUPPORT_UTF */
                     88:   c = *ptr;
1.1       misho      89: 
1.1.1.3 ! misho      90: /* Note that this function is called only for ANY or ANYCRLF. */
        !            91: 
1.1       misho      92: if (type == NLTYPE_ANYCRLF) switch(c)
                     93:   {
1.1.1.3 ! misho      94:   case CHAR_LF: *lenptr = 1; return TRUE;
        !            95:   case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
        !            96:                return TRUE;
1.1       misho      97:   default: return FALSE;
                     98:   }
                     99: 
                    100: /* NLTYPE_ANY */
                    101: 
                    102: else switch(c)
                    103:   {
1.1.1.3 ! misho     104: #ifdef EBCDIC
        !           105:   case CHAR_NEL:
        !           106: #endif
        !           107:   case CHAR_LF:
        !           108:   case CHAR_VT:
        !           109:   case CHAR_FF: *lenptr = 1; return TRUE;
        !           110: 
        !           111:   case CHAR_CR:
        !           112:   *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
        !           113:   return TRUE;
        !           114: 
        !           115: #ifndef EBCDIC
1.1.1.2   misho     116: #ifdef COMPILE_PCRE8
1.1.1.3 ! misho     117:   case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
1.1       misho     118:   case 0x2028:                                       /* LS */
                    119:   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
1.1.1.3 ! misho     120: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
        !           121:   case CHAR_NEL:
1.1.1.2   misho     122:   case 0x2028:                                       /* LS */
                    123:   case 0x2029: *lenptr = 1; return TRUE;             /* PS */
1.1.1.3 ! misho     124: #endif  /* COMPILE_PCRE8 */
        !           125: #endif  /* Not EBCDIC */
        !           126: 
1.1       misho     127:   default: return FALSE;
                    128:   }
                    129: }
                    130: 
                    131: 
                    132: 
                    133: /*************************************************
                    134: *     Check for newline at previous position     *
                    135: *************************************************/
                    136: 
                    137: /* It is guaranteed that the initial value of ptr is greater than the start of
                    138: the string that is being processed.
                    139: 
                    140: Arguments:
                    141:   ptr          pointer to possible newline
                    142:   type         the newline type
                    143:   startptr     pointer to the start of the string
                    144:   lenptr       where to return the length
1.1.1.2   misho     145:   utf          TRUE if in utf mode
1.1       misho     146: 
                    147: Returns:       TRUE or FALSE
                    148: */
                    149: 
                    150: BOOL
1.1.1.2   misho     151: PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
                    152:   BOOL utf)
1.1       misho     153: {
1.1.1.3 ! misho     154: pcre_uint32 c;
1.1.1.2   misho     155: (void)utf;
1.1       misho     156: ptr--;
1.1.1.2   misho     157: #ifdef SUPPORT_UTF
                    158: if (utf)
1.1       misho     159:   {
                    160:   BACKCHAR(ptr);
                    161:   GETCHAR(c, ptr);
                    162:   }
1.1.1.2   misho     163: else
                    164: #endif  /* SUPPORT_UTF */
                    165:   c = *ptr;
1.1       misho     166: 
1.1.1.3 ! misho     167: /* Note that this function is called only for ANY or ANYCRLF. */
        !           168: 
1.1       misho     169: if (type == NLTYPE_ANYCRLF) switch(c)
                    170:   {
1.1.1.3 ! misho     171:   case CHAR_LF:
        !           172:   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
        !           173:   return TRUE;
        !           174: 
        !           175:   case CHAR_CR: *lenptr = 1; return TRUE;
1.1       misho     176:   default: return FALSE;
                    177:   }
                    178: 
1.1.1.3 ! misho     179: /* NLTYPE_ANY */
        !           180: 
1.1       misho     181: else switch(c)
                    182:   {
1.1.1.3 ! misho     183:   case CHAR_LF:
        !           184:   *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
        !           185:   return TRUE;
        !           186: 
        !           187: #ifdef EBCDIC
        !           188:   case CHAR_NEL:
        !           189: #endif
        !           190:   case CHAR_VT:
        !           191:   case CHAR_FF:
        !           192:   case CHAR_CR: *lenptr = 1; return TRUE;
        !           193: 
        !           194: #ifndef EBCDIC
1.1.1.2   misho     195: #ifdef COMPILE_PCRE8
1.1.1.3 ! misho     196:   case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
        !           197:   case 0x2028:                                       /* LS */
        !           198:   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
        !           199: #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
        !           200:   case CHAR_NEL:
1.1.1.2   misho     201:   case 0x2028:                                       /* LS */
                    202:   case 0x2029: *lenptr = 1; return TRUE;             /* PS */
1.1.1.3 ! misho     203: #endif  /* COMPILE_PCRE8 */
        !           204: #endif  /* NotEBCDIC */
        !           205: 
1.1       misho     206:   default: return FALSE;
                    207:   }
                    208: }
                    209: 
                    210: /* End of pcre_newline.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>