Annotation of embedaddon/pcre/pcre16_utf16_utils.c, revision 1.1.1.2

1.1       misho       1: /*************************************************
                      2: *      Perl-Compatible Regular Expressions       *
                      3: *************************************************/
                      4: 
                      5: /* PCRE is a library of functions to support regular expressions whose syntax
                      6: and semantics are as close as possible to those of the Perl 5 language.
                      7: 
                      8:                        Written by Philip Hazel
                      9:            Copyright (c) 1997-2012 University of Cambridge
                     10: 
                     11: -----------------------------------------------------------------------------
                     12: Redistribution and use in source and binary forms, with or without
                     13: modification, are permitted provided that the following conditions are met:
                     14: 
                     15:     * Redistributions of source code must retain the above copyright notice,
                     16:       this list of conditions and the following disclaimer.
                     17: 
                     18:     * Redistributions in binary form must reproduce the above copyright
                     19:       notice, this list of conditions and the following disclaimer in the
                     20:       documentation and/or other materials provided with the distribution.
                     21: 
                     22:     * Neither the name of the University of Cambridge nor the names of its
                     23:       contributors may be used to endorse or promote products derived from
                     24:       this software without specific prior written permission.
                     25: 
                     26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
                     27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
                     30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36: POSSIBILITY OF SUCH DAMAGE.
                     37: -----------------------------------------------------------------------------
                     38: */
                     39: 
                     40: 
                     41: /* This module contains a function for converting any UTF-16 character
                     42: strings to host byte order. */
                     43: 
                     44: 
                     45: #ifdef HAVE_CONFIG_H
                     46: #include "config.h"
                     47: #endif
                     48: 
                     49: /* Generate code with 16 bit character support. */
                     50: #define COMPILE_PCRE16
                     51: 
                     52: #include "pcre_internal.h"
                     53: 
                     54: /*************************************************
                     55: *  Convert any UTF-16 string to host byte order  *
                     56: *************************************************/
                     57: 
                     58: /* This function takes an UTF-16 string and converts
                     59: it to host byte order. The length can be explicitly set,
                     60: or automatically detected for zero terminated strings.
                     61: BOMs can be kept or discarded during the conversion.
                     62: Conversion can be done in place (output == input).
                     63: 
                     64: Arguments:
                     65:   output     the output buffer; its size must be greater than
                     66:              or equal to the size of the input string
                     67:   input      any UTF-16 string
                     68:   length     the number of 16-bit units in the input string
                     69:              can be less than zero for zero terminated strings
                     70:   host_byte_order
                     71:              A non-zero value means the input is in host byte
                     72:              order, which can be dynamically changed by BOMs later.
                     73:              Initially it contains the starting byte order and returns
                     74:              with the last byte order so it can be used for stream
                     75:              processing. It can be NULL, in which case the input is
                     76:              assumed to start in host byte order.
                     77:   keep_boms  for a non-zero value, the BOM (0xfeff) characters
                     78:              are copied as well
                     79: 
                     80: Returns:     the number of 16-bit units placed into the output buffer,
                     81:              including the zero-terminator
                     82: */
                     83: 
                     84: int
                     85: pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *output, PCRE_SPTR16 input,
                     86:   int length, int *host_byte_order, int keep_boms)
                     87: {
                     88: #ifdef SUPPORT_UTF
                     89: /* This function converts any UTF-16 string to host byte order and optionally
                     90: removes any Byte Order Marks (BOMS). Returns with the remainig length. */
                     91: int host_bo = host_byte_order != NULL ? *host_byte_order : 1;
                     92: pcre_uchar *optr = (pcre_uchar *)output;
                     93: const pcre_uchar *iptr = (const pcre_uchar *)input;
                     94: const pcre_uchar *end;
                     95: /* The c variable must be unsigned. */
                     96: register pcre_uchar c;
                     97: 
                     98: if (length < 0)
                     99:   length = STRLEN_UC(iptr) + 1;
                    100: end = iptr + length;
                    101: 
                    102: while (iptr < end)
                    103:   {
                    104:   c = *iptr++;
                    105:   if (c == 0xfeff || c == 0xfffe)
                    106:     {
                    107:     /* Detecting the byte order of the machine is unnecessary, it is
                    108:     enough to know that the UTF-16 string has the same byte order or not. */
                    109:     host_bo = c == 0xfeff;
                    110:     if (keep_boms != 0)
                    111:       *optr++ = 0xfeff;
                    112:     else
                    113:       length--;
                    114:     }
                    115:   else
                    116:     *optr++ = host_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
                    117:   }
                    118: if (host_byte_order != NULL)
                    119:   *host_byte_order = host_bo;
                    120: 
1.1.1.2 ! misho     121: #else /* Not SUPPORT_UTF */
1.1       misho     122: (void)(output);  /* Keep picky compilers happy */
                    123: (void)(input);
                    124: (void)(keep_boms);
1.1.1.2 ! misho     125: (void)(host_byte_order);
1.1       misho     126: #endif /* SUPPORT_UTF */
                    127: return length;
                    128: }
                    129: 
                    130: /* End of pcre16_utf16_utils.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>