Annotation of embedaddon/pcre/pcre32_utf32_utils.c, revision 1.1.1.1
1.1 misho 1: /*************************************************
2: * Perl-Compatible Regular Expressions *
3: *************************************************/
4:
5: /* PCRE is a library of functions to support regular expressions whose syntax
6: and semantics are as close as possible to those of the Perl 5 language.
7:
8: Written by Philip Hazel
9: Copyright (c) 1997-2012 University of Cambridge
10:
11: -----------------------------------------------------------------------------
12: Redistribution and use in source and binary forms, with or without
13: modification, are permitted provided that the following conditions are met:
14:
15: * Redistributions of source code must retain the above copyright notice,
16: this list of conditions and the following disclaimer.
17:
18: * Redistributions in binary form must reproduce the above copyright
19: notice, this list of conditions and the following disclaimer in the
20: documentation and/or other materials provided with the distribution.
21:
22: * Neither the name of the University of Cambridge nor the names of its
23: contributors may be used to endorse or promote products derived from
24: this software without specific prior written permission.
25:
26: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36: POSSIBILITY OF SUCH DAMAGE.
37: -----------------------------------------------------------------------------
38: */
39:
40:
41: /* This module contains a function for converting any UTF-32 character
42: strings to host byte order. */
43:
44:
45: #ifdef HAVE_CONFIG_H
46: #include "config.h"
47: #endif
48:
49: /* Generate code with 32 bit character support. */
50: #define COMPILE_PCRE32
51:
52: #include "pcre_internal.h"
53:
54: #ifdef SUPPORT_UTF
55: static pcre_uint32
56: swap_uint32(pcre_uint32 value)
57: {
58: return ((value & 0x000000ff) << 24) |
59: ((value & 0x0000ff00) << 8) |
60: ((value & 0x00ff0000) >> 8) |
61: (value >> 24);
62: }
63: #endif
64:
65:
66: /*************************************************
67: * Convert any UTF-32 string to host byte order *
68: *************************************************/
69:
70: /* This function takes an UTF-32 string and converts
71: it to host byte order. The length can be explicitly set,
72: or automatically detected for zero terminated strings.
73: BOMs can be kept or discarded during the conversion.
74: Conversion can be done in place (output == input).
75:
76: Arguments:
77: output the output buffer, its size must be greater
78: or equal than the input string
79: input any UTF-32 string
80: length the number of 32-bit units in the input string
81: can be less than zero for zero terminated strings
82: host_byte_order
83: A non-zero value means the input is in host byte
84: order, which can be dynamically changed by BOMs later.
85: Initially it contains the starting byte order and returns
86: with the last byte order so it can be used for stream
87: processing. It can be NULL, which set the host byte
88: order mode by default.
89: keep_boms for a non-zero value, the BOM (0xfeff) characters
90: are copied as well
91:
92: Returns: the number of 32-bit units placed into the output buffer,
93: including the zero-terminator
94: */
95:
96: int
97: pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *output, PCRE_SPTR32 input,
98: int length, int *host_byte_order, int keep_boms)
99: {
100: #ifdef SUPPORT_UTF
101: /* This function converts any UTF-32 string to host byte order and optionally
102: removes any Byte Order Marks (BOMS). Returns with the remainig length. */
103: int host_bo = host_byte_order != NULL ? *host_byte_order : 1;
104: pcre_uchar *optr = (pcre_uchar *)output;
105: const pcre_uchar *iptr = (const pcre_uchar *)input;
106: const pcre_uchar *end;
107: /* The c variable must be unsigned. */
108: register pcre_uchar c;
109:
110: if (length < 0)
111: end = iptr + STRLEN_UC(iptr) + 1;
112: else
113: end = iptr + length;
114:
115: while (iptr < end)
116: {
117: c = *iptr++;
118: if (c == 0x0000feffu || c == 0xfffe0000u)
119: {
120: /* Detecting the byte order of the machine is unnecessary, it is
121: enough to know that the UTF-32 string has the same byte order or not. */
122: host_bo = c == 0x0000feffu;
123: if (keep_boms != 0)
124: *optr++ = 0x0000feffu;
125: }
126: else
127: *optr++ = host_bo ? c : swap_uint32(c);
128: }
129: if (host_byte_order != NULL)
130: *host_byte_order = host_bo;
131:
132: #else /* SUPPORT_UTF */
133: (void)(output); /* Keep picky compilers happy */
134: (void)(input);
135: (void)(keep_boms);
136: (void)(host_byte_order);
137: #endif /* SUPPORT_UTF */
138: return length;
139: }
140:
141: /* End of pcre32_utf32_utils.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>