Annotation of embedaddon/php/ext/intl/normalizer/normalizer_normalize.c, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | This source file is subject to version 3.01 of the PHP license, |
6: | that is bundled with this package in the file LICENSE, and is |
7: | available through the world-wide-web at the following url: |
8: | http://www.php.net/license/3_01.txt |
9: | If you did not receive a copy of the PHP license and are unable to |
10: | obtain it through the world-wide-web, please send a note to |
11: | license@php.net so we can mail you a copy immediately. |
12: +----------------------------------------------------------------------+
13: | Authors: Ed Batutis <ed@batutis.com> |
14: +----------------------------------------------------------------------+
15: */
16:
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include "php_intl.h"
22: #include "unicode/unorm.h"
23: #include "normalizer.h"
24: #include "normalizer_class.h"
25: #include "normalizer_normalize.h"
26: #include "intl_convert.h"
27:
28: /* {{{ proto string Normalizer::normalize( string $input [, string $form = FORM_C] )
29: * Normalize a string. }}} */
30: /* {{{ proto string normalizer_normalize( string $input [, string $form = FORM_C] )
31: * Normalize a string.
32: */
33: PHP_FUNCTION( normalizer_normalize )
34: {
35: char* input = NULL;
36: /* form is optional, defaults to FORM_C */
37: long form = NORMALIZER_DEFAULT;
38: int input_len = 0;
39:
40: UChar* uinput = NULL;
41: int uinput_len = 0;
42: int expansion_factor = 1;
43: UErrorCode status = U_ZERO_ERROR;
44:
45: UChar* uret_buf = NULL;
46: int uret_len = 0;
47:
48: char* ret_buf = NULL;
49: int32_t ret_len = 0;
50:
51: int32_t size_needed;
52:
53: intl_error_reset( NULL TSRMLS_CC );
54:
55: /* Parse parameters. */
56: if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
57: &input, &input_len, &form ) == FAILURE )
58: {
59: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
60: "normalizer_normalize: unable to parse input params", 0 TSRMLS_CC );
61:
62: RETURN_FALSE;
63: }
64:
65: expansion_factor = 1;
66:
67: switch(form) {
68: case NORMALIZER_NONE:
69: break;
70: case NORMALIZER_FORM_D:
71: expansion_factor = 3;
72: break;
73: case NORMALIZER_FORM_KD:
74: expansion_factor = 3;
75: break;
76: case NORMALIZER_FORM_C:
77: case NORMALIZER_FORM_KC:
78: break;
79: default:
80: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
81: "normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
82: RETURN_FALSE;
83: }
84:
85: /*
86: * Normalize string (converting it to UTF-16 first).
87: */
88:
89: /* First convert the string to UTF-16. */
90: intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
91:
92: if( U_FAILURE( status ) )
93: {
94: /* Set global error code. */
95: intl_error_set_code( NULL, status TSRMLS_CC );
96:
97: /* Set error messages. */
98: intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
99: if (uinput) {
100: efree( uinput );
101: }
102: RETURN_FALSE;
103: }
104:
105:
106: /* Allocate memory for the destination buffer for normalization */
107: uret_len = uinput_len * expansion_factor;
108: uret_buf = eumalloc( uret_len + 1 );
109:
110: /* normalize */
111: size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
112:
1.1.1.2 ! misho 113: /* Bail out if an unexpected error occurred.
1.1 misho 114: * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
115: * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
116: */
117: if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) {
118: efree( uret_buf );
119: efree( uinput );
120: RETURN_NULL();
121: }
122:
123: if ( size_needed > uret_len ) {
124: /* realloc does not seem to work properly - memory is corrupted
125: * uret_buf = eurealloc(uret_buf, size_needed + 1);
126: */
127: efree( uret_buf );
128: uret_buf = eumalloc( size_needed + 1 );
129: uret_len = size_needed;
130:
131: status = U_ZERO_ERROR;
132:
133: /* try normalize again */
134: size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
135:
1.1.1.2 ! misho 136: /* Bail out if an unexpected error occurred. */
1.1 misho 137: if( U_FAILURE(status) ) {
138: /* Set error messages. */
139: intl_error_set_custom_msg( NULL,"Error normalizing string", 0 TSRMLS_CC );
140: efree( uret_buf );
141: efree( uinput );
142: RETURN_FALSE;
143: }
144: }
145:
146: efree( uinput );
147:
148: /* the buffer we actually used */
149: uret_len = size_needed;
150:
151: /* Convert normalized string from UTF-16 to UTF-8. */
152: intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status );
153: efree( uret_buf );
154: if( U_FAILURE( status ) )
155: {
156: intl_error_set( NULL, status,
157: "normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC );
158: RETURN_FALSE;
159: }
160:
161: /* Return it. */
162: RETVAL_STRINGL( ret_buf, ret_len, FALSE );
163: }
164: /* }}} */
165:
166: /* {{{ proto bool Normalizer::isNormalized( string $input [, string $form = FORM_C] )
167: * Test if a string is in a given normalization form. }}} */
168: /* {{{ proto bool normalizer_is_normalize( string $input [, string $form = FORM_C] )
169: * Test if a string is in a given normalization form.
170: */
171: PHP_FUNCTION( normalizer_is_normalized )
172: {
173: char* input = NULL;
174: /* form is optional, defaults to FORM_C */
175: long form = NORMALIZER_DEFAULT;
176: int input_len = 0;
177:
178: UChar* uinput = NULL;
179: int uinput_len = 0;
180: UErrorCode status = U_ZERO_ERROR;
181:
182: UBool uret = FALSE;
183:
184: intl_error_reset( NULL TSRMLS_CC );
185:
186: /* Parse parameters. */
187: if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
188: &input, &input_len, &form) == FAILURE )
189: {
190: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
191: "normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC );
192:
193: RETURN_FALSE;
194: }
195:
196: switch(form) {
197: /* case NORMALIZER_NONE: not allowed - doesn't make sense */
198:
199: case NORMALIZER_FORM_D:
200: case NORMALIZER_FORM_KD:
201: case NORMALIZER_FORM_C:
202: case NORMALIZER_FORM_KC:
203: break;
204: default:
205: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
206: "normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
207: RETURN_FALSE;
208: }
209:
210:
211: /*
212: * Test normalization of string (converting it to UTF-16 first).
213: */
214:
215: /* First convert the string to UTF-16. */
216: intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
217:
218: if( U_FAILURE( status ) )
219: {
220: /* Set global error code. */
221: intl_error_set_code( NULL, status TSRMLS_CC );
222:
223: /* Set error messages. */
224: intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC );
225: if (uinput) {
226: efree( uinput );
227: }
228: RETURN_FALSE;
229: }
230:
231:
232: /* test string */
233: uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);
234:
235: efree( uinput );
236:
1.1.1.2 ! misho 237: /* Bail out if an unexpected error occurred. */
1.1 misho 238: if( U_FAILURE(status) ) {
239: /* Set error messages. */
240: intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 TSRMLS_CC );
241: RETURN_FALSE;
242: }
243:
244: if ( uret )
245: RETURN_TRUE;
246:
247: RETURN_FALSE;
248: }
249: /* }}} */
250:
251: /*
252: * Local variables:
253: * tab-width: 4
254: * c-basic-offset: 4
255: * End:
256: * vim600: noet sw=4 ts=4 fdm=marker
257: * vim<600: noet sw=4 ts=4
258: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>