Annotation of embedaddon/php/ext/intl/transliterator/transliterator_methods.c, revision 1.1.1.1
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | This source file is subject to version 3.01 of the PHP license, |
6: | that is bundled with this package in the file LICENSE, and is |
7: | available through the world-wide-web at the following url: |
8: | http://www.php.net/license/3_01.txt |
9: | If you did not receive a copy of the PHP license and are unable to |
10: | obtain it through the world-wide-web, please send a note to |
11: | license@php.net so we can mail you a copy immediately. |
12: +----------------------------------------------------------------------+
13: | Authors: Gustavo Lopes <cataphract@php.net> |
14: +----------------------------------------------------------------------+
15: */
16:
17: #ifdef HAVE_CONFIG_H
18: #include "config.h"
19: #endif
20:
21: #include "php_intl.h"
22: #include "transliterator.h"
23: #include "transliterator_class.h"
24: #include "transliterator_methods.h"
25: #include "intl_data.h"
26: #include "intl_convert.h"
27:
28: #include <zend_exceptions.h>
29:
30: static int create_transliterator( char *str_id, int str_id_len, long direction, zval *object TSRMLS_DC )
31: {
32: Transliterator_object *to;
33: UChar *ustr_id = NULL;
34: int32_t ustr_id_len = 0;
35: UTransliterator *utrans;
36: UParseError parse_error = {0, -1};
37:
38: intl_error_reset( NULL TSRMLS_CC );
39:
40: if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
41: {
42: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
43: "transliterator_create: invalid direction", 0 TSRMLS_CC );
44: return FAILURE;
45: }
46:
47: object_init_ex( object, Transliterator_ce_ptr );
48: TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */
49:
50: /* Convert transliterator id to UTF-16 */
51: intl_convert_utf8_to_utf16( &ustr_id, &ustr_id_len, str_id, str_id_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
52: if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
53: {
54: intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
55: intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 TSRMLS_CC );
56: zval_dtor( object );
57: return FAILURE;
58: }
59:
60: /* Open ICU Transliterator. */
61: utrans = utrans_openU( ustr_id, ustr_id_len, (UTransDirection ) direction,
62: NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
63: if (ustr_id) {
64: efree( ustr_id );
65: }
66:
67: if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
68: {
69: char *buf = NULL;
70: intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
71: spprintf( &buf, 0, "transliterator_create: unable to open ICU transliterator"
72: " with id \"%s\"", str_id );
73: if( buf == NULL ) {
74: intl_error_set_custom_msg( NULL,
75: "transliterator_create: unable to open ICU transliterator", 0 TSRMLS_CC );
76: }
77: else
78: {
79: intl_error_set_custom_msg( NULL, buf, /* copy message */ 1 TSRMLS_CC );
80: efree( buf );
81: }
82: zval_dtor( object );
83: return FAILURE;
84: }
85:
86: transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
87: /* no need to close the transliterator manually on construction error */
88: if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
89: {
90: intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
91: intl_error_set_custom_msg( NULL,
92: "transliterator_create: internal constructor call failed", 0 TSRMLS_CC );
93: zval_dtor( object );
94: return FAILURE;
95: }
96:
97: return SUCCESS;
98: }
99:
100: /* {{{ proto Transliterator transliterator_create( string id [, int direction ] )
101: * proto Transliterator Transliterator::create( string id [, int direction ] )
102: * Opens a transliterator by id.
103: */
104: PHP_FUNCTION( transliterator_create )
105: {
106: char *str_id;
107: int str_id_len;
108: long direction = TRANSLITERATOR_FORWARD;
109: int res;
110:
111: TRANSLITERATOR_METHOD_INIT_VARS;
112:
113: (void) to; /* unused */
114:
115: if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
116: &str_id, &str_id_len, &direction ) == FAILURE )
117: {
118: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
119: "transliterator_create: bad arguments", 0 TSRMLS_CC );
120: RETURN_NULL();
121: }
122:
123: object = return_value;
124: res = create_transliterator( str_id, str_id_len, direction, object TSRMLS_CC );
125: if( res == FAILURE )
126: RETURN_NULL();
127:
128: /* success, leave return_value as it is (set by create_transliterator) */
129: }
130: /* }}} */
131:
132: /* {{{ proto Transliterator transliterator_create_from_rules( string rules [, int direction ] )
133: * proto Transliterator Transliterator::createFromRules( string rules [, int direction ] )
134: * Opens a transliterator by id.
135: */
136: PHP_FUNCTION( transliterator_create_from_rules )
137: {
138: char *str_rules;
139: int str_rules_len;
140: UChar *ustr_rules = NULL;
141: int32_t ustr_rules_len = 0;
142: long direction = TRANSLITERATOR_FORWARD;
143: UParseError parse_error = {0, -1};
144: UTransliterator *utrans;
145: UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72,
146: 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */
147: TRANSLITERATOR_METHOD_INIT_VARS;
148:
149: if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l",
150: &str_rules, &str_rules_len, &direction ) == FAILURE )
151: {
152: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
153: "transliterator_create_from_rules: bad arguments", 0 TSRMLS_CC );
154: RETURN_NULL();
155: }
156:
157: if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) )
158: {
159: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
160: "transliterator_create_from_rules: invalid direction", 0 TSRMLS_CC );
161: RETURN_NULL();
162: }
163:
164: object = return_value;
165: object_init_ex( object, Transliterator_ce_ptr );
166: TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK;
167:
168: intl_convert_utf8_to_utf16( &ustr_rules, &ustr_rules_len,
169: str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) );
170: /* (I'm not a big fan of non-obvious flow control macros ).
171: * This one checks the error value, destroys object and returns false */
172: INTL_CTOR_CHECK_STATUS( to, "String conversion of rules to UTF-16 failed" );
173:
174: /* Open ICU Transliterator. */
175: utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction,
176: ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) );
177: if (ustr_rules) {
178: efree( ustr_rules );
179: }
180:
181: intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) TSRMLS_CC );
182: if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) )
183: {
184: char *msg = NULL;
185: smart_str parse_error_str;
186: parse_error_str = transliterator_parse_error_to_string( &parse_error );
187: spprintf( &msg, 0, "transliterator_create_from_rules: unable to "
188: "create ICU transliterator from rules (%s)", parse_error_str.c );
189: smart_str_free( &parse_error_str );
190: if( msg != NULL )
191: {
192: intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 TSRMLS_CC );
193: efree( msg );
194: }
195: zval_dtor( return_value );
196: RETURN_NULL();
197: }
198: transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
199: /* no need to close the transliterator manually on construction error */
200: INTL_CTOR_CHECK_STATUS( to, "transliterator_create_from_rules: internal constructor call failed" );
201: }
202: /* }}} */
203:
204: /* {{{ proto Transliterator transliterator_create_inverse( Transliterator orig_trans )
205: * proto Transliterator Transliterator::createInverse()
206: * Opens the inverse transliterator transliterator.
207: */
208: PHP_FUNCTION( transliterator_create_inverse )
209: {
210: Transliterator_object *to_orig;
211: UTransliterator *utrans;
212: TRANSLITERATOR_METHOD_INIT_VARS;
213:
214: if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
215: &object, Transliterator_ce_ptr ) == FAILURE )
216: {
217: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
218: "transliterator_create_inverse: bad arguments", 0 TSRMLS_CC );
219: RETURN_NULL();
220: }
221:
222: TRANSLITERATOR_METHOD_FETCH_OBJECT;
223: to_orig = to;
224:
225: object = return_value;
226: object_init_ex( object, Transliterator_ce_ptr );
227: TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */
228:
229: utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) );
230: INTL_CTOR_CHECK_STATUS( to, "transliterator_create_inverse: could not create "
231: "inverse ICU transliterator" );
232: transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC );
233: /* no need to close the transliterator manually on construction error */
234: INTL_CTOR_CHECK_STATUS( to, "transliterator_create: internal constructor call failed" );
235: }
236: /* }}} */
237:
238: /* {{{ proto array transliterator_list_ids()
239: * proto array Transliterator::listIDs()
240: * Return an array with the registered transliterator IDs.
241: */
242: PHP_FUNCTION( transliterator_list_ids )
243: {
244: UEnumeration *en;
245: const UChar *elem;
246: int32_t elem_len;
247: UErrorCode status = U_ZERO_ERROR;
248:
249: intl_error_reset( NULL TSRMLS_CC );
250:
251: if( zend_parse_parameters_none() == FAILURE )
252: {
253: /* seems to be the convention in this lib to return false instead of
254: * null on bad parameter types, except on constructors and factory
255: * methods */
256: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
257: "transliterator_list_ids: bad arguments", 0 TSRMLS_CC );
258: RETURN_FALSE;
259: }
260:
261: en = utrans_openIDs( &status );
262: INTL_CHECK_STATUS( status,
263: "transliterator_list_ids: Failed to obtain registered transliterators" );
264:
265: array_init( return_value );
266: while( (elem = uenum_unext( en, &elem_len, &status )) )
267: {
268: char *el_char = NULL;
269: int el_len = 0;
270:
271: intl_convert_utf16_to_utf8( &el_char, &el_len, elem, elem_len, &status );
272:
273: if( U_FAILURE( status ) )
274: {
275: efree( el_char );
276: break;
277: }
278: else
279: {
280: add_next_index_stringl( return_value, el_char, el_len, 0 );
281: }
282: }
283: uenum_close( en );
284:
285: intl_error_set_code( NULL, status TSRMLS_CC );
286: if( U_FAILURE( status ) )
287: {
288: zval_dtor( return_value );
289: RETVAL_FALSE;
290: intl_error_set_custom_msg( NULL, "transliterator_list_ids: "
291: "Failed to build array of registered transliterators", 0 TSRMLS_CC );
292: }
293: }
294: /* }}} */
295:
296: /* {{{ proto string transliterator_transliterate( Transliterator trans, string subject [, int start = 0 [, int end = -1 ]] )
297: * proto string Transliterator::transliterate( string subject [, int start = 0 [, int end = -1 ]] )
298: * Transliterate a string. */
299: PHP_FUNCTION( transliterator_transliterate )
300: {
301: char *str;
302: UChar *ustr = NULL,
303: *uresult = NULL;
304: int str_len;
305: int32_t ustr_len = 0,
306: capacity,
307: uresult_len;
308: long start = 0,
309: limit = -1;
310: int success = 0,
311: temp_trans = 0;
312: TRANSLITERATOR_METHOD_INIT_VARS;
313:
314: object = getThis();
315:
316: if( object == NULL )
317: {
318: /* in non-OOP version, accept both a transliterator and a string */
319: zval **arg1;
320: if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "Zs|ll",
321: &arg1, &str, &str_len, &start, &limit ) == FAILURE )
322: {
323: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
324: "transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
325: RETURN_FALSE;
326: }
327:
328: if( Z_TYPE_PP( arg1 ) == IS_OBJECT &&
329: instanceof_function( Z_OBJCE_PP( arg1 ), Transliterator_ce_ptr TSRMLS_CC ) )
330: {
331: object = *arg1;
332: }
333: else
334: { /* not a transliterator object as first argument */
335: int res;
336: if(Z_TYPE_PP( arg1 ) != IS_STRING )
337: {
338: SEPARATE_ZVAL( arg1 );
339: convert_to_string( *arg1 );
340: }
341: ALLOC_INIT_ZVAL( object );
342: temp_trans = 1;
343: res = create_transliterator( Z_STRVAL_PP( arg1 ), Z_STRLEN_PP( arg1 ),
344: TRANSLITERATOR_FORWARD, object TSRMLS_CC );
345: if( res == FAILURE )
346: {
347: char *message = intl_error_get_message( NULL TSRMLS_CC );
348: php_error_docref0( NULL TSRMLS_CC, E_WARNING, "Could not create "
349: "transliterator with ID \"%s\" (%s)", Z_STRVAL_PP( arg1 ), message );
350: efree( message );
351: /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */
352: goto cleanup;
353: }
354: }
355: }
356: else if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|ll",
357: &str, &str_len, &start, &limit ) == FAILURE )
358: {
359: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
360: "transliterator_transliterate: bad arguments", 0 TSRMLS_CC );
361: RETURN_FALSE;
362: }
363:
364: if( limit < -1 )
365: {
366: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
367: "transliterator_transliterate: \"end\" argument should be "
368: "either non-negative or -1", 0 TSRMLS_CC );
369: RETURN_FALSE;
370: }
371:
372: if( start < 0 || ((limit != -1 ) && (start > limit )) )
373: {
374: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
375: "transliterator_transliterate: \"start\" argument should be "
376: "non-negative and not bigger than \"end\" (if defined)", 0 TSRMLS_CC );
377: RETURN_FALSE;
378: }
379:
380: /* end argument parsing/validation */
381:
382: TRANSLITERATOR_METHOD_FETCH_OBJECT;
383:
384: intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len,
385: TRANSLITERATOR_ERROR_CODE_P( to ) );
386: INTL_METHOD_CHECK_STATUS( to, "String conversion of string to UTF-16 failed" );
387:
388: /* we've started allocating resources, goto from now on */
389:
390: if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) )
391: {
392: char *msg;
393: spprintf( &msg, 0,
394: "transliterator_transliterate: Neither \"start\" nor the \"end\" "
395: "arguments can exceed the number of UTF-16 code units "
396: "(in this case, %d)", (int) ustr_len );
397: if(msg != NULL )
398: {
399: intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR,
400: msg, 1 TSRMLS_CC );
401: efree( msg );
402: }
403: RETVAL_FALSE;
404: goto cleanup;
405: }
406:
407: uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) );
408: capacity = ustr_len + 1;
409:
410: while( 1 )
411: {
412: int32_t temp_limit = ( limit == -1 ? ustr_len : (int32_t) limit );
413: memcpy( uresult, ustr, ustr_len * sizeof( UChar ) );
414: uresult_len = ustr_len;
415:
416: utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start,
417: &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) );
418: if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR )
419: {
420: efree( uresult );
421:
422: uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
423: capacity = uresult_len + 1;
424:
425: intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
426: }
427: else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING )
428: {
429: uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) );
430:
431: intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
432: break;
433: }
434: else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) )
435: {
436: intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC );
437: intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ),
438: "transliterator_transliterate: transliteration failed", 0 TSRMLS_CC );
439: goto cleanup;
440: }
441: else
442: break;
443: }
444:
445: uresult[uresult_len] = (UChar) 0;
446:
447: success = 1;
448:
449: cleanup:
450: if( ustr )
451: efree( ustr );
452:
453: if( success ) {
454: /* frees uresult even on error */
455: INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 );
456: }
457: else
458: {
459: if( uresult )
460: efree( uresult );
461: RETVAL_FALSE;
462: }
463:
464: if (temp_trans )
465: zval_ptr_dtor( &object );
466: }
467: /* }}} */
468:
469: PHP_METHOD( Transliterator, __construct )
470: {
471: /* this constructor shouldn't be called as it's private */
472: zend_throw_exception( NULL,
473: "An object of this type cannot be created with the new operator.",
474: 0 TSRMLS_CC );
475: }
476:
477: /* {{{ proto int transliterator_get_error_code( Transliterator trans )
478: * proto int Transliterator::getErrorCode()
479: * Get the last error code for this transliterator.
480: */
481: PHP_FUNCTION( transliterator_get_error_code )
482: {
483: TRANSLITERATOR_METHOD_INIT_VARS
484:
485: if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
486: &object, Transliterator_ce_ptr ) == FAILURE )
487: {
488: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
489: "transliterator_get_error_code: unable to parse input params", 0 TSRMLS_CC );
490:
491: RETURN_FALSE;
492: }
493:
494: /* Fetch the object (without resetting its last error code ). */
495: to = zend_object_store_get_object( object TSRMLS_CC );
496: if (to == NULL )
497: RETURN_FALSE;
498:
499: RETURN_LONG( (long) TRANSLITERATOR_ERROR_CODE( to ) );
500: }
501: /* }}} */
502:
503:
504: /* {{{ proto string transliterator_get_error_message( Transliterator trans )
505: * proto string Transliterator::getErrorMessage()
506: * Get the last error message for this transliterator.
507: */
508: PHP_FUNCTION( transliterator_get_error_message )
509: {
510: const char* message = NULL;
511: TRANSLITERATOR_METHOD_INIT_VARS
512:
513: if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O",
514: &object, Transliterator_ce_ptr ) == FAILURE )
515: {
516: intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
517: "transliterator_get_error_message: unable to parse input params", 0 TSRMLS_CC );
518:
519: RETURN_FALSE;
520: }
521:
522:
523: /* Fetch the object (without resetting its last error code ). */
524: to = zend_object_store_get_object( object TSRMLS_CC );
525: if (to == NULL )
526: RETURN_FALSE;
527:
528: /* Return last error message. */
529: message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC );
530: RETURN_STRING( message, 0 );
531: }
532: /* }}} */
533:
534:
535: /*
536: * Local variables:
537: * tab-width: 4
538: * c-basic-offset: 4
539: * End:
540: * vim600: noet sw=4 ts=4 fdm=marker
541: * vim<600: noet sw=4 ts=4
542: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>