Annotation of embedaddon/php/ext/intl/idn/idn.c, revision 1.1.1.2
1.1 misho 1: /*
2: +----------------------------------------------------------------------+
3: | PHP Version 5 |
4: +----------------------------------------------------------------------+
5: | Copyright (c) 2009 The PHP Group |
6: +----------------------------------------------------------------------+
7: | This source file is subject to version 3.01 of the PHP license, |
8: | that is bundled with this package in the file LICENSE, and is |
9: | available through the world-wide-web at the following url: |
10: | http://www.php.net/license/3_01.txt |
11: | If you did not receive a copy of the PHP license and are unable to |
12: | obtain it through the world-wide-web, please send a note to |
13: | license@php.net so we can mail you a copy immediately. |
14: +----------------------------------------------------------------------+
15: | Author: Pierre A. Joye <pierre@php.net> |
1.1.1.2 ! misho 16: | Gustavo Lopes <cataphract@php.net> |
1.1 misho 17: +----------------------------------------------------------------------+
18: */
1.1.1.2 ! misho 19: /* $Id$ */
1.1 misho 20:
21: /* {{{ includes */
22: #ifdef HAVE_CONFIG_H
23: #include "config.h"
24: #endif
25:
26: #include <php.h>
27:
28: #include <unicode/uidna.h>
29: #include <unicode/ustring.h>
30: #include "ext/standard/php_string.h"
31:
32: #include "intl_error.h"
1.1.1.2 ! misho 33: #include "intl_convert.h"
1.1 misho 34: /* }}} */
35:
/* HAVE_46_API is set when <unicode/uidna.h> provides UIDNA_INFO_INITIALIZER,
 * i.e. when the UTS #46 API (introduced in ICU 4.6) is available. */
#if defined(UIDNA_INFO_INITIALIZER)
# define HAVE_46_API 1
#endif
! 39:
/* IDNA flavours selectable through the "variant" argument of the idn_*
 * functions; the values are what the INTL_IDNA_VARIANT_* constants carry. */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
! 44:
1.1 misho 45: /* {{{ grapheme_register_constants
46: * Register API constants
47: */
48: void idn_register_constants( INIT_FUNC_ARGS )
49: {
1.1.1.2 ! misho 50: /* OPTIONS */
! 51:
1.1 misho 52: /* Option to prohibit processing of unassigned codepoints in the input and
53: do not check if the input conforms to STD-3 ASCII rules. */
54: REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
55:
56: /* Option to allow processing of unassigned codepoints in the input */
57: REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
58:
59: /* Option to check if input conforms to STD-3 ASCII rules */
60: REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
1.1.1.2 ! misho 61:
! 62: #ifdef HAVE_46_API
! 63:
! 64: /* Option to check for whether the input conforms to the BiDi rules.
! 65: * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
! 66: REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
! 67:
! 68: /* Option to check for whether the input conforms to the CONTEXTJ rules.
! 69: * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
! 70: REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
! 71:
! 72: /* Option for nontransitional processing in ToASCII().
! 73: * By default, ToASCII() uses transitional processing.
! 74: * Ignored by the IDNA2003 implementation. */
! 75: REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
! 76:
! 77: /* Option for nontransitional processing in ToUnicode().
! 78: * By default, ToUnicode() uses transitional processing.
! 79: * Ignored by the IDNA2003 implementation. */
! 80: REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
! 81: #endif
! 82:
! 83: /* VARIANTS */
! 84: REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
! 85: #ifdef HAVE_46_API
! 86: REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
! 87: #endif
! 88:
! 89: #ifdef HAVE_46_API
! 90: /* PINFO ERROR CODES */
! 91: REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
! 92: REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
! 93: REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
! 94: REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
! 95: REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
! 96: REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
! 97: REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
! 98: REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
! 99: REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
! 100: REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
! 101: REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
! 102: REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
! 103: REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
! 104: #endif
1.1 misho 105: }
106: /* }}} */
107:
/* Conversion direction passed as "mode" to the internal IDN helpers. */
enum {
	INTL_IDN_TO_ASCII = 0,
	INTL_IDN_TO_UTF8  = 1
};
112:
1.1.1.2 ! misho 113: /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
! 114: static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
! 115: {
! 116: intl_error_set_code(NULL, err TSRMLS_CC);
! 117: if (U_FAILURE(err)) {
! 118: char *buff;
! 119: spprintf(&buff, 0, "%s: %s",
! 120: mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
! 121: msg);
! 122: intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
! 123: efree(buff);
! 124: return FAILURE;
! 125: }
! 126:
! 127: return SUCCESS;
! 128: }
! 129:
! 130: static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
! 131: {
! 132: php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
! 133: }
! 134:
#ifdef HAVE_46_API
/* Convert a UTF-8 domain name with ICU's UTS #46 implementation.
 *
 * domain/domain_len  input name (UTF-8)
 * option             bit set handed to uidna_openUTS46()
 * mode               INTL_IDN_TO_ASCII selects nameToASCII, anything else
 *                    selects nameToUnicode
 * idna_info          optional array zval; when non-NULL it receives the keys
 *                    "result", "isTransitionalDifferent" and "errors"
 *
 * return_value is set to the converted string when ICU reports no IDNA
 * errors, and to FALSE otherwise (including when ICU itself fails). */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
{
	UErrorCode	status = U_ZERO_ERROR;
	UIDNA		*uts46;
	int32_t		len;
	int32_t		buffer_capac = 255; /* no domain name may exceed this */
	char		*buffer = emalloc(buffer_capac);
	UIDNAInfo	info = UIDNA_INFO_INITIALIZER;
	int			buffer_used = 0; /* set once a zval has taken ownership of buffer */

	uts46 = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
			mode TSRMLS_CC) == FAILURE) {
		efree(buffer);
		RETURN_FALSE;
	}

	if (mode == INTL_IDN_TO_ASCII) {
		len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status);
	} else {
		len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status);
	}
	if (php_intl_idn_check_status(status, "failed to convert name",
			mode TSRMLS_CC) == FAILURE) {
		uidna_close(uts46);
		efree(buffer);
		RETURN_FALSE;
	}

	if (len >= buffer_capac) {
		/* Release everything before raising the error: the UIDNA handle is
		 * owned by ICU and would otherwise never be closed. E_ERROR is not
		 * expected to hand control back (assumption about the engine); the
		 * explicit return guarantees that buffer[len] below can never be
		 * written out of bounds even if it does. */
		uidna_close(uts46);
		efree(buffer);
		php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
		RETURN_FALSE;
	}

	buffer[len] = '\0';

	if (info.errors == 0) {
		/* no duplication: return_value takes ownership of buffer */
		RETVAL_STRINGL(buffer, len, 0);
		buffer_used = 1;
	} else {
		RETVAL_FALSE;
	}

	if (idna_info) {
		if (buffer_used) { /* used in return_value then */
			/* share the very same zval between the return value and the array */
			zval_addref_p(return_value);
			add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
		} else {
			/* conversion had errors: still expose what ICU produced */
			zval *zv;
			ALLOC_INIT_ZVAL(zv);
			ZVAL_STRINGL(zv, buffer, len, 0);
			buffer_used = 1;
			add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
		}
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
	}

	if (!buffer_used) {
		efree(buffer);
	}

	uidna_close(uts46);
}
#endif
! 203:
! 204: static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
! 205: const char *domain, int domain_len, uint32_t option, int mode)
1.1 misho 206: {
207: UChar* ustring = NULL;
208: int ustring_len = 0;
209: UErrorCode status;
210: char *converted_utf8;
211: int32_t converted_utf8_len;
212: UChar converted[MAXPATHLEN];
213: int32_t converted_ret_len;
214:
215: /* convert the string to UTF-16. */
216: status = U_ZERO_ERROR;
1.1.1.2 ! misho 217: intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
1.1 misho 218:
219: if (U_FAILURE(status)) {
220: intl_error_set_code(NULL, status TSRMLS_CC);
221:
222: /* Set error messages. */
223: intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
224: if (ustring) {
225: efree(ustring);
226: }
227: RETURN_FALSE;
228: } else {
229: UParseError parse_error;
230:
231: status = U_ZERO_ERROR;
232: if (mode == INTL_IDN_TO_ASCII) {
233: converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
234: } else {
235: converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
236: }
237: efree(ustring);
238:
239: if (U_FAILURE(status)) {
240: intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
241: RETURN_FALSE;
242: }
243:
244: status = U_ZERO_ERROR;
245: intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
246:
247: if (U_FAILURE(status)) {
248: /* Set global error code. */
249: intl_error_set_code(NULL, status TSRMLS_CC);
250:
251: /* Set error messages. */
252: intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
253: efree(converted_utf8);
254: RETURN_FALSE;
255: }
256: }
257:
258: /* return the allocated string, not a duplicate */
259: RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
260: }
261:
1.1.1.2 ! misho 262: static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
! 263: {
! 264: char *domain;
! 265: int domain_len;
! 266: long option = 0,
! 267: variant = INTL_IDN_VARIANT_2003;
! 268: zval *idna_info = NULL;
! 269:
! 270: intl_error_reset(NULL TSRMLS_CC);
! 271:
! 272: if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
! 273: &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
! 274: php_intl_bad_args("bad arguments", mode TSRMLS_CC);
! 275: RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
! 276: }
! 277:
! 278: #ifdef HAVE_46_API
! 279: if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
! 280: php_intl_bad_args("invalid variant, must be one of {"
! 281: "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
! 282: RETURN_FALSE;
! 283: }
! 284: #else
! 285: if (variant != INTL_IDN_VARIANT_2003) {
! 286: php_intl_bad_args("invalid variant, PHP was compiled against "
! 287: "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
! 288: mode TSRMLS_CC);
! 289: RETURN_FALSE;
! 290: }
! 291: #endif
! 292:
! 293: if (domain_len < 1) {
! 294: php_intl_bad_args("empty domain name", mode TSRMLS_CC);
! 295: RETURN_FALSE;
! 296: }
! 297: if (domain_len > INT32_MAX - 1) {
! 298: php_intl_bad_args("domain name too large", mode TSRMLS_CC);
! 299: RETURN_FALSE;
! 300: }
! 301: /* don't check options; it wasn't checked before */
! 302:
! 303: if (idna_info != NULL) {
! 304: if (variant == INTL_IDN_VARIANT_2003) {
! 305: php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
! 306: "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
! 307: "takes 3 - extra argument ignored");
! 308: } else {
! 309: zval_dtor(idna_info);
! 310: array_init(idna_info);
! 311: }
! 312: }
! 313:
! 314: if (variant == INTL_IDN_VARIANT_2003) {
! 315: php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
! 316: domain, domain_len, (uint32_t)option, mode);
! 317: }
! 318: #ifdef HAVE_46_API
! 319: else {
! 320: php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
! 321: (uint32_t)option, mode, idna_info);
! 322: }
! 323: #endif
! 324: }
! 325:
! 326: /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
1.1 misho 327: Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
328: PHP_FUNCTION(idn_to_ascii)
329: {
1.1.1.2 ! misho 330: php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
1.1 misho 331: }
332: /* }}} */
333:
334:
1.1.1.2 ! misho 335: /* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
1.1 misho 336: Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
337: PHP_FUNCTION(idn_to_utf8)
338: {
1.1.1.2 ! misho 339: php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
1.1 misho 340: }
341: /* }}} */
342:
343:
344: /*
345: * Local variables:
346: * tab-width: 4
347: * c-basic-offset: 4
348: * End:
349: * vim600: fdm=marker
350: * vim: noet sw=4 ts=4
351: */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>