--- embedaddon/php/ext/intl/idn/idn.c 2012/02/21 23:47:56 1.1.1.1 +++ embedaddon/php/ext/intl/idn/idn.c 2012/05/29 12:34:40 1.1.1.2 @@ -13,9 +13,10 @@ | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: Pierre A. Joye | + | Gustavo Lopes | +----------------------------------------------------------------------+ */ -/* $Id: idn.c,v 1.1.1.1 2012/02/21 23:47:56 misho Exp $ */ +/* $Id: idn.c,v 1.1.1.2 2012/05/29 12:34:40 misho Exp $ */ /* {{{ includes */ #ifdef HAVE_CONFIG_H @@ -29,14 +30,25 @@ #include "ext/standard/php_string.h" #include "intl_error.h" - #include "intl_convert.h" +#include "intl_convert.h" /* }}} */ +#ifdef UIDNA_INFO_INITIALIZER +#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */ +#endif + +enum { + INTL_IDN_VARIANT_2003 = 0, + INTL_IDN_VARIANT_UTS46 +}; + /* {{{ grapheme_register_constants * Register API constants */ void idn_register_constants( INIT_FUNC_ARGS ) { + /* OPTIONS */ + /* Option to prohibit processing of unassigned codepoints in the input and do not check if the input conforms to STD-3 ASCII rules. */ REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT); @@ -46,6 +58,50 @@ void idn_register_constants( INIT_FUNC_ARGS ) /* Option to check if input conforms to STD-3 ASCII rules */ REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT); + +#ifdef HAVE_46_API + + /* Option to check for whether the input conforms to the BiDi rules. + * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */ + REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT); + + /* Option to check for whether the input conforms to the CONTEXTJ rules. + * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) 
*/ + REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT); + + /* Option for nontransitional processing in ToASCII(). + * By default, ToASCII() uses transitional processing. + * Ignored by the IDNA2003 implementation. */ + REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT); + + /* Option for nontransitional processing in ToUnicode(). + * By default, ToUnicode() uses transitional processing. + * Ignored by the IDNA2003 implementation. */ + REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT); +#endif + + /* VARIANTS */ + REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT); +#ifdef HAVE_46_API + REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT); +#endif + +#ifdef HAVE_46_API + /* PINFO ERROR CODES */ + REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT); + 
REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT); +#endif } /* }}} */ @@ -54,11 +110,100 @@ enum { INTL_IDN_TO_UTF8 }; -static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode) +/* like INTL_CHECK_STATUS, but as a function and varying the name of the func: records err as the global intl error code and, if err is a failure, sets a custom message prefixed with idn_to_ascii or idn_to_utf8 (chosen by mode) and returns FAILURE; returns SUCCESS otherwise */ +static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC) { - unsigned char* domain; - int domain_len; - long option = 0; + intl_error_set_code(NULL, err TSRMLS_CC); + if (U_FAILURE(err)) { + char *buff; + spprintf(&buff, 0, "%s: %s", + mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8", + msg); + intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC); + efree(buff); + return FAILURE; + } + + return SUCCESS; +} + +static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC) +{ + php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC); +} + +#ifdef HAVE_46_API +static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS, + const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info) +{ + UErrorCode status = U_ZERO_ERROR; + UIDNA *uts46; + int32_t len; + int32_t buffer_capac = 255; /* no domain name may exceed this */ + char *buffer = emalloc(buffer_capac); + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + int buffer_used = 0; + + uts46 = uidna_openUTS46(option, &status); + if (php_intl_idn_check_status(status, "failed to open UIDNA instance", + mode TSRMLS_CC) == FAILURE) { + efree(buffer); + RETURN_FALSE; + } + + if (mode == INTL_IDN_TO_ASCII) { + len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len, + buffer, buffer_capac, &info, &status); + } else 
{ + len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len, + buffer, buffer_capac, &info, &status); + } + if (php_intl_idn_check_status(status, "failed to convert name", + mode TSRMLS_CC) == FAILURE) { + uidna_close(uts46); + efree(buffer); + RETURN_FALSE; + } + if (len >= buffer_capac) { /* keeps the buffer[len] write below inside the allocation */ + php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length"); + } + + buffer[len] = '\0'; + + if (info.errors == 0) { + RETVAL_STRINGL(buffer, len, 0); + buffer_used = 1; + } else { + RETVAL_FALSE; + } + + if (idna_info) { + if (buffer_used) { /* used in return_value then */ + zval_addref_p(return_value); + add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value); + } else { + zval *zv; + ALLOC_INIT_ZVAL(zv); + ZVAL_STRINGL(zv, buffer, len, 0); + buffer_used = 1; + add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv); + } + add_assoc_bool_ex(idna_info, "isTransitionalDifferent", + sizeof("isTransitionalDifferent"), info.isTransitionalDifferent); + add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors); + } + + if (!buffer_used) { + efree(buffer); + } + + uidna_close(uts46); +} +#endif + +static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, + const char *domain, int domain_len, uint32_t option, int mode) +{ UChar* ustring = NULL; int ustring_len = 0; UErrorCode status; @@ -67,18 +212,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETE UChar converted[MAXPATHLEN]; int32_t converted_ret_len; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) { - return; - } - - if (domain_len < 1) { - intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC ); - RETURN_FALSE; - } - /* convert the string to UTF-16. 
*/ status = U_ZERO_ERROR; - intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status ); + intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status); if (U_FAILURE(status)) { intl_error_set_code(NULL, status TSRMLS_CC); @@ -123,20 +259,84 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETE RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0); } -/* {{{ proto int idn_to_ascii(string domain[, int options]) +static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode) +{ + char *domain; + int domain_len; + long option = 0, + variant = INTL_IDN_VARIANT_2003; + zval *idna_info = NULL; + + intl_error_reset(NULL TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz", + &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) { + php_intl_bad_args("bad arguments", mode TSRMLS_CC); + RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */ + } + +#ifdef HAVE_46_API + if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) { + php_intl_bad_args("invalid variant, must be one of {" + "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC); + RETURN_FALSE; + } +#else + if (variant != INTL_IDN_VARIANT_2003) { + php_intl_bad_args("invalid variant, PHP was compiled against " + "an old version of ICU and only supports INTL_IDNA_VARIANT_2003", + mode TSRMLS_CC); + RETURN_FALSE; + } +#endif + + if (domain_len < 1) { + php_intl_bad_args("empty domain name", mode TSRMLS_CC); + RETURN_FALSE; + } + if (domain_len > INT32_MAX - 1) { + php_intl_bad_args("domain name too large", mode TSRMLS_CC); + RETURN_FALSE; + } + /* don't check options; it wasn't checked before */ + + if (idna_info != NULL) { + if (variant == INTL_IDN_VARIANT_2003) { + php_error_docref0(NULL TSRMLS_CC, E_NOTICE, + "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only " + "takes 3 - extra argument ignored"); + } else { + zval_dtor(idna_info); + 
array_init(idna_info); + } + } + + if (variant == INTL_IDN_VARIANT_2003) { + php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, + domain, domain_len, (uint32_t)option, mode); + } +#ifdef HAVE_46_API + else { + php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len, + (uint32_t)option, mode, idna_info); + } +#endif +} + +/* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]]) Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */ PHP_FUNCTION(idn_to_ascii) { - php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII); + php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII); } /* }}} */ -/* {{{ proto int idn_to_utf8(string domain[, int options]) +/* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]]) Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */ PHP_FUNCTION(idn_to_utf8) { - php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8); + php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8); } /* }}} */