Annotation of embedaddon/php/ext/intl/idn/idn.c, revision 1.1.1.2

1.1       misho       1: /*
                      2:    +----------------------------------------------------------------------+
                      3:    | PHP Version 5                                                        |
                      4:    +----------------------------------------------------------------------+
                      5:    | Copyright (c) 2009 The PHP Group                                     |
                      6:    +----------------------------------------------------------------------+
                      7:    | This source file is subject to version 3.01 of the PHP license,      |
                      8:    | that is bundled with this package in the file LICENSE, and is        |
                      9:    | available through the world-wide-web at the following url:           |
                     10:    | http://www.php.net/license/3_01.txt                                  |
                     11:    | If you did not receive a copy of the PHP license and are unable to   |
                     12:    | obtain it through the world-wide-web, please send a note to          |
                     13:    | license@php.net so we can mail you a copy immediately.               |
                     14:    +----------------------------------------------------------------------+
                     15:    | Author: Pierre A. Joye <pierre@php.net>                              |
1.1.1.2 ! misho      16:    |         Gustavo Lopes  <cataphract@php.net>                          |
1.1       misho      17:    +----------------------------------------------------------------------+
                     18:  */
1.1.1.2 ! misho      19: /* $Id$ */
1.1       misho      20: 
                     21: /* {{{ includes */
                     22: #ifdef HAVE_CONFIG_H
                     23: #include "config.h"
                     24: #endif
                     25: 
                     26: #include <php.h>
                     27: 
                     28: #include <unicode/uidna.h>
                     29: #include <unicode/ustring.h>
                     30: #include "ext/standard/php_string.h"
                     31: 
                     32: #include "intl_error.h"
1.1.1.2 ! misho      33: #include "intl_convert.h"
1.1       misho      34: /* }}} */
                     35: 
/* ICU defines UIDNA_INFO_INITIALIZER together with its UTS#46 API
 * (introduced in ICU 4.6); use it as the feature probe. */
#if defined(UIDNA_INFO_INITIALIZER)
# define HAVE_46_API 1
#endif

/* Values accepted for the "variant" argument of idn_to_ascii()/idn_to_utf8().
 * They select which implementation in this file performs the conversion. */
enum {
	INTL_IDN_VARIANT_2003  = 0, /* IDNA2003 code path (php_intl_idn_to) */
	INTL_IDN_VARIANT_UTS46 = 1  /* UTS#46 code path (php_intl_idn_to_46) */
};
        !            44: 
1.1       misho      45: /* {{{ grapheme_register_constants
                     46:  * Register API constants
                     47:  */
                     48: void idn_register_constants( INIT_FUNC_ARGS )
                     49: {
1.1.1.2 ! misho      50:        /* OPTIONS */
        !            51: 
1.1       misho      52:        /* Option to prohibit processing of unassigned codepoints in the input and
                     53:           do not check if the input conforms to STD-3 ASCII rules. */
                     54:        REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
                     55: 
                     56:        /* Option to allow processing of unassigned codepoints in the input */
                     57:        REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
                     58: 
                     59:        /* Option to check if input conforms to STD-3 ASCII rules */
                     60:        REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
1.1.1.2 ! misho      61: 
        !            62: #ifdef HAVE_46_API
        !            63: 
        !            64:        /* Option to check for whether the input conforms to the BiDi rules.
        !            65:         * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
        !            66:        REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
        !            67: 
        !            68:        /* Option to check for whether the input conforms to the CONTEXTJ rules.
        !            69:         * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
        !            70:        REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
        !            71: 
        !            72:        /* Option for nontransitional processing in ToASCII().
        !            73:         * By default, ToASCII() uses transitional processing.
        !            74:         * Ignored by the IDNA2003 implementation. */
        !            75:        REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
        !            76: 
        !            77:        /* Option for nontransitional processing in ToUnicode().
        !            78:         * By default, ToUnicode() uses transitional processing.
        !            79:         * Ignored by the IDNA2003 implementation. */
        !            80:        REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
        !            81: #endif
        !            82: 
        !            83:        /* VARIANTS */
        !            84:        REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
        !            85: #ifdef HAVE_46_API
        !            86:        REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
        !            87: #endif
        !            88: 
        !            89: #ifdef HAVE_46_API
        !            90:        /* PINFO ERROR CODES */
        !            91:        REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
        !            92:        REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
        !            93:        REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
        !            94:        REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
        !            95:        REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
        !            96:        REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
        !            97:        REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
        !            98:        REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
        !            99:        REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
        !           100:        REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
        !           101:        REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
        !           102:        REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
        !           103:        REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
        !           104: #endif
1.1       misho     105: }
                    106: /* }}} */
                    107: 
                    108: enum {
                    109:        INTL_IDN_TO_ASCII = 0,
                    110:        INTL_IDN_TO_UTF8
                    111: };
                    112: 
1.1.1.2 ! misho     113: /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
        !           114: static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
        !           115: {
        !           116:        intl_error_set_code(NULL, err TSRMLS_CC);
        !           117:        if (U_FAILURE(err)) {
        !           118:                char *buff;
        !           119:                spprintf(&buff, 0, "%s: %s",
        !           120:                        mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
        !           121:                        msg);
        !           122:                intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
        !           123:                efree(buff);
        !           124:                return FAILURE;
        !           125:        }
        !           126: 
        !           127:        return SUCCESS;
        !           128: }
        !           129: 
        !           130: static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
        !           131: {
        !           132:        php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
        !           133: }
        !           134: 
        !           135: #ifdef HAVE_46_API
        !           136: static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
        !           137:                const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
        !           138: {
        !           139:        UErrorCode        status = U_ZERO_ERROR;
        !           140:        UIDNA             *uts46;
        !           141:        int32_t           len;
        !           142:        int32_t           buffer_capac = 255; /* no domain name may exceed this */
        !           143:        char              *buffer = emalloc(buffer_capac);
        !           144:        UIDNAInfo         info = UIDNA_INFO_INITIALIZER;
        !           145:        int                       buffer_used = 0;
        !           146:        
        !           147:        uts46 = uidna_openUTS46(option, &status);
        !           148:        if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
        !           149:                        mode TSRMLS_CC) == FAILURE) {
        !           150:                efree(buffer);
        !           151:                RETURN_FALSE;
        !           152:        }
        !           153: 
        !           154:        if (mode == INTL_IDN_TO_ASCII) {
        !           155:                len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
        !           156:                                buffer, buffer_capac, &info, &status);
        !           157:        } else {
        !           158:                len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
        !           159:                                buffer, buffer_capac, &info, &status);
        !           160:        }
        !           161:        if (php_intl_idn_check_status(status, "failed to convert name",
        !           162:                        mode TSRMLS_CC) == FAILURE) {
        !           163:                uidna_close(uts46);
        !           164:                efree(buffer);
        !           165:                RETURN_FALSE;
        !           166:        }
        !           167:        if (len >= 255) {
        !           168:                php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
        !           169:        }
        !           170: 
        !           171:        buffer[len] = '\0';
        !           172: 
        !           173:        if (info.errors == 0) {
        !           174:                RETVAL_STRINGL(buffer, len, 0);
        !           175:                buffer_used = 1;
        !           176:        } else {
        !           177:                RETVAL_FALSE;
        !           178:        }
        !           179: 
        !           180:        if (idna_info) {
        !           181:                if (buffer_used) { /* used in return_value then */
        !           182:                        zval_addref_p(return_value);
        !           183:                        add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
        !           184:                } else {
        !           185:                        zval *zv;
        !           186:                        ALLOC_INIT_ZVAL(zv);
        !           187:                        ZVAL_STRINGL(zv, buffer, len, 0);
        !           188:                        buffer_used = 1;
        !           189:                        add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
        !           190:                }
        !           191:                add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
        !           192:                                sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
        !           193:                add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
        !           194:        }
        !           195: 
        !           196:        if (!buffer_used) {
        !           197:                efree(buffer);
        !           198:        }
        !           199: 
        !           200:        uidna_close(uts46);
        !           201: }
        !           202: #endif
        !           203: 
        !           204: static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
        !           205:                const char *domain, int domain_len, uint32_t option, int mode)
1.1       misho     206: {
                    207:        UChar* ustring = NULL;
                    208:        int ustring_len = 0;
                    209:        UErrorCode status;
                    210:        char     *converted_utf8;
                    211:        int32_t   converted_utf8_len;
                    212:        UChar     converted[MAXPATHLEN];
                    213:        int32_t   converted_ret_len;
                    214: 
                    215:        /* convert the string to UTF-16. */
                    216:        status = U_ZERO_ERROR;
1.1.1.2 ! misho     217:        intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
1.1       misho     218: 
                    219:        if (U_FAILURE(status)) {
                    220:                intl_error_set_code(NULL, status TSRMLS_CC);
                    221: 
                    222:                /* Set error messages. */
                    223:                intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
                    224:                if (ustring) {
                    225:                        efree(ustring);
                    226:                }
                    227:                RETURN_FALSE;
                    228:        } else {
                    229:                UParseError parse_error;
                    230: 
                    231:                status = U_ZERO_ERROR;
                    232:                if (mode == INTL_IDN_TO_ASCII) {
                    233:                        converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
                    234:                } else {
                    235:                        converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
                    236:                }
                    237:                efree(ustring);
                    238: 
                    239:                if (U_FAILURE(status)) {
                    240:                        intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
                    241:                        RETURN_FALSE;
                    242:                }
                    243: 
                    244:                status = U_ZERO_ERROR;
                    245:                intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
                    246: 
                    247:                if (U_FAILURE(status)) {
                    248:                        /* Set global error code. */
                    249:                        intl_error_set_code(NULL, status TSRMLS_CC);
                    250: 
                    251:                        /* Set error messages. */
                    252:                        intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
                    253:                        efree(converted_utf8);
                    254:                        RETURN_FALSE;
                    255:                }
                    256:        }
                    257: 
                    258:        /* return the allocated string, not a duplicate */
                    259:        RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
                    260: }
                    261: 
1.1.1.2 ! misho     262: static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
        !           263: {
        !           264:        char *domain;
        !           265:        int domain_len;
        !           266:        long option = 0,
        !           267:                 variant = INTL_IDN_VARIANT_2003;
        !           268:        zval *idna_info = NULL;
        !           269: 
        !           270:        intl_error_reset(NULL TSRMLS_CC);
        !           271: 
        !           272:        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
        !           273:                        &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
        !           274:                php_intl_bad_args("bad arguments", mode TSRMLS_CC);
        !           275:                RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
        !           276:        }
        !           277: 
        !           278: #ifdef HAVE_46_API
        !           279:        if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
        !           280:                php_intl_bad_args("invalid variant, must be one of {"
        !           281:                        "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
        !           282:                RETURN_FALSE;
        !           283:        }
        !           284: #else
        !           285:        if (variant != INTL_IDN_VARIANT_2003) {
        !           286:                php_intl_bad_args("invalid variant, PHP was compiled against "
        !           287:                        "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
        !           288:                        mode TSRMLS_CC);
        !           289:                RETURN_FALSE;
        !           290:        }
        !           291: #endif
        !           292: 
        !           293:        if (domain_len < 1) {
        !           294:                php_intl_bad_args("empty domain name", mode TSRMLS_CC);
        !           295:                RETURN_FALSE;
        !           296:        }
        !           297:        if (domain_len > INT32_MAX - 1) {
        !           298:                php_intl_bad_args("domain name too large", mode TSRMLS_CC);
        !           299:                RETURN_FALSE;
        !           300:        }
        !           301:        /* don't check options; it wasn't checked before */
        !           302: 
        !           303:        if (idna_info != NULL) {
        !           304:                if (variant == INTL_IDN_VARIANT_2003) {
        !           305:                        php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
        !           306:                                "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
        !           307:                                "takes 3 - extra argument ignored");
        !           308:                } else {
        !           309:                        zval_dtor(idna_info);
        !           310:                        array_init(idna_info);
        !           311:                }
        !           312:        }
        !           313:        
        !           314:        if (variant == INTL_IDN_VARIANT_2003) {
        !           315:                php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
        !           316:                                domain, domain_len, (uint32_t)option, mode);
        !           317:        }
        !           318: #ifdef HAVE_46_API
        !           319:        else {
        !           320:                php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
        !           321:                                (uint32_t)option, mode, idna_info);
        !           322:        }
        !           323: #endif
        !           324: }
        !           325: 
        !           326: /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
1.1       misho     327:    Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
                    328: PHP_FUNCTION(idn_to_ascii)
                    329: {
1.1.1.2 ! misho     330:        php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
1.1       misho     331: }
                    332: /* }}} */
                    333: 
                    334: 
1.1.1.2 ! misho     335: /* {{{ proto string idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
1.1       misho     336:    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC.
   Returns the converted string, or FALSE on failure. All work is delegated to
   php_intl_idn_handoff() with mode INTL_IDN_TO_UTF8. */
                    337: PHP_FUNCTION(idn_to_utf8)
                    338: {
1.1.1.2 ! misho     339:        php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
1.1       misho     340: }
                    341: /* }}} */
                    342: 
                    343: 
                    344: /*
                    345:  * Local variables:
                    346:  * tab-width: 4
                    347:  * c-basic-offset: 4
                    348:  * End:
                    349:  * vim600: fdm=marker
                    350:  * vim: noet sw=4 ts=4
                    351:  */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>