Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/iso8859_1.c, revision 1.1.1.1

1.1       misho       1: /**********************************************************************
                      2:   iso8859_1.c -  Oniguruma (regular expression library)
                      3: **********************************************************************/
                      4: /*-
                      5:  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29: 
                     30: #include "regenc.h"
                     31: 
                     32: #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
                     33:   ((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
                     34: 
                     35: static const unsigned short EncISO_8859_1_CtypeTable[256] = {
                     36:   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
                     37:   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
                     38:   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
                     39:   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
                     40:   0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
                     41:   0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
                     42:   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
                     43:   0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
                     44:   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
                     45:   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
                     46:   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
                     47:   0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
                     48:   0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
                     49:   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
                     50:   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
                     51:   0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
                     52:   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
                     53:   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
                     54:   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
                     55:   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
                     56:   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
                     57:   0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
                     58:   0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
                     59:   0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
                     60:   0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
                     61:   0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
                     62:   0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
                     63:   0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
                     64:   0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
                     65:   0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
                     66:   0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
                     67:   0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
                     68: };
                     69: 
                     70: static int
                     71: iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
                     72: {
                     73:   const UChar* p = *pp;
                     74: 
                     75:   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
                     76:        ONIGENC_IS_MBC_ASCII(p)) ||
                     77:       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
                     78:        !ONIGENC_IS_MBC_ASCII(p))) {
                     79:     *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
                     80:   }
                     81:   else {
                     82:     *lower = *p;
                     83:   }
                     84:   (*pp)++;
                     85:   return 1; /* return byte length of converted char to lower */
                     86: }
                     87: 
                     88: static int
                     89: iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
                     90:                            const UChar** pp, const UChar* end)
                     91: {
                     92:   const UChar* p = *pp;
                     93: 
                     94:   (*pp)++;
                     95:   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
                     96:        ONIGENC_IS_MBC_ASCII(p)) ||
                     97:       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
                     98:        !ONIGENC_IS_MBC_ASCII(p))) {
                     99:     int v = (EncISO_8859_1_CtypeTable[*p] &
                    100:              (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
                    101: 
                    102:     if ((v | ONIGENC_CTYPE_LOWER) != 0) {
                    103:       /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
                    104:       if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
                    105:         return FALSE;
                    106:       else
                    107:         return TRUE;
                    108:     }
                    109: 
                    110:     return (v != 0 ? TRUE : FALSE);
                    111:   }
                    112:   return FALSE;
                    113: }
                    114: 
                    115: static int
                    116: iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
                    117: {
                    118:   if (code < 256)
                    119:     return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
                    120:   else
                    121:     return FALSE;
                    122: }
                    123: 
                    124: OnigEncodingType OnigEncodingISO_8859_1 = {
                    125:   onigenc_single_byte_mbc_enc_len,
                    126:   "ISO-8859-1",  /* name */
                    127:   1,             /* max enc length */
                    128:   1,             /* min enc length */
                    129:   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
                    130:    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
                    131:   {
                    132:       (OnigCodePoint )'\\'                       /* esc */
                    133:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
                    134:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
                    135:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
                    136:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
                    137:     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
                    138:   },
                    139:   onigenc_is_mbc_newline_0x0a,
                    140:   onigenc_single_byte_mbc_to_code,
                    141:   onigenc_single_byte_code_to_mbclen,
                    142:   onigenc_single_byte_code_to_mbc,
                    143:   iso_8859_1_mbc_to_normalize,
                    144:   iso_8859_1_is_mbc_ambiguous,
                    145:   onigenc_iso_8859_1_get_all_pair_ambig_codes,
                    146:   onigenc_ess_tsett_get_all_comp_ambig_codes,
                    147:   iso_8859_1_is_code_ctype,
                    148:   onigenc_not_support_get_ctype_code_range,
                    149:   onigenc_single_byte_left_adjust_char_head,
                    150:   onigenc_always_true_is_allowed_reverse_match
                    151: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>