Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/iso8859_1.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: iso8859_1.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include "regenc.h"
! 31:
/*
 * Nonzero when the EncISO_8859_1_CtypeTable entry for `code` has at least
 * one of the bits in `ctype` set.
 *
 * `code` indexes a 256-entry table, so callers must ensure it is in 0..255.
 * Both arguments are parenthesized so that compound expressions such as
 * `A | B` are masked as a whole; without the parentheses `table & A | B`
 * would parse as `(table & A) | B` and always be true.
 */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
  ((EncISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
! 34:
/*
 * Character-type bitmasks for ISO-8859-1, indexed by byte value (0x00-0xff).
 * Entries are tested against ONIGENC_CTYPE_* masks elsewhere in this file.
 * The comment at the start of each row gives the byte value of its first
 * entry; every row holds eight consecutive entries.
 */
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x08 */ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
  /* 0x10 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x18 */ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
  /* 0x20 */ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x28 */ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x30 */ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
  /* 0x38 */ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
  /* 0x40 */ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
  /* 0x48 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0x50 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0x58 */ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
  /* 0x60 */ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
  /* 0x68 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0x70 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0x78 */ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  /* 0xc8 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
  /* 0xd0 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
  /* 0xd8 */ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
  /* 0xe0 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  /* 0xe8 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
  /* 0xf0 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
  /* 0xf8 */ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
! 69:
! 70: static int
! 71: iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
! 72: {
! 73: const UChar* p = *pp;
! 74:
! 75: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
! 76: ONIGENC_IS_MBC_ASCII(p)) ||
! 77: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
! 78: !ONIGENC_IS_MBC_ASCII(p))) {
! 79: *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
! 80: }
! 81: else {
! 82: *lower = *p;
! 83: }
! 84: (*pp)++;
! 85: return 1; /* return byte length of converted char to lower */
! 86: }
! 87:
! 88: static int
! 89: iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
! 90: const UChar** pp, const UChar* end)
! 91: {
! 92: const UChar* p = *pp;
! 93:
! 94: (*pp)++;
! 95: if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
! 96: ONIGENC_IS_MBC_ASCII(p)) ||
! 97: ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
! 98: !ONIGENC_IS_MBC_ASCII(p))) {
! 99: int v = (EncISO_8859_1_CtypeTable[*p] &
! 100: (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
! 101:
! 102: if ((v | ONIGENC_CTYPE_LOWER) != 0) {
! 103: /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
! 104: if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
! 105: return FALSE;
! 106: else
! 107: return TRUE;
! 108: }
! 109:
! 110: return (v != 0 ? TRUE : FALSE);
! 111: }
! 112: return FALSE;
! 113: }
! 114:
! 115: static int
! 116: iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
! 117: {
! 118: if (code < 256)
! 119: return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
! 120: else
! 121: return FALSE;
! 122: }
! 123:
! 124: OnigEncodingType OnigEncodingISO_8859_1 = {
! 125: onigenc_single_byte_mbc_enc_len,
! 126: "ISO-8859-1", /* name */
! 127: 1, /* max enc length */
! 128: 1, /* min enc length */
! 129: (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
! 130: ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
! 131: {
! 132: (OnigCodePoint )'\\' /* esc */
! 133: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
! 134: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
! 135: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
! 136: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
! 137: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
! 138: },
! 139: onigenc_is_mbc_newline_0x0a,
! 140: onigenc_single_byte_mbc_to_code,
! 141: onigenc_single_byte_code_to_mbclen,
! 142: onigenc_single_byte_code_to_mbc,
! 143: iso_8859_1_mbc_to_normalize,
! 144: iso_8859_1_is_mbc_ambiguous,
! 145: onigenc_iso_8859_1_get_all_pair_ambig_codes,
! 146: onigenc_ess_tsett_get_all_comp_ambig_codes,
! 147: iso_8859_1_is_code_ctype,
! 148: onigenc_not_support_get_ctype_code_range,
! 149: onigenc_single_byte_left_adjust_char_head,
! 150: onigenc_always_true_is_allowed_reverse_match
! 151: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>