Annotation of embedaddon/php/ext/mbstring/oniguruma/enc/euc_tw.c, revision 1.1
1.1 ! misho 1: /**********************************************************************
! 2: euc_tw.c - Oniguruma (regular expression library)
! 3: **********************************************************************/
! 4: /*-
! 5: * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: *
! 17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 18: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 20: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 21: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 22: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 23: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 24: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 25: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 26: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 27: * SUCH DAMAGE.
! 28: */
! 29:
! 30: #include "regenc.h"
! 31:
/*
 * Byte length of an EUC-TW character, indexed by the value of its first
 * byte: 0x8e starts a 4-byte sequence, 0xa1..0xfe start a 2-byte sequence,
 * and every other value is a 1-byte character.
 */
static const int EncLen_EUCTW[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
! 50:
! 51: static int
! 52: euctw_mbc_enc_len(const UChar* p)
! 53: {
! 54: return EncLen_EUCTW[*p];
! 55: }
! 56:
! 57: static OnigCodePoint
! 58: euctw_mbc_to_code(const UChar* p, const UChar* end)
! 59: {
! 60: return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
! 61: }
! 62:
! 63: static int
! 64: euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
! 65: {
! 66: return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
! 67: }
! 68:
! 69: static int
! 70: euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
! 71: UChar* lower)
! 72: {
! 73: return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
! 74: pp, end, lower);
! 75: }
! 76:
! 77: static int
! 78: euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
! 79: {
! 80: return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end);
! 81: }
! 82:
! 83: static int
! 84: euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
! 85: {
! 86: return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
! 87: }
! 88:
! 89: #define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff)
! 90:
! 91: static UChar*
! 92: euctw_left_adjust_char_head(const UChar* start, const UChar* s)
! 93: {
! 94: /* Assumed in this encoding,
! 95: mb-trail bytes don't mix with single bytes.
! 96: */
! 97: const UChar *p;
! 98: int len;
! 99:
! 100: if (s <= start) return (UChar* )s;
! 101: p = s;
! 102:
! 103: while (!euctw_islead(*p) && p > start) p--;
! 104: len = enc_len(ONIG_ENCODING_EUC_TW, p);
! 105: if (p + len > s) return (UChar* )p;
! 106: p += len;
! 107: return (UChar* )(p + ((s - p) & ~1));
! 108: }
! 109:
! 110: static int
! 111: euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
! 112: {
! 113: const UChar c = *s;
! 114: if (c <= 0x7e) return TRUE;
! 115: else return FALSE;
! 116: }
! 117:
! 118: OnigEncodingType OnigEncodingEUC_TW = {
! 119: euctw_mbc_enc_len,
! 120: "EUC-TW", /* name */
! 121: 4, /* max enc length */
! 122: 1, /* min enc length */
! 123: ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
! 124: {
! 125: (OnigCodePoint )'\\' /* esc */
! 126: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
! 127: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
! 128: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
! 129: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
! 130: , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
! 131: },
! 132: onigenc_is_mbc_newline_0x0a,
! 133: euctw_mbc_to_code,
! 134: onigenc_mb4_code_to_mbclen,
! 135: euctw_code_to_mbc,
! 136: euctw_mbc_to_normalize,
! 137: euctw_is_mbc_ambiguous,
! 138: onigenc_ascii_get_all_pair_ambig_codes,
! 139: onigenc_nothing_get_all_comp_ambig_codes,
! 140: euctw_is_code_ctype,
! 141: onigenc_not_support_get_ctype_code_range,
! 142: euctw_left_adjust_char_head,
! 143: euctw_is_allowed_reverse_match
! 144: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>