Annotation of embedaddon/php/ext/mbstring/oniguruma/regerror.c, revision 1.1.1.1

1.1       misho       1: /**********************************************************************
                      2:   regerror.c -  Oniguruma (regular expression library)
                      3: **********************************************************************/
                      4: /*-
                      5:  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
                      6:  * All rights reserved.
                      7:  *
                      8:  * Redistribution and use in source and binary forms, with or without
                      9:  * modification, are permitted provided that the following conditions
                     10:  * are met:
                     11:  * 1. Redistributions of source code must retain the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer.
                     13:  * 2. Redistributions in binary form must reproduce the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer in the
                     15:  *    documentation and/or other materials provided with the distribution.
                     16:  *
                     17:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
                     18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     20:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     27:  * SUCH DAMAGE.
                     28:  */
                     29: 
                     30: #include "regint.h"
                     31: #include <stdio.h> /* for vsnprintf() */
                     32: 
                     33: #ifdef HAVE_STDARG_PROTOTYPES
                     34: #include <stdarg.h>
                     35: #define va_init_list(a,b) va_start(a,b)
                     36: #else
                     37: #include <varargs.h>
                     38: #define va_init_list(a,b) va_start(a)
                     39: #endif
                     40: 
                     41: extern UChar*
                     42: onig_error_code_to_format(int code)
                     43: {
                     44:   char *p;
                     45: 
                     46:   if (code >= 0) return (UChar* )0;
                     47: 
                     48:   switch (code) {
                     49:   case ONIG_MISMATCH:
                     50:     p = "mismatch"; break;
                     51:   case ONIG_NO_SUPPORT_CONFIG:
                     52:     p = "no support in this configuration"; break;
                     53:   case ONIGERR_MEMORY:
                     54:     p = "fail to memory allocation"; break;
                     55:   case ONIGERR_MATCH_STACK_LIMIT_OVER:
                     56:     p = "match-stack limit over"; break;
                     57:   case ONIGERR_TYPE_BUG:
                     58:     p = "undefined type (bug)"; break;
                     59:   case ONIGERR_PARSER_BUG:
                     60:     p = "internal parser error (bug)"; break;
                     61:   case ONIGERR_STACK_BUG:
                     62:     p = "stack error (bug)"; break;
                     63:   case ONIGERR_UNDEFINED_BYTECODE:
                     64:     p = "undefined bytecode (bug)"; break;
                     65:   case ONIGERR_UNEXPECTED_BYTECODE:
                     66:     p = "unexpected bytecode (bug)"; break;
                     67:   case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
                     68:     p = "default multibyte-encoding is not setted"; break;
                     69:   case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
                     70:     p = "can't convert to wide-char on specified multibyte-encoding"; break;
                     71:   case ONIGERR_INVALID_ARGUMENT:
                     72:     p = "invalid argument"; break;
                     73:   case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
                     74:     p = "end pattern at left brace"; break;
                     75:   case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
                     76:     p = "end pattern at left bracket"; break;
                     77:   case ONIGERR_EMPTY_CHAR_CLASS:
                     78:     p = "empty char-class"; break;
                     79:   case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
                     80:     p = "premature end of char-class"; break;
                     81:   case ONIGERR_END_PATTERN_AT_ESCAPE:
                     82:     p = "end pattern at escape"; break;
                     83:   case ONIGERR_END_PATTERN_AT_META:
                     84:     p = "end pattern at meta"; break;
                     85:   case ONIGERR_END_PATTERN_AT_CONTROL:
                     86:     p = "end pattern at control"; break;
                     87:   case ONIGERR_META_CODE_SYNTAX:
                     88:     p = "illegal meta-code syntax"; break;
                     89:   case ONIGERR_CONTROL_CODE_SYNTAX:
                     90:     p = "illegal control-code syntax"; break;
                     91:   case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
                     92:     p = "char-class value at end of range"; break;
                     93:   case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
                     94:     p = "char-class value at start of range"; break;
                     95:   case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
                     96:     p = "unmatched range specifier in char-class"; break;
                     97:   case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
                     98:     p = "target of repeat operator is not specified"; break;
                     99:   case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
                    100:     p = "target of repeat operator is invalid"; break;
                    101:   case ONIGERR_NESTED_REPEAT_OPERATOR:
                    102:     p = "nested repeat operator"; break;
                    103:   case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
                    104:     p = "unmatched close parenthesis"; break;
                    105:   case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
                    106:     p = "end pattern with unmatched parenthesis"; break;
                    107:   case ONIGERR_END_PATTERN_IN_GROUP:
                    108:     p = "end pattern in group"; break;
                    109:   case ONIGERR_UNDEFINED_GROUP_OPTION:
                    110:     p = "undefined group option"; break;
                    111:   case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
                    112:     p = "invalid POSIX bracket type"; break;
                    113:   case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
                    114:     p = "invalid pattern in look-behind"; break;
                    115:   case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
                    116:     p = "invalid repeat range {lower,upper}"; break;
                    117:   case ONIGERR_TOO_BIG_NUMBER:
                    118:     p = "too big number"; break;
                    119:   case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
                    120:     p = "too big number for repeat range"; break;
                    121:   case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
                    122:     p = "upper is smaller than lower in repeat range"; break;
                    123:   case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
                    124:     p = "empty range in char class"; break;
                    125:   case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
                    126:     p = "mismatch multibyte code length in char-class range"; break;
                    127:   case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
                    128:     p = "too many multibyte code ranges are specified"; break;
                    129:   case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
                    130:     p = "too short multibyte code string"; break;
                    131:   case ONIGERR_TOO_BIG_BACKREF_NUMBER:
                    132:     p = "too big backref number"; break;
                    133:   case ONIGERR_INVALID_BACKREF:
                    134: #ifdef USE_NAMED_GROUP
                    135:     p = "invalid backref number/name"; break;
                    136: #else
                    137:     p = "invalid backref number"; break;
                    138: #endif
                    139:   case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
                    140:     p = "numbered backref/call is not allowed. (use name)"; break;
                    141:   case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
                    142:     p = "too big wide-char value"; break;
                    143:   case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
                    144:     p = "too long wide-char value"; break;
                    145:   case ONIGERR_INVALID_WIDE_CHAR_VALUE:
                    146:     p = "invalid wide-char value"; break;
                    147:   case ONIGERR_EMPTY_GROUP_NAME:
                    148:     p = "group name is empty"; break;
                    149:   case ONIGERR_INVALID_GROUP_NAME:
                    150:     p = "invalid group name <%n>"; break;
                    151:   case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
                    152: #ifdef USE_NAMED_GROUP
                    153:     p = "invalid char in group name <%n>"; break;
                    154: #else
                    155:     p = "invalid char in group number <%n>"; break;
                    156: #endif
                    157:   case ONIGERR_UNDEFINED_NAME_REFERENCE:
                    158:     p = "undefined name <%n> reference"; break;
                    159:   case ONIGERR_UNDEFINED_GROUP_REFERENCE:
                    160:     p = "undefined group <%n> reference"; break;
                    161:   case ONIGERR_MULTIPLEX_DEFINED_NAME:
                    162:     p = "multiplex defined name <%n>"; break;
                    163:   case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
                    164:     p = "multiplex definition name <%n> call"; break;
                    165:   case ONIGERR_NEVER_ENDING_RECURSION:
                    166:     p = "never ending recursion"; break;
                    167:   case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
                    168:     p = "group number is too big for capture history"; break;
                    169:   case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
                    170:     p = "invalid character property name {%n}"; break;
                    171:   case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
                    172:     p = "not supported encoding combination"; break;
                    173:   case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
                    174:     p = "invalid combination of options"; break;
                    175:   case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
                    176:     p = "over thread pass limit count"; break;
                    177: 
                    178:   default:
                    179:     p = "undefined error code"; break;
                    180:   }
                    181: 
                    182:   return (UChar* )p;
                    183: }
                    184: 
                    185: 
                    186: static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
                    187:                    UChar buf[], int buf_size, int *is_over)
                    188: {
                    189:   int len;
                    190:   UChar *p;
                    191:   OnigCodePoint code;
                    192: 
                    193:   if (ONIGENC_MBC_MINLEN(enc) > 1) {
                    194:     p = s;
                    195:     len = 0;
                    196:     while (p < end) {
                    197:       code = ONIGENC_MBC_TO_CODE(enc, p, end);
                    198:       if (code >= 0x80) {
                    199:        if (len + 5 <= buf_size) {
                    200:          sprintf((char* )(&(buf[len])), "\\%03o",
                    201:                  (unsigned int)(code & 0377));
                    202:          len += 5;
                    203:        }
                    204:        else {
                    205:          break;
                    206:        }
                    207:       }
                    208:       else {
                    209:        buf[len++] = (UChar )code;
                    210:       }
                    211: 
                    212:       p += enc_len(enc, p);
                    213:       if (len >= buf_size) break;
                    214:     }
                    215: 
                    216:     *is_over = ((p < end) ? 1 : 0);
                    217:   }
                    218:   else {
                    219:     len = MIN((end - s), buf_size);
                    220:     xmemcpy(buf, s, (size_t )len);
                    221:     *is_over = ((buf_size < (end - s)) ? 1 : 0);
                    222:   }
                    223: 
                    224:   return len;
                    225: }
                    226: 
                    227: 
                    228: /* for ONIG_MAX_ERROR_MESSAGE_LEN */
                    229: #define MAX_ERROR_PAR_LEN   30
                    230: 
                    231: extern int
                    232: #ifdef HAVE_STDARG_PROTOTYPES
                    233: onig_error_code_to_str(UChar* s, int code, ...)
                    234: #else
                    235: onig_error_code_to_str(s, code, va_alist)
                    236:   UChar* s;
                    237:   int code;
                    238:   va_dcl 
                    239: #endif
                    240: {
                    241:   UChar *p, *q;
                    242:   OnigErrorInfo* einfo;
                    243:   int len, is_over;
                    244:   UChar parbuf[MAX_ERROR_PAR_LEN];
                    245:   va_list vargs;
                    246: 
                    247:   va_init_list(vargs, code);
                    248: 
                    249:   switch (code) {
                    250:   case ONIGERR_UNDEFINED_NAME_REFERENCE:
                    251:   case ONIGERR_UNDEFINED_GROUP_REFERENCE:
                    252:   case ONIGERR_MULTIPLEX_DEFINED_NAME:
                    253:   case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
                    254:   case ONIGERR_INVALID_GROUP_NAME:
                    255:   case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
                    256:   case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
                    257:     einfo = va_arg(vargs, OnigErrorInfo*);
                    258:     len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
                    259:                   parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
                    260:     q = onig_error_code_to_format(code);
                    261:     p = s;
                    262:     while (*q != '\0') {
                    263:       if (*q == '%') {
                    264:        q++;
                    265:        if (*q == 'n') { /* '%n': name */
                    266:          xmemcpy(p, parbuf, len);
                    267:          p += len;
                    268:          if (is_over != 0) {
                    269:            xmemcpy(p, "...", 3);
                    270:            p += 3;
                    271:          }
                    272:          q++;
                    273:        }
                    274:        else
                    275:          goto normal_char;
                    276:       }
                    277:       else {
                    278:       normal_char:
                    279:        *p++ = *q++;
                    280:       }
                    281:     }
                    282:     *p = '\0';
                    283:     len = p - s;
                    284:     break;
                    285: 
                    286:   default:
                    287:     q = onig_error_code_to_format(code);
                    288:     len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
                    289:     xmemcpy(s, q, len);
                    290:     s[len] = '\0';
                    291:     break;
                    292:   }
                    293: 
                    294:   va_end(vargs);
                    295:   return len;
                    296: }
                    297: 
                    298: 
                    299: void
                    300: #ifdef HAVE_STDARG_PROTOTYPES
                    301: onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
                    302:                            UChar* pat, UChar* pat_end, const UChar *fmt, ...)
                    303: #else
                    304: onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
                    305:     UChar buf[];
                    306:     int bufsize;
                    307:     OnigEncoding enc;
                    308:     UChar* pat;
                    309:     UChar* pat_end;
                    310:     const UChar *fmt;
                    311:     va_dcl
                    312: #endif
                    313: {
                    314:   int n, need, len;
                    315:   UChar *p, *s, *bp;
                    316:   UChar bs[6];
                    317:   va_list args;
                    318: 
                    319:   va_init_list(args, fmt);
                    320:   n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
                    321:   va_end(args);
                    322: 
                    323:   need = (pat_end - pat) * 4 + 4;
                    324: 
                    325:   if (n + need < bufsize) {
                    326:     strcat((char* )buf, ": /");
                    327:     s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
                    328: 
                    329:     p = pat;
                    330:     while (p < pat_end) {
                    331:       if (*p == MC_ESC(enc)) {
                    332:        *s++ = *p++;
                    333:        len = enc_len(enc, p);
                    334:        while (len-- > 0) *s++ = *p++;
                    335:       }
                    336:       else if (*p == '/') {
                    337:        *s++ = (unsigned char )MC_ESC(enc);
                    338:        *s++ = *p++;
                    339:       }
                    340:       else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
                    341:         len = enc_len(enc, p);
                    342:         if (ONIGENC_MBC_MINLEN(enc) == 1) {
                    343:           while (len-- > 0) *s++ = *p++;
                    344:         }
                    345:         else { /* for UTF16 */
                    346:           int blen;
                    347: 
                    348:           while (len-- > 0) {
                    349:             sprintf((char* )bs, "\\%03o", *p++ & 0377);
                    350:             blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
                    351:             bp = bs;
                    352:             while (blen-- > 0) *s++ = *bp++;
                    353:           }
                    354:         }
                    355:       }
                    356:       else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
                    357:               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
                    358:        sprintf((char* )bs, "\\%03o", *p++ & 0377);
                    359:        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
                    360:         bp = bs;
                    361:        while (len-- > 0) *s++ = *bp++;
                    362:       }
                    363:       else {
                    364:        *s++ = *p++;
                    365:       }
                    366:     }
                    367: 
                    368:     *s++ = '/';
                    369:     *s   = '\0';
                    370:   }
                    371: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>