embedaddon/smartmontools/regex/regex_internal.h - annotate

Return to regex_internal.h CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / smartmontools / regex
Annotation of embedaddon/smartmontools/regex/regex_internal.h, revision 1.1.1.1

1.1       misho       1: /* Extended regular expression matching and search library.
                      2:    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
                      3:    This file is part of the GNU C Library.
                      4:    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
                      5: 
                      6:    The GNU C Library is free software; you can redistribute it and/or
                      7:    modify it under the terms of the GNU Lesser General Public
                      8:    License as published by the Free Software Foundation; either
                      9:    version 2.1 of the License, or (at your option) any later version.
                     10: 
                     11:    The GNU C Library is distributed in the hope that it will be useful,
                     12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
                     13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     14:    Lesser General Public License for more details.
                     15: 
                     16:    You should have received a copy of the GNU Lesser General Public
                     17:    License along with the GNU C Library; if not, write to the Free
                     18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
                     19:    02111-1307 USA.  */
                     20: 
                     21: #ifndef _REGEX_INTERNAL_H
                     22: #define _REGEX_INTERNAL_H 1
                     23: 
                     24: #ifdef HAVE_CONFIG_H
                     25: #include "config.h"
                     26: #endif
                     27: 
                     28: #include <assert.h>
                     29: #include <ctype.h>
                     30: #include <limits.h>
                     31: #include <stdio.h>
                     32: #include <stdlib.h>
                     33: #include <string.h>
                     34: 
                     35: #if defined HAVE_LOCALE_H || defined _LIBC
                     36: # include <locale.h>
                     37: #endif
                     38: #if defined HAVE_WCHAR_H || defined _LIBC
                     39: # include <wchar.h>
                     40: #endif /* HAVE_WCHAR_H || _LIBC */
                     41: #if defined HAVE_WCTYPE_H || defined _LIBC
                     42: # include <wctype.h>
                     43: #endif /* HAVE_WCTYPE_H || _LIBC */
                     44: 
                     45: /* In case that the system doesn't have isblank().  */
                     46: #if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
                     47: # define isblank(ch) ((ch) == ' ' || (ch) == '\t')
                     48: #endif
                     49: 
                     50: #ifdef _LIBC
                     51: # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
                     52: #  define _RE_DEFINE_LOCALE_FUNCTIONS 1
                     53: #   include <locale/localeinfo.h>
                     54: #   include <locale/elem-hash.h>
                     55: #   include <locale/coll-lookup.h>
                     56: # endif
                     57: #endif
                     58: 
                     59: /* This is for other GNU distributions with internationalized messages.  */
                     60: #if HAVE_LIBINTL_H || defined _LIBC
                     61: # include <libintl.h>
                     62: # ifdef _LIBC
                     63: #  undef gettext
                     64: #  define gettext(msgid) \
                     65:   INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
                     66: # endif
                     67: #else
                     68: # define gettext(msgid) (msgid)
                     69: #endif
                     70: 
                     71: #ifndef gettext_noop
                     72: /* This define is so xgettext can find the internationalizable
                     73:    strings.  */
                     74: # define gettext_noop(String) String
                     75: #endif
                     76: 
                     77: #if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
                     78: # define RE_ENABLE_I18N
                     79: #endif
                     80: 
                     81: #if __GNUC__ >= 3
                     82: # define BE(expr, val) __builtin_expect (expr, val)
                     83: #else
                     84: # define BE(expr, val) (expr)
                     85: # define inline
                     86: #endif
                     87: 
                     88: /* Number of bits in a byte.  */
                     89: #define BYTE_BITS 8
                     90: /* Number of single byte character.  */
                     91: #define SBC_MAX 256
                     92: 
                     93: #define COLL_ELEM_LEN_MAX 8
                     94: 
                     95: /* The character which represents newline.  */
                     96: #define NEWLINE_CHAR '\n'
                     97: #define WIDE_NEWLINE_CHAR L'\n'
                     98: 
                     99: /* Rename to standard API for using out of glibc.  */
                    100: #ifndef _LIBC
                    101: # define __wctype wctype
                    102: # define __iswctype iswctype
                    103: # define __btowc btowc
                    104: # define __mempcpy mempcpy
                    105: # define __wcrtomb wcrtomb
                    106: # define attribute_hidden
                    107: #endif /* not _LIBC */
                    108: 
                    109: extern const char __re_error_msgid[] attribute_hidden;
                    110: extern const size_t __re_error_msgid_idx[] attribute_hidden;
                    111: 
                    112: /* Number of bits in an unsinged int.  */
                    113: #define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
                    114: /* Number of unsigned int in an bit_set.  */
                    115: #define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
                    116: typedef unsigned int bitset[BITSET_UINTS];
                    117: typedef unsigned int *re_bitset_ptr_t;
                    118: 
                    119: #define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
                    120: #define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
                    121: #define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
                    122: #define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
                    123: #define bitset_set_all(set) \
                    124:   memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
                    125: #define bitset_copy(dest,src) \
                    126:   memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
                    127: static inline void bitset_not (bitset set);
                    128: static inline void bitset_merge (bitset dest, const bitset src);
                    129: static inline void bitset_not_merge (bitset dest, const bitset src);
                    130: 
                    131: #define PREV_WORD_CONSTRAINT 0x0001
                    132: #define PREV_NOTWORD_CONSTRAINT 0x0002
                    133: #define NEXT_WORD_CONSTRAINT 0x0004
                    134: #define NEXT_NOTWORD_CONSTRAINT 0x0008
                    135: #define PREV_NEWLINE_CONSTRAINT 0x0010
                    136: #define NEXT_NEWLINE_CONSTRAINT 0x0020
                    137: #define PREV_BEGBUF_CONSTRAINT 0x0040
                    138: #define NEXT_ENDBUF_CONSTRAINT 0x0080
                    139: #define DUMMY_CONSTRAINT 0x0100
                    140: 
                    141: typedef enum
                    142: {
                    143:   INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
                    144:   WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
                    145:   WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
                    146:   LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
                    147:   LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
                    148:   BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
                    149:   BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
                    150:   WORD_DELIM = DUMMY_CONSTRAINT
                    151: } re_context_type;
                    152: 
                    153: typedef struct
                    154: {
                    155:   int alloc;
                    156:   int nelem;
                    157:   int *elems;
                    158: } re_node_set;
                    159: 
                    160: typedef enum
                    161: {
                    162:   NON_TYPE = 0,
                    163: 
                    164:   /* Token type, these are used only by token.  */
                    165:   OP_OPEN_BRACKET,
                    166:   OP_CLOSE_BRACKET,
                    167:   OP_CHARSET_RANGE,
                    168:   OP_OPEN_DUP_NUM,
                    169:   OP_CLOSE_DUP_NUM,
                    170:   OP_NON_MATCH_LIST,
                    171:   OP_OPEN_COLL_ELEM,
                    172:   OP_CLOSE_COLL_ELEM,
                    173:   OP_OPEN_EQUIV_CLASS,
                    174:   OP_CLOSE_EQUIV_CLASS,
                    175:   OP_OPEN_CHAR_CLASS,
                    176:   OP_CLOSE_CHAR_CLASS,
                    177:   OP_WORD,
                    178:   OP_NOTWORD,
                    179:   BACK_SLASH,
                    180: 
                    181:   /* Tree type, these are used only by tree. */
                    182:   CONCAT,
                    183:   ALT,
                    184:   SUBEXP,
                    185:   SIMPLE_BRACKET,
                    186: #ifdef RE_ENABLE_I18N
                    187:   COMPLEX_BRACKET,
                    188: #endif /* RE_ENABLE_I18N */
                    189: 
                    190:   /* Node type, These are used by token, node, tree.  */
                    191:   OP_OPEN_SUBEXP,
                    192:   OP_CLOSE_SUBEXP,
                    193:   OP_PERIOD,
                    194:   CHARACTER,
                    195:   END_OF_RE,
                    196:   OP_ALT,
                    197:   OP_DUP_ASTERISK,
                    198:   OP_DUP_PLUS,
                    199:   OP_DUP_QUESTION,
                    200:   OP_BACK_REF,
                    201:   ANCHOR,
                    202: 
                    203:   /* Dummy marker.  */
                    204:   END_OF_RE_TOKEN_T
                    205: } re_token_type_t;
                    206: 
                    207: #ifdef RE_ENABLE_I18N
                    208: typedef struct
                    209: {
                    210:   /* Multibyte characters.  */
                    211:   wchar_t *mbchars;
                    212: 
                    213:   /* Collating symbols.  */
                    214: # ifdef _LIBC
                    215:   int32_t *coll_syms;
                    216: # endif
                    217: 
                    218:   /* Equivalence classes. */
                    219: # ifdef _LIBC
                    220:   int32_t *equiv_classes;
                    221: # endif
                    222: 
                    223:   /* Range expressions. */
                    224: # ifdef _LIBC
                    225:   uint32_t *range_starts;
                    226:   uint32_t *range_ends;
                    227: # else /* not _LIBC */
                    228:   wchar_t *range_starts;
                    229:   wchar_t *range_ends;
                    230: # endif /* not _LIBC */
                    231: 
                    232:   /* Character classes. */
                    233:   wctype_t *char_classes;
                    234: 
                    235:   /* If this character set is the non-matching list.  */
                    236:   unsigned int non_match : 1;
                    237: 
                    238:   /* # of multibyte characters.  */
                    239:   int nmbchars;
                    240: 
                    241:   /* # of collating symbols.  */
                    242:   int ncoll_syms;
                    243: 
                    244:   /* # of equivalence classes. */
                    245:   int nequiv_classes;
                    246: 
                    247:   /* # of range expressions. */
                    248:   int nranges;
                    249: 
                    250:   /* # of character classes. */
                    251:   int nchar_classes;
                    252: } re_charset_t;
                    253: #endif /* RE_ENABLE_I18N */
                    254: 
                    255: typedef struct
                    256: {
                    257:   union
                    258:   {
                    259:     unsigned char c;           /* for CHARACTER */
                    260:     re_bitset_ptr_t sbcset;    /* for SIMPLE_BRACKET */
                    261: #ifdef RE_ENABLE_I18N
                    262:     re_charset_t *mbcset;      /* for COMPLEX_BRACKET */
                    263: #endif /* RE_ENABLE_I18N */
                    264:     int idx;                   /* for BACK_REF */
                    265:     re_context_type ctx_type;  /* for ANCHOR */
                    266:   } opr;
                    267: #if __GNUC__ >= 2
                    268:   re_token_type_t type : 8;
                    269: #else
                    270:   re_token_type_t type;
                    271: #endif
                    272:   unsigned int constraint : 10;        /* context constraint */
                    273:   unsigned int duplicated : 1;
                    274: #ifdef RE_ENABLE_I18N
                    275:   unsigned int mb_partial : 1;
                    276: #endif
                    277: } re_token_t;
                    278: 
                    279: #define IS_EPSILON_NODE(type) \
                    280:   ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
                    281:    || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
                    282:    || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
                    283: 
                    284: #define ACCEPT_MB_NODE(type) \
                    285:   ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
                    286: 
                    287: struct re_string_t
                    288: {
                    289:   /* Indicate the raw buffer which is the original string passed as an
                    290:      argument of regexec(), re_search(), etc..  */
                    291:   const unsigned char *raw_mbs;
                    292:   /* Store the multibyte string.  In case of "case insensitive mode" like
                    293:      REG_ICASE, upper cases of the string are stored, otherwise MBS points
                    294:      the same address that RAW_MBS points.  */
                    295:   unsigned char *mbs;
                    296:   /* Store the case sensitive multibyte string.  In case of
                    297:      "case insensitive mode", the original string are stored,
                    298:      otherwise MBS_CASE points the same address that MBS points.  */
                    299:   unsigned char *mbs_case;
                    300: #ifdef RE_ENABLE_I18N
                    301:   /* Store the wide character string which is corresponding to MBS.  */
                    302:   wint_t *wcs;
                    303:   mbstate_t cur_state;
                    304: #endif
                    305:   /* Index in RAW_MBS.  Each character mbs[i] corresponds to
                    306:      raw_mbs[raw_mbs_idx + i].  */
                    307:   int raw_mbs_idx;
                    308:   /* The length of the valid characters in the buffers.  */
                    309:   int valid_len;
                    310:   /* The length of the buffers MBS, MBS_CASE, and WCS.  */
                    311:   int bufs_len;
                    312:   /* The index in MBS, which is updated by re_string_fetch_byte.  */
                    313:   int cur_idx;
                    314:   /* This is length_of_RAW_MBS - RAW_MBS_IDX.  */
                    315:   int len;
                    316:   /* End of the buffer may be shorter than its length in the cases such
                    317:      as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
                    318:      instead of LEN.  */
                    319:   int stop;
                    320: 
                    321:   /* The context of mbs[0].  We store the context independently, since
                    322:      the context of mbs[0] may be different from raw_mbs[0], which is
                    323:      the beginning of the input string.  */
                    324:   unsigned int tip_context;
                    325:   /* The translation passed as a part of an argument of re_compile_pattern.  */
                    326:   RE_TRANSLATE_TYPE trans;
                    327:   /* 1 if REG_ICASE.  */
                    328:   unsigned int icase : 1;
                    329: };
                    330: typedef struct re_string_t re_string_t;
                    331: /* In case of REG_ICASE, we allocate the buffer dynamically for mbs.  */
                    332: #define MBS_ALLOCATED(pstr) (pstr->icase)
                    333: /* In case that we need translation, we allocate the buffer dynamically
                    334:    for mbs_case.  Note that mbs == mbs_case if not REG_ICASE.  */
                    335: #define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
                    336: 
                    337: 
                    338: static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
                    339:                                         int len, int init_len,
                    340:                                         RE_TRANSLATE_TYPE trans, int icase);
                    341: static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
                    342:                                          int len, RE_TRANSLATE_TYPE trans,
                    343:                                          int icase);
                    344: static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
                    345:                                            int eflags, int newline);
                    346: static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
                    347:                                                int new_buf_len);
                    348: #ifdef RE_ENABLE_I18N
                    349: static void build_wcs_buffer (re_string_t *pstr);
                    350: static void build_wcs_upper_buffer (re_string_t *pstr);
                    351: #endif /* RE_ENABLE_I18N */
                    352: static void build_upper_buffer (re_string_t *pstr);
                    353: static void re_string_translate_buffer (re_string_t *pstr);
                    354: static void re_string_destruct (re_string_t *pstr);
                    355: #ifdef RE_ENABLE_I18N
                    356: static int re_string_elem_size_at (const re_string_t *pstr, int idx);
                    357: static inline int re_string_char_size_at (const re_string_t *pstr, int idx);
                    358: static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx);
                    359: #endif /* RE_ENABLE_I18N */
                    360: static unsigned int re_string_context_at (const re_string_t *input, int idx,
                    361:                                          int eflags, int newline_anchor);
                    362: #define re_string_peek_byte(pstr, offset) \
                    363:   ((pstr)->mbs[(pstr)->cur_idx + offset])
                    364: #define re_string_peek_byte_case(pstr, offset) \
                    365:   ((pstr)->mbs_case[(pstr)->cur_idx + offset])
                    366: #define re_string_fetch_byte(pstr) \
                    367:   ((pstr)->mbs[(pstr)->cur_idx++])
                    368: #define re_string_fetch_byte_case(pstr) \
                    369:   ((pstr)->mbs_case[(pstr)->cur_idx++])
                    370: #define re_string_first_byte(pstr, idx) \
                    371:   ((idx) == (pstr)->len || (pstr)->wcs[idx] != WEOF)
                    372: #define re_string_is_single_byte_char(pstr, idx) \
                    373:   ((pstr)->wcs[idx] != WEOF && ((pstr)->len == (idx) \
                    374:                                || (pstr)->wcs[(idx) + 1] != WEOF))
                    375: #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
                    376: #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
                    377: #define re_string_get_buffer(pstr) ((pstr)->mbs)
                    378: #define re_string_length(pstr) ((pstr)->len)
                    379: #define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
                    380: #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
                    381: #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
                    382: 
                    383: #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
                    384: #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
                    385: #define re_free(p) free (p)
                    386: 
                    387: struct bin_tree_t
                    388: {
                    389:   struct bin_tree_t *parent;
                    390:   struct bin_tree_t *left;
                    391:   struct bin_tree_t *right;
                    392: 
                    393:   /* `node_idx' is the index in dfa->nodes, if `type' == 0.
                    394:      Otherwise `type' indicate the type of this node.  */
                    395:   re_token_type_t type;
                    396:   int node_idx;
                    397: 
                    398:   int first;
                    399:   int next;
                    400:   re_node_set eclosure;
                    401: };
                    402: typedef struct bin_tree_t bin_tree_t;
                    403: 
                    404: 
                    405: #define CONTEXT_WORD 1
                    406: #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
                    407: #define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
                    408: #define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
                    409: 
                    410: #define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
                    411: #define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
                    412: #define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
                    413: #define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
                    414: #define IS_ORDINARY_CONTEXT(c) ((c) == 0)
                    415: 
                    416: #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
                    417: #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
                    418: #define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
                    419: #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
                    420: 
                    421: #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
                    422:  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
                    423:   || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
                    424:   || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
                    425:   || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
                    426: 
                    427: #define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
                    428:  ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
                    429:   || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
                    430:   || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
                    431:   || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
                    432: 
                    433: struct re_dfastate_t
                    434: {
                    435:   unsigned int hash;
                    436:   re_node_set nodes;
                    437:   re_node_set *entrance_nodes;
                    438:   struct re_dfastate_t **trtable;
                    439:   struct re_dfastate_t **trtable_search;
                    440:   /* If this state is a special state.
                    441:      A state is a special state if the state is the halt state, or
                    442:      a anchor.  */
                    443:   unsigned int context : 2;
                    444:   unsigned int halt : 1;
                    445:   /* If this state can accept `multi byte'.
                    446:      Note that we refer to multibyte characters, and multi character
                    447:      collating elements as `multi byte'.  */
                    448:   unsigned int accept_mb : 1;
                    449:   /* If this state has backreference node(s).  */
                    450:   unsigned int has_backref : 1;
                    451:   unsigned int has_constraint : 1;
                    452: };
                    453: typedef struct re_dfastate_t re_dfastate_t;
                    454: 
                    455: typedef struct
                    456: {
                    457:   /* start <= node < end  */
                    458:   int start;
                    459:   int end;
                    460: } re_subexp_t;
                    461: 
                    462: struct re_state_table_entry
                    463: {
                    464:   int num;
                    465:   int alloc;
                    466:   re_dfastate_t **array;
                    467: };
                    468: 
                    469: /* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
                    470: 
                    471: typedef struct
                    472: {
                    473:   int next_idx;
                    474:   int alloc;
                    475:   re_dfastate_t **array;
                    476: } state_array_t;
                    477: 
                    478: /* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
                    479: 
                    480: typedef struct
                    481: {
                    482:   int node;
                    483:   int str_idx; /* The position NODE match at.  */
                    484:   state_array_t path;
                    485: } re_sub_match_last_t;
                    486: 
                    487: /* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
                    488:    And information about the node, whose type is OP_CLOSE_SUBEXP,
                    489:    corresponding to NODE is stored in LASTS.  */
                    490: 
                    491: typedef struct
                    492: {
                    493:   int str_idx;
                    494:   int node;
                    495:   int next_last_offset;
                    496:   state_array_t *path;
                    497:   int alasts; /* Allocation size of LASTS.  */
                    498:   int nlasts; /* The number of LASTS.  */
                    499:   re_sub_match_last_t **lasts;
                    500: } re_sub_match_top_t;
                    501: 
                    502: struct re_backref_cache_entry
                    503: {
                    504:   int node;
                    505:   int str_idx;
                    506:   int subexp_from;
                    507:   int subexp_to;
                    508:   int flag;
                    509: };
                    510: 
                    511: typedef struct
                    512: {
                    513:   /* EFLAGS of the argument of regexec.  */
                    514:   int eflags;
                    515:   /* Where the matching ends.  */
                    516:   int match_last;
                    517:   int last_node;
                    518:   /* The string object corresponding to the input string.  */
                    519:   re_string_t *input;
                    520:   /* The state log used by the matcher.  */
                    521:   re_dfastate_t **state_log;
                    522:   int state_log_top;
                    523:   /* Back reference cache.  */
                    524:   int nbkref_ents;
                    525:   int abkref_ents;
                    526:   struct re_backref_cache_entry *bkref_ents;
                    527:   int max_mb_elem_len;
                    528:   int nsub_tops;
                    529:   int asub_tops;
                    530:   re_sub_match_top_t **sub_tops;
                    531: } re_match_context_t;
                    532: 
                    533: typedef struct
                    534: {
                    535:   int cur_bkref;
                    536:   int cls_subexp_idx;
                    537: 
                    538:   re_dfastate_t **sifted_states;
                    539:   re_dfastate_t **limited_states;
                    540: 
                    541:   re_node_set limits;
                    542: 
                    543:   int last_node;
                    544:   int last_str_idx;
                    545:   int check_subexp;
                    546: } re_sift_context_t;
                    547: 
                    548: struct re_fail_stack_ent_t
                    549: {
                    550:   int idx;
                    551:   int node;
                    552:   regmatch_t *regs;
                    553:   re_node_set eps_via_nodes;
                    554: };
                    555: 
                    556: struct re_fail_stack_t
                    557: {
                    558:   int num;
                    559:   int alloc;
                    560:   struct re_fail_stack_ent_t *stack;
                    561: };
                    562: 
                    563: struct re_dfa_t
                    564: {
                    565:   re_bitset_ptr_t word_char;
                    566: 
                    567:   /* number of subexpressions `re_nsub' is in regex_t.  */
                    568:   int subexps_alloc;
                    569:   re_subexp_t *subexps;
                    570: 
                    571:   re_token_t *nodes;
                    572:   int nodes_alloc;
                    573:   int nodes_len;
                    574:   bin_tree_t *str_tree;
                    575:   int *nexts;
                    576:   int *org_indices;
                    577:   re_node_set *edests;
                    578:   re_node_set *eclosures;
                    579:   re_node_set *inveclosures;
                    580:   struct re_state_table_entry *state_table;
                    581:   unsigned int state_hash_mask;
                    582:   re_dfastate_t *init_state;
                    583:   re_dfastate_t *init_state_word;
                    584:   re_dfastate_t *init_state_nl;
                    585:   re_dfastate_t *init_state_begbuf;
                    586:   int states_alloc;
                    587:   int init_node;
                    588:   int nbackref; /* The number of backreference in this dfa.  */
                    589:   /* Bitmap expressing which backreference is used.  */
                    590:   unsigned int used_bkref_map;
                    591: #ifdef DEBUG
                    592:   char* re_str;
                    593: #endif
                    594:   unsigned int has_plural_match : 1;
                    595:   /* If this dfa has "multibyte node", which is a backreference or
                    596:      a node which can accept multibyte character or multi character
                    597:      collating element.  */
                    598:   unsigned int has_mb_node : 1;
                    599: };
                    600: typedef struct re_dfa_t re_dfa_t;
                    601: 
                    602: static reg_errcode_t re_node_set_alloc (re_node_set *set, int size);
                    603: static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem);
                    604: static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
                    605:                                         int elem2);
                    606: #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
                    607: static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
                    608:                                            const re_node_set *src);
                    609: static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
                    610:                                                const re_node_set *src1,
                    611:                                                const re_node_set *src2);
                    612: static reg_errcode_t re_node_set_init_union (re_node_set *dest,
                    613:                                             const re_node_set *src1,
                    614:                                             const re_node_set *src2);
                    615: static reg_errcode_t re_node_set_merge (re_node_set *dest,
                    616:                                        const re_node_set *src);
                    617: static int re_node_set_insert (re_node_set *set, int elem);
                    618: static int re_node_set_compare (const re_node_set *set1,
                    619:                                const re_node_set *set2);
                    620: static int re_node_set_contains (const re_node_set *set, int elem);
                    621: static void re_node_set_remove_at (re_node_set *set, int idx);
                    622: #define re_node_set_remove(set,id) \
                    623:   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
                    624: #define re_node_set_empty(p) ((p)->nelem = 0)
                    625: #define re_node_set_free(set) re_free ((set)->elems)
                    626: static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
                    627: static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
                    628:                                        const re_node_set *nodes);
                    629: static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
                    630:                                                re_dfa_t *dfa,
                    631:                                                const re_node_set *nodes,
                    632:                                                unsigned int context);
                    633: static void free_state (re_dfastate_t *state);
                    634: 
                    635: 
                    636: typedef enum
                    637: {
                    638:   SB_CHAR,
                    639:   MB_CHAR,
                    640:   EQUIV_CLASS,
                    641:   COLL_SYM,
                    642:   CHAR_CLASS
                    643: } bracket_elem_type;
                    644: 
                    645: typedef struct
                    646: {
                    647:   bracket_elem_type type;
                    648:   union
                    649:   {
                    650:     unsigned char ch;
                    651:     unsigned char *name;
                    652:     wchar_t wch;
                    653:   } opr;
                    654: } bracket_elem_t;
                    655: 
                    656: 
                    657: /* Inline functions for bitset operation.  */
                    658: static inline void
                    659: bitset_not (set)
                    660:      bitset set;
                    661: {
                    662:   int bitset_i;
                    663:   for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
                    664:     set[bitset_i] = ~set[bitset_i];
                    665: }
                    666: 
                    667: static inline void
                    668: bitset_merge (dest, src)
                    669:      bitset dest;
                    670:      const bitset src;
                    671: {
                    672:   int bitset_i;
                    673:   for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
                    674:     dest[bitset_i] |= src[bitset_i];
                    675: }
                    676: 
                    677: static inline void
                    678: bitset_not_merge (dest, src)
                    679:      bitset dest;
                    680:      const bitset src;
                    681: {
                    682:   int i;
                    683:   for (i = 0; i < BITSET_UINTS; ++i)
                    684:     dest[i] |= ~src[i];
                    685: }
                    686: 
                    687: #ifdef RE_ENABLE_I18N
                    688: /* Inline functions for re_string.  */
                    689: static inline int
                    690: re_string_char_size_at (pstr, idx)
                    691:      const re_string_t *pstr;
                    692:      int idx;
                    693: {
                    694:   int byte_idx;
                    695:   if (MB_CUR_MAX == 1)
                    696:     return 1;
                    697:   for (byte_idx = 1; idx + byte_idx < pstr->len; ++byte_idx)
                    698:     if (pstr->wcs[idx + byte_idx] != WEOF)
                    699:       break;
                    700:   return byte_idx;
                    701: }
                    702: 
                    703: static inline wint_t
                    704: re_string_wchar_at (pstr, idx)
                    705:      const re_string_t *pstr;
                    706:      int idx;
                    707: {
                    708:   if (MB_CUR_MAX == 1)
                    709:     return (wint_t) pstr->mbs[idx];
                    710:   return (wint_t) pstr->wcs[idx];
                    711: }
                    712: 
                    713: static int
                    714: re_string_elem_size_at (pstr, idx)
                    715:      const re_string_t *pstr;
                    716:      int idx;
                    717: {
                    718: #ifdef _LIBC
                    719:   const unsigned char *p, *extra;
                    720:   const int32_t *table, *indirect;
                    721:   int32_t tmp;
                    722: # include <locale/weight.h>
                    723:   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
                    724: 
                    725:   if (nrules != 0)
                    726:     {
                    727:       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
                    728:       extra = (const unsigned char *)
                    729:        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
                    730:       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
                    731:                                                _NL_COLLATE_INDIRECTMB);
                    732:       p = pstr->mbs + idx;
                    733:       tmp = findidx (&p);
                    734:       return p - pstr->mbs - idx;
                    735:     }
                    736:   else
                    737: #endif /* _LIBC */
                    738:     return 1;
                    739: }
                    740: #endif /* RE_ENABLE_I18N */
                    741: 
                    742: #endif /*  _REGEX_INTERNAL_H */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>