embedaddon/smartmontools/regex/regcomp.c - annotate

Return to regcomp.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / smartmontools / regex
Annotation of embedaddon/smartmontools/regex/regcomp.c, revision 1.1

1.1     ! misho       1: /* Extended regular expression matching and search library.
        !             2:    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
        !             3:    This file is part of the GNU C Library.
        !             4:    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
        !             5: 
        !             6:    The GNU C Library is free software; you can redistribute it and/or
        !             7:    modify it under the terms of the GNU Lesser General Public
        !             8:    License as published by the Free Software Foundation; either
        !             9:    version 2.1 of the License, or (at your option) any later version.
        !            10: 
        !            11:    The GNU C Library is distributed in the hope that it will be useful,
        !            12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            14:    Lesser General Public License for more details.
        !            15: 
        !            16:    You should have received a copy of the GNU Lesser General Public
        !            17:    License along with the GNU C Library; if not, write to the Free
        !            18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
        !            19:    02111-1307 USA.  */
        !            20: 
        !            21: static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
        !            22:                                          int length, reg_syntax_t syntax);
        !            23: static void re_compile_fastmap_iter (regex_t *bufp,
        !            24:                                     const re_dfastate_t *init_state,
        !            25:                                     char *fastmap);
        !            26: static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
        !            27: static reg_errcode_t init_word_char (re_dfa_t *dfa);
        !            28: #ifdef RE_ENABLE_I18N
        !            29: static void free_charset (re_charset_t *cset);
        !            30: #endif /* RE_ENABLE_I18N */
        !            31: static void free_workarea_compile (regex_t *preg);
        !            32: static reg_errcode_t create_initial_state (re_dfa_t *dfa);
        !            33: static reg_errcode_t analyze (re_dfa_t *dfa);
        !            34: static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
        !            35: static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
        !            36: static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
        !            37: static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
        !            38: static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
        !            39:                                             int top_clone_node, int root_node,
        !            40:                                             unsigned int constraint);
        !            41: static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
        !            42:                                     unsigned int constraint);
        !            43: static int search_duplicated_node (re_dfa_t *dfa, int org_node,
        !            44:                                   unsigned int constraint);
        !            45: static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
        !            46: static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
        !            47:                                         int node, int root);
        !            48: static void calc_inveclosure (re_dfa_t *dfa);
        !            49: static int fetch_number (re_string_t *input, re_token_t *token,
        !            50:                         reg_syntax_t syntax);
        !            51: static re_token_t fetch_token (re_string_t *input, reg_syntax_t syntax);
        !            52: static int peek_token (re_token_t *token, re_string_t *input,
        !            53:                        reg_syntax_t syntax);
        !            54: static int peek_token_bracket (re_token_t *token, re_string_t *input,
        !            55:                               reg_syntax_t syntax);
        !            56: static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
        !            57:                          reg_syntax_t syntax, reg_errcode_t *err);
        !            58: static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
        !            59:                                  re_token_t *token, reg_syntax_t syntax,
        !            60:                                  int nest, reg_errcode_t *err);
        !            61: static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
        !            62:                                 re_token_t *token, reg_syntax_t syntax,
        !            63:                                 int nest, reg_errcode_t *err);
        !            64: static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
        !            65:                                     re_token_t *token, reg_syntax_t syntax,
        !            66:                                     int nest, reg_errcode_t *err);
        !            67: static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
        !            68:                                  re_token_t *token, reg_syntax_t syntax,
        !            69:                                  int nest, reg_errcode_t *err);
        !            70: static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
        !            71:                                 re_dfa_t *dfa, re_token_t *token,
        !            72:                                 reg_syntax_t syntax, reg_errcode_t *err);
        !            73: static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
        !            74:                                      re_token_t *token, reg_syntax_t syntax,
        !            75:                                      reg_errcode_t *err);
        !            76: static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
        !            77:                                            re_string_t *regexp,
        !            78:                                            re_token_t *token, int token_len,
        !            79:                                            re_dfa_t *dfa,
        !            80:                                            reg_syntax_t syntax);
        !            81: static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
        !            82:                                          re_string_t *regexp,
        !            83:                                          re_token_t *token);
        !            84: #ifndef _LIBC
        !            85: # ifdef RE_ENABLE_I18N
        !            86: static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
        !            87:                                      re_charset_t *mbcset, int *range_alloc,
        !            88:                                      bracket_elem_t *start_elem,
        !            89:                                      bracket_elem_t *end_elem);
        !            90: static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
        !            91:                                             re_charset_t *mbcset,
        !            92:                                             int *coll_sym_alloc,
        !            93:                                             const unsigned char *name);
        !            94: # else /* not RE_ENABLE_I18N */
        !            95: static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
        !            96:                                      bracket_elem_t *start_elem,
        !            97:                                      bracket_elem_t *end_elem);
        !            98: static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
        !            99:                                             const unsigned char *name);
        !           100: # endif /* not RE_ENABLE_I18N */
        !           101: #endif /* not _LIBC */
        !           102: #ifdef RE_ENABLE_I18N
        !           103: static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
        !           104:                                        re_charset_t *mbcset,
        !           105:                                        int *equiv_class_alloc,
        !           106:                                        const unsigned char *name);
        !           107: static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
        !           108:                                      re_charset_t *mbcset,
        !           109:                                      int *char_class_alloc,
        !           110:                                      const unsigned char *class_name,
        !           111:                                      reg_syntax_t syntax);
        !           112: #else  /* not RE_ENABLE_I18N */
        !           113: static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
        !           114:                                        const unsigned char *name);
        !           115: static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset,
        !           116:                                      const unsigned char *class_name,
        !           117:                                      reg_syntax_t syntax);
        !           118: #endif /* not RE_ENABLE_I18N */
        !           119: static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err);
        !           120: static void free_bin_tree (bin_tree_t *tree);
        !           121: static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
        !           122:                                re_token_type_t type, int index);
        !           123: static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
        !           124: 
        !           125: /* This table gives an error message for each of the error codes listed
        !           126:    in regex.h.  Obviously the order here has to be same as there.
        !           127:    POSIX doesn't require that we do anything for REG_NOERROR,
        !           128:    but why not be nice?  */
        !           129: 
        !           130: const char __re_error_msgid[] attribute_hidden =
        !           131:   {
        !           132: #define REG_NOERROR_IDX        0
        !           133:     gettext_noop ("Success")   /* REG_NOERROR */
        !           134:     "\0"
        !           135: #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
        !           136:     gettext_noop ("No match")  /* REG_NOMATCH */
        !           137:     "\0"
        !           138: #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
        !           139:     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
        !           140:     "\0"
        !           141: #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
        !           142:     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
        !           143:     "\0"
        !           144: #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
        !           145:     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
        !           146:     "\0"
        !           147: #define REG_EESCAPE_IDX        (REG_ECTYPE_IDX + sizeof "Invalid character class name")
        !           148:     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
        !           149:     "\0"
        !           150: #define REG_ESUBREG_IDX        (REG_EESCAPE_IDX + sizeof "Trailing backslash")
        !           151:     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
        !           152:     "\0"
        !           153: #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
        !           154:     gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
        !           155:     "\0"
        !           156: #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
        !           157:     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
        !           158:     "\0"
        !           159: #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
        !           160:     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
        !           161:     "\0"
        !           162: #define REG_BADBR_IDX  (REG_EBRACE_IDX + sizeof "Unmatched \\{")
        !           163:     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
        !           164:     "\0"
        !           165: #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
        !           166:     gettext_noop ("Invalid range end") /* REG_ERANGE */
        !           167:     "\0"
        !           168: #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
        !           169:     gettext_noop ("Memory exhausted") /* REG_ESPACE */
        !           170:     "\0"
        !           171: #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
        !           172:     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
        !           173:     "\0"
        !           174: #define REG_EEND_IDX   (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
        !           175:     gettext_noop ("Premature end of regular expression") /* REG_EEND */
        !           176:     "\0"
        !           177: #define REG_ESIZE_IDX  (REG_EEND_IDX + sizeof "Premature end of regular expression")
        !           178:     gettext_noop ("Regular expression too big") /* REG_ESIZE */
        !           179:     "\0"
        !           180: #define REG_ERPAREN_IDX        (REG_ESIZE_IDX + sizeof "Regular expression too big")
        !           181:     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
        !           182:   };
        !           183: 
        !           184: const size_t __re_error_msgid_idx[] attribute_hidden =
        !           185:   {
        !           186:     REG_NOERROR_IDX,
        !           187:     REG_NOMATCH_IDX,
        !           188:     REG_BADPAT_IDX,
        !           189:     REG_ECOLLATE_IDX,
        !           190:     REG_ECTYPE_IDX,
        !           191:     REG_EESCAPE_IDX,
        !           192:     REG_ESUBREG_IDX,
        !           193:     REG_EBRACK_IDX,
        !           194:     REG_EPAREN_IDX,
        !           195:     REG_EBRACE_IDX,
        !           196:     REG_BADBR_IDX,
        !           197:     REG_ERANGE_IDX,
        !           198:     REG_ESPACE_IDX,
        !           199:     REG_BADRPT_IDX,
        !           200:     REG_EEND_IDX,
        !           201:     REG_ESIZE_IDX,
        !           202:     REG_ERPAREN_IDX
        !           203:   };
        !           204: 
        !           205: /* Entry points for GNU code.  */
        !           206: 
        !           207: /* re_compile_pattern is the GNU regular expression compiler: it
        !           208:    compiles PATTERN (of length LENGTH) and puts the result in BUFP.
        !           209:    Returns 0 if the pattern was valid, otherwise an error string.
        !           210: 
        !           211:    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
        !           212:    are set in BUFP on entry.  */
        !           213: 
        !           214: const char *
        !           215: re_compile_pattern (pattern, length, bufp)
        !           216:     const char *pattern;
        !           217:     size_t length;
        !           218:     struct re_pattern_buffer *bufp;
        !           219: {
        !           220:   reg_errcode_t ret;
        !           221: 
        !           222:   /* And GNU code determines whether or not to get register information
        !           223:      by passing null for the REGS argument to re_match, etc., not by
        !           224:      setting no_sub.  */
        !           225:   bufp->no_sub = 0;
        !           226: 
        !           227:   /* Match anchors at newline.  */
        !           228:   bufp->newline_anchor = 1;
        !           229: 
        !           230:   ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
        !           231: 
        !           232:   if (!ret)
        !           233:     return NULL;
        !           234:   return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
        !           235: }
        !           236: #ifdef _LIBC
        !           237: weak_alias (__re_compile_pattern, re_compile_pattern)
        !           238: #endif
        !           239: 
        !           240: /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
        !           241:    also be assigned to arbitrarily: each pattern buffer stores its own
        !           242:    syntax, so it can be changed between regex compilations.  */
        !           243: /* This has no initializer because initialized variables in Emacs
        !           244:    become read-only after dumping.  */
        !           245: reg_syntax_t re_syntax_options;
        !           246: 
        !           247: 
        !           248: /* Specify the precise syntax of regexps for compilation.  This provides
        !           249:    for compatibility for various utilities which historically have
        !           250:    different, incompatible syntaxes.
        !           251: 
        !           252:    The argument SYNTAX is a bit mask comprised of the various bits
        !           253:    defined in regex.h.  We return the old syntax.  */
        !           254: 
        !           255: reg_syntax_t
        !           256: re_set_syntax (syntax)
        !           257:     reg_syntax_t syntax;
        !           258: {
        !           259:   reg_syntax_t ret = re_syntax_options;
        !           260: 
        !           261:   re_syntax_options = syntax;
        !           262:   return ret;
        !           263: }
        !           264: #ifdef _LIBC
        !           265: weak_alias (__re_set_syntax, re_set_syntax)
        !           266: #endif
        !           267: 
        !           268: int
        !           269: re_compile_fastmap (bufp)
        !           270:     struct re_pattern_buffer *bufp;
        !           271: {
        !           272:   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
        !           273:   char *fastmap = bufp->fastmap;
        !           274: 
        !           275:   memset (fastmap, '\0', sizeof (char) * SBC_MAX);
        !           276:   re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
        !           277:   if (dfa->init_state != dfa->init_state_word)
        !           278:     re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
        !           279:   if (dfa->init_state != dfa->init_state_nl)
        !           280:     re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
        !           281:   if (dfa->init_state != dfa->init_state_begbuf)
        !           282:     re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
        !           283:   bufp->fastmap_accurate = 1;
        !           284:   return 0;
        !           285: }
        !           286: #ifdef _LIBC
        !           287: weak_alias (__re_compile_fastmap, re_compile_fastmap)
        !           288: #endif
        !           289: 
        !           290: static inline void
        !           291: re_set_fastmap (char *fastmap, int icase, int ch)
        !           292: {
        !           293:   fastmap[ch] = 1;
        !           294:   if (icase)
        !           295:     fastmap[tolower (ch)] = 1;
        !           296: }
        !           297: 
        !           298: /* Helper function for re_compile_fastmap.
        !           299:    Compile fastmap for the initial_state INIT_STATE.  */
        !           300: 
        !           301: static void
        !           302: re_compile_fastmap_iter (bufp, init_state, fastmap)
        !           303:      regex_t *bufp;
        !           304:      const re_dfastate_t *init_state;
        !           305:      char *fastmap;
        !           306: {
        !           307:   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
        !           308:   int node_cnt;
        !           309:   int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE));
        !           310:   for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
        !           311:     {
        !           312:       int node = init_state->nodes.elems[node_cnt];
        !           313:       re_token_type_t type = dfa->nodes[node].type;
        !           314: 
        !           315:       if (type == CHARACTER)
        !           316:        re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
        !           317:       else if (type == SIMPLE_BRACKET)
        !           318:        {
        !           319:          int i, j, ch;
        !           320:          for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
        !           321:            for (j = 0; j < UINT_BITS; ++j, ++ch)
        !           322:              if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
        !           323:                re_set_fastmap (fastmap, icase, ch);
        !           324:        }
        !           325: #ifdef RE_ENABLE_I18N
        !           326:       else if (type == COMPLEX_BRACKET)
        !           327:        {
        !           328:          int i;
        !           329:          re_charset_t *cset = dfa->nodes[node].opr.mbcset;
        !           330:          if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
        !           331:              || cset->nranges || cset->nchar_classes)
        !           332:            {
        !           333: # ifdef _LIBC
        !           334:              if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
        !           335:                {
        !           336:                  /* In this case we want to catch the bytes which are
        !           337:                     the first byte of any collation elements.
        !           338:                     e.g. In da_DK, we want to catch 'a' since "aa"
        !           339:                          is a valid collation element, and don't catch
        !           340:                          'b' since 'b' is the only collation element
        !           341:                          which starts from 'b'.  */
        !           342:                  int j, ch;
        !           343:                  const int32_t *table = (const int32_t *)
        !           344:                    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
        !           345:                  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
        !           346:                    for (j = 0; j < UINT_BITS; ++j, ++ch)
        !           347:                      if (table[ch] < 0)
        !           348:                        re_set_fastmap (fastmap, icase, ch);
        !           349:                }
        !           350: # else
        !           351:              if (MB_CUR_MAX > 1)
        !           352:                for (i = 0; i < SBC_MAX; ++i)
        !           353:                  if (__btowc (i) == WEOF)
        !           354:                    re_set_fastmap (fastmap, icase, i);
        !           355: # endif /* not _LIBC */
        !           356:            }
        !           357:          for (i = 0; i < cset->nmbchars; ++i)
        !           358:            {
        !           359:              char buf[256];
        !           360:              mbstate_t state;
        !           361:              memset (&state, '\0', sizeof (state));
        !           362:              __wcrtomb (buf, cset->mbchars[i], &state);
        !           363:              re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
        !           364:            }
        !           365:        }
        !           366: #endif /* RE_ENABLE_I18N */
        !           367:       else if (type == END_OF_RE || type == OP_PERIOD)
        !           368:        {
        !           369:          memset (fastmap, '\1', sizeof (char) * SBC_MAX);
        !           370:          if (type == END_OF_RE)
        !           371:            bufp->can_be_null = 1;
        !           372:          return;
        !           373:        }
        !           374:     }
        !           375: }
        !           376: 
        !           377: /* Entry point for POSIX code.  */
        !           378: /* regcomp takes a regular expression as a string and compiles it.
        !           379: 
        !           380:    PREG is a regex_t *.  We do not expect any fields to be initialized,
        !           381:    since POSIX says we shouldn't.  Thus, we set
        !           382: 
        !           383:      `buffer' to the compiled pattern;
        !           384:      `used' to the length of the compiled pattern;
        !           385:      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
        !           386:        REG_EXTENDED bit in CFLAGS is set; otherwise, to
        !           387:        RE_SYNTAX_POSIX_BASIC;
        !           388:      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
        !           389:      `fastmap' to an allocated space for the fastmap;
        !           390:      `fastmap_accurate' to zero;
        !           391:      `re_nsub' to the number of subexpressions in PATTERN.
        !           392: 
        !           393:    PATTERN is the address of the pattern string.
        !           394: 
        !           395:    CFLAGS is a series of bits which affect compilation.
        !           396: 
        !           397:      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
        !           398:      use POSIX basic syntax.
        !           399: 
        !           400:      If REG_NEWLINE is set, then . and [^...] don't match newline.
        !           401:      Also, regexec will try a match beginning after every newline.
        !           402: 
        !           403:      If REG_ICASE is set, then we considers upper- and lowercase
        !           404:      versions of letters to be equivalent when matching.
        !           405: 
        !           406:      If REG_NOSUB is set, then when PREG is passed to regexec, that
        !           407:      routine will report only success or failure, and nothing about the
        !           408:      registers.
        !           409: 
        !           410:    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
        !           411:    the return codes and their meanings.)  */
        !           412: 
        !           413: int
        !           414: regcomp (preg, pattern, cflags)
        !           415:     regex_t *__restrict preg;
        !           416:     const char *__restrict pattern;
        !           417:     int cflags;
        !           418: {
        !           419:   reg_errcode_t ret;
        !           420:   reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
        !           421:                         : RE_SYNTAX_POSIX_BASIC);
        !           422: 
        !           423:   preg->buffer = NULL;
        !           424:   preg->allocated = 0;
        !           425:   preg->used = 0;
        !           426: 
        !           427:   /* Try to allocate space for the fastmap.  */
        !           428:   preg->fastmap = re_malloc (char, SBC_MAX);
        !           429:   if (BE (preg->fastmap == NULL, 0))
        !           430:     return REG_ESPACE;
        !           431: 
        !           432:   syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
        !           433: 
        !           434:   /* If REG_NEWLINE is set, newlines are treated differently.  */
        !           435:   if (cflags & REG_NEWLINE)
        !           436:     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
        !           437:       syntax &= ~RE_DOT_NEWLINE;
        !           438:       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
        !           439:       /* It also changes the matching behavior.  */
        !           440:       preg->newline_anchor = 1;
        !           441:     }
        !           442:   else
        !           443:     preg->newline_anchor = 0;
        !           444:   preg->no_sub = !!(cflags & REG_NOSUB);
        !           445:   preg->translate = NULL;
        !           446: 
        !           447:   ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
        !           448: 
        !           449:   /* POSIX doesn't distinguish between an unmatched open-group and an
        !           450:      unmatched close-group: both are REG_EPAREN.  */
        !           451:   if (ret == REG_ERPAREN)
        !           452:     ret = REG_EPAREN;
        !           453: 
        !           454:   /* We have already checked preg->fastmap != NULL.  */
        !           455:   if (BE (ret == REG_NOERROR, 1))
        !           456:     /* Compute the fastmap now, since regexec cannot modify the pattern
        !           457:        buffer.  This function nevers fails in this implementation.  */
        !           458:     (void) re_compile_fastmap (preg);
        !           459:   else
        !           460:     {
        !           461:       /* Some error occurred while compiling the expression.  */
        !           462:       re_free (preg->fastmap);
        !           463:       preg->fastmap = NULL;
        !           464:     }
        !           465: 
        !           466:   return (int) ret;
        !           467: }
        !           468: #ifdef _LIBC
        !           469: weak_alias (__regcomp, regcomp)
        !           470: #endif
        !           471: 
        !           472: /* Returns a message corresponding to an error code, ERRCODE, returned
        !           473:    from either regcomp or regexec.   We don't use PREG here.  */
        !           474: 
        !           475: size_t
        !           476: regerror (errcode, preg, errbuf, errbuf_size)
        !           477:     int errcode;
        !           478:     const regex_t *preg;
        !           479:     char *errbuf;
        !           480:     size_t errbuf_size;
        !           481: {
        !           482:   const char *msg;
        !           483:   size_t msg_size;
        !           484: 
        !           485:   if (BE (errcode < 0
        !           486:          || errcode >= (int) (sizeof (__re_error_msgid_idx)
        !           487:                               / sizeof (__re_error_msgid_idx[0])), 0))
        !           488:     /* Only error codes returned by the rest of the code should be passed
        !           489:        to this routine.  If we are given anything else, or if other regex
        !           490:        code generates an invalid error code, then the program has a bug.
        !           491:        Dump core so we can fix it.  */
        !           492:     abort ();
        !           493: 
        !           494:   msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
        !           495: 
        !           496:   msg_size = strlen (msg) + 1; /* Includes the null.  */
        !           497: 
        !           498:   if (BE (errbuf_size != 0, 1))
        !           499:     {
        !           500:       if (BE (msg_size > errbuf_size, 0))
        !           501:        {
        !           502: #if defined HAVE_MEMPCPY || defined _LIBC
        !           503:          *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
        !           504: #else
        !           505:          memcpy (errbuf, msg, errbuf_size - 1);
        !           506:          errbuf[errbuf_size - 1] = 0;
        !           507: #endif
        !           508:        }
        !           509:       else
        !           510:        memcpy (errbuf, msg, msg_size);
        !           511:     }
        !           512: 
        !           513:   return msg_size;
        !           514: }
        !           515: #ifdef _LIBC
        !           516: weak_alias (__regerror, regerror)
        !           517: #endif
        !           518: 
        !           519: 
        !           520: static void
        !           521: free_dfa_content (re_dfa_t *dfa)
        !           522: {
        !           523:   int i, j;
        !           524: 
        !           525:   re_free (dfa->subexps);
        !           526: 
        !           527:   for (i = 0; i < dfa->nodes_len; ++i)
        !           528:     {
        !           529:       re_token_t *node = dfa->nodes + i;
        !           530: #ifdef RE_ENABLE_I18N
        !           531:       if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
        !           532:        free_charset (node->opr.mbcset);
        !           533:       else
        !           534: #endif /* RE_ENABLE_I18N */
        !           535:        if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
        !           536:          re_free (node->opr.sbcset);
        !           537:     }
        !           538:   re_free (dfa->nexts);
        !           539:   for (i = 0; i < dfa->nodes_len; ++i)
        !           540:     {
        !           541:       if (dfa->eclosures != NULL)
        !           542:        re_node_set_free (dfa->eclosures + i);
        !           543:       if (dfa->inveclosures != NULL)
        !           544:        re_node_set_free (dfa->inveclosures + i);
        !           545:       if (dfa->edests != NULL)
        !           546:        re_node_set_free (dfa->edests + i);
        !           547:     }
        !           548:   re_free (dfa->edests);
        !           549:   re_free (dfa->eclosures);
        !           550:   re_free (dfa->inveclosures);
        !           551:   re_free (dfa->nodes);
        !           552: 
        !           553:   for (i = 0; i <= dfa->state_hash_mask; ++i)
        !           554:     {
        !           555:       struct re_state_table_entry *entry = dfa->state_table + i;
        !           556:       for (j = 0; j < entry->num; ++j)
        !           557:        {
        !           558:          re_dfastate_t *state = entry->array[j];
        !           559:          free_state (state);
        !           560:        }
        !           561:       re_free (entry->array);
        !           562:     }
        !           563:   re_free (dfa->state_table);
        !           564: 
        !           565:   if (dfa->word_char != NULL)
        !           566:     re_free (dfa->word_char);
        !           567: #ifdef DEBUG
        !           568:   re_free (dfa->re_str);
        !           569: #endif
        !           570: 
        !           571:   re_free (dfa);
        !           572: }
        !           573: 
        !           574: 
        !           575: /* Free dynamically allocated space used by PREG.  */
        !           576: 
        !           577: void
        !           578: regfree (preg)
        !           579:     regex_t *preg;
        !           580: {
        !           581:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !           582:   if (BE (dfa != NULL, 1))
        !           583:     free_dfa_content (dfa);
        !           584: 
        !           585:   re_free (preg->fastmap);
        !           586: }
        !           587: #ifdef _LIBC
        !           588: weak_alias (__regfree, regfree)
        !           589: #endif
        !           590: 
        !           591: /* Entry points compatible with 4.2 BSD regex library.  We don't define
        !           592:    them unless specifically requested.  */
        !           593: 
        !           594: #if defined _REGEX_RE_COMP || defined _LIBC
        !           595: 
        !           596: /* BSD has one and only one pattern buffer.  */
        !           597: static struct re_pattern_buffer re_comp_buf;
        !           598: 
        !           599: char *
        !           600: # ifdef _LIBC
        !           601: /* Make these definitions weak in libc, so POSIX programs can redefine
        !           602:    these names if they don't use our functions, and still use
        !           603:    regcomp/regexec above without link errors.  */
        !           604: weak_function
        !           605: # endif
        !           606: re_comp (s)
        !           607:      const char *s;
        !           608: {
        !           609:   reg_errcode_t ret;
        !           610:   char *fastmap;
        !           611: 
        !           612:   if (!s)
        !           613:     {
        !           614:       if (!re_comp_buf.buffer)
        !           615:        return gettext ("No previous regular expression");
        !           616:       return 0;
        !           617:     }
        !           618: 
        !           619:   if (re_comp_buf.buffer)
        !           620:     {
        !           621:       fastmap = re_comp_buf.fastmap;
        !           622:       re_comp_buf.fastmap = NULL;
        !           623:       __regfree (&re_comp_buf);
        !           624:       memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
        !           625:       re_comp_buf.fastmap = fastmap;
        !           626:     }
        !           627: 
        !           628:   if (re_comp_buf.fastmap == NULL)
        !           629:     {
        !           630:       re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
        !           631:       if (re_comp_buf.fastmap == NULL)
        !           632:        return (char *) gettext (__re_error_msgid
        !           633:                                 + __re_error_msgid_idx[(int) REG_ESPACE]);
        !           634:     }
        !           635: 
        !           636:   /* Since `re_exec' always passes NULL for the `regs' argument, we
        !           637:      don't need to initialize the pattern buffer fields which affect it.  */
        !           638: 
        !           639:   /* Match anchors at newlines.  */
        !           640:   re_comp_buf.newline_anchor = 1;
        !           641: 
        !           642:   ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
        !           643: 
        !           644:   if (!ret)
        !           645:     return NULL;
        !           646: 
        !           647:   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
        !           648:   return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
        !           649: }
        !           650: 
        !           651: #ifdef _LIBC
        !           652: libc_freeres_fn (free_mem)
        !           653: {
        !           654:   __regfree (&re_comp_buf);
        !           655: }
        !           656: #endif
        !           657: 
        !           658: #endif /* _REGEX_RE_COMP */
        !           659: 
        !           660: /* Internal entry point.
        !           661:    Compile the regular expression PATTERN, whose length is LENGTH.
        !           662:    SYNTAX indicate regular expression's syntax.  */
        !           663: 
        !           664: static reg_errcode_t
        !           665: re_compile_internal (preg, pattern, length, syntax)
        !           666:      regex_t *preg;
        !           667:      const char * pattern;
        !           668:      int length;
        !           669:      reg_syntax_t syntax;
        !           670: {
        !           671:   reg_errcode_t err = REG_NOERROR;
        !           672:   re_dfa_t *dfa;
        !           673:   re_string_t regexp;
        !           674: 
        !           675:   /* Initialize the pattern buffer.  */
        !           676:   preg->fastmap_accurate = 0;
        !           677:   preg->syntax = syntax;
        !           678:   preg->not_bol = preg->not_eol = 0;
        !           679:   preg->used = 0;
        !           680:   preg->re_nsub = 0;
        !           681:   preg->can_be_null = 0;
        !           682:   preg->regs_allocated = REGS_UNALLOCATED;
        !           683: 
        !           684:   /* Initialize the dfa.  */
        !           685:   dfa = (re_dfa_t *) preg->buffer;
        !           686:   if (preg->allocated < sizeof (re_dfa_t))
        !           687:     {
        !           688:       /* If zero allocated, but buffer is non-null, try to realloc
        !           689:         enough space.  This loses if buffer's address is bogus, but
        !           690:         that is the user's responsibility.  If ->buffer is NULL this
        !           691:         is a simple allocation.  */
        !           692:       dfa = re_realloc (preg->buffer, re_dfa_t, 1);
        !           693:       if (dfa == NULL)
        !           694:        return REG_ESPACE;
        !           695:       preg->allocated = sizeof (re_dfa_t);
        !           696:     }
        !           697:   preg->buffer = (unsigned char *) dfa;
        !           698:   preg->used = sizeof (re_dfa_t);
        !           699: 
        !           700:   err = init_dfa (dfa, length);
        !           701:   if (BE (err != REG_NOERROR, 0))
        !           702:     {
        !           703:       re_free (dfa);
        !           704:       preg->buffer = NULL;
        !           705:       preg->allocated = 0;
        !           706:       return err;
        !           707:     }
        !           708: #ifdef DEBUG
        !           709:   dfa->re_str = re_malloc (char, length + 1);
        !           710:   strncpy (dfa->re_str, pattern, length + 1);
        !           711: #endif
        !           712: 
        !           713:   err = re_string_construct (&regexp, pattern, length, preg->translate,
        !           714:                             syntax & RE_ICASE);
        !           715:   if (BE (err != REG_NOERROR, 0))
        !           716:     {
        !           717:       re_free (dfa);
        !           718:       preg->buffer = NULL;
        !           719:       preg->allocated = 0;
        !           720:       return err;
        !           721:     }
        !           722: 
        !           723:   /* Parse the regular expression, and build a structure tree.  */
        !           724:   preg->re_nsub = 0;
        !           725:   dfa->str_tree = parse (&regexp, preg, syntax, &err);
        !           726:   if (BE (dfa->str_tree == NULL, 0))
        !           727:     goto re_compile_internal_free_return;
        !           728: 
        !           729:   /* Analyze the tree and collect information which is necessary to
        !           730:      create the dfa.  */
        !           731:   err = analyze (dfa);
        !           732:   if (BE (err != REG_NOERROR, 0))
        !           733:     goto re_compile_internal_free_return;
        !           734: 
        !           735:   /* Then create the initial state of the dfa.  */
        !           736:   err = create_initial_state (dfa);
        !           737: 
        !           738:   /* Release work areas.  */
        !           739:   free_workarea_compile (preg);
        !           740:   re_string_destruct (&regexp);
        !           741: 
        !           742:   if (BE (err != REG_NOERROR, 0))
        !           743:     {
        !           744:     re_compile_internal_free_return:
        !           745:       free_dfa_content (dfa);
        !           746:       preg->buffer = NULL;
        !           747:       preg->allocated = 0;
        !           748:     }
        !           749: 
        !           750:   return err;
        !           751: }
        !           752: 
        !           753: /* Initialize DFA.  We use the length of the regular expression PAT_LEN
        !           754:    as the initial length of some arrays.  */
        !           755: 
        !           756: static reg_errcode_t
        !           757: init_dfa (dfa, pat_len)
        !           758:      re_dfa_t *dfa;
        !           759:      int pat_len;
        !           760: {
        !           761:   int table_size;
        !           762: 
        !           763:   memset (dfa, '\0', sizeof (re_dfa_t));
        !           764: 
        !           765:   dfa->nodes_alloc = pat_len + 1;
        !           766:   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
        !           767: 
        !           768:   dfa->states_alloc = pat_len + 1;
        !           769: 
        !           770:   /*  table_size = 2 ^ ceil(log pat_len) */
        !           771:   for (table_size = 1; table_size > 0; table_size <<= 1)
        !           772:     if (table_size > pat_len)
        !           773:       break;
        !           774: 
        !           775:   dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
        !           776:   dfa->state_hash_mask = table_size - 1;
        !           777: 
        !           778:   dfa->subexps_alloc = 1;
        !           779:   dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
        !           780:   dfa->word_char = NULL;
        !           781: 
        !           782:   if (BE (dfa->nodes == NULL || dfa->state_table == NULL
        !           783:          || dfa->subexps == NULL, 0))
        !           784:     {
        !           785:       /* We don't bother to free anything which was allocated.  Very
        !           786:         soon the process will go down anyway.  */
        !           787:       dfa->subexps = NULL;
        !           788:       dfa->state_table = NULL;
        !           789:       dfa->nodes = NULL;
        !           790:       return REG_ESPACE;
        !           791:     }
        !           792:   return REG_NOERROR;
        !           793: }
        !           794: 
        !           795: /* Initialize WORD_CHAR table, which indicate which character is
        !           796:    "word".  In this case "word" means that it is the word construction
        !           797:    character used by some operators like "\<", "\>", etc.  */
        !           798: 
        !           799: static reg_errcode_t
        !           800: init_word_char (dfa)
        !           801:      re_dfa_t *dfa;
        !           802: {
        !           803:   int i, j, ch;
        !           804:   dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
        !           805:   if (BE (dfa->word_char == NULL, 0))
        !           806:     return REG_ESPACE;
        !           807:   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
        !           808:     for (j = 0; j < UINT_BITS; ++j, ++ch)
        !           809:       if (isalnum (ch) || ch == '_')
        !           810:        dfa->word_char[i] |= 1 << j;
        !           811:   return REG_NOERROR;
        !           812: }
        !           813: 
        !           814: /* Free the work area which are only used while compiling.  */
        !           815: 
        !           816: static void
        !           817: free_workarea_compile (preg)
        !           818:      regex_t *preg;
        !           819: {
        !           820:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !           821:   free_bin_tree (dfa->str_tree);
        !           822:   dfa->str_tree = NULL;
        !           823:   re_free (dfa->org_indices);
        !           824:   dfa->org_indices = NULL;
        !           825: }
        !           826: 
        !           827: /* Create initial states for all contexts.  */
        !           828: 
        !           829: static reg_errcode_t
        !           830: create_initial_state (dfa)
        !           831:      re_dfa_t *dfa;
        !           832: {
        !           833:   int first, i;
        !           834:   reg_errcode_t err;
        !           835:   re_node_set init_nodes;
        !           836: 
        !           837:   /* Initial states have the epsilon closure of the node which is
        !           838:      the first node of the regular expression.  */
        !           839:   first = dfa->str_tree->first;
        !           840:   dfa->init_node = first;
        !           841:   err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
        !           842:   if (BE (err != REG_NOERROR, 0))
        !           843:     return err;
        !           844: 
        !           845:   /* The back-references which are in initial states can epsilon transit,
        !           846:      since in this case all of the subexpressions can be null.
        !           847:      Then we add epsilon closures of the nodes which are the next nodes of
        !           848:      the back-references.  */
        !           849:   if (dfa->nbackref > 0)
        !           850:     for (i = 0; i < init_nodes.nelem; ++i)
        !           851:       {
        !           852:        int node_idx = init_nodes.elems[i];
        !           853:        re_token_type_t type = dfa->nodes[node_idx].type;
        !           854: 
        !           855:        int clexp_idx;
        !           856:        if (type != OP_BACK_REF)
        !           857:          continue;
        !           858:        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
        !           859:          {
        !           860:            re_token_t *clexp_node;
        !           861:            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
        !           862:            if (clexp_node->type == OP_CLOSE_SUBEXP
        !           863:                && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
        !           864:              break;
        !           865:          }
        !           866:        if (clexp_idx == init_nodes.nelem)
        !           867:          continue;
        !           868: 
        !           869:        if (type == OP_BACK_REF)
        !           870:          {
        !           871:            int dest_idx = dfa->edests[node_idx].elems[0];
        !           872:            if (!re_node_set_contains (&init_nodes, dest_idx))
        !           873:              {
        !           874:                re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
        !           875:                i = 0;
        !           876:              }
        !           877:          }
        !           878:       }
        !           879: 
        !           880:   /* It must be the first time to invoke acquire_state.  */
        !           881:   dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
        !           882:   /* We don't check ERR here, since the initial state must not be NULL.  */
        !           883:   if (BE (dfa->init_state == NULL, 0))
        !           884:     return err;
        !           885:   if (dfa->init_state->has_constraint)
        !           886:     {
        !           887:       dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
        !           888:                                                       CONTEXT_WORD);
        !           889:       dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
        !           890:                                                     CONTEXT_NEWLINE);
        !           891:       dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
        !           892:                                                         &init_nodes,
        !           893:                                                         CONTEXT_NEWLINE
        !           894:                                                         | CONTEXT_BEGBUF);
        !           895:       if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
        !           896:              || dfa->init_state_begbuf == NULL, 0))
        !           897:        return err;
        !           898:     }
        !           899:   else
        !           900:     dfa->init_state_word = dfa->init_state_nl
        !           901:       = dfa->init_state_begbuf = dfa->init_state;
        !           902: 
        !           903:   re_node_set_free (&init_nodes);
        !           904:   return REG_NOERROR;
        !           905: }
        !           906: 
        !           907: /* Analyze the structure tree, and calculate "first", "next", "edest",
        !           908:    "eclosure", and "inveclosure".  */
        !           909: 
        !           910: static reg_errcode_t
        !           911: analyze (dfa)
        !           912:      re_dfa_t *dfa;
        !           913: {
        !           914:   int i;
        !           915:   reg_errcode_t ret;
        !           916: 
        !           917:   /* Allocate arrays.  */
        !           918:   dfa->nexts = re_malloc (int, dfa->nodes_alloc);
        !           919:   dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
        !           920:   dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
        !           921:   dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
        !           922:   dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
        !           923:   if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
        !           924:          || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
        !           925:     return REG_ESPACE;
        !           926:   /* Initialize them.  */
        !           927:   for (i = 0; i < dfa->nodes_len; ++i)
        !           928:     {
        !           929:       dfa->nexts[i] = -1;
        !           930:       re_node_set_init_empty (dfa->edests + i);
        !           931:       re_node_set_init_empty (dfa->eclosures + i);
        !           932:       re_node_set_init_empty (dfa->inveclosures + i);
        !           933:     }
        !           934: 
        !           935:   ret = analyze_tree (dfa, dfa->str_tree);
        !           936:   if (BE (ret == REG_NOERROR, 1))
        !           937:     {
        !           938:       ret = calc_eclosure (dfa);
        !           939:       if (ret == REG_NOERROR)
        !           940:        calc_inveclosure (dfa);
        !           941:     }
        !           942:   return ret;
        !           943: }
        !           944: 
        !           945: /* Helper functions for analyze.
        !           946:    This function calculate "first", "next", and "edest" for the subtree
        !           947:    whose root is NODE.  */
        !           948: 
        !           949: static reg_errcode_t
        !           950: analyze_tree (dfa, node)
        !           951:      re_dfa_t *dfa;
        !           952:      bin_tree_t *node;
        !           953: {
        !           954:   reg_errcode_t ret;
        !           955:   if (node->first == -1)
        !           956:     calc_first (dfa, node);
        !           957:   if (node->next == -1)
        !           958:     calc_next (dfa, node);
        !           959:   if (node->eclosure.nelem == 0)
        !           960:     calc_epsdest (dfa, node);
        !           961:   /* Calculate "first" etc. for the left child.  */
        !           962:   if (node->left != NULL)
        !           963:     {
        !           964:       ret = analyze_tree (dfa, node->left);
        !           965:       if (BE (ret != REG_NOERROR, 0))
        !           966:        return ret;
        !           967:     }
        !           968:   /* Calculate "first" etc. for the right child.  */
        !           969:   if (node->right != NULL)
        !           970:     {
        !           971:       ret = analyze_tree (dfa, node->right);
        !           972:       if (BE (ret != REG_NOERROR, 0))
        !           973:        return ret;
        !           974:     }
        !           975:   return REG_NOERROR;
        !           976: }
        !           977: 
        !           978: /* Calculate "first" for the node NODE.  */
        !           979: static void
        !           980: calc_first (dfa, node)
        !           981:      re_dfa_t *dfa;
        !           982:      bin_tree_t *node;
        !           983: {
        !           984:   int idx, type;
        !           985:   idx = node->node_idx;
        !           986:   type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
        !           987: 
        !           988:   switch (type)
        !           989:     {
        !           990: #ifdef DEBUG
        !           991:     case OP_OPEN_BRACKET:
        !           992:     case OP_CLOSE_BRACKET:
        !           993:     case OP_OPEN_DUP_NUM:
        !           994:     case OP_CLOSE_DUP_NUM:
        !           995:     case OP_NON_MATCH_LIST:
        !           996:     case OP_OPEN_COLL_ELEM:
        !           997:     case OP_CLOSE_COLL_ELEM:
        !           998:     case OP_OPEN_EQUIV_CLASS:
        !           999:     case OP_CLOSE_EQUIV_CLASS:
        !          1000:     case OP_OPEN_CHAR_CLASS:
        !          1001:     case OP_CLOSE_CHAR_CLASS:
        !          1002:       /* These must not be appeared here.  */
        !          1003:       assert (0);
        !          1004: #endif
        !          1005:     case END_OF_RE:
        !          1006:     case CHARACTER:
        !          1007:     case OP_PERIOD:
        !          1008:     case OP_DUP_ASTERISK:
        !          1009:     case OP_DUP_QUESTION:
        !          1010: #ifdef RE_ENABLE_I18N
        !          1011:     case COMPLEX_BRACKET:
        !          1012: #endif /* RE_ENABLE_I18N */
        !          1013:     case SIMPLE_BRACKET:
        !          1014:     case OP_BACK_REF:
        !          1015:     case ANCHOR:
        !          1016:     case OP_OPEN_SUBEXP:
        !          1017:     case OP_CLOSE_SUBEXP:
        !          1018:       node->first = idx;
        !          1019:       break;
        !          1020:     case OP_DUP_PLUS:
        !          1021: #ifdef DEBUG
        !          1022:       assert (node->left != NULL);
        !          1023: #endif
        !          1024:       if (node->left->first == -1)
        !          1025:        calc_first (dfa, node->left);
        !          1026:       node->first = node->left->first;
        !          1027:       break;
        !          1028:     case OP_ALT:
        !          1029:       node->first = idx;
        !          1030:       break;
        !          1031:       /* else fall through */
        !          1032:     default:
        !          1033: #ifdef DEBUG
        !          1034:       assert (node->left != NULL);
        !          1035: #endif
        !          1036:       if (node->left->first == -1)
        !          1037:        calc_first (dfa, node->left);
        !          1038:       node->first = node->left->first;
        !          1039:       break;
        !          1040:     }
        !          1041: }
        !          1042: 
        !          1043: /* Calculate "next" for the node NODE.  */
        !          1044: 
        !          1045: static void
        !          1046: calc_next (dfa, node)
        !          1047:      re_dfa_t *dfa;
        !          1048:      bin_tree_t *node;
        !          1049: {
        !          1050:   int idx, type;
        !          1051:   bin_tree_t *parent = node->parent;
        !          1052:   if (parent == NULL)
        !          1053:     {
        !          1054:       node->next = -1;
        !          1055:       idx = node->node_idx;
        !          1056:       if (node->type == 0)
        !          1057:        dfa->nexts[idx] = node->next;
        !          1058:       return;
        !          1059:     }
        !          1060: 
        !          1061:   idx = parent->node_idx;
        !          1062:   type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
        !          1063: 
        !          1064:   switch (type)
        !          1065:     {
        !          1066:     case OP_DUP_ASTERISK:
        !          1067:     case OP_DUP_PLUS:
        !          1068:       node->next = idx;
        !          1069:       break;
        !          1070:     case CONCAT:
        !          1071:       if (parent->left == node)
        !          1072:        {
        !          1073:          if (parent->right->first == -1)
        !          1074:            calc_first (dfa, parent->right);
        !          1075:          node->next = parent->right->first;
        !          1076:          break;
        !          1077:        }
        !          1078:       /* else fall through */
        !          1079:     default:
        !          1080:       if (parent->next == -1)
        !          1081:        calc_next (dfa, parent);
        !          1082:       node->next = parent->next;
        !          1083:       break;
        !          1084:     }
        !          1085:   idx = node->node_idx;
        !          1086:   if (node->type == 0)
        !          1087:     dfa->nexts[idx] = node->next;
        !          1088: }
        !          1089: 
        !          1090: /* Calculate "edest" for the node NODE.  */
        !          1091: 
        !          1092: static void
        !          1093: calc_epsdest (dfa, node)
        !          1094:      re_dfa_t *dfa;
        !          1095:      bin_tree_t *node;
        !          1096: {
        !          1097:   int idx;
        !          1098:   idx = node->node_idx;
        !          1099:   if (node->type == 0)
        !          1100:     {
        !          1101:       if (dfa->nodes[idx].type == OP_DUP_ASTERISK
        !          1102:          || dfa->nodes[idx].type == OP_DUP_PLUS
        !          1103:          || dfa->nodes[idx].type == OP_DUP_QUESTION)
        !          1104:        {
        !          1105:          if (node->left->first == -1)
        !          1106:            calc_first (dfa, node->left);
        !          1107:          if (node->next == -1)
        !          1108:            calc_next (dfa, node);
        !          1109:          re_node_set_init_2 (dfa->edests + idx, node->left->first,
        !          1110:                              node->next);
        !          1111:        }
        !          1112:       else if (dfa->nodes[idx].type == OP_ALT)
        !          1113:        {
        !          1114:          int left, right;
        !          1115:          if (node->left != NULL)
        !          1116:            {
        !          1117:              if (node->left->first == -1)
        !          1118:                calc_first (dfa, node->left);
        !          1119:              left = node->left->first;
        !          1120:            }
        !          1121:          else
        !          1122:            {
        !          1123:              if (node->next == -1)
        !          1124:                calc_next (dfa, node);
        !          1125:              left = node->next;
        !          1126:            }
        !          1127:          if (node->right != NULL)
        !          1128:            {
        !          1129:              if (node->right->first == -1)
        !          1130:                calc_first (dfa, node->right);
        !          1131:              right = node->right->first;
        !          1132:            }
        !          1133:          else
        !          1134:            {
        !          1135:              if (node->next == -1)
        !          1136:                calc_next (dfa, node);
        !          1137:              right = node->next;
        !          1138:            }
        !          1139:          re_node_set_init_2 (dfa->edests + idx, left, right);
        !          1140:        }
        !          1141:       else if (dfa->nodes[idx].type == ANCHOR
        !          1142:               || dfa->nodes[idx].type == OP_OPEN_SUBEXP
        !          1143:               || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
        !          1144:               || dfa->nodes[idx].type == OP_BACK_REF)
        !          1145:        re_node_set_init_1 (dfa->edests + idx, node->next);
        !          1146:     }
        !          1147: }
        !          1148: 
        !          1149: /* Duplicate the epsilon closure of the node ROOT_NODE.
        !          1150:    Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
        !          1151:    to their own constraint.  */
        !          1152: 
        !          1153: static reg_errcode_t
        !          1154: duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
        !          1155:                        init_constraint)
        !          1156:      re_dfa_t *dfa;
        !          1157:      int top_org_node, top_clone_node, root_node;
        !          1158:      unsigned int init_constraint;
        !          1159: {
        !          1160:   reg_errcode_t err;
        !          1161:   int org_node, clone_node, ret;
        !          1162:   unsigned int constraint = init_constraint;
        !          1163:   for (org_node = top_org_node, clone_node = top_clone_node;;)
        !          1164:     {
        !          1165:       int org_dest, clone_dest;
        !          1166:       if (dfa->nodes[org_node].type == OP_BACK_REF)
        !          1167:        {
        !          1168:          /* If the back reference epsilon-transit, its destination must
        !          1169:             also have the constraint.  Then duplicate the epsilon closure
        !          1170:             of the destination of the back reference, and store it in
        !          1171:             edests of the back reference.  */
        !          1172:          org_dest = dfa->nexts[org_node];
        !          1173:          re_node_set_empty (dfa->edests + clone_node);
        !          1174:          err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
        !          1175:          if (BE (err != REG_NOERROR, 0))
        !          1176:            return err;
        !          1177:          dfa->nexts[clone_node] = dfa->nexts[org_node];
        !          1178:          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
        !          1179:          if (BE (ret < 0, 0))
        !          1180:            return REG_ESPACE;
        !          1181:        }
        !          1182:       else if (dfa->edests[org_node].nelem == 0)
        !          1183:        {
        !          1184:          /* In case of the node can't epsilon-transit, don't duplicate the
        !          1185:             destination and store the original destination as the
        !          1186:             destination of the node.  */
        !          1187:          dfa->nexts[clone_node] = dfa->nexts[org_node];
        !          1188:          break;
        !          1189:        }
        !          1190:       else if (dfa->edests[org_node].nelem == 1)
        !          1191:        {
        !          1192:          /* In case of the node can epsilon-transit, and it has only one
        !          1193:             destination.  */
        !          1194:          org_dest = dfa->edests[org_node].elems[0];
        !          1195:          re_node_set_empty (dfa->edests + clone_node);
        !          1196:          if (dfa->nodes[org_node].type == ANCHOR)
        !          1197:            {
        !          1198:              /* In case of the node has another constraint, append it.  */
        !          1199:              if (org_node == root_node && clone_node != org_node)
        !          1200:                {
        !          1201:                  /* ...but if the node is root_node itself, it means the
        !          1202:                     epsilon closure have a loop, then tie it to the
        !          1203:                     destination of the root_node.  */
        !          1204:                  ret = re_node_set_insert (dfa->edests + clone_node,
        !          1205:                                            org_dest);
        !          1206:                  if (BE (ret < 0, 0))
        !          1207:                    return REG_ESPACE;
        !          1208:                  break;
        !          1209:                }
        !          1210:              constraint |= dfa->nodes[org_node].opr.ctx_type;
        !          1211:            }
        !          1212:          err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
        !          1213:          if (BE (err != REG_NOERROR, 0))
        !          1214:            return err;
        !          1215:          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
        !          1216:          if (BE (ret < 0, 0))
        !          1217:            return REG_ESPACE;
        !          1218:        }
        !          1219:       else /* dfa->edests[org_node].nelem == 2 */
        !          1220:        {
        !          1221:          /* In case of the node can epsilon-transit, and it has two
        !          1222:             destinations. E.g. '|', '*', '+', '?'.   */
        !          1223:          org_dest = dfa->edests[org_node].elems[0];
        !          1224:          re_node_set_empty (dfa->edests + clone_node);
        !          1225:          /* Search for a duplicated node which satisfies the constraint.  */
        !          1226:          clone_dest = search_duplicated_node (dfa, org_dest, constraint);
        !          1227:          if (clone_dest == -1)
        !          1228:            {
        !          1229:              /* There are no such a duplicated node, create a new one.  */
        !          1230:              err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
        !          1231:              if (BE (err != REG_NOERROR, 0))
        !          1232:                return err;
        !          1233:              ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
        !          1234:              if (BE (ret < 0, 0))
        !          1235:                return REG_ESPACE;
        !          1236:              err = duplicate_node_closure (dfa, org_dest, clone_dest,
        !          1237:                                            root_node, constraint);
        !          1238:              if (BE (err != REG_NOERROR, 0))
        !          1239:                return err;
        !          1240:            }
        !          1241:          else
        !          1242:            {
        !          1243:              /* There are a duplicated node which satisfy the constraint,
        !          1244:                 use it to avoid infinite loop.  */
        !          1245:              ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
        !          1246:              if (BE (ret < 0, 0))
        !          1247:                return REG_ESPACE;
        !          1248:            }
        !          1249: 
        !          1250:          org_dest = dfa->edests[org_node].elems[1];
        !          1251:          err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
        !          1252:          if (BE (err != REG_NOERROR, 0))
        !          1253:            return err;
        !          1254:          ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
        !          1255:          if (BE (ret < 0, 0))
        !          1256:            return REG_ESPACE;
        !          1257:        }
        !          1258:       org_node = org_dest;
        !          1259:       clone_node = clone_dest;
        !          1260:     }
        !          1261:   return REG_NOERROR;
        !          1262: }
        !          1263: 
        !          1264: /* Search for a node which is duplicated from the node ORG_NODE, and
        !          1265:    satisfies the constraint CONSTRAINT.  */
        !          1266: 
        !          1267: static int
        !          1268: search_duplicated_node (dfa, org_node, constraint)
        !          1269:      re_dfa_t *dfa;
        !          1270:      int org_node;
        !          1271:      unsigned int constraint;
        !          1272: {
        !          1273:   int idx;
        !          1274:   for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
        !          1275:     {
        !          1276:       if (org_node == dfa->org_indices[idx]
        !          1277:          && constraint == dfa->nodes[idx].constraint)
        !          1278:        return idx; /* Found.  */
        !          1279:     }
        !          1280:   return -1; /* Not found.  */
        !          1281: }
        !          1282: 
        !          1283: /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
        !          1284:    The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
        !          1285:    otherwise return the error code.  */
        !          1286: 
        !          1287: static reg_errcode_t
        !          1288: duplicate_node (new_idx, dfa, org_idx, constraint)
        !          1289:      re_dfa_t *dfa;
        !          1290:      int *new_idx, org_idx;
        !          1291:      unsigned int constraint;
        !          1292: {
        !          1293:   re_token_t dup;
        !          1294:   int dup_idx;
        !          1295: 
        !          1296:   dup = dfa->nodes[org_idx];
        !          1297:   dup_idx = re_dfa_add_node (dfa, dup, 1);
        !          1298:   if (BE (dup_idx == -1, 0))
        !          1299:     return REG_ESPACE;
        !          1300:   dfa->nodes[dup_idx].constraint = constraint;
        !          1301:   if (dfa->nodes[org_idx].type == ANCHOR)
        !          1302:     dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
        !          1303:   dfa->nodes[dup_idx].duplicated = 1;
        !          1304:   re_node_set_init_empty (dfa->edests + dup_idx);
        !          1305:   re_node_set_init_empty (dfa->eclosures + dup_idx);
        !          1306:   re_node_set_init_empty (dfa->inveclosures + dup_idx);
        !          1307: 
        !          1308:   /* Store the index of the original node.  */
        !          1309:   dfa->org_indices[dup_idx] = org_idx;
        !          1310:   *new_idx = dup_idx;
        !          1311:   return REG_NOERROR;
        !          1312: }
        !          1313: 
        !          1314: static void
        !          1315: calc_inveclosure (dfa)
        !          1316:      re_dfa_t *dfa;
        !          1317: {
        !          1318:   int src, idx, dest;
        !          1319:   for (src = 0; src < dfa->nodes_len; ++src)
        !          1320:     {
        !          1321:       for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
        !          1322:        {
        !          1323:          dest = dfa->eclosures[src].elems[idx];
        !          1324:          re_node_set_insert (dfa->inveclosures + dest, src);
        !          1325:        }
        !          1326:     }
        !          1327: }
        !          1328: 
        !          1329: /* Calculate "eclosure" for all the node in DFA.  */
        !          1330: 
        !          1331: static reg_errcode_t
        !          1332: calc_eclosure (dfa)
        !          1333:      re_dfa_t *dfa;
        !          1334: {
        !          1335:   int node_idx, incomplete;
        !          1336: #ifdef DEBUG
        !          1337:   assert (dfa->nodes_len > 0);
        !          1338: #endif
        !          1339:   incomplete = 0;
        !          1340:   /* For each nodes, calculate epsilon closure.  */
        !          1341:   for (node_idx = 0; ; ++node_idx)
        !          1342:     {
        !          1343:       reg_errcode_t err;
        !          1344:       re_node_set eclosure_elem;
        !          1345:       if (node_idx == dfa->nodes_len)
        !          1346:        {
        !          1347:          if (!incomplete)
        !          1348:            break;
        !          1349:          incomplete = 0;
        !          1350:          node_idx = 0;
        !          1351:        }
        !          1352: 
        !          1353: #ifdef DEBUG
        !          1354:       assert (dfa->eclosures[node_idx].nelem != -1);
        !          1355: #endif
        !          1356:       /* If we have already calculated, skip it.  */
        !          1357:       if (dfa->eclosures[node_idx].nelem != 0)
        !          1358:        continue;
        !          1359:       /* Calculate epsilon closure of `node_idx'.  */
        !          1360:       err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
        !          1361:       if (BE (err != REG_NOERROR, 0))
        !          1362:        return err;
        !          1363: 
        !          1364:       if (dfa->eclosures[node_idx].nelem == 0)
        !          1365:        {
        !          1366:          incomplete = 1;
        !          1367:          re_node_set_free (&eclosure_elem);
        !          1368:        }
        !          1369:     }
        !          1370:   return REG_NOERROR;
        !          1371: }
        !          1372: 
        !          1373: /* Calculate epsilon closure of NODE.  */
        !          1374: 
        !          1375: static reg_errcode_t
        !          1376: calc_eclosure_iter (new_set, dfa, node, root)
        !          1377:      re_node_set *new_set;
        !          1378:      re_dfa_t *dfa;
        !          1379:      int node, root;
        !          1380: {
        !          1381:   reg_errcode_t err;
        !          1382:   unsigned int constraint;
        !          1383:   int i, incomplete;
        !          1384:   re_node_set eclosure;
        !          1385:   incomplete = 0;
        !          1386:   err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
        !          1387:   if (BE (err != REG_NOERROR, 0))
        !          1388:     return err;
        !          1389: 
        !          1390:   /* This indicates that we are calculating this node now.
        !          1391:      We reference this value to avoid infinite loop.  */
        !          1392:   dfa->eclosures[node].nelem = -1;
        !          1393: 
        !          1394:   constraint = ((dfa->nodes[node].type == ANCHOR)
        !          1395:                ? dfa->nodes[node].opr.ctx_type : 0);
        !          1396:   /* If the current node has constraints, duplicate all nodes.
        !          1397:      Since they must inherit the constraints.  */
        !          1398:   if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
        !          1399:     {
        !          1400:       int org_node, cur_node;
        !          1401:       org_node = cur_node = node;
        !          1402:       err = duplicate_node_closure (dfa, node, node, node, constraint);
        !          1403:       if (BE (err != REG_NOERROR, 0))
        !          1404:        return err;
        !          1405:     }
        !          1406: 
        !          1407:   /* Expand each epsilon destination nodes.  */
        !          1408:   if (IS_EPSILON_NODE(dfa->nodes[node].type))
        !          1409:     for (i = 0; i < dfa->edests[node].nelem; ++i)
        !          1410:       {
        !          1411:        re_node_set eclosure_elem;
        !          1412:        int edest = dfa->edests[node].elems[i];
        !          1413:        /* If calculating the epsilon closure of `edest' is in progress,
        !          1414:           return intermediate result.  */
        !          1415:        if (dfa->eclosures[edest].nelem == -1)
        !          1416:          {
        !          1417:            incomplete = 1;
        !          1418:            continue;
        !          1419:          }
        !          1420:        /* If we haven't calculated the epsilon closure of `edest' yet,
        !          1421:           calculate now. Otherwise use calculated epsilon closure.  */
        !          1422:        if (dfa->eclosures[edest].nelem == 0)
        !          1423:          {
        !          1424:            err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
        !          1425:            if (BE (err != REG_NOERROR, 0))
        !          1426:              return err;
        !          1427:          }
        !          1428:        else
        !          1429:          eclosure_elem = dfa->eclosures[edest];
        !          1430:        /* Merge the epsilon closure of `edest'.  */
        !          1431:        re_node_set_merge (&eclosure, &eclosure_elem);
        !          1432:        /* If the epsilon closure of `edest' is incomplete,
        !          1433:           the epsilon closure of this node is also incomplete.  */
        !          1434:        if (dfa->eclosures[edest].nelem == 0)
        !          1435:          {
        !          1436:            incomplete = 1;
        !          1437:            re_node_set_free (&eclosure_elem);
        !          1438:          }
        !          1439:       }
        !          1440: 
        !          1441:   /* Epsilon closures include itself.  */
        !          1442:   re_node_set_insert (&eclosure, node);
        !          1443:   if (incomplete && !root)
        !          1444:     dfa->eclosures[node].nelem = 0;
        !          1445:   else
        !          1446:     dfa->eclosures[node] = eclosure;
        !          1447:   *new_set = eclosure;
        !          1448:   return REG_NOERROR;
        !          1449: }
        !          1450: 
        !          1451: /* Functions for token which are used in the parser.  */
        !          1452: 
        !          1453: /* Fetch a token from INPUT.
        !          1454:    We must not use this function inside bracket expressions.  */
        !          1455: 
        !          1456: static re_token_t
        !          1457: fetch_token (input, syntax)
        !          1458:      re_string_t *input;
        !          1459:      reg_syntax_t syntax;
        !          1460: {
        !          1461:   re_token_t token;
        !          1462:   int consumed_byte;
        !          1463:   consumed_byte = peek_token (&token, input, syntax);
        !          1464:   re_string_skip_bytes (input, consumed_byte);
        !          1465:   return token;
        !          1466: }
        !          1467: 
        !          1468: /* Peek a token from INPUT, and return the length of the token.
        !          1469:    We must not use this function inside bracket expressions.  */
        !          1470: 
        !          1471: static int
        !          1472: peek_token (token, input, syntax)
        !          1473:      re_token_t *token;
        !          1474:      re_string_t *input;
        !          1475:      reg_syntax_t syntax;
        !          1476: {
        !          1477:   unsigned char c;
        !          1478: 
        !          1479:   if (re_string_eoi (input))
        !          1480:     {
        !          1481:       token->type = END_OF_RE;
        !          1482:       return 0;
        !          1483:     }
        !          1484: 
        !          1485:   c = re_string_peek_byte (input, 0);
        !          1486:   token->opr.c = c;
        !          1487: 
        !          1488: #ifdef RE_ENABLE_I18N
        !          1489:   token->mb_partial = 0;
        !          1490:   if (MB_CUR_MAX > 1 &&
        !          1491:       !re_string_first_byte (input, re_string_cur_idx (input)))
        !          1492:     {
        !          1493:       token->type = CHARACTER;
        !          1494:       token->mb_partial = 1;
        !          1495:       return 1;
        !          1496:     }
        !          1497: #endif
        !          1498:   if (c == '\\')
        !          1499:     {
        !          1500:       unsigned char c2;
        !          1501:       if (re_string_cur_idx (input) + 1 >= re_string_length (input))
        !          1502:        {
        !          1503:          token->type = BACK_SLASH;
        !          1504:          return 1;
        !          1505:        }
        !          1506: 
        !          1507:       c2 = re_string_peek_byte_case (input, 1);
        !          1508:       token->opr.c = c2;
        !          1509:       token->type = CHARACTER;
        !          1510:       switch (c2)
        !          1511:        {
        !          1512:        case '|':
        !          1513:          if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
        !          1514:            token->type = OP_ALT;
        !          1515:          break;
        !          1516:        case '1': case '2': case '3': case '4': case '5':
        !          1517:        case '6': case '7': case '8': case '9':
        !          1518:          if (!(syntax & RE_NO_BK_REFS))
        !          1519:            {
        !          1520:              token->type = OP_BACK_REF;
        !          1521:              token->opr.idx = c2 - '0';
        !          1522:            }
        !          1523:          break;
        !          1524:        case '<':
        !          1525:          if (!(syntax & RE_NO_GNU_OPS))
        !          1526:            {
        !          1527:              token->type = ANCHOR;
        !          1528:              token->opr.idx = WORD_FIRST;
        !          1529:            }
        !          1530:          break;
        !          1531:        case '>':
        !          1532:          if (!(syntax & RE_NO_GNU_OPS))
        !          1533:            {
        !          1534:              token->type = ANCHOR;
        !          1535:              token->opr.idx = WORD_LAST;
        !          1536:            }
        !          1537:          break;
        !          1538:        case 'b':
        !          1539:          if (!(syntax & RE_NO_GNU_OPS))
        !          1540:            {
        !          1541:              token->type = ANCHOR;
        !          1542:              token->opr.idx = WORD_DELIM;
        !          1543:            }
        !          1544:          break;
        !          1545:        case 'B':
        !          1546:          if (!(syntax & RE_NO_GNU_OPS))
        !          1547:            {
        !          1548:              token->type = ANCHOR;
        !          1549:              token->opr.idx = INSIDE_WORD;
        !          1550:            }
        !          1551:          break;
        !          1552:        case 'w':
        !          1553:          if (!(syntax & RE_NO_GNU_OPS))
        !          1554:            token->type = OP_WORD;
        !          1555:          break;
        !          1556:        case 'W':
        !          1557:          if (!(syntax & RE_NO_GNU_OPS))
        !          1558:            token->type = OP_NOTWORD;
        !          1559:          break;
        !          1560:        case '`':
        !          1561:          if (!(syntax & RE_NO_GNU_OPS))
        !          1562:            {
        !          1563:              token->type = ANCHOR;
        !          1564:              token->opr.idx = BUF_FIRST;
        !          1565:            }
        !          1566:          break;
        !          1567:        case '\'':
        !          1568:          if (!(syntax & RE_NO_GNU_OPS))
        !          1569:            {
        !          1570:              token->type = ANCHOR;
        !          1571:              token->opr.idx = BUF_LAST;
        !          1572:            }
        !          1573:          break;
        !          1574:        case '(':
        !          1575:          if (!(syntax & RE_NO_BK_PARENS))
        !          1576:            token->type = OP_OPEN_SUBEXP;
        !          1577:          break;
        !          1578:        case ')':
        !          1579:          if (!(syntax & RE_NO_BK_PARENS))
        !          1580:            token->type = OP_CLOSE_SUBEXP;
        !          1581:          break;
        !          1582:        case '+':
        !          1583:          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
        !          1584:            token->type = OP_DUP_PLUS;
        !          1585:          break;
        !          1586:        case '?':
        !          1587:          if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
        !          1588:            token->type = OP_DUP_QUESTION;
        !          1589:          break;
        !          1590:        case '{':
        !          1591:          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
        !          1592:            token->type = OP_OPEN_DUP_NUM;
        !          1593:          break;
        !          1594:        case '}':
        !          1595:          if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
        !          1596:            token->type = OP_CLOSE_DUP_NUM;
        !          1597:          break;
        !          1598:        default:
        !          1599:          break;
        !          1600:        }
        !          1601:       return 2;
        !          1602:     }
        !          1603: 
        !          1604:   token->type = CHARACTER;
        !          1605:   switch (c)
        !          1606:     {
        !          1607:     case '\n':
        !          1608:       if (syntax & RE_NEWLINE_ALT)
        !          1609:        token->type = OP_ALT;
        !          1610:       break;
        !          1611:     case '|':
        !          1612:       if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
        !          1613:        token->type = OP_ALT;
        !          1614:       break;
        !          1615:     case '*':
        !          1616:       token->type = OP_DUP_ASTERISK;
        !          1617:       break;
        !          1618:     case '+':
        !          1619:       if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
        !          1620:        token->type = OP_DUP_PLUS;
        !          1621:       break;
        !          1622:     case '?':
        !          1623:       if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
        !          1624:        token->type = OP_DUP_QUESTION;
        !          1625:       break;
        !          1626:     case '{':
        !          1627:       if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
        !          1628:        token->type = OP_OPEN_DUP_NUM;
        !          1629:       break;
        !          1630:     case '}':
        !          1631:       if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
        !          1632:        token->type = OP_CLOSE_DUP_NUM;
        !          1633:       break;
        !          1634:     case '(':
        !          1635:       if (syntax & RE_NO_BK_PARENS)
        !          1636:        token->type = OP_OPEN_SUBEXP;
        !          1637:       break;
        !          1638:     case ')':
        !          1639:       if (syntax & RE_NO_BK_PARENS)
        !          1640:        token->type = OP_CLOSE_SUBEXP;
        !          1641:       break;
        !          1642:     case '[':
        !          1643:       token->type = OP_OPEN_BRACKET;
        !          1644:       break;
        !          1645:     case '.':
        !          1646:       token->type = OP_PERIOD;
        !          1647:       break;
        !          1648:     case '^':
        !          1649:       if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
        !          1650:          re_string_cur_idx (input) != 0)
        !          1651:        {
        !          1652:          char prev = re_string_peek_byte (input, -1);
        !          1653:          if (prev != '|' && prev != '(' &&
        !          1654:              (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
        !          1655:            break;
        !          1656:        }
        !          1657:       token->type = ANCHOR;
        !          1658:       token->opr.idx = LINE_FIRST;
        !          1659:       break;
        !          1660:     case '$':
        !          1661:       if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
        !          1662:          re_string_cur_idx (input) + 1 != re_string_length (input))
        !          1663:        {
        !          1664:          re_token_t next;
        !          1665:          re_string_skip_bytes (input, 1);
        !          1666:          peek_token (&next, input, syntax);
        !          1667:          re_string_skip_bytes (input, -1);
        !          1668:          if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
        !          1669:            break;
        !          1670:        }
        !          1671:       token->type = ANCHOR;
        !          1672:       token->opr.idx = LINE_LAST;
        !          1673:       break;
        !          1674:     default:
        !          1675:       break;
        !          1676:     }
        !          1677:   return 1;
        !          1678: }
        !          1679: 
        !          1680: /* Peek a token from INPUT, and return the length of the token.
        !          1681:    We must not use this function out of bracket expressions.  */
        !          1682: 
        !          1683: static int
        !          1684: peek_token_bracket (token, input, syntax)
        !          1685:      re_token_t *token;
        !          1686:      re_string_t *input;
        !          1687:      reg_syntax_t syntax;
        !          1688: {
        !          1689:   unsigned char c;
        !          1690:   if (re_string_eoi (input))
        !          1691:     {
        !          1692:       token->type = END_OF_RE;
        !          1693:       return 0;
        !          1694:     }
        !          1695:   c = re_string_peek_byte (input, 0);
        !          1696:   token->opr.c = c;
        !          1697: 
        !          1698: #ifdef RE_ENABLE_I18N
        !          1699:   if (MB_CUR_MAX > 1 &&
        !          1700:       !re_string_first_byte (input, re_string_cur_idx (input)))
        !          1701:     {
        !          1702:       token->type = CHARACTER;
        !          1703:       return 1;
        !          1704:     }
        !          1705: #endif /* RE_ENABLE_I18N */
        !          1706: 
        !          1707:   if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS))
        !          1708:     {
        !          1709:       /* In this case, '\' escape a character.  */
        !          1710:       unsigned char c2;
        !          1711:       re_string_skip_bytes (input, 1);
        !          1712:       c2 = re_string_peek_byte (input, 0);
        !          1713:       token->opr.c = c2;
        !          1714:       token->type = CHARACTER;
        !          1715:       return 1;
        !          1716:     }
        !          1717:   if (c == '[') /* '[' is a special char in a bracket exps.  */
        !          1718:     {
        !          1719:       unsigned char c2;
        !          1720:       int token_len;
        !          1721:       c2 = re_string_peek_byte (input, 1);
        !          1722:       token->opr.c = c2;
        !          1723:       token_len = 2;
        !          1724:       switch (c2)
        !          1725:        {
        !          1726:        case '.':
        !          1727:          token->type = OP_OPEN_COLL_ELEM;
        !          1728:          break;
        !          1729:        case '=':
        !          1730:          token->type = OP_OPEN_EQUIV_CLASS;
        !          1731:          break;
        !          1732:        case ':':
        !          1733:          if (syntax & RE_CHAR_CLASSES)
        !          1734:            {
        !          1735:              token->type = OP_OPEN_CHAR_CLASS;
        !          1736:              break;
        !          1737:            }
        !          1738:          /* else fall through.  */
        !          1739:        default:
        !          1740:          token->type = CHARACTER;
        !          1741:          token->opr.c = c;
        !          1742:          token_len = 1;
        !          1743:          break;
        !          1744:        }
        !          1745:       return token_len;
        !          1746:     }
        !          1747:   switch (c)
        !          1748:     {
        !          1749:     case '-':
        !          1750:       token->type = OP_CHARSET_RANGE;
        !          1751:       break;
        !          1752:     case ']':
        !          1753:       token->type = OP_CLOSE_BRACKET;
        !          1754:       break;
        !          1755:     case '^':
        !          1756:       token->type = OP_NON_MATCH_LIST;
        !          1757:       break;
        !          1758:     default:
        !          1759:       token->type = CHARACTER;
        !          1760:     }
        !          1761:   return 1;
        !          1762: }
        !          1763: 
        !          1764: /* Functions for parser.  */
        !          1765: 
        !          1766: /* Entry point of the parser.
        !          1767:    Parse the regular expression REGEXP and return the structure tree.
        !          1768:    If an error is occured, ERR is set by error code, and return NULL.
        !          1769:    This function build the following tree, from regular expression <reg_exp>:
        !          1770:           CAT
        !          1771:           / \
        !          1772:          /   \
        !          1773:    <reg_exp>  EOR
        !          1774: 
        !          1775:    CAT means concatenation.
        !          1776:    EOR means end of regular expression.  */
        !          1777: 
        !          1778: static bin_tree_t *
        !          1779: parse (regexp, preg, syntax, err)
        !          1780:      re_string_t *regexp;
        !          1781:      regex_t *preg;
        !          1782:      reg_syntax_t syntax;
        !          1783:      reg_errcode_t *err;
        !          1784: {
        !          1785:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !          1786:   bin_tree_t *tree, *eor, *root;
        !          1787:   re_token_t current_token;
        !          1788:   int new_idx;
        !          1789:   current_token = fetch_token (regexp, syntax);
        !          1790:   tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
        !          1791:   if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          1792:     return NULL;
        !          1793:   new_idx = re_dfa_add_node (dfa, current_token, 0);
        !          1794:   eor = create_tree (NULL, NULL, 0, new_idx);
        !          1795:   if (tree != NULL)
        !          1796:     root = create_tree (tree, eor, CONCAT, 0);
        !          1797:   else
        !          1798:     root = eor;
        !          1799:   if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
        !          1800:     {
        !          1801:       *err = REG_ESPACE;
        !          1802:       return NULL;
        !          1803:     }
        !          1804:   return root;
        !          1805: }
        !          1806: 
        !          1807: /* This function build the following tree, from regular expression
        !          1808:    <branch1>|<branch2>:
        !          1809:           ALT
        !          1810:           / \
        !          1811:          /   \
        !          1812:    <branch1> <branch2>
        !          1813: 
        !          1814:    ALT means alternative, which represents the operator `|'.  */
        !          1815: 
        !          1816: static bin_tree_t *
        !          1817: parse_reg_exp (regexp, preg, token, syntax, nest, err)
        !          1818:      re_string_t *regexp;
        !          1819:      regex_t *preg;
        !          1820:      re_token_t *token;
        !          1821:      reg_syntax_t syntax;
        !          1822:      int nest;
        !          1823:      reg_errcode_t *err;
        !          1824: {
        !          1825:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !          1826:   bin_tree_t *tree, *branch = NULL;
        !          1827:   int new_idx;
        !          1828:   tree = parse_branch (regexp, preg, token, syntax, nest, err);
        !          1829:   if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          1830:     return NULL;
        !          1831: 
        !          1832:   while (token->type == OP_ALT)
        !          1833:     {
        !          1834:       re_token_t alt_token = *token;
        !          1835:       new_idx = re_dfa_add_node (dfa, alt_token, 0);
        !          1836:       *token = fetch_token (regexp, syntax);
        !          1837:       if (token->type != OP_ALT && token->type != END_OF_RE
        !          1838:          && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
        !          1839:        {
        !          1840:          branch = parse_branch (regexp, preg, token, syntax, nest, err);
        !          1841:          if (BE (*err != REG_NOERROR && branch == NULL, 0))
        !          1842:            {
        !          1843:              free_bin_tree (tree);
        !          1844:              return NULL;
        !          1845:            }
        !          1846:        }
        !          1847:       else
        !          1848:        branch = NULL;
        !          1849:       tree = create_tree (tree, branch, 0, new_idx);
        !          1850:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          1851:        {
        !          1852:          *err = REG_ESPACE;
        !          1853:          return NULL;
        !          1854:        }
        !          1855:       dfa->has_plural_match = 1;
        !          1856:     }
        !          1857:   return tree;
        !          1858: }
        !          1859: 
        !          1860: /* This function build the following tree, from regular expression
        !          1861:    <exp1><exp2>:
        !          1862:        CAT
        !          1863:        / \
        !          1864:        /   \
        !          1865:    <exp1> <exp2>
        !          1866: 
        !          1867:    CAT means concatenation.  */
        !          1868: 
        !          1869: static bin_tree_t *
        !          1870: parse_branch (regexp, preg, token, syntax, nest, err)
        !          1871:      re_string_t *regexp;
        !          1872:      regex_t *preg;
        !          1873:      re_token_t *token;
        !          1874:      reg_syntax_t syntax;
        !          1875:      int nest;
        !          1876:      reg_errcode_t *err;
        !          1877: {
        !          1878:   bin_tree_t *tree, *exp;
        !          1879:   tree = parse_expression (regexp, preg, token, syntax, nest, err);
        !          1880:   if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          1881:     return NULL;
        !          1882: 
        !          1883:   while (token->type != OP_ALT && token->type != END_OF_RE
        !          1884:         && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
        !          1885:     {
        !          1886:       exp = parse_expression (regexp, preg, token, syntax, nest, err);
        !          1887:       if (BE (*err != REG_NOERROR && exp == NULL, 0))
        !          1888:        {
        !          1889:          free_bin_tree (tree);
        !          1890:          return NULL;
        !          1891:        }
        !          1892:       if (tree != NULL && exp != NULL)
        !          1893:        {
        !          1894:          tree = create_tree (tree, exp, CONCAT, 0);
        !          1895:          if (tree == NULL)
        !          1896:            {
        !          1897:              *err = REG_ESPACE;
        !          1898:              return NULL;
        !          1899:            }
        !          1900:        }
        !          1901:       else if (tree == NULL)
        !          1902:        tree = exp;
        !          1903:       /* Otherwise exp == NULL, we don't need to create new tree.  */
        !          1904:     }
        !          1905:   return tree;
        !          1906: }
        !          1907: 
        !          1908: /* This function build the following tree, from regular expression a*:
        !          1909:         *
        !          1910:         |
        !          1911:         a
        !          1912: */
        !          1913: 
        !          1914: static bin_tree_t *
        !          1915: parse_expression (regexp, preg, token, syntax, nest, err)
        !          1916:      re_string_t *regexp;
        !          1917:      regex_t *preg;
        !          1918:      re_token_t *token;
        !          1919:      reg_syntax_t syntax;
        !          1920:      int nest;
        !          1921:      reg_errcode_t *err;
        !          1922: {
        !          1923:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !          1924:   bin_tree_t *tree;
        !          1925:   int new_idx;
        !          1926:   switch (token->type)
        !          1927:     {
        !          1928:     case CHARACTER:
        !          1929:       new_idx = re_dfa_add_node (dfa, *token, 0);
        !          1930:       tree = create_tree (NULL, NULL, 0, new_idx);
        !          1931:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          1932:        {
        !          1933:          *err = REG_ESPACE;
        !          1934:          return NULL;
        !          1935:        }
        !          1936: #ifdef RE_ENABLE_I18N
        !          1937:       if (MB_CUR_MAX > 1)
        !          1938:        {
        !          1939:          while (!re_string_eoi (regexp)
        !          1940:                 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
        !          1941:            {
        !          1942:              bin_tree_t *mbc_remain;
        !          1943:              *token = fetch_token (regexp, syntax);
        !          1944:              new_idx = re_dfa_add_node (dfa, *token, 0);
        !          1945:              mbc_remain = create_tree (NULL, NULL, 0, new_idx);
        !          1946:              tree = create_tree (tree, mbc_remain, CONCAT, 0);
        !          1947:              if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
        !          1948:                {
        !          1949:                  *err = REG_ESPACE;
        !          1950:                  return NULL;
        !          1951:                }
        !          1952:            }
        !          1953:        }
        !          1954: #endif
        !          1955:       break;
        !          1956:     case OP_OPEN_SUBEXP:
        !          1957:       tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
        !          1958:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          1959:        return NULL;
        !          1960:       break;
        !          1961:     case OP_OPEN_BRACKET:
        !          1962:       tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
        !          1963:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          1964:        return NULL;
        !          1965:       break;
        !          1966:     case OP_BACK_REF:
        !          1967:       if (BE (preg->re_nsub < token->opr.idx
        !          1968:              || dfa->subexps[token->opr.idx - 1].end == -1, 0))
        !          1969:        {
        !          1970:          *err = REG_ESUBREG;
        !          1971:          return NULL;
        !          1972:        }
        !          1973:       dfa->used_bkref_map |= 1 << (token->opr.idx - 1);
        !          1974:       new_idx = re_dfa_add_node (dfa, *token, 0);
        !          1975:       tree = create_tree (NULL, NULL, 0, new_idx);
        !          1976:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          1977:        {
        !          1978:          *err = REG_ESPACE;
        !          1979:          return NULL;
        !          1980:        }
        !          1981:       ++dfa->nbackref;
        !          1982:       dfa->has_mb_node = 1;
        !          1983:       break;
        !          1984:     case OP_DUP_ASTERISK:
        !          1985:     case OP_DUP_PLUS:
        !          1986:     case OP_DUP_QUESTION:
        !          1987:     case OP_OPEN_DUP_NUM:
        !          1988:       if (syntax & RE_CONTEXT_INVALID_OPS)
        !          1989:        {
        !          1990:          *err = REG_BADRPT;
        !          1991:          return NULL;
        !          1992:        }
        !          1993:       else if (syntax & RE_CONTEXT_INDEP_OPS)
        !          1994:        {
        !          1995:          *token = fetch_token (regexp, syntax);
        !          1996:          return parse_expression (regexp, preg, token, syntax, nest, err);
        !          1997:        }
        !          1998:       /* else fall through  */
        !          1999:     case OP_CLOSE_SUBEXP:
        !          2000:       if ((token->type == OP_CLOSE_SUBEXP) &&
        !          2001:          !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
        !          2002:        {
        !          2003:          *err = REG_ERPAREN;
        !          2004:          return NULL;
        !          2005:        }
        !          2006:       /* else fall through  */
        !          2007:     case OP_CLOSE_DUP_NUM:
        !          2008:       /* We treat it as a normal character.  */
        !          2009: 
        !          2010:       /* Then we can these characters as normal characters.  */
        !          2011:       token->type = CHARACTER;
        !          2012:       new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2013:       tree = create_tree (NULL, NULL, 0, new_idx);
        !          2014:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          2015:        {
        !          2016:          *err = REG_ESPACE;
        !          2017:          return NULL;
        !          2018:        }
        !          2019:       break;
        !          2020:     case ANCHOR:
        !          2021:       if (dfa->word_char == NULL)
        !          2022:        {
        !          2023:          *err = init_word_char (dfa);
        !          2024:          if (BE (*err != REG_NOERROR, 0))
        !          2025:            return NULL;
        !          2026:        }
        !          2027:       if (token->opr.ctx_type == WORD_DELIM)
        !          2028:        {
        !          2029:          bin_tree_t *tree_first, *tree_last;
        !          2030:          int idx_first, idx_last;
        !          2031:          token->opr.ctx_type = WORD_FIRST;
        !          2032:          idx_first = re_dfa_add_node (dfa, *token, 0);
        !          2033:          tree_first = create_tree (NULL, NULL, 0, idx_first);
        !          2034:          token->opr.ctx_type = WORD_LAST;
        !          2035:          idx_last = re_dfa_add_node (dfa, *token, 0);
        !          2036:          tree_last = create_tree (NULL, NULL, 0, idx_last);
        !          2037:          token->type = OP_ALT;
        !          2038:          new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2039:          tree = create_tree (tree_first, tree_last, 0, new_idx);
        !          2040:          if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
        !          2041:                  || tree_first == NULL || tree_last == NULL
        !          2042:                  || tree == NULL, 0))
        !          2043:            {
        !          2044:              *err = REG_ESPACE;
        !          2045:              return NULL;
        !          2046:            }
        !          2047:        }
        !          2048:       else
        !          2049:        {
        !          2050:          new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2051:          tree = create_tree (NULL, NULL, 0, new_idx);
        !          2052:          if (BE (new_idx == -1 || tree == NULL, 0))
        !          2053:            {
        !          2054:              *err = REG_ESPACE;
        !          2055:              return NULL;
        !          2056:            }
        !          2057:        }
        !          2058:       /* We must return here, since ANCHORs can't be followed
        !          2059:         by repetition operators.
        !          2060:         eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
        !          2061:             it must not be "<ANCHOR(^)><REPEAT(*)>".  */
        !          2062:       *token = fetch_token (regexp, syntax);
        !          2063:       return tree;
        !          2064:     case OP_PERIOD:
        !          2065:       new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2066:       tree = create_tree (NULL, NULL, 0, new_idx);
        !          2067:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          2068:        {
        !          2069:          *err = REG_ESPACE;
        !          2070:          return NULL;
        !          2071:        }
        !          2072:       if (MB_CUR_MAX > 1)
        !          2073:        dfa->has_mb_node = 1;
        !          2074:       break;
        !          2075:     case OP_WORD:
        !          2076:       tree = build_word_op (dfa, 0, err);
        !          2077:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          2078:        return NULL;
        !          2079:       break;
        !          2080:     case OP_NOTWORD:
        !          2081:       tree = build_word_op (dfa, 1, err);
        !          2082:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          2083:        return NULL;
        !          2084:       break;
        !          2085:     case OP_ALT:
        !          2086:     case END_OF_RE:
        !          2087:       return NULL;
        !          2088:     case BACK_SLASH:
        !          2089:       *err = REG_EESCAPE;
        !          2090:       return NULL;
        !          2091:     default:
        !          2092:       /* Must not happen?  */
        !          2093: #ifdef DEBUG
        !          2094:       assert (0);
        !          2095: #endif
        !          2096:       return NULL;
        !          2097:     }
        !          2098:   *token = fetch_token (regexp, syntax);
        !          2099: 
        !          2100:   while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
        !          2101:         || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
        !          2102:     {
        !          2103:       tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
        !          2104:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          2105:        return NULL;
        !          2106:       dfa->has_plural_match = 1;
        !          2107:     }
        !          2108: 
        !          2109:   return tree;
        !          2110: }
        !          2111: 
        !          2112: /* This function build the following tree, from regular expression
        !          2113:    (<reg_exp>):
        !          2114:         SUBEXP
        !          2115:            |
        !          2116:        <reg_exp>
        !          2117: */
        !          2118: 
        !          2119: static bin_tree_t *
        !          2120: parse_sub_exp (regexp, preg, token, syntax, nest, err)
        !          2121:      re_string_t *regexp;
        !          2122:      regex_t *preg;
        !          2123:      re_token_t *token;
        !          2124:      reg_syntax_t syntax;
        !          2125:      int nest;
        !          2126:      reg_errcode_t *err;
        !          2127: {
        !          2128:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
        !          2129:   bin_tree_t *tree, *left_par, *right_par;
        !          2130:   size_t cur_nsub;
        !          2131:   int new_idx;
        !          2132:   cur_nsub = preg->re_nsub++;
        !          2133:   if (dfa->subexps_alloc < preg->re_nsub)
        !          2134:     {
        !          2135:       re_subexp_t *new_array;
        !          2136:       dfa->subexps_alloc *= 2;
        !          2137:       new_array = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
        !          2138:       if (BE (new_array == NULL, 0))
        !          2139:        {
        !          2140:          dfa->subexps_alloc /= 2;
        !          2141:          *err = REG_ESPACE;
        !          2142:          return NULL;
        !          2143:        }
        !          2144:       dfa->subexps = new_array;
        !          2145:     }
        !          2146:   dfa->subexps[cur_nsub].start = dfa->nodes_len;
        !          2147:   dfa->subexps[cur_nsub].end = -1;
        !          2148: 
        !          2149:   new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2150:   left_par = create_tree (NULL, NULL, 0, new_idx);
        !          2151:   if (BE (new_idx == -1 || left_par == NULL, 0))
        !          2152:     {
        !          2153:       *err = REG_ESPACE;
        !          2154:       return NULL;
        !          2155:     }
        !          2156:   dfa->nodes[new_idx].opr.idx = cur_nsub;
        !          2157:   *token = fetch_token (regexp, syntax);
        !          2158: 
        !          2159:   /* The subexpression may be a null string.  */
        !          2160:   if (token->type == OP_CLOSE_SUBEXP)
        !          2161:     tree = NULL;
        !          2162:   else
        !          2163:     {
        !          2164:       tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
        !          2165:       if (BE (*err != REG_NOERROR && tree == NULL, 0))
        !          2166:        return NULL;
        !          2167:     }
        !          2168:   if (BE (token->type != OP_CLOSE_SUBEXP, 0))
        !          2169:     {
        !          2170:       free_bin_tree (tree);
        !          2171:       *err = REG_BADPAT;
        !          2172:       return NULL;
        !          2173:     }
        !          2174:   new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2175:   dfa->subexps[cur_nsub].end = dfa->nodes_len;
        !          2176:   right_par = create_tree (NULL, NULL, 0, new_idx);
        !          2177:   tree = ((tree == NULL) ? right_par
        !          2178:          : create_tree (tree, right_par, CONCAT, 0));
        !          2179:   tree = create_tree (left_par, tree, CONCAT, 0);
        !          2180:   if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
        !          2181:     {
        !          2182:       *err = REG_ESPACE;
        !          2183:       return NULL;
        !          2184:     }
        !          2185:   dfa->nodes[new_idx].opr.idx = cur_nsub;
        !          2186: 
        !          2187:   return tree;
        !          2188: }
        !          2189: 
        !          2190: /* This function parse repetition operators like "*", "+", "{1,3}" etc.  */
        !          2191: 
        !          2192: static bin_tree_t *
        !          2193: parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
        !          2194:      bin_tree_t *dup_elem;
        !          2195:      re_string_t *regexp;
        !          2196:      re_dfa_t *dfa;
        !          2197:      re_token_t *token;
        !          2198:      reg_syntax_t syntax;
        !          2199:      reg_errcode_t *err;
        !          2200: {
        !          2201:   re_token_t dup_token;
        !          2202:   bin_tree_t *tree = dup_elem, *work_tree;
        !          2203:   int new_idx, start_idx = re_string_cur_idx (regexp);
        !          2204:   re_token_t start_token = *token;
        !          2205:   if (token->type == OP_OPEN_DUP_NUM)
        !          2206:     {
        !          2207:       int i;
        !          2208:       int end = 0;
        !          2209:       int start = fetch_number (regexp, token, syntax);
        !          2210:       bin_tree_t *elem;
        !          2211:       if (start == -1)
        !          2212:        {
        !          2213:          if (token->type == CHARACTER && token->opr.c == ',')
        !          2214:            start = 0; /* We treat "{,m}" as "{0,m}".  */
        !          2215:          else
        !          2216:            {
        !          2217:              *err = REG_BADBR; /* <re>{} is invalid.  */
        !          2218:              return NULL;
        !          2219:            }
        !          2220:        }
        !          2221:       if (BE (start != -2, 1))
        !          2222:        {
        !          2223:          /* We treat "{n}" as "{n,n}".  */
        !          2224:          end = ((token->type == OP_CLOSE_DUP_NUM) ? start
        !          2225:                 : ((token->type == CHARACTER && token->opr.c == ',')
        !          2226:                    ? fetch_number (regexp, token, syntax) : -2));
        !          2227:        }
        !          2228:       if (BE (start == -2 || end == -2, 0))
        !          2229:        {
        !          2230:          /* Invalid sequence.  */
        !          2231:          if (token->type == OP_CLOSE_DUP_NUM)
        !          2232:            goto parse_dup_op_invalid_interval;
        !          2233:          else
        !          2234:            goto parse_dup_op_ebrace;
        !          2235:        }
        !          2236:       if (BE (start == 0 && end == 0, 0))
        !          2237:        {
        !          2238:          /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
        !          2239:          *token = fetch_token (regexp, syntax);
        !          2240:          free_bin_tree (dup_elem);
        !          2241:          return NULL;
        !          2242:        }
        !          2243: 
        !          2244:       /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
        !          2245:       elem = tree;
        !          2246:       for (i = 0; i < start; ++i)
        !          2247:        if (i != 0)
        !          2248:          {
        !          2249:            work_tree = duplicate_tree (elem, dfa);
        !          2250:            tree = create_tree (tree, work_tree, CONCAT, 0);
        !          2251:            if (BE (work_tree == NULL || tree == NULL, 0))
        !          2252:              goto parse_dup_op_espace;
        !          2253:          }
        !          2254: 
        !          2255:       if (end == -1)
        !          2256:        {
        !          2257:          /* We treat "<re>{0,}" as "<re>*".  */
        !          2258:          dup_token.type = OP_DUP_ASTERISK;
        !          2259:          if (start > 0)
        !          2260:            {
        !          2261:              elem = duplicate_tree (elem, dfa);
        !          2262:              new_idx = re_dfa_add_node (dfa, dup_token, 0);
        !          2263:              work_tree = create_tree (elem, NULL, 0, new_idx);
        !          2264:              tree = create_tree (tree, work_tree, CONCAT, 0);
        !          2265:              if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
        !          2266:                      || tree == NULL, 0))
        !          2267:                goto parse_dup_op_espace;
        !          2268:            }
        !          2269:          else
        !          2270:            {
        !          2271:              new_idx = re_dfa_add_node (dfa, dup_token, 0);
        !          2272:              tree = create_tree (elem, NULL, 0, new_idx);
        !          2273:              if (BE (new_idx == -1 || tree == NULL, 0))
        !          2274:                goto parse_dup_op_espace;
        !          2275:            }
        !          2276:        }
        !          2277:       else if (end - start > 0)
        !          2278:        {
        !          2279:          /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?".  */
        !          2280:          dup_token.type = OP_DUP_QUESTION;
        !          2281:          if (start > 0)
        !          2282:            {
        !          2283:              elem = duplicate_tree (elem, dfa);
        !          2284:              new_idx = re_dfa_add_node (dfa, dup_token, 0);
        !          2285:              elem = create_tree (elem, NULL, 0, new_idx);
        !          2286:              tree = create_tree (tree, elem, CONCAT, 0);
        !          2287:              if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
        !          2288:                goto parse_dup_op_espace;
        !          2289:            }
        !          2290:          else
        !          2291:            {
        !          2292:              new_idx = re_dfa_add_node (dfa, dup_token, 0);
        !          2293:              tree = elem = create_tree (elem, NULL, 0, new_idx);
        !          2294:              if (BE (new_idx == -1 || tree == NULL, 0))
        !          2295:                goto parse_dup_op_espace;
        !          2296:            }
        !          2297:          for (i = 1; i < end - start; ++i)
        !          2298:            {
        !          2299:              work_tree = duplicate_tree (elem, dfa);
        !          2300:              tree = create_tree (tree, work_tree, CONCAT, 0);
        !          2301:              if (BE (work_tree == NULL || tree == NULL, 0))
        !          2302:                {
        !          2303:                  *err = REG_ESPACE;
        !          2304:                  return NULL;
        !          2305:                }
        !          2306:            }
        !          2307:        }
        !          2308:     }
        !          2309:   else
        !          2310:     {
        !          2311:       new_idx = re_dfa_add_node (dfa, *token, 0);
        !          2312:       tree = create_tree (tree, NULL, 0, new_idx);
        !          2313:       if (BE (new_idx == -1 || tree == NULL, 0))
        !          2314:        {
        !          2315:          *err = REG_ESPACE;
        !          2316:          return NULL;
        !          2317:        }
        !          2318:     }
        !          2319:   *token = fetch_token (regexp, syntax);
        !          2320:   return tree;
        !          2321: 
        !          2322:  parse_dup_op_espace:
        !          2323:   free_bin_tree (tree);
        !          2324:   *err = REG_ESPACE;
        !          2325:   return NULL;
        !          2326: 
        !          2327:  parse_dup_op_ebrace:
        !          2328:   if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
        !          2329:     {
        !          2330:       *err = REG_EBRACE;
        !          2331:       return NULL;
        !          2332:     }
        !          2333:   goto parse_dup_op_rollback;
        !          2334:  parse_dup_op_invalid_interval:
        !          2335:   if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
        !          2336:     {
        !          2337:       *err = REG_BADBR;
        !          2338:       return NULL;
        !          2339:     }
        !          2340:  parse_dup_op_rollback:
        !          2341:   re_string_set_index (regexp, start_idx);
        !          2342:   *token = start_token;
        !          2343:   token->type = CHARACTER;
        !          2344:   return dup_elem;
        !          2345: }
        !          2346: 
        !          2347: /* Size of the names for collating symbol/equivalence_class/character_class.
        !          2348:    I'm not sure, but maybe enough.  */
        !          2349: #define BRACKET_NAME_BUF_SIZE 32
        !          2350: 
        !          2351: #ifndef _LIBC
        !          2352:   /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
        !          2353:      Build the range expression which starts from START_ELEM, and ends
        !          2354:      at END_ELEM.  The result are written to MBCSET and SBCSET.
        !          2355:      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
        !          2356:      mbcset->range_ends, is a pointer argument sinse we may
        !          2357:      update it.  */
        !          2358: 
        !          2359: static reg_errcode_t
        !          2360: # ifdef RE_ENABLE_I18N
        !          2361: build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
        !          2362:      re_charset_t *mbcset;
        !          2363:      int *range_alloc;
        !          2364: # else /* not RE_ENABLE_I18N */
        !          2365: build_range_exp (sbcset, start_elem, end_elem)
        !          2366: # endif /* not RE_ENABLE_I18N */
        !          2367:      re_bitset_ptr_t sbcset;
        !          2368:      bracket_elem_t *start_elem, *end_elem;
        !          2369: {
        !          2370:   unsigned int start_ch, end_ch;
        !          2371:   /* Equivalence Classes and Character Classes can't be a range start/end.  */
        !          2372:   if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
        !          2373:          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
        !          2374:          0))
        !          2375:     return REG_ERANGE;
        !          2376: 
        !          2377:   /* We can handle no multi character collating elements without libc
        !          2378:      support.  */
        !          2379:   if (BE ((start_elem->type == COLL_SYM
        !          2380:           && strlen ((char *) start_elem->opr.name) > 1)
        !          2381:          || (end_elem->type == COLL_SYM
        !          2382:              && strlen ((char *) end_elem->opr.name) > 1), 0))
        !          2383:     return REG_ECOLLATE;
        !          2384: 
        !          2385: # ifdef RE_ENABLE_I18N
        !          2386:   {
        !          2387:     wchar_t wc, start_wc, end_wc;
        !          2388:     wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
        !          2389: 
        !          2390:     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
        !          2391:                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
        !          2392:                   : 0));
        !          2393:     end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
        !          2394:              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
        !          2395:                 : 0));
        !          2396:     start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
        !          2397:                ? __btowc (start_ch) : start_elem->opr.wch);
        !          2398:     end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
        !          2399:              ? __btowc (end_ch) : end_elem->opr.wch);
        !          2400:     cmp_buf[0] = start_wc;
        !          2401:     cmp_buf[4] = end_wc;
        !          2402:     if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
        !          2403:       return REG_ERANGE;
        !          2404: 
        !          2405:     /* Check the space of the arrays.  */
        !          2406:     if (*range_alloc == mbcset->nranges)
        !          2407:       {
        !          2408:        /* There are not enough space, need realloc.  */
        !          2409:        wchar_t *new_array_start, *new_array_end;
        !          2410:        int new_nranges;
        !          2411: 
        !          2412:        /* +1 in case of mbcset->nranges is 0.  */
        !          2413:        new_nranges = 2 * mbcset->nranges + 1;
        !          2414:        /* Use realloc since mbcset->range_starts and mbcset->range_ends
        !          2415:           are NULL if *range_alloc == 0.  */
        !          2416:        new_array_start = re_realloc (mbcset->range_starts, wchar_t,
        !          2417:                                      new_nranges);
        !          2418:        new_array_end = re_realloc (mbcset->range_ends, wchar_t,
        !          2419:                                    new_nranges);
        !          2420: 
        !          2421:        if (BE (new_array_start == NULL || new_array_end == NULL, 0))
        !          2422:          return REG_ESPACE;
        !          2423: 
        !          2424:        mbcset->range_starts = new_array_start;
        !          2425:        mbcset->range_ends = new_array_end;
        !          2426:        *range_alloc = new_nranges;
        !          2427:       }
        !          2428: 
        !          2429:     mbcset->range_starts[mbcset->nranges] = start_wc;
        !          2430:     mbcset->range_ends[mbcset->nranges++] = end_wc;
        !          2431: 
        !          2432:     /* Build the table for single byte characters.  */
        !          2433:     for (wc = 0; wc <= SBC_MAX; ++wc)
        !          2434:       {
        !          2435:        cmp_buf[2] = wc;
        !          2436:        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
        !          2437:            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
        !          2438:          bitset_set (sbcset, wc);
        !          2439:       }
        !          2440:   }
        !          2441: # else /* not RE_ENABLE_I18N */
        !          2442:   {
        !          2443:     unsigned int ch;
        !          2444:     start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
        !          2445:                : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
        !          2446:                   : 0));
        !          2447:     end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
        !          2448:              : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
        !          2449:                 : 0));
        !          2450:     if (start_ch > end_ch)
        !          2451:       return REG_ERANGE;
        !          2452:     /* Build the table for single byte characters.  */
        !          2453:     for (ch = 0; ch <= SBC_MAX; ++ch)
        !          2454:       if (start_ch <= ch  && ch <= end_ch)
        !          2455:        bitset_set (sbcset, ch);
        !          2456:   }
        !          2457: # endif /* not RE_ENABLE_I18N */
        !          2458:   return REG_NOERROR;
        !          2459: }
        !          2460: #endif /* not _LIBC */
        !          2461: 
        !          2462: #ifndef _LIBC
        !          2463: /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
        !          2464:    Build the collating element which is represented by NAME.
        !          2465:    The result are written to MBCSET and SBCSET.
        !          2466:    COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
        !          2467:    pointer argument since we may update it.  */
        !          2468: 
        !          2469: static reg_errcode_t
        !          2470: # ifdef RE_ENABLE_I18N
        !          2471: build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
        !          2472:      re_charset_t *mbcset;
        !          2473:      int *coll_sym_alloc;
        !          2474: # else /* not RE_ENABLE_I18N */
        !          2475: build_collating_symbol (sbcset, name)
        !          2476: # endif /* not RE_ENABLE_I18N */
        !          2477:      re_bitset_ptr_t sbcset;
        !          2478:      const unsigned char *name;
        !          2479: {
        !          2480:   size_t name_len = strlen ((const char *) name);
        !          2481:   if (BE (name_len != 1, 0))
        !          2482:     return REG_ECOLLATE;
        !          2483:   else
        !          2484:     {
        !          2485:       bitset_set (sbcset, name[0]);
        !          2486:       return REG_NOERROR;
        !          2487:     }
        !          2488: }
        !          2489: #endif /* not _LIBC */
        !          2490: 
        !          2491: /* This function parse bracket expression like "[abc]", "[a-c]",
        !          2492:    "[[.a-a.]]" etc.  */
        !          2493: 
        !          2494: static bin_tree_t *
        !          2495: parse_bracket_exp (regexp, dfa, token, syntax, err)
        !          2496:      re_string_t *regexp;
        !          2497:      re_dfa_t *dfa;
        !          2498:      re_token_t *token;
        !          2499:      reg_syntax_t syntax;
        !          2500:      reg_errcode_t *err;
        !          2501: {
        !          2502: #ifdef _LIBC
        !          2503:   const unsigned char *collseqmb;
        !          2504:   const char *collseqwc;
        !          2505:   uint32_t nrules;
        !          2506:   int32_t table_size;
        !          2507:   const int32_t *symb_table;
        !          2508:   const unsigned char *extra;
        !          2509: 
        !          2510:   /* Local function for parse_bracket_exp used in _LIBC environement.
        !          2511:      Seek the collating symbol entry correspondings to NAME.
        !          2512:      Return the index of the symbol in the SYMB_TABLE.  */
        !          2513: 
        !          2514:   static inline int32_t
        !          2515:   seek_collating_symbol_entry (name, name_len)
        !          2516:         const unsigned char *name;
        !          2517:         size_t name_len;
        !          2518:     {
        !          2519:       int32_t hash = elem_hash ((const char *) name, name_len);
        !          2520:       int32_t elem = hash % table_size;
        !          2521:       int32_t second = hash % (table_size - 2);
        !          2522:       while (symb_table[2 * elem] != 0)
        !          2523:        {
        !          2524:          /* First compare the hashing value.  */
        !          2525:          if (symb_table[2 * elem] == hash
        !          2526:              /* Compare the length of the name.  */
        !          2527:              && name_len == extra[symb_table[2 * elem + 1]]
        !          2528:              /* Compare the name.  */
        !          2529:              && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
        !          2530:                         name_len) == 0)
        !          2531:            {
        !          2532:              /* Yep, this is the entry.  */
        !          2533:              break;
        !          2534:            }
        !          2535: 
        !          2536:          /* Next entry.  */
        !          2537:          elem += second;
        !          2538:        }
        !          2539:       return elem;
        !          2540:     }
        !          2541: 
        !          2542:   /* Local function for parse_bracket_exp used in _LIBC environement.
        !          2543:      Look up the collation sequence value of BR_ELEM.
        !          2544:      Return the value if succeeded, UINT_MAX otherwise.  */
        !          2545: 
        !          2546:   static inline unsigned int
        !          2547:   lookup_collation_sequence_value (br_elem)
        !          2548:         bracket_elem_t *br_elem;
        !          2549:     {
        !          2550:       if (br_elem->type == SB_CHAR)
        !          2551:        {
        !          2552:          /*
        !          2553:          if (MB_CUR_MAX == 1)
        !          2554:          */
        !          2555:          if (nrules == 0)
        !          2556:            return collseqmb[br_elem->opr.ch];
        !          2557:          else
        !          2558:            {
        !          2559:              wint_t wc = __btowc (br_elem->opr.ch);
        !          2560:              return collseq_table_lookup (collseqwc, wc);
        !          2561:            }
        !          2562:        }
        !          2563:       else if (br_elem->type == MB_CHAR)
        !          2564:        {
        !          2565:          return collseq_table_lookup (collseqwc, br_elem->opr.wch);
        !          2566:        }
        !          2567:       else if (br_elem->type == COLL_SYM)
        !          2568:        {
        !          2569:          size_t sym_name_len = strlen ((char *) br_elem->opr.name);
        !          2570:          if (nrules != 0)
        !          2571:            {
        !          2572:              int32_t elem, idx;
        !          2573:              elem = seek_collating_symbol_entry (br_elem->opr.name,
        !          2574:                                                  sym_name_len);
        !          2575:              if (symb_table[2 * elem] != 0)
        !          2576:                {
        !          2577:                  /* We found the entry.  */
        !          2578:                  idx = symb_table[2 * elem + 1];
        !          2579:                  /* Skip the name of collating element name.  */
        !          2580:                  idx += 1 + extra[idx];
        !          2581:                  /* Skip the byte sequence of the collating element.  */
        !          2582:                  idx += 1 + extra[idx];
        !          2583:                  /* Adjust for the alignment.  */
        !          2584:                  idx = (idx + 3) & ~3;
        !          2585:                  /* Skip the multibyte collation sequence value.  */
        !          2586:                  idx += sizeof (unsigned int);
        !          2587:                  /* Skip the wide char sequence of the collating element.  */
        !          2588:                  idx += sizeof (unsigned int) *
        !          2589:                    (1 + *(unsigned int *) (extra + idx));
        !          2590:                  /* Return the collation sequence value.  */
        !          2591:                  return *(unsigned int *) (extra + idx);
        !          2592:                }
        !          2593:              else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
        !          2594:                {
        !          2595:                  /* No valid character.  Match it as a single byte
        !          2596:                     character.  */
        !          2597:                  return collseqmb[br_elem->opr.name[0]];
        !          2598:                }
        !          2599:            }
        !          2600:          else if (sym_name_len == 1)
        !          2601:            return collseqmb[br_elem->opr.name[0]];
        !          2602:        }
        !          2603:       return UINT_MAX;
        !          2604:     }
        !          2605: 
        !          2606:   /* Local function for parse_bracket_exp used in _LIBC environement.
        !          2607:      Build the range expression which starts from START_ELEM, and ends
        !          2608:      at END_ELEM.  The result are written to MBCSET and SBCSET.
        !          2609:      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
        !          2610:      mbcset->range_ends, is a pointer argument sinse we may
        !          2611:      update it.  */
        !          2612: 
        !          2613:   static inline reg_errcode_t
        !          2614: # ifdef RE_ENABLE_I18N
        !          2615:   build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
        !          2616:         re_charset_t *mbcset;
        !          2617:         int *range_alloc;
        !          2618: # else /* not RE_ENABLE_I18N */
        !          2619:   build_range_exp (sbcset, start_elem, end_elem)
        !          2620: # endif /* not RE_ENABLE_I18N */
        !          2621:         re_bitset_ptr_t sbcset;
        !          2622:         bracket_elem_t *start_elem, *end_elem;
        !          2623:     {
        !          2624:       unsigned int ch;
        !          2625:       uint32_t start_collseq;
        !          2626:       uint32_t end_collseq;
        !          2627: 
        !          2628: # ifdef RE_ENABLE_I18N
        !          2629:       /* Check the space of the arrays.  */
        !          2630:       if (*range_alloc == mbcset->nranges)
        !          2631:        {
        !          2632:          /* There are not enough space, need realloc.  */
        !          2633:          uint32_t *new_array_start;
        !          2634:          uint32_t *new_array_end;
        !          2635:          int new_nranges;
        !          2636: 
        !          2637:          /* +1 in case of mbcset->nranges is 0.  */
        !          2638:          new_nranges = 2 * mbcset->nranges + 1;
        !          2639:          /* Use realloc since mbcset->range_starts and mbcset->range_ends
        !          2640:             are NULL if *range_alloc == 0.  */
        !          2641:          new_array_start = re_realloc (mbcset->range_starts, uint32_t,
        !          2642:                                        new_nranges);
        !          2643:          new_array_end = re_realloc (mbcset->range_ends, uint32_t,
        !          2644:                                      new_nranges);
        !          2645: 
        !          2646:          if (BE (new_array_start == NULL || new_array_end == NULL, 0))
        !          2647:            return REG_ESPACE;
        !          2648: 
        !          2649:          mbcset->range_starts = new_array_start;
        !          2650:          mbcset->range_ends = new_array_end;
        !          2651:          *range_alloc = new_nranges;
        !          2652:        }
        !          2653: # endif /* RE_ENABLE_I18N */
        !          2654: 
        !          2655:       /* Equivalence Classes and Character Classes can't be a range
        !          2656:         start/end.  */
        !          2657:       if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
        !          2658:              || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
        !          2659:              0))
        !          2660:        return REG_ERANGE;
        !          2661: 
        !          2662:       start_collseq = lookup_collation_sequence_value (start_elem);
        !          2663:       end_collseq = lookup_collation_sequence_value (end_elem);
        !          2664:       /* Check start/end collation sequence values.  */
        !          2665:       if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
        !          2666:        return REG_ECOLLATE;
        !          2667:       if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
        !          2668:        return REG_ERANGE;
        !          2669: 
        !          2670: # ifdef RE_ENABLE_I18N
        !          2671:       /* Got valid collation sequence values, add them as a new entry.  */
        !          2672:       mbcset->range_starts[mbcset->nranges] = start_collseq;
        !          2673:       mbcset->range_ends[mbcset->nranges++] = end_collseq;
        !          2674: # endif /* RE_ENABLE_I18N */
        !          2675: 
        !          2676:       /* Build the table for single byte characters.  */
        !          2677:       for (ch = 0; ch <= SBC_MAX; ch++)
        !          2678:        {
        !          2679:          uint32_t ch_collseq;
        !          2680:          /*
        !          2681:          if (MB_CUR_MAX == 1)
        !          2682:          */
        !          2683:          if (nrules == 0)
        !          2684:            ch_collseq = collseqmb[ch];
        !          2685:          else
        !          2686:            ch_collseq = collseq_table_lookup (collseqwc, __btowc (ch));
        !          2687:          if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
        !          2688:            bitset_set (sbcset, ch);
        !          2689:        }
        !          2690:       return REG_NOERROR;
        !          2691:     }
        !          2692: 
        !          2693:   /* Local function for parse_bracket_exp used in _LIBC environement.
        !          2694:      Build the collating element which is represented by NAME.
        !          2695:      The result are written to MBCSET and SBCSET.
        !          2696:      COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
        !          2697:      pointer argument sinse we may update it.  */
        !          2698: 
        !          2699:   static inline reg_errcode_t
        !          2700: # ifdef RE_ENABLE_I18N
        !          2701:   build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
        !          2702:         re_charset_t *mbcset;
        !          2703:         int *coll_sym_alloc;
        !          2704: # else /* not RE_ENABLE_I18N */
        !          2705:   build_collating_symbol (sbcset, name)
        !          2706: # endif /* not RE_ENABLE_I18N */
        !          2707:         re_bitset_ptr_t sbcset;
        !          2708:         const unsigned char *name;
        !          2709:     {
        !          2710:       int32_t elem, idx;
        !          2711:       size_t name_len = strlen ((const char *) name);
        !          2712:       if (nrules != 0)
        !          2713:        {
        !          2714:          elem = seek_collating_symbol_entry (name, name_len);
        !          2715:          if (symb_table[2 * elem] != 0)
        !          2716:            {
        !          2717:              /* We found the entry.  */
        !          2718:              idx = symb_table[2 * elem + 1];
        !          2719:              /* Skip the name of collating element name.  */
        !          2720:              idx += 1 + extra[idx];
        !          2721:            }
        !          2722:          else if (symb_table[2 * elem] == 0 && name_len == 1)
        !          2723:            {
        !          2724:              /* No valid character, treat it as a normal
        !          2725:                 character.  */
        !          2726:              bitset_set (sbcset, name[0]);
        !          2727:              return REG_NOERROR;
        !          2728:            }
        !          2729:          else
        !          2730:            return REG_ECOLLATE;
        !          2731: 
        !          2732: # ifdef RE_ENABLE_I18N
        !          2733:          /* Got valid collation sequence, add it as a new entry.  */
        !          2734:          /* Check the space of the arrays.  */
        !          2735:          if (*coll_sym_alloc == mbcset->ncoll_syms)
        !          2736:            {
        !          2737:              /* Not enough, realloc it.  */
        !          2738:              /* +1 in case of mbcset->ncoll_syms is 0.  */
        !          2739:              *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
        !          2740:              /* Use realloc since mbcset->coll_syms is NULL
        !          2741:                 if *alloc == 0.  */
        !          2742:              mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
        !          2743:                                              *coll_sym_alloc);
        !          2744:              if (BE (mbcset->coll_syms == NULL, 0))
        !          2745:                return REG_ESPACE;
        !          2746:            }
        !          2747:          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
        !          2748: # endif /* RE_ENABLE_I18N */
        !          2749:          return REG_NOERROR;
        !          2750:        }
        !          2751:       else
        !          2752:        {
        !          2753:          if (BE (name_len != 1, 0))
        !          2754:            return REG_ECOLLATE;
        !          2755:          else
        !          2756:            {
        !          2757:              bitset_set (sbcset, name[0]);
        !          2758:              return REG_NOERROR;
        !          2759:            }
        !          2760:        }
        !          2761:     }
        !          2762: #endif
        !          2763: 
        !          2764:   re_token_t br_token;
        !          2765:   re_bitset_ptr_t sbcset;
        !          2766: #ifdef RE_ENABLE_I18N
        !          2767:   re_charset_t *mbcset;
        !          2768:   int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
        !          2769:   int equiv_class_alloc = 0, char_class_alloc = 0;
        !          2770: #else /* not RE_ENABLE_I18N */
        !          2771:   int non_match = 0;
        !          2772: #endif /* not RE_ENABLE_I18N */
        !          2773:   bin_tree_t *work_tree;
        !          2774:   int token_len, new_idx;
        !          2775: #ifdef _LIBC
        !          2776:   collseqmb = (const unsigned char *)
        !          2777:     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
        !          2778:   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
        !          2779:   if (nrules)
        !          2780:     {
        !          2781:       /*
        !          2782:       if (MB_CUR_MAX > 1)
        !          2783:       */
        !          2784:        collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
        !          2785:       table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
        !          2786:       symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
        !          2787:                                                  _NL_COLLATE_SYMB_TABLEMB);
        !          2788:       extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
        !          2789:                                                   _NL_COLLATE_SYMB_EXTRAMB);
        !          2790:     }
        !          2791: #endif
        !          2792:   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
        !          2793: #ifdef RE_ENABLE_I18N
        !          2794:   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
        !          2795: #endif /* RE_ENABLE_I18N */
        !          2796: #ifdef RE_ENABLE_I18N
        !          2797:   if (BE (sbcset == NULL || mbcset == NULL, 0))
        !          2798: #else
        !          2799:   if (BE (sbcset == NULL, 0))
        !          2800: #endif /* RE_ENABLE_I18N */
        !          2801:     {
        !          2802:       *err = REG_ESPACE;
        !          2803:       return NULL;
        !          2804:     }
        !          2805: 
        !          2806:   token_len = peek_token_bracket (token, regexp, syntax);
        !          2807:   if (BE (token->type == END_OF_RE, 0))
        !          2808:     {
        !          2809:       *err = REG_BADPAT;
        !          2810:       goto parse_bracket_exp_free_return;
        !          2811:     }
        !          2812:   if (token->type == OP_NON_MATCH_LIST)
        !          2813:     {
        !          2814: #ifdef RE_ENABLE_I18N
        !          2815:       int i;
        !          2816:       mbcset->non_match = 1;
        !          2817: #else /* not RE_ENABLE_I18N */
        !          2818:       non_match = 1;
        !          2819: #endif /* not RE_ENABLE_I18N */
        !          2820:       if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
        !          2821:        bitset_set (sbcset, '\0');
        !          2822:       re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
        !          2823:       token_len = peek_token_bracket (token, regexp, syntax);
        !          2824:       if (BE (token->type == END_OF_RE, 0))
        !          2825:        {
        !          2826:          *err = REG_BADPAT;
        !          2827:          goto parse_bracket_exp_free_return;
        !          2828:        }
        !          2829: #ifdef RE_ENABLE_I18N
        !          2830:       if (MB_CUR_MAX > 1)
        !          2831:        for (i = 0; i < SBC_MAX; ++i)
        !          2832:          if (__btowc (i) == WEOF)
        !          2833:            bitset_set (sbcset, i);
        !          2834: #endif /* RE_ENABLE_I18N */
        !          2835:     }
        !          2836: 
        !          2837:   /* We treat the first ']' as a normal character.  */
        !          2838:   if (token->type == OP_CLOSE_BRACKET)
        !          2839:     token->type = CHARACTER;
        !          2840: 
        !          2841:   while (1)
        !          2842:     {
        !          2843:       bracket_elem_t start_elem, end_elem;
        !          2844:       unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
        !          2845:       unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
        !          2846:       reg_errcode_t ret;
        !          2847:       int token_len2 = 0, is_range_exp = 0;
        !          2848:       re_token_t token2;
        !          2849: 
        !          2850:       start_elem.opr.name = start_name_buf;
        !          2851:       ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
        !          2852:                                   syntax);
        !          2853:       if (BE (ret != REG_NOERROR, 0))
        !          2854:        {
        !          2855:          *err = ret;
        !          2856:          goto parse_bracket_exp_free_return;
        !          2857:        }
        !          2858: 
        !          2859:       token_len = peek_token_bracket (token, regexp, syntax);
        !          2860:       if (BE (token->type == END_OF_RE, 0))
        !          2861:        {
        !          2862:          *err = REG_BADPAT;
        !          2863:          goto parse_bracket_exp_free_return;
        !          2864:        }
        !          2865:       if (token->type == OP_CHARSET_RANGE)
        !          2866:        {
        !          2867:          re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
        !          2868:          token_len2 = peek_token_bracket (&token2, regexp, syntax);
        !          2869:          if (BE (token->type == END_OF_RE, 0))
        !          2870:            {
        !          2871:              *err = REG_BADPAT;
        !          2872:              goto parse_bracket_exp_free_return;
        !          2873:            }
        !          2874:          if (token2.type == OP_CLOSE_BRACKET)
        !          2875:            {
        !          2876:              /* We treat the last '-' as a normal character.  */
        !          2877:              re_string_skip_bytes (regexp, -token_len);
        !          2878:              token->type = CHARACTER;
        !          2879:            }
        !          2880:          else
        !          2881:            is_range_exp = 1;
        !          2882:        }
        !          2883: 
        !          2884:       if (is_range_exp == 1)
        !          2885:        {
        !          2886:          end_elem.opr.name = end_name_buf;
        !          2887:          ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
        !          2888:                                       dfa, syntax);
        !          2889:          if (BE (ret != REG_NOERROR, 0))
        !          2890:            {
        !          2891:              *err = ret;
        !          2892:              goto parse_bracket_exp_free_return;
        !          2893:            }
        !          2894: 
        !          2895:          token_len = peek_token_bracket (token, regexp, syntax);
        !          2896:          if (BE (token->type == END_OF_RE, 0))
        !          2897:            {
        !          2898:              *err = REG_BADPAT;
        !          2899:              goto parse_bracket_exp_free_return;
        !          2900:            }
        !          2901:          *err = build_range_exp (sbcset,
        !          2902: #ifdef RE_ENABLE_I18N
        !          2903:                                  mbcset, &range_alloc,
        !          2904: #endif /* RE_ENABLE_I18N */
        !          2905:                                  &start_elem, &end_elem);
        !          2906:          if (BE (*err != REG_NOERROR, 0))
        !          2907:            goto parse_bracket_exp_free_return;
        !          2908:        }
        !          2909:       else
        !          2910:        {
        !          2911:          switch (start_elem.type)
        !          2912:            {
        !          2913:            case SB_CHAR:
        !          2914:              bitset_set (sbcset, start_elem.opr.ch);
        !          2915:              break;
        !          2916: #ifdef RE_ENABLE_I18N
        !          2917:            case MB_CHAR:
        !          2918:              /* Check whether the array has enough space.  */
        !          2919:              if (mbchar_alloc == mbcset->nmbchars)
        !          2920:                {
        !          2921:                  /* Not enough, realloc it.  */
        !          2922:                  /* +1 in case of mbcset->nmbchars is 0.  */
        !          2923:                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
        !          2924:                  /* Use realloc since array is NULL if *alloc == 0.  */
        !          2925:                  mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
        !          2926:                                                mbchar_alloc);
        !          2927:                  if (BE (mbcset->mbchars == NULL, 0))
        !          2928:                    goto parse_bracket_exp_espace;
        !          2929:                }
        !          2930:              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
        !          2931:              break;
        !          2932: #endif /* RE_ENABLE_I18N */
        !          2933:            case EQUIV_CLASS:
        !          2934:              *err = build_equiv_class (sbcset,
        !          2935: #ifdef RE_ENABLE_I18N
        !          2936:                                        mbcset, &equiv_class_alloc,
        !          2937: #endif /* RE_ENABLE_I18N */
        !          2938:                                        start_elem.opr.name);
        !          2939:              if (BE (*err != REG_NOERROR, 0))
        !          2940:                goto parse_bracket_exp_free_return;
        !          2941:              break;
        !          2942:            case COLL_SYM:
        !          2943:              *err = build_collating_symbol (sbcset,
        !          2944: #ifdef RE_ENABLE_I18N
        !          2945:                                             mbcset, &coll_sym_alloc,
        !          2946: #endif /* RE_ENABLE_I18N */
        !          2947:                                             start_elem.opr.name);
        !          2948:              if (BE (*err != REG_NOERROR, 0))
        !          2949:                goto parse_bracket_exp_free_return;
        !          2950:              break;
        !          2951:            case CHAR_CLASS:
        !          2952:              *err = build_charclass (sbcset,
        !          2953: #ifdef RE_ENABLE_I18N
        !          2954:                                      mbcset, &char_class_alloc,
        !          2955: #endif /* RE_ENABLE_I18N */
        !          2956:                                      start_elem.opr.name, syntax);
        !          2957:              if (BE (*err != REG_NOERROR, 0))
        !          2958:               goto parse_bracket_exp_free_return;
        !          2959:              break;
        !          2960:            default:
        !          2961:              assert (0);
        !          2962:              break;
        !          2963:            }
        !          2964:        }
        !          2965:       if (token->type == OP_CLOSE_BRACKET)
        !          2966:        break;
        !          2967:     }
        !          2968: 
        !          2969:   re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
        !          2970: 
        !          2971:   /* If it is non-matching list.  */
        !          2972: #ifdef RE_ENABLE_I18N
        !          2973:   if (mbcset->non_match)
        !          2974: #else /* not RE_ENABLE_I18N */
        !          2975:   if (non_match)
        !          2976: #endif /* not RE_ENABLE_I18N */
        !          2977:     bitset_not (sbcset);
        !          2978: 
        !          2979:   /* Build a tree for simple bracket.  */
        !          2980:   br_token.type = SIMPLE_BRACKET;
        !          2981:   br_token.opr.sbcset = sbcset;
        !          2982:   new_idx = re_dfa_add_node (dfa, br_token, 0);
        !          2983:   work_tree = create_tree (NULL, NULL, 0, new_idx);
        !          2984:   if (BE (new_idx == -1 || work_tree == NULL, 0))
        !          2985:     goto parse_bracket_exp_espace;
        !          2986: 
        !          2987: #ifdef RE_ENABLE_I18N
        !          2988:   if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
        !          2989:       || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes
        !          2990:                                                || mbcset->non_match)))
        !          2991:     {
        !          2992:       re_token_t alt_token;
        !          2993:       bin_tree_t *mbc_tree;
        !          2994:       /* Build a tree for complex bracket.  */
        !          2995:       br_token.type = COMPLEX_BRACKET;
        !          2996:       br_token.opr.mbcset = mbcset;
        !          2997:       dfa->has_mb_node = 1;
        !          2998:       new_idx = re_dfa_add_node (dfa, br_token, 0);
        !          2999:       mbc_tree = create_tree (NULL, NULL, 0, new_idx);
        !          3000:       if (BE (new_idx == -1 || mbc_tree == NULL, 0))
        !          3001:        goto parse_bracket_exp_espace;
        !          3002:       /* Then join them by ALT node.  */
        !          3003:       dfa->has_plural_match = 1;
        !          3004:       alt_token.type = OP_ALT;
        !          3005:       new_idx = re_dfa_add_node (dfa, alt_token, 0);
        !          3006:       work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
        !          3007:       if (BE (new_idx != -1 && mbc_tree != NULL, 1))
        !          3008:        return work_tree;
        !          3009:     }
        !          3010:   else
        !          3011:     {
        !          3012:       free_charset (mbcset);
        !          3013:       return work_tree;
        !          3014:     }
        !          3015: #else /* not RE_ENABLE_I18N */
        !          3016:   return work_tree;
        !          3017: #endif /* not RE_ENABLE_I18N */
        !          3018: 
        !          3019:  parse_bracket_exp_espace:
        !          3020:   *err = REG_ESPACE;
        !          3021:  parse_bracket_exp_free_return:
        !          3022:   re_free (sbcset);
        !          3023: #ifdef RE_ENABLE_I18N
        !          3024:   free_charset (mbcset);
        !          3025: #endif /* RE_ENABLE_I18N */
        !          3026:   return NULL;
        !          3027: }
        !          3028: 
        !          3029: /* Parse an element in the bracket expression.  */
        !          3030: 
        !          3031: static reg_errcode_t
        !          3032: parse_bracket_element (elem, regexp, token, token_len, dfa, syntax)
        !          3033:      bracket_elem_t *elem;
        !          3034:      re_string_t *regexp;
        !          3035:      re_token_t *token;
        !          3036:      int token_len;
        !          3037:      re_dfa_t *dfa;
        !          3038:      reg_syntax_t syntax;
        !          3039: {
        !          3040: #ifdef RE_ENABLE_I18N
        !          3041:   int cur_char_size;
        !          3042:   cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
        !          3043:   if (cur_char_size > 1)
        !          3044:     {
        !          3045:       elem->type = MB_CHAR;
        !          3046:       elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
        !          3047:       re_string_skip_bytes (regexp, cur_char_size);
        !          3048:       return REG_NOERROR;
        !          3049:     }
        !          3050: #endif /* RE_ENABLE_I18N */
        !          3051:   re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
        !          3052:   if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
        !          3053:       || token->type == OP_OPEN_EQUIV_CLASS)
        !          3054:     return parse_bracket_symbol (elem, regexp, token);
        !          3055:   elem->type = SB_CHAR;
        !          3056:   elem->opr.ch = token->opr.c;
        !          3057:   return REG_NOERROR;
        !          3058: }
        !          3059: 
        !          3060: /* Parse a bracket symbol in the bracket expression.  Bracket symbols are
        !          3061:    such as [:<character_class>:], [.<collating_element>.], and
        !          3062:    [=<equivalent_class>=].  */
        !          3063: 
        !          3064: static reg_errcode_t
        !          3065: parse_bracket_symbol (elem, regexp, token)
        !          3066:      bracket_elem_t *elem;
        !          3067:      re_string_t *regexp;
        !          3068:      re_token_t *token;
        !          3069: {
        !          3070:   unsigned char ch, delim = token->opr.c;
        !          3071:   int i = 0;
        !          3072:   for (;; ++i)
        !          3073:     {
        !          3074:       if (re_string_eoi(regexp) || i >= BRACKET_NAME_BUF_SIZE)
        !          3075:        return REG_EBRACK;
        !          3076:       if (token->type == OP_OPEN_CHAR_CLASS)
        !          3077:        ch = re_string_fetch_byte_case (regexp);
        !          3078:       else
        !          3079:        ch = re_string_fetch_byte (regexp);
        !          3080:       if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
        !          3081:        break;
        !          3082:       elem->opr.name[i] = ch;
        !          3083:     }
        !          3084:   re_string_skip_bytes (regexp, 1);
        !          3085:   elem->opr.name[i] = '\0';
        !          3086:   switch (token->type)
        !          3087:     {
        !          3088:     case OP_OPEN_COLL_ELEM:
        !          3089:       elem->type = COLL_SYM;
        !          3090:       break;
        !          3091:     case OP_OPEN_EQUIV_CLASS:
        !          3092:       elem->type = EQUIV_CLASS;
        !          3093:       break;
        !          3094:     case OP_OPEN_CHAR_CLASS:
        !          3095:       elem->type = CHAR_CLASS;
        !          3096:       break;
        !          3097:     default:
        !          3098:       break;
        !          3099:     }
        !          3100:   return REG_NOERROR;
        !          3101: }
        !          3102: 
        !          3103:   /* Helper function for parse_bracket_exp.
        !          3104:      Build the equivalence class which is represented by NAME.
        !          3105:      The result are written to MBCSET and SBCSET.
        !          3106:      EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
        !          3107:      is a pointer argument sinse we may update it.  */
        !          3108: 
        !          3109: static reg_errcode_t
        !          3110: #ifdef RE_ENABLE_I18N
        !          3111: build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
        !          3112:      re_charset_t *mbcset;
        !          3113:      int *equiv_class_alloc;
        !          3114: #else /* not RE_ENABLE_I18N */
        !          3115: build_equiv_class (sbcset, name)
        !          3116: #endif /* not RE_ENABLE_I18N */
        !          3117:      re_bitset_ptr_t sbcset;
        !          3118:      const unsigned char *name;
        !          3119: {
        !          3120: #if defined _LIBC && defined RE_ENABLE_I18N
        !          3121:   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
        !          3122:   if (nrules != 0)
        !          3123:     {
        !          3124:       const int32_t *table, *indirect;
        !          3125:       const unsigned char *weights, *extra, *cp;
        !          3126:       unsigned char char_buf[2];
        !          3127:       int32_t idx1, idx2;
        !          3128:       unsigned int ch;
        !          3129:       size_t len;
        !          3130:       /* This #include defines a local function!  */
        !          3131: # include <locale/weight.h>
        !          3132:       /* Calculate the index for equivalence class.  */
        !          3133:       cp = name;
        !          3134:       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
        !          3135:       weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
        !          3136:                                               _NL_COLLATE_WEIGHTMB);
        !          3137:       extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
        !          3138:                                                   _NL_COLLATE_EXTRAMB);
        !          3139:       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
        !          3140:                                                _NL_COLLATE_INDIRECTMB);
        !          3141:       idx1 = findidx (&cp);
        !          3142:       if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
        !          3143:        /* This isn't a valid character.  */
        !          3144:        return REG_ECOLLATE;
        !          3145: 
        !          3146:       /* Build single byte matcing table for this equivalence class.  */
        !          3147:       char_buf[1] = (unsigned char) '\0';
        !          3148:       len = weights[idx1];
        !          3149:       for (ch = 0; ch < SBC_MAX; ++ch)
        !          3150:        {
        !          3151:          char_buf[0] = ch;
        !          3152:          cp = char_buf;
        !          3153:          idx2 = findidx (&cp);
        !          3154: /*
        !          3155:          idx2 = table[ch];
        !          3156: */
        !          3157:          if (idx2 == 0)
        !          3158:            /* This isn't a valid character.  */
        !          3159:            continue;
        !          3160:          if (len == weights[idx2])
        !          3161:            {
        !          3162:              int cnt = 0;
        !          3163:              while (cnt <= len &&
        !          3164:                     weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
        !          3165:                ++cnt;
        !          3166: 
        !          3167:              if (cnt > len)
        !          3168:                bitset_set (sbcset, ch);
        !          3169:            }
        !          3170:        }
        !          3171:       /* Check whether the array has enough space.  */
        !          3172:       if (*equiv_class_alloc == mbcset->nequiv_classes)
        !          3173:        {
        !          3174:          /* Not enough, realloc it.  */
        !          3175:          /* +1 in case of mbcset->nequiv_classes is 0.  */
        !          3176:          *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
        !          3177:          /* Use realloc since the array is NULL if *alloc == 0.  */
        !          3178:          mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
        !          3179:                                              *equiv_class_alloc);
        !          3180:          if (BE (mbcset->equiv_classes == NULL, 0))
        !          3181:            return REG_ESPACE;
        !          3182:        }
        !          3183:       mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
        !          3184:     }
        !          3185:   else
        !          3186: #endif /* _LIBC && RE_ENABLE_I18N */
        !          3187:     {
        !          3188:       if (BE (strlen ((const char *) name) != 1, 0))
        !          3189:        return REG_ECOLLATE;
        !          3190:       bitset_set (sbcset, *name);
        !          3191:     }
        !          3192:   return REG_NOERROR;
        !          3193: }
        !          3194: 
        !          3195:   /* Helper function for parse_bracket_exp.
        !          3196:      Build the character class which is represented by NAME.
        !          3197:      The result are written to MBCSET and SBCSET.
        !          3198:      CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
        !          3199:      is a pointer argument sinse we may update it.  */
        !          3200: 
        !          3201: static reg_errcode_t
        !          3202: #ifdef RE_ENABLE_I18N
        !          3203: build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax)
        !          3204:      re_charset_t *mbcset;
        !          3205:      int *char_class_alloc;
        !          3206: #else /* not RE_ENABLE_I18N */
        !          3207: build_charclass (sbcset, class_name, syntax)
        !          3208: #endif /* not RE_ENABLE_I18N */
        !          3209:      re_bitset_ptr_t sbcset;
        !          3210:      const unsigned char *class_name;
        !          3211:      reg_syntax_t syntax;
        !          3212: {
        !          3213:   int i;
        !          3214:   const char *name = (const char *) class_name;
        !          3215: 
        !          3216:   /* In case of REG_ICASE "upper" and "lower" match the both of
        !          3217:      upper and lower cases.  */
        !          3218:   if ((syntax & RE_ICASE)
        !          3219:       && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
        !          3220:     name = "alpha";
        !          3221: 
        !          3222: #ifdef RE_ENABLE_I18N
        !          3223:   /* Check the space of the arrays.  */
        !          3224:   if (*char_class_alloc == mbcset->nchar_classes)
        !          3225:     {
        !          3226:       /* Not enough, realloc it.  */
        !          3227:       /* +1 in case of mbcset->nchar_classes is 0.  */
        !          3228:       *char_class_alloc = 2 * mbcset->nchar_classes + 1;
        !          3229:       /* Use realloc since array is NULL if *alloc == 0.  */
        !          3230:       mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
        !          3231:                                         *char_class_alloc);
        !          3232:       if (BE (mbcset->char_classes == NULL, 0))
        !          3233:        return REG_ESPACE;
        !          3234:     }
        !          3235:   mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
        !          3236: #endif /* RE_ENABLE_I18N */
        !          3237: 
        !          3238: #define BUILD_CHARCLASS_LOOP(ctype_func)\
        !          3239:     for (i = 0; i < SBC_MAX; ++i)      \
        !          3240:       {                                        \
        !          3241:        if (ctype_func (i))             \
        !          3242:          bitset_set (sbcset, i);       \
        !          3243:       }
        !          3244: 
        !          3245:   if (strcmp (name, "alnum") == 0)
        !          3246:     BUILD_CHARCLASS_LOOP (isalnum)
        !          3247:   else if (strcmp (name, "cntrl") == 0)
        !          3248:     BUILD_CHARCLASS_LOOP (iscntrl)
        !          3249:   else if (strcmp (name, "lower") == 0)
        !          3250:     BUILD_CHARCLASS_LOOP (islower)
        !          3251:   else if (strcmp (name, "space") == 0)
        !          3252:     BUILD_CHARCLASS_LOOP (isspace)
        !          3253:   else if (strcmp (name, "alpha") == 0)
        !          3254:     BUILD_CHARCLASS_LOOP (isalpha)
        !          3255:   else if (strcmp (name, "digit") == 0)
        !          3256:     BUILD_CHARCLASS_LOOP (isdigit)
        !          3257:   else if (strcmp (name, "print") == 0)
        !          3258:     BUILD_CHARCLASS_LOOP (isprint)
        !          3259:   else if (strcmp (name, "upper") == 0)
        !          3260:     BUILD_CHARCLASS_LOOP (isupper)
        !          3261:   else if (strcmp (name, "blank") == 0)
        !          3262:     BUILD_CHARCLASS_LOOP (isblank)
        !          3263:   else if (strcmp (name, "graph") == 0)
        !          3264:     BUILD_CHARCLASS_LOOP (isgraph)
        !          3265:   else if (strcmp (name, "punct") == 0)
        !          3266:     BUILD_CHARCLASS_LOOP (ispunct)
        !          3267:   else if (strcmp (name, "xdigit") == 0)
        !          3268:     BUILD_CHARCLASS_LOOP (isxdigit)
        !          3269:   else
        !          3270:     return REG_ECTYPE;
        !          3271: 
        !          3272:   return REG_NOERROR;
        !          3273: }
        !          3274: 
        !          3275: static bin_tree_t *
        !          3276: build_word_op (dfa, not, err)
        !          3277:      re_dfa_t *dfa;
        !          3278:      int not;
        !          3279:      reg_errcode_t *err;
        !          3280: {
        !          3281:   re_bitset_ptr_t sbcset;
        !          3282: #ifdef RE_ENABLE_I18N
        !          3283:   re_charset_t *mbcset;
        !          3284:   int alloc = 0;
        !          3285: #else /* not RE_ENABLE_I18N */
        !          3286:   int non_match = 0;
        !          3287: #endif /* not RE_ENABLE_I18N */
        !          3288:   reg_errcode_t ret;
        !          3289:   re_token_t br_token;
        !          3290:   bin_tree_t *tree;
        !          3291:   int new_idx;
        !          3292: 
        !          3293:   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
        !          3294: #ifdef RE_ENABLE_I18N
        !          3295:   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
        !          3296: #endif /* RE_ENABLE_I18N */
        !          3297: 
        !          3298: #ifdef RE_ENABLE_I18N
        !          3299:   if (BE (sbcset == NULL || mbcset == NULL, 0))
        !          3300: #else /* not RE_ENABLE_I18N */
        !          3301:   if (BE (sbcset == NULL, 0))
        !          3302: #endif /* not RE_ENABLE_I18N */
        !          3303:     {
        !          3304:       *err = REG_ESPACE;
        !          3305:       return NULL;
        !          3306:     }
        !          3307: 
        !          3308:   if (not)
        !          3309:     {
        !          3310: #ifdef RE_ENABLE_I18N
        !          3311:       int i;
        !          3312:       /*
        !          3313:       if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
        !          3314:        bitset_set(cset->sbcset, '\0');
        !          3315:       */
        !          3316:       mbcset->non_match = 1;
        !          3317:       if (MB_CUR_MAX > 1)
        !          3318:        for (i = 0; i < SBC_MAX; ++i)
        !          3319:          if (__btowc (i) == WEOF)
        !          3320:            bitset_set (sbcset, i);
        !          3321: #else /* not RE_ENABLE_I18N */
        !          3322:       non_match = 1;
        !          3323: #endif /* not RE_ENABLE_I18N */
        !          3324:     }
        !          3325: 
        !          3326:   /* We don't care the syntax in this case.  */
        !          3327:   ret = build_charclass (sbcset,
        !          3328: #ifdef RE_ENABLE_I18N
        !          3329:                         mbcset, &alloc,
        !          3330: #endif /* RE_ENABLE_I18N */
        !          3331:                         (const unsigned char *) "alpha", 0);
        !          3332: 
        !          3333:   if (BE (ret != REG_NOERROR, 0))
        !          3334:     {
        !          3335:       re_free (sbcset);
        !          3336: #ifdef RE_ENABLE_I18N
        !          3337:       free_charset (mbcset);
        !          3338: #endif /* RE_ENABLE_I18N */
        !          3339:       *err = ret;
        !          3340:       return NULL;
        !          3341:     }
        !          3342:   /* \w match '_' also.  */
        !          3343:   bitset_set (sbcset, '_');
        !          3344: 
        !          3345:   /* If it is non-matching list.  */
        !          3346: #ifdef RE_ENABLE_I18N
        !          3347:   if (mbcset->non_match)
        !          3348: #else /* not RE_ENABLE_I18N */
        !          3349:   if (non_match)
        !          3350: #endif /* not RE_ENABLE_I18N */
        !          3351:     bitset_not (sbcset);
        !          3352: 
        !          3353:   /* Build a tree for simple bracket.  */
        !          3354:   br_token.type = SIMPLE_BRACKET;
        !          3355:   br_token.opr.sbcset = sbcset;
        !          3356:   new_idx = re_dfa_add_node (dfa, br_token, 0);
        !          3357:   tree = create_tree (NULL, NULL, 0, new_idx);
        !          3358:   if (BE (new_idx == -1 || tree == NULL, 0))
        !          3359:     goto build_word_op_espace;
        !          3360: 
        !          3361: #ifdef RE_ENABLE_I18N
        !          3362:   if (MB_CUR_MAX > 1)
        !          3363:     {
        !          3364:       re_token_t alt_token;
        !          3365:       bin_tree_t *mbc_tree;
        !          3366:       /* Build a tree for complex bracket.  */
        !          3367:       br_token.type = COMPLEX_BRACKET;
        !          3368:       br_token.opr.mbcset = mbcset;
        !          3369:       dfa->has_mb_node = 1;
        !          3370:       new_idx = re_dfa_add_node (dfa, br_token, 0);
        !          3371:       mbc_tree = create_tree (NULL, NULL, 0, new_idx);
        !          3372:       if (BE (new_idx == -1 || mbc_tree == NULL, 0))
        !          3373:        goto build_word_op_espace;
        !          3374:       /* Then join them by ALT node.  */
        !          3375:       alt_token.type = OP_ALT;
        !          3376:       new_idx = re_dfa_add_node (dfa, alt_token, 0);
        !          3377:       tree = create_tree (tree, mbc_tree, 0, new_idx);
        !          3378:       if (BE (new_idx != -1 && mbc_tree != NULL, 1))
        !          3379:        return tree;
        !          3380:     }
        !          3381:   else
        !          3382:     {
        !          3383:       free_charset (mbcset);
        !          3384:       return tree;
        !          3385:     }
        !          3386: #else /* not RE_ENABLE_I18N */
        !          3387:   return tree;
        !          3388: #endif /* not RE_ENABLE_I18N */
        !          3389: 
        !          3390:  build_word_op_espace:
        !          3391:   re_free (sbcset);
        !          3392: #ifdef RE_ENABLE_I18N
        !          3393:   free_charset (mbcset);
        !          3394: #endif /* RE_ENABLE_I18N */
        !          3395:   *err = REG_ESPACE;
        !          3396:   return NULL;
        !          3397: }
        !          3398: 
        !          3399: /* This is intended for the expressions like "a{1,3}".
        !          3400:    Fetch a number from `input', and return the number.
        !          3401:    Return -1, if the number field is empty like "{,1}".
        !          3402:    Return -2, If an error is occured.  */
        !          3403: 
        !          3404: static int
        !          3405: fetch_number (input, token, syntax)
        !          3406:      re_string_t *input;
        !          3407:      re_token_t *token;
        !          3408:      reg_syntax_t syntax;
        !          3409: {
        !          3410:   int num = -1;
        !          3411:   unsigned char c;
        !          3412:   while (1)
        !          3413:     {
        !          3414:       *token = fetch_token (input, syntax);
        !          3415:       c = token->opr.c;
        !          3416:       if (BE (token->type == END_OF_RE, 0))
        !          3417:        return -2;
        !          3418:       if (token->type == OP_CLOSE_DUP_NUM || c == ',')
        !          3419:        break;
        !          3420:       num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2)
        !          3421:             ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0'));
        !          3422:       num = (num > RE_DUP_MAX) ? -2 : num;
        !          3423:     }
        !          3424:   return num;
        !          3425: }
        !          3426: 
        !          3427: #ifdef RE_ENABLE_I18N
        !          3428: static void
        !          3429: free_charset (re_charset_t *cset)
        !          3430: {
        !          3431:   re_free (cset->mbchars);
        !          3432: # ifdef _LIBC
        !          3433:   re_free (cset->coll_syms);
        !          3434:   re_free (cset->equiv_classes);
        !          3435:   re_free (cset->range_starts);
        !          3436:   re_free (cset->range_ends);
        !          3437: # endif
        !          3438:   re_free (cset->char_classes);
        !          3439:   re_free (cset);
        !          3440: }
        !          3441: #endif /* RE_ENABLE_I18N */
        !          3442: 
        !          3443: /* Functions for binary tree operation.  */
        !          3444: 
        !          3445: /* Create a node of tree.
        !          3446:    Note: This function automatically free left and right if malloc fails.  */
        !          3447: 
        !          3448: static bin_tree_t *
        !          3449: create_tree (left, right, type, index)
        !          3450:      bin_tree_t *left;
        !          3451:      bin_tree_t *right;
        !          3452:      re_token_type_t type;
        !          3453:      int index;
        !          3454: {
        !          3455:   bin_tree_t *tree;
        !          3456:   tree = re_malloc (bin_tree_t, 1);
        !          3457:   if (BE (tree == NULL, 0))
        !          3458:     {
        !          3459:       free_bin_tree (left);
        !          3460:       free_bin_tree (right);
        !          3461:       return NULL;
        !          3462:     }
        !          3463:   tree->parent = NULL;
        !          3464:   tree->left = left;
        !          3465:   tree->right = right;
        !          3466:   tree->type = type;
        !          3467:   tree->node_idx = index;
        !          3468:   tree->first = -1;
        !          3469:   tree->next = -1;
        !          3470:   re_node_set_init_empty (&tree->eclosure);
        !          3471: 
        !          3472:   if (left != NULL)
        !          3473:     left->parent = tree;
        !          3474:   if (right != NULL)
        !          3475:     right->parent = tree;
        !          3476:   return tree;
        !          3477: }
        !          3478: 
        !          3479: /* Free the sub tree pointed by TREE.  */
        !          3480: 
        !          3481: static void
        !          3482: free_bin_tree (tree)
        !          3483:      bin_tree_t *tree;
        !          3484: {
        !          3485:   if (tree == NULL)
        !          3486:     return;
        !          3487:   /*re_node_set_free (&tree->eclosure);*/
        !          3488:   free_bin_tree (tree->left);
        !          3489:   free_bin_tree (tree->right);
        !          3490:   re_free (tree);
        !          3491: }
        !          3492: 
        !          3493: /* Duplicate the node SRC, and return new node.  */
        !          3494: 
        !          3495: static bin_tree_t *
        !          3496: duplicate_tree (src, dfa)
        !          3497:      const bin_tree_t *src;
        !          3498:      re_dfa_t *dfa;
        !          3499: {
        !          3500:   bin_tree_t *left = NULL, *right = NULL, *new_tree;
        !          3501:   int new_node_idx;
        !          3502:   /* Since node indies must be according to Post-order of the tree,
        !          3503:      we must duplicate the left at first.  */
        !          3504:   if (src->left != NULL)
        !          3505:     {
        !          3506:       left = duplicate_tree (src->left, dfa);
        !          3507:       if (left == NULL)
        !          3508:        return NULL;
        !          3509:     }
        !          3510: 
        !          3511:   /* Secondaly, duplicate the right.  */
        !          3512:   if (src->right != NULL)
        !          3513:     {
        !          3514:       right = duplicate_tree (src->right, dfa);
        !          3515:       if (right == NULL)
        !          3516:        {
        !          3517:          free_bin_tree (left);
        !          3518:          return NULL;
        !          3519:        }
        !          3520:     }
        !          3521: 
        !          3522:   /* At last, duplicate itself.  */
        !          3523:   if (src->type == NON_TYPE)
        !          3524:     {
        !          3525:       new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
        !          3526:       dfa->nodes[new_node_idx].duplicated = 1;
        !          3527:       if (BE (new_node_idx == -1, 0))
        !          3528:        {
        !          3529:          free_bin_tree (left);
        !          3530:          free_bin_tree (right);
        !          3531:          return NULL;
        !          3532:        }
        !          3533:     }
        !          3534:   else
        !          3535:     new_node_idx = src->type;
        !          3536: 
        !          3537:   new_tree = create_tree (left, right, src->type, new_node_idx);
        !          3538:   if (BE (new_tree == NULL, 0))
        !          3539:     {
        !          3540:       free_bin_tree (left);
        !          3541:       free_bin_tree (right);
        !          3542:     }
        !          3543:   return new_tree;
        !          3544: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>