File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / smartmontools / regex / regexec.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 16:32:16 2012 UTC (12 years, 4 months ago) by misho
Branches: smartmontools, elwix, MAIN
CVS tags: v5_43, v5_42, HEAD
smartmontools

    1: /* Extended regular expression matching and search library.
    2:    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
   22: 				     re_string_t *input, int n);
   23: static void match_ctx_clean (re_match_context_t *mctx);
   24: static void match_ctx_free (re_match_context_t *cache);
   25: static void match_ctx_free_subtops (re_match_context_t *mctx);
   26: static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
   27: 					  int str_idx, int from, int to);
   28: static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
   29: static void match_ctx_clear_flag (re_match_context_t *mctx);
   30: static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
   31: 					   int str_idx);
   32: static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
   33: 						   int node, int str_idx);
   34: static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
   35: 			   re_dfastate_t **limited_sts, int last_node,
   36: 			   int last_str_idx, int check_subexp);
   37: static reg_errcode_t re_search_internal (const regex_t *preg,
   38: 					 const char *string, int length,
   39: 					 int start, int range, int stop,
   40: 					 size_t nmatch, regmatch_t pmatch[],
   41: 					 int eflags);
   42: static int re_search_2_stub (struct re_pattern_buffer *bufp,
   43: 			     const char *string1, int length1,
   44: 			     const char *string2, int length2,
   45: 			     int start, int range, struct re_registers *regs,
   46: 			     int stop, int ret_len);
   47: static int re_search_stub (struct re_pattern_buffer *bufp,
   48: 			   const char *string, int length, int start,
   49: 			   int range, int stop, struct re_registers *regs,
   50: 			   int ret_len);
   51: static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
   52: 			      int nregs, int regs_allocated);
   53: static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
   54: 							 const regex_t *preg,
   55: 							 const re_match_context_t *mctx,
   56: 							 int idx);
   57: static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
   58: 					     re_match_context_t *mctx);
   59: static int check_matching (const regex_t *preg, re_match_context_t *mctx,
   60: 			   int fl_search, int fl_longest_match);
   61: static int check_halt_node_context (const re_dfa_t *dfa, int node,
   62: 				    unsigned int context);
   63: static int check_halt_state_context (const regex_t *preg,
   64: 				     const re_dfastate_t *state,
   65: 				     const re_match_context_t *mctx, int idx);
   66: static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
   67: 			 int cur_idx, int nmatch);
   68: static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
   69: 			      const re_match_context_t *mctx,
   70: 			      int *pidx, int node, re_node_set *eps_via_nodes,
   71: 			      struct re_fail_stack_t *fs);
   72: static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
   73: 				      int str_idx, int *dests, int nregs,
   74: 				      regmatch_t *regs,
   75: 				      re_node_set *eps_via_nodes);
   76: static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
   77: 			   regmatch_t *regs, re_node_set *eps_via_nodes);
   78: static reg_errcode_t set_regs (const regex_t *preg,
   79: 			       const re_match_context_t *mctx,
   80: 			       size_t nmatch, regmatch_t *pmatch,
   81: 			       int fl_backtrack);
   82: static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
   83: 
   84: #ifdef RE_ENABLE_I18N
   85: static int sift_states_iter_mb (const regex_t *preg,
   86: 				const re_match_context_t *mctx,
   87: 				re_sift_context_t *sctx,
   88: 				int node_idx, int str_idx, int max_str_idx);
   89: #endif /* RE_ENABLE_I18N */
   90: static reg_errcode_t sift_states_backward (const regex_t *preg,
   91: 					   re_match_context_t *mctx,
   92: 					   re_sift_context_t *sctx);
   93: static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
   94: 					      re_match_context_t *mctx,
   95: 					      re_sift_context_t *sctx,
   96: 					      int str_idx,
   97: 					      re_node_set *dest_nodes);
   98: static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
   99: 					    re_node_set *dest_nodes,
  100: 					    const re_node_set *candidates);
  101: static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
  102: 					    re_node_set *dest_nodes,
  103: 					    const re_node_set *and_nodes);
  104: static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
  105: 			     re_match_context_t *mctx, int dst_node,
  106: 			     int dst_idx, int src_node, int src_idx);
  107: static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
  108: 				      int limit, re_node_set *eclosures,
  109: 				      int subexp_idx, int node, int str_idx);
  110: static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
  111: 					  re_node_set *dest_nodes,
  112: 					  const re_node_set *candidates,
  113: 					  re_node_set *limits,
  114: 					  struct re_backref_cache_entry *bkref_ents,
  115: 					  int str_idx);
  116: static reg_errcode_t sift_states_bkref (const regex_t *preg,
  117: 					re_match_context_t *mctx,
  118: 					re_sift_context_t *sctx,
  119: 					int str_idx, re_node_set *dest_nodes);
  120: static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
  121: 					      int next_state_log_idx);
  122: static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
  123: 					re_dfastate_t **src, int num);
  124: static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
  125: 				     re_match_context_t *mctx,
  126: 				     re_dfastate_t *state, int fl_search);
  127: static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
  128: 						re_match_context_t *mctx,
  129: 						re_node_set *cur_nodes,
  130: 						int str_idx);
  131: static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
  132: 					re_dfastate_t *pstate,
  133: 					int fl_search,
  134: 					re_match_context_t *mctx);
  135: #ifdef RE_ENABLE_I18N
  136: static reg_errcode_t transit_state_mb (const regex_t *preg,
  137: 				       re_dfastate_t *pstate,
  138: 				       re_match_context_t *mctx);
  139: #endif /* RE_ENABLE_I18N */
  140: static reg_errcode_t transit_state_bkref (const regex_t *preg,
  141: 					  re_node_set *nodes,
  142: 					  re_match_context_t *mctx);
  143: static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
  144: 				 int bkref_node, int bkref_str_idx);
  145: static reg_errcode_t get_subexp_sub (const regex_t *preg,
  146: 				     re_match_context_t *mctx,
  147: 				     re_sub_match_top_t *sub_top,
  148: 				     re_sub_match_last_t *sub_last,
  149: 				     int bkref_node, int bkref_str);
  150: static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
  151: 			     int subexp_idx, int fl_open);
  152: static reg_errcode_t check_arrival (const regex_t *preg,
  153: 				    re_match_context_t *mctx,
  154: 				    state_array_t *path, int top_node,
  155: 				    int top_str, int last_node, int last_str,
  156: 				    int fl_open);
  157: static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
  158: 						   re_dfa_t *dfa,
  159: 						   re_match_context_t *mctx,
  160: 						   int str_idx,
  161: 						   re_node_set *cur_nodes,
  162: 						   re_node_set *next_nodes);
  163: static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
  164: 					       re_node_set *cur_nodes,
  165: 					       int ex_subexp, int fl_open);
  166: static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
  167: 						   re_node_set *dst_nodes,
  168: 						   int target, int ex_subexp,
  169: 						   int fl_open);
  170: static reg_errcode_t expand_bkref_cache (const regex_t *preg,
  171: 					 re_match_context_t *mctx,
  172: 					 re_node_set *cur_nodes, int cur_str,
  173: 					 int last_str, int subexp_num,
  174: 					 int fl_open);
  175: static re_dfastate_t **build_trtable (const regex_t *dfa,
  176: 				      const re_dfastate_t *state,
  177: 				      int fl_search);
  178: #ifdef RE_ENABLE_I18N
  179: static int check_node_accept_bytes (const regex_t *preg, int node_idx,
  180: 				    const re_string_t *input, int idx);
  181: # ifdef _LIBC
  182: static unsigned int find_collation_sequence_value (const unsigned char *mbs,
  183: 						   size_t name_len);
  184: # endif /* _LIBC */
  185: #endif /* RE_ENABLE_I18N */
  186: static int group_nodes_into_DFAstates (const regex_t *dfa,
  187: 				       const re_dfastate_t *state,
  188: 				       re_node_set *states_node,
  189: 				       bitset *states_ch);
  190: static int check_node_accept (const regex_t *preg, const re_token_t *node,
  191: 			      const re_match_context_t *mctx, int idx);
  192: static reg_errcode_t extend_buffers (re_match_context_t *mctx);
  193: 
  194: /* Entry point for POSIX code.  */
  195: 
  196: /* regexec searches for a given pattern, specified by PREG, in the
  197:    string STRING.
  198: 
  199:    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
  200:    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
  201:    least NMATCH elements, and we set them to the offsets of the
  202:    corresponding matched substrings.
  203: 
  204:    EFLAGS specifies `execution flags' which affect matching: if
  205:    REG_NOTBOL is set, then ^ does not match at the beginning of the
  206:    string; if REG_NOTEOL is set, then $ does not match at the end.
  207: 
  208:    We return 0 if we find a match and REG_NOMATCH if not.  */
  209: 
  210: int
  211: regexec (preg, string, nmatch, pmatch, eflags)
  212:     const regex_t *__restrict preg;
  213:     const char *__restrict string;
  214:     size_t nmatch;
  215:     regmatch_t pmatch[];
  216:     int eflags;
  217: {
  218:   reg_errcode_t err;
  219:   int length = strlen (string);
  220:   if (preg->no_sub)
  221:     err = re_search_internal (preg, string, length, 0, length, length, 0,
  222: 			      NULL, eflags);
  223:   else
  224:     err = re_search_internal (preg, string, length, 0, length, length, nmatch,
  225: 			      pmatch, eflags);
  226:   return err != REG_NOERROR;
  227: }
  228: #ifdef _LIBC
  229: weak_alias (__regexec, regexec)
  230: #endif
  231: 
  232: /* Entry points for GNU code.  */
  233: 
  234: /* re_match, re_search, re_match_2, re_search_2
  235: 
  236:    The former two functions operate on STRING with length LENGTH,
  237:    while the later two operate on concatenation of STRING1 and STRING2
  238:    with lengths LENGTH1 and LENGTH2, respectively.
  239: 
  240:    re_match() matches the compiled pattern in BUFP against the string,
  241:    starting at index START.
  242: 
  243:    re_search() first tries matching at index START, then it tries to match
  244:    starting from index START + 1, and so on.  The last start position tried
  245:    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
  246:    way as re_match().)
  247: 
  248:    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
  249:    the first STOP characters of the concatenation of the strings should be
  250:    concerned.
  251: 
  252:    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
  253:    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
  254:    computed relative to the concatenation, not relative to the individual
  255:    strings.)
  256: 
  257:    On success, re_match* functions return the length of the match, re_search*
  258:    return the position of the start of the match.  Return value -1 means no
  259:    match was found and -2 indicates an internal error.  */
  260: 
  261: int
  262: re_match (bufp, string, length, start, regs)
  263:     struct re_pattern_buffer *bufp;
  264:     const char *string;
  265:     int length, start;
  266:     struct re_registers *regs;
  267: {
  268:   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
  269: }
  270: #ifdef _LIBC
  271: weak_alias (__re_match, re_match)
  272: #endif
  273: 
  274: int
  275: re_search (bufp, string, length, start, range, regs)
  276:     struct re_pattern_buffer *bufp;
  277:     const char *string;
  278:     int length, start, range;
  279:     struct re_registers *regs;
  280: {
  281:   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
  282: }
  283: #ifdef _LIBC
  284: weak_alias (__re_search, re_search)
  285: #endif
  286: 
  287: int
  288: re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
  289:     struct re_pattern_buffer *bufp;
  290:     const char *string1, *string2;
  291:     int length1, length2, start, stop;
  292:     struct re_registers *regs;
  293: {
  294:   return re_search_2_stub (bufp, string1, length1, string2, length2,
  295: 			   start, 0, regs, stop, 1);
  296: }
  297: #ifdef _LIBC
  298: weak_alias (__re_match_2, re_match_2)
  299: #endif
  300: 
  301: int
  302: re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
  303:     struct re_pattern_buffer *bufp;
  304:     const char *string1, *string2;
  305:     int length1, length2, start, range, stop;
  306:     struct re_registers *regs;
  307: {
  308:   return re_search_2_stub (bufp, string1, length1, string2, length2,
  309: 			   start, range, regs, stop, 0);
  310: }
  311: #ifdef _LIBC
  312: weak_alias (__re_search_2, re_search_2)
  313: #endif
  314: 
  315: static int
  316: re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
  317: 		  stop, ret_len)
  318:     struct re_pattern_buffer *bufp;
  319:     const char *string1, *string2;
  320:     int length1, length2, start, range, stop, ret_len;
  321:     struct re_registers *regs;
  322: {
  323:   const char *str;
  324:   int rval;
  325:   int len = length1 + length2;
  326:   int free_str = 0;
  327: 
  328:   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
  329:     return -2;
  330: 
  331:   /* Concatenate the strings.  */
  332:   if (length2 > 0)
  333:     if (length1 > 0)
  334:       {
  335: 	char *s = re_malloc (char, len);
  336: 
  337: 	if (BE (s == NULL, 0))
  338: 	  return -2;
  339: 	memcpy (s, string1, length1);
  340: 	memcpy (s + length1, string2, length2);
  341: 	str = s;
  342: 	free_str = 1;
  343:       }
  344:     else
  345:       str = string2;
  346:   else
  347:     str = string1;
  348: 
  349:   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
  350: 			 ret_len);
  351:   if (free_str)
  352:     re_free ((char *) str);
  353:   return rval;
  354: }
  355: 
  356: /* The parameters have the same meaning as those of re_search.
  357:    Additional parameters:
  358:    If RET_LEN is nonzero the length of the match is returned (re_match style);
  359:    otherwise the position of the match is returned.  */
  360: 
  361: static int
  362: re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
  363:     struct re_pattern_buffer *bufp;
  364:     const char *string;
  365:     int length, start, range, stop, ret_len;
  366:     struct re_registers *regs;
  367: {
  368:   reg_errcode_t result;
  369:   regmatch_t *pmatch;
  370:   int nregs, rval;
  371:   int eflags = 0;
  372: 
  373:   /* Check for out-of-range.  */
  374:   if (BE (start < 0 || start > length, 0))
  375:     return -1;
  376:   if (BE (start + range > length, 0))
  377:     range = length - start;
  378:   else if (BE (start + range < 0, 0))
  379:     range = -start;
  380: 
  381:   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
  382:   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
  383: 
  384:   /* Compile fastmap if we haven't yet.  */
  385:   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
  386:     re_compile_fastmap (bufp);
  387: 
  388:   if (BE (bufp->no_sub, 0))
  389:     regs = NULL;
  390: 
  391:   /* We need at least 1 register.  */
  392:   if (regs == NULL)
  393:     nregs = 1;
  394:   else if (BE (bufp->regs_allocated == REGS_FIXED &&
  395: 	       regs->num_regs < bufp->re_nsub + 1, 0))
  396:     {
  397:       nregs = regs->num_regs;
  398:       if (BE (nregs < 1, 0))
  399: 	{
  400: 	  /* Nothing can be copied to regs.  */
  401: 	  regs = NULL;
  402: 	  nregs = 1;
  403: 	}
  404:     }
  405:   else
  406:     nregs = bufp->re_nsub + 1;
  407:   pmatch = re_malloc (regmatch_t, nregs);
  408:   if (BE (pmatch == NULL, 0))
  409:     return -2;
  410: 
  411:   result = re_search_internal (bufp, string, length, start, range, stop,
  412: 			       nregs, pmatch, eflags);
  413: 
  414:   rval = 0;
  415: 
  416:   /* I hope we needn't fill ther regs with -1's when no match was found.  */
  417:   if (result != REG_NOERROR)
  418:     rval = -1;
  419:   else if (regs != NULL)
  420:     {
  421:       /* If caller wants register contents data back, copy them.  */
  422:       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
  423: 					   bufp->regs_allocated);
  424:       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
  425: 	rval = -2;
  426:     }
  427: 
  428:   if (BE (rval == 0, 1))
  429:     {
  430:       if (ret_len)
  431: 	{
  432: 	  assert (pmatch[0].rm_so == start);
  433: 	  rval = pmatch[0].rm_eo - start;
  434: 	}
  435:       else
  436: 	rval = pmatch[0].rm_so;
  437:     }
  438:   re_free (pmatch);
  439:   return rval;
  440: }
  441: 
  442: static unsigned
  443: re_copy_regs (regs, pmatch, nregs, regs_allocated)
  444:     struct re_registers *regs;
  445:     regmatch_t *pmatch;
  446:     int nregs, regs_allocated;
  447: {
  448:   int rval = REGS_REALLOCATE;
  449:   int i;
  450:   int need_regs = nregs + 1;
  451:   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
  452:      uses.  */
  453: 
  454:   /* Have the register data arrays been allocated?  */
  455:   if (regs_allocated == REGS_UNALLOCATED)
  456:     { /* No.  So allocate them with malloc.  */
  457:       regs->start = re_malloc (regoff_t, need_regs);
  458:       if (BE (regs->start == NULL, 0))
  459: 	return REGS_UNALLOCATED;
  460:       regs->end = re_malloc (regoff_t, need_regs);
  461:       if (BE (regs->end == NULL, 0))
  462: 	{
  463: 	  re_free (regs->start);
  464: 	  return REGS_UNALLOCATED;
  465: 	}
  466:       regs->num_regs = need_regs;
  467:     }
  468:   else if (regs_allocated == REGS_REALLOCATE)
  469:     { /* Yes.  If we need more elements than were already
  470: 	 allocated, reallocate them.  If we need fewer, just
  471: 	 leave it alone.  */
  472:       if (need_regs > regs->num_regs)
  473: 	{
  474: 	  regs->start = re_realloc (regs->start, regoff_t, need_regs);
  475: 	  if (BE (regs->start == NULL, 0))
  476: 	    {
  477: 	      if (regs->end != NULL)
  478: 		re_free (regs->end);
  479: 	      return REGS_UNALLOCATED;
  480: 	    }
  481: 	  regs->end = re_realloc (regs->end, regoff_t, need_regs);
  482: 	  if (BE (regs->end == NULL, 0))
  483: 	    {
  484: 	      re_free (regs->start);
  485: 	      return REGS_UNALLOCATED;
  486: 	    }
  487: 	  regs->num_regs = need_regs;
  488: 	}
  489:     }
  490:   else
  491:     {
  492:       assert (regs_allocated == REGS_FIXED);
  493:       /* This function may not be called with REGS_FIXED and nregs too big.  */
  494:       assert (regs->num_regs >= nregs);
  495:       rval = REGS_FIXED;
  496:     }
  497: 
  498:   /* Copy the regs.  */
  499:   for (i = 0; i < nregs; ++i)
  500:     {
  501:       regs->start[i] = pmatch[i].rm_so;
  502:       regs->end[i] = pmatch[i].rm_eo;
  503:     }
  504:   for ( ; i < regs->num_regs; ++i)
  505:     regs->start[i] = regs->end[i] = -1;
  506: 
  507:   return rval;
  508: }
  509: 
  510: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  511:    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
  512:    this memory for recording register information.  STARTS and ENDS
  513:    must be allocated using the malloc library routine, and must each
  514:    be at least NUM_REGS * sizeof (regoff_t) bytes long.
  515: 
  516:    If NUM_REGS == 0, then subsequent matches should allocate their own
  517:    register data.
  518: 
  519:    Unless this function is called, the first search or match using
  520:    PATTERN_BUFFER will allocate its own register data, without
  521:    freeing the old data.  */
  522: 
  523: void
  524: re_set_registers (bufp, regs, num_regs, starts, ends)
  525:     struct re_pattern_buffer *bufp;
  526:     struct re_registers *regs;
  527:     unsigned num_regs;
  528:     regoff_t *starts, *ends;
  529: {
  530:   if (num_regs)
  531:     {
  532:       bufp->regs_allocated = REGS_REALLOCATE;
  533:       regs->num_regs = num_regs;
  534:       regs->start = starts;
  535:       regs->end = ends;
  536:     }
  537:   else
  538:     {
  539:       bufp->regs_allocated = REGS_UNALLOCATED;
  540:       regs->num_regs = 0;
  541:       regs->start = regs->end = (regoff_t *) 0;
  542:     }
  543: }
  544: #ifdef _LIBC
  545: weak_alias (__re_set_registers, re_set_registers)
  546: #endif
  547: 
  548: /* Entry points compatible with 4.2 BSD regex library.  We don't define
  549:    them unless specifically requested.  */
  550: 
  551: #if defined _REGEX_RE_COMP || defined _LIBC
  552: int
  553: # ifdef _LIBC
  554: weak_function
  555: # endif
  556: re_exec (s)
  557:      const char *s;
  558: {
  559:   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
  560: }
  561: #endif /* _REGEX_RE_COMP */
  562: 
  563: static re_node_set empty_set;
  564: 
  565: /* Internal entry point.  */
  566: 
  567: /* Searches for a compiled pattern PREG in the string STRING, whose
  568:    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
  569:    mingings with regexec.  START, and RANGE have the same meanings
  570:    with re_search.
  571:    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
  572:    otherwise return the error code.
  573:    Note: We assume front end functions already check ranges.
  574:    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
  575: 
  576: static reg_errcode_t
  577: re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
  578: 		    eflags)
  579:     const regex_t *preg;
  580:     const char *string;
  581:     int length, start, range, stop, eflags;
  582:     size_t nmatch;
  583:     regmatch_t pmatch[];
  584: {
  585:   reg_errcode_t err;
  586:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
  587:   re_string_t input;
  588:   int left_lim, right_lim, incr;
  589:   int fl_longest_match, match_first, match_last = -1;
  590:   int fast_translate, sb;
  591:   re_match_context_t mctx;
  592:   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
  593: 		    && range && !preg->can_be_null) ? preg->fastmap : NULL);
  594: 
  595:   /* Check if the DFA haven't been compiled.  */
  596:   if (BE (preg->used == 0 || dfa->init_state == NULL
  597: 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
  598: 	  || dfa->init_state_begbuf == NULL, 0))
  599:     return REG_NOMATCH;
  600: 
  601:   re_node_set_init_empty (&empty_set);
  602:   memset (&mctx, '\0', sizeof (re_match_context_t));
  603: 
  604:   /* We must check the longest matching, if nmatch > 0.  */
  605:   fl_longest_match = (nmatch != 0 || dfa->nbackref);
  606: 
  607:   err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
  608: 			    preg->translate, preg->syntax & RE_ICASE);
  609:   if (BE (err != REG_NOERROR, 0))
  610:     goto free_return;
  611:   input.stop = stop;
  612: 
  613:   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
  614:   if (BE (err != REG_NOERROR, 0))
  615:     goto free_return;
  616: 
  617:   /* We will log all the DFA states through which the dfa pass,
  618:      if nmatch > 1, or this dfa has "multibyte node", which is a
  619:      back-reference or a node which can accept multibyte character or
  620:      multi character collating element.  */
  621:   if (nmatch > 1 || dfa->has_mb_node)
  622:     {
  623:       mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
  624:       if (BE (mctx.state_log == NULL, 0))
  625: 	{
  626: 	  err = REG_ESPACE;
  627: 	  goto free_return;
  628: 	}
  629:     }
  630:   else
  631:     mctx.state_log = NULL;
  632: 
  633: #ifdef DEBUG
  634:   /* We assume front-end functions already check them.  */
  635:   assert (start + range >= 0 && start + range <= length);
  636: #endif
  637: 
  638:   match_first = start;
  639:   input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
  640: 		       : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
  641: 
  642:   /* Check incrementally whether of not the input string match.  */
  643:   incr = (range < 0) ? -1 : 1;
  644:   left_lim = (range < 0) ? start + range : start;
  645:   right_lim = (range < 0) ? start : start + range;
  646:   sb = MB_CUR_MAX == 1;
  647:   fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
  648: 
  649:   for (;;)
  650:     {
  651:       /* At first get the current byte from input string.  */
  652:       if (fastmap)
  653: 	{
  654: 	  if (BE (fast_translate, 1))
  655: 	    {
  656: 	      unsigned RE_TRANSLATE_TYPE t
  657: 		= (unsigned RE_TRANSLATE_TYPE) preg->translate;
  658: 	      if (BE (range >= 0, 1))
  659: 		{
  660: 		  if (BE (t != NULL, 0))
  661: 		    {
  662: 		      while (BE (match_first < right_lim, 1)
  663: 			     && !fastmap[t[(unsigned char) string[match_first]]])
  664: 			++match_first;
  665: 		    }
  666: 		  else
  667: 		    {
  668: 		      while (BE (match_first < right_lim, 1)
  669: 			     && !fastmap[(unsigned char) string[match_first]])
  670: 			++match_first;
  671: 		    }
  672: 		  if (BE (match_first == right_lim, 0))
  673: 		    {
  674: 		      int ch = match_first >= length
  675: 			       ? 0 : (unsigned char) string[match_first];
  676: 		      if (!fastmap[t ? t[ch] : ch])
  677: 			break;
  678: 		    }
  679: 		}
  680: 	      else
  681: 		{
  682: 		  while (match_first >= left_lim)
  683: 		    {
  684: 		      int ch = match_first >= length
  685: 			       ? 0 : (unsigned char) string[match_first];
  686: 		      if (fastmap[t ? t[ch] : ch])
  687: 			break;
  688: 		      --match_first;
  689: 		    }
  690: 		  if (match_first < left_lim)
  691: 		    break;
  692: 		}
  693: 	    }
  694: 	  else
  695: 	    {
  696: 	      int ch;
  697: 
  698: 	      do
  699: 		{
  700: 		  /* In this case, we can't determine easily the current byte,
  701: 		     since it might be a component byte of a multibyte
  702: 		     character.  Then we use the constructed buffer
  703: 		     instead.  */
  704: 		  /* If MATCH_FIRST is out of the valid range, reconstruct the
  705: 		     buffers.  */
  706: 		  if (input.raw_mbs_idx + input.valid_len <= match_first
  707: 		      || match_first < input.raw_mbs_idx)
  708: 		    {
  709: 		      err = re_string_reconstruct (&input, match_first, eflags,
  710: 						   preg->newline_anchor);
  711: 		      if (BE (err != REG_NOERROR, 0))
  712: 			goto free_return;
  713: 		    }
  714: 		  /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
  715: 		     Note that MATCH_FIRST must not be smaller than 0.  */
  716: 		  ch = ((match_first >= length) ? 0
  717: 		       : re_string_byte_at (&input,
  718: 					    match_first - input.raw_mbs_idx));
  719: 		  if (fastmap[ch])
  720: 		    break;
  721: 		  match_first += incr;
  722: 		}
  723: 	      while (match_first >= left_lim && match_first <= right_lim);
  724: 	      if (! fastmap[ch])
  725: 		break;
  726: 	    }
  727: 	}
  728: 
  729:       /* Reconstruct the buffers so that the matcher can assume that
  730: 	 the matching starts from the begining of the buffer.  */
  731:       err = re_string_reconstruct (&input, match_first, eflags,
  732: 				   preg->newline_anchor);
  733:       if (BE (err != REG_NOERROR, 0))
  734: 	goto free_return;
  735: #ifdef RE_ENABLE_I18N
  736:      /* Eliminate it when it is a component of a multibyte character
  737: 	 and isn't the head of a multibyte character.  */
  738:       if (sb || re_string_first_byte (&input, 0))
  739: #endif
  740: 	{
  741: 	  /* It seems to be appropriate one, then use the matcher.  */
  742: 	  /* We assume that the matching starts from 0.  */
  743: 	  mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
  744: 	  match_last = check_matching (preg, &mctx, 0, fl_longest_match);
  745: 	  if (match_last != -1)
  746: 	    {
  747: 	      if (BE (match_last == -2, 0))
  748: 		{
  749: 		  err = REG_ESPACE;
  750: 		  goto free_return;
  751: 		}
  752: 	      else
  753: 		{
  754: 		  mctx.match_last = match_last;
  755: 		  if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
  756: 		    {
  757: 		      re_dfastate_t *pstate = mctx.state_log[match_last];
  758: 		      mctx.last_node = check_halt_state_context (preg, pstate,
  759: 								 &mctx, match_last);
  760: 		    }
  761: 		  if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
  762: 		      || dfa->nbackref)
  763: 		    {
  764: 		      err = prune_impossible_nodes (preg, &mctx);
  765: 		      if (err == REG_NOERROR)
  766: 			break;
  767: 		      if (BE (err != REG_NOMATCH, 0))
  768: 			goto free_return;
  769: 		    }
  770: 		  else
  771: 		    break; /* We found a matching.  */
  772: 		}
  773: 	    }
  774: 	  match_ctx_clean (&mctx);
  775: 	}
  776:       /* Update counter.  */
  777:       match_first += incr;
  778:       if (match_first < left_lim || right_lim < match_first)
  779: 	break;
  780:     }
  781: 
  782:   /* Set pmatch[] if we need.  */
  783:   if (match_last != -1 && nmatch > 0)
  784:     {
  785:       int reg_idx;
  786: 
  787:       /* Initialize registers.  */
  788:       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
  789: 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
  790: 
  791:       /* Set the points where matching start/end.  */
  792:       pmatch[0].rm_so = 0;
  793:       pmatch[0].rm_eo = mctx.match_last;
  794: 
  795:       if (!preg->no_sub && nmatch > 1)
  796: 	{
  797: 	  err = set_regs (preg, &mctx, nmatch, pmatch,
  798: 			  dfa->has_plural_match && dfa->nbackref > 0);
  799: 	  if (BE (err != REG_NOERROR, 0))
  800: 	    goto free_return;
  801: 	}
  802: 
  803:       /* At last, add the offset to the each registers, since we slided
  804: 	 the buffers so that We can assume that the matching starts from 0.  */
  805:       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
  806: 	if (pmatch[reg_idx].rm_so != -1)
  807: 	  {
  808: 	    pmatch[reg_idx].rm_so += match_first;
  809: 	    pmatch[reg_idx].rm_eo += match_first;
  810: 	  }
  811:     }
  812:   err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
  813:  free_return:
  814:   re_free (mctx.state_log);
  815:   if (dfa->nbackref)
  816:     match_ctx_free (&mctx);
  817:   re_string_destruct (&input);
  818:   return err;
  819: }
  820: 
  821: static reg_errcode_t
  822: prune_impossible_nodes (preg, mctx)
  823:      const regex_t *preg;
  824:      re_match_context_t *mctx;
  825: {
  826:   int halt_node, match_last;
  827:   reg_errcode_t ret;
  828:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
  829:   re_dfastate_t **sifted_states;
  830:   re_dfastate_t **lim_states = NULL;
  831:   re_sift_context_t sctx;
  832: #ifdef DEBUG
  833:   assert (mctx->state_log != NULL);
  834: #endif
  835:   match_last = mctx->match_last;
  836:   halt_node = mctx->last_node;
  837:   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
  838:   if (BE (sifted_states == NULL, 0))
  839:     {
  840:       ret = REG_ESPACE;
  841:       goto free_return;
  842:     }
  843:   if (dfa->nbackref)
  844:     {
  845:       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
  846:       if (BE (lim_states == NULL, 0))
  847: 	{
  848: 	  ret = REG_ESPACE;
  849: 	  goto free_return;
  850: 	}
  851:       while (1)
  852: 	{
  853: 	  memset (lim_states, '\0',
  854: 		  sizeof (re_dfastate_t *) * (match_last + 1));
  855: 	  match_ctx_clear_flag (mctx);
  856: 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
  857: 			 match_last, 0);
  858: 	  ret = sift_states_backward (preg, mctx, &sctx);
  859: 	  re_node_set_free (&sctx.limits);
  860: 	  if (BE (ret != REG_NOERROR, 0))
  861: 	      goto free_return;
  862: 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
  863: 	    break;
  864: 	  do
  865: 	    {
  866: 	      --match_last;
  867: 	      if (match_last < 0)
  868: 		{
  869: 		  ret = REG_NOMATCH;
  870: 		  goto free_return;
  871: 		}
  872: 	    } while (!mctx->state_log[match_last]->halt);
  873: 	  halt_node = check_halt_state_context (preg,
  874: 						mctx->state_log[match_last],
  875: 						mctx, match_last);
  876: 	}
  877:       ret = merge_state_array (dfa, sifted_states, lim_states,
  878: 			       match_last + 1);
  879:       re_free (lim_states);
  880:       lim_states = NULL;
  881:       if (BE (ret != REG_NOERROR, 0))
  882: 	goto free_return;
  883:     }
  884:   else
  885:     {
  886:       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
  887: 		     match_last, 0);
  888:       ret = sift_states_backward (preg, mctx, &sctx);
  889:       re_node_set_free (&sctx.limits);
  890:       if (BE (ret != REG_NOERROR, 0))
  891: 	goto free_return;
  892:     }
  893:   re_free (mctx->state_log);
  894:   mctx->state_log = sifted_states;
  895:   sifted_states = NULL;
  896:   mctx->last_node = halt_node;
  897:   mctx->match_last = match_last;
  898:   ret = REG_NOERROR;
  899:  free_return:
  900:   re_free (sifted_states);
  901:   re_free (lim_states);
  902:   return ret;
  903: }
  904: 
  905: /* Acquire an initial state and return it.
  906:    We must select appropriate initial state depending on the context,
  907:    since initial states may have constraints like "\<", "^", etc..  */
  908: 
  909: static inline re_dfastate_t *
  910: acquire_init_state_context (err, preg, mctx, idx)
  911:      reg_errcode_t *err;
  912:      const regex_t *preg;
  913:      const re_match_context_t *mctx;
  914:      int idx;
  915: {
  916:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
  917: 
  918:   *err = REG_NOERROR;
  919:   if (dfa->init_state->has_constraint)
  920:     {
  921:       unsigned int context;
  922:       context =  re_string_context_at (mctx->input, idx - 1, mctx->eflags,
  923: 				       preg->newline_anchor);
  924:       if (IS_WORD_CONTEXT (context))
  925: 	return dfa->init_state_word;
  926:       else if (IS_ORDINARY_CONTEXT (context))
  927: 	return dfa->init_state;
  928:       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
  929: 	return dfa->init_state_begbuf;
  930:       else if (IS_NEWLINE_CONTEXT (context))
  931: 	return dfa->init_state_nl;
  932:       else if (IS_BEGBUF_CONTEXT (context))
  933: 	{
  934: 	  /* It is relatively rare case, then calculate on demand.  */
  935: 	  return  re_acquire_state_context (err, dfa,
  936: 					    dfa->init_state->entrance_nodes,
  937: 					    context);
  938: 	}
  939:       else
  940: 	/* Must not happen?  */
  941: 	return dfa->init_state;
  942:     }
  943:   else
  944:     return dfa->init_state;
  945: }
  946: 
  947: /* Check whether the regular expression match input string INPUT or not,
  948:    and return the index where the matching end, return -1 if not match,
  949:    or return -2 in case of an error.
  950:    FL_SEARCH means we must search where the matching starts,
  951:    FL_LONGEST_MATCH means we want the POSIX longest matching.
  952:    Note that the matcher assume that the maching starts from the current
  953:    index of the buffer.  */
  954: 
  955: static int
  956: check_matching (preg, mctx, fl_search, fl_longest_match)
  957:     const regex_t *preg;
  958:     re_match_context_t *mctx;
  959:     int fl_search, fl_longest_match;
  960: {
  961:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
  962:   reg_errcode_t err;
  963:   int match = 0;
  964:   int match_last = -1;
  965:   int cur_str_idx = re_string_cur_idx (mctx->input);
  966:   re_dfastate_t *cur_state;
  967: 
  968:   cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
  969:   /* An initial state must not be NULL(invalid state).  */
  970:   if (BE (cur_state == NULL, 0))
  971:     return -2;
  972:   if (mctx->state_log != NULL)
  973:     mctx->state_log[cur_str_idx] = cur_state;
  974: 
  975:   /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
  976:      later.  E.g. Processing back references.  */
  977:   if (dfa->nbackref)
  978:     {
  979:       err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
  980:       if (BE (err != REG_NOERROR, 0))
  981: 	return err;
  982:     }
  983: 
  984:   if (cur_state->has_backref)
  985:     {
  986:       err = transit_state_bkref (preg, &cur_state->nodes, mctx);
  987:       if (BE (err != REG_NOERROR, 0))
  988: 	return err;
  989:     }
  990: 
  991:   /* If the RE accepts NULL string.  */
  992:   if (cur_state->halt)
  993:     {
  994:       if (!cur_state->has_constraint
  995: 	  || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
  996: 	{
  997: 	  if (!fl_longest_match)
  998: 	    return cur_str_idx;
  999: 	  else
 1000: 	    {
 1001: 	      match_last = cur_str_idx;
 1002: 	      match = 1;
 1003: 	    }
 1004: 	}
 1005:     }
 1006: 
 1007:   while (!re_string_eoi (mctx->input))
 1008:     {
 1009:       cur_state = transit_state (&err, preg, mctx, cur_state,
 1010: 				 fl_search && !match);
 1011:       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
 1012: 	{
 1013: 	  cur_str_idx = re_string_cur_idx (mctx->input);
 1014: 	  if (BE (err != REG_NOERROR, 0))
 1015: 	    return -2;
 1016: 	  if (fl_search && !match)
 1017: 	    {
 1018: 	      /* Restart from initial state, since we are searching
 1019: 		 the point from where matching start.  */
 1020: #ifdef RE_ENABLE_I18N
 1021: 	      if (MB_CUR_MAX == 1
 1022: 		  || re_string_first_byte (mctx->input, cur_str_idx))
 1023: #endif /* RE_ENABLE_I18N */
 1024: 		cur_state = acquire_init_state_context (&err, preg, mctx,
 1025: 							cur_str_idx);
 1026: 	      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
 1027: 		return -2;
 1028: 	      if (mctx->state_log != NULL)
 1029: 		mctx->state_log[cur_str_idx] = cur_state;
 1030: 	    }
 1031: 	  else if (!fl_longest_match && match)
 1032: 	    break;
 1033: 	  else /* (fl_longest_match && match) || (!fl_search && !match)  */
 1034: 	    {
 1035: 	      if (mctx->state_log == NULL)
 1036: 		break;
 1037: 	      else
 1038: 		{
 1039: 		  int max = mctx->state_log_top;
 1040: 		  for (; cur_str_idx <= max; ++cur_str_idx)
 1041: 		    if (mctx->state_log[cur_str_idx] != NULL)
 1042: 		      break;
 1043: 		  if (cur_str_idx > max)
 1044: 		    break;
 1045: 		}
 1046: 	    }
 1047: 	}
 1048: 
 1049:       if (cur_state != NULL && cur_state->halt)
 1050: 	{
 1051: 	  /* Reached at a halt state.
 1052: 	     Check the halt state can satisfy the current context.  */
 1053: 	  if (!cur_state->has_constraint
 1054: 	      || check_halt_state_context (preg, cur_state, mctx,
 1055: 					   re_string_cur_idx (mctx->input)))
 1056: 	    {
 1057: 	      /* We found an appropriate halt state.  */
 1058: 	      match_last = re_string_cur_idx (mctx->input);
 1059: 	      match = 1;
 1060: 	      if (!fl_longest_match)
 1061: 		break;
 1062: 	    }
 1063: 	}
 1064:    }
 1065:   return match_last;
 1066: }
 1067: 
 1068: /* Check NODE match the current context.  */
 1069: 
 1070: static int check_halt_node_context (dfa, node, context)
 1071:     const re_dfa_t *dfa;
 1072:     int node;
 1073:     unsigned int context;
 1074: {
 1075:   re_token_type_t type = dfa->nodes[node].type;
 1076:   unsigned int constraint = dfa->nodes[node].constraint;
 1077:   if (type != END_OF_RE)
 1078:     return 0;
 1079:   if (!constraint)
 1080:     return 1;
 1081:   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
 1082:     return 0;
 1083:   return 1;
 1084: }
 1085: 
 1086: /* Check the halt state STATE match the current context.
 1087:    Return 0 if not match, if the node, STATE has, is a halt node and
 1088:    match the context, return the node.  */
 1089: 
 1090: static int
 1091: check_halt_state_context (preg, state, mctx, idx)
 1092:     const regex_t *preg;
 1093:     const re_dfastate_t *state;
 1094:     const re_match_context_t *mctx;
 1095:     int idx;
 1096: {
 1097:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 1098:   int i;
 1099:   unsigned int context;
 1100: #ifdef DEBUG
 1101:   assert (state->halt);
 1102: #endif
 1103:   context = re_string_context_at (mctx->input, idx, mctx->eflags,
 1104: 				  preg->newline_anchor);
 1105:   for (i = 0; i < state->nodes.nelem; ++i)
 1106:     if (check_halt_node_context (dfa, state->nodes.elems[i], context))
 1107:       return state->nodes.elems[i];
 1108:   return 0;
 1109: }
 1110: 
 1111: /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
 1112:    corresponding to the DFA).
 1113:    Return the destination node, and update EPS_VIA_NODES, return -1 in case
 1114:    of errors.  */
 1115: 
 1116: static int
 1117: proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
 1118:     const regex_t *preg;
 1119:     regmatch_t *regs;
 1120:     const re_match_context_t *mctx;
 1121:     int nregs, *pidx, node;
 1122:     re_node_set *eps_via_nodes;
 1123:     struct re_fail_stack_t *fs;
 1124: {
 1125:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 1126:   int i, err, dest_node;
 1127:   dest_node = -1;
 1128:   if (IS_EPSILON_NODE (dfa->nodes[node].type))
 1129:     {
 1130:       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
 1131:       int ndest, dest_nodes[2];
 1132:       err = re_node_set_insert (eps_via_nodes, node);
 1133:       if (BE (err < 0, 0))
 1134: 	return -1;
 1135:       /* Pick up valid destinations.  */
 1136:       for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
 1137: 	{
 1138: 	  int candidate = dfa->edests[node].elems[i];
 1139: 	  if (!re_node_set_contains (cur_nodes, candidate))
 1140: 	    continue;
 1141: 	  dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
 1142: 	  dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
 1143: 	  ++ndest;
 1144: 	}
 1145:       if (ndest <= 1)
 1146: 	return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
 1147:       /* In order to avoid infinite loop like "(a*)*".  */
 1148:       if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
 1149: 	return dest_nodes[1];
 1150:       if (fs != NULL)
 1151: 	push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
 1152:       return dest_nodes[0];
 1153:     }
 1154:   else
 1155:     {
 1156:       int naccepted = 0;
 1157:       re_token_type_t type = dfa->nodes[node].type;
 1158: 
 1159: #ifdef RE_ENABLE_I18N
 1160:       if (ACCEPT_MB_NODE (type))
 1161: 	naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
 1162:       else
 1163: #endif /* RE_ENABLE_I18N */
 1164:       if (type == OP_BACK_REF)
 1165: 	{
 1166: 	  int subexp_idx = dfa->nodes[node].opr.idx;
 1167: 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
 1168: 	  if (fs != NULL)
 1169: 	    {
 1170: 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
 1171: 		return -1;
 1172: 	      else if (naccepted)
 1173: 		{
 1174: 		  char *buf = (char *) re_string_get_buffer (mctx->input);
 1175: 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
 1176: 			      naccepted) != 0)
 1177: 		    return -1;
 1178: 		}
 1179: 	    }
 1180: 
 1181: 	  if (naccepted == 0)
 1182: 	    {
 1183: 	      err = re_node_set_insert (eps_via_nodes, node);
 1184: 	      if (BE (err < 0, 0))
 1185: 		return -2;
 1186: 	      dest_node = dfa->edests[node].elems[0];
 1187: 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1188: 					dest_node))
 1189: 		return dest_node;
 1190: 	    }
 1191: 	}
 1192: 
 1193:       if (naccepted != 0
 1194: 	  || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
 1195: 	{
 1196: 	  dest_node = dfa->nexts[node];
 1197: 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
 1198: 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
 1199: 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
 1200: 					       dest_node)))
 1201: 	    return -1;
 1202: 	  re_node_set_empty (eps_via_nodes);
 1203: 	  return dest_node;
 1204: 	}
 1205:     }
 1206:   return -1;
 1207: }
 1208: 
 1209: static reg_errcode_t
 1210: push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
 1211:      struct re_fail_stack_t *fs;
 1212:      int str_idx, *dests, nregs;
 1213:      regmatch_t *regs;
 1214:      re_node_set *eps_via_nodes;
 1215: {
 1216:   reg_errcode_t err;
 1217:   int num = fs->num++;
 1218:   if (fs->num == fs->alloc)
 1219:     {
 1220:       struct re_fail_stack_ent_t *new_array;
 1221:       fs->alloc *= 2;
 1222:       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
 1223: 				       * fs->alloc));
 1224:       if (new_array == NULL)
 1225: 	return REG_ESPACE;
 1226:       fs->stack = new_array;
 1227:     }
 1228:   fs->stack[num].idx = str_idx;
 1229:   fs->stack[num].node = dests[1];
 1230:   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
 1231:   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
 1232:   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
 1233:   return err;
 1234: }
 1235: 
 1236: static int
 1237: pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
 1238:      struct re_fail_stack_t *fs;
 1239:      int *pidx, nregs;
 1240:      regmatch_t *regs;
 1241:      re_node_set *eps_via_nodes;
 1242: {
 1243:   int num = --fs->num;
 1244:   assert (num >= 0);
 1245:  *pidx = fs->stack[num].idx;
 1246:   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
 1247:   re_node_set_free (eps_via_nodes);
 1248:   re_free (fs->stack[num].regs);
 1249:   *eps_via_nodes = fs->stack[num].eps_via_nodes;
 1250:   return fs->stack[num].node;
 1251: }
 1252: 
 1253: /* Set the positions where the subexpressions are starts/ends to registers
 1254:    PMATCH.
 1255:    Note: We assume that pmatch[0] is already set, and
 1256:    pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
 1257: 
 1258: static reg_errcode_t
 1259: set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
 1260:      const regex_t *preg;
 1261:      const re_match_context_t *mctx;
 1262:      size_t nmatch;
 1263:      regmatch_t *pmatch;
 1264:      int fl_backtrack;
 1265: {
 1266:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 1267:   int idx, cur_node, real_nmatch;
 1268:   re_node_set eps_via_nodes;
 1269:   struct re_fail_stack_t *fs;
 1270:   struct re_fail_stack_t fs_body = {0, 2, NULL};
 1271: #ifdef DEBUG
 1272:   assert (nmatch > 1);
 1273:   assert (mctx->state_log != NULL);
 1274: #endif
 1275:   if (fl_backtrack)
 1276:     {
 1277:       fs = &fs_body;
 1278:       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
 1279:     }
 1280:   else
 1281:     fs = NULL;
 1282:   cur_node = dfa->init_node;
 1283:   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
 1284:   re_node_set_init_empty (&eps_via_nodes);
 1285:   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
 1286:     {
 1287:       update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
 1288:       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
 1289: 	{
 1290: 	  int reg_idx;
 1291: 	  if (fs)
 1292: 	    {
 1293: 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 1294: 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
 1295: 		  break;
 1296: 	      if (reg_idx == nmatch)
 1297: 		{
 1298: 		  re_node_set_free (&eps_via_nodes);
 1299: 		  return free_fail_stack_return (fs);
 1300: 		}
 1301: 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1302: 					 &eps_via_nodes);
 1303: 	    }
 1304: 	  else
 1305: 	    {
 1306: 	      re_node_set_free (&eps_via_nodes);
 1307: 	      return REG_NOERROR;
 1308: 	    }
 1309: 	}
 1310: 
 1311:       /* Proceed to next node.  */
 1312:       cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
 1313: 				    &eps_via_nodes, fs);
 1314: 
 1315:       if (BE (cur_node < 0, 0))
 1316: 	{
 1317: 	  if (cur_node == -2)
 1318: 	    return REG_ESPACE;
 1319: 	  if (fs)
 1320: 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
 1321: 				       &eps_via_nodes);
 1322: 	  else
 1323: 	    {
 1324: 	      re_node_set_free (&eps_via_nodes);
 1325: 	      return REG_NOMATCH;
 1326: 	    }
 1327: 	}
 1328:     }
 1329:   re_node_set_free (&eps_via_nodes);
 1330:   return free_fail_stack_return (fs);
 1331: }
 1332: 
 1333: static reg_errcode_t
 1334: free_fail_stack_return (fs)
 1335:      struct re_fail_stack_t *fs;
 1336: {
 1337:   if (fs)
 1338:     {
 1339:       int fs_idx;
 1340:       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
 1341: 	{
 1342: 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
 1343: 	  re_free (fs->stack[fs_idx].regs);
 1344: 	}
 1345:       re_free (fs->stack);
 1346:     }
 1347:   return REG_NOERROR;
 1348: }
 1349: 
 1350: static void
 1351: update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
 1352:      re_dfa_t *dfa;
 1353:      regmatch_t *pmatch;
 1354:      int cur_node, cur_idx, nmatch;
 1355: {
 1356:   int type = dfa->nodes[cur_node].type;
 1357:   int reg_num;
 1358:   if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
 1359:     return;
 1360:   reg_num = dfa->nodes[cur_node].opr.idx + 1;
 1361:   if (reg_num >= nmatch)
 1362:     return;
 1363:   if (type == OP_OPEN_SUBEXP)
 1364:     {
 1365:       /* We are at the first node of this sub expression.  */
 1366:       pmatch[reg_num].rm_so = cur_idx;
 1367:       pmatch[reg_num].rm_eo = -1;
 1368:     }
 1369:   else if (type == OP_CLOSE_SUBEXP)
 1370:     /* We are at the first node of this sub expression.  */
 1371:     pmatch[reg_num].rm_eo = cur_idx;
 1372: }
 1373: 
 1374: #define NUMBER_OF_STATE 1
 1375: 
 1376: /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
 1377:    and sift the nodes in each states according to the following rules.
 1378:    Updated state_log will be wrote to STATE_LOG.
 1379: 
 1380:    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
 1381:      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
 1382: 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
 1383: 	the LAST_NODE, we throw away the node `a'.
 1384:      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
 1385: 	string `s' and transit to `b':
 1386: 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
 1387: 	   away the node `a'.
 1388: 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
 1389: 	    throwed away, we throw away the node `a'.
 1390:      3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
 1391: 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
 1392: 	   node `a'.
 1393: 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
 1394: 	    we throw away the node `a'.  */
 1395: 
 1396: #define STATE_NODE_CONTAINS(state,node) \
 1397:   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
 1398: 
 1399: static reg_errcode_t
 1400: sift_states_backward (preg, mctx, sctx)
 1401:      const regex_t *preg;
 1402:      re_match_context_t *mctx;
 1403:      re_sift_context_t *sctx;
 1404: {
 1405:   reg_errcode_t err;
 1406:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 1407:   int null_cnt = 0;
 1408:   int str_idx = sctx->last_str_idx;
 1409:   re_node_set cur_dest;
 1410:   re_node_set *cur_src; /* Points the state_log[str_idx]->nodes  */
 1411: 
 1412: #ifdef DEBUG
 1413:   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
 1414: #endif
 1415:   cur_src = &mctx->state_log[str_idx]->nodes;
 1416: 
 1417:   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
 1418:      transit to the last_node and the last_node itself.  */
 1419:   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
 1420:   if (BE (err != REG_NOERROR, 0))
 1421:     return err;
 1422:   err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
 1423:   if (BE (err != REG_NOERROR, 0))
 1424:     goto free_return;
 1425: 
 1426:   /* Then check each states in the state_log.  */
 1427:   while (str_idx > 0)
 1428:     {
 1429:       int i, ret;
 1430:       /* Update counters.  */
 1431:       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
 1432:       if (null_cnt > mctx->max_mb_elem_len)
 1433: 	{
 1434: 	  memset (sctx->sifted_states, '\0',
 1435: 		  sizeof (re_dfastate_t *) * str_idx);
 1436: 	  re_node_set_free (&cur_dest);
 1437: 	  return REG_NOERROR;
 1438: 	}
 1439:       re_node_set_empty (&cur_dest);
 1440:       --str_idx;
 1441:       cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
 1442: 		 : &mctx->state_log[str_idx]->nodes);
 1443: 
 1444:       /* Then build the next sifted state.
 1445: 	 We build the next sifted state on `cur_dest', and update
 1446: 	 `sifted_states[str_idx]' with `cur_dest'.
 1447: 	 Note:
 1448: 	 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
 1449: 	 `cur_src' points the node_set of the old `state_log[str_idx]'.  */
 1450:       for (i = 0; i < cur_src->nelem; i++)
 1451: 	{
 1452: 	  int prev_node = cur_src->elems[i];
 1453: 	  int naccepted = 0;
 1454: 	  re_token_type_t type = dfa->nodes[prev_node].type;
 1455: 
 1456: 	  if (IS_EPSILON_NODE(type))
 1457: 	    continue;
 1458: #ifdef RE_ENABLE_I18N
 1459: 	  /* If the node may accept `multi byte'.  */
 1460: 	  if (ACCEPT_MB_NODE (type))
 1461: 	    naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
 1462: 					     str_idx, sctx->last_str_idx);
 1463: 
 1464: #endif /* RE_ENABLE_I18N */
 1465: 	  /* We don't check backreferences here.
 1466: 	     See update_cur_sifted_state().  */
 1467: 
 1468: 	  if (!naccepted
 1469: 	      && check_node_accept (preg, dfa->nodes + prev_node, mctx,
 1470: 				    str_idx)
 1471: 	      && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
 1472: 				      dfa->nexts[prev_node]))
 1473: 	    naccepted = 1;
 1474: 
 1475: 	  if (naccepted == 0)
 1476: 	    continue;
 1477: 
 1478: 	  if (sctx->limits.nelem)
 1479: 	    {
 1480: 	      int to_idx = str_idx + naccepted;
 1481: 	      if (check_dst_limits (dfa, &sctx->limits, mctx,
 1482: 				    dfa->nexts[prev_node], to_idx,
 1483: 				    prev_node, str_idx))
 1484: 		continue;
 1485: 	    }
 1486: 	  ret = re_node_set_insert (&cur_dest, prev_node);
 1487: 	  if (BE (ret == -1, 0))
 1488: 	    {
 1489: 	      err = REG_ESPACE;
 1490: 	      goto free_return;
 1491: 	    }
 1492: 	}
 1493: 
 1494:       /* Add all the nodes which satisfy the following conditions:
 1495: 	 - It can epsilon transit to a node in CUR_DEST.
 1496: 	 - It is in CUR_SRC.
 1497: 	 And update state_log.  */
 1498:       err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
 1499:       if (BE (err != REG_NOERROR, 0))
 1500: 	goto free_return;
 1501:     }
 1502:   err = REG_NOERROR;
 1503:  free_return:
 1504:   re_node_set_free (&cur_dest);
 1505:   return err;
 1506: }
 1507: 
 1508: /* Helper functions.  */
 1509: 
 1510: static inline reg_errcode_t
 1511: clean_state_log_if_need (mctx, next_state_log_idx)
 1512:     re_match_context_t *mctx;
 1513:     int next_state_log_idx;
 1514: {
 1515:   int top = mctx->state_log_top;
 1516: 
 1517:   if (next_state_log_idx >= mctx->input->bufs_len
 1518:       || (next_state_log_idx >= mctx->input->valid_len
 1519: 	  && mctx->input->valid_len < mctx->input->len))
 1520:     {
 1521:       reg_errcode_t err;
 1522:       err = extend_buffers (mctx);
 1523:       if (BE (err != REG_NOERROR, 0))
 1524: 	return err;
 1525:     }
 1526: 
 1527:   if (top < next_state_log_idx)
 1528:     {
 1529:       memset (mctx->state_log + top + 1, '\0',
 1530: 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
 1531:       mctx->state_log_top = next_state_log_idx;
 1532:     }
 1533:   return REG_NOERROR;
 1534: }
 1535: 
 1536: static reg_errcode_t
 1537: merge_state_array (dfa, dst, src, num)
 1538:      re_dfa_t *dfa;
 1539:      re_dfastate_t **dst;
 1540:      re_dfastate_t **src;
 1541:      int num;
 1542: {
 1543:   int st_idx;
 1544:   reg_errcode_t err;
 1545:   for (st_idx = 0; st_idx < num; ++st_idx)
 1546:     {
 1547:       if (dst[st_idx] == NULL)
 1548: 	dst[st_idx] = src[st_idx];
 1549:       else if (src[st_idx] != NULL)
 1550: 	{
 1551: 	  re_node_set merged_set;
 1552: 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
 1553: 					&src[st_idx]->nodes);
 1554: 	  if (BE (err != REG_NOERROR, 0))
 1555: 	    return err;
 1556: 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
 1557: 	  re_node_set_free (&merged_set);
 1558: 	  if (BE (err != REG_NOERROR, 0))
 1559: 	    return err;
 1560: 	}
 1561:     }
 1562:   return REG_NOERROR;
 1563: }
 1564: 
 1565: static reg_errcode_t
 1566: update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
 1567:      const regex_t *preg;
 1568:      re_match_context_t *mctx;
 1569:      re_sift_context_t *sctx;
 1570:      int str_idx;
 1571:      re_node_set *dest_nodes;
 1572: {
 1573:   reg_errcode_t err;
 1574:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 1575:   const re_node_set *candidates;
 1576:   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
 1577: 		: &mctx->state_log[str_idx]->nodes);
 1578: 
 1579:   /* At first, add the nodes which can epsilon transit to a node in
 1580:      DEST_NODE.  */
 1581:   if (dest_nodes->nelem)
 1582:     {
 1583:       err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
 1584:       if (BE (err != REG_NOERROR, 0))
 1585: 	return err;
 1586:     }
 1587: 
 1588:   /* Then, check the limitations in the current sift_context.  */
 1589:   if (dest_nodes->nelem && sctx->limits.nelem)
 1590:     {
 1591:       err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
 1592: 				 mctx->bkref_ents, str_idx);
 1593:       if (BE (err != REG_NOERROR, 0))
 1594: 	return err;
 1595:     }
 1596: 
 1597:   /* Update state_log.  */
 1598:   sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
 1599:   if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
 1600:     return err;
 1601: 
 1602:   if ((mctx->state_log[str_idx] != NULL
 1603:        && mctx->state_log[str_idx]->has_backref))
 1604:     {
 1605:       err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
 1606:       if (BE (err != REG_NOERROR, 0))
 1607: 	return err;
 1608:     }
 1609:   return REG_NOERROR;
 1610: }
 1611: 
 1612: static reg_errcode_t
 1613: add_epsilon_src_nodes (dfa, dest_nodes, candidates)
 1614:      re_dfa_t *dfa;
 1615:      re_node_set *dest_nodes;
 1616:      const re_node_set *candidates;
 1617: {
 1618:   reg_errcode_t err;
 1619:   int src_idx;
 1620:   re_node_set src_copy;
 1621: 
 1622:   err = re_node_set_init_copy (&src_copy, dest_nodes);
 1623:   if (BE (err != REG_NOERROR, 0))
 1624:     return err;
 1625:   for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
 1626:     {
 1627:       err = re_node_set_add_intersect (dest_nodes, candidates,
 1628: 				       dfa->inveclosures
 1629: 				       + src_copy.elems[src_idx]);
 1630:       if (BE (err != REG_NOERROR, 0))
 1631: 	{
 1632: 	  re_node_set_free (&src_copy);
 1633: 	  return err;
 1634: 	}
 1635:     }
 1636:   re_node_set_free (&src_copy);
 1637:   return REG_NOERROR;
 1638: }
 1639: 
 1640: static reg_errcode_t
 1641: sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
 1642:      re_dfa_t *dfa;
 1643:      int node;
 1644:      re_node_set *dest_nodes;
 1645:      const re_node_set *candidates;
 1646: {
 1647:     int ecl_idx;
 1648:     reg_errcode_t err;
 1649:     re_node_set *inv_eclosure = dfa->inveclosures + node;
 1650:     re_node_set except_nodes;
 1651:     re_node_set_init_empty (&except_nodes);
 1652:     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1653:       {
 1654: 	int cur_node = inv_eclosure->elems[ecl_idx];
 1655: 	if (cur_node == node)
 1656: 	  continue;
 1657: 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
 1658: 	  {
 1659: 	    int edst1 = dfa->edests[cur_node].elems[0];
 1660: 	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
 1661: 			 ? dfa->edests[cur_node].elems[1] : -1);
 1662: 	    if ((!re_node_set_contains (inv_eclosure, edst1)
 1663: 		 && re_node_set_contains (dest_nodes, edst1))
 1664: 		|| (edst2 > 0
 1665: 		    && !re_node_set_contains (inv_eclosure, edst2)
 1666: 		    && re_node_set_contains (dest_nodes, edst2)))
 1667: 	      {
 1668: 		err = re_node_set_add_intersect (&except_nodes, candidates,
 1669: 						 dfa->inveclosures + cur_node);
 1670: 		if (BE (err != REG_NOERROR, 0))
 1671: 		  {
 1672: 		    re_node_set_free (&except_nodes);
 1673: 		    return err;
 1674: 		  }
 1675: 	      }
 1676: 	  }
 1677:       }
 1678:     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
 1679:       {
 1680: 	int cur_node = inv_eclosure->elems[ecl_idx];
 1681: 	if (!re_node_set_contains (&except_nodes, cur_node))
 1682: 	  {
 1683: 	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
 1684: 	    re_node_set_remove_at (dest_nodes, idx);
 1685: 	  }
 1686:       }
 1687:     re_node_set_free (&except_nodes);
 1688:     return REG_NOERROR;
 1689: }
 1690: 
 1691: static int
 1692: check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
 1693:      re_dfa_t *dfa;
 1694:      re_node_set *limits;
 1695:      re_match_context_t *mctx;
 1696:      int dst_node, dst_idx, src_node, src_idx;
 1697: {
 1698:   int lim_idx, src_pos, dst_pos;
 1699: 
 1700:   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 1701:     {
 1702:       int subexp_idx;
 1703:       struct re_backref_cache_entry *ent;
 1704:       ent = mctx->bkref_ents + limits->elems[lim_idx];
 1705:       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
 1706: 
 1707:       dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
 1708: 					   dfa->eclosures + dst_node,
 1709: 					   subexp_idx, dst_node, dst_idx);
 1710:       src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
 1711: 					   dfa->eclosures + src_node,
 1712: 					   subexp_idx, src_node, src_idx);
 1713: 
 1714:       /* In case of:
 1715: 	 <src> <dst> ( <subexp> )
 1716: 	 ( <subexp> ) <src> <dst>
 1717: 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
 1718:       if (src_pos == dst_pos)
 1719: 	continue; /* This is unrelated limitation.  */
 1720:       else
 1721: 	return 1;
 1722:     }
 1723:   return 0;
 1724: }
 1725: 
 1726: static int
 1727: check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
 1728: 			   str_idx)
 1729:      re_dfa_t *dfa;
 1730:      re_match_context_t *mctx;
 1731:      re_node_set *eclosures;
 1732:      int limit, subexp_idx, node, str_idx;
 1733: {
 1734:   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
 1735:   int pos = (str_idx < lim->subexp_from ? -1
 1736: 	     : (lim->subexp_to < str_idx ? 1 : 0));
 1737:   if (pos == 0
 1738:       && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
 1739:     {
 1740:       int node_idx;
 1741:       for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
 1742: 	{
 1743: 	  int node = eclosures->elems[node_idx];
 1744: 	  re_token_type_t type= dfa->nodes[node].type;
 1745: 	  if (type == OP_BACK_REF)
 1746: 	    {
 1747: 	      int bi = search_cur_bkref_entry (mctx, str_idx);
 1748: 	      for (; bi < mctx->nbkref_ents; ++bi)
 1749: 		{
 1750: 		  struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
 1751: 		  if (ent->str_idx > str_idx)
 1752: 		    break;
 1753: 		  if (ent->node == node && ent->subexp_from == ent->subexp_to)
 1754: 		    {
 1755: 		      int cpos, dst;
 1756: 		      dst = dfa->edests[node].elems[0];
 1757: 		      cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
 1758: 							dfa->eclosures + dst,
 1759: 							subexp_idx, dst,
 1760: 							str_idx);
 1761: 		      if ((str_idx == lim->subexp_from && cpos == -1)
 1762: 			  || (str_idx == lim->subexp_to && cpos == 0))
 1763: 			return cpos;
 1764: 		    }
 1765: 		}
 1766: 	    }
 1767: 	  if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
 1768: 	      && str_idx == lim->subexp_from)
 1769: 	    {
 1770: 	      pos = -1;
 1771: 	      break;
 1772: 	    }
 1773: 	  if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
 1774: 	      && str_idx == lim->subexp_to)
 1775: 	    break;
 1776: 	}
 1777:       if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
 1778: 	pos = 1;
 1779:     }
 1780:   return pos;
 1781: }
 1782: 
 1783: /* Check the limitations of sub expressions LIMITS, and remove the nodes
 1784:    which are against limitations from DEST_NODES. */
 1785: 
 1786: static reg_errcode_t
 1787: check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
 1788:      re_dfa_t *dfa;
 1789:      re_node_set *dest_nodes;
 1790:      const re_node_set *candidates;
 1791:      re_node_set *limits;
 1792:      struct re_backref_cache_entry *bkref_ents;
 1793:      int str_idx;
 1794: {
 1795:   reg_errcode_t err;
 1796:   int node_idx, lim_idx;
 1797: 
 1798:   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
 1799:     {
 1800:       int subexp_idx;
 1801:       struct re_backref_cache_entry *ent;
 1802:       ent = bkref_ents + limits->elems[lim_idx];
 1803: 
 1804:       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
 1805: 	continue; /* This is unrelated limitation.  */
 1806: 
 1807:       subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
 1808:       if (ent->subexp_to == str_idx)
 1809: 	{
 1810: 	  int ops_node = -1;
 1811: 	  int cls_node = -1;
 1812: 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 1813: 	    {
 1814: 	      int node = dest_nodes->elems[node_idx];
 1815: 	      re_token_type_t type= dfa->nodes[node].type;
 1816: 	      if (type == OP_OPEN_SUBEXP
 1817: 		  && subexp_idx == dfa->nodes[node].opr.idx)
 1818: 		ops_node = node;
 1819: 	      else if (type == OP_CLOSE_SUBEXP
 1820: 		       && subexp_idx == dfa->nodes[node].opr.idx)
 1821: 		cls_node = node;
 1822: 	    }
 1823: 
 1824: 	  /* Check the limitation of the open subexpression.  */
 1825: 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
 1826: 	  if (ops_node >= 0)
 1827: 	    {
 1828: 	      err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
 1829: 					  candidates);
 1830: 	      if (BE (err != REG_NOERROR, 0))
 1831: 		return err;
 1832: 	    }
 1833: 	  /* Check the limitation of the close subexpression.  */
 1834: 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 1835: 	    {
 1836: 	      int node = dest_nodes->elems[node_idx];
 1837: 	      if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
 1838: 		  && !re_node_set_contains (dfa->eclosures + node, cls_node))
 1839: 		{
 1840: 		  /* It is against this limitation.
 1841: 		     Remove it form the current sifted state.  */
 1842: 		  err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
 1843: 					      candidates);
 1844: 		  if (BE (err != REG_NOERROR, 0))
 1845: 		    return err;
 1846: 		  --node_idx;
 1847: 		}
 1848: 	    }
 1849: 	}
 1850:       else /* (ent->subexp_to != str_idx)  */
 1851: 	{
 1852: 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 1853: 	    {
 1854: 	      int node = dest_nodes->elems[node_idx];
 1855: 	      re_token_type_t type= dfa->nodes[node].type;
 1856: 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
 1857: 		{
 1858: 		  if (subexp_idx != dfa->nodes[node].opr.idx)
 1859: 		    continue;
 1860: 		  if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
 1861: 		      || (type == OP_OPEN_SUBEXP))
 1862: 		    {
 1863: 		      /* It is against this limitation.
 1864: 			 Remove it form the current sifted state.  */
 1865: 		      err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
 1866: 						  candidates);
 1867: 		      if (BE (err != REG_NOERROR, 0))
 1868: 			return err;
 1869: 		    }
 1870: 		}
 1871: 	    }
 1872: 	}
 1873:     }
 1874:   return REG_NOERROR;
 1875: }
 1876: 
 1877: static reg_errcode_t
 1878: sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
 1879:      const regex_t *preg;
 1880:      re_match_context_t *mctx;
 1881:      re_sift_context_t *sctx;
 1882:      int str_idx;
 1883:      re_node_set *dest_nodes;
 1884: {
 1885:   reg_errcode_t err;
 1886:   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
 1887:   int node_idx, node;
 1888:   re_sift_context_t local_sctx;
 1889:   const re_node_set *candidates;
 1890:   candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
 1891: 		: &mctx->state_log[str_idx]->nodes);
 1892:   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
 1893: 
 1894:   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
 1895:     {
 1896:       int cur_bkref_idx = re_string_cur_idx (mctx->input);
 1897:       re_token_type_t type;
 1898:       node = candidates->elems[node_idx];
 1899:       type = dfa->nodes[node].type;
 1900:       if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
 1901: 	continue;
 1902:       /* Avoid infinite loop for the REs like "()\1+".  */
 1903:       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
 1904: 	continue;
 1905:       if (type == OP_BACK_REF)
 1906: 	{
 1907: 	  int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
 1908: 	  for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
 1909: 	    {
 1910: 	      int disabled_idx, subexp_len, to_idx, dst_node;
 1911: 	      struct re_backref_cache_entry *entry;
 1912: 	      entry = mctx->bkref_ents + enabled_idx;
 1913: 	      if (entry->str_idx > str_idx)
 1914: 		break;
 1915: 	      if (entry->node != node)
 1916: 		  continue;
 1917: 	      subexp_len = entry->subexp_to - entry->subexp_from;
 1918: 	      to_idx = str_idx + subexp_len;
 1919: 	      dst_node = (subexp_len ? dfa->nexts[node]
 1920: 			  : dfa->edests[node].elems[0]);
 1921: 
 1922: 	      if (to_idx > sctx->last_str_idx
 1923: 		  || sctx->sifted_states[to_idx] == NULL
 1924: 		  || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
 1925: 					   dst_node)
 1926: 		  || check_dst_limits (dfa, &sctx->limits, mctx, node,
 1927: 				       str_idx, dst_node, to_idx))
 1928: 		continue;
 1929: 		{
 1930: 		  re_dfastate_t *cur_state;
 1931: 		  entry->flag = 0;
 1932: 		  for (disabled_idx = enabled_idx + 1;
 1933: 		       disabled_idx < mctx->nbkref_ents; ++disabled_idx)
 1934: 		    {
 1935: 		      struct re_backref_cache_entry *entry2;
 1936: 		      entry2 = mctx->bkref_ents + disabled_idx;
 1937: 		      if (entry2->str_idx > str_idx)
 1938: 			break;
 1939: 		      entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
 1940: 		    }
 1941: 
 1942: 		  if (local_sctx.sifted_states == NULL)
 1943: 		    {
 1944: 		      local_sctx = *sctx;
 1945: 		      err = re_node_set_init_copy (&local_sctx.limits,
 1946: 						   &sctx->limits);
 1947: 		      if (BE (err != REG_NOERROR, 0))
 1948: 			goto free_return;
 1949: 		    }
 1950: 		  local_sctx.last_node = node;
 1951: 		  local_sctx.last_str_idx = str_idx;
 1952: 		  err = re_node_set_insert (&local_sctx.limits, enabled_idx);
 1953: 		  if (BE (err < 0, 0))
 1954: 		    {
 1955: 		      err = REG_ESPACE;
 1956: 		      goto free_return;
 1957: 		    }
 1958: 		  cur_state = local_sctx.sifted_states[str_idx];
 1959: 		  err = sift_states_backward (preg, mctx, &local_sctx);
 1960: 		  if (BE (err != REG_NOERROR, 0))
 1961: 		    goto free_return;
 1962: 		  if (sctx->limited_states != NULL)
 1963: 		    {
 1964: 		      err = merge_state_array (dfa, sctx->limited_states,
 1965: 					       local_sctx.sifted_states,
 1966: 					       str_idx + 1);
 1967: 		      if (BE (err != REG_NOERROR, 0))
 1968: 			goto free_return;
 1969: 		    }
 1970: 		  local_sctx.sifted_states[str_idx] = cur_state;
 1971: 		  re_node_set_remove (&local_sctx.limits, enabled_idx);
 1972: 		  /* We must not use the variable entry here, since
 1973: 		     mctx->bkref_ents might be realloced.  */
 1974: 		  mctx->bkref_ents[enabled_idx].flag = 1;
 1975: 		}
 1976: 	    }
 1977: 	  enabled_idx = search_cur_bkref_entry (mctx, str_idx);
 1978: 	  for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
 1979: 	    {
 1980: 	      struct re_backref_cache_entry *entry;
 1981: 	      entry = mctx->bkref_ents + enabled_idx;
 1982: 	      if (entry->str_idx > str_idx)
 1983: 		break;
 1984: 	      if (entry->node == node)
 1985: 		entry->flag = 0;
 1986: 	    }
 1987: 	}
 1988:     }
 1989:   err = REG_NOERROR;
 1990:  free_return:
 1991:   if (local_sctx.sifted_states != NULL)
 1992:     {
 1993:       re_node_set_free (&local_sctx.limits);
 1994:     }
 1995: 
 1996:   return err;
 1997: }
 1998: 
 1999: 
 2000: #ifdef RE_ENABLE_I18N
 2001: static int
 2002: sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
 2003:     const regex_t *preg;
 2004:     const re_match_context_t *mctx;
 2005:     re_sift_context_t *sctx;
 2006:     int node_idx, str_idx, max_str_idx;
 2007: {
 2008:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2009:   int naccepted;
 2010:   /* Check the node can accept `multi byte'.  */
 2011:   naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
 2012:   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
 2013:       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
 2014: 			    dfa->nexts[node_idx]))
 2015:     /* The node can't accept the `multi byte', or the
 2016:        destination was already throwed away, then the node
 2017:        could't accept the current input `multi byte'.   */
 2018:     naccepted = 0;
 2019:   /* Otherwise, it is sure that the node could accept
 2020:      `naccepted' bytes input.  */
 2021:   return naccepted;
 2022: }
 2023: #endif /* RE_ENABLE_I18N */
 2024: 
 2025: 
 2026: /* Functions for state transition.  */
 2027: 
 2028: /* Return the next state to which the current state STATE will transit by
 2029:    accepting the current input byte, and update STATE_LOG if necessary.
 2030:    If STATE can accept a multibyte char/collating element/back reference
 2031:    update the destination of STATE_LOG.  */
 2032: 
 2033: static re_dfastate_t *
 2034: transit_state (err, preg, mctx, state, fl_search)
 2035:      reg_errcode_t *err;
 2036:      const regex_t *preg;
 2037:      re_match_context_t *mctx;
 2038:      re_dfastate_t *state;
 2039:      int fl_search;
 2040: {
 2041:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2042:   re_dfastate_t **trtable, *next_state;
 2043:   unsigned char ch;
 2044:   int cur_idx;
 2045: 
 2046:   if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
 2047:       || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
 2048: 	  && mctx->input->valid_len < mctx->input->len))
 2049:     {
 2050:       *err = extend_buffers (mctx);
 2051:       if (BE (*err != REG_NOERROR, 0))
 2052: 	return NULL;
 2053:     }
 2054: 
 2055:   *err = REG_NOERROR;
 2056:   if (state == NULL)
 2057:     {
 2058:       next_state = state;
 2059:       re_string_skip_bytes (mctx->input, 1);
 2060:     }
 2061:   else
 2062:     {
 2063: #ifdef RE_ENABLE_I18N
 2064:       /* If the current state can accept multibyte.  */
 2065:       if (state->accept_mb)
 2066: 	{
 2067: 	  *err = transit_state_mb (preg, state, mctx);
 2068: 	  if (BE (*err != REG_NOERROR, 0))
 2069: 	    return NULL;
 2070: 	}
 2071: #endif /* RE_ENABLE_I18N */
 2072: 
 2073:       /* Then decide the next state with the single byte.  */
 2074:       if (1)
 2075: 	{
 2076: 	  /* Use transition table  */
 2077: 	  ch = re_string_fetch_byte (mctx->input);
 2078: 	  trtable = fl_search ? state->trtable_search : state->trtable;
 2079: 	  if (trtable == NULL)
 2080: 	    {
 2081: 	      trtable = build_trtable (preg, state, fl_search);
 2082: 	      if (fl_search)
 2083: 		state->trtable_search = trtable;
 2084: 	      else
 2085: 		state->trtable = trtable;
 2086: 	    }
 2087: 	  next_state = trtable[ch];
 2088: 	}
 2089:       else
 2090: 	{
 2091: 	  /* don't use transition table  */
 2092: 	  next_state = transit_state_sb (err, preg, state, fl_search, mctx);
 2093: 	  if (BE (next_state == NULL && err != REG_NOERROR, 0))
 2094: 	    return NULL;
 2095: 	}
 2096:     }
 2097: 
 2098:   cur_idx = re_string_cur_idx (mctx->input);
 2099:   /* Update the state_log if we need.  */
 2100:   if (mctx->state_log != NULL)
 2101:     {
 2102:       if (cur_idx > mctx->state_log_top)
 2103: 	{
 2104: 	  mctx->state_log[cur_idx] = next_state;
 2105: 	  mctx->state_log_top = cur_idx;
 2106: 	}
 2107:       else if (mctx->state_log[cur_idx] == 0)
 2108: 	{
 2109: 	  mctx->state_log[cur_idx] = next_state;
 2110: 	}
 2111:       else
 2112: 	{
 2113: 	  re_dfastate_t *pstate;
 2114: 	  unsigned int context;
 2115: 	  re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
 2116: 	  /* If (state_log[cur_idx] != 0), it implies that cur_idx is
 2117: 	     the destination of a multibyte char/collating element/
 2118: 	     back reference.  Then the next state is the union set of
 2119: 	     these destinations and the results of the transition table.  */
 2120: 	  pstate = mctx->state_log[cur_idx];
 2121: 	  log_nodes = pstate->entrance_nodes;
 2122: 	  if (next_state != NULL)
 2123: 	    {
 2124: 	      table_nodes = next_state->entrance_nodes;
 2125: 	      *err = re_node_set_init_union (&next_nodes, table_nodes,
 2126: 					     log_nodes);
 2127: 	      if (BE (*err != REG_NOERROR, 0))
 2128: 		return NULL;
 2129: 	    }
 2130: 	  else
 2131: 	    next_nodes = *log_nodes;
 2132: 	  /* Note: We already add the nodes of the initial state,
 2133: 		   then we don't need to add them here.  */
 2134: 
 2135: 	  context = re_string_context_at (mctx->input,
 2136: 					  re_string_cur_idx (mctx->input) - 1,
 2137: 					  mctx->eflags, preg->newline_anchor);
 2138: 	  next_state = mctx->state_log[cur_idx]
 2139: 	    = re_acquire_state_context (err, dfa, &next_nodes, context);
 2140: 	  /* We don't need to check errors here, since the return value of
 2141: 	     this function is next_state and ERR is already set.  */
 2142: 
 2143: 	  if (table_nodes != NULL)
 2144: 	    re_node_set_free (&next_nodes);
 2145: 	}
 2146:     }
 2147: 
 2148:   /* Check OP_OPEN_SUBEXP in the current state in case that we use them
 2149:      later.  We must check them here, since the back references in the
 2150:      next state might use them.  */
 2151:   if (dfa->nbackref && next_state/* && fl_process_bkref */)
 2152:     {
 2153:       *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
 2154: 					cur_idx);
 2155:       if (BE (*err != REG_NOERROR, 0))
 2156: 	return NULL;
 2157:     }
 2158: 
 2159:   /* If the next state has back references.  */
 2160:   if (next_state != NULL && next_state->has_backref)
 2161:     {
 2162:       *err = transit_state_bkref (preg, &next_state->nodes, mctx);
 2163:       if (BE (*err != REG_NOERROR, 0))
 2164: 	return NULL;
 2165:       next_state = mctx->state_log[cur_idx];
 2166:     }
 2167:   return next_state;
 2168: }
 2169: 
 2170: /* Helper functions for transit_state.  */
 2171: 
 2172: /* From the node set CUR_NODES, pick up the nodes whose types are
 2173:    OP_OPEN_SUBEXP and which have corresponding back references in the regular
 2174:    expression. And register them to use them later for evaluating the
 2175:    correspoding back references.  */
 2176: 
 2177: static reg_errcode_t
 2178: check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
 2179:      re_dfa_t *dfa;
 2180:      re_match_context_t *mctx;
 2181:      re_node_set *cur_nodes;
 2182:      int str_idx;
 2183: {
 2184:   int node_idx;
 2185:   reg_errcode_t err;
 2186: 
 2187:   /* TODO: This isn't efficient.
 2188: 	   Because there might be more than one nodes whose types are
 2189: 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2190: 	   nodes.
 2191: 	   E.g. RE: (a){2}  */
 2192:   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
 2193:     {
 2194:       int node = cur_nodes->elems[node_idx];
 2195:       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
 2196: 	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
 2197: 	{
 2198: 	  err = match_ctx_add_subtop (mctx, node, str_idx);
 2199: 	  if (BE (err != REG_NOERROR, 0))
 2200: 	    return err;
 2201: 	}
 2202:     }
 2203:   return REG_NOERROR;
 2204: }
 2205: 
 2206: /* Return the next state to which the current state STATE will transit by
 2207:    accepting the current input byte.  */
 2208: 
 2209: static re_dfastate_t *
 2210: transit_state_sb (err, preg, state, fl_search, mctx)
 2211:      reg_errcode_t *err;
 2212:      const regex_t *preg;
 2213:      re_dfastate_t *state;
 2214:      int fl_search;
 2215:      re_match_context_t *mctx;
 2216: {
 2217:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2218:   re_node_set next_nodes;
 2219:   re_dfastate_t *next_state;
 2220:   int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
 2221:   unsigned int context;
 2222: 
 2223:   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
 2224:   if (BE (*err != REG_NOERROR, 0))
 2225:     return NULL;
 2226:   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
 2227:     {
 2228:       int cur_node = state->nodes.elems[node_cnt];
 2229:       if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
 2230: 	{
 2231: 	  *err = re_node_set_merge (&next_nodes,
 2232: 				    dfa->eclosures + dfa->nexts[cur_node]);
 2233: 	  if (BE (*err != REG_NOERROR, 0))
 2234: 	    {
 2235: 	      re_node_set_free (&next_nodes);
 2236: 	      return NULL;
 2237: 	    }
 2238: 	}
 2239:     }
 2240:   if (fl_search)
 2241:     {
 2242: #ifdef RE_ENABLE_I18N
 2243:       int not_initial = 0;
 2244:       if (MB_CUR_MAX > 1)
 2245: 	for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
 2246: 	  if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
 2247: 	    {
 2248: 	      not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
 2249: 	      break;
 2250: 	    }
 2251:       if (!not_initial)
 2252: #endif
 2253: 	{
 2254: 	  *err = re_node_set_merge (&next_nodes,
 2255: 				    dfa->init_state->entrance_nodes);
 2256: 	  if (BE (*err != REG_NOERROR, 0))
 2257: 	    {
 2258: 	      re_node_set_free (&next_nodes);
 2259: 	      return NULL;
 2260: 	    }
 2261: 	}
 2262:     }
 2263:   context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
 2264: 				  preg->newline_anchor);
 2265:   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
 2266:   /* We don't need to check errors here, since the return value of
 2267:      this function is next_state and ERR is already set.  */
 2268: 
 2269:   re_node_set_free (&next_nodes);
 2270:   re_string_skip_bytes (mctx->input, 1);
 2271:   return next_state;
 2272: }
 2273: 
 2274: #ifdef RE_ENABLE_I18N
 2275: static reg_errcode_t
 2276: transit_state_mb (preg, pstate, mctx)
 2277:     const regex_t *preg;
 2278:     re_dfastate_t *pstate;
 2279:     re_match_context_t *mctx;
 2280: {
 2281:   reg_errcode_t err;
 2282:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2283:   int i;
 2284: 
 2285:   for (i = 0; i < pstate->nodes.nelem; ++i)
 2286:     {
 2287:       re_node_set dest_nodes, *new_nodes;
 2288:       int cur_node_idx = pstate->nodes.elems[i];
 2289:       int naccepted = 0, dest_idx;
 2290:       unsigned int context;
 2291:       re_dfastate_t *dest_state;
 2292: 
 2293:       if (dfa->nodes[cur_node_idx].constraint)
 2294: 	{
 2295: 	  context = re_string_context_at (mctx->input,
 2296: 					  re_string_cur_idx (mctx->input),
 2297: 					  mctx->eflags, preg->newline_anchor);
 2298: 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
 2299: 					   context))
 2300: 	    continue;
 2301: 	}
 2302: 
 2303:       /* How many bytes the node can accepts?  */
 2304:       if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
 2305: 	naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
 2306: 					     re_string_cur_idx (mctx->input));
 2307:       if (naccepted == 0)
 2308: 	continue;
 2309: 
 2310:       /* The node can accepts `naccepted' bytes.  */
 2311:       dest_idx = re_string_cur_idx (mctx->input) + naccepted;
 2312:       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
 2313: 			       : mctx->max_mb_elem_len);
 2314:       err = clean_state_log_if_need (mctx, dest_idx);
 2315:       if (BE (err != REG_NOERROR, 0))
 2316: 	return err;
 2317: #ifdef DEBUG
 2318:       assert (dfa->nexts[cur_node_idx] != -1);
 2319: #endif
 2320:       /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
 2321: 	 then we use pstate->nodes.elems[i] instead.  */
 2322:       new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
 2323: 
 2324:       dest_state = mctx->state_log[dest_idx];
 2325:       if (dest_state == NULL)
 2326: 	dest_nodes = *new_nodes;
 2327:       else
 2328: 	{
 2329: 	  err = re_node_set_init_union (&dest_nodes,
 2330: 					dest_state->entrance_nodes, new_nodes);
 2331: 	  if (BE (err != REG_NOERROR, 0))
 2332: 	    return err;
 2333: 	}
 2334:       context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
 2335: 				      preg->newline_anchor);
 2336:       mctx->state_log[dest_idx]
 2337: 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2338:       if (dest_state != NULL)
 2339: 	re_node_set_free (&dest_nodes);
 2340:       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
 2341: 	return err;
 2342:     }
 2343:   return REG_NOERROR;
 2344: }
 2345: #endif /* RE_ENABLE_I18N */
 2346: 
 2347: static reg_errcode_t
 2348: transit_state_bkref (preg, nodes, mctx)
 2349:     const regex_t *preg;
 2350:     re_node_set *nodes;
 2351:     re_match_context_t *mctx;
 2352: {
 2353:   reg_errcode_t err;
 2354:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2355:   int i;
 2356:   int cur_str_idx = re_string_cur_idx (mctx->input);
 2357: 
 2358:   for (i = 0; i < nodes->nelem; ++i)
 2359:     {
 2360:       int dest_str_idx, prev_nelem, bkc_idx;
 2361:       int node_idx = nodes->elems[i];
 2362:       unsigned int context;
 2363:       re_token_t *node = dfa->nodes + node_idx;
 2364:       re_node_set *new_dest_nodes;
 2365: 
 2366:       /* Check whether `node' is a backreference or not.  */
 2367:       if (node->type != OP_BACK_REF)
 2368: 	continue;
 2369: 
 2370:       if (node->constraint)
 2371: 	{
 2372: 	  context = re_string_context_at (mctx->input, cur_str_idx,
 2373: 					  mctx->eflags, preg->newline_anchor);
 2374: 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 2375: 	    continue;
 2376: 	}
 2377: 
 2378:       /* `node' is a backreference.
 2379: 	 Check the substring which the substring matched.  */
 2380:       bkc_idx = mctx->nbkref_ents;
 2381:       err = get_subexp (preg, mctx, node_idx, cur_str_idx);
 2382:       if (BE (err != REG_NOERROR, 0))
 2383: 	goto free_return;
 2384: 
 2385:       /* And add the epsilon closures (which is `new_dest_nodes') of
 2386: 	 the backreference to appropriate state_log.  */
 2387: #ifdef DEBUG
 2388:       assert (dfa->nexts[node_idx] != -1);
 2389: #endif
 2390:       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
 2391: 	{
 2392: 	  int subexp_len;
 2393: 	  re_dfastate_t *dest_state;
 2394: 	  struct re_backref_cache_entry *bkref_ent;
 2395: 	  bkref_ent = mctx->bkref_ents + bkc_idx;
 2396: 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
 2397: 	    continue;
 2398: 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
 2399: 	  new_dest_nodes = (subexp_len == 0
 2400: 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
 2401: 			    : dfa->eclosures + dfa->nexts[node_idx]);
 2402: 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
 2403: 			  - bkref_ent->subexp_from);
 2404: 	  context = re_string_context_at (mctx->input, dest_str_idx - 1,
 2405: 					  mctx->eflags, preg->newline_anchor);
 2406: 	  dest_state = mctx->state_log[dest_str_idx];
 2407: 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
 2408: 			: mctx->state_log[cur_str_idx]->nodes.nelem);
 2409: 	  /* Add `new_dest_node' to state_log.  */
 2410: 	  if (dest_state == NULL)
 2411: 	    {
 2412: 	      mctx->state_log[dest_str_idx]
 2413: 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
 2414: 					    context);
 2415: 	      if (BE (mctx->state_log[dest_str_idx] == NULL
 2416: 		      && err != REG_NOERROR, 0))
 2417: 		goto free_return;
 2418: 	    }
 2419: 	  else
 2420: 	    {
 2421: 	      re_node_set dest_nodes;
 2422: 	      err = re_node_set_init_union (&dest_nodes,
 2423: 					    dest_state->entrance_nodes,
 2424: 					    new_dest_nodes);
 2425: 	      if (BE (err != REG_NOERROR, 0))
 2426: 		{
 2427: 		  re_node_set_free (&dest_nodes);
 2428: 		  goto free_return;
 2429: 		}
 2430: 	      mctx->state_log[dest_str_idx]
 2431: 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
 2432: 	      re_node_set_free (&dest_nodes);
 2433: 	      if (BE (mctx->state_log[dest_str_idx] == NULL
 2434: 		      && err != REG_NOERROR, 0))
 2435: 		goto free_return;
 2436: 	    }
 2437: 	  /* We need to check recursively if the backreference can epsilon
 2438: 	     transit.  */
 2439: 	  if (subexp_len == 0
 2440: 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
 2441: 	    {
 2442: 	      err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
 2443: 					       cur_str_idx);
 2444: 	      if (BE (err != REG_NOERROR, 0))
 2445: 		goto free_return;
 2446: 	      err = transit_state_bkref (preg, new_dest_nodes, mctx);
 2447: 	      if (BE (err != REG_NOERROR, 0))
 2448: 		goto free_return;
 2449: 	    }
 2450: 	}
 2451:     }
 2452:   err = REG_NOERROR;
 2453:  free_return:
 2454:   return err;
 2455: }
 2456: 
 2457: /* Enumerate all the candidates which the backreference BKREF_NODE can match
 2458:    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
 2459:    Note that we might collect inappropriate candidates here.
 2460:    However, the cost of checking them strictly here is too high, then we
 2461:    delay these checking for prune_impossible_nodes().  */
 2462: 
 2463: static reg_errcode_t
 2464: get_subexp (preg, mctx, bkref_node, bkref_str_idx)
 2465:      const regex_t *preg;
 2466:      re_match_context_t *mctx;
 2467:      int bkref_node, bkref_str_idx;
 2468: {
 2469:   int subexp_num, sub_top_idx;
 2470:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2471:   char *buf = (char *) re_string_get_buffer (mctx->input);
 2472:   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
 2473:   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
 2474:   for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
 2475:     {
 2476:       struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
 2477:       if (entry->str_idx > bkref_str_idx)
 2478: 	break;
 2479:       if (entry->node == bkref_node)
 2480: 	return REG_NOERROR; /* We already checked it.  */
 2481:     }
 2482:   subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
 2483: 
 2484:   /* For each sub expression  */
 2485:   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
 2486:     {
 2487:       reg_errcode_t err;
 2488:       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
 2489:       re_sub_match_last_t *sub_last;
 2490:       int sub_last_idx, sl_str;
 2491:       char *bkref_str;
 2492: 
 2493:       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
 2494: 	continue; /* It isn't related.  */
 2495: 
 2496:       sl_str = sub_top->str_idx;
 2497:       bkref_str = buf + bkref_str_idx;
 2498:       /* At first, check the last node of sub expressions we already
 2499: 	 evaluated.  */
 2500:       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
 2501: 	{
 2502: 	  int sl_str_diff;
 2503: 	  sub_last = sub_top->lasts[sub_last_idx];
 2504: 	  sl_str_diff = sub_last->str_idx - sl_str;
 2505: 	  /* The matched string by the sub expression match with the substring
 2506: 	     at the back reference?  */
 2507: 	  if (sl_str_diff > 0
 2508: 	      && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
 2509: 	    break; /* We don't need to search this sub expression any more.  */
 2510: 	  bkref_str += sl_str_diff;
 2511: 	  sl_str += sl_str_diff;
 2512: 	  err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
 2513: 				bkref_str_idx);
 2514: 	  if (err == REG_NOMATCH)
 2515: 	    continue;
 2516: 	  if (BE (err != REG_NOERROR, 0))
 2517: 	    return err;
 2518: 	}
 2519:       if (sub_last_idx < sub_top->nlasts)
 2520: 	continue;
 2521:       if (sub_last_idx > 0)
 2522: 	++sl_str;
 2523:       /* Then, search for the other last nodes of the sub expression.  */
 2524:       for (; sl_str <= bkref_str_idx; ++sl_str)
 2525: 	{
 2526: 	  int cls_node, sl_str_off;
 2527: 	  re_node_set *nodes;
 2528: 	  sl_str_off = sl_str - sub_top->str_idx;
 2529: 	  /* The matched string by the sub expression match with the substring
 2530: 	     at the back reference?  */
 2531: 	  if (sl_str_off > 0
 2532: 	      && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
 2533: 	    break; /* We don't need to search this sub expression any more.  */
 2534: 	  if (mctx->state_log[sl_str] == NULL)
 2535: 	    continue;
 2536: 	  /* Does this state have a ')' of the sub expression?  */
 2537: 	  nodes = &mctx->state_log[sl_str]->nodes;
 2538: 	  cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
 2539: 	  if (cls_node == -1)
 2540: 	    continue; /* No.  */
 2541: 	  if (sub_top->path == NULL)
 2542: 	    {
 2543: 	      sub_top->path = calloc (sizeof (state_array_t),
 2544: 				      sl_str - sub_top->str_idx + 1);
 2545: 	      if (sub_top->path == NULL)
 2546: 		return REG_ESPACE;
 2547: 	    }
 2548: 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
 2549: 	     in the current context?  */
 2550: 	  err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
 2551: 			       sub_top->str_idx, cls_node, sl_str, 0);
 2552: 	  if (err == REG_NOMATCH)
 2553: 	      continue;
 2554: 	  if (BE (err != REG_NOERROR, 0))
 2555: 	      return err;
 2556: 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
 2557: 	  if (BE (sub_last == NULL, 0))
 2558: 	    return REG_ESPACE;
 2559: 	  err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
 2560: 				bkref_str_idx);
 2561: 	  if (err == REG_NOMATCH)
 2562: 	    continue;
 2563: 	}
 2564:     }
 2565:   return REG_NOERROR;
 2566: }
 2567: 
 2568: /* Helper functions for get_subexp().  */
 2569: 
 2570: /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
 2571:    If it can arrive, register the sub expression expressed with SUB_TOP
 2572:    and SUB_LAST.  */
 2573: 
 2574: static reg_errcode_t
 2575: get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
 2576:      const regex_t *preg;
 2577:      re_match_context_t *mctx;
 2578:      re_sub_match_top_t *sub_top;
 2579:      re_sub_match_last_t *sub_last;
 2580:      int bkref_node, bkref_str;
 2581: {
 2582:   reg_errcode_t err;
 2583:   int to_idx;
 2584:   /* Can the subexpression arrive the back reference?  */
 2585:   err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
 2586: 		       sub_last->str_idx, bkref_node, bkref_str, 1);
 2587:   if (err != REG_NOERROR)
 2588:     return err;
 2589:   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
 2590: 			     sub_last->str_idx);
 2591:   if (BE (err != REG_NOERROR, 0))
 2592:     return err;
 2593:   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
 2594:   clean_state_log_if_need (mctx, to_idx);
 2595:   return REG_NOERROR;
 2596: }
 2597: 
 2598: /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
 2599:    Search '(' if FL_OPEN, or search ')' otherwise.
 2600:    TODO: This function isn't efficient...
 2601: 	 Because there might be more than one nodes whose types are
 2602: 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
 2603: 	 nodes.
 2604: 	 E.g. RE: (a){2}  */
 2605: 
 2606: static int
 2607: find_subexp_node (dfa, nodes, subexp_idx, fl_open)
 2608:      re_dfa_t *dfa;
 2609:      re_node_set *nodes;
 2610:      int subexp_idx, fl_open;
 2611: {
 2612:   int cls_idx;
 2613:   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
 2614:     {
 2615:       int cls_node = nodes->elems[cls_idx];
 2616:       re_token_t *node = dfa->nodes + cls_node;
 2617:       if (((fl_open && node->type == OP_OPEN_SUBEXP)
 2618: 	  || (!fl_open && node->type == OP_CLOSE_SUBEXP))
 2619: 	  && node->opr.idx == subexp_idx)
 2620: 	return cls_node;
 2621:     }
 2622:   return -1;
 2623: }
 2624: 
 2625: /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
 2626:    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
 2627:    heavily reused.
 2628:    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
 2629: 
 2630: static reg_errcode_t
 2631: check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
 2632: 	       fl_open)
 2633:      const regex_t *preg;
 2634:      re_match_context_t *mctx;
 2635:      state_array_t *path;
 2636:      int top_node, top_str, last_node, last_str, fl_open;
 2637: {
 2638:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2639:   reg_errcode_t err;
 2640:   int subexp_num, backup_cur_idx, str_idx, null_cnt;
 2641:   re_dfastate_t *cur_state = NULL;
 2642:   re_node_set *cur_nodes, next_nodes;
 2643:   re_dfastate_t **backup_state_log;
 2644:   unsigned int context;
 2645: 
 2646:   subexp_num = dfa->nodes[top_node].opr.idx;
 2647:   /* Extend the buffer if we need.  */
 2648:   if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
 2649:     {
 2650:       re_dfastate_t **new_array;
 2651:       int old_alloc = path->alloc;
 2652:       path->alloc += last_str + mctx->max_mb_elem_len + 1;
 2653:       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
 2654:       if (new_array == NULL)
 2655: 	return REG_ESPACE;
 2656:       path->array = new_array;
 2657:       memset (new_array + old_alloc, '\0',
 2658: 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
 2659:     }
 2660: 
 2661:   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
 2662: 
 2663:   /* Temporary modify MCTX.  */
 2664:   backup_state_log = mctx->state_log;
 2665:   backup_cur_idx = mctx->input->cur_idx;
 2666:   mctx->state_log = path->array;
 2667:   mctx->input->cur_idx = str_idx;
 2668: 
 2669:   /* Setup initial node set.  */
 2670:   context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
 2671: 				  preg->newline_anchor);
 2672:   if (str_idx == top_str)
 2673:     {
 2674:       err = re_node_set_init_1 (&next_nodes, top_node);
 2675:       if (BE (err != REG_NOERROR, 0))
 2676: 	return err;
 2677:       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
 2678:       if (BE (err != REG_NOERROR, 0))
 2679: 	{
 2680: 	  re_node_set_free (&next_nodes);
 2681: 	  return err;
 2682: 	}
 2683:     }
 2684:   else
 2685:     {
 2686:       cur_state = mctx->state_log[str_idx];
 2687:       if (cur_state && cur_state->has_backref)
 2688: 	{
 2689: 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
 2690: 	  if (BE ( err != REG_NOERROR, 0))
 2691: 	    return err;
 2692: 	}
 2693:       else
 2694: 	re_node_set_init_empty (&next_nodes);
 2695:     }
 2696:   if (str_idx == top_str || (cur_state && cur_state->has_backref))
 2697:     {
 2698:       if (next_nodes.nelem)
 2699: 	{
 2700: 	  err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
 2701: 				    subexp_num, fl_open);
 2702: 	  if (BE ( err != REG_NOERROR, 0))
 2703: 	    {
 2704: 	      re_node_set_free (&next_nodes);
 2705: 	      return err;
 2706: 	    }
 2707: 	}
 2708:       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2709:       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
 2710: 	{
 2711: 	  re_node_set_free (&next_nodes);
 2712: 	  return err;
 2713: 	}
 2714:       mctx->state_log[str_idx] = cur_state;
 2715:     }
 2716: 
 2717:   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
 2718:     {
 2719:       re_node_set_empty (&next_nodes);
 2720:       if (mctx->state_log[str_idx + 1])
 2721: 	{
 2722: 	  err = re_node_set_merge (&next_nodes,
 2723: 				   &mctx->state_log[str_idx + 1]->nodes);
 2724: 	  if (BE (err != REG_NOERROR, 0))
 2725: 	    {
 2726: 	      re_node_set_free (&next_nodes);
 2727: 	      return err;
 2728: 	    }
 2729: 	}
 2730:       if (cur_state)
 2731: 	{
 2732: 	  err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
 2733: 					     &cur_state->nodes, &next_nodes);
 2734: 	  if (BE (err != REG_NOERROR, 0))
 2735: 	    {
 2736: 	      re_node_set_free (&next_nodes);
 2737: 	      return err;
 2738: 	    }
 2739: 	}
 2740:       ++str_idx;
 2741:       if (next_nodes.nelem)
 2742: 	{
 2743: 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
 2744: 					  fl_open);
 2745: 	  if (BE (err != REG_NOERROR, 0))
 2746: 	    {
 2747: 	      re_node_set_free (&next_nodes);
 2748: 	      return err;
 2749: 	    }
 2750: 	  err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
 2751: 				    subexp_num, fl_open);
 2752: 	  if (BE ( err != REG_NOERROR, 0))
 2753: 	    {
 2754: 	      re_node_set_free (&next_nodes);
 2755: 	      return err;
 2756: 	    }
 2757: 	}
 2758:       context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
 2759: 				      preg->newline_anchor);
 2760:       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
 2761:       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
 2762: 	{
 2763: 	  re_node_set_free (&next_nodes);
 2764: 	  return err;
 2765: 	}
 2766:       mctx->state_log[str_idx] = cur_state;
 2767:       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
 2768:     }
 2769:   re_node_set_free (&next_nodes);
 2770:   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
 2771: 	       : &mctx->state_log[last_str]->nodes);
 2772:   path->next_idx = str_idx;
 2773: 
 2774:   /* Fix MCTX.  */
 2775:   mctx->state_log = backup_state_log;
 2776:   mctx->input->cur_idx = backup_cur_idx;
 2777: 
 2778:   if (cur_nodes == NULL)
 2779:     return REG_NOMATCH;
 2780:   /* Then check the current node set has the node LAST_NODE.  */
 2781:   return (re_node_set_contains (cur_nodes, last_node)
 2782: 	  || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
 2783: 	  : REG_NOMATCH);
 2784: }
 2785: 
 2786: /* Helper functions for check_arrival.  */
 2787: 
 2788: /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
 2789:    to NEXT_NODES.
 2790:    TODO: This function is similar to the functions transit_state*(),
 2791: 	 however this function has many additional works.
 2792: 	 Can't we unify them?  */
 2793: 
 2794: static reg_errcode_t
 2795: check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
 2796:      const regex_t *preg;
 2797:      re_dfa_t *dfa;
 2798:      re_match_context_t *mctx;
 2799:      int str_idx;
 2800:      re_node_set *cur_nodes, *next_nodes;
 2801: {
 2802:   int cur_idx;
 2803:   reg_errcode_t err;
 2804:   re_node_set union_set;
 2805:   re_node_set_init_empty (&union_set);
 2806:   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
 2807:     {
 2808:       int naccepted = 0;
 2809:       int cur_node = cur_nodes->elems[cur_idx];
 2810:       re_token_type_t type = dfa->nodes[cur_node].type;
 2811:       if (IS_EPSILON_NODE(type))
 2812: 	continue;
 2813: #ifdef RE_ENABLE_I18N
 2814:       /* If the node may accept `multi byte'.  */
 2815:       if (ACCEPT_MB_NODE (type))
 2816: 	{
 2817: 	  naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
 2818: 					       str_idx);
 2819: 	  if (naccepted > 1)
 2820: 	    {
 2821: 	      re_dfastate_t *dest_state;
 2822: 	      int next_node = dfa->nexts[cur_node];
 2823: 	      int next_idx = str_idx + naccepted;
 2824: 	      dest_state = mctx->state_log[next_idx];
 2825: 	      re_node_set_empty (&union_set);
 2826: 	      if (dest_state)
 2827: 		{
 2828: 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
 2829: 		  if (BE (err != REG_NOERROR, 0))
 2830: 		    {
 2831: 		      re_node_set_free (&union_set);
 2832: 		      return err;
 2833: 		    }
 2834: 		  err = re_node_set_insert (&union_set, next_node);
 2835: 		  if (BE (err < 0, 0))
 2836: 		    {
 2837: 		      re_node_set_free (&union_set);
 2838: 		      return REG_ESPACE;
 2839: 		    }
 2840: 		}
 2841: 	      else
 2842: 		{
 2843: 		  err = re_node_set_insert (&union_set, next_node);
 2844: 		  if (BE (err < 0, 0))
 2845: 		    {
 2846: 		      re_node_set_free (&union_set);
 2847: 		      return REG_ESPACE;
 2848: 		    }
 2849: 		}
 2850: 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
 2851: 							    &union_set);
 2852: 	      if (BE (mctx->state_log[next_idx] == NULL
 2853: 		      && err != REG_NOERROR, 0))
 2854: 		{
 2855: 		  re_node_set_free (&union_set);
 2856: 		  return err;
 2857: 		}
 2858: 	    }
 2859: 	}
 2860: #endif /* RE_ENABLE_I18N */
 2861:       if (naccepted
 2862: 	  || check_node_accept (preg, dfa->nodes + cur_node, mctx,
 2863: 				str_idx))
 2864: 	{
 2865: 	  err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
 2866: 	  if (BE (err < 0, 0))
 2867: 	    {
 2868: 	      re_node_set_free (&union_set);
 2869: 	      return REG_ESPACE;
 2870: 	    }
 2871: 	}
 2872:     }
 2873:   re_node_set_free (&union_set);
 2874:   return REG_NOERROR;
 2875: }
 2876: 
 2877: /* For all the nodes in CUR_NODES, add the epsilon closures of them to
 2878:    CUR_NODES, however exclude the nodes which are:
 2879:     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
 2880:     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
 2881: */
 2882: 
 2883: static reg_errcode_t
 2884: check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
 2885:      re_dfa_t *dfa;
 2886:      re_node_set *cur_nodes;
 2887:      int ex_subexp, fl_open;
 2888: {
 2889:   reg_errcode_t err;
 2890:   int idx, outside_node;
 2891:   re_node_set new_nodes;
 2892: #ifdef DEBUG
 2893:   assert (cur_nodes->nelem);
 2894: #endif
 2895:   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
 2896:   if (BE (err != REG_NOERROR, 0))
 2897:     return err;
 2898:   /* Create a new node set NEW_NODES with the nodes which are epsilon
 2899:      closures of the node in CUR_NODES.  */
 2900: 
 2901:   for (idx = 0; idx < cur_nodes->nelem; ++idx)
 2902:     {
 2903:       int cur_node = cur_nodes->elems[idx];
 2904:       re_node_set *eclosure = dfa->eclosures + cur_node;
 2905:       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
 2906:       if (outside_node == -1)
 2907: 	{
 2908: 	  /* There are no problematic nodes, just merge them.  */
 2909: 	  err = re_node_set_merge (&new_nodes, eclosure);
 2910: 	  if (BE (err != REG_NOERROR, 0))
 2911: 	    {
 2912: 	      re_node_set_free (&new_nodes);
 2913: 	      return err;
 2914: 	    }
 2915: 	}
 2916:       else
 2917: 	{
 2918: 	  /* There are problematic nodes, re-calculate incrementally.  */
 2919: 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
 2920: 					      ex_subexp, fl_open);
 2921: 	  if (BE (err != REG_NOERROR, 0))
 2922: 	    {
 2923: 	      re_node_set_free (&new_nodes);
 2924: 	      return err;
 2925: 	    }
 2926: 	}
 2927:     }
 2928:   re_node_set_free (cur_nodes);
 2929:   *cur_nodes = new_nodes;
 2930:   return REG_NOERROR;
 2931: }
 2932: 
 2933: /* Helper function for check_arrival_expand_ecl.
 2934:    Check incrementally the epsilon closure of TARGET, and if it isn't
 2935:    problematic append it to DST_NODES.  */
 2936: 
 2937: static reg_errcode_t
 2938: check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
 2939:      re_dfa_t *dfa;
 2940:      int target, ex_subexp, fl_open;
 2941:      re_node_set *dst_nodes;
 2942: {
 2943:   int cur_node, type;
 2944:   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
 2945:     {
 2946:       int err;
 2947:       type = dfa->nodes[cur_node].type;
 2948: 
 2949:       if (((type == OP_OPEN_SUBEXP && fl_open)
 2950: 	   || (type == OP_CLOSE_SUBEXP && !fl_open))
 2951: 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
 2952: 	{
 2953: 	  if (!fl_open)
 2954: 	    {
 2955: 	      err = re_node_set_insert (dst_nodes, cur_node);
 2956: 	      if (BE (err == -1, 0))
 2957: 		return REG_ESPACE;
 2958: 	    }
 2959: 	  break;
 2960: 	}
 2961:       err = re_node_set_insert (dst_nodes, cur_node);
 2962:       if (BE (err == -1, 0))
 2963: 	return REG_ESPACE;
 2964:       if (dfa->edests[cur_node].nelem == 0)
 2965: 	break;
 2966:       if (dfa->edests[cur_node].nelem == 2)
 2967: 	{
 2968: 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
 2969: 					      dfa->edests[cur_node].elems[1],
 2970: 					      ex_subexp, fl_open);
 2971: 	  if (BE (err != REG_NOERROR, 0))
 2972: 	    return err;
 2973: 	}
 2974:       cur_node = dfa->edests[cur_node].elems[0];
 2975:     }
 2976:   return REG_NOERROR;
 2977: }
 2978: 
 2979: 
 2980: /* For all the back references in the current state, calculate the
 2981:    destination of the back references by the appropriate entry
 2982:    in MCTX->BKREF_ENTS.  */
 2983: 
 2984: static reg_errcode_t
 2985: expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
 2986: 		    fl_open)
 2987:      const regex_t *preg;
 2988:      re_match_context_t *mctx;
 2989:      int cur_str, last_str, subexp_num, fl_open;
 2990:      re_node_set *cur_nodes;
 2991: {
 2992:   reg_errcode_t err;
 2993:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 2994:   int cache_idx, cache_idx_start;
 2995:   /* The current state.  */
 2996: 
 2997:   cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
 2998:   for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
 2999:     {
 3000:       int to_idx, next_node;
 3001:       struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
 3002:       if (ent->str_idx > cur_str)
 3003: 	break;
 3004:       /* Is this entry ENT is appropriate?  */
 3005:       if (!re_node_set_contains (cur_nodes, ent->node))
 3006: 	continue; /* No.  */
 3007: 
 3008:       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
 3009:       /* Calculate the destination of the back reference, and append it
 3010: 	 to MCTX->STATE_LOG.  */
 3011:       if (to_idx == cur_str)
 3012: 	{
 3013: 	  /* The backreference did epsilon transit, we must re-check all the
 3014: 	     node in the current state.  */
 3015: 	  re_node_set new_dests;
 3016: 	  reg_errcode_t err2, err3;
 3017: 	  next_node = dfa->edests[ent->node].elems[0];
 3018: 	  if (re_node_set_contains (cur_nodes, next_node))
 3019: 	    continue;
 3020: 	  err = re_node_set_init_1 (&new_dests, next_node);
 3021: 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
 3022: 					   fl_open);
 3023: 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
 3024: 	  re_node_set_free (&new_dests);
 3025: 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
 3026: 		  || err3 != REG_NOERROR, 0))
 3027: 	    {
 3028: 	      err = (err != REG_NOERROR ? err
 3029: 		     : (err2 != REG_NOERROR ? err2 : err3));
 3030: 	      return err;
 3031: 	    }
 3032: 	  /* TODO: It is still inefficient...  */
 3033: 	  cache_idx = cache_idx_start - 1;
 3034: 	  continue;
 3035: 	}
 3036:       else
 3037: 	{
 3038: 	  re_node_set union_set;
 3039: 	  next_node = dfa->nexts[ent->node];
 3040: 	  if (mctx->state_log[to_idx])
 3041: 	    {
 3042: 	      int ret;
 3043: 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
 3044: 					next_node))
 3045: 		continue;
 3046: 	      err = re_node_set_init_copy (&union_set,
 3047: 					   &mctx->state_log[to_idx]->nodes);
 3048: 	      ret = re_node_set_insert (&union_set, next_node);
 3049: 	      if (BE (err != REG_NOERROR || ret < 0, 0))
 3050: 		{
 3051: 		  re_node_set_free (&union_set);
 3052: 		  err = err != REG_NOERROR ? err : REG_ESPACE;
 3053: 		  return err;
 3054: 		}
 3055: 	    }
 3056: 	  else
 3057: 	    {
 3058: 	      err = re_node_set_init_1 (&union_set, next_node);
 3059: 	      if (BE (err != REG_NOERROR, 0))
 3060: 		return err;
 3061: 	    }
 3062: 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
 3063: 	  re_node_set_free (&union_set);
 3064: 	  if (BE (mctx->state_log[to_idx] == NULL
 3065: 		  && err != REG_NOERROR, 0))
 3066: 	    return err;
 3067: 	}
 3068:     }
 3069:   return REG_NOERROR;
 3070: }
 3071: 
 3072: /* Build transition table for the state.
 3073:    Return the new table if succeeded, otherwise return NULL.  */
 3074: 
 3075: static re_dfastate_t **
 3076: build_trtable (preg, state, fl_search)
 3077:     const regex_t *preg;
 3078:     const re_dfastate_t *state;
 3079:     int fl_search;
 3080: {
 3081:   reg_errcode_t err;
 3082:   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 3083:   int i, j, k, ch;
 3084:   int dests_node_malloced = 0, dest_states_malloced = 0;
 3085:   int ndests; /* Number of the destination states from `state'.  */
 3086:   re_dfastate_t **trtable;
 3087:   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
 3088:   re_node_set follows, *dests_node;
 3089:   bitset *dests_ch;
 3090:   bitset acceptable;
 3091: 
 3092:   /* We build DFA states which corresponds to the destination nodes
 3093:      from `state'.  `dests_node[i]' represents the nodes which i-th
 3094:      destination state contains, and `dests_ch[i]' represents the
 3095:      characters which i-th destination state accepts.  */
 3096: #ifdef _LIBC
 3097:   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
 3098:     dests_node = (re_node_set *)
 3099: 		 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
 3100:   else
 3101: #endif
 3102:     {
 3103:       dests_node = (re_node_set *)
 3104: 		   malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
 3105:       if (BE (dests_node == NULL, 0))
 3106: 	return NULL;
 3107:       dests_node_malloced = 1;
 3108:     }
 3109:   dests_ch = (bitset *) (dests_node + SBC_MAX);
 3110: 
 3111:   /* Initialize transiton table.  */
 3112:   trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
 3113:   if (BE (trtable == NULL, 0))
 3114:     {
 3115:       if (dests_node_malloced)
 3116: 	free (dests_node);
 3117:       return NULL;
 3118:     }
 3119: 
 3120:   /* At first, group all nodes belonging to `state' into several
 3121:      destinations.  */
 3122:   ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
 3123:   if (BE (ndests <= 0, 0))
 3124:     {
 3125:       if (dests_node_malloced)
 3126: 	free (dests_node);
 3127:       /* Return NULL in case of an error, trtable otherwise.  */
 3128:       if (ndests == 0)
 3129: 	return trtable;
 3130:       free (trtable);
 3131:       return NULL;
 3132:     }
 3133: 
 3134:   err = re_node_set_alloc (&follows, ndests + 1);
 3135:   if (BE (err != REG_NOERROR, 0))
 3136:     goto out_free;
 3137: 
 3138: #ifdef _LIBC
 3139:   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
 3140: 			 + ndests * 3 * sizeof (re_dfastate_t *)))
 3141:     dest_states = (re_dfastate_t **)
 3142: 		  alloca (ndests * 3 * sizeof (re_dfastate_t *));
 3143:   else
 3144: #endif
 3145:     {
 3146:       dest_states = (re_dfastate_t **)
 3147: 		    malloc (ndests * 3 * sizeof (re_dfastate_t *));
 3148:       if (BE (dest_states == NULL, 0))
 3149: 	{
 3150: out_free:
 3151: 	  if (dest_states_malloced)
 3152: 	    free (dest_states);
 3153: 	  re_node_set_free (&follows);
 3154: 	  for (i = 0; i < ndests; ++i)
 3155: 	    re_node_set_free (dests_node + i);
 3156: 	  free (trtable);
 3157: 	  if (dests_node_malloced)
 3158: 	    free (dests_node);
 3159: 	  return NULL;
 3160: 	}
 3161:       dest_states_malloced = 1;
 3162:     }
 3163:   dest_states_word = dest_states + ndests;
 3164:   dest_states_nl = dest_states_word + ndests;
 3165:   bitset_empty (acceptable);
 3166: 
 3167:   /* Then build the states for all destinations.  */
 3168:   for (i = 0; i < ndests; ++i)
 3169:     {
 3170:       int next_node;
 3171:       re_node_set_empty (&follows);
 3172:       /* Merge the follows of this destination states.  */
 3173:       for (j = 0; j < dests_node[i].nelem; ++j)
 3174: 	{
 3175: 	  next_node = dfa->nexts[dests_node[i].elems[j]];
 3176: 	  if (next_node != -1)
 3177: 	    {
 3178: 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
 3179: 	      if (BE (err != REG_NOERROR, 0))
 3180: 		goto out_free;
 3181: 	    }
 3182: 	}
 3183:       /* If search flag is set, merge the initial state.  */
 3184:       if (fl_search)
 3185: 	{
 3186: #ifdef RE_ENABLE_I18N
 3187: 	  int not_initial = 0;
 3188: 	  for (j = 0; j < follows.nelem; ++j)
 3189: 	    if (dfa->nodes[follows.elems[j]].type == CHARACTER)
 3190: 	      {
 3191: 		not_initial = dfa->nodes[follows.elems[j]].mb_partial;
 3192: 		break;
 3193: 	      }
 3194: 	  if (!not_initial)
 3195: #endif
 3196: 	    {
 3197: 	      err = re_node_set_merge (&follows,
 3198: 				       dfa->init_state->entrance_nodes);
 3199: 	      if (BE (err != REG_NOERROR, 0))
 3200: 		goto out_free;
 3201: 	    }
 3202: 	}
 3203:       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
 3204:       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
 3205: 	goto out_free;
 3206:       /* If the new state has context constraint,
 3207: 	 build appropriate states for these contexts.  */
 3208:       if (dest_states[i]->has_constraint)
 3209: 	{
 3210: 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
 3211: 							  CONTEXT_WORD);
 3212: 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
 3213: 	    goto out_free;
 3214: 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 3215: 							CONTEXT_NEWLINE);
 3216: 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
 3217: 	    goto out_free;
 3218: 	}
 3219:       else
 3220: 	{
 3221: 	  dest_states_word[i] = dest_states[i];
 3222: 	  dest_states_nl[i] = dest_states[i];
 3223: 	}
 3224:       bitset_merge (acceptable, dests_ch[i]);
 3225:     }
 3226: 
 3227:   /* Update the transition table.  */
 3228:   /* For all characters ch...:  */
 3229:   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
 3230:     for (j = 0; j < UINT_BITS; ++j, ++ch)
 3231:       if ((acceptable[i] >> j) & 1)
 3232: 	{
 3233: 	  /* The current state accepts the character ch.  */
 3234: 	  if (IS_WORD_CHAR (ch))
 3235: 	    {
 3236: 	      for (k = 0; k < ndests; ++k)
 3237: 		if ((dests_ch[k][i] >> j) & 1)
 3238: 		  {
 3239: 		    /* k-th destination accepts the word character ch.  */
 3240: 		    trtable[ch] = dest_states_word[k];
 3241: 		    /* There must be only one destination which accepts
 3242: 		       character ch.  See group_nodes_into_DFAstates.  */
 3243: 		    break;
 3244: 		  }
 3245: 	    }
 3246: 	  else /* not WORD_CHAR */
 3247: 	    {
 3248: 	      for (k = 0; k < ndests; ++k)
 3249: 		if ((dests_ch[k][i] >> j) & 1)
 3250: 		  {
 3251: 		    /* k-th destination accepts the non-word character ch.  */
 3252: 		    trtable[ch] = dest_states[k];
 3253: 		    /* There must be only one destination which accepts
 3254: 		       character ch.  See group_nodes_into_DFAstates.  */
 3255: 		    break;
 3256: 		  }
 3257: 	    }
 3258: 	}
 3259:   /* new line */
 3260:   if (bitset_contain (acceptable, NEWLINE_CHAR))
 3261:     {
 3262:       /* The current state accepts newline character.  */
 3263:       for (k = 0; k < ndests; ++k)
 3264: 	if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
 3265: 	  {
 3266: 	    /* k-th destination accepts newline character.  */
 3267: 	    trtable[NEWLINE_CHAR] = dest_states_nl[k];
 3268: 	    /* There must be only one destination which accepts
 3269: 	       newline.  See group_nodes_into_DFAstates.  */
 3270: 	    break;
 3271: 	  }
 3272:     }
 3273: 
 3274:   if (dest_states_malloced)
 3275:     free (dest_states);
 3276: 
 3277:   re_node_set_free (&follows);
 3278:   for (i = 0; i < ndests; ++i)
 3279:     re_node_set_free (dests_node + i);
 3280: 
 3281:   if (dests_node_malloced)
 3282:     free (dests_node);
 3283: 
 3284:   return trtable;
 3285: }
 3286: 
 3287: /* Group all nodes belonging to STATE into several destinations.
 3288:    Then for all destinations, set the nodes belonging to the destination
 3289:    to DESTS_NODE[i] and set the characters accepted by the destination
 3290:    to DEST_CH[i].  This function return the number of destinations.  */
 3291: 
 3292: static int
 3293: group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
 3294:     const regex_t *preg;
 3295:     const re_dfastate_t *state;
 3296:     re_node_set *dests_node;
 3297:     bitset *dests_ch;
 3298: {
 3299:   reg_errcode_t err;
 3300:   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 3301:   int i, j, k;
 3302:   int ndests; /* Number of the destinations from `state'.  */
 3303:   bitset accepts; /* Characters a node can accept.  */
 3304:   const re_node_set *cur_nodes = &state->nodes;
 3305:   bitset_empty (accepts);
 3306:   ndests = 0;
 3307: 
 3308:   /* For all the nodes belonging to `state',  */
 3309:   for (i = 0; i < cur_nodes->nelem; ++i)
 3310:     {
 3311:       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
 3312:       re_token_type_t type = node->type;
 3313:       unsigned int constraint = node->constraint;
 3314: 
 3315:       /* Enumerate all single byte character this node can accept.  */
 3316:       if (type == CHARACTER)
 3317: 	bitset_set (accepts, node->opr.c);
 3318:       else if (type == SIMPLE_BRACKET)
 3319: 	{
 3320: 	  bitset_merge (accepts, node->opr.sbcset);
 3321: 	}
 3322:       else if (type == OP_PERIOD)
 3323: 	{
 3324: 	  bitset_set_all (accepts);
 3325: 	  if (!(preg->syntax & RE_DOT_NEWLINE))
 3326: 	    bitset_clear (accepts, '\n');
 3327: 	  if (preg->syntax & RE_DOT_NOT_NULL)
 3328: 	    bitset_clear (accepts, '\0');
 3329: 	}
 3330:       else
 3331: 	continue;
 3332: 
 3333:       /* Check the `accepts' and sift the characters which are not
 3334: 	 match it the context.  */
 3335:       if (constraint)
 3336: 	{
 3337: 	  if (constraint & NEXT_WORD_CONSTRAINT)
 3338: 	    for (j = 0; j < BITSET_UINTS; ++j)
 3339: 	      accepts[j] &= dfa->word_char[j];
 3340: 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
 3341: 	    for (j = 0; j < BITSET_UINTS; ++j)
 3342: 	      accepts[j] &= ~dfa->word_char[j];
 3343: 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
 3344: 	    {
 3345: 	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
 3346: 	      bitset_empty (accepts);
 3347: 	      if (accepts_newline)
 3348: 		bitset_set (accepts, NEWLINE_CHAR);
 3349: 	      else
 3350: 		continue;
 3351: 	    }
 3352: 	}
 3353: 
 3354:       /* Then divide `accepts' into DFA states, or create a new
 3355: 	 state.  */
 3356:       for (j = 0; j < ndests; ++j)
 3357: 	{
 3358: 	  bitset intersec; /* Intersection sets, see below.  */
 3359: 	  bitset remains;
 3360: 	  /* Flags, see below.  */
 3361: 	  int has_intersec, not_subset, not_consumed;
 3362: 
 3363: 	  /* Optimization, skip if this state doesn't accept the character.  */
 3364: 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
 3365: 	    continue;
 3366: 
 3367: 	  /* Enumerate the intersection set of this state and `accepts'.  */
 3368: 	  has_intersec = 0;
 3369: 	  for (k = 0; k < BITSET_UINTS; ++k)
 3370: 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
 3371: 	  /* And skip if the intersection set is empty.  */
 3372: 	  if (!has_intersec)
 3373: 	    continue;
 3374: 
 3375: 	  /* Then check if this state is a subset of `accepts'.  */
 3376: 	  not_subset = not_consumed = 0;
 3377: 	  for (k = 0; k < BITSET_UINTS; ++k)
 3378: 	    {
 3379: 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
 3380: 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
 3381: 	    }
 3382: 
 3383: 	  /* If this state isn't a subset of `accepts', create a
 3384: 	     new group state, which has the `remains'. */
 3385: 	  if (not_subset)
 3386: 	    {
 3387: 	      bitset_copy (dests_ch[ndests], remains);
 3388: 	      bitset_copy (dests_ch[j], intersec);
 3389: 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
 3390: 	      if (BE (err != REG_NOERROR, 0))
 3391: 		goto error_return;
 3392: 	      ++ndests;
 3393: 	    }
 3394: 
 3395: 	  /* Put the position in the current group. */
 3396: 	  err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
 3397: 	  if (BE (err < 0, 0))
 3398: 	    goto error_return;
 3399: 
 3400: 	  /* If all characters are consumed, go to next node. */
 3401: 	  if (!not_consumed)
 3402: 	    break;
 3403: 	}
 3404:       /* Some characters remain, create a new group. */
 3405:       if (j == ndests)
 3406: 	{
 3407: 	  bitset_copy (dests_ch[ndests], accepts);
 3408: 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
 3409: 	  if (BE (err != REG_NOERROR, 0))
 3410: 	    goto error_return;
 3411: 	  ++ndests;
 3412: 	  bitset_empty (accepts);
 3413: 	}
 3414:     }
 3415:   return ndests;
 3416:  error_return:
 3417:   for (j = 0; j < ndests; ++j)
 3418:     re_node_set_free (dests_node + j);
 3419:   return -1;
 3420: }
 3421: 
 3422: #ifdef RE_ENABLE_I18N
 3423: /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
 3424:    Return the number of the bytes the node accepts.
 3425:    STR_IDX is the current index of the input string.
 3426: 
 3427:    This function handles the nodes which can accept one character, or
 3428:    one collating element like '.', '[a-z]', opposite to the other nodes
 3429:    can only accept one byte.  */
 3430: 
 3431: static int
 3432: check_node_accept_bytes (preg, node_idx, input, str_idx)
 3433:     const regex_t *preg;
 3434:     int node_idx, str_idx;
 3435:     const re_string_t *input;
 3436: {
 3437:   const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
 3438:   const re_token_t *node = dfa->nodes + node_idx;
 3439:   int elem_len = re_string_elem_size_at (input, str_idx);
 3440:   int char_len = re_string_char_size_at (input, str_idx);
 3441:   int i;
 3442: # ifdef _LIBC
 3443:   int j;
 3444:   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3445: # endif /* _LIBC */
 3446:   if (elem_len <= 1 && char_len <= 1)
 3447:     return 0;
 3448:   if (node->type == OP_PERIOD)
 3449:     {
 3450:       /* '.' accepts any one character except the following two cases.  */
 3451:       if ((!(preg->syntax & RE_DOT_NEWLINE) &&
 3452: 	   re_string_byte_at (input, str_idx) == '\n') ||
 3453: 	  ((preg->syntax & RE_DOT_NOT_NULL) &&
 3454: 	   re_string_byte_at (input, str_idx) == '\0'))
 3455: 	return 0;
 3456:       return char_len;
 3457:     }
 3458:   else if (node->type == COMPLEX_BRACKET)
 3459:     {
 3460:       const re_charset_t *cset = node->opr.mbcset;
 3461: # ifdef _LIBC
 3462:       const unsigned char *pin = ((char *) re_string_get_buffer (input)
 3463: 				  + str_idx);
 3464: # endif /* _LIBC */
 3465:       int match_len = 0;
 3466:       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
 3467: 		    ? re_string_wchar_at (input, str_idx) : 0);
 3468: 
 3469:       /* match with multibyte character?  */
 3470:       for (i = 0; i < cset->nmbchars; ++i)
 3471: 	if (wc == cset->mbchars[i])
 3472: 	  {
 3473: 	    match_len = char_len;
 3474: 	    goto check_node_accept_bytes_match;
 3475: 	  }
 3476:       /* match with character_class?  */
 3477:       for (i = 0; i < cset->nchar_classes; ++i)
 3478: 	{
 3479: 	  wctype_t wt = cset->char_classes[i];
 3480: 	  if (__iswctype (wc, wt))
 3481: 	    {
 3482: 	      match_len = char_len;
 3483: 	      goto check_node_accept_bytes_match;
 3484: 	    }
 3485: 	}
 3486: 
 3487: # ifdef _LIBC
 3488:       if (nrules != 0)
 3489: 	{
 3490: 	  unsigned int in_collseq = 0;
 3491: 	  const int32_t *table, *indirect;
 3492: 	  const unsigned char *weights, *extra;
 3493: 	  const char *collseqwc;
 3494: 	  int32_t idx;
 3495: 	  /* This #include defines a local function!  */
 3496: #  include <locale/weight.h>
 3497: 
 3498: 	  /* match with collating_symbol?  */
 3499: 	  if (cset->ncoll_syms)
 3500: 	    extra = (const unsigned char *)
 3501: 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3502: 	  for (i = 0; i < cset->ncoll_syms; ++i)
 3503: 	    {
 3504: 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
 3505: 	      /* Compare the length of input collating element and
 3506: 		 the length of current collating element.  */
 3507: 	      if (*coll_sym != elem_len)
 3508: 		continue;
 3509: 	      /* Compare each bytes.  */
 3510: 	      for (j = 0; j < *coll_sym; j++)
 3511: 		if (pin[j] != coll_sym[1 + j])
 3512: 		  break;
 3513: 	      if (j == *coll_sym)
 3514: 		{
 3515: 		  /* Match if every bytes is equal.  */
 3516: 		  match_len = j;
 3517: 		  goto check_node_accept_bytes_match;
 3518: 		}
 3519: 	    }
 3520: 
 3521: 	  if (cset->nranges)
 3522: 	    {
 3523: 	      if (elem_len <= char_len)
 3524: 		{
 3525: 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
 3526: 		  in_collseq = collseq_table_lookup (collseqwc, wc);
 3527: 		}
 3528: 	      else
 3529: 		in_collseq = find_collation_sequence_value (pin, elem_len);
 3530: 	    }
 3531: 	  /* match with range expression?  */
 3532: 	  for (i = 0; i < cset->nranges; ++i)
 3533: 	    if (cset->range_starts[i] <= in_collseq
 3534: 		&& in_collseq <= cset->range_ends[i])
 3535: 	      {
 3536: 		match_len = elem_len;
 3537: 		goto check_node_accept_bytes_match;
 3538: 	      }
 3539: 
 3540: 	  /* match with equivalence_class?  */
 3541: 	  if (cset->nequiv_classes)
 3542: 	    {
 3543: 	      const unsigned char *cp = pin;
 3544: 	      table = (const int32_t *)
 3545: 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 3546: 	      weights = (const unsigned char *)
 3547: 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 3548: 	      extra = (const unsigned char *)
 3549: 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 3550: 	      indirect = (const int32_t *)
 3551: 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 3552: 	      idx = findidx (&cp);
 3553: 	      if (idx > 0)
 3554: 		for (i = 0; i < cset->nequiv_classes; ++i)
 3555: 		  {
 3556: 		    int32_t equiv_class_idx = cset->equiv_classes[i];
 3557: 		    size_t weight_len = weights[idx];
 3558: 		    if (weight_len == weights[equiv_class_idx])
 3559: 		      {
 3560: 			int cnt = 0;
 3561: 			while (cnt <= weight_len
 3562: 			       && (weights[equiv_class_idx + 1 + cnt]
 3563: 				   == weights[idx + 1 + cnt]))
 3564: 			  ++cnt;
 3565: 			if (cnt > weight_len)
 3566: 			  {
 3567: 			    match_len = elem_len;
 3568: 			    goto check_node_accept_bytes_match;
 3569: 			  }
 3570: 		      }
 3571: 		  }
 3572: 	    }
 3573: 	}
 3574:       else
 3575: # endif /* _LIBC */
 3576: 	{
 3577: 	  /* match with range expression?  */
 3578: #if __GNUC__ >= 2
 3579: 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
 3580: #else
 3581: 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
 3582: 	  cmp_buf[2] = wc;
 3583: #endif
 3584: 	  for (i = 0; i < cset->nranges; ++i)
 3585: 	    {
 3586: 	      cmp_buf[0] = cset->range_starts[i];
 3587: 	      cmp_buf[4] = cset->range_ends[i];
 3588: 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
 3589: 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
 3590: 		{
 3591: 		  match_len = char_len;
 3592: 		  goto check_node_accept_bytes_match;
 3593: 		}
 3594: 	    }
 3595: 	}
 3596:     check_node_accept_bytes_match:
 3597:       if (!cset->non_match)
 3598: 	return match_len;
 3599:       else
 3600: 	{
 3601: 	  if (match_len > 0)
 3602: 	    return 0;
 3603: 	  else
 3604: 	    return (elem_len > char_len) ? elem_len : char_len;
 3605: 	}
 3606:     }
 3607:   return 0;
 3608: }
 3609: 
 3610: # ifdef _LIBC
 3611: static unsigned int
 3612: find_collation_sequence_value (mbs, mbs_len)
 3613:     const unsigned char *mbs;
 3614:     size_t mbs_len;
 3615: {
 3616:   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 3617:   if (nrules == 0)
 3618:     {
 3619:       if (mbs_len == 1)
 3620: 	{
 3621: 	  /* No valid character.  Match it as a single byte character.  */
 3622: 	  const unsigned char *collseq = (const unsigned char *)
 3623: 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
 3624: 	  return collseq[mbs[0]];
 3625: 	}
 3626:       return UINT_MAX;
 3627:     }
 3628:   else
 3629:     {
 3630:       int32_t idx;
 3631:       const unsigned char *extra = (const unsigned char *)
 3632: 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
 3633: 
 3634:       for (idx = 0; ;)
 3635: 	{
 3636: 	  int mbs_cnt, found = 0;
 3637: 	  int32_t elem_mbs_len;
 3638: 	  /* Skip the name of collating element name.  */
 3639: 	  idx = idx + extra[idx] + 1;
 3640: 	  elem_mbs_len = extra[idx++];
 3641: 	  if (mbs_len == elem_mbs_len)
 3642: 	    {
 3643: 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
 3644: 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
 3645: 		  break;
 3646: 	      if (mbs_cnt == elem_mbs_len)
 3647: 		/* Found the entry.  */
 3648: 		found = 1;
 3649: 	    }
 3650: 	  /* Skip the byte sequence of the collating element.  */
 3651: 	  idx += elem_mbs_len;
 3652: 	  /* Adjust for the alignment.  */
 3653: 	  idx = (idx + 3) & ~3;
 3654: 	  /* Skip the collation sequence value.  */
 3655: 	  idx += sizeof (uint32_t);
 3656: 	  /* Skip the wide char sequence of the collating element.  */
 3657: 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
 3658: 	  /* If we found the entry, return the sequence value.  */
 3659: 	  if (found)
 3660: 	    return *(uint32_t *) (extra + idx);
 3661: 	  /* Skip the collation sequence value.  */
 3662: 	  idx += sizeof (uint32_t);
 3663: 	}
 3664:     }
 3665: }
 3666: # endif /* _LIBC */
 3667: #endif /* RE_ENABLE_I18N */
 3668: 
 3669: /* Check whether the node accepts the byte which is IDX-th
 3670:    byte of the INPUT.  */
 3671: 
 3672: static int
 3673: check_node_accept (preg, node, mctx, idx)
 3674:     const regex_t *preg;
 3675:     const re_token_t *node;
 3676:     const re_match_context_t *mctx;
 3677:     int idx;
 3678: {
 3679:   unsigned char ch;
 3680:   if (node->constraint)
 3681:     {
 3682:       /* The node has constraints.  Check whether the current context
 3683: 	 satisfies the constraints.  */
 3684:       unsigned int context = re_string_context_at (mctx->input, idx,
 3685: 						   mctx->eflags,
 3686: 						   preg->newline_anchor);
 3687:       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
 3688: 	return 0;
 3689:     }
 3690:   ch = re_string_byte_at (mctx->input, idx);
 3691:   if (node->type == CHARACTER)
 3692:     return node->opr.c == ch;
 3693:   else if (node->type == SIMPLE_BRACKET)
 3694:     return bitset_contain (node->opr.sbcset, ch);
 3695:   else if (node->type == OP_PERIOD)
 3696:     return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
 3697: 	     || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
 3698:   else
 3699:     return 0;
 3700: }
 3701: 
 3702: /* Extend the buffers, if the buffers have run out.  */
 3703: 
 3704: static reg_errcode_t
 3705: extend_buffers (mctx)
 3706:      re_match_context_t *mctx;
 3707: {
 3708:   reg_errcode_t ret;
 3709:   re_string_t *pstr = mctx->input;
 3710: 
 3711:   /* Double the lengthes of the buffers.  */
 3712:   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
 3713:   if (BE (ret != REG_NOERROR, 0))
 3714:     return ret;
 3715: 
 3716:   if (mctx->state_log != NULL)
 3717:     {
 3718:       /* And double the length of state_log.  */
 3719:       re_dfastate_t **new_array;
 3720:       new_array = re_realloc (mctx->state_log, re_dfastate_t *,
 3721: 			      pstr->bufs_len * 2);
 3722:       if (BE (new_array == NULL, 0))
 3723: 	return REG_ESPACE;
 3724:       mctx->state_log = new_array;
 3725:     }
 3726: 
 3727:   /* Then reconstruct the buffers.  */
 3728:   if (pstr->icase)
 3729:     {
 3730: #ifdef RE_ENABLE_I18N
 3731:       if (MB_CUR_MAX > 1)
 3732: 	build_wcs_upper_buffer (pstr);
 3733:       else
 3734: #endif /* RE_ENABLE_I18N  */
 3735: 	build_upper_buffer (pstr);
 3736:     }
 3737:   else
 3738:     {
 3739: #ifdef RE_ENABLE_I18N
 3740:       if (MB_CUR_MAX > 1)
 3741: 	build_wcs_buffer (pstr);
 3742:       else
 3743: #endif /* RE_ENABLE_I18N  */
 3744: 	{
 3745: 	  if (pstr->trans != NULL)
 3746: 	    re_string_translate_buffer (pstr);
 3747: 	  else
 3748: 	    pstr->valid_len = pstr->bufs_len;
 3749: 	}
 3750:     }
 3751:   return REG_NOERROR;
 3752: }
 3753: 
 3754: 
 3755: /* Functions for matching context.  */
 3756: 
 3757: /* Initialize MCTX.  */
 3758: 
 3759: static reg_errcode_t
 3760: match_ctx_init (mctx, eflags, input, n)
 3761:     re_match_context_t *mctx;
 3762:     int eflags, n;
 3763:     re_string_t *input;
 3764: {
 3765:   mctx->eflags = eflags;
 3766:   mctx->input = input;
 3767:   mctx->match_last = -1;
 3768:   if (n > 0)
 3769:     {
 3770:       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
 3771:       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
 3772:       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
 3773: 	return REG_ESPACE;
 3774:     }
 3775:   else
 3776:     mctx->bkref_ents = NULL;
 3777:   mctx->nbkref_ents = 0;
 3778:   mctx->abkref_ents = n;
 3779:   mctx->max_mb_elem_len = 1;
 3780:   mctx->nsub_tops = 0;
 3781:   mctx->asub_tops = n;
 3782:   return REG_NOERROR;
 3783: }
 3784: 
 3785: /* Clean the entries which depend on the current input in MCTX.
 3786:    This function must be invoked when the matcher changes the start index
 3787:    of the input, or changes the input string.  */
 3788: 
 3789: static void
 3790: match_ctx_clean (mctx)
 3791:     re_match_context_t *mctx;
 3792: {
 3793:   match_ctx_free_subtops (mctx);
 3794:   mctx->nsub_tops = 0;
 3795:   mctx->nbkref_ents = 0;
 3796: }
 3797: 
 3798: /* Free all the memory associated with MCTX.  */
 3799: 
 3800: static void
 3801: match_ctx_free (mctx)
 3802:     re_match_context_t *mctx;
 3803: {
 3804:   match_ctx_free_subtops (mctx);
 3805:   re_free (mctx->sub_tops);
 3806:   re_free (mctx->bkref_ents);
 3807: }
 3808: 
 3809: /* Free all the memory associated with MCTX->SUB_TOPS.  */
 3810: 
 3811: static void
 3812: match_ctx_free_subtops (mctx)
 3813:      re_match_context_t *mctx;
 3814: {
 3815:   int st_idx;
 3816:   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
 3817:     {
 3818:       int sl_idx;
 3819:       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
 3820:       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
 3821: 	{
 3822: 	  re_sub_match_last_t *last = top->lasts[sl_idx];
 3823: 	  re_free (last->path.array);
 3824: 	  re_free (last);
 3825: 	}
 3826:       re_free (top->lasts);
 3827:       if (top->path)
 3828: 	{
 3829: 	  re_free (top->path->array);
 3830: 	  re_free (top->path);
 3831: 	}
 3832:       free (top);
 3833:     }
 3834: }
 3835: 
 3836: /* Add a new backreference entry to MCTX.
 3837:    Note that we assume that caller never call this function with duplicate
 3838:    entry, and call with STR_IDX which isn't smaller than any existing entry.
 3839: */
 3840: 
 3841: static reg_errcode_t
 3842: match_ctx_add_entry (mctx, node, str_idx, from, to)
 3843:      re_match_context_t *mctx;
 3844:      int node, str_idx, from, to;
 3845: {
 3846:   if (mctx->nbkref_ents >= mctx->abkref_ents)
 3847:     {
 3848:       struct re_backref_cache_entry* new_entry;
 3849:       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
 3850: 			      mctx->abkref_ents * 2);
 3851:       if (BE (new_entry == NULL, 0))
 3852: 	{
 3853: 	  re_free (mctx->bkref_ents);
 3854: 	  return REG_ESPACE;
 3855: 	}
 3856:       mctx->bkref_ents = new_entry;
 3857:       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
 3858: 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
 3859:       mctx->abkref_ents *= 2;
 3860:     }
 3861:   mctx->bkref_ents[mctx->nbkref_ents].node = node;
 3862:   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
 3863:   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
 3864:   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
 3865:   mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
 3866:   if (mctx->max_mb_elem_len < to - from)
 3867:     mctx->max_mb_elem_len = to - from;
 3868:   return REG_NOERROR;
 3869: }
 3870: 
 3871: /* Search for the first entry which has the same str_idx.
 3872:    Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
 3873: 
 3874: static int
 3875: search_cur_bkref_entry (mctx, str_idx)
 3876:      re_match_context_t *mctx;
 3877:      int str_idx;
 3878: {
 3879:   int left, right, mid;
 3880:   right = mctx->nbkref_ents;
 3881:   for (left = 0; left < right;)
 3882:     {
 3883:       mid = (left + right) / 2;
 3884:       if (mctx->bkref_ents[mid].str_idx < str_idx)
 3885: 	left = mid + 1;
 3886:       else
 3887: 	right = mid;
 3888:     }
 3889:   return left;
 3890: }
 3891: 
 3892: static void
 3893: match_ctx_clear_flag (mctx)
 3894:      re_match_context_t *mctx;
 3895: {
 3896:   int i;
 3897:   for (i = 0; i < mctx->nbkref_ents; ++i)
 3898:     {
 3899:       mctx->bkref_ents[i].flag = 0;
 3900:     }
 3901: }
 3902: 
 3903: /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
 3904:    at STR_IDX.  */
 3905: 
 3906: static reg_errcode_t
 3907: match_ctx_add_subtop (mctx, node, str_idx)
 3908:      re_match_context_t *mctx;
 3909:      int node, str_idx;
 3910: {
 3911: #ifdef DEBUG
 3912:   assert (mctx->sub_tops != NULL);
 3913:   assert (mctx->asub_tops > 0);
 3914: #endif
 3915:   if (mctx->nsub_tops == mctx->asub_tops)
 3916:     {
 3917:       re_sub_match_top_t **new_array;
 3918:       mctx->asub_tops *= 2;
 3919:       new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
 3920: 			      mctx->asub_tops);
 3921:       if (BE (new_array == NULL, 0))
 3922: 	return REG_ESPACE;
 3923:       mctx->sub_tops = new_array;
 3924:     }
 3925:   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
 3926:   if (mctx->sub_tops[mctx->nsub_tops] == NULL)
 3927:     return REG_ESPACE;
 3928:   mctx->sub_tops[mctx->nsub_tops]->node = node;
 3929:   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
 3930:   return REG_NOERROR;
 3931: }
 3932: 
 3933: /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
 3934:    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
 3935: 
 3936: static re_sub_match_last_t *
 3937: match_ctx_add_sublast (subtop, node, str_idx)
 3938:      re_sub_match_top_t *subtop;
 3939:      int node, str_idx;
 3940: {
 3941:   re_sub_match_last_t *new_entry;
 3942:   if (subtop->nlasts == subtop->alasts)
 3943:     {
 3944:       re_sub_match_last_t **new_array;
 3945:       subtop->alasts = 2 * subtop->alasts + 1;
 3946:       new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
 3947: 			      subtop->alasts);
 3948:       if (BE (new_array == NULL, 0))
 3949: 	return NULL;
 3950:       subtop->lasts = new_array;
 3951:     }
 3952:   new_entry = calloc (1, sizeof (re_sub_match_last_t));
 3953:   if (BE (new_entry == NULL, 0))
 3954:     return NULL;
 3955:   subtop->lasts[subtop->nlasts] = new_entry;
 3956:   new_entry->node = node;
 3957:   new_entry->str_idx = str_idx;
 3958:   ++subtop->nlasts;
 3959:   return new_entry;
 3960: }
 3961: 
 3962: static void
 3963: sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
 3964: 	       check_subexp)
 3965:     re_sift_context_t *sctx;
 3966:     re_dfastate_t **sifted_sts, **limited_sts;
 3967:     int last_node, last_str_idx, check_subexp;
 3968: {
 3969:   sctx->sifted_states = sifted_sts;
 3970:   sctx->limited_states = limited_sts;
 3971:   sctx->last_node = last_node;
 3972:   sctx->last_str_idx = last_str_idx;
 3973:   sctx->check_subexp = check_subexp;
 3974:   sctx->cur_bkref = -1;
 3975:   sctx->cls_subexp_idx = -1;
 3976:   re_node_set_init_empty (&sctx->limits);
 3977: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>