Annotation of embedaddon/smartmontools/regex/regexec.c, revision 1.1
1.1 ! misho 1: /* Extended regular expression matching and search library.
! 2: Copyright (C) 2002, 2003 Free Software Foundation, Inc.
! 3: This file is part of the GNU C Library.
! 4: Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
! 5:
! 6: The GNU C Library is free software; you can redistribute it and/or
! 7: modify it under the terms of the GNU Lesser General Public
! 8: License as published by the Free Software Foundation; either
! 9: version 2.1 of the License, or (at your option) any later version.
! 10:
! 11: The GNU C Library is distributed in the hope that it will be useful,
! 12: but WITHOUT ANY WARRANTY; without even the implied warranty of
! 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! 14: Lesser General Public License for more details.
! 15:
! 16: You should have received a copy of the GNU Lesser General Public
! 17: License along with the GNU C Library; if not, write to the Free
! 18: Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
! 19: 02111-1307 USA. */
! 20:
! 21: static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
! 22: re_string_t *input, int n);
! 23: static void match_ctx_clean (re_match_context_t *mctx);
! 24: static void match_ctx_free (re_match_context_t *cache);
! 25: static void match_ctx_free_subtops (re_match_context_t *mctx);
! 26: static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
! 27: int str_idx, int from, int to);
! 28: static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
! 29: static void match_ctx_clear_flag (re_match_context_t *mctx);
! 30: static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
! 31: int str_idx);
! 32: static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
! 33: int node, int str_idx);
! 34: static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
! 35: re_dfastate_t **limited_sts, int last_node,
! 36: int last_str_idx, int check_subexp);
! 37: static reg_errcode_t re_search_internal (const regex_t *preg,
! 38: const char *string, int length,
! 39: int start, int range, int stop,
! 40: size_t nmatch, regmatch_t pmatch[],
! 41: int eflags);
! 42: static int re_search_2_stub (struct re_pattern_buffer *bufp,
! 43: const char *string1, int length1,
! 44: const char *string2, int length2,
! 45: int start, int range, struct re_registers *regs,
! 46: int stop, int ret_len);
! 47: static int re_search_stub (struct re_pattern_buffer *bufp,
! 48: const char *string, int length, int start,
! 49: int range, int stop, struct re_registers *regs,
! 50: int ret_len);
! 51: static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
! 52: int nregs, int regs_allocated);
! 53: static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
! 54: const regex_t *preg,
! 55: const re_match_context_t *mctx,
! 56: int idx);
! 57: static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
! 58: re_match_context_t *mctx);
! 59: static int check_matching (const regex_t *preg, re_match_context_t *mctx,
! 60: int fl_search, int fl_longest_match);
! 61: static int check_halt_node_context (const re_dfa_t *dfa, int node,
! 62: unsigned int context);
! 63: static int check_halt_state_context (const regex_t *preg,
! 64: const re_dfastate_t *state,
! 65: const re_match_context_t *mctx, int idx);
! 66: static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
! 67: int cur_idx, int nmatch);
! 68: static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
! 69: const re_match_context_t *mctx,
! 70: int *pidx, int node, re_node_set *eps_via_nodes,
! 71: struct re_fail_stack_t *fs);
! 72: static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
! 73: int str_idx, int *dests, int nregs,
! 74: regmatch_t *regs,
! 75: re_node_set *eps_via_nodes);
! 76: static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
! 77: regmatch_t *regs, re_node_set *eps_via_nodes);
! 78: static reg_errcode_t set_regs (const regex_t *preg,
! 79: const re_match_context_t *mctx,
! 80: size_t nmatch, regmatch_t *pmatch,
! 81: int fl_backtrack);
! 82: static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
! 83:
! 84: #ifdef RE_ENABLE_I18N
! 85: static int sift_states_iter_mb (const regex_t *preg,
! 86: const re_match_context_t *mctx,
! 87: re_sift_context_t *sctx,
! 88: int node_idx, int str_idx, int max_str_idx);
! 89: #endif /* RE_ENABLE_I18N */
! 90: static reg_errcode_t sift_states_backward (const regex_t *preg,
! 91: re_match_context_t *mctx,
! 92: re_sift_context_t *sctx);
! 93: static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
! 94: re_match_context_t *mctx,
! 95: re_sift_context_t *sctx,
! 96: int str_idx,
! 97: re_node_set *dest_nodes);
! 98: static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
! 99: re_node_set *dest_nodes,
! 100: const re_node_set *candidates);
! 101: static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
! 102: re_node_set *dest_nodes,
! 103: const re_node_set *and_nodes);
! 104: static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
! 105: re_match_context_t *mctx, int dst_node,
! 106: int dst_idx, int src_node, int src_idx);
! 107: static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
! 108: int limit, re_node_set *eclosures,
! 109: int subexp_idx, int node, int str_idx);
! 110: static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
! 111: re_node_set *dest_nodes,
! 112: const re_node_set *candidates,
! 113: re_node_set *limits,
! 114: struct re_backref_cache_entry *bkref_ents,
! 115: int str_idx);
! 116: static reg_errcode_t sift_states_bkref (const regex_t *preg,
! 117: re_match_context_t *mctx,
! 118: re_sift_context_t *sctx,
! 119: int str_idx, re_node_set *dest_nodes);
! 120: static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
! 121: int next_state_log_idx);
! 122: static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
! 123: re_dfastate_t **src, int num);
! 124: static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
! 125: re_match_context_t *mctx,
! 126: re_dfastate_t *state, int fl_search);
! 127: static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
! 128: re_match_context_t *mctx,
! 129: re_node_set *cur_nodes,
! 130: int str_idx);
! 131: static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
! 132: re_dfastate_t *pstate,
! 133: int fl_search,
! 134: re_match_context_t *mctx);
! 135: #ifdef RE_ENABLE_I18N
! 136: static reg_errcode_t transit_state_mb (const regex_t *preg,
! 137: re_dfastate_t *pstate,
! 138: re_match_context_t *mctx);
! 139: #endif /* RE_ENABLE_I18N */
! 140: static reg_errcode_t transit_state_bkref (const regex_t *preg,
! 141: re_node_set *nodes,
! 142: re_match_context_t *mctx);
! 143: static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
! 144: int bkref_node, int bkref_str_idx);
! 145: static reg_errcode_t get_subexp_sub (const regex_t *preg,
! 146: re_match_context_t *mctx,
! 147: re_sub_match_top_t *sub_top,
! 148: re_sub_match_last_t *sub_last,
! 149: int bkref_node, int bkref_str);
! 150: static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
! 151: int subexp_idx, int fl_open);
! 152: static reg_errcode_t check_arrival (const regex_t *preg,
! 153: re_match_context_t *mctx,
! 154: state_array_t *path, int top_node,
! 155: int top_str, int last_node, int last_str,
! 156: int fl_open);
! 157: static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
! 158: re_dfa_t *dfa,
! 159: re_match_context_t *mctx,
! 160: int str_idx,
! 161: re_node_set *cur_nodes,
! 162: re_node_set *next_nodes);
! 163: static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
! 164: re_node_set *cur_nodes,
! 165: int ex_subexp, int fl_open);
! 166: static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
! 167: re_node_set *dst_nodes,
! 168: int target, int ex_subexp,
! 169: int fl_open);
! 170: static reg_errcode_t expand_bkref_cache (const regex_t *preg,
! 171: re_match_context_t *mctx,
! 172: re_node_set *cur_nodes, int cur_str,
! 173: int last_str, int subexp_num,
! 174: int fl_open);
! 175: static re_dfastate_t **build_trtable (const regex_t *dfa,
! 176: const re_dfastate_t *state,
! 177: int fl_search);
! 178: #ifdef RE_ENABLE_I18N
! 179: static int check_node_accept_bytes (const regex_t *preg, int node_idx,
! 180: const re_string_t *input, int idx);
! 181: # ifdef _LIBC
! 182: static unsigned int find_collation_sequence_value (const unsigned char *mbs,
! 183: size_t name_len);
! 184: # endif /* _LIBC */
! 185: #endif /* RE_ENABLE_I18N */
! 186: static int group_nodes_into_DFAstates (const regex_t *dfa,
! 187: const re_dfastate_t *state,
! 188: re_node_set *states_node,
! 189: bitset *states_ch);
! 190: static int check_node_accept (const regex_t *preg, const re_token_t *node,
! 191: const re_match_context_t *mctx, int idx);
! 192: static reg_errcode_t extend_buffers (re_match_context_t *mctx);
! 193:
! 194: /* Entry point for POSIX code. */
! 195:
! 196: /* regexec searches for a given pattern, specified by PREG, in the
! 197: string STRING.
! 198:
! 199: If NMATCH is zero or REG_NOSUB was set in the cflags argument to
! 200: `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
! 201: least NMATCH elements, and we set them to the offsets of the
! 202: corresponding matched substrings.
! 203:
! 204: EFLAGS specifies `execution flags' which affect matching: if
! 205: REG_NOTBOL is set, then ^ does not match at the beginning of the
! 206: string; if REG_NOTEOL is set, then $ does not match at the end.
! 207:
! 208: We return 0 if we find a match and REG_NOMATCH if not. */
! 209:
! 210: int
! 211: regexec (preg, string, nmatch, pmatch, eflags)
! 212: const regex_t *__restrict preg;
! 213: const char *__restrict string;
! 214: size_t nmatch;
! 215: regmatch_t pmatch[];
! 216: int eflags;
! 217: {
! 218: reg_errcode_t err;
! 219: int length = strlen (string);
! 220: if (preg->no_sub)
! 221: err = re_search_internal (preg, string, length, 0, length, length, 0,
! 222: NULL, eflags);
! 223: else
! 224: err = re_search_internal (preg, string, length, 0, length, length, nmatch,
! 225: pmatch, eflags);
! 226: return err != REG_NOERROR;
! 227: }
! 228: #ifdef _LIBC
! 229: weak_alias (__regexec, regexec)
! 230: #endif
! 231:
! 232: /* Entry points for GNU code. */
! 233:
! 234: /* re_match, re_search, re_match_2, re_search_2
! 235:
! 236: The former two functions operate on STRING with length LENGTH,
! 237: while the later two operate on concatenation of STRING1 and STRING2
! 238: with lengths LENGTH1 and LENGTH2, respectively.
! 239:
! 240: re_match() matches the compiled pattern in BUFP against the string,
! 241: starting at index START.
! 242:
! 243: re_search() first tries matching at index START, then it tries to match
! 244: starting from index START + 1, and so on. The last start position tried
! 245: is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
! 246: way as re_match().)
! 247:
! 248: The parameter STOP of re_{match,search}_2 specifies that no match exceeding
! 249: the first STOP characters of the concatenation of the strings should be
! 250: concerned.
! 251:
! 252: If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
! 253: and all groups is stroed in REGS. (For the "_2" variants, the offsets are
! 254: computed relative to the concatenation, not relative to the individual
! 255: strings.)
! 256:
! 257: On success, re_match* functions return the length of the match, re_search*
! 258: return the position of the start of the match. Return value -1 means no
! 259: match was found and -2 indicates an internal error. */
! 260:
! 261: int
! 262: re_match (bufp, string, length, start, regs)
! 263: struct re_pattern_buffer *bufp;
! 264: const char *string;
! 265: int length, start;
! 266: struct re_registers *regs;
! 267: {
! 268: return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
! 269: }
! 270: #ifdef _LIBC
! 271: weak_alias (__re_match, re_match)
! 272: #endif
! 273:
! 274: int
! 275: re_search (bufp, string, length, start, range, regs)
! 276: struct re_pattern_buffer *bufp;
! 277: const char *string;
! 278: int length, start, range;
! 279: struct re_registers *regs;
! 280: {
! 281: return re_search_stub (bufp, string, length, start, range, length, regs, 0);
! 282: }
! 283: #ifdef _LIBC
! 284: weak_alias (__re_search, re_search)
! 285: #endif
! 286:
! 287: int
! 288: re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
! 289: struct re_pattern_buffer *bufp;
! 290: const char *string1, *string2;
! 291: int length1, length2, start, stop;
! 292: struct re_registers *regs;
! 293: {
! 294: return re_search_2_stub (bufp, string1, length1, string2, length2,
! 295: start, 0, regs, stop, 1);
! 296: }
! 297: #ifdef _LIBC
! 298: weak_alias (__re_match_2, re_match_2)
! 299: #endif
! 300:
! 301: int
! 302: re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
! 303: struct re_pattern_buffer *bufp;
! 304: const char *string1, *string2;
! 305: int length1, length2, start, range, stop;
! 306: struct re_registers *regs;
! 307: {
! 308: return re_search_2_stub (bufp, string1, length1, string2, length2,
! 309: start, range, regs, stop, 0);
! 310: }
! 311: #ifdef _LIBC
! 312: weak_alias (__re_search_2, re_search_2)
! 313: #endif
! 314:
! 315: static int
! 316: re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
! 317: stop, ret_len)
! 318: struct re_pattern_buffer *bufp;
! 319: const char *string1, *string2;
! 320: int length1, length2, start, range, stop, ret_len;
! 321: struct re_registers *regs;
! 322: {
! 323: const char *str;
! 324: int rval;
! 325: int len = length1 + length2;
! 326: int free_str = 0;
! 327:
! 328: if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
! 329: return -2;
! 330:
! 331: /* Concatenate the strings. */
! 332: if (length2 > 0)
! 333: if (length1 > 0)
! 334: {
! 335: char *s = re_malloc (char, len);
! 336:
! 337: if (BE (s == NULL, 0))
! 338: return -2;
! 339: memcpy (s, string1, length1);
! 340: memcpy (s + length1, string2, length2);
! 341: str = s;
! 342: free_str = 1;
! 343: }
! 344: else
! 345: str = string2;
! 346: else
! 347: str = string1;
! 348:
! 349: rval = re_search_stub (bufp, str, len, start, range, stop, regs,
! 350: ret_len);
! 351: if (free_str)
! 352: re_free ((char *) str);
! 353: return rval;
! 354: }
! 355:
! 356: /* The parameters have the same meaning as those of re_search.
! 357: Additional parameters:
! 358: If RET_LEN is nonzero the length of the match is returned (re_match style);
! 359: otherwise the position of the match is returned. */
! 360:
! 361: static int
! 362: re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
! 363: struct re_pattern_buffer *bufp;
! 364: const char *string;
! 365: int length, start, range, stop, ret_len;
! 366: struct re_registers *regs;
! 367: {
! 368: reg_errcode_t result;
! 369: regmatch_t *pmatch;
! 370: int nregs, rval;
! 371: int eflags = 0;
! 372:
! 373: /* Check for out-of-range. */
! 374: if (BE (start < 0 || start > length, 0))
! 375: return -1;
! 376: if (BE (start + range > length, 0))
! 377: range = length - start;
! 378: else if (BE (start + range < 0, 0))
! 379: range = -start;
! 380:
! 381: eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
! 382: eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
! 383:
! 384: /* Compile fastmap if we haven't yet. */
! 385: if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
! 386: re_compile_fastmap (bufp);
! 387:
! 388: if (BE (bufp->no_sub, 0))
! 389: regs = NULL;
! 390:
! 391: /* We need at least 1 register. */
! 392: if (regs == NULL)
! 393: nregs = 1;
! 394: else if (BE (bufp->regs_allocated == REGS_FIXED &&
! 395: regs->num_regs < bufp->re_nsub + 1, 0))
! 396: {
! 397: nregs = regs->num_regs;
! 398: if (BE (nregs < 1, 0))
! 399: {
! 400: /* Nothing can be copied to regs. */
! 401: regs = NULL;
! 402: nregs = 1;
! 403: }
! 404: }
! 405: else
! 406: nregs = bufp->re_nsub + 1;
! 407: pmatch = re_malloc (regmatch_t, nregs);
! 408: if (BE (pmatch == NULL, 0))
! 409: return -2;
! 410:
! 411: result = re_search_internal (bufp, string, length, start, range, stop,
! 412: nregs, pmatch, eflags);
! 413:
! 414: rval = 0;
! 415:
! 416: /* I hope we needn't fill ther regs with -1's when no match was found. */
! 417: if (result != REG_NOERROR)
! 418: rval = -1;
! 419: else if (regs != NULL)
! 420: {
! 421: /* If caller wants register contents data back, copy them. */
! 422: bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
! 423: bufp->regs_allocated);
! 424: if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
! 425: rval = -2;
! 426: }
! 427:
! 428: if (BE (rval == 0, 1))
! 429: {
! 430: if (ret_len)
! 431: {
! 432: assert (pmatch[0].rm_so == start);
! 433: rval = pmatch[0].rm_eo - start;
! 434: }
! 435: else
! 436: rval = pmatch[0].rm_so;
! 437: }
! 438: re_free (pmatch);
! 439: return rval;
! 440: }
! 441:
! 442: static unsigned
! 443: re_copy_regs (regs, pmatch, nregs, regs_allocated)
! 444: struct re_registers *regs;
! 445: regmatch_t *pmatch;
! 446: int nregs, regs_allocated;
! 447: {
! 448: int rval = REGS_REALLOCATE;
! 449: int i;
! 450: int need_regs = nregs + 1;
! 451: /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
! 452: uses. */
! 453:
! 454: /* Have the register data arrays been allocated? */
! 455: if (regs_allocated == REGS_UNALLOCATED)
! 456: { /* No. So allocate them with malloc. */
! 457: regs->start = re_malloc (regoff_t, need_regs);
! 458: if (BE (regs->start == NULL, 0))
! 459: return REGS_UNALLOCATED;
! 460: regs->end = re_malloc (regoff_t, need_regs);
! 461: if (BE (regs->end == NULL, 0))
! 462: {
! 463: re_free (regs->start);
! 464: return REGS_UNALLOCATED;
! 465: }
! 466: regs->num_regs = need_regs;
! 467: }
! 468: else if (regs_allocated == REGS_REALLOCATE)
! 469: { /* Yes. If we need more elements than were already
! 470: allocated, reallocate them. If we need fewer, just
! 471: leave it alone. */
! 472: if (need_regs > regs->num_regs)
! 473: {
! 474: regs->start = re_realloc (regs->start, regoff_t, need_regs);
! 475: if (BE (regs->start == NULL, 0))
! 476: {
! 477: if (regs->end != NULL)
! 478: re_free (regs->end);
! 479: return REGS_UNALLOCATED;
! 480: }
! 481: regs->end = re_realloc (regs->end, regoff_t, need_regs);
! 482: if (BE (regs->end == NULL, 0))
! 483: {
! 484: re_free (regs->start);
! 485: return REGS_UNALLOCATED;
! 486: }
! 487: regs->num_regs = need_regs;
! 488: }
! 489: }
! 490: else
! 491: {
! 492: assert (regs_allocated == REGS_FIXED);
! 493: /* This function may not be called with REGS_FIXED and nregs too big. */
! 494: assert (regs->num_regs >= nregs);
! 495: rval = REGS_FIXED;
! 496: }
! 497:
! 498: /* Copy the regs. */
! 499: for (i = 0; i < nregs; ++i)
! 500: {
! 501: regs->start[i] = pmatch[i].rm_so;
! 502: regs->end[i] = pmatch[i].rm_eo;
! 503: }
! 504: for ( ; i < regs->num_regs; ++i)
! 505: regs->start[i] = regs->end[i] = -1;
! 506:
! 507: return rval;
! 508: }
! 509:
! 510: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
! 511: ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
! 512: this memory for recording register information. STARTS and ENDS
! 513: must be allocated using the malloc library routine, and must each
! 514: be at least NUM_REGS * sizeof (regoff_t) bytes long.
! 515:
! 516: If NUM_REGS == 0, then subsequent matches should allocate their own
! 517: register data.
! 518:
! 519: Unless this function is called, the first search or match using
! 520: PATTERN_BUFFER will allocate its own register data, without
! 521: freeing the old data. */
! 522:
! 523: void
! 524: re_set_registers (bufp, regs, num_regs, starts, ends)
! 525: struct re_pattern_buffer *bufp;
! 526: struct re_registers *regs;
! 527: unsigned num_regs;
! 528: regoff_t *starts, *ends;
! 529: {
! 530: if (num_regs)
! 531: {
! 532: bufp->regs_allocated = REGS_REALLOCATE;
! 533: regs->num_regs = num_regs;
! 534: regs->start = starts;
! 535: regs->end = ends;
! 536: }
! 537: else
! 538: {
! 539: bufp->regs_allocated = REGS_UNALLOCATED;
! 540: regs->num_regs = 0;
! 541: regs->start = regs->end = (regoff_t *) 0;
! 542: }
! 543: }
! 544: #ifdef _LIBC
! 545: weak_alias (__re_set_registers, re_set_registers)
! 546: #endif
! 547:
! 548: /* Entry points compatible with 4.2 BSD regex library. We don't define
! 549: them unless specifically requested. */
! 550:
! 551: #if defined _REGEX_RE_COMP || defined _LIBC
! 552: int
! 553: # ifdef _LIBC
! 554: weak_function
! 555: # endif
! 556: re_exec (s)
! 557: const char *s;
! 558: {
! 559: return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
! 560: }
! 561: #endif /* _REGEX_RE_COMP */
! 562:
! 563: static re_node_set empty_set;
! 564:
! 565: /* Internal entry point. */
! 566:
! 567: /* Searches for a compiled pattern PREG in the string STRING, whose
! 568: length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
! 569: mingings with regexec. START, and RANGE have the same meanings
! 570: with re_search.
! 571: Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
! 572: otherwise return the error code.
! 573: Note: We assume front end functions already check ranges.
! 574: (START + RANGE >= 0 && START + RANGE <= LENGTH) */
! 575:
! 576: static reg_errcode_t
! 577: re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
! 578: eflags)
! 579: const regex_t *preg;
! 580: const char *string;
! 581: int length, start, range, stop, eflags;
! 582: size_t nmatch;
! 583: regmatch_t pmatch[];
! 584: {
! 585: reg_errcode_t err;
! 586: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 587: re_string_t input;
! 588: int left_lim, right_lim, incr;
! 589: int fl_longest_match, match_first, match_last = -1;
! 590: int fast_translate, sb;
! 591: re_match_context_t mctx;
! 592: char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
! 593: && range && !preg->can_be_null) ? preg->fastmap : NULL);
! 594:
! 595: /* Check if the DFA haven't been compiled. */
! 596: if (BE (preg->used == 0 || dfa->init_state == NULL
! 597: || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
! 598: || dfa->init_state_begbuf == NULL, 0))
! 599: return REG_NOMATCH;
! 600:
! 601: re_node_set_init_empty (&empty_set);
! 602: memset (&mctx, '\0', sizeof (re_match_context_t));
! 603:
! 604: /* We must check the longest matching, if nmatch > 0. */
! 605: fl_longest_match = (nmatch != 0 || dfa->nbackref);
! 606:
! 607: err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
! 608: preg->translate, preg->syntax & RE_ICASE);
! 609: if (BE (err != REG_NOERROR, 0))
! 610: goto free_return;
! 611: input.stop = stop;
! 612:
! 613: err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
! 614: if (BE (err != REG_NOERROR, 0))
! 615: goto free_return;
! 616:
! 617: /* We will log all the DFA states through which the dfa pass,
! 618: if nmatch > 1, or this dfa has "multibyte node", which is a
! 619: back-reference or a node which can accept multibyte character or
! 620: multi character collating element. */
! 621: if (nmatch > 1 || dfa->has_mb_node)
! 622: {
! 623: mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
! 624: if (BE (mctx.state_log == NULL, 0))
! 625: {
! 626: err = REG_ESPACE;
! 627: goto free_return;
! 628: }
! 629: }
! 630: else
! 631: mctx.state_log = NULL;
! 632:
! 633: #ifdef DEBUG
! 634: /* We assume front-end functions already check them. */
! 635: assert (start + range >= 0 && start + range <= length);
! 636: #endif
! 637:
! 638: match_first = start;
! 639: input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
! 640: : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
! 641:
! 642: /* Check incrementally whether of not the input string match. */
! 643: incr = (range < 0) ? -1 : 1;
! 644: left_lim = (range < 0) ? start + range : start;
! 645: right_lim = (range < 0) ? start : start + range;
! 646: sb = MB_CUR_MAX == 1;
! 647: fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
! 648:
! 649: for (;;)
! 650: {
! 651: /* At first get the current byte from input string. */
! 652: if (fastmap)
! 653: {
! 654: if (BE (fast_translate, 1))
! 655: {
! 656: unsigned RE_TRANSLATE_TYPE t
! 657: = (unsigned RE_TRANSLATE_TYPE) preg->translate;
! 658: if (BE (range >= 0, 1))
! 659: {
! 660: if (BE (t != NULL, 0))
! 661: {
! 662: while (BE (match_first < right_lim, 1)
! 663: && !fastmap[t[(unsigned char) string[match_first]]])
! 664: ++match_first;
! 665: }
! 666: else
! 667: {
! 668: while (BE (match_first < right_lim, 1)
! 669: && !fastmap[(unsigned char) string[match_first]])
! 670: ++match_first;
! 671: }
! 672: if (BE (match_first == right_lim, 0))
! 673: {
! 674: int ch = match_first >= length
! 675: ? 0 : (unsigned char) string[match_first];
! 676: if (!fastmap[t ? t[ch] : ch])
! 677: break;
! 678: }
! 679: }
! 680: else
! 681: {
! 682: while (match_first >= left_lim)
! 683: {
! 684: int ch = match_first >= length
! 685: ? 0 : (unsigned char) string[match_first];
! 686: if (fastmap[t ? t[ch] : ch])
! 687: break;
! 688: --match_first;
! 689: }
! 690: if (match_first < left_lim)
! 691: break;
! 692: }
! 693: }
! 694: else
! 695: {
! 696: int ch;
! 697:
! 698: do
! 699: {
! 700: /* In this case, we can't determine easily the current byte,
! 701: since it might be a component byte of a multibyte
! 702: character. Then we use the constructed buffer
! 703: instead. */
! 704: /* If MATCH_FIRST is out of the valid range, reconstruct the
! 705: buffers. */
! 706: if (input.raw_mbs_idx + input.valid_len <= match_first
! 707: || match_first < input.raw_mbs_idx)
! 708: {
! 709: err = re_string_reconstruct (&input, match_first, eflags,
! 710: preg->newline_anchor);
! 711: if (BE (err != REG_NOERROR, 0))
! 712: goto free_return;
! 713: }
! 714: /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
! 715: Note that MATCH_FIRST must not be smaller than 0. */
! 716: ch = ((match_first >= length) ? 0
! 717: : re_string_byte_at (&input,
! 718: match_first - input.raw_mbs_idx));
! 719: if (fastmap[ch])
! 720: break;
! 721: match_first += incr;
! 722: }
! 723: while (match_first >= left_lim && match_first <= right_lim);
! 724: if (! fastmap[ch])
! 725: break;
! 726: }
! 727: }
! 728:
! 729: /* Reconstruct the buffers so that the matcher can assume that
! 730: the matching starts from the begining of the buffer. */
! 731: err = re_string_reconstruct (&input, match_first, eflags,
! 732: preg->newline_anchor);
! 733: if (BE (err != REG_NOERROR, 0))
! 734: goto free_return;
! 735: #ifdef RE_ENABLE_I18N
! 736: /* Eliminate it when it is a component of a multibyte character
! 737: and isn't the head of a multibyte character. */
! 738: if (sb || re_string_first_byte (&input, 0))
! 739: #endif
! 740: {
! 741: /* It seems to be appropriate one, then use the matcher. */
! 742: /* We assume that the matching starts from 0. */
! 743: mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
! 744: match_last = check_matching (preg, &mctx, 0, fl_longest_match);
! 745: if (match_last != -1)
! 746: {
! 747: if (BE (match_last == -2, 0))
! 748: {
! 749: err = REG_ESPACE;
! 750: goto free_return;
! 751: }
! 752: else
! 753: {
! 754: mctx.match_last = match_last;
! 755: if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
! 756: {
! 757: re_dfastate_t *pstate = mctx.state_log[match_last];
! 758: mctx.last_node = check_halt_state_context (preg, pstate,
! 759: &mctx, match_last);
! 760: }
! 761: if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
! 762: || dfa->nbackref)
! 763: {
! 764: err = prune_impossible_nodes (preg, &mctx);
! 765: if (err == REG_NOERROR)
! 766: break;
! 767: if (BE (err != REG_NOMATCH, 0))
! 768: goto free_return;
! 769: }
! 770: else
! 771: break; /* We found a matching. */
! 772: }
! 773: }
! 774: match_ctx_clean (&mctx);
! 775: }
! 776: /* Update counter. */
! 777: match_first += incr;
! 778: if (match_first < left_lim || right_lim < match_first)
! 779: break;
! 780: }
! 781:
! 782: /* Set pmatch[] if we need. */
! 783: if (match_last != -1 && nmatch > 0)
! 784: {
! 785: int reg_idx;
! 786:
! 787: /* Initialize registers. */
! 788: for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
! 789: pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
! 790:
! 791: /* Set the points where matching start/end. */
! 792: pmatch[0].rm_so = 0;
! 793: pmatch[0].rm_eo = mctx.match_last;
! 794:
! 795: if (!preg->no_sub && nmatch > 1)
! 796: {
! 797: err = set_regs (preg, &mctx, nmatch, pmatch,
! 798: dfa->has_plural_match && dfa->nbackref > 0);
! 799: if (BE (err != REG_NOERROR, 0))
! 800: goto free_return;
! 801: }
! 802:
! 803: /* At last, add the offset to the each registers, since we slided
! 804: the buffers so that We can assume that the matching starts from 0. */
! 805: for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
! 806: if (pmatch[reg_idx].rm_so != -1)
! 807: {
! 808: pmatch[reg_idx].rm_so += match_first;
! 809: pmatch[reg_idx].rm_eo += match_first;
! 810: }
! 811: }
! 812: err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
! 813: free_return:
! 814: re_free (mctx.state_log);
! 815: if (dfa->nbackref)
! 816: match_ctx_free (&mctx);
! 817: re_string_destruct (&input);
! 818: return err;
! 819: }
! 820:
! 821: static reg_errcode_t
! 822: prune_impossible_nodes (preg, mctx)
! 823: const regex_t *preg;
! 824: re_match_context_t *mctx;
! 825: {
! 826: int halt_node, match_last;
! 827: reg_errcode_t ret;
! 828: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 829: re_dfastate_t **sifted_states;
! 830: re_dfastate_t **lim_states = NULL;
! 831: re_sift_context_t sctx;
! 832: #ifdef DEBUG
! 833: assert (mctx->state_log != NULL);
! 834: #endif
! 835: match_last = mctx->match_last;
! 836: halt_node = mctx->last_node;
! 837: sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
! 838: if (BE (sifted_states == NULL, 0))
! 839: {
! 840: ret = REG_ESPACE;
! 841: goto free_return;
! 842: }
! 843: if (dfa->nbackref)
! 844: {
! 845: lim_states = re_malloc (re_dfastate_t *, match_last + 1);
! 846: if (BE (lim_states == NULL, 0))
! 847: {
! 848: ret = REG_ESPACE;
! 849: goto free_return;
! 850: }
! 851: while (1)
! 852: {
! 853: memset (lim_states, '\0',
! 854: sizeof (re_dfastate_t *) * (match_last + 1));
! 855: match_ctx_clear_flag (mctx);
! 856: sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
! 857: match_last, 0);
! 858: ret = sift_states_backward (preg, mctx, &sctx);
! 859: re_node_set_free (&sctx.limits);
! 860: if (BE (ret != REG_NOERROR, 0))
! 861: goto free_return;
! 862: if (sifted_states[0] != NULL || lim_states[0] != NULL)
! 863: break;
! 864: do
! 865: {
! 866: --match_last;
! 867: if (match_last < 0)
! 868: {
! 869: ret = REG_NOMATCH;
! 870: goto free_return;
! 871: }
! 872: } while (!mctx->state_log[match_last]->halt);
! 873: halt_node = check_halt_state_context (preg,
! 874: mctx->state_log[match_last],
! 875: mctx, match_last);
! 876: }
! 877: ret = merge_state_array (dfa, sifted_states, lim_states,
! 878: match_last + 1);
! 879: re_free (lim_states);
! 880: lim_states = NULL;
! 881: if (BE (ret != REG_NOERROR, 0))
! 882: goto free_return;
! 883: }
! 884: else
! 885: {
! 886: sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
! 887: match_last, 0);
! 888: ret = sift_states_backward (preg, mctx, &sctx);
! 889: re_node_set_free (&sctx.limits);
! 890: if (BE (ret != REG_NOERROR, 0))
! 891: goto free_return;
! 892: }
! 893: re_free (mctx->state_log);
! 894: mctx->state_log = sifted_states;
! 895: sifted_states = NULL;
! 896: mctx->last_node = halt_node;
! 897: mctx->match_last = match_last;
! 898: ret = REG_NOERROR;
! 899: free_return:
! 900: re_free (sifted_states);
! 901: re_free (lim_states);
! 902: return ret;
! 903: }
! 904:
! 905: /* Acquire an initial state and return it.
! 906: We must select appropriate initial state depending on the context,
! 907: since initial states may have constraints like "\<", "^", etc.. */
! 908:
! 909: static inline re_dfastate_t *
! 910: acquire_init_state_context (err, preg, mctx, idx)
! 911: reg_errcode_t *err;
! 912: const regex_t *preg;
! 913: const re_match_context_t *mctx;
! 914: int idx;
! 915: {
! 916: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 917:
! 918: *err = REG_NOERROR;
! 919: if (dfa->init_state->has_constraint)
! 920: {
! 921: unsigned int context;
! 922: context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
! 923: preg->newline_anchor);
! 924: if (IS_WORD_CONTEXT (context))
! 925: return dfa->init_state_word;
! 926: else if (IS_ORDINARY_CONTEXT (context))
! 927: return dfa->init_state;
! 928: else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
! 929: return dfa->init_state_begbuf;
! 930: else if (IS_NEWLINE_CONTEXT (context))
! 931: return dfa->init_state_nl;
! 932: else if (IS_BEGBUF_CONTEXT (context))
! 933: {
! 934: /* It is relatively rare case, then calculate on demand. */
! 935: return re_acquire_state_context (err, dfa,
! 936: dfa->init_state->entrance_nodes,
! 937: context);
! 938: }
! 939: else
! 940: /* Must not happen? */
! 941: return dfa->init_state;
! 942: }
! 943: else
! 944: return dfa->init_state;
! 945: }
! 946:
! 947: /* Check whether the regular expression match input string INPUT or not,
! 948: and return the index where the matching end, return -1 if not match,
! 949: or return -2 in case of an error.
! 950: FL_SEARCH means we must search where the matching starts,
! 951: FL_LONGEST_MATCH means we want the POSIX longest matching.
! 952: Note that the matcher assume that the maching starts from the current
! 953: index of the buffer. */
! 954:
! 955: static int
! 956: check_matching (preg, mctx, fl_search, fl_longest_match)
! 957: const regex_t *preg;
! 958: re_match_context_t *mctx;
! 959: int fl_search, fl_longest_match;
! 960: {
! 961: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 962: reg_errcode_t err;
! 963: int match = 0;
! 964: int match_last = -1;
! 965: int cur_str_idx = re_string_cur_idx (mctx->input);
! 966: re_dfastate_t *cur_state;
! 967:
! 968: cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
! 969: /* An initial state must not be NULL(invalid state). */
! 970: if (BE (cur_state == NULL, 0))
! 971: return -2;
! 972: if (mctx->state_log != NULL)
! 973: mctx->state_log[cur_str_idx] = cur_state;
! 974:
! 975: /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
! 976: later. E.g. Processing back references. */
! 977: if (dfa->nbackref)
! 978: {
! 979: err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
! 980: if (BE (err != REG_NOERROR, 0))
! 981: return err;
! 982: }
! 983:
! 984: if (cur_state->has_backref)
! 985: {
! 986: err = transit_state_bkref (preg, &cur_state->nodes, mctx);
! 987: if (BE (err != REG_NOERROR, 0))
! 988: return err;
! 989: }
! 990:
! 991: /* If the RE accepts NULL string. */
! 992: if (cur_state->halt)
! 993: {
! 994: if (!cur_state->has_constraint
! 995: || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
! 996: {
! 997: if (!fl_longest_match)
! 998: return cur_str_idx;
! 999: else
! 1000: {
! 1001: match_last = cur_str_idx;
! 1002: match = 1;
! 1003: }
! 1004: }
! 1005: }
! 1006:
! 1007: while (!re_string_eoi (mctx->input))
! 1008: {
! 1009: cur_state = transit_state (&err, preg, mctx, cur_state,
! 1010: fl_search && !match);
! 1011: if (cur_state == NULL) /* Reached at the invalid state or an error. */
! 1012: {
! 1013: cur_str_idx = re_string_cur_idx (mctx->input);
! 1014: if (BE (err != REG_NOERROR, 0))
! 1015: return -2;
! 1016: if (fl_search && !match)
! 1017: {
! 1018: /* Restart from initial state, since we are searching
! 1019: the point from where matching start. */
! 1020: #ifdef RE_ENABLE_I18N
! 1021: if (MB_CUR_MAX == 1
! 1022: || re_string_first_byte (mctx->input, cur_str_idx))
! 1023: #endif /* RE_ENABLE_I18N */
! 1024: cur_state = acquire_init_state_context (&err, preg, mctx,
! 1025: cur_str_idx);
! 1026: if (BE (cur_state == NULL && err != REG_NOERROR, 0))
! 1027: return -2;
! 1028: if (mctx->state_log != NULL)
! 1029: mctx->state_log[cur_str_idx] = cur_state;
! 1030: }
! 1031: else if (!fl_longest_match && match)
! 1032: break;
! 1033: else /* (fl_longest_match && match) || (!fl_search && !match) */
! 1034: {
! 1035: if (mctx->state_log == NULL)
! 1036: break;
! 1037: else
! 1038: {
! 1039: int max = mctx->state_log_top;
! 1040: for (; cur_str_idx <= max; ++cur_str_idx)
! 1041: if (mctx->state_log[cur_str_idx] != NULL)
! 1042: break;
! 1043: if (cur_str_idx > max)
! 1044: break;
! 1045: }
! 1046: }
! 1047: }
! 1048:
! 1049: if (cur_state != NULL && cur_state->halt)
! 1050: {
! 1051: /* Reached at a halt state.
! 1052: Check the halt state can satisfy the current context. */
! 1053: if (!cur_state->has_constraint
! 1054: || check_halt_state_context (preg, cur_state, mctx,
! 1055: re_string_cur_idx (mctx->input)))
! 1056: {
! 1057: /* We found an appropriate halt state. */
! 1058: match_last = re_string_cur_idx (mctx->input);
! 1059: match = 1;
! 1060: if (!fl_longest_match)
! 1061: break;
! 1062: }
! 1063: }
! 1064: }
! 1065: return match_last;
! 1066: }
! 1067:
! 1068: /* Check NODE match the current context. */
! 1069:
! 1070: static int check_halt_node_context (dfa, node, context)
! 1071: const re_dfa_t *dfa;
! 1072: int node;
! 1073: unsigned int context;
! 1074: {
! 1075: re_token_type_t type = dfa->nodes[node].type;
! 1076: unsigned int constraint = dfa->nodes[node].constraint;
! 1077: if (type != END_OF_RE)
! 1078: return 0;
! 1079: if (!constraint)
! 1080: return 1;
! 1081: if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
! 1082: return 0;
! 1083: return 1;
! 1084: }
! 1085:
! 1086: /* Check the halt state STATE match the current context.
! 1087: Return 0 if not match, if the node, STATE has, is a halt node and
! 1088: match the context, return the node. */
! 1089:
! 1090: static int
! 1091: check_halt_state_context (preg, state, mctx, idx)
! 1092: const regex_t *preg;
! 1093: const re_dfastate_t *state;
! 1094: const re_match_context_t *mctx;
! 1095: int idx;
! 1096: {
! 1097: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 1098: int i;
! 1099: unsigned int context;
! 1100: #ifdef DEBUG
! 1101: assert (state->halt);
! 1102: #endif
! 1103: context = re_string_context_at (mctx->input, idx, mctx->eflags,
! 1104: preg->newline_anchor);
! 1105: for (i = 0; i < state->nodes.nelem; ++i)
! 1106: if (check_halt_node_context (dfa, state->nodes.elems[i], context))
! 1107: return state->nodes.elems[i];
! 1108: return 0;
! 1109: }
! 1110:
! 1111: /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
! 1112: corresponding to the DFA).
! 1113: Return the destination node, and update EPS_VIA_NODES, return -1 in case
! 1114: of errors. */
! 1115:
! 1116: static int
! 1117: proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
! 1118: const regex_t *preg;
! 1119: regmatch_t *regs;
! 1120: const re_match_context_t *mctx;
! 1121: int nregs, *pidx, node;
! 1122: re_node_set *eps_via_nodes;
! 1123: struct re_fail_stack_t *fs;
! 1124: {
! 1125: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 1126: int i, err, dest_node;
! 1127: dest_node = -1;
! 1128: if (IS_EPSILON_NODE (dfa->nodes[node].type))
! 1129: {
! 1130: re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
! 1131: int ndest, dest_nodes[2];
! 1132: err = re_node_set_insert (eps_via_nodes, node);
! 1133: if (BE (err < 0, 0))
! 1134: return -1;
! 1135: /* Pick up valid destinations. */
! 1136: for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
! 1137: {
! 1138: int candidate = dfa->edests[node].elems[i];
! 1139: if (!re_node_set_contains (cur_nodes, candidate))
! 1140: continue;
! 1141: dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
! 1142: dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
! 1143: ++ndest;
! 1144: }
! 1145: if (ndest <= 1)
! 1146: return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
! 1147: /* In order to avoid infinite loop like "(a*)*". */
! 1148: if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
! 1149: return dest_nodes[1];
! 1150: if (fs != NULL)
! 1151: push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
! 1152: return dest_nodes[0];
! 1153: }
! 1154: else
! 1155: {
! 1156: int naccepted = 0;
! 1157: re_token_type_t type = dfa->nodes[node].type;
! 1158:
! 1159: #ifdef RE_ENABLE_I18N
! 1160: if (ACCEPT_MB_NODE (type))
! 1161: naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
! 1162: else
! 1163: #endif /* RE_ENABLE_I18N */
! 1164: if (type == OP_BACK_REF)
! 1165: {
! 1166: int subexp_idx = dfa->nodes[node].opr.idx;
! 1167: naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
! 1168: if (fs != NULL)
! 1169: {
! 1170: if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
! 1171: return -1;
! 1172: else if (naccepted)
! 1173: {
! 1174: char *buf = (char *) re_string_get_buffer (mctx->input);
! 1175: if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
! 1176: naccepted) != 0)
! 1177: return -1;
! 1178: }
! 1179: }
! 1180:
! 1181: if (naccepted == 0)
! 1182: {
! 1183: err = re_node_set_insert (eps_via_nodes, node);
! 1184: if (BE (err < 0, 0))
! 1185: return -2;
! 1186: dest_node = dfa->edests[node].elems[0];
! 1187: if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
! 1188: dest_node))
! 1189: return dest_node;
! 1190: }
! 1191: }
! 1192:
! 1193: if (naccepted != 0
! 1194: || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
! 1195: {
! 1196: dest_node = dfa->nexts[node];
! 1197: *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
! 1198: if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
! 1199: || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
! 1200: dest_node)))
! 1201: return -1;
! 1202: re_node_set_empty (eps_via_nodes);
! 1203: return dest_node;
! 1204: }
! 1205: }
! 1206: return -1;
! 1207: }
! 1208:
! 1209: static reg_errcode_t
! 1210: push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
! 1211: struct re_fail_stack_t *fs;
! 1212: int str_idx, *dests, nregs;
! 1213: regmatch_t *regs;
! 1214: re_node_set *eps_via_nodes;
! 1215: {
! 1216: reg_errcode_t err;
! 1217: int num = fs->num++;
! 1218: if (fs->num == fs->alloc)
! 1219: {
! 1220: struct re_fail_stack_ent_t *new_array;
! 1221: fs->alloc *= 2;
! 1222: new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
! 1223: * fs->alloc));
! 1224: if (new_array == NULL)
! 1225: return REG_ESPACE;
! 1226: fs->stack = new_array;
! 1227: }
! 1228: fs->stack[num].idx = str_idx;
! 1229: fs->stack[num].node = dests[1];
! 1230: fs->stack[num].regs = re_malloc (regmatch_t, nregs);
! 1231: memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
! 1232: err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
! 1233: return err;
! 1234: }
! 1235:
! 1236: static int
! 1237: pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
! 1238: struct re_fail_stack_t *fs;
! 1239: int *pidx, nregs;
! 1240: regmatch_t *regs;
! 1241: re_node_set *eps_via_nodes;
! 1242: {
! 1243: int num = --fs->num;
! 1244: assert (num >= 0);
! 1245: *pidx = fs->stack[num].idx;
! 1246: memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
! 1247: re_node_set_free (eps_via_nodes);
! 1248: re_free (fs->stack[num].regs);
! 1249: *eps_via_nodes = fs->stack[num].eps_via_nodes;
! 1250: return fs->stack[num].node;
! 1251: }
! 1252:
! 1253: /* Set the positions where the subexpressions are starts/ends to registers
! 1254: PMATCH.
! 1255: Note: We assume that pmatch[0] is already set, and
! 1256: pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
! 1257:
! 1258: static reg_errcode_t
! 1259: set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
! 1260: const regex_t *preg;
! 1261: const re_match_context_t *mctx;
! 1262: size_t nmatch;
! 1263: regmatch_t *pmatch;
! 1264: int fl_backtrack;
! 1265: {
! 1266: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 1267: int idx, cur_node, real_nmatch;
! 1268: re_node_set eps_via_nodes;
! 1269: struct re_fail_stack_t *fs;
! 1270: struct re_fail_stack_t fs_body = {0, 2, NULL};
! 1271: #ifdef DEBUG
! 1272: assert (nmatch > 1);
! 1273: assert (mctx->state_log != NULL);
! 1274: #endif
! 1275: if (fl_backtrack)
! 1276: {
! 1277: fs = &fs_body;
! 1278: fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
! 1279: }
! 1280: else
! 1281: fs = NULL;
! 1282: cur_node = dfa->init_node;
! 1283: real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
! 1284: re_node_set_init_empty (&eps_via_nodes);
! 1285: for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
! 1286: {
! 1287: update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
! 1288: if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
! 1289: {
! 1290: int reg_idx;
! 1291: if (fs)
! 1292: {
! 1293: for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
! 1294: if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
! 1295: break;
! 1296: if (reg_idx == nmatch)
! 1297: {
! 1298: re_node_set_free (&eps_via_nodes);
! 1299: return free_fail_stack_return (fs);
! 1300: }
! 1301: cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
! 1302: &eps_via_nodes);
! 1303: }
! 1304: else
! 1305: {
! 1306: re_node_set_free (&eps_via_nodes);
! 1307: return REG_NOERROR;
! 1308: }
! 1309: }
! 1310:
! 1311: /* Proceed to next node. */
! 1312: cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
! 1313: &eps_via_nodes, fs);
! 1314:
! 1315: if (BE (cur_node < 0, 0))
! 1316: {
! 1317: if (cur_node == -2)
! 1318: return REG_ESPACE;
! 1319: if (fs)
! 1320: cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
! 1321: &eps_via_nodes);
! 1322: else
! 1323: {
! 1324: re_node_set_free (&eps_via_nodes);
! 1325: return REG_NOMATCH;
! 1326: }
! 1327: }
! 1328: }
! 1329: re_node_set_free (&eps_via_nodes);
! 1330: return free_fail_stack_return (fs);
! 1331: }
! 1332:
! 1333: static reg_errcode_t
! 1334: free_fail_stack_return (fs)
! 1335: struct re_fail_stack_t *fs;
! 1336: {
! 1337: if (fs)
! 1338: {
! 1339: int fs_idx;
! 1340: for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
! 1341: {
! 1342: re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
! 1343: re_free (fs->stack[fs_idx].regs);
! 1344: }
! 1345: re_free (fs->stack);
! 1346: }
! 1347: return REG_NOERROR;
! 1348: }
! 1349:
! 1350: static void
! 1351: update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
! 1352: re_dfa_t *dfa;
! 1353: regmatch_t *pmatch;
! 1354: int cur_node, cur_idx, nmatch;
! 1355: {
! 1356: int type = dfa->nodes[cur_node].type;
! 1357: int reg_num;
! 1358: if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
! 1359: return;
! 1360: reg_num = dfa->nodes[cur_node].opr.idx + 1;
! 1361: if (reg_num >= nmatch)
! 1362: return;
! 1363: if (type == OP_OPEN_SUBEXP)
! 1364: {
! 1365: /* We are at the first node of this sub expression. */
! 1366: pmatch[reg_num].rm_so = cur_idx;
! 1367: pmatch[reg_num].rm_eo = -1;
! 1368: }
! 1369: else if (type == OP_CLOSE_SUBEXP)
! 1370: /* We are at the first node of this sub expression. */
! 1371: pmatch[reg_num].rm_eo = cur_idx;
! 1372: }
! 1373:
! 1374: #define NUMBER_OF_STATE 1
! 1375:
! 1376: /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
! 1377: and sift the nodes in each states according to the following rules.
! 1378: Updated state_log will be wrote to STATE_LOG.
! 1379:
! 1380: Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
! 1381: 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
! 1382: If `a' isn't the LAST_NODE and `a' can't epsilon transit to
! 1383: the LAST_NODE, we throw away the node `a'.
! 1384: 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
! 1385: string `s' and transit to `b':
! 1386: i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
! 1387: away the node `a'.
! 1388: ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
! 1389: throwed away, we throw away the node `a'.
! 1390: 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
! 1391: i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
! 1392: node `a'.
! 1393: ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
! 1394: we throw away the node `a'. */
! 1395:
! 1396: #define STATE_NODE_CONTAINS(state,node) \
! 1397: ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
! 1398:
! 1399: static reg_errcode_t
! 1400: sift_states_backward (preg, mctx, sctx)
! 1401: const regex_t *preg;
! 1402: re_match_context_t *mctx;
! 1403: re_sift_context_t *sctx;
! 1404: {
! 1405: reg_errcode_t err;
! 1406: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 1407: int null_cnt = 0;
! 1408: int str_idx = sctx->last_str_idx;
! 1409: re_node_set cur_dest;
! 1410: re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
! 1411:
! 1412: #ifdef DEBUG
! 1413: assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
! 1414: #endif
! 1415: cur_src = &mctx->state_log[str_idx]->nodes;
! 1416:
! 1417: /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
! 1418: transit to the last_node and the last_node itself. */
! 1419: err = re_node_set_init_1 (&cur_dest, sctx->last_node);
! 1420: if (BE (err != REG_NOERROR, 0))
! 1421: return err;
! 1422: err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
! 1423: if (BE (err != REG_NOERROR, 0))
! 1424: goto free_return;
! 1425:
! 1426: /* Then check each states in the state_log. */
! 1427: while (str_idx > 0)
! 1428: {
! 1429: int i, ret;
! 1430: /* Update counters. */
! 1431: null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
! 1432: if (null_cnt > mctx->max_mb_elem_len)
! 1433: {
! 1434: memset (sctx->sifted_states, '\0',
! 1435: sizeof (re_dfastate_t *) * str_idx);
! 1436: re_node_set_free (&cur_dest);
! 1437: return REG_NOERROR;
! 1438: }
! 1439: re_node_set_empty (&cur_dest);
! 1440: --str_idx;
! 1441: cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
! 1442: : &mctx->state_log[str_idx]->nodes);
! 1443:
! 1444: /* Then build the next sifted state.
! 1445: We build the next sifted state on `cur_dest', and update
! 1446: `sifted_states[str_idx]' with `cur_dest'.
! 1447: Note:
! 1448: `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
! 1449: `cur_src' points the node_set of the old `state_log[str_idx]'. */
! 1450: for (i = 0; i < cur_src->nelem; i++)
! 1451: {
! 1452: int prev_node = cur_src->elems[i];
! 1453: int naccepted = 0;
! 1454: re_token_type_t type = dfa->nodes[prev_node].type;
! 1455:
! 1456: if (IS_EPSILON_NODE(type))
! 1457: continue;
! 1458: #ifdef RE_ENABLE_I18N
! 1459: /* If the node may accept `multi byte'. */
! 1460: if (ACCEPT_MB_NODE (type))
! 1461: naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
! 1462: str_idx, sctx->last_str_idx);
! 1463:
! 1464: #endif /* RE_ENABLE_I18N */
! 1465: /* We don't check backreferences here.
! 1466: See update_cur_sifted_state(). */
! 1467:
! 1468: if (!naccepted
! 1469: && check_node_accept (preg, dfa->nodes + prev_node, mctx,
! 1470: str_idx)
! 1471: && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
! 1472: dfa->nexts[prev_node]))
! 1473: naccepted = 1;
! 1474:
! 1475: if (naccepted == 0)
! 1476: continue;
! 1477:
! 1478: if (sctx->limits.nelem)
! 1479: {
! 1480: int to_idx = str_idx + naccepted;
! 1481: if (check_dst_limits (dfa, &sctx->limits, mctx,
! 1482: dfa->nexts[prev_node], to_idx,
! 1483: prev_node, str_idx))
! 1484: continue;
! 1485: }
! 1486: ret = re_node_set_insert (&cur_dest, prev_node);
! 1487: if (BE (ret == -1, 0))
! 1488: {
! 1489: err = REG_ESPACE;
! 1490: goto free_return;
! 1491: }
! 1492: }
! 1493:
! 1494: /* Add all the nodes which satisfy the following conditions:
! 1495: - It can epsilon transit to a node in CUR_DEST.
! 1496: - It is in CUR_SRC.
! 1497: And update state_log. */
! 1498: err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
! 1499: if (BE (err != REG_NOERROR, 0))
! 1500: goto free_return;
! 1501: }
! 1502: err = REG_NOERROR;
! 1503: free_return:
! 1504: re_node_set_free (&cur_dest);
! 1505: return err;
! 1506: }
! 1507:
! 1508: /* Helper functions. */
! 1509:
! 1510: static inline reg_errcode_t
! 1511: clean_state_log_if_need (mctx, next_state_log_idx)
! 1512: re_match_context_t *mctx;
! 1513: int next_state_log_idx;
! 1514: {
! 1515: int top = mctx->state_log_top;
! 1516:
! 1517: if (next_state_log_idx >= mctx->input->bufs_len
! 1518: || (next_state_log_idx >= mctx->input->valid_len
! 1519: && mctx->input->valid_len < mctx->input->len))
! 1520: {
! 1521: reg_errcode_t err;
! 1522: err = extend_buffers (mctx);
! 1523: if (BE (err != REG_NOERROR, 0))
! 1524: return err;
! 1525: }
! 1526:
! 1527: if (top < next_state_log_idx)
! 1528: {
! 1529: memset (mctx->state_log + top + 1, '\0',
! 1530: sizeof (re_dfastate_t *) * (next_state_log_idx - top));
! 1531: mctx->state_log_top = next_state_log_idx;
! 1532: }
! 1533: return REG_NOERROR;
! 1534: }
! 1535:
! 1536: static reg_errcode_t
! 1537: merge_state_array (dfa, dst, src, num)
! 1538: re_dfa_t *dfa;
! 1539: re_dfastate_t **dst;
! 1540: re_dfastate_t **src;
! 1541: int num;
! 1542: {
! 1543: int st_idx;
! 1544: reg_errcode_t err;
! 1545: for (st_idx = 0; st_idx < num; ++st_idx)
! 1546: {
! 1547: if (dst[st_idx] == NULL)
! 1548: dst[st_idx] = src[st_idx];
! 1549: else if (src[st_idx] != NULL)
! 1550: {
! 1551: re_node_set merged_set;
! 1552: err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
! 1553: &src[st_idx]->nodes);
! 1554: if (BE (err != REG_NOERROR, 0))
! 1555: return err;
! 1556: dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
! 1557: re_node_set_free (&merged_set);
! 1558: if (BE (err != REG_NOERROR, 0))
! 1559: return err;
! 1560: }
! 1561: }
! 1562: return REG_NOERROR;
! 1563: }
! 1564:
! 1565: static reg_errcode_t
! 1566: update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
! 1567: const regex_t *preg;
! 1568: re_match_context_t *mctx;
! 1569: re_sift_context_t *sctx;
! 1570: int str_idx;
! 1571: re_node_set *dest_nodes;
! 1572: {
! 1573: reg_errcode_t err;
! 1574: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 1575: const re_node_set *candidates;
! 1576: candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
! 1577: : &mctx->state_log[str_idx]->nodes);
! 1578:
! 1579: /* At first, add the nodes which can epsilon transit to a node in
! 1580: DEST_NODE. */
! 1581: if (dest_nodes->nelem)
! 1582: {
! 1583: err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
! 1584: if (BE (err != REG_NOERROR, 0))
! 1585: return err;
! 1586: }
! 1587:
! 1588: /* Then, check the limitations in the current sift_context. */
! 1589: if (dest_nodes->nelem && sctx->limits.nelem)
! 1590: {
! 1591: err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
! 1592: mctx->bkref_ents, str_idx);
! 1593: if (BE (err != REG_NOERROR, 0))
! 1594: return err;
! 1595: }
! 1596:
! 1597: /* Update state_log. */
! 1598: sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
! 1599: if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
! 1600: return err;
! 1601:
! 1602: if ((mctx->state_log[str_idx] != NULL
! 1603: && mctx->state_log[str_idx]->has_backref))
! 1604: {
! 1605: err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
! 1606: if (BE (err != REG_NOERROR, 0))
! 1607: return err;
! 1608: }
! 1609: return REG_NOERROR;
! 1610: }
! 1611:
! 1612: static reg_errcode_t
! 1613: add_epsilon_src_nodes (dfa, dest_nodes, candidates)
! 1614: re_dfa_t *dfa;
! 1615: re_node_set *dest_nodes;
! 1616: const re_node_set *candidates;
! 1617: {
! 1618: reg_errcode_t err;
! 1619: int src_idx;
! 1620: re_node_set src_copy;
! 1621:
! 1622: err = re_node_set_init_copy (&src_copy, dest_nodes);
! 1623: if (BE (err != REG_NOERROR, 0))
! 1624: return err;
! 1625: for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
! 1626: {
! 1627: err = re_node_set_add_intersect (dest_nodes, candidates,
! 1628: dfa->inveclosures
! 1629: + src_copy.elems[src_idx]);
! 1630: if (BE (err != REG_NOERROR, 0))
! 1631: {
! 1632: re_node_set_free (&src_copy);
! 1633: return err;
! 1634: }
! 1635: }
! 1636: re_node_set_free (&src_copy);
! 1637: return REG_NOERROR;
! 1638: }
! 1639:
! 1640: static reg_errcode_t
! 1641: sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
! 1642: re_dfa_t *dfa;
! 1643: int node;
! 1644: re_node_set *dest_nodes;
! 1645: const re_node_set *candidates;
! 1646: {
! 1647: int ecl_idx;
! 1648: reg_errcode_t err;
! 1649: re_node_set *inv_eclosure = dfa->inveclosures + node;
! 1650: re_node_set except_nodes;
! 1651: re_node_set_init_empty (&except_nodes);
! 1652: for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
! 1653: {
! 1654: int cur_node = inv_eclosure->elems[ecl_idx];
! 1655: if (cur_node == node)
! 1656: continue;
! 1657: if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
! 1658: {
! 1659: int edst1 = dfa->edests[cur_node].elems[0];
! 1660: int edst2 = ((dfa->edests[cur_node].nelem > 1)
! 1661: ? dfa->edests[cur_node].elems[1] : -1);
! 1662: if ((!re_node_set_contains (inv_eclosure, edst1)
! 1663: && re_node_set_contains (dest_nodes, edst1))
! 1664: || (edst2 > 0
! 1665: && !re_node_set_contains (inv_eclosure, edst2)
! 1666: && re_node_set_contains (dest_nodes, edst2)))
! 1667: {
! 1668: err = re_node_set_add_intersect (&except_nodes, candidates,
! 1669: dfa->inveclosures + cur_node);
! 1670: if (BE (err != REG_NOERROR, 0))
! 1671: {
! 1672: re_node_set_free (&except_nodes);
! 1673: return err;
! 1674: }
! 1675: }
! 1676: }
! 1677: }
! 1678: for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
! 1679: {
! 1680: int cur_node = inv_eclosure->elems[ecl_idx];
! 1681: if (!re_node_set_contains (&except_nodes, cur_node))
! 1682: {
! 1683: int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
! 1684: re_node_set_remove_at (dest_nodes, idx);
! 1685: }
! 1686: }
! 1687: re_node_set_free (&except_nodes);
! 1688: return REG_NOERROR;
! 1689: }
! 1690:
! 1691: static int
! 1692: check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
! 1693: re_dfa_t *dfa;
! 1694: re_node_set *limits;
! 1695: re_match_context_t *mctx;
! 1696: int dst_node, dst_idx, src_node, src_idx;
! 1697: {
! 1698: int lim_idx, src_pos, dst_pos;
! 1699:
! 1700: for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
! 1701: {
! 1702: int subexp_idx;
! 1703: struct re_backref_cache_entry *ent;
! 1704: ent = mctx->bkref_ents + limits->elems[lim_idx];
! 1705: subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
! 1706:
! 1707: dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
! 1708: dfa->eclosures + dst_node,
! 1709: subexp_idx, dst_node, dst_idx);
! 1710: src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
! 1711: dfa->eclosures + src_node,
! 1712: subexp_idx, src_node, src_idx);
! 1713:
! 1714: /* In case of:
! 1715: <src> <dst> ( <subexp> )
! 1716: ( <subexp> ) <src> <dst>
! 1717: ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
! 1718: if (src_pos == dst_pos)
! 1719: continue; /* This is unrelated limitation. */
! 1720: else
! 1721: return 1;
! 1722: }
! 1723: return 0;
! 1724: }
! 1725:
! 1726: static int
! 1727: check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
! 1728: str_idx)
! 1729: re_dfa_t *dfa;
! 1730: re_match_context_t *mctx;
! 1731: re_node_set *eclosures;
! 1732: int limit, subexp_idx, node, str_idx;
! 1733: {
! 1734: struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
! 1735: int pos = (str_idx < lim->subexp_from ? -1
! 1736: : (lim->subexp_to < str_idx ? 1 : 0));
! 1737: if (pos == 0
! 1738: && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
! 1739: {
! 1740: int node_idx;
! 1741: for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
! 1742: {
! 1743: int node = eclosures->elems[node_idx];
! 1744: re_token_type_t type= dfa->nodes[node].type;
! 1745: if (type == OP_BACK_REF)
! 1746: {
! 1747: int bi = search_cur_bkref_entry (mctx, str_idx);
! 1748: for (; bi < mctx->nbkref_ents; ++bi)
! 1749: {
! 1750: struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
! 1751: if (ent->str_idx > str_idx)
! 1752: break;
! 1753: if (ent->node == node && ent->subexp_from == ent->subexp_to)
! 1754: {
! 1755: int cpos, dst;
! 1756: dst = dfa->edests[node].elems[0];
! 1757: cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
! 1758: dfa->eclosures + dst,
! 1759: subexp_idx, dst,
! 1760: str_idx);
! 1761: if ((str_idx == lim->subexp_from && cpos == -1)
! 1762: || (str_idx == lim->subexp_to && cpos == 0))
! 1763: return cpos;
! 1764: }
! 1765: }
! 1766: }
! 1767: if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
! 1768: && str_idx == lim->subexp_from)
! 1769: {
! 1770: pos = -1;
! 1771: break;
! 1772: }
! 1773: if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
! 1774: && str_idx == lim->subexp_to)
! 1775: break;
! 1776: }
! 1777: if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
! 1778: pos = 1;
! 1779: }
! 1780: return pos;
! 1781: }
! 1782:
! 1783: /* Check the limitations of sub expressions LIMITS, and remove the nodes
! 1784: which are against limitations from DEST_NODES. */
! 1785:
! 1786: static reg_errcode_t
! 1787: check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
! 1788: re_dfa_t *dfa;
! 1789: re_node_set *dest_nodes;
! 1790: const re_node_set *candidates;
! 1791: re_node_set *limits;
! 1792: struct re_backref_cache_entry *bkref_ents;
! 1793: int str_idx;
! 1794: {
! 1795: reg_errcode_t err;
! 1796: int node_idx, lim_idx;
! 1797:
! 1798: for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
! 1799: {
! 1800: int subexp_idx;
! 1801: struct re_backref_cache_entry *ent;
! 1802: ent = bkref_ents + limits->elems[lim_idx];
! 1803:
! 1804: if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
! 1805: continue; /* This is unrelated limitation. */
! 1806:
! 1807: subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
! 1808: if (ent->subexp_to == str_idx)
! 1809: {
! 1810: int ops_node = -1;
! 1811: int cls_node = -1;
! 1812: for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
! 1813: {
! 1814: int node = dest_nodes->elems[node_idx];
! 1815: re_token_type_t type= dfa->nodes[node].type;
! 1816: if (type == OP_OPEN_SUBEXP
! 1817: && subexp_idx == dfa->nodes[node].opr.idx)
! 1818: ops_node = node;
! 1819: else if (type == OP_CLOSE_SUBEXP
! 1820: && subexp_idx == dfa->nodes[node].opr.idx)
! 1821: cls_node = node;
! 1822: }
! 1823:
! 1824: /* Check the limitation of the open subexpression. */
! 1825: /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
! 1826: if (ops_node >= 0)
! 1827: {
! 1828: err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
! 1829: candidates);
! 1830: if (BE (err != REG_NOERROR, 0))
! 1831: return err;
! 1832: }
! 1833: /* Check the limitation of the close subexpression. */
! 1834: for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
! 1835: {
! 1836: int node = dest_nodes->elems[node_idx];
! 1837: if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
! 1838: && !re_node_set_contains (dfa->eclosures + node, cls_node))
! 1839: {
! 1840: /* It is against this limitation.
! 1841: Remove it form the current sifted state. */
! 1842: err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
! 1843: candidates);
! 1844: if (BE (err != REG_NOERROR, 0))
! 1845: return err;
! 1846: --node_idx;
! 1847: }
! 1848: }
! 1849: }
! 1850: else /* (ent->subexp_to != str_idx) */
! 1851: {
! 1852: for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
! 1853: {
! 1854: int node = dest_nodes->elems[node_idx];
! 1855: re_token_type_t type= dfa->nodes[node].type;
! 1856: if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
! 1857: {
! 1858: if (subexp_idx != dfa->nodes[node].opr.idx)
! 1859: continue;
! 1860: if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
! 1861: || (type == OP_OPEN_SUBEXP))
! 1862: {
! 1863: /* It is against this limitation.
! 1864: Remove it form the current sifted state. */
! 1865: err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
! 1866: candidates);
! 1867: if (BE (err != REG_NOERROR, 0))
! 1868: return err;
! 1869: }
! 1870: }
! 1871: }
! 1872: }
! 1873: }
! 1874: return REG_NOERROR;
! 1875: }
! 1876:
! 1877: static reg_errcode_t
! 1878: sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
! 1879: const regex_t *preg;
! 1880: re_match_context_t *mctx;
! 1881: re_sift_context_t *sctx;
! 1882: int str_idx;
! 1883: re_node_set *dest_nodes;
! 1884: {
! 1885: reg_errcode_t err;
! 1886: re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
! 1887: int node_idx, node;
! 1888: re_sift_context_t local_sctx;
! 1889: const re_node_set *candidates;
! 1890: candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
! 1891: : &mctx->state_log[str_idx]->nodes);
! 1892: local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
! 1893:
! 1894: for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
! 1895: {
! 1896: int cur_bkref_idx = re_string_cur_idx (mctx->input);
! 1897: re_token_type_t type;
! 1898: node = candidates->elems[node_idx];
! 1899: type = dfa->nodes[node].type;
! 1900: if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
! 1901: continue;
! 1902: /* Avoid infinite loop for the REs like "()\1+". */
! 1903: if (node == sctx->last_node && str_idx == sctx->last_str_idx)
! 1904: continue;
! 1905: if (type == OP_BACK_REF)
! 1906: {
! 1907: int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
! 1908: for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
! 1909: {
! 1910: int disabled_idx, subexp_len, to_idx, dst_node;
! 1911: struct re_backref_cache_entry *entry;
! 1912: entry = mctx->bkref_ents + enabled_idx;
! 1913: if (entry->str_idx > str_idx)
! 1914: break;
! 1915: if (entry->node != node)
! 1916: continue;
! 1917: subexp_len = entry->subexp_to - entry->subexp_from;
! 1918: to_idx = str_idx + subexp_len;
! 1919: dst_node = (subexp_len ? dfa->nexts[node]
! 1920: : dfa->edests[node].elems[0]);
! 1921:
! 1922: if (to_idx > sctx->last_str_idx
! 1923: || sctx->sifted_states[to_idx] == NULL
! 1924: || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
! 1925: dst_node)
! 1926: || check_dst_limits (dfa, &sctx->limits, mctx, node,
! 1927: str_idx, dst_node, to_idx))
! 1928: continue;
! 1929: {
! 1930: re_dfastate_t *cur_state;
! 1931: entry->flag = 0;
! 1932: for (disabled_idx = enabled_idx + 1;
! 1933: disabled_idx < mctx->nbkref_ents; ++disabled_idx)
! 1934: {
! 1935: struct re_backref_cache_entry *entry2;
! 1936: entry2 = mctx->bkref_ents + disabled_idx;
! 1937: if (entry2->str_idx > str_idx)
! 1938: break;
! 1939: entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
! 1940: }
! 1941:
! 1942: if (local_sctx.sifted_states == NULL)
! 1943: {
! 1944: local_sctx = *sctx;
! 1945: err = re_node_set_init_copy (&local_sctx.limits,
! 1946: &sctx->limits);
! 1947: if (BE (err != REG_NOERROR, 0))
! 1948: goto free_return;
! 1949: }
! 1950: local_sctx.last_node = node;
! 1951: local_sctx.last_str_idx = str_idx;
! 1952: err = re_node_set_insert (&local_sctx.limits, enabled_idx);
! 1953: if (BE (err < 0, 0))
! 1954: {
! 1955: err = REG_ESPACE;
! 1956: goto free_return;
! 1957: }
! 1958: cur_state = local_sctx.sifted_states[str_idx];
! 1959: err = sift_states_backward (preg, mctx, &local_sctx);
! 1960: if (BE (err != REG_NOERROR, 0))
! 1961: goto free_return;
! 1962: if (sctx->limited_states != NULL)
! 1963: {
! 1964: err = merge_state_array (dfa, sctx->limited_states,
! 1965: local_sctx.sifted_states,
! 1966: str_idx + 1);
! 1967: if (BE (err != REG_NOERROR, 0))
! 1968: goto free_return;
! 1969: }
! 1970: local_sctx.sifted_states[str_idx] = cur_state;
! 1971: re_node_set_remove (&local_sctx.limits, enabled_idx);
! 1972: /* We must not use the variable entry here, since
! 1973: mctx->bkref_ents might be realloced. */
! 1974: mctx->bkref_ents[enabled_idx].flag = 1;
! 1975: }
! 1976: }
! 1977: enabled_idx = search_cur_bkref_entry (mctx, str_idx);
! 1978: for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
! 1979: {
! 1980: struct re_backref_cache_entry *entry;
! 1981: entry = mctx->bkref_ents + enabled_idx;
! 1982: if (entry->str_idx > str_idx)
! 1983: break;
! 1984: if (entry->node == node)
! 1985: entry->flag = 0;
! 1986: }
! 1987: }
! 1988: }
! 1989: err = REG_NOERROR;
! 1990: free_return:
! 1991: if (local_sctx.sifted_states != NULL)
! 1992: {
! 1993: re_node_set_free (&local_sctx.limits);
! 1994: }
! 1995:
! 1996: return err;
! 1997: }
! 1998:
! 1999:
! 2000: #ifdef RE_ENABLE_I18N
! 2001: static int
! 2002: sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
! 2003: const regex_t *preg;
! 2004: const re_match_context_t *mctx;
! 2005: re_sift_context_t *sctx;
! 2006: int node_idx, str_idx, max_str_idx;
! 2007: {
! 2008: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2009: int naccepted;
! 2010: /* Check the node can accept `multi byte'. */
! 2011: naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
! 2012: if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
! 2013: !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
! 2014: dfa->nexts[node_idx]))
! 2015: /* The node can't accept the `multi byte', or the
! 2016: destination was already throwed away, then the node
! 2017: could't accept the current input `multi byte'. */
! 2018: naccepted = 0;
! 2019: /* Otherwise, it is sure that the node could accept
! 2020: `naccepted' bytes input. */
! 2021: return naccepted;
! 2022: }
! 2023: #endif /* RE_ENABLE_I18N */
! 2024:
! 2025:
! 2026: /* Functions for state transition. */
! 2027:
! 2028: /* Return the next state to which the current state STATE will transit by
! 2029: accepting the current input byte, and update STATE_LOG if necessary.
! 2030: If STATE can accept a multibyte char/collating element/back reference
! 2031: update the destination of STATE_LOG. */
! 2032:
! 2033: static re_dfastate_t *
! 2034: transit_state (err, preg, mctx, state, fl_search)
! 2035: reg_errcode_t *err;
! 2036: const regex_t *preg;
! 2037: re_match_context_t *mctx;
! 2038: re_dfastate_t *state;
! 2039: int fl_search;
! 2040: {
! 2041: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2042: re_dfastate_t **trtable, *next_state;
! 2043: unsigned char ch;
! 2044: int cur_idx;
! 2045:
! 2046: if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
! 2047: || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
! 2048: && mctx->input->valid_len < mctx->input->len))
! 2049: {
! 2050: *err = extend_buffers (mctx);
! 2051: if (BE (*err != REG_NOERROR, 0))
! 2052: return NULL;
! 2053: }
! 2054:
! 2055: *err = REG_NOERROR;
! 2056: if (state == NULL)
! 2057: {
! 2058: next_state = state;
! 2059: re_string_skip_bytes (mctx->input, 1);
! 2060: }
! 2061: else
! 2062: {
! 2063: #ifdef RE_ENABLE_I18N
! 2064: /* If the current state can accept multibyte. */
! 2065: if (state->accept_mb)
! 2066: {
! 2067: *err = transit_state_mb (preg, state, mctx);
! 2068: if (BE (*err != REG_NOERROR, 0))
! 2069: return NULL;
! 2070: }
! 2071: #endif /* RE_ENABLE_I18N */
! 2072:
! 2073: /* Then decide the next state with the single byte. */
! 2074: if (1)
! 2075: {
! 2076: /* Use transition table */
! 2077: ch = re_string_fetch_byte (mctx->input);
! 2078: trtable = fl_search ? state->trtable_search : state->trtable;
! 2079: if (trtable == NULL)
! 2080: {
! 2081: trtable = build_trtable (preg, state, fl_search);
! 2082: if (fl_search)
! 2083: state->trtable_search = trtable;
! 2084: else
! 2085: state->trtable = trtable;
! 2086: }
! 2087: next_state = trtable[ch];
! 2088: }
! 2089: else
! 2090: {
! 2091: /* don't use transition table */
! 2092: next_state = transit_state_sb (err, preg, state, fl_search, mctx);
! 2093: if (BE (next_state == NULL && err != REG_NOERROR, 0))
! 2094: return NULL;
! 2095: }
! 2096: }
! 2097:
! 2098: cur_idx = re_string_cur_idx (mctx->input);
! 2099: /* Update the state_log if we need. */
! 2100: if (mctx->state_log != NULL)
! 2101: {
! 2102: if (cur_idx > mctx->state_log_top)
! 2103: {
! 2104: mctx->state_log[cur_idx] = next_state;
! 2105: mctx->state_log_top = cur_idx;
! 2106: }
! 2107: else if (mctx->state_log[cur_idx] == 0)
! 2108: {
! 2109: mctx->state_log[cur_idx] = next_state;
! 2110: }
! 2111: else
! 2112: {
! 2113: re_dfastate_t *pstate;
! 2114: unsigned int context;
! 2115: re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
! 2116: /* If (state_log[cur_idx] != 0), it implies that cur_idx is
! 2117: the destination of a multibyte char/collating element/
! 2118: back reference. Then the next state is the union set of
! 2119: these destinations and the results of the transition table. */
! 2120: pstate = mctx->state_log[cur_idx];
! 2121: log_nodes = pstate->entrance_nodes;
! 2122: if (next_state != NULL)
! 2123: {
! 2124: table_nodes = next_state->entrance_nodes;
! 2125: *err = re_node_set_init_union (&next_nodes, table_nodes,
! 2126: log_nodes);
! 2127: if (BE (*err != REG_NOERROR, 0))
! 2128: return NULL;
! 2129: }
! 2130: else
! 2131: next_nodes = *log_nodes;
! 2132: /* Note: We already add the nodes of the initial state,
! 2133: then we don't need to add them here. */
! 2134:
! 2135: context = re_string_context_at (mctx->input,
! 2136: re_string_cur_idx (mctx->input) - 1,
! 2137: mctx->eflags, preg->newline_anchor);
! 2138: next_state = mctx->state_log[cur_idx]
! 2139: = re_acquire_state_context (err, dfa, &next_nodes, context);
! 2140: /* We don't need to check errors here, since the return value of
! 2141: this function is next_state and ERR is already set. */
! 2142:
! 2143: if (table_nodes != NULL)
! 2144: re_node_set_free (&next_nodes);
! 2145: }
! 2146: }
! 2147:
! 2148: /* Check OP_OPEN_SUBEXP in the current state in case that we use them
! 2149: later. We must check them here, since the back references in the
! 2150: next state might use them. */
! 2151: if (dfa->nbackref && next_state/* && fl_process_bkref */)
! 2152: {
! 2153: *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
! 2154: cur_idx);
! 2155: if (BE (*err != REG_NOERROR, 0))
! 2156: return NULL;
! 2157: }
! 2158:
! 2159: /* If the next state has back references. */
! 2160: if (next_state != NULL && next_state->has_backref)
! 2161: {
! 2162: *err = transit_state_bkref (preg, &next_state->nodes, mctx);
! 2163: if (BE (*err != REG_NOERROR, 0))
! 2164: return NULL;
! 2165: next_state = mctx->state_log[cur_idx];
! 2166: }
! 2167: return next_state;
! 2168: }
! 2169:
! 2170: /* Helper functions for transit_state. */
! 2171:
! 2172: /* From the node set CUR_NODES, pick up the nodes whose types are
! 2173: OP_OPEN_SUBEXP and which have corresponding back references in the regular
! 2174: expression. And register them to use them later for evaluating the
! 2175: correspoding back references. */
! 2176:
! 2177: static reg_errcode_t
! 2178: check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
! 2179: re_dfa_t *dfa;
! 2180: re_match_context_t *mctx;
! 2181: re_node_set *cur_nodes;
! 2182: int str_idx;
! 2183: {
! 2184: int node_idx;
! 2185: reg_errcode_t err;
! 2186:
! 2187: /* TODO: This isn't efficient.
! 2188: Because there might be more than one nodes whose types are
! 2189: OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
! 2190: nodes.
! 2191: E.g. RE: (a){2} */
! 2192: for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
! 2193: {
! 2194: int node = cur_nodes->elems[node_idx];
! 2195: if (dfa->nodes[node].type == OP_OPEN_SUBEXP
! 2196: && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
! 2197: {
! 2198: err = match_ctx_add_subtop (mctx, node, str_idx);
! 2199: if (BE (err != REG_NOERROR, 0))
! 2200: return err;
! 2201: }
! 2202: }
! 2203: return REG_NOERROR;
! 2204: }
! 2205:
! 2206: /* Return the next state to which the current state STATE will transit by
! 2207: accepting the current input byte. */
! 2208:
! 2209: static re_dfastate_t *
! 2210: transit_state_sb (err, preg, state, fl_search, mctx)
! 2211: reg_errcode_t *err;
! 2212: const regex_t *preg;
! 2213: re_dfastate_t *state;
! 2214: int fl_search;
! 2215: re_match_context_t *mctx;
! 2216: {
! 2217: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2218: re_node_set next_nodes;
! 2219: re_dfastate_t *next_state;
! 2220: int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
! 2221: unsigned int context;
! 2222:
! 2223: *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
! 2224: if (BE (*err != REG_NOERROR, 0))
! 2225: return NULL;
! 2226: for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
! 2227: {
! 2228: int cur_node = state->nodes.elems[node_cnt];
! 2229: if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
! 2230: {
! 2231: *err = re_node_set_merge (&next_nodes,
! 2232: dfa->eclosures + dfa->nexts[cur_node]);
! 2233: if (BE (*err != REG_NOERROR, 0))
! 2234: {
! 2235: re_node_set_free (&next_nodes);
! 2236: return NULL;
! 2237: }
! 2238: }
! 2239: }
! 2240: if (fl_search)
! 2241: {
! 2242: #ifdef RE_ENABLE_I18N
! 2243: int not_initial = 0;
! 2244: if (MB_CUR_MAX > 1)
! 2245: for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
! 2246: if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
! 2247: {
! 2248: not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
! 2249: break;
! 2250: }
! 2251: if (!not_initial)
! 2252: #endif
! 2253: {
! 2254: *err = re_node_set_merge (&next_nodes,
! 2255: dfa->init_state->entrance_nodes);
! 2256: if (BE (*err != REG_NOERROR, 0))
! 2257: {
! 2258: re_node_set_free (&next_nodes);
! 2259: return NULL;
! 2260: }
! 2261: }
! 2262: }
! 2263: context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
! 2264: preg->newline_anchor);
! 2265: next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
! 2266: /* We don't need to check errors here, since the return value of
! 2267: this function is next_state and ERR is already set. */
! 2268:
! 2269: re_node_set_free (&next_nodes);
! 2270: re_string_skip_bytes (mctx->input, 1);
! 2271: return next_state;
! 2272: }
! 2273:
! 2274: #ifdef RE_ENABLE_I18N
! 2275: static reg_errcode_t
! 2276: transit_state_mb (preg, pstate, mctx)
! 2277: const regex_t *preg;
! 2278: re_dfastate_t *pstate;
! 2279: re_match_context_t *mctx;
! 2280: {
! 2281: reg_errcode_t err;
! 2282: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2283: int i;
! 2284:
! 2285: for (i = 0; i < pstate->nodes.nelem; ++i)
! 2286: {
! 2287: re_node_set dest_nodes, *new_nodes;
! 2288: int cur_node_idx = pstate->nodes.elems[i];
! 2289: int naccepted = 0, dest_idx;
! 2290: unsigned int context;
! 2291: re_dfastate_t *dest_state;
! 2292:
! 2293: if (dfa->nodes[cur_node_idx].constraint)
! 2294: {
! 2295: context = re_string_context_at (mctx->input,
! 2296: re_string_cur_idx (mctx->input),
! 2297: mctx->eflags, preg->newline_anchor);
! 2298: if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
! 2299: context))
! 2300: continue;
! 2301: }
! 2302:
! 2303: /* How many bytes the node can accepts? */
! 2304: if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
! 2305: naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
! 2306: re_string_cur_idx (mctx->input));
! 2307: if (naccepted == 0)
! 2308: continue;
! 2309:
! 2310: /* The node can accepts `naccepted' bytes. */
! 2311: dest_idx = re_string_cur_idx (mctx->input) + naccepted;
! 2312: mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
! 2313: : mctx->max_mb_elem_len);
! 2314: err = clean_state_log_if_need (mctx, dest_idx);
! 2315: if (BE (err != REG_NOERROR, 0))
! 2316: return err;
! 2317: #ifdef DEBUG
! 2318: assert (dfa->nexts[cur_node_idx] != -1);
! 2319: #endif
! 2320: /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
! 2321: then we use pstate->nodes.elems[i] instead. */
! 2322: new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
! 2323:
! 2324: dest_state = mctx->state_log[dest_idx];
! 2325: if (dest_state == NULL)
! 2326: dest_nodes = *new_nodes;
! 2327: else
! 2328: {
! 2329: err = re_node_set_init_union (&dest_nodes,
! 2330: dest_state->entrance_nodes, new_nodes);
! 2331: if (BE (err != REG_NOERROR, 0))
! 2332: return err;
! 2333: }
! 2334: context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
! 2335: preg->newline_anchor);
! 2336: mctx->state_log[dest_idx]
! 2337: = re_acquire_state_context (&err, dfa, &dest_nodes, context);
! 2338: if (dest_state != NULL)
! 2339: re_node_set_free (&dest_nodes);
! 2340: if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
! 2341: return err;
! 2342: }
! 2343: return REG_NOERROR;
! 2344: }
! 2345: #endif /* RE_ENABLE_I18N */
! 2346:
! 2347: static reg_errcode_t
! 2348: transit_state_bkref (preg, nodes, mctx)
! 2349: const regex_t *preg;
! 2350: re_node_set *nodes;
! 2351: re_match_context_t *mctx;
! 2352: {
! 2353: reg_errcode_t err;
! 2354: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2355: int i;
! 2356: int cur_str_idx = re_string_cur_idx (mctx->input);
! 2357:
! 2358: for (i = 0; i < nodes->nelem; ++i)
! 2359: {
! 2360: int dest_str_idx, prev_nelem, bkc_idx;
! 2361: int node_idx = nodes->elems[i];
! 2362: unsigned int context;
! 2363: re_token_t *node = dfa->nodes + node_idx;
! 2364: re_node_set *new_dest_nodes;
! 2365:
! 2366: /* Check whether `node' is a backreference or not. */
! 2367: if (node->type != OP_BACK_REF)
! 2368: continue;
! 2369:
! 2370: if (node->constraint)
! 2371: {
! 2372: context = re_string_context_at (mctx->input, cur_str_idx,
! 2373: mctx->eflags, preg->newline_anchor);
! 2374: if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
! 2375: continue;
! 2376: }
! 2377:
! 2378: /* `node' is a backreference.
! 2379: Check the substring which the substring matched. */
! 2380: bkc_idx = mctx->nbkref_ents;
! 2381: err = get_subexp (preg, mctx, node_idx, cur_str_idx);
! 2382: if (BE (err != REG_NOERROR, 0))
! 2383: goto free_return;
! 2384:
! 2385: /* And add the epsilon closures (which is `new_dest_nodes') of
! 2386: the backreference to appropriate state_log. */
! 2387: #ifdef DEBUG
! 2388: assert (dfa->nexts[node_idx] != -1);
! 2389: #endif
! 2390: for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
! 2391: {
! 2392: int subexp_len;
! 2393: re_dfastate_t *dest_state;
! 2394: struct re_backref_cache_entry *bkref_ent;
! 2395: bkref_ent = mctx->bkref_ents + bkc_idx;
! 2396: if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
! 2397: continue;
! 2398: subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
! 2399: new_dest_nodes = (subexp_len == 0
! 2400: ? dfa->eclosures + dfa->edests[node_idx].elems[0]
! 2401: : dfa->eclosures + dfa->nexts[node_idx]);
! 2402: dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
! 2403: - bkref_ent->subexp_from);
! 2404: context = re_string_context_at (mctx->input, dest_str_idx - 1,
! 2405: mctx->eflags, preg->newline_anchor);
! 2406: dest_state = mctx->state_log[dest_str_idx];
! 2407: prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
! 2408: : mctx->state_log[cur_str_idx]->nodes.nelem);
! 2409: /* Add `new_dest_node' to state_log. */
! 2410: if (dest_state == NULL)
! 2411: {
! 2412: mctx->state_log[dest_str_idx]
! 2413: = re_acquire_state_context (&err, dfa, new_dest_nodes,
! 2414: context);
! 2415: if (BE (mctx->state_log[dest_str_idx] == NULL
! 2416: && err != REG_NOERROR, 0))
! 2417: goto free_return;
! 2418: }
! 2419: else
! 2420: {
! 2421: re_node_set dest_nodes;
! 2422: err = re_node_set_init_union (&dest_nodes,
! 2423: dest_state->entrance_nodes,
! 2424: new_dest_nodes);
! 2425: if (BE (err != REG_NOERROR, 0))
! 2426: {
! 2427: re_node_set_free (&dest_nodes);
! 2428: goto free_return;
! 2429: }
! 2430: mctx->state_log[dest_str_idx]
! 2431: = re_acquire_state_context (&err, dfa, &dest_nodes, context);
! 2432: re_node_set_free (&dest_nodes);
! 2433: if (BE (mctx->state_log[dest_str_idx] == NULL
! 2434: && err != REG_NOERROR, 0))
! 2435: goto free_return;
! 2436: }
! 2437: /* We need to check recursively if the backreference can epsilon
! 2438: transit. */
! 2439: if (subexp_len == 0
! 2440: && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
! 2441: {
! 2442: err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
! 2443: cur_str_idx);
! 2444: if (BE (err != REG_NOERROR, 0))
! 2445: goto free_return;
! 2446: err = transit_state_bkref (preg, new_dest_nodes, mctx);
! 2447: if (BE (err != REG_NOERROR, 0))
! 2448: goto free_return;
! 2449: }
! 2450: }
! 2451: }
! 2452: err = REG_NOERROR;
! 2453: free_return:
! 2454: return err;
! 2455: }
! 2456:
! 2457: /* Enumerate all the candidates which the backreference BKREF_NODE can match
! 2458: at BKREF_STR_IDX, and register them by match_ctx_add_entry().
! 2459: Note that we might collect inappropriate candidates here.
! 2460: However, the cost of checking them strictly here is too high, then we
! 2461: delay these checking for prune_impossible_nodes(). */
! 2462:
! 2463: static reg_errcode_t
! 2464: get_subexp (preg, mctx, bkref_node, bkref_str_idx)
! 2465: const regex_t *preg;
! 2466: re_match_context_t *mctx;
! 2467: int bkref_node, bkref_str_idx;
! 2468: {
! 2469: int subexp_num, sub_top_idx;
! 2470: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2471: char *buf = (char *) re_string_get_buffer (mctx->input);
! 2472: /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
! 2473: int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
! 2474: for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
! 2475: {
! 2476: struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
! 2477: if (entry->str_idx > bkref_str_idx)
! 2478: break;
! 2479: if (entry->node == bkref_node)
! 2480: return REG_NOERROR; /* We already checked it. */
! 2481: }
! 2482: subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
! 2483:
! 2484: /* For each sub expression */
! 2485: for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
! 2486: {
! 2487: reg_errcode_t err;
! 2488: re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
! 2489: re_sub_match_last_t *sub_last;
! 2490: int sub_last_idx, sl_str;
! 2491: char *bkref_str;
! 2492:
! 2493: if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
! 2494: continue; /* It isn't related. */
! 2495:
! 2496: sl_str = sub_top->str_idx;
! 2497: bkref_str = buf + bkref_str_idx;
! 2498: /* At first, check the last node of sub expressions we already
! 2499: evaluated. */
! 2500: for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
! 2501: {
! 2502: int sl_str_diff;
! 2503: sub_last = sub_top->lasts[sub_last_idx];
! 2504: sl_str_diff = sub_last->str_idx - sl_str;
! 2505: /* The matched string by the sub expression match with the substring
! 2506: at the back reference? */
! 2507: if (sl_str_diff > 0
! 2508: && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
! 2509: break; /* We don't need to search this sub expression any more. */
! 2510: bkref_str += sl_str_diff;
! 2511: sl_str += sl_str_diff;
! 2512: err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
! 2513: bkref_str_idx);
! 2514: if (err == REG_NOMATCH)
! 2515: continue;
! 2516: if (BE (err != REG_NOERROR, 0))
! 2517: return err;
! 2518: }
! 2519: if (sub_last_idx < sub_top->nlasts)
! 2520: continue;
! 2521: if (sub_last_idx > 0)
! 2522: ++sl_str;
! 2523: /* Then, search for the other last nodes of the sub expression. */
! 2524: for (; sl_str <= bkref_str_idx; ++sl_str)
! 2525: {
! 2526: int cls_node, sl_str_off;
! 2527: re_node_set *nodes;
! 2528: sl_str_off = sl_str - sub_top->str_idx;
! 2529: /* The matched string by the sub expression match with the substring
! 2530: at the back reference? */
! 2531: if (sl_str_off > 0
! 2532: && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
! 2533: break; /* We don't need to search this sub expression any more. */
! 2534: if (mctx->state_log[sl_str] == NULL)
! 2535: continue;
! 2536: /* Does this state have a ')' of the sub expression? */
! 2537: nodes = &mctx->state_log[sl_str]->nodes;
! 2538: cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
! 2539: if (cls_node == -1)
! 2540: continue; /* No. */
! 2541: if (sub_top->path == NULL)
! 2542: {
! 2543: sub_top->path = calloc (sizeof (state_array_t),
! 2544: sl_str - sub_top->str_idx + 1);
! 2545: if (sub_top->path == NULL)
! 2546: return REG_ESPACE;
! 2547: }
! 2548: /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
! 2549: in the current context? */
! 2550: err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
! 2551: sub_top->str_idx, cls_node, sl_str, 0);
! 2552: if (err == REG_NOMATCH)
! 2553: continue;
! 2554: if (BE (err != REG_NOERROR, 0))
! 2555: return err;
! 2556: sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
! 2557: if (BE (sub_last == NULL, 0))
! 2558: return REG_ESPACE;
! 2559: err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
! 2560: bkref_str_idx);
! 2561: if (err == REG_NOMATCH)
! 2562: continue;
! 2563: }
! 2564: }
! 2565: return REG_NOERROR;
! 2566: }
! 2567:
! 2568: /* Helper functions for get_subexp(). */
! 2569:
! 2570: /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
! 2571: If it can arrive, register the sub expression expressed with SUB_TOP
! 2572: and SUB_LAST. */
! 2573:
! 2574: static reg_errcode_t
! 2575: get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
! 2576: const regex_t *preg;
! 2577: re_match_context_t *mctx;
! 2578: re_sub_match_top_t *sub_top;
! 2579: re_sub_match_last_t *sub_last;
! 2580: int bkref_node, bkref_str;
! 2581: {
! 2582: reg_errcode_t err;
! 2583: int to_idx;
! 2584: /* Can the subexpression arrive the back reference? */
! 2585: err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
! 2586: sub_last->str_idx, bkref_node, bkref_str, 1);
! 2587: if (err != REG_NOERROR)
! 2588: return err;
! 2589: err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
! 2590: sub_last->str_idx);
! 2591: if (BE (err != REG_NOERROR, 0))
! 2592: return err;
! 2593: to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
! 2594: clean_state_log_if_need (mctx, to_idx);
! 2595: return REG_NOERROR;
! 2596: }
! 2597:
! 2598: /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
! 2599: Search '(' if FL_OPEN, or search ')' otherwise.
! 2600: TODO: This function isn't efficient...
! 2601: Because there might be more than one nodes whose types are
! 2602: OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
! 2603: nodes.
! 2604: E.g. RE: (a){2} */
! 2605:
! 2606: static int
! 2607: find_subexp_node (dfa, nodes, subexp_idx, fl_open)
! 2608: re_dfa_t *dfa;
! 2609: re_node_set *nodes;
! 2610: int subexp_idx, fl_open;
! 2611: {
! 2612: int cls_idx;
! 2613: for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
! 2614: {
! 2615: int cls_node = nodes->elems[cls_idx];
! 2616: re_token_t *node = dfa->nodes + cls_node;
! 2617: if (((fl_open && node->type == OP_OPEN_SUBEXP)
! 2618: || (!fl_open && node->type == OP_CLOSE_SUBEXP))
! 2619: && node->opr.idx == subexp_idx)
! 2620: return cls_node;
! 2621: }
! 2622: return -1;
! 2623: }
! 2624:
! 2625: /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
! 2626: LAST_NODE at LAST_STR. We record the path onto PATH since it will be
! 2627: heavily reused.
! 2628: Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
! 2629:
! 2630: static reg_errcode_t
! 2631: check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
! 2632: fl_open)
! 2633: const regex_t *preg;
! 2634: re_match_context_t *mctx;
! 2635: state_array_t *path;
! 2636: int top_node, top_str, last_node, last_str, fl_open;
! 2637: {
! 2638: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2639: reg_errcode_t err;
! 2640: int subexp_num, backup_cur_idx, str_idx, null_cnt;
! 2641: re_dfastate_t *cur_state = NULL;
! 2642: re_node_set *cur_nodes, next_nodes;
! 2643: re_dfastate_t **backup_state_log;
! 2644: unsigned int context;
! 2645:
! 2646: subexp_num = dfa->nodes[top_node].opr.idx;
! 2647: /* Extend the buffer if we need. */
! 2648: if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
! 2649: {
! 2650: re_dfastate_t **new_array;
! 2651: int old_alloc = path->alloc;
! 2652: path->alloc += last_str + mctx->max_mb_elem_len + 1;
! 2653: new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
! 2654: if (new_array == NULL)
! 2655: return REG_ESPACE;
! 2656: path->array = new_array;
! 2657: memset (new_array + old_alloc, '\0',
! 2658: sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
! 2659: }
! 2660:
! 2661: str_idx = path->next_idx == 0 ? top_str : path->next_idx;
! 2662:
! 2663: /* Temporary modify MCTX. */
! 2664: backup_state_log = mctx->state_log;
! 2665: backup_cur_idx = mctx->input->cur_idx;
! 2666: mctx->state_log = path->array;
! 2667: mctx->input->cur_idx = str_idx;
! 2668:
! 2669: /* Setup initial node set. */
! 2670: context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
! 2671: preg->newline_anchor);
! 2672: if (str_idx == top_str)
! 2673: {
! 2674: err = re_node_set_init_1 (&next_nodes, top_node);
! 2675: if (BE (err != REG_NOERROR, 0))
! 2676: return err;
! 2677: err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
! 2678: if (BE (err != REG_NOERROR, 0))
! 2679: {
! 2680: re_node_set_free (&next_nodes);
! 2681: return err;
! 2682: }
! 2683: }
! 2684: else
! 2685: {
! 2686: cur_state = mctx->state_log[str_idx];
! 2687: if (cur_state && cur_state->has_backref)
! 2688: {
! 2689: err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
! 2690: if (BE ( err != REG_NOERROR, 0))
! 2691: return err;
! 2692: }
! 2693: else
! 2694: re_node_set_init_empty (&next_nodes);
! 2695: }
! 2696: if (str_idx == top_str || (cur_state && cur_state->has_backref))
! 2697: {
! 2698: if (next_nodes.nelem)
! 2699: {
! 2700: err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
! 2701: subexp_num, fl_open);
! 2702: if (BE ( err != REG_NOERROR, 0))
! 2703: {
! 2704: re_node_set_free (&next_nodes);
! 2705: return err;
! 2706: }
! 2707: }
! 2708: cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
! 2709: if (BE (cur_state == NULL && err != REG_NOERROR, 0))
! 2710: {
! 2711: re_node_set_free (&next_nodes);
! 2712: return err;
! 2713: }
! 2714: mctx->state_log[str_idx] = cur_state;
! 2715: }
! 2716:
! 2717: for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
! 2718: {
! 2719: re_node_set_empty (&next_nodes);
! 2720: if (mctx->state_log[str_idx + 1])
! 2721: {
! 2722: err = re_node_set_merge (&next_nodes,
! 2723: &mctx->state_log[str_idx + 1]->nodes);
! 2724: if (BE (err != REG_NOERROR, 0))
! 2725: {
! 2726: re_node_set_free (&next_nodes);
! 2727: return err;
! 2728: }
! 2729: }
! 2730: if (cur_state)
! 2731: {
! 2732: err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
! 2733: &cur_state->nodes, &next_nodes);
! 2734: if (BE (err != REG_NOERROR, 0))
! 2735: {
! 2736: re_node_set_free (&next_nodes);
! 2737: return err;
! 2738: }
! 2739: }
! 2740: ++str_idx;
! 2741: if (next_nodes.nelem)
! 2742: {
! 2743: err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
! 2744: fl_open);
! 2745: if (BE (err != REG_NOERROR, 0))
! 2746: {
! 2747: re_node_set_free (&next_nodes);
! 2748: return err;
! 2749: }
! 2750: err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
! 2751: subexp_num, fl_open);
! 2752: if (BE ( err != REG_NOERROR, 0))
! 2753: {
! 2754: re_node_set_free (&next_nodes);
! 2755: return err;
! 2756: }
! 2757: }
! 2758: context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
! 2759: preg->newline_anchor);
! 2760: cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
! 2761: if (BE (cur_state == NULL && err != REG_NOERROR, 0))
! 2762: {
! 2763: re_node_set_free (&next_nodes);
! 2764: return err;
! 2765: }
! 2766: mctx->state_log[str_idx] = cur_state;
! 2767: null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
! 2768: }
! 2769: re_node_set_free (&next_nodes);
! 2770: cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
! 2771: : &mctx->state_log[last_str]->nodes);
! 2772: path->next_idx = str_idx;
! 2773:
! 2774: /* Fix MCTX. */
! 2775: mctx->state_log = backup_state_log;
! 2776: mctx->input->cur_idx = backup_cur_idx;
! 2777:
! 2778: if (cur_nodes == NULL)
! 2779: return REG_NOMATCH;
! 2780: /* Then check the current node set has the node LAST_NODE. */
! 2781: return (re_node_set_contains (cur_nodes, last_node)
! 2782: || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
! 2783: : REG_NOMATCH);
! 2784: }
! 2785:
! 2786: /* Helper functions for check_arrival. */
! 2787:
! 2788: /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
! 2789: to NEXT_NODES.
! 2790: TODO: This function is similar to the functions transit_state*(),
! 2791: however this function has many additional works.
! 2792: Can't we unify them? */
! 2793:
! 2794: static reg_errcode_t
! 2795: check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
! 2796: const regex_t *preg;
! 2797: re_dfa_t *dfa;
! 2798: re_match_context_t *mctx;
! 2799: int str_idx;
! 2800: re_node_set *cur_nodes, *next_nodes;
! 2801: {
! 2802: int cur_idx;
! 2803: reg_errcode_t err;
! 2804: re_node_set union_set;
! 2805: re_node_set_init_empty (&union_set);
! 2806: for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
! 2807: {
! 2808: int naccepted = 0;
! 2809: int cur_node = cur_nodes->elems[cur_idx];
! 2810: re_token_type_t type = dfa->nodes[cur_node].type;
! 2811: if (IS_EPSILON_NODE(type))
! 2812: continue;
! 2813: #ifdef RE_ENABLE_I18N
! 2814: /* If the node may accept `multi byte'. */
! 2815: if (ACCEPT_MB_NODE (type))
! 2816: {
! 2817: naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
! 2818: str_idx);
! 2819: if (naccepted > 1)
! 2820: {
! 2821: re_dfastate_t *dest_state;
! 2822: int next_node = dfa->nexts[cur_node];
! 2823: int next_idx = str_idx + naccepted;
! 2824: dest_state = mctx->state_log[next_idx];
! 2825: re_node_set_empty (&union_set);
! 2826: if (dest_state)
! 2827: {
! 2828: err = re_node_set_merge (&union_set, &dest_state->nodes);
! 2829: if (BE (err != REG_NOERROR, 0))
! 2830: {
! 2831: re_node_set_free (&union_set);
! 2832: return err;
! 2833: }
! 2834: err = re_node_set_insert (&union_set, next_node);
! 2835: if (BE (err < 0, 0))
! 2836: {
! 2837: re_node_set_free (&union_set);
! 2838: return REG_ESPACE;
! 2839: }
! 2840: }
! 2841: else
! 2842: {
! 2843: err = re_node_set_insert (&union_set, next_node);
! 2844: if (BE (err < 0, 0))
! 2845: {
! 2846: re_node_set_free (&union_set);
! 2847: return REG_ESPACE;
! 2848: }
! 2849: }
! 2850: mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
! 2851: &union_set);
! 2852: if (BE (mctx->state_log[next_idx] == NULL
! 2853: && err != REG_NOERROR, 0))
! 2854: {
! 2855: re_node_set_free (&union_set);
! 2856: return err;
! 2857: }
! 2858: }
! 2859: }
! 2860: #endif /* RE_ENABLE_I18N */
! 2861: if (naccepted
! 2862: || check_node_accept (preg, dfa->nodes + cur_node, mctx,
! 2863: str_idx))
! 2864: {
! 2865: err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
! 2866: if (BE (err < 0, 0))
! 2867: {
! 2868: re_node_set_free (&union_set);
! 2869: return REG_ESPACE;
! 2870: }
! 2871: }
! 2872: }
! 2873: re_node_set_free (&union_set);
! 2874: return REG_NOERROR;
! 2875: }
! 2876:
! 2877: /* For all the nodes in CUR_NODES, add the epsilon closures of them to
! 2878: CUR_NODES, however exclude the nodes which are:
! 2879: - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
! 2880: - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
! 2881: */
! 2882:
! 2883: static reg_errcode_t
! 2884: check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
! 2885: re_dfa_t *dfa;
! 2886: re_node_set *cur_nodes;
! 2887: int ex_subexp, fl_open;
! 2888: {
! 2889: reg_errcode_t err;
! 2890: int idx, outside_node;
! 2891: re_node_set new_nodes;
! 2892: #ifdef DEBUG
! 2893: assert (cur_nodes->nelem);
! 2894: #endif
! 2895: err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
! 2896: if (BE (err != REG_NOERROR, 0))
! 2897: return err;
! 2898: /* Create a new node set NEW_NODES with the nodes which are epsilon
! 2899: closures of the node in CUR_NODES. */
! 2900:
! 2901: for (idx = 0; idx < cur_nodes->nelem; ++idx)
! 2902: {
! 2903: int cur_node = cur_nodes->elems[idx];
! 2904: re_node_set *eclosure = dfa->eclosures + cur_node;
! 2905: outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
! 2906: if (outside_node == -1)
! 2907: {
! 2908: /* There are no problematic nodes, just merge them. */
! 2909: err = re_node_set_merge (&new_nodes, eclosure);
! 2910: if (BE (err != REG_NOERROR, 0))
! 2911: {
! 2912: re_node_set_free (&new_nodes);
! 2913: return err;
! 2914: }
! 2915: }
! 2916: else
! 2917: {
! 2918: /* There are problematic nodes, re-calculate incrementally. */
! 2919: err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
! 2920: ex_subexp, fl_open);
! 2921: if (BE (err != REG_NOERROR, 0))
! 2922: {
! 2923: re_node_set_free (&new_nodes);
! 2924: return err;
! 2925: }
! 2926: }
! 2927: }
! 2928: re_node_set_free (cur_nodes);
! 2929: *cur_nodes = new_nodes;
! 2930: return REG_NOERROR;
! 2931: }
! 2932:
! 2933: /* Helper function for check_arrival_expand_ecl.
! 2934: Check incrementally the epsilon closure of TARGET, and if it isn't
! 2935: problematic append it to DST_NODES. */
! 2936:
! 2937: static reg_errcode_t
! 2938: check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
! 2939: re_dfa_t *dfa;
! 2940: int target, ex_subexp, fl_open;
! 2941: re_node_set *dst_nodes;
! 2942: {
! 2943: int cur_node, type;
! 2944: for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
! 2945: {
! 2946: int err;
! 2947: type = dfa->nodes[cur_node].type;
! 2948:
! 2949: if (((type == OP_OPEN_SUBEXP && fl_open)
! 2950: || (type == OP_CLOSE_SUBEXP && !fl_open))
! 2951: && dfa->nodes[cur_node].opr.idx == ex_subexp)
! 2952: {
! 2953: if (!fl_open)
! 2954: {
! 2955: err = re_node_set_insert (dst_nodes, cur_node);
! 2956: if (BE (err == -1, 0))
! 2957: return REG_ESPACE;
! 2958: }
! 2959: break;
! 2960: }
! 2961: err = re_node_set_insert (dst_nodes, cur_node);
! 2962: if (BE (err == -1, 0))
! 2963: return REG_ESPACE;
! 2964: if (dfa->edests[cur_node].nelem == 0)
! 2965: break;
! 2966: if (dfa->edests[cur_node].nelem == 2)
! 2967: {
! 2968: err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
! 2969: dfa->edests[cur_node].elems[1],
! 2970: ex_subexp, fl_open);
! 2971: if (BE (err != REG_NOERROR, 0))
! 2972: return err;
! 2973: }
! 2974: cur_node = dfa->edests[cur_node].elems[0];
! 2975: }
! 2976: return REG_NOERROR;
! 2977: }
! 2978:
! 2979:
! 2980: /* For all the back references in the current state, calculate the
! 2981: destination of the back references by the appropriate entry
! 2982: in MCTX->BKREF_ENTS. */
! 2983:
! 2984: static reg_errcode_t
! 2985: expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
! 2986: fl_open)
! 2987: const regex_t *preg;
! 2988: re_match_context_t *mctx;
! 2989: int cur_str, last_str, subexp_num, fl_open;
! 2990: re_node_set *cur_nodes;
! 2991: {
! 2992: reg_errcode_t err;
! 2993: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 2994: int cache_idx, cache_idx_start;
! 2995: /* The current state. */
! 2996:
! 2997: cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
! 2998: for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
! 2999: {
! 3000: int to_idx, next_node;
! 3001: struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
! 3002: if (ent->str_idx > cur_str)
! 3003: break;
! 3004: /* Is this entry ENT is appropriate? */
! 3005: if (!re_node_set_contains (cur_nodes, ent->node))
! 3006: continue; /* No. */
! 3007:
! 3008: to_idx = cur_str + ent->subexp_to - ent->subexp_from;
! 3009: /* Calculate the destination of the back reference, and append it
! 3010: to MCTX->STATE_LOG. */
! 3011: if (to_idx == cur_str)
! 3012: {
! 3013: /* The backreference did epsilon transit, we must re-check all the
! 3014: node in the current state. */
! 3015: re_node_set new_dests;
! 3016: reg_errcode_t err2, err3;
! 3017: next_node = dfa->edests[ent->node].elems[0];
! 3018: if (re_node_set_contains (cur_nodes, next_node))
! 3019: continue;
! 3020: err = re_node_set_init_1 (&new_dests, next_node);
! 3021: err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
! 3022: fl_open);
! 3023: err3 = re_node_set_merge (cur_nodes, &new_dests);
! 3024: re_node_set_free (&new_dests);
! 3025: if (BE (err != REG_NOERROR || err2 != REG_NOERROR
! 3026: || err3 != REG_NOERROR, 0))
! 3027: {
! 3028: err = (err != REG_NOERROR ? err
! 3029: : (err2 != REG_NOERROR ? err2 : err3));
! 3030: return err;
! 3031: }
! 3032: /* TODO: It is still inefficient... */
! 3033: cache_idx = cache_idx_start - 1;
! 3034: continue;
! 3035: }
! 3036: else
! 3037: {
! 3038: re_node_set union_set;
! 3039: next_node = dfa->nexts[ent->node];
! 3040: if (mctx->state_log[to_idx])
! 3041: {
! 3042: int ret;
! 3043: if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
! 3044: next_node))
! 3045: continue;
! 3046: err = re_node_set_init_copy (&union_set,
! 3047: &mctx->state_log[to_idx]->nodes);
! 3048: ret = re_node_set_insert (&union_set, next_node);
! 3049: if (BE (err != REG_NOERROR || ret < 0, 0))
! 3050: {
! 3051: re_node_set_free (&union_set);
! 3052: err = err != REG_NOERROR ? err : REG_ESPACE;
! 3053: return err;
! 3054: }
! 3055: }
! 3056: else
! 3057: {
! 3058: err = re_node_set_init_1 (&union_set, next_node);
! 3059: if (BE (err != REG_NOERROR, 0))
! 3060: return err;
! 3061: }
! 3062: mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
! 3063: re_node_set_free (&union_set);
! 3064: if (BE (mctx->state_log[to_idx] == NULL
! 3065: && err != REG_NOERROR, 0))
! 3066: return err;
! 3067: }
! 3068: }
! 3069: return REG_NOERROR;
! 3070: }
! 3071:
! 3072: /* Build transition table for the state.
! 3073: Return the new table if succeeded, otherwise return NULL. */
! 3074:
! 3075: static re_dfastate_t **
! 3076: build_trtable (preg, state, fl_search)
! 3077: const regex_t *preg;
! 3078: const re_dfastate_t *state;
! 3079: int fl_search;
! 3080: {
! 3081: reg_errcode_t err;
! 3082: re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 3083: int i, j, k, ch;
! 3084: int dests_node_malloced = 0, dest_states_malloced = 0;
! 3085: int ndests; /* Number of the destination states from `state'. */
! 3086: re_dfastate_t **trtable;
! 3087: re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
! 3088: re_node_set follows, *dests_node;
! 3089: bitset *dests_ch;
! 3090: bitset acceptable;
! 3091:
! 3092: /* We build DFA states which corresponds to the destination nodes
! 3093: from `state'. `dests_node[i]' represents the nodes which i-th
! 3094: destination state contains, and `dests_ch[i]' represents the
! 3095: characters which i-th destination state accepts. */
! 3096: #ifdef _LIBC
! 3097: if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
! 3098: dests_node = (re_node_set *)
! 3099: alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
! 3100: else
! 3101: #endif
! 3102: {
! 3103: dests_node = (re_node_set *)
! 3104: malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
! 3105: if (BE (dests_node == NULL, 0))
! 3106: return NULL;
! 3107: dests_node_malloced = 1;
! 3108: }
! 3109: dests_ch = (bitset *) (dests_node + SBC_MAX);
! 3110:
! 3111: /* Initialize transiton table. */
! 3112: trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
! 3113: if (BE (trtable == NULL, 0))
! 3114: {
! 3115: if (dests_node_malloced)
! 3116: free (dests_node);
! 3117: return NULL;
! 3118: }
! 3119:
! 3120: /* At first, group all nodes belonging to `state' into several
! 3121: destinations. */
! 3122: ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
! 3123: if (BE (ndests <= 0, 0))
! 3124: {
! 3125: if (dests_node_malloced)
! 3126: free (dests_node);
! 3127: /* Return NULL in case of an error, trtable otherwise. */
! 3128: if (ndests == 0)
! 3129: return trtable;
! 3130: free (trtable);
! 3131: return NULL;
! 3132: }
! 3133:
! 3134: err = re_node_set_alloc (&follows, ndests + 1);
! 3135: if (BE (err != REG_NOERROR, 0))
! 3136: goto out_free;
! 3137:
! 3138: #ifdef _LIBC
! 3139: if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
! 3140: + ndests * 3 * sizeof (re_dfastate_t *)))
! 3141: dest_states = (re_dfastate_t **)
! 3142: alloca (ndests * 3 * sizeof (re_dfastate_t *));
! 3143: else
! 3144: #endif
! 3145: {
! 3146: dest_states = (re_dfastate_t **)
! 3147: malloc (ndests * 3 * sizeof (re_dfastate_t *));
! 3148: if (BE (dest_states == NULL, 0))
! 3149: {
! 3150: out_free:
! 3151: if (dest_states_malloced)
! 3152: free (dest_states);
! 3153: re_node_set_free (&follows);
! 3154: for (i = 0; i < ndests; ++i)
! 3155: re_node_set_free (dests_node + i);
! 3156: free (trtable);
! 3157: if (dests_node_malloced)
! 3158: free (dests_node);
! 3159: return NULL;
! 3160: }
! 3161: dest_states_malloced = 1;
! 3162: }
! 3163: dest_states_word = dest_states + ndests;
! 3164: dest_states_nl = dest_states_word + ndests;
! 3165: bitset_empty (acceptable);
! 3166:
! 3167: /* Then build the states for all destinations. */
! 3168: for (i = 0; i < ndests; ++i)
! 3169: {
! 3170: int next_node;
! 3171: re_node_set_empty (&follows);
! 3172: /* Merge the follows of this destination states. */
! 3173: for (j = 0; j < dests_node[i].nelem; ++j)
! 3174: {
! 3175: next_node = dfa->nexts[dests_node[i].elems[j]];
! 3176: if (next_node != -1)
! 3177: {
! 3178: err = re_node_set_merge (&follows, dfa->eclosures + next_node);
! 3179: if (BE (err != REG_NOERROR, 0))
! 3180: goto out_free;
! 3181: }
! 3182: }
! 3183: /* If search flag is set, merge the initial state. */
! 3184: if (fl_search)
! 3185: {
! 3186: #ifdef RE_ENABLE_I18N
! 3187: int not_initial = 0;
! 3188: for (j = 0; j < follows.nelem; ++j)
! 3189: if (dfa->nodes[follows.elems[j]].type == CHARACTER)
! 3190: {
! 3191: not_initial = dfa->nodes[follows.elems[j]].mb_partial;
! 3192: break;
! 3193: }
! 3194: if (!not_initial)
! 3195: #endif
! 3196: {
! 3197: err = re_node_set_merge (&follows,
! 3198: dfa->init_state->entrance_nodes);
! 3199: if (BE (err != REG_NOERROR, 0))
! 3200: goto out_free;
! 3201: }
! 3202: }
! 3203: dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
! 3204: if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
! 3205: goto out_free;
! 3206: /* If the new state has context constraint,
! 3207: build appropriate states for these contexts. */
! 3208: if (dest_states[i]->has_constraint)
! 3209: {
! 3210: dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
! 3211: CONTEXT_WORD);
! 3212: if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
! 3213: goto out_free;
! 3214: dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
! 3215: CONTEXT_NEWLINE);
! 3216: if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
! 3217: goto out_free;
! 3218: }
! 3219: else
! 3220: {
! 3221: dest_states_word[i] = dest_states[i];
! 3222: dest_states_nl[i] = dest_states[i];
! 3223: }
! 3224: bitset_merge (acceptable, dests_ch[i]);
! 3225: }
! 3226:
! 3227: /* Update the transition table. */
! 3228: /* For all characters ch...: */
! 3229: for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
! 3230: for (j = 0; j < UINT_BITS; ++j, ++ch)
! 3231: if ((acceptable[i] >> j) & 1)
! 3232: {
! 3233: /* The current state accepts the character ch. */
! 3234: if (IS_WORD_CHAR (ch))
! 3235: {
! 3236: for (k = 0; k < ndests; ++k)
! 3237: if ((dests_ch[k][i] >> j) & 1)
! 3238: {
! 3239: /* k-th destination accepts the word character ch. */
! 3240: trtable[ch] = dest_states_word[k];
! 3241: /* There must be only one destination which accepts
! 3242: character ch. See group_nodes_into_DFAstates. */
! 3243: break;
! 3244: }
! 3245: }
! 3246: else /* not WORD_CHAR */
! 3247: {
! 3248: for (k = 0; k < ndests; ++k)
! 3249: if ((dests_ch[k][i] >> j) & 1)
! 3250: {
! 3251: /* k-th destination accepts the non-word character ch. */
! 3252: trtable[ch] = dest_states[k];
! 3253: /* There must be only one destination which accepts
! 3254: character ch. See group_nodes_into_DFAstates. */
! 3255: break;
! 3256: }
! 3257: }
! 3258: }
! 3259: /* new line */
! 3260: if (bitset_contain (acceptable, NEWLINE_CHAR))
! 3261: {
! 3262: /* The current state accepts newline character. */
! 3263: for (k = 0; k < ndests; ++k)
! 3264: if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
! 3265: {
! 3266: /* k-th destination accepts newline character. */
! 3267: trtable[NEWLINE_CHAR] = dest_states_nl[k];
! 3268: /* There must be only one destination which accepts
! 3269: newline. See group_nodes_into_DFAstates. */
! 3270: break;
! 3271: }
! 3272: }
! 3273:
! 3274: if (dest_states_malloced)
! 3275: free (dest_states);
! 3276:
! 3277: re_node_set_free (&follows);
! 3278: for (i = 0; i < ndests; ++i)
! 3279: re_node_set_free (dests_node + i);
! 3280:
! 3281: if (dests_node_malloced)
! 3282: free (dests_node);
! 3283:
! 3284: return trtable;
! 3285: }
! 3286:
! 3287: /* Group all nodes belonging to STATE into several destinations.
! 3288: Then for all destinations, set the nodes belonging to the destination
! 3289: to DESTS_NODE[i] and set the characters accepted by the destination
! 3290: to DEST_CH[i]. This function return the number of destinations. */
! 3291:
! 3292: static int
! 3293: group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
! 3294: const regex_t *preg;
! 3295: const re_dfastate_t *state;
! 3296: re_node_set *dests_node;
! 3297: bitset *dests_ch;
! 3298: {
! 3299: reg_errcode_t err;
! 3300: const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 3301: int i, j, k;
! 3302: int ndests; /* Number of the destinations from `state'. */
! 3303: bitset accepts; /* Characters a node can accept. */
! 3304: const re_node_set *cur_nodes = &state->nodes;
! 3305: bitset_empty (accepts);
! 3306: ndests = 0;
! 3307:
! 3308: /* For all the nodes belonging to `state', */
! 3309: for (i = 0; i < cur_nodes->nelem; ++i)
! 3310: {
! 3311: re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
! 3312: re_token_type_t type = node->type;
! 3313: unsigned int constraint = node->constraint;
! 3314:
! 3315: /* Enumerate all single byte character this node can accept. */
! 3316: if (type == CHARACTER)
! 3317: bitset_set (accepts, node->opr.c);
! 3318: else if (type == SIMPLE_BRACKET)
! 3319: {
! 3320: bitset_merge (accepts, node->opr.sbcset);
! 3321: }
! 3322: else if (type == OP_PERIOD)
! 3323: {
! 3324: bitset_set_all (accepts);
! 3325: if (!(preg->syntax & RE_DOT_NEWLINE))
! 3326: bitset_clear (accepts, '\n');
! 3327: if (preg->syntax & RE_DOT_NOT_NULL)
! 3328: bitset_clear (accepts, '\0');
! 3329: }
! 3330: else
! 3331: continue;
! 3332:
! 3333: /* Check the `accepts' and sift the characters which are not
! 3334: match it the context. */
! 3335: if (constraint)
! 3336: {
! 3337: if (constraint & NEXT_WORD_CONSTRAINT)
! 3338: for (j = 0; j < BITSET_UINTS; ++j)
! 3339: accepts[j] &= dfa->word_char[j];
! 3340: if (constraint & NEXT_NOTWORD_CONSTRAINT)
! 3341: for (j = 0; j < BITSET_UINTS; ++j)
! 3342: accepts[j] &= ~dfa->word_char[j];
! 3343: if (constraint & NEXT_NEWLINE_CONSTRAINT)
! 3344: {
! 3345: int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
! 3346: bitset_empty (accepts);
! 3347: if (accepts_newline)
! 3348: bitset_set (accepts, NEWLINE_CHAR);
! 3349: else
! 3350: continue;
! 3351: }
! 3352: }
! 3353:
! 3354: /* Then divide `accepts' into DFA states, or create a new
! 3355: state. */
! 3356: for (j = 0; j < ndests; ++j)
! 3357: {
! 3358: bitset intersec; /* Intersection sets, see below. */
! 3359: bitset remains;
! 3360: /* Flags, see below. */
! 3361: int has_intersec, not_subset, not_consumed;
! 3362:
! 3363: /* Optimization, skip if this state doesn't accept the character. */
! 3364: if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
! 3365: continue;
! 3366:
! 3367: /* Enumerate the intersection set of this state and `accepts'. */
! 3368: has_intersec = 0;
! 3369: for (k = 0; k < BITSET_UINTS; ++k)
! 3370: has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
! 3371: /* And skip if the intersection set is empty. */
! 3372: if (!has_intersec)
! 3373: continue;
! 3374:
! 3375: /* Then check if this state is a subset of `accepts'. */
! 3376: not_subset = not_consumed = 0;
! 3377: for (k = 0; k < BITSET_UINTS; ++k)
! 3378: {
! 3379: not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
! 3380: not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
! 3381: }
! 3382:
! 3383: /* If this state isn't a subset of `accepts', create a
! 3384: new group state, which has the `remains'. */
! 3385: if (not_subset)
! 3386: {
! 3387: bitset_copy (dests_ch[ndests], remains);
! 3388: bitset_copy (dests_ch[j], intersec);
! 3389: err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
! 3390: if (BE (err != REG_NOERROR, 0))
! 3391: goto error_return;
! 3392: ++ndests;
! 3393: }
! 3394:
! 3395: /* Put the position in the current group. */
! 3396: err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
! 3397: if (BE (err < 0, 0))
! 3398: goto error_return;
! 3399:
! 3400: /* If all characters are consumed, go to next node. */
! 3401: if (!not_consumed)
! 3402: break;
! 3403: }
! 3404: /* Some characters remain, create a new group. */
! 3405: if (j == ndests)
! 3406: {
! 3407: bitset_copy (dests_ch[ndests], accepts);
! 3408: err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
! 3409: if (BE (err != REG_NOERROR, 0))
! 3410: goto error_return;
! 3411: ++ndests;
! 3412: bitset_empty (accepts);
! 3413: }
! 3414: }
! 3415: return ndests;
! 3416: error_return:
! 3417: for (j = 0; j < ndests; ++j)
! 3418: re_node_set_free (dests_node + j);
! 3419: return -1;
! 3420: }
! 3421:
! 3422: #ifdef RE_ENABLE_I18N
! 3423: /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
! 3424: Return the number of the bytes the node accepts.
! 3425: STR_IDX is the current index of the input string.
! 3426:
! 3427: This function handles the nodes which can accept one character, or
! 3428: one collating element like '.', '[a-z]', opposite to the other nodes
! 3429: can only accept one byte. */
! 3430:
! 3431: static int
! 3432: check_node_accept_bytes (preg, node_idx, input, str_idx)
! 3433: const regex_t *preg;
! 3434: int node_idx, str_idx;
! 3435: const re_string_t *input;
! 3436: {
! 3437: const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
! 3438: const re_token_t *node = dfa->nodes + node_idx;
! 3439: int elem_len = re_string_elem_size_at (input, str_idx);
! 3440: int char_len = re_string_char_size_at (input, str_idx);
! 3441: int i;
! 3442: # ifdef _LIBC
! 3443: int j;
! 3444: uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
! 3445: # endif /* _LIBC */
! 3446: if (elem_len <= 1 && char_len <= 1)
! 3447: return 0;
! 3448: if (node->type == OP_PERIOD)
! 3449: {
! 3450: /* '.' accepts any one character except the following two cases. */
! 3451: if ((!(preg->syntax & RE_DOT_NEWLINE) &&
! 3452: re_string_byte_at (input, str_idx) == '\n') ||
! 3453: ((preg->syntax & RE_DOT_NOT_NULL) &&
! 3454: re_string_byte_at (input, str_idx) == '\0'))
! 3455: return 0;
! 3456: return char_len;
! 3457: }
! 3458: else if (node->type == COMPLEX_BRACKET)
! 3459: {
! 3460: const re_charset_t *cset = node->opr.mbcset;
! 3461: # ifdef _LIBC
! 3462: const unsigned char *pin = ((char *) re_string_get_buffer (input)
! 3463: + str_idx);
! 3464: # endif /* _LIBC */
! 3465: int match_len = 0;
! 3466: wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
! 3467: ? re_string_wchar_at (input, str_idx) : 0);
! 3468:
! 3469: /* match with multibyte character? */
! 3470: for (i = 0; i < cset->nmbchars; ++i)
! 3471: if (wc == cset->mbchars[i])
! 3472: {
! 3473: match_len = char_len;
! 3474: goto check_node_accept_bytes_match;
! 3475: }
! 3476: /* match with character_class? */
! 3477: for (i = 0; i < cset->nchar_classes; ++i)
! 3478: {
! 3479: wctype_t wt = cset->char_classes[i];
! 3480: if (__iswctype (wc, wt))
! 3481: {
! 3482: match_len = char_len;
! 3483: goto check_node_accept_bytes_match;
! 3484: }
! 3485: }
! 3486:
! 3487: # ifdef _LIBC
! 3488: if (nrules != 0)
! 3489: {
! 3490: unsigned int in_collseq = 0;
! 3491: const int32_t *table, *indirect;
! 3492: const unsigned char *weights, *extra;
! 3493: const char *collseqwc;
! 3494: int32_t idx;
! 3495: /* This #include defines a local function! */
! 3496: # include <locale/weight.h>
! 3497:
! 3498: /* match with collating_symbol? */
! 3499: if (cset->ncoll_syms)
! 3500: extra = (const unsigned char *)
! 3501: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
! 3502: for (i = 0; i < cset->ncoll_syms; ++i)
! 3503: {
! 3504: const unsigned char *coll_sym = extra + cset->coll_syms[i];
! 3505: /* Compare the length of input collating element and
! 3506: the length of current collating element. */
! 3507: if (*coll_sym != elem_len)
! 3508: continue;
! 3509: /* Compare each bytes. */
! 3510: for (j = 0; j < *coll_sym; j++)
! 3511: if (pin[j] != coll_sym[1 + j])
! 3512: break;
! 3513: if (j == *coll_sym)
! 3514: {
! 3515: /* Match if every bytes is equal. */
! 3516: match_len = j;
! 3517: goto check_node_accept_bytes_match;
! 3518: }
! 3519: }
! 3520:
! 3521: if (cset->nranges)
! 3522: {
! 3523: if (elem_len <= char_len)
! 3524: {
! 3525: collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
! 3526: in_collseq = collseq_table_lookup (collseqwc, wc);
! 3527: }
! 3528: else
! 3529: in_collseq = find_collation_sequence_value (pin, elem_len);
! 3530: }
! 3531: /* match with range expression? */
! 3532: for (i = 0; i < cset->nranges; ++i)
! 3533: if (cset->range_starts[i] <= in_collseq
! 3534: && in_collseq <= cset->range_ends[i])
! 3535: {
! 3536: match_len = elem_len;
! 3537: goto check_node_accept_bytes_match;
! 3538: }
! 3539:
! 3540: /* match with equivalence_class? */
! 3541: if (cset->nequiv_classes)
! 3542: {
! 3543: const unsigned char *cp = pin;
! 3544: table = (const int32_t *)
! 3545: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
! 3546: weights = (const unsigned char *)
! 3547: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
! 3548: extra = (const unsigned char *)
! 3549: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
! 3550: indirect = (const int32_t *)
! 3551: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
! 3552: idx = findidx (&cp);
! 3553: if (idx > 0)
! 3554: for (i = 0; i < cset->nequiv_classes; ++i)
! 3555: {
! 3556: int32_t equiv_class_idx = cset->equiv_classes[i];
! 3557: size_t weight_len = weights[idx];
! 3558: if (weight_len == weights[equiv_class_idx])
! 3559: {
! 3560: int cnt = 0;
! 3561: while (cnt <= weight_len
! 3562: && (weights[equiv_class_idx + 1 + cnt]
! 3563: == weights[idx + 1 + cnt]))
! 3564: ++cnt;
! 3565: if (cnt > weight_len)
! 3566: {
! 3567: match_len = elem_len;
! 3568: goto check_node_accept_bytes_match;
! 3569: }
! 3570: }
! 3571: }
! 3572: }
! 3573: }
! 3574: else
! 3575: # endif /* _LIBC */
! 3576: {
! 3577: /* match with range expression? */
! 3578: #if __GNUC__ >= 2
! 3579: wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
! 3580: #else
! 3581: wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
! 3582: cmp_buf[2] = wc;
! 3583: #endif
! 3584: for (i = 0; i < cset->nranges; ++i)
! 3585: {
! 3586: cmp_buf[0] = cset->range_starts[i];
! 3587: cmp_buf[4] = cset->range_ends[i];
! 3588: if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
! 3589: && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
! 3590: {
! 3591: match_len = char_len;
! 3592: goto check_node_accept_bytes_match;
! 3593: }
! 3594: }
! 3595: }
! 3596: check_node_accept_bytes_match:
! 3597: if (!cset->non_match)
! 3598: return match_len;
! 3599: else
! 3600: {
! 3601: if (match_len > 0)
! 3602: return 0;
! 3603: else
! 3604: return (elem_len > char_len) ? elem_len : char_len;
! 3605: }
! 3606: }
! 3607: return 0;
! 3608: }
! 3609:
! 3610: # ifdef _LIBC
! 3611: static unsigned int
! 3612: find_collation_sequence_value (mbs, mbs_len)
! 3613: const unsigned char *mbs;
! 3614: size_t mbs_len;
! 3615: {
! 3616: uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
! 3617: if (nrules == 0)
! 3618: {
! 3619: if (mbs_len == 1)
! 3620: {
! 3621: /* No valid character. Match it as a single byte character. */
! 3622: const unsigned char *collseq = (const unsigned char *)
! 3623: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
! 3624: return collseq[mbs[0]];
! 3625: }
! 3626: return UINT_MAX;
! 3627: }
! 3628: else
! 3629: {
! 3630: int32_t idx;
! 3631: const unsigned char *extra = (const unsigned char *)
! 3632: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
! 3633:
! 3634: for (idx = 0; ;)
! 3635: {
! 3636: int mbs_cnt, found = 0;
! 3637: int32_t elem_mbs_len;
! 3638: /* Skip the name of collating element name. */
! 3639: idx = idx + extra[idx] + 1;
! 3640: elem_mbs_len = extra[idx++];
! 3641: if (mbs_len == elem_mbs_len)
! 3642: {
! 3643: for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
! 3644: if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
! 3645: break;
! 3646: if (mbs_cnt == elem_mbs_len)
! 3647: /* Found the entry. */
! 3648: found = 1;
! 3649: }
! 3650: /* Skip the byte sequence of the collating element. */
! 3651: idx += elem_mbs_len;
! 3652: /* Adjust for the alignment. */
! 3653: idx = (idx + 3) & ~3;
! 3654: /* Skip the collation sequence value. */
! 3655: idx += sizeof (uint32_t);
! 3656: /* Skip the wide char sequence of the collating element. */
! 3657: idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
! 3658: /* If we found the entry, return the sequence value. */
! 3659: if (found)
! 3660: return *(uint32_t *) (extra + idx);
! 3661: /* Skip the collation sequence value. */
! 3662: idx += sizeof (uint32_t);
! 3663: }
! 3664: }
! 3665: }
! 3666: # endif /* _LIBC */
! 3667: #endif /* RE_ENABLE_I18N */
! 3668:
! 3669: /* Check whether the node accepts the byte which is IDX-th
! 3670: byte of the INPUT. */
! 3671:
! 3672: static int
! 3673: check_node_accept (preg, node, mctx, idx)
! 3674: const regex_t *preg;
! 3675: const re_token_t *node;
! 3676: const re_match_context_t *mctx;
! 3677: int idx;
! 3678: {
! 3679: unsigned char ch;
! 3680: if (node->constraint)
! 3681: {
! 3682: /* The node has constraints. Check whether the current context
! 3683: satisfies the constraints. */
! 3684: unsigned int context = re_string_context_at (mctx->input, idx,
! 3685: mctx->eflags,
! 3686: preg->newline_anchor);
! 3687: if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
! 3688: return 0;
! 3689: }
! 3690: ch = re_string_byte_at (mctx->input, idx);
! 3691: if (node->type == CHARACTER)
! 3692: return node->opr.c == ch;
! 3693: else if (node->type == SIMPLE_BRACKET)
! 3694: return bitset_contain (node->opr.sbcset, ch);
! 3695: else if (node->type == OP_PERIOD)
! 3696: return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
! 3697: || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
! 3698: else
! 3699: return 0;
! 3700: }
! 3701:
! 3702: /* Extend the buffers, if the buffers have run out. */
! 3703:
! 3704: static reg_errcode_t
! 3705: extend_buffers (mctx)
! 3706: re_match_context_t *mctx;
! 3707: {
! 3708: reg_errcode_t ret;
! 3709: re_string_t *pstr = mctx->input;
! 3710:
! 3711: /* Double the lengthes of the buffers. */
! 3712: ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
! 3713: if (BE (ret != REG_NOERROR, 0))
! 3714: return ret;
! 3715:
! 3716: if (mctx->state_log != NULL)
! 3717: {
! 3718: /* And double the length of state_log. */
! 3719: re_dfastate_t **new_array;
! 3720: new_array = re_realloc (mctx->state_log, re_dfastate_t *,
! 3721: pstr->bufs_len * 2);
! 3722: if (BE (new_array == NULL, 0))
! 3723: return REG_ESPACE;
! 3724: mctx->state_log = new_array;
! 3725: }
! 3726:
! 3727: /* Then reconstruct the buffers. */
! 3728: if (pstr->icase)
! 3729: {
! 3730: #ifdef RE_ENABLE_I18N
! 3731: if (MB_CUR_MAX > 1)
! 3732: build_wcs_upper_buffer (pstr);
! 3733: else
! 3734: #endif /* RE_ENABLE_I18N */
! 3735: build_upper_buffer (pstr);
! 3736: }
! 3737: else
! 3738: {
! 3739: #ifdef RE_ENABLE_I18N
! 3740: if (MB_CUR_MAX > 1)
! 3741: build_wcs_buffer (pstr);
! 3742: else
! 3743: #endif /* RE_ENABLE_I18N */
! 3744: {
! 3745: if (pstr->trans != NULL)
! 3746: re_string_translate_buffer (pstr);
! 3747: else
! 3748: pstr->valid_len = pstr->bufs_len;
! 3749: }
! 3750: }
! 3751: return REG_NOERROR;
! 3752: }
! 3753:
! 3754:
! 3755: /* Functions for matching context. */
! 3756:
! 3757: /* Initialize MCTX. */
! 3758:
! 3759: static reg_errcode_t
! 3760: match_ctx_init (mctx, eflags, input, n)
! 3761: re_match_context_t *mctx;
! 3762: int eflags, n;
! 3763: re_string_t *input;
! 3764: {
! 3765: mctx->eflags = eflags;
! 3766: mctx->input = input;
! 3767: mctx->match_last = -1;
! 3768: if (n > 0)
! 3769: {
! 3770: mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
! 3771: mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
! 3772: if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
! 3773: return REG_ESPACE;
! 3774: }
! 3775: else
! 3776: mctx->bkref_ents = NULL;
! 3777: mctx->nbkref_ents = 0;
! 3778: mctx->abkref_ents = n;
! 3779: mctx->max_mb_elem_len = 1;
! 3780: mctx->nsub_tops = 0;
! 3781: mctx->asub_tops = n;
! 3782: return REG_NOERROR;
! 3783: }
! 3784:
! 3785: /* Clean the entries which depend on the current input in MCTX.
! 3786: This function must be invoked when the matcher changes the start index
! 3787: of the input, or changes the input string. */
! 3788:
! 3789: static void
! 3790: match_ctx_clean (mctx)
! 3791: re_match_context_t *mctx;
! 3792: {
! 3793: match_ctx_free_subtops (mctx);
! 3794: mctx->nsub_tops = 0;
! 3795: mctx->nbkref_ents = 0;
! 3796: }
! 3797:
! 3798: /* Free all the memory associated with MCTX. */
! 3799:
! 3800: static void
! 3801: match_ctx_free (mctx)
! 3802: re_match_context_t *mctx;
! 3803: {
! 3804: match_ctx_free_subtops (mctx);
! 3805: re_free (mctx->sub_tops);
! 3806: re_free (mctx->bkref_ents);
! 3807: }
! 3808:
! 3809: /* Free all the memory associated with MCTX->SUB_TOPS. */
! 3810:
! 3811: static void
! 3812: match_ctx_free_subtops (mctx)
! 3813: re_match_context_t *mctx;
! 3814: {
! 3815: int st_idx;
! 3816: for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
! 3817: {
! 3818: int sl_idx;
! 3819: re_sub_match_top_t *top = mctx->sub_tops[st_idx];
! 3820: for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
! 3821: {
! 3822: re_sub_match_last_t *last = top->lasts[sl_idx];
! 3823: re_free (last->path.array);
! 3824: re_free (last);
! 3825: }
! 3826: re_free (top->lasts);
! 3827: if (top->path)
! 3828: {
! 3829: re_free (top->path->array);
! 3830: re_free (top->path);
! 3831: }
! 3832: free (top);
! 3833: }
! 3834: }
! 3835:
! 3836: /* Add a new backreference entry to MCTX.
! 3837: Note that we assume that caller never call this function with duplicate
! 3838: entry, and call with STR_IDX which isn't smaller than any existing entry.
! 3839: */
! 3840:
! 3841: static reg_errcode_t
! 3842: match_ctx_add_entry (mctx, node, str_idx, from, to)
! 3843: re_match_context_t *mctx;
! 3844: int node, str_idx, from, to;
! 3845: {
! 3846: if (mctx->nbkref_ents >= mctx->abkref_ents)
! 3847: {
! 3848: struct re_backref_cache_entry* new_entry;
! 3849: new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
! 3850: mctx->abkref_ents * 2);
! 3851: if (BE (new_entry == NULL, 0))
! 3852: {
! 3853: re_free (mctx->bkref_ents);
! 3854: return REG_ESPACE;
! 3855: }
! 3856: mctx->bkref_ents = new_entry;
! 3857: memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
! 3858: sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
! 3859: mctx->abkref_ents *= 2;
! 3860: }
! 3861: mctx->bkref_ents[mctx->nbkref_ents].node = node;
! 3862: mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
! 3863: mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
! 3864: mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
! 3865: mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
! 3866: if (mctx->max_mb_elem_len < to - from)
! 3867: mctx->max_mb_elem_len = to - from;
! 3868: return REG_NOERROR;
! 3869: }
! 3870:
! 3871: /* Search for the first entry which has the same str_idx.
! 3872: Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
! 3873:
! 3874: static int
! 3875: search_cur_bkref_entry (mctx, str_idx)
! 3876: re_match_context_t *mctx;
! 3877: int str_idx;
! 3878: {
! 3879: int left, right, mid;
! 3880: right = mctx->nbkref_ents;
! 3881: for (left = 0; left < right;)
! 3882: {
! 3883: mid = (left + right) / 2;
! 3884: if (mctx->bkref_ents[mid].str_idx < str_idx)
! 3885: left = mid + 1;
! 3886: else
! 3887: right = mid;
! 3888: }
! 3889: return left;
! 3890: }
! 3891:
! 3892: static void
! 3893: match_ctx_clear_flag (mctx)
! 3894: re_match_context_t *mctx;
! 3895: {
! 3896: int i;
! 3897: for (i = 0; i < mctx->nbkref_ents; ++i)
! 3898: {
! 3899: mctx->bkref_ents[i].flag = 0;
! 3900: }
! 3901: }
! 3902:
! 3903: /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
! 3904: at STR_IDX. */
! 3905:
! 3906: static reg_errcode_t
! 3907: match_ctx_add_subtop (mctx, node, str_idx)
! 3908: re_match_context_t *mctx;
! 3909: int node, str_idx;
! 3910: {
! 3911: #ifdef DEBUG
! 3912: assert (mctx->sub_tops != NULL);
! 3913: assert (mctx->asub_tops > 0);
! 3914: #endif
! 3915: if (mctx->nsub_tops == mctx->asub_tops)
! 3916: {
! 3917: re_sub_match_top_t **new_array;
! 3918: mctx->asub_tops *= 2;
! 3919: new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
! 3920: mctx->asub_tops);
! 3921: if (BE (new_array == NULL, 0))
! 3922: return REG_ESPACE;
! 3923: mctx->sub_tops = new_array;
! 3924: }
! 3925: mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
! 3926: if (mctx->sub_tops[mctx->nsub_tops] == NULL)
! 3927: return REG_ESPACE;
! 3928: mctx->sub_tops[mctx->nsub_tops]->node = node;
! 3929: mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
! 3930: return REG_NOERROR;
! 3931: }
! 3932:
! 3933: /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
! 3934: at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
! 3935:
! 3936: static re_sub_match_last_t *
! 3937: match_ctx_add_sublast (subtop, node, str_idx)
! 3938: re_sub_match_top_t *subtop;
! 3939: int node, str_idx;
! 3940: {
! 3941: re_sub_match_last_t *new_entry;
! 3942: if (subtop->nlasts == subtop->alasts)
! 3943: {
! 3944: re_sub_match_last_t **new_array;
! 3945: subtop->alasts = 2 * subtop->alasts + 1;
! 3946: new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
! 3947: subtop->alasts);
! 3948: if (BE (new_array == NULL, 0))
! 3949: return NULL;
! 3950: subtop->lasts = new_array;
! 3951: }
! 3952: new_entry = calloc (1, sizeof (re_sub_match_last_t));
! 3953: if (BE (new_entry == NULL, 0))
! 3954: return NULL;
! 3955: subtop->lasts[subtop->nlasts] = new_entry;
! 3956: new_entry->node = node;
! 3957: new_entry->str_idx = str_idx;
! 3958: ++subtop->nlasts;
! 3959: return new_entry;
! 3960: }
! 3961:
! 3962: static void
! 3963: sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
! 3964: check_subexp)
! 3965: re_sift_context_t *sctx;
! 3966: re_dfastate_t **sifted_sts, **limited_sts;
! 3967: int last_node, last_str_idx, check_subexp;
! 3968: {
! 3969: sctx->sifted_states = sifted_sts;
! 3970: sctx->limited_states = limited_sts;
! 3971: sctx->last_node = last_node;
! 3972: sctx->last_str_idx = last_str_idx;
! 3973: sctx->check_subexp = check_subexp;
! 3974: sctx->cur_bkref = -1;
! 3975: sctx->cls_subexp_idx = -1;
! 3976: re_node_set_init_empty (&sctx->limits);
! 3977: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>