File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / smartmontools / regex / regex_internal.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 16:32:16 2012 UTC (12 years, 4 months ago) by misho
Branches: smartmontools, elwix, MAIN
CVS tags: v5_43, v5_42, HEAD
smartmontools

    1: /* Extended regular expression matching and search library.
    2:    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: static void re_string_construct_common (const char *str, int len,
   22: 					re_string_t *pstr,
   23: 					RE_TRANSLATE_TYPE trans, int icase);
   24: #ifdef RE_ENABLE_I18N
   25: static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
   26: 				 wint_t *last_wc);
   27: #endif /* RE_ENABLE_I18N */
   28: static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
   29: 					      const re_node_set *nodes,
   30: 					      unsigned int hash);
   31: static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
   32: 				     unsigned int hash);
   33: static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
   34: 					  const re_node_set *nodes,
   35: 					  unsigned int hash);
   36: static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
   37: 					  const re_node_set *nodes,
   38: 					  unsigned int context,
   39: 					  unsigned int hash);
   40: static unsigned int inline calc_state_hash (const re_node_set *nodes,
   41: 					    unsigned int context);
   42: 
   43: /* Functions for string operation.  */
   44: 
   45: /* This function allocate the buffers.  It is necessary to call
   46:    re_string_reconstruct before using the object.  */
   47: 
   48: static reg_errcode_t
   49: re_string_allocate (pstr, str, len, init_len, trans, icase)
   50:      re_string_t *pstr;
   51:      const char *str;
   52:      int len, init_len, icase;
   53:      RE_TRANSLATE_TYPE trans;
   54: {
   55:   reg_errcode_t ret;
   56:   int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
   57:   re_string_construct_common (str, len, pstr, trans, icase);
   58:   pstr->stop = pstr->len;
   59: 
   60:   ret = re_string_realloc_buffers (pstr, init_buf_len);
   61:   if (BE (ret != REG_NOERROR, 0))
   62:     return ret;
   63: 
   64:   pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
   65: 		    : (unsigned char *) str);
   66:   pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
   67:   pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
   68: 		     || MB_CUR_MAX > 1) ? pstr->valid_len : len;
   69:   return REG_NOERROR;
   70: }
   71: 
   72: /* This function allocate the buffers, and initialize them.  */
   73: 
   74: static reg_errcode_t
   75: re_string_construct (pstr, str, len, trans, icase)
   76:      re_string_t *pstr;
   77:      const char *str;
   78:      int len, icase;
   79:      RE_TRANSLATE_TYPE trans;
   80: {
   81:   reg_errcode_t ret;
   82:   re_string_construct_common (str, len, pstr, trans, icase);
   83:   pstr->stop = pstr->len;
   84:   /* Set 0 so that this function can initialize whole buffers.  */
   85:   pstr->valid_len = 0;
   86: 
   87:   if (len > 0)
   88:     {
   89:       ret = re_string_realloc_buffers (pstr, len + 1);
   90:       if (BE (ret != REG_NOERROR, 0))
   91: 	return ret;
   92:     }
   93:   pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
   94: 		    : (unsigned char *) str);
   95:   pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
   96: 
   97:   if (icase)
   98:     {
   99: #ifdef RE_ENABLE_I18N
  100:       if (MB_CUR_MAX > 1)
  101: 	build_wcs_upper_buffer (pstr);
  102:       else
  103: #endif /* RE_ENABLE_I18N  */
  104: 	build_upper_buffer (pstr);
  105:     }
  106:   else
  107:     {
  108: #ifdef RE_ENABLE_I18N
  109:       if (MB_CUR_MAX > 1)
  110: 	build_wcs_buffer (pstr);
  111:       else
  112: #endif /* RE_ENABLE_I18N  */
  113: 	{
  114: 	  if (trans != NULL)
  115: 	    re_string_translate_buffer (pstr);
  116: 	  else
  117: 	    pstr->valid_len = len;
  118: 	}
  119:     }
  120: 
  121:   /* Initialized whole buffers, then valid_len == bufs_len.  */
  122:   pstr->valid_len = pstr->bufs_len;
  123:   return REG_NOERROR;
  124: }
  125: 
  126: /* Helper functions for re_string_allocate, and re_string_construct.  */
  127: 
  128: static reg_errcode_t
  129: re_string_realloc_buffers (pstr, new_buf_len)
  130:      re_string_t *pstr;
  131:      int new_buf_len;
  132: {
  133: #ifdef RE_ENABLE_I18N
  134:   if (MB_CUR_MAX > 1)
  135:     {
  136:       wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
  137:       if (BE (new_array == NULL, 0))
  138: 	return REG_ESPACE;
  139:       pstr->wcs = new_array;
  140:     }
  141: #endif /* RE_ENABLE_I18N  */
  142:   if (MBS_ALLOCATED (pstr))
  143:     {
  144:       unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
  145: 					     new_buf_len);
  146:       if (BE (new_array == NULL, 0))
  147: 	return REG_ESPACE;
  148:       pstr->mbs = new_array;
  149:     }
  150:   if (MBS_CASE_ALLOCATED (pstr))
  151:     {
  152:       unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
  153: 					     new_buf_len);
  154:       if (BE (new_array == NULL, 0))
  155: 	return REG_ESPACE;
  156:       pstr->mbs_case = new_array;
  157:       if (!MBS_ALLOCATED (pstr))
  158: 	pstr->mbs = pstr->mbs_case;
  159:     }
  160:   pstr->bufs_len = new_buf_len;
  161:   return REG_NOERROR;
  162: }
  163: 
  164: 
  165: static void
  166: re_string_construct_common (str, len, pstr, trans, icase)
  167:      const char *str;
  168:      int len;
  169:      re_string_t *pstr;
  170:      RE_TRANSLATE_TYPE trans;
  171:      int icase;
  172: {
  173:   memset (pstr, '\0', sizeof (re_string_t));
  174:   pstr->raw_mbs = (const unsigned char *) str;
  175:   pstr->len = len;
  176:   pstr->trans = trans;
  177:   pstr->icase = icase ? 1 : 0;
  178: }
  179: 
  180: #ifdef RE_ENABLE_I18N
  181: 
  182: /* Build wide character buffer PSTR->WCS.
  183:    If the byte sequence of the string are:
  184:      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
  185:    Then wide character buffer will be:
  186:      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
  187:    We use WEOF for padding, they indicate that the position isn't
  188:    a first byte of a multibyte character.
  189: 
  190:    Note that this function assumes PSTR->VALID_LEN elements are already
  191:    built and starts from PSTR->VALID_LEN.  */
  192: 
  193: static void
  194: build_wcs_buffer (pstr)
  195:      re_string_t *pstr;
  196: {
  197:   mbstate_t prev_st;
  198:   int byte_idx, end_idx, mbclen, remain_len;
  199:   /* Build the buffers from pstr->valid_len to either pstr->len or
  200:      pstr->bufs_len.  */
  201:   end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
  202:   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
  203:     {
  204:       wchar_t wc;
  205:       remain_len = end_idx - byte_idx;
  206:       prev_st = pstr->cur_state;
  207:       mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
  208: 			      + byte_idx), remain_len, &pstr->cur_state);
  209:       if (BE (mbclen == (size_t) -2, 0))
  210: 	{
  211: 	  /* The buffer doesn't have enough space, finish to build.  */
  212: 	  pstr->cur_state = prev_st;
  213: 	  break;
  214: 	}
  215:       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
  216: 	{
  217: 	  /* We treat these cases as a singlebyte character.  */
  218: 	  mbclen = 1;
  219: 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
  220: 	  pstr->cur_state = prev_st;
  221: 	}
  222: 
  223:       /* Apply the translateion if we need.  */
  224:       if (pstr->trans != NULL && mbclen == 1)
  225: 	{
  226: 	  int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
  227: 	  pstr->mbs_case[byte_idx] = ch;
  228: 	}
  229:       /* Write wide character and padding.  */
  230:       pstr->wcs[byte_idx++] = wc;
  231:       /* Write paddings.  */
  232:       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
  233: 	pstr->wcs[byte_idx++] = WEOF;
  234:     }
  235:   pstr->valid_len = byte_idx;
  236: }
  237: 
  238: /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
  239:    but for REG_ICASE.  */
  240: 
  241: static void
  242: build_wcs_upper_buffer (pstr)
  243:      re_string_t *pstr;
  244: {
  245:   mbstate_t prev_st;
  246:   int byte_idx, end_idx, mbclen, remain_len;
  247:   /* Build the buffers from pstr->valid_len to either pstr->len or
  248:      pstr->bufs_len.  */
  249:   end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
  250:   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
  251:     {
  252:       wchar_t wc;
  253:       remain_len = end_idx - byte_idx;
  254:       prev_st = pstr->cur_state;
  255:       mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
  256: 			      + byte_idx), remain_len, &pstr->cur_state);
  257:       if (BE (mbclen == (size_t) -2, 0))
  258: 	{
  259: 	  /* The buffer doesn't have enough space, finish to build.  */
  260: 	  pstr->cur_state = prev_st;
  261: 	  break;
  262: 	}
  263:       else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
  264: 	{
  265: 	  /* In case of a singlebyte character.  */
  266: 	  int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
  267: 	  /* Apply the translateion if we need.  */
  268: 	  if (pstr->trans != NULL && mbclen == 1)
  269: 	    {
  270: 	      ch = pstr->trans[ch];
  271: 	      pstr->mbs_case[byte_idx] = ch;
  272: 	    }
  273: 	  pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
  274: 	  pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
  275: 	  if (BE (mbclen == (size_t) -1, 0))
  276: 	    pstr->cur_state = prev_st;
  277: 	}
  278:       else /* mbclen > 1 */
  279: 	{
  280: 	  if (iswlower (wc))
  281: 	    wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
  282: 	  else
  283: 	    memcpy (pstr->mbs + byte_idx,
  284: 		    pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
  285: 	  pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
  286: 	  /* Write paddings.  */
  287: 	  for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
  288: 	    pstr->wcs[byte_idx++] = WEOF;
  289: 	}
  290:     }
  291:   pstr->valid_len = byte_idx;
  292: }
  293: 
  294: /* Skip characters until the index becomes greater than NEW_RAW_IDX.
  295:    Return the index.  */
  296: 
  297: static int
  298: re_string_skip_chars (pstr, new_raw_idx, last_wc)
  299:      re_string_t *pstr;
  300:      int new_raw_idx;
  301:      wint_t *last_wc;
  302: {
  303:   mbstate_t prev_st;
  304:   int rawbuf_idx, mbclen;
  305:   wchar_t wc = 0;
  306: 
  307:   /* Skip the characters which are not necessary to check.  */
  308:   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
  309:        rawbuf_idx < new_raw_idx;)
  310:     {
  311:       int remain_len;
  312:       remain_len = pstr->len - rawbuf_idx;
  313:       prev_st = pstr->cur_state;
  314:       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
  315: 			remain_len, &pstr->cur_state);
  316:       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
  317: 	{
  318: 	  /* We treat these cases as a singlebyte character.  */
  319: 	  mbclen = 1;
  320: 	  pstr->cur_state = prev_st;
  321: 	}
  322:       /* Then proceed the next character.  */
  323:       rawbuf_idx += mbclen;
  324:     }
  325:   *last_wc = (wint_t) wc;
  326:   return rawbuf_idx;
  327: }
  328: #endif /* RE_ENABLE_I18N  */
  329: 
  330: /* Build the buffer PSTR->MBS, and apply the translation if we need.
  331:    This function is used in case of REG_ICASE.  */
  332: 
  333: static void
  334: build_upper_buffer (pstr)
  335:      re_string_t *pstr;
  336: {
  337:   int char_idx, end_idx;
  338:   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
  339: 
  340:   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
  341:     {
  342:       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
  343:       if (pstr->trans != NULL)
  344: 	{
  345: 	  ch =  pstr->trans[ch];
  346: 	  pstr->mbs_case[char_idx] = ch;
  347: 	}
  348:       if (islower (ch))
  349: 	pstr->mbs[char_idx] = toupper (ch);
  350:       else
  351: 	pstr->mbs[char_idx] = ch;
  352:     }
  353:   pstr->valid_len = char_idx;
  354: }
  355: 
  356: /* Apply TRANS to the buffer in PSTR.  */
  357: 
  358: static void
  359: re_string_translate_buffer (pstr)
  360:      re_string_t *pstr;
  361: {
  362:   int buf_idx, end_idx;
  363:   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
  364: 
  365:   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
  366:     {
  367:       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
  368:       pstr->mbs_case[buf_idx] = pstr->trans[ch];
  369:     }
  370: 
  371:   pstr->valid_len = buf_idx;
  372: }
  373: 
  374: /* This function re-construct the buffers.
  375:    Concretely, convert to wide character in case of MB_CUR_MAX > 1,
  376:    convert to upper case in case of REG_ICASE, apply translation.  */
  377: 
  378: static reg_errcode_t
  379: re_string_reconstruct (pstr, idx, eflags, newline)
  380:      re_string_t *pstr;
  381:      int idx, eflags, newline;
  382: {
  383:   int offset = idx - pstr->raw_mbs_idx;
  384:   if (offset < 0)
  385:     {
  386:       /* Reset buffer.  */
  387: #ifdef RE_ENABLE_I18N
  388:       if (MB_CUR_MAX > 1)
  389: 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
  390: #endif /* RE_ENABLE_I18N */
  391:       pstr->len += pstr->raw_mbs_idx;
  392:       pstr->stop += pstr->raw_mbs_idx;
  393:       pstr->valid_len = pstr->raw_mbs_idx = 0;
  394:       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
  395: 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
  396:       if (!MBS_CASE_ALLOCATED (pstr))
  397: 	pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
  398:       if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
  399: 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
  400:       offset = idx;
  401:     }
  402: 
  403:   if (offset != 0)
  404:     {
  405:       /* Are the characters which are already checked remain?  */
  406:       if (offset < pstr->valid_len)
  407: 	{
  408: 	  /* Yes, move them to the front of the buffer.  */
  409: 	  pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
  410: 						    newline);
  411: #ifdef RE_ENABLE_I18N
  412: 	  if (MB_CUR_MAX > 1)
  413: 	    memmove (pstr->wcs, pstr->wcs + offset,
  414: 		     (pstr->valid_len - offset) * sizeof (wint_t));
  415: #endif /* RE_ENABLE_I18N */
  416: 	  if (MBS_ALLOCATED (pstr))
  417: 	    memmove (pstr->mbs, pstr->mbs + offset,
  418: 		     pstr->valid_len - offset);
  419: 	  if (MBS_CASE_ALLOCATED (pstr))
  420: 	    memmove (pstr->mbs_case, pstr->mbs_case + offset,
  421: 		     pstr->valid_len - offset);
  422: 	  pstr->valid_len -= offset;
  423: #if DEBUG
  424: 	  assert (pstr->valid_len > 0);
  425: #endif
  426: 	}
  427:       else
  428: 	{
  429: 	  /* No, skip all characters until IDX.  */
  430: 	  pstr->valid_len = 0;
  431: #ifdef RE_ENABLE_I18N
  432: 	  if (MB_CUR_MAX > 1)
  433: 	    {
  434: 	      int wcs_idx;
  435: 	      wint_t wc;
  436: 	      pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
  437: 	      for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
  438: 		pstr->wcs[wcs_idx] = WEOF;
  439: 	      if (pstr->trans && wc <= 0xff)
  440: 		wc = pstr->trans[wc];
  441: 	      pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
  442: 				   : ((newline && IS_WIDE_NEWLINE (wc))
  443: 				      ? CONTEXT_NEWLINE : 0));
  444: 	    }
  445: 	  else
  446: #endif /* RE_ENABLE_I18N */
  447: 	    {
  448: 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
  449: 	      if (pstr->trans)
  450: 		c = pstr->trans[c];
  451: 	      pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
  452: 				   : ((newline && IS_NEWLINE (c))
  453: 				      ? CONTEXT_NEWLINE : 0));
  454: 	    }
  455: 	}
  456:       if (!MBS_CASE_ALLOCATED (pstr))
  457: 	{
  458: 	  pstr->mbs_case += offset;
  459: 	  /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED.  */
  460: 	  if (!MBS_ALLOCATED (pstr))
  461: 	    pstr->mbs += offset;
  462: 	}
  463:     }
  464:   pstr->raw_mbs_idx = idx;
  465:   pstr->len -= offset;
  466:   pstr->stop -= offset;
  467: 
  468:   /* Then build the buffers.  */
  469: #ifdef RE_ENABLE_I18N
  470:   if (MB_CUR_MAX > 1)
  471:     {
  472:       if (pstr->icase)
  473: 	build_wcs_upper_buffer (pstr);
  474:       else
  475: 	build_wcs_buffer (pstr);
  476:     }
  477:   else
  478: #endif /* RE_ENABLE_I18N */
  479:     {
  480:       if (pstr->icase)
  481: 	build_upper_buffer (pstr);
  482:       else if (pstr->trans != NULL)
  483: 	re_string_translate_buffer (pstr);
  484:     }
  485:   pstr->cur_idx = 0;
  486: 
  487:   return REG_NOERROR;
  488: }
  489: 
  490: static void
  491: re_string_destruct (pstr)
  492:      re_string_t *pstr;
  493: {
  494: #ifdef RE_ENABLE_I18N
  495:   re_free (pstr->wcs);
  496: #endif /* RE_ENABLE_I18N  */
  497:   if (MBS_ALLOCATED (pstr))
  498:     re_free (pstr->mbs);
  499:   if (MBS_CASE_ALLOCATED (pstr))
  500:     re_free (pstr->mbs_case);
  501: }
  502: 
  503: /* Return the context at IDX in INPUT.  */
  504: 
  505: static unsigned int
  506: re_string_context_at (input, idx, eflags, newline_anchor)
  507:      const re_string_t *input;
  508:      int idx, eflags, newline_anchor;
  509: {
  510:   int c;
  511:   if (idx < 0 || idx == input->len)
  512:     {
  513:       if (idx < 0)
  514: 	/* In this case, we use the value stored in input->tip_context,
  515: 	   since we can't know the character in input->mbs[-1] here.  */
  516: 	return input->tip_context;
  517:       else /* (idx == input->len) */
  518: 	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
  519: 		: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
  520:     }
  521: #ifdef RE_ENABLE_I18N
  522:   if (MB_CUR_MAX > 1)
  523:     {
  524:       wint_t wc;
  525:       int wc_idx = idx;
  526:       while(input->wcs[wc_idx] == WEOF)
  527: 	{
  528: #ifdef DEBUG
  529: 	  /* It must not happen.  */
  530: 	  assert (wc_idx >= 0);
  531: #endif
  532: 	  --wc_idx;
  533: 	  if (wc_idx < 0)
  534: 	    return input->tip_context;
  535: 	}
  536:       wc = input->wcs[wc_idx];
  537:       if (IS_WIDE_WORD_CHAR (wc))
  538: 	return CONTEXT_WORD;
  539:       return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
  540:     }
  541:   else
  542: #endif
  543:     {
  544:       c = re_string_byte_at (input, idx);
  545:       if (IS_WORD_CHAR (c))
  546: 	return CONTEXT_WORD;
  547:       return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
  548:     }
  549: }
  550: 
  551: /* Functions for set operation.  */
  552: 
  553: static reg_errcode_t
  554: re_node_set_alloc (set, size)
  555:      re_node_set *set;
  556:      int size;
  557: {
  558:   set->alloc = size;
  559:   set->nelem = 0;
  560:   set->elems = re_malloc (int, size);
  561:   if (BE (set->elems == NULL, 0))
  562:     return REG_ESPACE;
  563:   return REG_NOERROR;
  564: }
  565: 
  566: static reg_errcode_t
  567: re_node_set_init_1 (set, elem)
  568:      re_node_set *set;
  569:      int elem;
  570: {
  571:   set->alloc = 1;
  572:   set->nelem = 1;
  573:   set->elems = re_malloc (int, 1);
  574:   if (BE (set->elems == NULL, 0))
  575:     {
  576:       set->alloc = set->nelem = 0;
  577:       return REG_ESPACE;
  578:     }
  579:   set->elems[0] = elem;
  580:   return REG_NOERROR;
  581: }
  582: 
  583: static reg_errcode_t
  584: re_node_set_init_2 (set, elem1, elem2)
  585:      re_node_set *set;
  586:      int elem1, elem2;
  587: {
  588:   set->alloc = 2;
  589:   set->elems = re_malloc (int, 2);
  590:   if (BE (set->elems == NULL, 0))
  591:     return REG_ESPACE;
  592:   if (elem1 == elem2)
  593:     {
  594:       set->nelem = 1;
  595:       set->elems[0] = elem1;
  596:     }
  597:   else
  598:     {
  599:       set->nelem = 2;
  600:       if (elem1 < elem2)
  601: 	{
  602: 	  set->elems[0] = elem1;
  603: 	  set->elems[1] = elem2;
  604: 	}
  605:       else
  606: 	{
  607: 	  set->elems[0] = elem2;
  608: 	  set->elems[1] = elem1;
  609: 	}
  610:     }
  611:   return REG_NOERROR;
  612: }
  613: 
  614: static reg_errcode_t
  615: re_node_set_init_copy (dest, src)
  616:      re_node_set *dest;
  617:      const re_node_set *src;
  618: {
  619:   dest->nelem = src->nelem;
  620:   if (src->nelem > 0)
  621:     {
  622:       dest->alloc = dest->nelem;
  623:       dest->elems = re_malloc (int, dest->alloc);
  624:       if (BE (dest->elems == NULL, 0))
  625: 	{
  626: 	  dest->alloc = dest->nelem = 0;
  627: 	  return REG_ESPACE;
  628: 	}
  629:       memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
  630:     }
  631:   else
  632:     re_node_set_init_empty (dest);
  633:   return REG_NOERROR;
  634: }
  635: 
  636: /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
  637:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
  638:    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
  639: 
  640: static reg_errcode_t
  641: re_node_set_add_intersect (dest, src1, src2)
  642:      re_node_set *dest;
  643:      const re_node_set *src1, *src2;
  644: {
  645:   int i1, i2, id;
  646:   if (src1->nelem > 0 && src2->nelem > 0)
  647:     {
  648:       if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
  649: 	{
  650: 	  dest->alloc = src1->nelem + src2->nelem + dest->nelem;
  651: 	  dest->elems = re_realloc (dest->elems, int, dest->alloc);
  652: 	  if (BE (dest->elems == NULL, 0))
  653: 	    return REG_ESPACE;
  654: 	}
  655:     }
  656:   else
  657:     return REG_NOERROR;
  658: 
  659:   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
  660:     {
  661:       if (src1->elems[i1] > src2->elems[i2])
  662: 	{
  663: 	  ++i2;
  664: 	  continue;
  665: 	}
  666:       if (src1->elems[i1] == src2->elems[i2])
  667: 	{
  668: 	  while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
  669: 	    ++id;
  670: 	  if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
  671: 	    ++id;
  672: 	  else
  673: 	    {
  674: 	      memmove (dest->elems + id + 1, dest->elems + id,
  675: 		       sizeof (int) * (dest->nelem - id));
  676: 	      dest->elems[id++] = src2->elems[i2++];
  677: 	      ++dest->nelem;
  678: 	    }
  679: 	}
  680:       ++i1;
  681:     }
  682:   return REG_NOERROR;
  683: }
  684: 
  685: /* Calculate the union set of the sets SRC1 and SRC2. And store it to
  686:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
  687: 
  688: static reg_errcode_t
  689: re_node_set_init_union (dest, src1, src2)
  690:      re_node_set *dest;
  691:      const re_node_set *src1, *src2;
  692: {
  693:   int i1, i2, id;
  694:   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
  695:     {
  696:       dest->alloc = src1->nelem + src2->nelem;
  697:       dest->elems = re_malloc (int, dest->alloc);
  698:       if (BE (dest->elems == NULL, 0))
  699: 	return REG_ESPACE;
  700:     }
  701:   else
  702:     {
  703:       if (src1 != NULL && src1->nelem > 0)
  704: 	return re_node_set_init_copy (dest, src1);
  705:       else if (src2 != NULL && src2->nelem > 0)
  706: 	return re_node_set_init_copy (dest, src2);
  707:       else
  708: 	re_node_set_init_empty (dest);
  709:       return REG_NOERROR;
  710:     }
  711:   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
  712:     {
  713:       if (src1->elems[i1] > src2->elems[i2])
  714: 	{
  715: 	  dest->elems[id++] = src2->elems[i2++];
  716: 	  continue;
  717: 	}
  718:       if (src1->elems[i1] == src2->elems[i2])
  719: 	++i2;
  720:       dest->elems[id++] = src1->elems[i1++];
  721:     }
  722:   if (i1 < src1->nelem)
  723:     {
  724:       memcpy (dest->elems + id, src1->elems + i1,
  725: 	     (src1->nelem - i1) * sizeof (int));
  726:       id += src1->nelem - i1;
  727:     }
  728:   else if (i2 < src2->nelem)
  729:     {
  730:       memcpy (dest->elems + id, src2->elems + i2,
  731: 	     (src2->nelem - i2) * sizeof (int));
  732:       id += src2->nelem - i2;
  733:     }
  734:   dest->nelem = id;
  735:   return REG_NOERROR;
  736: }
  737: 
  738: /* Calculate the union set of the sets DEST and SRC. And store it to
  739:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
  740: 
  741: static reg_errcode_t
  742: re_node_set_merge (dest, src)
  743:      re_node_set *dest;
  744:      const re_node_set *src;
  745: {
  746:   int si, di;
  747:   if (src == NULL || src->nelem == 0)
  748:     return REG_NOERROR;
  749:   if (dest->alloc < src->nelem + dest->nelem)
  750:     {
  751:       int *new_buffer;
  752:       dest->alloc = 2 * (src->nelem + dest->alloc);
  753:       new_buffer = re_realloc (dest->elems, int, dest->alloc);
  754:       if (BE (new_buffer == NULL, 0))
  755: 	return REG_ESPACE;
  756:       dest->elems = new_buffer;
  757:     }
  758: 
  759:   for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
  760:     {
  761:       int cp_from, ncp, mid, right, src_elem = src->elems[si];
  762:       /* Binary search the spot we will add the new element.  */
  763:       right = dest->nelem;
  764:       while (di < right)
  765: 	{
  766: 	  mid = (di + right) / 2;
  767: 	  if (dest->elems[mid] < src_elem)
  768: 	    di = mid + 1;
  769: 	  else
  770: 	    right = mid;
  771: 	}
  772:       if (di >= dest->nelem)
  773: 	break;
  774: 
  775:       if (dest->elems[di] == src_elem)
  776: 	{
  777: 	  /* Skip since, DEST already has the element.  */
  778: 	  ++di;
  779: 	  ++si;
  780: 	  continue;
  781: 	}
  782: 
  783:       /* Skip the src elements which are less than dest->elems[di].  */
  784:       cp_from = si;
  785:       while (si < src->nelem && src->elems[si] < dest->elems[di])
  786: 	++si;
  787:       /* Copy these src elements.  */
  788:       ncp = si - cp_from;
  789:       memmove (dest->elems + di + ncp, dest->elems + di,
  790: 	       sizeof (int) * (dest->nelem - di));
  791:       memcpy (dest->elems + di, src->elems + cp_from,
  792: 	      sizeof (int) * ncp);
  793:       /* Update counters.  */
  794:       di += ncp;
  795:       dest->nelem += ncp;
  796:     }
  797: 
  798:   /* Copy remaining src elements.  */
  799:   if (si < src->nelem)
  800:     {
  801:       memcpy (dest->elems + di, src->elems + si,
  802: 	      sizeof (int) * (src->nelem - si));
  803:       dest->nelem += src->nelem - si;
  804:     }
  805:   return REG_NOERROR;
  806: }
  807: 
  808: /* Insert the new element ELEM to the re_node_set* SET.
  809:    return 0 if SET already has ELEM,
  810:    return -1 if an error is occured, return 1 otherwise.  */
  811: 
  812: static int
  813: re_node_set_insert (set, elem)
  814:      re_node_set *set;
  815:      int elem;
  816: {
  817:   int idx, right, mid;
  818:   /* In case of the set is empty.  */
  819:   if (set->elems == NULL || set->alloc == 0)
  820:     {
  821:       if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
  822: 	return 1;
  823:       else
  824: 	return -1;
  825:     }
  826: 
  827:   /* Binary search the spot we will add the new element.  */
  828:   idx = 0;
  829:   right = set->nelem;
  830:   while (idx < right)
  831:     {
  832:       mid = (idx + right) / 2;
  833:       if (set->elems[mid] < elem)
  834: 	idx = mid + 1;
  835:       else
  836: 	right = mid;
  837:     }
  838: 
  839:   /* Realloc if we need.  */
  840:   if (set->alloc < set->nelem + 1)
  841:     {
  842:       int *new_array;
  843:       set->alloc = set->alloc * 2;
  844:       new_array = re_malloc (int, set->alloc);
  845:       if (BE (new_array == NULL, 0))
  846: 	return -1;
  847:       /* Copy the elements they are followed by the new element.  */
  848:       if (idx > 0)
  849: 	memcpy (new_array, set->elems, sizeof (int) * (idx));
  850:       /* Copy the elements which follows the new element.  */
  851:       if (set->nelem - idx > 0)
  852: 	memcpy (new_array + idx + 1, set->elems + idx,
  853: 		sizeof (int) * (set->nelem - idx));
  854:       re_free (set->elems);
  855:       set->elems = new_array;
  856:     }
  857:   else
  858:     {
  859:       /* Move the elements which follows the new element.  */
  860:       if (set->nelem - idx > 0)
  861: 	memmove (set->elems + idx + 1, set->elems + idx,
  862: 		 sizeof (int) * (set->nelem - idx));
  863:     }
  864:   /* Insert the new element.  */
  865:   set->elems[idx] = elem;
  866:   ++set->nelem;
  867:   return 1;
  868: }
  869: 
  870: /* Compare two node sets SET1 and SET2.
  871:    return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise.  */
  872: 
  873: static int
  874: re_node_set_compare (set1, set2)
  875:      const re_node_set *set1, *set2;
  876: {
  877:   int i;
  878:   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
  879:     return 0;
  880:   for (i = 0 ; i < set1->nelem ; i++)
  881:     if (set1->elems[i] != set2->elems[i])
  882:       return 0;
  883:   return 1;
  884: }
  885: 
  886: /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
  887: 
  888: static int
  889: re_node_set_contains (set, elem)
  890:      const re_node_set *set;
  891:      int elem;
  892: {
  893:   int idx, right, mid;
  894:   if (set->nelem <= 0)
  895:     return 0;
  896: 
  897:   /* Binary search the element.  */
  898:   idx = 0;
  899:   right = set->nelem - 1;
  900:   while (idx < right)
  901:     {
  902:       mid = (idx + right) / 2;
  903:       if (set->elems[mid] < elem)
  904: 	idx = mid + 1;
  905:       else
  906: 	right = mid;
  907:     }
  908:   return set->elems[idx] == elem ? idx + 1 : 0;
  909: }
  910: 
  911: static void
  912: re_node_set_remove_at (set, idx)
  913:      re_node_set *set;
  914:      int idx;
  915: {
  916:   if (idx < 0 || idx >= set->nelem)
  917:     return;
  918:   if (idx < set->nelem - 1)
  919:     memmove (set->elems + idx, set->elems + idx + 1,
  920: 	     sizeof (int) * (set->nelem - idx - 1));
  921:   --set->nelem;
  922: }
  923: 
  924: 
  925: /* Add the token TOKEN to dfa->nodes, and return the index of the token.
  926:    Or return -1, if an error will be occured.  */
  927: 
  928: static int
  929: re_dfa_add_node (dfa, token, mode)
  930:      re_dfa_t *dfa;
  931:      re_token_t token;
  932:      int mode;
  933: {
  934:   if (dfa->nodes_len >= dfa->nodes_alloc)
  935:     {
  936:       re_token_t *new_array;
  937:       dfa->nodes_alloc *= 2;
  938:       new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
  939:       if (BE (new_array == NULL, 0))
  940: 	return -1;
  941:       else
  942: 	dfa->nodes = new_array;
  943:       if (mode)
  944: 	{
  945: 	  int *new_nexts, *new_indices;
  946: 	  re_node_set *new_edests, *new_eclosures, *new_inveclosures;
  947: 
  948: 	  new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
  949: 	  new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
  950: 	  new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
  951: 	  new_eclosures = re_realloc (dfa->eclosures, re_node_set,
  952: 				      dfa->nodes_alloc);
  953: 	  new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
  954: 					 dfa->nodes_alloc);
  955: 	  if (BE (new_nexts == NULL || new_indices == NULL
  956: 		  || new_edests == NULL || new_eclosures == NULL
  957: 		  || new_inveclosures == NULL, 0))
  958: 	    return -1;
  959: 	  dfa->nexts = new_nexts;
  960: 	  dfa->org_indices = new_indices;
  961: 	  dfa->edests = new_edests;
  962: 	  dfa->eclosures = new_eclosures;
  963: 	  dfa->inveclosures = new_inveclosures;
  964: 	}
  965:     }
  966:   dfa->nodes[dfa->nodes_len] = token;
  967:   dfa->nodes[dfa->nodes_len].duplicated = 0;
  968:   dfa->nodes[dfa->nodes_len].constraint = 0;
  969:   return dfa->nodes_len++;
  970: }
  971: 
  972: static unsigned int inline
  973: calc_state_hash (nodes, context)
  974:      const re_node_set *nodes;
  975:      unsigned int context;
  976: {
  977:   unsigned int hash = nodes->nelem + context;
  978:   int i;
  979:   for (i = 0 ; i < nodes->nelem ; i++)
  980:     hash += nodes->elems[i];
  981:   return hash;
  982: }
  983: 
  984: /* Search for the state whose node_set is equivalent to NODES.
  985:    Return the pointer to the state, if we found it in the DFA.
  986:    Otherwise create the new one and return it.  In case of an error
  987:    return NULL and set the error code in ERR.
  988:    Note: - We assume NULL as the invalid state, then it is possible that
  989: 	   return value is NULL and ERR is REG_NOERROR.
  990: 	 - We never return non-NULL value in case of any errors, it is for
  991: 	   optimization.  */
  992: 
  993: static re_dfastate_t*
  994: re_acquire_state (err, dfa, nodes)
  995:      reg_errcode_t *err;
  996:      re_dfa_t *dfa;
  997:      const re_node_set *nodes;
  998: {
  999:   unsigned int hash;
 1000:   re_dfastate_t *new_state;
 1001:   struct re_state_table_entry *spot;
 1002:   int i;
 1003:   if (BE (nodes->nelem == 0, 0))
 1004:     {
 1005:       *err = REG_NOERROR;
 1006:       return NULL;
 1007:     }
 1008:   hash = calc_state_hash (nodes, 0);
 1009:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
 1010: 
 1011:   for (i = 0 ; i < spot->num ; i++)
 1012:     {
 1013:       re_dfastate_t *state = spot->array[i];
 1014:       if (hash != state->hash)
 1015: 	continue;
 1016:       if (re_node_set_compare (&state->nodes, nodes))
 1017: 	return state;
 1018:     }
 1019: 
 1020:   /* There are no appropriate state in the dfa, create the new one.  */
 1021:   new_state = create_ci_newstate (dfa, nodes, hash);
 1022:   if (BE (new_state != NULL, 1))
 1023:     return new_state;
 1024:   else
 1025:     {
 1026:       *err = REG_ESPACE;
 1027:       return NULL;
 1028:     }
 1029: }
 1030: 
 1031: /* Search for the state whose node_set is equivalent to NODES and
 1032:    whose context is equivalent to CONTEXT.
 1033:    Return the pointer to the state, if we found it in the DFA.
 1034:    Otherwise create the new one and return it.  In case of an error
 1035:    return NULL and set the error code in ERR.
 1036:    Note: - We assume NULL as the invalid state, then it is possible that
 1037: 	   return value is NULL and ERR is REG_NOERROR.
 1038: 	 - We never return non-NULL value in case of any errors, it is for
 1039: 	   optimization.  */
 1040: 
 1041: static re_dfastate_t*
 1042: re_acquire_state_context (err, dfa, nodes, context)
 1043:      reg_errcode_t *err;
 1044:      re_dfa_t *dfa;
 1045:      const re_node_set *nodes;
 1046:      unsigned int context;
 1047: {
 1048:   unsigned int hash;
 1049:   re_dfastate_t *new_state;
 1050:   struct re_state_table_entry *spot;
 1051:   int i;
 1052:   if (nodes->nelem == 0)
 1053:     {
 1054:       *err = REG_NOERROR;
 1055:       return NULL;
 1056:     }
 1057:   hash = calc_state_hash (nodes, context);
 1058:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
 1059: 
 1060:   for (i = 0 ; i < spot->num ; i++)
 1061:     {
 1062:       re_dfastate_t *state = spot->array[i];
 1063:       if (hash != state->hash)
 1064: 	continue;
 1065:       if (re_node_set_compare (state->entrance_nodes, nodes)
 1066: 	  && state->context == context)
 1067: 	return state;
 1068:     }
 1069:   /* There are no appropriate state in `dfa', create the new one.  */
 1070:   new_state = create_cd_newstate (dfa, nodes, context, hash);
 1071:   if (BE (new_state != NULL, 1))
 1072:     return new_state;
 1073:   else
 1074:     {
 1075:       *err = REG_ESPACE;
 1076:       return NULL;
 1077:     }
 1078: }
 1079: 
 1080: /* Allocate memory for DFA state and initialize common properties.
 1081:    Return the new state if succeeded, otherwise return NULL.  */
 1082: 
 1083: static re_dfastate_t *
 1084: create_newstate_common (dfa, nodes, hash)
 1085:      re_dfa_t *dfa;
 1086:      const re_node_set *nodes;
 1087:      unsigned int hash;
 1088: {
 1089:   re_dfastate_t *newstate;
 1090:   reg_errcode_t err;
 1091:   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
 1092:   if (BE (newstate == NULL, 0))
 1093:     return NULL;
 1094:   err = re_node_set_init_copy (&newstate->nodes, nodes);
 1095:   if (BE (err != REG_NOERROR, 0))
 1096:     {
 1097:       re_free (newstate);
 1098:       return NULL;
 1099:     }
 1100:   newstate->trtable = NULL;
 1101:   newstate->trtable_search = NULL;
 1102:   newstate->hash = hash;
 1103:   return newstate;
 1104: }
 1105: 
 1106: /* Store the new state NEWSTATE whose hash value is HASH in appropriate
 1107:    position.  Return value indicate the error code if failed.  */
 1108: 
 1109: static reg_errcode_t
 1110: register_state (dfa, newstate, hash)
 1111:      re_dfa_t *dfa;
 1112:      re_dfastate_t *newstate;
 1113:      unsigned int hash;
 1114: {
 1115:   struct re_state_table_entry *spot;
 1116:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
 1117: 
 1118:   if (spot->alloc <= spot->num)
 1119:     {
 1120:       re_dfastate_t **new_array;
 1121:       spot->alloc = 2 * spot->num + 2;
 1122:       new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
 1123:       if (BE (new_array == NULL, 0))
 1124: 	return REG_ESPACE;
 1125:       spot->array = new_array;
 1126:     }
 1127:   spot->array[spot->num++] = newstate;
 1128:   return REG_NOERROR;
 1129: }
 1130: 
 1131: /* Create the new state which is independ of contexts.
 1132:    Return the new state if succeeded, otherwise return NULL.  */
 1133: 
 1134: static re_dfastate_t *
 1135: create_ci_newstate (dfa, nodes, hash)
 1136:      re_dfa_t *dfa;
 1137:      const re_node_set *nodes;
 1138:      unsigned int hash;
 1139: {
 1140:   int i;
 1141:   reg_errcode_t err;
 1142:   re_dfastate_t *newstate;
 1143:   newstate = create_newstate_common (dfa, nodes, hash);
 1144:   if (BE (newstate == NULL, 0))
 1145:     return NULL;
 1146:   newstate->entrance_nodes = &newstate->nodes;
 1147: 
 1148:   for (i = 0 ; i < nodes->nelem ; i++)
 1149:     {
 1150:       re_token_t *node = dfa->nodes + nodes->elems[i];
 1151:       re_token_type_t type = node->type;
 1152:       if (type == CHARACTER && !node->constraint)
 1153: 	continue;
 1154: 
 1155:       /* If the state has the halt node, the state is a halt state.  */
 1156:       else if (type == END_OF_RE)
 1157: 	newstate->halt = 1;
 1158: #ifdef RE_ENABLE_I18N
 1159:       else if (type == COMPLEX_BRACKET
 1160: 	       || (type == OP_PERIOD && MB_CUR_MAX > 1))
 1161: 	newstate->accept_mb = 1;
 1162: #endif /* RE_ENABLE_I18N */
 1163:       else if (type == OP_BACK_REF)
 1164: 	newstate->has_backref = 1;
 1165:       else if (type == ANCHOR || node->constraint)
 1166: 	newstate->has_constraint = 1;
 1167:     }
 1168:   err = register_state (dfa, newstate, hash);
 1169:   if (BE (err != REG_NOERROR, 0))
 1170:     {
 1171:       free_state (newstate);
 1172:       newstate = NULL;
 1173:     }
 1174:   return newstate;
 1175: }
 1176: 
 1177: /* Create the new state which is depend on the context CONTEXT.
 1178:    Return the new state if succeeded, otherwise return NULL.  */
 1179: 
 1180: static re_dfastate_t *
 1181: create_cd_newstate (dfa, nodes, context, hash)
 1182:      re_dfa_t *dfa;
 1183:      const re_node_set *nodes;
 1184:      unsigned int context, hash;
 1185: {
 1186:   int i, nctx_nodes = 0;
 1187:   reg_errcode_t err;
 1188:   re_dfastate_t *newstate;
 1189: 
 1190:   newstate = create_newstate_common (dfa, nodes, hash);
 1191:   if (BE (newstate == NULL, 0))
 1192:     return NULL;
 1193:   newstate->context = context;
 1194:   newstate->entrance_nodes = &newstate->nodes;
 1195: 
 1196:   for (i = 0 ; i < nodes->nelem ; i++)
 1197:     {
 1198:       unsigned int constraint = 0;
 1199:       re_token_t *node = dfa->nodes + nodes->elems[i];
 1200:       re_token_type_t type = node->type;
 1201:       if (node->constraint)
 1202: 	constraint = node->constraint;
 1203: 
 1204:       if (type == CHARACTER && !constraint)
 1205: 	continue;
 1206:       /* If the state has the halt node, the state is a halt state.  */
 1207:       else if (type == END_OF_RE)
 1208: 	newstate->halt = 1;
 1209: #ifdef RE_ENABLE_I18N
 1210:       else if (type == COMPLEX_BRACKET
 1211: 	       || (type == OP_PERIOD && MB_CUR_MAX > 1))
 1212: 	newstate->accept_mb = 1;
 1213: #endif /* RE_ENABLE_I18N */
 1214:       else if (type == OP_BACK_REF)
 1215: 	newstate->has_backref = 1;
 1216:       else if (type == ANCHOR)
 1217: 	constraint = node->opr.ctx_type;
 1218: 
 1219:       if (constraint)
 1220: 	{
 1221: 	  if (newstate->entrance_nodes == &newstate->nodes)
 1222: 	    {
 1223: 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
 1224: 	      if (BE (newstate->entrance_nodes == NULL, 0))
 1225: 		{
 1226: 		  free_state (newstate);
 1227: 		  return NULL;
 1228: 		}
 1229: 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
 1230: 	      nctx_nodes = 0;
 1231: 	      newstate->has_constraint = 1;
 1232: 	    }
 1233: 
 1234: 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
 1235: 	    {
 1236: 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
 1237: 	      ++nctx_nodes;
 1238: 	    }
 1239: 	}
 1240:     }
 1241:   err = register_state (dfa, newstate, hash);
 1242:   if (BE (err != REG_NOERROR, 0))
 1243:     {
 1244:       free_state (newstate);
 1245:       newstate = NULL;
 1246:     }
 1247:   return  newstate;
 1248: }
 1249: 
 1250: static void
 1251: free_state (state)
 1252:      re_dfastate_t *state;
 1253: {
 1254:   if (state->entrance_nodes != &state->nodes)
 1255:     {
 1256:       re_node_set_free (state->entrance_nodes);
 1257:       re_free (state->entrance_nodes);
 1258:     }
 1259:   re_node_set_free (&state->nodes);
 1260:   re_free (state->trtable);
 1261:   re_free (state->trtable_search);
 1262:   re_free (state);
 1263: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>