embedaddon/smartmontools/regex/regex_internal.c - annotate

Return to regex_internal.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / smartmontools / regex
Annotation of embedaddon/smartmontools/regex/regex_internal.c, revision 1.1

1.1     ! misho       1: /* Extended regular expression matching and search library.
        !             2:    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
        !             3:    This file is part of the GNU C Library.
        !             4:    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
        !             5: 
        !             6:    The GNU C Library is free software; you can redistribute it and/or
        !             7:    modify it under the terms of the GNU Lesser General Public
        !             8:    License as published by the Free Software Foundation; either
        !             9:    version 2.1 of the License, or (at your option) any later version.
        !            10: 
        !            11:    The GNU C Library is distributed in the hope that it will be useful,
        !            12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            14:    Lesser General Public License for more details.
        !            15: 
        !            16:    You should have received a copy of the GNU Lesser General Public
        !            17:    License along with the GNU C Library; if not, write to the Free
        !            18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
        !            19:    02111-1307 USA.  */
        !            20: 
        !            21: static void re_string_construct_common (const char *str, int len,
        !            22:                                        re_string_t *pstr,
        !            23:                                        RE_TRANSLATE_TYPE trans, int icase);
        !            24: #ifdef RE_ENABLE_I18N
        !            25: static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
        !            26:                                 wint_t *last_wc);
        !            27: #endif /* RE_ENABLE_I18N */
        !            28: static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
        !            29:                                              const re_node_set *nodes,
        !            30:                                              unsigned int hash);
        !            31: static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
        !            32:                                     unsigned int hash);
        !            33: static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
        !            34:                                          const re_node_set *nodes,
        !            35:                                          unsigned int hash);
        !            36: static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
        !            37:                                          const re_node_set *nodes,
        !            38:                                          unsigned int context,
        !            39:                                          unsigned int hash);
        !            40: static unsigned int inline calc_state_hash (const re_node_set *nodes,
        !            41:                                            unsigned int context);
        !            42: 
        !            43: /* Functions for string operation.  */
        !            44: 
        !            45: /* This function allocate the buffers.  It is necessary to call
        !            46:    re_string_reconstruct before using the object.  */
        !            47: 
        !            48: static reg_errcode_t
        !            49: re_string_allocate (pstr, str, len, init_len, trans, icase)
        !            50:      re_string_t *pstr;
        !            51:      const char *str;
        !            52:      int len, init_len, icase;
        !            53:      RE_TRANSLATE_TYPE trans;
        !            54: {
        !            55:   reg_errcode_t ret;
        !            56:   int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
        !            57:   re_string_construct_common (str, len, pstr, trans, icase);
        !            58:   pstr->stop = pstr->len;
        !            59: 
        !            60:   ret = re_string_realloc_buffers (pstr, init_buf_len);
        !            61:   if (BE (ret != REG_NOERROR, 0))
        !            62:     return ret;
        !            63: 
        !            64:   pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
        !            65:                    : (unsigned char *) str);
        !            66:   pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
        !            67:   pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
        !            68:                     || MB_CUR_MAX > 1) ? pstr->valid_len : len;
        !            69:   return REG_NOERROR;
        !            70: }
        !            71: 
        !            72: /* This function allocate the buffers, and initialize them.  */
        !            73: 
        !            74: static reg_errcode_t
        !            75: re_string_construct (pstr, str, len, trans, icase)
        !            76:      re_string_t *pstr;
        !            77:      const char *str;
        !            78:      int len, icase;
        !            79:      RE_TRANSLATE_TYPE trans;
        !            80: {
        !            81:   reg_errcode_t ret;
        !            82:   re_string_construct_common (str, len, pstr, trans, icase);
        !            83:   pstr->stop = pstr->len;
        !            84:   /* Set 0 so that this function can initialize whole buffers.  */
        !            85:   pstr->valid_len = 0;
        !            86: 
        !            87:   if (len > 0)
        !            88:     {
        !            89:       ret = re_string_realloc_buffers (pstr, len + 1);
        !            90:       if (BE (ret != REG_NOERROR, 0))
        !            91:        return ret;
        !            92:     }
        !            93:   pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
        !            94:                    : (unsigned char *) str);
        !            95:   pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
        !            96: 
        !            97:   if (icase)
        !            98:     {
        !            99: #ifdef RE_ENABLE_I18N
        !           100:       if (MB_CUR_MAX > 1)
        !           101:        build_wcs_upper_buffer (pstr);
        !           102:       else
        !           103: #endif /* RE_ENABLE_I18N  */
        !           104:        build_upper_buffer (pstr);
        !           105:     }
        !           106:   else
        !           107:     {
        !           108: #ifdef RE_ENABLE_I18N
        !           109:       if (MB_CUR_MAX > 1)
        !           110:        build_wcs_buffer (pstr);
        !           111:       else
        !           112: #endif /* RE_ENABLE_I18N  */
        !           113:        {
        !           114:          if (trans != NULL)
        !           115:            re_string_translate_buffer (pstr);
        !           116:          else
        !           117:            pstr->valid_len = len;
        !           118:        }
        !           119:     }
        !           120: 
        !           121:   /* Initialized whole buffers, then valid_len == bufs_len.  */
        !           122:   pstr->valid_len = pstr->bufs_len;
        !           123:   return REG_NOERROR;
        !           124: }
        !           125: 
        !           126: /* Helper functions for re_string_allocate, and re_string_construct.  */
        !           127: 
        !           128: static reg_errcode_t
        !           129: re_string_realloc_buffers (pstr, new_buf_len)
        !           130:      re_string_t *pstr;
        !           131:      int new_buf_len;
        !           132: {
        !           133: #ifdef RE_ENABLE_I18N
        !           134:   if (MB_CUR_MAX > 1)
        !           135:     {
        !           136:       wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
        !           137:       if (BE (new_array == NULL, 0))
        !           138:        return REG_ESPACE;
        !           139:       pstr->wcs = new_array;
        !           140:     }
        !           141: #endif /* RE_ENABLE_I18N  */
        !           142:   if (MBS_ALLOCATED (pstr))
        !           143:     {
        !           144:       unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
        !           145:                                             new_buf_len);
        !           146:       if (BE (new_array == NULL, 0))
        !           147:        return REG_ESPACE;
        !           148:       pstr->mbs = new_array;
        !           149:     }
        !           150:   if (MBS_CASE_ALLOCATED (pstr))
        !           151:     {
        !           152:       unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
        !           153:                                             new_buf_len);
        !           154:       if (BE (new_array == NULL, 0))
        !           155:        return REG_ESPACE;
        !           156:       pstr->mbs_case = new_array;
        !           157:       if (!MBS_ALLOCATED (pstr))
        !           158:        pstr->mbs = pstr->mbs_case;
        !           159:     }
        !           160:   pstr->bufs_len = new_buf_len;
        !           161:   return REG_NOERROR;
        !           162: }
        !           163: 
        !           164: 
        !           165: static void
        !           166: re_string_construct_common (str, len, pstr, trans, icase)
        !           167:      const char *str;
        !           168:      int len;
        !           169:      re_string_t *pstr;
        !           170:      RE_TRANSLATE_TYPE trans;
        !           171:      int icase;
        !           172: {
        !           173:   memset (pstr, '\0', sizeof (re_string_t));
        !           174:   pstr->raw_mbs = (const unsigned char *) str;
        !           175:   pstr->len = len;
        !           176:   pstr->trans = trans;
        !           177:   pstr->icase = icase ? 1 : 0;
        !           178: }
        !           179: 
        !           180: #ifdef RE_ENABLE_I18N
        !           181: 
        !           182: /* Build wide character buffer PSTR->WCS.
        !           183:    If the byte sequence of the string are:
        !           184:      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
        !           185:    Then wide character buffer will be:
        !           186:      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
        !           187:    We use WEOF for padding, they indicate that the position isn't
        !           188:    a first byte of a multibyte character.
        !           189: 
        !           190:    Note that this function assumes PSTR->VALID_LEN elements are already
        !           191:    built and starts from PSTR->VALID_LEN.  */
        !           192: 
        !           193: static void
        !           194: build_wcs_buffer (pstr)
        !           195:      re_string_t *pstr;
        !           196: {
        !           197:   mbstate_t prev_st;
        !           198:   int byte_idx, end_idx, mbclen, remain_len;
        !           199:   /* Build the buffers from pstr->valid_len to either pstr->len or
        !           200:      pstr->bufs_len.  */
        !           201:   end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
        !           202:   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
        !           203:     {
        !           204:       wchar_t wc;
        !           205:       remain_len = end_idx - byte_idx;
        !           206:       prev_st = pstr->cur_state;
        !           207:       mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
        !           208:                              + byte_idx), remain_len, &pstr->cur_state);
        !           209:       if (BE (mbclen == (size_t) -2, 0))
        !           210:        {
        !           211:          /* The buffer doesn't have enough space, finish to build.  */
        !           212:          pstr->cur_state = prev_st;
        !           213:          break;
        !           214:        }
        !           215:       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
        !           216:        {
        !           217:          /* We treat these cases as a singlebyte character.  */
        !           218:          mbclen = 1;
        !           219:          wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
        !           220:          pstr->cur_state = prev_st;
        !           221:        }
        !           222: 
        !           223:       /* Apply the translateion if we need.  */
        !           224:       if (pstr->trans != NULL && mbclen == 1)
        !           225:        {
        !           226:          int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
        !           227:          pstr->mbs_case[byte_idx] = ch;
        !           228:        }
        !           229:       /* Write wide character and padding.  */
        !           230:       pstr->wcs[byte_idx++] = wc;
        !           231:       /* Write paddings.  */
        !           232:       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
        !           233:        pstr->wcs[byte_idx++] = WEOF;
        !           234:     }
        !           235:   pstr->valid_len = byte_idx;
        !           236: }
        !           237: 
        !           238: /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
        !           239:    but for REG_ICASE.  */
        !           240: 
        !           241: static void
        !           242: build_wcs_upper_buffer (pstr)
        !           243:      re_string_t *pstr;
        !           244: {
        !           245:   mbstate_t prev_st;
        !           246:   int byte_idx, end_idx, mbclen, remain_len;
        !           247:   /* Build the buffers from pstr->valid_len to either pstr->len or
        !           248:      pstr->bufs_len.  */
        !           249:   end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
        !           250:   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
        !           251:     {
        !           252:       wchar_t wc;
        !           253:       remain_len = end_idx - byte_idx;
        !           254:       prev_st = pstr->cur_state;
        !           255:       mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
        !           256:                              + byte_idx), remain_len, &pstr->cur_state);
        !           257:       if (BE (mbclen == (size_t) -2, 0))
        !           258:        {
        !           259:          /* The buffer doesn't have enough space, finish to build.  */
        !           260:          pstr->cur_state = prev_st;
        !           261:          break;
        !           262:        }
        !           263:       else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
        !           264:        {
        !           265:          /* In case of a singlebyte character.  */
        !           266:          int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
        !           267:          /* Apply the translateion if we need.  */
        !           268:          if (pstr->trans != NULL && mbclen == 1)
        !           269:            {
        !           270:              ch = pstr->trans[ch];
        !           271:              pstr->mbs_case[byte_idx] = ch;
        !           272:            }
        !           273:          pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
        !           274:          pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
        !           275:          if (BE (mbclen == (size_t) -1, 0))
        !           276:            pstr->cur_state = prev_st;
        !           277:        }
        !           278:       else /* mbclen > 1 */
        !           279:        {
        !           280:          if (iswlower (wc))
        !           281:            wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
        !           282:          else
        !           283:            memcpy (pstr->mbs + byte_idx,
        !           284:                    pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
        !           285:          pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
        !           286:          /* Write paddings.  */
        !           287:          for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
        !           288:            pstr->wcs[byte_idx++] = WEOF;
        !           289:        }
        !           290:     }
        !           291:   pstr->valid_len = byte_idx;
        !           292: }
        !           293: 
        !           294: /* Skip characters until the index becomes greater than NEW_RAW_IDX.
        !           295:    Return the index.  */
        !           296: 
        !           297: static int
        !           298: re_string_skip_chars (pstr, new_raw_idx, last_wc)
        !           299:      re_string_t *pstr;
        !           300:      int new_raw_idx;
        !           301:      wint_t *last_wc;
        !           302: {
        !           303:   mbstate_t prev_st;
        !           304:   int rawbuf_idx, mbclen;
        !           305:   wchar_t wc = 0;
        !           306: 
        !           307:   /* Skip the characters which are not necessary to check.  */
        !           308:   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
        !           309:        rawbuf_idx < new_raw_idx;)
        !           310:     {
        !           311:       int remain_len;
        !           312:       remain_len = pstr->len - rawbuf_idx;
        !           313:       prev_st = pstr->cur_state;
        !           314:       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
        !           315:                        remain_len, &pstr->cur_state);
        !           316:       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
        !           317:        {
        !           318:          /* We treat these cases as a singlebyte character.  */
        !           319:          mbclen = 1;
        !           320:          pstr->cur_state = prev_st;
        !           321:        }
        !           322:       /* Then proceed the next character.  */
        !           323:       rawbuf_idx += mbclen;
        !           324:     }
        !           325:   *last_wc = (wint_t) wc;
        !           326:   return rawbuf_idx;
        !           327: }
        !           328: #endif /* RE_ENABLE_I18N  */
        !           329: 
        !           330: /* Build the buffer PSTR->MBS, and apply the translation if we need.
        !           331:    This function is used in case of REG_ICASE.  */
        !           332: 
        !           333: static void
        !           334: build_upper_buffer (pstr)
        !           335:      re_string_t *pstr;
        !           336: {
        !           337:   int char_idx, end_idx;
        !           338:   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
        !           339: 
        !           340:   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
        !           341:     {
        !           342:       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
        !           343:       if (pstr->trans != NULL)
        !           344:        {
        !           345:          ch =  pstr->trans[ch];
        !           346:          pstr->mbs_case[char_idx] = ch;
        !           347:        }
        !           348:       if (islower (ch))
        !           349:        pstr->mbs[char_idx] = toupper (ch);
        !           350:       else
        !           351:        pstr->mbs[char_idx] = ch;
        !           352:     }
        !           353:   pstr->valid_len = char_idx;
        !           354: }
        !           355: 
        !           356: /* Apply TRANS to the buffer in PSTR.  */
        !           357: 
        !           358: static void
        !           359: re_string_translate_buffer (pstr)
        !           360:      re_string_t *pstr;
        !           361: {
        !           362:   int buf_idx, end_idx;
        !           363:   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
        !           364: 
        !           365:   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
        !           366:     {
        !           367:       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
        !           368:       pstr->mbs_case[buf_idx] = pstr->trans[ch];
        !           369:     }
        !           370: 
        !           371:   pstr->valid_len = buf_idx;
        !           372: }
        !           373: 
        !           374: /* This function re-construct the buffers.
        !           375:    Concretely, convert to wide character in case of MB_CUR_MAX > 1,
        !           376:    convert to upper case in case of REG_ICASE, apply translation.  */
        !           377: 
        !           378: static reg_errcode_t
        !           379: re_string_reconstruct (pstr, idx, eflags, newline)
        !           380:      re_string_t *pstr;
        !           381:      int idx, eflags, newline;
        !           382: {
        !           383:   int offset = idx - pstr->raw_mbs_idx;
        !           384:   if (offset < 0)
        !           385:     {
        !           386:       /* Reset buffer.  */
        !           387: #ifdef RE_ENABLE_I18N
        !           388:       if (MB_CUR_MAX > 1)
        !           389:        memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
        !           390: #endif /* RE_ENABLE_I18N */
        !           391:       pstr->len += pstr->raw_mbs_idx;
        !           392:       pstr->stop += pstr->raw_mbs_idx;
        !           393:       pstr->valid_len = pstr->raw_mbs_idx = 0;
        !           394:       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
        !           395:                           : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
        !           396:       if (!MBS_CASE_ALLOCATED (pstr))
        !           397:        pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
        !           398:       if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
        !           399:        pstr->mbs = (unsigned char *) pstr->raw_mbs;
        !           400:       offset = idx;
        !           401:     }
        !           402: 
        !           403:   if (offset != 0)
        !           404:     {
        !           405:       /* Are the characters which are already checked remain?  */
        !           406:       if (offset < pstr->valid_len)
        !           407:        {
        !           408:          /* Yes, move them to the front of the buffer.  */
        !           409:          pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
        !           410:                                                    newline);
        !           411: #ifdef RE_ENABLE_I18N
        !           412:          if (MB_CUR_MAX > 1)
        !           413:            memmove (pstr->wcs, pstr->wcs + offset,
        !           414:                     (pstr->valid_len - offset) * sizeof (wint_t));
        !           415: #endif /* RE_ENABLE_I18N */
        !           416:          if (MBS_ALLOCATED (pstr))
        !           417:            memmove (pstr->mbs, pstr->mbs + offset,
        !           418:                     pstr->valid_len - offset);
        !           419:          if (MBS_CASE_ALLOCATED (pstr))
        !           420:            memmove (pstr->mbs_case, pstr->mbs_case + offset,
        !           421:                     pstr->valid_len - offset);
        !           422:          pstr->valid_len -= offset;
        !           423: #if DEBUG
        !           424:          assert (pstr->valid_len > 0);
        !           425: #endif
        !           426:        }
        !           427:       else
        !           428:        {
        !           429:          /* No, skip all characters until IDX.  */
        !           430:          pstr->valid_len = 0;
        !           431: #ifdef RE_ENABLE_I18N
        !           432:          if (MB_CUR_MAX > 1)
        !           433:            {
        !           434:              int wcs_idx;
        !           435:              wint_t wc;
        !           436:              pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
        !           437:              for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
        !           438:                pstr->wcs[wcs_idx] = WEOF;
        !           439:              if (pstr->trans && wc <= 0xff)
        !           440:                wc = pstr->trans[wc];
        !           441:              pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
        !           442:                                   : ((newline && IS_WIDE_NEWLINE (wc))
        !           443:                                      ? CONTEXT_NEWLINE : 0));
        !           444:            }
        !           445:          else
        !           446: #endif /* RE_ENABLE_I18N */
        !           447:            {
        !           448:              int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
        !           449:              if (pstr->trans)
        !           450:                c = pstr->trans[c];
        !           451:              pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD
        !           452:                                   : ((newline && IS_NEWLINE (c))
        !           453:                                      ? CONTEXT_NEWLINE : 0));
        !           454:            }
        !           455:        }
        !           456:       if (!MBS_CASE_ALLOCATED (pstr))
        !           457:        {
        !           458:          pstr->mbs_case += offset;
        !           459:          /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED.  */
        !           460:          if (!MBS_ALLOCATED (pstr))
        !           461:            pstr->mbs += offset;
        !           462:        }
        !           463:     }
        !           464:   pstr->raw_mbs_idx = idx;
        !           465:   pstr->len -= offset;
        !           466:   pstr->stop -= offset;
        !           467: 
        !           468:   /* Then build the buffers.  */
        !           469: #ifdef RE_ENABLE_I18N
        !           470:   if (MB_CUR_MAX > 1)
        !           471:     {
        !           472:       if (pstr->icase)
        !           473:        build_wcs_upper_buffer (pstr);
        !           474:       else
        !           475:        build_wcs_buffer (pstr);
        !           476:     }
        !           477:   else
        !           478: #endif /* RE_ENABLE_I18N */
        !           479:     {
        !           480:       if (pstr->icase)
        !           481:        build_upper_buffer (pstr);
        !           482:       else if (pstr->trans != NULL)
        !           483:        re_string_translate_buffer (pstr);
        !           484:     }
        !           485:   pstr->cur_idx = 0;
        !           486: 
        !           487:   return REG_NOERROR;
        !           488: }
        !           489: 
        !           490: static void
        !           491: re_string_destruct (pstr)
        !           492:      re_string_t *pstr;
        !           493: {
        !           494: #ifdef RE_ENABLE_I18N
        !           495:   re_free (pstr->wcs);
        !           496: #endif /* RE_ENABLE_I18N  */
        !           497:   if (MBS_ALLOCATED (pstr))
        !           498:     re_free (pstr->mbs);
        !           499:   if (MBS_CASE_ALLOCATED (pstr))
        !           500:     re_free (pstr->mbs_case);
        !           501: }
        !           502: 
        !           503: /* Return the context at IDX in INPUT.  */
        !           504: 
        !           505: static unsigned int
        !           506: re_string_context_at (input, idx, eflags, newline_anchor)
        !           507:      const re_string_t *input;
        !           508:      int idx, eflags, newline_anchor;
        !           509: {
        !           510:   int c;
        !           511:   if (idx < 0 || idx == input->len)
        !           512:     {
        !           513:       if (idx < 0)
        !           514:        /* In this case, we use the value stored in input->tip_context,
        !           515:           since we can't know the character in input->mbs[-1] here.  */
        !           516:        return input->tip_context;
        !           517:       else /* (idx == input->len) */
        !           518:        return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
        !           519:                : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
        !           520:     }
        !           521: #ifdef RE_ENABLE_I18N
        !           522:   if (MB_CUR_MAX > 1)
        !           523:     {
        !           524:       wint_t wc;
        !           525:       int wc_idx = idx;
        !           526:       while(input->wcs[wc_idx] == WEOF)
        !           527:        {
        !           528: #ifdef DEBUG
        !           529:          /* It must not happen.  */
        !           530:          assert (wc_idx >= 0);
        !           531: #endif
        !           532:          --wc_idx;
        !           533:          if (wc_idx < 0)
        !           534:            return input->tip_context;
        !           535:        }
        !           536:       wc = input->wcs[wc_idx];
        !           537:       if (IS_WIDE_WORD_CHAR (wc))
        !           538:        return CONTEXT_WORD;
        !           539:       return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0;
        !           540:     }
        !           541:   else
        !           542: #endif
        !           543:     {
        !           544:       c = re_string_byte_at (input, idx);
        !           545:       if (IS_WORD_CHAR (c))
        !           546:        return CONTEXT_WORD;
        !           547:       return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0;
        !           548:     }
        !           549: }
        !           550: 
        !           551: /* Functions for set operation.  */
        !           552: 
        !           553: static reg_errcode_t
        !           554: re_node_set_alloc (set, size)
        !           555:      re_node_set *set;
        !           556:      int size;
        !           557: {
        !           558:   set->alloc = size;
        !           559:   set->nelem = 0;
        !           560:   set->elems = re_malloc (int, size);
        !           561:   if (BE (set->elems == NULL, 0))
        !           562:     return REG_ESPACE;
        !           563:   return REG_NOERROR;
        !           564: }
        !           565: 
        !           566: static reg_errcode_t
        !           567: re_node_set_init_1 (set, elem)
        !           568:      re_node_set *set;
        !           569:      int elem;
        !           570: {
        !           571:   set->alloc = 1;
        !           572:   set->nelem = 1;
        !           573:   set->elems = re_malloc (int, 1);
        !           574:   if (BE (set->elems == NULL, 0))
        !           575:     {
        !           576:       set->alloc = set->nelem = 0;
        !           577:       return REG_ESPACE;
        !           578:     }
        !           579:   set->elems[0] = elem;
        !           580:   return REG_NOERROR;
        !           581: }
        !           582: 
        !           583: static reg_errcode_t
        !           584: re_node_set_init_2 (set, elem1, elem2)
        !           585:      re_node_set *set;
        !           586:      int elem1, elem2;
        !           587: {
        !           588:   set->alloc = 2;
        !           589:   set->elems = re_malloc (int, 2);
        !           590:   if (BE (set->elems == NULL, 0))
        !           591:     return REG_ESPACE;
        !           592:   if (elem1 == elem2)
        !           593:     {
        !           594:       set->nelem = 1;
        !           595:       set->elems[0] = elem1;
        !           596:     }
        !           597:   else
        !           598:     {
        !           599:       set->nelem = 2;
        !           600:       if (elem1 < elem2)
        !           601:        {
        !           602:          set->elems[0] = elem1;
        !           603:          set->elems[1] = elem2;
        !           604:        }
        !           605:       else
        !           606:        {
        !           607:          set->elems[0] = elem2;
        !           608:          set->elems[1] = elem1;
        !           609:        }
        !           610:     }
        !           611:   return REG_NOERROR;
        !           612: }
        !           613: 
        !           614: static reg_errcode_t
        !           615: re_node_set_init_copy (dest, src)
        !           616:      re_node_set *dest;
        !           617:      const re_node_set *src;
        !           618: {
        !           619:   dest->nelem = src->nelem;
        !           620:   if (src->nelem > 0)
        !           621:     {
        !           622:       dest->alloc = dest->nelem;
        !           623:       dest->elems = re_malloc (int, dest->alloc);
        !           624:       if (BE (dest->elems == NULL, 0))
        !           625:        {
        !           626:          dest->alloc = dest->nelem = 0;
        !           627:          return REG_ESPACE;
        !           628:        }
        !           629:       memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
        !           630:     }
        !           631:   else
        !           632:     re_node_set_init_empty (dest);
        !           633:   return REG_NOERROR;
        !           634: }
        !           635: 
        !           636: /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
        !           637:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
        !           638:    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
        !           639: 
        !           640: static reg_errcode_t
        !           641: re_node_set_add_intersect (dest, src1, src2)
        !           642:      re_node_set *dest;
        !           643:      const re_node_set *src1, *src2;
        !           644: {
        !           645:   int i1, i2, id;
        !           646:   if (src1->nelem > 0 && src2->nelem > 0)
        !           647:     {
        !           648:       if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
        !           649:        {
        !           650:          dest->alloc = src1->nelem + src2->nelem + dest->nelem;
        !           651:          dest->elems = re_realloc (dest->elems, int, dest->alloc);
        !           652:          if (BE (dest->elems == NULL, 0))
        !           653:            return REG_ESPACE;
        !           654:        }
        !           655:     }
        !           656:   else
        !           657:     return REG_NOERROR;
        !           658: 
        !           659:   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
        !           660:     {
        !           661:       if (src1->elems[i1] > src2->elems[i2])
        !           662:        {
        !           663:          ++i2;
        !           664:          continue;
        !           665:        }
        !           666:       if (src1->elems[i1] == src2->elems[i2])
        !           667:        {
        !           668:          while (id < dest->nelem && dest->elems[id] < src2->elems[i2])
        !           669:            ++id;
        !           670:          if (id < dest->nelem && dest->elems[id] == src2->elems[i2])
        !           671:            ++id;
        !           672:          else
        !           673:            {
        !           674:              memmove (dest->elems + id + 1, dest->elems + id,
        !           675:                       sizeof (int) * (dest->nelem - id));
        !           676:              dest->elems[id++] = src2->elems[i2++];
        !           677:              ++dest->nelem;
        !           678:            }
        !           679:        }
        !           680:       ++i1;
        !           681:     }
        !           682:   return REG_NOERROR;
        !           683: }
        !           684: 
        !           685: /* Calculate the union set of the sets SRC1 and SRC2. And store it to
        !           686:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
        !           687: 
        !           688: static reg_errcode_t
        !           689: re_node_set_init_union (dest, src1, src2)
        !           690:      re_node_set *dest;
        !           691:      const re_node_set *src1, *src2;
        !           692: {
        !           693:   int i1, i2, id;
        !           694:   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
        !           695:     {
        !           696:       dest->alloc = src1->nelem + src2->nelem;
        !           697:       dest->elems = re_malloc (int, dest->alloc);
        !           698:       if (BE (dest->elems == NULL, 0))
        !           699:        return REG_ESPACE;
        !           700:     }
        !           701:   else
        !           702:     {
        !           703:       if (src1 != NULL && src1->nelem > 0)
        !           704:        return re_node_set_init_copy (dest, src1);
        !           705:       else if (src2 != NULL && src2->nelem > 0)
        !           706:        return re_node_set_init_copy (dest, src2);
        !           707:       else
        !           708:        re_node_set_init_empty (dest);
        !           709:       return REG_NOERROR;
        !           710:     }
        !           711:   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
        !           712:     {
        !           713:       if (src1->elems[i1] > src2->elems[i2])
        !           714:        {
        !           715:          dest->elems[id++] = src2->elems[i2++];
        !           716:          continue;
        !           717:        }
        !           718:       if (src1->elems[i1] == src2->elems[i2])
        !           719:        ++i2;
        !           720:       dest->elems[id++] = src1->elems[i1++];
        !           721:     }
        !           722:   if (i1 < src1->nelem)
        !           723:     {
        !           724:       memcpy (dest->elems + id, src1->elems + i1,
        !           725:             (src1->nelem - i1) * sizeof (int));
        !           726:       id += src1->nelem - i1;
        !           727:     }
        !           728:   else if (i2 < src2->nelem)
        !           729:     {
        !           730:       memcpy (dest->elems + id, src2->elems + i2,
        !           731:             (src2->nelem - i2) * sizeof (int));
        !           732:       id += src2->nelem - i2;
        !           733:     }
        !           734:   dest->nelem = id;
        !           735:   return REG_NOERROR;
        !           736: }
        !           737: 
        !           738: /* Calculate the union set of the sets DEST and SRC. And store it to
        !           739:    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
        !           740: 
        !           741: static reg_errcode_t
        !           742: re_node_set_merge (dest, src)
        !           743:      re_node_set *dest;
        !           744:      const re_node_set *src;
        !           745: {
        !           746:   int si, di;
        !           747:   if (src == NULL || src->nelem == 0)
        !           748:     return REG_NOERROR;
        !           749:   if (dest->alloc < src->nelem + dest->nelem)
        !           750:     {
        !           751:       int *new_buffer;
        !           752:       dest->alloc = 2 * (src->nelem + dest->alloc);
        !           753:       new_buffer = re_realloc (dest->elems, int, dest->alloc);
        !           754:       if (BE (new_buffer == NULL, 0))
        !           755:        return REG_ESPACE;
        !           756:       dest->elems = new_buffer;
        !           757:     }
        !           758: 
        !           759:   for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
        !           760:     {
        !           761:       int cp_from, ncp, mid, right, src_elem = src->elems[si];
        !           762:       /* Binary search the spot we will add the new element.  */
        !           763:       right = dest->nelem;
        !           764:       while (di < right)
        !           765:        {
        !           766:          mid = (di + right) / 2;
        !           767:          if (dest->elems[mid] < src_elem)
        !           768:            di = mid + 1;
        !           769:          else
        !           770:            right = mid;
        !           771:        }
        !           772:       if (di >= dest->nelem)
        !           773:        break;
        !           774: 
        !           775:       if (dest->elems[di] == src_elem)
        !           776:        {
        !           777:          /* Skip since, DEST already has the element.  */
        !           778:          ++di;
        !           779:          ++si;
        !           780:          continue;
        !           781:        }
        !           782: 
        !           783:       /* Skip the src elements which are less than dest->elems[di].  */
        !           784:       cp_from = si;
        !           785:       while (si < src->nelem && src->elems[si] < dest->elems[di])
        !           786:        ++si;
        !           787:       /* Copy these src elements.  */
        !           788:       ncp = si - cp_from;
        !           789:       memmove (dest->elems + di + ncp, dest->elems + di,
        !           790:               sizeof (int) * (dest->nelem - di));
        !           791:       memcpy (dest->elems + di, src->elems + cp_from,
        !           792:              sizeof (int) * ncp);
        !           793:       /* Update counters.  */
        !           794:       di += ncp;
        !           795:       dest->nelem += ncp;
        !           796:     }
        !           797: 
        !           798:   /* Copy remaining src elements.  */
        !           799:   if (si < src->nelem)
        !           800:     {
        !           801:       memcpy (dest->elems + di, src->elems + si,
        !           802:              sizeof (int) * (src->nelem - si));
        !           803:       dest->nelem += src->nelem - si;
        !           804:     }
        !           805:   return REG_NOERROR;
        !           806: }
        !           807: 
        !           808: /* Insert the new element ELEM to the re_node_set* SET.
        !           809:    return 0 if SET already has ELEM,
        !           810:    return -1 if an error is occured, return 1 otherwise.  */
        !           811: 
        !           812: static int
        !           813: re_node_set_insert (set, elem)
        !           814:      re_node_set *set;
        !           815:      int elem;
        !           816: {
        !           817:   int idx, right, mid;
        !           818:   /* In case of the set is empty.  */
        !           819:   if (set->elems == NULL || set->alloc == 0)
        !           820:     {
        !           821:       if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
        !           822:        return 1;
        !           823:       else
        !           824:        return -1;
        !           825:     }
        !           826: 
        !           827:   /* Binary search the spot we will add the new element.  */
        !           828:   idx = 0;
        !           829:   right = set->nelem;
        !           830:   while (idx < right)
        !           831:     {
        !           832:       mid = (idx + right) / 2;
        !           833:       if (set->elems[mid] < elem)
        !           834:        idx = mid + 1;
        !           835:       else
        !           836:        right = mid;
        !           837:     }
        !           838: 
        !           839:   /* Realloc if we need.  */
        !           840:   if (set->alloc < set->nelem + 1)
        !           841:     {
        !           842:       int *new_array;
        !           843:       set->alloc = set->alloc * 2;
        !           844:       new_array = re_malloc (int, set->alloc);
        !           845:       if (BE (new_array == NULL, 0))
        !           846:        return -1;
        !           847:       /* Copy the elements they are followed by the new element.  */
        !           848:       if (idx > 0)
        !           849:        memcpy (new_array, set->elems, sizeof (int) * (idx));
        !           850:       /* Copy the elements which follows the new element.  */
        !           851:       if (set->nelem - idx > 0)
        !           852:        memcpy (new_array + idx + 1, set->elems + idx,
        !           853:                sizeof (int) * (set->nelem - idx));
        !           854:       re_free (set->elems);
        !           855:       set->elems = new_array;
        !           856:     }
        !           857:   else
        !           858:     {
        !           859:       /* Move the elements which follows the new element.  */
        !           860:       if (set->nelem - idx > 0)
        !           861:        memmove (set->elems + idx + 1, set->elems + idx,
        !           862:                 sizeof (int) * (set->nelem - idx));
        !           863:     }
        !           864:   /* Insert the new element.  */
        !           865:   set->elems[idx] = elem;
        !           866:   ++set->nelem;
        !           867:   return 1;
        !           868: }
        !           869: 
        !           870: /* Compare two node sets SET1 and SET2.
        !           871:    return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise.  */
        !           872: 
        !           873: static int
        !           874: re_node_set_compare (set1, set2)
        !           875:      const re_node_set *set1, *set2;
        !           876: {
        !           877:   int i;
        !           878:   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
        !           879:     return 0;
        !           880:   for (i = 0 ; i < set1->nelem ; i++)
        !           881:     if (set1->elems[i] != set2->elems[i])
        !           882:       return 0;
        !           883:   return 1;
        !           884: }
        !           885: 
        !           886: /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
        !           887: 
        !           888: static int
        !           889: re_node_set_contains (set, elem)
        !           890:      const re_node_set *set;
        !           891:      int elem;
        !           892: {
        !           893:   int idx, right, mid;
        !           894:   if (set->nelem <= 0)
        !           895:     return 0;
        !           896: 
        !           897:   /* Binary search the element.  */
        !           898:   idx = 0;
        !           899:   right = set->nelem - 1;
        !           900:   while (idx < right)
        !           901:     {
        !           902:       mid = (idx + right) / 2;
        !           903:       if (set->elems[mid] < elem)
        !           904:        idx = mid + 1;
        !           905:       else
        !           906:        right = mid;
        !           907:     }
        !           908:   return set->elems[idx] == elem ? idx + 1 : 0;
        !           909: }
        !           910: 
        !           911: static void
        !           912: re_node_set_remove_at (set, idx)
        !           913:      re_node_set *set;
        !           914:      int idx;
        !           915: {
        !           916:   if (idx < 0 || idx >= set->nelem)
        !           917:     return;
        !           918:   if (idx < set->nelem - 1)
        !           919:     memmove (set->elems + idx, set->elems + idx + 1,
        !           920:             sizeof (int) * (set->nelem - idx - 1));
        !           921:   --set->nelem;
        !           922: }
        !           923: 
        !           924: 
        !           925: /* Add the token TOKEN to dfa->nodes, and return the index of the token.
        !           926:    Or return -1, if an error will be occured.  */
        !           927: 
        !           928: static int
        !           929: re_dfa_add_node (dfa, token, mode)
        !           930:      re_dfa_t *dfa;
        !           931:      re_token_t token;
        !           932:      int mode;
        !           933: {
        !           934:   if (dfa->nodes_len >= dfa->nodes_alloc)
        !           935:     {
        !           936:       re_token_t *new_array;
        !           937:       dfa->nodes_alloc *= 2;
        !           938:       new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc);
        !           939:       if (BE (new_array == NULL, 0))
        !           940:        return -1;
        !           941:       else
        !           942:        dfa->nodes = new_array;
        !           943:       if (mode)
        !           944:        {
        !           945:          int *new_nexts, *new_indices;
        !           946:          re_node_set *new_edests, *new_eclosures, *new_inveclosures;
        !           947: 
        !           948:          new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
        !           949:          new_indices = re_realloc (dfa->org_indices, int, dfa->nodes_alloc);
        !           950:          new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
        !           951:          new_eclosures = re_realloc (dfa->eclosures, re_node_set,
        !           952:                                      dfa->nodes_alloc);
        !           953:          new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
        !           954:                                         dfa->nodes_alloc);
        !           955:          if (BE (new_nexts == NULL || new_indices == NULL
        !           956:                  || new_edests == NULL || new_eclosures == NULL
        !           957:                  || new_inveclosures == NULL, 0))
        !           958:            return -1;
        !           959:          dfa->nexts = new_nexts;
        !           960:          dfa->org_indices = new_indices;
        !           961:          dfa->edests = new_edests;
        !           962:          dfa->eclosures = new_eclosures;
        !           963:          dfa->inveclosures = new_inveclosures;
        !           964:        }
        !           965:     }
        !           966:   dfa->nodes[dfa->nodes_len] = token;
        !           967:   dfa->nodes[dfa->nodes_len].duplicated = 0;
        !           968:   dfa->nodes[dfa->nodes_len].constraint = 0;
        !           969:   return dfa->nodes_len++;
        !           970: }
        !           971: 
        !           972: static unsigned int inline
        !           973: calc_state_hash (nodes, context)
        !           974:      const re_node_set *nodes;
        !           975:      unsigned int context;
        !           976: {
        !           977:   unsigned int hash = nodes->nelem + context;
        !           978:   int i;
        !           979:   for (i = 0 ; i < nodes->nelem ; i++)
        !           980:     hash += nodes->elems[i];
        !           981:   return hash;
        !           982: }
        !           983: 
        !           984: /* Search for the state whose node_set is equivalent to NODES.
        !           985:    Return the pointer to the state, if we found it in the DFA.
        !           986:    Otherwise create the new one and return it.  In case of an error
        !           987:    return NULL and set the error code in ERR.
        !           988:    Note: - We assume NULL as the invalid state, then it is possible that
        !           989:           return value is NULL and ERR is REG_NOERROR.
        !           990:         - We never return non-NULL value in case of any errors, it is for
        !           991:           optimization.  */
        !           992: 
        !           993: static re_dfastate_t*
        !           994: re_acquire_state (err, dfa, nodes)
        !           995:      reg_errcode_t *err;
        !           996:      re_dfa_t *dfa;
        !           997:      const re_node_set *nodes;
        !           998: {
        !           999:   unsigned int hash;
        !          1000:   re_dfastate_t *new_state;
        !          1001:   struct re_state_table_entry *spot;
        !          1002:   int i;
        !          1003:   if (BE (nodes->nelem == 0, 0))
        !          1004:     {
        !          1005:       *err = REG_NOERROR;
        !          1006:       return NULL;
        !          1007:     }
        !          1008:   hash = calc_state_hash (nodes, 0);
        !          1009:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
        !          1010: 
        !          1011:   for (i = 0 ; i < spot->num ; i++)
        !          1012:     {
        !          1013:       re_dfastate_t *state = spot->array[i];
        !          1014:       if (hash != state->hash)
        !          1015:        continue;
        !          1016:       if (re_node_set_compare (&state->nodes, nodes))
        !          1017:        return state;
        !          1018:     }
        !          1019: 
        !          1020:   /* There are no appropriate state in the dfa, create the new one.  */
        !          1021:   new_state = create_ci_newstate (dfa, nodes, hash);
        !          1022:   if (BE (new_state != NULL, 1))
        !          1023:     return new_state;
        !          1024:   else
        !          1025:     {
        !          1026:       *err = REG_ESPACE;
        !          1027:       return NULL;
        !          1028:     }
        !          1029: }
        !          1030: 
        !          1031: /* Search for the state whose node_set is equivalent to NODES and
        !          1032:    whose context is equivalent to CONTEXT.
        !          1033:    Return the pointer to the state, if we found it in the DFA.
        !          1034:    Otherwise create the new one and return it.  In case of an error
        !          1035:    return NULL and set the error code in ERR.
        !          1036:    Note: - We assume NULL as the invalid state, then it is possible that
        !          1037:           return value is NULL and ERR is REG_NOERROR.
        !          1038:         - We never return non-NULL value in case of any errors, it is for
        !          1039:           optimization.  */
        !          1040: 
        !          1041: static re_dfastate_t*
        !          1042: re_acquire_state_context (err, dfa, nodes, context)
        !          1043:      reg_errcode_t *err;
        !          1044:      re_dfa_t *dfa;
        !          1045:      const re_node_set *nodes;
        !          1046:      unsigned int context;
        !          1047: {
        !          1048:   unsigned int hash;
        !          1049:   re_dfastate_t *new_state;
        !          1050:   struct re_state_table_entry *spot;
        !          1051:   int i;
        !          1052:   if (nodes->nelem == 0)
        !          1053:     {
        !          1054:       *err = REG_NOERROR;
        !          1055:       return NULL;
        !          1056:     }
        !          1057:   hash = calc_state_hash (nodes, context);
        !          1058:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
        !          1059: 
        !          1060:   for (i = 0 ; i < spot->num ; i++)
        !          1061:     {
        !          1062:       re_dfastate_t *state = spot->array[i];
        !          1063:       if (hash != state->hash)
        !          1064:        continue;
        !          1065:       if (re_node_set_compare (state->entrance_nodes, nodes)
        !          1066:          && state->context == context)
        !          1067:        return state;
        !          1068:     }
        !          1069:   /* There are no appropriate state in `dfa', create the new one.  */
        !          1070:   new_state = create_cd_newstate (dfa, nodes, context, hash);
        !          1071:   if (BE (new_state != NULL, 1))
        !          1072:     return new_state;
        !          1073:   else
        !          1074:     {
        !          1075:       *err = REG_ESPACE;
        !          1076:       return NULL;
        !          1077:     }
        !          1078: }
        !          1079: 
        !          1080: /* Allocate memory for DFA state and initialize common properties.
        !          1081:    Return the new state if succeeded, otherwise return NULL.  */
        !          1082: 
        !          1083: static re_dfastate_t *
        !          1084: create_newstate_common (dfa, nodes, hash)
        !          1085:      re_dfa_t *dfa;
        !          1086:      const re_node_set *nodes;
        !          1087:      unsigned int hash;
        !          1088: {
        !          1089:   re_dfastate_t *newstate;
        !          1090:   reg_errcode_t err;
        !          1091:   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
        !          1092:   if (BE (newstate == NULL, 0))
        !          1093:     return NULL;
        !          1094:   err = re_node_set_init_copy (&newstate->nodes, nodes);
        !          1095:   if (BE (err != REG_NOERROR, 0))
        !          1096:     {
        !          1097:       re_free (newstate);
        !          1098:       return NULL;
        !          1099:     }
        !          1100:   newstate->trtable = NULL;
        !          1101:   newstate->trtable_search = NULL;
        !          1102:   newstate->hash = hash;
        !          1103:   return newstate;
        !          1104: }
        !          1105: 
        !          1106: /* Store the new state NEWSTATE whose hash value is HASH in appropriate
        !          1107:    position.  Return value indicate the error code if failed.  */
        !          1108: 
        !          1109: static reg_errcode_t
        !          1110: register_state (dfa, newstate, hash)
        !          1111:      re_dfa_t *dfa;
        !          1112:      re_dfastate_t *newstate;
        !          1113:      unsigned int hash;
        !          1114: {
        !          1115:   struct re_state_table_entry *spot;
        !          1116:   spot = dfa->state_table + (hash & dfa->state_hash_mask);
        !          1117: 
        !          1118:   if (spot->alloc <= spot->num)
        !          1119:     {
        !          1120:       re_dfastate_t **new_array;
        !          1121:       spot->alloc = 2 * spot->num + 2;
        !          1122:       new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc);
        !          1123:       if (BE (new_array == NULL, 0))
        !          1124:        return REG_ESPACE;
        !          1125:       spot->array = new_array;
        !          1126:     }
        !          1127:   spot->array[spot->num++] = newstate;
        !          1128:   return REG_NOERROR;
        !          1129: }
        !          1130: 
        !          1131: /* Create the new state which is independ of contexts.
        !          1132:    Return the new state if succeeded, otherwise return NULL.  */
        !          1133: 
        !          1134: static re_dfastate_t *
        !          1135: create_ci_newstate (dfa, nodes, hash)
        !          1136:      re_dfa_t *dfa;
        !          1137:      const re_node_set *nodes;
        !          1138:      unsigned int hash;
        !          1139: {
        !          1140:   int i;
        !          1141:   reg_errcode_t err;
        !          1142:   re_dfastate_t *newstate;
        !          1143:   newstate = create_newstate_common (dfa, nodes, hash);
        !          1144:   if (BE (newstate == NULL, 0))
        !          1145:     return NULL;
        !          1146:   newstate->entrance_nodes = &newstate->nodes;
        !          1147: 
        !          1148:   for (i = 0 ; i < nodes->nelem ; i++)
        !          1149:     {
        !          1150:       re_token_t *node = dfa->nodes + nodes->elems[i];
        !          1151:       re_token_type_t type = node->type;
        !          1152:       if (type == CHARACTER && !node->constraint)
        !          1153:        continue;
        !          1154: 
        !          1155:       /* If the state has the halt node, the state is a halt state.  */
        !          1156:       else if (type == END_OF_RE)
        !          1157:        newstate->halt = 1;
        !          1158: #ifdef RE_ENABLE_I18N
        !          1159:       else if (type == COMPLEX_BRACKET
        !          1160:               || (type == OP_PERIOD && MB_CUR_MAX > 1))
        !          1161:        newstate->accept_mb = 1;
        !          1162: #endif /* RE_ENABLE_I18N */
        !          1163:       else if (type == OP_BACK_REF)
        !          1164:        newstate->has_backref = 1;
        !          1165:       else if (type == ANCHOR || node->constraint)
        !          1166:        newstate->has_constraint = 1;
        !          1167:     }
        !          1168:   err = register_state (dfa, newstate, hash);
        !          1169:   if (BE (err != REG_NOERROR, 0))
        !          1170:     {
        !          1171:       free_state (newstate);
        !          1172:       newstate = NULL;
        !          1173:     }
        !          1174:   return newstate;
        !          1175: }
        !          1176: 
        !          1177: /* Create the new state which is depend on the context CONTEXT.
        !          1178:    Return the new state if succeeded, otherwise return NULL.  */
        !          1179: 
        !          1180: static re_dfastate_t *
        !          1181: create_cd_newstate (dfa, nodes, context, hash)
        !          1182:      re_dfa_t *dfa;
        !          1183:      const re_node_set *nodes;
        !          1184:      unsigned int context, hash;
        !          1185: {
        !          1186:   int i, nctx_nodes = 0;
        !          1187:   reg_errcode_t err;
        !          1188:   re_dfastate_t *newstate;
        !          1189: 
        !          1190:   newstate = create_newstate_common (dfa, nodes, hash);
        !          1191:   if (BE (newstate == NULL, 0))
        !          1192:     return NULL;
        !          1193:   newstate->context = context;
        !          1194:   newstate->entrance_nodes = &newstate->nodes;
        !          1195: 
        !          1196:   for (i = 0 ; i < nodes->nelem ; i++)
        !          1197:     {
        !          1198:       unsigned int constraint = 0;
        !          1199:       re_token_t *node = dfa->nodes + nodes->elems[i];
        !          1200:       re_token_type_t type = node->type;
        !          1201:       if (node->constraint)
        !          1202:        constraint = node->constraint;
        !          1203: 
        !          1204:       if (type == CHARACTER && !constraint)
        !          1205:        continue;
        !          1206:       /* If the state has the halt node, the state is a halt state.  */
        !          1207:       else if (type == END_OF_RE)
        !          1208:        newstate->halt = 1;
        !          1209: #ifdef RE_ENABLE_I18N
        !          1210:       else if (type == COMPLEX_BRACKET
        !          1211:               || (type == OP_PERIOD && MB_CUR_MAX > 1))
        !          1212:        newstate->accept_mb = 1;
        !          1213: #endif /* RE_ENABLE_I18N */
        !          1214:       else if (type == OP_BACK_REF)
        !          1215:        newstate->has_backref = 1;
        !          1216:       else if (type == ANCHOR)
        !          1217:        constraint = node->opr.ctx_type;
        !          1218: 
        !          1219:       if (constraint)
        !          1220:        {
        !          1221:          if (newstate->entrance_nodes == &newstate->nodes)
        !          1222:            {
        !          1223:              newstate->entrance_nodes = re_malloc (re_node_set, 1);
        !          1224:              if (BE (newstate->entrance_nodes == NULL, 0))
        !          1225:                {
        !          1226:                  free_state (newstate);
        !          1227:                  return NULL;
        !          1228:                }
        !          1229:              re_node_set_init_copy (newstate->entrance_nodes, nodes);
        !          1230:              nctx_nodes = 0;
        !          1231:              newstate->has_constraint = 1;
        !          1232:            }
        !          1233: 
        !          1234:          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
        !          1235:            {
        !          1236:              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
        !          1237:              ++nctx_nodes;
        !          1238:            }
        !          1239:        }
        !          1240:     }
        !          1241:   err = register_state (dfa, newstate, hash);
        !          1242:   if (BE (err != REG_NOERROR, 0))
        !          1243:     {
        !          1244:       free_state (newstate);
        !          1245:       newstate = NULL;
        !          1246:     }
        !          1247:   return  newstate;
        !          1248: }
        !          1249: 
        !          1250: static void
        !          1251: free_state (state)
        !          1252:      re_dfastate_t *state;
        !          1253: {
        !          1254:   if (state->entrance_nodes != &state->nodes)
        !          1255:     {
        !          1256:       re_node_set_free (state->entrance_nodes);
        !          1257:       re_free (state->entrance_nodes);
        !          1258:     }
        !          1259:   re_node_set_free (&state->nodes);
        !          1260:   re_free (state->trtable);
        !          1261:   re_free (state->trtable_search);
        !          1262:   re_free (state);
        !          1263: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>