File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcre_scanner.cc
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:55 2013 UTC (10 years, 11 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, v8_33, v8_31, v8_30, v8_21, HEAD
8.33

    1: // Copyright (c) 2005, Google Inc.
    2: // All rights reserved.
    3: //
    4: // Redistribution and use in source and binary forms, with or without
    5: // modification, are permitted provided that the following conditions are
    6: // met:
    7: //
    8: //     * Redistributions of source code must retain the above copyright
    9: // notice, this list of conditions and the following disclaimer.
   10: //     * Redistributions in binary form must reproduce the above
   11: // copyright notice, this list of conditions and the following disclaimer
   12: // in the documentation and/or other materials provided with the
   13: // distribution.
   14: //     * Neither the name of Google Inc. nor the names of its
   15: // contributors may be used to endorse or promote products derived from
   16: // this software without specific prior written permission.
   17: //
   18: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29: //
   30: // Author: Sanjay Ghemawat
   31: 
   32: #ifdef HAVE_CONFIG_H
   33: #include "config.h"
   34: #endif
   35: 
   36: #include <vector>
   37: #include <assert.h>
   38: 
   39: #include "pcrecpp_internal.h"
   40: #include "pcre_scanner.h"
   41: 
   42: using std::vector;
   43: 
   44: namespace pcrecpp {
   45: 
   46: Scanner::Scanner()
   47:   : data_(),
   48:     input_(data_),
   49:     skip_(NULL),
   50:     should_skip_(false),
   51:     skip_repeat_(false),
   52:     save_comments_(false),
   53:     comments_(NULL),
   54:     comments_offset_(0) {
   55: }
   56: 
   57: Scanner::Scanner(const string& in)
   58:   : data_(in),
   59:     input_(data_),
   60:     skip_(NULL),
   61:     should_skip_(false),
   62:     skip_repeat_(false),
   63:     save_comments_(false),
   64:     comments_(NULL),
   65:     comments_offset_(0) {
   66: }
   67: 
   68: Scanner::~Scanner() {
   69:   delete skip_;
   70:   delete comments_;
   71: }
   72: 
   73: void Scanner::SetSkipExpression(const char* re) {
   74:   delete skip_;
   75:   if (re != NULL) {
   76:     skip_ = new RE(re);
   77:     should_skip_ = true;
   78:     skip_repeat_ = true;
   79:     ConsumeSkip();
   80:   } else {
   81:     skip_ = NULL;
   82:     should_skip_ = false;
   83:     skip_repeat_ = false;
   84:   }
   85: }
   86: 
   87: void Scanner::Skip(const char* re) {
   88:   delete skip_;
   89:   if (re != NULL) {
   90:     skip_ = new RE(re);
   91:     should_skip_ = true;
   92:     skip_repeat_ = false;
   93:     ConsumeSkip();
   94:   } else {
   95:     skip_ = NULL;
   96:     should_skip_ = false;
   97:     skip_repeat_ = false;
   98:   }
   99: }
  100: 
  101: void Scanner::DisableSkip() {
  102:   assert(skip_ != NULL);
  103:   should_skip_ = false;
  104: }
  105: 
  106: void Scanner::EnableSkip() {
  107:   assert(skip_ != NULL);
  108:   should_skip_ = true;
  109:   ConsumeSkip();
  110: }
  111: 
  112: int Scanner::LineNumber() const {
  113:   // TODO: Make it more efficient by keeping track of the last point
  114:   // where we computed line numbers and counting newlines since then.
  115:   // We could use std:count, but not all systems have it. :-(
  116:   int count = 1;
  117:   for (const char* p = data_.data(); p < input_.data(); ++p)
  118:     if (*p == '\n')
  119:       ++count;
  120:   return count;
  121: }
  122: 
  123: int Scanner::Offset() const {
  124:   return (int)(input_.data() - data_.c_str());
  125: }
  126: 
  127: bool Scanner::LookingAt(const RE& re) const {
  128:   int consumed;
  129:   return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
  130: }
  131: 
  132: 
  133: bool Scanner::Consume(const RE& re,
  134:                       const Arg& arg0,
  135:                       const Arg& arg1,
  136:                       const Arg& arg2) {
  137:   const bool result = re.Consume(&input_, arg0, arg1, arg2);
  138:   if (result && should_skip_) ConsumeSkip();
  139:   return result;
  140: }
  141: 
  142: // helper function to consume *skip_ and honour save_comments_
  143: void Scanner::ConsumeSkip() {
  144:   const char* start_data = input_.data();
  145:   while (skip_->Consume(&input_)) {
  146:     if (!skip_repeat_) {
  147:       // Only one skip allowed.
  148:       break;
  149:     }
  150:   }
  151:   if (save_comments_) {
  152:     if (comments_ == NULL) {
  153:       comments_ = new vector<StringPiece>;
  154:     }
  155:     // already pointing one past end, so no need to +1
  156:     int length = (int)(input_.data() - start_data);
  157:     if (length > 0) {
  158:       comments_->push_back(StringPiece(start_data, length));
  159:     }
  160:   }
  161: }
  162: 
  163: 
  164: void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
  165:   // short circuit out if we've not yet initialized comments_
  166:   // (e.g., when save_comments is false)
  167:   if (!comments_) {
  168:     return;
  169:   }
  170:   // TODO: if we guarantee that comments_ will contain StringPieces
  171:   // that are ordered by their start, then we can do a binary search
  172:   // for the first StringPiece at or past start and then scan for the
  173:   // ones contained in the range, quit early (use equal_range or
  174:   // lower_bound)
  175:   for (vector<StringPiece>::const_iterator it = comments_->begin();
  176:        it != comments_->end(); ++it) {
  177:     if ((it->data() >= data_.c_str() + start &&
  178:          it->data() + it->size() <= data_.c_str() + end)) {
  179:       ranges->push_back(*it);
  180:     }
  181:   }
  182: }
  183: 
  184: 
  185: void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  186:   // short circuit out if we've not yet initialized comments_
  187:   // (e.g., when save_comments is false)
  188:   if (!comments_) {
  189:     return;
  190:   }
  191:   for (vector<StringPiece>::const_iterator it =
  192:          comments_->begin() + comments_offset_;
  193:        it != comments_->end(); ++it) {
  194:     ranges->push_back(*it);
  195:     ++comments_offset_;
  196:   }
  197: }
  198: 
  199: }   // namespace pcrecpp
  200: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>