Annotation of embedaddon/pcre/pcre_scanner.h, revision 1.1

1.1     ! misho       1: // Copyright (c) 2005, Google Inc.
        !             2: // All rights reserved.
        !             3: //
        !             4: // Redistribution and use in source and binary forms, with or without
        !             5: // modification, are permitted provided that the following conditions are
        !             6: // met:
        !             7: //
        !             8: //     * Redistributions of source code must retain the above copyright
        !             9: // notice, this list of conditions and the following disclaimer.
        !            10: //     * Redistributions in binary form must reproduce the above
        !            11: // copyright notice, this list of conditions and the following disclaimer
        !            12: // in the documentation and/or other materials provided with the
        !            13: // distribution.
        !            14: //     * Neither the name of Google Inc. nor the names of its
        !            15: // contributors may be used to endorse or promote products derived from
        !            16: // this software without specific prior written permission.
        !            17: //
        !            18: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        !            19: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        !            20: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        !            21: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        !            22: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        !            23: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        !            24: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        !            25: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        !            26: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        !            27: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        !            28: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        !            29: //
        !            30: // Author: Sanjay Ghemawat
        !            31: //
        !            32: // Regular-expression based scanner for parsing an input stream.
        !            33: //
        !            34: // Example 1: parse a sequence of "var = number" entries from input:
        !            35: //
        !            36: //      Scanner scanner(input);
        !            37: //      string var;
        !            38: //      int number;
        !            39: //      scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
        !            40: //      while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
        !            41: //        ...;
        !            42: //      }
        !            43: 
        !            44: #ifndef _PCRE_SCANNER_H
        !            45: #define _PCRE_SCANNER_H
        !            46: 
        !            47: #include <assert.h>
        !            48: #include <string>
        !            49: #include <vector>
        !            50: 
        !            51: #include <pcrecpp.h>
        !            52: #include <pcre_stringpiece.h>
        !            53: 
        !            54: namespace pcrecpp {
        !            55: 
        !            56: class PCRECPP_EXP_DEFN Scanner {
        !            57:  public:
        !            58:   Scanner();  // NOTE(review): presumably scans empty input -- confirm in .cc
        !            59:   explicit Scanner(const std::string& input);  // Scan over "input"
        !            60:   ~Scanner();  // NOTE(review): no copy ctor/assignment declared -- confirm
        !            61: 
        !            62:   // Return the current line number.  The returned line number is
        !            63:   // one-based, i.e. it is 1 + the number of consumed newlines.
        !            64:   //
        !            65:   // Note: this method may be slow.  It may take time proportional to
        !            66:   // the size of the input.
        !            67:   int LineNumber() const;
        !            68: 
        !            69:   // Return the byte offset in the input data at which the scanner
        !            70:   // is currently looking.
        !            71:   int Offset() const;
        !            72: 
        !            73:   // Return true iff the start of the remaining input matches "re".
        !            74:   bool LookingAt(const RE& re) const;
        !            75: 
        !            76:   // Return true iff all of the following are true:
        !            77:   //    a. the start of the remaining input matches "re",
        !            78:   //    b. if any arguments are supplied, matched sub-patterns can be
        !            79:   //       parsed and stored into the arguments.
        !            80:   // If it returns true, it skips over the matched input and any
        !            81:   // following input that matches the "skip" regular expression.
        !            82:   bool Consume(const RE& re,
        !            83:                const Arg& arg0 = RE::no_arg,
        !            84:                const Arg& arg1 = RE::no_arg,
        !            85:                const Arg& arg2 = RE::no_arg
        !            86:                // TODO: Allow more arguments?
        !            87:                );
        !            88: 
        !            89:   // Set the "skip" regular expression.  If, after consuming some data,
        !            90:   // a prefix of the input matches this RE, it is automatically
        !            91:   // skipped.  For example, a programming language scanner would use
        !            92:   // a skip RE that matches white space and comments.
        !            93:   //
        !            94:   //    scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
        !            95:   //
        !            96:   // Skipping repeats as long as it succeeds.  We used to let people do
        !            97:   // this by writing "(...)*" in the regular expression, but that added
        !            98:   // up to lots of recursive calls within the pcre library, so now we
        !            99:   // control repetition explicitly via the function call API.
        !           100:   //
        !           101:   // You can pass NULL for "re" if you do not want any data to be skipped.
        !           102:   void Skip(const char* re);   // DEPRECATED; does *not* repeat
        !           103:   void SetSkipExpression(const char* re);
        !           104: 
        !           105:   // Temporarily pause skipping.  The sequence
        !           106:   //   Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
        !           107:   // is similar to
        !           108:   //   Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
        !           109:   // but avoids creating/deleting new RE objects.
        !           110:   void DisableSkip();
        !           111: 
        !           112:   // Re-enable previously paused skipping.  Any prefix of the input
        !           113:   // that matches the skip pattern is immediately dropped.
        !           114:   void EnableSkip();
        !           115: 
        !           116:   /***** Special wrappers around SetSkipExpression() for common idioms *****/
        !           117: 
        !           118:   // Arranges to skip whitespace, C comments, and C++ comments.
        !           119:   // The overall RE is a disjunction of the following REs:
        !           120:   //    \\s                     whitespace
        !           121:   //    //.*\n                  C++ comment
        !           122:   //    /[*](?:\n|.)*?[*]/      C comment (x*? means minimal repetitions of x)
        !           123:   // We get repetition via the semantics of SetSkipExpression, not by using *
        !           124:   void SkipCXXComments() {
        !           125:     SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
        !           126:   }
        !           127: 
        !           128:   void set_save_comments(bool comments) {  // Set the save_comments_ flag
        !           129:     save_comments_ = comments;
        !           130:   }
        !           131: 
        !           132:   bool save_comments() {  // Read the flag; NOTE(review): could be const
        !           133:     return save_comments_;
        !           134:   }
        !           135: 
        !           136:   // Append to the vector "ranges" the comments found in the
        !           137:   // byte range [start,end] (inclusive) of the input data.
        !           138:   // Only comments that were extracted entirely within that
        !           139:   // range are returned: no range splitting of atomically-extracted
        !           140:   // comments is performed.
        !           141:   void GetComments(int start, int end, std::vector<StringPiece> *ranges);
        !           142: 
        !           143:   // Append to the vector "ranges" the comments added
        !           144:   // since the last time this was called.  This
        !           145:   // functionality is provided for efficiency when
        !           146:   // interleaving scanning with parsing.
        !           147:   void GetNextComments(std::vector<StringPiece> *ranges);
        !           148: 
        !           149:  private:
        !           150:   std::string   data_;          // All the input data
        !           151:   StringPiece   input_;         // Unprocessed input
        !           152:   RE*           skip_;          // If non-NULL, RE for skipping input
        !           153:   bool          should_skip_;   // If true, use skip_
        !           154:   bool          skip_repeat_;   // If true, repeat skip_ as long as it works
        !           155:   bool          save_comments_; // If true, aggregate the skip-expression matches
        !           156: 
        !           157:   // The skipped comments.  NOTE(review): raw pointer; confirm ownership in .cc
        !           158:   // TODO: later consider requiring that the StringPieces be added
        !           159:   // in order by their start position
        !           160:   std::vector<StringPiece> *comments_;
        !           161: 
        !           162:   // The offset into comments_ that has been returned by GetNextComments
        !           163:   int           comments_offset_;
        !           164: 
        !           165:   // Helper function to consume *skip_ and honour
        !           166:   // save_comments_.
        !           167:   void ConsumeSkip();
        !           168: };
        !           169: 
        !           170: }   // namespace pcrecpp
        !           171: 
        !           172: #endif /* _PCRE_SCANNER_H */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>