Annotation of embedaddon/pcre/pcre_scanner.h, revision 1.1
1.1 ! misho 1: // Copyright (c) 2005, Google Inc.
! 2: // All rights reserved.
! 3: //
! 4: // Redistribution and use in source and binary forms, with or without
! 5: // modification, are permitted provided that the following conditions are
! 6: // met:
! 7: //
! 8: // * Redistributions of source code must retain the above copyright
! 9: // notice, this list of conditions and the following disclaimer.
! 10: // * Redistributions in binary form must reproduce the above
! 11: // copyright notice, this list of conditions and the following disclaimer
! 12: // in the documentation and/or other materials provided with the
! 13: // distribution.
! 14: // * Neither the name of Google Inc. nor the names of its
! 15: // contributors may be used to endorse or promote products derived from
! 16: // this software without specific prior written permission.
! 17: //
! 18: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! 19: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
! 20: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
! 21: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
! 22: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
! 23: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
! 24: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
! 25: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
! 26: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
! 27: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
! 28: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
! 29: //
! 30: // Author: Sanjay Ghemawat
! 31: //
! 32: // Regular-expression based scanner for parsing an input stream.
! 33: //
! 34: // Example 1: parse a sequence of "var = number" entries from input:
! 35: //
! 36: // Scanner scanner(input);
! 37: // string var;
! 38: // int number;
! 39: // scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
! 40: // while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
! 41: // ...;
! 42: // }
! 43:
! 44: #ifndef _PCRE_SCANNER_H
! 45: #define _PCRE_SCANNER_H
! 46:
! 47: #include <assert.h>
! 48: #include <string>
! 49: #include <vector>
! 50:
! 51: #include <pcrecpp.h>
! 52: #include <pcre_stringpiece.h>
! 53:
! 54: namespace pcrecpp {
! 55:
! 56: class PCRECPP_EXP_DEFN Scanner {
! 57: public:
! 58: Scanner();
! 59: explicit Scanner(const std::string& input);
! 60: ~Scanner();
! 61:
! 62: // Return current line number. The returned line-number is
! 63: // one-based. I.e. it returns 1 + the number of consumed newlines.
! 64: //
! 65: // Note: this method may be slow. It may take time proportional to
! 66: // the size of the input.
! 67: int LineNumber() const;
! 68:
! 69: // Return the byte-offset that the scanner is looking in the
! 70: // input data;
! 71: int Offset() const;
! 72:
! 73: // Return true iff the start of the remaining input matches "re"
! 74: bool LookingAt(const RE& re) const;
! 75:
! 76: // Return true iff all of the following are true
! 77: // a. the start of the remaining input matches "re",
! 78: // b. if any arguments are supplied, matched sub-patterns can be
! 79: // parsed and stored into the arguments.
! 80: // If it returns true, it skips over the matched input and any
! 81: // following input that matches the "skip" regular expression.
! 82: bool Consume(const RE& re,
! 83: const Arg& arg0 = RE::no_arg,
! 84: const Arg& arg1 = RE::no_arg,
! 85: const Arg& arg2 = RE::no_arg
! 86: // TODO: Allow more arguments?
! 87: );
! 88:
! 89: // Set the "skip" regular expression. If after consuming some data,
! 90: // a prefix of the input matches this RE, it is automatically
! 91: // skipped. For example, a programming language scanner would use
! 92: // a skip RE that matches white space and comments.
! 93: //
! 94: // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
! 95: //
! 96: // Skipping repeats as long as it succeeds. We used to let people do
! 97: // this by writing "(...)*" in the regular expression, but that added
! 98: // up to lots of recursive calls within the pcre library, so now we
! 99: // control repetition explicitly via the function call API.
! 100: //
! 101: // You can pass NULL for "re" if you do not want any data to be skipped.
! 102: void Skip(const char* re); // DEPRECATED; does *not* repeat
! 103: void SetSkipExpression(const char* re);
! 104:
! 105: // Temporarily pause "skip"ing. This
! 106: // Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
! 107: // is similar to
! 108: // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
! 109: // but avoids creating/deleting new RE objects.
! 110: void DisableSkip();
! 111:
! 112: // Reenable previously paused skipping. Any prefix of the input
! 113: // that matches the skip pattern is immediately dropped.
! 114: void EnableSkip();
! 115:
! 116: /***** Special wrappers around SetSkip() for some common idioms *****/
! 117:
! 118: // Arranges to skip whitespace, C comments, C++ comments.
! 119: // The overall RE is a disjunction of the following REs:
! 120: // \\s whitespace
! 121: // //.*\n C++ comment
! 122: // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
! 123: // We get repetition via the semantics of SetSkipExpression, not by using *
! 124: void SkipCXXComments() {
! 125: SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
! 126: }
! 127:
! 128: void set_save_comments(bool comments) {
! 129: save_comments_ = comments;
! 130: }
! 131:
! 132: bool save_comments() {
! 133: return save_comments_;
! 134: }
! 135:
! 136: // Append to vector ranges the comments found in the
! 137: // byte range [start,end] (inclusive) of the input data.
! 138: // Only comments that were extracted entirely within that
! 139: // range are returned: no range splitting of atomically-extracted
! 140: // comments is performed.
! 141: void GetComments(int start, int end, std::vector<StringPiece> *ranges);
! 142:
! 143: // Append to vector ranges the comments added
! 144: // since the last time this was called. This
! 145: // functionality is provided for efficiency when
! 146: // interleaving scanning with parsing.
! 147: void GetNextComments(std::vector<StringPiece> *ranges);
! 148:
! 149: private:
! 150: std::string data_; // All the input data
! 151: StringPiece input_; // Unprocessed input
! 152: RE* skip_; // If non-NULL, RE for skipping input
! 153: bool should_skip_; // If true, use skip_
! 154: bool skip_repeat_; // If true, repeat skip_ as long as it works
! 155: bool save_comments_; // If true, aggregate the skip expression
! 156:
! 157: // the skipped comments
! 158: // TODO: later consider requiring that the StringPieces be added
! 159: // in order by their start position
! 160: std::vector<StringPiece> *comments_;
! 161:
! 162: // the offset into comments_ that has been returned by GetNextComments
! 163: int comments_offset_;
! 164:
! 165: // helper function to consume *skip_ and honour
! 166: // save_comments_
! 167: void ConsumeSkip();
! 168: };
! 169:
! 170: } // namespace pcrecpp
! 171:
! 172: #endif /* _PCRE_SCANNER_H */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>