Annotation of embedaddon/pcre/pcrecpp_unittest.cc, revision 1.1.1.1
1.1 misho 1: // -*- coding: utf-8 -*-
2: //
3: // Copyright (c) 2005 - 2010, Google Inc.
4: // All rights reserved.
5: //
6: // Redistribution and use in source and binary forms, with or without
7: // modification, are permitted provided that the following conditions are
8: // met:
9: //
10: // * Redistributions of source code must retain the above copyright
11: // notice, this list of conditions and the following disclaimer.
12: // * Redistributions in binary form must reproduce the above
13: // copyright notice, this list of conditions and the following disclaimer
14: // in the documentation and/or other materials provided with the
15: // distribution.
16: // * Neither the name of Google Inc. nor the names of its
17: // contributors may be used to endorse or promote products derived from
18: // this software without specific prior written permission.
19: //
20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31: //
32: // Author: Sanjay Ghemawat
33: //
34: // TODO: Test extractions for PartialMatch/Consume
35:
36: #ifdef HAVE_CONFIG_H
37: #include "config.h"
38: #endif
39:
40: #include <stdio.h>
41: #include <string.h> /* for memset and strcmp */
42: #include <cassert>
43: #include <vector>
44: #include "pcrecpp.h"
45:
46: using pcrecpp::StringPiece;
47: using pcrecpp::RE;
48: using pcrecpp::RE_Options;
49: using pcrecpp::Hex;
50: using pcrecpp::Octal;
51: using pcrecpp::CRadix;
52:
// When true, the option tests additionally print a description of each
// individual match attempt.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than == (for example
// `x & mask`) is compared as a whole instead of being re-associated.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
68:
69: static void Timing1(int num_iters) {
70: // Same pattern lots of times
71: RE pattern("ruby:\\d+");
72: StringPiece p("ruby:1234");
73: for (int j = num_iters; j > 0; j--) {
74: CHECK(pattern.FullMatch(p));
75: }
76: }
77:
78: static void Timing2(int num_iters) {
79: // Same pattern lots of times
80: RE pattern("ruby:(\\d+)");
81: int i;
82: for (int j = num_iters; j > 0; j--) {
83: CHECK(pattern.FullMatch("ruby:1234", &i));
84: CHECK_EQ(i, 1234);
85: }
86: }
87:
88: static void Timing3(int num_iters) {
89: string text_string;
90: for (int j = num_iters; j > 0; j--) {
91: text_string += "this is another line\n";
92: }
93:
94: RE line_matcher(".*\n");
95: string line;
96: StringPiece text(text_string);
97: int counter = 0;
98: while (line_matcher.Consume(&text)) {
99: counter++;
100: }
101: printf("Matched %d lines\n", counter);
102: }
103:
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks: compiles 100000 distinct patterns and requires
// that the process size grows by less than 2% over the second half of the
// run (the first 50000 iterations serve as warm-up).
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, matching the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around to
  // a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);   // Allow < 2% growth
}

#endif
126:
127: static void RadixTests() {
128: printf("Testing hex\n");
129:
130: #define CHECK_HEX(type, value) \
131: do { \
132: type v; \
133: CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134: CHECK_EQ(v, 0x ## value); \
135: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136: CHECK_EQ(v, 0x ## value); \
137: } while(0)
138:
139: CHECK_HEX(short, 2bad);
140: CHECK_HEX(unsigned short, 2badU);
141: CHECK_HEX(int, dead);
142: CHECK_HEX(unsigned int, deadU);
143: CHECK_HEX(long, 7eadbeefL);
144: CHECK_HEX(unsigned long, deadbeefUL);
145: #ifdef HAVE_LONG_LONG
146: CHECK_HEX(long long, 12345678deadbeefLL);
147: #endif
148: #ifdef HAVE_UNSIGNED_LONG_LONG
149: CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150: #endif
151:
152: #undef CHECK_HEX
153:
154: printf("Testing octal\n");
155:
156: #define CHECK_OCTAL(type, value) \
157: do { \
158: type v; \
159: CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160: CHECK_EQ(v, 0 ## value); \
161: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162: CHECK_EQ(v, 0 ## value); \
163: } while(0)
164:
165: CHECK_OCTAL(short, 77777);
166: CHECK_OCTAL(unsigned short, 177777U);
167: CHECK_OCTAL(int, 17777777777);
168: CHECK_OCTAL(unsigned int, 37777777777U);
169: CHECK_OCTAL(long, 17777777777L);
170: CHECK_OCTAL(unsigned long, 37777777777UL);
171: #ifdef HAVE_LONG_LONG
172: CHECK_OCTAL(long long, 777777777777777777777LL);
173: #endif
174: #ifdef HAVE_UNSIGNED_LONG_LONG
175: CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176: #endif
177:
178: #undef CHECK_OCTAL
179:
180: printf("Testing decimal\n");
181:
182: #define CHECK_DECIMAL(type, value) \
183: do { \
184: type v; \
185: CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186: CHECK_EQ(v, value); \
187: CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188: CHECK_EQ(v, value); \
189: } while(0)
190:
191: CHECK_DECIMAL(short, -1);
192: CHECK_DECIMAL(unsigned short, 9999);
193: CHECK_DECIMAL(int, -1000);
194: CHECK_DECIMAL(unsigned int, 12345U);
195: CHECK_DECIMAL(long, -10000000L);
196: CHECK_DECIMAL(unsigned long, 3083324652U);
197: #ifdef HAVE_LONG_LONG
198: CHECK_DECIMAL(long long, -100000000000000LL);
199: #endif
200: #ifdef HAVE_UNSIGNED_LONG_LONG
201: CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202: #endif
203:
204: #undef CHECK_DECIMAL
205:
206: }
207:
208: static void TestReplace() {
209: printf("Testing Replace\n");
210:
211: struct ReplaceTest {
212: const char *regexp;
213: const char *rewrite;
214: const char *original;
215: const char *single;
216: const char *global;
217: int global_count; // the expected return value from ReplaceAll
218: };
219: static const ReplaceTest tests[] = {
220: { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221: "\\2\\1ay",
222: "the quick brown fox jumps over the lazy dogs.",
223: "ethay quick brown fox jumps over the lazy dogs.",
224: "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225: 9 },
226: { "\\w+",
227: "\\0-NOSPAM",
228: "paul.haahr@google.com",
229: "paul-NOSPAM.haahr@google.com",
230: "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231: 4 },
232: { "^",
233: "(START)",
234: "foo",
235: "(START)foo",
236: "(START)foo",
237: 1 },
238: { "^",
239: "(START)",
240: "",
241: "(START)",
242: "(START)",
243: 1 },
244: { "$",
245: "(END)",
246: "",
247: "(END)",
248: "(END)",
249: 1 },
250: { "b",
251: "bb",
252: "ababababab",
253: "abbabababab",
254: "abbabbabbabbabb",
255: 5 },
256: { "b",
257: "bb",
258: "bbbbbb",
259: "bbbbbbb",
260: "bbbbbbbbbbbb",
261: 6 },
262: { "b+",
263: "bb",
264: "bbbbbb",
265: "bb",
266: "bb",
267: 1 },
268: { "b*",
269: "bb",
270: "bbbbbb",
271: "bb",
272: "bbbb",
273: 2 },
274: { "b*",
275: "bb",
276: "aaaaa",
277: "bbaaaaa",
278: "bbabbabbabbabbabb",
279: 6 },
280: { "b*",
281: "bb",
282: "aa\naa\n",
283: "bbaa\naa\n",
284: "bbabbabb\nbbabbabb\nbb",
285: 7 },
286: { "b*",
287: "bb",
288: "aa\raa\r",
289: "bbaa\raa\r",
290: "bbabbabb\rbbabbabb\rbb",
291: 7 },
292: { "b*",
293: "bb",
294: "aa\r\naa\r\n",
295: "bbaa\r\naa\r\n",
296: "bbabbabb\r\nbbabbabb\r\nbb",
297: 7 },
298: // Check empty-string matching (it's tricky!)
299: { "aa|b*",
300: "@",
301: "aa",
302: "@",
303: "@@",
304: 2 },
305: { "b*|aa",
306: "@",
307: "aa",
308: "@aa",
309: "@@@",
310: 3 },
311: #ifdef SUPPORT_UTF8
312: { "b*",
313: "bb",
314: "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315: "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316: "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317: 5 },
318: { "b*",
319: "bb",
320: "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321: "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322: ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323: "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324: 9 },
325: #endif
326: { "", NULL, NULL, NULL, NULL, 0 }
327: };
328:
329: #ifdef SUPPORT_UTF8
330: const bool support_utf8 = true;
331: #else
332: const bool support_utf8 = false;
333: #endif
334:
335: for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336: RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337: assert(re.error().empty());
338: string one(t->original);
339: CHECK(re.Replace(t->rewrite, &one));
340: CHECK_EQ(one, t->single);
341: string all(t->original);
342: const int replace_count = re.GlobalReplace(t->rewrite, &all);
343: CHECK_EQ(all, t->global);
344: CHECK_EQ(replace_count, t->global_count);
345: }
346:
347: // One final test: test \r\n replacement when we're not in CRLF mode
348: {
349: RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350: assert(re.error().empty());
351: string all("aa\r\naa\r\n");
352: CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354: }
355: {
356: RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357: assert(re.error().empty());
358: string all("aa\r\naa\r\n");
359: CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361: }
362: // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363: // Alas, the answer depends on how pcre was compiled.
364: }
365:
366: static void TestExtract() {
367: printf("Testing Extract\n");
368:
369: string s;
370:
371: CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372: CHECK_EQ(s, "kremvax!boris");
373:
374: // check the RE interface as well
375: CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376: CHECK_EQ(s, "'foo'");
377: CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378: CHECK_EQ(s, "'foo'");
379: }
380:
381: static void TestConsume() {
382: printf("Testing Consume\n");
383:
384: string word;
385:
386: string s(" aaa b!@#$@#$cccc");
387: StringPiece input(s);
388:
389: RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390: CHECK(r.Consume(&input, &word));
391: CHECK_EQ(word, "aaa");
392: CHECK(r.Consume(&input, &word));
393: CHECK_EQ(word, "b");
394: CHECK(! r.Consume(&input, &word));
395: }
396:
397: static void TestFindAndConsume() {
398: printf("Testing FindAndConsume\n");
399:
400: string word;
401:
402: string s(" aaa b!@#$@#$cccc");
403: StringPiece input(s);
404:
405: RE r("(\\w+)"); // matches a word
406: CHECK(r.FindAndConsume(&input, &word));
407: CHECK_EQ(word, "aaa");
408: CHECK(r.FindAndConsume(&input, &word));
409: CHECK_EQ(word, "b");
410: CHECK(r.FindAndConsume(&input, &word));
411: CHECK_EQ(word, "cccc");
412: CHECK(! r.FindAndConsume(&input, &word));
413: }
414:
415: static void TestMatchNumberPeculiarity() {
416: printf("Testing match-number peculiarity\n");
417:
418: string word1;
419: string word2;
420: string word3;
421:
422: RE r("(foo)|(bar)|(baz)");
423: CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424: CHECK_EQ(word1, "foo");
425: CHECK_EQ(word2, "");
426: CHECK_EQ(word3, "");
427: CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428: CHECK_EQ(word1, "");
429: CHECK_EQ(word2, "bar");
430: CHECK_EQ(word3, "");
431: CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432: CHECK_EQ(word1, "");
433: CHECK_EQ(word2, "");
434: CHECK_EQ(word3, "baz");
435: CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436:
437: string a;
438: CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439: CHECK_EQ(a, "");
440: }
441:
442: static void TestRecursion() {
443: printf("Testing recursion\n");
444:
445: // Get one string that passes (sometimes), one that never does.
446: string text_good("abcdefghijk");
447: string text_bad("acdefghijkl");
448:
449: // According to pcretest, matching text_good against (\w+)*b
450: // requires match_limit of at least 8192, and match_recursion_limit
451: // of at least 37.
452:
453: RE_Options options_ml;
454: options_ml.set_match_limit(8192);
455: RE re("(\\w+)*b", options_ml);
456: CHECK(re.PartialMatch(text_good) == true);
457: CHECK(re.PartialMatch(text_bad) == false);
458: CHECK(re.FullMatch(text_good) == false);
459: CHECK(re.FullMatch(text_bad) == false);
460:
461: options_ml.set_match_limit(1024);
462: RE re2("(\\w+)*b", options_ml);
463: CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464: CHECK(re2.PartialMatch(text_bad) == false);
465: CHECK(re2.FullMatch(text_good) == false);
466: CHECK(re2.FullMatch(text_bad) == false);
467:
468: RE_Options options_mlr;
469: options_mlr.set_match_limit_recursion(50);
470: RE re3("(\\w+)*b", options_mlr);
471: CHECK(re3.PartialMatch(text_good) == true);
472: CHECK(re3.PartialMatch(text_bad) == false);
473: CHECK(re3.FullMatch(text_good) == false);
474: CHECK(re3.FullMatch(text_bad) == false);
475:
476: options_mlr.set_match_limit_recursion(10);
477: RE re4("(\\w+)*b", options_mlr);
478: CHECK(re4.PartialMatch(text_good) == false);
479: CHECK(re4.PartialMatch(text_bad) == false);
480: CHECK(re4.FullMatch(text_good) == false);
481: CHECK(re4.FullMatch(text_bad) == false);
482: }
483:
484: // A meta-quoted string, interpreted as a pattern, should always match
485: // the original unquoted string.
486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487: string quoted = RE::QuoteMeta(unquoted);
488: RE re(quoted, options);
489: CHECK(re.FullMatch(unquoted));
490: }
491:
492: // A string containing meaningful regexp characters, which is then meta-
493: // quoted, should not generally match a string the unquoted string does.
494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495: RE_Options options = RE_Options()) {
496: string quoted = RE::QuoteMeta(unquoted);
497: RE re(quoted, options);
498: CHECK(!re.FullMatch(should_not_match));
499: }
500:
501: // Tests that quoted meta characters match their original strings,
502: // and that a few things that shouldn't match indeed do not.
503: static void TestQuotaMetaSimple() {
504: TestQuoteMeta("foo");
505: TestQuoteMeta("foo.bar");
506: TestQuoteMeta("foo\\.bar");
507: TestQuoteMeta("[1-9]");
508: TestQuoteMeta("1.5-2.0?");
509: TestQuoteMeta("\\d");
510: TestQuoteMeta("Who doesn't like ice cream?");
511: TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512: TestQuoteMeta("((?!)xxx).*yyy");
513: TestQuoteMeta("([");
514: TestQuoteMeta(string("foo\0bar", 7));
515: }
516:
517: static void TestQuoteMetaSimpleNegative() {
518: NegativeTestQuoteMeta("foo", "bar");
519: NegativeTestQuoteMeta("...", "bar");
520: NegativeTestQuoteMeta("\\.", ".");
521: NegativeTestQuoteMeta("\\.", "..");
522: NegativeTestQuoteMeta("(a)", "a");
523: NegativeTestQuoteMeta("(a|b)", "a");
524: NegativeTestQuoteMeta("(a|b)", "(a)");
525: NegativeTestQuoteMeta("(a|b)", "a|b");
526: NegativeTestQuoteMeta("[0-9]", "0");
527: NegativeTestQuoteMeta("[0-9]", "0-9");
528: NegativeTestQuoteMeta("[0-9]", "[9]");
529: NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530: }
531:
532: static void TestQuoteMetaLatin1() {
533: TestQuoteMeta("3\xb2 = 9");
534: }
535:
// QuoteMeta checks on UTF-8 input.  Compiled to an empty function unless
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  static const char* const kUtf8Samples[] = {
    "Pl\xc3\xa1\x63ido Domingo",
    "xyz",                    // No fancy utf8
    "\xc2\xb0",               // 2-byte utf8 (degree symbol)
    "27\xc2\xb0 degrees",     // As a middle character
    "\xe2\x80\xb3",           // 3-byte utf8 (double prime)
    "\xf0\x9d\x85\x9f",       // 4-byte utf8 (music note)
  };
  const size_t sample_count = sizeof(kUtf8Samples) / sizeof(kUtf8Samples[0]);
  for (size_t k = 0; k < sample_count; ++k) {
    TestQuoteMeta(kUtf8Samples[k], pcrecpp::UTF8());
  }

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the quoted pattern must not match a text in
  // which each byte of the character is escaped individually.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
550:
551: static void TestQuoteMetaAll() {
552: printf("Testing QuoteMeta\n");
553: TestQuotaMetaSimple();
554: TestQuoteMetaSimpleNegative();
555: TestQuoteMetaLatin1();
556: TestQuoteMetaUtf8();
557: }
558:
559: //
560: // Options tests contributed by
561: // Giuseppe Maxia, CTO, Stardata s.r.l.
562: // July 2005
563: //
564: static void GetOneOptionResult(
565: const char *option_name,
566: const char *regex,
567: const char *str,
568: RE_Options options,
569: bool full,
570: string expected) {
571:
572: printf("Testing Option <%s>\n", option_name);
573: if(VERBOSE_TEST)
574: printf("/%s/ finds \"%s\" within \"%s\" \n",
575: regex,
576: expected.c_str(),
577: str);
578: string captured("");
579: if (full)
580: RE(regex,options).FullMatch(str, &captured);
581: else
582: RE(regex,options).PartialMatch(str, &captured);
583: CHECK_EQ(captured, expected);
584: }
585:
586: static void TestOneOption(
587: const char *option_name,
588: const char *regex,
589: const char *str,
590: RE_Options options,
591: bool full,
592: bool assertive = true) {
593:
594: printf("Testing Option <%s>\n", option_name);
595: if (VERBOSE_TEST)
596: printf("'%s' %s /%s/ \n",
597: str,
598: (assertive? "matches" : "doesn't match"),
599: regex);
600: if (assertive) {
601: if (full)
602: CHECK(RE(regex,options).FullMatch(str));
603: else
604: CHECK(RE(regex,options).PartialMatch(str));
605: } else {
606: if (full)
607: CHECK(!RE(regex,options).FullMatch(str));
608: else
609: CHECK(!RE(regex,options).PartialMatch(str));
610: }
611: }
612:
613: static void Test_CASELESS() {
614: RE_Options options;
615: RE_Options options2;
616:
617: options.set_caseless(true);
618: TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619: TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620: TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621:
622: TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623: TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624: options.set_caseless(false);
625: TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
626: }
627:
628: static void Test_MULTILINE() {
629: RE_Options options;
630: RE_Options options2;
631: const char *str = "HELLO\n" "cruel\n" "world\n";
632:
633: options.set_multiline(true);
634: TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635: TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636: TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637: options.set_multiline(false);
638: TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639: }
640:
641: static void Test_DOTALL() {
642: RE_Options options;
643: RE_Options options2;
644: const char *str = "HELLO\n" "cruel\n" "world";
645:
646: options.set_dotall(true);
647: TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648: TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649: TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650: options.set_dotall(false);
651: TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652: }
653:
654: static void Test_DOLLAR_ENDONLY() {
655: RE_Options options;
656: RE_Options options2;
657: const char *str = "HELLO world\n";
658:
659: TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660: options.set_dollar_endonly(true);
661: TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662: TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
663: }
664:
665: static void Test_EXTRA() {
666: RE_Options options;
667: const char *str = "HELLO";
668:
669: options.set_extra(true);
670: TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671: TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672: options.set_extra(false);
673: TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674: }
675:
676: static void Test_EXTENDED() {
677: RE_Options options;
678: RE_Options options2;
679: const char *str = "HELLO world";
680:
681: options.set_extended(true);
682: TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683: TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684: TestOneOption("EXTENDED (class)",
685: "^ HE L{2} O "
686: "\\s+ "
687: "\\w+ $ ",
688: str,
689: options,
690: false);
691:
692: TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693: TestOneOption("EXTENDED (function)",
694: "^ HE L{2} O "
695: "\\s+ "
696: "\\w+ $ ",
697: str,
698: pcrecpp::EXTENDED(),
699: false);
700:
701: options.set_extended(false);
702: TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703: }
704:
705: static void Test_NO_AUTO_CAPTURE() {
706: RE_Options options;
707: const char *str = "HELLO world";
708: string captured;
709:
710: printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711: if (VERBOSE_TEST)
712: printf("parentheses capture text\n");
713: RE re("(world|universe)$", options);
714: CHECK(re.Extract("\\1", str , &captured));
715: CHECK_EQ(captured, "world");
716: options.set_no_auto_capture(true);
717: printf("testing Option <NO_AUTO_CAPTURE>\n");
718: if (VERBOSE_TEST)
719: printf("parentheses do not capture text\n");
720: re.Extract("\\1",str, &captured );
721: CHECK_EQ(captured, "world");
722: }
723:
724: static void Test_UNGREEDY() {
725: RE_Options options;
726: const char *str = "HELLO, 'this' is the 'world'";
727:
728: options.set_ungreedy(true);
729: GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730: GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731: GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732:
733: options.set_ungreedy(false);
734: GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735: GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736: }
737:
738: static void Test_all_options() {
739: const char *str = "HELLO\n" "cruel\n" "world";
740: RE_Options options;
741: options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742:
743: TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744: options.set_all_options(0);
745: TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746: options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747:
748: TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749: TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750: " ^ c r u e l $ ",
751: str,
752: RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753: false);
754:
755: TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756: " ^ c r u e l $ ",
757: str,
758: RE_Options()
759: .set_multiline(true)
760: .set_extended(true),
761: false);
762:
763: options.set_all_options(0);
764: TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765:
766: }
767:
768: static void TestOptions() {
769: printf("Testing Options\n");
770: Test_CASELESS();
771: Test_MULTILINE();
772: Test_DOTALL();
773: Test_DOLLAR_ENDONLY();
774: Test_EXTENDED();
775: Test_NO_AUTO_CAPTURE();
776: Test_UNGREEDY();
777: Test_EXTRA();
778: Test_all_options();
779: }
780:
781: static void TestConstructors() {
782: printf("Testing constructors\n");
783:
784: RE_Options options;
785: options.set_dotall(true);
786: const char *str = "HELLO\n" "cruel\n" "world";
787:
788: RE orig("HELLO.*world", options);
789: CHECK(orig.FullMatch(str));
790:
791: RE copy1(orig);
792: CHECK(copy1.FullMatch(str));
793:
794: RE copy2("not a match");
795: CHECK(!copy2.FullMatch(str));
796: copy2 = copy1;
797: CHECK(copy2.FullMatch(str));
798: copy2 = orig;
799: CHECK(copy2.FullMatch(str));
800:
801: // Make sure when we assign to ourselves, nothing bad happens
802: orig = orig;
803: copy1 = copy1;
804: copy2 = copy2;
805: CHECK(orig.FullMatch(str));
806: CHECK(copy1.FullMatch(str));
807: CHECK(copy2.FullMatch(str));
808: }
809:
810: int main(int argc, char** argv) {
811: // Treat any flag as --help
812: if (argc > 1 && argv[1][0] == '-') {
813: printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814: " If 'timingX ###' is specified, run the given timing test\n"
815: " with the given number of iterations, rather than running\n"
816: " the default corectness test.\n", argv[0]);
817: return 0;
818: }
819:
820: if (argc > 1) {
821: if ( argc == 2 || atoi(argv[2]) == 0) {
822: printf("timing mode needs a num-iters argument\n");
823: return 1;
824: }
825: if (!strcmp(argv[1], "timing1"))
826: Timing1(atoi(argv[2]));
827: else if (!strcmp(argv[1], "timing2"))
828: Timing2(atoi(argv[2]));
829: else if (!strcmp(argv[1], "timing3"))
830: Timing3(atoi(argv[2]));
831: else
832: printf("Unknown argument '%s'\n", argv[1]);
833: return 0;
834: }
835:
836: printf("PCRE C++ wrapper tests\n");
837: printf("Testing FullMatch\n");
838:
839: int i;
840: string s;
841:
842: /***** FullMatch with no args *****/
843:
844: CHECK(RE("h.*o").FullMatch("hello"));
845: CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
846: CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
847: CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
848: CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
849: CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
850:
851: /***** FullMatch with args *****/
852:
853: // Zero-arg
854: CHECK(RE("\\d+").FullMatch("1001"));
855:
856: // Single-arg
857: CHECK(RE("(\\d+)").FullMatch("1001", &i));
858: CHECK_EQ(i, 1001);
859: CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860: CHECK_EQ(i, -123);
861: CHECK(!RE("()\\d+").FullMatch("10", &i));
862: CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863: &i));
864:
865: // Digits surrounding integer-arg
866: CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867: CHECK_EQ(i, 23);
868: CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869: CHECK_EQ(i, 1);
870: CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871: CHECK_EQ(i, -1);
872: CHECK(RE("(\\d)").PartialMatch("1234", &i));
873: CHECK_EQ(i, 1);
874: CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875: CHECK_EQ(i, -1);
876:
877: // String-arg
878: CHECK(RE("h(.*)o").FullMatch("hello", &s));
879: CHECK_EQ(s, string("ell"));
880:
881: // StringPiece-arg
882: StringPiece sp;
883: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884: CHECK_EQ(sp.size(), 4);
885: CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886: CHECK_EQ(i, 1234);
887:
888: // Multi-arg
889: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890: CHECK_EQ(s, string("ruby"));
891: CHECK_EQ(i, 1234);
892:
893: // Ignore non-void* NULL arg
894: CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895: CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896: CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897: CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898: #ifdef HAVE_LONG_LONG
899: CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900: #endif
901: CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902: CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
903:
904: // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905: CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906: CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907: CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908: CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909: CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910:
911: // Ignored arg
912: CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913: CHECK_EQ(s, string("ruby"));
914: CHECK_EQ(i, 1234);
915:
916: // Type tests
917: {
918: char c;
919: CHECK(RE("(H)ello").FullMatch("Hello", &c));
920: CHECK_EQ(c, 'H');
921: }
922: {
923: unsigned char c;
924: CHECK(RE("(H)ello").FullMatch("Hello", &c));
925: CHECK_EQ(c, static_cast<unsigned char>('H'));
926: }
927: {
928: short v;
929: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
931: CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
932: CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
933: CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934: CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
935: }
936: {
937: unsigned short v;
938: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
939: CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
940: CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
941: CHECK(!RE("(\\d+)").FullMatch("65536", &v));
942: }
943: {
944: int v;
945: static const int max_value = 0x7fffffff;
946: static const int min_value = -max_value - 1;
947: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
948: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
949: CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
950: CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951: CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952: CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
953: }
954: {
955: unsigned int v;
956: static const unsigned int max_value = 0xfffffffful;
957: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958: CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
959: CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
960: }
961: #ifdef HAVE_LONG_LONG
962: # if defined(__MINGW__) || defined(__MINGW32__)
963: # define LLD "%I64d"
964: # define LLU "%I64u"
965: # else
966: # define LLD "%lld"
967: # define LLU "%llu"
968: # endif
969: {
970: long long v;
971: static const long long max_value = 0x7fffffffffffffffLL;
972: static const long long min_value = -max_value - 1;
973: char buf[32]; // definitely big enough for a long long
974:
975: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976: CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
977:
978: sprintf(buf, LLD, max_value);
979: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
980:
981: sprintf(buf, LLD, min_value);
982: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
983:
984: sprintf(buf, LLD, max_value);
985: assert(buf[strlen(buf)-1] != '9');
986: buf[strlen(buf)-1]++;
987: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
988:
989: sprintf(buf, LLD, min_value);
990: assert(buf[strlen(buf)-1] != '9');
991: buf[strlen(buf)-1]++;
992: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993: }
994: #endif
995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
996: {
997: unsigned long long v;
998: long long v2;
999: static const unsigned long long max_value = 0xffffffffffffffffULL;
1000: char buf[32]; // definitely big enough for an unsigned long long
1001:
1002: CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003: CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1004:
1005: sprintf(buf, LLU, max_value);
1006: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1007:
1008: assert(buf[strlen(buf)-1] != '9');
1009: buf[strlen(buf)-1]++;
1010: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1011: }
1012: #endif
1013: {
1014: float v;
1015: CHECK(RE("(.*)").FullMatch("100", &v));
1016: CHECK(RE("(.*)").FullMatch("-100.", &v));
1017: CHECK(RE("(.*)").FullMatch("1e23", &v));
1018: }
1019: {
1020: double v;
1021: CHECK(RE("(.*)").FullMatch("100", &v));
1022: CHECK(RE("(.*)").FullMatch("-100.", &v));
1023: CHECK(RE("(.*)").FullMatch("1e23", &v));
1024: }
1025:
1026: // Check that matching is fully anchored
1027: CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1028: CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1029: CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030: CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031:
1032: // Braces
1033: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035: CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036:
1037: // Complicated RE
1038: CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039: CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040: CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041: CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1042:
1043: // Check full-match handling (needs '$' tacked on internally)
1044: CHECK(RE("fo|foo").FullMatch("fo"));
1045: CHECK(RE("fo|foo").FullMatch("foo"));
1046: CHECK(RE("fo|foo$").FullMatch("fo"));
1047: CHECK(RE("fo|foo$").FullMatch("foo"));
1048: CHECK(RE("foo$").FullMatch("foo"));
1049: CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050: CHECK(!RE("fo|bar").FullMatch("fox"));
1051:
1052: // Enable the following (flip the 'if (false)' guard) if we change the
1053: // handling of '$' to prevent it from matching a trailing newline.
1054: if (false) {
1055: // Check that we don't get bitten by pcre's special handling of a
1056: // '\n' at the end of the string matching '$'
1057: CHECK(!RE("foo$").PartialMatch("foo\n"));
1058: }
1059:
1060: // Number of args
1061: int a[16];
1062: CHECK(RE("").FullMatch(""));
1063:
1064: memset(a, 0, sizeof(0));
1065: CHECK(RE("(\\d){1}").FullMatch("1",
1066: &a[0]));
1067: CHECK_EQ(a[0], 1);
1068:
1069: memset(a, 0, sizeof(0));
1070: CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071: &a[0], &a[1]));
1072: CHECK_EQ(a[0], 1);
1073: CHECK_EQ(a[1], 2);
1074:
1075: memset(a, 0, sizeof(0));
1076: CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077: &a[0], &a[1], &a[2]));
1078: CHECK_EQ(a[0], 1);
1079: CHECK_EQ(a[1], 2);
1080: CHECK_EQ(a[2], 3);
1081:
1082: memset(a, 0, sizeof(0));
1083: CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084: &a[0], &a[1], &a[2], &a[3]));
1085: CHECK_EQ(a[0], 1);
1086: CHECK_EQ(a[1], 2);
1087: CHECK_EQ(a[2], 3);
1088: CHECK_EQ(a[3], 4);
1089:
1090: memset(a, 0, sizeof(0));
1091: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092: &a[0], &a[1], &a[2],
1093: &a[3], &a[4]));
1094: CHECK_EQ(a[0], 1);
1095: CHECK_EQ(a[1], 2);
1096: CHECK_EQ(a[2], 3);
1097: CHECK_EQ(a[3], 4);
1098: CHECK_EQ(a[4], 5);
1099:
1100: memset(a, 0, sizeof(0));
1101: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102: &a[0], &a[1], &a[2],
1103: &a[3], &a[4], &a[5]));
1104: CHECK_EQ(a[0], 1);
1105: CHECK_EQ(a[1], 2);
1106: CHECK_EQ(a[2], 3);
1107: CHECK_EQ(a[3], 4);
1108: CHECK_EQ(a[4], 5);
1109: CHECK_EQ(a[5], 6);
1110:
1111: memset(a, 0, sizeof(0));
1112: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113: &a[0], &a[1], &a[2], &a[3],
1114: &a[4], &a[5], &a[6]));
1115: CHECK_EQ(a[0], 1);
1116: CHECK_EQ(a[1], 2);
1117: CHECK_EQ(a[2], 3);
1118: CHECK_EQ(a[3], 4);
1119: CHECK_EQ(a[4], 5);
1120: CHECK_EQ(a[5], 6);
1121: CHECK_EQ(a[6], 7);
1122:
1123: memset(a, 0, sizeof(0));
1124: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125: "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126: "1234567890123456",
1127: &a[0], &a[1], &a[2], &a[3],
1128: &a[4], &a[5], &a[6], &a[7],
1129: &a[8], &a[9], &a[10], &a[11],
1130: &a[12], &a[13], &a[14], &a[15]));
1131: CHECK_EQ(a[0], 1);
1132: CHECK_EQ(a[1], 2);
1133: CHECK_EQ(a[2], 3);
1134: CHECK_EQ(a[3], 4);
1135: CHECK_EQ(a[4], 5);
1136: CHECK_EQ(a[5], 6);
1137: CHECK_EQ(a[6], 7);
1138: CHECK_EQ(a[7], 8);
1139: CHECK_EQ(a[8], 9);
1140: CHECK_EQ(a[9], 0);
1141: CHECK_EQ(a[10], 1);
1142: CHECK_EQ(a[11], 2);
1143: CHECK_EQ(a[12], 3);
1144: CHECK_EQ(a[13], 4);
1145: CHECK_EQ(a[14], 5);
1146: CHECK_EQ(a[15], 6);
1147:
1148: /***** PartialMatch *****/
1149:
1150: printf("Testing PartialMatch\n");
1151:
1152: CHECK(RE("h.*o").PartialMatch("hello"));
1153: CHECK(RE("h.*o").PartialMatch("othello"));
1154: CHECK(RE("h.*o").PartialMatch("hello!"));
1155: CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1156:
1157: /***** other tests *****/
1158:
1159: RadixTests();
1160: TestReplace();
1161: TestExtract();
1162: TestConsume();
1163: TestFindAndConsume();
1164: TestQuoteMetaAll();
1165: TestMatchNumberPeculiarity();
1166:
1167: // Check the pattern() accessor
1168: {
1169: const string kPattern = "http://([^/]+)/.*";
1170: const RE re(kPattern);
1171: CHECK_EQ(kPattern, re.pattern());
1172: }
1173:
1174: // Check RE error field.
1175: {
1176: RE re("foo");
1177: CHECK(re.error().empty()); // Must have no error
1178: }
1179:
1180: #ifdef SUPPORT_UTF8
1181: // Check UTF-8 handling
1182: {
1183: printf("Testing UTF-8 handling\n");
1184:
1185: // Three Japanese characters (nihongo)
1186: const unsigned char utf8_string[] = {
1187: 0xe6, 0x97, 0xa5, // 65e5
1188: 0xe6, 0x9c, 0xac, // 672c
1189: 0xe8, 0xaa, 0x9e, // 8a9e
1190: 0
1191: };
1192: const unsigned char utf8_pattern[] = {
1193: '.',
1194: 0xe6, 0x9c, 0xac, // 672c
1195: '.',
1196: 0
1197: };
1198:
1199: // Both should match in either mode, bytes or UTF-8
1200: RE re_test1(".........");
1201: CHECK(re_test1.FullMatch(utf8_string));
1202: RE re_test2("...", pcrecpp::UTF8());
1203: CHECK(re_test2.FullMatch(utf8_string));
1204:
1205: // Check that '.' matches one byte or UTF-8 character
1206: // according to the mode.
1207: string ss;
1208: RE re_test3("(.)");
1209: CHECK(re_test3.PartialMatch(utf8_string, &ss));
1210: CHECK_EQ(ss, string("\xe6"));
1211: RE re_test4("(.)", pcrecpp::UTF8());
1212: CHECK(re_test4.PartialMatch(utf8_string, &ss));
1213: CHECK_EQ(ss, string("\xe6\x97\xa5"));
1214:
1215: // Check that string matches itself in either mode
1216: RE re_test5(utf8_string);
1217: CHECK(re_test5.FullMatch(utf8_string));
1218: RE re_test6(utf8_string, pcrecpp::UTF8());
1219: CHECK(re_test6.FullMatch(utf8_string));
1220:
1221: // Check that pattern matches string only in UTF8 mode
1222: RE re_test7(utf8_pattern);
1223: CHECK(!re_test7.FullMatch(utf8_string));
1224: RE re_test8(utf8_pattern, pcrecpp::UTF8());
1225: CHECK(re_test8.FullMatch(utf8_string));
1226: }
1227:
1228: // Check that ungreedy, UTF8 regular expressions don't match when they
1229: // oughtn't -- see bug 82246.
1230: {
1231: // This code always worked.
1232: const char* pattern = "\\w+X";
1233: const string target = "a aX";
1234: RE match_sentence(pattern);
1235: RE match_sentence_re(pattern, pcrecpp::UTF8());
1236:
1237: CHECK(!match_sentence.FullMatch(target));
1238: CHECK(!match_sentence_re.FullMatch(target));
1239: }
1240:
1241: {
1242: const char* pattern = "(?U)\\w+X";
1243: const string target = "a aX";
1244: RE match_sentence(pattern);
1245: RE match_sentence_re(pattern, pcrecpp::UTF8());
1246:
1247: CHECK(!match_sentence.FullMatch(target));
1248: CHECK(!match_sentence_re.FullMatch(target));
1249: }
1250: #endif /* def SUPPORT_UTF8 */
1251:
1252: printf("Testing error reporting\n");
1253:
1254: { RE re("a\\1"); CHECK(!re.error().empty()); }
1255: {
1256: RE re("a[x");
1257: CHECK(!re.error().empty());
1258: }
1259: {
1260: RE re("a[z-a]");
1261: CHECK(!re.error().empty());
1262: }
1263: {
1264: RE re("a[[:foobar:]]");
1265: CHECK(!re.error().empty());
1266: }
1267: {
1268: RE re("a(b");
1269: CHECK(!re.error().empty());
1270: }
1271: {
1272: RE re("a\\");
1273: CHECK(!re.error().empty());
1274: }
1275:
1276: // Test that recursion is stopped
1277: TestRecursion();
1278:
1279: // Test Options
1280: if (getenv("VERBOSE_TEST") != NULL)
1281: VERBOSE_TEST = true;
1282: TestOptions();
1283:
1284: // Test the constructors
1285: TestConstructors();
1286:
1287: // Done
1288: printf("OK\n");
1289:
1290: return 0;
1291: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>