embedaddon/pcre/pcrecpp_unittest.cc - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / pcrecpp_unittest.cc
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 08:25:55 2013 UTC (10 years, 10 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_34, v8_33, v8_31, v8_30, v8_21, HEAD

8.33

1: // -*- coding: utf-8 -*- 2: // 3: // Copyright (c) 2005 - 2010, Google Inc. 4: // All rights reserved. 5: // 6: // Redistribution and use in source and binary forms, with or without 7: // modification, are permitted provided that the following conditions are 8: // met: 9: // 10: // * Redistributions of source code must retain the above copyright 11: // notice, this list of conditions and the following disclaimer. 12: // * Redistributions in binary form must reproduce the above 13: // copyright notice, this list of conditions and the following disclaimer 14: // in the documentation and/or other materials provided with the 15: // distribution. 16: // * Neither the name of Google Inc. nor the names of its 17: // contributors may be used to endorse or promote products derived from 18: // this software without specific prior written permission. 19: // 20: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31: // 32: // Author: Sanjay Ghemawat 33: // 34: // TODO: Test extractions for PartialMatch/Consume 35: 36: #ifdef HAVE_CONFIG_H 37: #include "config.h" 38: #endif 39: 40: #include <stdio.h> 41: #include <string.h> /* for memset and strcmp */ 42: #include <cassert> 43: #include <vector> 44: #include "pcrecpp.h" 45: 46: using pcrecpp::StringPiece; 47: using pcrecpp::RE; 48: using pcrecpp::RE_Options; 49: using pcrecpp::Hex; 50: using pcrecpp::Octal; 51: using pcrecpp::CRadix; 52: 53: static bool VERBOSE_TEST = false; 54: 55: // CHECK dies with a fatal error if condition is not true. It is *not* 56: // controlled by NDEBUG, so the check will be executed regardless of 57: // compilation mode. Therefore, it is safe to do things like: 58: // CHECK_EQ(fp->Write(x), 4) 59: #define CHECK(condition) do { \ 60: if (!(condition)) { \ 61: fprintf(stderr, "%s:%d: Check failed: %s\n", \ 62: __FILE__, __LINE__, #condition); \ 63: exit(1); \ 64: } \ 65: } while (0) 66: 67: #define CHECK_EQ(a, b) CHECK(a == b) 68: 69: static void Timing1(int num_iters) { 70: // Same pattern lots of times 71: RE pattern("ruby:\\d+"); 72: StringPiece p("ruby:1234"); 73: for (int j = num_iters; j > 0; j--) { 74: CHECK(pattern.FullMatch(p)); 75: } 76: } 77: 78: static void Timing2(int num_iters) { 79: // Same pattern lots of times 80: RE pattern("ruby:(\\d+)"); 81: int i; 82: for (int j = num_iters; j > 0; j--) { 83: CHECK(pattern.FullMatch("ruby:1234", &i)); 84: CHECK_EQ(i, 1234); 85: } 86: } 87: 88: static void Timing3(int num_iters) { 89: string text_string; 90: for (int j = num_iters; j > 0; j--) { 91: text_string += "this is another line\n"; 92: } 93: 94: RE line_matcher(".*\n"); 95: string line; 96: StringPiece text(text_string); 97: int counter = 0; 98: while (line_matcher.Consume(&text)) { 99: counter++; 100: } 101: printf("Matched %d lines\n", counter); 102: } 103: 104: #if 0 // uncomment this if you have a way of defining VirtualProcessSize() 105: 106: static void LeakTest() { 107: // Check for memory leaks 108: unsigned long long initial_size = 0; 109: for (int i = 0; i < 100000; i++) { 110: if (i == 50000) { 111: initial_size = VirtualProcessSize(); 112: printf("Size after 50000: %llu\n", initial_size); 113: } 114: char buf[100]; // definitely big enough 115: sprintf(buf, "pat%09d", i); 116: RE newre(buf); 117: } 118: uint64 final_size = VirtualProcessSize(); 119: printf("Size after 100000: %llu\n", final_size); 120: const double growth = double(final_size - initial_size) / final_size; 121: printf("Growth: %0.2f%%", growth * 100); 122: CHECK(growth < 0.02); // Allow < 2% growth 123: } 124: 125: #endif 126: 127: static void RadixTests() { 128: printf("Testing hex\n"); 129: 130: #define CHECK_HEX(type, value) \ 131: do { \ 132: type v; \ 133: CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ 134: CHECK_EQ(v, 0x ## value); \ 135: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ 136: CHECK_EQ(v, 0x ## value); \ 137: } while(0) 138: 139: CHECK_HEX(short, 2bad); 140: CHECK_HEX(unsigned short, 2badU); 141: CHECK_HEX(int, dead); 142: CHECK_HEX(unsigned int, deadU); 143: CHECK_HEX(long, 7eadbeefL); 144: CHECK_HEX(unsigned long, deadbeefUL); 145: #ifdef HAVE_LONG_LONG 146: CHECK_HEX(long long, 12345678deadbeefLL); 147: #endif 148: #ifdef HAVE_UNSIGNED_LONG_LONG 149: CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 150: #endif 151: 152: #undef CHECK_HEX 153: 154: printf("Testing octal\n"); 155: 156: #define CHECK_OCTAL(type, value) \ 157: do { \ 158: type v; \ 159: CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ 160: CHECK_EQ(v, 0 ## value); \ 161: CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ 162: CHECK_EQ(v, 0 ## value); \ 163: } while(0) 164: 165: CHECK_OCTAL(short, 77777); 166: CHECK_OCTAL(unsigned short, 177777U); 167: CHECK_OCTAL(int, 17777777777); 168: CHECK_OCTAL(unsigned int, 37777777777U); 169: CHECK_OCTAL(long, 17777777777L); 170: CHECK_OCTAL(unsigned long, 37777777777UL); 171: #ifdef HAVE_LONG_LONG 172: CHECK_OCTAL(long long, 777777777777777777777LL); 173: #endif 174: #ifdef HAVE_UNSIGNED_LONG_LONG 175: CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); 176: #endif 177: 178: #undef CHECK_OCTAL 179: 180: printf("Testing decimal\n"); 181: 182: #define CHECK_DECIMAL(type, value) \ 183: do { \ 184: type v; \ 185: CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ 186: CHECK_EQ(v, value); \ 187: CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ 188: CHECK_EQ(v, value); \ 189: } while(0) 190: 191: CHECK_DECIMAL(short, -1); 192: CHECK_DECIMAL(unsigned short, 9999); 193: CHECK_DECIMAL(int, -1000); 194: CHECK_DECIMAL(unsigned int, 12345U); 195: CHECK_DECIMAL(long, -10000000L); 196: CHECK_DECIMAL(unsigned long, 3083324652U); 197: #ifdef HAVE_LONG_LONG 198: CHECK_DECIMAL(long long, -100000000000000LL); 199: #endif 200: #ifdef HAVE_UNSIGNED_LONG_LONG 201: CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 202: #endif 203: 204: #undef CHECK_DECIMAL 205: 206: } 207: 208: static void TestReplace() { 209: printf("Testing Replace\n"); 210: 211: struct ReplaceTest { 212: const char *regexp; 213: const char *rewrite; 214: const char *original; 215: const char *single; 216: const char *global; 217: int global_count; // the expected return value from ReplaceAll 218: }; 219: static const ReplaceTest tests[] = { 220: { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 221: "\\2\\1ay", 222: "the quick brown fox jumps over the lazy dogs.", 223: "ethay quick brown fox jumps over the lazy dogs.", 224: "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 225: 9 }, 226: { "\\w+", 227: "\\0-NOSPAM", 228: "paul.haahr@google.com", 229: "paul-NOSPAM.haahr@google.com", 230: "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", 231: 4 }, 232: { "^", 233: "(START)", 234: "foo", 235: "(START)foo", 236: "(START)foo", 237: 1 }, 238: { "^", 239: "(START)", 240: "", 241: "(START)", 242: "(START)", 243: 1 }, 244: { "$", 245: "(END)", 246: "", 247: "(END)", 248: "(END)", 249: 1 }, 250: { "b", 251: "bb", 252: "ababababab", 253: "abbabababab", 254: "abbabbabbabbabb", 255: 5 }, 256: { "b", 257: "bb", 258: "bbbbbb", 259: "bbbbbbb", 260: "bbbbbbbbbbbb", 261: 6 }, 262: { "b+", 263: "bb", 264: "bbbbbb", 265: "bb", 266: "bb", 267: 1 }, 268: { "b*", 269: "bb", 270: "bbbbbb", 271: "bb", 272: "bbbb", 273: 2 }, 274: { "b*", 275: "bb", 276: "aaaaa", 277: "bbaaaaa", 278: "bbabbabbabbabbabb", 279: 6 }, 280: { "b*", 281: "bb", 282: "aa\naa\n", 283: "bbaa\naa\n", 284: "bbabbabb\nbbabbabb\nbb", 285: 7 }, 286: { "b*", 287: "bb", 288: "aa\raa\r", 289: "bbaa\raa\r", 290: "bbabbabb\rbbabbabb\rbb", 291: 7 }, 292: { "b*", 293: "bb", 294: "aa\r\naa\r\n", 295: "bbaa\r\naa\r\n", 296: "bbabbabb\r\nbbabbabb\r\nbb", 297: 7 }, 298: // Check empty-string matching (it's tricky!) 299: { "aa|b*", 300: "@", 301: "aa", 302: "@", 303: "@@", 304: 2 }, 305: { "b*|aa", 306: "@", 307: "aa", 308: "@aa", 309: "@@@", 310: 3 }, 311: #ifdef SUPPORT_UTF8 312: { "b*", 313: "bb", 314: "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 315: "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", 316: "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", 317: 5 }, 318: { "b*", 319: "bb", 320: "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 321: "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", 322: ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" 323: "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), 324: 9 }, 325: #endif 326: { "", NULL, NULL, NULL, NULL, 0 } 327: }; 328: 329: #ifdef SUPPORT_UTF8 330: const bool support_utf8 = true; 331: #else 332: const bool support_utf8 = false; 333: #endif 334: 335: for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 336: RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); 337: assert(re.error().empty()); 338: string one(t->original); 339: CHECK(re.Replace(t->rewrite, &one)); 340: CHECK_EQ(one, t->single); 341: string all(t->original); 342: const int replace_count = re.GlobalReplace(t->rewrite, &all); 343: CHECK_EQ(all, t->global); 344: CHECK_EQ(replace_count, t->global_count); 345: } 346: 347: // One final test: test \r\n replacement when we're not in CRLF mode 348: { 349: RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); 350: assert(re.error().empty()); 351: string all("aa\r\naa\r\n"); 352: CHECK_EQ(re.GlobalReplace("bb", &all), 9); 353: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 354: } 355: { 356: RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); 357: assert(re.error().empty()); 358: string all("aa\r\naa\r\n"); 359: CHECK_EQ(re.GlobalReplace("bb", &all), 9); 360: CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 361: } 362: // TODO: test what happens when no PCRE_NEWLINE_* flag is set. 363: // Alas, the answer depends on how pcre was compiled. 364: } 365: 366: static void TestExtract() { 367: printf("Testing Extract\n"); 368: 369: string s; 370: 371: CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); 372: CHECK_EQ(s, "kremvax!boris"); 373: 374: // check the RE interface as well 375: CHECK(RE(".*").Extract("'\\0'", "foo", &s)); 376: CHECK_EQ(s, "'foo'"); 377: CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); 378: CHECK_EQ(s, "'foo'"); 379: } 380: 381: static void TestConsume() { 382: printf("Testing Consume\n"); 383: 384: string word; 385: 386: string s(" aaa b!@#$@#$cccc"); 387: StringPiece input(s); 388: 389: RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 390: CHECK(r.Consume(&input, &word)); 391: CHECK_EQ(word, "aaa"); 392: CHECK(r.Consume(&input, &word)); 393: CHECK_EQ(word, "b"); 394: CHECK(! r.Consume(&input, &word)); 395: } 396: 397: static void TestFindAndConsume() { 398: printf("Testing FindAndConsume\n"); 399: 400: string word; 401: 402: string s(" aaa b!@#$@#$cccc"); 403: StringPiece input(s); 404: 405: RE r("(\\w+)"); // matches a word 406: CHECK(r.FindAndConsume(&input, &word)); 407: CHECK_EQ(word, "aaa"); 408: CHECK(r.FindAndConsume(&input, &word)); 409: CHECK_EQ(word, "b"); 410: CHECK(r.FindAndConsume(&input, &word)); 411: CHECK_EQ(word, "cccc"); 412: CHECK(! r.FindAndConsume(&input, &word)); 413: } 414: 415: static void TestMatchNumberPeculiarity() { 416: printf("Testing match-number peculiarity\n"); 417: 418: string word1; 419: string word2; 420: string word3; 421: 422: RE r("(foo)|(bar)|(baz)"); 423: CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); 424: CHECK_EQ(word1, "foo"); 425: CHECK_EQ(word2, ""); 426: CHECK_EQ(word3, ""); 427: CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); 428: CHECK_EQ(word1, ""); 429: CHECK_EQ(word2, "bar"); 430: CHECK_EQ(word3, ""); 431: CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); 432: CHECK_EQ(word1, ""); 433: CHECK_EQ(word2, ""); 434: CHECK_EQ(word3, "baz"); 435: CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); 436: 437: string a; 438: CHECK(RE("(foo)|hello").FullMatch("hello", &a)); 439: CHECK_EQ(a, ""); 440: } 441: 442: static void TestRecursion() { 443: printf("Testing recursion\n"); 444: 445: // Get one string that passes (sometimes), one that never does. 446: string text_good("abcdefghijk"); 447: string text_bad("acdefghijkl"); 448: 449: // According to pcretest, matching text_good against (\w+)*b 450: // requires match_limit of at least 8192, and match_recursion_limit 451: // of at least 37. 452: 453: RE_Options options_ml; 454: options_ml.set_match_limit(8192); 455: RE re("(\\w+)*b", options_ml); 456: CHECK(re.PartialMatch(text_good) == true); 457: CHECK(re.PartialMatch(text_bad) == false); 458: CHECK(re.FullMatch(text_good) == false); 459: CHECK(re.FullMatch(text_bad) == false); 460: 461: options_ml.set_match_limit(1024); 462: RE re2("(\\w+)*b", options_ml); 463: CHECK(re2.PartialMatch(text_good) == false); // because of match_limit 464: CHECK(re2.PartialMatch(text_bad) == false); 465: CHECK(re2.FullMatch(text_good) == false); 466: CHECK(re2.FullMatch(text_bad) == false); 467: 468: RE_Options options_mlr; 469: options_mlr.set_match_limit_recursion(50); 470: RE re3("(\\w+)*b", options_mlr); 471: CHECK(re3.PartialMatch(text_good) == true); 472: CHECK(re3.PartialMatch(text_bad) == false); 473: CHECK(re3.FullMatch(text_good) == false); 474: CHECK(re3.FullMatch(text_bad) == false); 475: 476: options_mlr.set_match_limit_recursion(10); 477: RE re4("(\\w+)*b", options_mlr); 478: CHECK(re4.PartialMatch(text_good) == false); 479: CHECK(re4.PartialMatch(text_bad) == false); 480: CHECK(re4.FullMatch(text_good) == false); 481: CHECK(re4.FullMatch(text_bad) == false); 482: } 483: 484: // A meta-quoted string, interpreted as a pattern, should always match 485: // the original unquoted string. 486: static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { 487: string quoted = RE::QuoteMeta(unquoted); 488: RE re(quoted, options); 489: CHECK(re.FullMatch(unquoted)); 490: } 491: 492: // A string containing meaningful regexp characters, which is then meta- 493: // quoted, should not generally match a string the unquoted string does. 494: static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 495: RE_Options options = RE_Options()) { 496: string quoted = RE::QuoteMeta(unquoted); 497: RE re(quoted, options); 498: CHECK(!re.FullMatch(should_not_match)); 499: } 500: 501: // Tests that quoted meta characters match their original strings, 502: // and that a few things that shouldn't match indeed do not. 503: static void TestQuotaMetaSimple() { 504: TestQuoteMeta("foo"); 505: TestQuoteMeta("foo.bar"); 506: TestQuoteMeta("foo\\.bar"); 507: TestQuoteMeta("[1-9]"); 508: TestQuoteMeta("1.5-2.0?"); 509: TestQuoteMeta("\\d"); 510: TestQuoteMeta("Who doesn't like ice cream?"); 511: TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 512: TestQuoteMeta("((?!)xxx).*yyy"); 513: TestQuoteMeta("(["); 514: TestQuoteMeta(string("foo\0bar", 7)); 515: } 516: 517: static void TestQuoteMetaSimpleNegative() { 518: NegativeTestQuoteMeta("foo", "bar"); 519: NegativeTestQuoteMeta("...", "bar"); 520: NegativeTestQuoteMeta("\\.", "."); 521: NegativeTestQuoteMeta("\\.", ".."); 522: NegativeTestQuoteMeta("(a)", "a"); 523: NegativeTestQuoteMeta("(a|b)", "a"); 524: NegativeTestQuoteMeta("(a|b)", "(a)"); 525: NegativeTestQuoteMeta("(a|b)", "a|b"); 526: NegativeTestQuoteMeta("[0-9]", "0"); 527: NegativeTestQuoteMeta("[0-9]", "0-9"); 528: NegativeTestQuoteMeta("[0-9]", "[9]"); 529: NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 530: } 531: 532: static void TestQuoteMetaLatin1() { 533: TestQuoteMeta("3\xb2 = 9"); 534: } 535: 536: static void TestQuoteMetaUtf8() { 537: #ifdef SUPPORT_UTF8 538: TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); 539: TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 540: TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) 541: TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character 542: TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) 543: TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) 544: TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work 545: NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) 546: "27\\\xc2\\\xb0", 547: pcrecpp::UTF8()); 548: #endif 549: } 550: 551: static void TestQuoteMetaAll() { 552: printf("Testing QuoteMeta\n"); 553: TestQuotaMetaSimple(); 554: TestQuoteMetaSimpleNegative(); 555: TestQuoteMetaLatin1(); 556: TestQuoteMetaUtf8(); 557: } 558: 559: // 560: // Options tests contributed by 561: // Giuseppe Maxia, CTO, Stardata s.r.l. 562: // July 2005 563: // 564: static void GetOneOptionResult( 565: const char *option_name, 566: const char *regex, 567: const char *str, 568: RE_Options options, 569: bool full, 570: string expected) { 571: 572: printf("Testing Option <%s>\n", option_name); 573: if(VERBOSE_TEST) 574: printf("/%s/ finds \"%s\" within \"%s\" \n", 575: regex, 576: expected.c_str(), 577: str); 578: string captured(""); 579: if (full) 580: RE(regex,options).FullMatch(str, &captured); 581: else 582: RE(regex,options).PartialMatch(str, &captured); 583: CHECK_EQ(captured, expected); 584: } 585: 586: static void TestOneOption( 587: const char *option_name, 588: const char *regex, 589: const char *str, 590: RE_Options options, 591: bool full, 592: bool assertive = true) { 593: 594: printf("Testing Option <%s>\n", option_name); 595: if (VERBOSE_TEST) 596: printf("'%s' %s /%s/ \n", 597: str, 598: (assertive? "matches" : "doesn't match"), 599: regex); 600: if (assertive) { 601: if (full) 602: CHECK(RE(regex,options).FullMatch(str)); 603: else 604: CHECK(RE(regex,options).PartialMatch(str)); 605: } else { 606: if (full) 607: CHECK(!RE(regex,options).FullMatch(str)); 608: else 609: CHECK(!RE(regex,options).PartialMatch(str)); 610: } 611: } 612: 613: static void Test_CASELESS() { 614: RE_Options options; 615: RE_Options options2; 616: 617: options.set_caseless(true); 618: TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); 619: TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); 620: TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); 621: 622: TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); 623: TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); 624: options.set_caseless(false); 625: TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); 626: } 627: 628: static void Test_MULTILINE() { 629: RE_Options options; 630: RE_Options options2; 631: const char *str = "HELLO\n" "cruel\n" "world\n"; 632: 633: options.set_multiline(true); 634: TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); 635: TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); 636: TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); 637: options.set_multiline(false); 638: TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); 639: } 640: 641: static void Test_DOTALL() { 642: RE_Options options; 643: RE_Options options2; 644: const char *str = "HELLO\n" "cruel\n" "world"; 645: 646: options.set_dotall(true); 647: TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); 648: TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); 649: TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); 650: options.set_dotall(false); 651: TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); 652: } 653: 654: static void Test_DOLLAR_ENDONLY() { 655: RE_Options options; 656: RE_Options options2; 657: const char *str = "HELLO world\n"; 658: 659: TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); 660: options.set_dollar_endonly(true); 661: TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); 662: TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); 663: } 664: 665: static void Test_EXTRA() { 666: RE_Options options; 667: const char *str = "HELLO"; 668: 669: options.set_extra(true); 670: TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); 671: TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); 672: options.set_extra(false); 673: TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); 674: } 675: 676: static void Test_EXTENDED() { 677: RE_Options options; 678: RE_Options options2; 679: const char *str = "HELLO world"; 680: 681: options.set_extended(true); 682: TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); 683: TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); 684: TestOneOption("EXTENDED (class)", 685: "^ HE L{2} O " 686: "\\s+ " 687: "\\w+ $ ", 688: str, 689: options, 690: false); 691: 692: TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); 693: TestOneOption("EXTENDED (function)", 694: "^ HE L{2} O " 695: "\\s+ " 696: "\\w+ $ ", 697: str, 698: pcrecpp::EXTENDED(), 699: false); 700: 701: options.set_extended(false); 702: TestOneOption("no EXTENDED", "HELLO world", str, options, false); 703: } 704: 705: static void Test_NO_AUTO_CAPTURE() { 706: RE_Options options; 707: const char *str = "HELLO world"; 708: string captured; 709: 710: printf("Testing Option <no NO_AUTO_CAPTURE>\n"); 711: if (VERBOSE_TEST) 712: printf("parentheses capture text\n"); 713: RE re("(world|universe)$", options); 714: CHECK(re.Extract("\\1", str , &captured)); 715: CHECK_EQ(captured, "world"); 716: options.set_no_auto_capture(true); 717: printf("testing Option <NO_AUTO_CAPTURE>\n"); 718: if (VERBOSE_TEST) 719: printf("parentheses do not capture text\n"); 720: re.Extract("\\1",str, &captured ); 721: CHECK_EQ(captured, "world"); 722: } 723: 724: static void Test_UNGREEDY() { 725: RE_Options options; 726: const char *str = "HELLO, 'this' is the 'world'"; 727: 728: options.set_ungreedy(true); 729: GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); 730: GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); 731: GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); 732: 733: options.set_ungreedy(false); 734: GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); 735: GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); 736: } 737: 738: static void Test_all_options() { 739: const char *str = "HELLO\n" "cruel\n" "world"; 740: RE_Options options; 741: options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); 742: 743: TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); 744: options.set_all_options(0); 745: TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); 746: options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); 747: 748: TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); 749: TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", 750: " ^ c r u e l $ ", 751: str, 752: RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), 753: false); 754: 755: TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", 756: " ^ c r u e l $ ", 757: str, 758: RE_Options() 759: .set_multiline(true) 760: .set_extended(true), 761: false); 762: 763: options.set_all_options(0); 764: TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); 765: 766: } 767: 768: static void TestOptions() { 769: printf("Testing Options\n"); 770: Test_CASELESS(); 771: Test_MULTILINE(); 772: Test_DOTALL(); 773: Test_DOLLAR_ENDONLY(); 774: Test_EXTENDED(); 775: Test_NO_AUTO_CAPTURE(); 776: Test_UNGREEDY(); 777: Test_EXTRA(); 778: Test_all_options(); 779: } 780: 781: static void TestConstructors() { 782: printf("Testing constructors\n"); 783: 784: RE_Options options; 785: options.set_dotall(true); 786: const char *str = "HELLO\n" "cruel\n" "world"; 787: 788: RE orig("HELLO.*world", options); 789: CHECK(orig.FullMatch(str)); 790: 791: RE copy1(orig); 792: CHECK(copy1.FullMatch(str)); 793: 794: RE copy2("not a match"); 795: CHECK(!copy2.FullMatch(str)); 796: copy2 = copy1; 797: CHECK(copy2.FullMatch(str)); 798: copy2 = orig; 799: CHECK(copy2.FullMatch(str)); 800: 801: // Make sure when we assign to ourselves, nothing bad happens 802: orig = orig; 803: copy1 = copy1; 804: copy2 = copy2; 805: CHECK(orig.FullMatch(str)); 806: CHECK(copy1.FullMatch(str)); 807: CHECK(copy2.FullMatch(str)); 808: } 809: 810: int main(int argc, char** argv) { 811: // Treat any flag as --help 812: if (argc > 1 && argv[1][0] == '-') { 813: printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" 814: " If 'timingX ###' is specified, run the given timing test\n" 815: " with the given number of iterations, rather than running\n" 816: " the default corectness test.\n", argv[0]); 817: return 0; 818: } 819: 820: if (argc > 1) { 821: if ( argc == 2 || atoi(argv[2]) == 0) { 822: printf("timing mode needs a num-iters argument\n"); 823: return 1; 824: } 825: if (!strcmp(argv[1], "timing1")) 826: Timing1(atoi(argv[2])); 827: else if (!strcmp(argv[1], "timing2")) 828: Timing2(atoi(argv[2])); 829: else if (!strcmp(argv[1], "timing3")) 830: Timing3(atoi(argv[2])); 831: else 832: printf("Unknown argument '%s'\n", argv[1]); 833: return 0; 834: } 835: 836: printf("PCRE C++ wrapper tests\n"); 837: printf("Testing FullMatch\n"); 838: 839: int i; 840: string s; 841: 842: /***** FullMatch with no args *****/ 843: 844: CHECK(RE("h.*o").FullMatch("hello")); 845: CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front 846: CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end 847: CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op 848: CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op 849: CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops 850: 851: /***** FullMatch with args *****/ 852: 853: // Zero-arg 854: CHECK(RE("\\d+").FullMatch("1001")); 855: 856: // Single-arg 857: CHECK(RE("(\\d+)").FullMatch("1001", &i)); 858: CHECK_EQ(i, 1001); 859: CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); 860: CHECK_EQ(i, -123); 861: CHECK(!RE("()\\d+").FullMatch("10", &i)); 862: CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", 863: &i)); 864: 865: // Digits surrounding integer-arg 866: CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); 867: CHECK_EQ(i, 23); 868: CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); 869: CHECK_EQ(i, 1); 870: CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); 871: CHECK_EQ(i, -1); 872: CHECK(RE("(\\d)").PartialMatch("1234", &i)); 873: CHECK_EQ(i, 1); 874: CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); 875: CHECK_EQ(i, -1); 876: 877: // String-arg 878: CHECK(RE("h(.*)o").FullMatch("hello", &s)); 879: CHECK_EQ(s, string("ell")); 880: 881: // StringPiece-arg 882: StringPiece sp; 883: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); 884: CHECK_EQ(sp.size(), 4); 885: CHECK(memcmp(sp.data(), "ruby", 4) == 0); 886: CHECK_EQ(i, 1234); 887: 888: // Multi-arg 889: CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); 890: CHECK_EQ(s, string("ruby")); 891: CHECK_EQ(i, 1234); 892: 893: // Ignore non-void* NULL arg 894: CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); 895: CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); 896: CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); 897: CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); 898: #ifdef HAVE_LONG_LONG 899: CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); 900: #endif 901: CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); 902: CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); 903: 904: // Fail on non-void* NULL arg if the match doesn't parse for the given type. 905: CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); 906: CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); 907: CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); 908: CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); 909: CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); 910: 911: // Ignored arg 912: CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); 913: CHECK_EQ(s, string("ruby")); 914: CHECK_EQ(i, 1234); 915: 916: // Type tests 917: { 918: char c; 919: CHECK(RE("(H)ello").FullMatch("Hello", &c)); 920: CHECK_EQ(c, 'H'); 921: } 922: { 923: unsigned char c; 924: CHECK(RE("(H)ello").FullMatch("Hello", &c)); 925: CHECK_EQ(c, static_cast<unsigned char>('H')); 926: } 927: { 928: short v; 929: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 930: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 931: CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 932: CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); 933: CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); 934: CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); 935: } 936: { 937: unsigned short v; 938: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 939: CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 940: CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); 941: CHECK(!RE("(\\d+)").FullMatch("65536", &v)); 942: } 943: { 944: int v; 945: static const int max_value = 0x7fffffff; 946: static const int min_value = -max_value - 1; 947: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 948: CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 949: CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); 950: CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); 951: CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); 952: CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); 953: } 954: { 955: unsigned int v; 956: static const unsigned int max_value = 0xfffffffful; 957: CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 958: CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); 959: CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); 960: } 961: #ifdef HAVE_LONG_LONG 962: # if defined(__MINGW__) || defined(__MINGW32__) 963: # define LLD "%I64d" 964: # define LLU "%I64u" 965: # else 966: # define LLD "%lld" 967: # define LLU "%llu" 968: # endif 969: { 970: long long v; 971: static const long long max_value = 0x7fffffffffffffffLL; 972: static const long long min_value = -max_value - 1; 973: char buf[32]; // definitely big enough for a long long 974: 975: CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 976: CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); 977: 978: sprintf(buf, LLD, max_value); 979: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 980: 981: sprintf(buf, LLD, min_value); 982: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); 983: 984: sprintf(buf, LLD, max_value); 985: assert(buf[strlen(buf)-1] != '9'); 986: buf[strlen(buf)-1]++; 987: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 988: 989: sprintf(buf, LLD, min_value); 990: assert(buf[strlen(buf)-1] != '9'); 991: buf[strlen(buf)-1]++; 992: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 993: } 994: #endif 995: #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG 996: { 997: unsigned long long v; 998: long long v2; 999: static const unsigned long long max_value = 0xffffffffffffffffULL; 1000: char buf[32]; // definitely big enough for a unsigned long long 1001: 1002: CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); 1003: CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); 1004: 1005: sprintf(buf, LLU, max_value); 1006: CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 1007: 1008: assert(buf[strlen(buf)-1] != '9'); 1009: buf[strlen(buf)-1]++; 1010: CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 1011: } 1012: #endif 1013: { 1014: float v; 1015: CHECK(RE("(.*)").FullMatch("100", &v)); 1016: CHECK(RE("(.*)").FullMatch("-100.", &v)); 1017: CHECK(RE("(.*)").FullMatch("1e23", &v)); 1018: } 1019: { 1020: double v; 1021: CHECK(RE("(.*)").FullMatch("100", &v)); 1022: CHECK(RE("(.*)").FullMatch("-100.", &v)); 1023: CHECK(RE("(.*)").FullMatch("1e23", &v)); 1024: } 1025: 1026: // Check that matching is fully anchored 1027: CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); 1028: CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); 1029: CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); 1030: CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); 1031: 1032: // Braces 1033: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); 1034: CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); 1035: CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); 1036: 1037: // Complicated RE 1038: CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); 1039: CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); 1040: CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); 1041: CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); 1042: 1043: // Check full-match handling (needs '$' tacked on internally) 1044: CHECK(RE("fo|foo").FullMatch("fo")); 1045: CHECK(RE("fo|foo").FullMatch("foo")); 1046: CHECK(RE("fo|foo$").FullMatch("fo")); 1047: CHECK(RE("fo|foo$").FullMatch("foo")); 1048: CHECK(RE("foo$").FullMatch("foo")); 1049: CHECK(!RE("foo\\$").FullMatch("foo$bar")); 1050: CHECK(!RE("fo|bar").FullMatch("fox")); 1051: 1052: // Uncomment the following if we change the handling of '$' to 1053: // prevent it from matching a trailing newline 1054: if (false) { 1055: // Check that we don't get bitten by pcre's special handling of a 1056: // '\n' at the end of the string matching '$' 1057: CHECK(!RE("foo$").PartialMatch("foo\n")); 1058: } 1059: 1060: // Number of args 1061: int a[16]; 1062: CHECK(RE("").FullMatch("")); 1063: 1064: memset(a, 0, sizeof(0)); 1065: CHECK(RE("(\\d){1}").FullMatch("1", 1066: &a[0])); 1067: CHECK_EQ(a[0], 1); 1068: 1069: memset(a, 0, sizeof(0)); 1070: CHECK(RE("(\\d)(\\d)").FullMatch("12", 1071: &a[0], &a[1])); 1072: CHECK_EQ(a[0], 1); 1073: CHECK_EQ(a[1], 2); 1074: 1075: memset(a, 0, sizeof(0)); 1076: CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", 1077: &a[0], &a[1], &a[2])); 1078: CHECK_EQ(a[0], 1); 1079: CHECK_EQ(a[1], 2); 1080: CHECK_EQ(a[2], 3); 1081: 1082: memset(a, 0, sizeof(0)); 1083: CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", 1084: &a[0], &a[1], &a[2], &a[3])); 1085: CHECK_EQ(a[0], 1); 1086: CHECK_EQ(a[1], 2); 1087: CHECK_EQ(a[2], 3); 1088: CHECK_EQ(a[3], 4); 1089: 1090: memset(a, 0, sizeof(0)); 1091: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", 1092: &a[0], &a[1], &a[2], 1093: &a[3], &a[4])); 1094: CHECK_EQ(a[0], 1); 1095: CHECK_EQ(a[1], 2); 1096: CHECK_EQ(a[2], 3); 1097: CHECK_EQ(a[3], 4); 1098: CHECK_EQ(a[4], 5); 1099: 1100: memset(a, 0, sizeof(0)); 1101: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", 1102: &a[0], &a[1], &a[2], 1103: &a[3], &a[4], &a[5])); 1104: CHECK_EQ(a[0], 1); 1105: CHECK_EQ(a[1], 2); 1106: CHECK_EQ(a[2], 3); 1107: CHECK_EQ(a[3], 4); 1108: CHECK_EQ(a[4], 5); 1109: CHECK_EQ(a[5], 6); 1110: 1111: memset(a, 0, sizeof(0)); 1112: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", 1113: &a[0], &a[1], &a[2], &a[3], 1114: &a[4], &a[5], &a[6])); 1115: CHECK_EQ(a[0], 1); 1116: CHECK_EQ(a[1], 2); 1117: CHECK_EQ(a[2], 3); 1118: CHECK_EQ(a[3], 4); 1119: CHECK_EQ(a[4], 5); 1120: CHECK_EQ(a[5], 6); 1121: CHECK_EQ(a[6], 7); 1122: 1123: memset(a, 0, sizeof(0)); 1124: CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" 1125: "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( 1126: "1234567890123456", 1127: &a[0], &a[1], &a[2], &a[3], 1128: &a[4], &a[5], &a[6], &a[7], 1129: &a[8], &a[9], &a[10], &a[11], 1130: &a[12], &a[13], &a[14], &a[15])); 1131: CHECK_EQ(a[0], 1); 1132: CHECK_EQ(a[1], 2); 1133: CHECK_EQ(a[2], 3); 1134: CHECK_EQ(a[3], 4); 1135: CHECK_EQ(a[4], 5); 1136: CHECK_EQ(a[5], 6); 1137: CHECK_EQ(a[6], 7); 1138: CHECK_EQ(a[7], 8); 1139: CHECK_EQ(a[8], 9); 1140: CHECK_EQ(a[9], 0); 1141: CHECK_EQ(a[10], 1); 1142: CHECK_EQ(a[11], 2); 1143: CHECK_EQ(a[12], 3); 1144: CHECK_EQ(a[13], 4); 1145: CHECK_EQ(a[14], 5); 1146: CHECK_EQ(a[15], 6); 1147: 1148: /***** PartialMatch *****/ 1149: 1150: printf("Testing PartialMatch\n"); 1151: 1152: CHECK(RE("h.*o").PartialMatch("hello")); 1153: CHECK(RE("h.*o").PartialMatch("othello")); 1154: CHECK(RE("h.*o").PartialMatch("hello!")); 1155: CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); 1156: 1157: /***** other tests *****/ 1158: 1159: RadixTests(); 1160: TestReplace(); 1161: TestExtract(); 1162: TestConsume(); 1163: TestFindAndConsume(); 1164: TestQuoteMetaAll(); 1165: TestMatchNumberPeculiarity(); 1166: 1167: // Check the pattern() accessor 1168: { 1169: const string kPattern = "http://([^/]+)/.*"; 1170: const RE re(kPattern); 1171: CHECK_EQ(kPattern, re.pattern()); 1172: } 1173: 1174: // Check RE error field. 1175: { 1176: RE re("foo"); 1177: CHECK(re.error().empty()); // Must have no error 1178: } 1179: 1180: #ifdef SUPPORT_UTF8 1181: // Check UTF-8 handling 1182: { 1183: printf("Testing UTF-8 handling\n"); 1184: 1185: // Three Japanese characters (nihongo) 1186: const unsigned char utf8_string[] = { 1187: 0xe6, 0x97, 0xa5, // 65e5 1188: 0xe6, 0x9c, 0xac, // 627c 1189: 0xe8, 0xaa, 0x9e, // 8a9e 1190: 0 1191: }; 1192: const unsigned char utf8_pattern[] = { 1193: '.', 1194: 0xe6, 0x9c, 0xac, // 627c 1195: '.', 1196: 0 1197: }; 1198: 1199: // Both should match in either mode, bytes or UTF-8 1200: RE re_test1("........."); 1201: CHECK(re_test1.FullMatch(utf8_string)); 1202: RE re_test2("...", pcrecpp::UTF8()); 1203: CHECK(re_test2.FullMatch(utf8_string)); 1204: 1205: // Check that '.' matches one byte or UTF-8 character 1206: // according to the mode. 1207: string ss; 1208: RE re_test3("(.)"); 1209: CHECK(re_test3.PartialMatch(utf8_string, &ss)); 1210: CHECK_EQ(ss, string("\xe6")); 1211: RE re_test4("(.)", pcrecpp::UTF8()); 1212: CHECK(re_test4.PartialMatch(utf8_string, &ss)); 1213: CHECK_EQ(ss, string("\xe6\x97\xa5")); 1214: 1215: // Check that string matches itself in either mode 1216: RE re_test5(utf8_string); 1217: CHECK(re_test5.FullMatch(utf8_string)); 1218: RE re_test6(utf8_string, pcrecpp::UTF8()); 1219: CHECK(re_test6.FullMatch(utf8_string)); 1220: 1221: // Check that pattern matches string only in UTF8 mode 1222: RE re_test7(utf8_pattern); 1223: CHECK(!re_test7.FullMatch(utf8_string)); 1224: RE re_test8(utf8_pattern, pcrecpp::UTF8()); 1225: CHECK(re_test8.FullMatch(utf8_string)); 1226: } 1227: 1228: // Check that ungreedy, UTF8 regular expressions don't match when they 1229: // oughtn't -- see bug 82246. 1230: { 1231: // This code always worked. 1232: const char* pattern = "\\w+X"; 1233: const string target = "a aX"; 1234: RE match_sentence(pattern); 1235: RE match_sentence_re(pattern, pcrecpp::UTF8()); 1236: 1237: CHECK(!match_sentence.FullMatch(target)); 1238: CHECK(!match_sentence_re.FullMatch(target)); 1239: } 1240: 1241: { 1242: const char* pattern = "(?U)\\w+X"; 1243: const string target = "a aX"; 1244: RE match_sentence(pattern); 1245: RE match_sentence_re(pattern, pcrecpp::UTF8()); 1246: 1247: CHECK(!match_sentence.FullMatch(target)); 1248: CHECK(!match_sentence_re.FullMatch(target)); 1249: } 1250: #endif /* def SUPPORT_UTF8 */ 1251: 1252: printf("Testing error reporting\n"); 1253: 1254: { RE re("a\\1"); CHECK(!re.error().empty()); } 1255: { 1256: RE re("a[x"); 1257: CHECK(!re.error().empty()); 1258: } 1259: { 1260: RE re("a[z-a]"); 1261: CHECK(!re.error().empty()); 1262: } 1263: { 1264: RE re("a[[:foobar:]]"); 1265: CHECK(!re.error().empty()); 1266: } 1267: { 1268: RE re("a(b"); 1269: CHECK(!re.error().empty()); 1270: } 1271: { 1272: RE re("a\\"); 1273: CHECK(!re.error().empty()); 1274: } 1275: 1276: // Test that recursion is stopped 1277: TestRecursion(); 1278: 1279: // Test Options 1280: if (getenv("VERBOSE_TEST") != NULL) 1281: VERBOSE_TEST = true; 1282: TestOptions(); 1283: 1284: // Test the constructors 1285: TestConstructors(); 1286: 1287: // Done 1288: printf("OK\n"); 1289: 1290: return 0; 1291: }