--- embedaddon/pcre/pcre_jit_test.c 2012/02/21 23:50:25 1.1.1.2 +++ embedaddon/pcre/pcre_jit_test.c 2012/10/09 09:19:17 1.1.1.3 @@ -188,7 +188,8 @@ static struct regression_test_case regression_test_cas { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" }, { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" }, { 0, 0, "ab$", "ab" }, - { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" }, + { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" }, + { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" }, { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" }, { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" }, { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" }, @@ -652,10 +653,43 @@ static struct regression_test_case regression_test_cas { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" }, { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" }, + /* Partial matching. */ + { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" }, + { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" }, + { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" }, + { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" }, + { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" }, + { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, + { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" }, + { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" }, + + /* (*MARK) verb. */ + { MUA, 0, "a(*MARK:aa)a", "ababaa" }, + { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" }, + { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" }, + { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" }, + { MUA, 0, "(?>a(*:aa))b|ac", "ac" }, + { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" }, + { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" }, + { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, + { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, + { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, + { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, + { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, + { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, + { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, + + /* (*COMMIT) verb. */ + { MUA, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, + { MUA, 0, "aa(*COMMIT)b", "xaxaab" }, + { MUA, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" }, + { MUA, 0, "(?=a(*COMMIT)b|ac)ac|(*:m)(a)c", "ac" }, + { MUA, 0, "(?!a(*COMMIT)(*:msg)b)a(c)|cd", "acd" }, + /* Deep recursion. */ { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " }, - { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" }, + { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaa b" }, /* Deep recursion: Stack limit reached. */ { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" }, @@ -674,7 +708,7 @@ static const unsigned char *tables(int mode) static unsigned char *tables_copy; const char *errorptr; int erroroffset; - const unsigned char *default_tables; + unsigned char *default_tables; #ifdef SUPPORT_PCRE8 pcre *regex; char null_str[1] = { 0 }; @@ -860,27 +894,33 @@ static int regression_tests(void) { struct regression_test_case *current = regression_test_cases; const char *error; - const char *cpu_info; + char *cpu_info; int i, err_offs; int is_successful, is_ascii_pattern, is_ascii_input; int total = 0; int successful = 0; + int successful_row = 0; int counter = 0; + int study_mode; #ifdef SUPPORT_PCRE8 pcre *re8; pcre_extra *extra8; + pcre_extra dummy_extra8; int ovector8_1[32]; int ovector8_2[32]; int return_value8_1, return_value8_2; + unsigned char *mark8_1, *mark8_2; int utf8 = 0, ucp8 = 0; int disabled_flags8 = 0; #endif #ifdef SUPPORT_PCRE16 pcre16 *re16; pcre16_extra *extra16; + pcre16_extra dummy_extra16; int ovector16_1[32]; int ovector16_2[32]; int return_value16_1, return_value16_2; + PCRE_UCHAR16 *mark16_1, *mark16_2; int utf16 = 0, ucp16 = 0; int disabled_flags16 = 0; int length16; @@ -929,18 +969,24 @@ static int regression_tests(void) is_ascii_input = check_ascii(current->input); } + if (current->flags & PCRE_PARTIAL_SOFT) + study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE; + else if (current->flags & PCRE_PARTIAL_HARD) + study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE; + else + study_mode = PCRE_STUDY_JIT_COMPILE; error = NULL; #ifdef SUPPORT_PCRE8 re8 = NULL; if (!(current->start_offset & F_NO8)) re8 = pcre_compile(current->pattern, - current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8), + current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8), &error, &err_offs, tables(0)); extra8 = NULL; if (re8) { error = NULL; - extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error); + extra8 = pcre_study(re8, study_mode, &error); if (!extra8) { printf("\n8 bit: Cannot study pattern: %s\n", current->pattern); pcre_free(re8); @@ -952,6 +998,7 @@ static int regression_tests(void) pcre_free(re8); re8 = NULL; } + extra8->flags |= PCRE_EXTRA_MARK; } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8)) printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern); #endif @@ -964,13 +1011,13 @@ static int regression_tests(void) re16 = NULL; if (!(current->start_offset & F_NO16)) re16 = pcre16_compile(regtest_buf, - current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16), + current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16), &error, &err_offs, tables(0)); extra16 = NULL; if (re16) { error = NULL; - extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error); + extra16 = pcre16_study(re16, study_mode, &error); if (!extra16) { printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); pcre16_free(re16); @@ -982,6 +1029,7 @@ static int regression_tests(void) pcre16_free(re16); re16 = NULL; } + extra16->flags |= PCRE_EXTRA_MARK; } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16)) printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern); #endif @@ -1004,11 +1052,17 @@ static int regression_tests(void) for (i = 0; i < 32; ++i) ovector8_2[i] = -2; if (re8) { + mark8_1 = NULL; + mark8_2 = NULL; setstack8(extra8); + extra8->mark = &mark8_1; return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, - current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32); - return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, - current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32); + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32); + memset(&dummy_extra8, 0, sizeof(pcre_extra)); + dummy_extra8.flags = PCRE_EXTRA_MARK; + dummy_extra8.mark = &mark8_2; + return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32); } #endif @@ -1020,18 +1074,26 @@ static int regression_tests(void) for (i = 0; i < 32; ++i) ovector16_2[i] = -2; if (re16) { + mark16_1 = NULL; + mark16_2 = NULL; setstack16(extra16); if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH); else length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH); + extra16->mark = &mark16_1; return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, - current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32); - return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK, - current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32); + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32); + memset(&dummy_extra16, 0, sizeof(pcre16_extra)); + dummy_extra16.flags = PCRE_EXTRA_MARK; + dummy_extra16.mark = &mark16_2; + return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32); } #endif + /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ + /* If F_DIFF is set, just run the test, but do not compare the results. Segfaults can still be captured. */ @@ -1045,8 +1107,15 @@ static int regression_tests(void) return_value8_1, return_value8_2, return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; - } else if (return_value8_1 >= 0) { - return_value8_1 *= 2; + } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) { + if (return_value8_1 == PCRE_ERROR_PARTIAL) { + return_value8_1 = 2; + return_value16_1 = 2; + } else { + return_value8_1 *= 2; + return_value16_1 *= 2; + } + /* Transform back the results. */ if (current->flags & PCRE_UTF8) { for (i = 0; i < return_value8_1; ++i) { @@ -1073,8 +1142,12 @@ static int regression_tests(void) printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, total, current->pattern, current->input); is_successful = 0; - } else if (return_value8_1 >= 0) { - return_value8_1 *= 2; + } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) { + if (return_value8_1 == PCRE_ERROR_PARTIAL) + return_value8_1 = 2; + else + return_value8_1 *= 2; + for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i]) { printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", @@ -1089,8 +1162,12 @@ static int regression_tests(void) printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; - } else if (return_value16_1 >= 0) { - return_value16_1 *= 2; + } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) { + if (return_value16_1 == PCRE_ERROR_PARTIAL) + return_value16_1 = 2; + else + return_value16_1 *= 2; + for (i = 0; i < return_value16_1; ++i) if (ovector16_1[i] != ovector16_2[i]) { printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", @@ -1138,8 +1215,22 @@ static int regression_tests(void) #endif } - if (is_successful) - successful++; + if (is_successful) { +#ifdef SUPPORT_PCRE8 + if (mark8_1 != mark8_2) { + printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } +#endif +#ifdef SUPPORT_PCRE16 + if (mark16_1 != mark16_2) { + printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", + total, current->pattern, current->input); + is_successful = 0; + } +#endif + } #ifdef SUPPORT_PCRE8 if (re8) { @@ -1154,8 +1245,17 @@ static int regression_tests(void) } #endif - /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ - printf("."); + if (is_successful) { + successful++; + successful_row++; + printf("."); + if (successful_row >= 60) { + successful_row = 0; + printf("\n"); + } + } else + successful_row = 0; + fflush(stdout); current++; }