|
version 1.1, 2012/02/21 23:05:51
|
version 1.1.1.3, 2013/07/22 08:25:55
|
|
Line 6
|
Line 6
|
| and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| |
|
| Written by Philip Hazel |
Written by Philip Hazel |
| Copyright (c) 1997-2009 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
| |
|
| ----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
|
Line 67 Arguments:
|
Line 67 Arguments:
|
| type the newline type |
type the newline type |
| endptr pointer to the end of the string |
endptr pointer to the end of the string |
| lenptr where to return the length |
lenptr where to return the length |
| utf8 TRUE if in utf8 mode | utf TRUE if in utf mode |
| |
|
| Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
| */ |
*/ |
| |
|
| BOOL |
BOOL |
| _pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8) | PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, |
| | BOOL utf) |
| { |
{ |
| int c; | pcre_uint32 c; |
| if (utf8) { GETCHAR(c, ptr); } else c = *ptr; | (void)utf; |
| | #ifdef SUPPORT_UTF |
| | if (utf) |
| | { |
| | GETCHAR(c, ptr); |
| | } |
| | else |
| | #endif /* SUPPORT_UTF */ |
| | c = *ptr; |
| |
|
| |
/* Note that this function is called only for ANY or ANYCRLF. */ |
| |
|
| if (type == NLTYPE_ANYCRLF) switch(c) |
if (type == NLTYPE_ANYCRLF) switch(c) |
| { |
{ |
| case 0x000a: *lenptr = 1; return TRUE; /* LF */ | case CHAR_LF: *lenptr = 1; return TRUE; |
| case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
| return TRUE; /* CR */ | return TRUE; |
| default: return FALSE; |
default: return FALSE; |
| } |
} |
| |
|
|
Line 90 if (type == NLTYPE_ANYCRLF) switch(c)
|
Line 101 if (type == NLTYPE_ANYCRLF) switch(c)
|
| |
|
| else switch(c) |
else switch(c) |
| { |
{ |
| case 0x000a: /* LF */ | #ifdef EBCDIC |
| case 0x000b: /* VT */ | case CHAR_NEL: |
| case 0x000c: *lenptr = 1; return TRUE; /* FF */ | #endif |
| case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | case CHAR_LF: |
| return TRUE; /* CR */ | case CHAR_VT: |
| case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | case CHAR_FF: *lenptr = 1; return TRUE; |
| | |
| | case CHAR_CR: |
| | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
| | return TRUE; |
| | |
| | #ifndef EBCDIC |
| | #ifdef COMPILE_PCRE8 |
| | case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
| case 0x2028: /* LS */ |
case 0x2028: /* LS */ |
| case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
| |
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
| |
case CHAR_NEL: |
| |
case 0x2028: /* LS */ |
| |
case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
| |
#endif /* COMPILE_PCRE8 */ |
| |
#endif /* Not EBCDIC */ |
| |
|
| default: return FALSE; |
default: return FALSE; |
| } |
} |
| } |
} |
|
Line 116 Arguments:
|
Line 142 Arguments:
|
| type the newline type |
type the newline type |
| startptr pointer to the start of the string |
startptr pointer to the start of the string |
| lenptr where to return the length |
lenptr where to return the length |
| utf8 TRUE if in utf8 mode | utf TRUE if in utf mode |
| |
|
| Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
| */ |
*/ |
| |
|
| BOOL |
BOOL |
| _pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8) | PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, |
| | BOOL utf) |
| { |
{ |
| int c; | pcre_uint32 c; |
| | (void)utf; |
| ptr--; |
ptr--; |
| #ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
| if (utf8) | if (utf) |
| { |
{ |
| BACKCHAR(ptr); |
BACKCHAR(ptr); |
| GETCHAR(c, ptr); |
GETCHAR(c, ptr); |
| } |
} |
| else c = *ptr; | else |
| #else /* no UTF-8 support */ | #endif /* SUPPORT_UTF */ |
| c = *ptr; | c = *ptr; |
| #endif /* SUPPORT_UTF8 */ | |
| |
|
| |
/* Note that this function is called only for ANY or ANYCRLF. */ |
| |
|
| if (type == NLTYPE_ANYCRLF) switch(c) |
if (type == NLTYPE_ANYCRLF) switch(c) |
| { |
{ |
| case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | case CHAR_LF: |
| return TRUE; /* LF */ | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
| case 0x000d: *lenptr = 1; return TRUE; /* CR */ | return TRUE; |
| | |
| | case CHAR_CR: *lenptr = 1; return TRUE; |
| default: return FALSE; |
default: return FALSE; |
| } |
} |
| |
|
| |
/* NLTYPE_ANY */ |
| |
|
| else switch(c) |
else switch(c) |
| { |
{ |
| case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | case CHAR_LF: |
| return TRUE; /* LF */ | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
| case 0x000b: /* VT */ | return TRUE; |
| case 0x000c: /* FF */ | |
| case 0x000d: *lenptr = 1; return TRUE; /* CR */ | #ifdef EBCDIC |
| case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | case CHAR_NEL: |
| case 0x2028: /* LS */ | #endif |
| case 0x2029: *lenptr = 3; return TRUE; /* PS */ | case CHAR_VT: |
| | case CHAR_FF: |
| | case CHAR_CR: *lenptr = 1; return TRUE; |
| | |
| | #ifndef EBCDIC |
| | #ifdef COMPILE_PCRE8 |
| | case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
| | case 0x2028: /* LS */ |
| | case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
| | #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
| | case CHAR_NEL: |
| | case 0x2028: /* LS */ |
| | case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
| | #endif /* COMPILE_PCRE8 */ |
| | #endif /* Not EBCDIC */ |
| | |
| default: return FALSE; |
default: return FALSE; |
| } |
} |
| } |
} |