version 1.1, 2012/02/21 23:05:51
|
version 1.1.1.3, 2013/07/22 08:25:55
|
Line 6
|
Line 6
|
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
|
|
Written by Philip Hazel |
Written by Philip Hazel |
Copyright (c) 1997-2009 University of Cambridge | Copyright (c) 1997-2012 University of Cambridge |
|
|
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
Line 67 Arguments:
|
Line 67 Arguments:
|
type the newline type |
type the newline type |
endptr pointer to the end of the string |
endptr pointer to the end of the string |
lenptr where to return the length |
lenptr where to return the length |
utf8 TRUE if in utf8 mode | utf TRUE if in utf mode |
|
|
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
*/ |
*/ |
|
|
BOOL |
BOOL |
_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8) | PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, |
| BOOL utf) |
{ |
{ |
int c; | pcre_uint32 c; |
if (utf8) { GETCHAR(c, ptr); } else c = *ptr; | (void)utf; |
| #ifdef SUPPORT_UTF |
| if (utf) |
| { |
| GETCHAR(c, ptr); |
| } |
| else |
| #endif /* SUPPORT_UTF */ |
| c = *ptr; |
|
|
|
/* Note that this function is called only for ANY or ANYCRLF. */ |
|
|
if (type == NLTYPE_ANYCRLF) switch(c) |
if (type == NLTYPE_ANYCRLF) switch(c) |
{ |
{ |
case 0x000a: *lenptr = 1; return TRUE; /* LF */ | case CHAR_LF: *lenptr = 1; return TRUE; |
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
return TRUE; /* CR */ | return TRUE; |
default: return FALSE; |
default: return FALSE; |
} |
} |
|
|
Line 90 if (type == NLTYPE_ANYCRLF) switch(c)
|
Line 101 if (type == NLTYPE_ANYCRLF) switch(c)
|
|
|
else switch(c) |
else switch(c) |
{ |
{ |
case 0x000a: /* LF */ | #ifdef EBCDIC |
case 0x000b: /* VT */ | case CHAR_NEL: |
case 0x000c: *lenptr = 1; return TRUE; /* FF */ | #endif |
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; | case CHAR_LF: |
return TRUE; /* CR */ | case CHAR_VT: |
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | case CHAR_FF: *lenptr = 1; return TRUE; |
| |
| case CHAR_CR: |
| *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
| return TRUE; |
| |
| #ifndef EBCDIC |
| #ifdef COMPILE_PCRE8 |
| case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
case 0x2028: /* LS */ |
case 0x2028: /* LS */ |
case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
|
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
|
case CHAR_NEL: |
|
case 0x2028: /* LS */ |
|
case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
|
#endif /* COMPILE_PCRE8 */ |
|
#endif /* Not EBCDIC */ |
|
|
default: return FALSE; |
default: return FALSE; |
} |
} |
} |
} |
Line 116 Arguments:
|
Line 142 Arguments:
|
type the newline type |
type the newline type |
startptr pointer to the start of the string |
startptr pointer to the start of the string |
lenptr where to return the length |
lenptr where to return the length |
utf8 TRUE if in utf8 mode | utf TRUE if in utf mode |
|
|
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
*/ |
*/ |
|
|
BOOL |
BOOL |
_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8) | PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, |
| BOOL utf) |
{ |
{ |
int c; | pcre_uint32 c; |
| (void)utf; |
ptr--; |
ptr--; |
#ifdef SUPPORT_UTF8 | #ifdef SUPPORT_UTF |
if (utf8) | if (utf) |
{ |
{ |
BACKCHAR(ptr); |
BACKCHAR(ptr); |
GETCHAR(c, ptr); |
GETCHAR(c, ptr); |
} |
} |
else c = *ptr; | else |
#else /* no UTF-8 support */ | #endif /* SUPPORT_UTF */ |
c = *ptr; | c = *ptr; |
#endif /* SUPPORT_UTF8 */ | |
|
|
|
/* Note that this function is called only for ANY or ANYCRLF. */ |
|
|
if (type == NLTYPE_ANYCRLF) switch(c) |
if (type == NLTYPE_ANYCRLF) switch(c) |
{ |
{ |
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | case CHAR_LF: |
return TRUE; /* LF */ | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
case 0x000d: *lenptr = 1; return TRUE; /* CR */ | return TRUE; |
| |
| case CHAR_CR: *lenptr = 1; return TRUE; |
default: return FALSE; |
default: return FALSE; |
} |
} |
|
|
|
/* NLTYPE_ANY */ |
|
|
else switch(c) |
else switch(c) |
{ |
{ |
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; | case CHAR_LF: |
return TRUE; /* LF */ | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
case 0x000b: /* VT */ | return TRUE; |
case 0x000c: /* FF */ | |
case 0x000d: *lenptr = 1; return TRUE; /* CR */ | #ifdef EBCDIC |
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ | case CHAR_NEL: |
case 0x2028: /* LS */ | #endif |
case 0x2029: *lenptr = 3; return TRUE; /* PS */ | case CHAR_VT: |
| case CHAR_FF: |
| case CHAR_CR: *lenptr = 1; return TRUE; |
| |
| #ifndef EBCDIC |
| #ifdef COMPILE_PCRE8 |
| case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
| case 0x2028: /* LS */ |
| case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
| #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
| case CHAR_NEL: |
| case 0x2028: /* LS */ |
| case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
| #endif /* COMPILE_PCRE8 */ |
| #endif /* Not EBCDIC */ |
| |
default: return FALSE; |
default: return FALSE; |
} |
} |
} |
} |