Annotation of embedaddon/php/sapi/cli/php_http_parser.c, revision 1.1.1.1
1.1 misho 1: /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2: *
3: * Permission is hereby granted, free of charge, to any person obtaining a copy
4: * of this software and associated documentation files (the "Software"), to
5: * deal in the Software without restriction, including without limitation the
6: * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7: * sell copies of the Software, and to permit persons to whom the Software is
8: * furnished to do so, subject to the following conditions:
9: *
10: * The above copyright notice and this permission notice shall be included in
11: * all copies or substantial portions of the Software.
12: *
13: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19: * IN THE SOFTWARE.
20: */
21: #include <assert.h>
22: #include <stddef.h>
23: #include "php_http_parser.h"
24:
25:
/* Smaller of two values. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. Only defined when no MIN exists already. */
#if !defined(MIN)
# define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif
29:
30:
/* Invoke the notification callback settings->on_<FOR>(parser) when one is
 * installed. If the callback returns non-zero, the enclosing function returns
 * (p - data), i.e. the offset of the current read position in the buffer.
 * Expects `settings`, `parser`, `p` and `data` to be in scope at the call
 * site. */
#define CALLBACK2(FOR)                                                 \
do {                                                                   \
  if (settings->on_##FOR && settings->on_##FOR(parser) != 0) {         \
    return (p - data);                                                 \
  }                                                                    \
} while (0)
37:
38:
/* Record the current read position `p` as the start of element FOR by storing
 * it in the local variable <FOR>_mark. */
#define MARK(FOR) do { FOR##_mark = p; } while (0)
43:
/* Deliver the bytes between <FOR>_mark and the current position `p` to the
 * data callback settings->on_<FOR>(parser, start, length). Nothing happens
 * when the mark is unset or no callback is installed. A non-zero callback
 * result makes the enclosing function return (p - data). The mark itself is
 * left untouched. */
#define CALLBACK_NOCLEAR(FOR)                                            \
do {                                                                     \
  if (FOR##_mark && settings->on_##FOR                                   \
      && 0 != settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark)) {  \
    return (p - data);                                                   \
  }                                                                      \
} while (0)
57:
/* On PHP_WIN32 builds any existing CALLBACK macro is removed before ours is
 * defined (presumably one comes from the platform headers - confirm). */
#if defined(PHP_WIN32)
# undef CALLBACK
#endif
/* Same as CALLBACK_NOCLEAR(FOR), then resets <FOR>_mark to NULL so the element
 * is not reported a second time. */
#define CALLBACK(FOR) \
do { CALLBACK_NOCLEAR(FOR); FOR##_mark = NULL; } while (0)
66:
67:
/* Lowercase header names the parser matches character by character. */
#define CONNECTION        "connection"
#define PROXY_CONNECTION  "proxy-connection"
#define CONTENT_LENGTH    "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE           "upgrade"

/* Lowercase header values the parser matches character by character. */
#define CHUNKED           "chunked"
#define KEEP_ALIVE        "keep-alive"
#define CLOSE             "close"
76:
77:
/* Request method names, indexed by parser->method. The order has to match the
 * PHP_HTTP_* method constants (declared outside this file - keep in sync).
 * Both the strings and the pointer table are read-only. */
static const char * const method_strings[] =
  { "DELETE"
  , "GET"
  , "HEAD"
  , "POST"
  , "PUT"
  , "CONNECT"
  , "OPTIONS"
  , "TRACE"
  , "COPY"
  , "LOCK"
  , "MKCOL"
  , "MOVE"
  , "PROPFIND"
  , "PROPPATCH"
  , "UNLOCK"
  , "REPORT"
  , "MKACTIVITY"
  , "CHECKOUT"
  , "MERGE"
  , "M-SEARCH"
  , "NOTIFY"
  , "SUBSCRIBE"
  , "UNSUBSCRIBE"
  };
103:
104:
/* Header-field character table, indexed by byte value. A non-zero entry is the
 * lowercased form of an accepted character; zero rejects the byte. Bytes
 * 128..255 have no initializer and are therefore zero (rejected).
 *
 * Tokens as defined by rfc 2616:
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * SP is deliberately accepted (mapped to ' '): the header-field state machine
 * in this file compares characters that passed this table against ' '.
 * The separators '"', '/' and '}' are rejected, as the grammar above requires.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',      0 };
145:
146:
/* Hexadecimal digit decoder, indexed by byte value: '0'-'9' give 0..9,
 * 'A'-'F' and 'a'-'f' give 10..15, every other byte gives -1.
 * All 256 entries are spelled out: entries without an initializer would be
 * zero, which would make bytes 0x80-0xFF decode as the valid digit 0. */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
157:
158:
/* URL character classifier, indexed by byte value. An entry of 1 means the
 * byte is an ordinary URL character; 0 means the URL state machine has to look
 * at it specifically. The zero entries are the control characters, SP, '#',
 * '?', DEL, and (having no initializer) all bytes 128..255. */
static const uint8_t normal_url_char[256] = {
  /*   0 ..  15 : control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /*  16 ..  31 : control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /*  32 ..  47 : sp ! " # $ % & '   ( ) * + , - . / */
  0, 1, 1, 0, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  48 ..  63 : 0 1 2 3 4 5 6 7   8 9 : ; < = > ? */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0,
  /*  64 ..  79 : @ A B C D E F G   H I J K L M N O */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  80 ..  95 : P Q R S T U V W   X Y Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  96 .. 111 : ` a b c d e f g   h i j k l m n o */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 112 .. 127 : p q r s t u v w   x y z { | } ~ del */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0
};
192:
193:
/* States of the main parsing state machine. Values are consecutive and start
 * at 1; their relative order matters (see the note on s_headers_almost_done). */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* message type not known yet */
  s_start_req_or_res,
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,

  s_header_almost_done,

  s_headers_almost_done,
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_size_almost_done,
  s_chunk_parameters,
  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof
};
261:
262:
/* True while `state` is one of the start-line/header states (anything up to
 * and including s_headers_almost_done) and the F_TRAILING flag is not set.
 * Reads parser->flags, so `parser` must be in scope. The argument is
 * parenthesized so that compound expressions compare as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
264:
265:
/* Sub-states used while recognizing the handful of headers (and header
 * values) the parser itself reacts to. Values are consecutive from 0. */
enum header_states {
  h_general = 0, /* nothing special about this header */

  /* shared prefix "con" of "connection" / "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* partway through a known header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name was matched completely */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* partway through a known header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value was matched completely */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
291:
292:
/* Bit flags stored in parser->flags. */
enum flags {
  F_CHUNKED               = 0x01, /* "Transfer-Encoding: chunked" header seen */
  F_CONNECTION_KEEP_ALIVE = 0x02, /* "Connection: keep-alive" header seen */
  F_CONNECTION_CLOSE      = 0x04, /* "Connection: close" header seen */
  F_TRAILING              = 0x08, /* presumably: in the trailer of a chunked message - confirm */
  F_UPGRADE               = 0x10, /* "Upgrade" header with a value seen */
  F_SKIPBODY              = 0x20  /* on_headers_complete returned 1 */
};
301:
302:
/* Line terminator characters. */
#define CR '\r'
#define LF '\n'
/* ASCII-lowercase a letter by setting bit 0x20. Non-letters are altered too,
 * so callers range-check the result. Argument and expansion are fully
 * parenthesized so the macro is safe inside larger expressions. */
#define LOWER(c) ((unsigned char)((c) | 0x20))
/* Look up byte `c` in the tokens table: 0 if it is not an accepted
 * header-field character, otherwise its lowercased form. */
#define TOKEN(c) (tokens[(unsigned char)(c)])
307:
308:
/* Initial state for the next message: request parsers begin in s_start_req,
 * every other parser type in s_start_res. Reads parser->type. */
#define start_state (parser->type != PHP_HTTP_REQUEST ? s_start_res : s_start_req)
310:
311:
/* Strictness switch. When HTTP_PARSER_STRICT is non-zero, STRICT_CHECK(cond)
 * jumps to the `error` label if `cond` holds, and NEW_MESSAGE() yields s_dead
 * unless http_should_keep_alive(parser) is true. Otherwise (including when
 * the macro is not defined at all) the check is compiled out - `cond` is not
 * evaluated - and a new message always starts in start_state.
 *
 * STRICT_CHECK is a single statement in both modes, so it can be used as the
 * body of an if/else without capturing a following `else`. Call sites must
 * terminate it with a semicolon. */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond) do { } while (0)
# define NEW_MESSAGE() start_state
#endif
319:
320:
321: size_t php_http_parser_execute (php_http_parser *parser,
322: const php_http_parser_settings *settings,
323: const char *data,
324: size_t len)
325: {
326: char c, ch;
327: const char *p = data, *pe;
328: size_t to_read;
329:
330: enum state state = (enum state) parser->state;
331: enum header_states header_state = (enum header_states) parser->header_state;
332: uint32_t index = parser->index;
333: uint32_t nread = parser->nread;
334:
335: /* technically we could combine all of these (except for url_mark) into one
336: variable, saving stack space, but it seems more clear to have them
337: separated. */
338: const char *header_field_mark = 0;
339: const char *header_value_mark = 0;
340: const char *fragment_mark = 0;
341: const char *query_string_mark = 0;
342: const char *path_mark = 0;
343: const char *url_mark = 0;
344:
345: if (len == 0) {
346: if (state == s_body_identity_eof) {
347: CALLBACK2(message_complete);
348: }
349: return 0;
350: }
351:
352: if (state == s_header_field)
353: header_field_mark = data;
354: if (state == s_header_value)
355: header_value_mark = data;
356: if (state == s_req_fragment)
357: fragment_mark = data;
358: if (state == s_req_query_string)
359: query_string_mark = data;
360: if (state == s_req_path)
361: path_mark = data;
362: if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
363: || state == s_req_schema_slash_slash || state == s_req_port
364: || state == s_req_query_string_start || state == s_req_query_string
365: || state == s_req_host
366: || state == s_req_fragment_start || state == s_req_fragment)
367: url_mark = data;
368:
369: for (p=data, pe=data+len; p != pe; p++) {
370: ch = *p;
371:
372: if (PARSING_HEADER(state)) {
373: ++nread;
374: /* Buffer overflow attack */
375: if (nread > PHP_HTTP_MAX_HEADER_SIZE) goto error;
376: }
377:
378: switch (state) {
379:
380: case s_dead:
381: /* this state is used after a 'Connection: close' message
382: * the parser will error out if it reads another message
383: */
384: goto error;
385:
386: case s_start_req_or_res:
387: {
388: if (ch == CR || ch == LF)
389: break;
390: parser->flags = 0;
391: parser->content_length = -1;
392:
393: CALLBACK2(message_begin);
394:
395: if (ch == 'H')
396: state = s_res_or_resp_H;
397: else {
398: parser->type = PHP_HTTP_REQUEST;
399: goto start_req_method_assign;
400: }
401: break;
402: }
403:
404: case s_res_or_resp_H:
405: if (ch == 'T') {
406: parser->type = PHP_HTTP_RESPONSE;
407: state = s_res_HT;
408: } else {
409: if (ch != 'E') goto error;
410: parser->type = PHP_HTTP_REQUEST;
411: parser->method = PHP_HTTP_HEAD;
412: index = 2;
413: state = s_req_method;
414: }
415: break;
416:
417: case s_start_res:
418: {
419: parser->flags = 0;
420: parser->content_length = -1;
421:
422: CALLBACK2(message_begin);
423:
424: switch (ch) {
425: case 'H':
426: state = s_res_H;
427: break;
428:
429: case CR:
430: case LF:
431: break;
432:
433: default:
434: goto error;
435: }
436: break;
437: }
438:
439: case s_res_H:
440: STRICT_CHECK(ch != 'T');
441: state = s_res_HT;
442: break;
443:
444: case s_res_HT:
445: STRICT_CHECK(ch != 'T');
446: state = s_res_HTT;
447: break;
448:
449: case s_res_HTT:
450: STRICT_CHECK(ch != 'P');
451: state = s_res_HTTP;
452: break;
453:
454: case s_res_HTTP:
455: STRICT_CHECK(ch != '/');
456: state = s_res_first_http_major;
457: break;
458:
459: case s_res_first_http_major:
460: if (ch < '1' || ch > '9') goto error;
461: parser->http_major = ch - '0';
462: state = s_res_http_major;
463: break;
464:
465: /* major HTTP version or dot */
466: case s_res_http_major:
467: {
468: if (ch == '.') {
469: state = s_res_first_http_minor;
470: break;
471: }
472:
473: if (ch < '0' || ch > '9') goto error;
474:
475: parser->http_major *= 10;
476: parser->http_major += ch - '0';
477:
478: if (parser->http_major > 999) goto error;
479: break;
480: }
481:
482: /* first digit of minor HTTP version */
483: case s_res_first_http_minor:
484: if (ch < '0' || ch > '9') goto error;
485: parser->http_minor = ch - '0';
486: state = s_res_http_minor;
487: break;
488:
489: /* minor HTTP version or end of request line */
490: case s_res_http_minor:
491: {
492: if (ch == ' ') {
493: state = s_res_first_status_code;
494: break;
495: }
496:
497: if (ch < '0' || ch > '9') goto error;
498:
499: parser->http_minor *= 10;
500: parser->http_minor += ch - '0';
501:
502: if (parser->http_minor > 999) goto error;
503: break;
504: }
505:
506: case s_res_first_status_code:
507: {
508: if (ch < '0' || ch > '9') {
509: if (ch == ' ') {
510: break;
511: }
512: goto error;
513: }
514: parser->status_code = ch - '0';
515: state = s_res_status_code;
516: break;
517: }
518:
519: case s_res_status_code:
520: {
521: if (ch < '0' || ch > '9') {
522: switch (ch) {
523: case ' ':
524: state = s_res_status;
525: break;
526: case CR:
527: state = s_res_line_almost_done;
528: break;
529: case LF:
530: state = s_header_field_start;
531: break;
532: default:
533: goto error;
534: }
535: break;
536: }
537:
538: parser->status_code *= 10;
539: parser->status_code += ch - '0';
540:
541: if (parser->status_code > 999) goto error;
542: break;
543: }
544:
545: case s_res_status:
546: /* the human readable status. e.g. "NOT FOUND"
547: * we are not humans so just ignore this */
548: if (ch == CR) {
549: state = s_res_line_almost_done;
550: break;
551: }
552:
553: if (ch == LF) {
554: state = s_header_field_start;
555: break;
556: }
557: break;
558:
559: case s_res_line_almost_done:
560: STRICT_CHECK(ch != LF);
561: state = s_header_field_start;
562: break;
563:
564: case s_start_req:
565: {
566: if (ch == CR || ch == LF)
567: break;
568: parser->flags = 0;
569: parser->content_length = -1;
570:
571: CALLBACK2(message_begin);
572:
573: if (ch < 'A' || 'Z' < ch) goto error;
574:
575: start_req_method_assign:
576: parser->method = (enum php_http_method) 0;
577: index = 1;
578: switch (ch) {
579: case 'C': parser->method = PHP_HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
580: case 'D': parser->method = PHP_HTTP_DELETE; break;
581: case 'G': parser->method = PHP_HTTP_GET; break;
582: case 'H': parser->method = PHP_HTTP_HEAD; break;
583: case 'L': parser->method = PHP_HTTP_LOCK; break;
584: case 'M': parser->method = PHP_HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
585: case 'N': parser->method = PHP_HTTP_NOTIFY; break;
586: case 'O': parser->method = PHP_HTTP_OPTIONS; break;
587: case 'P': parser->method = PHP_HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
588: case 'R': parser->method = PHP_HTTP_REPORT; break;
589: case 'S': parser->method = PHP_HTTP_SUBSCRIBE; break;
590: case 'T': parser->method = PHP_HTTP_TRACE; break;
591: case 'U': parser->method = PHP_HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
592: default: goto error;
593: }
594: state = s_req_method;
595: break;
596: }
597:
598: case s_req_method:
599: {
600: const char *matcher;
601: if (ch == '\0')
602: goto error;
603:
604: matcher = method_strings[parser->method];
605: if (ch == ' ' && matcher[index] == '\0') {
606: state = s_req_spaces_before_url;
607: } else if (ch == matcher[index]) {
608: ; /* nada */
609: } else if (parser->method == PHP_HTTP_CONNECT) {
610: if (index == 1 && ch == 'H') {
611: parser->method = PHP_HTTP_CHECKOUT;
612: } else if (index == 2 && ch == 'P') {
613: parser->method = PHP_HTTP_COPY;
614: }
615: } else if (parser->method == PHP_HTTP_MKCOL) {
616: if (index == 1 && ch == 'O') {
617: parser->method = PHP_HTTP_MOVE;
618: } else if (index == 1 && ch == 'E') {
619: parser->method = PHP_HTTP_MERGE;
620: } else if (index == 1 && ch == '-') {
621: parser->method = PHP_HTTP_MSEARCH;
622: } else if (index == 2 && ch == 'A') {
623: parser->method = PHP_HTTP_MKACTIVITY;
624: }
625: } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'R') {
626: parser->method = PHP_HTTP_PROPFIND; /* or HTTP_PROPPATCH */
627: } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'U') {
628: parser->method = PHP_HTTP_PUT;
629: } else if (index == 2 && parser->method == PHP_HTTP_UNLOCK && ch == 'S') {
630: parser->method = PHP_HTTP_UNSUBSCRIBE;
631: } else if (index == 4 && parser->method == PHP_HTTP_PROPFIND && ch == 'P') {
632: parser->method = PHP_HTTP_PROPPATCH;
633: } else {
634: goto error;
635: }
636:
637: ++index;
638: break;
639: }
640: case s_req_spaces_before_url:
641: {
642: if (ch == ' ') break;
643:
644: if (ch == '/' || ch == '*') {
645: MARK(url);
646: MARK(path);
647: state = s_req_path;
648: break;
649: }
650:
651: c = LOWER(ch);
652:
653: if (c >= 'a' && c <= 'z') {
654: MARK(url);
655: state = s_req_schema;
656: break;
657: }
658:
659: goto error;
660: }
661:
662: case s_req_schema:
663: {
664: c = LOWER(ch);
665:
666: if (c >= 'a' && c <= 'z') break;
667:
668: if (ch == ':') {
669: state = s_req_schema_slash;
670: break;
671: } else if (ch == '.') {
672: state = s_req_host;
673: break;
674: } else if ('0' <= ch && ch <= '9') {
675: state = s_req_host;
676: break;
677: }
678:
679: goto error;
680: }
681:
682: case s_req_schema_slash:
683: STRICT_CHECK(ch != '/');
684: state = s_req_schema_slash_slash;
685: break;
686:
687: case s_req_schema_slash_slash:
688: STRICT_CHECK(ch != '/');
689: state = s_req_host;
690: break;
691:
692: case s_req_host:
693: {
694: c = LOWER(ch);
695: if (c >= 'a' && c <= 'z') break;
696: if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
697: switch (ch) {
698: case ':':
699: state = s_req_port;
700: break;
701: case '/':
702: MARK(path);
703: state = s_req_path;
704: break;
705: case ' ':
706: /* The request line looks like:
707: * "GET http://foo.bar.com HTTP/1.1"
708: * That is, there is no path.
709: */
710: CALLBACK(url);
711: state = s_req_http_start;
712: break;
713: default:
714: goto error;
715: }
716: break;
717: }
718:
719: case s_req_port:
720: {
721: if (ch >= '0' && ch <= '9') break;
722: switch (ch) {
723: case '/':
724: MARK(path);
725: state = s_req_path;
726: break;
727: case ' ':
728: /* The request line looks like:
729: * "GET http://foo.bar.com:1234 HTTP/1.1"
730: * That is, there is no path.
731: */
732: CALLBACK(url);
733: state = s_req_http_start;
734: break;
735: default:
736: goto error;
737: }
738: break;
739: }
740:
741: case s_req_path:
742: {
743: if (normal_url_char[(unsigned char)ch]) break;
744:
745: switch (ch) {
746: case ' ':
747: CALLBACK(url);
748: CALLBACK(path);
749: state = s_req_http_start;
750: break;
751: case CR:
752: CALLBACK(url);
753: CALLBACK(path);
754: parser->http_major = 0;
755: parser->http_minor = 9;
756: state = s_req_line_almost_done;
757: break;
758: case LF:
759: CALLBACK(url);
760: CALLBACK(path);
761: parser->http_major = 0;
762: parser->http_minor = 9;
763: state = s_header_field_start;
764: break;
765: case '?':
766: CALLBACK(path);
767: state = s_req_query_string_start;
768: break;
769: case '#':
770: CALLBACK(path);
771: state = s_req_fragment_start;
772: break;
773: default:
774: goto error;
775: }
776: break;
777: }
778:
779: case s_req_query_string_start:
780: {
781: if (normal_url_char[(unsigned char)ch]) {
782: MARK(query_string);
783: state = s_req_query_string;
784: break;
785: }
786:
787: switch (ch) {
788: case '?':
789: break; /* XXX ignore extra '?' ... is this right? */
790: case ' ':
791: CALLBACK(url);
792: state = s_req_http_start;
793: break;
794: case CR:
795: CALLBACK(url);
796: parser->http_major = 0;
797: parser->http_minor = 9;
798: state = s_req_line_almost_done;
799: break;
800: case LF:
801: CALLBACK(url);
802: parser->http_major = 0;
803: parser->http_minor = 9;
804: state = s_header_field_start;
805: break;
806: case '#':
807: state = s_req_fragment_start;
808: break;
809: default:
810: goto error;
811: }
812: break;
813: }
814:
815: case s_req_query_string:
816: {
817: if (normal_url_char[(unsigned char)ch]) break;
818:
819: switch (ch) {
820: case '?':
821: /* allow extra '?' in query string */
822: break;
823: case ' ':
824: CALLBACK(url);
825: CALLBACK(query_string);
826: state = s_req_http_start;
827: break;
828: case CR:
829: CALLBACK(url);
830: CALLBACK(query_string);
831: parser->http_major = 0;
832: parser->http_minor = 9;
833: state = s_req_line_almost_done;
834: break;
835: case LF:
836: CALLBACK(url);
837: CALLBACK(query_string);
838: parser->http_major = 0;
839: parser->http_minor = 9;
840: state = s_header_field_start;
841: break;
842: case '#':
843: CALLBACK(query_string);
844: state = s_req_fragment_start;
845: break;
846: default:
847: goto error;
848: }
849: break;
850: }
851:
852: case s_req_fragment_start:
853: {
854: if (normal_url_char[(unsigned char)ch]) {
855: MARK(fragment);
856: state = s_req_fragment;
857: break;
858: }
859:
860: switch (ch) {
861: case ' ':
862: CALLBACK(url);
863: state = s_req_http_start;
864: break;
865: case CR:
866: CALLBACK(url);
867: parser->http_major = 0;
868: parser->http_minor = 9;
869: state = s_req_line_almost_done;
870: break;
871: case LF:
872: CALLBACK(url);
873: parser->http_major = 0;
874: parser->http_minor = 9;
875: state = s_header_field_start;
876: break;
877: case '?':
878: MARK(fragment);
879: state = s_req_fragment;
880: break;
881: case '#':
882: break;
883: default:
884: goto error;
885: }
886: break;
887: }
888:
889: case s_req_fragment:
890: {
891: if (normal_url_char[(unsigned char)ch]) break;
892:
893: switch (ch) {
894: case ' ':
895: CALLBACK(url);
896: CALLBACK(fragment);
897: state = s_req_http_start;
898: break;
899: case CR:
900: CALLBACK(url);
901: CALLBACK(fragment);
902: parser->http_major = 0;
903: parser->http_minor = 9;
904: state = s_req_line_almost_done;
905: break;
906: case LF:
907: CALLBACK(url);
908: CALLBACK(fragment);
909: parser->http_major = 0;
910: parser->http_minor = 9;
911: state = s_header_field_start;
912: break;
913: case '?':
914: case '#':
915: break;
916: default:
917: goto error;
918: }
919: break;
920: }
921:
922: case s_req_http_start:
923: switch (ch) {
924: case 'H':
925: state = s_req_http_H;
926: break;
927: case ' ':
928: break;
929: default:
930: goto error;
931: }
932: break;
933:
934: case s_req_http_H:
935: STRICT_CHECK(ch != 'T');
936: state = s_req_http_HT;
937: break;
938:
939: case s_req_http_HT:
940: STRICT_CHECK(ch != 'T');
941: state = s_req_http_HTT;
942: break;
943:
944: case s_req_http_HTT:
945: STRICT_CHECK(ch != 'P');
946: state = s_req_http_HTTP;
947: break;
948:
949: case s_req_http_HTTP:
950: STRICT_CHECK(ch != '/');
951: state = s_req_first_http_major;
952: break;
953:
954: /* first digit of major HTTP version */
955: case s_req_first_http_major:
956: if (ch < '1' || ch > '9') goto error;
957: parser->http_major = ch - '0';
958: state = s_req_http_major;
959: break;
960:
961: /* major HTTP version or dot */
962: case s_req_http_major:
963: {
964: if (ch == '.') {
965: state = s_req_first_http_minor;
966: break;
967: }
968:
969: if (ch < '0' || ch > '9') goto error;
970:
971: parser->http_major *= 10;
972: parser->http_major += ch - '0';
973:
974: if (parser->http_major > 999) goto error;
975: break;
976: }
977:
978: /* first digit of minor HTTP version */
979: case s_req_first_http_minor:
980: if (ch < '0' || ch > '9') goto error;
981: parser->http_minor = ch - '0';
982: state = s_req_http_minor;
983: break;
984:
985: /* minor HTTP version or end of request line */
986: case s_req_http_minor:
987: {
988: if (ch == CR) {
989: state = s_req_line_almost_done;
990: break;
991: }
992:
993: if (ch == LF) {
994: state = s_header_field_start;
995: break;
996: }
997:
998: /* XXX allow spaces after digit? */
999:
1000: if (ch < '0' || ch > '9') goto error;
1001:
1002: parser->http_minor *= 10;
1003: parser->http_minor += ch - '0';
1004:
1005: if (parser->http_minor > 999) goto error;
1006: break;
1007: }
1008:
1009: /* end of request line */
1010: case s_req_line_almost_done:
1011: {
1012: if (ch != LF) goto error;
1013: state = s_header_field_start;
1014: break;
1015: }
1016:
1017: case s_header_field_start:
1018: {
1019: if (ch == CR) {
1020: state = s_headers_almost_done;
1021: break;
1022: }
1023:
1024: if (ch == LF) {
1025: /* they might be just sending \n instead of \r\n so this would be
1026: * the second \n to denote the end of headers*/
1027: state = s_headers_almost_done;
1028: goto headers_almost_done;
1029: }
1030:
1031: c = TOKEN(ch);
1032:
1033: if (!c) goto error;
1034:
1035: MARK(header_field);
1036:
1037: index = 0;
1038: state = s_header_field;
1039:
1040: switch (c) {
1041: case 'c':
1042: header_state = h_C;
1043: break;
1044:
1045: case 'p':
1046: header_state = h_matching_proxy_connection;
1047: break;
1048:
1049: case 't':
1050: header_state = h_matching_transfer_encoding;
1051: break;
1052:
1053: case 'u':
1054: header_state = h_matching_upgrade;
1055: break;
1056:
1057: default:
1058: header_state = h_general;
1059: break;
1060: }
1061: break;
1062: }
1063:
1064: case s_header_field:
1065: {
1066: c = TOKEN(ch);
1067:
1068: if (c) {
1069: switch (header_state) {
1070: case h_general:
1071: break;
1072:
1073: case h_C:
1074: index++;
1075: header_state = (c == 'o' ? h_CO : h_general);
1076: break;
1077:
1078: case h_CO:
1079: index++;
1080: header_state = (c == 'n' ? h_CON : h_general);
1081: break;
1082:
1083: case h_CON:
1084: index++;
1085: switch (c) {
1086: case 'n':
1087: header_state = h_matching_connection;
1088: break;
1089: case 't':
1090: header_state = h_matching_content_length;
1091: break;
1092: default:
1093: header_state = h_general;
1094: break;
1095: }
1096: break;
1097:
1098: /* connection */
1099:
1100: case h_matching_connection:
1101: index++;
1102: if (index > sizeof(CONNECTION)-1
1103: || c != CONNECTION[index]) {
1104: header_state = h_general;
1105: } else if (index == sizeof(CONNECTION)-2) {
1106: header_state = h_connection;
1107: }
1108: break;
1109:
1110: /* proxy-connection */
1111:
1112: case h_matching_proxy_connection:
1113: index++;
1114: if (index > sizeof(PROXY_CONNECTION)-1
1115: || c != PROXY_CONNECTION[index]) {
1116: header_state = h_general;
1117: } else if (index == sizeof(PROXY_CONNECTION)-2) {
1118: header_state = h_connection;
1119: }
1120: break;
1121:
1122: /* content-length */
1123:
1124: case h_matching_content_length:
1125: index++;
1126: if (index > sizeof(CONTENT_LENGTH)-1
1127: || c != CONTENT_LENGTH[index]) {
1128: header_state = h_general;
1129: } else if (index == sizeof(CONTENT_LENGTH)-2) {
1130: header_state = h_content_length;
1131: }
1132: break;
1133:
1134: /* transfer-encoding */
1135:
1136: case h_matching_transfer_encoding:
1137: index++;
1138: if (index > sizeof(TRANSFER_ENCODING)-1
1139: || c != TRANSFER_ENCODING[index]) {
1140: header_state = h_general;
1141: } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1142: header_state = h_transfer_encoding;
1143: }
1144: break;
1145:
1146: /* upgrade */
1147:
1148: case h_matching_upgrade:
1149: index++;
1150: if (index > sizeof(UPGRADE)-1
1151: || c != UPGRADE[index]) {
1152: header_state = h_general;
1153: } else if (index == sizeof(UPGRADE)-2) {
1154: header_state = h_upgrade;
1155: }
1156: break;
1157:
1158: case h_connection:
1159: case h_content_length:
1160: case h_transfer_encoding:
1161: case h_upgrade:
1162: if (ch != ' ') header_state = h_general;
1163: break;
1164:
1165: default:
1166: assert(0 && "Unknown header_state");
1167: break;
1168: }
1169: break;
1170: }
1171:
1172: if (ch == ':') {
1173: CALLBACK(header_field);
1174: state = s_header_value_start;
1175: break;
1176: }
1177:
1178: if (ch == CR) {
1179: state = s_header_almost_done;
1180: CALLBACK(header_field);
1181: break;
1182: }
1183:
1184: if (ch == LF) {
1185: CALLBACK(header_field);
1186: state = s_header_field_start;
1187: break;
1188: }
1189:
1190: goto error;
1191: }
1192:
1193: case s_header_value_start:
1194: {
1195: if (ch == ' ') break;
1196:
1197: MARK(header_value);
1198:
1199: state = s_header_value;
1200: index = 0;
1201:
1202: c = LOWER(ch);
1203:
1204: if (ch == CR) {
1205: CALLBACK(header_value);
1206: header_state = h_general;
1207: state = s_header_almost_done;
1208: break;
1209: }
1210:
1211: if (ch == LF) {
1212: CALLBACK(header_value);
1213: state = s_header_field_start;
1214: break;
1215: }
1216:
1217: switch (header_state) {
1218: case h_upgrade:
1219: parser->flags |= F_UPGRADE;
1220: header_state = h_general;
1221: break;
1222:
1223: case h_transfer_encoding:
1224: /* looking for 'Transfer-Encoding: chunked' */
1225: if ('c' == c) {
1226: header_state = h_matching_transfer_encoding_chunked;
1227: } else {
1228: header_state = h_general;
1229: }
1230: break;
1231:
1232: case h_content_length:
1233: if (ch < '0' || ch > '9') goto error;
1234: parser->content_length = ch - '0';
1235: break;
1236:
1237: case h_connection:
1238: /* looking for 'Connection: keep-alive' */
1239: if (c == 'k') {
1240: header_state = h_matching_connection_keep_alive;
1241: /* looking for 'Connection: close' */
1242: } else if (c == 'c') {
1243: header_state = h_matching_connection_close;
1244: } else {
1245: header_state = h_general;
1246: }
1247: break;
1248:
1249: default:
1250: header_state = h_general;
1251: break;
1252: }
1253: break;
1254: }
1255:
1256: case s_header_value:
1257: {
1258: c = LOWER(ch);
1259:
1260: if (ch == CR) {
1261: CALLBACK(header_value);
1262: state = s_header_almost_done;
1263: break;
1264: }
1265:
1266: if (ch == LF) {
1267: CALLBACK(header_value);
1268: goto header_almost_done;
1269: }
1270:
1271: switch (header_state) {
1272: case h_general:
1273: break;
1274:
1275: case h_connection:
1276: case h_transfer_encoding:
1277: assert(0 && "Shouldn't get here.");
1278: break;
1279:
1280: case h_content_length:
1281: if (ch == ' ') break;
1282: if (ch < '0' || ch > '9') goto error;
1283: parser->content_length *= 10;
1284: parser->content_length += ch - '0';
1285: break;
1286:
1287: /* Transfer-Encoding: chunked */
1288: case h_matching_transfer_encoding_chunked:
1289: index++;
1290: if (index > sizeof(CHUNKED)-1
1291: || c != CHUNKED[index]) {
1292: header_state = h_general;
1293: } else if (index == sizeof(CHUNKED)-2) {
1294: header_state = h_transfer_encoding_chunked;
1295: }
1296: break;
1297:
1298: /* looking for 'Connection: keep-alive' */
1299: case h_matching_connection_keep_alive:
1300: index++;
1301: if (index > sizeof(KEEP_ALIVE)-1
1302: || c != KEEP_ALIVE[index]) {
1303: header_state = h_general;
1304: } else if (index == sizeof(KEEP_ALIVE)-2) {
1305: header_state = h_connection_keep_alive;
1306: }
1307: break;
1308:
1309: /* looking for 'Connection: close' */
1310: case h_matching_connection_close:
1311: index++;
1312: if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1313: header_state = h_general;
1314: } else if (index == sizeof(CLOSE)-2) {
1315: header_state = h_connection_close;
1316: }
1317: break;
1318:
1319: case h_transfer_encoding_chunked:
1320: case h_connection_keep_alive:
1321: case h_connection_close:
1322: if (ch != ' ') header_state = h_general;
1323: break;
1324:
1325: default:
1326: state = s_header_value;
1327: header_state = h_general;
1328: break;
1329: }
1330: break;
1331: }
1332:
1333: case s_header_almost_done:
1334: header_almost_done:
1335: {
1336: STRICT_CHECK(ch != LF);
1337:
1338: state = s_header_field_start;
1339:
1340: switch (header_state) {
1341: case h_connection_keep_alive:
1342: parser->flags |= F_CONNECTION_KEEP_ALIVE;
1343: break;
1344: case h_connection_close:
1345: parser->flags |= F_CONNECTION_CLOSE;
1346: break;
1347: case h_transfer_encoding_chunked:
1348: parser->flags |= F_CHUNKED;
1349: break;
1350: default:
1351: break;
1352: }
1353: break;
1354: }
1355:
1356: case s_headers_almost_done:
1357: headers_almost_done:
1358: {
1359: STRICT_CHECK(ch != LF);
1360:
1361: if (parser->flags & F_TRAILING) {
1362: /* End of a chunked request */
1363: CALLBACK2(message_complete);
1364: state = NEW_MESSAGE();
1365: break;
1366: }
1367:
1368: nread = 0;
1369:
1370: if (parser->flags & F_UPGRADE || parser->method == PHP_HTTP_CONNECT) {
1371: parser->upgrade = 1;
1372: }
1373:
1374: /* Here we call the headers_complete callback. This is somewhat
1375: * different than other callbacks because if the user returns 1, we
1376: * will interpret that as saying that this message has no body. This
1377: * is needed for the annoying case of receiving a response to a HEAD
1378: * request.
1379: */
1380: if (settings->on_headers_complete) {
1381: switch (settings->on_headers_complete(parser)) {
1382: case 0:
1383: break;
1384:
1385: case 1:
1386: parser->flags |= F_SKIPBODY;
1387: break;
1388:
1389: default:
1390: return p - data; /* Error */
1391: }
1392: }
1393:
1394: /* Exit, the rest of the connect is in a different protocol. */
1395: if (parser->upgrade) {
1396: CALLBACK2(message_complete);
1397: return (p - data);
1398: }
1399:
1400: if (parser->flags & F_SKIPBODY) {
1401: CALLBACK2(message_complete);
1402: state = NEW_MESSAGE();
1403: } else if (parser->flags & F_CHUNKED) {
1404: /* chunked encoding - ignore Content-Length header */
1405: state = s_chunk_size_start;
1406: } else {
1407: if (parser->content_length == 0) {
1408: /* Content-Length header given but zero: Content-Length: 0\r\n */
1409: CALLBACK2(message_complete);
1410: state = NEW_MESSAGE();
1411: } else if (parser->content_length > 0) {
1412: /* Content-Length header given and non-zero */
1413: state = s_body_identity;
1414: } else {
1415: if (parser->type == PHP_HTTP_REQUEST || php_http_should_keep_alive(parser)) {
1416: /* Assume content-length 0 - read the next */
1417: CALLBACK2(message_complete);
1418: state = NEW_MESSAGE();
1419: } else {
1420: /* Read body until EOF */
1421: state = s_body_identity_eof;
1422: }
1423: }
1424: }
1425:
1426: break;
1427: }
1428:
1429: case s_body_identity:
1430: to_read = MIN(pe - p, (size_t)parser->content_length);
1431: if (to_read > 0) {
1432: if (settings->on_body) settings->on_body(parser, p, to_read);
1433: p += to_read - 1;
1434: parser->content_length -= to_read;
1435: if (parser->content_length == 0) {
1436: CALLBACK2(message_complete);
1437: state = NEW_MESSAGE();
1438: }
1439: }
1440: break;
1441:
1442: /* read until EOF */
1443: case s_body_identity_eof:
1444: to_read = pe - p;
1445: if (to_read > 0) {
1446: if (settings->on_body) settings->on_body(parser, p, to_read);
1447: p += to_read - 1;
1448: }
1449: break;
1450:
1451: case s_chunk_size_start:
1452: {
1453: assert(parser->flags & F_CHUNKED);
1454:
1455: c = unhex[(unsigned char)ch];
1456: if (c == -1) goto error;
1457: parser->content_length = c;
1458: state = s_chunk_size;
1459: break;
1460: }
1461:
1462: case s_chunk_size:
1463: {
1464: assert(parser->flags & F_CHUNKED);
1465:
1466: if (ch == CR) {
1467: state = s_chunk_size_almost_done;
1468: break;
1469: }
1470:
1471: c = unhex[(unsigned char)ch];
1472:
1473: if (c == -1) {
1474: if (ch == ';' || ch == ' ') {
1475: state = s_chunk_parameters;
1476: break;
1477: }
1478: goto error;
1479: }
1480:
1481: parser->content_length *= 16;
1482: parser->content_length += c;
1483: break;
1484: }
1485:
1486: case s_chunk_parameters:
1487: {
1488: assert(parser->flags & F_CHUNKED);
1489: /* Chunk parameters are ignored up to the CR. TODO check for overflow */
1490: if (ch == CR) {
1491: state = s_chunk_size_almost_done;
1492: break;
1493: }
1494: break;
1495: }
1496:
1497: case s_chunk_size_almost_done:
1498: {
1499: assert(parser->flags & F_CHUNKED);
1500: STRICT_CHECK(ch != LF);
1501:
1502: if (parser->content_length == 0) {
1503: parser->flags |= F_TRAILING;
1504: state = s_header_field_start;
1505: } else {
1506: state = s_chunk_data;
1507: }
1508: break;
1509: }
1510:
1511: case s_chunk_data:
1512: {
1513: assert(parser->flags & F_CHUNKED);
1514:
1515: to_read = MIN(pe - p, (size_t)(parser->content_length));
1516:
1517: if (to_read > 0) {
1518: if (settings->on_body) settings->on_body(parser, p, to_read);
1519: p += to_read - 1;
1520: }
1521:
1522: if (to_read == parser->content_length) {
1523: state = s_chunk_data_almost_done;
1524: }
1525:
1526: parser->content_length -= to_read;
1527: break;
1528: }
1529:
1530: case s_chunk_data_almost_done:
1531: assert(parser->flags & F_CHUNKED);
1532: STRICT_CHECK(ch != CR);
1533: state = s_chunk_data_done;
1534: break;
1535:
1536: case s_chunk_data_done:
1537: assert(parser->flags & F_CHUNKED);
1538: STRICT_CHECK(ch != LF);
1539: state = s_chunk_size_start;
1540: break;
1541:
1542: default:
1543: assert(0 && "unhandled state");
1544: goto error;
1545: }
1546: }
1547:
1548: CALLBACK_NOCLEAR(header_field);
1549: CALLBACK_NOCLEAR(header_value);
1550: CALLBACK_NOCLEAR(fragment);
1551: CALLBACK_NOCLEAR(query_string);
1552: CALLBACK_NOCLEAR(path);
1553: CALLBACK_NOCLEAR(url);
1554:
1555: parser->state = state;
1556: parser->header_state = header_state;
1557: parser->index = index;
1558: parser->nread = nread;
1559:
1560: return len;
1561:
1562: error:
1563: parser->state = s_dead;
1564: return (p - data);
1565: }
1566:
1567:
1568: int
1569: php_http_should_keep_alive (php_http_parser *parser)
1570: {
1571: if (parser->http_major > 0 && parser->http_minor > 0) {
1572: /* HTTP/1.1 */
1573: if (parser->flags & F_CONNECTION_CLOSE) {
1574: return 0;
1575: } else {
1576: return 1;
1577: }
1578: } else {
1579: /* HTTP/1.0 or earlier */
1580: if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1581: return 1;
1582: } else {
1583: return 0;
1584: }
1585: }
1586: }
1587:
1588:
1589: const char * php_http_method_str (enum php_http_method m)
1590: {
1591: return method_strings[m];
1592: }
1593:
1594:
1595: void
1596: php_http_parser_init (php_http_parser *parser, enum php_http_parser_type t)
1597: {
1598: parser->type = t;
1599: parser->state = (t == PHP_HTTP_REQUEST ? s_start_req : (t == PHP_HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1600: parser->nread = 0;
1601: parser->upgrade = 0;
1602: parser->flags = 0;
1603: parser->method = 0;
1604: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>