/http-parser/http_parser.c

http://github.com/nicolasff/webdis · C · 1591 lines · 1265 code · 222 blank · 104 comment · 401 complexity · 905d5bb8563c67c64c366984ff508ae6 MD5 · raw file

  1. /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to
  5. * deal in the Software without restriction, including without limitation the
  6. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. * sell copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. * IN THE SOFTWARE.
  20. */
  21. #include <http_parser.h>
  22. #include <assert.h>
  23. #include <stddef.h>
  /* Smaller of two values; note that each argument may be evaluated twice. */
  #ifndef MIN
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif

  /*
   * The macros below are meant to be expanded inside the parsing function:
   * they refer to its `settings`, `parser`, `p` and `data` variables and to
   * its per-element `<name>_mark` pointers.
   */

  /* Invoke the argument-less notification callback on_<FOR>, if one is set.
   * A non-zero result makes the enclosing function return the number of
   * bytes consumed so far (p - data). */
  #define CALLBACK2(FOR) \
    do { \
      if (settings->on_##FOR && settings->on_##FOR(parser) != 0) \
        return (p - data); \
    } while (0)

  /* Remember the current position as the start of element <FOR>. */
  #define MARK(FOR) \
    do { FOR##_mark = p; } while (0)

  /* If element <FOR> has been marked and on_<FOR> is set, hand the callback
   * the span [<FOR>_mark, p). A non-zero result makes the enclosing function
   * return (p - data). The mark is left untouched. */
  #define CALLBACK_NOCLEAR(FOR) \
    do { \
      if (FOR##_mark && settings->on_##FOR \
          && settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark) != 0) \
        return (p - data); \
    } while (0)

  /* Same as CALLBACK_NOCLEAR, then forget the mark. */
  #define CALLBACK(FOR) \
    do { \
      CALLBACK_NOCLEAR(FOR); \
      FOR##_mark = NULL; \
    } while (0)

  /* Lowercase header names and values that the header matching states compare
   * the (lowercased) input against, one character at a time. */
  #define PROXY_CONNECTION  "proxy-connection"
  #define CONNECTION        "connection"
  #define CONTENT_LENGTH    "content-length"
  #define TRANSFER_ENCODING "transfer-encoding"
  #define UPGRADE           "upgrade"
  #define CHUNKED           "chunked"
  #define KEEP_ALIVE        "keep-alive"
  #define CLOSE             "close"
  /*
   * Request method names. The parser indexes this table with parser->method
   * (see the s_req_method state), so the order of the entries must stay in
   * step with the numeric values of the HTTP_* method constants.
   */
  static const char *method_strings[] = {
    "DELETE",       /*  0 */
    "GET",          /*  1 */
    "HEAD",         /*  2 */
    "POST",         /*  3 */
    "PUT",          /*  4 */
    "CONNECT",      /*  5 */
    "OPTIONS",      /*  6 */
    "TRACE",        /*  7 */
    "COPY",         /*  8 */
    "LOCK",         /*  9 */
    "MKCOL",        /* 10 */
    "MOVE",         /* 11 */
    "PROPFIND",     /* 12 */
    "PROPPATCH",    /* 13 */
    "UNLOCK",       /* 14 */
    "REPORT",       /* 15 */
    "MKACTIVITY",   /* 16 */
    "CHECKOUT",     /* 17 */
    "MERGE",        /* 18 */
    "M-SEARCH",     /* 19 */
    "NOTIFY",       /* 20 */
    "SUBSCRIBE",    /* 21 */
    "UNSUBSCRIBE"   /* 22 */
  };
  /* Tokens as defined by rfc 2616. Also lowercases them.
   *        token       = 1*<any CHAR except CTLs or separators>
   *     separators     = "(" | ")" | "<" | ">" | "@"
   *                    | "," | ";" | ":" | "\" | <">
   *                    | "/" | "[" | "]" | "?" | "="
   *                    | "{" | "}" | SP | HT
   *
   * tokens[b] is 0 when byte b may not appear in a token; otherwise it is b
   * itself, with 'A'..'Z' folded to 'a'..'z'. Only the first 128 entries are
   * spelled out; entries 128..255 are implicitly zero, i.e. never tokens.
   *
   * The separators '"', '/' and '}' are rejected (0) in line with the grammar
   * above. SP is the one deliberate exception: it is still accepted because
   * the header-field matching code tests for a space after a fully matched
   * header name.
   */
  static const char tokens[256] = {
  /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
          0,       0,       0,       0,       0,       0,       0,       0,
  /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
          0,       0,       0,       0,       0,       0,       0,       0,
  /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
          0,       0,       0,       0,       0,       0,       0,       0,
  /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
          0,       0,       0,       0,       0,       0,       0,       0,
  /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
         ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
  /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
          0,       0,      '*',     '+',      0,      '-',     '.',      0,
  /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
         '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
  /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
         '8',     '9',      0,       0,       0,       0,       0,       0,
  /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
          0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
  /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
         'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
  /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
         'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
  /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
         'x',     'y',     'z',      0,       0,       0,      '^',     '_',
  /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
         '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
  /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
         'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
  /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
         'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
  /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
         'x',     'y',     'z',      0,      '|',      0,      '~',      0 };
  /*
   * Hex digit decoder: unhex[b] is the numeric value (0..15) of byte b when b
   * is one of '0'..'9', 'A'..'F' or 'a'..'f', and -1 for every other byte.
   *
   * All 256 entries are written out on purpose. Elements without an explicit
   * initializer would be zero, which would make bytes 0x80..0xFF look like
   * the valid digit '0' instead of being rejected.
   */
  static const int8_t unhex[256] =
    {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 */
    , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30: '0'..'9' */
    ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40: 'A'..'F' */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 */
    ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60: 'a'..'f' */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xA0 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xB0 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xC0 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xD0 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xE0 */
    ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xF0 */
    };
  /*
   * URL character classes: normal_url_char[b] is 1 when the URL-parsing
   * states (path, query string, fragment) may consume byte b without looking
   * at it any further, and 0 when the byte has to be examined by the state's
   * own switch. The zero entries are the control characters (0..31), SP,
   * '#', '?', DEL and, implicitly, every byte from 128 to 255.
   */
  static const uint8_t normal_url_char[256] = {
    /* 0x00  nul .. si  */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10  dle .. us  */ 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20  sp  ..  /  */ 0, 1, 1, 0, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30   0  ..  ?  */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
    /* 0x40   @  ..  O  */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50   P  ..  _  */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60   `  ..  o  */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70   p  .. del */ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0
  };
  /*
   * States of the main parsing state machine. Values are assigned
   * sequentially starting at 1, so the relative order of the enumerators is
   * significant (see the note before the body states).
   */
  enum state {
    s_dead = 1, /* important that this is > 0 */

    /* message type not yet known */
    s_start_req_or_res,
    s_res_or_resp_H,

    /* response status line */
    s_start_res,
    s_res_H,
    s_res_HT,
    s_res_HTT,
    s_res_HTTP,
    s_res_first_http_major,
    s_res_http_major,
    s_res_first_http_minor,
    s_res_http_minor,
    s_res_first_status_code,
    s_res_status_code,
    s_res_status,
    s_res_line_almost_done,

    /* request line */
    s_start_req,
    s_req_method,
    s_req_spaces_before_url,
    s_req_schema,
    s_req_schema_slash,
    s_req_schema_slash_slash,
    s_req_host,
    s_req_port,
    s_req_path,
    s_req_query_string_start,
    s_req_query_string,
    s_req_fragment_start,
    s_req_fragment,
    s_req_http_start,
    s_req_http_H,
    s_req_http_HT,
    s_req_http_HTT,
    s_req_http_HTTP,
    s_req_first_http_major,
    s_req_http_major,
    s_req_first_http_minor,
    s_req_http_minor,
    s_req_line_almost_done,

    /* header lines */
    s_header_field_start,
    s_header_field,
    s_header_value_start,
    s_header_value,
    s_header_almost_done,
    s_headers_almost_done,

    /* Important: 's_headers_almost_done' must be the last 'header' state. All
     * states beyond this must be 'body' states. It is used for overflow
     * checking. See the PARSING_HEADER() macro.
     */
    s_chunk_size_start,
    s_chunk_size,
    s_chunk_size_almost_done,
    s_chunk_parameters,
    s_chunk_data,
    s_chunk_data_almost_done,
    s_chunk_data_done,
    s_body_identity,
    s_body_identity_eof
  };
  /*
   * True while the given state is a header state (anything up to and
   * including s_headers_almost_done) and the F_TRAILING flag is not set on
   * `parser`. The parsing loop uses it to decide whether a byte counts
   * towards the HTTP_MAX_HEADER_SIZE limit. The argument is parenthesized so
   * that a compound expression can be passed safely.
   */
  #define PARSING_HEADER(state) \
    ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
  /*
   * Sub-state kept while a header line is parsed. It records which of the
   * headers the parser cares about (and which of their values) the bytes
   * seen so far still match; h_general means "nothing special".
   */
  enum header_states {
    h_general = 0,

    /* shared prefix of "connection" and "content-length" */
    h_C,
    h_CO,
    h_CON,

    /* header name partially matched */
    h_matching_connection,
    h_matching_proxy_connection,
    h_matching_content_length,
    h_matching_transfer_encoding,
    h_matching_upgrade,

    /* header name fully matched */
    h_connection,
    h_content_length,
    h_transfer_encoding,
    h_upgrade,

    /* header value partially matched */
    h_matching_transfer_encoding_chunked,
    h_matching_connection_keep_alive,
    h_matching_connection_close,

    /* header value fully matched */
    h_transfer_encoding_chunked,
    h_connection_keep_alive,
    h_connection_close
  };
  /* Carriage return and line feed. */
  #define CR '\r'
  #define LF '\n'

  /* Sets bit 0x20 of c, which maps 'A'..'Z' onto 'a'..'z'. Other bytes are
   * changed too, so callers range-check the result against 'a'..'z'.
   * Argument and expansion are parenthesized so any expression can be used. */
  #define LOWER(c) ((unsigned char)((c) | 0x20))

  /* Looks c up in the tokens[] table: 0 if c is not a token character,
   * otherwise the lowercased character. The whole argument is converted to
   * unsigned char before indexing so the index always stays within 0..255. */
  #define TOKEN(c) (tokens[(unsigned char)(c)])

  /* First state for a new message, chosen from the parser's type. */
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)

  /*
   * STRICT_CHECK(cond) jumps to the enclosing function's `error` label when
   * cond holds, but only in strict builds; otherwise it expands to nothing.
   * It is wrapped in do/while(0) so it behaves as one statement and cannot
   * capture a following `else`.
   * NEW_MESSAGE() yields the state to continue in after a complete message:
   * in strict builds s_dead unless http_should_keep_alive(parser) is true,
   * otherwise always start_state.
   */
  #if HTTP_PARSER_STRICT
  # define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
  #else
  # define STRICT_CHECK(cond)
  # define NEW_MESSAGE() start_state
  #endif
  265. size_t http_parser_execute (http_parser *parser,
  266. const http_parser_settings *settings,
  267. const char *data,
  268. size_t len)
  269. {
  270. char c, ch;
  271. const char *p = data, *pe;
  272. int64_t to_read;
  273. enum state state = (enum state) parser->state;
  274. enum header_states header_state = (enum header_states) parser->header_state;
  275. uint64_t index = parser->index;
  276. uint64_t nread = parser->nread;
  277. if (len == 0) {
  278. if (state == s_body_identity_eof) {
  279. CALLBACK2(message_complete);
  280. }
  281. return 0;
  282. }
  283. /* technically we could combine all of these (except for url_mark) into one
  284. variable, saving stack space, but it seems more clear to have them
  285. separated. */
  286. const char *header_field_mark = 0;
  287. const char *header_value_mark = 0;
  288. const char *fragment_mark = 0;
  289. const char *query_string_mark = 0;
  290. const char *path_mark = 0;
  291. const char *url_mark = 0;
  292. if (state == s_header_field)
  293. header_field_mark = data;
  294. if (state == s_header_value)
  295. header_value_mark = data;
  296. if (state == s_req_fragment)
  297. fragment_mark = data;
  298. if (state == s_req_query_string)
  299. query_string_mark = data;
  300. if (state == s_req_path)
  301. path_mark = data;
  302. if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
  303. || state == s_req_schema_slash_slash || state == s_req_port
  304. || state == s_req_query_string_start || state == s_req_query_string
  305. || state == s_req_host
  306. || state == s_req_fragment_start || state == s_req_fragment)
  307. url_mark = data;
  308. for (p=data, pe=data+len; p != pe; p++) {
  309. ch = *p;
  310. if (PARSING_HEADER(state)) {
  311. ++nread;
  312. /* Buffer overflow attack */
  313. if (nread > HTTP_MAX_HEADER_SIZE) goto error;
  314. }
  315. switch (state) {
  316. case s_dead:
  317. /* this state is used after a 'Connection: close' message
  318. * the parser will error out if it reads another message
  319. */
  320. goto error;
  321. case s_start_req_or_res:
  322. {
  323. if (ch == CR || ch == LF)
  324. break;
  325. parser->flags = 0;
  326. parser->content_length = -1;
  327. CALLBACK2(message_begin);
  328. if (ch == 'H')
  329. state = s_res_or_resp_H;
  330. else {
  331. parser->type = HTTP_REQUEST;
  332. goto start_req_method_assign;
  333. }
  334. break;
  335. }
  336. case s_res_or_resp_H:
  337. if (ch == 'T') {
  338. parser->type = HTTP_RESPONSE;
  339. state = s_res_HT;
  340. } else {
  341. if (ch != 'E') goto error;
  342. parser->type = HTTP_REQUEST;
  343. parser->method = HTTP_HEAD;
  344. index = 2;
  345. state = s_req_method;
  346. }
  347. break;
  348. case s_start_res:
  349. {
  350. parser->flags = 0;
  351. parser->content_length = -1;
  352. CALLBACK2(message_begin);
  353. switch (ch) {
  354. case 'H':
  355. state = s_res_H;
  356. break;
  357. case CR:
  358. case LF:
  359. break;
  360. default:
  361. goto error;
  362. }
  363. break;
  364. }
  365. case s_res_H:
  366. STRICT_CHECK(ch != 'T');
  367. state = s_res_HT;
  368. break;
  369. case s_res_HT:
  370. STRICT_CHECK(ch != 'T');
  371. state = s_res_HTT;
  372. break;
  373. case s_res_HTT:
  374. STRICT_CHECK(ch != 'P');
  375. state = s_res_HTTP;
  376. break;
  377. case s_res_HTTP:
  378. STRICT_CHECK(ch != '/');
  379. state = s_res_first_http_major;
  380. break;
  381. case s_res_first_http_major:
  382. if (ch < '1' || ch > '9') goto error;
  383. parser->http_major = ch - '0';
  384. state = s_res_http_major;
  385. break;
  386. /* major HTTP version or dot */
  387. case s_res_http_major:
  388. {
  389. if (ch == '.') {
  390. state = s_res_first_http_minor;
  391. break;
  392. }
  393. if (ch < '0' || ch > '9') goto error;
  394. parser->http_major *= 10;
  395. parser->http_major += ch - '0';
  396. if (parser->http_major > 999) goto error;
  397. break;
  398. }
  399. /* first digit of minor HTTP version */
  400. case s_res_first_http_minor:
  401. if (ch < '0' || ch > '9') goto error;
  402. parser->http_minor = ch - '0';
  403. state = s_res_http_minor;
  404. break;
  405. /* minor HTTP version or end of request line */
  406. case s_res_http_minor:
  407. {
  408. if (ch == ' ') {
  409. state = s_res_first_status_code;
  410. break;
  411. }
  412. if (ch < '0' || ch > '9') goto error;
  413. parser->http_minor *= 10;
  414. parser->http_minor += ch - '0';
  415. if (parser->http_minor > 999) goto error;
  416. break;
  417. }
  418. case s_res_first_status_code:
  419. {
  420. if (ch < '0' || ch > '9') {
  421. if (ch == ' ') {
  422. break;
  423. }
  424. goto error;
  425. }
  426. parser->status_code = ch - '0';
  427. state = s_res_status_code;
  428. break;
  429. }
  430. case s_res_status_code:
  431. {
  432. if (ch < '0' || ch > '9') {
  433. switch (ch) {
  434. case ' ':
  435. state = s_res_status;
  436. break;
  437. case CR:
  438. state = s_res_line_almost_done;
  439. break;
  440. case LF:
  441. state = s_header_field_start;
  442. break;
  443. default:
  444. goto error;
  445. }
  446. break;
  447. }
  448. parser->status_code *= 10;
  449. parser->status_code += ch - '0';
  450. if (parser->status_code > 999) goto error;
  451. break;
  452. }
  453. case s_res_status:
  454. /* the human readable status. e.g. "NOT FOUND"
  455. * we are not humans so just ignore this */
  456. if (ch == CR) {
  457. state = s_res_line_almost_done;
  458. break;
  459. }
  460. if (ch == LF) {
  461. state = s_header_field_start;
  462. break;
  463. }
  464. break;
  465. case s_res_line_almost_done:
  466. STRICT_CHECK(ch != LF);
  467. state = s_header_field_start;
  468. break;
  469. case s_start_req:
  470. {
  471. if (ch == CR || ch == LF)
  472. break;
  473. parser->flags = 0;
  474. parser->content_length = -1;
  475. CALLBACK2(message_begin);
  476. if (ch < 'A' || 'Z' < ch) goto error;
  477. start_req_method_assign:
  478. parser->method = (enum http_method) 0;
  479. index = 1;
  480. switch (ch) {
  481. case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
  482. case 'D': parser->method = HTTP_DELETE; break;
  483. case 'G': parser->method = HTTP_GET; break;
  484. case 'H': parser->method = HTTP_HEAD; break;
  485. case 'L': parser->method = HTTP_LOCK; break;
  486. case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
  487. case 'N': parser->method = HTTP_NOTIFY; break;
  488. case 'O': parser->method = HTTP_OPTIONS; break;
  489. case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
  490. case 'R': parser->method = HTTP_REPORT; break;
  491. case 'S': parser->method = HTTP_SUBSCRIBE; break;
  492. case 'T': parser->method = HTTP_TRACE; break;
  493. case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
  494. default: goto error;
  495. }
  496. state = s_req_method;
  497. break;
  498. }
  499. case s_req_method:
  500. {
  501. if (ch == '\0')
  502. goto error;
  503. const char *matcher = method_strings[parser->method];
  504. if (ch == ' ' && matcher[index] == '\0') {
  505. state = s_req_spaces_before_url;
  506. } else if (ch == matcher[index]) {
  507. ; /* nada */
  508. } else if (parser->method == HTTP_CONNECT) {
  509. if (index == 1 && ch == 'H') {
  510. parser->method = HTTP_CHECKOUT;
  511. } else if (index == 2 && ch == 'P') {
  512. parser->method = HTTP_COPY;
  513. }
  514. } else if (parser->method == HTTP_MKCOL) {
  515. if (index == 1 && ch == 'O') {
  516. parser->method = HTTP_MOVE;
  517. } else if (index == 1 && ch == 'E') {
  518. parser->method = HTTP_MERGE;
  519. } else if (index == 1 && ch == '-') {
  520. parser->method = HTTP_MSEARCH;
  521. } else if (index == 2 && ch == 'A') {
  522. parser->method = HTTP_MKACTIVITY;
  523. }
  524. } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
  525. parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
  526. } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
  527. parser->method = HTTP_PUT;
  528. } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
  529. parser->method = HTTP_UNSUBSCRIBE;
  530. } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
  531. parser->method = HTTP_PROPPATCH;
  532. } else {
  533. goto error;
  534. }
  535. ++index;
  536. break;
  537. }
  538. case s_req_spaces_before_url:
  539. {
  540. if (ch == ' ') break;
  541. if (ch == '/' || ch == '*') {
  542. MARK(url);
  543. MARK(path);
  544. state = s_req_path;
  545. break;
  546. }
  547. c = LOWER(ch);
  548. if (c >= 'a' && c <= 'z') {
  549. MARK(url);
  550. state = s_req_schema;
  551. break;
  552. }
  553. goto error;
  554. }
  555. case s_req_schema:
  556. {
  557. c = LOWER(ch);
  558. if (c >= 'a' && c <= 'z') break;
  559. if (ch == ':') {
  560. state = s_req_schema_slash;
  561. break;
  562. } else if (ch == '.') {
  563. state = s_req_host;
  564. break;
  565. } else if ('0' <= ch && ch <= '9') {
  566. state = s_req_host;
  567. break;
  568. }
  569. goto error;
  570. }
  571. case s_req_schema_slash:
  572. STRICT_CHECK(ch != '/');
  573. state = s_req_schema_slash_slash;
  574. break;
  575. case s_req_schema_slash_slash:
  576. STRICT_CHECK(ch != '/');
  577. state = s_req_host;
  578. break;
  579. case s_req_host:
  580. {
  581. c = LOWER(ch);
  582. if (c >= 'a' && c <= 'z') break;
  583. if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
  584. switch (ch) {
  585. case ':':
  586. state = s_req_port;
  587. break;
  588. case '/':
  589. MARK(path);
  590. state = s_req_path;
  591. break;
  592. case ' ':
  593. /* The request line looks like:
  594. * "GET http://foo.bar.com HTTP/1.1"
  595. * That is, there is no path.
  596. */
  597. CALLBACK(url);
  598. state = s_req_http_start;
  599. break;
  600. default:
  601. goto error;
  602. }
  603. break;
  604. }
  605. case s_req_port:
  606. {
  607. if (ch >= '0' && ch <= '9') break;
  608. switch (ch) {
  609. case '/':
  610. MARK(path);
  611. state = s_req_path;
  612. break;
  613. case ' ':
  614. /* The request line looks like:
  615. * "GET http://foo.bar.com:1234 HTTP/1.1"
  616. * That is, there is no path.
  617. */
  618. CALLBACK(url);
  619. state = s_req_http_start;
  620. break;
  621. default:
  622. goto error;
  623. }
  624. break;
  625. }
  626. case s_req_path:
  627. {
  628. if (normal_url_char[(unsigned char)ch]) break;
  629. switch (ch) {
  630. case ' ':
  631. CALLBACK(url);
  632. CALLBACK(path);
  633. state = s_req_http_start;
  634. break;
  635. case CR:
  636. CALLBACK(url);
  637. CALLBACK(path);
  638. parser->http_major = 0;
  639. parser->http_minor = 9;
  640. state = s_req_line_almost_done;
  641. break;
  642. case LF:
  643. CALLBACK(url);
  644. CALLBACK(path);
  645. parser->http_major = 0;
  646. parser->http_minor = 9;
  647. state = s_header_field_start;
  648. break;
  649. case '?':
  650. CALLBACK(path);
  651. state = s_req_query_string_start;
  652. break;
  653. case '#':
  654. CALLBACK(path);
  655. state = s_req_fragment_start;
  656. break;
  657. default:
  658. goto error;
  659. }
  660. break;
  661. }
  662. case s_req_query_string_start:
  663. {
  664. if (normal_url_char[(unsigned char)ch]) {
  665. MARK(query_string);
  666. state = s_req_query_string;
  667. break;
  668. }
  669. switch (ch) {
  670. case '?':
  671. break; /* XXX ignore extra '?' ... is this right? */
  672. case ' ':
  673. CALLBACK(url);
  674. state = s_req_http_start;
  675. break;
  676. case CR:
  677. CALLBACK(url);
  678. parser->http_major = 0;
  679. parser->http_minor = 9;
  680. state = s_req_line_almost_done;
  681. break;
  682. case LF:
  683. CALLBACK(url);
  684. parser->http_major = 0;
  685. parser->http_minor = 9;
  686. state = s_header_field_start;
  687. break;
  688. case '#':
  689. state = s_req_fragment_start;
  690. break;
  691. default:
  692. goto error;
  693. }
  694. break;
  695. }
  696. case s_req_query_string:
  697. {
  698. if (normal_url_char[(unsigned char)ch]) break;
  699. switch (ch) {
  700. case '?':
  701. /* allow extra '?' in query string */
  702. break;
  703. case ' ':
  704. CALLBACK(url);
  705. CALLBACK(query_string);
  706. state = s_req_http_start;
  707. break;
  708. case CR:
  709. CALLBACK(url);
  710. CALLBACK(query_string);
  711. parser->http_major = 0;
  712. parser->http_minor = 9;
  713. state = s_req_line_almost_done;
  714. break;
  715. case LF:
  716. CALLBACK(url);
  717. CALLBACK(query_string);
  718. parser->http_major = 0;
  719. parser->http_minor = 9;
  720. state = s_header_field_start;
  721. break;
  722. case '#':
  723. CALLBACK(query_string);
  724. state = s_req_fragment_start;
  725. break;
  726. default:
  727. goto error;
  728. }
  729. break;
  730. }
  731. case s_req_fragment_start:
  732. {
  733. if (normal_url_char[(unsigned char)ch]) {
  734. MARK(fragment);
  735. state = s_req_fragment;
  736. break;
  737. }
  738. switch (ch) {
  739. case ' ':
  740. CALLBACK(url);
  741. state = s_req_http_start;
  742. break;
  743. case CR:
  744. CALLBACK(url);
  745. parser->http_major = 0;
  746. parser->http_minor = 9;
  747. state = s_req_line_almost_done;
  748. break;
  749. case LF:
  750. CALLBACK(url);
  751. parser->http_major = 0;
  752. parser->http_minor = 9;
  753. state = s_header_field_start;
  754. break;
  755. case '?':
  756. MARK(fragment);
  757. state = s_req_fragment;
  758. break;
  759. case '#':
  760. break;
  761. default:
  762. goto error;
  763. }
  764. break;
  765. }
  766. case s_req_fragment:
  767. {
  768. if (normal_url_char[(unsigned char)ch]) break;
  769. switch (ch) {
  770. case ' ':
  771. CALLBACK(url);
  772. CALLBACK(fragment);
  773. state = s_req_http_start;
  774. break;
  775. case CR:
  776. CALLBACK(url);
  777. CALLBACK(fragment);
  778. parser->http_major = 0;
  779. parser->http_minor = 9;
  780. state = s_req_line_almost_done;
  781. break;
  782. case LF:
  783. CALLBACK(url);
  784. CALLBACK(fragment);
  785. parser->http_major = 0;
  786. parser->http_minor = 9;
  787. state = s_header_field_start;
  788. break;
  789. case '?':
  790. case '#':
  791. break;
  792. default:
  793. goto error;
  794. }
  795. break;
  796. }
  797. case s_req_http_start:
  798. switch (ch) {
  799. case 'H':
  800. state = s_req_http_H;
  801. break;
  802. case ' ':
  803. break;
  804. default:
  805. goto error;
  806. }
  807. break;
  808. case s_req_http_H:
  809. STRICT_CHECK(ch != 'T');
  810. state = s_req_http_HT;
  811. break;
  812. case s_req_http_HT:
  813. STRICT_CHECK(ch != 'T');
  814. state = s_req_http_HTT;
  815. break;
  816. case s_req_http_HTT:
  817. STRICT_CHECK(ch != 'P');
  818. state = s_req_http_HTTP;
  819. break;
  820. case s_req_http_HTTP:
  821. STRICT_CHECK(ch != '/');
  822. state = s_req_first_http_major;
  823. break;
  824. /* first digit of major HTTP version */
  825. case s_req_first_http_major:
  826. if (ch < '1' || ch > '9') goto error;
  827. parser->http_major = ch - '0';
  828. state = s_req_http_major;
  829. break;
  830. /* major HTTP version or dot */
  831. case s_req_http_major:
  832. {
  833. if (ch == '.') {
  834. state = s_req_first_http_minor;
  835. break;
  836. }
  837. if (ch < '0' || ch > '9') goto error;
  838. parser->http_major *= 10;
  839. parser->http_major += ch - '0';
  840. if (parser->http_major > 999) goto error;
  841. break;
  842. }
  843. /* first digit of minor HTTP version */
  844. case s_req_first_http_minor:
  845. if (ch < '0' || ch > '9') goto error;
  846. parser->http_minor = ch - '0';
  847. state = s_req_http_minor;
  848. break;
  849. /* minor HTTP version or end of request line */
  850. case s_req_http_minor:
  851. {
  852. if (ch == CR) {
  853. state = s_req_line_almost_done;
  854. break;
  855. }
  856. if (ch == LF) {
  857. state = s_header_field_start;
  858. break;
  859. }
  860. /* XXX allow spaces after digit? */
  861. if (ch < '0' || ch > '9') goto error;
  862. parser->http_minor *= 10;
  863. parser->http_minor += ch - '0';
  864. if (parser->http_minor > 999) goto error;
  865. break;
  866. }
  867. /* end of request line */
  868. case s_req_line_almost_done:
  869. {
  870. if (ch != LF) goto error;
  871. state = s_header_field_start;
  872. break;
  873. }
  874. case s_header_field_start:
  875. {
  876. if (ch == CR) {
  877. state = s_headers_almost_done;
  878. break;
  879. }
  880. if (ch == LF) {
  881. /* they might be just sending \n instead of \r\n so this would be
  882. * the second \n to denote the end of headers*/
  883. state = s_headers_almost_done;
  884. goto headers_almost_done;
  885. }
  886. c = TOKEN(ch);
  887. if (!c) goto error;
  888. MARK(header_field);
  889. index = 0;
  890. state = s_header_field;
  891. switch (c) {
  892. case 'c':
  893. header_state = h_C;
  894. break;
  895. case 'p':
  896. header_state = h_matching_proxy_connection;
  897. break;
  898. case 't':
  899. header_state = h_matching_transfer_encoding;
  900. break;
  901. case 'u':
  902. header_state = h_matching_upgrade;
  903. break;
  904. default:
  905. header_state = h_general;
  906. break;
  907. }
  908. break;
  909. }
  910. case s_header_field:
  911. {
  912. c = TOKEN(ch);
  913. if (c) {
  914. switch (header_state) {
  915. case h_general:
  916. break;
  917. case h_C:
  918. index++;
  919. header_state = (c == 'o' ? h_CO : h_general);
  920. break;
  921. case h_CO:
  922. index++;
  923. header_state = (c == 'n' ? h_CON : h_general);
  924. break;
  925. case h_CON:
  926. index++;
  927. switch (c) {
  928. case 'n':
  929. header_state = h_matching_connection;
  930. break;
  931. case 't':
  932. header_state = h_matching_content_length;
  933. break;
  934. default:
  935. header_state = h_general;
  936. break;
  937. }
  938. break;
  939. /* connection */
  940. case h_matching_connection:
  941. index++;
  942. if (index > sizeof(CONNECTION)-1
  943. || c != CONNECTION[index]) {
  944. header_state = h_general;
  945. } else if (index == sizeof(CONNECTION)-2) {
  946. header_state = h_connection;
  947. }
  948. break;
  949. /* proxy-connection */
  950. case h_matching_proxy_connection:
  951. index++;
  952. if (index > sizeof(PROXY_CONNECTION)-1
  953. || c != PROXY_CONNECTION[index]) {
  954. header_state = h_general;
  955. } else if (index == sizeof(PROXY_CONNECTION)-2) {
  956. header_state = h_connection;
  957. }
  958. break;
  959. /* content-length */
  960. case h_matching_content_length:
  961. index++;
  962. if (index > sizeof(CONTENT_LENGTH)-1
  963. || c != CONTENT_LENGTH[index]) {
  964. header_state = h_general;
  965. } else if (index == sizeof(CONTENT_LENGTH)-2) {
  966. header_state = h_content_length;
  967. }
  968. break;
  969. /* transfer-encoding */
  970. case h_matching_transfer_encoding:
  971. index++;
  972. if (index > sizeof(TRANSFER_ENCODING)-1
  973. || c != TRANSFER_ENCODING[index]) {
  974. header_state = h_general;
  975. } else if (index == sizeof(TRANSFER_ENCODING)-2) {
  976. header_state = h_transfer_encoding;
  977. }
  978. break;
  979. /* upgrade */
  980. case h_matching_upgrade:
  981. index++;
  982. if (index > sizeof(UPGRADE)-1
  983. || c != UPGRADE[index]) {
  984. header_state = h_general;
  985. } else if (index == sizeof(UPGRADE)-2) {
  986. header_state = h_upgrade;
  987. }
  988. break;
  989. case h_connection:
  990. case h_content_length:
  991. case h_transfer_encoding:
  992. case h_upgrade:
  993. if (ch != ' ') header_state = h_general;
  994. break;
  995. default:
  996. assert(0 && "Unknown header_state");
  997. break;
  998. }
  999. break;
  1000. }
  1001. if (ch == ':') {
  1002. CALLBACK(header_field);
  1003. state = s_header_value_start;
  1004. break;
  1005. }
  1006. if (ch == CR) {
  1007. state = s_header_almost_done;
  1008. CALLBACK(header_field);
  1009. break;
  1010. }
  1011. if (ch == LF) {
  1012. CALLBACK(header_field);
  1013. state = s_header_field_start;
  1014. break;
  1015. }
  1016. goto error;
  1017. }
  1018. case s_header_value_start:
  1019. {
  1020. if (ch == ' ') break;
  1021. MARK(header_value);
  1022. state = s_header_value;
  1023. index = 0;
  1024. c = LOWER(ch);
  1025. if (ch == CR) {
  1026. CALLBACK(header_value);
  1027. header_state = h_general;
  1028. state = s_header_almost_done;
  1029. break;
  1030. }
  1031. if (ch == LF) {
  1032. CALLBACK(header_value);
  1033. state = s_header_field_start;
  1034. break;
  1035. }
  1036. switch (header_state) {
  1037. case h_upgrade:
  1038. parser->flags |= F_UPGRADE;
  1039. header_state = h_general;
  1040. break;
  1041. case h_transfer_encoding:
  1042. /* looking for 'Transfer-Encoding: chunked' */
  1043. if ('c' == c) {
  1044. header_state = h_matching_transfer_encoding_chunked;
  1045. } else {
  1046. header_state = h_general;
  1047. }
  1048. break;
  1049. case h_content_length:
  1050. if (ch < '0' || ch > '9') goto error;
  1051. parser->content_length = ch - '0';
  1052. break;
  1053. case h_connection:
  1054. /* looking for 'Connection: keep-alive' */
  1055. if (c == 'k') {
  1056. header_state = h_matching_connection_keep_alive;
  1057. /* looking for 'Connection: close' */
  1058. } else if (c == 'c') {
  1059. header_state = h_matching_connection_close;
  1060. } else {
  1061. header_state = h_general;
  1062. }
  1063. break;
  1064. default:
  1065. header_state = h_general;
  1066. break;
  1067. }
  1068. break;
  1069. }
  1070. case s_header_value:
  1071. {
  1072. c = LOWER(ch);
  1073. if (ch == CR) {
  1074. CALLBACK(header_value);
  1075. state = s_header_almost_done;
  1076. break;
  1077. }
  1078. if (ch == LF) {
  1079. CALLBACK(header_value);
  1080. goto header_almost_done;
  1081. }
  1082. switch (header_state) {
  1083. case h_general:
  1084. break;
  1085. case h_connection:
  1086. case h_transfer_encoding:
  1087. assert(0 && "Shouldn't get here.");
  1088. break;
  1089. case h_content_length:
  1090. if (ch == ' ') break;
  1091. if (ch < '0' || ch > '9') goto error;
  1092. parser->content_length *= 10;
  1093. parser->content_length += ch - '0';
  1094. break;
  1095. /* Transfer-Encoding: chunked */
  1096. case h_matching_transfer_encoding_chunked:
  1097. index++;
  1098. if (index > sizeof(CHUNKED)-1
  1099. || c != CHUNKED[index]) {
  1100. header_state = h_general;
  1101. } else if (index == sizeof(CHUNKED)-2) {
  1102. header_state = h_transfer_encoding_chunked;
  1103. }
  1104. break;
  1105. /* looking for 'Connection: keep-alive' */
  1106. case h_matching_connection_keep_alive:
  1107. index++;
  1108. if (index > sizeof(KEEP_ALIVE)-1
  1109. || c != KEEP_ALIVE[index]) {
  1110. header_state = h_general;
  1111. } else if (index == sizeof(KEEP_ALIVE)-2) {
  1112. header_state = h_connection_keep_alive;
  1113. }
  1114. break;
  1115. /* looking for 'Connection: close' */
  1116. case h_matching_connection_close:
  1117. index++;
  1118. if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
  1119. header_state = h_general;
  1120. } else if (index == sizeof(CLOSE)-2) {
  1121. header_state = h_connection_close;
  1122. }
  1123. break;
  1124. case h_transfer_encoding_chunked:
  1125. case h_connection_keep_alive:
  1126. case h_connection_close:
  1127. if (ch != ' ') header_state = h_general;
  1128. break;
  1129. default:
  1130. state = s_header_value;
  1131. header_state = h_general;
  1132. break;
  1133. }
  1134. break;
  1135. }
  1136. case s_header_almost_done:
  1137. header_almost_done:
  1138. {
  1139. STRICT_CHECK(ch != LF);
  1140. state = s_header_field_start;
  1141. switch (header_state) {
  1142. case h_connection_keep_alive:
  1143. parser->flags |= F_CONNECTION_KEEP_ALIVE;
  1144. break;
  1145. case h_connection_close:
  1146. parser->flags |= F_CONNECTION_CLOSE;
  1147. break;
  1148. case h_transfer_encoding_chunked:
  1149. parser->flags |= F_CHUNKED;
  1150. break;
  1151. default:
  1152. break;
  1153. }
  1154. break;
  1155. }
  1156. case s_headers_almost_done:
  1157. headers_almost_done:
  1158. {
  1159. STRICT_CHECK(ch != LF);
  1160. if (parser->flags & F_TRAILING) {
  1161. /* End of a chunked request */
  1162. CALLBACK2(message_complete);
  1163. state = NEW_MESSAGE();
  1164. break;
  1165. }
  1166. nread = 0;
  1167. if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
  1168. parser->upgrade = 1;
  1169. }
  1170. /* Here we call the headers_complete callback. This is somewhat
  1171. * different than other callbacks because if the user returns 1, we
  1172. * will interpret that as saying that this message has no body. This
  1173. * is needed for the annoying case of receiving a response to a HEAD
  1174. * request.
  1175. */
  1176. if (settings->on_headers_complete) {
  1177. switch (settings->on_headers_complete(parser)) {
  1178. case 0:
  1179. break;
  1180. case 1:
  1181. parser->flags |= F_SKIPBODY;
  1182. break;
  1183. default:
  1184. return p - data; /* Error */
  1185. }
  1186. }
  1187. /* Exit, the rest of the connect is in a different protocol. */
  1188. if (parser->upgrade) {
  1189. CALLBACK2(message_complete);
  1190. return (p - data);
  1191. }
  1192. if (parser->flags & F_SKIPBODY) {
  1193. CALLBACK2(message_complete);
  1194. state = NEW_MESSAGE();
  1195. } else if (parser->flags & F_CHUNKED) {
  1196. /* chunked encoding - ignore Content-Length header */
  1197. state = s_chunk_size_start;
  1198. } else {
  1199. if (parser->content_length == 0) {
  1200. /* Content-Length header given but zero: Content-Length: 0\r\n */
  1201. CALLBACK2(message_complete);
  1202. state = NEW_MESSAGE();
  1203. } else if (parser->content_length > 0) {
  1204. /* Content-Length header given and non-zero */
  1205. state = s_body_identity;
  1206. } else {
  1207. if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
  1208. /* Assume content-length 0 - read the next */
  1209. CALLBACK2(message_complete);
  1210. state = NEW_MESSAGE();
  1211. } else {
  1212. /* Read body until EOF */
  1213. state = s_body_identity_eof;
  1214. }
  1215. }
  1216. }
  1217. break;
  1218. }
  1219. case s_body_identity:
  1220. to_read = MIN(pe - p, (int64_t)parser->content_length);
  1221. if (to_read > 0) {
  1222. if (settings->on_body) settings->on_body(parser, p, to_read);
  1223. p += to_read - 1;
  1224. parser->content_length -= to_read;
  1225. if (parser->content_length == 0) {
  1226. CALLBACK2(message_complete);
  1227. state = NEW_MESSAGE();
  1228. }
  1229. }
  1230. break;
  1231. /* read until EOF */
  1232. case s_body_identity_eof:
  1233. to_read = pe - p;
  1234. if (to_read > 0) {
  1235. if (settings->on_body) settings->on_body(parser, p, to_read);
  1236. p += to_read - 1;
  1237. }
  1238. break;
  1239. case s_chunk_size_start:
  1240. {
  1241. assert(parser->flags & F_CHUNKED);
  1242. c = unhex[(unsigned char)ch];
  1243. if (c == -1) goto error;
  1244. parser->content_length = c;
  1245. state = s_chunk_size;
  1246. break;
  1247. }
  1248. case s_chunk_size:
  1249. {
  1250. assert(parser->flags & F_CHUNKED);
  1251. if (ch == CR) {
  1252. state = s_chunk_size_almost_done;
  1253. break;
  1254. }
  1255. c = unhex[(unsigned char)ch];
  1256. if (c == -1) {
  1257. if (ch == ';' || ch == ' ') {
  1258. state = s_chunk_parameters;
  1259. break;
  1260. }
  1261. goto error;
  1262. }
  1263. parser->content_length *= 16;
  1264. parser->content_length += c;
  1265. break;
  1266. }
  1267. case s_chunk_parameters:
  1268. {
  1269. assert(parser->flags & F_CHUNKED);
  1270. /* just ignore this shit. TODO check for overflow */
  1271. if (ch == CR) {
  1272. state = s_chunk_size_almost_done;
  1273. break;
  1274. }
  1275. break;
  1276. }
  1277. case s_chunk_size_almost_done:
  1278. {
  1279. assert(parser->flags & F_CHUNKED);
  1280. STRICT_CHECK(ch != LF);
  1281. if (parser->content_length == 0) {
  1282. parser->flags |= F_TRAILING;
  1283. state = s_header_field_start;
  1284. } else {
  1285. state = s_chunk_data;
  1286. }
  1287. break;
  1288. }
  1289. case s_chunk_data:
  1290. {
  1291. assert(parser->flags & F_CHUNKED);
  1292. to_read = MIN(pe - p, (int64_t)(parser->content_length));
  1293. if (to_read > 0) {
  1294. if (settings->on_body) settings->on_body(parser, p, to_read);
  1295. p += to_read - 1;
  1296. }
  1297. if (to_read == parser->content_length) {
  1298. state = s_chunk_data_almost_done;
  1299. }
  1300. parser->content_length -= to_read;
  1301. break;
  1302. }
  1303. case s_chunk_data_almost_done:
  1304. assert(parser->flags & F_CHUNKED);
  1305. STRICT_CHECK(ch != CR);
  1306. state = s_chunk_data_done;
  1307. break;
  1308. case s_chunk_data_done:
  1309. assert(parser->flags & F_CHUNKED);
  1310. STRICT_CHECK(ch != LF);
  1311. state = s_chunk_size_start;
  1312. break;
  1313. default:
  1314. assert(0 && "unhandled state");
  1315. goto error;
  1316. }
  1317. }
  1318. CALLBACK_NOCLEAR(header_field);
  1319. CALLBACK_NOCLEAR(header_value);
  1320. CALLBACK_NOCLEAR(fragment);
  1321. CALLBACK_NOCLEAR(query_string);
  1322. CALLBACK_NOCLEAR(path);
  1323. CALLBACK_NOCLEAR(url);
  1324. parser->state = state;
  1325. parser->header_state = header_state;
  1326. parser->index = index;
  1327. parser->nread = nread;
  1328. return len;
  1329. error:
  1330. parser->state = s_dead;
  1331. return (p - data);
  1332. }
  1333. int
  1334. http_should_keep_alive (http_parser *parser)
  1335. {
  1336. if (parser->http_major > 0 && parser->http_minor > 0) {
  1337. /* HTTP/1.1 */
  1338. if (parser->flags & F_CONNECTION_CLOSE) {
  1339. return 0;
  1340. } else {
  1341. return 1;
  1342. }
  1343. } else {
  1344. /* HTTP/1.0 or earlier */
  1345. if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
  1346. return 1;
  1347. } else {
  1348. return 0;
  1349. }
  1350. }
  1351. }
  1352. const char * http_method_str (enum http_method m)
  1353. {
  1354. return method_strings[m];
  1355. }
  1356. void
  1357. http_parser_init (http_parser *parser, enum http_parser_type t)
  1358. {
  1359. parser->type = t;
  1360. parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
  1361. parser->nread = 0;
  1362. parser->upgrade = 0;
  1363. parser->flags = 0;
  1364. parser->method = 0;
  1365. }