/src/kilim/http/HttpRequestParser.rl

http://github.com/kilim/kilim · Unknown · 335 lines · 283 code · 52 blank · 0 comment · 0 complexity · 7b07e07ec3dfb09fd1dc7ac8406c759d MD5 · raw file

  1. /* Copyright (c) 2006, Sriram Srinivasan
  2. *
  3. * You may distribute this software under the terms of the license
  4. * specified in the file "License"
  5. */
  6. package kilim.http;
  7. /**
  8. * --- DO NOT EDIT -----
  9. * HttpRequestParser.java generated from RAGEL (http://www.complang.org/ragel/) from the
  10. * specification file HttpRequestParser.rl. All changes must be made in the .rl file.
  11. **/
  12. import java.util.TimeZone;
  13. import java.util.GregorianCalendar;
  14. import java.nio.charset.Charset;
  15. import java.nio.ByteBuffer;
  16. import java.io.UnsupportedEncodingException;
  17. import java.io.IOException;
  18. import java.net.URLDecoder;
  19. public class HttpRequestParser {
  /** Charset obtained by the name "UTF-8"; used in this file to render the raw header bytes in the malformed-header error message. */
  public static final Charset UTF8 = Charset.forName("UTF-8");
  %%{
    # A variation of the Ragel grammar from Zed Shaw's mongrel parser. Thanks, Zed.
    #
    # Recognizes an HTTP request header: request line, header fields, blank line.
    # The actions below are expanded inside initHeader() and use its locals
    # (req, data, p, pe, mark, query_start, field_name).
    machine http_parser;

    # Remember where the element currently being scanned starts.
    action mark { mark = fpc; }

    # Remember where the query string starts.
    action start_query { query_start = fpc; }

    action extract_field_name {
      // kw_lookup takes (data, start, len) and scans up to start + len,
      // so hand it the length of the name, not its end offset.
      field_name = kw_lookup(data, mark, fpc - mark);
      if (field_name == null) { // not a known keyword
        field_name = req.extractRange(mark, fpc);
      }
    }

    # Store the header value as an encoded (start, end) range under the current field name.
    action extract_value {
      int value = encodeRange(mark, fpc);
      req.addField(field_name, value);
    }

    action request_path {
      req.uriPath = req.extractRange(mark, fpc);
      String rawPath = req.uriPath;
      // Decoding is only needed when the path holds an escape or a non-ASCII character.
      boolean needsDecode = false;
      for (int i = 0, n = rawPath.length(); i < n && !needsDecode; i++) {
        char c = rawPath.charAt(i);
        needsDecode = (c == '%' || c > 0x7F);
      }
      if (needsDecode) {
        try {
          // URLDecoder performs form decoding and would turn '+' into ' ', which is
          // only correct for the query part of a URL. Escape every literal '+' first
          // so that it comes back out of the decoder unchanged.
          req.uriPath = URLDecoder.decode(rawPath.replace("+", "%2B"), "UTF-8");
        } catch (UnsupportedEncodingException ignored) {
          // Deliberately ignored: the path is left undecoded, as before.
        } catch (IllegalArgumentException badEscape) {
          // A malformed escape such as "%zz" is bad input, not a programming error;
          // report it with the exception type initHeader() declares.
          throw new IOException("Malformed escape in request path: " + rawPath, badEscape);
        }
      }
    }

    # Debug helper. Not attached to any rule: the %uri embedding on the uri rule is commented out.
    action uri {
      System.out.println("URI::::" + req.extractRange(mark, fpc));
      //req.uri = req.extractRange(mark, fpc);
    }

    action end_query {
      req.queryStringRange = encodeRange(query_start, fpc);
    }

    action fragment {
      req.uriFragmentRange = encodeRange(mark, fpc);
    }

    action version {
      req.versionRange = encodeRange(mark, fpc);
    }

    CRLF = "\r\n";

    # character types
    CTL = (cntrl | 127);
    safe = ("$" | "-" | "_" | ".");
    extra = ("!" | "*" | "'" | "(" | ")" | ",");
    reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
    unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
    national = any -- (alpha | digit | reserved | extra | safe | unsafe);
    unreserved = (alpha | digit | safe | extra | national);
    escape = ("%" xdigit xdigit);
    uchar = (unreserved | escape);
    pchar = (uchar | ":" | "@" | "&" | "=" | "+");
    tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");

    # elements
    token = (ascii -- (CTL | tspecials));

    # URI schemes and absolute paths
    scheme = ( alpha | digit | "+" | "-" | "." )* ;
    absolute_uri = (scheme ":" (uchar | reserved )*);
    #path = ( pchar+ ( "/" pchar* )* ) ;
    query = ( uchar | reserved )* %end_query ;
    param = ( pchar | "/" )* ;
    params = ( param ( ";" param )* ) ;
    path = (any -- [ ?#;])+ %request_path ;
    rel_path = ( path? (";" params)? ) ("?" %start_query query)?;
    absolute_path = ( "/"+ rel_path );
    uri = ( absolute_uri | absolute_path ) >mark ;# %uri;
    fragment = ( uchar | reserved )* >mark %fragment;

    # header fields
    field_name = ( any -- ":" )+ >mark %extract_field_name;
    field_value = any* >mark %extract_value;
    fields = field_name ":" " "* field_value :> CRLF;

    # request methods; matching is case-insensitive, the stored name is upper case
    get = 'GET'i @{req.method = "GET";};
    post = 'POST'i @{req.method = "POST";};
    delete = 'DELETE'i @{req.method = "DELETE";};
    head = 'HEAD'i @{req.method = "HEAD";};
    put = 'PUT'i @{req.method = "PUT";};
    options = 'OPTIONS'i @{req.method = "OPTIONS";};
    method = get | delete | post | put | head | options;

    version = "HTTP/" ( digit+ "." digit+ ) >mark %version;
    start_line = ( method " "+ uri ("#" fragment){0,1} " "+ version CRLF ) ;
    header = start_line ( fields )* CRLF ;
    main := header %err{err("Malformed Header. Error at " + p + "\n" + new String(data, 0, pe, UTF8));};
  }%%
  110. %% write data;
  111. public static void err(String msg) throws IOException{
  112. throw new IOException(msg);
  113. }
  114. public static void initHeader(HttpRequest req, int headerLength) throws IOException {
  115. ByteBuffer bb = req.buffer;
  116. /* required variables */
  117. byte[] data = bb.array();
  118. int p = 0;
  119. int pe = headerLength;
  120. // int eof = pe;
  121. int cs = 0;
  122. // variables used by actions in http_req_parser machine above.
  123. int query_start = 0;
  124. int mark = 0;
  125. String field_name = "";
  126. %% write init;
  127. %% write exec;
  128. if (cs == http_parser_error) {
  129. throw new IOException("Malformed HTTP Header. p = " + p +", cs = " + cs);
  130. }
  131. }
  132. /**
  133. * encode the start pos and length as ints;
  134. */
  135. public static int encodeRange(int start, int end) {
  136. return (start << 16) + end ;
  137. }
  %%{
    # Scanner over header field names. Each pattern matches one header name
    # case-insensitively (the trailing 'i') and assigns the spelling shown on the
    # right to the host variable kw, which kw_lookup() declares and returns.
    #
    # NOTE(review): kw_lookup() returns kw without checking that the whole range
    # was consumed, so a name that merely starts with one of these keywords is
    # presumably reported as that keyword — confirm against the generated scanner.
    machine http_keywords;
    main := |*
      'Accept'i => { kw = "Accept";};
      'Accept-Charset'i => { kw = "Accept-Charset";};
      'Accept-Encoding'i => { kw = "Accept-Encoding";};
      'Accept-Language'i => { kw = "Accept-Language";};
      'Accept-Ranges'i => { kw = "Accept-Ranges";};
      'Age'i => { kw = "Age";};
      'Allow'i => { kw = "Allow";};
      'Authorization'i => { kw = "Authorization";};
      'Cache-Control'i => { kw = "Cache-Control";};
      'Connection'i => { kw = "Connection";};
      'Content-Encoding'i => { kw = "Content-Encoding";};
      'Content-Language'i => { kw = "Content-Language";};
      'Content-Length'i => { kw = "Content-Length";};
      'Content-Location'i => { kw = "Content-Location";};
      'Content-MD5'i => { kw = "Content-MD5";};
      'Content-Range'i => { kw = "Content-Range";};
      'Content-Type'i => { kw = "Content-Type";};
      'Date'i => { kw = "Date";};
      'ETag'i => { kw = "ETag";};
      'Expect'i => { kw = "Expect";};
      'Expires'i => { kw = "Expires";};
      'From'i => { kw = "From";};
      'Host'i => { kw = "Host";};
      'If-Match'i => { kw = "If-Match";};
      'If-Modified-Since'i => { kw = "If-Modified-Since";};
      'If-None-Match'i => { kw = "If-None-Match";};
      'If-Range'i => { kw = "If-Range";};
      'If-Unmodified-Since'i => { kw = "If-Unmodified-Since";};
      'Last-Modified'i => { kw = "Last-Modified";};
      'Location'i => { kw = "Location";};
      'Max-Forwards'i => { kw = "Max-Forwards";};
      'Pragma'i => { kw = "Pragma";};
      'Proxy-Authenticate'i => { kw = "Proxy-Authenticate";};
      'Proxy-Authorization'i => { kw = "Proxy-Authorization";};
      'Range'i => { kw = "Range";};
      'Referer'i => { kw = "Referer";};
      'Retry-After'i => { kw = "Retry-After";};
      'Server'i => { kw = "Server";};
      'TE'i => { kw = "TE";};
      'Trailer'i => { kw = "Trailer";};
      'Transfer-Encoding'i => { kw = "Transfer-Encoding";};
      'Upgrade'i => { kw = "Upgrade";};
      'User-Agent'i => { kw = "User-Agent";};
      'Vary'i => { kw = "Vary";};
      'Via'i => { kw = "Via";};
      'Warning'i => { kw = "Warning";};
      'WWW-Authenticate'i => { kw = "WWW-Authenticate";};
    *|;
    write data;
  }%%
  /**
   * Runs the http_keywords scanner over {@code data}, beginning at index {@code start}.
   *
   * @param data  bytes containing the header name
   * @param start index of the first byte to scan
   * @param len   number of bytes to scan; the scan limit is {@code start + len}
   * @return the string assigned by the most recent keyword pattern that matched,
   *         or {@code null} if no pattern matched
   */
  @SuppressWarnings("unused")
  public static String kw_lookup(byte[] data, int start, int len) {
    // String req = null;
    // ts, te and act are the scanner bookkeeping variables used by the generated code.
    int ts, te, act;
    // int wb = 0;
    int p = start;
    int pe = start + len;
    // End of input coincides with the end of the range.
    int eof = pe;
    int cs;
    // Assigned by the scanner actions of the http_keywords machine.
    String kw = null;
    %% write init;
    %% write exec;
    return kw;
  }
  %%{
    # Parses strings of the form (from rfc2616)
    # 1. Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
    # 2. Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
    # 3. Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
    #
    # The actions accumulate into the host variables day, month, year, hh, mm
    # and ss, which parseDate() declares.
    machine http_date;
    SP = ' '+;

    # Each digit action folds one more decimal digit into its accumulator (48 is '0').
    day = digit+ @{day = day * 10 + (data[fpc] - 48);};
    year = digit+ @{year = year * 10 + (data[fpc] - 48);};
    hh = digit+ @{hh = hh * 10 + (data[fpc] - 48) ;};
    mm = digit+ @{mm = mm * 10 + (data[fpc] - 48) ;};
    ss = digit+ @{ss = ss * 10 + (data[fpc] - 48) ;};

    wkday = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun";
    weekday = "Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday";

    # Month numbers are zero-based (Jan = 0 ... Dec = 11).
    month =
      ("Jan" @{ month = 0;}) |
      ("Feb" @{ month = 1;}) |
      ("Mar" @{ month = 2;}) |
      ("Apr" @{ month = 3;}) |
      ("May" @{ month = 4;}) |
      ("Jun" @{ month = 5;}) |
      ("Jul" @{ month = 6;}) |
      ("Aug" @{ month = 7;}) |
      ("Sep" @{ month = 8;}) |
      ("Oct" @{ month = 9;}) |
      ("Nov" @{ month = 10;}) |
      ("Dec" @{ month = 11;}) ;

    date1 = day SP month SP year;
    date2 = day "-" month "-" year;
    date3 = month SP day;
    time = hh ":" mm ":" ss;
    asctime_date = wkday SP date3 SP time SP year;
    # Format 2 spells the weekday out in full; the abbreviated form is still
    # accepted so that input which parsed before keeps parsing.
    rfc850_date = (weekday | wkday) "," SP date2 SP time SP "GMT";
    rfc1123_date = wkday "," SP date1 SP time SP "GMT";
    HTTP_date = rfc1123_date | rfc850_date | asctime_date;
    main := HTTP_date;
    write data;
  }%%
  243. public static TimeZone GMT = TimeZone.getTimeZone("GMT");
  244. public static long parseDate(byte[] data, int pos, int len) {
  245. int p = 0;
  246. int pe = len;
  247. // int eof = pe;
  248. int cs;
  249. // int wkday = 0;
  250. int day = 0, month = 0, year = 0;
  251. int hh = 0, mm = 0, ss = 0;
  252. %%write init;
  253. %%write exec;
  254. if (year < 100) {year += 1900;}
  255. GregorianCalendar gc = new GregorianCalendar();
  256. gc.set(year, month, day, hh, mm, ss);
  257. gc.setTimeZone(GMT);
  258. return gc.getTimeInMillis();
  259. }
  260. public static String crlf = "\r\n";
  261. public static void main(String args[]) throws Exception {
  262. /// Testing
  263. String s =
  264. "GET /favicon.ico#test HTTP/1.1\r\n" +
  265. "Host: localhost:7262\r\n" +
  266. "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10 Ubiquity/0.1.5\r\n" +
  267. "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" +
  268. "Accept-Language: en-us,en;q=0.5\r\n" +
  269. "Accept-Encoding: gzip,deflate\r\n" +
  270. "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" +
  271. "Keep-Alive: 300\r\n" +
  272. "Connection: keep-alive\r\n\r\n";
  273. System.out.println("Input Request: (" + s.length() + " bytes)");System.out.println(s);
  274. byte[] data = s.getBytes();
  275. int len = data.length;
  276. System.out.println("=============================================================");
  277. HttpRequest req = new HttpRequest();
  278. req.buffer = ByteBuffer.allocate(2048);
  279. req.buffer.put(data);
  280. initHeader(req, len);
  281. System.out.println(req);
  282. }
  283. }