/src/kilim/http/HttpRequestParser.rl
Unknown | 335 lines | 283 code | 52 blank | 0 comment | 0 complexity | 7b07e07ec3dfb09fd1dc7ac8406c759d MD5 | raw file
/* Copyright (c) 2006, Sriram Srinivasan
 *
 * You may distribute this software under the terms of the license
 * specified in the file "License"
 */

package kilim.http;

/**
 * --- DO NOT EDIT -----
 * HttpRequestParser.java generated from RAGEL (http://www.complang.org/ragel/) from the
 * specification file HttpRequestParser.rl. All changes must be made in the .rl file.
 **/

import java.util.TimeZone;
import java.util.GregorianCalendar;
import java.nio.charset.Charset;
import java.nio.ByteBuffer;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.net.URLDecoder;

/**
 * Ragel-generated parsers for HTTP request headers.
 *
 * Three state machines are defined in this file:
 *   http_parser   - parses the request line and header fields into an HttpRequest
 *   http_keywords - maps a header field name to its canonical spelling
 *   http_date     - parses the three HTTP date formats into epoch milliseconds
 */
public class HttpRequestParser {
  public static final Charset UTF8 = Charset.forName("UTF-8");

  %%{
    # A variation of the Ragel grammar from Zed Shaw's mongrel parser. Thanks, Zed.

    machine http_parser;

    # Remember where the current token starts.
    action mark {mark = fpc; }

    # Remember where the query string starts (just past the "?").
    action start_query {query_start = fpc; }

    # Resolve the header name in data[mark, fpc): use the canonical keyword
    # string when it is a well-known header, otherwise extract the raw text.
    action extract_field_name {
      // kw_lookup takes (start, length), not (start, end).
      field_name = kw_lookup(data, mark, fpc - mark);
      if (field_name == null) {// not a known keyword
        field_name = req.extractRange(mark, fpc);
      }
    }

    # Store the header value as an encoded range against the current field name.
    action extract_value {
      int value = encodeRange(mark, fpc);
      req.addField(field_name, value);
    }

    # Extract the path and percent-decode it if it contains "%" or a
    # non-ASCII character.
    action request_path {
      req.uriPath = req.extractRange(mark, fpc);
      String s = req.uriPath;
      int len = s.length();
      // Scan the string to see if the string requires any conversion.
      for (int i = 0; i < len; i++) {
        char c = s.charAt(i);
        if (c == '%' || c > 0x7F) {
          try {
            // TODO: Correct this. URLDecoder is broken for path (upto
            // JDK1.6): it converts '+' to ' ', which should
            // be done only for the query part of the url.
            req.uriPath = URLDecoder.decode(s, "UTF-8");
          } catch (UnsupportedEncodingException ignored) {
            // The charset name is the constant "UTF-8"; the path is left undecoded.
          } catch (IllegalArgumentException e) {
            // Malformed escape sequence: report it like any other malformed
            // header instead of leaking an unchecked exception.
            throw new IOException("Malformed escape in URI path: " + s, e);
          }
          // One decode attempt handles the whole string.
          break;
        }
      }
    }

    # Debugging aid; not attached to any rule (see the commented-out %uri below).
    action uri {
      System.out.println("URI::::" + req.extractRange(mark, fpc));
      //req.uri = req.extractRange(mark, fpc);
    }

    action end_query {
      req.queryStringRange = encodeRange(query_start, fpc);
    }

    action fragment {
      req.uriFragmentRange = encodeRange(mark, fpc);
    }

    action version {
      req.versionRange = encodeRange(mark, fpc);
    }

    CRLF = "\r\n";

    # character types
    CTL = (cntrl | 127);
    safe = ("$" | "-" | "_" | ".");
    extra = ("!" | "*" | "'" | "(" | ")" | ",");
    reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
    unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
    national = any -- (alpha | digit | reserved | extra | safe | unsafe);
    unreserved = (alpha | digit | safe | extra | national);
    escape = ("%" xdigit xdigit);
    uchar = (unreserved | escape);
    pchar = (uchar | ":" | "@" | "&" | "=" | "+");
    tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");

    # elements
    token = (ascii -- (CTL | tspecials));

    # URI schemes and absolute paths
    scheme = ( alpha | digit | "+" | "-" | "." )* ;
    absolute_uri = (scheme ":" (uchar | reserved )*);
    #path = ( pchar+ ( "/" pchar* )* ) ;
    query = ( uchar | reserved )* %end_query ;
    param = ( pchar | "/" )* ;
    params = ( param ( ";" param )* ) ;
    path = (any -- [ ?#;])+ %request_path ;
    rel_path = ( path? (";" params)? ) ("?" %start_query query)?;
    absolute_path = ( "/"+ rel_path );
    uri = ( absolute_uri | absolute_path ) >mark ;# %uri;
    fragment = ( uchar | reserved )* >mark %fragment;

    field_name = ( any -- ":" )+ >mark %extract_field_name;
    field_value = any* >mark %extract_value;
    fields = field_name ":" " "* field_value :> CRLF;

    get = 'GET'i @{req.method = "GET";};
    post = 'POST'i @{req.method = "POST";};
    delete = 'DELETE'i @{req.method = "DELETE";};
    head = 'HEAD'i @{req.method = "HEAD";};
    put = 'PUT'i @{req.method = "PUT";};
    options = 'OPTIONS'i @{req.method = "OPTIONS";};
    method = get | delete | post | put | head | options;

    version = "HTTP/" ( digit+ "." digit+ ) >mark %version;

    start_line = ( method " "+ uri ("#" fragment){0,1} " "+ version CRLF ) ;

    header = start_line ( fields )* CRLF ;

    main := header %err{err("Malformed Header. Error at " + p + "\n" + new String(data, 0, pe, UTF8));};

  }%%

  %% write data;

  /**
   * Always throws an IOException carrying the given message. Used by the
   * parser's error action.
   *
   * @param msg the exception message
   * @throws IOException always
   */
  public static void err(String msg) throws IOException{
    throw new IOException(msg);
  }

  /**
   * Runs the http_parser machine over the first headerLength bytes of the
   * request's buffer and fills in the request's method, path, ranges and
   * header fields through the machine's actions.
   *
   * NOTE(review): reads the buffer via ByteBuffer.array() starting at index 0,
   * so this presumably expects an array-backed buffer with a zero array
   * offset -- confirm against callers.
   *
   * @param req          the request to populate; its buffer holds the raw header bytes
   * @param headerLength number of bytes to parse
   * @throws IOException if the header is malformed
   */
  public static void initHeader(HttpRequest req, int headerLength) throws IOException {
    ByteBuffer bb = req.buffer;
    /* required variables */
    byte[] data = bb.array();
    int p = 0;
    int pe = headerLength;
//  int eof = pe;
    int cs = 0;

    // variables used by actions in http_req_parser machine above.
    int query_start = 0;
    int mark = 0;
    String field_name = "";

    %% write init;
    %% write exec;

    if (cs == http_parser_error) {
      throw new IOException("Malformed HTTP Header. p = " + p +", cs = " + cs);
    }
  }

  /**
   * Packs a start and an end position into a single int: the start is shifted
   * into the upper 16 bits and the end is added to the lower bits.
   * An end of 65536 or more spills into the start bits, and a start of 32768
   * or more makes the result negative.
   *
   * @param start start position
   * @param end   end position
   * @return (start << 16) + end
   */
  public static int encodeRange(int start, int end) {
    return (start << 16) + end ;
  }

  %%{
    # Scanner that recognises well-known header names, ignoring case, and
    # records the canonical spelling in kw.
    machine http_keywords;

    main := |*
      'Accept'i => { kw = "Accept";};
      'Accept-Charset'i => { kw = "Accept-Charset";};
      'Accept-Encoding'i => { kw = "Accept-Encoding";};
      'Accept-Language'i => { kw = "Accept-Language";};
      'Accept-Ranges'i => { kw = "Accept-Ranges";};
      'Age'i => { kw = "Age";};
      'Allow'i => { kw = "Allow";};
      'Authorization'i => { kw = "Authorization";};
      'Cache-Control'i => { kw = "Cache-Control";};
      'Connection'i => { kw = "Connection";};
      'Content-Encoding'i => { kw = "Content-Encoding";};
      'Content-Language'i => { kw = "Content-Language";};
      'Content-Length'i => { kw = "Content-Length";};
      'Content-Location'i => { kw = "Content-Location";};
      'Content-MD5'i => { kw = "Content-MD5";};
      'Content-Range'i => { kw = "Content-Range";};
      'Content-Type'i => { kw = "Content-Type";};
      'Date'i => { kw = "Date";};
      'ETag'i => { kw = "ETag";};
      'Expect'i => { kw = "Expect";};
      'Expires'i => { kw = "Expires";};
      'From'i => { kw = "From";};
      'Host'i => { kw = "Host";};
      'If-Match'i => { kw = "If-Match";};
      'If-Modified-Since'i => { kw = "If-Modified-Since";};
      'If-None-Match'i => { kw = "If-None-Match";};
      'If-Range'i => { kw = "If-Range";};
      'If-Unmodified-Since'i => { kw = "If-Unmodified-Since";};
      'Last-Modified'i => { kw = "Last-Modified";};
      'Location'i => { kw = "Location";};
      'Max-Forwards'i => { kw = "Max-Forwards";};
      'Pragma'i => { kw = "Pragma";};
      'Proxy-Authenticate'i => { kw = "Proxy-Authenticate";};
      'Proxy-Authorization'i => { kw = "Proxy-Authorization";};
      'Range'i => { kw = "Range";};
      'Referer'i => { kw = "Referer";};
      'Retry-After'i => { kw = "Retry-After";};
      'Server'i => { kw = "Server";};
      'TE'i => { kw = "TE";};
      'Trailer'i => { kw = "Trailer";};
      'Transfer-Encoding'i => { kw = "Transfer-Encoding";};
      'Upgrade'i => { kw = "Upgrade";};
      'User-Agent'i => { kw = "User-Agent";};
      'Vary'i => { kw = "Vary";};
      'Via'i => { kw = "Via";};
      'Warning'i => { kw = "Warning";};
      'WWW-Authenticate'i => { kw = "WWW-Authenticate";};
    *|;

    write data;
  }%%

  /**
   * Looks up a header field name among the well-known HTTP header names,
   * ignoring case.
   *
   * @param data  buffer containing the field name
   * @param start index of the first byte of the name
   * @param len   number of bytes in the name
   * @return the canonical keyword string if data[start, start + len) is
   *         exactly one known header name; null otherwise
   */
  @SuppressWarnings("unused")
  public static String kw_lookup(byte[] data, int start, int len) {
//  String req = null;
    int ts, te, act;

//  int wb = 0;
    int p = start;
    int pe = start + len;
    int eof = pe;
    int cs;
    String kw = null;
    %% write init;
    %% write exec;

    // The scanner records the last keyword token it saw even when more input
    // follows (e.g. "Hostile" scans "Host" and then fails). All keywords are
    // ASCII, so a genuine match has exactly len characters.
    if (kw != null && kw.length() != len) {
      return null;
    }
    return kw;
  }

  %%{
    # Parses strings of the form (from rfc2616)
    # 1. Sun, 06 Nov 1994 08:49:37 GMT    ; RFC 822, updated by RFC 1123
    # 2. Sunday, 06-Nov-94 08:49:37 GMT   ; RFC 850, obsoleted by RFC 1036
    # 3. Sun Nov  6 08:49:37 1994         ; ANSI C asctime() format

    machine http_date;

    SP = ' '+;
    # Each numeric field accumulates its decimal digits (48 is ASCII "0").
    day = digit+ @{day = day * 10 + (data[fpc] - 48);};
    year = digit+ @{year = year * 10 + (data[fpc] - 48);};
    hh = digit+ @{hh = hh * 10 + (data[fpc] - 48) ;};
    mm = digit+ @{mm = mm * 10 + (data[fpc] - 48) ;};
    ss = digit+ @{ss = ss * 10 + (data[fpc] - 48) ;};
    wkday = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun";
    weekday = "Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday";
    # Zero-based month numbers, as expected by java.util.Calendar.
    month =
      ("Jan" @{ month = 0;}) |
      ("Feb" @{ month = 1;}) |
      ("Mar" @{ month = 2;}) |
      ("Apr" @{ month = 3;}) |
      ("May" @{ month = 4;}) |
      ("Jun" @{ month = 5;}) |
      ("Jul" @{ month = 6;}) |
      ("Aug" @{ month = 7;}) |
      ("Sep" @{ month = 8;}) |
      ("Oct" @{ month = 9;}) |
      ("Nov" @{ month = 10;}) |
      ("Dec" @{ month = 11;}) ;

    # The part of formats 1 and 2 that follows the day of month.
    date1_rest = SP month SP year;
    date2_rest = "-" month "-" year;
    date3 = month SP day;
    time = hh ":" mm ":" ss;
    asctime_date = wkday SP date3 SP time SP year;
    # Formats 1 and 2 share the prefix  <day name> "," SP <day of month>.
    # The day rule is referenced once here so that its accumulate action is
    # embedded exactly once on the shared transitions. Either spelling of the
    # day name is accepted with either date style.
    rfc_date = (weekday | wkday) "," SP day (date1_rest | date2_rest) SP time SP "GMT";
    HTTP_date = rfc_date | asctime_date;
    main := HTTP_date;
    write data;
  }%%

  public static TimeZone GMT = TimeZone.getTimeZone("GMT");

  /**
   * Parses an HTTP date (RFC 1123, RFC 850 or asctime format) and returns it
   * as milliseconds since the epoch, interpreting the fields as GMT.
   * Years below 100 have 1900 added.
   *
   * No error is reported when the input does not match any format; the result
   * is then computed from whatever fields were recognised before the mismatch.
   *
   * @param data buffer containing the date text
   * @param pos  index of the first byte of the date
   * @param len  number of bytes in the date
   * @return the parsed instant in epoch milliseconds
   */
  public static long parseDate(byte[] data, int pos, int len) {
    int p = pos;
    int pe = pos + len;
//  int eof = pe;
    int cs;
//  int wkday = 0;
    int day = 0, month = 0, year = 0;
    int hh = 0, mm = 0, ss = 0;

    %%write init;
    %%write exec;

    if (year < 100) {year += 1900;}

    GregorianCalendar gc = new GregorianCalendar(GMT);
    // A new calendar holds the current time; clear it so that the
    // milliseconds field does not leak into the result.
    gc.clear();
    gc.set(year, month, day, hh, mm, ss);
    return gc.getTimeInMillis();
  }


  public static String crlf = "\r\n";

  /**
   * Manual smoke test: parses a sample request and prints the result.
   */
  public static void main(String args[]) throws Exception {
    /// Testing
    String s =
      "GET /favicon.ico#test HTTP/1.1\r\n" +
      "Host: localhost:7262\r\n" +
      "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10 Ubiquity/0.1.5\r\n" +
      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" +
      "Accept-Language: en-us,en;q=0.5\r\n" +
      "Accept-Encoding: gzip,deflate\r\n" +
      "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" +
      "Keep-Alive: 300\r\n" +
      "Connection: keep-alive\r\n\r\n";
    System.out.println("Input Request: (" + s.length() + " bytes)");System.out.println(s);
    // Encode explicitly rather than with the platform default charset.
    byte[] data = s.getBytes(UTF8);
    int len = data.length;

    System.out.println("=============================================================");
    HttpRequest req = new HttpRequest();
    req.buffer = ByteBuffer.allocate(2048);
    req.buffer.put(data);
    initHeader(req, len);
    System.out.println(req);
  }
}