PageRenderTime 61ms CodeModel.GetById 14ms app.highlight 43ms RepoModel.GetById 1ms app.codeStats 0ms

/src/kilim/http/HttpRequestParser.rl

http://github.com/kilim/kilim
Unknown | 335 lines | 283 code | 52 blank | 0 comment | 0 complexity | 7b07e07ec3dfb09fd1dc7ac8406c759d MD5 | raw file
  1/* Copyright (c) 2006, Sriram Srinivasan
  2 *
  3 * You may distribute this software under the terms of the license 
  4 * specified in the file "License"
  5 */
  6
  7package kilim.http;
  8
  9/**
 10 * --- DO NOT EDIT -----
 11 * HttpRequestParser.java generated from RAGEL (http://www.complang.org/ragel/) from the
 12 * specification file HttpRequestParser.rl. All changes must be made in the .rl file.
 13 **/
 14
 15import java.util.TimeZone;
 16import java.util.GregorianCalendar;
 17import java.nio.charset.Charset;
 18import java.nio.ByteBuffer;
 19import java.io.UnsupportedEncodingException;
 20import java.io.IOException;
 21import java.net.URLDecoder;
 22
 23public class HttpRequestParser {
 24  public static final Charset UTF8 = Charset.forName("UTF-8");
 25
  %%{
    # A variation of the Ragel grammar from Zed Shaw's mongrel parser. Thanks, Zed.

    machine http_parser;

    # Record the current parse position in 'mark'; later actions read it as the
    # start of the token they extract.
    action mark {mark = fpc; }

    # Record the current parse position in 'query_start'; end_query reads it as
    # the start of the query string.
    action start_query {query_start = fpc; }
 34
 35    action extract_field_name { 
 36      field_name = kw_lookup(data, mark, fpc);
 37      if (field_name == null) {// not a known keyword
 38        field_name = req.extractRange(mark, fpc);
 39      }
 40    }
 41
 42    action extract_value {
 43      int value = encodeRange(mark, fpc);
 44      req.addField(field_name, value);
 45    }
 46
 47    action request_path {
 48      req.uriPath = req.extractRange(mark, fpc);
 49      String s = req.uriPath;
 50      int len = s.length();
 51      boolean need_decode;
 52      // Scan the string to see if the string requires any conversion.
 53      for (int i = 0; i < len; i++) {
 54         char c = s.charAt(i);
 55         if (c == '%' || c > 0x7F) {
 56           try {
 57              // TODO: Correct this. URLDecoder is broken for path (upto
 58              // JDK1.6): it converts'+' to ' ', which should
 59              // be done only for the query part of the url.
 60              req.uriPath = URLDecoder.decode(req.uriPath, "UTF-8");
 61              break;
 62           } catch (UnsupportedEncodingException ignore){}
 63         }
 64      }
 65    }
 66
    # Debug action that prints the range from 'mark' to the current position.
    # The "%uri" embedding on the 'uri' rule in this machine is commented out.
    action uri {
      System.out.println("URI::::" + req.extractRange(mark, fpc));
      //req.uri = req.extractRange(mark, fpc);
    }

    # Store the encoded range from 'query_start' to the current position as the
    # request's query string range.
    action end_query {
      req.queryStringRange = encodeRange(query_start, fpc);
    }

    # Store the encoded range from 'mark' to the current position as the
    # request's URI fragment range.
    action fragment { 
      req.uriFragmentRange = encodeRange(mark, fpc);
    }
  
    # Store the encoded range from 'mark' to the current position as the
    # request's HTTP version range.
    action version {
      req.versionRange = encodeRange(mark, fpc);
    }
 83
    # Line terminator used by the start line and by every header line.
    CRLF = "\r\n";

    # character types
    CTL = (cntrl | 127);
    safe = ("$" | "-" | "_" | ".");
    extra = ("!" | "*" | "'" | "(" | ")" | ",");
    reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
    unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
    # Everything that is not in one of the classes above.
    national = any -- (alpha | digit | reserved | extra | safe | unsafe);
    unreserved = (alpha | digit | safe | extra | national);
    escape = ("%" xdigit xdigit);
    uchar = (unreserved | escape);
    pchar = (uchar | ":" | "@" | "&" | "=" | "+");
    tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");

    # elements
    token = (ascii -- (CTL | tspecials));

    # URI schemes and absolute paths
    scheme = ( alpha | digit | "+" | "-" | "." )* ;
    absolute_uri = (scheme ":" (uchar | reserved )*);
    #path = ( pchar+ ( "/" pchar* )* ) ;
    # The query may be empty; end_query runs when it ends.
    query = ( uchar | reserved )* %end_query ;
    param = ( pchar | "/" )* ;
    params = ( param ( ";" param )* ) ;
    # One or more characters other than space, '?', '#' and ';'; request_path
    # runs when the path ends.
    path  = (any -- [ ?#;])+ %request_path ;
    # start_query runs on leaving the "?", i.e. at the first query character.
    rel_path = ( path? (";" params)? ) ("?" %start_query query)?;
    absolute_path = ( "/"+ rel_path );
    # 'mark' is set where the URI begins; the %uri leaving action is disabled.
    uri = ( absolute_uri | absolute_path ) >mark ;# %uri;
    fragment = ( uchar | reserved )* >mark %fragment;

    # A header line is: name ":" optional spaces, value, CRLF.
    field_name = ( any -- ":" )+ >mark %extract_field_name;
    field_value = any* >mark %extract_value;
    # ":>" ends the value as soon as the CRLF begins.
    fields = field_name ":" " "* field_value :> CRLF;

    # Request methods, matched case-insensitively. The action on each literal
    # stores the upper-case method name on the request.
    get = 'GET'i  @{req.method = "GET";};
    post = 'POST'i  @{req.method = "POST";};
    delete = 'DELETE'i  @{req.method = "DELETE";};
    head = 'HEAD'i  @{req.method = "HEAD";};
    put = 'PUT'i @{req.method = "PUT";};
    options = 'OPTIONS'i @{req.method = "OPTIONS";};
    method = get | delete | post | put | head | options;

    # NOTE(review): >mark and %version are attached to the parenthesized numeric
    # part, so the stored range presumably excludes the "HTTP/" prefix - confirm.
    version = "HTTP/"  ( digit+ "." digit+ )  >mark %version;

    # Request line: method, URI, optional "#fragment", version, CRLF.
    start_line = ( method " "+ uri ("#" fragment){0,1} " "+ version CRLF ) ;

    # Full header: request line, any number of header lines, then a blank line.
    header = start_line ( fields )* CRLF ;

    # The embedded error action calls err(), which throws an IOException whose
    # message contains the error position and the header text.
    main := header %err{err("Malformed Header. Error at " + p + "\n" + new String(data, 0, pe, UTF8));};

  }%%
136
137  %% write data;
138
139  public static void err(String msg) throws IOException{
140    throw new IOException(msg);
141  }
142
  /**
   * Runs the http_parser machine over the first {@code headerLength} bytes of
   * the backing array of {@code req.buffer}. The machine's actions store the
   * method, path, ranges and header fields on {@code req}.
   *
   * @param req          request whose {@code buffer} holds the raw header bytes
   * @param headerLength number of bytes to parse, counted from index 0 of the
   *                     buffer's backing array
   * @throws IOException if the machine finishes in its error state, or if an
   *                     action reports a problem through {@link #err(String)}
   */
  public static void initHeader(HttpRequest req, int headerLength) throws IOException {
    ByteBuffer bb = req.buffer;
    /* required variables */
    // The generated code refers to data, p, pe and cs by exactly these names.
    byte[] data = bb.array();
    int p = 0;
    int pe = headerLength;
//  int eof = pe;
    int cs = 0;

    // variables used by actions in http_req_parser machine above.
    int query_start = 0;
    int mark = 0;
    String field_name = "";

    // Ragel expands these directives into the state initialisation and the
    // parsing loop for the http_parser machine.
    %% write init;
    %% write exec;
    
    // Only the error state is rejected here.
    if (cs == http_parser_error) {
      throw new IOException("Malformed HTTP Header. p = " + p +", cs = " + cs);
    }
  }
164
165  /**
166   * encode the start pos and length as ints;
167   */
168  public static int encodeRange(int start, int end) {
169    return (start << 16) + end ;
170  }
171
  %%{
    # Scanner over the known header names. Each pattern is matched
    # case-insensitively and its action assigns the canonically capitalised
    # name to 'kw'.
    machine http_keywords;

    main := |*
    'Accept'i => { kw = "Accept";};
    'Accept-Charset'i => { kw = "Accept-Charset";};
    'Accept-Encoding'i => { kw = "Accept-Encoding";};
    'Accept-Language'i => { kw = "Accept-Language";};
    'Accept-Ranges'i => { kw = "Accept-Ranges";};
    'Age'i => { kw = "Age";};
    'Allow'i => { kw = "Allow";};
    'Authorization'i => { kw = "Authorization";};
    'Cache-Control'i => { kw = "Cache-Control";};
    'Connection'i => { kw = "Connection";};
    'Content-Encoding'i => { kw = "Content-Encoding";};
    'Content-Language'i => { kw = "Content-Language";};
    'Content-Length'i => { kw = "Content-Length";};
    'Content-Location'i => { kw = "Content-Location";};
    'Content-MD5'i => { kw = "Content-MD5";};
    'Content-Range'i => { kw = "Content-Range";};
    'Content-Type'i => { kw = "Content-Type";};
    'Date'i => { kw = "Date";};
    'ETag'i => { kw = "ETag";};
    'Expect'i => { kw = "Expect";};
    'Expires'i => { kw = "Expires";};
    'From'i => { kw = "From";};
    'Host'i => { kw = "Host";};
    'If-Match'i => { kw = "If-Match";};
    'If-Modified-Since'i => { kw = "If-Modified-Since";};
    'If-None-Match'i => { kw = "If-None-Match";};
    'If-Range'i => { kw = "If-Range";};
    'If-Unmodified-Since'i => { kw = "If-Unmodified-Since";};
    'Last-Modified'i => { kw = "Last-Modified";};
    'Location'i => { kw = "Location";};
    'Max-Forwards'i => { kw = "Max-Forwards";};
    'Pragma'i => { kw = "Pragma";};
    'Proxy-Authenticate'i => { kw = "Proxy-Authenticate";};
    'Proxy-Authorization'i => { kw = "Proxy-Authorization";};
    'Range'i => { kw = "Range";};
    'Referer'i => { kw = "Referer";};
    'Retry-After'i => { kw = "Retry-After";};
    'Server'i => { kw = "Server";};
    'TE'i => { kw = "TE";};
    'Trailer'i => { kw = "Trailer";};
    'Transfer-Encoding'i => { kw = "Transfer-Encoding";};
    'Upgrade'i => { kw = "Upgrade";};
    'User-Agent'i => { kw = "User-Agent";};
    'Vary'i => { kw = "Vary";};
    'Via'i => { kw = "Via";};
    'Warning'i => { kw = "Warning";};
    'WWW-Authenticate'i => { kw = "WWW-Authenticate";};
    *|;

    write data;
  }%%
227
  /**
   * Runs the http_keywords scanner over {@code data[start .. start + len)} and
   * returns the canonical header name assigned by the scanner's actions.
   *
   * NOTE(review): {@code kw} is overwritten by whichever keyword action ran
   * last, so a name that merely begins with a keyword presumably still yields
   * that keyword - confirm and compare lengths at the call site if needed.
   *
   * @param data  buffer containing the header name
   * @param start index of the first byte to scan
   * @param len   number of bytes to scan
   * @return the canonical keyword, or {@code null} if no keyword action ran
   */
  @SuppressWarnings("unused")
  public static String kw_lookup(byte[] data, int start, int len) {
//    String req = null;
    // Token start, token end and last-matched-pattern bookkeeping for the scanner.
    int ts, te, act;

//    int wb = 0;
    int p = start;
    int pe = start + len;
    // The end of the range is also the end of input for the scanner.
    int eof = pe;
    int cs;
    String kw = null;
    // Ragel expands these directives into the scanner for http_keywords.
    %% write init;
    %% write exec;

    return kw;
  }
244
245  %%{
246      # Parses strings of the form (from rfc2616)
247      # 1. Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
248      # 2. Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
249      # 3. Sun Nov  6 08:49:37 1994       ; ANSI Cs asctime() format
250
251      machine http_date;
252
253      SP = ' '+;
254      day = digit+  @{day = day * 10 + (data[fpc] - 48);}; 
255      year = digit+ @{year = year * 10 + (data[fpc] - 48);};
256      hh = digit+ @{hh = hh * 10 + (data[fpc] - 48) ;}; 
257      mm = digit+ @{mm = mm * 10 + (data[fpc] - 48) ;}; 
258      ss = digit+ @{ss = ss * 10 + (data[fpc] - 48) ;}; 
259      wkday   = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun";
260      weekday = "Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday";
261      month   = 
262        ("Jan" @{ month = 0;}) | 
263        ("Feb" @{ month = 1;}) | 
264        ("Mar" @{ month = 2;}) | 
265        ("Apr" @{ month = 3;}) | 
266        ("May" @{ month = 4;}) | 
267        ("Jun" @{ month = 5;}) | 
268        ("Jul" @{ month = 6;}) | 
269        ("Aug" @{ month = 7;}) | 
270        ("Sep" @{ month = 8;}) | 
271        ("Oct" @{ month = 90;}) | 
272        ("Nov" @{ month = 10;}) | 
273        ("Dec" @{ month = 11;}) ;
274
275      date1        = day SP month SP year;
276      date2        = day "-" month "-" year;
277      date3        = month SP  day;
278      time         = hh ":" mm ":" ss;
279      asctime_date = wkday SP date3 SP time SP year;
280      rfc850_date  = wkday "," SP date2 SP time SP "GMT";
281      rfc1123_date = wkday "," SP date1 SP time SP "GMT";
282      HTTP_date    = rfc1123_date | rfc850_date | asctime_date;
283      main := HTTP_date;
284      write data;
285    }%%
286
287    public static TimeZone GMT = TimeZone.getTimeZone("GMT");
288
289  public static long parseDate(byte[] data, int pos, int len) {
290    int p = 0;
291    int pe = len;
292//    int eof = pe;
293    int cs;
294//    int wkday = 0;
295    int day = 0, month = 0, year = 0;
296    int hh = 0, mm = 0, ss = 0;
297        
298    %%write init;
299    %%write exec;
300
301    if (year < 100) {year += 1900;}
302
303    GregorianCalendar gc = new GregorianCalendar();
304    gc.set(year, month, day, hh, mm, ss);
305    gc.setTimeZone(GMT);
306    return gc.getTimeInMillis();
307  }
308
309
310  public static String crlf = "\r\n";
311  public static void main(String args[]) throws Exception {
312    /// Testing
313    String s = 
314      "GET /favicon.ico#test HTTP/1.1\r\n" +
315      "Host: localhost:7262\r\n" +
316      "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10 Ubiquity/0.1.5\r\n" +
317      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" +
318      "Accept-Language: en-us,en;q=0.5\r\n" +
319      "Accept-Encoding: gzip,deflate\r\n" +
320      "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" +
321      "Keep-Alive: 300\r\n" +
322      "Connection: keep-alive\r\n\r\n";
323    System.out.println("Input Request: (" + s.length() + " bytes)");System.out.println(s);
324    byte[] data = s.getBytes();
325    int len = data.length;
326    
327    System.out.println("=============================================================");
328    HttpRequest req = new HttpRequest();
329    req.buffer = ByteBuffer.allocate(2048);
330    req.buffer.put(data);
331    initHeader(req, len);
332    System.out.println(req);
333  }
334}
335