/HttpRequest.h

https://github.com/privacore/open-source-search-engine · C Header · 259 lines · 127 code · 60 blank · 72 comment · 7 complexity · 3c233cd07471ac37a10c5bf897e06367 MD5 · raw file

  1. // Matt Wells, copyright Sep 2001
  2. // . class to parse and form HTTP requests
  3. #ifndef GB_HTTPREQUEST_H
  4. #define GB_HTTPREQUEST_H
  5. // . allow for up to 256 cgi fields
  6. // . this was stopping us from having more than about 253 banned ips, so i
  7. // raised it to 600
  8. //#define MAX_CGI_PARMS 600
  9. // . new priority controls have 128 rows!!
  10. #define MAX_CGI_PARMS 1400
  11. // for getting a file from http server
  12. #define MAX_HTTP_FILENAME_LEN 1024
  13. // i raised this from 1.3k to 5.3k so we can log the full request better
  14. //#define MAX_REQ_LEN (1024*5+300)
  15. //#define MAX_REQ_LEN (8024*5+300)
  16. // keep it small now that we use m_reqBuf
  17. //#define MAX_REQ_LEN (1024)
  18. #include "SafeBuf.h"
  19. class TcpSocket;
  20. #include "GbFormat.h"
  21. #include <time.h>
  22. class HttpRequest {
  23. public:
  24. // . form an HTTP request
  25. // . use size 0 for HEAD requests
  26. // . use size -1 for GET whole doc requests
  27. // . fill in your own offset/size for partial GET requests
  28. // . returns false and sets errno on error
  29. bool set ( char *url , int32_t offset = 0 , int32_t size = -1 ,
  30. time_t ifModifiedSince = 0 , const char *userAgent = NULL ,
  31. const char *proto = "HTTP/1.0" ,
  32. bool doPost = false ,
  33. const char *cookieJar = NULL ,
  34. const char *additionalHeader = NULL , // does not incl \r\n
  35. int32_t postContentLen = -1 , // for content-length of POST
  36. int32_t proxyIp = 0 ,
  37. const char *proxyUsernamePwdAuth = NULL );
  38. // use this
  39. SafeBuf m_reqBuf;
  40. bool m_reqBufValid;
  41. // get the request length
  42. int32_t getRequestLen() const { return m_reqBuf.length(); }
  43. // . get the outgoing request we made by calling set() above
  44. // . OR get the first line of an incoming request
  45. const char *getRequest() const {
  46. if ( m_reqBufValid ) return m_reqBuf.getBufStart();
  47. else return NULL;
  48. }
  49. // FORMAT_HTML FORMAT_JSON FORMAT_XML
  50. char getFormat() const { return getReplyFormat(); }
  51. char getReplyFormat() const;
  52. mutable bool m_replyFormatValid;
  53. mutable char m_replyFormat;
  54. // get the referer field of the MIME header
  55. char *getReferer () { return m_ref; }
  56. // this is NULL terminated too
  57. char *getUserAgent () { return m_userAgent; }
  58. // just does a simply gbmemcpy() operation, since it should be pointing
  59. // into the TcpSocket's buffer which is safe until after reply is sent
  60. // . returns false and sets g_errno on error, true otherwise
  61. bool copy(const HttpRequest *r);
  62. // . the url being reuqested
  63. // . removes &code= facebook cruft
  64. bool getCurrentUrl ( SafeBuf &cu );
  65. bool getCurrentUrlPath ( SafeBuf &cup );
  66. // . parse an incoming request
  67. // . returns false and set errno on error
  68. // . may alloc mem for m_cgiBuf to hold cgi vars from GET or POST op
  69. bool set ( char *req , int32_t reqSize , TcpSocket *s );
  70. // for gigablast's own rendering of squid
  71. bool m_isSquidProxyRequest;
  72. char *m_squidProxiedUrl;
  73. int32_t m_squidProxiedUrlLen;
  74. // is it this type of request?
  75. bool isGETRequest () const { return (m_requestType == 0); }
  76. bool isHEADRequest () const { return (m_requestType == 1); }
  77. bool isPOSTRequest () const { return (m_requestType == 2); }
  78. const char *getFilename () const { return m_filename; }
  79. int32_t getFilenameLen () const { return m_filenameLen; }
  80. int32_t getFileOffset () const { return m_fileOffset; }
  81. int32_t getFileSize () const { return m_fileSize; }
  82. const char *getOrigUrlRequest() const { return m_origUrlRequest; }
  83. int32_t getOrigUrlRequestLen() const { return m_origUrlRequestLen; }
  84. const char *getHost () const { return m_host; }
  85. int32_t getHostLen () const { return m_hostLen; }
  86. bool isLocal () const { return m_isLocal; }
  87. // . the &ucontent= cgi var does not get its value decoded
  88. // because it's already decoded
  89. // . this is so Mark doesn't have to url encode his injected content
  90. const char *getUnencodedContent() const { return m_ucontent; }
  91. int32_t getUnencodedContentLen() const { return m_ucontentLen; }
  92. // . for parsing the terms in a cgi url
  93. // . the returned string is NOT NULL terminated
  94. const char *getString ( const char *field, int32_t *len = NULL,
  95. const char *defaultString = NULL , int32_t *next=NULL) const;
  96. bool getBool ( const char *field, bool defaultBool ) const;
  97. int32_t getLong ( const char *field, int32_t defaultLong ) const;
  98. int64_t getLongLong ( const char *field, int64_t defaultLongLong ) const;
  99. float getFloat ( const char *field, double defaultFloat ) const;
  100. double getDouble ( const char *field, double defaultDouble ) const;
  101. float getFloatFromCookie ( const char *field, float def ) const;
  102. int32_t getLongFromCookie ( const char *field, int32_t def ) const;
  103. int64_t getLongLongFromCookie( const char *field, int64_t def ) const;
  104. bool getBoolFromCookie ( const char *field, bool def ) const;
  105. const char *getStringFromCookie ( const char *field, int32_t *len = NULL,
  106. const char *defaultString = NULL ,
  107. int32_t *next=NULL) const;
  108. bool hasField ( const char *field ) const;
  109. // are we a redir? if so return non-NULL
  110. const char *getRedir() const { return m_redir; }
  111. int32_t getRedirLen() const { return m_redirLen; }
  112. HttpRequest();
  113. HttpRequest( const HttpRequest &a );
  114. ~HttpRequest();
  115. void reset();
  116. const char *getPath() const { return m_path; }
  117. int32_t getPathLen() const { return m_plen; }
  118. // . get value of cgi "field" term in the requested filename
  119. // . you know GET /myfile.html?q=123&name=nathaniel
  120. const char *getValue ( const char *field , int32_t *len=NULL, int32_t *next=NULL) const;
  121. // get value of the ith field
  122. const char *getValue ( int32_t i, int32_t *len = NULL) const;
  123. // get the ith cgi parameter name, return NULL if none
  124. int32_t getNumFields( ) const { return m_numFields; }
  125. const char *getField( int32_t i ) const {
  126. if ( i >= m_numFields ) return NULL;
  127. return m_fields[i];
  128. }
  129. int32_t getFieldLen ( int32_t i ) const {
  130. if ( i >= m_numFields ) return 0;
  131. return m_fieldLens[i];
  132. }
  133. private:
  134. // . s is a cgi string
  135. // . either the stuff after the '?' in a url
  136. // . or the content in a POST operation
  137. // . returns false and sets errno on error
  138. bool addCgi ( char *s , int32_t slen );
  139. // . parse cgi field terms into m_fields,m_fieldLens,m_fieldValues
  140. // . "s" should point to cgi string right after the '?' if it exists
  141. // . s should have had all it's &'s replaced with /0's
  142. // . slen should include the last \0
  143. void parseFields ( char *s , int32_t slen ) ;
  144. void parseFieldsMultipart ( char *s , int32_t slen ) ;
  145. // 0 for GET, 1 for HEAD
  146. char m_requestType;
  147. // we decode the filename into this buffer (no cgi)
  148. char m_filename[MAX_HTTP_FILENAME_LEN];
  149. int32_t m_filenameLen; // excludes ?cgistuff
  150. // if request is like "GET /poo?foo=bar"
  151. // then origUrlRequest is "/poo?foo=bar"
  152. // references into TcpSocket::m_readBuf
  153. char *m_origUrlRequest;
  154. int32_t m_origUrlRequestLen;
  155. // virtual host in the Host: field of the mime
  156. char m_host[256];
  157. int32_t m_hostLen;
  158. // are we coming from a local machine?
  159. bool m_isLocal;
  160. // . decoded cgi data stored here
  161. // . this just points into TcpSocket::m_readBuf
  162. // . now it points into m_reqBuf.m_buf[]
  163. char *m_cgiBuf ;
  164. int32_t m_cgiBufLen ;
  165. int32_t m_cgiBufMaxLen ;
  166. // partial GET file read info
  167. int32_t m_fileOffset;
  168. int32_t m_fileSize;
  169. // . cgi field term info stored in here
  170. // . set by parseFields()
  171. char *m_fields [ MAX_CGI_PARMS ];
  172. int32_t m_fieldLens [ MAX_CGI_PARMS ];
  173. char *m_fieldValues [ MAX_CGI_PARMS ];
  174. int32_t m_numFields;
  175. int32_t m_userIP;
  176. bool m_isSSL;
  177. // . ptr to the thing we're getting in the request
  178. // . used by PageAddUrl4.cpp
  179. char *m_path;
  180. int32_t m_plen;
  181. char m_redir[128];
  182. int32_t m_redirLen;
  183. // referer, NULL terminated, from Referer: field in MIME
  184. char m_ref [ 256 ];
  185. int32_t m_refLen;
  186. // NULL terminated User-Agent: field in MIME
  187. char m_userAgent[128];
  188. // this points into m_cgiBuf
  189. char *m_ucontent;
  190. int32_t m_ucontentLen;
  191. char *m_cookiePtr;
  192. int32_t m_cookieLen;
  193. // buffer for adding extra parms
  194. char *m_cgiBuf2;
  195. int32_t m_cgiBuf2Size;
  196. };
  197. const int HTTP_REQUEST_DEFAULT_REQUEST_VERSION = 2;
  198. int getVersionFromRequest ( HttpRequest *r );
  199. #endif // GB_HTTPREQUEST_H