PageRenderTime 3462ms CodeModel.GetById 38ms RepoModel.GetById 0ms app.codeStats 0ms

/HttpRequest.h

https://github.com/gigablast/open-source-search-engine
C Header | 305 lines | 136 code | 67 blank | 102 comment | 7 complexity | 483b4ca78529bea3e8bd63323383b7b3 MD5 | raw file
Possible License(s): Apache-2.0
  1. // Matt Wells, copyright Sep 2001
  2. // . class to parse and form HTTP requests
  3. #ifndef _HTTPREQUEST_H_
  4. #define _HTTPREQUEST_H_
  5. // . allow for up to 256 cgi fields
  6. // . this was stopping us from having more than about 253 banned ips, so i
  7. // raised it to 600
  8. //#define MAX_CGI_PARMS 600
  9. // . new prioirty controls has 128 rows!!
  10. #define MAX_CGI_PARMS 1400
  11. // for getting a file from http server
  12. #define MAX_HTTP_FILENAME_LEN 1024
  13. // i raised this from 1.3k to 5.3k so we can log the full request better
  14. //#define MAX_REQ_LEN (1024*5+300)
  15. //#define MAX_REQ_LEN (8024*5+300)
  16. // keep it small now that we use m_reqBuf
  17. //#define MAX_REQ_LEN (1024)
  18. #include "SafeBuf.h"
  19. #include "Mem.h" // mdup
  20. #include "Url.h" // Url class
  21. #include "TcpSocket.h"
  22. // values for HttpRequest::m_replyFormat
  23. #define FORMAT_HTML 1
  24. #define FORMAT_XML 2
  25. #define FORMAT_JSON 3
  26. #define FORMAT_CSV 4
  27. #define FORMAT_TXT 5
  28. #define FORMAT_PROCOG 6
  29. #define FORMAT_WIDGET_IFRAME 7
  30. #define FORMAT_WIDGET_AJAX 8
  31. // used by ajax widget to create search results to APPEND to the end of widget
  32. #define FORMAT_WIDGET_APPEND 9
  33. class HttpRequest {
  34. public:
  35. // . form an HTTP request
  36. // . use size 0 for HEAD requests
  37. // . use size -1 for GET whole doc requests
  38. // . fill in your own offset/size for partial GET requests
  39. // . returns false and sets errno on error
  40. bool set ( char *url , int32_t offset = 0 , int32_t size = -1 ,
  41. time_t ifModifiedSince = 0 , char *userAgent = NULL ,
  42. char *proto = "HTTP/1.0" ,
  43. bool doPost = false ,
  44. char *cookie = NULL ,
  45. char *additionalHeader = NULL , // does not incl \r\n
  46. int32_t postContentLen = -1 , // for content-length of POST
  47. int32_t proxyIp = 0 ,
  48. char *proxyUsernamePwdAuth = NULL );
  49. // use this
  50. SafeBuf m_reqBuf;
  51. bool m_reqBufValid;
  52. // get the request length
  53. int32_t getRequestLen() { return m_reqBuf.length(); };//m_bufLen; };
  54. // . get the outgoing request we made by calling set() above
  55. // . OR get the first line of an incoming request
  56. char *getRequest () {
  57. if ( m_reqBufValid ) return m_reqBuf.getBufStart();
  58. else return NULL;
  59. //return m_buf;
  60. };
  61. // FORMAT_HTML FORMAT_JSON FORMAT_XML
  62. char getFormat() { return getReplyFormat(); };
  63. char getReplyFormat();
  64. bool m_replyFormatValid;
  65. char m_replyFormat;
  66. // get the referer field of the MIME header
  67. char *getReferer () { return m_ref; };
  68. // this is NULL terminated too
  69. char *getUserAgent () { return m_userAgent; };
  70. // just does a simply gbmemcpy() operation, since it should be pointing
  71. // into the TcpSocket's buffer which is safe until after reply is sent
  72. // . returns false and sets g_errno on error, true otherwise
  73. bool copy ( class HttpRequest *r , bool steal = false ) ;
  74. // like copy() but doesn't do a copy, steals the ptrs and sets
  75. // hr->m_usingStack to true so it won't free its buffer
  76. bool stealBuf ( class HttpRequest *hr ) {return copy ( hr , true ); }
  77. // . the url being requested
  78. // . removes &code= facebook cruft
  79. bool getCurrentUrl ( SafeBuf &cu );
  80. bool getCurrentUrlPath ( SafeBuf &cup );
  81. // . parse an incoming request
  82. // . returns false and set errno on error
  83. // . may alloc mem for m_cgiBuf to hold cgi vars from GET or POST op
  84. bool set ( char *req , int32_t reqSize , TcpSocket *s );
  85. // for gigablast's own rendering of squid
  86. bool m_isSquidProxyRequest;
  87. char *m_squidProxiedUrl;
  88. int32_t m_squidProxiedUrlLen;
  89. // is it this type of request?
  90. bool isGETRequest () { return (m_requestType == 0); };
  91. bool isHEADRequest () { return (m_requestType == 1); };
  92. bool isPOSTRequest () { return (m_requestType == 2); };
  93. char *getFilename () { return m_filename; };
  94. int32_t getFilenameLen () { return m_filenameLen; };
  95. int32_t getFileOffset () { return m_fileOffset; };
  96. int32_t getFileSize () { return m_fileSize; };
  97. char *getHost () { return m_host; };
  98. int32_t getHostLen () { return m_hostLen; };
  99. //bool isLocal () { return m_isLocal; };
  100. //bool isAdmin () { return m_isMasterAdmin; };
  101. bool isLocal () { return m_isLocal; };
  102. // is this the admin of a collection?
  103. //bool isCollAdmin () { return m_isCollAdmin; }
  104. // . the &ucontent= cgi var does not get its value decoded
  105. // because it's already decoded
  106. // . this is so Mark doesn't have to url encode his injected content
  107. char *getUnencodedContent ( ) { return m_ucontent; };
  108. int32_t getUnencodedContentLen ( ) { return m_ucontentLen; };
  109. // . for parsing the terms in a cgi url
  110. // . the returned string is NOT NULL terminated
  111. char *getString ( char *field, int32_t *len = NULL,
  112. char *defaultString = NULL , int32_t *next=NULL);
  113. bool getBool ( char *field, bool defaultBool );
  114. int32_t getLong ( char *field, int32_t defaultLong );
  115. int64_t getLongLong ( char *field, int64_t defaultLongLong );
  116. float getFloat ( char *field, double defaultFloat );
  117. double getDouble ( char *field, double defaultDouble );
  118. float getFloatFromCookie ( char *field, float def );
  119. int32_t getLongFromCookie ( char *field, int32_t def );
  120. int64_t getLongLongFromCookie( char *field, int64_t def );
  121. bool getBoolFromCookie ( char *field, bool def );
  122. char *getStringFromCookie ( char *field, int32_t *len = NULL,
  123. char *defaultString = NULL ,
  124. int32_t *next=NULL);
  125. bool hasField ( char *field );
  126. bool isGuestAdmin ( ) ;
  127. // are we a redir? if so return non-NULL
  128. char *getRedir ( ) { return m_redir; };
  129. int32_t getRedirLen ( ) { return m_redirLen; };
  130. HttpRequest();
  131. HttpRequest( const HttpRequest &a );
  132. ~HttpRequest();
  133. void reset();
  134. char *getPath ( ) { return m_path; };
  135. int32_t getPathLen ( ) { return m_plen; };
  136. bool isMSIE ( ) { return m_isMSIE; };
  137. // private:
  138. // . get value of cgi "field" term in the requested filename
  139. // . you know GET /myfile.html?q=123&name=nathaniel
  140. char *getValue ( char *field , int32_t *len=NULL, int32_t *next=NULL) ;
  141. // get value of the ith field
  142. char *getValue ( int32_t i, int32_t *len = NULL);
  143. // get the ith cgi parameter name, return NULL if none
  144. int32_t getNumFields ( ) { return m_numFields; };
  145. char *getField ( int32_t i ) {
  146. if ( i >= m_numFields ) return NULL; return m_fields[i]; };
  147. int32_t getFieldLen ( int32_t i ) {
  148. if ( i >= m_numFields ) return 0 ; return m_fieldLens[i]; };
  149. // . s is a cgi string
  150. // . either the stuff after the '?' in a url
  151. // . or the content in a POST operation
  152. // . returns false and sets errno on error
  153. bool addCgi ( char *s , int32_t slen );
  154. // . parse cgi field terms into m_fields,m_fieldLens,m_fieldValues
  155. // . "s" should point to cgi string right after the '?' if it exists
  156. // . s should have had all it's &'s replaced with /0's
  157. // . slen should include the last \0
  158. void parseFields ( char *s , int32_t slen ) ;
  159. void parseFieldsMultipart ( char *s , int32_t slen ) ;
  160. void addExtraParms(char *s, int32_t slen);
  161. // . decodes "s/slen" and stores into "dest"
  162. // . returns the number of bytes stored into "dest"
  163. // . converts %3A, %2F, etc to their appropriate chars
  164. int32_t decode ( char *dest , char *s , int32_t slen );
  165. // 0 for GET, 1 for HEAD
  166. char m_requestType;
  167. // we decode the filename into this buffer (no cgi)
  168. char m_filename[MAX_HTTP_FILENAME_LEN];
  169. int32_t m_filenameLen; // excludes ?cgistuff
  170. // the TcpSocket::m_readBuf basically
  171. //char *m_origReq;
  172. //int32_t m_origReqLen;
  173. // if request is like "GET /poo?foo=bar"
  174. // then origUrlRequest is "/poo?foo=bar"
  175. // references into TcpSocket::m_readBuf
  176. char *m_origUrlRequest;
  177. int32_t m_origUrlRequestLen;
  178. // virtual host in the Host: field of the mime
  179. char m_host[256];
  180. int32_t m_hostLen;
  181. // are we coming from a local machine?
  182. bool m_isLocal;
  183. // is it the microsoft internet explorer browser?
  184. bool m_isMSIE;
  185. // does the connecting machine have admin privledges?
  186. //bool m_isMasterAdmin;
  187. // . decoded cgi data stored here
  188. // . this just points into TcpSocket::m_readBuf
  189. // . now it points into m_reqBuf.m_buf[]
  190. char *m_cgiBuf ;
  191. int32_t m_cgiBufLen ;
  192. int32_t m_cgiBufMaxLen ;
  193. // partial GET file read info
  194. int32_t m_fileOffset;
  195. int32_t m_fileSize;
  196. // we use this buf to make requests from a url and to hold incoming
  197. // requests
  198. //char m_buf[MAX_REQ_LEN];
  199. //int32_t m_bufLen;
  200. // . cgi field term info stored in here
  201. // . set by parseFields()
  202. char *m_fields [ MAX_CGI_PARMS ];
  203. int32_t m_fieldLens [ MAX_CGI_PARMS ];
  204. char *m_fieldValues [ MAX_CGI_PARMS ];
  205. int32_t m_numFields;
  206. //int32_t getNumCgiParms ( ) { return m_numFields; };
  207. //char *getCgiParm ( int32_t i , int32_t *len ) {
  208. // *len = m_fieldLens[i]; return m_fields[i]; };
  209. //char *getCgiValue ( int32_t i ) { return m_fieldValues[i]; };
  210. int32_t m_userIP;
  211. bool m_isSSL;
  212. // . ptr to the thing we're getting in the request
  213. // . used by PageAddUrl4.cpp
  214. char *m_path;
  215. int32_t m_plen;
  216. char m_redir[128];
  217. int32_t m_redirLen;
  218. // referer, NULL terminated, from Referer: field in MIME
  219. char m_ref [ 256 ];
  220. int32_t m_refLen;
  221. // NULL terminated User-Agent: field in MIME
  222. char m_userAgent[128];
  223. // this points into m_cgiBuf
  224. char *m_ucontent;
  225. int32_t m_ucontentLen;
  226. // buffer for the cookie
  227. //char m_cookieBuf[1024];
  228. //int32_t m_cookieBufLen;
  229. char *m_cookiePtr;
  230. int32_t m_cookieLen;
  231. char *m_metaCookie;
  232. // buffer for adding extra parms
  233. char *m_cgiBuf2;
  234. int32_t m_cgiBuf2Size;
  235. };
  236. const int HTTP_REQUEST_DEFAULT_REQUEST_VERSION = 2;
  237. int getVersionFromRequest ( HttpRequest *r );
  238. #endif