PageRenderTime 38ms CodeModel.GetById 26ms app.highlight 10ms RepoModel.GetById 0ms app.codeStats 0ms

/HttpRequest.h

https://github.com/gigablast/open-source-search-engine
C Header | 305 lines | 136 code | 67 blank | 102 comment | 7 complexity | 483b4ca78529bea3e8bd63323383b7b3 MD5 | raw file
Possible License(s): Apache-2.0
  1// Matt Wells, copyright Sep 2001
  2
  3// . class to parse and form HTTP requests
  4
  5#ifndef _HTTPREQUEST_H_
  6#define _HTTPREQUEST_H_
  7
  8// . allow for up to MAX_CGI_PARMS cgi fields per request (originally 256)
  9// . this was stopping us from having more than about 253 banned ips, so i
 10//   raised it to 600
 11//#define MAX_CGI_PARMS 600
 12// . new priority controls have 128 rows!! (hence the larger value below)
 13#define MAX_CGI_PARMS 1400
 14
 15// size of the HttpRequest::m_filename buffer, for getting a file from http server
 16#define MAX_HTTP_FILENAME_LEN 1024
 17
 18// i raised this from 1.3k to 5.3k so we can log the full request better
 19//#define MAX_REQ_LEN (1024*5+300)
 20//#define MAX_REQ_LEN (8024*5+300)
 21
 22// keep it small now that we use m_reqBuf
 23//#define MAX_REQ_LEN (1024)
 24
 25#include "SafeBuf.h"
 26#include "Mem.h"       // mdup
 27#include "Url.h"       // Url class
 28#include "TcpSocket.h"
 29
 30// values for HttpRequest::m_replyFormat (stored in a char, so keep them small)
 31#define FORMAT_HTML 1
 32#define FORMAT_XML  2
 33#define FORMAT_JSON 3
 34#define FORMAT_CSV  4
 35#define FORMAT_TXT  5
 36#define FORMAT_PROCOG 6
 37#define FORMAT_WIDGET_IFRAME 7
 38#define FORMAT_WIDGET_AJAX 8
 39// used by the ajax widget to create search results to APPEND to the end of the widget
 40#define FORMAT_WIDGET_APPEND 9
 41
 42class HttpRequest {
 43
 44 public:
 45
 46	// . form an HTTP request 
 47	// . use size 0 for HEAD requests
 48	// . use size -1 for GET whole doc requests
 49	// . fill in your own offset/size for partial GET requests
 50	// . returns false and sets errno on error
 51	bool set ( char *url , int32_t offset = 0 , int32_t size = -1 ,
 52		   time_t ifModifiedSince = 0 , char *userAgent = NULL ,
 53		   char *proto = "HTTP/1.0" ,
 54		   bool doPost = false ,
 55		   char *cookie = NULL ,
 56		   char *additionalHeader = NULL , // does not incl \r\n
 57		   int32_t postContentLen = -1 , // for content-length of POST
 58		   int32_t proxyIp = 0 ,
 59		   char *proxyUsernamePwdAuth = NULL );
 60
 61	// use this
 62	SafeBuf m_reqBuf;
 63	bool    m_reqBufValid;
 64
 65	// get the request length
 66	int32_t getRequestLen() { return m_reqBuf.length(); };//m_bufLen; };
 67
 68	// . get the outgoing request we made by calling set() above
 69	// . OR get the first line of an incoming request
 70	char *getRequest  () { 
 71		if ( m_reqBufValid ) return m_reqBuf.getBufStart();
 72		else return NULL;
 73		//return m_buf;
 74	};
 75
 76	// FORMAT_HTML FORMAT_JSON FORMAT_XML
 77	char getFormat() { return getReplyFormat(); };
 78	char getReplyFormat();
 79	bool m_replyFormatValid;
 80	char m_replyFormat;
 81
 82	// get the referer field of the MIME header
 83	char *getReferer () { return m_ref; };
 84
 85	// this is NULL terminated too
 86	char *getUserAgent () { return m_userAgent; };
 87
 88	// just does a simply gbmemcpy() operation, since it should be pointing
 89	// into the TcpSocket's buffer which is safe until after reply is sent
 90	// . returns false and sets g_errno on error, true otherwise
 91	bool copy ( class HttpRequest *r , bool steal = false ) ;
 92
 93	// like copy() but doesn't do a copy, steals the ptrs and sets
 94	// hr->m_usingStack to true so it won't free its buffer
 95	bool stealBuf ( class HttpRequest *hr ) {return copy ( hr , true ); }
 96
 97	// . the url being requested
 98	// . removes &code= facebook cruft
 99	bool getCurrentUrl ( SafeBuf &cu );
100	bool getCurrentUrlPath ( SafeBuf &cup );
101
102	// . parse an incoming request
103	// . returns false and set errno on error
104	// . may alloc mem for m_cgiBuf to hold cgi vars from GET or POST op
105	bool set ( char *req , int32_t reqSize , TcpSocket *s );
106
107	// for gigablast's own rendering of squid
108	bool m_isSquidProxyRequest;
109	char *m_squidProxiedUrl;
110	int32_t m_squidProxiedUrlLen;
111
112	// is it this type of request?
113	bool isGETRequest  () { return (m_requestType == 0); };
114	bool isHEADRequest () { return (m_requestType == 1); };
115	bool isPOSTRequest () { return (m_requestType == 2); };
116
117	char *getFilename    () { return m_filename; };
118	int32_t  getFilenameLen () { return m_filenameLen; };
119	int32_t  getFileOffset  () { return m_fileOffset; };
120	int32_t  getFileSize    () { return m_fileSize; };
121
122	char *getHost        () { return m_host;    };
123	int32_t  getHostLen     () { return m_hostLen; };
124	//bool  isLocal        () { return m_isLocal; };
125	//bool  isAdmin        () { return m_isMasterAdmin; };
126	bool  isLocal        () { return m_isLocal; };
127
128	// is this the admin of a collection?
129	//bool isCollAdmin () { return m_isCollAdmin; }
130
131	// . the &ucontent= cgi var does not get its value decoded
132	//   because it's already decoded
133	// . this is so Mark doesn't have to url encode his injected content
134	char *getUnencodedContent    ( ) { return m_ucontent; };
135	int32_t  getUnencodedContentLen ( ) { return m_ucontentLen; };
136	
137	// . for parsing the terms in a cgi url
138	// . the returned string is NOT NULL terminated
139	char      *getString   ( char *field, int32_t *len = NULL,
140				 char *defaultString = NULL , int32_t *next=NULL);
141	bool       getBool     ( char *field, bool defaultBool );
142	int32_t       getLong     ( char *field, int32_t defaultLong           );
143	int64_t  getLongLong ( char *field, int64_t defaultLongLong    );
144	float      getFloat    ( char *field, double defaultFloat );
145	double     getDouble   ( char *field, double defaultDouble );
146
147	float      getFloatFromCookie    ( char *field, float def );
148	int32_t       getLongFromCookie    ( char *field, int32_t def );
149	int64_t  getLongLongFromCookie( char *field, int64_t def );
150	bool       getBoolFromCookie    ( char *field, bool def );
151	char      *getStringFromCookie  ( char *field, int32_t *len = NULL,
152					  char *defaultString = NULL , 
153					  int32_t *next=NULL);
154	
155
156	bool hasField ( char *field );
157
158	bool isGuestAdmin ( ) ;
159
160	// are we a redir? if so return non-NULL
161	char      *getRedir    ( ) { return m_redir;    };
162	int32_t       getRedirLen ( ) { return m_redirLen; };
163
164	HttpRequest();
165	HttpRequest( const HttpRequest &a );
166	~HttpRequest();
167	void reset();
168
169	char *getPath    ( ) { return m_path; };
170	int32_t  getPathLen ( ) { return m_plen; };
171
172	bool isMSIE ( ) { return m_isMSIE; };
173
174	// private:
175
176	// . get value of cgi "field" term in the requested filename
177	// . you know GET /myfile.html?q=123&name=nathaniel
178	char *getValue ( char *field , int32_t *len=NULL, int32_t *next=NULL) ;
179
180	// get value of the ith field
181	char *getValue ( int32_t i, int32_t *len = NULL);
182
183	// get the ith cgi parameter name, return NULL if none
184	int32_t  getNumFields   ( ) { return m_numFields; };
185	char *getField    ( int32_t i ) {
186		if ( i >= m_numFields ) return NULL; return m_fields[i]; };
187	int32_t  getFieldLen ( int32_t i ) {
188		if ( i >= m_numFields ) return 0   ; return m_fieldLens[i]; };
189
190	// . s is a cgi string
191	// . either the stuff after the '?' in a url
192	// . or the content in a POST operation
193	// . returns false and sets errno on error
194	bool addCgi ( char *s , int32_t slen );
195
196	// . parse cgi field terms into m_fields,m_fieldLens,m_fieldValues
197	// . "s" should point to cgi string right after the '?' if it exists
198	// . s should have had all it's &'s replaced with /0's
199	// . slen should include the last \0
200	void parseFields ( char *s , int32_t slen ) ;
201	void parseFieldsMultipart ( char *s , int32_t slen ) ;
202	void addExtraParms(char *s, int32_t slen);
203	// . decodes "s/slen" and stores into "dest"
204	// . returns the number of bytes stored into "dest"
205	// . converts %3A, %2F, etc to their appropriate chars
206	int32_t decode ( char *dest , char *s , int32_t slen );
207
208	// 0 for GET, 1 for HEAD
209	char  m_requestType;
210
211	// we decode the filename into this buffer (no cgi)
212	char  m_filename[MAX_HTTP_FILENAME_LEN];
213	int32_t  m_filenameLen;  // excludes ?cgistuff
214
215	// the TcpSocket::m_readBuf basically
216	//char *m_origReq;
217	//int32_t  m_origReqLen;
218
219	// if request is like "GET /poo?foo=bar"
220	// then origUrlRequest is "/poo?foo=bar"
221	// references into TcpSocket::m_readBuf
222	char *m_origUrlRequest;
223	int32_t  m_origUrlRequestLen;
224
225
226	// virtual host in the Host: field of the mime
227	char  m_host[256];
228	int32_t  m_hostLen;
229
230	// are we coming from a local machine? 
231	bool  m_isLocal;
232
233	// is it the microsoft internet explorer browser?
234	bool m_isMSIE;
235
236	// does the connecting machine have admin privledges?
237	//bool  m_isMasterAdmin;
238
239	// . decoded cgi data stored here 
240	// . this just points into TcpSocket::m_readBuf
241	// . now it points into m_reqBuf.m_buf[]
242	char *m_cgiBuf       ;
243	int32_t  m_cgiBufLen    ;
244	int32_t  m_cgiBufMaxLen ;
245
246	// partial GET file read info
247	int32_t  m_fileOffset;
248	int32_t  m_fileSize;
249
250	// we use this buf to make requests from a url and to hold incoming
251	// requests
252	//char  m_buf[MAX_REQ_LEN];
253	//int32_t  m_bufLen;
254
255	// . cgi field term info stored in here
256	// . set by parseFields()
257	char *m_fields      [ MAX_CGI_PARMS ];
258	int32_t  m_fieldLens   [ MAX_CGI_PARMS ];
259	char *m_fieldValues [ MAX_CGI_PARMS ];
260	int32_t  m_numFields;
261	//int32_t  getNumCgiParms ( ) { return m_numFields; };
262	//char *getCgiParm     ( int32_t i , int32_t *len ) { 
263	//	*len = m_fieldLens[i]; return m_fields[i]; };
264	//char *getCgiValue    ( int32_t i ) { return m_fieldValues[i]; };
265
266	int32_t m_userIP;
267	bool m_isSSL;
268
269	// . ptr to the thing we're getting in the request
270	// . used by PageAddUrl4.cpp
271	char *m_path;
272	int32_t  m_plen;
273
274	char  m_redir[128];
275	int32_t  m_redirLen;
276
277	// referer, NULL terminated, from Referer: field in MIME
278	char  m_ref [ 256 ];
279	int32_t  m_refLen;
280
281	// NULL terminated User-Agent: field in MIME
282	char  m_userAgent[128];
283
284	// this points into m_cgiBuf
285	char *m_ucontent;
286	int32_t  m_ucontentLen;
287
288	// buffer for the cookie
289	//char  m_cookieBuf[1024];
290	//int32_t  m_cookieBufLen;
291	char *m_cookiePtr;
292	int32_t  m_cookieLen;
293
294	char *m_metaCookie;
295
296	// buffer for adding extra parms
297	char *m_cgiBuf2;
298	int32_t  m_cgiBuf2Size;
299};
300
// request version constant; being a namespace-scope const it has internal
// linkage, so each file including this header gets its own copy
// NOTE(review): presumably the version assumed when a request does not
// specify one -- confirm against getVersionFromRequest()'s definition
301const int HTTP_REQUEST_DEFAULT_REQUEST_VERSION = 2;
302
// returns a version number for request "r"; defined outside this header
// NOTE(review): which cgi field it reads is not visible from here
303int getVersionFromRequest ( HttpRequest *r );
304
305#endif