PageRenderTime 44ms CodeModel.GetById 33ms app.highlight 9ms RepoModel.GetById 0ms app.codeStats 0ms

/HttpServer.h

https://github.com/gigablast/open-source-search-engine
C Header | 253 lines | 115 code | 45 blank | 93 comment | 1 complexity | 145a362781c9f7d023994831703bfccf MD5 | raw file
Possible License(s): Apache-2.0
  1// Copyright Matt Wells Nov 2000
  2
  3// . derived from TcpServer
  4// . fill in our own getMsgSize () -- looks for Content-Length:xxx
  5// . fill in our own getMsgPiece() -- looks on disk
  6// . fill in our own putMsgPiece() -- ??? for spidering big files!
  7
  8// . all of this is just a generic non-blocking i/o system
  9// . it moves data from one file/mem to another file/mem that might be remote
 10// 
 11
 12//TODO: handle SIG_PIPEs!! use sigaction() ...
 13
 14//TODO: first packet should have some file in it, not just MIME hdr (avoid TCP delayed ACKS)
 15
 16// TODO: what's TCP_CORK??? it delays sending a packet until it's full
 17//       which improves performance quite a bit. unsetting TCP_CORK flushes it.
 18// TODO: investigate sendfile() (copies data between file descriptors)
 19
 20#ifndef _HTTPSERVER_H_
 21#define _HTTPSERVER_H_
 22
 23//#define BGCOLOR "89e3A9" // green
 24#define BGCOLOR "ffffff" // white
 25//#define BGCOLOR "d0cfc0" // gray
 26//#define BGCOLOR "d0d0d9"   // blue gray
 27//#define BGCOLOR "d0cfd0" // gray
 28//#define BGCOLOR "d6ced6" // bluish gray
 29#define MAX_DOWNLOADS (MAX_TCP_SOCKS-50) // sizes HttpServer::states[] and callbacks[]; MAX_TCP_SOCKS is not defined in this header (presumably TcpServer.h, included below -- confirm)
 30
 31#include "TcpServer.h"
 32#include "Url.h"
 33#include "HttpRequest.h"          // for parsing/forming HTTP requests
 34#include "HttpMime.h"
 35
 36#define DEFAULT_HTTP_PROTO "HTTP/1.0"
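 37// . using HTTP/1.1 instead of 1.0 would prevent the HTTP status 206
 38//   "not acceptable" response we get from www.mindanews.com
 39//   (NOTE: 206 is Partial Content, 406 is Not Acceptable -- unclear which was meant)
 40// . keep-alive is controlled by the client/spider so it should be ok
 41//   to not support it
 42// . MDW: however, we don't support chunked transfer encoding
 43//   ("Transfer-Encoding: chunked\r\n"), so until we do we have to use 1.0
 44// . that is why the HTTP/1.1 define just below is commented out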
 45//#define DEFAULT_SPIDER_HTTP_PROTO "HTTP/1.1"
 46#define DEFAULT_SPIDER_HTTP_PROTO "HTTP/1.0"
 47
 48// . QueuedRequest is for low priority requests which come in while we are
 49//   in a quickpoll; its fields mirror the arguments of HttpServer::addToQueue()
 50#define MAX_REQUEST_QUEUE 128 // NOTE(review): in this header only the commented-out m_requestQueue[] uses it
 51struct QueuedRequest {
 52	HttpRequest  m_r; // the request itself, stored by value
 53	TcpSocket   *m_s; // socket associated with the request (presumably the one it arrived on -- confirm)
 54	int32_t         m_page; // page number; cf. the "page" argument of addToQueue()
 55};
 56
 57typedef void (*tcp_callback_t)(void *, TcpSocket *); // same shape as the (state, socket) callbacks taken by HttpServer::getDoc()
 58int32_t getMsgSize ( char *buf , int32_t bufSize , TcpSocket *s ); // per the notes at the top of this file: looks for Content-Length:xxx
 59
 60bool sendPageAddEvent ( TcpSocket *s , HttpRequest *r ); // NOTE(review): defined outside this header; return convention not visible here
 61
 62class HttpServer {
 63	// . HTTP layer that contains two TcpServers: m_tcp and m_ssltcp
 64 public:
 65
 66	// reset the tcp server
 67	void reset();
 68
 69	// returns false if initialization was unsuccessful; handlerWrapper is optional (defaults to NULL)
 70	bool init ( int16_t port,
 71		    int16_t sslPort ,
 72		    void handlerWrapper ( TcpSocket *s) = NULL);
 73
 74	// . returns false if blocked, true otherwise
 75	// . sets errno on error
 76	// . supports partial gets with "offset" and "size"
 77	// . IMPORTANT: we free read/send bufs of TcpSocket after callback
 78	// . IMPORTANT: if you don't like this set s->m_read/sendBuf to NULL
 79	//              in your callback function
 80	// . NOTE: this should always block unless errno is set
 81	// . the TcpSocket's callbackData is a file ptr
 82	// . replies MUST fit in memory (we have NOT implemented putMsgPiece())
 83	// . uses the HTTP partial GET command if size is > 0
 84	// . uses regular GET if size is -1
 85	// . otherwise uses the HTTP HEAD command
 86	// . the document will be in the s->m_readBuf/s->m_bytesRead of "s"
 87	// . use Mime class to help parse the readBuf
 88	// . timeout is in milliseconds since last read OR write
 89	// . this now ensures that the read content is NULL terminated!
 90	bool getDoc ( char   *url      , // Url    *url      ,
 91		      int32_t    ip       ,
 92		      int32_t    offset   ,
 93		      int32_t    size     ,
 94		      time_t  ifModifiedSince ,
 95		      void   *state    ,
 96		      void   (* callback) ( void *state , TcpSocket *s ) ,
 97		      int32_t    timeout  , // 60*1000 
 98		      int32_t    proxyIp  ,
 99		      int16_t   proxyPort,
100		      int32_t    maxTextDocLen  ,
101		      int32_t    maxOtherDocLen ,
102		      char   *userAgent = NULL ,
103		      //bool    respectDownloadLimit = false ,
104		      // . say HTTP/1.1 instead of 1.0 so we can communicate
105		      //   with room alert...
106		      // . we do not support 1.1 that is why you should always
107		      //   use 1.0
108		      char   *proto = DEFAULT_HTTP_PROTO , // "HTTP/1.0" ,
109		      bool    doPost = false ,
110		      char   *cookie = NULL ,
111		      char *additionalHeader = NULL , // does not include \r\n
112		      // specify your own mime and post data here...
113		      char *fullRequest = NULL ,
114		      char *postContent = NULL ,
115		      char *proxyUsernamePwdAuth = NULL );
116	// . overload that takes an explicit ip/port plus a caller-supplied request buffer and its length
117	bool getDoc ( int32_t ip,
118		      int32_t port,
119		      char *request,
120		      int32_t requestLen,
121		      void   *state    ,
122		      void   (* callback)( void *state , TcpSocket *s ) ,
123		      int32_t    timeout  ,
124		      int32_t    maxTextDocLen  ,
125		      int32_t    maxOtherDocLen );
126		      //bool    respectDownloadLimit = false );
127	// NOTE(review): presumably called once the reply for download slot "n" has arrived on "s" -- confirm in the .cpp
128	bool gotDoc ( int32_t n , TcpSocket *s );
129
130	// just make a request with size set to 0 and it'll do a HEAD request
131	/*
132	bool getMime ( char  *url       ,
133		       int32_t   timeout   ,
134		       int32_t   proxyIp   ,
135		       int16_t  proxyPort ,
136		       void  *state     ,
137		       void  (* callback) ( void *state , TcpSocket *s )) {
138		return getDoc (url,0,0,0,state,callback,
139			       timeout,proxyIp,proxyPort,-1,-1); };
140	*/
141
142	// . this is public so requestHandlerWrapper() can call it
143	// . NOTE(review): this comment used to say that if it returns false "s" will be destroyed w/o a reply, but the function is declared void -- confirm how failure is signalled
144	void requestHandler ( TcpSocket *s );
145
146	// send an error reply, like "HTTP/1.1 404 Not Found"
147	bool sendErrorReply ( TcpSocket *s , int32_t error , char *errmsg ,
148			      int32_t *bytesSent = NULL ); 
149	bool sendErrorReply ( class GigablastRequest *gr );
150	// xml and json uses this
151	bool sendSuccessReply ( class GigablastRequest *gr,char *addMsg=NULL);
152	bool sendSuccessReply (TcpSocket *s , char format , char *addMsg=NULL);
153	// send a "prettier" error reply, formatted in XML if necessary
154	bool sendQueryErrorReply ( TcpSocket *s , int32_t error , char *errmsg,
155				   // FORMAT_HTML=0,FORMAT_XML,FORMAT_JSON
156				   char format, int errnum, 
157				   char *content=NULL); 
158	
159
160	// these are for stopping annoying seo bots
161	void getKey ( int32_t *key, char *kname, 
162		      char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
163	void getKeys ( int32_t *key1, int32_t *key2, char *kname1, char *kname2,
164		       char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
165	bool hasPermission ( int32_t ip , HttpRequest *r , 
166			     char *q , int32_t qlen , int32_t s , int32_t n ) ;
167
168	// . used by the HttpPageX.h classes after making their dynamic content
169	// . returns false if blocked, true otherwise
170	// . sets errno on error
171	// . a cacheTime of -2 means browser should not cache when user
172	//   is clicking forward or hitting back button OR anytime -- no cache!
173	// . a cacheTime of -1 means browser should not cache when user
174	//   is clicking forward, but caching when clicking back button is ok
175	// . a cacheTime of  0 tells browser to use local caching rules
176	bool sendDynamicPage  ( TcpSocket *s , char *page , int32_t pageLen ,
177				int32_t cacheTime = -1 , bool POSTReply = false ,
178				char *contentType = NULL,
179				int32_t httpStatus = -1,
180				char *cookie = NULL,
181				char *charset = NULL ,
182				HttpRequest *hr = NULL );
183
184	// for PageSockets
185	TcpServer *getTcp()    { return &m_tcp; };
186	TcpServer *getSSLTcp() { return &m_ssltcp; };
187
188	// we contain our own tcp server
189	TcpServer m_tcp;
190	TcpServer m_ssltcp;
191
192	// cancel the transaction that had this state (only m_tcp.cancel() is called; m_ssltcp is not touched here)
193	void cancel ( void *state ) {
194		//void (*callback)(void *state, TcpSocket *s) ) {
195		m_tcp.cancel ( state );//, callback );
196	};
197
198	int32_t m_maxOpenSockets;
199
200	//for content-encoding: gzip, we unzip the reply and edit the
201	//header to reflect the new size and encoding 
202	TcpSocket *unzipReply(TcpSocket* s);
203	// returns m_uncompressedBytes / m_bytesDownloaded as a float, or 0.0 while m_bytesDownloaded is zero (avoids dividing by zero)
204	float getCompressionRatio() {
205		if ( m_bytesDownloaded )
206			return (float)m_uncompressedBytes/m_bytesDownloaded;
207		else
208			return 0.0;
209	};
210
211	//this is for low priority requests which come in while we are
212	//in a quickpoll
213	bool addToQueue(TcpSocket *s, HttpRequest *r, int32_t page);
214	bool callQueuedPages();
215
216	bool processSquidProxyRequest ( TcpSocket *sock, HttpRequest *hr);
217
218	// private: (the access specifier is commented out, so everything below is still public)
219
220	// like above but you supply the ip
221	bool sendRequest ( int32_t   ip       ,
222			   int16_t  port     ,
223			   char  *request  ,
224			   void  *state    ,
225			   void (* callback) ( void *state , TcpSocket *s ));
226
227	// go ahead and start sending the file ("path") over the socket
228	bool sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin);
229
230	bool sendReply2 ( char *mime, 
231			  int32_t  mimeLen ,
232			  char *content  ,
233			  int32_t  contentLen ,
234			  TcpSocket *s ,
235			  bool alreadyCompressed = false ,
236			  HttpRequest *hr = NULL) ;
237	// MAX_DOWNLOADS slots each of state pointer and callback; NOTE(review): presumably indexed by the "n" given to gotDoc() -- confirm
238	void *states[MAX_DOWNLOADS];
239	tcp_callback_t callbacks[MAX_DOWNLOADS];
240	// byte counters read by getCompressionRatio()
241	int64_t m_bytesDownloaded;
242	int64_t m_uncompressedBytes;
243
244	//QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
245	//int32_t          m_lastSlotUsed;
246
247};
248
249extern class HttpServer g_httpServer; // declaration only; the definition is not in this header
250
251#endif
252
253