PageRenderTime 172ms CodeModel.GetById 31ms RepoModel.GetById 1ms app.codeStats 0ms

/HttpServer.h

https://github.com/gigablast/open-source-search-engine
C Header | 253 lines | 115 code | 45 blank | 93 comment | 1 complexity | 145a362781c9f7d023994831703bfccf MD5 | raw file
Possible License(s): Apache-2.0
  1. // Copyright Matt Wells Nov 2000
  2. // . built on top of TcpServer (holds two TcpServer members rather than deriving from it)
  3. // . fill in our own getMsgSize () -- looks for Content-Length:xxx
  4. // . fill in our own getMsgPiece() -- looks on disk
  5. // . fill in our own putMsgPiece() -- ??? for spidering big files!
  6. // . everything else is just a generic non-blocking i/o system
  7. // . move data from one file/mem to another file/mem that might be remote
  8. //
  9. //TODO: handle SIGPIPE!! use sigaction() ...
  10. //TODO: first packet should have some file in it, not just MIME hdr (avoid TCP delayed ACKS)
  11. // TODO: what's TCP_CORK??? it delays sending a packet until it's full
  12. // which improves performance quite a bit. unsetting TCP_CORK flushes it.
  13. // TODO: investigate sendfile() (copies data between file descriptors)
  14. #ifndef _HTTPSERVER_H_
  15. #define _HTTPSERVER_H_
  16. //#define BGCOLOR "89e3A9" // green
  17. #define BGCOLOR "ffffff" // white (the active choice; alternatives are kept commented out)
  18. //#define BGCOLOR "d0cfc0" // gray
  19. //#define BGCOLOR "d0d0d9" // blue gray
  20. //#define BGCOLOR "d0cfd0" // gray
  21. //#define BGCOLOR "d6ced6" // bluish gray
  22. #define MAX_DOWNLOADS (MAX_TCP_SOCKS-50) // sizes HttpServer::states[]/callbacks[]; MAX_TCP_SOCKS presumably comes from TcpServer.h -- confirm
  23. #include "TcpServer.h"
  24. #include "Url.h"
  25. #include "HttpRequest.h" // for parsing/forming HTTP requests
  26. #include "HttpMime.h"
  27. #define DEFAULT_HTTP_PROTO "HTTP/1.0" // default for the "proto" argument of HttpServer::getDoc()
  28. // HTTP/1.1 was tried for the spider to prevent the "HTTP STATUS 206
  29. // not acceptable" response www.mindanews.com gave to 1.0 requests.
  30. // NOTE(review): 206 is Partial Content, 406 is Not Acceptable -- confirm which was meant.
  31. // keep-alive is controlled by the client/spider so it should be ok
  32. // to not support it.
  33. // MDW: we don't support chunked transfer encoding, so until we do
  34. // we have to use 1.0, i.e. we cannot handle a reply carrying
  35. // Transfer-Encoding: chunked\r\n
  36. //#define DEFAULT_SPIDER_HTTP_PROTO "HTTP/1.1"
  37. #define DEFAULT_SPIDER_HTTP_PROTO "HTTP/1.0"
  38. // queue size for low priority requests which come in while we are
  39. // in a quickpoll
  40. #define MAX_REQUEST_QUEUE 128
  41. struct QueuedRequest { // one deferred request; fields mirror the arguments of HttpServer::addToQueue()
  42. HttpRequest m_r; // the request, stored by value
  43. TcpSocket *m_s; // socket paired with the request
  44. int32_t m_page; // page value passed alongside the request
  45. };
  46. typedef void (*tcp_callback_t)(void *, TcpSocket *); // callback shape (state, socket), same as the callbacks taken by getDoc()/sendRequest()
  47. int32_t getMsgSize ( char *buf , int32_t bufSize , TcpSocket *s ); // per the file header note: looks for Content-Length:xxx
  48. bool sendPageAddEvent ( TcpSocket *s , HttpRequest *r ); // NOTE(review): defined elsewhere; purpose is not visible from this header
  49. class HttpServer { // HTTP layer that owns two TcpServer members: m_tcp and m_ssltcp
  50. public:
  51. // reset the tcp server
  52. void reset();
  53. // returns false if initialization was unsuccessful
  54. bool init ( int16_t port,
  55. int16_t sslPort ,
  56. void handlerWrapper ( TcpSocket *s) = NULL); // optional per-socket handler; NULL by default
  57. // . returns false if blocked, true otherwise
  58. // . sets errno on error
  59. // . supports partial gets with "offset" and "size"
  60. // . IMPORTANT: we free read/send bufs of TcpSocket after callback
  61. // . IMPORTANT: to keep a buffer, set s->m_readBuf/s->m_sendBuf to NULL
  62. // in your callback function
  63. // . NOTE: this should always block unless errno is set
  64. // . the TcpSocket's callbackData is a file ptr
  65. // . replies MUST fit in memory (we have NOT implemented putMsgPiece())
  66. // . uses the HTTP partial GET command if size is > 0
  67. // . uses regular GET if size is -1
  68. // . otherwise uses the HTTP HEAD command
  69. // . the document will be in the s->m_readBuf/s->m_bytesRead of "s"
  70. // . use Mime class to help parse the readBuf
  71. // . timeout is in milliseconds since last read OR write
  72. // . this now ensures that the read content is NULL terminated!
  73. bool getDoc ( char *url , // Url *url ,
  74. int32_t ip ,
  75. int32_t offset ,
  76. int32_t size ,
  77. time_t ifModifiedSince ,
  78. void *state ,
  79. void (* callback) ( void *state , TcpSocket *s ) ,
  80. int32_t timeout , // 60*1000
  81. int32_t proxyIp ,
  82. int16_t proxyPort,
  83. int32_t maxTextDocLen ,
  84. int32_t maxOtherDocLen ,
  85. char *userAgent = NULL ,
  86. //bool respectDownloadLimit = false ,
  87. // . say HTTP/1.1 instead of 1.0 so we can communicate
  88. // with room alert...
  89. // . we do not support 1.1, which is why you should always
  90. // use 1.0
  91. char *proto = DEFAULT_HTTP_PROTO , // "HTTP/1.0" ,
  92. bool doPost = false ,
  93. char *cookie = NULL ,
  94. char *additionalHeader = NULL , // does not include \r\n
  95. // specify your own mime and post data here...
  96. char *fullRequest = NULL ,
  97. char *postContent = NULL ,
  98. char *proxyUsernamePwdAuth = NULL );
  99. bool getDoc ( int32_t ip, // overload: the caller supplies the ip/port and the request buffer itself
  100. int32_t port,
  101. char *request,
  102. int32_t requestLen,
  103. void *state ,
  104. void (* callback)( void *state , TcpSocket *s ) ,
  105. int32_t timeout ,
  106. int32_t maxTextDocLen ,
  107. int32_t maxOtherDocLen );
  108. //bool respectDownloadLimit = false );
  109. bool gotDoc ( int32_t n , TcpSocket *s );
  110. // just make a request with size set to 0 and it'll do a HEAD request
  111. /*
  112. bool getMime ( char *url ,
  113. int32_t timeout ,
  114. int32_t proxyIp ,
  115. int16_t proxyPort ,
  116. void *state ,
  117. void (* callback) ( void *state , TcpSocket *s )) {
  118. return getDoc (url,0,0,0,state,callback,
  119. timeout,proxyIp,proxyPort,-1,-1); };
  120. */
  121. // . this is public so requestHandlerWrapper() can call it
  122. // . NOTE(review): an older note said a false return destroys "s" w/o a reply, but this returns void
  123. void requestHandler ( TcpSocket *s );
  124. // send an error reply, like "HTTP/1.1 404 Not Found"
  125. bool sendErrorReply ( TcpSocket *s , int32_t error , char *errmsg ,
  126. int32_t *bytesSent = NULL );
  127. bool sendErrorReply ( class GigablastRequest *gr );
  128. // xml and json replies use these
  129. bool sendSuccessReply ( class GigablastRequest *gr,char *addMsg=NULL);
  130. bool sendSuccessReply (TcpSocket *s , char format , char *addMsg=NULL);
  131. // send a "prettier" error reply, formatted in XML if necessary
  132. bool sendQueryErrorReply ( TcpSocket *s , int32_t error , char *errmsg,
  133. // FORMAT_HTML=0,FORMAT_XML,FORMAT_JSON
  134. char format, int errnum,
  135. char *content=NULL);
  136. // these are for stopping annoying seo bots
  137. void getKey ( int32_t *key, char *kname,
  138. char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
  139. void getKeys ( int32_t *key1, int32_t *key2, char *kname1, char *kname2,
  140. char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
  141. bool hasPermission ( int32_t ip , HttpRequest *r ,
  142. char *q , int32_t qlen , int32_t s , int32_t n ) ;
  143. // . used by the HttpPageX.h classes after making their dynamic content
  144. // . returns false if blocked, true otherwise
  145. // . sets errno on error
  146. // . a cacheTime of -2 means browser should not cache when user
  147. // is clicking forward or hitting back button OR anytime -- no cache!
  148. // . a cacheTime of -1 means browser should not cache when user
  149. // is clicking forward, but caching when clicking back button is ok
  150. // . a cacheTime of 0 tells browser to use local caching rules
  151. bool sendDynamicPage ( TcpSocket *s , char *page , int32_t pageLen ,
  152. int32_t cacheTime = -1 , bool POSTReply = false ,
  153. char *contentType = NULL,
  154. int32_t httpStatus = -1,
  155. char *cookie = NULL,
  156. char *charset = NULL ,
  157. HttpRequest *hr = NULL );
  158. // for PageSockets
  159. TcpServer *getTcp() { return &m_tcp; };
  160. TcpServer *getSSLTcp() { return &m_ssltcp; };
  161. // we contain our own tcp servers (plain and ssl)
  162. TcpServer m_tcp;
  163. TcpServer m_ssltcp;
  164. // cancel the transaction that had this state (only m_tcp is asked; m_ssltcp is not)
  165. void cancel ( void *state ) {
  166. //void (*callback)(void *state, TcpSocket *s) ) {
  167. m_tcp.cancel ( state );//, callback );
  168. };
  169. int32_t m_maxOpenSockets;
  170. //for content-encoding: gzip, we unzip the reply and edit the
  171. //header to reflect the new size and encoding
  172. TcpSocket *unzipReply(TcpSocket* s);
  173. float getCompressionRatio() { // m_uncompressedBytes / m_bytesDownloaded, or 0.0 when m_bytesDownloaded is 0
  174. if ( m_bytesDownloaded )
  175. return (float)m_uncompressedBytes/m_bytesDownloaded;
  176. else
  177. return 0.0;
  178. };
  179. //this is for low priority requests which come in while we are
  180. //in a quickpoll
  181. bool addToQueue(TcpSocket *s, HttpRequest *r, int32_t page);
  182. bool callQueuedPages();
  183. bool processSquidProxyRequest ( TcpSocket *sock, HttpRequest *hr);
  184. // private: (left commented out, so everything below is still public)
  185. // like above but you supply the ip
  186. bool sendRequest ( int32_t ip ,
  187. int16_t port ,
  188. char *request ,
  189. void *state ,
  190. void (* callback) ( void *state , TcpSocket *s ));
  191. // go ahead and start sending the file ("path") over the socket
  192. bool sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin);
  193. bool sendReply2 ( char *mime,
  194. int32_t mimeLen ,
  195. char *content ,
  196. int32_t contentLen ,
  197. TcpSocket *s ,
  198. bool alreadyCompressed = false ,
  199. HttpRequest *hr = NULL) ;
  200. void *states[MAX_DOWNLOADS]; // NOTE(review): presumably one caller state per download slot -- confirm usage
  201. tcp_callback_t callbacks[MAX_DOWNLOADS]; // NOTE(review): presumably the callback matching each states[] slot -- confirm usage
  202. int64_t m_bytesDownloaded; // denominator of getCompressionRatio()
  203. int64_t m_uncompressedBytes; // numerator of getCompressionRatio()
  204. //QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
  205. //int32_t m_lastSlotUsed;
  206. };
  207. extern class HttpServer g_httpServer; // declaration only; the definition lives outside this header
  208. #endif