PageRenderTime 54ms CodeModel.GetById 32ms app.highlight 18ms RepoModel.GetById 1ms app.codeStats 0ms

/Parms.h

https://github.com/gigablast/open-source-search-engine
C Header | 561 lines | 297 code | 89 blank | 175 comment | 0 complexity | 32cf1aaba6cd2476ff1417bec70a4d14 MD5 | raw file
Possible License(s): Apache-2.0
  1// Matt Wells, copyright Feb 2002
  2
  3// Ideally, CollectionRec.h and SearchInput.h should be automatically generated
  4// from Parms.cpp. But Parms need to be marked if they contribute to 
  5// SearchInput::makeKey() for caching the SERPS.
  6
  7#ifndef _PARMS_H_
  8#define _PARMS_H_
  9
 10#include "Rdb.h"
 11
 12//#include "CollectionRec.h"
 13
 14void handleRequest3e ( UdpSlot *slot , int32_t niceness ) ;
 15void handleRequest3f ( UdpSlot *slot , int32_t niceness ) ;
 16
 17// "url filters profile" values. used to set default crawl rules
 18// in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults(). 
 19// for instance, UFP_NEWS spiders sites more frequently but less deep in
 20// order to get "news" pages and articles
 21//enum {
 22//	UFP_CUSTOM = 0 ,
 23//	UFP_NONE   = 0 ,
 24//	UFP_WEB    = 1 ,
 25//	UFP_NEWS   = 2 ,
 26//	UFP_LANG = 3,
 27//	UFP_SHALLOW = 4
 28//};
 29
 30// special priorities for the priority drop down 
 31// in the url filters table
 32//enum {
 33//	SPIDER_PRIORITY_FILTERED  = -3 ,
 34//	SPIDER_PRIORITY_BANNED    = -2 ,
 35//	SPIDER_PRIORITY_UNDEFINED = -1 };
 36
 37enum {
 38	OBJ_CONF    = 1 ,
 39	OBJ_COLL        ,
 40	OBJ_SI          , // SearchInput class
 41	OBJ_GBREQUEST   , // for GigablastRequest class of parms
 42	OBJ_IR          , // InjectionRequest class from PageInject.h
 43	OBJ_NONE
 44};
 45
 46enum {
 47	TYPE_BOOL       = 1 ,
 48	TYPE_BOOL2          ,
 49	TYPE_CHECKBOX       ,
 50	TYPE_CHAR           ,
 51	TYPE_CHAR2          , //needed to display char as a number (maxNumHops)
 52	TYPE_CMD            ,
 53	TYPE_FLOAT          ,
 54	TYPE_IP             ,
 55	TYPE_LONG           ,
 56	TYPE_LONG_LONG      , // 10
 57	TYPE_NONE           ,
 58	TYPE_PRIORITY       ,
 59	TYPE_PRIORITY2      ,
 60	TYPE_PRIORITY_BOXES ,
 61	TYPE_RETRIES        ,
 62	TYPE_STRING         ,
 63	TYPE_STRINGBOX      ,
 64	TYPE_STRINGNONEMPTY ,
 65	TYPE_TIME           ,
 66	TYPE_DATE2          , // 20
 67	TYPE_DATE           ,
 68	TYPE_RULESET        ,
 69	TYPE_FILTER         ,
 70	TYPE_COMMENT        ,
 71        TYPE_CONSTANT       ,
 72	TYPE_MONOD2         ,
 73	TYPE_MONOM2         ,
 74	TYPE_LONG_CONST     ,
 75	TYPE_SITERULE       , // 29
 76	TYPE_SAFEBUF        ,
 77	TYPE_UFP            ,
 78	TYPE_FILEUPLOADBUTTON,
 79	TYPE_DOUBLE,
 80	TYPE_CHARPTR
 81};
 82
 83//forward decls to make compiler happy:
 84class HttpRequest;
 85class TcpSocket;
 86
 87class Page {
 88 public:
 89	int32_t  m_page;     // from the PAGE_* enums above
 90	char *m_bgcolor;  // color of the cells in the table
 91	char *m_topcolor; // color of the table's first row
 92	char *m_title;    // browser title bar
 93};
 94
 95#include "Msg4.h"
 96
 97// generic gigablast request. for all apis offered.
 98class GigablastRequest {
 99 public:
100
101	//
102	// make a copy of the http request because the original is
103	// on the stack. AND the "char *" types below will reference into
104	// this because they are listed as TYPE_CHARPTR in Parms.cpp.
105	// that saves us memory as opposed to making them all SafeBufs.
106	//
107	HttpRequest m_hr;
108
109	// ptr to socket to send reply back on
110	TcpSocket *m_socket;
111
112	// TYPE_CHARPTR
113	char *m_coll;
114
115	// pretty universal char ptr
116	char *m_formatStr;
117
118	////////////
119	//
120	// /admin/inject parms
121	//
122	////////////
123	// these all reference into m_hr or into the Parm::m_def string!
124	char *m_url; // also for /get
125	//char *m_queryToScrape;
126	//char *m_contentDelim;
127	//char  m_containerContentType; // CT_UNKNOWN, CT_WARC, CT_ARC
128	//int32_t m_injectDocIp;
129	//char *m_contentTypeStr;
130	//char *m_contentFile;
131	//char *m_content;
132	//char *m_diffbotReply; // secret thing from dan
133	//char  m_injectLinks;
134	//char  m_spiderLinks;
135	//char  m_shortReply;
136	//char  m_newOnly;
137	//char  m_deleteUrl;
138	//char  m_recycle;
139	//char  m_dedup;
140	//char  m_hasMime;
141	//char  m_doConsistencyTesting;
142	//char  m_getSections;
143	//char  m_gotSections;
144	//int32_t  m_charset;
145	//int32_t  m_hopCount; // hopcount
146	//collnum_t m_collnum; // more reliable than m_coll
147	// older ones
148	//uint32_t m_firstIndexed; // firstimdexed
149	//uint32_t m_lastSpidered; // lastspidered;
150	//SafeBuf  m_contentBuf; // for holding a warc/arc file
151
152
153
154
155
156	///////////
157	//
158	// /admin/import parms
159	//
160	///////////
161	char *m_importDir; // TYPE_CHARPTR
162	int32_t  m_importInjects;
163
164
165	///////////
166	//
167	// /get parms (for getting cached web pages)
168	//
169	///////////
170	int64_t m_docId;
171	int32_t      m_strip;
172	char      m_includeHeader;
173	char      m_highlightQuery;
174
175	///////////
176	//
177	// /admin/addurl parms
178	//
179	///////////
180	char *m_urlsBuf;
181	char  m_stripBox;
182	char  m_harvestLinks;
183	SafeBuf m_listBuf;
184	Msg4 m_msg4;
185
186	/////////////
187	//
188	// /admin/reindex parms
189	//
190	////////////
191	char *m_query;
192	int32_t  m_srn;
193	int32_t  m_ern;
194	char *m_qlang;
195        bool  m_forceDel;
196	char  m_recycleContent;
197	// useful bufs to copy data over
198	SafeBuf m_tmpBuf1;
199	SafeBuf m_tmpBuf2;
200	SafeBuf m_tmpBuf3;
201};
202
203
// Values stored in Parm::m_subMenu. 1..11 select a sub menu;
// SUBMENU_CHECKBOX is a flag bit rather than a menu id.
#define SUBMENU_DISPLAY      1
#define SUBMENU_MAP          2
#define SUBMENU_CALENDAR     3
#define SUBMENU_LOCATION     4
#define SUBMENU_SOCIAL       5
#define SUBMENU_TIME         6
#define SUBMENU_CATEGORIES   7
#define SUBMENU_LINKS        8
#define SUBMENU_WIDGET       9
#define SUBMENU_SUGGESTIONS  10
#define SUBMENU_SEARCH       11
#define SUBMENU_CHECKBOX     0x80 // flag
217
// Bit flags stored in Parm::m_flags. Each flag occupies its own bit; the
// values are written at a uniform width so the bit positions line up.
#define PF_COOKIE            0x00000001 // store in cookie?
#define PF_REDBOX            0x00000002 // redbox constraint on search results
#define PF_SUBMENU_HEADER    0x00000004
#define PF_WIDGET_PARM       0x00000008
#define PF_API               0x00000010
#define PF_REBUILDURLFILTERS 0x00000020
#define PF_NOSYNC            0x00000040
#define PF_DIFFBOT           0x00000080

#define PF_HIDDEN            0x00000100
#define PF_NOSAVE            0x00000200
#define PF_DUP               0x00000400
#define PF_TEXTAREA          0x00000800
#define PF_COLLDEFAULT       0x00001000
#define PF_NOAPI             0x00002000
#define PF_REQUIRED          0x00004000
#define PF_REBUILDPROXYTABLE 0x00008000

#define PF_NOHTML            0x00010000

#define PF_CLONE             0x00020000
#define PF_PRIVATE           0x00040000 // for password to not show in api
#define PF_SMALLTEXTAREA     0x00080000
#define PF_REBUILDACTIVELIST 0x00100000
243
244class Parm {
245 public:
246	char *m_title; // displayed above m_desc on admin gui page
247	char *m_desc;  // description of variable displayed on admin gui page
248	char *m_cgi;   // cgi name, contains %i if an array
249	char *m_cgi2;  // alias
250	char *m_cgi3;  // alias
251	char *m_cgi4;  // alias
252	char *m_xml;   // default to rendition of m_title if NULL
253	int32_t  m_off;   // this variable's offset into the CollectionRec class
254	char  m_colspan;
255	char  m_type;  // TYPE_BOOL, TYPE_LONG, ...
256	int32_t  m_page;  // PAGE_MASTER, PAGE_SPIDER, ... see Pages.h
257	char  m_obj;   // OBJ_CONF or OBJ_COLL
258	// the maximum number of elements supported in the array.
259	// this is 1 if NOT an array (i.e. array of only one parm).
260	// in such cases a "count" is NOT stored before the parm in 
261	// CollectionRec.h or Conf.h.
262	bool isArray() { return (m_max>1); };
263
264	int32_t getNumInArray() ;
265
266	int32_t  m_max;   // max elements in the array
267	// if array is fixed size, how many elements in it?
268	// this is 0 if not a FIXED size array.
269	int32_t  m_fixed; 
270	int32_t  m_size;  // max string size
271	char *m_def;   // default value of this variable if not in either conf
272	int32_t  m_defOff; // if default value points to a collectionrec parm!
273	char  m_cast;  // true if we should broadcast to all hosts (default)
274	char *m_units;
275	char  m_addin; // add "insert above" link to gui when displaying array
276	char  m_rowid; // id of row controls are in, if any
277	char  m_rdonly;// if in read-only mode, blank out this control?
278	char  m_hdrs;  // print headers for row or print title/desc for single?
279	char  m_perms; // 0 means same as WebPages' m_perms
280	char  m_subMenu;
281	int32_t  m_flags;
282	char *m_class;
283	char *m_icon;
284	char *m_qterm;
285	char *m_pstr; // for sorting by in sendPageAPI()
286	int32_t  m_parmNum; // slot # in the m_parms[] array that we are
287	//bool (*m_func)(TcpSocket *s , HttpRequest *r,
288	//	       bool (*cb)(TcpSocket *s , HttpRequest *r));
289	bool (*m_func)(char *parmRec);
290	// some functions can block, like when deleting a coll because
291	// the tree might be saving, so they take a "we" ptr
292	bool (*m_func2)(char *parmRec,class WaitEntry *we);
293	int32_t  m_plen;  // offset of length for TYPE_STRINGS (m_htmlHeadLen...)
294	char  m_group; // start of a new group of controls?
295	// m_priv = 1 means gigablast's software license clients cannot see
296	//            or change.
297	// m_priv = 2 means gigablast's software license clients, including
298	//            even metalincs, cannot see or change.
299	// m_priv = 3 means nobody can see in admin controls, but can be 
300	//            in search input by anybody. really a hack for yaron
301	//            from quigo so he can set "t2" to something bigger.
302	char  m_priv;  // true if gigablast's software clients cannot see
303	char  m_save;  // save to xml file? almost always true
304	int32_t  m_min;
305	// these are used for search parms in PageResults.cpp
306	//char m_sparm;// is this a search parm? for passing to PageResults.cpp
307	//char *m_scgi;  // parm in the search url
308	char  m_spriv; // is it private? only admins can see/use private parms
309	//char *m_scmd;  // the url path for this m_scgi variable
310	//int32_t  m_sdefo; // offset of default into CollectionRec (use m_off)
311	int32_t  m_sminc ;// offset of min in CollectionRec (-1 for none)
312	int32_t  m_smaxc ;// offset of max in CollectionRec (-1 for none)
313	int32_t  m_smin;  // absolute min
314	int32_t  m_smax;  // absolute max
315	//int32_t  m_soff;  // offset into SearchInput to store value in
316	char  m_sprpg; // propagate the cgi variable to other pages via GET?
317	char  m_sprpp; // propagate the cgi variable to other pages via POST?
318	bool  m_sync;  // this parm should be synced
319	int32_t  m_hash;  // hash of "title"
320	int32_t  m_cgiHash; // hash of m_cgi
321
322	bool   getValueAsBool   ( class SearchInput *si ) ;
323	int32_t   getValueAsLong   ( class SearchInput *si ) ;
324	char * getValueAsString ( class SearchInput *si ) ;	
325
326	int32_t getNumInArray ( collnum_t collnum ) ;
327
328	bool printVal ( class SafeBuf *sb , collnum_t collnum , int32_t occNum ) ;
329};
330
// capacity of the Parms::m_parms[] and Parms::m_searchParms[] arrays
#define MAX_PARMS 940

// size in bytes (200 KiB) of the Parms::m_buf buffer
#define MAX_XML_CONF (200 * 1024)
334
335#include "Xml.h"
336#include "SafeBuf.h"
337
338struct SerParm;
339
340class Parms {
341
342 public:
343
344	Parms();
345
346	void init();
347	
348	bool sendPageGeneric ( class TcpSocket *s, class HttpRequest *r );
349
350	bool printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r );
351
352	//char *printParms (char *p, char *pend, TcpSocket *s, HttpRequest *r);
353	bool printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r );
354
355	bool printParms2 (SafeBuf* sb, 
356			  int32_t page,
357			  CollectionRec *cr,
358			  int32_t nc , 
359			  int32_t pd ,
360			  bool isCrawlbot ,
361			  char format, //bool isJSON,
362			  TcpSocket *sock,
363			  bool isMasterAdmin,
364			  bool isCollAdmin
365			  );
366
367	/*
368	char *printParm ( char *p    , 
369			  char *pend ,
370			  //int32_t  user ,
371			  char *username,
372			  Parm *m    , 
373			  int32_t  mm   , // m = &m_parms[mm]
374			  int32_t  j    ,
375			  int32_t  jend ,
376			  char *THIS ,
377			  char *coll ,
378			  char *pwd  ,
379			  char *bg   ,
380			  int32_t  nc   ,
381			  int32_t  pd   ) ;
382	*/
383
384	bool printParm ( SafeBuf* sb,
385			 //int32_t  user ,
386			  char *username,
387			  Parm *m    , 
388			  int32_t  mm   , // m = &m_parms[mm]
389			  int32_t  j    ,
390			  int32_t  jend ,
391			  char *THIS ,
392			  char *coll ,
393			  char *pwd  ,
394			  char *bg   ,
395			  int32_t  nc   ,
396			 int32_t  pd   ,
397			 bool lastRow ,
398			 bool isCrawlbot ,//= false,
399			 char format , //= FORMAT_HTML,
400			 bool isMasterAdmin ,
401			 bool isCollAdmin ,
402			 class TcpSocket *sock );
403
404	char *getTHIS ( HttpRequest *r , int32_t page );
405
406	class Parm *getParmFromParmHash ( int32_t parmHash );
407
408	bool setFromRequest ( HttpRequest *r , //int32_t user,
409			      TcpSocket* s,
410			      class CollectionRec *newcr ,
411			      char *THIS ,
412			      int32_t objType );
413	
414	bool insertParm ( int32_t i , int32_t an , char *THIS ) ;
415	bool removeParm ( int32_t i , int32_t an , char *THIS ) ;
416
417	void setParm ( char *THIS, Parm *m, int32_t mm, int32_t j, char *s,
418		       bool isHtmlEncoded , bool fromRequest ) ;
419	
420	void setToDefault ( char *THIS , char objType ,
421			    CollectionRec *argcr );//= NULL ) ;
422
423	bool setFromFile ( void *THIS        , 
424			   char *filename    , 
425			   char *filenameDef ,
426			   char  objType ) ;
427
428	bool setParmsFromXml ( Xml &xml , void *THIS, char objType ) ;
429
430	bool setXmlFromFile(Xml *xml, char *filename, class SafeBuf *sb );
431
432	bool saveToXml ( char *THIS , char *f , char objType ) ;
433
434	bool convertToXml ( char *buf , char *THIS , char objType ) ;
435
436	// get the parm with the associated cgi name. must be NULL terminated.
437	Parm *getParm ( char *cgi ) ;
438
439	bool getParmHtmlEncoded ( SafeBuf *sb , Parm *m , char *s );
440
441	bool setGigablastRequest ( class TcpSocket *s ,
442				   class HttpRequest *hr ,
443				   class GigablastRequest *gr );
444
445	// . make it so a collectionrec can be copied in Collectiondb.cpp
446	// . so the rec can be copied and the old one deleted without
447	//   freeing the safebufs now used by the new one.
448	void detachSafeBufs ( class CollectionRec *cr ) ;
449
450	// calc checksum of parms
451	uint32_t calcChecksum();
452
453	// get size of serialized parms
454	//int32_t getStoredSize();
455	// . serialized to buf
456	// . if buf is NULL, just calcs size
457	//bool serialize( char *buf, int32_t *bufSize );
458	//void deserialize( char *buf );
459
460	void overlapTest ( char step ) ;
461
462
463	/////
464	//
465	// parms now in parmdb
466	//
467	/////
468
469	// all parm recs need to be in the tree
470	//Rdb m_rdb;
471
472	//
473	// new functions
474	//
475
476	bool addNewParmToList1 ( SafeBuf *parmList ,
477				 collnum_t collnum ,
478				 char *parmValString ,
479				 int32_t  occNum ,
480				 char *parmName ) ;
481	bool addNewParmToList2 ( SafeBuf *parmList ,
482				 collnum_t collnum , 
483				 char *parmValString ,
484				 int32_t occNum ,
485				 Parm *m ) ;
486	bool addCurrentParmToList1 ( SafeBuf *parmList ,
487				     CollectionRec *cr , 
488				     char *parmName ) ;
489	bool addCurrentParmToList2 ( SafeBuf *parmList ,
490				     collnum_t collnum , 
491				     int32_t occNum ,
492				     Parm *m ) ;
493	bool convertHttpRequestToParmList (HttpRequest *hr,SafeBuf *parmList,
494					   int32_t page , TcpSocket *sock );
495	Parm *getParmFast2 ( int32_t cgiHash32 ) ;
496	Parm *getParmFast1 ( char *cgi , int32_t *occNum ) ;
497	bool broadcastParmList ( SafeBuf *parmList ,
498				 void    *state ,
499				 void   (* callback)(void *) ,
500				 bool sendToGrunts  = true ,
501				 bool sendToProxies = false ,
502				 // send to this single hostid? -1 means all
503				 int32_t hostId = -1 ,
504				 int32_t hostId2 = -1 ); // hostid range?
505	bool doParmSendingLoop ( ) ;
506	bool syncParmsWithHost0 ( ) ;
507	bool makeSyncHashList ( SafeBuf *hashList ) ;
508	int32_t getNumInArray ( collnum_t collnum ) ;
509	bool addAllParmsToList ( SafeBuf *parmList, collnum_t collnum ) ;
510	bool updateParm ( char *rec , class WaitEntry *we ) ;
511
512	bool cloneCollRec ( char *srcCR , char *dstCR ) ;
513
514	//
515	// end new functions
516	//
517
518	bool m_inSyncWithHost0;
519	bool m_triedToSync;
520
521	bool m_isDefaultLoaded;
522
523	Page m_pages [ 50 ];
524	int32_t m_numPages;
525	
526	Parm m_parms [ MAX_PARMS ];
527	int32_t m_numParms;
528
529	// just those Parms that have a m_sparm of 1
530	Parm *m_searchParms [ MAX_PARMS ];
531	int32_t m_numSearchParms;
532
533	/*
534 private:
535	// these return true if overflow
536	bool serializeConfParm( Parm *m, int32_t i, char **p, char *end, 
537				int32_t size, int32_t cnt, 
538				bool sizeChk, int32_t *bufSz );
539	bool serializeCollParm( class CollectionRec *cr, 
540				Parm *m, int32_t i, char **p, char *end,
541				int32_t size, int32_t cnt,
542				bool sizeChk, int32_t *bufSz );
543			
544
545	void deserializeConfParm( Parm *m, SerParm *sp, char **p,
546				   bool *confChgd );
547	void deserializeCollParm( class CollectionRec *cr,
548				  Parm *m, SerParm *sp, char **p );
549	*/
550
551	// for holding default.conf file for collection recs for OBJ_COLL
552	char m_buf [ MAX_XML_CONF ];
553
554	// for parsing default.conf file for collection recs for OBJ_COLL
555	Xml m_xml2;
556};
557
558extern Parms g_parms;
559
560#endif
561