PageRenderTime 53ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/Parms.h

https://github.com/gigablast/open-source-search-engine
C Header | 561 lines | 297 code | 89 blank | 175 comment | 0 complexity | 32cf1aaba6cd2476ff1417bec70a4d14 MD5 | raw file
Possible License(s): Apache-2.0
  // Matt Wells, copyright Feb 2002
  // Ideally, CollectionRec.h and SearchInput.h should be automatically generated
  // from Parms.cpp. But Parms need to be marked if they contribute to
  // SearchInput::makeKey() for caching the SERPs.
  5. #ifndef _PARMS_H_
  6. #define _PARMS_H_
  7. #include "Rdb.h"
  8. //#include "CollectionRec.h"
  9. void handleRequest3e ( UdpSlot *slot , int32_t niceness ) ;
  10. void handleRequest3f ( UdpSlot *slot , int32_t niceness ) ;
  // "url filters profile" values. used to set default crawl rules
  // in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults().
  // for instance, UFP_NEWS spiders sites more frequently but less deep in
  // order to get "news" pages and articles
  //enum {
  // UFP_CUSTOM = 0 ,
  // UFP_NONE = 0 ,
  // UFP_WEB = 1 ,
  // UFP_NEWS = 2 ,
  // UFP_LANG = 3,
  // UFP_SHALLOW = 4
  //};
  // special priorities for the priority drop down
  // in the url filters table
  //enum {
  // SPIDER_PRIORITY_FILTERED = -3 ,
  // SPIDER_PRIORITY_BANNED = -2 ,
  // SPIDER_PRIORITY_UNDEFINED = -1 };
  // Object kinds a Parm can belong to; stored in Parm::m_obj.
  // Numbering starts at 1 and increases by one in declaration order, so
  // inserting an enumerator anywhere but the end renumbers the ones after it.
  enum {
      OBJ_CONF = 1 ,   // 1
      OBJ_COLL ,       // 2
      OBJ_SI ,         // 3: SearchInput class
      OBJ_GBREQUEST ,  // 4: for GigablastRequest class of parms
      OBJ_IR ,         // 5: InjectionRequest class from PageInject.h
      OBJ_NONE         // 6
  };
  // Value types a Parm can have; stored in Parm::m_type, which is a char, so
  // every enumerator must stay small enough to fit in one.
  // Numbering starts at 1 and increases by one in declaration order; the
  // trailing numbers below are running markers of the resulting values.
  enum {
      TYPE_BOOL = 1 ,        // 1
      TYPE_BOOL2 ,
      TYPE_CHECKBOX ,
      TYPE_CHAR ,
      TYPE_CHAR2 ,           // 5: needed to display char as a number (maxNumHops)
      TYPE_CMD ,
      TYPE_FLOAT ,
      TYPE_IP ,
      TYPE_LONG ,
      TYPE_LONG_LONG ,       // 10
      TYPE_NONE ,
      TYPE_PRIORITY ,
      TYPE_PRIORITY2 ,
      TYPE_PRIORITY_BOXES ,
      TYPE_RETRIES ,         // 15
      TYPE_STRING ,
      TYPE_STRINGBOX ,
      TYPE_STRINGNONEMPTY ,
      TYPE_TIME ,
      TYPE_DATE2 ,           // 20
      TYPE_DATE ,
      TYPE_RULESET ,
      TYPE_FILTER ,
      TYPE_COMMENT ,
      TYPE_CONSTANT ,        // 25
      TYPE_MONOD2 ,
      TYPE_MONOM2 ,
      TYPE_LONG_CONST ,
      TYPE_SITERULE ,        // 29
      TYPE_SAFEBUF ,         // 30
      TYPE_UFP ,
      TYPE_FILEUPLOADBUTTON ,
      TYPE_DOUBLE ,
      TYPE_CHARPTR           // 34
  };
  // Forward declarations to make the compiler happy for the pointer and
  // reference uses of these classes further down.
  // NOTE(review): GigablastRequest stores an HttpRequest by value, which
  // needs the complete type; presumably an earlier #include supplies it --
  // confirm.
  class HttpRequest;
  class TcpSocket;
  // Display attributes for one page; Parms keeps an array of these in
  // Parms::m_pages.
  class Page {
  public:
      // one of the PAGE_* values (PAGE_MASTER, PAGE_SPIDER, ...); those are
      // not declared in this header, see Pages.h
      int32_t m_page;
      char *m_bgcolor;   // color of the cells in the table
      char *m_topcolor;  // color of the table's first row
      char *m_title;     // browser title bar
  };
  83. #include "Msg4.h"
  84. // generic gigablast request. for all apis offered.
  85. class GigablastRequest {
  86. public:
  87. //
  88. // make a copy of the http request because the original is
  89. // on the stack. AND the "char *" types below will reference into
  90. // this because they are listed as TYPE_CHARPTR in Parms.cpp.
  91. // that saves us memory as opposed to making them all SafeBufs.
  92. //
  93. HttpRequest m_hr;
  94. // ptr to socket to send reply back on
  95. TcpSocket *m_socket;
  96. // TYPE_CHARPTR
  97. char *m_coll;
  98. // pretty universal char ptr
  99. char *m_formatStr;
  100. ////////////
  101. //
  102. // /admin/inject parms
  103. //
  104. ////////////
  105. // these all reference into m_hr or into the Parm::m_def string!
  106. char *m_url; // also for /get
  107. //char *m_queryToScrape;
  108. //char *m_contentDelim;
  109. //char m_containerContentType; // CT_UNKNOWN, CT_WARC, CT_ARC
  110. //int32_t m_injectDocIp;
  111. //char *m_contentTypeStr;
  112. //char *m_contentFile;
  113. //char *m_content;
  114. //char *m_diffbotReply; // secret thing from dan
  115. //char m_injectLinks;
  116. //char m_spiderLinks;
  117. //char m_shortReply;
  118. //char m_newOnly;
  119. //char m_deleteUrl;
  120. //char m_recycle;
  121. //char m_dedup;
  122. //char m_hasMime;
  123. //char m_doConsistencyTesting;
  124. //char m_getSections;
  125. //char m_gotSections;
  126. //int32_t m_charset;
  127. //int32_t m_hopCount; // hopcount
  128. //collnum_t m_collnum; // more reliable than m_coll
  129. // older ones
  130. //uint32_t m_firstIndexed; // firstimdexed
  131. //uint32_t m_lastSpidered; // lastspidered;
  132. //SafeBuf m_contentBuf; // for holding a warc/arc file
  133. ///////////
  134. //
  135. // /admin/import parms
  136. //
  137. ///////////
  138. char *m_importDir; // TYPE_CHARPTR
  139. int32_t m_importInjects;
  140. ///////////
  141. //
  142. // /get parms (for getting cached web pages)
  143. //
  144. ///////////
  145. int64_t m_docId;
  146. int32_t m_strip;
  147. char m_includeHeader;
  148. char m_highlightQuery;
  149. ///////////
  150. //
  151. // /admin/addurl parms
  152. //
  153. ///////////
  154. char *m_urlsBuf;
  155. char m_stripBox;
  156. char m_harvestLinks;
  157. SafeBuf m_listBuf;
  158. Msg4 m_msg4;
  159. /////////////
  160. //
  161. // /admin/reindex parms
  162. //
  163. ////////////
  164. char *m_query;
  165. int32_t m_srn;
  166. int32_t m_ern;
  167. char *m_qlang;
  168. bool m_forceDel;
  169. char m_recycleContent;
  170. // useful bufs to copy data over
  171. SafeBuf m_tmpBuf1;
  172. SafeBuf m_tmpBuf2;
  173. SafeBuf m_tmpBuf3;
  174. };
  // values for Parm::m_subMenu
  // 1..11 are plain menu ids; SUBMENU_CHECKBOX is a flag bit that does not
  // collide with any of them.
  // NOTE(review): Parm::m_subMenu is a plain char. Where char is signed, 0x80
  // is outside its positive range, so test the flag with a bitwise AND
  // instead of comparing the stored value against SUBMENU_CHECKBOX.
  #define SUBMENU_DISPLAY     1
  #define SUBMENU_MAP         2
  #define SUBMENU_CALENDAR    3
  #define SUBMENU_LOCATION    4
  #define SUBMENU_SOCIAL      5
  #define SUBMENU_TIME        6
  #define SUBMENU_CATEGORIES  7
  #define SUBMENU_LINKS       8
  #define SUBMENU_WIDGET      9
  #define SUBMENU_SUGGESTIONS 10
  #define SUBMENU_SEARCH      11
  #define SUBMENU_CHECKBOX    0x80 // flag
  // values for Parm::m_flags
  // Each flag is a distinct single bit (bits 0 through 20), so flags can be
  // OR'ed together in the int32_t Parm::m_flags field.
  #define PF_COOKIE            0x01 // store in cookie?
  #define PF_REDBOX            0x02 // redbox constraint on search results
  #define PF_SUBMENU_HEADER    0x04
  #define PF_WIDGET_PARM       0x08
  #define PF_API               0x10
  #define PF_REBUILDURLFILTERS 0x20
  #define PF_NOSYNC            0x40
  #define PF_DIFFBOT           0x80
  #define PF_HIDDEN            0x0100
  #define PF_NOSAVE            0x0200
  #define PF_DUP               0x0400
  #define PF_TEXTAREA          0x0800
  #define PF_COLLDEFAULT       0x1000
  #define PF_NOAPI             0x2000
  #define PF_REQUIRED          0x4000
  #define PF_REBUILDPROXYTABLE 0x8000
  #define PF_NOHTML            0x10000
  #define PF_CLONE             0x20000
  #define PF_PRIVATE           0x40000 // for password to not show in api
  #define PF_SMALLTEXTAREA     0x80000
  #define PF_REBUILDACTIVELIST 0x100000
  210. class Parm {
  211. public:
  212. char *m_title; // displayed above m_desc on admin gui page
  213. char *m_desc; // description of variable displayed on admin gui page
  214. char *m_cgi; // cgi name, contains %i if an array
  215. char *m_cgi2; // alias
  216. char *m_cgi3; // alias
  217. char *m_cgi4; // alias
  218. char *m_xml; // default to rendition of m_title if NULL
  219. int32_t m_off; // this variable's offset into the CollectionRec class
  220. char m_colspan;
  221. char m_type; // TYPE_BOOL, TYPE_LONG, ...
  222. int32_t m_page; // PAGE_MASTER, PAGE_SPIDER, ... see Pages.h
  223. char m_obj; // OBJ_CONF or OBJ_COLL
  224. // the maximum number of elements supported in the array.
  225. // this is 1 if NOT an array (i.e. array of only one parm).
  226. // in such cases a "count" is NOT stored before the parm in
  227. // CollectionRec.h or Conf.h.
  228. bool isArray() { return (m_max>1); };
  229. int32_t getNumInArray() ;
  230. int32_t m_max; // max elements in the array
  231. // if array is fixed size, how many elements in it?
  232. // this is 0 if not a FIXED size array.
  233. int32_t m_fixed;
  234. int32_t m_size; // max string size
  235. char *m_def; // default value of this variable if not in either conf
  236. int32_t m_defOff; // if default value points to a collectionrec parm!
  237. char m_cast; // true if we should broadcast to all hosts (default)
  238. char *m_units;
  239. char m_addin; // add "insert above" link to gui when displaying array
  240. char m_rowid; // id of row controls are in, if any
  241. char m_rdonly;// if in read-only mode, blank out this control?
  242. char m_hdrs; // print headers for row or print title/desc for single?
  243. char m_perms; // 0 means same as WebPages' m_perms
  244. char m_subMenu;
  245. int32_t m_flags;
  246. char *m_class;
  247. char *m_icon;
  248. char *m_qterm;
  249. char *m_pstr; // for sorting by in sendPageAPI()
  250. int32_t m_parmNum; // slot # in the m_parms[] array that we are
  251. //bool (*m_func)(TcpSocket *s , HttpRequest *r,
  252. // bool (*cb)(TcpSocket *s , HttpRequest *r));
  253. bool (*m_func)(char *parmRec);
  254. // some functions can block, like when deleting a coll because
  255. // the tree might be saving, so they take a "we" ptr
  256. bool (*m_func2)(char *parmRec,class WaitEntry *we);
  257. int32_t m_plen; // offset of length for TYPE_STRINGS (m_htmlHeadLen...)
  258. char m_group; // start of a new group of controls?
  259. // m_priv = 1 means gigablast's software license clients cannot see
  260. // or change.
  261. // m_priv = 2 means gigablast's software license clients, including
  262. // even metalincs, cannot see or change.
  263. // m_priv = 3 means nobody can see in admin controls, but can be
  264. // in search input by anybody. really a hack for yaron
  265. // from quigo so he can set "t2" to something bigger.
  266. char m_priv; // true if gigablast's software clients cannot see
  267. char m_save; // save to xml file? almost always true
  268. int32_t m_min;
  269. // these are used for search parms in PageResults.cpp
  270. //char m_sparm;// is this a search parm? for passing to PageResults.cpp
  271. //char *m_scgi; // parm in the search url
  272. char m_spriv; // is it private? only admins can see/use private parms
  273. //char *m_scmd; // the url path for this m_scgi variable
  274. //int32_t m_sdefo; // offset of default into CollectionRec (use m_off)
  275. int32_t m_sminc ;// offset of min in CollectionRec (-1 for none)
  276. int32_t m_smaxc ;// offset of max in CollectionRec (-1 for none)
  277. int32_t m_smin; // absolute min
  278. int32_t m_smax; // absolute max
  279. //int32_t m_soff; // offset into SearchInput to store value in
  280. char m_sprpg; // propagate the cgi variable to other pages via GET?
  281. char m_sprpp; // propagate the cgi variable to other pages via POST?
  282. bool m_sync; // this parm should be synced
  283. int32_t m_hash; // hash of "title"
  284. int32_t m_cgiHash; // hash of m_cgi
  285. bool getValueAsBool ( class SearchInput *si ) ;
  286. int32_t getValueAsLong ( class SearchInput *si ) ;
  287. char * getValueAsString ( class SearchInput *si ) ;
  288. int32_t getNumInArray ( collnum_t collnum ) ;
  289. bool printVal ( class SafeBuf *sb , collnum_t collnum , int32_t occNum ) ;
  290. };
  // capacity of the Parms::m_parms[] and Parms::m_searchParms[] arrays
  #define MAX_PARMS 940
  // size in bytes (200KB) of Parms::m_buf, which holds the default.conf file
  #define MAX_XML_CONF (200*1024)
  293. #include "Xml.h"
  294. #include "SafeBuf.h"
  295. struct SerParm;
  296. class Parms {
  297. public:
  298. Parms();
  299. void init();
  300. bool sendPageGeneric ( class TcpSocket *s, class HttpRequest *r );
  301. bool printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r );
  302. //char *printParms (char *p, char *pend, TcpSocket *s, HttpRequest *r);
  303. bool printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r );
  304. bool printParms2 (SafeBuf* sb,
  305. int32_t page,
  306. CollectionRec *cr,
  307. int32_t nc ,
  308. int32_t pd ,
  309. bool isCrawlbot ,
  310. char format, //bool isJSON,
  311. TcpSocket *sock,
  312. bool isMasterAdmin,
  313. bool isCollAdmin
  314. );
  315. /*
  316. char *printParm ( char *p ,
  317. char *pend ,
  318. //int32_t user ,
  319. char *username,
  320. Parm *m ,
  321. int32_t mm , // m = &m_parms[mm]
  322. int32_t j ,
  323. int32_t jend ,
  324. char *THIS ,
  325. char *coll ,
  326. char *pwd ,
  327. char *bg ,
  328. int32_t nc ,
  329. int32_t pd ) ;
  330. */
  331. bool printParm ( SafeBuf* sb,
  332. //int32_t user ,
  333. char *username,
  334. Parm *m ,
  335. int32_t mm , // m = &m_parms[mm]
  336. int32_t j ,
  337. int32_t jend ,
  338. char *THIS ,
  339. char *coll ,
  340. char *pwd ,
  341. char *bg ,
  342. int32_t nc ,
  343. int32_t pd ,
  344. bool lastRow ,
  345. bool isCrawlbot ,//= false,
  346. char format , //= FORMAT_HTML,
  347. bool isMasterAdmin ,
  348. bool isCollAdmin ,
  349. class TcpSocket *sock );
  350. char *getTHIS ( HttpRequest *r , int32_t page );
  351. class Parm *getParmFromParmHash ( int32_t parmHash );
  352. bool setFromRequest ( HttpRequest *r , //int32_t user,
  353. TcpSocket* s,
  354. class CollectionRec *newcr ,
  355. char *THIS ,
  356. int32_t objType );
  357. bool insertParm ( int32_t i , int32_t an , char *THIS ) ;
  358. bool removeParm ( int32_t i , int32_t an , char *THIS ) ;
  359. void setParm ( char *THIS, Parm *m, int32_t mm, int32_t j, char *s,
  360. bool isHtmlEncoded , bool fromRequest ) ;
  361. void setToDefault ( char *THIS , char objType ,
  362. CollectionRec *argcr );//= NULL ) ;
  363. bool setFromFile ( void *THIS ,
  364. char *filename ,
  365. char *filenameDef ,
  366. char objType ) ;
  367. bool setParmsFromXml ( Xml &xml , void *THIS, char objType ) ;
  368. bool setXmlFromFile(Xml *xml, char *filename, class SafeBuf *sb );
  369. bool saveToXml ( char *THIS , char *f , char objType ) ;
  370. bool convertToXml ( char *buf , char *THIS , char objType ) ;
  371. // get the parm with the associated cgi name. must be NULL terminated.
  372. Parm *getParm ( char *cgi ) ;
  373. bool getParmHtmlEncoded ( SafeBuf *sb , Parm *m , char *s );
  374. bool setGigablastRequest ( class TcpSocket *s ,
  375. class HttpRequest *hr ,
  376. class GigablastRequest *gr );
  377. // . make it so a collectionrec can be copied in Collectiondb.cpp
  378. // . so the rec can be copied and the old one deleted without
  379. // freeing the safebufs now used by the new one.
  380. void detachSafeBufs ( class CollectionRec *cr ) ;
  381. // calc checksum of parms
  382. uint32_t calcChecksum();
  383. // get size of serialized parms
  384. //int32_t getStoredSize();
  385. // . serialized to buf
  386. // . if buf is NULL, just calcs size
  387. //bool serialize( char *buf, int32_t *bufSize );
  388. //void deserialize( char *buf );
  389. void overlapTest ( char step ) ;
  390. /////
  391. //
  392. // parms now in parmdb
  393. //
  394. /////
  395. // all parm recs need to be in the tree
  396. //Rdb m_rdb;
  397. //
  398. // new functions
  399. //
  400. bool addNewParmToList1 ( SafeBuf *parmList ,
  401. collnum_t collnum ,
  402. char *parmValString ,
  403. int32_t occNum ,
  404. char *parmName ) ;
  405. bool addNewParmToList2 ( SafeBuf *parmList ,
  406. collnum_t collnum ,
  407. char *parmValString ,
  408. int32_t occNum ,
  409. Parm *m ) ;
  410. bool addCurrentParmToList1 ( SafeBuf *parmList ,
  411. CollectionRec *cr ,
  412. char *parmName ) ;
  413. bool addCurrentParmToList2 ( SafeBuf *parmList ,
  414. collnum_t collnum ,
  415. int32_t occNum ,
  416. Parm *m ) ;
  417. bool convertHttpRequestToParmList (HttpRequest *hr,SafeBuf *parmList,
  418. int32_t page , TcpSocket *sock );
  419. Parm *getParmFast2 ( int32_t cgiHash32 ) ;
  420. Parm *getParmFast1 ( char *cgi , int32_t *occNum ) ;
  421. bool broadcastParmList ( SafeBuf *parmList ,
  422. void *state ,
  423. void (* callback)(void *) ,
  424. bool sendToGrunts = true ,
  425. bool sendToProxies = false ,
  426. // send to this single hostid? -1 means all
  427. int32_t hostId = -1 ,
  428. int32_t hostId2 = -1 ); // hostid range?
  429. bool doParmSendingLoop ( ) ;
  430. bool syncParmsWithHost0 ( ) ;
  431. bool makeSyncHashList ( SafeBuf *hashList ) ;
  432. int32_t getNumInArray ( collnum_t collnum ) ;
  433. bool addAllParmsToList ( SafeBuf *parmList, collnum_t collnum ) ;
  434. bool updateParm ( char *rec , class WaitEntry *we ) ;
  435. bool cloneCollRec ( char *srcCR , char *dstCR ) ;
  436. //
  437. // end new functions
  438. //
  439. bool m_inSyncWithHost0;
  440. bool m_triedToSync;
  441. bool m_isDefaultLoaded;
  442. Page m_pages [ 50 ];
  443. int32_t m_numPages;
  444. Parm m_parms [ MAX_PARMS ];
  445. int32_t m_numParms;
  446. // just those Parms that have a m_sparm of 1
  447. Parm *m_searchParms [ MAX_PARMS ];
  448. int32_t m_numSearchParms;
  449. /*
  450. private:
  451. // these return true if overflow
  452. bool serializeConfParm( Parm *m, int32_t i, char **p, char *end,
  453. int32_t size, int32_t cnt,
  454. bool sizeChk, int32_t *bufSz );
  455. bool serializeCollParm( class CollectionRec *cr,
  456. Parm *m, int32_t i, char **p, char *end,
  457. int32_t size, int32_t cnt,
  458. bool sizeChk, int32_t *bufSz );
  459. void deserializeConfParm( Parm *m, SerParm *sp, char **p,
  460. bool *confChgd );
  461. void deserializeCollParm( class CollectionRec *cr,
  462. Parm *m, SerParm *sp, char **p );
  463. */
  464. // for holding default.conf file for collection recs for OBJ_COLL
  465. char m_buf [ MAX_XML_CONF ];
  466. // for parsing default.conf file for collection recs for OBJ_COLL
  467. Xml m_xml2;
  468. };
  469. extern Parms g_parms;
  470. #endif