PageRenderTime 57ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/PageRoot.cpp

https://github.com/gigablast/open-source-search-engine
C++ | 3434 lines | 1560 code | 341 blank | 1533 comment | 205 complexity | c7c3cacbb1af6425871752ada1930fa5 MD5 | raw file
Possible License(s): Apache-2.0
  1. #include "gb-include.h"
  2. #include "Indexdb.h" // makeKey(int64_t docId)
  3. #include "Titledb.h"
  4. #include "Spider.h"
  5. #include "Tagdb.h"
  6. #include "Dns.h"
  7. //#include "PageResults.h" // for query buf, g_qbuf
  8. #include "Collectiondb.h"
  9. //#include "CollectionRec.h"
  10. #include "Clusterdb.h" // for getting # of docs indexed
  11. //#include "Checksumdb.h" // should migrate to this one, though
  12. #include "Pages.h"
  13. #include "Query.h" // MAX_QUERY_LEN
  14. #include "SafeBuf.h"
  15. #include "LanguageIdentifier.h"
  16. #include "LanguagePages.h"
  17. #include "Users.h"
  18. #include "Address.h" // getIPLocation
  19. #include "Proxy.h"
  20. //char *printNumResultsDropDown ( char *p, int32_t n, bool *printedDropDown);
  21. bool printNumResultsDropDown ( SafeBuf& sb, int32_t n, bool *printedDropDown);
  22. //static char *printTopDirectory ( char *p, char *pend );
  23. static bool printTopDirectory ( SafeBuf& sb , char format );
  24. // this prints the last five queries
  25. //static int32_t printLastQueries ( char *p , char *pend ) ;
  26. //static char *expandRootHtml ( char *p , int32_t plen ,
  27. /*
  28. static bool expandRootHtml ( SafeBuf& sb,
  29. uint8_t *html , int32_t htmlLen ,
  30. char *q , int32_t qlen ,
  31. HttpRequest *r ,
  32. TcpSocket *s ,
  33. int64_t docsInColl ,
  34. CollectionRec *cr ) ;
  35. */
  36. bool sendPageRoot ( TcpSocket *s, HttpRequest *r ){
  37. return sendPageRoot ( s, r, NULL );
  38. }
  39. bool printNav ( SafeBuf &sb , HttpRequest *r ) {
  40. /*
  41. char *root = "";
  42. char *rootSecure = "";
  43. if ( g_conf.m_isMattWells ) {
  44. root = "http://www.gigablast.com";
  45. rootSecure = "https://www.gigablast.com";
  46. }
  47. sb.safePrintf("<center><b><p class=nav>"
  48. "<a href=%s/about.html>About</a>"
  49. " &nbsp; &nbsp; "
  50. "<a href=%s/contact.html>Contact</a>"
  51. " &nbsp; &nbsp; "
  52. "<a href=%s/help.html>Help</a>"
  53. " &nbsp; &nbsp; "
  54. "<a href=%s/privacy.html>Privacy Policy</a>"
  55. " &nbsp; &nbsp; "
  56. // TODO: API page must also provide a description
  57. // of the output... like searchfeed.html does already.
  58. // put that in the api page as well.
  59. "<a href=%s/api>API</a>"
  60. , root
  61. , root
  62. , root
  63. , root
  64. , root
  65. );
  66. if ( g_conf.m_isMattWells )
  67. sb.safePrintf(" &nbsp; &nbsp; "
  68. "<a href=%s/seoapi.html>SEO API</a>"
  69. " &nbsp; &nbsp; "
  70. "<a href=%s/account>My Account</a> "
  71. , root
  72. , rootSecure
  73. //" &nbsp; &nbsp; <a href=/logout>Logout</a>"
  74. );
  75. //if ( r->isLocal() )
  76. sb.safePrintf("&nbsp; &nbsp; &nbsp; [<a style=color:green; "
  77. "href=\"/admin/settings\">"
  78. "Admin</a>]");
  79. sb.safePrintf("</p></b></center>");
  80. */
  81. sb.safePrintf("</TD></TR></TABLE>"
  82. "</body></html>");
  83. return true;
  84. }
  85. //////////////
  86. //
  87. // BEGIN expandHtml() helper functions
  88. //
  89. //////////////
  90. bool printFamilyFilter ( SafeBuf& sb , bool familyFilterOn ) {
  91. char *s1 = "";
  92. char *s2 = "";
  93. if ( familyFilterOn ) s1 = " checked";
  94. else s2 = " checked";
  95. //p += sprintf ( p ,
  96. return sb.safePrintf (
  97. "Family filter: "
  98. "<input type=radio name=ff value=1%s>On &nbsp; "
  99. "<input type=radio name=ff value=0%s>Off &nbsp; " ,
  100. s1 , s2 );
  101. //return p;
  102. }
  103. //char *printNumResultsDropDown ( char *p , int32_t n , bool *printedDropDown ) {
  104. bool printNumResultsDropDown ( SafeBuf& sb , int32_t n , bool *printedDropDown ) {
  105. if ( n!=10 && n!=20 && n!=30 && n!=50 && n!=100 )
  106. //return p;
  107. return true;
  108. *printedDropDown = true;
  109. char *d1 = "";
  110. char *d2 = "";
  111. char *d3 = "";
  112. char *d4 = "";
  113. char *d5 = "";
  114. if ( n == 10 ) d1 = " selected";
  115. if ( n == 20 ) d2 = " selected";
  116. if ( n == 30 ) d3 = " selected";
  117. if ( n == 50 ) d4 = " selected";
  118. if ( n ==100 ) d5 = " selected";
  119. //p += sprintf ( p ,
  120. return sb.safePrintf (
  121. "<select name=n>\n"
  122. "<option value=10%s>10\n"
  123. "<option value=20%s>20\n"
  124. "<option value=30%s>30\n"
  125. "<option value=50%s>50\n"
  126. "<option value=100%s>100\n"
  127. "</select>",
  128. d1,d2,d3,d4,d5);
  129. //return p;
  130. }
  131. //char *printDirectorySearchType ( char *p, int32_t sdirt ) {
  132. bool printDirectorySearchType ( SafeBuf& sb, int32_t sdirt ) {
  133. // default to entire directory
  134. if (sdirt < 1 || sdirt > 4)
  135. sdirt = 3;
  136. // by default search the whole thing
  137. sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"3\"");
  138. if (sdirt == 3) sb.safePrintf(" checked>");
  139. else sb.safePrintf(">");
  140. sb.safePrintf("Entire Directory<br>\n");
  141. // entire category
  142. sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"1\"");
  143. if (sdirt == 1) sb.safePrintf(" checked>");
  144. else sb.safePrintf(">");
  145. sb.safePrintf("Entire Category<br>\n");
  146. // base category only
  147. sb.safePrintf("<nobr><input type=\"radio\" name=\"sdirt\" value=\"2\"");
  148. if (sdirt == 2) sb.safePrintf(" checked>");
  149. else sb.safePrintf(">");
  150. sb.safePrintf("Pages in Base Category</nobr><br>\n");
  151. // sites in base category
  152. sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"7\"");
  153. if (sdirt == 7) sb.safePrintf(" checked>");
  154. else sb.safePrintf(">");
  155. sb.safePrintf("Sites in Base Category<br>\n");
  156. // sites in entire category
  157. sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"6\"");
  158. if (sdirt == 6) sb.safePrintf(" checked>");
  159. else sb.safePrintf(">");
  160. sb.safePrintf("Sites in Entire Category<br>\n");
  161. // end it
  162. return true;
  163. }
  164. #include "SearchInput.h"
  165. bool printRadioButtons ( SafeBuf& sb , SearchInput *si ) {
  166. // don't display this for directory search
  167. // look it up. returns catId <= 0 if dmoz not setup yet.
  168. // From PageDirectory.cpp
  169. //int32_t catId= g_categories->getIdFromPath(decodedPath, decodedPathLen);
  170. // if /Top print the directory homepage
  171. //if ( catId == 1 || catId <= 0 )
  172. // return true;
  173. // site
  174. /*
  175. if ( si->m_siteLen > 0 ) {
  176. // . print rest of search box etc.
  177. // . print cobranding radio buttons
  178. //if ( p + si->m_siteLen + 1 >= pend ) return p;
  179. //p += sprintf ( p ,
  180. return sb.safePrintf (
  181. //" &nbsp; "
  182. //"<font size=-1>"
  183. //"<b><a href=\"/\"><font color=red>"
  184. //"Powered by Gigablast</font></a></b>"
  185. //"<br>"
  186. //"<tr align=center><td></td><td>"
  187. "<input type=radio name=site value=\"\">"
  188. "Search the Web "
  189. "<input type=radio name=site "
  190. "value=\"%s\" checked>Search %s" ,
  191. //"</td></tr></table><br>"
  192. //"</td></tr>"
  193. //"<font size=-1>" ,
  194. si->m_site , si->m_site );
  195. }
  196. else if ( si->m_sitesLen > 0 ) {
  197. */
  198. if ( si->m_sites && si->m_sites[0] ) {
  199. // . print rest of search box etc.
  200. // . print cobranding radio buttons
  201. //if ( p + si->m_sitesLen + 1 >= pend ) return p;
  202. // if not explicitly instructed to print all sites
  203. // and they are a int32_t list, do not print all
  204. /*
  205. char tmp[1000];
  206. char *x = si->m_sites;
  207. if ( si->m_sitesLen > 255){//&&!st->m_printAllSites){
  208. // copy what's there
  209. strncpy ( tmp , si->m_sites , 255 );
  210. x = tmp + 254 ;
  211. // do not hack off in the middle of a site
  212. while ( is_alnum(*x) && x > tmp ) x--;
  213. // overwrite it with [more] link
  214. //x += sprintf ( x , "<a href=\"/search?" );
  215. // our current query parameters
  216. //if ( x + uclen + 10 >= xend ) goto skipit;
  217. sprintf ( x , " ..." );
  218. x = tmp;
  219. }
  220. */
  221. //p += sprintf ( p ,
  222. sb.safePrintf (
  223. //" &nbsp; "
  224. //"<font size=-1>"
  225. //"<b><a href=\"/\"><font color=red>"
  226. //"Powered by Gigablast</font></a></b>"
  227. //"<br>"
  228. //"<tr align=center><td></td><td>"
  229. "<input type=radio name=sites value=\"\">"
  230. "Search the Web "
  231. "<input type=radio name=sites "
  232. "value=\"%s\" checked>Search ",
  233. //"</td></tr></table><br>"
  234. //"</td></tr>"
  235. //"<font size=-1>" ,
  236. si->m_sites );
  237. sb.safeTruncateEllipsis ( si->m_sites, 255 );
  238. }
  239. return true;
  240. }
  241. bool printLogo ( SafeBuf& sb , SearchInput *si ) {
  242. // if an image was provided...
  243. if ( ! si->m_imgUrl || ! si->m_imgUrl[0] ) {
  244. // no, now we default to our logo
  245. //return true;
  246. //p += sprintf ( p ,
  247. return sb.safePrintf (
  248. "<a href=\"/\">"
  249. "<img valign=top width=250 height=61 border=0 "
  250. // avoid https for this, so make it absolute
  251. "src=\"/logo-med.jpg\"></a>" );
  252. //return p;
  253. }
  254. // do we have a link?
  255. if ( si->m_imgLink && si->m_imgLink[0])
  256. //p += sprintf ( p , "<a href=\"%s\">",si->m_imgLink);
  257. sb.safePrintf ( "<a href=\"%s\">", si->m_imgLink );
  258. // print image width and length
  259. if ( si->m_imgWidth >= 0 && si->m_imgHeight >= 0 )
  260. //p += sprintf ( p , "<img width=%"INT32" height=%"INT32" ",
  261. sb.safePrintf( "<img width=%"INT32" height=%"INT32" ",
  262. si->m_imgWidth , si->m_imgHeight );
  263. else
  264. //p += sprintf ( p , "<img " );
  265. sb.safePrintf ( "<img " );
  266. //p += sprintf ( p , "border=0 src=\"%s\">",
  267. sb.safePrintf( "border=0 src=\"%s\">",
  268. si->m_imgUrl );
  269. // end the link if we had one
  270. if ( si->m_imgLink && si->m_imgLink[0] )
  271. //p += sprintf ( p , "</a>");
  272. sb.safePrintf ( "</a>");
  273. return true;
  274. }
  275. /////////////
  276. //
  277. // END expandHtml() helper functions
  278. //
  279. /////////////
  280. bool expandHtml ( SafeBuf& sb,
  281. char *head ,
  282. int32_t hlen ,
  283. char *q ,
  284. int32_t qlen ,
  285. HttpRequest *r ,
  286. SearchInput *si,
  287. char *method ,
  288. CollectionRec *cr ) {
  289. //char *pend = p + plen;
  290. // store custom header into buf now
  291. //for ( int32_t i = 0 ; i < hlen && p+10 < pend ; i++ ) {
  292. for ( int32_t i = 0 ; i < hlen; i++ ) {
  293. if ( head[i] != '%' ) {
  294. // *p++ = head[i];
  295. sb.safeMemcpy((char*)&head[i], 1);
  296. continue;
  297. }
  298. if ( i + 1 >= hlen ) {
  299. // *p++ = head[i];
  300. sb.safeMemcpy((char*)&head[i], 1);
  301. continue;
  302. }
  303. if ( head[i+1] == 'S' ) {
  304. // now we got the %S, insert "spiders are [on/off]"
  305. bool spidersOn = true;
  306. if ( ! g_conf.m_spideringEnabled ) spidersOn = false;
  307. if ( ! cr->m_spideringEnabled ) spidersOn = false;
  308. if ( spidersOn )
  309. sb.safePrintf("Spiders are on");
  310. else
  311. sb.safePrintf("Spiders are off");
  312. // skip over %S
  313. i += 1;
  314. continue;
  315. }
  316. if ( head[i+1] == 'q' ) {
  317. // now we got the %q, insert the query
  318. char *p = (char*) sb.getBuf();
  319. char *pend = (char*) sb.getBufEnd();
  320. int32_t eqlen = dequote ( p , pend , q , qlen );
  321. //p += eqlen;
  322. sb.incrementLength(eqlen);
  323. // skip over %q
  324. i += 1;
  325. continue;
  326. }
  327. if ( head[i+1] == 'c' ) {
  328. // now we got the %q, insert the query
  329. if ( cr ) sb.safeStrcpy(cr->m_coll);
  330. // skip over %c
  331. i += 1;
  332. continue;
  333. }
  334. if ( head[i+1] == 'w' &&
  335. head[i+2] == 'h' &&
  336. head[i+3] == 'e' &&
  337. head[i+4] == 'r' &&
  338. head[i+5] == 'e' ) {
  339. // insert the location
  340. int32_t whereLen;
  341. char *where = r->getString("where",&whereLen);
  342. // get it from cookie as well!
  343. if ( ! where )
  344. where = r->getStringFromCookie("where",
  345. &whereLen);
  346. // fix for getStringFromCookie
  347. if ( where && ! where[0] ) where = NULL;
  348. // skip over the %where
  349. i += 5;
  350. // if empty, base it on IP
  351. if ( ! where ) {
  352. double lat;
  353. double lon;
  354. double radius;
  355. char *city,*state,*ctry;
  356. // use this by default
  357. int32_t ip = r->m_userIP;
  358. // ip for testing?
  359. int32_t iplen;
  360. char *ips = r->getString("uip",&iplen);
  361. if ( ips ) ip = atoip(ips);
  362. // returns true if found in db
  363. char buf[128];
  364. getIPLocation ( ip ,
  365. &lat ,
  366. &lon ,
  367. &radius,
  368. &city ,
  369. &state ,
  370. &ctry ,
  371. buf ,
  372. 128 ) ;
  373. if ( city && state )
  374. sb.safePrintf("%s, %s",city,state);
  375. }
  376. else
  377. sb.dequote (where,whereLen);
  378. continue;
  379. }
  380. if ( head[i+1] == 'w' &&
  381. head[i+2] == 'h' &&
  382. head[i+3] == 'e' &&
  383. head[i+4] == 'n' ) {
  384. // insert the location
  385. int32_t whenLen;
  386. char *when = r->getString("when",&whenLen);
  387. // skip over the %when
  388. i += 4;
  389. if ( ! when ) continue;
  390. sb.dequote (when,whenLen);
  391. continue;
  392. }
  393. // %sortby
  394. if ( head[i+1] == 's' &&
  395. head[i+2] == 'o' &&
  396. head[i+3] == 'r' &&
  397. head[i+4] == 't' &&
  398. head[i+5] == 'b' &&
  399. head[i+6] == 'y' ) {
  400. // insert the location
  401. int32_t sortBy = r->getLong("sortby",1);
  402. // print the radio buttons
  403. char *cs[5];
  404. cs[0]="";
  405. cs[1]="";
  406. cs[2]="";
  407. cs[3]="";
  408. cs[4]="";
  409. if ( sortBy >=1 && sortBy <=4 )
  410. cs[sortBy] = " checked";
  411. sb.safePrintf(
  412. "<input type=radio name=sortby value=1%s>date "
  413. "<input type=radio name=sortby value=2%s>distance "
  414. "<input type=radio name=sortby value=3%s>relevancy "
  415. "<input type=radio name=sortby value=4%s>popularity",
  416. cs[1],cs[2],cs[3],cs[4]);
  417. // skip over the %sortby
  418. i += 6;
  419. continue;
  420. }
  421. if ( head[i+1] == 'e' ) {
  422. // now we got the %e, insert the query
  423. char *p = (char*) sb.getBuf();
  424. int32_t plen = sb.getAvail();
  425. int32_t eqlen = urlEncode ( p , plen , q , qlen );
  426. //p += eqlen;
  427. sb.incrementLength(eqlen);
  428. // skip over %e
  429. i += 1;
  430. continue;
  431. }
  432. if ( head[i+1] == 'N' ) {
  433. // now we got the %N, insert the global doc count
  434. //int64_t c=g_checksumdb.getRdb()->getNumGlobalRecs();
  435. //now each host tells us how many docs it has in itsping
  436. int64_t c = g_hostdb.getNumGlobalRecs();
  437. c += g_conf.m_docCountAdjustment;
  438. // never allow to go negative
  439. if ( c < 0 ) c = 0;
  440. //p+=ulltoa(p,c);
  441. char *p = (char*) sb.getBuf();
  442. sb.reserve2x(16);
  443. int32_t len = ulltoa(p, c);
  444. sb.incrementLength(len);
  445. // skip over %N
  446. i += 1;
  447. continue;
  448. }
  449. /*
  450. if ( head[i+1] == 'E' ) {
  451. // now each host tells us how many docs it has in its
  452. // ping request
  453. int64_t c = g_hostdb.getNumGlobalEvents();
  454. char *p = (char*) sb.getBuf();
  455. sb.reserve2x(16);
  456. int32_t len = ulltoa(p, c);
  457. sb.incrementLength(len);
  458. // skip over %E
  459. i += 1;
  460. continue;
  461. }
  462. */
  463. if ( head[i+1] == 'n' ) {
  464. // now we got the %n, insert the collection doc count
  465. //p+=ulltoa(p,docsInColl);
  466. char *p = (char*) sb.getBuf();
  467. sb.reserve2x(16);
  468. int64_t docsInColl = 0;
  469. if ( cr ) docsInColl = cr->getNumDocsIndexed();
  470. int32_t len = ulltoa(p, docsInColl);
  471. sb.incrementLength(len);
  472. // skip over %n
  473. i += 1;
  474. continue;
  475. }
  476. /*
  477. if ( head[i+1] == 'T' ) {
  478. // . print the final tail
  479. // . only print admin link if we're local
  480. //int32_t user = g_pages.getUserType ( s , r );
  481. //char *username = g_users.getUsername(r);
  482. //char *pwd = r->getString ( "pwd" );
  483. char *p = (char*) sb.getBuf();
  484. int32_t plen = sb.getAvail();
  485. //p = g_pages.printTail ( p , p + plen , user , pwd );
  486. char *n = g_pages.printTail(p , p + plen ,
  487. r->isLocal());
  488. sb.incrementLength(n - p);
  489. // skip over %T
  490. i += 1;
  491. continue;
  492. }
  493. */
  494. // print the drop down menu for selecting the # of reslts
  495. if ( head[i+1] == 'D' ) {
  496. // skip over %D
  497. i += 1;
  498. // skip if not enough buffer
  499. //if ( p + 1000 >= pend ) continue;
  500. // # results
  501. //int32_t n = r->getLong("n",10);
  502. //bool printedDropDown;
  503. //p = printNumResultsDropDown(p,n,&printedDropDown);
  504. //printNumResultsDropDown(sb,n,&printedDropDown);
  505. continue;
  506. }
  507. if ( head[i+1] == 'H' ) {
  508. // . insert the secret key here, to stop seo bots
  509. // . TODO: randomize its position to make parsing more
  510. // difficult
  511. // . this secret key is for submitting a new query
  512. // int32_t key;
  513. // char kname[4];
  514. // g_httpServer.getKey (&key,kname,NULL,0,time(NULL),0,
  515. // 10);
  516. //sprintf (p , "<input type=hidden name=%s value=%"INT32">",
  517. // kname,key);
  518. //p += gbstrlen ( p );
  519. // sb.safePrintf( "<input type=hidden name=%s "
  520. //"value=%"INT32">",
  521. // kname,key);
  522. //adds param for default screen size
  523. //if(cr)
  524. // sb.safePrintf("<input type=hidden "
  525. //"id='screenWidth' name='ws' value=%"INT32">",
  526. //cr->m_screenWidth);
  527. // insert collection name too
  528. int32_t collLen;
  529. char *coll = r->getString ( "c" , &collLen );
  530. if ( collLen > 0 && collLen < MAX_COLL_LEN ) {
  531. //sprintf (p,"<input type=hidden name=c "
  532. // "value=\"");
  533. //p += gbstrlen ( p );
  534. sb.safePrintf("<input type=hidden name=c "
  535. "value=\"");
  536. //gbmemcpy ( p , coll , collLen );
  537. //p += collLen;
  538. sb.safeMemcpy(coll, collLen);
  539. //sprintf ( p , "\">\n");
  540. //p += gbstrlen ( p );
  541. sb.safePrintf("\">\n");
  542. }
  543. // pass this crap on so zak can do searches
  544. //char *username = g_users.getUsername(r);
  545. // this is null because not in the cookie and we are
  546. // logged in
  547. //char *pwd = r->getString ( "pwd" );
  548. //sb.safePrintf("<input type=hidden name=pwd "
  549. //"value=\"%s\">\n",
  550. //pwd);
  551. //sb.safePrintf("<input type=hidden name=username "
  552. // "value=\"%s\">\n",username);
  553. // skip over %H
  554. i += 1;
  555. continue;
  556. }
  557. // %t, print Top Directory section
  558. if ( head[i+1] == 't' ) {
  559. i += 1;
  560. //p = printTopDirectory ( p, pend );
  561. printTopDirectory ( sb , FORMAT_HTML );
  562. continue;
  563. }
  564. // MDW
  565. if ( head[i+1] == 'F' ) {
  566. i += 1;
  567. //p = printTopDirectory ( p, pend );
  568. if ( ! method ) method = "GET";
  569. sb.safePrintf("<form method=%s action=\"/search\" "
  570. "name=\"f\">\n",method);
  571. continue;
  572. }
  573. if ( head[i+1] == 'L' ) {
  574. i += 1;
  575. //p = printTopDirectory ( p, pend );
  576. printLogo ( sb , si );
  577. continue;
  578. }
  579. if ( head[i+1] == 'f' ) {
  580. i += 1;
  581. //p = printTopDirectory ( p, pend );
  582. printFamilyFilter ( sb , si->m_familyFilter );
  583. continue;
  584. }
  585. if ( head[i+1] == 'R' ) {
  586. i += 1;
  587. //p = printTopDirectory ( p, pend );
  588. printRadioButtons ( sb , si );
  589. continue;
  590. }
  591. // MDW
  592. // *p++ = head[i];
  593. sb.safeMemcpy((char*)&head[i], 1);
  594. continue;
  595. }
  596. //return p;
  597. return true;
  598. }
  599. bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
  600. bool isSearchResultsPage ,
  601. CollectionRec *cr ,
  602. char *tabName ) {
  603. class MenuItem {
  604. public:
  605. char *m_text;
  606. char *m_url;
  607. };
  608. static MenuItem mi[] = {
  609. {"SEARCH","/"},
  610. // {"DISCUSSIONS","/?searchtype=discussions"},
  611. // {"PRODUCTS","/?searchtype=products"},
  612. // {"ARTICLES","/?searchtype=articles"},
  613. // {"IMAGES","/?searchtype=images"},
  614. {"DIRECTORY","/Top"},
  615. {"ADVANCED","/adv.html"},
  616. {"ADD URL","/addurl"},
  617. {"WIDGETS","/widgets.html"},
  618. {"SYNTAX","/syntax.html"},
  619. {"USERS","/users.html"},
  620. {"ABOUT","/about.html"},
  621. {"BLOG","/blog.html"},
  622. // take this out for now
  623. //{"FEED","/searchfeed.html"},
  624. {"FAQ","/faq.html"},
  625. {"API","/api.html"}
  626. };
  627. char *coll = "";
  628. if ( cr ) coll = cr->m_coll;
  629. //
  630. // first the nav column
  631. //
  632. sb->safePrintf(
  633. "<TD bgcolor=#%s " // f3c714 " // yellow/gold
  634. "valign=top "
  635. "style=\"width:210px;"
  636. "border-right:3px solid blue;"
  637. "\">"
  638. "<br>"
  639. "<center>"
  640. "<a href=/?c=%s>"
  641. "<div style=\""
  642. "background-color:white;"
  643. "padding:10px;"
  644. "border-radius:100px;"
  645. "border-color:blue;"
  646. "border-width:3px;"
  647. "border-style:solid;"
  648. "width:100px;"
  649. "height:100px;"
  650. "\">"
  651. , GOLD
  652. , coll
  653. );
  654. if ( strcmp(tabName,"appliance") == 0 )
  655. sb->safePrintf("<img style=margin-top:21px; width=90 "
  656. "height=57 src=/computer2.png>");
  657. else
  658. sb->safePrintf("<br style=line-height:10px;>"
  659. "<img border=0 "
  660. "width=54 height=79 src=/rocket.jpg>"
  661. );
  662. sb->safePrintf ( "</div>"
  663. "</a>"
  664. "</center>"
  665. "<br>"
  666. "<br>"
  667. );
  668. int32_t n = sizeof(mi) / sizeof(MenuItem);
  669. for ( int32_t i = 0 ; i < n ; i++ ) {
  670. // just show search, directory and advanced tab in serps
  671. if ( isSearchResultsPage && i >= 3 ) break;
  672. // what was this for?
  673. // if ( i >= 1 && i <= 4 &&
  674. // cr->m_diffbotApiUrl.length() >= 0 )
  675. // continue;
  676. char delim = '?';
  677. if ( strstr ( mi[i].m_url,"?") ) delim = '&';
  678. sb->safePrintf(
  679. "<a href=%s%cc=%s>"
  680. "<div style=\""
  681. "padding:5px;"
  682. "position:relative;"
  683. "text-align:right;"
  684. "border-width:3px;"
  685. "border-right-width:0px;"
  686. "border-style:solid;"
  687. "margin-left:10px;"
  688. "border-top-left-radius:10px;"
  689. "border-bottom-left-radius:10px;"
  690. "font-size:14px;"
  691. "x-overflow:;"
  692. , mi[i].m_url
  693. , delim
  694. , coll
  695. );
  696. //if ( i == pageNum )
  697. bool matched = false;
  698. if ( strcasecmp(mi[i].m_text,tabName) == 0 )
  699. matched = true;
  700. if ( matched )
  701. sb->safePrintf(
  702. "border-color:blue;"
  703. "color:black;"
  704. "background-color:white;\" ");
  705. else
  706. sb->safePrintf("border-color:white;"
  707. "color:white;"
  708. "background-color:blue;\" "
  709. " onmouseover=\""
  710. "this.style.backgroundColor='lightblue';"
  711. "this.style.color='black';\""
  712. " onmouseout=\""
  713. "this.style.backgroundColor='blue';"
  714. "this.style.color='white';\""
  715. );
  716. sb->safePrintf(">"
  717. // make button wider
  718. "<nobr>"
  719. "&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; "
  720. "<b>%s</b> &nbsp; &nbsp;</nobr>"
  721. , mi[i].m_text
  722. );
  723. //
  724. // begin hack: white out the blue border line!!
  725. //
  726. if ( matched )
  727. sb->safePrintf(
  728. "<div style=padding:5px;top:0;"
  729. "background-color:white;"
  730. "display:inline-block;"
  731. "position:absolute;>"
  732. "&nbsp;"
  733. "</div>"
  734. );
  735. // end hack
  736. sb->safePrintf(
  737. "</div>"
  738. "</a>"
  739. "<br>"
  740. );
  741. }
  742. // admin link
  743. if ( isSearchResultsPage ) return true;
  744. sb->safePrintf(
  745. "<a href=/admin/settings?c=%s>"
  746. "<div style=\"background-color:green;"
  747. // for try it out bubble:
  748. //"position:relative;"
  749. "padding:5px;"
  750. "text-align:right;"
  751. "border-width:3px;"
  752. "border-right-width:0px;"
  753. "border-style:solid;"
  754. "margin-left:10px;"
  755. "border-color:white;"
  756. "border-top-left-radius:10px;"
  757. "border-bottom-left-radius:10px;"
  758. "font-size:14px;"
  759. "color:white;"
  760. "cursor:hand;"
  761. "cursor:pointer;\" "
  762. " onmouseover=\""
  763. "this.style.backgroundColor='lightgreen';"
  764. "this.style.color='black';\""
  765. " onmouseout=\""
  766. "this.style.backgroundColor='green';"
  767. "this.style.color='white';\""
  768. ">"
  769. /*
  770. // try it out bubble div
  771. "<div "
  772. " onmouseover=\""
  773. "this.style.box-shadow='10px 10px 5px #888888';"
  774. "\""
  775. " onmouseout=\""
  776. "this.style.box-shadow='';"
  777. "\""
  778. "style=\""
  779. "vertical-align:middle;"
  780. "text-align:left;"
  781. "cursor:pointer;"
  782. "cursor:hand;"
  783. //"border-color:black;"
  784. //"border-style:solid;"
  785. //"border-width:2px;"
  786. "padding:3px;"
  787. //"width:30px;"
  788. //"height:20px;"
  789. //"margin-top:-20px;"
  790. "margin-left:-120px;"
  791. "position:absolute;"
  792. //"top:-20px;"
  793. //"left:10px;"
  794. "display:inline-block;"
  795. "\""
  796. ">"
  797. "<b style=font-size:11px;>"
  798. "Click for demo"
  799. "</b>"
  800. "</div>"
  801. */
  802. // end try it out bubble div
  803. "<b>ADMIN</b> &nbsp; &nbsp;"
  804. "</div>"
  805. "</a>"
  806. "<br>"
  807. "</TD>"
  808. , coll
  809. );
  810. return true;
  811. }
  812. bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
  813. bool printGigablast ) {
  814. sb->safePrintf("<html>\n");
  815. sb->safePrintf("<head>\n");
  816. //sb->safePrintf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf8\">");
  817. sb->safePrintf("<meta name=\"description\" content=\"A powerful, new search engine that does real-time indexing!\">\n");
  818. sb->safePrintf("<meta name=\"keywords\" content=\"search, search engine, search engines, search the web, fresh index, green search engine, green search, clean search engine, clean search\">\n");
  819. //char *title = "An Alternative Open Source Search Engine";
  820. char *title = "An Alternative Open Source Search Engine";
  821. if ( strcasecmp(tabName,"search") ) title = tabName;
  822. // if ( pageNum == 1 ) title = "Directory";
  823. // if ( pageNum == 2 ) title = "Advanced";
  824. // if ( pageNum == 3 ) title = "Add Url";
  825. // if ( pageNum == 4 ) title = "About";
  826. // if ( pageNum == 5 ) title = "Help";
  827. // if ( pageNum == 6 ) title = "API";
  828. sb->safePrintf("<title>Gigablast - %s</title>\n",title);
  829. sb->safePrintf("<style><!--\n");
  830. sb->safePrintf("body {\n");
  831. sb->safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
  832. sb->safePrintf("color: #000000;\n");
  833. sb->safePrintf("font-size: 12px;\n");
  834. sb->safePrintf("margin: 0px 0px;\n");
  835. sb->safePrintf("letter-spacing: 0.04em;\n");
  836. sb->safePrintf("}\n");
  837. sb->safePrintf("a {text-decoration:none;}\n");
  838. //sb->safePrintf("a:link {color:#00c}\n");
  839. //sb->safePrintf("a:visited {color:#551a8b}\n");
  840. //sb->safePrintf("a:active {color:#f00}\n");
  841. sb->safePrintf(".bold {font-weight: bold;}\n");
  842. sb->safePrintf(".bluetable {background:#d1e1ff;margin-bottom:15px;font-size:12px;}\n");
  843. sb->safePrintf(".url {color:#008000;}\n");
  844. sb->safePrintf(".cached, .cached a {font-size: 10px;color: #666666;\n");
  845. sb->safePrintf("}\n");
  846. sb->safePrintf("table {\n");
  847. sb->safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
  848. sb->safePrintf("color: #000000;\n");
  849. sb->safePrintf("font-size: 12px;\n");
  850. sb->safePrintf("}\n");
  851. sb->safePrintf(".directory {font-size: 16px;}\n"
  852. ".nav {font-size:20px;align:right;}\n"
  853. );
  854. sb->safePrintf("-->\n");
  855. sb->safePrintf("</style>\n");
  856. sb->safePrintf("\n");
  857. sb->safePrintf("</head>\n");
  858. sb->safePrintf("<script>\n");
  859. sb->safePrintf("<!--\n");
  860. sb->safePrintf("function x(){document.f.q.focus();}\n");
  861. sb->safePrintf("// --></script>\n");
  862. sb->safePrintf("<body onload=\"x()\">\n");
  863. //sb->safePrintf("<body>\n");
  864. //g_proxy.insertLoginBarDirective ( &sb );
  865. //
  866. // DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN
  867. //
  868. sb->safePrintf("<TABLE border=0 height=100%% cellspacing=0 "
  869. "cellpadding=0>"
  870. "\n<TR>\n");
  871. // . also prints <TD>...</TD>
  872. // . false = isSearchResultsPage?
  873. printLeftColumnRocketAndTabs ( sb , false , cr , tabName );
  874. //
  875. // now the MAIN column
  876. //
  877. sb->safePrintf("\n<TD valign=top style=padding-left:30px;>\n");
  878. sb->safePrintf("<br><br>");
  879. if ( ! printGigablast )
  880. return true;
  881. sb->safePrintf("<a href=/><img border=0 width=470 "
  882. "height=44 src=/gigablast.jpg></a>\n");
  883. // sb->safePrintf("<br>"
  884. // "<img border=0 width=470 "
  885. // "height=15 src=/bar.jpg>\n");
  886. return true;
  887. }
  888. bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
  889. SearchInput si;
  890. si.set ( sock , r );
  891. // if there's a ton of sites use the post method otherwise
  892. // they won't fit into the http request, the browser will reject
  893. // sending such a large request with "GET"
  894. char *method = "GET";
  895. if ( si.m_sites && gbstrlen(si.m_sites)>800 ) method = "POST";
  896. // if the provided their own
  897. CollectionRec *cr = g_collectiondb.getRec ( r );
  898. if ( cr && cr->m_htmlRoot.length() ) {
  899. return expandHtml ( sb ,
  900. cr->m_htmlRoot.getBufStart(),
  901. cr->m_htmlRoot.length(),
  902. NULL,
  903. 0,
  904. r ,
  905. &si,
  906. //TcpSocket *s ,
  907. method , // "GET" or "POST"
  908. cr );//CollectionRec *cr ) {
  909. }
  910. // . search special types
  911. // . defaults to web which is "search"
  912. // . can be like "images" "products" "articles"
  913. char *searchType = r->getString("searchtype",NULL,"search",NULL);
  914. log("searchtype=%s",searchType);
  915. // pass searchType in as tabName
  916. printFrontPageShell ( &sb , searchType , cr , true );
  917. //sb.safePrintf("<br><br>\n");
  918. // try to avoid using https for images. it is like 10ms slower.
  919. // if ( g_conf.m_isMattWells )
  920. // sb.safePrintf("<center><a href=/><img border=0 width=500 "
  921. // "height=122 src=http://www.gigablast.com/logo-"
  922. // "med.jpg></a>\n");
  923. // else
  924. sb.safePrintf("<br><br>\n");
  925. sb.safePrintf("<br><br><br>\n");
  926. /*
  927. sb.safePrintf("<b>web</b> &nbsp;&nbsp;&nbsp;&nbsp; ");
  928. if ( g_conf.m_isMattWells )
  929. sb.safePrintf("<a href=http://www.gigablast.com/seo>seo</a> "
  930. "&nbsp;&nbsp;&nbsp;&nbsp; "
  931. );
  932. sb.safePrintf( "<a href=\"/Top\">directory</a> "
  933. "&nbsp;&nbsp;&nbsp;&nbsp; \n");
  934. sb.safePrintf("<a href=/adv.html>advanced search</a>");
  935. sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
  936. sb.safePrintf("<a href=/addurl title=\"Instantly add your url to "
  937. "Gigablast's index\">add url</a>");
  938. sb.safePrintf("\n");
  939. sb.safePrintf("<br><br>\n");
  940. */
  941. // submit to https now
  942. sb.safePrintf("<form method=%s "
  943. "action=/search name=f>\n", method);
  944. if ( cr )
  945. sb.safePrintf("<input type=hidden name=c value=\"%s\">",
  946. cr->m_coll);
  947. // put search box in a box
  948. sb.safePrintf("<div style="
  949. "background-color:#%s;"//fcc714;"
  950. "border-style:solid;"
  951. "border-width:3px;"
  952. "border-color:blue;"
  953. //"background-color:blue;"
  954. "padding:20px;"
  955. "border-radius:20px;"
  956. ">"
  957. ,GOLD
  958. );
  959. sb.safePrintf("<input name=q type=text "
  960. "style=\""
  961. //"width:%"INT32"px;"
  962. "height:26px;"
  963. "padding:0px;"
  964. "font-weight:bold;"
  965. "padding-left:5px;"
  966. //"border-radius:10px;"
  967. "margin:0px;"
  968. "border:1px inset lightgray;"
  969. "background-color:#ffffff;"
  970. "font-size:18px;"
  971. "\" "
  972. "size=40 value=\"\">&nbsp; &nbsp;"
  973. //"<input type=\"submit\" value=\"Search\">"
  974. "<div onclick=document.f.submit(); "
  975. " onmouseover=\""
  976. "this.style.backgroundColor='lightgreen';"
  977. "this.style.color='black';\""
  978. " onmouseout=\""
  979. "this.style.backgroundColor='green';"
  980. "this.style.color='white';\" "
  981. "style=border-radius:28px;"
  982. "cursor:pointer;"
  983. "cursor:hand;"
  984. "border-color:white;"
  985. "border-style:solid;"
  986. "border-width:3px;"
  987. "padding:12px;"
  988. "width:20px;"
  989. "height:20px;"
  990. "display:inline-block;"
  991. "background-color:green;color:white;>"
  992. "<b style=margin-left:-5px;font-size:18px;"
  993. ">GO</b>"
  994. "</div>"
  995. "\n"
  996. );
  997. sb.safePrintf("</div>\n");
  998. sb.safePrintf("\n");
  999. sb.safePrintf("</form>\n");
  1000. sb.safePrintf("<br>\n");
  1001. sb.safePrintf("\n");
  1002. if ( cr && cr->m_coll ) { // && strcmp(cr->m_coll,"main") ) {
  1003. sb.safePrintf("<center>"
  1004. "Searching the <b>%s</b> collection."
  1005. "</center>",
  1006. cr->m_coll);
  1007. sb.safePrintf("<br>\n");
  1008. sb.safePrintf("\n");
  1009. }
  1010. // take this out for now
  1011. /*
  1012. // always the option to add event guru to their list of
  1013. // search engine in their browser
  1014. sb.safePrintf("<br>"
  1015. //"<br>"
  1016. "<script>\n"
  1017. "function addEngine() {\n"
  1018. "if (window.external && "
  1019. "('AddSearchProvider' in window.external)) {\n"
  1020. // Firefox 2 and IE 7, OpenSearch
  1021. "window.external.AddSearchProvider('http://"
  1022. "www.gigablast.com/searchbar.xml');\n"
  1023. "}\n"
  1024. "else if (window.sidebar && ('addSearchEngine' "
  1025. "in window.sidebar)) {\n"
  1026. // Firefox <= 1.5, Sherlock
  1027. "window.sidebar.addSearchEngine('http://"
  1028. "www.gigablast.com/searchbar.xml',"
  1029. //"example.com/search-plugin.src',"
  1030. "'http://www.gigablast.com/rocket.jpg'," //guru.png
  1031. "'Search Plugin', '');\n"
  1032. "}\n"
  1033. "else {"
  1034. // No search engine support (IE 6, Opera, etc).
  1035. "alert('No search engine support');\n"
  1036. "}\n"
  1037. // do not ask again if they tried to add it
  1038. // meta cookie should store this
  1039. //"document.getElementById('addedse').value='1';\n"
  1040. // NEVER ask again! permanent cookie
  1041. "document.cookie = 'didse=3';"
  1042. // make it invisible again
  1043. //"var e = document.getElementById('addse');\n"
  1044. //"e.style.display = 'none';\n"
  1045. "}\n"
  1046. "</script>\n"
  1047. "<center>"
  1048. "<a onclick='addEngine();' style="
  1049. "cursor:pointer;"
  1050. "cursor:hand;"
  1051. "color:blue;"
  1052. ">"
  1053. "<img height=16 width=16 border=0 src=/rocket16.png>"
  1054. "<font color=#505050>"
  1055. "%c%c%c "
  1056. "</font>"
  1057. "&nbsp; "
  1058. "Add Gigablast to your browser's "
  1059. "search engines"
  1060. "</a>"
  1061. "</center>"
  1062. "<br>"
  1063. "<br>"
  1064. // print triangle
  1065. ,0xe2
  1066. ,0x96
  1067. ,0xbc
  1068. );
  1069. */
  1070. // print any red boxes we might need to
  1071. if ( printRedBox2 ( &sb , sock , r ) ) // true ) )
  1072. sb.safePrintf("<br>\n");
  1073. sb.safePrintf("<br><center><table cellpadding=3>\n");
  1074. sb.safePrintf("\n");
  1075. char *root = "";
  1076. if ( g_conf.m_isMattWells )
  1077. root = "http://www.gigablast.com";
  1078. sb.safePrintf("<tr valign=top>\n");
  1079. //sb.safePrintf("<td align=center><div style=width:50px;height:50px;display:inline-block;background-color:red;></div></td>\n");
  1080. sb.safePrintf("<td width=10%% "
  1081. "align=center><img style=padding-right:10px; "
  1082. "height=71px width=50px "
  1083. "src=%s/opensource.png></td>\n"
  1084. , root );
  1085. sb.safePrintf("<td width=45%%><font size=+1><b>Open Source!</b>"
  1086. "</font><br><br>\n");
  1087. sb.brify2("Gigablast is now available as an <a href=https://github.com/gigablast/open-source-search-engine>open source search engine</a> on github.com. Download it today. Finally a robust, scalable search solution in C/C++ that has been in development and used commercially since 2000. <a href=http://www.gigablast.com/faq.html#features>Features</a>."
  1088. ,40);
  1089. //sb.safePrintf("<br><br>");
  1090. sb.safePrintf("</td>");
  1091. sb.safePrintf("<td><font size=+1><b>ScreenShots</b>"
  1092. "</font><br><br>\n");
  1093. sb.safePrintf("<a href=/ss_settings.png><img width=150 height=81 src=ss_settings_thumb.png></a>");
  1094. sb.safePrintf("<br><br>");
  1095. sb.safePrintf("<a href=/ss_hosts.png><img width=150 height=81 src=ss_hosts_thumb.png></a>");
  1096. sb.safePrintf("<br><br>");
  1097. sb.safePrintf("<a href=/ss_filters.png><img width=150 height=81 src=ss_filters_thumb.png></a>");
  1098. sb.safePrintf("</td>");
  1099. sb.safePrintf("</tr>\n");
  1100. sb.safePrintf("</table></center>\n");
  1101. /*
  1102. do not show table for open source installs
  1103. // donate with paypal
  1104. sb.safePrintf("<tr valign=top>\n");
  1105. sb.safePrintf("<td align=center style=padding-right:20px;><center>"
  1106. // BEGIN PAYPAL DONATE BUTTON
  1107. "<form action=\"https://www.paypal.com/cgi-bin/webscr\" method=\"post\" target=\"_top\">"
  1108. "<input type=\"hidden\" name=\"cmd\" value=\"_donations\">"
  1109. "<input type=\"hidden\" name=\"business\" value=\"2SFSFLUY3KS9Y\">"
  1110. "<input type=\"hidden\" name=\"lc\" value=\"US\">"
  1111. "<input type=\"hidden\" name=\"item_name\" value=\"Gigablast, Inc.\">"
  1112. "<input type=\"hidden\" name=\"currency_code\" value=\"USD\">"
  1113. "<input type=\"hidden\" name=\"bn\" value=\"PP-DonationsBF:btn_donateCC_LG.gif:NonHosted\">"
  1114. "<input type=\"image\" src=\"https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif\" border=\"0\" name=\"submit\" alt=\"PayPal - The safer, easier way to pay online!\" height=47 width=147>"
  1115. "<img alt=\"\" border=\"0\" src=\"https://www.paypalobjects.com/en_US/i/scr/pixel.gif\" width=\"1\" height=\"1\">"
  1116. "</form>"
  1117. // END PAYPAY BUTTON
  1118. "</center></div></center></td>\n"
  1119. );
  1120. sb.safePrintf("<td><font size=+1><b>"
  1121. "Support Gigablast"
  1122. "</b></font><br>\n"
  1123. );
  1124. sb.brify2(
  1125. "Donations of $100 or more receive a black "
  1126. "Gigablast T-shirt "
  1127. "with embroidered logo while quantities last. "
  1128. "State your address and size "
  1129. "in an <a href=/contact.html>email</a>. "
  1130. "PayPal accepted. "
  1131. "Help Gigablast continue "
  1132. "to grow and add new features."
  1133. , 80
  1134. );
  1135. sb.safePrintf("</td></tr>\n");
  1136. */
  1137. /*
  1138. sb.safePrintf("<tr valign=top>\n");
  1139. // 204x143
  1140. sb.safePrintf("<td><img height=52px width=75px "
  1141. "src=%s/eventguru.png></td>\n"
  1142. , root );
  1143. sb.safePrintf("<td><font size=+1><b>Event Guru Returns</b></font><br>\n");
  1144. sb.brify2("<a href=http://www.eventguru.com/>Event Guru</a> datamines events from the web. It identifies events on a web page, or even plain text, using the same rules of deduction used by the human mind. It also has Facebook integration and lots of other cool things.",80);
  1145. sb.safePrintf("<br><br></td></tr>\n");
  1146. sb.safePrintf("\n");
  1147. sb.safePrintf("\n");
  1148. */
  1149. /*
  1150. sb.safePrintf("<tr valign=top>\n");
  1151. sb.safePrintf("<td align=center><div style=width:50px;height:50px;display:inline-block;background-color:green;></div></td>\n");
  1152. sb.safePrintf("<td><font size=+1><b>The Green Search Engine</b></font><br>\n");
  1153. sb.brify2("Gigablast is the only clean-powered web search engine. 90% of its power usage comes from wind energy. Astoundingly, Gigablast is one of ONLY four search engines in the United States indexing over a billion pages.",80);
  1154. sb.safePrintf("<br><br></td></tr>\n");
  1155. sb.safePrintf("\n");
  1156. sb.safePrintf("\n");
  1157. */
  1158. /*
  1159. sb.safePrintf("<tr valign=top>\n");
  1160. sb.safePrintf("<td align=center><img src=%s/gears.png "
  1161. "height=50 width=50></div></td>\n"
  1162. , root );
  1163. sb.safePrintf("<td><font size=+1><b>The Transparent Search Engine</b></font><br>\n");
  1164. sb.brify2("Gigablast is the first truly transparent search engine. It tells you exactly why the search results are ranked the way they are. There is nothing left to the imagination.",85);
  1165. sb.safePrintf("<br><br>");
  1166. sb.safePrintf("</td></tr>\n");
  1167. sb.safePrintf("\n");
  1168. sb.safePrintf("\n");
  1169. */
  1170. /*
  1171. if ( g_conf.m_isMattWells ) {
  1172. sb.safePrintf("<tr valign=top>\n");
  1173. sb.safePrintf("<td align=center><center><img src=%s/dollargear.png "
  1174. "height=50 width=50></center></div></center></td>\n"
  1175. , root );
  1176. sb.safePrintf("<td><font size=+1><b>The SEO Search Engine</b></font><br>\n");
  1177. sb.brify2("When it comes to search-engine based SEO, Gigablast is the place to be. With a frothy set of unique and effective <a href=http://www.gigablast.com/seo>SEO tools</a>, you will find all you need to execute a simple yet effective SEO strategy. Stop the guesswork, and let a search engine tell you how to SEO it.",85);
  1178. sb.safePrintf("</td></tr>\n");
  1179. }
  1180. */
  1181. /*
  1182. sb.safePrintf("<tr valign=top>\n");
  1183. sb.safePrintf("<td><div style=width:50px;height:50px;display:inline-block;background-color:ff3030;></td>\n");
  1184. sb.safePrintf("<td><font size=+1><b>Xml Search Feed</b></font><br>\n");
  1185. sb.brify2("Utilize Gigablast's results on your own site or product by connecting with Gigablast's <a href=/searchfeed.html>XML search feed</a>. It's now simpler than ever to setup and use. You can also add the web pages you want into the index in near real-time.",85);
  1186. sb.safePrintf("</td></tr>\n");
  1187. */
  1188. /*
  1189. sb.safePrintf("<tr valign=top>\n");
  1190. sb.safePrintf("<td><div style=width:50px;height:50px;display:inline-block;background-color:black;></td>\n");
  1191. sb.safePrintf("<td><font size=+1><b>The Private Search Engine</b>"
  1192. "</font><br>\n");
  1193. sb.brify2("Gigablast does not allow the NSA or any third party "
  1194. "to spy on the queries your IP address is doing, "
  1195. "unlike "
  1196. "<a href=http://www.guardian.co.uk/world/2013/jun/"
  1197. "06/us-tech-giants-nsa-data>"
  1198. "other large search engines</a>. "
  1199. "Gigablast is the only "
  1200. "<a href=/privacy.html>truly private search engine</a> "
  1201. "in the United States."
  1202. //" Everyone else has fundamental "
  1203. //"gaps in their "
  1204. //"security as explained by the above link."
  1205. //"Tell Congress "
  1206. //"to <a href=https://optin.stopwatching.us/>stop spying "
  1207. //"on you</a>."
  1208. ,85);
  1209. sb.safePrintf("</td></tr>\n");
  1210. */
  1211. /*
  1212. sb.safePrintf("<tr valign=top>\n");
  1213. sb.safePrintf("<td><div style=width:50px;height:50px;display:inline-block;background-color:black;></td>\n");
  1214. sb.safePrintf("<td><font size=+1><b>No Tax Dodging</b></font><br>\n");
  1215. sb.brify2("Gigablast pays its taxes when it makes a profit. "
  1216. "Google and Bing <a href=http://www.bloomberg.com/news/"
  1217. "2010-10-21/google-2-4-rate-shows-how-60-billion-u-s-"
  1218. "revenue-lost-to-tax-loopholes.html>do not</a>. They "
  1219. "stash their profits in "
  1220. "offshore tax havens to avoid paying taxes. "
  1221. //"The end result is that taxes are higher for you. "
  1222. "You may think Google and Bing are free to use, but in "
  1223. "reality, <u>you</u> pay for it in increased taxes."
  1224. ,85);
  1225. sb.safePrintf("</td></tr>\n");
  1226. */
  1227. //
  1228. // begin new stuff
  1229. //
  1230. /*
  1231. // gradients
  1232. sb.safePrintf("<style><!--\n");
  1233. sb.safePrintf(".grad {");
  1234. sb.safePrintf("background: rgb(190,201,247);");
  1235. sb.safePrintf("background: url();");
  1236. sb.safePrintf("background: -moz-linear-gradient(-45deg, rgba(190,201,247,1) 0%%, rgba(11,60,237,1) 100%%);");
  1237. sb.safePrintf("background: -webkit-gradient(linear, left top, right bottom, color-stop(0%%,rgba(190,201,247,1)), color-stop(100%%,rgba(11,60,237,1)));");
  1238. sb.safePrintf("background: -webkit-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%);");
  1239. sb.safePrintf("background: -o-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%);");
  1240. sb.safePrintf("background: -ms-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%);");
  1241. sb.safePrintf("background: linear-gradient(135deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%);");
  1242. sb.safePrintf("filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#bec9f7', endColorstr='#0b3ced',GradientType=1 );");
  1243. sb.safePrintf("}");
  1244. sb.safePrintf("-->");
  1245. sb.safePrintf("</style>\n");
  1246. sb.safePrintf("<br>");
  1247. sb.safePrintf("<div class=grad style=\"border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:320px;height:320px;display:inline-block;z-index:100;color:black;position:relative;background-color:lightgray;\">");
  1248. sb.safePrintf("<br>");
  1249. sb.safePrintf("<b>");
  1250. sb.safePrintf("<font style=font-size:18px;margin-left:80px;>");
  1251. sb.safePrintf("Build Your Own");
  1252. sb.safePrintf("</font>");
  1253. sb.safePrintf("<br>");
  1254. sb.safePrintf("<font style=font-size:18px;margin-left:80px;>");
  1255. sb.safePrintf("Search Engine in the");
  1256. sb.safePrintf("</font>");
  1257. sb.safePrintf("<br>");
  1258. sb.safePrintf("<font style=font-size:18px;margin-left:80px;>");
  1259. sb.safePrintf("Cloud");
  1260. sb.safePrintf("</font>");
  1261. sb.safePrintf("</b>");
  1262. sb.safePrintf("<br>");
  1263. sb.safePrintf("<br>");
  1264. sb.safePrintf("<div style=margin-left:20px;width:270px;>");
  1265. sb.safePrintf("<a href=/admin/addcoll><img style=float:left;padding-right:15px; height=188px width=101px src=/robot3.png></a>");
  1266. //sb.safePrintf("<br>");
  1267. sb.safePrintf("<b>STEP 1.</b> <a href=/admin/addcoll?guide=1>"
  1268. "Click here to");
  1269. sb.safePrintf("<br>");
  1270. sb.safePrintf("<b>name your engine</b></a>.");
  1271. sb.safePrintf("<br>");
  1272. sb.safePrintf("<br>");
  1273. sb.safePrintf("<b>STEP 2.</b> <a href=/admin/settings?guide=1>"
  1274. "Click here to ");
  1275. sb.safePrintf("<br>");
  1276. sb.safePrintf("<b>add websites to index</b></a>.");
  1277. sb.safePrintf("<br>");
  1278. sb.safePrintf("<br>");
  1279. sb.safePrintf("<b>STEP 3.</b> <a href=/widgets.html?guide=1>"
  1280. "Click here to");
  1281. sb.safePrintf("<br>");
  1282. sb.safePrintf("<b>insert search box</b></a>.");
  1283. sb.safePrintf("</div>");
  1284. sb.safePrintf("</div>");
  1285. */
  1286. /*
  1287. sb.safePrintf("<div class=grad style=\"border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:280px;height:280px;display:inline-block;z-index:105;color:black;margin-left:-50px;position:absolute;margin-top:50px;background-color:lightgray;\">");
  1288. sb.safePrintf("<br>");
  1289. sb.safePrintf("<br style=line-height:25px;>");
  1290. sb.safePrintf("<b>");
  1291. sb.safePrintf("<font style=font-size:18px;margin-left:40px;>");
  1292. sb.safePrintf("Web Search Appliance");
  1293. sb.safePrintf("</font>");
  1294. sb.safePrintf("<br>");
  1295. sb.safePrintf("<br>");
  1296. sb.safePrintf("<br>");
  1297. sb.safePrintf("</b>");
  1298. sb.safePrintf("<div style=margin-left:20px;width:270px;>");
  1299. sb.safePrintf("<a href=http://www.gigablast.com/appliance.html><img style=float:left;padding-bottom:20px;padding-right:10px; height=81px width=121px src=/computer2.png></a>");
  1300. sb.safePrintf("Put the web in your closet. ");
  1301. sb.safePrintf("Jump start your efforts with four 1U supermicro servers loaded with the top 2 billion pages from the web. <a href=http://www.gigablast.com/appliance.html>[learn more]</a>");
  1302. sb.safePrintf("</font>");
  1303. sb.safePrintf("</div>");
  1304. sb.safePrintf("</div>");
  1305. */
  1306. /*
  1307. sb.safePrintf("<div class=grad style=\"border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:240px;height:240px;display:inline-block;z-index:110;color:black;margin-left:-240px;position:absolute;margin-top:230px;background-color:lightgray;\">");
  1308. sb.safePrintf("<br>");
  1309. sb.safePrintf("<b>");
  1310. sb.safePrintf("<font style=font-size:18px;margin-left:60px;>");
  1311. sb.safePrintf("Open Source");
  1312. sb.safePrintf("</font>");
  1313. sb.safePrintf("<br>");
  1314. sb.safePrintf("<br>");
  1315. sb.safePrintf("</b>");
  1316. sb.safePrintf("<div style=margin-left:30px;margin-right:5px;>");
  1317. sb.safePrintf("<a href=http://www.gigablast.com/faq.html#features><img style=float:left;padding-right:10px height=71px width=71px src=/unlocked2.png></a>");
  1318. sb.safePrintf("Gigablast is now available as an <a href=https://github.com/gigablast/open-source-search-engine>open source search engine</a> on github.com. Download it today. Finally a robust, scalable search solution in C/C++ that has been in development and used commercially since 2000. <a href=http://www.gigablast.com/faq.html#features>Features</a>.");
  1319. sb.safePrintf("</div>");
  1320. sb.safePrintf("</div>");
  1321. //
  1322. // donate with paypal bubble
  1323. //
  1324. sb.safePrintf("<div class=grad style=\"border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:180px;height:180px;display:inline-block;z-index:120;color:black;margin-left:10px;position:absolute;margin-top:270px;background-color:lightgray;\">");
  1325. sb.safePrintf("<br>");
  1326. sb.safePrintf("<b>");
  1327. sb.safePrintf("<font style=font-size:18px;margin-left:40px;>");
  1328. sb.safePrintf("Contribute");
  1329. sb.safePrintf("</font>");
  1330. sb.safePrintf("<br>");
  1331. sb.safePrintf("<br>");
  1332. sb.safePrintf("</b>");
  1333. sb.safePrintf("<div style=margin-left:15px;margin-right:5px;>");
  1334. sb.safePrintf(
  1335. "Help Gigablast development with PayPal."
  1336. "<br>"
  1337. "<br>"
  1338. // BEGIN PAYPAL DONATE BUTTON
  1339. "<form action=\"https://www.paypal.com/cgi-bin/webscr\" method=\"post\" target=\"_top\">"
  1340. "<input type=\"hidden\" name=\"cmd\" value=\"_donations\">"
  1341. "<input type=\"hidden\" name=\"business\" value=\"2SFSFLUY3KS9Y\">"
  1342. "<input type=\"hidden\" name=\"lc\" value=\"US\">"
  1343. "<input type=\"hidden\" name=\"item_name\" value=\"Gigablast, Inc.\">"
  1344. "<input type=\"hidden\" name=\"currency_code\" value=\"USD\">"
  1345. "<input type=\"hidden\" name=\"bn\" value=\"PP-DonationsBF:btn_donateCC_LG.gif:NonHosted\">"
  1346. "<input type=\"image\" src=\"https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif\" border=\"0\" name=\"submit\" alt=\"PayPal - The safer, easier way to pay online!\" height=47 width=147>"
  1347. "<img alt=\"\" border=\"0\" src=\"https://www.paypalobjects.com/en_US/i/scr/pixel.gif\" width=\"1\" height=\"1\">"
  1348. "</form>"
  1349. // END PAYPAY BUTTON
  1350. "</center></div></center>"
  1351. //"</td>\n"
  1352. );
  1353. */
  1354. //
  1355. // end new stuff
  1356. //
  1357. sb.safePrintf("\n");
  1358. sb.safePrintf("\n");
  1359. //sb.safePrintf("</table>\n");
  1360. sb.safePrintf("<br><br>\n");
  1361. printNav ( sb , r );
  1362. return true;
  1363. }
  1364. bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
  1365. CollectionRec *cr = g_collectiondb.getRec ( r );
  1366. printFrontPageShell ( &sb , "add url" , cr , true );
  1367. sb.safePrintf("<script type=\"text/javascript\">\n"
  1368. "function handler() {\n"
  1369. "if(this.readyState == 4 ) {\n"
  1370. "document.getElementById('msgbox').innerHTML="
  1371. "this.responseText;\n"
  1372. //"alert(this.status+this.statusText+"
  1373. //"this.responseXML+this.responseText);\n"
  1374. "}}\n"
  1375. "</script>\n");
  1376. sb.safePrintf("<br><br>\n");
  1377. sb.safePrintf("<br><br><br>\n");
  1378. /*
  1379. sb.safePrintf("<b>web</b> &nbsp;&nbsp;&nbsp;&nbsp; ");
  1380. if ( g_conf.m_isMattWells )
  1381. sb.safePrintf("<a href=http://www.gigablast.com/seo>seo</a> "
  1382. "&nbsp;&nbsp;&nbsp;&nbsp; "
  1383. );
  1384. sb.safePrintf( "<a href=\"/Top\">directory</a> "
  1385. "&nbsp;&nbsp;&nbsp;&nbsp; \n");
  1386. sb.safePrintf("<a href=/adv.html>advanced search</a>");
  1387. sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
  1388. sb.safePrintf("<a href=/addurl title=\"Instantly add your url to "
  1389. "Gigablast's index\">add url</a>");
  1390. sb.safePrintf("\n");
  1391. sb.safePrintf("<br><br>\n");
  1392. */
  1393. // submit to https now
  1394. sb.safePrintf("<form method=GET "
  1395. "action=/addurl name=f>\n" );
  1396. char *coll = "";
  1397. if ( cr ) coll = cr->m_coll;
  1398. if ( cr )
  1399. sb.safePrintf("<input type=hidden name=c value=\"%s\">",
  1400. cr->m_coll);
  1401. // put search box in a box
  1402. sb.safePrintf("<div style="
  1403. "background-color:#%s;" // fcc714;"
  1404. "border-style:solid;"
  1405. "border-width:3px;"
  1406. "border-color:blue;"
  1407. //"background-color:blue;"
  1408. "padding:20px;"
  1409. "border-radius:20px;"
  1410. ">"
  1411. , GOLD
  1412. );
  1413. sb.safePrintf("<input name=urls type=text "
  1414. "style=\""
  1415. //"width:%"INT32"px;"
  1416. "height:26px;"
  1417. "padding:0px;"
  1418. "font-weight:bold;"
  1419. "padding-left:5px;"
  1420. //"border-radius:10px;"
  1421. "margin:0px;"
  1422. "border:1px inset lightgray;"
  1423. "background-color:#ffffff;"
  1424. "font-size:18px;"
  1425. "\" "
  1426. "size=40 value=\""
  1427. );
  1428. if ( url ) {
  1429. SafeBuf tmp;
  1430. tmp.safePrintf("%s",url);
  1431. // don't let double quotes in the url close our val attribute
  1432. tmp.replace("\"","%22");
  1433. sb.safeMemcpy(&tmp);
  1434. }
  1435. else
  1436. sb.safePrintf("http://");
  1437. sb.safePrintf("\">&nbsp; &nbsp;"
  1438. //"<input type=\"submit\" value=\"Add Url\">\n"
  1439. "<div onclick=document.f.submit(); "
  1440. " onmouseover=\""
  1441. "this.style.backgroundColor='lightgreen';"
  1442. "this.style.color='black';\""
  1443. " onmouseout=\""
  1444. "this.style.backgroundColor='green';"
  1445. "this.style.color='white';\" "
  1446. "style=border-radius:28px;"
  1447. "cursor:pointer;"
  1448. "cursor:hand;"
  1449. "border-color:white;"
  1450. "border-style:solid;"
  1451. "border-width:3px;"
  1452. "padding:12px;"
  1453. "width:20px;"
  1454. "height:20px;"
  1455. "display:inline-block;"
  1456. "background-color:green;color:white;>"
  1457. "<b style=margin-left:-5px;font-size:18px;>GO</b>"
  1458. "</div>"
  1459. "\n"
  1460. );
  1461. sb.safePrintf("\n");
  1462. sb.safePrintf("</div>\n");
  1463. sb.safePrintf("\n");
  1464. sb.safePrintf("<br>\n");
  1465. sb.safePrintf("\n");
  1466. // if addurl is turned off, just print "disabled" msg
  1467. char *msg = NULL;
  1468. if ( ! g_conf.m_addUrlEnabled )
  1469. msg = "Add url is temporarily disabled";
  1470. // can also be turned off in the collection rec
  1471. //if ( ! cr->m_addUrlEnabled )
  1472. // msg = "Add url is temporarily disabled";
  1473. // or if in read-only mode
  1474. if ( g_conf.m_readOnlyMode )
  1475. msg = "Add url is temporarily disabled";
  1476. sb.safePrintf("<br><center>"
  1477. "Add a url to the <b>%s</b> collection</center>",coll);
  1478. // if url is non-empty the ajax will receive this identical msg
  1479. // and display it in the div, so do not duplicate the msg!
  1480. if ( msg && ! url )
  1481. sb.safePrintf("<br><br>%s",msg);
  1482. // . the ajax msgbox div
  1483. // . when loaded with the main page for the first time it will
  1484. // immediately replace its content...
  1485. if ( url ) {
  1486. char *root = "";
  1487. if ( g_conf.m_isMattWells )
  1488. root = "http://www.gigablast.com";
  1489. sb.safePrintf("<br>"
  1490. "<br>"
  1491. "<div id=msgbox>"
  1492. //"<b>Injecting your url. Please wait...</b>"
  1493. "<center>"
  1494. "<img src=%s/gears.gif width=50 height=50>"
  1495. "</center>"
  1496. "<script type=text/javascript>"
  1497. //"alert('shit');"
  1498. "var client = new XMLHttpRequest();\n"
  1499. "client.onreadystatechange = handler;\n"
  1500. "var url='/addurl?urls="
  1501. , root );
  1502. sb.urlEncode ( url );
  1503. // propagate "admin" if set
  1504. //int32_t admin = hr->getLong("admin",-1);
  1505. //if ( admin != -1 ) sb.safePrintf("&admin=%"INT32"",admin);
  1506. // provide hash of the query so clients can't just pass in
  1507. // a bogus id to get search results from us
  1508. uint32_t h32 = hash32n(url);
  1509. if ( h32 == 0 ) h32 = 1;
  1510. uint64_t rand64 = gettimeofdayInMillisecondsLocal();
  1511. // msg7 needs an explicit collection for /addurl for injecting
  1512. // in PageInject.cpp. it does not use defaults for safety.
  1513. sb.safePrintf("&id=%"UINT32"&c=%s&rand=%"UINT64"';\n"
  1514. "client.open('GET', url );\n"
  1515. "client.send();\n"
  1516. "</script>\n"
  1517. , h32
  1518. , coll
  1519. , rand64
  1520. );
  1521. sb.safePrintf("</div>\n");
  1522. }
  1523. sb.safePrintf("</form>\n");
  1524. sb.safePrintf("<br>\n");
  1525. sb.safePrintf("\n");
  1526. sb.safePrintf("<br><br>\n");
  1527. printNav ( sb , r );
  1528. return true;
  1529. }
  1530. bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
  1531. char format = r->getReplyFormat();
  1532. if ( format != FORMAT_HTML )
  1533. return printTopDirectory ( sb , format );
  1534. CollectionRec *cr = g_collectiondb.getRec ( r );
  1535. printFrontPageShell ( &sb , "directory" , cr , true );
  1536. sb.safePrintf("<br><br>\n");
  1537. sb.safePrintf("<br><br><br>\n");
  1538. // submit to https now
  1539. sb.safePrintf("<form method=GET "
  1540. "action=/search name=f>\n");
  1541. if ( cr )
  1542. sb.safePrintf("<input type=hidden name=c value=\"%s\">",
  1543. cr->m_coll);
  1544. // put search box in a box
  1545. sb.safePrintf("<div style="
  1546. "background-color:#%s;" // fcc714;"
  1547. "border-style:solid;"
  1548. "border-width:3px;"
  1549. "border-color:blue;"
  1550. //"background-color:blue;"
  1551. "padding:20px;"
  1552. "border-radius:20px;"
  1553. ">"
  1554. ,GOLD
  1555. );
  1556. sb.safePrintf("<input name=q type=text "
  1557. "style=\""
  1558. //"width:%"INT32"px;"
  1559. "height:26px;"
  1560. "padding:0px;"
  1561. "font-weight:bold;"
  1562. "padding-left:5px;"
  1563. //"border-radius:10px;"
  1564. "margin:0px;"
  1565. "border:1px inset lightgray;"
  1566. "background-color:#ffffff;"
  1567. "font-size:18px;"
  1568. "\" "
  1569. "size=40 value=\"\">&nbsp; &nbsp;"
  1570. //"<input type=\"submit\" value=\"Search\">\n");
  1571. "<div onclick=document.f.submit(); "
  1572. " onmouseover=\""
  1573. "this.style.backgroundColor='lightgreen';"
  1574. "this.style.color='black';\""
  1575. " onmouseout=\""
  1576. "this.style.backgroundColor='green';"
  1577. "this.style.color='white';\" "
  1578. "style=border-radius:28px;"
  1579. "cursor:pointer;"
  1580. "cursor:hand;"
  1581. "border-color:white;"
  1582. "border-style:solid;"
  1583. "border-width:3px;"
  1584. "padding:12px;"
  1585. "width:20px;"
  1586. "height:20px;"
  1587. "display:inline-block;"
  1588. "background-color:green;color:white;>"
  1589. "<b style=margin-left:-5px;font-size:18px;>GO</b>"
  1590. "</div>"
  1591. "\n"
  1592. );
  1593. sb.safePrintf("</div>\n");
  1594. sb.safePrintf("\n");
  1595. sb.safePrintf("</form>\n");
  1596. sb.safePrintf("<br>\n");
  1597. sb.safePrintf("\n");
  1598. printTopDirectory ( sb , FORMAT_HTML );
  1599. sb.safePrintf("<br><br>\n");
  1600. printNav ( sb , r);
  1601. return true;
  1602. }
  1603. // . returns false if blocked, true otherwise
  1604. // . sets errno on error
  1605. // . make a web page displaying the config of this host
  1606. // . call g_httpServer.sendDynamicPage() to send it
  1607. bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
  1608. // don't allow pages bigger than 128k in cache
  1609. char buf [ 10*1024 ];//+ MAX_QUERY_LEN ];
  1610. // a ptr into "buf"
  1611. //char *p = buf;
  1612. //char *pend = buf + 10*1024 + MAX_QUERY_LEN - 100 ;
  1613. SafeBuf sb(buf, 10*1024 );//+ MAX_QUERY_LEN);
  1614. // print bgcolors, set focus, set font style
  1615. //p = g_httpServer.printFocus ( p , pend );
  1616. //p = g_httpServer.printColors ( p , pend );
  1617. //int32_t qlen;
  1618. //char *q = r->getString ( "q" , &qlen , NULL );
  1619. // insert collection name too
  1620. CollectionRec *cr = g_collectiondb.getRec(r);
  1621. if ( ! cr ) {
  1622. g_errno = ENOCOLLREC;
  1623. return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
  1624. }
  1625. // get the collection rec
  1626. /*
  1627. CollectionRec *cr = g_collectiondb.getRec ( coll );
  1628. uint8_t *hp = NULL;
  1629. int32_t hpLen;
  1630. int64_t docsInColl = -1;
  1631. if ( ! cr ) {
  1632. // use the default
  1633. Parm *pp = g_parms.getParm ( "hp" );
  1634. if ( ! pp ) {
  1635. g_errno = ENOTFOUND;
  1636. g_msg = " (error: no such collection)";
  1637. return g_httpServer.sendErrorReply(s,500,
  1638. mstrerror(g_errno));
  1639. }
  1640. hp = (uint8_t*)pp->m_def;
  1641. if ( hp ) hpLen = uint8strlen ( hp );
  1642. if ( hpLen <= 0 || ! hp )
  1643. log(LOG_INFO,"http: No root page html present.");
  1644. } else {
  1645. if(cr->m_useLanguagePages) {
  1646. uint8_t lang = g_langId.guessGBLanguageFromUrl(r->getHost());
  1647. if(lang && (hp = g_languagePages.getLanguagePage(lang)) != NULL) {
  1648. hpLen = uint8strlen(hp);
  1649. // Set sort language as well
  1650. // This might not be a good idea, as it
  1651. // overrides any other setting. May be
  1652. // better to let the user agent string
  1653. // tell us what the user wants.
  1654. strcpy(cr->m_defaultSortLanguage,
  1655. getLanguageAbbr(lang));
  1656. }
  1657. }
  1658. if(!hp) {
  1659. hp = (uint8_t*)cr->m_htmlRoot;
  1660. hpLen = cr->m_htmlRootLen;
  1661. }
  1662. //RdbBase *base = getRdbBase ( RDB_CHECKSUMDB , coll );
  1663. RdbBase *base = getRdbBase ( (uint8_t)RDB_CLUSTERDB , coll );
  1664. if ( base ) docsInColl = base->getNumGlobalRecs();
  1665. }
  1666. */
  1667. // print the page out
  1668. /*
  1669. expandRootHtml ( sb,
  1670. hp , hpLen ,
  1671. q , qlen , r , s , docsInColl ,
  1672. cr );
  1673. */
  1674. //if ( ! strcmp(coll,"dmoz" ) )
  1675. // printDirHomePage(sb,r);
  1676. //else
  1677. printWebHomePage(sb,r,s);
  1678. // . print last 5 queries
  1679. // . put 'em in a table
  1680. // . disable for now, impossible to monitor/control
  1681. //p += printLastQueries ( p , pend );
  1682. // are we the admin?
  1683. //bool isAdmin = g_collectiondb.isAdmin ( r , s );
  1684. // calculate bufLen
  1685. //int32_t bufLen = p - buf;
  1686. // . now encapsulate it in html head/tail and send it off
  1687. // . the 0 means browser caches for however int32_t it's set for
  1688. // . but we don't use 0 anymore, use -2 so it never gets cached so
  1689. // our display of the # of pages in the index is fresh
  1690. // . no, but that will piss people off, its faster to keep it cached
  1691. //return g_httpServer.sendDynamicPage ( s , buf , bufLen , -1 );
  1692. return g_httpServer.sendDynamicPage ( s,
  1693. (char*) sb.getBufStart(),
  1694. sb.length(),
  1695. // 120 seconds cachetime
  1696. // don't cache anymore since
  1697. // we have the login bar at
  1698. // the top of the page
  1699. 0,//120, // cachetime
  1700. false,// post?
  1701. "text/html",
  1702. 200,
  1703. NULL, // cookie
  1704. "UTF-8",
  1705. r);
  1706. }
  1707. // . store into "p"
  1708. // . returns bytes stored into "p"
  1709. // . used for entertainment purposes
  1710. /*
  1711. int32_t printLastQueries ( char *p , char *pend ) {
  1712. // if not 512 bytes left, bail
  1713. if ( pend - p < 512 ) return 0;
  1714. // return w/ no table if no queries have been added to g_qbuf yet
  1715. if ( ! g_nextq == -1 ) return 0;
  1716. // remember start for returning # of bytes stored
  1717. char *start = p;
  1718. // begin table (no border)
  1719. sprintf (p,"<br><table border=0><tr><td><center>Last %"INT32" queries:"
  1720. "</td></tr>", (int32_t)QBUF_NUMQUERIES );
  1721. p += gbstrlen ( p );
  1722. // point to last query added
  1723. int32_t n = g_nextq - 1;
  1724. // . wrap it if we need to
  1725. // . QBUF_NUMQUERIES is defined to be 5 in PageResults.h
  1726. if ( n < 0 ) n = QBUF_NUMQUERIES - 1;
  1727. // . print up to five queries
  1728. // . queries are stored by advancing g_nextq, so "i" should go backward
  1729. int32_t count = 0;
  1730. for ( int32_t i = n ; count < QBUF_NUMQUERIES ; count++ , i-- ) {
  1731. // wrap i if we need to
  1732. if ( i == -1 ) i = QBUF_NUMQUERIES - 1;
  1733. // if this query is empty, skip it (might be uninitialized)
  1734. if ( g_qbuf[i][0] == '\0' ) continue;
  1735. // point to the query (these are NULL terminated)
  1736. char *q = g_qbuf[i];
  1737. int32_t qlen = gbstrlen(q);
  1738. // bail if too big
  1739. if ( p + qlen + 32 + 1024 >= pend ) return p - start;
  1740. // otherwise, print this query to the page
  1741. sprintf ( p , "<tr><td><a href=/cgi/0.cgi?q=" );
  1742. p += gbstrlen ( p );
  1743. // store encoded query as cgi parm
  1744. p += urlEncode ( p , q , qlen );
  1745. // end a href tag
  1746. *p++ = '>';
  1747. // . then print the actual query to the page
  1748. // . use htmlEncode so nobody can abuse it
  1749. p += saftenTags ( p , pend - p , q , qlen );
  1750. // wrap it up
  1751. sprintf ( p , "</a></td></tr>" );
  1752. p += gbstrlen ( p );
  1753. }
  1754. // end the table
  1755. sprintf ( p , "</table>");
  1756. p += gbstrlen ( p );
  1757. // return bytes written
  1758. return p - start;
  1759. }
  1760. */
  1761. //char *printTopDirectory ( char *p, char *pend ) {
  1762. bool printTopDirectory ( SafeBuf& sb , char format ) {
  1763. int32_t nr = g_catdb.getRdb()->getNumTotalRecs();
  1764. // if no recs in catdb, print instructions
  1765. if ( nr == 0 && format == FORMAT_HTML)
  1766. return sb.safePrintf("<center>"
  1767. "<b>DMOZ functionality is not set up.</b>"
  1768. "<br>"
  1769. "<br>"
  1770. "<b>"
  1771. "Please follow the set up "
  1772. "<a href=/faq.html#dmoz>"
  1773. "instructions"
  1774. "</a>."
  1775. "</b>"
  1776. "</center>");
  1777. // send back an xml/json error reply
  1778. if ( nr == 0 && format != FORMAT_HTML ) {
  1779. g_errno = EDMOZNOTREADY;
  1780. return false;
  1781. }
  1782. //char topList[4096];
  1783. //sprintf(topList,
  1784. return sb.safePrintf (
  1785. "<center>"
  1786. "<table cellspacing=\"4\" cellpadding=\"4\"><tr><td valign=top>\n"
  1787. "<b><a href=\"/Top/Arts/\">Arts</a></b><br>"
  1788. "<small>"
  1789. "<a href=\"/Top/Arts/Movies/\">Movies</a>, "
  1790. "<a href=\"/Top/Arts/Television/\">Television</a>, "
  1791. "<a href=\"/Top/Arts/Music/\">Music</a>..."
  1792. "</small>\n"
  1793. "</td><td valign=top>"
  1794. "<b><a href=\"/Top/Business/\">Business</a></b><br>"
  1795. "<small>"
  1796. "<a href=\"/Top/Business/Employment/\">Jobs</a>, "
  1797. "<a href=\"/Top/Business/Real_Estate/\">Real Estate</a>, "
  1798. "<a href=\"/Top/Business/Investing/\">Investing</a>..."
  1799. "</small>\n"
  1800. "</td><td valign=top>"
  1801. "<b><a href=\"/Top/Computers/\">Computers</a></b><br>"
  1802. "<small>"
  1803. "<a href=\"/Top/Computers/Internet/\">Internet</a>, "
  1804. "<a href=\"/Top/Computers/Software/\">Software</a>, "
  1805. "<a href=\"/Top/Computers/Hardware/\">Hardware</a>..."
  1806. "</small>\n"
  1807. "</td></tr><tr><td valign=top>"
  1808. "<b><a href=\"/Top/Games/\">Games</a></b><br>"
  1809. "<small>"
  1810. "<a href=\"/Top/Games/Video_Games/\">Video Games</a>, "
  1811. "<a href=\"/Top/Games/Roleplaying/\">RPGs</a>, "
  1812. "<a href=\"/Top/Games/Gambling/\">Gambling</a>..."
  1813. "</small>\n"
  1814. "</td><td valign=top>"
  1815. "<b><a href=\"/Top/Health/\">Health</a></b><br>"
  1816. "<small>"
  1817. "<a href=\"/Top/Health/Fitness/\">Fitness</a>, "
  1818. "<a href=\"/Top/Health/Medicine/\">Medicine</a>, "
  1819. "<a href=\"/Top/Health/Alternative/\">Alternative</a>..."
  1820. "</small>\n"
  1821. "</td><td valign=top>"
  1822. "<b><a href=\"/Top/Home/\">Home</a></b><br>"
  1823. "<small>"
  1824. "<a href=\"/Top/Home/Family/\">Family</a>, "
  1825. "<a href=\"/Top/Home/Consumer_Information/\">Consumers</a>, "
  1826. "<a href=\"/Top/Home/Cooking/\">Cooking</a>..."
  1827. "</small>\n"
  1828. "</td></tr><tr><td valign=top>"
  1829. //"<b><a href=\"/Top/Kids_and_Teens/\">"
  1830. //"<font color=\"#ff0000\">K</font>"
  1831. //"<font color=\"339900\">i</font>"
  1832. //"<font color=\"#ff6600\">d</font>"
  1833. //"<font color=\"#0066ff\">s</font>"
  1834. //" and Teens</a></b><br>"
  1835. "<b><a href=\"/Top/Kids_and_Teens/\">Kids and Teens</a></b><br>"
  1836. "<small>"
  1837. "<a href=\"/Top/Kids_and_Teens/Arts/\">Arts</a>, "
  1838. "<a href=\"/Top/Kids_and_Teens/School_Time/\">School Time</a>, "
  1839. "<a href=\"/Top/Kids_and_Teens/Teen_Life/\">Teen Life</a>..."
  1840. "</small>\n"
  1841. "</td><td valign=top>"
  1842. "<b><a href=\"/Top/News/\">News</a></b><br>"
  1843. "<small>"
  1844. "<a href=\"/Top/News/Media/\">Media</a>, "
  1845. "<a href=\"/Top/News/Newspapers/\">Newspapers</a>, "
  1846. "<a href=\"/Top/News/Weather/\">Weather</a>..."
  1847. "</small>\n"
  1848. "</td><td valign=top>"
  1849. "<b><a href=\"/Top/Recreation/\">Recreation</a></b><br>"
  1850. "<small>"
  1851. "<a href=\"/Top/Recreation/Travel/\">Travel</a>, "
  1852. "<a href=\"/Top/Recreation/Food/\">Food</a>, "
  1853. "<a href=\"/Top/Recreation/Outdoors/\">Outdoors</a>, "
  1854. "<a href=\"/Top/Recreation/Humor/\">Humor</a>..."
  1855. "</small>\n"
  1856. "</td></tr><tr><td valign=top>"
  1857. "<b><a href=\"/Top/Reference/\">Reference</a></b><br>"
  1858. "<small>"
  1859. "<a href=\"/Top/Reference/Maps/\">Maps</a>, "
  1860. "<a href=\"/Top/Reference/Education/\">Education</a>, "
  1861. "<a href=\"/Top/Reference/Libraries/\">Libraries</a>..."
  1862. "</small>\n"
  1863. "</td><td valign=top>"
  1864. "<b><a href=\"/Top/Regional/\">Regional</a></b><br>"
  1865. "<small>"
  1866. "<a href=\"/Top/Regional/North_America/United_States/\">US</a>, "
  1867. "<a href=\"/Top/Regional/North_America/Canada/\">Canada</a>, "
  1868. "<a href=\"/Top/Regional/Europe/United_Kingdom/\">UK</a>, "
  1869. "<a href=\"/Top/Regional/Europe/\">Europe</a>..."
  1870. "</small>\n"
  1871. "</td><td valign=top>"
  1872. "<b><a href=\"/Top/Science/\">Science</a></b><br>"
  1873. "<small>"
  1874. "<a href=\"/Top/Science/Biology/\">Biology</a>, "
  1875. "<a href=\"/Top/Science/Social_Sciences/Psychology/\">Psychology</a>, "
  1876. "<a href=\"/Top/Science/Physics/\">Physics</a>..."
  1877. "</small>\n"
  1878. "</td></tr><tr><td valign=top>"
  1879. "<b><a href=\"/Top/Shopping/\">Shopping</a></b><br>"
  1880. "<small>"
  1881. "<a href=\"/Top/Shopping/Vehicles/Autos/\">Autos</a>, "
  1882. "<a href=\"/Top/Shopping/Clothing/\">Clothing</a>, "
  1883. "<a href=\"/Top/Shopping/Gifts/\">Gifts</a>..."
  1884. "</small>\n"
  1885. "</td><td valign=top>"
  1886. "<b><a href=\"/Top/Society/\">Society</a></b><br>"
  1887. "<small>"
  1888. "<a href=\"/Top/Society/People/\">People</a>, "
  1889. "<a href=\"/Top/Society/Religion_and_Spirituality/\">Religion</a>, "
  1890. "<a href=\"/Top/Society/Issues/\">Issues</a>..."
  1891. "</small>\n"
  1892. "</td><td valign=top>"
  1893. "<b><a href=\"/Top/Sports/\">Sports</a></b><br>"
  1894. "<small>"
  1895. "<a href=\"/Top/Sports/Baseball/\">Baseball</a>, "
  1896. "<a href=\"/Top/Sports/Soccer/\">Soccer</a>, "
  1897. "<a href=\"/Top/Sports/Basketball/\">Basketball</a>..."
  1898. "</small>\n"
  1899. "</td></tr>"
  1900. "<tr><td colspan=3 valign=top>"
  1901. "<b><a href=\"/Top/World/\">World</a></b><br>"
  1902. "<small>"
  1903. "<a href=\"/Top/World/Deutsch/\">Deutsch</a>, "
  1904. "<a href=\"/Top/World/Espa%%c3%%b1ol/\">Espa%c%col</a>, "
  1905. "<a href=\"/Top/World/Fran%%c3%%a7ais/\">Fran%c%cais</a>, "
  1906. "<a href=\"/Top/World/Italiano/\">Italiano</a>, "
  1907. "<a href=\"/Top/World/Japanese/\">Japanese</a>, "
  1908. "<a href=\"/Top/World/Nederlands/\">Nederlands</a>, "
  1909. "<a href=\"/Top/World/Polska/\">Polska</a>, "
  1910. "<a href=\"/Top/World/Dansk/\">Dansk</a>, "
  1911. "<a href=\"/Top/World/Svenska/\">Svenska</a>..."
  1912. "</small>\n"
  1913. "</td></tr></table></center>\n",
  1914. 195, 177, 195, 167);
  1915. // make sure there's room
  1916. //int32_t topListLen = gbstrlen(topList);
  1917. //if (pend - p <= topListLen+1)
  1918. // return p;
  1919. // copy it in
  1920. //gbmemcpy(p, topList, topListLen);
  1921. //p += topListLen;
  1922. //*p = '\0';
  1923. //return p;
  1924. }
  1925. /////////////////
  1926. //
  1927. // ADD URL PAGE
  1928. //
  1929. /////////////////
  1930. #include "PageInject.h"
  1931. #include "TuringTest.h"
  1932. #include "AutoBan.h"
  1933. //#include "CollectionRec.h"
  1934. #include "Users.h"
  1935. #include "Spider.h"
  1936. //static bool sendReply ( void *state , bool addUrlEnabled );
  1937. static bool canSubmit (uint32_t h, int32_t now, int32_t maxUrlsPerIpDom);
  1938. //static void addedStuff ( void *state );
  1939. void resetPageAddUrl ( ) ;
  1940. /*
  1941. class State2 {
  1942. public:
  1943. Url m_url;
  1944. //char *m_buf;
  1945. //int32_t m_bufLen;
  1946. //int32_t m_bufMaxLen;
  1947. };
  1948. */
  1949. class State1i {
  1950. public:
  1951. //Msg4 m_msg4;
  1952. Msg7 m_msg7;
  1953. TcpSocket *m_socket;
  1954. bool m_isMasterAdmin;
  1955. char m_coll[MAX_COLL_LEN+1];
  1956. bool m_goodAnswer;
  1957. bool m_doTuringTest;
  1958. int32_t m_ufuLen;
  1959. char m_ufu[MAX_URL_LEN];
  1960. //int32_t m_urlLen;
  1961. //char m_url[MAX_URL_LEN];
  1962. //char m_username[MAX_USER_SIZE];
  1963. bool m_strip;
  1964. bool m_spiderLinks;
  1965. bool m_forceRespider;
  1966. // buf filled by the links coming from google, msn, yahoo, etc
  1967. //State2 m_state2[5]; // gb, goog, yahoo, msn, ask
  1968. int32_t m_numSent;
  1969. int32_t m_numReceived;
  1970. //int32_t m_raw;
  1971. //SpiderRequest m_sreq;
  1972. };
  1973. // only allow up to 1 Msg10's to be in progress at a time
  1974. static bool s_inprogress = false;
  1975. void doneInjectingWrapper3 ( void *st ) ;
  1976. // . returns false if blocked, true otherwise
  1977. // . sets g_errno on error
  1978. bool sendPageAddUrl ( TcpSocket *sock , HttpRequest *hr ) {
  1979. // . get fields from cgi field of the requested url
  1980. // . get the search query
  1981. int32_t urlLen = 0;
  1982. char *url = hr->getString ( "urls" , &urlLen , NULL /*default*/);
  1983. // see if they provided a url of a file of urls if they did not
  1984. // provide a url to add directly
  1985. bool isAdmin = g_conf.isCollAdmin ( sock , hr );
  1986. int32_t ufuLen = 0;
  1987. char *ufu = NULL;
  1988. //if ( isAdmin )
  1989. // // get the url of a file of urls (ufu)
  1990. // ufu = hr->getString ( "ufu" , &ufuLen , NULL );
  1991. // can't be too long, that's obnoxious
  1992. if ( urlLen > MAX_URL_LEN || ufuLen > MAX_URL_LEN ) {
  1993. g_errno = EBUFTOOSMALL;
  1994. g_msg = " (error: url too long)";
  1995. return g_httpServer.sendErrorReply(sock,500,"url too long");
  1996. }
  1997. // get the collection
  1998. //int32_t collLen = 0;
  1999. //char *coll9 = r->getString("c",NULL);//&collLen);
  2000. //if ( ! coll || ! coll[0] ) {
  2001. // //coll = g_conf.m_defaultColl;
  2002. // coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
  2003. // collLen = gbstrlen(coll);
  2004. //}
  2005. // get collection rec
  2006. CollectionRec *cr = g_collectiondb.getRec ( hr );
  2007. // bitch if no collection rec found
  2008. if ( ! cr ) {
  2009. g_errno = ENOCOLLREC;
  2010. g_msg = " (error: no collection)";
  2011. return g_httpServer.sendErrorReply(sock,500,"no coll rec");
  2012. }
  2013. // . make sure the ip is not banned
  2014. // . we may also have an exclusive list of IPs for private collections
  2015. if ( ! cr->hasSearchPermission ( sock ) ) {
  2016. g_errno = ENOPERM;
  2017. g_msg = " (error: permission denied)";
  2018. return g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno));
  2019. }
  2020. //
  2021. // if no url, print the main homepage page
  2022. //
  2023. if ( ! url ) {
  2024. SafeBuf sb;
  2025. printAddUrlHomePage ( sb , NULL , hr );
  2026. return g_httpServer.sendDynamicPage(sock,
  2027. sb.getBufStart(),
  2028. sb.length(),
  2029. // 120 secs cachetime
  2030. // don't cache any more
  2031. // since we have the
  2032. // login bar at top of page
  2033. 0,//120 ,// cachetime
  2034. false,// post?
  2035. "text/html",
  2036. 200,
  2037. NULL, // cookie
  2038. "UTF-8",
  2039. hr);
  2040. }
  2041. //
  2042. // run the ajax script on load to submit the url now
  2043. //
  2044. int32_t id = hr->getLong("id",0);
  2045. // if we are not being called by the ajax loader, the put the
  2046. // ajax loader script into the html now
  2047. if ( id == 0 ) {
  2048. SafeBuf sb;
  2049. printAddUrlHomePage ( sb , url , hr );
  2050. return g_httpServer.sendDynamicPage ( sock,
  2051. sb.getBufStart(),
  2052. sb.length(),
  2053. // don't cache any more
  2054. // since we have the
  2055. // login bar at top of
  2056. //page
  2057. 0,//3600,// cachetime
  2058. false,// post?
  2059. "text/html",
  2060. 200,
  2061. NULL, // cookie
  2062. "UTF-8",
  2063. hr);
  2064. }
  2065. //
  2066. // ok, inject the provided url!!
  2067. //
  2068. //
  2069. // check for errors first
  2070. //
  2071. // if addurl is turned off, just print "disabled" msg
  2072. char *msg = NULL;
  2073. if ( ! g_conf.m_addUrlEnabled )
  2074. msg = "Add url is temporarily disabled";
  2075. // can also be turned off in the collection rec
  2076. //if ( ! cr->m_addUrlEnabled )
  2077. // msg = "Add url is temporarily disabled";
  2078. // or if in read-only mode
  2079. if ( g_conf.m_readOnlyMode )
  2080. msg = "Add url is temporarily disabled";
  2081. // cannot add if another Msg10 from here is still in progress
  2082. if ( s_inprogress )
  2083. msg = "Add url is currently busy! Try again in a second.";
  2084. // . send msg back to the ajax request
  2085. // . use cachetime of 3600 so it does not re-inject if you hit the
  2086. // back button!
  2087. if ( msg ) {
  2088. SafeBuf sb;
  2089. sb.safePrintf("%s",msg);
  2090. g_httpServer.sendDynamicPage (sock,
  2091. sb.getBufStart(),
  2092. sb.length(),
  2093. 3600,//-1, // cachetime
  2094. false,// post?
  2095. "text/html",
  2096. 200, // http status
  2097. NULL, // cookie
  2098. "UTF-8");
  2099. return true;
  2100. }
  2101. // make a new state
  2102. State1i *st1 ;
  2103. try { st1 = new (State1i); }
  2104. catch ( ... ) {
  2105. g_errno = ENOMEM;
  2106. log("PageAddUrl: new(%i): %s",
  2107. (int)sizeof(State1i),mstrerror(g_errno));
  2108. return g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno)); }
  2109. mnew ( st1 , sizeof(State1i) , "PageAddUrl" );
  2110. // save socket and isAdmin
  2111. st1->m_socket = sock;
  2112. st1->m_isMasterAdmin = isAdmin;
  2113. /*
  2114. // save the url
  2115. st1->m_url[0] = '\0';
  2116. if ( url ) {
  2117. // normalize and add www. if it needs it
  2118. Url uu;
  2119. uu.set ( url , gbstrlen(url) , true );
  2120. // remove >'s i guess and store in st1->m_url[] buffer
  2121. st1->m_urlLen=cleanInput ( st1->m_url,
  2122. MAX_URL_LEN,
  2123. uu.getUrl(),
  2124. uu.getUrlLen() );
  2125. }
  2126. */
  2127. // save the "ufu" (url of file of urls)
  2128. st1->m_ufu[0] = '\0';
  2129. st1->m_ufuLen = ufuLen;
  2130. gbmemcpy ( st1->m_ufu , ufu , ufuLen );
  2131. st1->m_ufu[ufuLen] = '\0';
  2132. st1->m_doTuringTest = cr->m_doTuringTest;
  2133. st1->m_spiderLinks = true;
  2134. st1->m_strip = true;
  2135. // save the collection name in the State1i class
  2136. //if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
  2137. //strncpy ( st1->m_coll , coll , collLen );
  2138. //st1->m_coll [ collLen ] = '\0';
  2139. strcpy ( st1->m_coll , cr->m_coll );
  2140. // assume they answered turing test correctly
  2141. st1->m_goodAnswer = true;
  2142. // get ip of submitter
  2143. //uint32_t h = ipdom ( s->m_ip );
  2144. // . use top 2 bytes now, some isps have large blocks
  2145. // . if this causes problems, then they can do pay for inclusion
  2146. uint32_t h = iptop ( sock->m_ip );
  2147. int32_t codeLen;
  2148. char* code = hr->getString("code", &codeLen);
  2149. if(g_autoBan.hasCode(code, codeLen, sock->m_ip)) {
  2150. int32_t uipLen = 0;
  2151. char* uip = hr->getString("uip",&uipLen);
  2152. int32_t hip = 0;
  2153. //use the uip when we have a raw query to test if
  2154. //we can submit
  2155. if(uip) {
  2156. hip = atoip(uip, uipLen);
  2157. h = iptop( hip );
  2158. }
  2159. }
  2160. st1->m_strip = hr->getLong("strip",0);
  2161. // . Remember, for cgi, if the box is not checked, then it is not
  2162. // reported in the request, so set default return value to 0
  2163. // . support both camel case and all lower-cases
  2164. st1->m_spiderLinks = hr->getLong("spiderLinks",0);
  2165. st1->m_spiderLinks = hr->getLong("spiderlinks",st1->m_spiderLinks);
  2166. // . should we force it into spiderdb even if already in there
  2167. // . use to manually update spider times for a url
  2168. // . however, will not remove old scheduled spider times
  2169. // . mdw: made force on the default
  2170. st1->m_forceRespider = hr->getLong("force",1); // 0);
  2171. int32_t now = getTimeGlobal();
  2172. // . allow 1 submit every 1 hour
  2173. // . restrict by submitter domain ip
  2174. if ( ! st1->m_isMasterAdmin &&
  2175. ! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
  2176. // return error page
  2177. //g_errno = ETOOEARLY;
  2178. SafeBuf sb;
  2179. sb.safePrintf("You breached your add url quota.");
  2180. mdelete ( st1 , sizeof(State1i) , "PageAddUrl" );
  2181. delete (st1);
  2182. // use cachetime of 3600 so it does not re-inject if you hit
  2183. // the back button!
  2184. g_httpServer.sendDynamicPage (sock,
  2185. sb.getBufStart(),
  2186. sb.length(),
  2187. 3600,//-1, // cachetime
  2188. false,// post?
  2189. "text/html",
  2190. 200, // http status
  2191. NULL, // cookie
  2192. "UTF-8");
  2193. return true;
  2194. }
  2195. //st1->m_query = r->getString( "qts", &st1->m_queryLen );
  2196. // check it, if turing test is enabled for this collection
  2197. /*
  2198. if ( ! st1->m_isMasterAdmin && cr->m_doTuringTest &&
  2199. ! g_turingTest.isHuman(r) ) {
  2200. // log note so we know it didn't make it
  2201. g_msg = " (error: bad answer)";
  2202. //log("PageAddUrl:: addurl failed for %s : bad answer",
  2203. // iptoa(sock->m_ip));
  2204. st1->m_goodAnswer = false;
  2205. return sendReply ( st1 , true ); // addUrl enabled?
  2206. }
  2207. */
  2208. Msg7 *msg7 = &st1->m_msg7;
  2209. // set this.
  2210. InjectionRequest *ir = &msg7->m_injectionRequest;
  2211. // default to zero
  2212. memset ( ir , 0 , sizeof(InjectionRequest) );
  2213. // this will fill in GigablastRequest so all the parms we need are set
  2214. //setInjectionRequestFromParms ( sock , hr , cr , ir );
  2215. int32_t collLen = 0;
  2216. char *coll = hr->getString( "c" , &collLen ,NULL );
  2217. if ( ! coll || ! coll[0] ) {
  2218. coll = g_conf.getDefaultColl( hr->getHost(), hr->getHostLen());
  2219. collLen = gbstrlen(coll);
  2220. }
  2221. ir->m_collnum = g_collectiondb.getCollnum ( coll );
  2222. ir->ptr_url = hr->getString("u",NULL);
  2223. if ( ! ir->ptr_url ) ir->ptr_url = hr->getString("url",NULL);
  2224. if ( ! ir->ptr_url ) ir->ptr_url = hr->getString("urls",NULL);
  2225. if ( ! ir->ptr_url ) {
  2226. g_errno = EBADURL;
  2227. doneInjectingWrapper3 ( st1 );
  2228. return true;
  2229. }
  2230. // include \0 in size
  2231. ir->size_url = gbstrlen(ir->ptr_url)+1;
  2232. // get back a short reply so we can show the status code easily
  2233. ir->m_shortReply = 1;
  2234. ir->m_spiderLinks = st1->m_spiderLinks;
  2235. // this is really an injection, not add url, so make
  2236. // GigablastRequest::m_url point to Gigablast::m_urlsBuf because
  2237. // the PAGE_ADDURLS2 parms in Parms.cpp fill in the m_urlsBuf.
  2238. // HACK!
  2239. //gr->m_url = gr->m_urlsBuf;
  2240. //ir->ptr_url = gr->m_urlsBuf;
  2241. //
  2242. // inject using msg7
  2243. //
  2244. // . pass in the cleaned url
  2245. // . returns false if blocked, true otherwise
  2246. if ( ! msg7->sendInjectionRequestToHost ( ir, st1 ,
  2247. doneInjectingWrapper3 ) ) {
  2248. // there was an error
  2249. log("http: error sending injection request: %s"
  2250. ,mstrerror(g_errno));
  2251. // we did not block, but had an error
  2252. return true;
  2253. }
  2254. //log("http: injection did not block");
  2255. // some kinda error, g_errno should be set i guess
  2256. //doneInjectingWrapper3 ( st1 );
  2257. // we did not block
  2258. //return true;
  2259. // wait for the reply, this 'blocked'
  2260. return false;
  2261. }
  2262. void doneInjectingWrapper3 ( void *st ) {
  2263. State1i *st1 = (State1i *)st;
  2264. // allow others to add now
  2265. s_inprogress = false;
  2266. // get the state properly
  2267. //State1i *st1 = (State1i *) state;
  2268. // in order to see what sites are being added log it, then we can
  2269. // more easily remove sites from sitesearch.gigablast.com that are
  2270. // being added but not being searched
  2271. //char *url = st1->m_msg7.m_xd.m_firstUrl.m_url;
  2272. Msg7 *msg7 = &st1->m_msg7;
  2273. InjectionRequest *ir = &msg7->m_injectionRequest;
  2274. char *url = ir->ptr_url;
  2275. log(LOG_INFO,"http: add url %s (%s)",url ,mstrerror(g_errno));
  2276. // extract info from state
  2277. TcpSocket *sock = st1->m_socket;
  2278. //bool isAdmin = st1->m_isMasterAdmin;
  2279. //char *url = NULL;
  2280. //if ( st1->m_urlLen ) url = st1->m_url;
  2281. // re-null it out if just http://
  2282. //bool printUrl = true;
  2283. //if ( st1->m_urlLen == 0 ) printUrl = false;
  2284. //if ( ! st1->m_url ) printUrl = false;
  2285. //if(st1->m_urlLen==7&&st1->m_url&&!strncasecmp(st1->m_url,"http://",7)
  2286. // printUrl = false;
  2287. // page is not more than 32k
  2288. char buf[1024*32+MAX_URL_LEN*2];
  2289. SafeBuf sb(buf, 1024*32+MAX_URL_LEN*2);
  2290. //char rawbuf[1024*8];
  2291. //SafeBuf rb(rawbuf, 1024*8);
  2292. //rb.safePrintf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
  2293. //rb.safePrintf("<status>\n");
  2294. //CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
  2295. // collection name
  2296. char *coll = st1->m_coll;
  2297. if ( ! coll ) coll = "";
  2298. //char tt [ 128 ];
  2299. //tt[0] = '\0';
  2300. //if ( st1->m_coll[0] != '\0' && ! isAdmin )
  2301. // sprintf ( tt , " for %s", st1->m_coll );
  2302. //
  2303. // what we print here will just be the error msg, because the
  2304. // ajax will fill the text we print here into the div below
  2305. // the add url box
  2306. //
  2307. // if there was an error let them know
  2308. //char msg[MAX_URL_LEN + 1024];
  2309. char *pm = "";
  2310. if ( g_errno ) {
  2311. if ( g_errno == ETOOEARLY ) {
  2312. pm = "Error. 100 urls have "
  2313. "already been submitted by "
  2314. "this IP address for the last 24 hours. "
  2315. "<a href=/addurlerror.html>Explanation</a>.";
  2316. log("addurls: Failed for user at %s: "
  2317. "quota breeched.", iptoa(sock->m_ip));
  2318. //rb.safePrintf("Error. %"INT32" urls have "
  2319. // "already been submitted by "
  2320. // "this IP address for the "
  2321. // "last 24 hours. ",
  2322. // cr->m_maxAddUrlsPerIpDomPerDay);
  2323. sb.safePrintf("%s",pm);
  2324. }
  2325. else {
  2326. sb.safePrintf("Error adding url(s): <b>%s[%i]</b>",
  2327. mstrerror(g_errno) , g_errno);
  2328. //pm = msg;
  2329. //rb.safePrintf("Error adding url(s): %s[%i]",
  2330. // mstrerror(g_errno) , g_errno);
  2331. //sb.safePrintf("%s",pm);
  2332. }
  2333. }
  2334. else {
  2335. if ( ! g_conf.m_addUrlEnabled ) {
  2336. pm = "<font color=#ff0000>"
  2337. "Sorry, this feature is temporarily disabled. "
  2338. "Please try again later.</font>";
  2339. if ( url )
  2340. log("addurls: failed for user at %s: "
  2341. "add url is disabled. "
  2342. "Enable add url on the "
  2343. "Master Controls page and "
  2344. "on the Spider Controls page for "
  2345. "this collection.",
  2346. iptoa(sock->m_ip));
  2347. sb.safePrintf("%s",pm);
  2348. //rb.safePrintf("Sorry, this feature is temporarily "
  2349. // "disabled. Please try again later.");
  2350. }
  2351. else if ( s_inprogress ) {
  2352. pm = "Add url busy. Try again later.";
  2353. log("addurls: Failed for user at %s: "
  2354. "busy adding another.", iptoa(sock->m_ip));
  2355. //rb.safePrintf("Add url busy. Try again later.");
  2356. sb.safePrintf("%s",pm);
  2357. }
  2358. // did they fail the turing test?
  2359. else if ( ! st1->m_goodAnswer ) {
  2360. pm = "<font color=#ff0000>"
  2361. "Oops, you did not enter the 4 large letters "
  2362. "you see below. Please try again.</font>";
  2363. //rb.safePrintf("could not add the url"
  2364. // " because the turing test"
  2365. // " is enabled.");
  2366. sb.safePrintf("%s",pm);
  2367. }
  2368. else if ( msg7->m_replyIndexCode ) {
  2369. //st1->m_msg7.m_xd.m_indexCodeValid &&
  2370. // st1->m_msg7.m_xd.m_indexCode ) {
  2371. //int32_t ic = st1->m_msg7.m_xd.m_indexCode;
  2372. sb.safePrintf("<b>Had error injecting url: %s</b>",
  2373. mstrerror(msg7->m_replyIndexCode));
  2374. }
  2375. /*
  2376. if ( url && ! st1->m_ufu[0] && url[0] && printUrl ) {
  2377. sprintf ( msg ,"<u>%s</u> added to spider "
  2378. "queue "
  2379. "successfully", url );
  2380. //rb.safePrintf("%s added to spider "
  2381. // "queue successfully", url );
  2382. }
  2383. else if ( st1->m_ufu[0] ) {
  2384. sprintf ( msg ,"urls in <u>%s</u> "
  2385. "added to spider queue "
  2386. "successfully", st1->m_ufu );
  2387. //rb.safePrintf("urls in %s added to spider "
  2388. // "queue successfully", url );
  2389. }
  2390. */
  2391. else {
  2392. //rb.safePrintf("Add the url you want:");
  2393. // avoid hitting browser page cache
  2394. uint32_t rand32 = rand();
  2395. // in the mime to 0 seconds!
  2396. sb.safePrintf("<b>Url successfully added. "
  2397. "<a href=/search?rand=%"UINT32"&"
  2398. "c=%s&q=url%%3A",
  2399. rand32,
  2400. coll);
  2401. sb.urlEncode(url);
  2402. sb.safePrintf(">Check it</a>"// or "
  2403. //"<a href=http://www.gigablast."
  2404. //"com/seo?u=");
  2405. //sb.urlEncode(url);
  2406. //sb.safePrintf(">SEO it</a>"
  2407. "."
  2408. "</b>");
  2409. }
  2410. //pm = msg;
  2411. //url = "http://";
  2412. //else
  2413. // pm = "Don't forget to <a href=/gigaboost.html>"
  2414. // "Gigaboost</a> your URL.";
  2415. }
  2416. // store it
  2417. sb.safePrintf("<b>%s</b>",pm );
  2418. // clear g_errno, if any, so our reply send goes through
  2419. g_errno = 0;
  2420. // nuke state
  2421. mdelete ( st1 , sizeof(State1i) , "PageAddUrl" );
  2422. delete (st1);
  2423. // this reply should be loaded from the ajax loader so use a cache
  2424. // time of 1 hour so it does not re-inject the url if you hit the
  2425. // back button
  2426. g_httpServer.sendDynamicPage (sock,
  2427. sb.getBufStart(),
  2428. sb.length(),
  2429. 3600, // cachetime
  2430. false,// post?
  2431. "text/html",
  2432. 200, // http status
  2433. NULL, // cookie
  2434. "UTF-8");
  2435. }
  2436. // we get like 100k submissions a day!!!
  2437. static HashTable s_htable;
  2438. static bool s_init = false;
  2439. static int32_t s_lastTime = 0;
  2440. bool canSubmit ( uint32_t h , int32_t now , int32_t maxAddUrlsPerIpDomPerDay ) {
  2441. // . sometimes no limit
  2442. // . 0 means no limit because if they don't want any submission they
  2443. // can just turn off add url and we want to avoid excess
  2444. // troubleshooting for why a url can't be added
  2445. if ( maxAddUrlsPerIpDomPerDay <= 0 ) return true;
  2446. // init the table
  2447. if ( ! s_init ) {
  2448. s_htable.set ( 50000 );
  2449. s_init = true;
  2450. }
  2451. // clean out table every 24 hours
  2452. if ( now - s_lastTime > 24*60*60 ) {
  2453. s_lastTime = now;
  2454. s_htable.clear();
  2455. }
  2456. // . if table almost full clean out ALL slots
  2457. // . TODO: just clean out oldest slots
  2458. if ( s_htable.getNumSlotsUsed() > 47000 ) s_htable.clear ();
  2459. // . how many times has this IP domain submitted?
  2460. // . allow 10 times per day
  2461. int32_t n = s_htable.getValue ( h );
  2462. // if over 24hr limit then bail
  2463. if ( n >= maxAddUrlsPerIpDomPerDay ) return false;
  2464. // otherwise, inc it
  2465. n++;
  2466. // add to table, will replace old values
  2467. s_htable.addKey ( h , n );
  2468. return true;
  2469. }
  2470. void resetPageAddUrl ( ) {
  2471. s_htable.reset();
  2472. }
  2473. /*
  2474. bool sendPageAdvanced ( TcpSocket *sock , HttpRequest *hr ) {
  2475. SafeBuf sb;
  2476. CollectionRec *cr = g_collectiondb.getRec ( hr );
  2477. printFrontPageShell ( &sb , "advanced" , cr , true );
  2478. sb.safePrintf("<br><br>\n");
  2479. sb.safePrintf("<br><br><br>\n");
  2480. // submit to https now
  2481. sb.safePrintf("<form method=GET "
  2482. "action=/search name=f>\n" );
  2483. char *coll = "";
  2484. if ( cr ) coll = cr->m_coll;
  2485. if ( cr )
  2486. sb.safePrintf("<input type=hidden name=c value=\"%s\">",
  2487. cr->m_coll);
  2488. sb.safePrintf(
  2489. "<script type=text/javascript>"
  2490. "<!--"
  2491. "function x(){document.f.q.focus();}"
  2492. "// -->"
  2493. "</script>"
  2494. "</head>"
  2495. ""
  2496. "<body onload=x()>"
  2497. //"<form method=get action=/search>"
  2498. " <table width=605 border=0 align=center cellpadding=5 cellspacing=3>"
  2499. " <tbody>"
  2500. " <tr align=left valign=middle>"
  2501. " <th colspan=3>Search for...</th>"
  2502. " </tr>"
  2503. " <tr align=left valign=middle>"
  2504. " <td><strong>all</strong> of these words</td>"
  2505. " <td><input type=text name=plus size=40 />"
  2506. "</td><td>"
  2507. "<div onclick=document.f.submit(); "
  2508. " onmouseover=\""
  2509. "this.style.backgroundColor='lightgreen';"
  2510. "this.style.color='black';\""
  2511. " onmouseout=\""
  2512. "this.style.backgroundColor='green';"
  2513. "this.style.color='white';\" "
  2514. "style=border-radius:28px;"
  2515. "cursor:pointer;"
  2516. "cursor:hand;"
  2517. "border-color:white;"
  2518. "border-style:solid;"
  2519. "border-width:3px;"
  2520. "padding:12px;"
  2521. "width:20px;"
  2522. "height:20px;"
  2523. "display:inline-block;"
  2524. "background-color:green;color:white;>"
  2525. "<b style=margin-left:-5px;font-size:18px;"
  2526. ">GO</b>"
  2527. "</div>"
  2528. "</td>"
  2529. " </tr>"
  2530. " <tr align=left valign=middle>"
  2531. " <td>this <strong>exact phrase</strong></td>"
  2532. " <td colspan=2><input type=text name=quote1 size=40 /></td>"
  2533. " </tr>"
  2534. " <tr align=left valign=middle>"
  2535. " <td>and this <strong>exact phrase</strong></td>"
  2536. " <td colspan=2><input type=text name=quote2 size=40 /></td>"
  2537. " </tr>"
  2538. " <tr align=left valign=middle>"
  2539. " <td><strong>any</strong> of these words</td>"
  2540. " <td colspan=2><input type=text name=q size=40 /></td>"
  2541. " </tr>"
  2542. " <tr align=left valign=middle>"
  2543. " <td><strong>none</strong> of these words</td>"
  2544. " <td colspan=2><input type=text name=minus size=40 /></td>"
  2545. " </tr>"
  2546. ""
  2547. " <tr align=left valign=middle>"
  2548. " <td>In this language:"
  2549. " </td>"
  2550. " <td colspan=2>"
  2551. " <select name=gblang>"
  2552. " <option value=0>Any</option>"
  2553. " <option value=1>English</option>"
  2554. "<option value=2>French</option> "
  2555. "<option value=3>Spanish</option>"
  2556. "<option value=4>Russian</option>"
  2557. "<option value=5>Turkish</option>"
  2558. "<option value=6>Japanese</option>"
  2559. "<option value=7>ChineseTrad</option>"
  2560. "<option value=8>ChineseSimp</option>"
  2561. "<option value=9>Korean</option>"
  2562. "<option value=10>German</option>"
  2563. "<option value=11>Dutch</option>"
  2564. "<option value=12>Italian</option>"
  2565. "<option value=13>Finnish</option>"
  2566. "<option value=14>Swedish</option>"
  2567. "<option value=15>Norwegian</option>"
  2568. "<option value=16>Portuguese</option>"
  2569. "<option value=17>Vietnamese</option>"
  2570. "<option value=18>Arabic</option>"
  2571. "<option value=19>Hebrew</option>"
  2572. "<option value=20>Indonesian</option>"
  2573. "<option value=21>Greek</option>"
  2574. "<option value=22>Thai</option>"
  2575. "<option value=23>Hindi</option>"
  2576. "<option value=24>Bengala</option>"
  2577. "<option value=25>Polish</option>"
  2578. "<option value=26>Tagalog</option>"
  2579. " </select>"
  2580. " </td>"
  2581. " </tr>"
  2582. ""
  2583. ""
  2584. " <tr align=left valign=middle>"
  2585. " <td>Restrict to this URL</td>"
  2586. " <td colspan=2><input type=text name=url size=40 /></td>"
  2587. " </tr>"
  2588. " <tr align=left valign=middle>"
  2589. " <td>Pages that link to this URL</td>"
  2590. " <td colspan=2><input type=text name=link size=40 /></td>"
  2591. " </tr>"
  2592. " <tr align=left valign=middle>"
  2593. " <td>Site Clustering</td>"
  2594. " <td colspan=2><input type=radio name=sc value=1 checked=checked />yes&nbsp;&nbsp;&nbsp;<input type=radio name=sc value=0 />no</td>"
  2595. " </tr>"
  2596. " <tr align=left valign=middle>"
  2597. " <td>Number of summary excerpts</td>"
  2598. " <td colspan=2><input type=radio name=ns value=0 />0&nbsp;&nbsp;&nbsp;<input type=radio name=ns value=1 />1&nbsp;&nbsp;&nbsp;<input type=radio name=ns value=2 />2&nbsp;&nbsp;&nbsp;<input type=radio name=ns value=3 checked=checked />3&nbsp;&nbsp;&nbsp;<input type=radio name=ns value=4 />4&nbsp;&nbsp;&nbsp;<input type=radio name=ns value=5 />5</td>"
  2599. " </tr>"
  2600. " <tr align=left valign=middle>"
  2601. " <td>Results per Page</td>"
  2602. " <td colspan=2><input type=radio name=n value=10 checked=checked />10&nbsp;&nbsp;<input type=radio name=n value=20 />20&nbsp;&nbsp;<input type=radio name=n value=30 />30&nbsp;&nbsp;<input type=radio name=n value=40 />40&nbsp;&nbsp;<input type=radio name=n value=50 />50&nbsp;&nbsp;<input type=radio name=n value=100 />100</td>"
  2603. " </tr>"
  2604. " <tr align=left valign=middle>"
  2605. " <td>Restrict to these Sites</td>"
  2606. " <td colspan=2><textarea rows=10 cols=40 name=sites></textarea></td>"
  2607. " </tr>"
  2608. " </tbody>"
  2609. " </table>"
  2610. );
  2611. sb.safePrintf("</form>\n");
  2612. sb.safePrintf("<br>\n");
  2613. sb.safePrintf("\n");
  2614. sb.safePrintf("<br><br>\n");
  2615. printNav ( sb , hr );
  2616. g_httpServer.sendDynamicPage (sock,
  2617. sb.getBufStart(),
  2618. sb.length(),
  2619. 3600, // cachetime
  2620. false,// post?
  2621. "text/html",
  2622. 200, // http status
  2623. NULL, // cookie
  2624. "UTF-8");
  2625. return true;
  2626. }
  2627. */
  2628. bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
  2629. SafeBuf sb;
  2630. CollectionRec *cr = g_collectiondb.getRec ( hr );
  2631. printFrontPageShell ( &sb , "syntax" , cr , true );
  2632. sb.safePrintf("<br><br>\n");
  2633. sb.safePrintf("<br><br><br>\n");
  2634. // submit to https now
  2635. //sb.safePrintf("<form method=GET "
  2636. // "action=/addurl name=f>\n" );
  2637. // char *coll = "";
  2638. // if ( cr ) coll = cr->m_coll;
  2639. // if ( cr )
  2640. // sb.safePrintf("<input type=hidden name=c value=\"%s\">",
  2641. // cr->m_coll);
  2642. char *qc = "demo";
  2643. char *host = "http://www.gigablast.com";
  2644. // for debug make it local on laptop
  2645. host = "";
  2646. sb.safePrintf(
  2647. "<br>"
  2648. "<table width=650px cellpadding=5 cellspacing=0 border=0>"
  2649. ""
  2650. // yellow/gold bar
  2651. "<tr>"
  2652. "<td colspan=2 bgcolor=#%s>" // f3c714>"
  2653. "<b>"
  2654. "Basic Query Syntax"
  2655. "</b>"
  2656. "</td>"
  2657. "</tr>\n"
  2658. "<tr bgcolor=#0340fd>"
  2659. ""
  2660. "<th><font color=33dcff>Example Query</font></th>"
  2661. "<th><font color=33dcff>Description</font></th>"
  2662. "</tr>"
  2663. "<tr> "
  2664. "<td><a href=%s/search?c=%s&q=cat+dog>cat dog</a></td>"
  2665. " <td>Search results have the word <em>cat</em> and the word <em>dog</em> "
  2666. " in them. They could also have <i>cats</i> and <i>dogs</i>.</td>"
  2667. " </tr>"
  2668. ""
  2669. ""
  2670. " <tr bgcolor=#E1FFFF> "
  2671. " <td><a href=%s/search?c=%s&q=%%2Bcat>+cat</a></td>"
  2672. " <td>Search results have the word <em>cat</em> in them. If the search results has the word <i>cats</i> then it will not be included. The plus sign indicates an exact match and not to use synonyms, hypernyms or hyponyms or any other form of the word.</td>"
  2673. " </tr>"
  2674. ""
  2675. ""
  2676. " <tr> "
  2677. " <td height=10><a href=%s/search?c=%s&q=mp3+%%22take+five%%22>mp3&nbsp;\"take&nbsp;five\"</a></td>"
  2678. " <td>Search results have the word <em>mp3</em> and the exact phrase <em>take "
  2679. " five</em> in them.</td>"
  2680. " </tr>"
  2681. " <tr bgcolor=#E1FFFF> "
  2682. " <td><a href=%s/search?c=%s&q=%%22john+smith%%22+-%%22bob+dole%%22>\"john&nbsp;smith\"&nbsp;-\"bob&nbsp;dole\"</a></td>"
  2683. " <td>Search results have the phrase <em>john smith</em> but NOT the "
  2684. " phrase <em>bob dole</em> in them.</td>"
  2685. " </tr>"
  2686. " <tr> "
  2687. " <td><a href=%s/search?c=%s&q=bmx+-game>bmx&nbsp;-game</a></td>"
  2688. " <td>Search results have the word <em>bmx</em> but not <em>game</em>.</td>"
  2689. " </tr>"
  2690. // " <tr> "
  2691. // " <td><a href=/search?q=inurl%%3Aedu+title%%3Auniversity><b>inurl:</b></a><a href=/search?q=inurl%%3Aedu+title%%3Auniversity>edu <b>title:</b>university</a></td>"
  2692. // " <td>Search results have <em>university</em> in their title and <em>edu</em> "
  2693. // " in their url.</td>"
  2694. // " </tr>"
  2695. // " <tr bgcolor=#E1FFFF> "
  2696. // " <td><a href=/search?q=site%%3Awww.ibm.com+%%22big+blue%%22><b>site:</b></a><a href=/search?q=site%%3Awww.ibm.com+%%22big+blue%%22>www.ibm.com&nbsp;\"big&nbsp;blue\"</a></td>"
  2697. // " <td>Search results are from the site <em>www.ibm.com</em> and have the phrase "
  2698. // " <em>big blue</em> in them.</td>"
  2699. // " </tr>"
  2700. // " <tr> "
  2701. // " <td><a href=/search?q=url%%3Awww.yahoo.com><b>url:</b></a><a href=/search?q=url%%3Awww.yahoo.com&n=10>www.yahoo.com</a></td>"
  2702. // " <td>Search result is the single URL www.yahoo.com, if it is indexed.</td>"
  2703. // " </tr>"
  2704. // " <tr bgcolor=#E1FFFF> "
  2705. // " <td><nobr><a href=/search?q=title%%3A%%22the+news%%22+-%%22weather+report%%22><b>title:</b>\"the "
  2706. // " news\" -\"weather report\"</a></nobr></td>"
  2707. // " <td>Search results have the phrase <em>the news</em> in their title, "
  2708. // " and do NOT have the phrase <em>weather report</em> anywhere in their "
  2709. // " content.</td>"
  2710. // " </tr>"
  2711. // " <tr> "
  2712. // " <td><a href=/search?q=ip%%3A216.32.120+cars><b>ip:</b></a><a href=/search?q=ip%%3A216.32.120>216.32.120</a></td>"
  2713. // " <td>Search results are from the the ip 216.32.120.*.</td>"
  2714. // " </tr>"
  2715. // ""
  2716. // " <tr bgcolor=#E1FFFF> "
  2717. // " <td><a href=/search?q=type%%3Apdf+nutrition><b>type:</b>pdf nutrition</a></td>"
  2718. // " <td>Search results are PDF (Portable Document Format) documents that "
  2719. // " contain the word <em>nutrition</em>.</td>"
  2720. // " </tr>"
  2721. // " <tr> "
  2722. // " <td><a href=/search?q=type%%3Adoc><b>type:</b>doc</a></td>"
  2723. // " <td>Search results are Microsoft Word documents.</td>"
  2724. // " </tr>"
  2725. // " <tr bgcolor=#E1FFFF> "
  2726. // " <td><a href=/search?q=type%%3Axls><b>type:</b>xls</a></td>"
  2727. // " <td>Search results are Microsoft Excel documents.</td>"
  2728. // " </tr>"
  2729. // " <tr> "
  2730. // " <td><a href=/search?q=type%%3Appt><b>type:</b>ppt</a></td>"
  2731. // " <td>Search results are Microsoft Power Point documents.</td>"
  2732. // " </tr>"
  2733. // " <tr bgcolor=#E1FFFF> "
  2734. // " <td><a href=/search?q=type%%3Aps><b>type:</b>ps</a></td>"
  2735. // " <td>Search results are Postscript documents.</td>"
  2736. // " </tr>"
  2737. // " <tr> "
  2738. // " <td><a href=/search?q=type%%3Atext><b>type:</b>text</a></td>"
  2739. // " <td>Search results are plain text documents.</td>"
  2740. // " </tr>"
  2741. // " <tr bgcolor=#E1FFFF> "
  2742. // " <td><a href=/search?q=filetype%%3Apdf><b>filetype:</b>pdf</a></td>"
  2743. // " <td>Search results are PDF documents.</td>"
  2744. // " </tr>"
  2745. // ""
  2746. // ""
  2747. // " <tr bgcolor=#E1FFFF> "
  2748. // " <td><a href=/search?q=link%%3Awww.yahoo.com><b>link:</b>www.yahoo.com</a></td>"
  2749. // " <td>All the pages that link to www.yahoo.com.</td>"
  2750. // " </tr>"
  2751. // ""
  2752. // " <tr bgcolor=#E1FFFF> "
  2753. // " <td><a href=/search?q=sitelink%%3Awww.yahoo.com><b>sitelink:</b>www.yahoo.com</a></td>"
  2754. // " <td>All the pages that link to any page on www.yahoo.com.</td>"
  2755. // " </tr>"
  2756. // ""
  2757. // " <tr bgcolor=#E1FFFF> "
  2758. // " <td><a href=/search?q=ext%%3Atxt><b>ext:</b>txt</a></td>"
  2759. // " <td>All the pages whose url ends in the .txt extension.</td>"
  2760. // " </tr>"
  2761. // ""
  2762. // ""
  2763. , GOLD
  2764. , host
  2765. , qc
  2766. , host
  2767. , qc
  2768. , host
  2769. , qc
  2770. , host
  2771. , qc
  2772. , host
  2773. , qc
  2774. );
  2775. sb.safePrintf(
  2776. // spacer
  2777. //"<tr><td><br></td><td></td></tr>"
  2778. //"<tr bgcolor=#0340fd>"
  2779. // "<td><font color=33dcff><b>Special Query</b>"
  2780. // "</font></td>"
  2781. //"<td><font color=33dcff><b>Description</b></font></td>"
  2782. // "</tr>"
  2783. "<tr bgcolor=#E1FFFF>"
  2784. "<td><a href=%s/search?c=%s&q=cat|dog>cat | dog</a>"
  2785. "</td><td>"
  2786. "Match documents that have cat and dog in them, but "
  2787. "do not allow cat to affect the ranking score, only "
  2788. "dog. This is called a <i>query refinement</i>."
  2789. "</td></tr>\n"
  2790. "<tr bgcolor=#ffFFFF>"
  2791. "<td><a href=%s/search?c=%s&q=document.title:paper>"
  2792. "document.title:paper</a></td><td>"
  2793. "That query will match a JSON document like "
  2794. "<i>"
  2795. "{ \"document\":{\"title\":\"This is a good paper.\" "
  2796. "}}</i> or, alternatively, an XML document like <i>"
  2797. , host
  2798. , qc
  2799. , host
  2800. , qc
  2801. );
  2802. sb.htmlEncode("<document><title>This is a good paper"
  2803. "</title></document>" );
  2804. sb.safePrintf("</i></td></tr>\n");
  2805. char *bg1 = "#E1FFFF";
  2806. char *bg2 = "#ffffff";
  2807. char *bgcolor = bg1;
  2808. // table of the query keywords
  2809. int32_t n = getNumFieldCodes();
  2810. for ( int32_t i = 0 ; i < n ; i++ ) {
  2811. // get field #i
  2812. QueryField *f = &g_fields[i];
  2813. if ( g_fields[i].m_flag & QTF_HIDE ) continue;
  2814. // new table?
  2815. if ( g_fields[i].m_flag & QTF_BEGINNEWTABLE ) {
  2816. sb.safePrintf("</table>"
  2817. "<br>"
  2818. "<br>"
  2819. "<br>"
  2820. "<table width=650px "
  2821. "cellpadding=5 cellspacing=0 border=0>"
  2822. // yellow/gold bar
  2823. "<tr>"
  2824. "<td colspan=2 bgcolor=#%s>"//f3c714>"
  2825. "<b>"
  2826. "%s"
  2827. "</b>"
  2828. "</td>"
  2829. "</tr>\n"
  2830. "<tr bgcolor=#0340fd>"
  2831. "<th><font color=33dcff>"
  2832. "Example Query</font></th>"
  2833. "<th><font color=33dcff>"
  2834. "Description</font></th>"
  2835. "</tr>\n"
  2836. , GOLD
  2837. , g_fields[i].m_title
  2838. );
  2839. }
  2840. // print it out
  2841. char *d = f->desc;
  2842. // fix table internal cell bordering
  2843. if ( ! d || d[0] == '\0' ) d = "&nbsp;";
  2844. sb.safePrintf("<tr bgcolor=%s>"
  2845. "<td><nobr><a href=\"%s/search?c=%s&q="
  2846. , bgcolor
  2847. , host
  2848. , qc
  2849. );
  2850. sb.urlEncode ( f->example );
  2851. sb.safePrintf("\">");
  2852. sb.safePrintf("%s</a></nobr></td>"
  2853. "<td>%s</td></tr>\n",
  2854. f->example,
  2855. d);
  2856. if ( bgcolor == bg1 ) bgcolor = bg2;
  2857. else bgcolor = bg1;
  2858. }
  2859. sb.safePrintf(
  2860. // " <tr> "
  2861. // " <td style=padding-bottom:12px;>&nbsp;</td>"
  2862. // " <td style=padding-bottom:12px;>&nbsp;</td>"
  2863. // " </tr>"
  2864. // ""
  2865. "</table>"
  2866. "<br><br><br>"
  2867. "<table width=650px "
  2868. "cellpadding=5 cellspacing=0 border=0>"
  2869. // yellow/gold bar
  2870. "<tr>"
  2871. "<td colspan=2 bgcolor=#%s>" // f3c714>"
  2872. "<b>"
  2873. "Boolean Queries"
  2874. "</b>"
  2875. "</td>"
  2876. "</tr>\n"
  2877. "<tr bgcolor=#0340fd>"
  2878. ""
  2879. " <th><font color=33dcff>Example Query</font></th>"
  2880. " <th><font color=33dcff>Description</font></th>"
  2881. ""
  2882. " </tr>"
  2883. ""
  2884. " <tr> "
  2885. " <td colspan=2 bgcolor=#FFFFCC><center>"
  2886. " Note: boolean operators must be in UPPER CASE. "
  2887. " </td>"
  2888. " </tr>"
  2889. " <tr> "
  2890. " <td><a href=%s/search?c=%s&q=cat+AND+dog>cat&nbsp;AND&nbsp;dog</a></td>"
  2891. " <td>Search results have the word <em>cat</em> AND the word <em>dog</em> "
  2892. " in them.</td>"
  2893. " </tr>"
  2894. " <tr bgcolor=#E1FFFF> "
  2895. " <td><a href=%s/search?c=%s&q=cat+OR+dog>cat&nbsp;OR&nbsp;dog</a></td>"
  2896. " <td>Search results have the word <em>cat</em> OR the word <em>dog</em> "
  2897. " in them, but preference is given to results that have both words.</td>"
  2898. " </tr>"
  2899. " <tr> "
  2900. " <td><a href=%s/search?c=%s&q=cat+dog+OR+pig>cat&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
  2901. " <td>Search results have the two words <em>cat</em> and <em>dog</em> "
  2902. " OR search results have the word <em>pig</em>, but preference is "
  2903. " given to results that have all three words. This illustrates how "
  2904. " the individual words of one operand are all required for that operand "
  2905. " to be true.</td>"
  2906. " </tr>"
  2907. " <tr bgcolor=#E1FFFF> "
  2908. " <td><a href=%s/search?c=%s&q=%%22cat+dog%%22+OR+pig>\"cat&nbsp;dog\"&nbsp;OR&nbsp;pig</a></td>"
  2909. " <td>Search results have the phrase <em>\"cat dog\"</em> in them OR they "
  2910. " have the word <em>pig</em>, but preference is given to results that "
  2911. " have both.</td>"
  2912. " </tr>"
  2913. " <tr> "
  2914. " <td><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>title</a><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>:\"cat "
  2915. " dog\" OR pig</a></td>"
  2916. " <td>Search results have the phrase <em>\"cat dog\"</em> in their title "
  2917. " OR they have the word <em>pig</em>, but preference is given to results "
  2918. " that have both.</td>"
  2919. " </tr>"
  2920. " <tr bgcolor=#E1FFFF> "
  2921. " <td><a href=%s/search?c=%s&q=cat+OR+dog+OR+pig>cat&nbsp;OR&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
  2922. " <td>Search results need only have one word, <em>cat</em> or <em>dog</em> "
  2923. " or <em>pig</em>, but preference is given to results that have the "
  2924. " most of the words.</td>"
  2925. " </tr>"
  2926. " <tr> "
  2927. " <td><a href=%s/search?c=%s&q=cat+OR+dog+AND+pig>cat&nbsp;OR&nbsp;dog&nbsp;AND&nbsp;pig</a></td>"
  2928. " <td>Search results have <em>dog</em> and <em>pig</em>, but they may "
  2929. " or may not have <em>cat</em>. Preference is given to results that "
  2930. " have all three. To evaluate expressions with more than two operands, "
  2931. " as in this case where we have three, you can divide the expression "
  2932. " up into sub-expressions that consist of only one operator each. "
  2933. " In this case we would have the following two sub-expressions: <em>cat "
  2934. " OR dog</em> and <em>dog AND pig</em>. Then, for the original expression "
  2935. " to be true, at least one of the sub-expressions that have an OR "
  2936. " operator must be true, and, in addition, all of the sub-expressions "
  2937. " that have AND operators must be true. Using this logic you can evaluate "
  2938. " expressions with more than one boolean operator.</td>"
  2939. " </tr>"
  2940. " <tr bgcolor=#E1FFFF> "
  2941. " <td><a href=%s/search?c=%s&q=cat+AND+NOT+dog>cat&nbsp;AND&nbsp;NOT&nbsp;dog</a></td>"
  2942. " <td>Search results have <em>cat</em> but do not have <em>dog</em>.</td>"
  2943. " </tr>"
  2944. " <tr> "
  2945. " <td><a href=%s/search?c=%s&q=cat+AND+NOT+%%28dog+OR+pig%%29>cat&nbsp;AND&nbsp;NOT&nbsp;(dog&nbsp;OR&nbsp;pig)</a></td>"
  2946. " <td>Search results have <em>cat</em> but do not have <em>dog</em> "
  2947. " and do not have <em>pig</em>. When evaluating a boolean expression "
  2948. " that contains ()'s you can evaluate the sub-expression in the ()'s "
  2949. " first. So if a document has <em>dog</em> or it has <em>pig</em> "
  2950. " or it has both, then the expression, <em>(dog OR pig)</em> would "
  2951. " be true. So you could, in this case, substitute <em>true</em> for "
  2952. " that expression to get the following: <em>cat AND NOT (true) = cat "
  2953. " AND false = false</em>. Does anyone actually read this far?</td>"
  2954. " </tr>"
  2955. " <tr bgcolor=#E1FFFF> "
  2956. " <td><a href=%s/search?c=%s&q=%%28cat+OR+dog%%29+AND+NOT+%%28cat+AND+dog%%29>(cat&nbsp;OR&nbsp;dog)&nbsp;AND&nbsp;NOT&nbsp;(cat&nbsp;AND&nbsp;dog)</a></td>"
  2957. " <td>Search results have <em>cat</em> or <em>dog</em> but not both.</td>"
  2958. " </tr>"
  2959. " <tr> "
  2960. " <td>left-operand&nbsp;&nbsp;OPERATOR&nbsp;&nbsp;right-operand</td>"
  2961. " <td>This is the general format of a boolean expression. The possible "
  2962. " operators are: OR and AND. The operands can themselves be boolean "
  2963. " expressions and can be optionally enclosed in parentheses. A NOT "
  2964. " operator can optionally preceed the left or the right operand.</td>"
  2965. " </tr>"
  2966. ""
  2967. //" </table>"
  2968. ""
  2969. ""
  2970. ""
  2971. //"</td></tr>"
  2972. //"</table>"
  2973. //"<br>"
  2974. , GOLD
  2975. , host
  2976. , qc
  2977. , host
  2978. , qc
  2979. , host
  2980. , qc
  2981. , host
  2982. , qc
  2983. , host
  2984. , qc
  2985. , host
  2986. , qc
  2987. , host
  2988. , qc
  2989. , host
  2990. , qc
  2991. , host
  2992. , qc
  2993. , host
  2994. , qc
  2995. , host
  2996. , qc
  2997. );
  2998. //sb.safePrintf("<tr><td></td><td></td></tr>\n");
  2999. //sb.safePrintf("<tr><td></td><td></td></tr>\n");
  3000. //sb.safePrintf("<tr><td></td><td></td></tr>\n");
  3001. //sb.safePrintf("<tr><td></td><td></td></tr>\n");
  3002. sb.safePrintf("</table>");
  3003. //sb.safePrintf("</form>\n");
  3004. sb.safePrintf("<br>\n");
  3005. sb.safePrintf("\n");
  3006. sb.safePrintf("<br><br>\n");
  3007. printNav ( sb , hr );
  3008. g_httpServer.sendDynamicPage (sock,
  3009. sb.getBufStart(),
  3010. sb.length(),
  3011. 3600, // cachetime
  3012. false,// post?
  3013. "text/html",
  3014. 200, // http status
  3015. NULL, // cookie
  3016. "UTF-8");
  3017. return true;
  3018. }