PageRenderTime 92ms CodeModel.GetById 25ms app.highlight 59ms RepoModel.GetById 0ms app.codeStats 0ms

/Blaster.cpp

https://github.com/gigablast/open-source-search-engine
C++ | 1285 lines | 887 code | 92 blank | 306 comment | 176 complexity | 9271c9afd5c844ce5bbb58a22b3bca01 MD5 | raw file
Possible License(s): Apache-2.0
   1// Matt Wells, copyright Sep 2001
   2
   3// the main program that brings it all together
   4
   5#include "gb-include.h"
   6
   7#include "Blaster.h"
   8#include "Titledb.h" // TITLEREC_CURRENT_VERSION
   9#include "Linkdb.h"
  10
  11Blaster g_blaster;
  12static void gotDocWrapper1 ( void *state , TcpSocket *s ) ;
  13static void gotDocWrapper2 ( void *state , TcpSocket *s ) ;
  14static void gotDocWrapper3 ( void *state , TcpSocket *s ) ;
  15static void gotDocWrapper4 ( void *state , TcpSocket *s ) ;
  16static void sleepWrapper ( int fd , void *state ) ;
  17static void sleepWrapperLog(int fd, void *state);
  18
  19Blaster::Blaster() {}
  20
  21Blaster::~Blaster() {
  22	if (m_buf1)
  23		mfree(m_buf1,m_bufSize1,"blaster1");
  24	if (m_buf2)
  25		mfree(m_buf2,m_bufSize2,"blaster2");
  26}
  27
  28
  29bool Blaster::init(){
  30	// let's ensure our core file can dump
  31	struct rlimit lim;
  32	lim.rlim_cur = lim.rlim_max = RLIM_INFINITY;
  33	if ( setrlimit(RLIMIT_CORE,&lim) )
  34		log("blaster::setrlimit: %s", mstrerror(errno) );
  35	
  36	g_conf.m_maxMem = 500000000;
  37	
  38	// init our table for doing zobrist hashing
  39	if ( ! hashinit() ) {
  40		log("blaster::hashinit failed" ); return 0; }
  41
  42	// init the memory class after conf since it gets maxMem from Conf
  43	if ( ! g_mem.init ( ) ) {//200000000 ) ) {
  44		log("blaster::Mem init failed" ); return 0; }
  45	// start up log file
  46	if ( ! g_log.init( "/tmp/blasterLog" )        ) {
  47		log("blaster::Log open /tmp/blasterLog failed" ); return 0; }
  48
  49	/*
  50	// get dns ip from /etc/resolv.conf
  51	g_conf.m_dnsIps[0] = 0;
  52	FILE *fd = fopen ( "/etc/resolv.conf" , "r" );
  53	if ( ! fd ) {
  54		log("blaster::fopen: /etc/resolve.conf %s",
  55		    mstrerror(errno)); return 0; }
  56
  57	char tmp[1024];
  58	while ( fgets ( tmp , 1024 , fd ) ) {
  59		// tmp buf ptr
  60		char *p = tmp;
  61		// skip comments
  62		if ( *p == '#' ) continue;
  63		// skip nameserver name
  64		if ( ! isdigit(*p) ) while ( ! isspace ( *p ) ) p++ ;
  65		// skip spaces
  66		while ( isspace ( *p ) ) p++;
  67		// if this is not a digit, continue
  68		if ( ! isdigit(*p) ) continue;
  69		// get ip
  70		g_conf.m_dnsIps[0] = atoip ( p , gbstrlen(p) );
  71		// done
  72		break;
  73	}
  74	fclose ( fd );
  75
  76	// if no dns server found, bail
  77	if ( g_conf.m_dnsIps[0] == 0 ) {
  78		log("blaster:: no dns ip found in /etc/resolv.conf");return 0;}
  79
  80	// hack # of dns servers
  81	g_conf.m_numDns         = 1;
  82	g_conf.m_dnsPorts[0]    = 53;
  83	*/
  84
  85	g_conf.m_askRootNameservers = true;
  86
  87	//g_conf.m_dnsIps  [0]    = atoip ( "192.168.0.1", 11 );
  88	//g_conf.m_dnsClientPort  = 9909;
  89	g_conf.m_dnsMaxCacheMem = 1024*10;
  90	// hack http server port to -1 (none)
  91	//g_conf.m_httpPort           = 0;
  92	g_conf.m_httpMaxSockets     = 200;
  93	//g_conf.m_httpMaxReadBufSize = 102*1024*1024;
  94	g_conf.m_httpMaxSendBufSize = 16*1024;
  95
  96
  97	// init the loop
  98	if ( ! g_loop.init() ) {
  99		log("blaster::Loop init failed" ); return 0; }
 100	// . then dns client
 101	// . server should listen to a socket and register with g_loop
 102	if ( ! g_dns.init(6000)        ) {
 103		log("blaster::Dns client init failed" ); return 0; }
 104	// . then webserver
 105	// . server should listen to a socket and register with g_loop
 106	if ( ! g_httpServer.init( 8333 , 9334 ) ) {
 107		log("blaster::HttpServer init failed" ); return 0; }
 108	return 1;
 109}
 110	
 111void Blaster::runBlaster(char *file1,char *file2,
 112			 int32_t maxNumThreads, int32_t wait, bool isLogFile,
 113			 bool verbose,bool justDisplay,
 114			 bool useProxy ,
 115			 bool injectUrlWithLinks ,
 116			 bool injectUrl ) {
 117	if (!init())
 118		return;
 119	m_blasterDiff=true;
 120	if (!file2)
 121		m_blasterDiff=false;
 122	
 123	// set File class
 124	File f1;
 125	f1.set ( file1 );
 126
 127	// open files
 128	if ( ! f1.open ( O_RDONLY ) ) {
 129		log("blaster:open: %s %s",file1,mstrerror(g_errno)); 
 130		return; 
 131	}
 132
 133	// get file size
 134	int32_t fileSize1 = f1.getFileSize() ;
 135	// store a \0 at the end
 136	int32_t m_bufSize1 = fileSize1 + 1;
 137
 138	m_doInjectionWithLinks = injectUrlWithLinks;
 139	m_doInjection = injectUrl;
 140
 141	// make buffers to hold all
 142	m_buf1 = (char *) mmalloc ( m_bufSize1 , "blaster1" );
 143	if ( ! m_buf1) {
 144		log("blaster:mmalloc: %s",mstrerror(errno));
 145		return;
 146	}
 147
 148	//char *bufEnd = buf + bufSize;
 149
 150	// set m_p1
 151	m_p1    = m_buf1;
 152	m_p1end = m_buf1 + m_bufSize1 - 1;
 153
 154	// read em all in
 155	if ( ! f1.read ( m_buf1 , fileSize1 , 0 ) ) {
 156		log("blaster:read: %s %s",file1,mstrerror(g_errno));
 157		return;
 158	}
 159
 160	// change \n to \0
 161	//char *p = buf;
 162	int32_t  n = 0;
 163	for ( int32_t i = 0 ; i < m_bufSize1 ; i++ ) {
 164		if ( m_buf1[i] != '\n' ) continue;
 165		m_buf1[i] = '\0';
 166		n++;
 167	}
 168
 169
 170	if (m_blasterDiff){
 171		File f2;
 172		f2.set ( file2 );
 173		if ( ! f2.open ( O_RDONLY ) ) {
 174			log("blaster:open: %s %s",file2,mstrerror(g_errno)); 
 175			return; 
 176		}
 177		int32_t fileSize2 = f2.getFileSize() ;
 178		int32_t m_bufSize2 = fileSize2 + 1;
 179		m_buf2 = (char *) mmalloc ( m_bufSize2 , "blaster2" );
 180		if ( ! m_buf2) {
 181			log("blaster:mmalloc: %s",mstrerror(errno));
 182			return;
 183		}
 184		// set m_p2
 185		m_p2    = m_buf2;
 186		m_p2end = m_buf2 + m_bufSize2 - 1;
 187		if ( ! f2.read ( m_buf2 , fileSize2 , 0 ) ) {
 188			log("blaster:read: %s %s",file2,mstrerror(g_errno));
 189			return;
 190		}
 191		int32_t m=0;
 192		for ( int32_t i = 0 ; i < m_bufSize2 ; i++ ) {
 193			if ( m_buf2[i] != '\n' ) continue;
 194			m_buf2[i] = '\0';
 195			m++;
 196		}
 197		// Working on only the least number of urls from both files, 
 198		//because we need to work in pairs
 199		if (m<n) n=m;
 200		else m=n;
 201		m_totalUrls=n;
 202
 203		// should we print out all the logs?
 204		m_verbose=verbose;
 205		// Should we use the proxy for getting the first Doc
 206		m_useProxy=useProxy;
 207		// Should we just display the not present links and not fetch
 208		// the page to see if they are actually present ?
 209		m_justDisplay=justDisplay;
 210	}
 211	else{
 212		m_isLogFile=isLogFile;
 213		
 214		/*if reading a gigablast log file, find the lines that have 
 215		  GET and POST commands for search, and register a sleep
 216		  callback for those lines with sleepWrapperLog*/
 217		if(!isLogFile)
 218			m_totalUrls=n;
 219		else {
 220			m_totalUrls=0;
 221			char *p=m_buf1;
 222			char *pend=p+m_bufSize1;
 223			
 224			// start is the time in milliseconds of the first log 
 225			// message
 226			int64_t start=atoll(m_buf1);
 227			while(p<pend) {
 228				char *lineStart=p;
 229				char *urlStart=strstr(p," GET /search");
 230				if (!urlStart)
 231					urlStart=strstr(p," POST /search");
 232				if(!urlStart){
 233					p+=gbstrlen(p)+1; //goto next line
 234					continue;
 235				}
 236				urlStart++;
 237				m_wait=atoll(lineStart)-start;
 238				// register it here
 239				g_loop.registerSleepCallback(m_wait , 
 240							     urlStart, 
 241							     sleepWrapperLog);
 242				m_totalUrls++;
 243				p+=gbstrlen(p)+1;
 244			}
 245		}
 246	}
 247	log(LOG_INIT,"blaster: read %"INT32" urls into memory", 
 248	    m_totalUrls );
 249
 250	if(!isLogFile){
 251		// get min time between each spider in milliseconds
 252		m_wait = wait;
 253			
 254		// # of threads
 255		m_maxNumThreads = maxNumThreads;
 256		
 257		m_launched=0;
 258		
 259		m_portSwitch = 0;
 260		//if ( argc == 4 ) m_portSwitch = 1;
 261		//else             m_portSwitch = 0;
 262			
 263		// start our spider loop
 264		//startSpidering( );
 265		
 266		// wakeup wrapper every X ms
 267		g_loop.registerSleepCallback ( m_wait , NULL , 
 268					       sleepWrapper );
 269	}
 270	// this print to print how many docs have been processed
 271	m_print=false;
 272	m_startTime=gettimeofdayInMilliseconds();
 273	m_totalDone=0;
 274	// . now start g_loops main interrupt handling loop
 275	// . it should block forever
 276	// . when it gets a signal it dispatches to a server or db to handle it
 277	if ( ! g_loop.runLoop()    ) {
 278		log("blaster::runLoop failed" ); return; }
 279	// dummy return (0-->normal exit status for the shell)
 280	return;
 281}
 282
 283void sleepWrapper ( int fd , void *state ) {
 284	g_blaster.startBlastering();
 285}
 286
 287void sleepWrapperLog(int fd, void *state) {
 288	// unregister the sleepCallback
 289	g_loop.unregisterSleepCallback(state,sleepWrapperLog);
 290	g_blaster.processLogFile(state);
 291}
 292
 293void Blaster:: processLogFile(void *state){
 294	// No need to print how many docs processed in log
 295	// because this is called at epochs given in the log
 296	char *urlStart=(char*)state;
 297	if (!urlStart){
 298		log(LOG_WARN,"blaster: got NULL urlStart");
 299		return;
 300	}
 301	//	log(LOG_WARN,"blaster:: Line is %s",urlStart);
 302	char tmp[1024];
 303	if (urlStart[0]=='P'){ //POST
 304		// advance by "POST /search HTTP/1.1 " = 22 chars
 305		urlStart+=22;
 306		sprintf(tmp,"http://www.gigablast.com/search?%s",urlStart);
 307	}
 308	else if (urlStart[0]=='G'){ //GET
 309		// advance by "GET "= 4 chars
 310		urlStart+=4;
 311		char *end=strstr(urlStart," HTTP/1.");
 312		if (end)
 313			end[0]='\0';
 314		sprintf(tmp,"http://www.gigablast.com%s",urlStart);
 315	}
 316	//	log(LOG_WARN,"blaster: URL=%s",tmp);
 317	StateBD *st;
 318	try { st = new (StateBD); }
 319	catch ( ... ) {
 320		g_errno = ENOMEM;
 321		log("blaster: Failed. "
 322		    "Could not allocate %"INT32" bytes for query. "
 323		    "Returning HTTP status of 500.",
 324		    (int32_t)sizeof(StateBD));
 325		return;
 326	}
 327	mnew ( st , sizeof(StateBD) , "BlasterDiff3" );
 328	//st->m_u1.set(tmp,gbstrlen(tmp));
 329	st->m_buf1=NULL;
 330	// get it
 331	bool status = g_httpServer.getDoc ( tmp, // &(st->m_u1) , // url
 332					    0 , // ip (none)
 333					    0 ,  // offset
 334					    -1 ,  // size
 335					    0 , // ifModifiedSince
 336					    st,  // state
 337					    gotDocWrapper1, // callback
 338					    20*1000, // timeout
 339					    0, // proxy ip
 340					    0, // proxy port
 341					    30*1024*1024, //maxLen
 342					    30*1024*1024);//maxOtherLen
 343	// continue if it blocked
 344	if ( status )
 345		// else there was error
 346		log("blaster: got doc %s: %s", urlStart,mstrerror(g_errno) );
 347	return;
 348}
 349	
 350
 351void Blaster::startBlastering(){
 352	int64_t now=gettimeofdayInMilliseconds();
 353	if(m_print && m_totalDone>0 && (m_totalDone % 20)==0){
 354		log("blaster: Processed %"INT32" urls in %"INT32" ms",m_totalDone,
 355		    (int32_t) (now-m_startTime));
 356		m_print=false;
 357	}
 358	//Launch the maximum number of threads that are allowed
 359	while ( m_p1 < m_p1end && m_launched < m_maxNumThreads && m_totalUrls){
 360		// clear any error
 361		g_errno = 0;
 362		// make a new state
 363		StateBD *st;
 364		try { st = new (StateBD); }
 365		catch ( ... ) {
 366			g_errno = ENOMEM;
 367			log("blaster: Failed. "
 368			    "Could not allocate %"INT32" bytes for query. "
 369			    "Returning HTTP status of 500.",
 370			    (int32_t)sizeof(StateBD));
 371			return;
 372		}
 373		mnew ( st , sizeof(StateBD) , "BlasterDiff3" );
 374		st->m_buf1=NULL;
 375		m_totalUrls--;
 376		// make into a url class. Set both u1 and u2 here.
 377		//st->m_u1.set ( m_p1 , gbstrlen(m_p1) );
 378		st->m_u1 = m_p1;
 379		// is it an injection url
 380		if ( m_doInjection || m_doInjectionWithLinks ) {
 381			// get host #0 i guess
 382			Host *h0 = g_hostdb.getHost(0);
 383			if ( ! h0 ) { char *xx=NULL;*xx=0; }
 384			static bool s_flag = true;
 385			if ( s_flag ) {
 386				s_flag = false;
 387				log("blaster: injecting to host #0 at %s on "
 388				    "http/tcp port %"INT32"",
 389				    iptoa(h0->m_ip),
 390				    (int32_t)h0->m_httpPort);
 391			}
 392			// use spiderlinks=1 so we add the outlinks to spiderdb
 393			// but that will slow the spider rate down since it 
 394			// will have to do a dns lookup on the domain of every
 395			// outlink.
 396			st->m_injectUrl.safePrintf("http://127.0.0.1:8000/"
 397						   "admin/inject?");
 398			if ( m_doInjectionWithLinks )
 399				st->m_injectUrl.safePrintf("spiderlinks=1&");
 400			else
 401				st->m_injectUrl.safePrintf("spiderlinks=0&");
 402			st->m_injectUrl.safePrintf("u=");
 403			st->m_injectUrl.urlEncode(m_p1);
 404			st->m_injectUrl.pushChar('\0');
 405			st->m_u1 = st->m_injectUrl.getBufStart();
 406		}
 407		// skip to next url
 408		m_p1 += gbstrlen ( m_p1 ) + 1;
 409		if (m_blasterDiff){
 410			//st->m_u2.set ( m_p2 , gbstrlen(m_p2) );
 411			st->m_u2 = m_p2;
 412			m_p2 += gbstrlen ( m_p2 ) + 1;
 413		}
 414
 415		//		log(LOG_WARN,"\n");
 416		log(LOG_WARN,"blaster: Downloading %s",st->m_u1);
 417		// set port if port switch is true
 418		//if ( m_portSwitch ) {
 419		//	int32_t r = rand() % 32;
 420		//	u.setPort ( 8000 + r );
 421		//}
 422
 423		// count it
 424		m_launched++;
 425		int32_t ip=0;
 426		int32_t port=0;
 427		if (m_useProxy){
 428			ip=atoip("66.154.102.20",13);
 429			port=3128;
 430		}
 431		// get it
 432		bool status = g_httpServer.getDoc ( st->m_u1 , // url
 433						    0, // ip
 434						    0 ,  // offset
 435						    -1 ,  // size
 436						    0 , // ifModifiedSince
 437						    st ,  // state
 438						    gotDocWrapper1, // callback
 439						    60*1000, // timeout
 440						    ip,
 441						    port,
 442						    30*1024*1024, //maxLen
 443						    30*1024*1024);
 444		// continue if it blocked
 445		if ( ! status ) continue;
 446		// If not blocked, there is an error.
 447		m_launched--;
 448		// log msg
 449		log("From file1, got doc1 %s: %s", st->m_u1 , 
 450		    mstrerror(g_errno) );
 451		// we gotta wait
 452		break;
 453	}
 454	// bail if not done yet
 455	//if ( m_launched > 0 ) return;
 456	if (m_totalUrls) return;
 457	//otherwise return if launched have not come back
 458	if (m_launched) return;
 459	// exit now
 460	//	g_conf.save();
 461	//	closeALL(NULL,NULL);
 462	exit ( 0 );
 463}
 464
 465
 466void gotDocWrapper1 ( void *state , TcpSocket *s ) {
 467	g_blaster.gotDoc1(state,s);
 468}
 469
 470void Blaster::gotDoc1( void *state, TcpSocket *s){
 471	StateBD *st=(StateBD *)state;
 472	// Even if we loose the request, still count it as done.
 473	m_totalDone++;
 474	m_print=true;
 475	// bail if got cut off
 476	if ( s->m_readOffset == 0 ) {
 477		log("blaster: lost the Request in gotDoc1");
 478		m_launched--;
 479		freeStateBD(st);
 480		return;
 481	}
 482
 483	//if we are not doing diff
 484	if (!m_blasterDiff){
 485		m_launched--;
 486	}
 487	int64_t now = gettimeofdayInMilliseconds();
 488	// get hash
 489	char *reply = s->m_readBuf ;
 490	int32_t  size  = s->m_readOffset;
 491	HttpMime mime;
 492	mime.set ( reply , size , NULL );
 493	char *content    = reply + mime.getMimeLen();
 494	int32_t  contentLen = size  - mime.getMimeLen();
 495	uint32_t h = hash32 ( content , contentLen );
 496	// log msg
 497	if ( g_errno ) 
 498		logf(LOG_INFO,"blaster: got doc (%"INT32") (%"INT32" ms) %s : %s",
 499		     s->m_readOffset      , 
 500		     (int32_t)(now - s->m_startTime) , 
 501		     st->m_u1   , 
 502		     mstrerror(g_errno)   );
 503	else
 504		logf(LOG_INFO,"blaster: got doc (%"INT32") (%"INT32" ms) "
 505		     "(hash=%"XINT32") %s",
 506		     s->m_readOffset      , 
 507		     (int32_t)(now - s->m_startTime) , 
 508		     h ,
 509		     st->m_u1       );
 510	if (!m_blasterDiff){
 511		// try to launch another if not using log file
 512		freeStateBD(st);
 513		if (!m_isLogFile){
 514			startBlastering();
 515		}
 516		if (m_isLogFile && --m_totalUrls==0) exit(0);
 517		return;
 518	}
 519
 520	// Store the buffer from socket so that it does not get destroyed
 521	// at the end. Also, add another space because in gotDoc2 xml.set
 522	// demands the content to be null ended, so we need to store the
 523	// null character there. So as a precaution, just allocating the
 524	// max buf size.
 525	st->m_buf1=(char*) mcalloc(s->m_readBufSize,"Blaster5");
 526	gbmemcpy(st->m_buf1,s->m_readBuf,s->m_readOffset);
 527	//st->m_buf1=(char*) mdup(s->m_readBuf,s->m_readOffset,"Blaster5");
 528	st->m_buf1Len=s->m_readOffset;
 529	st->m_buf1MaxLen=s->m_readBufSize;
 530
 531	// . don't let TcpServer free m_buf when socket is recycled/closed
 532	// . we own it now and are responsible for freeing it. DON'T do this
 533	// because I believe this makes malloc crash, since TcpServer says
 534	// that it has freed the memory so malloc tries to allocate wrong
 535	// memory and gives a seg fault.
 536	//	s->m_readBuf = NULL;
 537	
 538	log(LOG_WARN,"blaster: Downloading %s",st->m_u2);
 539	//char *ss="www.gigablast.com/search?q=hoopla&code=gbmonitor";
 540	//	st->m_u2.set(ss,gbstrlen(ss));
 541	// get it
 542	bool status = g_httpServer.getDoc ( st->m_u2 , // url
 543					    0,//ip
 544					    0 ,  // offset
 545					    -1 ,  // size
 546					    0 , // ifModifiedSince
 547					    st ,  // state
 548					    gotDocWrapper2, // callback
 549					    60*1000, // timeout
 550					    0,//atoip("66.154.102.20",13),//proxy ip
 551					    0,//3128,//80, // proxy port
 552					    30*1024*1024, //maxLen
 553					    30*1024*1024);//maxOtherLen
 554	// continue if it blocked
 555	if ( ! status ) return;
 556	// If not blocked, there is an error.
 557	m_launched--;
 558	// log msg
 559	log("From file2, gotdoc2 %s: %s", st->m_u2,
 560	    mstrerror(g_errno) );
 561	// No need to point p2 ahead because already been done
 562	// Free stateBD
 563	freeStateBD(st);
 564	return;
 565	
 566}
 567
 568void gotDocWrapper2 ( void *state , TcpSocket *s ) {
 569	g_blaster.gotDoc2(state,s);
 570}
 571
 572void Blaster::gotDoc2 ( void *state, TcpSocket *s){
 573	StateBD *st=(StateBD *)state;
 574	// bail if got cut off
 575	if ( s->m_readOffset == 0 ) {
 576		log("blaster: Lost the Request in gotDoc2");
 577		m_launched--;
 578		//No need to point p2
 579		// Free stateBD
 580		freeStateBD(st);
 581		return;
 582	}
 583	
 584	// . don't let TcpServer free m_buf when socket is recycled/closed
 585	// . we own it now and are responsible for freeing it
 586	//	s->m_readBuf = NULL;
 587
 588	int64_t now = gettimeofdayInMilliseconds();
 589	// So now after getting both docIds, get their contents
 590	char *reply1 = st->m_buf1 ;
 591	int32_t  size1  = st->m_buf1Len;
 592	HttpMime mime1;
 593	mime1.set ( reply1 , size1 , NULL );
 594	char *content1    = reply1 + mime1.getMimeLen();
 595	int32_t  content1Len = size1  - mime1.getMimeLen();
 596	uint32_t h = hash32 ( content1 , content1Len );
 597	// log msg
 598	if ( g_errno ) 
 599		logf(LOG_INFO,"blaster: got doc (%"INT32") (%"INT32" ms) %s : %s",
 600		     s->m_readOffset      , 
 601		     (int32_t)(now - s->m_startTime) , 
 602		     st->m_u2   , 
 603		     mstrerror(g_errno)   );
 604	else
 605		logf(LOG_INFO,"blaster: got doc (%"INT32") (%"INT32" ms) "
 606		     "(hash=%"XINT32") %s",
 607		     s->m_readOffset      , 
 608		     (int32_t)(now - s->m_startTime) , 
 609		     h ,
 610		     st->m_u2       );
 611
 612
 613	if (m_verbose){
 614		log(LOG_WARN,"blaster: content1len=%"INT32", Content1 is =%s",
 615		    content1Len,content1);
 616		log(LOG_WARN,"\n");
 617	}
 618	char *reply2 = s->m_readBuf ;
 619	int32_t  size2  = s->m_readOffset;
 620	HttpMime mime2;
 621	mime2.set ( reply2 , size2 , NULL );
 622	char *content2    = reply2 + mime2.getMimeLen();
 623	int32_t  content2Len = size2  - mime2.getMimeLen();
 624	if (m_verbose)	
 625		log(LOG_WARN,"blaster: content2len=%"INT32", Content2 is =%s",
 626		    content2Len,content2);
 627
 628	// Now that we've got the contents, lets get the url links out 
 629	// of these pages.Passing them to function getSearchLinks should 
 630	// get the first x links found out.
 631	/*	st->m_links1=(char *) mmalloc(200*MAX_URL_LEN,"Blaster3");
 632	st->m_links2=st->m_links1+100*MAX_URL_LEN;
 633	st->m_numLinks1=100;
 634	st->m_numLinks2=100;*/
 635
 636	/*	int32_t numLinks1=getSearchLinks(content1,content1Len,
 637				      st->m_links1,st->m_numLinks1);
 638	int32_t numLinks2=getSearchLinks(content2,content2Len,
 639	st->m_links2,st->m_numLinks2);*/
 640
 641
 642	content1[content1Len]='\0';
 643	//int16_t csEnum1= get_iana_charset(mime1.getCharset(), 
 644	//				mime1.getCharsetLen());
 645	/*	if (csEnum1== csUnknown)
 646		log(LOG_DEBUG, "blaster: Unknown charset : %s", mime2.getCharset());*/
 647	Xml xml1;
 648	// assume utf8
 649	if (!xml1.set(content1, 
 650		     content1Len,
 651		     false,
 652		     0,
 653		     false,
 654		      TITLEREC_CURRENT_VERSION ,
 655		      true , // set parents
 656		      0 , // niceness 
 657		      CT_XML )){ // content type
 658		log(LOG_WARN,"blaster: Couldn't set XML1 Class in gotDoc2");
 659	}
 660	Links links1;
 661	Url parent; parent.set ( st->m_u1);
 662	if (!links1.set(false , // userellnofollow
 663			&xml1,
 664			&parent,//mime1.getLocationUrl(), parent Url
 665			false, // setLinkHashes
 666			NULL  , // baseUrl
 667			TITLEREC_CURRENT_VERSION, // version
 668			0 , // niceness
 669			false , // parent is permalink?
 670			NULL )) { // oldLinks
 671		log(LOG_WARN,"blaster: Couldn't set Links Class in gotDoc2");
 672	}
 673
 674	content2[content2Len]='\0';
 675	//int16_t csEnum2= get_iana_charset(mime2.getCharset(), 
 676	//				mime2.getCharsetLen());
 677	/*	if (csEnum2== csUnknown)
 678		log(LOG_DEBUG, "blaster: Unknown charset : %s", mime2.getCharset());*/
 679	Xml xml2;
 680	if (!xml2.set(content2, 
 681		     content2Len,
 682		     false,
 683		     0,
 684		     false,
 685		      TITLEREC_CURRENT_VERSION,
 686		      true , // setparents
 687		      0 , // niceness
 688		      CT_XML )){
 689		log(LOG_WARN,"blaster: Couldn't set XML2 Class in gotDoc2");
 690	}
 691	Links links2;
 692	parent.set(st->m_u2);
 693	if (!links2.set(0,//siterec xml
 694			&xml2,
 695			&parent,//&st->m_u2,//mime2.getLocationUrl(),
 696			false,
 697			NULL,
 698			TITLEREC_CURRENT_VERSION,
 699			0,
 700			false,
 701			NULL)){
 702		log(LOG_WARN,"blaster: Couldn't set links2 Class in gotDoc2");
 703	}
 704	
 705
 706	// put the hash of the sites into a hashtable, since we have
 707	// about a 100 or so of them
 708	HashTableT<uint32_t, bool> urlHash;
 709	// put the urls from doc2 into the hastable, but first check if
 710	// they are links to google or gigablast (for now). For msn and
 711	// yahoo we have to add other checks.
 712	char domain2[256];
 713	int32_t dlen = 0;
 714	char *dom = getDomFast ( st->m_u2 , &dlen );
 715	if ( dom ) strncpy(domain2,dom,dlen);
 716	domain2[dlen]='\0';
 717	for (int32_t i=0;i<links2.getNumLinks();i++){
 718		// The dots check if exactly google or gigablast are present
 719		// in the link
 720		char *ss=links2.getLink(i);
 721		char *p;
 722		p=strstr(ss,domain2);
 723		if(p) continue;
 724		p=strstr(ss,"google.");
 725		if(p) continue;
 726		p=strstr(ss,"cache:");  //googles cache page
 727		if(p) continue;
 728		p= strstr(ss,"gigablast.");
 729		if(p) continue;
 730		p= strstr(ss,"web.archive.org");//older copies on gigablast
 731		if(p) continue;
 732		p= strstr(ss,"search.yahoo.com");//from gigablast search
 733		if(p) continue;
 734		p= strstr(ss,"search.msn.com");//from gigablast search
 735		if(p) continue;
 736		p= strstr(ss,"s.teoma.com");//from gigablast search
 737		if(p) continue;
 738		p= strstr(ss,"search.dmoz.org");//from gigablast search
 739		if(p) continue;
 740		p= strstr(ss,"www.answers.com");//from gigablast search
 741		if(p) continue;
 742		p= strstr(ss,"cc.msncache.com");//msn's cache page
 743		if(p) continue;
 744		if (m_verbose)
 745			log(LOG_WARN,"blaster: link in Doc2=%s"
 746			    ,links2.getLink(i));
 747		uint32_t h=hash32Lower_a(links2.getLink(i),
 748					    links2.getLinkLen(i));
 749		//should i check for conflict. no, because it doesn't matter
 750		urlHash.addKey(h,1);
 751	}
 752	// now check if the urls from doc1 are in doc2. save the
 753	// ones that are not
 754	// in there for later.
 755	/*	int32_t numUrlsToCheck=links2.getNumLinks();*/
 756	int32_t numUrlsNotFound=0;
 757	/*if (numLinks1<numUrlsToCheck)
 758	numUrlsToCheck=numLinks1;*/
 759	char domain1[256];
 760	dlen = 0;
 761	dom = getDomFast ( st->m_u1 ,&dlen );
 762	if ( dom ) strncpy(domain1,dom,dlen);
 763	domain1[dlen]='\0';
 764	for (int32_t i=0;i<links1.getNumLinks();i++){
 765		char *ss=links1.getLink(i);
 766		char *p;
 767		p=strstr(ss,domain1);
 768		if(p) continue;
 769		p=strstr(ss,"google.");
 770		if(p) continue;
 771		p=strstr(ss,"cache:");  //googles cache page
 772		if(p) continue;
 773		p= strstr(ss,"gigablast.");
 774		if(p) continue;
 775		p= strstr(ss,"web.archive.org");//older copies on gigablast
 776		if(p) continue;
 777		p= strstr(ss,"search.yahoo.com");//from gigablast search
 778		if(p) continue;
 779		p= strstr(ss,"search.msn.com");//from gigablast search
 780		if(p) continue;
 781		p= strstr(ss,"s.teoma.com");//from gigablast search
 782		if(p) continue;
 783		p= strstr(ss,"search.dmoz.org");//from gigablast search
 784		if(p) continue;
 785		p= strstr(ss,"www.answers.com");//from gigablast search
 786		if(p) continue;
 787		p= strstr(ss,"cc.msncache.com");//msn's cache page
 788		if(p) continue;
 789		if (m_verbose)
 790			log(LOG_WARN,"blaster: link in Doc1=%s"
 791			    ,links1.getLink(i));
 792		uint32_t h=hash32Lower_a(links1.getLink(i),
 793					    links1.getLinkLen(i));
 794		int32_t slot= urlHash.getSlot(h);		
 795		if(slot!=-1) continue;
 796
 797		// if url is not present, get its doc.
 798		if (m_verbose || m_justDisplay)
 799			log(LOG_WARN,"blaster: NOT FOUND %s in %s"
 800			    ,links1.getLink(i),domain2);
 801		numUrlsNotFound++;
 802		//Don't do anything else if just have to display the urls
 803		if (m_justDisplay) continue;
 804		//now get the doc of these urls
 805		//initialize
 806		st->m_numUrlDocsReceived=0;
 807
 808		StateBD2 *st2;
 809		try { st2 = new (StateBD2); }
 810		catch ( ... ) {
 811			g_errno = ENOMEM;
 812			log("blaster: Failed. "
 813			    "Could not allocate %"INT32" bytes for query. "
 814			    "Returning HTTP status of 500.",
 815			    (int32_t)sizeof(StateBD2));
 816			return;
 817		}
 818		mnew ( st2 , sizeof(StateBD2) , "Blaster4" );
 819		//Point to the big state;
 820		st2->m_st=st;
 821		//Msg16 does 6 redirects, so I do 6 too
 822		st2->m_numRedirects=6;
 823		//st2->m_url.set(links1.getLink(i),links1.getLinkLen(i));
 824		st2->m_url = links1.getLink(i);
 825		// No need for a proxy ip here, since we are fetching
 826		// doc's from different IPs. Faster this way
 827		bool status = g_httpServer.getDoc ( st2->m_url, // url
 828						    0,//ip
 829						    0 ,  // offset
 830						    -1 ,  // size
 831						    0 , // ifModifiedSince
 832						    st2,  // state
 833						    gotDocWrapper3, // callback
 834						    60*1000, // timeout
 835						    0, // proxy ip
 836						    0, // proxy port
 837						    30*1024*1024, //maxLen
 838						    30*1024*1024);//maxOtherLen
 839		// continue if it blocked
 840		if ( ! status ) continue;
 841		// If not blocked, there is an error.
 842		st->m_numUrlDocsReceived++;
 843	}
 844	st->m_numUrlDocsSent=numUrlsNotFound;
 845
 846	//There might have been an error while sending the docs, so if there
 847	//has been put a check
 848	if ( st->m_numUrlDocsReceived > 0 && 
 849	     st->m_numUrlDocsReceived <= st->m_numUrlDocsSent ){
 850		log(LOG_WARN,"blaster: %"INT32" docs could not be sent due to "
 851		    "error",st->m_numUrlDocsReceived);
 852		m_launched--;
 853		freeStateBD(st);
 854		return;
 855	}
 856		
 857	if (numUrlsNotFound==0){
 858		//job done for this pair
 859		log(LOG_WARN,"blaster: All urls from %s found in "
 860		    "%s",domain1,domain2);
 861		m_launched--;
 862		// Free stateBD
 863		freeStateBD(st);
 864		return;
 865	}
 866	log(LOG_WARN,"blaster: %"INT32" urls from %s Not found in %s",
 867	    numUrlsNotFound,domain1,domain2);
 868	if(m_justDisplay){
 869		m_launched--;
 870		// Free stateBD
 871		freeStateBD(st);
 872	}
 873	return;
 874}
 875
// Extracts result links from a search results page. This is not a generic
// function yet: Gigablast puts each link in a <span class="url"> tag and
// Google puts it in a <font color=#008000> tag, and only those two are
// handled. Takes the content to search, the array in which to store the
// links, and the number of links the array can hold. Returns the number of
// links found. Currently compiled out (#if 0): gotDoc2 extracts the links
// with the Xml and Links classes instead.
 881#if 0
 882int32_t Blaster::getSearchLinks(char *content,
 883				 int32_t contentLen,
 884				 char *links,
 885				 int32_t numLinks){
 886	char *p=content;
 887	char *pend=content+contentLen;
 888	char *p2;
 889	int32_t linksFound=0;
 890
 891	//considering code given is raw=1
 892	/*	while (p<pend){
 893		if (p=strstr(p,"http://"))
 894			p2=strstr(p,"\n");
 895		else break;
 896		int32_t length=p2-p;
 897		if (length>=MAX_URL_LEN) length=255;
 898		strncpy(links+linksFound*MAX_URL_LEN,p,length);
 899		links[linksFound*MAX_URL_LEN+length]='\0';
 900		log(LOG_WARN,"blaster: The url is=%s",
 901		    links+linksFound*MAX_URL_LEN);
 902		linksFound++;
 903		p+=7;
 904	}
 905	return linksFound;*/
 906
 907	// Deciding if it is gigablast 1 or google 0 or else 2
 908	int32_t isGB;
 909	if (contentLen<19) {
 910		log(LOG_WARN,"blaster: Contentlen is less");
 911		return 0;
 912	}
 913	if (strstr(content,"<span class=\"url\">"))
 914		isGB=1;
 915	else isGB=0;
 916	p=content;
 917	if (isGB){
 918		while (p && p<pend && linksFound<numLinks){
 919			
 920			p=strstr(p,"<span class=\"url\">");
 921			if (!p) break;
 922			p2=strstr(p,"</span>");
 923			if (!p2) break;
 924			
 925			//point to the url
 926			p+=18;
 927			//Check if it is in bounds. Also need to put '\0' at
 928			// the end.
 929			int32_t length=p2-p;
 930			if (length>=MAX_URL_LEN) length=MAX_URL_LEN-1;
 931			//Copy into the links buffer
 932			strncpy(links+linksFound*MAX_URL_LEN,p,length);
 933			links[linksFound*MAX_URL_LEN+length]='\0';
 934			log(LOG_WARN,"blaster:the url is=%s",
 935			    links+linksFound*MAX_URL_LEN);
 936			//advance p2 too
 937			p2+=7;
 938			linksFound++;
 939		} 
 940	}
 941	else{
 942		while (p && p<pend && linksFound<numLinks){
 943			p=strstr(p,"<font color=#008000>");
 944			if(!p) break;
 945			p2=strstr(p,"</font>");
 946			if (!p2) break;
 947			
 948			//point to the url
 949			p+=20;
 950			//Check if it is in bounds. Also need to put '\0' at
 951			// the end.
 952			int32_t length=p2-p;
 953			if (length>=MAX_URL_LEN) length=255;
 954			//Copy into the links buffer
 955			strncpy(links+linksFound*MAX_URL_LEN,p,length);
 956			links[linksFound*MAX_URL_LEN+length]='\0';
 957			log(LOG_WARN,"blaster:the url is=%s",
 958			    links+linksFound*MAX_URL_LEN);
 959			//advance p2 too
 960			p2+=7;
 961			linksFound++;
 962		}
 963	}
 964	return linksFound;
 965}
 966#endif
 967
 968void gotDocWrapper3 ( void *state , TcpSocket *s ) {
 969	g_blaster.gotDoc3(state,s);
 970}
 971
 972void Blaster::gotDoc3 ( void *state, TcpSocket *s){
 973	StateBD2 *st2=(StateBD2 *)state;
 974	StateBD *st=st2->m_st;
 975	if (!s) {
 976		log (LOG_WARN,"blaster: Got a null s in gotDoc3."
 977		     "Happened because ip could not be found");
 978		st->m_numUrlDocsReceived++;
 979		//Free StateBD2
 980		mdelete(st2,sizeof(StateBD2),"Blaster4");
 981		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
 982			m_launched--;
 983			// Free stateBD
 984			freeStateBD(st);
 985		}
 986		return;
 987	}
 988	// bail if got cut off
 989	if ( s->m_readOffset == 0 ) {
 990		log("blasterDiff : lost the Request in gotDoc3");
 991		st->m_numUrlDocsReceived++;
 992		//Free StateBD2
 993		mdelete(st2,sizeof(StateBD2),"Blaster4");
 994		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
 995			m_launched--;
 996			// Free stateBD
 997			freeStateBD(st);
 998		}
 999		return;
1000	}
1001	char *reply = s->m_readBuf ;
1002	int32_t  size  = s->m_readOffset;
1003	HttpMime mime;
1004	mime.set(reply,size,NULL);
1005
1006	int32_t httpStatus=mime.getHttpStatus();
1007	if(httpStatus==404){
1008		if (m_verbose)
1009			log(LOG_WARN,"blaster: The page was not found - 404");
1010		st->m_numUrlDocsReceived++;
1011	}
1012	// If the url is a redirect check if it is still http (might have
1013	// become https or something else, in which case we aren't going to
1014	// follow it
1015	else if (httpStatus>=300){
1016		Url *u=mime.getLocationUrl();
1017
1018		//If max number of redirects done, bail
1019		if(!st2->m_numRedirects--){
1020			log(LOG_WARN,"blaster: Max number of redirects "
1021			    "reached.");
1022			st->m_numUrlDocsReceived++;
1023		}
1024		//check if it is still http (might have become https or
1025		// something else, in which case we aren't going to follow it
1026		else if (!u->isHttp()){
1027			log(LOG_WARN,"blaster: Redirection not for an http "
1028			    "page for url %s",u->getUrl());
1029			st->m_numUrlDocsReceived++;
1030		}
1031		// sometimes idiots don't supply us with a Location: mime
1032		else if ( u->getUrlLen() == 0 ) {
1033			log(LOG_WARN,"blaster: Redirect url is of 0 length");
1034			st->m_numUrlDocsReceived++;
1035		}
1036		else{
1037			// I'm not checking as yet if the redirect url is the
1038			// same as the earlier url, as I've set the max number
1039			// of redirs to 6 Now lets get the redirect url. Do not
1040			// increase the numDocsReceived because this wrapper
1041			// will be called back  for the page
1042			if (m_verbose)
1043				log(LOG_WARN,"blaster: Downloading redirect"
1044				    " %s",u->getUrl());
1045			//Changing the url to the new place
1046			//st2->m_url.set(u,false);
1047			st2->m_url = u->getUrl();
1048			bool status = g_httpServer.getDoc (st2->m_url, // url
1049							    0,//ip
1050							    0 ,  // offset
1051							    -1 ,  // size
1052							    0 ,
1053							    st2 ,  // state
1054							    gotDocWrapper3,
1055							    60*1000, // timeout
1056							    0, // proxy ip
1057							    0, // proxy port
1058						    30*1024*1024, //maxLen
1059							    30*1024*1024);
1060			// If not blocked, there is an error.
1061			if (status ) 
1062				st->m_numUrlDocsReceived++;
1063		}
1064	}
1065	else if(httpStatus<200){
1066		log(LOG_WARN,"blaster: Bad HTTP status %"INT32"",httpStatus);
1067		st->m_numUrlDocsReceived++;
1068	}
1069	else{
1070		// This means the page is still there, somewhere. Status must 
1071		// be 200 So find it on server2. This server is assumed to be
1072		// running an instance of gb, so it shall be given the query in
1073		// the format 'xxxxx.com/search?q=url%3Ayyyy&code=gbmonitor. 
1074		// Then check if we have the exact page in the search results 
1075		// that have come back. So now the problem is that we do
1076		// not know which url has been got. So I get the location
1077		// url from mime.
1078		// The site name is in st->m_u2.getSite()
1079		// But copy it because it is not nulled.
1080		char tmp[1024];
1081		//char site[1024];//how long could a site be?
1082		int32_t siteLen = 0;
1083		char *site    = getHostFast(st->m_u2,&siteLen);
1084		char c = site[siteLen];
1085		site[siteLen] = 0;
1086		//strncpy(site,st->m_u2.getSite(),
1087		//	st->m_u2.getSiteLen());
1088		//site[st->m_u2.getSiteLen()]='\0';
1089		sprintf(tmp,"%ssearch?"
1090			"code=gbmonitor&"
1091			"q=url%%3A%s",site,st2->m_url);
1092		site[siteLen] = c;
1093		if (m_verbose)
1094			log(LOG_WARN,"blaster: Checking %s",tmp);
1095		//Url u;
1096		//u.set(tmp,gbstrlen(tmp));
1097		//Now get the doc
1098		bool status = g_httpServer.getDoc ( tmp,//&u,
1099						    0,//ip
1100						    0,  // offset
1101						    -1 ,  // size
1102						    0 ,
1103						    st , // state
1104						    gotDocWrapper4,
1105						    60*1000, // timeout
1106						    0,//atoip("66.154.102.20",13),//proxy ip
1107						    0,//3128,//proxy port
1108						    30*1024*1024,
1109						    30*1024*1024);
1110		// continue if it blocked
1111		// If not blocked, there is an error. Since we are
1112		// getting the doc from a gigablast server, report it
1113		if (status ){
1114			st->m_numUrlDocsReceived++;
1115			log(LOG_WARN,"blaster: could not get back"
1116				    "%s from server in gotDoc3",tmp);
1117		}
1118	}
1119	// If we reached here, that means all the url redirects have been 
1120	// finished, and there is no need for st2. Free it
1121	mdelete(st2,sizeof(StateBD2),"Blaster4");
1122
1123
1124	if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
1125		m_launched--;
1126		// Free stateBD
1127		freeStateBD(st);
1128	}
1129	return;
1130}
1131
1132void gotDocWrapper4 ( void *state , TcpSocket *s ) {
1133	g_blaster.gotDoc4(state,s);
1134}
1135
1136void Blaster::gotDoc4 ( void *state, TcpSocket *s){
1137	StateBD *st=(StateBD *)state;
1138	st->m_numUrlDocsReceived++;
1139	if (!s) {
1140		//Shouldn't happen, but still putting a checkpoint
1141		log (LOG_WARN,"blaster: Got a null s in gotDoc4."
1142		     "Happened because ip could not be found for gigablast"
1143		     "server");
1144		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
1145			m_launched--;
1146			// Free stateBD
1147			freeStateBD(st);
1148		}
1149		return;
1150	}
1151	// bail if got cut off
1152	if ( s->m_readOffset == 0 ) {
1153		log("blasterDiff : lost the Request in gotDoc4");
1154		if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
1155			m_launched--;
1156			freeStateBD(st);
1157		}
1158		return;
1159	}
1160	char *reply = s->m_readBuf ;
1161	int32_t  size  = s->m_readOffset;
1162	HttpMime mime;
1163	mime.set ( reply , size , NULL );
1164	char *content    = reply + mime.getMimeLen();
1165	int32_t  contentLen = size  - mime.getMimeLen();
1166
1167	//int16_t csEnum = get_iana_charset(mime.getCharset(), 
1168	//				mime.getCharsetLen());
1169	/*	if (csEnum == csUnknown)
1170		log(LOG_DEBUG, "blaster: Unknown charset : %s", mime.getCharset());*/
1171	
1172	Xml xml;
1173	if (!xml.set(
1174		     content, 
1175		     contentLen,
1176		     false,
1177		     0,
1178		     false,
1179		     TITLEREC_CURRENT_VERSION,
1180		     true, // setparents
1181		     0, // niceness
1182		     CT_XML )){
1183		log(LOG_WARN,"blaster: Couldn't set XML Class in gotDoc4");
1184	}
1185	Links links;
1186	Url *url=mime.getLocationUrl();
1187	if (!links.set(0,//siterec xml
1188		       &xml,
1189		       url,
1190		       false,
1191		       NULL,
1192		       TITLEREC_CURRENT_VERSION,
1193		       0,
1194		       false,
1195		       NULL)){
1196		log(LOG_WARN, "blaster: Couldn't set Links class in gotDoc4");
1197	}
1198	for (int32_t i=0;i<links.getNumLinks();i++){
1199		char *ss=links.getLink(i);
1200		char *p;
1201		// This page *should* always be a gigablast page. So not adding
1202		// checks for msn or yahoo or google page.
1203		p=strstr(ss,"google.");
1204		if(p) continue;
1205		p=strstr(ss,"cache:");  //googles cache page
1206		if(p) continue;
1207		p= strstr(ss,"gigablast.");
1208		if(p) continue;
1209		p= strstr(ss,"web.archive.org");//older copies on gigablast
1210		if(p) continue;
1211		p= strstr(ss,"search.yahoo.com");//from gigablast search
1212		if(p) continue;
1213		p= strstr(ss,"search.msn.com");//from gigablast search
1214		if(p) continue;
1215		p= strstr(ss,"s.teoma.com");//from gigablast search
1216		if(p) continue;
1217		p= strstr(ss,"search.dmoz.org");//from gigablast search
1218		if(p) continue;
1219		p= strstr(ss,"www.answers.com");//from gigablast search
1220		if(p) continue;
1221       		if (m_verbose)
1222			log(LOG_WARN,"blaster: Link Present on server2=%s",ss);
1223	}
1224	
1225	// So if one of the links that is returned is the exact url,
1226	// then we know that the url is present.So get the url from the
1227	// mime, search for it in the links that are returned.
1228	char tmp[1024];
1229	char *sendBuf=s->m_sendBuf;
1230	char *p1,*p2;
1231
1232	// First get the Host, which is the domain. Since socket s is going to
1233	// be useless after this function, changing m_sendBuf instead of using 
1234	// more space
1235	p1=strstr(sendBuf,"%3A");
1236	if(p1){
1237		p1+=3;
1238		p2=strstr(p1," HTTP");
1239		if (p2){
1240			//Since I do not care about the sendbuf anymore
1241			*p2='\0';
1242		}
1243	}
1244	if (!p1 || !p2){
1245		log(LOG_WARN,"blasterdiff: Could not find search link"
1246		    "from m_sendBuf in gotdoc4");
1247	}
1248	else{
1249		sprintf(tmp,"%s",p1);
1250		//log(LOG_WARN,"blaster: tmp in gotDoc4 = %s",tmp);
1251		bool isFound=false;
1252		// So now we search for tmp in the links
1253		for (int32_t i=0;i<links.getNumLinks();i++){
1254			if(strstr(links.getLink(i),tmp) && 
1255			   links.getLinkLen(i)==(int)gbstrlen(tmp)){
1256				isFound=true;
1257				log(LOG_WARN,"blaster: %s in results1 but not"
1258				    " in results2 for query %s but does exist"
1259				    " in server2",tmp,st->m_u1);//->getQuery()
1260			}
1261		}
1262		if (!isFound)
1263			log(LOG_WARN,"blaster: %s in results1 but not"
1264			    " in results2 for query %s and does NOT exist"
1265			    " in server2",tmp,st->m_u1); // ->getQuery()
1266	}
1267	
1268
1269      	if (st->m_numUrlDocsReceived==st->m_numUrlDocsSent){
1270		m_launched--;
1271		// Free stateBD
1272		freeStateBD(st);
1273	}
1274	return;
1275}
1276
1277
1278
1279void Blaster::freeStateBD(StateBD *st){
1280	// Free stateBD's buf
1281	if (!st) return;
1282	if (st->m_buf1)
1283	        mfree(st->m_buf1,st->m_buf1MaxLen,"Blaster5");
1284	mdelete(st,sizeof(StateBD),"Blaster3");
1285}