PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/docs/html/search.php

https://github.com/FMP196/ppnetwork
PHP | 382 lines | 341 code | 19 blank | 22 comment | 42 complexity | 8663fe4221dd2cbfb59afd50a20eebbc MD5 | raw file
  1. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  2. <html><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
  3. <title>Search</title>
  4. <link href="doxygen.css" rel="stylesheet" type="text/css">
  5. <link href="tabs.css" rel="stylesheet" type="text/css">
  6. </head><body>
  7. <!-- Generated by Doxygen 1.5.6 -->
  8. <div class="tabs">
  9. <ul>
  10. <li><a href="index.html"><span>Main&nbsp;Page</span></a></li>
  11. <li><a href="annotated.html"><span>Data&nbsp;Structures</span></a></li>
  12. <li><a href="files.html"><span>Files</span></a></li>
  13. <li><a href="dirs.html"><span>Directories</span></a></li>
  14. <li>
  15. <form action="search.php" method="get">
  16. <table cellspacing="0" cellpadding="0" border="0">
  17. <tr>
  18. <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
  19. <?php
  // Heading text printed above the list of search results.
  // Returns: the heading as a plain string.
  function search_results()
  {
    $heading = "Search Results";
    return $heading;
  }
  // Builds the one-line summary shown above the result list.
  // $num: number of matching documents.
  // Returns: an HTML fragment; for counts other than 0 the number is
  //          wrapped in <b>.
  function matches_text($num)
  {
    // switch compares loosely, exactly like the == tests it replaces.
    switch ($num)
    {
      case 0:
        return "Sorry, no documents matching your query.";
      case 1:
        return "Found <b>1</b> document matching your query.";
    }
    // anything else is treated as "more than one"
    return "Found <b>$num</b> documents matching your query. Showing best matches first.";
  }
  // Label printed in front of the matched words of each result.
  // Returns: the label, including its trailing space.
  function report_matches()
  {
    $label = "Matches: ";
    return $label;
  }
  // Emits the query text box and closes the search form, the enclosing
  // table, the list item, the tab list and the tabs <div>.
  // $value: text to pre-fill the query box with.
  // The value is HTML-escaped here so that the attribute stays well formed
  // (and cannot be broken out of) regardless of what the caller passes in.
  function end_form($value)
  {
    $safeValue = htmlspecialchars((string)$value, ENT_QUOTES, 'UTF-8');
    echo " <td><input type=\"text\" name=\"query\" value=\"$safeValue\" size=\"20\" accesskey=\"s\"/></td>\n </tr>\n </table>\n </form>\n </li>\n </ul>\n</div>\n";
  }
  // Reads four bytes from $file and combines them into one integer,
  // most significant byte first.
  // $file: open file handle positioned at the integer.
  // Returns: the decoded integer.
  function readInt($file)
  {
    $value = 0;
    for ($n = 0; $n < 4; $n++)
    {
      // shift what we have so far up one byte and append the next one
      $value = ($value << 8) | ord(fgetc($file));
    }
    return $value;
  }
  // Reads characters from $file up to (and consuming) the next byte whose
  // ordinal value is zero.
  // $file: open file handle positioned at the start of the string.
  // Returns: the characters read, without the terminator.
  function readString($file)
  {
    $text = "";
    for (;;)
    {
      $ch = fgetc($file);
      if (ord($ch) == 0)
      {
        break; // terminator reached
      }
      $text .= $ch;
    }
    return $text;
  }
  // Reads the next four characters from $file and returns them as one
  // string; main() compares the result against the "DOXS" signature.
  // $file: open file handle.
  // Returns: the concatenated characters.
  function readHeader($file)
  {
    $magic = "";
    for ($n = 0; $n < 4; $n++)
    {
      $magic .= fgetc($file);
    }
    return $magic;
  }
  // Maps a word to its slot number in the index table.
  // The slot is derived from the first two bytes of the word only
  // (first byte * 256 + second byte), so every word sharing a two-character
  // prefix lands in the same slot; this is what allows prefix (substring)
  // search. A previously used rolling 16-bit hash over the whole word was
  // disabled in favour of this scheme.
  // $word: the word to look up.
  // Returns: the slot number, or -1 when the word is shorter than two
  //          characters or one of its first two bytes is NUL.
  function computeIndex($word)
  {
    if (strlen($word) < 2) return -1;
    // high char of the index ([] offsets: the {} form no longer parses on PHP 8)
    $hi = ord($word[0]);
    if ($hi == 0) return -1;
    // low char of the index
    $lo = ord($word[1]);
    if ($lo == 0) return -1;
    // return index
    return $hi * 256 + $lo;
  }
  // Looks up every indexed word that starts with $word and appends one
  // entry per such word to $statsList.
  //
  // Index layout as read here: a 4-byte header, then a table of 4-byte
  // offsets addressed by computeIndex(); each non-zero offset points at a
  // list of (NUL-terminated word, 4-byte statistics offset) pairs that ends
  // with an empty word. A statistics record is a document count followed by
  // (document offset, frequency) pairs; the lowest bit of the stored
  // frequency marks a high priority document. A document record is two
  // NUL-terminated strings: name and url.
  //
  // $file:      open handle of the search index.
  // $word:      word to search for (the caller passes it lower-cased).
  // $statsList: in/out list; each appended entry has the keys "word",
  //             "match", "index", "full" and "docs", where every element of
  //             "docs" has "idx", "freq", "rank", "hi", "name" and "url".
  // Returns: $statsList.
  function search($file,$word,&$statsList)
  {
    $index = computeIndex($word);
    if ($index == -1) return $statsList; // word too short to be indexed

    fseek($file,$index*4+4); // 4 bytes per entry, skip header
    $index = readInt($file);
    if (!$index) return $statsList; // no words stored under this key

    // collect the words that have $word as a prefix
    $start   = sizeof($statsList);
    $count   = $start;
    $wordLen = strlen($word);
    fseek($file,$index);
    $w = readString($file);
    // Compare against "" explicitly: a plain truth test would also stop on "0".
    while ($w !== "")
    {
      $statIdx = readInt($file);
      // Byte-wise prefix test. A loose == would treat numeric-looking
      // strings such as "0e1" and "0e2" as equal.
      if (strncmp($w,$word,$wordLen) === 0)
      {
        $statsList[$count++] = array(
          "word"  => $word,
          "match" => $w,
          "index" => $statIdx,
          "full"  => strlen($w) == $wordLen,
          "docs"  => array()
        );
      }
      $w = readString($file);
    }

    // first pass: load the document lists and accumulate frequency totals
    $totalHi     = 0;
    $totalFreqHi = 0;
    $totalFreqLo = 0;
    $end = sizeof($statsList);
    for ($count=$start;$count<$end;$count++)
    {
      // whole word matches have a double weight
      $multiplier = $statsList[$count]["full"] ? 2 : 1;
      fseek($file,$statsList[$count]["index"]);
      $numDocs = readInt($file);
      $docInfo = array();
      // read docs info + occurrence frequency of the word
      for ($i=0;$i<$numDocs;$i++)
      {
        $idx  = readInt($file);
        $freq = readInt($file);
        $docInfo[$i] = array("idx"  => $idx,
                             "freq" => $freq>>1,
                             "rank" => 0.0,
                             "hi"   => $freq&1
                            );
        // NOTE(review): the totals below use the raw stored value (priority
        // bit included) while the per-document rank uses $freq>>1. Kept
        // as-is so the ranking stays unchanged; confirm whether intended.
        if ($freq&1) // word occurs in high priority doc
        {
          $totalHi++;
          $totalFreqHi += $freq*$multiplier;
        }
        else // word occurs in low priority doc
        {
          $totalFreqLo += $freq*$multiplier;
        }
      }
      // read name and url info for the doc
      for ($i=0;$i<$numDocs;$i++)
      {
        fseek($file,$docInfo[$i]["idx"]);
        $docInfo[$i]["name"] = readString($file);
        $docInfo[$i]["url"]  = readString($file);
      }
      $statsList[$count]["docs"] = $docInfo;
    }

    // second pass: turn the frequencies into ranks relative to the total
    $totalFreq = ($totalHi+1)*$totalFreqLo + $totalFreqHi;
    for ($count=$start;$count<$end;$count++)
    {
      // whole word matches have a double weight
      $multiplier = $statsList[$count]["full"] ? 2 : 1;
      $numDocs = sizeof($statsList[$count]["docs"]);
      for ($i=0;$i<$numDocs;$i++)
      {
        // compute frequency rank of the word in each doc;
        // high priority docs get the low priority total added on top
        $score = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
        if ($statsList[$count]["docs"][$i]["hi"])
        {
          $score += $totalFreqLo;
        }
        // All stored frequencies being zero would make $totalFreq zero;
        // leave the rank at 0.0 then instead of dividing by zero.
        $statsList[$count]["docs"][$i]["rank"] =
          ($totalFreq > 0) ? (float)$score/$totalFreq : 0.0;
      }
    }
    return $statsList;
  }
  // Merges the per-word search results into one entry per document.
  // $results: list of word entries; each needs the keys "word", "match" and
  //           "docs", and every doc needs "url", "name", "rank" and "freq".
  // $docs:    in/out map keyed by document url. Each entry holds "url",
  //           "name", the summed "rank" over all words that hit the
  //           document, and "words": a list of ("word","match","freq").
  // Returns: $docs.
  function combine_results($results,&$docs)
  {
    foreach ($results as $wordInfo)
    {
      foreach ($wordInfo["docs"] as $di)
      {
        $key  = $di["url"];
        $rank = $di["rank"];
        // Direct key lookup: constant time, and unlike a loose in_array()
        // over array_keys() it cannot confuse numeric-looking urls
        // ("1e3" vs "1000").
        if (array_key_exists($key,$docs))
        {
          $docs[$key]["rank"] += $rank;
        }
        else
        {
          $docs[$key] = array("url"  => $key,
                              "name" => $di["name"],
                              "rank" => $rank
                             );
        }
        $docs[$key]["words"][] = array(
          "word"  => $wordInfo["word"],
          "match" => $wordInfo["match"],
          "freq"  => $di["freq"]
        );
      }
    }
    return $docs;
  }
  // Drops documents that miss a required word or contain a forbidden one.
  // $docs:           map of documents as built by combine_results(); each
  //                  entry needs a "words" list whose items have a "word".
  // $requiredWords:  list of words that must ALL have hit the document.
  // $forbiddenWords: list of words of which NONE may have hit the document.
  // Returns: a new map with the surviving documents, keys and order kept.
  function filter_results($docs,&$requiredWords,&$forbiddenWords)
  {
    $filteredDocs = array();
    // foreach instead of each(): each() no longer exists on PHP 8.
    foreach ($docs as $key => $doc)
    {
      $words = $doc["words"];
      $copy = 1; // copy entry by default
      foreach ($requiredWords as $reqWord)
      {
        $found = 0;
        foreach ($words as $wordInfo)
        {
          if ($wordInfo["word"] === $reqWord)
          {
            $found = 1;
            break;
          }
        }
        if (!$found)
        {
          $copy = 0; // document is missing at least one required word
          break;
        }
      }
      if ($copy)
      {
        foreach ($words as $wordInfo)
        {
          if (in_array($wordInfo["word"],$forbiddenWords,true))
          {
            $copy = 0; // document contains a forbidden word
            break;
          }
        }
      }
      if ($copy) $filteredDocs[$key] = $doc;
    }
    return $filteredDocs;
  }
  // Comparison callback that orders documents by descending "rank".
  // $a, $b: document entries with a "rank" key.
  // Returns: 0 for equal ranks, -1 when $a ranks higher, 1 otherwise.
  function compare_rank($a,$b)
  {
    $left  = $a["rank"];
    $right = $b["rank"];
    if ($left == $right)
    {
      return 0;
    }
    if ($left > $right)
    {
      return -1; // better rank sorts first
    }
    return 1;
  }
  // Sorts the documents with compare_rank(), i.e. best rank first.
  // $docs:   documents to sort; the argument itself is not modified.
  // $sorted: out parameter receiving the sorted, re-indexed list.
  // Returns: the same sorted list.
  function sort_results($docs,&$sorted)
  {
    $comparator = "compare_rank";
    $sorted = $docs;
    usort($sorted,$comparator);
    return $sorted;
  }
  // Prints the result table: a heading row, a summary row and, per
  // document, one row with its number and link followed by one row that
  // lists the matched words with their frequencies.
  // $docs: list of documents; each needs "url", "name" and "words", and
  //        every word needs "word", "match" and "freq".
  // NOTE(review): url, name and the matched words are printed without HTML
  // escaping. They come from the index file (the word is the query prefix
  // that matched an indexed word); whether the index stores them already
  // escaped cannot be told from here, so the output is left untouched.
  function report_results(&$docs)
  {
    echo "<table cellspacing=\"2\">\n";
    echo " <tr>\n";
    echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
    echo " </tr>\n";
    $numDocs = sizeof($docs);
    if ($numDocs==0)
    {
      echo " <tr>\n";
      echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
      echo " </tr>\n";
    }
    else
    {
      echo " <tr>\n";
      echo " <td colspan=\"2\">".matches_text($numDocs);
      echo "\n";
      echo " </td>\n";
      echo " </tr>\n";
      $num=1;
      foreach ($docs as $doc)
      {
        echo " <tr>\n";
        echo " <td align=\"right\">$num.</td>";
        echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
        // close the link row before opening the matches row; previously a
        // second <tr> was opened here and the first one was never closed
        echo " </tr>\n";
        echo " <tr>\n";
        echo " <td></td><td class=\"tiny\">".report_matches()." ";
        foreach ($doc["words"] as $wordInfo)
        {
          $word = $wordInfo["word"];
          // part of the indexed word that follows the typed prefix
          $matchRight = substr($wordInfo["match"],strlen($word));
          echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
        }
        echo " </td>\n";
        echo " </tr>\n";
        $num++;
      }
    }
    echo "</table>\n";
  }
  // Request entry point: finishes the search form, runs the query from
  // $_GET["query"] against search.idx and prints the ranked results.
  // Query syntax: words separated by spaces; a leading '+' marks a word as
  // required, a leading '-' marks it as forbidden.
  // Terminates the script with an error message when the PHP version is too
  // old or the index file cannot be opened or has the wrong signature.
  function main()
  {
    // version_compare() orders versions numerically; a plain string
    // comparison would consider "10.0.0" older than "4.1.0".
    if (version_compare(phpversion(), '4.1.0', '<'))
    {
      die("Error: PHP version 4.1.0 or above required!");
    }
    if (!($file=fopen("search.idx","rb")))
    {
      die("Error: Search index file could NOT be opened!");
    }
    if (readHeader($file)!=="DOXS")
    {
      fclose($file);
      die("Error: Header of index file is invalid!");
    }
    $query="";
    // ignore non-string input such as ?query[]=x
    if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
    {
      $query=$_GET["query"];
    }
    // Echo the query back into the form with everything except letters,
    // digits, ':', '.', tab and space blanked out. preg_replace() stands in
    // for ereg_replace(), which no longer exists as of PHP 7.
    end_form(preg_replace('/[^[:alnum:]:.\t ]/', " ", $query));
    echo "&nbsp;\n<div class=\"searchresults\">\n";
    $results = array();
    $requiredWords = array();
    $forbiddenWords = array();
    $foundWords = array(); // set of words already searched (word => true)
    $word=strtok($query," ");
    // strtok() returns false when the tokens run out; testing for that
    // explicitly keeps a token like "0" from ending the loop early
    while ($word!==false) // for each word in the search query
    {
      // The index is searched lower-cased, so the required/forbidden lists
      // have to hold the lower-cased form too or they could never match.
      $word = strtolower($word);
      $isRequired  = false;
      $isForbidden = false;
      if ($word!=="" && $word[0]=='+') { $word=(string)substr($word,1); $isRequired=true; }
      if ($word!=="" && $word[0]=='-') { $word=(string)substr($word,1); $isForbidden=true; }
      if ($word!=="") // a bare "+" or "-" carries no word
      {
        if ($isRequired)  $requiredWords[]=$word;
        if ($isForbidden) $forbiddenWords[]=$word;
        if (!isset($foundWords[$word]))
        {
          $foundWords[$word]=true;
          search($file,$word,$results);
        }
      }
      $word=strtok(" ");
    }
    $docs = array();
    combine_results($results,$docs);
    // filter out documents with forbidden word or that do not contain
    // required words
    $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
    // sort the results based on rank
    $sorted = array();
    sort_results($filteredDocs,$sorted);
    // report results to the user
    report_results($sorted);
    echo "</div>\n";
    fclose($file);
  }
  357. main();
  358. ?>
  359. <hr size="1"><address style="text-align: right;"><small>Generated on Fri Sep 30 19:04:15 2011 for CSP-Network (Client Server Proxy Network) by&nbsp;
  360. <a href="http://www.doxygen.org/index.html">
  361. <img src="doxygen.png" alt="doxygen" align="middle" border="0"></a> 1.5.6 </small></address>
  362. </body>
  363. </html>