PageRenderTime 60ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/server/protected/controller/SearchController.php

https://github.com/wilk/SPAM
PHP | 1276 lines | 968 code | 78 blank | 230 comment | 206 complexity | 70bba194034495d7a5ca32d149ce87fd MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. include_once 'protected/model/PostModel.php';
  3. include_once 'protected/model/UserModel.php';
  4. include_once 'protected/model/ThesModel.php';
  5. include_once 'protected/view/PostView.php';
  6. include_once 'protected/controller/ErrorController.php';
  7. include_once 'protected/module/simple_html_dom.php';
  8. class SearchController extends DooController {
  9. private $listaPost = array();
  10. private $toMerge = array();
  11. static $from = "2011/01/01 00:00:00";
  12. private $salt;
  13. private $request;
  14. private $SRV;
  15. private $time;
  16. private $respamOf = null;
  17. private $replyOf = null;
  18. private $listOfReply = array();
  /**
   * Resolves the caller's role from the session and checks it against the ACL.
   *
   * Opens the "ltwlogin" session. When no username is stored in it, the
   * session cookie is expired, the session is destroyed and a new "nologin"
   * session is started with the role 'anonymous'; otherwise the role is the
   * group stored for the logged-in user.
   *
   * @param mixed $resource Resource passed through to the ACL check.
   * @param mixed $action   Action passed through to the ACL check.
   * @return mixed The ACL result when it is truthy; nothing otherwise.
   */
  public function beforeRun($resource, $action) {
      session_name("ltwlogin");
      session_start();

      if (isset($_SESSION['user']['username'])) {
          $role = $_SESSION['user']['group'];
      } else {
          if (ini_get("session.use_cookies")) {
              $cookie = session_get_cookie_params();
              setcookie(
                  session_name(),
                  '',
                  time() - 42000,
                  $cookie["path"],
                  $cookie["domain"],
                  $cookie["secure"],
                  $cookie["httponly"]
              );
          }
          // Close the login session and fall back to an anonymous one.
          session_destroy();
          session_name("nologin");
          session_start();
          $role = 'anonymous';
      }

      // Check the role against the ACL rules.
      $aclResult = $this->acl()->process($role, $resource, $action);
      if ($aclResult) {
          return $aclResult;
      }
  }
  /**
   * Entry point for the /searchserver route: runs searchMain() flagged as an
   * external request.
   *
   * The result of searchMain() is propagated so that the status codes it
   * returns (400, 401, 500, or a remote server's code) are not lost.
   *
   * @return mixed Whatever searchMain() returns.
   */
  public function searchServer() {
      return $this->searchMain(TRUE);
  }
  /**
   * Runs the search selected by the 'type' route parameter and prints the
   * resulting posts.
   *
   * Route parameters read from $this->params:
   *  - 'limit': a number or the string "all";
   *  - 'type' : author | following | recent | related | fulltext | affinity;
   *  - 'var1'..'var3': type-specific arguments (server/user/post id, a
   *    thesaurus term, or the text to search).
   *
   * @param bool $extRequest TRUE when the request arrived through
   *                         /searchserver; in that case the search is not
   *                         forwarded to other servers.
   * @return mixed 400/401/500 (or a remote server's numeric result code) on
   *               failure; nothing once the result has been printed.
   */
  public function searchMain($extRequest = FALSE) {
      $limite = $this->params['limit'];
      if ($limite != "all" && !is_numeric($limite))
          ErrorController::badReq("O numeri o 'all' altro non è consentito");
      $tipo = $this->params['type'];

      $this->load()->helper('DooRestClient');
      $this->request = new DooRestClient;
      $this->SRV = new SRVModel($this->request);

      switch ($tipo) {
          case 'author':
              if (!isset($this->params['var1']) || !isset($this->params['var2']))
                  return 400; // bad request
              $srv = $this->params['var1'];
              $usr = urldecode($this->params['var2']);
              if ($srv == 'Spammers') {
                  // Internal request: the author lives on this server.
                  $user = new UserModel($usr);
                  if (!$user->ifUserExist())
                      ErrorController::notFound("Errore: l'utente $usr non esiste.\n");
                  if (!$user->checkPosts())
                      ErrorController::notFound("Errore: l'utente $usr non ha pubblicato messaggi.\n");
                  $this->rcvFromINTServer($user, $limite);
                  $this->displayPosts();
              } else {
                  // External request: relay the same query to the author's server.
                  $metodo = 'searchserver/' . implode('/', array($limite, $tipo, $srv, $usr));
                  $res = $this->rcvFromEXTServer($srv, $metodo);
                  if (is_numeric($res))
                      return $res;
                  print $res;
              }
              break;

          case 'following':
              if (!isset($_SESSION['user']['username']))
                  return 401;
              $user = new UserModel($_SESSION['user']['username']);
              $follows = $user->getFollows();
              if (sizeof($follows) == 0)
                  ErrorController::notFound('Attualmente non ci sono utenti seguiti.');
              foreach ($follows as $follow) {
                  list($srv, $usr) = explode('/', $follow);
                  if ($srv == 'Spammers') {
                      $this->rcvFromINTServer(new UserModel($usr), $limite);
                  } else {
                      // Followed users on other servers are fetched with an 'author' search.
                      $metodo = 'searchserver/' . implode('/', array($limite, 'author', $srv, $usr));
                      $XMLresult = $this->rcvFromEXTServer($srv, $metodo);
                      if ($XMLresult != false)
                          $this->parseEXTContent($XMLresult);
                  }
              }
              $this->sortPost($limite);
              $this->displayPosts();
              break;

          case 'recent':
              $post = new PostModel();
              if (isset($this->params['var1'])) {
                  $termine = $this->params['var1'];
                  $tesauro = new ThesModel();
                  $pathTerm = $tesauro->returnPath($termine);
                  $tesauro = new ThesModel(TRUE);
                  // A term without a thesaurus path is looked up as a custom tag.
                  if ($pathTerm !== false)
                      $pIDs = $tesauro->getPostsFromThes($pathTerm, $limite, TRUE);
                  else
                      $pIDs = $tesauro->getPostsByCtag($termine, $limite);
                  if ($pIDs != 0)
                      $posts = $post->getPostArray($pIDs);
              } else {
                  // No term given: plain most-recent search.
                  $posts = $post->getPostArray(NULL, $limite);
              }
              if (isset($posts)) {
                  foreach ($posts as $entry) {
                      // Recent posts are ranked by creation time only.
                      $peso = strtotime($entry[key($entry)]['http://purl.org/dc/terms/created'][0]);
                      $this->listaPost[] = array('articolo' => $entry, 'peso' => $peso);
                      $this->toMerge[] = $peso;
                  }
              }
              if ($extRequest === FALSE) {
                  $this->initServers($servers);
                  $metodo = '/' . $tipo;
                  if (isset($this->params['var1']))
                      $metodo .= '/' . $this->params['var1'];
                  // NOTE(review): rcvFromEXTServers() returns false only when the
                  // server list is empty, so "no external servers" ends in a 500.
                  if (!$this->rcvFromEXTServers($servers, $limite, $metodo))
                      return 500;
                  foreach ($servers as $value) {
                      if ($value['code'] === 200)
                          $this->parseEXTContent($value['data']);
                  }
              }
              $this->sortPost($limite);
              $this->displayPosts();
              break;

          case 'related':
              if (!isset($this->params['var1']))
                  return 400; // bad request
              $this->salt = strtotime("now") - strtotime(self::$from);
              $termine = $this->params['var1'];
              $tesauro = new ThesModel();
              $pathTerm = $tesauro->returnPath($termine);
              if ($pathTerm === false)
                  ErrorController::notFound("Il termine non è presente nel tesauro.\n");
              $tesauro = new ThesModel(TRUE);
              $pIDs = $tesauro->getPostsFromThes($pathTerm, $limite);
              $post = new PostModel();
              if ($pIDs)
                  $posts = $post->getPostArray($pIDs);
              if (isset($posts)) {
                  foreach ($posts as $entry) {
                      // Creation time plus the salt scaled by the tag-affinity weight.
                      $peso = strtotime($entry[key($entry)]['http://purl.org/dc/terms/created'][0]);
                      $peso += $this->salt * $this->calcWeight($entry, $pathTerm);
                      $this->listaPost[] = array('articolo' => $entry, 'peso' => $peso);
                      $this->toMerge[] = $peso;
                  }
              }
              if ($extRequest === FALSE) {
                  $this->initServers($servers);
                  $metodo = '/' . implode('/', array($tipo, $termine));
                  if (!$this->rcvFromEXTServers($servers, $limite, $metodo))
                      return 500;
                  foreach ($servers as $value) {
                      if ($value['code'] === 200)
                          $this->parseEXTContent($value['data'], $pathTerm);
                  }
              }
              $this->sortPost($limite);
              $this->displayPosts();
              break;

          case 'fulltext':
              if (!isset($this->params['var1']))
                  ErrorController::badReq("Devi specificare il testo da cercare!!");
              $stringToSearch = urldecode($this->params['var1']);
              $listOfWords = array_unique($this->utf8_str_word_count($stringToSearch, 1));
              $this->fullTextCore($listOfWords, $extRequest, $limite);
              $this->sortPost($limite);
              $this->displayPosts();
              break;

          case 'affinity':
              // All three identifiers (server, user, post id) are required and
              // must be non-empty.
              $missing = false;
              foreach (array('var1', 'var2', 'var3') as $name) {
                  if (!isset($this->params[$name]) || $this->params[$name] == "")
                      $missing = true;
              }
              if ($missing)
                  ErrorController::badReq("Non tutti i parametri sono stati specificati");

              $srv = urldecode($this->params['var1']);
              $usr = urldecode($this->params['var2']);
              $pid = urldecode($this->params['var3']);
              $origLimite = $limite;
              // Number of result slots reserved for the respam/reply relatives
              // of the source post.
              $reserved = 0;
              $post = new PostModel();

              if ($srv == 'Spammers') {
                  $ID = 'spam:/' . implode('/', array($srv, $usr, $pid));
                  if (!$post->postExist($ID))
                      ErrorController::notFound('Questo post non esiste!!');
                  $art = $post->getPost($ID);
                  $data = $art[key($art)];
                  if (isset($data['http://vitali.web.cs.unibo.it/vocabulary/respamOf'])) {
                      $respamOf = explode('spam:', $data['http://vitali.web.cs.unibo.it/vocabulary/respamOf'][0]);
                      $this->respamOf = $respamOf[1];
                      $reserved++;
                  } else if (isset($data['http://rdfs.org/sioc/ns#reply_of'])) {
                      $replyOf = explode('spam:', $data['http://rdfs.org/sioc/ns#reply_of'][0]);
                      $this->replyOf = $replyOf[1];
                      $reserved++;
                  }
                  if (isset($data['http://rdfs.org/sioc/ns#has_reply'])) {
                      foreach ($data['http://rdfs.org/sioc/ns#has_reply'] as $replyPost) {
                          $replyPost = explode("spam:", $replyPost);
                          $this->listOfReply[] = $replyPost[1];
                          $reserved++;
                      }
                  }
                  $content = html_entity_decode($data['http://rdfs.org/sioc/ns#content'][0], ENT_COMPAT, 'UTF-8');
                  $timeOfPost = $data["http://purl.org/dc/terms/created"][0];
              } else {
                  $url = $this->SRV->getUrl($srv);
                  if ($url) {
                      $this->request->connect_to($url . "postserver/$usr/$pid")
                              ->accept(DooRestClient::HTML)
                              ->get();
                      if (!$this->request->isSuccess()) {
                          header("Status:" . $this->request->resultCode());
                          die("C'è stato un problema nella ricezione del post dall'esterno.");
                      }
                      $content = str_get_html($this->request->result());
                      $timeOfPost = $content->find('article', 0)->content;
                      $content = $content->find('article', 0)->innertext;
                      $tempContent = str_get_html($content);
                      // Store the relatives on the object, as the internal branch
                      // does; they used to be assigned to locals that were never
                      // read again.
                      if (isset($tempContent->find("span[rel=tweb:respamOf]", 0)->resource)) {
                          $this->respamOf = $tempContent->find("span[rel=tweb:respamOf]", 0)->resource;
                          $reserved++;
                      } else if (isset($tempContent->find("span[rel=sioc:reply_of]", 0)->resource)) {
                          $this->replyOf = $tempContent->find("span[rel=sioc:reply_of]", 0)->resource;
                          $reserved++;
                      }
                      foreach ($tempContent->find("span[rel=sioc:has_reply]") as $reply) {
                          $this->listOfReply[] = $reply->resource;
                          $reserved++;
                      }
                  } else
                      ErrorController::notFound("il server non esiste");
              }
              // "all" has no numeric budget to shrink.
              if (is_numeric($limite))
                  $limite -= $reserved;

              // Collect the hashtags (skos:Concept spans) of the source post.
              $html = str_get_html(html_entity_decode($content));
              $arr = array();
              foreach ($html->find("span[typeof=skos:Concept]") as $tag)
                  $arr[$tag->about] = 0;

              if (count($arr) == 0) {
                  // No hashtags: fall back to a full-text search on the post text.
                  // NOTE(review): the text is urlencode()d before being split into
                  // words, which breaks non-ASCII words apart — confirm intent.
                  $stringToSearch = urlencode($html->plaintext);
                  $listOfWords = array_unique($this->utf8_str_word_count($stringToSearch, 1));
                  $this->fullTextCore($listOfWords, TRUE, $origLimite);

                  // Drop the source post itself so it is not listed as its own match.
                  $toCheck = implode('/', array($srv, $usr, $pid));
                  foreach ($this->listaPost as $key => $candidate) {
                      if (is_array($candidate['articolo']))
                          $isSource = key($candidate['articolo']) == "spam:/$toCheck";
                      else
                          $isSource = strstr($candidate['articolo'], "about=\"/$toCheck\"") !== false;
                      if ($isSource) {
                          unset($this->listaPost[$key]);
                          unset($this->toMerge[$key]);
                      }
                  }
                  $this->processReleatedPostsFullText($listOfWords);
                  $this->sortPost($origLimite);
                  $this->displayPosts();
                  break;
              }

              // Weigh every local post against the source post's hashtags.
              $allPost = $post->getPostArray(NULL, 'all');
              $tempoPostConfronto = strtotime($timeOfPost);
              if (isset($art)) {
                  // The source post is local: exclude it from the candidates.
                  foreach ($allPost as $key => $myPost) {
                      if (key($myPost) == key($art)) {
                          unset($allPost[$key]);
                          break;
                      }
                  }
              }
              foreach ($allPost as $pID) {
                  $data = $pID[key($pID)];
                  $tempoPostConfrontato = strtotime($data["http://purl.org/dc/terms/created"][0]);
                  $numDislike = $data["http://vitali.web.cs.unibo.it/vocabulary/countDislike"][0];
                  $numLike = $data["http://vitali.web.cs.unibo.it/vocabulary/countLike"][0];
                  $this->pesoAffinity($pID, $arr, $tempoPostConfrontato, $tempoPostConfronto, $numDislike, $numLike);
              }

              if ($extRequest === FALSE) {
                  $this->initServers($servers);
                  $metodo = '/' . $tipo
                          . '/' . $this->params['var1']
                          . '/' . $this->params['var2']
                          . '/' . $this->params['var3']
                          . '/1/1';
                  if (!$this->rcvFromEXTServers($servers, $limite, $metodo))
                      return 500;
                  foreach ($servers as $value) {
                      if ($value['code'] === 200)
                          $this->parseEXTContent3($value['data'], $arr, $tempoPostConfronto);
                  }
              }
              $this->processReleatedPostsAffinity($arr, $tempoPostConfronto);
              $this->sortPost($origLimite);
              $this->displayPosts();
              break;

          default:
              // Unknown search type.
              ErrorController::notImpl();
              break;
      }
  }
  /**
   * Fills $servers with one descriptor per external server to query.
   *
   * The server names come from the logged-in user's server list, or from the
   * defaults of $this->SRV when nobody is logged in. 'Spammers' is skipped.
   * Each descriptor has the keys 'name', 'url', 'code' and 'data'; the last
   * two start at 0.
   *
   * @param mixed $servers Output parameter, overwritten with the descriptor list.
   * @return void
   */
  private function initServers(&$servers) {
      if (isset($_SESSION['user']['username'])) {
          $owner = new UserModel($_SESSION['user']['username']);
          $names = $owner->getServers();
      } else {
          // Internal search with nobody logged in: use the default servers.
          $names = $this->SRV->getDefaults();
      }

      $descriptors = array();
      foreach ($names as $name) {
          if ($name == 'Spammers') {
              continue;
          }
          $descriptors[] = array(
              'name' => $name,
              'url' => $this->SRV->getUrl($name),
              'code' => 0,
              'data' => 0,
          );
      }
      $servers = $descriptors;
  }
  /**
   * Scores how closely an article's hashtags match a thesaurus path.
   *
   * For every skos:Concept span in the article, the path is searched in the
   * tag's `about` value, dropping the last path segment after each miss.
   * A hit scores 1 - (segments dropped / path length) - (tag segments beyond
   * the match / tag segments); a tag that never matches scores minus the
   * number of segments dropped. The best tag score is returned.
   *
   * @param mixed $articolo Article HTML as a string, or a post array whose
   *                        first entry holds the sioc content.
   * @param array $term     Thesaurus path segments.
   * @return int|float Best tag score, or -1 when the article has no tags or
   *                   cannot be parsed.
   */
  private function calcWeight($articolo, $term) {
      if (!is_string($articolo))
          $articolo = html_entity_decode($articolo[key($articolo)]['http://rdfs.org/sioc/ns#content'][0], ENT_QUOTES, 'utf-8');
      $html = str_get_html($articolo);
      if (!$html)
          return -1; // nothing parsable, hence no tags

      $arr = array();
      foreach ($html->find("span[typeof=skos:Concept]") as $tag)
          $arr[$tag->about] = 0;

      $lenght = sizeof($term);
      foreach ($arr as $tag => $peso) {
          $tagText = (string) $tag;
          $termtmp = $term;
          $none = 0;
          while (true) {
              $term2search = '/' . implode('/', $termtmp);
              if (stristr($tagText, $term2search)) {
                  $avanzati = sizeof(explode('/', substr($tagText, strlen($term2search)))) - 1;
                  $totali = sizeof(explode('/', $tagText)) - 1;
                  // An empty path has no "dropped" ratio; avoid dividing by zero.
                  $dropped = ($lenght > 0) ? $none / $lenght : 0;
                  $arr[$tag] = 1 - $dropped - ($avanzati / $totali);
                  break;
              }
              // The original condition compared an int with an array and so
              // never ended the loop; stop once there is nothing left to drop.
              if (count($termtmp) == 0)
                  break;
              $none++;
              array_pop($termtmp);
          }
          if ($arr[$tag] == 0)
              $arr[$tag] -= $none;
      }

      // Posts received from other servers may carry no tags at all.
      if (sizeof($arr) == 0)
          return -1;
      arsort($arr, SORT_NUMERIC);
      return current($arr);
  }
  /**
   * Orders $this->listaPost by descending weight and truncates it.
   *
   * $this->toMerge holds the weights under the same keys as
   * $this->listaPost; it is sorted in place (keys preserved) and used to
   * rebuild listaPost as a re-indexed list.
   *
   * @param mixed $limite Maximum number of posts to keep, or "all".
   * @return void
   */
  private function sortPost($limite) {
      if (sizeof($this->listaPost) == 0)
          ErrorController::notFound("La ricerca non ha prodotto risultati.\n");
      if (!isset($this->toMerge))
          ErrorController::internalError();

      // arsort() already sorts descending; its second argument is a sort-type
      // flag, so SORT_DESC does not belong there.
      arsort($this->toMerge);

      if ($limite != "all")
          $toRender = array_slice($this->toMerge, 0, max(0, (int) $limite), TRUE);
      else
          $toRender = $this->toMerge;

      $ordered = array();
      foreach ($toRender as $k => $unused)
          $ordered[] = $this->listaPost[$k];
      $this->listaPost = $ordered;
  }
  /**
   * Removes the given servers from the logged-in user's server list.
   * Does nothing when nobody is logged in.
   *
   * @param array $badS Names of the servers to drop.
   * @return void
   */
  private function funcoolizer(&$badS) {
      if (isset($_SESSION['user']['username'])) {
          $owner = new UserModel($_SESSION['user']['username']);
          $kept = array_diff($owner->getServers(), $badS);
          $owner->setServers($kept);
      }
  }
  /**
   * Renders $this->listaPost through PostView and prints it as XML.
   *
   * When a user is logged in, the username is passed to the renderer.
   * The original looked it up with the session keys swapped and then
   * overwrote the result unconditionally.
   *
   * @return void
   */
  private function displayPosts() {
      if (sizeof($this->listaPost) == 0)
          ErrorController::notFound("La ricerca non ha prodotto risultati.\n");

      if (isset($_SESSION['user']['username']))
          $XMLPosts = PostView::renderMultiplePost($this->listaPost, $_SESSION['user']['username']);
      else
          $XMLPosts = PostView::renderMultiplePost($this->listaPost);

      $this->setContentType('xml');
      print $XMLPosts;
  }
  /**
   * Sends a GET request (accepting XML) to one external server.
   *
   * @param mixed  $server Server name, resolved to a base URL via $this->SRV.
   * @param string $method Path appended to the server's base URL.
   * @return mixed The response body on success, the result code on failure,
   *               or null when the server has no known URL.
   */
  private function rcvFromEXTServer($server, $method) {
      $baseUrl = $this->SRV->getUrl($server);
      if (!$baseUrl) {
          return;
      }

      $this->request->connect_to($baseUrl . $method)
              ->accept(DooRestClient::XML)
              ->get();

      return $this->request->isSuccess()
              ? $this->request->result()
              : $this->request->resultCode();
  }
  /**
   * Queries several external servers in parallel with curl_multi.
   *
   * Every descriptor in $servers gets its 'data' (response body) and 'code'
   * (HTTP status) filled in. Each request has a 5 second timeout.
   *
   * @param array  $servers Descriptors with at least a 'url' key; updated in place.
   * @param mixed  $limite  Limit segment placed in the request URL.
   * @param string $metodo  Remaining request path, starting with '/'.
   * @return bool false when there are no servers to query, true otherwise.
   */
  private function rcvFromEXTServers(&$servers, $limite, $metodo) {
      if (count($servers) <= 0)
          return false;

      $mh = curl_multi_init();
      $handles = array();
      foreach ($servers as $k => $server) {
          $h = curl_init();
          curl_setopt($h, CURLOPT_URL, $server['url'] . 'searchserver/' . $limite . $metodo);
          curl_setopt($h, CURLOPT_HEADER, 0);
          curl_setopt($h, CURLOPT_RETURNTRANSFER, 1);
          curl_setopt($h, CURLOPT_HTTPHEADER, array(
              "Content-Type: application/xml; charset=utf-8"
          ));
          curl_setopt($h, CURLOPT_TIMEOUT, 5);
          curl_multi_add_handle($mh, $h);
          // Keep the server's own key so results go back to the right entry.
          $handles[$k] = $h;
      }

      $running = null;
      do {
          $status = curl_multi_exec($mh, $running);
          if ($status === CURLM_CALL_MULTI_PERFORM)
              continue;
          if ($status !== CURLM_OK)
              break;
          // Wait for socket activity instead of spinning on curl_multi_exec().
          if ($running > 0 && curl_multi_select($mh, 1.0) === -1)
              usleep(1000);
      } while ($running > 0);

      foreach ($handles as $k => $h) {
          $servers[$k]['data'] = curl_multi_getcontent($h);
          $servers[$k]['code'] = curl_getinfo($h, CURLINFO_HTTP_CODE);
          curl_multi_remove_handle($mh, $h);
      }
      curl_multi_close($mh);
      return true;
  }
  /**
   * Adds the articles found in an external server's XML answer to the
   * result lists.
   *
   * Input that fails validateXML() is ignored. Each article is weighted by
   * the timestamp in its `content` attribute; when $pathTerm is given, the
   * salt multiplied by the calcWeight() score is added, and articles with a
   * negative score are skipped.
   *
   * @param mixed $toParse  XML received from the external server.
   * @param mixed $pathTerm Optional thesaurus path used for the related search.
   * @return void
   */
  private function parseEXTContent($toParse, $pathTerm = NULL) {
      if (!$this->validateXML($toParse)) {
          return;
      }

      $document = str_get_html($toParse);
      foreach ($document->find('article') as $articolo) {
          $markup = $articolo->outertext;
          $peso = strtotime($articolo->content);
          if ($pathTerm) {
              $weight = $this->calcWeight($articolo->innertext, $pathTerm);
              // A negative score means the post is unrelated to the term.
              if ($weight < 0) {
                  continue;
              }
              $peso += $this->salt * $weight;
          }
          $this->listaPost[] = array('articolo' => $markup, 'peso' => $peso);
          $this->toMerge[] = $peso;
      }
  }
  /**
   * Full-text variant of parseEXTContent(): weighs each article of an
   * external XML answer with pesoFullText() and keeps those in which at
   * least one searched word was found.
   *
   * @param mixed $toParse     XML received from the external server.
   * @param array $listOfWords Words being searched.
   * @return void
   */
  private function parseEXTContent2($toParse, $listOfWords) {
      if (!$this->validateXML($toParse)) {
          return;
      }

      $document = str_get_html($toParse);
      foreach ($document->find('article') as $articolo) {
          $findTerm = null; // filled in by pesoFullText()
          $myPeso = $this->pesoFullText($articolo, $listOfWords, $findTerm, $articolo->content);
          if ($findTerm == 0) {
              continue;
          }
          $this->listaPost[] = array(
              "articolo" => $articolo->outertext,
              "peso" => $myPeso,
          );
          $this->toMerge[] = array(
              "peso" => $myPeso,
          );
      }
  }
  /**
   * Affinity variant of parseEXTContent(): passes every article of an
   * external XML answer to pesoAffinity(), together with its creation time
   * and like/dislike counters.
   *
   * @param mixed $toParse            XML received from the external server.
   * @param array $arr                Hashtags of the source post (as keys).
   * @param mixed $tempoPostConfronto Timestamp of the source post.
   * @return void
   */
  private function parseEXTContent3($toParse, $arr, $tempoPostConfronto) {
      if (!$this->validateXML($toParse)) {
          return;
      }

      $document = str_get_html($toParse);
      foreach ($document->find('article') as $articolo) {
          $created = strtotime($articolo->content);
          $dislikes = $articolo->find('span[property=tweb:countDislike]', 0)->content;
          $likes = $articolo->find('span[property=tweb:countLike]', 0)->content;
          $this->pesoAffinity($articolo->outertext, $arr, $created, $tempoPostConfronto, $dislikes, $likes);
      }
  }
  /**
   * Weighs one article against the hashtags of the source post and records
   * it in the result lists when the weight is positive.
   *
   * The tag score is the sum of calcWeight() over every hashtag plus $bonus.
   * It is multiplied by 1000, divided by the distance in hours between the
   * two posts, then scaled by the like/dislike difference. Articles created
   * at exactly the same time as the source post are ignored.
   *
   * When $toCheck is given, an entry already in $this->listaPost for that
   * post gets its weight replaced instead of a new entry being appended.
   *
   * @param mixed $articolo             Post array or article HTML.
   * @param array $arr                  Hashtags of the source post (as keys).
   * @param mixed $tempoPostConfrontato Timestamp of the article being weighed.
   * @param mixed $tempoPostConfronto   Timestamp of the source post.
   * @param mixed $numDislike           Dislike count of the article.
   * @param mixed $numLike              Like count of the article.
   * @param mixed $bonus                Amount added to the tag score.
   * @param mixed $toCheck              Optional post path to look up in listaPost.
   * @return void
   */
  private function pesoAffinity($articolo, $arr, $tempoPostConfrontato, $tempoPostConfronto, $numDislike, $numLike, $bonus= 0, $toCheck=null) {
      foreach ($arr as $key => $peso) {
          $pathTerm = explode('/', $key);
          unset($pathTerm[0]); // drop the empty piece before the leading '/'
          $arr[$key] = $this->calcWeight($articolo, $pathTerm);
      }
      $sumPeso = array_sum($arr) + $bonus;

      // Only positively related posts count; equal timestamps would divide by zero.
      if ($sumPeso <= 0 || $tempoPostConfrontato == $tempoPostConfronto)
          return;

      $hoursApart = abs($tempoPostConfrontato - $tempoPostConfronto) / 3600;
      $realPeso = ($sumPeso * 1000) / $hoursApart;
      if ($numDislike > $numLike)
          $realPeso = $realPeso / ($numDislike - $numLike);
      else if ($numLike > $numDislike)
          $realPeso = $realPeso * ($numLike - $numDislike);
      $rounded = round($realPeso, 5);

      if ($toCheck != null) {
          foreach ($this->listaPost as $key => $art) {
              if (is_array($art['articolo']))
                  $samePost = key($art['articolo']) == "spam:$toCheck";
              else
                  // Look for the about attribute inside the article markup;
                  // the arguments used to be passed the other way round.
                  $samePost = strstr($art['articolo'], "about=\"$toCheck\"") !== false;
              if ($samePost) {
                  $this->listaPost[$key]['peso'] = $rounded;
                  $this->toMerge[$key]['peso'] = $rounded;
                  return;
              }
          }
      }

      $this->listaPost[] = array(
          "articolo" => $articolo,
          "peso" => $rounded,
      );
      $this->toMerge[] = array(
          "peso" => $rounded,
      );
  }
  /**
   * Computes the full-text weight of an article for a list of searched words.
   *
   * Searched words of one character are ignored. For words containing an
   * apostrophe only the part after the first apostrophe is compared. A
   * content word equal to the searched word is an exact match; one that
   * merely contains it (case-insensitively) is a partial match and counts
   * half. The weight is (exact + partial / 2) * 3600000 divided by
   * $this->time - (creation time - 3600), multiplied by the square of the
   * number of searched words found.
   *
   * @param object $articolo    Article node; its `plaintext` is searched.
   * @param array  $listOfWords Words being searched.
   * @param int    $findTerm    Output: how many searched words were found.
   * @param string $creato      Creation date of the article.
   * @return float|null The weight rounded to 5 decimals, or null when no
   *                    searched word was found.
   */
  private function pesoFullText($articolo, $listOfWords, &$findTerm, $creato) {
      $text = $articolo->plaintext;
      $findTerm = 0;
      $exactHits = 0;
      $partialHits = 0;
      $contentWords = $this->utf8_str_word_count($text, 1);

      foreach ($listOfWords as $word) {
          if (strlen((string) $word) <= 1) {
              continue;
          }
          if (stristr((string) $word, "'") !== false) {
              $pieces = explode("'", (string) $word);
              $word = $pieces[1];
          }

          $seen = false;
          foreach ($contentWords as $candidate) {
              if (stristr((string) $candidate, "'") !== false) {
                  $pieces = explode("'", (string) $candidate);
                  $candidate = $pieces[1];
              }
              if ($candidate == $word) {
                  $exactHits++;
                  $seen = true;
              } else if (stristr((string) $candidate, (string) $word)) {
                  $partialHits++;
                  $seen = true;
              }
          }
          if ($seen) {
              $findTerm++;
          }
      }

      if ($findTerm == 0) {
          return;
      }
      $tempo = $this->time - (strtotime($creato) - 3600);
      $peso = ((($exactHits + ($partialHits * 0.5)) * 3600000) / $tempo) * ($findTerm * $findTerm);
      return round($peso, 5);
  }
  /**
   * Adds the posts of a local user to the result lists, weighted by their
   * creation time.
   *
   * @param object $usr       User model exposing getPosts().
   * @param mixed  $countPost Limit passed to getPosts().
   * @return void
   */
  private function rcvFromINTServer($usr, $countPost) {
      $model = new PostModel();
      $ids = $usr->getPosts($countPost);
      foreach ($model->getPostArray($ids) as $entry) {
          $created = strtotime($entry[key($entry)]['http://purl.org/dc/terms/created'][0]);
          $this->listaPost[] = array('articolo' => $entry, 'peso' => $created);
          $this->toMerge[] = $created;
      }
  }
  /**
   * UTF-8 aware counterpart of str_word_count().
   *
   * A word is a maximal run of Unicode letters, combining marks, dash
   * punctuation, apostrophes (' and U+2019) and any character in $charlist.
   *
   * In list mode every word is lower-cased with strtolower(); a word that
   * contains an ASCII apostrophe is reduced to the part starting at its first
   * apostrophe (e.g. "L'Albero" becomes "'albero").
   *
   * @param string      $string   text to scan
   * @param int         $format   0 returns the number of words, any other
   *                              value returns the list of words
   * @param string|null $charlist extra characters treated as word characters
   * @return int|array word count, or list of normalised words
   */
  function utf8_str_word_count($string, $format = 0, $charlist = null) {
      // Cast the char list so a null default is never handed to preg_quote().
      $pattern = '~[\p{L}\p{Mn}\p{Pd}\'\x{2019}' . preg_quote((string) $charlist, '~') . ']+~u';
      $matches = array();
      preg_match_all($pattern, (string) $string, $matches);
      // With no match (or on a PCRE error) $matches[0] is empty or missing;
      // always work on the flat word list so "no words" really means zero.
      $words = isset($matches[0]) ? $matches[0] : array();
      if ($format == 0) {
          return count($words);
      }
      foreach ($words as $index => $word) {
          $lowered = strtolower((string) $word);
          // Take the apostrophe suffix from the lower-cased form so elided
          // words are normalised the same way as plain ones.
          $fromApostrophe = strstr($lowered, "'");
          $words[$index] = ($fromApostrophe !== false) ? $fromApostrophe : $lowered;
      }
      return $words;
  }
  /**
   * Tells whether an XML string is well-formed and valid against the
   * schema stored in data/archive.xsd.
   *
   * libxml diagnostics are collected internally (no PHP warnings are emitted)
   * and the collected errors are discarded before returning, so repeated
   * validations do not keep piling entries up in libxml's error buffer.
   *
   * @param string $toParse XML document to validate
   * @return bool true when the document parses and validates, false otherwise
   *              (including when $toParse is empty)
   */
  private function validateXML($toParse) {
      if ($toParse == "") {
          return false;
      }
      libxml_use_internal_errors(true);
      $xdoc = new DOMDocument();
      // Only run schema validation on a document that actually parsed.
      $valid = $xdoc->loadXML($toParse) && $xdoc->schemaValidate('data/archive.xsd');
      // Debug hint: inspect libxml_get_errors() here, before the buffer is emptied.
      libxml_clear_errors();
      return $valid;
  }
  /**
   * Runs pesoAffinity() on the posts related to the one being processed.
   *
   * The post referenced by $this->respamOf is handled with factor 5; when
   * there is no respam reference, the one in $this->replyOf is handled with
   * factor 5 instead. Afterwards every reference in $this->listOfReply is
   * handled with factor 2.
   *
   * @param mixed $arr                forwarded unchanged to pesoAffinity()
   * @param mixed $tempoPostConfronto forwarded unchanged to pesoAffinity()
   * @return void
   */
  private function processReleatedPostsAffinity($arr, $tempoPostConfronto) {
      if ($this->respamOf != null) {
          $this->affinityForRelatedPost($this->respamOf, $arr, $tempoPostConfronto, 5);
      } else if ($this->replyOf != null) {
          $this->affinityForRelatedPost($this->replyOf, $arr, $tempoPostConfronto, 5);
      }
      if (!empty($this->listOfReply)) {
          foreach ($this->listOfReply as $artReply) {
              $this->affinityForRelatedPost($artReply, $arr, $tempoPostConfronto, 2);
          }
      }
  }

  /**
   * Loads one related post and passes it to pesoAffinity().
   *
   * The reference is split on '/' and its segments 1, 2 and 3 are used as
   * server, user and post id. Posts on a server other than "Spammers" are
   * fetched over HTTP from "<server url>postserver/<user>/<post id>"; posts
   * on "Spammers" are read through PostModel. Nothing happens when the
   * reference is malformed or the post cannot be retrieved.
   *
   * @param string $ref                post reference (four '/'-separated segments)
   * @param mixed  $arr                forwarded unchanged to pesoAffinity()
   * @param mixed  $tempoPostConfronto forwarded unchanged to pesoAffinity()
   * @param int    $weight             seventh argument given to pesoAffinity()
   * @return void
   */
  private function affinityForRelatedPost($ref, $arr, $tempoPostConfronto, $weight) {
      $parts = explode('/', (string) $ref);
      if (count($parts) < 4) {
          // Malformed reference: server, user or post id is missing.
          return;
      }
      $srv = $parts[1];
      $usr = $parts[2];
      $pid = $parts[3];

      if ($srv !== "Spammers") {
          $url = $this->SRV->getUrl($srv);
          if (!$url) {
              return;
          }
          $this->request->connect_to($url . "postserver/$usr/$pid")
                  ->accept(DooRestClient::HTML)
                  ->get();
          if (!$this->request->isSuccess()) {
              return;
          }
          $articolo = str_get_html($this->request->result());
          if (!$articolo) {
              // The response body could not be parsed into a DOM.
              return;
          }
          // NOTE(review): `content` is read from the parsed document object
          // itself rather than from a specific element - confirm this yields
          // the creation date of the remote post.
          $tempoPostConfrontato = strtotime($articolo->content);
          // A missing counter element yields null instead of a fatal error.
          $dislikeNode = $articolo->find('span[property=tweb:countDislike]', 0);
          $likeNode = $articolo->find('span[property=tweb:countLike]', 0);
          $numDislike = $dislikeNode ? $dislikeNode->content : null;
          $numLike = $likeNode ? $likeNode->content : null;
          $this->pesoAffinity($articolo->outertext, $arr, $tempoPostConfrontato, $tempoPostConfronto, $numDislike, $numLike, $weight, $ref);
          return;
      }

      $post = new PostModel();
      $pID = 'spam:/' . implode('/', array($srv, $usr, $pid));
      if (!$post->postExist($pID)) {
          return;
      }
      $art = $post->getPost($pID);
      $subject = key($art);
      $tempoPostConfrontato = strtotime($art[$subject]["http://purl.org/dc/terms/created"][0]);
      $numDislike = $art[$subject]["http://vitali.web.cs.unibo.it/vocabulary/countDislike"][0];
      $numLike = $art[$subject]["http://vitali.web.cs.unibo.it/vocabulary/countLike"][0];
      $this->pesoAffinity($art, $arr, $tempoPostConfrontato, $tempoPostConfronto, $numDislike, $numLike, $weight, $ref);
  }
  948. private function processReleatedPostsFullText($listOfWords) {
  949. if ($this->replyOf != null) {
  950. $tempArray = explode('/', $this->replyOf);
  951. $srv = $tempArray[1];
  952. $usr = $tempArray[2];
  953. $pid = $tempArray[3];
  954. if ($srv != "Spammers") {
  955. $url = $this->SRV->getUrl($srv);
  956. if ($url) {
  957. //print "La richiesta è:".$url."postserver/$usr/$pid\n\r";
  958. $this->request->connect_to($url . "postserver/$usr/$pid")
  959. ->accept(DooRestClient::HTML)
  960. ->get();
  961. if ($this->request->isSuccess()) {
  962. $articolo = str_get_html($this->request->result());
  963. $creato = strtotime($articolo->content);
  964. $findTerm;
  965. $myPeso = $this->pesoFullText($articolo->outertext, $listOfWords, $findTerm, $creato) + 100;
  966. $update = false;
  967. foreach ($this->listaPost as $key => $art) {
  968. if (is_array($art['articolo'])) {
  969. // print "\n\rSono un array\n\r";
  970. // print "la key è". key($art['articolo'])."\n\r";
  971. // print "L'articolo è $toCheck\n\r";
  972. if (key($art['articolo']) == "spam:$this->replyOf") {
  973. // print "siamo uguali ovviamente e il mio peso è:" .round($realPeso, 5);
  974. // print "\n\rMentre il peso attuale è ". $art['peso'];
  975. $this->listaPost[$key]['peso'] = $myPeso;
  976. $this->toMerge[$key]['peso'] = $myPeso;
  977. $update = true;
  978. }
  979. } else {
  980. if (strstr("about=\"$this->replyOf\"", $art['articolo'])) {
  981. $this->listaPost[$key]['peso'] = $myPeso;
  982. $this->toMerge[$key]['peso'] = $myPeso;
  983. $update = true;
  984. }

Large files are truncated, but you can click here to view the full file