PageRenderTime 48ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/library/UrlTube/Bot.php

https://bitbucket.org/baruffaldi/webapp-urltube
PHP | 480 lines | 361 code | 94 blank | 25 comment | 79 complexity | 977f674abef17113e5f33ec5eecc9a1c MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, MIT
  1. <?php
  2. require 'Zend' . DIRECTORY_SEPARATOR . 'Http' . DIRECTORY_SEPARATOR . 'Client.php';
  /**
   * Crawler-side client for a single URL.
   *
   * Wraps a Zend_Http_Client probe request plus an ICMP ping, and turns the
   * result into rows for the URL and scan tables configured in the global
   * $_SITE registry. Written in PHP 5 compatible syntax on purpose, to match
   * the rest of the code base.
   */
  class Zend_UrlTube_Client
  {
      public $url = NULL;
      public $protocol = NULL;
      public $service = NULL;
      public $mimetype = NULL;
      public $mime = NULL;
      public $type = NULL;
      public $remoteHostname = NULL;
      public $remoteIp = NULL;
      public $remotePort = NULL;
      public $localHostname = NULL;
      public $localIp = NULL;
      public $localPort = NULL;
      public $netStatus = NULL;
      public $serviceStatus = NULL;
      public $serviceStatusText = NULL;
      public $title = NULL;
      public $description = NULL;
      public $html_options = array( );
      public $html_handler = NULL;
      public $defaultTestMaxRedirects = 10;
      public $defaultTestTimeout = 3;
      public $table_url = NULL;

      // The three properties below were previously created dynamically; they are
      // declared so that reading them before doClient()/ping() is well defined.

      /** HTTP client holding the last probe response; set by doClient(). */
      public $test_handler = NULL;

      /** Options passed to the HTTP client by doClient(). */
      public $test_options = array( );

      /** Raw output lines of the last successful ping(). */
      public $result_ping = NULL;

      /**
       * Binds the client to a URL and resolves protocol, service and URL table
       * through the global $_SITE registry.
       *
       * @param string $url URL this client will probe.
       */
      public function __construct( $url )
      {
          global $_SITE;

          $data = $_SITE['UrlTube']['Parser']->getDataByURI( $url );

          $this->url       = $url;
          $this->protocol  = isset( $data[1] ) ? $data[1] : NULL;
          $this->table_url = $_SITE['config']['handler']->sil->database->table_url;
          $this->service   = $_SITE['UrlTube']['Parser']->getServiceByURI( $this->url );
      }

      /**
       * Legacy PHP 4 style constructor name, kept as a plain method so existing
       * explicit calls keep working. PHP 8 no longer treats it as a constructor,
       * which is why __construct() above does the real work.
       *
       * @param string $url URL this client will probe.
       */
      public function Zend_UrlTube_Client( $url )
      {
          $this->__construct( $url );
      }

      /**
       * Derives title/description/tags (and, for images, picture metadata) from
       * a URL row and stores them in the scans table.
       *
       * @param string|null $type 'web' or 'image'; anything else is rejected.
       * @param array|null  $url  URL row; 'url' and 'id_url' are read, 'mimetype'
       *                          is used as a fallback for images.
       * @return array|int|false  The stored column => value map, -2 when $type is
       *                          not supported, FALSE when the row is unusable.
       */
      public function scan( $type = NULL, $url = NULL )
      {
          if ( $type !== 'web' && $type !== 'image' )
          {
              if ( defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN ENDED WITH FATAL ERROR, NO TYPE SPECIFIED' );
              return -2;
          }

          // isset() (rather than is_array()) so ArrayAccess rows are accepted too.
          if ( ! isset( $url['url'], $url['id_url'] ) ) return FALSE;

          if ( $type === 'image' && defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN BEGIN' );

          $parser = new UrlTube_Parse();
          $uri    = $parser->getDataByURI( $url['url'] );

          // Title: last path segment without its extension, separators turned into spaces.
          $segments = explode( '/', $url['url'] );
          $stem     = explode( '.', array_pop( $segments ) );
          array_pop( $stem );
          $name = str_replace( array( '-', '_', '.' ), ' ', htmlentities( implode( '.', $stem ) ) );

          // Description: element 4 of the parsed URI, separators turned into spaces.
          $path = isset( $uri[4] ) ? $uri[4] : '';
          $desc = str_replace( array( '-', '_', '.', '/' ), ' ', htmlentities( $path ) );

          // One comma separated list; the two word lists used to be glued together
          // without a separator, fusing the last title word with the first description word.
          $tags = html_entity_decode( implode( ',', array_merge( explode( ' ', $name ), explode( ' ', $desc ) ) ) );

          $scan_results = array( 'scan_date'   => time(),
                                 'title'       => $name,
                                 'description' => $desc,
                                 'tags'        => $tags,
                                 'image'       => 'Y' );

          if ( $type === 'image' )
              $scan_results = array_merge( $scan_results, $this->_scanImage( $parser, $url ) );

          $this->_storeScan( $url['id_url'], $scan_results );

          return $scan_results;
      }

      /**
       * Collects the image-only scan columns: bits, channels, codec, resolution,
       * thumbnail flag and ImageFilter score.
       *
       * @param object $parser UrlTube_Parse instance used for the codec lookup.
       * @param array  $url    URL row being scanned.
       * @return array Column => value map to merge into the scan result.
       */
      protected function _scanImage( $parser, $url )
      {
          global $_SITE;

          $info = getimagesize( $url['url'] );
          if ( ! is_array( $info ) ) $info = array( );

          // getimagesize() reports the detected type under 'mime'.
          $fallback = isset( $url['mimetype'] ) ? $url['mimetype'] : NULL;
          $mimetype = ( ! empty( $info['mime'] ) ) ? $info['mime'] : $fallback;

          $codecData = $parser->getDataByMimetype( $mimetype );
          $codec     = ( is_array( $codecData ) && count( $codecData ) > 0 ) ? array_pop( $codecData ) : NULL;

          $scorer = new ImageFilter();
          $score  = $scorer->GetScore( $url['url'] );

          $thumbFile = $_SITE['config']['fs']['path_public'] . DIRECTORY_SEPARATOR . 'images' . DIRECTORY_SEPARATOR . 'thumbnails' . DIRECTORY_SEPARATOR . md5( $url['id_url'] ) . '.jpg';
          if ( ! file_exists( $thumbFile ) )
          {
              $thumb = new UrlTube_Thumb();
              $thumb->getThumb( 'image', $url );
          }

          return array( 'bits'       => isset( $info['bits'] ) ? $info['bits'] : NULL,
                        'channels'   => isset( $info['channels'] ) ? $info['channels'] : NULL,
                        'codec'      => $codec,
                        'resolution' => isset( $info[0], $info[1] ) ? "{$info[0]}x{$info[1]}" : '',
                        'image'      => file_exists( $thumbFile ) ? 'Y' : 'N',
                        'xrate'      => $score );
      }

      /**
       * Updates the scan rows of a URL when one already exists, inserts a new
       * row otherwise.
       *
       * @param mixed $idUrl   Value of the id_url column.
       * @param array $results Column => value map to store.
       * @return void
       */
      protected function _storeScan( $idUrl, $results )
      {
          global $_SITE;

          $db    = $_SITE['database']['handler'];
          $table = $_SITE['config']['handler']->sil->database->table_scans;

          $lastScan = $db->select( )
                         ->from( $table, array( 'id' ) )
                         ->where( "id_url = ?", $idUrl )
                         ->order( array( 'scan_date DESC' ) )
                         ->limit( 1 )
                         ->query( )
                         ->fetchAll( );

          if ( isset( $lastScan[0] ) && is_array( $lastScan[0] ) )
              // quoteInto() instead of interpolating the id into the WHERE string.
              $db->update( $table, $results, $db->quoteInto( 'id_url = ?', $idUrl ) );
          else
              $db->insert( $table, array_merge( array( 'id_url' => $idUrl ), $results ) );
      }

      /**
       * Probes the URL (HTTP request plus ping) and summarises the response.
       *
       * @return array|false Map with net_status, date, size, update, code, text,
       *                     mimetype, mime and type; FALSE when the URL is not a
       *                     string or no response is available.
       */
      public function scanStatus( )
      {
          if ( ! is_string( $this->url ) ) return FALSE;
          if ( ! isset( $this->test_handler ) ) $this->doClient( );
          if ( is_null( $this->netStatus ) ) $this->ping( $this->url );

          $response = $this->_lastResponse( );
          if ( is_null( $response ) ) return FALSE;

          $return = array( 'net_status' => $this->netStatus );

          // Prefer Last-Modified, fall back to the response Date header.
          $return['date'] = $response->getHeader( 'last-modified' );
          if ( is_null( $return['date'] ) )
              $return['date'] = $response->getHeader( 'date' );

          $return['size']   = $response->getHeader( 'content-length' );
          $return['update'] = time( );
          $return['code']   = $response->getStatus( );
          $return['text']   = $response->responseCodeAsText( $return['code'] );

          // Drop parameters such as "; charset=..." from the content type.
          $contentType = $response->getHeader( 'content-type' );
          if ( is_array( $contentType ) ) $contentType = end( $contentType );
          $parts = explode( ';', (string) $contentType );
          $return['mimetype'] = trim( $parts[0] );

          $data = UrlTube_Parse::getDataByMimetype( $return['mimetype'] );
          $return['mime'] = isset( $data[1] ) ? $data[1] : NULL;
          $return['type'] = isset( $data[2] ) ? $data[2] : NULL;

          return $return;
      }

      /**
       * Persists a status map produced by scanStatus() into the URL table.
       *
       * @param string $url    URL value written to (and matched on) the 'url' column.
       * @param array  $status Status map; see scanStatus() for the keys.
       * @return array|false   Normalised values, or FALSE when the status code is
       *                       not accepted or the host was not reachable.
       */
      public function scanStatusResult( $url, $status = array( ) )
      {
          global $_SITE;

          // Codes treated as "reachable"; every one of them is stored as '200'.
          $accepted = array( '200', '201', '202', '203', '204', '205', '206',
                             '300', '302', '303', '304', '411', '416' );

          $code = isset( $status['code'] ) ? trim( (string) $status['code'] ) : '';
          if ( ! in_array( $code, $accepted, TRUE ) ) return FALSE;

          $netStatus = isset( $status['net_status'] ) ? $status['net_status'] : NULL;
          if ( $netStatus != 'Y' || $this->netStatus != 'Y' ) return FALSE;

          $return = array( 'netStatus'    => trim( $netStatus ),
                           'statusUpdate' => $this->_statusField( $status, 'update', '' ),
                           'statusCode'   => '200',
                           'statusText'   => $this->_statusField( $status, 'text', '' ),
                           'modifiedDate' => $this->_statusField( $status, 'date', NULL ),
                           'size'         => $this->_statusField( $status, 'size', NULL ),
                           'mimetype'     => $this->_statusField( $status, 'mimetype', NULL ),
                           'mime'         => $this->_statusField( $status, 'mime', NULL ),
                           'type'         => $this->_statusField( $status, 'type', NULL ) );

          // NOTE(review): creation_date is rewritten on update as well, exactly as
          // before - confirm whether an update should really reset it.
          $row = array( 'url'           => $url,
                        'net_status'    => $return['netStatus'],
                        'status_code'   => $return['statusCode'],
                        'status_text'   => $return['statusText'],
                        'status_update' => $return['statusUpdate'],
                        'modified_date' => $return['modifiedDate'],
                        'creation_date' => time( ),
                        'size'          => $return['size'],
                        'mimetype'      => $return['mimetype'] );

          $db = $_SITE['database']['handler'];

          // NOTE(review): existence is checked for $this->url while the row is
          // keyed on $url - presumably callers pass the same value; verify.
          if ( UrlTube::getURLIdByURL( $this->url ) )
              // The URL is crawled data: quote it instead of interpolating it.
              $db->update( $this->table_url, $row, $db->quoteInto( 'url = ?', $url ) );
          else
              $db->insert( $this->table_url, $row );

          return $return;
      }

      /**
       * Returns the trimmed value of a status key, or a default when the key is
       * missing or NULL.
       *
       * @param array  $status  Status map.
       * @param string $key     Key to read.
       * @param mixed  $default Value returned when the key is not set.
       * @return mixed
       */
      protected function _statusField( $status, $key, $default )
      {
          return isset( $status[$key] ) ? trim( $status[$key] ) : $default;
      }

      /**
       * Sends one ICMP echo request with the system ping binary.
       *
       * @param string      $url  URL whose host is pinged when $host is NULL.
       * @param string|null $host Explicit host name or address.
       * @return array|false Output lines of ping, or FALSE when fewer than three
       *                     lines were produced.
       */
      public function ping( $url, $host = NULL )
      {
          if ( is_null( $host ) )
          {
              $parser = new UrlTube_Parse();
              $parts  = $parser->getDataByURI( $url );
              $host   = ( isset( $parts[2] ) ? $parts[2] : '' ) . '.' . ( isset( $parts[3] ) ? $parts[3] : '' );
          }

          $host = (string) $host;
          $c    = array( );

          // A leading dash would be read by ping as an option, and the host comes
          // from crawled URLs: refuse it, and shell-escape everything else.
          if ( $host !== '' && $host[0] !== '-' )
              exec( 'ping -c 1 ' . escapeshellarg( $host ), $c );

          // NOTE(review): success is still judged by the number of output lines,
          // not by ping's exit status - confirm this is the intended heuristic.
          $alive = count( $c ) >= 3;

          // isset() keeps a static call on old PHP versions from touching $this.
          if ( isset( $this ) )
          {
              $this->netStatus = $alive ? 'Y' : 'N';
              if ( $alive ) $this->result_ping = $c;
          }

          return $alive ? $c : FALSE;
      }

      /**
       * Creates the HTTP client and performs the probe request once.
       *
       * @return bool|int TRUE when a client with a response is available, FALSE
       *                  when the URL is not a string, -1 for an unsupported protocol.
       */
      public function doClient( )
      {
          if ( ! is_string( $this->url ) ) return FALSE;
          if ( isset( $this->test_handler ) ) return TRUE;

          // The old `case 'http' || 'https' || ...` label evaluated to TRUE and let
          // every non-empty protocol through. Tolerates a trailing ':' or '://'.
          $scheme = strtolower( rtrim( (string) $this->protocol, ':/' ) );
          if ( ! in_array( $scheme, array( 'http', 'https', 'ftp', 'rtsp' ), TRUE ) ) return -1;

          $this->test_options = array( 'maxredirects'  => $this->defaultTestMaxRedirects,
                                       'keepalive'     => FALSE,
                                       'useragent'     => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3; ( UrlTube-Bot :: urltube.net )',
                                       'storeresponse' => TRUE,
                                       'timeout'       => 7 );

          $client = new Zend_Http_Client( $this->url, $this->test_options );
          $client->setHeaders( 'Accept-Encoding', 'chunked' );

          // NOTE(review): the key is lower-case 'term' as before; the environment
          // variable is normally exposed as 'TERM' - confirm which one is meant.
          if ( isset( $_SERVER['term'] ) && $_SERVER['term'] == 'xterm' )
              $client->setHeaders( 'Range', 'bytes' )
                     ->setHeaders( 'Request-Range', 'bytes=1-32' );

          // Earlier note kept from the code: on timeout the client reported
          // "Unable to read response, or response is empty". The handler is only
          // published after request() returns, so a failed request does not leave
          // a client without a response behind.
          $client->request( );
          $this->test_handler = $client;

          return TRUE;
      }

      /**
       * Builds a title and description for non-text resources from the response
       * headers and the URL.
       *
       * @return array Map with 'title' and 'description'.
       */
      public function getConnectionInfo( )
      {
          $response    = $this->_lastResponse( );
          $disposition = NULL;
          $size        = 0;
          $update_time = NULL;

          if ( ! is_null( $response ) )
          {
              $disposition = $response->getHeader( 'content-disposition' );
              $size        = (int) $response->getHeader( 'content-length' );
              // Same precedence scanStatus() uses for its 'date' value.
              $update_time = $response->getHeader( 'last-modified' );
              if ( is_null( $update_time ) ) $update_time = $response->getHeader( 'date' );
          }

          // Only a real filename= parameter counts; a bare "inline"/"attachment"
          // used to end up as the title.
          $files = '';
          if ( is_string( $disposition ) && preg_match( '/filename\s*=\s*"?([^";]+)"?/i', $disposition, $found ) )
              $files = trim( $found[1] );

          // Fallback: last path segment, query string and extension removed.
          $segments = explode( '/', (string) $this->url );
          $pieces   = explode( '?', array_pop( $segments ) );
          $parts    = explode( '.', $pieces[0] );
          if ( count( $parts ) > 1 ) array_pop( $parts ); // keep names without an extension
          $urlFile  = implode( '', $parts );

          $size = ( $size / 1000 >= 1 ) ? (int) ( $size / 1000 ) . 'Kb' : NULL;

          $return = array( );
          $return['title']       = ( empty( $files ) ) ? $urlFile : $files;
          $return['description'] = "$update_time {$this->mime} $size";

          return $return;
      }

      /**
       * Detects mimetype, title and description of the probed URL.
       *
       * The request itself always targets $this->url; $url is only validated.
       *
       * @param string $url URL being inspected.
       * @return array|false Map with title, description, mimetype, status_code
       *                     and status_text; FALSE when $url is not a string or
       *                     no response is available.
       */
      public function autodetectInfo( $url )
      {
          if ( ! is_string( $url ) ) return FALSE;
          if ( ! isset( $this->test_handler ) ) $this->doClient( );

          $response = $this->_lastResponse( );
          if ( is_null( $response ) ) return FALSE;

          // The response object is asked for the body rather than decoding by hand.
          $site = (string) $response->getBody( );

          $mimetype = $this->_detectMimetype( $response, $site );
          $this->_applyMimetype( $mimetype );

          $title       = '';
          $description = '';

          if ( $this->type == 'text' )
          {
              // TODO (translated from the original note): for YouTube or other
              // sites with extended description support, avoid as much of this
              // work as possible.
              $subtype = substr( $mimetype, 5 );
              if ( $this->mime == 'html' || $subtype == 'xhtml' || $subtype == 'xml' )
              {
                  if ( preg_match( '~<title[^>]*>(.*?)</title>~is', $site, $found ) )
                      $title = $found[1];

                  $patterns = array( '~<meta[^>]*name\s*=\s*["\']?description["\']?[^>]*content\s*=\s*["\']([^"\']*)["\']~i',
                                     '~<meta[^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*name\s*=\s*["\']?description~i' );
                  foreach ( $patterns as $pattern )
                  {
                      if ( preg_match( $pattern, $site, $found ) && ! empty( $found[1] ) )
                      {
                          $description = $found[1];
                          break;
                      }
                  }

                  // Last resort: leading body text, capped like the plain-text branch.
                  if ( $description === '' && preg_match( '~<body[^>]*>(.*)</body>~is', $site, $found ) )
                      $description = substr( trim( strip_tags( $found[1] ) ), 0, 256 );
              }
              else
              {
                  $title       = substr( $site, 0, 64 );
                  $description = substr( $site, 0, 256 );
              }
          }
          elseif ( in_array( $this->type, array( 'image', 'audio', 'video' ), TRUE )
                || ( $this->type == 'application' && in_array( $this->mime, array( 'pdf', 'rtf', 'doc' ), TRUE ) ) )
          {
              $connInfo    = $this->getConnectionInfo( );
              $title       = $connInfo['title'];
              $description = $connInfo['description'];
          }

          return array( 'title'       => trim( $title ),
                        'description' => trim( strip_tags( $description ) ),
                        'mimetype'    => trim( $mimetype ),
                        'status_code' => trim( (string) $response->getStatus( ) ),
                        'status_text' => trim( (string) $response->getMessage( ) ) );
      }

      /**
       * Returns the last response held by the HTTP client, or NULL when there is
       * no client or no response yet.
       *
       * @return object|null
       */
      protected function _lastResponse( )
      {
          if ( ! isset( $this->test_handler ) || ! is_object( $this->test_handler ) ) return NULL;

          $response = $this->test_handler->getLastResponse( );

          return is_object( $response ) ? $response : NULL;
      }

      /**
       * Picks the mimetype from the Content-Type header, falling back to a
       * <meta http-equiv="Content-Type"> tag in the body.
       *
       * @param object $response Last HTTP response.
       * @param string $site     Response body.
       * @return string Mimetype without parameters, '' when none was found.
       */
      protected function _detectMimetype( $response, $site )
      {
          $header = $response->getHeader( 'Content-Type' );
          if ( is_array( $header ) ) $header = end( $header );

          $parts     = explode( ';', (string) $header );
          $candidate = trim( $parts[0] );
          if ( strpos( $candidate, '/' ) !== FALSE ) return $candidate;

          $patterns = array( '~<meta[^>]*http-equiv\s*=\s*["\']?content-type["\']?[^>]*content\s*=\s*["\']([^"\']*)["\']~i',
                             '~<meta[^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*http-equiv\s*=\s*["\']?content-type~i' );
          foreach ( $patterns as $pattern )
          {
              if ( preg_match( $pattern, $site, $found ) && ! empty( $found[1] ) )
              {
                  $parts = explode( ';', $found[1] );
                  return trim( $parts[0] );
              }
          }

          return '';
      }

      /**
       * Stores the detected mimetype and its mime/type classification.
       *
       * A setMimetype() method supplied by a subclass is used when present;
       * otherwise the values are resolved the same way scanStatus() does.
       *
       * @param string $mimetype Detected mimetype.
       * @return void
       */
      protected function _applyMimetype( $mimetype )
      {
          if ( method_exists( $this, 'setMimetype' ) )
          {
              $this->setMimetype( $mimetype );
              return;
          }

          $data = UrlTube_Parse::getDataByMimetype( $mimetype );

          $this->mimetype = $mimetype;
          $this->mime     = isset( $data[1] ) ? $data[1] : NULL;
          $this->type     = isset( $data[2] ) ? $data[2] : NULL;
      }
  }