PageRenderTime 167ms CodeModel.GetById 60ms app.highlight 50ms RepoModel.GetById 51ms app.codeStats 0ms

/library/UrlTube/Bot.php

https://bitbucket.org/baruffaldi/webapp-urltube
PHP | 480 lines | 361 code | 94 blank | 25 comment | 79 complexity | 977f674abef17113e5f33ec5eecc9a1c MD5 | raw file
  1<?php
  2
  3require 'Zend' . DIRECTORY_SEPARATOR . 'Http' . DIRECTORY_SEPARATOR . 'Client.php';
  4
  5class Zend_UrlTube_Client
  6{
  7     public $url = NULL;
  8     public $protocol = NULL;
  9     public $service = NULL;
 10     public $mimetype = NULL;
 11     public $mime = NULL;
 12     public $type = NULL;
 13     
 14     public $remoteHostname = NULL;
 15     public $remoteIp = NULL;
 16     public $remotePort = NULL;
 17     public $localHostname = NULL;
 18     public $localIp = NULL;
 19     public $localPort = NULL;
 20     
 21     public $netStatus = NULL;
 22     public $serviceStatus = NULL;
 23     public $serviceStatusText = NULL;
 24     
 25     public $title = NULL;
 26     public $description = NULL;
 27     
 28     public $html_options = array( );
 29     public $html_handler = NULL;
 30     public $defaultTestMaxRedirects = 10;
 31     public $defaultTestTimeout      = 3;
 32     public $table_url = NULL;
 33     
 34     public function Zend_UrlTube_Client( $url )
 35     {
 36          global $_SITE;
 37          
 38          $data = $_SITE['UrlTube']['Parser']->getDataByURI( $url );
 39          
 40          $this->url = $url;
 41          $this->protocol = $data[1];
 42          $this->table_url = $_SITE['config']['handler']->sil->database->table_url;
 43        $this->service = $_SITE['UrlTube']['Parser']->getServiceByURI( $this->url );
 44     }
 45     
 46     public function scan( $type = NULL, $url )
 47     {
 48          global $_SITE;
 49
 50          switch ( $type )
 51          {
 52               default:
 53               case NULL:
 54                    if ( defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN ENDED WITH FATAL ERROR, NO TYPE SPECIFIED' );
 55                    return -2;
 56                    break;
 57                    
 58               case 'web':
 59                    $uri = new UrlTube_Parse();
 60                    $uri = $uri->getDataByURI( $url['url'] );
 61
 62                    $filename = array_pop( explode( '/', $url['url'] ) );
 63                    $name = explode( '.', $filename );
 64
 65                    array_pop( $name );
 66                    
 67                    $name = str_replace( '.', ' ', str_replace( '_', ' ', str_replace( '-', ' ', htmlentities( implode( '.', $name ) ) ) ) );
 68                    $desc = str_replace( '/', ' ', str_replace( '.', ' ', str_replace( '_', ' ', str_replace( '-', ' ', htmlentities( $uri[4] ) ) ) ) );
 69
 70                    $mimetype = $url['mimetype'];
 71                    $query = $_SITE['database']['handler']->select(  )
 72                                                          ->from( $_SITE['config']['handler']->sil->database->table_scans, array( 'id' ) )
 73                                                          ->where( "id_url = ?", $url['id_url'] )
 74                                                          ->order( array( 'scan_date DESC' ) )
 75                                                          ->limit( 1 );
 76                     $handle = $query->query();
 77                     $lastScan = $handle->fetchAll();
 78
 79                     $image = 'Y';
 80                     
 81                     if ( is_array( $lastScan[0] ) )
 82                     $_SITE['database']['handler']->update( $_SITE['config']['handler']->sil->database->table_scans, 
 83                                                            $scan_results,
 84                                                            "id_url = '{$url['id_url']}'" );
 85                     else
 86                     $_SITE['database']['handler']->insert( $_SITE['config']['handler']->sil->database->table_scans, 
 87                                                            array_merge( array( 'id_url' => $url['id_url'] ), $scan_results ) );
 88                    break;
 89                    
 90               case 'image':
 91                    if ( defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN BEGIN' );
 92
 93                    $uri = new UrlTube_Parse();
 94                    $uri = $uri->getDataByURI( $url['url'] );
 95
 96                    $filename = array_pop( explode( '/', $url['url'] ) );
 97                    $name = explode( '.', $filename );
 98
 99                    array_pop( $name );
100                    
101                    $name = str_replace( '.', ' ', str_replace( '_', ' ', str_replace( '-', ' ', htmlentities( implode( '.', $name ) ) ) ) );
102                    $desc = str_replace( '/', ' ', str_replace( '.', ' ', str_replace( '_', ' ', str_replace( '-', ' ', htmlentities( $uri[4] ) ) ) ) );
103                    
104                    
105                    $info = getimagesize( $url['url'] );
106                    $resolution = "{$info[0]}x{$info[1]}";
107
108                    $mimetype = ( ! empty( $info['mimetype'] ) ) ? $info['mimetype'] : $url['mimetype'];
109                    
110                    $bits = $info['bits'];
111                    $channels = $info['channels'];
112                    $codec = array_pop( $_SITE['UrlTube']['Parse']->getDataByMimetype( $mimetype ) );
113
114	                $imageScore = new ImageFilter();
115	                $imageS = $imageScore->GetScore( $url['url'] );
116
117	                if ( ! file_exists( $_SITE['config']['fs']['path_public'] . DIRECTORY_SEPARATOR . 'images' . DIRECTORY_SEPARATOR . 'thumbnails' . DIRECTORY_SEPARATOR . md5( $url['id_url'] ) . '.jpg' ) )
118	                {
119	                    $thumb = new UrlTube_Thumb();                    
120	                    $thumb->getThumb( 'image', $url );
121	                }
122
123                     $image = ( file_exists( $_SITE['config']['fs']['path_public'] . DIRECTORY_SEPARATOR . 'images' . DIRECTORY_SEPARATOR . 'thumbnails' . DIRECTORY_SEPARATOR . md5( $url['id_url'] ) . '.jpg' ) ) ? 'Y' : 'N';
124
125                    $query = $_SITE['database']['handler']->select(  )
126                                                                     ->from( $_SITE['config']['handler']->sil->database->table_scans, array( 'id' ) )
127                                                              ->where( "id_url = ?", $url['id_url'] )
128                                                             ->order( array( 'scan_date DESC' ) )
129                                                               ->limit( 1 );
130                     $handle = $query->query();
131                     $lastScan = $handle->fetchAll();
132                     
133                     $scan_results = array( 'bits' => $bits, 'channels' => $channels, 'codec' => $codec, 'scan_date' => time(), 'title' => $name, 'description' => $desc, 'tags' => html_entity_decode( implode( ',', explode( ' ', $name ) ).implode( ',', explode( ' ', $desc ) ) ), 'resolution' => $resolution, 'image' => $image, 'xrate' => $imageS );
134                     
135                     if ( is_array( $lastScan[0] ) )
136                    $_SITE['database']['handler']->update( $_SITE['config']['handler']->sil->database->table_scans, 
137                                                           $scan_results,
138                                                             "id_url = '{$url['id_url']}'" );
139                    else
140                    $_SITE['database']['handler']->insert( $_SITE['config']['handler']->sil->database->table_scans, 
141                                                  array_merge( array( 'id_url' => $url['id_url'] ), $scan_results ) );
142
143                    break;
144          }
145          
146          if ( ! is_null( $scan_results ) ) return $scan_results;
147          else return FALSE;
148     }
149     
150    public function scanStatus( )
151    {
152          global $_SITE;
153          
154          if ( ! is_string( $this->url ) ) return FALSE;
155          
156          if ( is_null( $this->test_handler ) ) $this->doClient( );
157          if ( is_null( $this->netStatus ) ) $this->ping( $this->url );
158          
159          $return['net_status'] = $this->netStatus;
160
161          switch( $this->protocol )
162          {
163               default:
164               case 'http' || 'https' || 'ftp' || 'rtsp':
165
166                    $return['date'] = $this->test_handler->getLastResponse()->getHeader( 'last-modified' );
167                  if ( is_null( $return['date'] ) ) 
168                  $return['date'] = $this->test_handler->getLastResponse()->getHeader( 'date' );
169
170                    $return['size'] = $this->test_handler->getLastResponse()->getHeader( 'content-length' );
171
172                    $return['update'] = time( );          
173                    $return['code']   = $this->test_handler->getLastResponse( )->getStatus( );
174                    $return['text']   = $this->test_handler->getLastResponse( )->responseCodeAsText( $this->test_handler->getLastResponse( )->getStatus( ) );
175
176                  	$mimetype = explode( ';', $this->test_handler->getLastResponse()->getHeader( 'content-type' ) );
177                    $return['mimetype'] = $mimetype[0];
178                    
179                    $mimetype = UrlTube_Parse::getDataByMimetype( $return['mimetype'] );
180                    $return['mime'] = $mimetype[1];
181                    $return['type'] = $mimetype[2];
182                    return $return;
183                    break;
184          }
185    }
186     
187     public function scanStatusResult( $url, $status = array( ) )
188     {
189          global $_SITE;
190          
191          // *** SCAN: begin
192          // ******************
193
194          switch( $status['code'] )
195          {
196               default:
197                    return FALSE;
198                    break;
199                    
200               case '200':
201               case '201':
202               case '202':
203               case '203':
204               case '204':
205               case '205':
206               case '206':
207               case '300':
208               case '302':
209               case '303':
210               case '304':
211               case '411':
212               case '416':
213                   $status['code'] = '200';
214                    break;
215          }
216
217
218          $return['netStatus']     = trim( $status['net_status'] );
219          $return['statusUpdate']  = trim( $status['update'] );
220          $return['statusCode']    = trim( $status['code'] );
221          $return['statusText']    = trim( $status['text'] );
222
223          $return['modifiedDate']  = ( ! is_null( $status['date'] ) )     ? trim( $status['date'] )     : NULL;
224          $return['size']          = ( ! is_null( $status['size'] ) )     ? trim( $status['size'] )     : NULL;
225          $return['mimetype']      = ( ! is_null( $status['mimetype'] ) ) ? trim( $status['mimetype'] ) : NULL;
226          $return['mime']          = ( ! is_null( $status['mime'] ) )     ? trim( $status['mime'] )     : NULL;
227          $return['type']          = ( ! is_null( $status['type'] ) )     ? trim( $status['type'] )     : NULL;
228
229          if ( $status['net_status'] != 'Y' || $this->netStatus != 'Y' ) return FALSE;
230          
231          if ( UrlTube::getURLIdByURL( $this->url ) )
232                  $_SITE['database']['handler']->update( $this->table_url, array('url'           => $url, 
233                                                                                 'net_status'    => $return['netStatus'], 
234                                                                                 'status_code'        => $return['statusCode'], 
235                                                                                 'status_text'   => $return['statusText'], 
236                                                                                 'status_update' => $return['statusUpdate'], 
237                                                                                 'modified_date' => $return['modifiedDate'],
238                                                                                 'creation_date' => time(),
239                                                                                 'size'          => $return['size'],
240                                                                                 'mimetype'      => $return['mimetype'] ), 
241                                                                                 "url = '$url'" );
242          // Check if exist
243          else $_SITE['database']['handler']->insert( $this->table_url, array('url'           => $url, 
244                                                                              'net_status'    => $return['netStatus'], 
245                                                                              'status_code'        => $return['statusCode'], 
246                                                                              'status_text'   => $return['statusText'], 
247                                                                              'status_update' => $return['statusUpdate'], 
248                                                                              'creation_date' => time(),
249                                                                              'modified_date' => $return['modifiedDate'],
250                                                                              'size'          => $return['size'],
251                                                                              'mimetype'      => $return['mimetype'] ) );
252          // *** SCAN: ends
253          // ******************
254          return $return;
255     }
256     
257     public function ping( $url, $host = NULL )
258     {
259          global $_SITE;
260          
261          if ( ! is_null( $host ) ) exec( "ping -c 1 $host", $c );
262          else {
263               $parser = new UrlTube_Parse();
264               $host = $parser->getDataByURI( $url );
265               exec( "ping -c 1 {$host[2]}.{$host[3]}", $c );
266          }
267          
268          if ( count( $c ) >= 3 ) {
269               if ( is_object( $this ) ) 
270               {
271                    $this->netStatus = 'Y';
272                    $this->result_ping = $c;
273               }
274               return $c;
275          } else {
276               if ( is_object( $this ) ) $this->netStatus = 'N';
277               return FALSE;
278          }
279     }
280     
281     public function doClient( )
282     {
283          global $_SITE;
284                    
285          if ( ! is_string( $this->url ) ) return FALSE;
286          if ( ! is_null( $this->test_handler ) ) return TRUE;
287          
288          switch( $this->protocol )
289          {
290               default:
291                    return -1;
292                    break;
293                    
294               case 'http' || 'https' || 'ftp' || 'rtsp':
295                    $this->test_options = array( 'maxredirects'  => $this->defaultTestMaxRedirects,
296                                                 'keepalive'     => FALSE,
297                                                 'useragent'     => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3; ( UrlTube-Bot :: urltube.net )',
298                                                 'storeresponse' => TRUE,
299                                                 'timeout'       => 7 );
300// IF TIMEOUT NO EXCEPTION!!! Message:Unable to read response, or response is empty http://i79.photobucket.com/albums/j131/moochi-/japanese%20stars/06-1.jpg
301                    $this->test_handler = new Zend_Http_Client( $this->url, $this->test_options );
302                    $this->test_handler->setHeaders( 'Accept-Encoding', 'chunked' );
303
304                    if ( $_SERVER['term'] == 'xterm' ) $this->test_handler->setHeaders( 'Range', 'bytes' )
305                                                                                     ->setHeaders( 'Request-Range', 'bytes=1-32' );
306                                       
307                    $this->test_handler->request( );
308                    return TRUE;
309                    break;
310          }
311     }
312     
313     public function getConnectionInfo( )
314     {
315
316          $content = $this->test_handler->getLastResponse()->getHeader( 'content-disposition' );
317          
318          $files   = array_pop( explode( 'filename=', $content ) );
319          $urlFile = array_pop( explode( '/', $this->url ) );
320          $urlFile = array_pop( array_reverse( explode( '?', $urlFile ) ) );
321          $urlFile = explode( '.', $urlFile );
322          array_pop( $urlFile );
323          $urlFile = implode( $urlFile );
324          
325          $size    = ( $size / 1000 >= 1 ) ? (int) ( $size / 1000 ) . 'Kb' : NULL;
326          
327          //$return['title'] = strtoupper( $this->type ) . ': ';
328          $return['title'] = ( empty( $files ) ) ? $urlFile : $files;
329          $return['description'] = "$update_time {$this->mime} $size";
330          
331          return $return;
332     }
333     
334     public function autodetectInfo( $url )
335     {
336          global $_SITE;
337
338                  if ( count( $this->twins_counter ) <= 0 && 
339                       $return['type'] == 'text' )
340                  {
341                       if ( 0 ) $offset = 8; // Se � youTube o un servizio con enhancedAutoDetect
342                       elseif ( $return['type'] == 'text' ) $offset = 256; // altrimenti si usa il normale autoDetect
343                       elseif ( $return['type'] == 'audio' ) $offset = 16;
344                       else $offset = 128;
345                       
346                       $this->test_handler = new Zend_test_Client( $this->url, $this->test_options );
347                       $this->test_handler->setHeaders( 'Accept-Encoding', 'deflate' )
348                                          ->setHeaders( 'Range', 'bytes' )
349                                          ->setHeaders( 'Request-Range', 'bytes=1-' . $offset );
350                       $this->test_handler->request( );
351                  }
352                  
353                    //$this->statusText = $this->test_handler->getLastResponse()->getMessage();
354          if ( ! is_string( $url ) ) return FALSE;
355          if ( is_null( $this->test_handler ) ) $this->doClientTest( $url );
356          
357          //$this->test_handler = new Zend_test_Client( 'http://sil.baruffaldi.info', $this->test_options );
358          
359          if ( $this->test_handler->getLastResponse()->getHeader( 'Content-Encoding' ) == 'gzip' ) $site = $this->test_handler->getLastResponse()->decodeGzip( $site );
360          elseif ( $this->test_handler->getLastResponse()->getHeader( 'Content-Encoding' ) == 'deflate' ) $site = $this->test_handler->getLastResponse()->decodeDeflate( $site );
361          elseif ( $this->test_handler->getLastResponse()->getHeader( 'Content-Encoding' ) == 'chunked' ) $site = $this->test_handler->getLastResponse()->decodeChunkedBody( $site );
362          
363          $mimetype = explode( ';', $this->test_handler->getLastResponse()->getHeader( 'Content-Type' ) );
364          
365          $pattern_mimetype     = '^<meta.*http-equiv=..ontent-.ype.*content=.(.*).*>^';
366          $pattern_mimetype2    = '^<meta.*content=.(.*).*http-equiv=..ontent-.ype.*>^';
367          
368          if ( preg_match( '^.*\/.*^', $mimetype[0] ) ) $mimetype = $mimetype[0];
369          else preg_match( $pattern_mimetype, $site, $mimetype );
370          
371          if ( empty( $mimetype[1] ) ) preg_match( $pattern_mimetype, $site, $mimetype );
372          if ( empty( $mimetype[1] ) ) preg_match( $pattern_mimetype2, $site, $mimetype );
373          
374          if ( is_array( $mimetype ) )
375          {
376               if ( preg_match( '^.*\/.*;.*^', $mimetype[1] ) )
377               {
378                    $tmp = explode( ';', $mimetype[1] );
379                    $mimetype = trim( $tmp[0] );
380               }
381          }
382          
383          $this->setMimetype( $mimetype );
384          
385          /**
386           * TEXT/*
387           */
388          if ( $this->type == 'text' )
389          {
390               
391               // Se � youtube o un altro sito con la compatibilita' estesa per la descrizione evitare il piu' possibile il lavoro
392               $site = $this->test_handler->getLastResponse()->getRawBody();
393               $pattern_title        = '^<title>(.*)<\/title>^';
394               if ( $this->mime == 'html' || substr( $mimetype, 5 ) == 'xhtml' || substr( $mimetype, 5 ) == 'xml' )
395               {
396               $pattern_body         = '^<body.*>(.*)<\/body>^';
397               $pattern_description  = '^<meta.*name=.description.*content=.(.*).*>^';
398               $pattern_description2 = '^<meta.*content=.(.*).*name=.description.*>^';
399               
400               preg_match( $pattern_title, $site, $title );
401               preg_match( $pattern_description, $site, $description );
402
403               if ( empty( $description[1] ) ) preg_match( $pattern_description2, $site, $description );
404               if ( empty( $description[1] ) ) {
405                    preg_match( $pattern_body, $site, $body );
406                    $description[1] = $body[0];
407               }
408
409               $description = $description[1];
410               $title = $title[1];
411              } else {
412                   $title = substr( $site, 0, 64 );
413                   $description = substr( $site, 0, 256 );;
414              }
415          }
416
417          /**
418           * IMAGES/*
419           */
420          elseif ( $this->type == 'image' )
421          {
422                    $connInfo = $this->getConnectionInfo();
423                    $title = $connInfo['title'];
424                    $description = $connInfo['description'];
425          }
426
427          /**
428           * AUDIO/*
429           */
430          elseif ( $this->type == 'audio' )
431          {
432                    $connInfo = $this->getConnectionInfo();
433                    $title = $connInfo['title'];
434                    $description = $connInfo['description'];
435          }
436
437          /**
438           * VIDEO/*
439           */
440          elseif ( $this->type == 'video' )
441          {
442                    $connInfo = $this->getConnectionInfo();
443                    $title = $connInfo['title'];
444                    $description = $connInfo['description'];
445          }
446
447          /**
448           * APPLICATION/* ( pdf, gz, zip, tar, tgz, doc???, rtf??? )
449           */
450          elseif ( $this->type == 'application' )
451          {
452               if ( $this->mime == 'pdf' ) {
453                    $connInfo = $this->getConnectionInfo();
454                    $title = $connInfo['title'];
455                    $description = $connInfo['description'];
456                    
457               } else if ( $this->mime == 'rtf' ) {
458                    $connInfo = $this->getConnectionInfo();
459                    $title = $connInfo['title'];
460                    $description = $connInfo['description'];
461                    
462               } else if ( $this->mime == 'doc' ) {
463                    $connInfo = $this->getConnectionInfo();
464                    $title = $connInfo['title'];
465                    $description = $connInfo['description'];
466               } 
467          } else {
468               $title = "";
469               $description = "";
470          }
471          
472          $return = array( 'title'       => trim( $title ), 
473                           'description' => trim( strip_tags( $description ) ), 
474                           'mimetype'    => trim( $mimetype ), 
475                           'status_code' => trim( $this->statusCode ), 
476                           'status_text' => trim( $this->statusText ) );
477          
478          return $return;
479     }
480}