/library/UrlTube/Bot.php
PHP | 480 lines | 361 code | 94 blank | 25 comment | 79 complexity | 977f674abef17113e5f33ec5eecc9a1c MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, MIT
- <?php
- require 'Zend' . DIRECTORY_SEPARATOR . 'Http' . DIRECTORY_SEPARATOR . 'Client.php';
/**
 * Probes a single URL for the UrlTube bot.
 *
 * The client can ping the remote host, issue an HTTP request through
 * Zend_Http_Client, turn the response headers into a status record, persist
 * that record in the URL table, and derive a title/description for the
 * resource either from its HTML or from its file name.
 *
 * The class relies on the global $_SITE registry for the database handler,
 * the configuration object and the URI parser.
 */
class Zend_UrlTube_Client
{
    public $url = NULL;
    public $protocol = NULL;
    public $service = NULL;
    public $mimetype = NULL;
    public $mime = NULL;
    public $type = NULL;

    public $remoteHostname = NULL;
    public $remoteIp = NULL;
    public $remotePort = NULL;
    public $localHostname = NULL;
    public $localIp = NULL;
    public $localPort = NULL;

    public $netStatus = NULL;
    public $serviceStatus = NULL;
    public $serviceStatusText = NULL;

    public $title = NULL;
    public $description = NULL;

    public $html_options = array( );
    public $html_handler = NULL;
    public $defaultTestMaxRedirects = 10;
    public $defaultTestTimeout = 3;
    public $table_url = NULL;

    // The following properties were previously created on the fly; they are
    // declared here so that reads before the first write are well defined.
    public $test_handler = NULL;
    public $test_options = NULL;
    public $result_ping = NULL;
    public $twins_counter = array( );
    public $statusCode = NULL;
    public $statusText = NULL;

    /**
     * Builds a client for the given URL.
     *
     * Reads the protocol and the service from the parser registered in
     * $_SITE['UrlTube']['Parser'] and the URL table name from the site
     * configuration.
     *
     * @param string $url URL to probe.
     */
    public function __construct( $url )
    {
        global $_SITE;

        $parser = $_SITE['UrlTube']['Parser'];
        $data = $parser->getDataByURI( $url );

        $this->url = $url;
        $this->protocol = $data[1];
        $this->table_url = $_SITE['config']['handler']->sil->database->table_url;
        $this->service = $parser->getServiceByURI( $this->url );
    }

    /**
     * Legacy PHP4-style constructor name, kept so that existing explicit
     * calls (e.g. from subclasses) keep working. PHP 8 no longer treats a
     * method named after the class as the constructor, hence __construct().
     *
     * @param string $url URL to probe.
     */
    public function Zend_UrlTube_Client( $url )
    {
        $this->__construct( $url );
    }

    /**
     * Scans a URL record and stores the result in the scans table.
     *
     * @param string|null $type Either 'web' or 'image'.
     * @param array|null  $url  URL record; the keys 'url' and 'id_url' are
     *                          read, 'mimetype' is used as a fallback for images.
     * @return array|int The stored scan fields, or -2 when the type is not
     *                   supported or the URL record is unusable.
     */
    public function scan( $type = NULL, $url = NULL )
    {
        global $_SITE;

        if ( $type !== 'web' && $type !== 'image' )
        {
            if ( defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN ENDED WITH FATAL ERROR, NO TYPE SPECIFIED' );
            return -2;
        }
        if ( ! is_array( $url ) || ! isset( $url['url'], $url['id_url'] ) ) return -2;

        $parser = new UrlTube_Parse();

        if ( $type === 'web' )
        {
            $text = $this->describeUrl( $parser, $url['url'] );

            // The original branch never built this array and passed an
            // undefined variable to the database. The columns below are the
            // subset of the image scan that this branch actually computes.
            // NOTE(review): confirm the column set expected for web scans.
            $scan_results = array( 'scan_date'   => time(),
                                   'title'       => $text['title'],
                                   'description' => $text['description'],
                                   'tags'        => $text['tags'],
                                   'image'       => 'Y' );

            $this->storeScan( $url['id_url'], $scan_results );
            return $scan_results;
        }

        if ( defined( '__DEBUG__' ) && __DEBUG__ ) var_dump( 'SCAN BEGIN' );
        $text = $this->describeUrl( $parser, $url['url'] );

        // getimagesize() returns FALSE for unreadable images and omits
        // 'bits'/'channels' for some formats, so every key is checked.
        $info = getimagesize( $url['url'] );
        $resolution = is_array( $info ) ? "{$info[0]}x{$info[1]}" : NULL;
        $bits = isset( $info['bits'] ) ? $info['bits'] : NULL;
        $channels = isset( $info['channels'] ) ? $info['channels'] : NULL;

        // The detected type lives under 'mime'; fall back to the stored one.
        if ( ! empty( $info['mime'] ) ) $mimetype = $info['mime'];
        else $mimetype = isset( $url['mimetype'] ) ? $url['mimetype'] : NULL;

        $codecData = $parser->getDataByMimetype( $mimetype );
        $codec = is_array( $codecData ) ? array_pop( $codecData ) : NULL;

        $imageScore = new ImageFilter();
        $imageS = $imageScore->GetScore( $url['url'] );

        $thumbnail = $_SITE['config']['fs']['path_public'] . DIRECTORY_SEPARATOR . 'images' . DIRECTORY_SEPARATOR . 'thumbnails' . DIRECTORY_SEPARATOR . md5( $url['id_url'] ) . '.jpg';
        if ( ! file_exists( $thumbnail ) )
        {
            $thumb = new UrlTube_Thumb();
            $thumb->getThumb( 'image', $url );
        }
        // Checked again because getThumb() is expected to create the file.
        $image = file_exists( $thumbnail ) ? 'Y' : 'N';

        $scan_results = array( 'bits'        => $bits,
                               'channels'    => $channels,
                               'codec'       => $codec,
                               'scan_date'   => time(),
                               'title'       => $text['title'],
                               'description' => $text['description'],
                               'tags'        => $text['tags'],
                               'resolution'  => $resolution,
                               'image'       => $image,
                               'xrate'       => $imageS );

        $this->storeScan( $url['id_url'], $scan_results );
        return $scan_results;
    }

    /**
     * Derives a title, a description and a tag list from the text of a URL.
     *
     * @param object $parser  Parser exposing getDataByURI().
     * @param string $address URL to describe.
     * @return array Keys 'title', 'description' and 'tags'.
     */
    private function describeUrl( $parser, $address )
    {
        $parts = $parser->getDataByURI( $address );

        // Last path segment without its final extension.
        $segments = explode( '/', $address );
        $stem = explode( '.', array_pop( $segments ) );
        array_pop( $stem );

        $title = str_replace( array( '-', '_', '.' ), ' ', htmlentities( implode( '.', $stem ) ) );
        $path = isset( $parts[4] ) ? (string) $parts[4] : '';
        $description = str_replace( array( '-', '_', '.', '/' ), ' ', htmlentities( $path ) );

        // Joining the words of both texts explicitly: the original glued the
        // two lists together without a separator and kept empty words.
        $words = preg_split( '/\s+/', $title . ' ' . $description, -1, PREG_SPLIT_NO_EMPTY );

        return array( 'title'       => $title,
                      'description' => $description,
                      'tags'        => html_entity_decode( implode( ',', $words ) ) );
    }

    /**
     * Updates the scan rows of a URL, or inserts one when none exists yet.
     *
     * @param mixed $idUrl   Identifier of the URL record.
     * @param array $results Column => value pairs to store.
     */
    private function storeScan( $idUrl, $results )
    {
        global $_SITE;

        $db = $_SITE['database']['handler'];
        $table = $_SITE['config']['handler']->sil->database->table_scans;

        $lastScan = $db->select( )
                       ->from( $table, array( 'id' ) )
                       ->where( 'id_url = ?', $idUrl )
                       ->order( array( 'scan_date DESC' ) )
                       ->limit( 1 )
                       ->query( )
                       ->fetchAll( );

        // quoteInto() escapes the identifier instead of interpolating it.
        // NOTE(review): assumes the handler is a Zend_Db adapter, as the
        // fluent select()/update()/insert() calls suggest.
        if ( isset( $lastScan[0] ) && is_array( $lastScan[0] ) )
            $db->update( $table, $results, $db->quoteInto( 'id_url = ?', $idUrl ) );
        else
            $db->insert( $table, array_merge( array( 'id_url' => $idUrl ), $results ) );
    }

    /**
     * Requests the URL (when not done yet) and summarises the response.
     *
     * @return array|bool Keys 'net_status', 'date', 'size', 'update', 'code',
     *                    'text', 'mimetype', 'mime' and 'type'; FALSE when the
     *                    URL is not a string or no response is available.
     */
    public function scanStatus( )
    {
        if ( ! is_string( $this->url ) ) return FALSE;

        if ( is_null( $this->test_handler ) ) $this->doClient( );
        if ( is_null( $this->netStatus ) ) $this->ping( $this->url );

        // doClient() leaves the handler unset for unsupported protocols.
        $response = $this->lastResponse( );
        if ( is_null( $response ) ) return FALSE;

        $return = array( );
        $return['net_status'] = $this->netStatus;

        $return['date'] = $response->getHeader( 'last-modified' );
        if ( is_null( $return['date'] ) )
            $return['date'] = $response->getHeader( 'date' );
        $return['size'] = $response->getHeader( 'content-length' );
        $return['update'] = time( );
        $return['code'] = $response->getStatus( );
        $return['text'] = $response->responseCodeAsText( $response->getStatus( ) );

        // Drop parameters such as "; charset=..." from the content type.
        $contentType = explode( ';', $this->headerValue( $response, 'content-type' ) );
        $return['mimetype'] = $contentType[0];

        $data = $this->mimetypeData( $return['mimetype'] );
        $return['mime'] = isset( $data[1] ) ? $data[1] : NULL;
        $return['type'] = isset( $data[2] ) ? $data[2] : NULL;

        return $return;
    }

    /**
     * Normalises a status record and writes it to the URL table.
     *
     * A fixed list of response codes is treated as success and stored as
     * '200'; any other code makes the method return FALSE without touching
     * the database, as does a network status other than 'Y'.
     *
     * @param string $url    URL the status belongs to.
     * @param array  $status Record as produced by scanStatus().
     * @return array|bool The normalised record, or FALSE when nothing was stored.
     */
    public function scanStatusResult( $url, $status = array( ) )
    {
        global $_SITE;

        $accepted = array( '200', '201', '202', '203', '204', '205', '206',
                           '300', '302', '303', '304', '411', '416' );
        if ( ! isset( $status['code'] ) || ! in_array( (string) $status['code'], $accepted, TRUE ) )
            return FALSE;
        $status['code'] = '200';

        $return = array( );
        $return['netStatus'] = trim( isset( $status['net_status'] ) ? (string) $status['net_status'] : '' );
        $return['statusUpdate'] = trim( isset( $status['update'] ) ? (string) $status['update'] : '' );
        $return['statusCode'] = trim( $status['code'] );
        $return['statusText'] = trim( isset( $status['text'] ) ? (string) $status['text'] : '' );
        $return['modifiedDate'] = isset( $status['date'] ) ? trim( $status['date'] ) : NULL;
        $return['size'] = isset( $status['size'] ) ? trim( $status['size'] ) : NULL;
        $return['mimetype'] = isset( $status['mimetype'] ) ? trim( $status['mimetype'] ) : NULL;
        $return['mime'] = isset( $status['mime'] ) ? trim( $status['mime'] ) : NULL;
        $return['type'] = isset( $status['type'] ) ? trim( $status['type'] ) : NULL;

        if ( $return['netStatus'] != 'Y' || $this->netStatus != 'Y' ) return FALSE;

        $db = $_SITE['database']['handler'];
        $row = array( 'url'           => $url,
                      'net_status'    => $return['netStatus'],
                      'status_code'   => $return['statusCode'],
                      'status_text'   => $return['statusText'],
                      'status_update' => $return['statusUpdate'],
                      'modified_date' => $return['modifiedDate'],
                      'creation_date' => time(),
                      'size'          => $return['size'],
                      'mimetype'      => $return['mimetype'] );

        // NOTE(review): existence is tested with $this->url while the row is
        // matched on $url, and 'creation_date' is rewritten on every update;
        // both are kept as in the original and should be confirmed.
        if ( UrlTube::getURLIdByURL( $this->url ) )
            $db->update( $this->table_url, $row, $db->quoteInto( 'url = ?', $url ) );
        else
            $db->insert( $this->table_url, $row );

        return $return;
    }

    /**
     * Sends one ICMP echo request with the system "ping" command.
     *
     * The host is considered reachable when the command prints at least
     * three lines, which is the heuristic the original code used.
     *
     * @param string      $url  URL whose host is pinged when $host is NULL.
     * @param string|null $host Explicit host name or address.
     * @return array|bool Output lines of the command, or FALSE.
     */
    public function ping( $url, $host = NULL )
    {
        if ( is_null( $host ) )
        {
            $parser = new UrlTube_Parse();
            $parts = $parser->getDataByURI( $url );
            $host = "{$parts[2]}.{$parts[3]}";
        }
        $host = (string) $host;

        // The host comes from a URL and is untrusted: quote it for the shell
        // and refuse values that the command would read as an option.
        $output = array( );
        if ( $host !== '' && $host[0] !== '-' )
            exec( 'ping -c 1 ' . escapeshellarg( $host ), $output );

        $reachable = count( $output ) >= 3;

        if ( isset( $this ) )
        {
            $this->netStatus = $reachable ? 'Y' : 'N';
            if ( $reachable ) $this->result_ping = $output;
        }

        return $reachable ? $output : FALSE;
    }

    /**
     * Creates the HTTP client and performs the request, once per instance.
     *
     * @return bool|int TRUE when a request has been performed (now or
     *                  earlier), FALSE when the URL is not a string, -1 when
     *                  the protocol is not one of http, https, ftp or rtsp.
     * @throws Exception Whatever Zend_Http_Client throws for a failed request.
     */
    public function doClient( )
    {
        if ( ! is_string( $this->url ) ) return FALSE;
        if ( ! is_null( $this->test_handler ) ) return TRUE;

        // The original "case 'http' || 'https' || ..." evaluated to
        // "case true" and therefore accepted every non-empty protocol.
        $protocol = strtolower( rtrim( (string) $this->protocol, ':/' ) );
        if ( ! in_array( $protocol, array( 'http', 'https', 'ftp', 'rtsp' ), TRUE ) ) return -1;

        // NOTE(review): the timeout is fixed at 7 seconds here while
        // $defaultTestTimeout (3) is never read; kept as in the original.
        $this->test_options = array( 'maxredirects'  => $this->defaultTestMaxRedirects,
                                     'keepalive'     => FALSE,
                                     'useragent'     => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3; ( UrlTube-Bot :: urltube.net )',
                                     'storeresponse' => TRUE,
                                     'timeout'       => 7 );

        $client = new Zend_Http_Client( $this->url, $this->test_options );
        $client->setHeaders( 'Accept-Encoding', 'chunked' );
        if ( isset( $_SERVER['term'] ) && $_SERVER['term'] == 'xterm' )
            $client->setHeaders( 'Range', 'bytes' )
                   ->setHeaders( 'Request-Range', 'bytes=1-32' );

        $this->test_handler = $client;
        try
        {
            // A timeout surfaces as an exception such as "Unable to read
            // response, or response is empty".
            $client->request( );
        }
        catch ( Exception $e )
        {
            // Do not keep a handler without a response: later calls would
            // take it for a completed request.
            $this->test_handler = NULL;
            throw $e;
        }

        return TRUE;
    }

    /**
     * Builds a title and a description from the response headers and the URL.
     *
     * The title is the file name announced in Content-Disposition or, when
     * absent, the last path segment of the URL without its extension.
     *
     * @return array Keys 'title' and 'description'.
     */
    public function getConnectionInfo( )
    {
        $response = $this->lastResponse( );

        // Only an explicit filename parameter counts; a bare "attachment"
        // must not become the title.
        $files = '';
        $disposition = $this->headerValue( $response, 'content-disposition' );
        if ( preg_match( '/filename\s*=\s*(?:"([^"]*)"|([^;\s]+))/i', $disposition, $match ) )
            $files = trim( ( isset( $match[2] ) && $match[2] !== '' ) ? $match[2] : $match[1] );

        // Last path segment, query string excluded, final extension removed.
        $path = parse_url( $this->url, PHP_URL_PATH );
        $urlFile = '';
        if ( is_string( $path ) && $path !== '' )
        {
            $segments = explode( '/', $path );
            $urlFile = pathinfo( array_pop( $segments ), PATHINFO_FILENAME );
        }

        // Size in whole kilobytes, omitted below one kilobyte.
        $bytes = $this->headerValue( $response, 'content-length' );
        $size = ( is_numeric( $bytes ) && $bytes / 1000 >= 1 ) ? (int) ( $bytes / 1000 ) . 'Kb' : NULL;

        // NOTE(review): the original read an undefined $update_time here;
        // its intended source is not visible, so it stays empty.
        $update_time = '';

        $return = array( );
        $return['title'] = ( empty( $files ) ) ? $urlFile : $files;
        $return['description'] = "$update_time {$this->mime} $size";

        return $return;
    }

    /**
     * Stores a MIME type and the values derived from it.
     *
     * Uses the same parser lookup as scanStatus(): index 1 of the result is
     * stored in $mime and index 2 in $type.
     *
     * @param string $mimetype MIME type such as "text/html".
     */
    public function setMimetype( $mimetype )
    {
        $this->mimetype = $mimetype;

        $data = $this->mimetypeData( $mimetype );
        $this->mime = isset( $data[1] ) ? $data[1] : NULL;
        $this->type = isset( $data[2] ) ? $data[2] : NULL;
    }

    /**
     * Detects the MIME type of the resource and extracts a title and a
     * description from it.
     *
     * HTML-like text is searched for its title, its description meta tag and,
     * failing that, its body; other text contributes its first characters;
     * image, audio, video and pdf/rtf/doc resources are described through
     * getConnectionInfo(); everything else yields empty strings.
     *
     * @param string $url URL being inspected; the request itself is made for
     *                    the URL the client was built with.
     * @return array|bool Keys 'title', 'description', 'mimetype',
     *                    'status_code' and 'status_text'; FALSE when $url is
     *                    not a string or no response is available.
     */
    public function autodetectInfo( $url )
    {
        if ( ! is_string( $url ) ) return FALSE;
        if ( is_null( $this->test_handler ) ) $this->doClient( );

        $response = $this->lastResponse( );
        if ( is_null( $response ) ) return FALSE;

        // getBody() undoes chunked, gzip and deflate encodings.
        $site = (string) $response->getBody( );

        // Prefer the Content-Type header, fall back to the meta tag.
        $contentType = explode( ';', $this->headerValue( $response, 'Content-Type' ) );
        $mimetype = trim( $contentType[0] );
        if ( strpos( $mimetype, '/' ) === FALSE )
        {
            $declared = explode( ';', (string) $this->extractMetaContent( $site, 'http-equiv', 'content-type' ) );
            $mimetype = trim( $declared[0] );
        }
        $this->setMimetype( $mimetype );

        $title = '';
        $description = '';

        if ( $this->type == 'text' )
        {
            // TODO (from the original author): for YouTube and other sites
            // with extended description support, avoid this work if possible.
            if ( $this->mime == 'html' || substr( $mimetype, 5 ) == 'xhtml' || substr( $mimetype, 5 ) == 'xml' )
            {
                if ( preg_match( '/<title[^>]*>(.*?)<\/title>/is', $site, $match ) ) $title = $match[1];

                $description = (string) $this->extractMetaContent( $site, 'name', 'description' );
                if ( $description === '' && preg_match( '/<body[^>]*>(.*)<\/body>/is', $site, $match ) )
                    $description = $match[1];
            }
            else
            {
                $title = substr( $site, 0, 64 );
                $description = substr( $site, 0, 256 );
            }
        }
        elseif ( in_array( $this->type, array( 'image', 'audio', 'video' ) )
              || ( $this->type == 'application' && in_array( $this->mime, array( 'pdf', 'rtf', 'doc' ) ) ) )
        {
            $connInfo = $this->getConnectionInfo();
            $title = $connInfo['title'];
            $description = $connInfo['description'];
        }

        $this->statusCode = $response->getStatus( );
        $this->statusText = $response->getMessage( );

        return array( 'title'       => trim( (string) $title ),
                      'description' => trim( strip_tags( (string) $description ) ),
                      'mimetype'    => trim( (string) $mimetype ),
                      'status_code' => trim( (string) $this->statusCode ),
                      'status_text' => trim( (string) $this->statusText ) );
    }

    /**
     * Returns the last response of the HTTP client, or NULL when there is
     * no client or no response yet.
     */
    private function lastResponse( )
    {
        if ( is_null( $this->test_handler ) ) return NULL;
        return $this->test_handler->getLastResponse( );
    }

    /**
     * Returns a response header as a string; the first value when the header
     * is repeated, an empty string when it is missing.
     */
    private function headerValue( $response, $name )
    {
        if ( is_null( $response ) ) return '';

        $value = $response->getHeader( $name );
        if ( is_array( $value ) ) $value = reset( $value );

        return is_null( $value ) ? '' : (string) $value;
    }

    /**
     * Looks a MIME type up through the URI parser.
     */
    private function mimetypeData( $mimetype )
    {
        $parser = new UrlTube_Parse();
        return $parser->getDataByMimetype( $mimetype );
    }

    /**
     * Returns the content attribute of the first meta tag carrying the given
     * attribute/value pair, or NULL when there is none.
     */
    private function extractMetaContent( $html, $attribute, $value )
    {
        if ( ! preg_match_all( '/<meta\b[^>]*>/i', (string) $html, $tags ) ) return NULL;

        $wanted = '/\b' . preg_quote( $attribute, '/' ) . '\s*=\s*["\']?' . preg_quote( $value, '/' ) . '["\']?(?=[\s\/>]|$)/i';
        foreach ( $tags[0] as $tag )
        {
            if ( ! preg_match( $wanted, $tag ) ) continue;
            if ( ! preg_match( '/\bcontent\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s>]+))/i', $tag, $match ) ) continue;

            // One group per quoting style; only the matching one is filled.
            foreach ( array( 1, 2, 3 ) as $group )
                if ( isset( $match[$group] ) && $match[$group] !== '' ) return $match[$group];
        }

        return NULL;
    }
}