PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/NukeViet3.2/includes/class/keywordRank.class.php

http://nuke-viet.googlecode.com/
PHP | 209 lines | 139 code | 32 blank | 38 comment | 30 complexity | d9d778509860811a1cfab021191ace06 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, GPL-2.0
  1. <?php
  2. /**
  3. * @Project NUKEVIET 3.0
  4. * @Author VINADES.,JSC (contact@vinades.vn)
  5. * @Copyright (C) 2010 VINADES.,JSC. All rights reserved
  6. * @Createdate 25/12/2010, 11:2
  7. */
  8. if ( defined( 'NV_CLASS_KEYWORDRANK' ) ) return;
  9. define( 'NV_CLASS_KEYWORDRANK', true );
  10. if ( ! defined( 'NV_CURRENTTIME' ) ) define( 'NV_CURRENTTIME', time() );
  11. if ( ! defined( 'NV_ROOTDIR' ) ) define( 'NV_ROOTDIR', preg_replace( "/[\/]+$/", '', str_replace( '\\', '/', realpath( dirname( __file__ ) . '/../../' ) ) ) );
  12. if ( ! defined( 'NV_SERVER_NAME' ) )
  13. {
  14. $_server_name = ( isset( $_SERVER['SERVER_NAME'] ) and ! empty( $_SERVER['SERVER_NAME'] ) ) ? $_SERVER['SERVER_NAME'] : $_SERVER['HTTP_HOST'];
  15. $_server_name = preg_replace( array( '/^[a-zA-Z]+\:\/\//e' ), '', $_server_name );
  16. define( 'NV_SERVER_NAME', $_server_name );
  17. unset( $_server_name );
  18. }
  19. if ( ! isset( $getContent ) or ! is_object( $getContent ) )
  20. {
  21. if ( ! isset( $global_config ) or empty( $global_config ) )
  22. {
  23. $global_config = array( 'version' => "3.0.12", 'sitekey' => mt_rand() );
  24. }
  25. if ( ! class_exists( 'UrlGetContents' ) )
  26. {
  27. include ( NV_ROOTDIR . "/includes/class/geturl.class.php" );
  28. }
  29. $getContent = new UrlGetContents( $global_config );
  30. }
  31. /**
  32. * keywordRank
  33. *
  34. * @package NUKEVIET 3.0
  35. * @author VINADES.,JSC
  36. * @copyright 2010
  37. * @version $Id$
  38. * @access public
  39. */
  40. class keywordRank
  41. {
  42. private $keyword;
  43. private $lang;
  44. private $accuracy;
  45. private $myDomain;
  46. public $currentDomain;
  47. private $pattern = array( //
  48. 'googleByDomain' => "http://www.google.com/search?hl=en&domains=%s&q=%s&sitesearch=%s%s", //
  49. 'googleByAll' => "http://www.google.com/search?hl=en&q=%s%s" //
  50. );
  51. private $langList = array( //
  52. "af", "sq", "ar", "be", "bg", "ca", "zh-CN", "hr", "cs", "da", "nl", "et", "tl", "fi", "fr", "gl", "de", //
  53. "en", "el", "ht", "iw", "hi", "hu", "is", "id", "ga", "it", "ja", "ko", "lv", "lt", "mk", "ms", "mt", "no", //
  54. "fa", "pl", "pt", "ro", "ru", "sr", "sk", "sl", "es", "sw", "sv", "th", "tr", "uk", "vi", "cy", "yi" //
  55. );
  56. /**
  57. * keywordRank::__construct()
  58. *
  59. * @param mixed $_pattern
  60. * @return
  61. */
  62. function __construct( $_pattern = array() )
  63. {
  64. if ( isset( $_pattern['googleByDomain'] ) ) $this->$pattern['googleByDomain'] = $_pattern['googleByDomain'];
  65. if ( isset( $_pattern['googleByAll'] ) ) $this->$pattern['googleByAll'] = $_pattern['googleByAll'];
  66. $this->myDomain = NV_SERVER_NAME;
  67. //$this->myDomain = "nukeviet.vn";
  68. }
  69. /**
  70. * keywordRank::getInfoFromGoogle()
  71. *
  72. * @return
  73. */
  74. private function getInfoFromGoogle()
  75. {
  76. global $getContent;
  77. $key = $this->keyword;
  78. if ( $this->accuracy == "phrase" ) $key = "\"" . $key . "\"";
  79. $key = urlencode( $key );
  80. $domain = urlencode( $this->currentDomain );
  81. $lang = ! empty( $this->lang ) ? "&lr=lang_" . $this->lang : "";
  82. $url = sprintf( $this->pattern['googleByDomain'], $domain, $key, $domain, $lang );
  83. $content = $getContent->get( $url );
  84. $result = array();
  85. $result['myPages'] = 0;
  86. $result['top10MyPages'] = array();
  87. $result['allPages'] = 0;
  88. $result['top50AllPages'] = array();
  89. $result['rank'] = array();
  90. unset( $match );
  91. if ( preg_match( "/\<div\s+id\=resultStats\>[^\d]*([0-9\,]+)[^\<]*\</is", $content, $match ) )
  92. {
  93. $bl = preg_replace( "/\,/", "", $match[1] );
  94. $result['myPages'] = ( int )$bl;
  95. unset( $links );
  96. preg_match_all( '/\<h3\s+class\=\"?r[^\>]*\>[^\<]*\<a\s+href\s?\=\s?\"([^\"]+)\"[^\>]*>/', $content, $links );
  97. if ( ! empty( $links[1] ) ) $result['top10MyPages'] = $links[1];
  98. }
  99. $url = sprintf( $this->pattern['googleByAll'], $key, $lang );
  100. for ( $i = 0; $i < 5; $i++ )
  101. {
  102. $start = $i * 10;
  103. if ( $start != 0 ) $url .= "&start=" . $start;
  104. $content = $getContent->get( $url );
  105. if ( $start == 0 )
  106. {
  107. unset( $match );
  108. if ( preg_match( "/\<div\s+id\=resultStats\>[^\d]*([0-9\,]+)[^\<]*\</is", $content, $match ) )
  109. {
  110. $bl = preg_replace( "/\,/", "", $match[1] );
  111. $result['allPages'] = ( int )$bl;
  112. }
  113. }
  114. unset( $links );
  115. preg_match_all( '/\<h3\s+class\=\"?r[^\>]*\>[^\<]*\<a\s+href\s?\=\s?\"((http(s?))[^\"]+)\"[^\>]*>/', $content, $links );
  116. if ( ! empty( $links[1] ) ) $result['top50AllPages'] = array_merge( $result['top50AllPages'], $links[1] );
  117. }
  118. if ( ! empty( $result['top50AllPages'] ) )
  119. {
  120. $fl_array = preg_grep( "/^http(s?)\:\/\/[(www.)]*" . preg_quote( $this->currentDomain, "/" ) . "/", $result['top50AllPages'] );
  121. $result['rank'] = array();
  122. foreach ( array_keys( $fl_array ) as $k )
  123. {
  124. $result['rank'][$k] = $k + 1;
  125. }
  126. }
  127. return $result;
  128. }
  129. /**
  130. * keywordRank::process()
  131. *
  132. * @param mixed $_keyword
  133. * @param mixed $_lang
  134. * @param mixed $_accuracy
  135. * @param string $from
  136. * @param string $domain
  137. * @return
  138. */
  139. public function process( $_keyword, $_lang, $_accuracy, $from = "", $domain = "" )
  140. {
  141. $this->keyword = $_keyword;
  142. if ( $_accuracy != "phrase" ) $_accuracy = "keyword";
  143. $this->accuracy = $_accuracy;
  144. if ( ! in_array( $_lang, $this->langList ) ) $_lang = "";
  145. $this->lang = $_lang;
  146. if ( empty( $domain ) )
  147. {
  148. $domain = $this->myDomain;
  149. }
  150. $domain = preg_replace( array( '/^[a-zA-Z]+\:\/\//e' ), '', $domain );
  151. $this->currentDomain = $domain;
  152. if ( ! empty( $from ) ) $from = strtolower( $from );
  153. if ( $from != "yahoo" ) $from = "google";
  154. if ( preg_match( "/^localhost|127\.0\.0/is", $this->currentDomain ) )
  155. {
  156. return false;
  157. }
  158. $result = array();
  159. $result['myDomain'] = $this->currentDomain;
  160. $result['keyword'] = $this->keyword;
  161. $result['lang'] = $this->lang;
  162. $result['accuracy'] = $this->accuracy;
  163. $result['fromEngine'] = $from;
  164. $result['updtime'] = NV_CURRENTTIME;
  165. $result['detail'] = array();
  166. if ( $from == "yahoo" )
  167. {
  168. //Viet sau
  169. }
  170. else
  171. {
  172. $result['detail'] = $this->getInfoFromGoogle();
  173. }
  174. return $result;
  175. }
  176. }
  177. ?>