PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/plugins/SEO/RankChecker.php

https://github.com/CodeYellowBV/piwik
PHP | 376 lines | 226 code | 37 blank | 113 comment | 23 complexity | 3dcd01912572e01ce95642b14d290b60 MD5 | raw file
Possible License(s): LGPL-3.0, JSON, MIT, GPL-3.0, LGPL-2.1, GPL-2.0, AGPL-1.0, BSD-2-Clause, BSD-3-Clause
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik\Plugins\SEO;
  10. use Exception;
  11. use Piwik\Http;
  12. use Piwik\Log;
  13. use Piwik\MetricsFormatter;
  14. /**
  15. * The functions below are derived/adapted from GetRank.org's
  16. * Free PageRank Script v2.0, released under GPL.
  17. *
  18. * @copyright Copyright (C) 2007 - 2010 GetRank.Org All rights reserved.
  19. * @link http://www.getrank.org/free-pagerank-script/
  20. * @license GPL
  21. */
  class RankChecker
  {
      /**
       * Domain name being checked (already reduced by extractDomainFromUrl()).
       *
       * @var string
       */
      private $url;

      /**
       * Lazily loaded result of MajesticClient::getBacklinkStats(), or null
       * when it has not been requested yet.
       *
       * @var array|null
       */
      private $majesticInfo = null;

      /**
       * @param string $url URL (or bare domain) of the site to check
       */
      public function __construct($url)
      {
          $this->url = self::extractDomainFromUrl($url);
      }

      /**
       * Extract domain from URL as the web services generally
       * expect only a domain name (i.e., no protocol, port, path, query, etc).
       *
       * @param string $url
       * @return string
       */
      public static function extractDomainFromUrl($url)
      {
          // The patterns are applied in order, each one on the result of the previous one.
          $patterns = array(
              '~^https?\://~si', // strip protocol
              '~[/:#?;%&].*~',   // strip port, path, query, anchor, etc
              '~\.$~',           // trailing period
          );

          return preg_replace($patterns, '', $url);
      }

      /**
       * Web service proxy that retrieves the content at the specified URL.
       *
       * Every "&nbsp;" entity in the response is replaced by a plain space.
       * An Exception raised by the HTTP layer is swallowed on purpose and
       * reported as an empty response, so callers simply find no match.
       *
       * @param string $url
       * @return string response body, or '' when the request threw an Exception
       */
      private function getPage($url)
      {
          $timeout = 10;
          // Forward the User-Agent header of the current request when there is one.
          $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : null;

          try {
              return str_replace('&nbsp;', ' ', Http::sendHttpRequest($url, $timeout, $userAgent));
          } catch (Exception $e) {
              return '';
          }
      }

      /**
       * Returns the google page rank for the current url
       *
       * @return int the rank found in the response, 0 when none could be found
       */
      public function getPageRank()
      {
          $checksum = $this->CheckHash($this->HashURL($this->url));
          $url = 'http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=' . $checksum
              . '&features=Rank&q=info:' . urlencode($this->url) . '&num=100&filter=0';
          $data = $this->getPage($url);

          if (preg_match('#Rank_[0-9]:[0-9]:([0-9]+)#si', $data, $match)) {
              return (int)$match[1];
          }
          return 0;
      }

      /**
       * Returns the alexa traffic rank for the current url
       *
       * @return string the TEXT attribute of the POPULARITY element, or ''
       *                when the response is empty, is not valid XML or
       *                does not contain that attribute
       */
      public function getAlexaRank()
      {
          $response = $this->getPage('http://data.alexa.com/data?cli=10&url=' . urlencode($this->url));
          if ($response === '') {
              return '';
          }

          // Collect XML parse errors internally instead of emitting PHP warnings,
          // then restore whatever setting was active before.
          $previousSetting = libxml_use_internal_errors(true);
          $xml = simplexml_load_string($response);
          libxml_clear_errors();
          libxml_use_internal_errors($previousSetting);

          if ($xml === false || !isset($xml->SD->POPULARITY['TEXT'])) {
              return '';
          }
          return (string)$xml->SD->POPULARITY['TEXT'];
      }

      /**
       * Returns the number of Dmoz.org entries for the current url
       *
       * @return int
       */
      public function getDmoz()
      {
          $data = $this->getPage('http://www.dmoz.org/search?q=' . urlencode($this->url));

          if (preg_match('#Open Directory Sites[^\(]+\([0-9]-[0-9]+ of ([0-9]+)\)#', $data, $match)) {
              return (int)$match[1];
          }
          return 0;
      }

      /**
       * Returns the number of pages google holds in its index for the current url
       *
       * @return int
       */
      public function getIndexedPagesGoogle()
      {
          return $this->getIndexedPages('http://www.google.com/search?hl=en&q=site%3A' . urlencode($this->url));
      }

      /**
       * Returns the number of pages bing holds in its index for the current url
       *
       * @return int
       */
      public function getIndexedPagesBing()
      {
          return $this->getIndexedPages('http://www.bing.com/search?mkt=en-US&q=site%3A' . urlencode($this->url));
      }

      /**
       * Fetches a search result page and reads the "<number> results" counter from it.
       *
       * @param string $searchUrl complete search URL to request
       * @return int the counter with its thousands separators removed, 0 when absent
       */
      private function getIndexedPages($searchUrl)
      {
          $data = $this->getPage($searchUrl);

          if (preg_match('#([0-9\,]+) results#i', $data, $match)) {
              return (int)str_replace(',', '', $match[1]);
          }
          return 0;
      }

      /**
       * Returns the domain age for the current url
       *
       * Asks archive.org, who.is and whois.com and keeps the earliest
       * (i.e. smallest) timestamp any of them reported.
       *
       * @return string|bool value of MetricsFormatter::getPrettyTimeFromSeconds()
       *                     for the elapsed time, or false when no source
       *                     returned a date
       */
      public function getAge()
      {
          $candidates = array(
              $this->_getAgeArchiveOrg(),
              $this->_getAgeWhoIs(),
              $this->_getAgeWhoisCom(),
          );

          // Each source reports 0 when it found nothing; only keep real timestamps.
          $timestamps = array();
          foreach ($candidates as $candidate) {
              if ($candidate > 0) {
                  $timestamps[] = $candidate;
              }
          }

          if (empty($timestamps)) {
              return false;
          }

          return MetricsFormatter::getPrettyTimeFromSeconds(time() - min($timestamps));
      }

      /**
       * Returns the number backlinks that link to the current site.
       *
       * @return int
       */
      public function getExternalBacklinkCount()
      {
          return $this->getMajesticStat('backlink_count');
      }

      /**
       * Returns the number of referrer domains that link to the current site.
       *
       * @return int
       */
      public function getReferrerDomainCount()
      {
          return $this->getMajesticStat('referrer_domains_count');
      }

      /**
       * Reads one entry of the Majestic statistics.
       *
       * An Exception raised while loading the statistics is logged and
       * reported as 0, as is an entry that is not present.
       *
       * @param string $key name of the entry to read
       * @return int
       */
      private function getMajesticStat($key)
      {
          try {
              $stats = $this->getMajesticInfo();
              return isset($stats[$key]) ? $stats[$key] : 0;
          } catch (Exception $e) {
              Log::info($e);
              return 0;
          }
      }

      /**
       * Returns the domain age archive.org lists for the current url
       *
       * @return int unix timestamp, 0 when no date could be found
       */
      protected function _getAgeArchiveOrg()
      {
          // Only strip a leading "www." so that hosts merely containing it stay intact.
          $domain = preg_replace('/^www\./', '', $this->url);
          $data = $this->getPage('http://wayback.archive.org/web/*/' . urlencode($domain));

          // The date is the text of the link whose target ends with "<domain>/".
          $pattern = '#<a href=\"([^>]*)' . preg_quote($domain, '#') . '/\">([^<]*)<\/a>#';

          return $this->matchTimestamp($pattern, $data, 2);
      }

      /**
       * Returns the domain age who.is lists for the current url
       *
       * @return int unix timestamp, 0 when no date could be found
       */
      protected function _getAgeWhoIs()
      {
          $domain = preg_replace('/^www\./', '', $this->url);
          $data = $this->getPage('http://www.who.is/whois/' . urlencode($domain));

          return $this->matchTimestamp(
              '#(?:Creation Date|Created On|Registered on)\.*:\s*([ \ta-z0-9\/\-:\.]+)#si',
              $data
          );
      }

      /**
       * Returns the domain age whois.com lists for the current url
       *
       * @return int unix timestamp, 0 when no date could be found
       */
      protected function _getAgeWhoisCom()
      {
          $domain = preg_replace('/^www\./', '', $this->url);
          $data = $this->getPage('http://www.whois.com/whois/' . urlencode($domain));

          return $this->matchTimestamp(
              '#(?:Creation Date|Created On):\s*([ \ta-z0-9\/\-:\.]+)#si',
              $data
          );
      }

      /**
       * Applies a pattern to a page and converts one capture group to a timestamp.
       *
       * @param string $pattern regular expression to apply
       * @param string $data    page content to search
       * @param int    $group   index of the capture group holding the date text
       * @return int unix timestamp, 0 when the pattern does not match, the group
       *             is empty or strtotime() cannot parse its content
       */
      private function matchTimestamp($pattern, $data, $group = 1)
      {
          if (!preg_match($pattern, $data, $match) || empty($match[$group])) {
              return 0;
          }

          $timestamp = strtotime(trim($match[$group]));

          return $timestamp === false ? 0 : $timestamp;
      }

      /**
       * Convert numeric string to int
       *
       * Starting from $Check, every byte of $Str multiplies the running value
       * by $Magic and then adds the byte's ordinal value.
       *
       * @see getPageRank()
       *
       * @param string $Str
       * @param int $Check
       * @param int $Magic
       * @return int
       */
      private function StrToNum($Str, $Check, $Magic)
      {
          $Int32Unit = 4294967296; // 2^32
          $length = strlen($Str);
          for ($i = 0; $i < $length; $i++) {
              $Check *= $Magic;
              // If the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
              // the result of converting to integer is undefined
              // refer to http://www.php.net/manual/en/language.types.integer.php
              if ($Check >= $Int32Unit) {
                  $Check = ($Check - $Int32Unit * (int)($Check / $Int32Unit));
                  // if the check less than -2^31
                  $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
              }
              // Square-bracket offset: the curly-brace form no longer parses on PHP 8.
              $Check += ord($Str[$i]);
          }
          return $Check;
      }

      /**
       * Generate a hash for a url
       *
       * Combines two StrToNum() values of the string through a fixed sequence
       * of shifts and masks; the sequence must not be reordered.
       *
       * @see getPageRank()
       *
       * @param string $String
       * @return int
       */
      private function HashURL($String)
      {
          $Check1 = $this->StrToNum($String, 0x1505, 0x21);
          $Check2 = $this->StrToNum($String, 0, 0x1003F);

          $Check1 >>= 2;
          $Check1 = (($Check1 >> 4) & 0x3FFFFC0) | ($Check1 & 0x3F);
          $Check1 = (($Check1 >> 4) & 0x3FFC00) | ($Check1 & 0x3FF);
          $Check1 = (($Check1 >> 4) & 0x3C000) | ($Check1 & 0x3FFF);

          $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) << 2) | ($Check2 & 0xF0F);
          $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000);

          return ($T1 | $T2);
      }

      /**
       * Generate a checksum for the hash string
       *
       * @see getPageRank()
       *
       * @param int $Hashnum
       * @return string '7', followed by the check digit, followed by the
       *                unsigned decimal representation of $Hashnum
       */
      private function CheckHash($Hashnum)
      {
          $CheckByte = 0;
          $Flag = 0;
          $HashStr = sprintf('%u', $Hashnum);
          $length = strlen($HashStr);

          // Walk the digits from right to left; every second digit is doubled
          // and reduced to the sum of the digits of that product.
          for ($i = $length - 1; $i >= 0; $i--) {
              $Re = (int)$HashStr[$i];
              if (1 === ($Flag % 2)) {
                  $Re += $Re;
                  $Re = (int)($Re / 10) + ($Re % 10);
              }
              $CheckByte += $Re;
              $Flag++;
          }

          $CheckByte %= 10;
          if (0 !== $CheckByte) {
              $CheckByte = 10 - $CheckByte;
              // $Flag now equals the number of digits processed.
              if (1 === ($Flag % 2)) {
                  if (1 === ($CheckByte % 2)) {
                      $CheckByte += 9;
                  }
                  $CheckByte >>= 1;
              }
          }

          return '7' . $CheckByte . $HashStr;
      }

      /**
       * Returns the Majestic backlink statistics for the current url.
       *
       * The statistics are requested on first use and then kept on the instance.
       * Exceptions are not caught here; the public callers handle them.
       *
       * @return array result of MajesticClient::getBacklinkStats()
       */
      private function getMajesticInfo()
      {
          if ($this->majesticInfo === null) {
              $client = new MajesticClient();
              $this->majesticInfo = $client->getBacklinkStats($this->url);
          }
          return $this->majesticInfo;
      }
  }