PageRenderTime 40ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/php/classes/LinkPreview.php

https://gitlab.com/alidzapp/Facebook-Link-Preview
PHP | 216 lines | 166 code | 42 blank | 8 comment | 61 complexity | 8b50da6426e623ea26f38231b136b4a9 MD5 | raw file
  1. <?php
  2. /**
  3. * Copyright (c) 2014 Leonardo Cardoso (http://leocardz.com)
  4. * Dual licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
  5. * and GPL (http://www.opensource.org/licenses/gpl-license.php) licenses.
  6. *
  7. * Version: 1.3.0
  8. */
  9. /** Important php5-curl must be installed and enabled */
  10. include_once "Media.php";
  11. include_once "Regex.php";
  12. include_once "SetUp.php";
  13. include_once "Url.php";
  14. include_once "Content.php";
  15. include_once "Json.php";
  16. class LinkPreview
  17. {
  18. function __construct()
  19. {
  20. }
  21. function joinAll($matching, $number, $url, $content)
  22. {
  23. for ($i = 0; $i < count($matching[$number]); $i++) {
  24. $imgSrc = $matching[$number][$i] . $matching[$number + 1][$i];
  25. $src = "";
  26. $pathCounter = substr_count($imgSrc, "../");
  27. if (!preg_match(Regex::$httpRegex, $imgSrc)) {
  28. $src = Url::getImageUrl($pathCounter, Url::canonicalLink($imgSrc, $url));
  29. }
  30. if ($src . $imgSrc != $url) {
  31. if ($src == "")
  32. array_push($content, $src . $imgSrc);
  33. else
  34. array_push($content, $src);
  35. }
  36. }
  37. return $content;
  38. }
  39. function crawl($text, $imageQuantity, $header)
  40. {
  41. if (preg_match(Regex::$urlRegex, $text, $match)) {
  42. $title = "";
  43. $description = "";
  44. $videoIframe = "";
  45. $video = "no";
  46. if (strpos($match[0], " ") === 0)
  47. $match[0] = "http://" . substr($match[0], 1);
  48. $finalUrl = $match[0];
  49. $pageUrl = str_replace("https://", "http://", $finalUrl);
  50. if (Content::isImage($pageUrl)) {
  51. $images = $pageUrl;
  52. } else {
  53. $urlData = $this->getPage($pageUrl);
  54. if (!$urlData["content"] && strpos($pageUrl, "//www.") === false) {
  55. if (strpos($pageUrl, "http://") !== false)
  56. $pageUrl = str_replace("http://", "http://www.", $pageUrl);
  57. elseif (strpos($pageUrl, "https://") !== false)
  58. $pageUrl = str_replace("https://", "https://www.", $pageUrl);
  59. $urlData = $this->getPage($pageUrl);
  60. }
  61. $pageUrl = $finalUrl = $urlData["url"];
  62. $raw = $urlData["content"];
  63. $header = $urlData["header"];
  64. $metaTags = Content::getMetaTags($raw);
  65. $tempTitle = Content::extendedTrim($metaTags["title"]);
  66. if ($tempTitle != "")
  67. $title = $tempTitle;
  68. if ($title == "") {
  69. if (preg_match(Regex::$titleRegex, str_replace("\n", " ", $raw), $matching))
  70. $title = $matching[2];
  71. }
  72. $tempDescription = Content::extendedTrim($metaTags["description"]);
  73. if ($tempDescription != "")
  74. $description = $tempDescription;
  75. else
  76. $description = Content::crawlCode($raw);
  77. $descriptionUnderstood = false;
  78. if ($description != "")
  79. $descriptionUnderstood = true;
  80. if (($descriptionUnderstood == false && strlen($title) > strlen($description) && !preg_match(Regex::$urlRegex, $description) && $description != "" && !preg_match('/[A-Z]/', $description)) || $title == $description) {
  81. $title = $description;
  82. $description = Content::crawlCode($raw);
  83. }
  84. if(Content::isJson($title)){
  85. $title = "";
  86. }
  87. if(Content::isJson($description)){
  88. $description = "";
  89. }
  90. $media = $this->getMedia($pageUrl);
  91. $images = count($media) == 0 ? Content::extendedTrim($metaTags["image"]) : $media[0];
  92. $videoIframe = $media[1];
  93. if ($images == "")
  94. $images = Content::getImages($raw, $pageUrl, $imageQuantity);
  95. if ($media != null && $media[0] != "" && $media[1] != "")
  96. $video = "yes";
  97. $title = Content::extendedTrim($title);
  98. $pageUrl = Content::extendedTrim($pageUrl);
  99. $description = Content::extendedTrim($description);
  100. $description = preg_replace(Regex::$scriptRegex, "", $description);
  101. }
  102. $finalLink = explode("&", $finalUrl);
  103. $finalLink = $finalLink[0];
  104. $description = strip_tags($description);
  105. $answer = array("title" => $title, "url" => $finalLink, "pageUrl" => $finalUrl, "canonicalUrl" => Url::canonicalPage($pageUrl), "description" => $description,
  106. "images" => $images, "video" => $video, "videoIframe" => $videoIframe);
  107. $result_json = Json::jsonSafe($answer, $header);
  108. $result_json_decoded = json_decode($result_json);
  109. $flagged = false;
  110. if (!isset($result_json_decoded->title)) {
  111. $title = utf8_encode($title);
  112. $flagged = true;
  113. }
  114. if (!isset($result_json_decoded->description)) {
  115. $description = utf8_encode($description);
  116. $flagged = true;
  117. }
  118. if ($flagged) {
  119. $answer = array("title" => $title, "url" => $finalLink, "pageUrl" => $finalUrl, "canonicalUrl" => Url::canonicalPage($pageUrl), "description" => $description,
  120. "images" => $images, "video" => $video, "videoIframe" => $videoIframe);
  121. return Json::jsonSafe($answer, $header);
  122. } else {
  123. return $result_json;
  124. }
  125. }
  126. return null;
  127. }
  128. function getPage($url)
  129. {
  130. $res = array();
  131. $options = array(CURLOPT_RETURNTRANSFER => true, // return web page
  132. CURLOPT_HEADER => false, // do not return headers
  133. CURLOPT_FOLLOWLOCATION => true, // follow redirects
  134. CURLOPT_USERAGENT => "leocardz", // who am i
  135. CURLOPT_AUTOREFERER => true, // set referer on redirect
  136. CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
  137. CURLOPT_TIMEOUT => 120, // timeout on response
  138. CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
  139. );
  140. $ch = curl_init($url);
  141. curl_setopt_array($ch, $options);
  142. $content = curl_exec($ch);
  143. $header = curl_getinfo($ch);
  144. curl_close($ch);
  145. $hrd = $header["content_type"];
  146. header("Content-Type: " . $hrd, true);
  147. $res['content'] = $content;
  148. $res['url'] = $header['url'];
  149. $res['header'] = $hrd;
  150. return $res;
  151. }
  152. function getMedia($pageUrl)
  153. {
  154. $media = array();
  155. if (strpos($pageUrl, "youtube.com") !== false) {
  156. $media = Media::mediaYoutube($pageUrl);
  157. } else if (strpos($pageUrl, "vimeo.com") !== false) {
  158. $media = Media::mediaVimeo($pageUrl);
  159. } else if (strpos($pageUrl, "vine.co") !== false) {
  160. $media = Media::mediaVine($pageUrl);
  161. } else if (strpos($pageUrl, "metacafe.com") !== false) {
  162. $media = Media::mediaMetacafe($pageUrl);
  163. } else if (strpos($pageUrl, "dailymotion.com") !== false) {
  164. $media = Media::mediaDailymotion($pageUrl);
  165. } else if (strpos($pageUrl, "collegehumor.com") !== false) {
  166. $media = Media::mediaCollegehumor($pageUrl);
  167. } else if (strpos($pageUrl, "blip.tv") !== false) {
  168. $media = Media::mediaBlip($pageUrl);
  169. } else if (strpos($pageUrl, "funnyordie.com") !== false) {
  170. $media = Media::mediaFunnyordie($pageUrl);
  171. }
  172. return $media;
  173. }
  174. }