PageRenderTime 25ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/Crawler/Base.php

https://gitlab.com/hoanghung.dev/phunuvadoisong.com
PHP | 158 lines | 116 code | 28 blank | 14 comment | 26 complexity | cd3220749e1dbfe5127e70a1478fa551 MD5 | raw file
  1. <?php
  2. /**
  3. * Created by PhpStorm.
  4. * User: STEVEN
  5. * Date: 18/08/2016
  6. * Time: 12:29 SA
  7. */
  8. namespace Crawl;
  9. use Application\Admin\Models\News;
  10. class Base
  11. {
  12. public function cUrl($url, array $post_data = array(), $delete = false, $verbose = false, $ref_url = false, $cookie_location = false, $return_transfer = true)
  13. {
  14. $return_val = false;
  15. $pointer = curl_init();
  16. curl_setopt($pointer, CURLOPT_URL, $url);
  17. curl_setopt($pointer, CURLOPT_TIMEOUT, 40);
  18. curl_setopt($pointer, CURLOPT_RETURNTRANSFER, $return_transfer);
  19. curl_setopt($pointer, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.28 Safari/534.10");
  20. curl_setopt($pointer, CURLOPT_SSL_VERIFYHOST, false);
  21. curl_setopt($pointer, CURLOPT_SSL_VERIFYPEER, false);
  22. curl_setopt($pointer, CURLOPT_HEADER, false);
  23. curl_setopt($pointer, CURLOPT_FOLLOWLOCATION, true);
  24. curl_setopt($pointer, CURLOPT_AUTOREFERER, true);
  25. if ($cookie_location !== false) {
  26. curl_setopt($pointer, CURLOPT_COOKIEJAR, $cookie_location);
  27. curl_setopt($pointer, CURLOPT_COOKIEFILE, $cookie_location);
  28. curl_setopt($pointer, CURLOPT_COOKIE, session_name() . '=' . session_id());
  29. }
  30. if ($verbose !== false) {
  31. $verbose_pointer = fopen($verbose, 'w');
  32. curl_setopt($pointer, CURLOPT_VERBOSE, true);
  33. curl_setopt($pointer, CURLOPT_STDERR, $verbose_pointer);
  34. }
  35. if ($ref_url !== false) {
  36. curl_setopt($pointer, CURLOPT_REFERER, $ref_url);
  37. }
  38. if (count($post_data) > 0) {
  39. curl_setopt($pointer, CURLOPT_POST, true);
  40. curl_setopt($pointer, CURLOPT_POSTFIELDS, $post_data);
  41. }
  42. if ($delete !== false) {
  43. curl_setopt($pointer, CURLOPT_CUSTOMREQUEST, "DELETE");
  44. }
  45. $return_val = curl_exec($pointer);
  46. $http_code = curl_getinfo($pointer, CURLINFO_HTTP_CODE);
  47. if ($http_code == 404) {
  48. return false;
  49. }
  50. curl_close($pointer);
  51. unset($pointer);
  52. return $return_val;
  53. }
  54. function toSlug($text)
  55. {
  56. // replace non letter or digits by -
  57. $text = preg_replace('~[^\pL\d]+~u', '-', $text);
  58. // transliterate
  59. $text = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
  60. // remove unwanted characters
  61. $text = preg_replace('~[^-\w]+~', '', $text);
  62. // trim
  63. $text = trim($text, '-');
  64. // remove duplicate -
  65. $text = preg_replace('~-+~', '-', $text);
  66. // lowercase
  67. $text = strtolower($text);
  68. if (empty($text)) {
  69. return 'n-a';
  70. }
  71. return $text;
  72. }
  73. function uploadImageURL($url, $directory, $filename)
  74. {
  75. $path = DIR_UPLOAD;
  76. $url = trim($url);
  77. if ($url && $this->checkUrl($url)) {
  78. $file = fopen($url, "rb");
  79. if ($file) {
  80. $valid_exts = array("jpg", "jpeg", "gif", "png"); // default image only extensions
  81. $ext = pathinfo(parse_url($url)['path'], PATHINFO_EXTENSION);
  82. if (in_array($ext, $valid_exts)) {
  83. $filename = $filename . '.' . $ext;
  84. if (!is_dir($path . $directory)) {
  85. mkdir($path . $directory, 0777, true);
  86. }
  87. $newfile = fopen($path . $directory . $filename, "w"); // creating new file on local server
  88. if ($newfile) {
  89. while (!feof($file)) {
  90. fwrite($newfile, fread($file, 1024 * 8), 1024 * 8); // write the file to the new directory at a rate of 8kb/sec. until we reach the end.
  91. }
  92. return $directory . $filename;
  93. }
  94. }
  95. }
  96. }
  97. }
  98. function checkExist($slug)
  99. {
  100. $newsModel = new News();
  101. $data = $newsModel->getOne('slug = :slug', array(':slug' => $slug), 'news_id');
  102. if (!empty($data)) return true; else return false;
  103. }
  104. public function writeLog($fileName, $content)
  105. {
  106. $dir = DIR_FOLDER . '/Crawler/log/';
  107. $file = $dir . $fileName . '.log';
  108. if ($fp = @fopen($file, "a")) {
  109. fwrite($fp, $content . "\r\n");
  110. fclose($fp);
  111. return true;
  112. } else {
  113. return false;
  114. }
  115. }
  116. public function checkUrl($url)
  117. {
  118. $handle = curl_init($url);
  119. curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
  120. /* Get the HTML or whatever is linked in $url. */
  121. $response = curl_exec($handle);
  122. /* Check for 404 (file not found). */
  123. $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
  124. if ($httpCode == 404 || $httpCode == 403 || $httpCode == 400) {
  125. return false;
  126. } else {
  127. return true;
  128. }
  129. }
  130. }