/Crawler/Base.php
PHP | 158 lines | 116 code | 28 blank | 14 comment | 26 complexity | cd3220749e1dbfe5127e70a1478fa551 MD5 | raw file
- <?php
- /**
- * Created by PhpStorm.
- * User: STEVEN
- * Date: 18/08/2016
- * Time: 12:29 SA
- */
- namespace Crawl;
- use Application\Admin\Models\News;
/**
 * Shared helpers for the crawlers: HTTP fetching through cURL, slug
 * generation, remote image download, duplicate-news lookup and file logging.
 */
class Base
{
    /**
     * Perform an HTTP request with cURL and return the result of curl_exec().
     *
     * @param string       $url             Address to request.
     * @param array        $post_data       When non-empty, the request is sent as a POST with these fields.
     * @param bool         $delete          When not false, the request method is forced to DELETE.
     * @param string|false $verbose         Path of a file that receives cURL's verbose output, or false to disable.
     * @param string|false $ref_url         Value for the Referer header, or false to leave it unset.
     * @param string|false $cookie_location Cookie jar file used for both reading and writing, or false for none.
     * @param bool         $return_transfer Passed to CURLOPT_RETURNTRANSFER.
     *
     * @return mixed The curl_exec() result (the body when $return_transfer is true),
     *               or false when the server answered 404.
     */
    public function cUrl($url, array $post_data = array(), $delete = false, $verbose = false, $ref_url = false, $cookie_location = false, $return_transfer = true)
    {
        $pointer = curl_init();
        $verbose_pointer = false;

        curl_setopt($pointer, CURLOPT_URL, $url);
        curl_setopt($pointer, CURLOPT_TIMEOUT, 40);
        curl_setopt($pointer, CURLOPT_RETURNTRANSFER, $return_transfer);
        curl_setopt($pointer, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.28 Safari/534.10");
        // NOTE(review): certificate and host verification are switched off, so HTTPS
        // responses are not authenticated. Kept as-is because crawl targets may rely on it.
        curl_setopt($pointer, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($pointer, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($pointer, CURLOPT_HEADER, false);
        curl_setopt($pointer, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($pointer, CURLOPT_AUTOREFERER, true);

        if ($cookie_location !== false) {
            curl_setopt($pointer, CURLOPT_COOKIEJAR, $cookie_location);
            curl_setopt($pointer, CURLOPT_COOKIEFILE, $cookie_location);
            // NOTE(review): this sends the local PHP session id to the remote host;
            // confirm that is intended before crawling third-party sites.
            curl_setopt($pointer, CURLOPT_COOKIE, session_name() . '=' . session_id());
        }

        if ($verbose !== false) {
            $verbose_pointer = fopen($verbose, 'w');
            // Only enable verbose mode when the log file could actually be opened.
            if ($verbose_pointer !== false) {
                curl_setopt($pointer, CURLOPT_VERBOSE, true);
                curl_setopt($pointer, CURLOPT_STDERR, $verbose_pointer);
            }
        }

        if ($ref_url !== false) {
            curl_setopt($pointer, CURLOPT_REFERER, $ref_url);
        }

        if (count($post_data) > 0) {
            curl_setopt($pointer, CURLOPT_POST, true);
            curl_setopt($pointer, CURLOPT_POSTFIELDS, $post_data);
        }

        if ($delete !== false) {
            curl_setopt($pointer, CURLOPT_CUSTOMREQUEST, "DELETE");
        }

        $return_val = curl_exec($pointer);
        $http_code = curl_getinfo($pointer, CURLINFO_HTTP_CODE);

        // Release the handle and the verbose log on every path, including the 404 one.
        curl_close($pointer);
        if ($verbose_pointer !== false) {
            fclose($verbose_pointer);
        }

        if ($http_code == 404) {
            return false;
        }

        return $return_val;
    }

    /**
     * Turn arbitrary text into a lowercase, dash-separated ASCII slug.
     *
     * @param string $text Text to convert.
     *
     * @return string The slug, or 'n-a' when nothing usable remains.
     */
    public function toSlug($text)
    {
        // Collapse every run of characters that are neither letters nor digits into one dash.
        $text = (string) preg_replace('~[^\pL\d]+~u', '-', (string) $text);

        // Transliterate to ASCII; a failed conversion is treated as an empty string.
        $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $text);
        $text = ($ascii === false) ? '' : $ascii;

        // Drop whatever the transliteration left that is not a word character or a dash.
        $text = (string) preg_replace('~[^-\w]+~', '', $text);
        $text = trim($text, '-');

        // Squash repeated dashes and normalise case.
        $text = (string) preg_replace('~-+~', '-', $text);
        $text = strtolower($text);

        // Compare against '' explicitly: empty() would also reject the valid slug "0".
        if ($text === '') {
            return 'n-a';
        }

        return $text;
    }

    /**
     * Download a remote image into the upload directory.
     *
     * Only URLs whose path ends in jpg, jpeg, gif or png (any letter case) are
     * accepted. The file is stored as DIR_UPLOAD . $directory . $filename . '.' . ext.
     *
     * @param string $url       Address of the remote image.
     * @param string $directory Target directory relative to DIR_UPLOAD; it is concatenated
     *                          directly with the file name, so it is expected to end with a slash.
     * @param string $filename  Target file name without extension.
     *
     * @return string|null $directory . $filename (with extension) on success, null on any failure.
     */
    public function uploadImageURL($url, $directory, $filename)
    {
        $url = trim((string) $url);
        if (!$url) {
            return null;
        }

        // Validate the extension first so unsupported URLs cost no network traffic.
        $url_path = parse_url($url, PHP_URL_PATH);
        $ext = strtolower(pathinfo((string) $url_path, PATHINFO_EXTENSION));
        $valid_exts = array('jpg', 'jpeg', 'gif', 'png'); // image extensions only
        if (!in_array($ext, $valid_exts, true)) {
            return null;
        }

        if (!$this->checkUrl($url)) {
            return null;
        }

        $file = fopen($url, 'rb');
        if (!$file) {
            return null;
        }

        $target_dir = DIR_UPLOAD . $directory;
        $filename = $filename . '.' . $ext;
        $target = $target_dir . $filename;

        // The second is_dir() covers another process creating the directory in between.
        if (!is_dir($target_dir) && !mkdir($target_dir, 0777, true) && !is_dir($target_dir)) {
            fclose($file);
            return null;
        }

        $newfile = fopen($target, 'wb');
        if (!$newfile) {
            fclose($file);
            return null;
        }

        // stream_copy_to_stream() stops on read errors, unlike a manual feof() loop.
        $copied = stream_copy_to_stream($file, $newfile);
        fclose($file);
        fclose($newfile);

        if ($copied === false) {
            // Do not leave a truncated image behind.
            if (is_file($target)) {
                unlink($target);
            }
            return null;
        }

        return $directory . $filename;
    }

    /**
     * Tell whether a news record with the given slug is returned by the News model.
     *
     * @param string $slug Slug to look up.
     *
     * @return bool True when the lookup returns a non-empty result.
     */
    public function checkExist($slug)
    {
        $newsModel = new News();
        $data = $newsModel->getOne('slug = :slug', array(':slug' => $slug), 'news_id');

        return !empty($data);
    }

    /**
     * Append one line to DIR_FOLDER/Crawler/log/<fileName>.log.
     *
     * @param string $fileName Log file name without the ".log" extension.
     * @param string $content  Text to append; a CRLF is added after it.
     *
     * @return bool True when the line was written, false otherwise.
     */
    public function writeLog($fileName, $content)
    {
        $dir = DIR_FOLDER . '/Crawler/log/';
        $file = $dir . $fileName . '.log';

        // Logging is best effort: a log file that cannot be opened is reported
        // through the return value instead of a warning.
        $fp = @fopen($file, "a");
        if (!$fp) {
            return false;
        }

        $written = fwrite($fp, $content . "\r\n");
        fclose($fp);

        return $written !== false;
    }

    /**
     * Request a URL and report whether the server answered with something
     * other than 400, 403 or 404.
     *
     * Any other status counts as reachable, including 0, which cURL reports
     * when no HTTP response was received.
     *
     * @param string $url Address to probe.
     *
     * @return bool False for a 400, 403 or 404 status, true otherwise.
     */
    public function checkUrl($url)
    {
        $handle = curl_init($url);
        // Capture the body instead of printing it; only the status code is used.
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_exec($handle);
        $httpCode = (int) curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_close($handle);

        return !in_array($httpCode, array(400, 403, 404), true);
    }
}