PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/Zebra_cURL.php

https://github.com/n4sss/Zebra_cURL
PHP | 1870 lines | 610 code | 211 blank | 1049 comment | 99 complexity | 4dff7500363cdcdccc5aefefcc9b9916 MD5 | raw file
Possible License(s): LGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Zebra_cURL, a high performance PHP cURL library
  4. *
  5. * Zebra_cURL is a high performance PHP library acting as a wrapper to PHP's {@link http://www.php.net/manual/en/book.curl.php libcurl library},
  6. * which not only allows the running of multiple requests at once asynchronously, in parallel, but also as soon as one
  7. * thread finishes it can be processed right away without having to wait for the other threads in the queue to finish.
  8. *
  9. * Also, each time a request is completed another one is added to the queue, thus keeping a constant number of threads
  10. * running at all times and eliminating wasted CPU cycles from busy waiting. The result is a faster and more efficient
  11. * way of processing large quantities of cURL requests (like fetching thousands of RSS feeds at once), drastically reducing
  12. * processing time.
  13. *
  14. * This script supports GET and POST requests, basic downloads, downloads from FTP servers, HTTP Authentication, and
  15. * requests through proxy servers.
  16. *
  17. * For maximum efficiency downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary
  18. * strain from the server of having to read files into memory first, and then writing them to disk.
  19. *
  20. * Zebra_cURL requires the {@link http://www.php.net/manual/en/curl.installation.php PHP cURL extension} to be enabled.
  21. *
  22. * The code is heavily commented and generates no warnings/errors/notices when PHP's error reporting level is set to
  23. * {@link http://www.php.net/manual/en/function.error-reporting.php E_ALL}.
  24. *
  25. * Visit {@link http://stefangabos.ro/php-libraries/zebra-curl/} for more information.
  26. *
  27. * For more resources visit {@link http://stefangabos.ro/}
  28. *
  29. * @author Stefan Gabos <contact@stefangabos.ro>
  30. * @version 1.1.0 (last revision: March 22, 2014)
  31. * @copyright (c) 2014 Stefan Gabos
  32. * @license http://www.gnu.org/licenses/lgpl-3.0.txt GNU LESSER GENERAL PUBLIC LICENSE
  33. * @package Zebra_cURL
  34. */
  35. class Zebra_cURL {
  36. /**
  37. * The number of parallel, asynchronous, requests to be processed by the library at once.
  38. *
  39. * <code>
  40. * // allow execution of 30 simultaneous threads
  41. * $curl->threads = 30;
  42. * </code>
  43. *
  44. * Note that the library will keep this number of parallel threads running at all times (unless, of course, there
  45. * are less remaining URLs to process); it's doing this by starting a new thread as soon as another one finishes,
  46. * instead of waiting for each batch to finish, and so on, until there are no more URLs to process, and thus
  47. * greatly decreasing execution time.
  48. *
  49. * Default is 10.
  50. *
  51. * @var integer
  52. */
  53. public $threads;
  54. /**
  55. * Default value is TRUE, can be changed by giving the constructor parameter value false.
  56. *
  57. * Used by the {@link _process()} method to determine if we run response body through PHP's htmlentities function.
  58. *
  59. * @access private
  60. *
  61. */
  62. private $_htmlentities;
  63. /**
  64. * An associative array linked with all the resources, used to store original URL and file pointer resources, used
  65. * for streaming downloads.
  66. *
  67. * @var array
  68. *
  69. * @access private
  70. */
  71. private $_info;
  72. /**
  73. * Used by the {@link _process()} method to keep track of URLs that need to be processed.
  74. *
  75. * @access private
  76. */
  77. private $_queue;
  78. /**
  79. * The cURL multi handle
  80. *
  81. * @var resource
  82. *
  83. * @access private
  84. */
  85. private $_multi_handle;
  86. /**
  87. * Possible values of the "result" attribute in the object passed to the callback function.
  88. *
  89. * @var array
  90. *
  91. * @access private
  92. */
  private $_response_messages = array(
    // numeric cURL result code => libcurl symbolic name
    // codes that do not appear below (for example 10, 12, 16) are intentionally left unmapped
    0 => 'CURLE_OK',
    1 => 'CURLE_UNSUPPORTED_PROTOCOL',
    2 => 'CURLE_FAILED_INIT',
    3 => 'CURLE_URL_MALFORMAT',
    4 => 'CURLE_URL_MALFORMAT_USER',
    5 => 'CURLE_COULDNT_RESOLVE_PROXY',
    6 => 'CURLE_COULDNT_RESOLVE_HOST',
    7 => 'CURLE_COULDNT_CONNECT',
    8 => 'CURLE_FTP_WEIRD_SERVER_REPLY',
    9 => 'CURLE_REMOTE_ACCESS_DENIED',
    11 => 'CURLE_FTP_WEIRD_PASS_REPLY',
    13 => 'CURLE_FTP_WEIRD_PASV_REPLY',
    14 => 'CURLE_FTP_WEIRD_227_FORMAT',
    15 => 'CURLE_FTP_CANT_GET_HOST',
    17 => 'CURLE_FTP_COULDNT_SET_TYPE',
    18 => 'CURLE_PARTIAL_FILE',
    19 => 'CURLE_FTP_COULDNT_RETR_FILE',
    21 => 'CURLE_QUOTE_ERROR',
    22 => 'CURLE_HTTP_RETURNED_ERROR',
    23 => 'CURLE_WRITE_ERROR',
    25 => 'CURLE_UPLOAD_FAILED',
    26 => 'CURLE_READ_ERROR',
    27 => 'CURLE_OUT_OF_MEMORY',
    28 => 'CURLE_OPERATION_TIMEDOUT',
    30 => 'CURLE_FTP_PORT_FAILED',
    31 => 'CURLE_FTP_COULDNT_USE_REST',
    33 => 'CURLE_RANGE_ERROR',
    34 => 'CURLE_HTTP_POST_ERROR',
    35 => 'CURLE_SSL_CONNECT_ERROR',
    36 => 'CURLE_BAD_DOWNLOAD_RESUME',
    37 => 'CURLE_FILE_COULDNT_READ_FILE',
    38 => 'CURLE_LDAP_CANNOT_BIND',
    39 => 'CURLE_LDAP_SEARCH_FAILED',
    41 => 'CURLE_FUNCTION_NOT_FOUND',
    42 => 'CURLE_ABORTED_BY_CALLBACK',
    43 => 'CURLE_BAD_FUNCTION_ARGUMENT',
    45 => 'CURLE_INTERFACE_FAILED',
    47 => 'CURLE_TOO_MANY_REDIRECTS',
    48 => 'CURLE_UNKNOWN_TELNET_OPTION',
    49 => 'CURLE_TELNET_OPTION_SYNTAX',
    51 => 'CURLE_PEER_FAILED_VERIFICATION',
    52 => 'CURLE_GOT_NOTHING',
    53 => 'CURLE_SSL_ENGINE_NOTFOUND',
    54 => 'CURLE_SSL_ENGINE_SETFAILED',
    55 => 'CURLE_SEND_ERROR',
    56 => 'CURLE_RECV_ERROR',
    58 => 'CURLE_SSL_CERTPROBLEM',
    59 => 'CURLE_SSL_CIPHER',
    60 => 'CURLE_SSL_CACERT',
    61 => 'CURLE_BAD_CONTENT_ENCODING',
    62 => 'CURLE_LDAP_INVALID_URL',
    63 => 'CURLE_FILESIZE_EXCEEDED',
    64 => 'CURLE_USE_SSL_FAILED',
    65 => 'CURLE_SEND_FAIL_REWIND',
    66 => 'CURLE_SSL_ENGINE_INITFAILED',
    67 => 'CURLE_LOGIN_DENIED',
    68 => 'CURLE_TFTP_NOTFOUND',
    69 => 'CURLE_TFTP_PERM',
    70 => 'CURLE_REMOTE_DISK_FULL',
    71 => 'CURLE_TFTP_ILLEGAL',
    72 => 'CURLE_TFTP_UNKNOWNID',
    73 => 'CURLE_REMOTE_FILE_EXISTS',
    74 => 'CURLE_TFTP_NOSUCHUSER',
    75 => 'CURLE_CONV_FAILED',
    76 => 'CURLE_CONV_REQD',
    77 => 'CURLE_SSL_CACERT_BADFILE',
    78 => 'CURLE_REMOTE_FILE_NOT_FOUND',
    79 => 'CURLE_SSH',
    80 => 'CURLE_SSL_SHUTDOWN_FAILED',
    81 => 'CURLE_AGAIN',
    82 => 'CURLE_SSL_CRL_BADFILE',
    83 => 'CURLE_SSL_ISSUER_ERROR',
    // key 84 used to be listed twice; a repeated literal key silently overwrites the
    // earlier one, so only a single entry is kept
    84 => 'CURLE_FTP_PRET_FAILED',
    85 => 'CURLE_RTSP_CSEQ_ERROR',
    86 => 'CURLE_RTSP_SESSION_ERROR',
    87 => 'CURLE_FTP_BAD_FILE_LIST',
    88 => 'CURLE_CHUNK_FAILED',
  );
  173. /**
  174. * Constructor of the class.
  175. *
  176. * Below is the list of default options set for each request, unless these options are specifically changed by one
  177. * of the methods or via the {@link option()} method:
  178. *
  179. * - <b>CURLINFO_HEADER_OUT</b> - <b>TRUE</b>; get the last request header; if set to FALSE the "last_request"
  180. * entry of the "headers" attribute of the object given as argument to the
  181. * callback function, will be an empty string; <i>you should leave this
  182. * unaltered!</i>;
  183. *
  184. * - <b>CURLOPT_AUTOREFERER</b> - <b>TRUE</b>; automatically set the <i>Referer:</i> field in requests
  185. * where it follows a <i>Location:</i> redirect;
  186. *
  187. * - <b>CURLOPT_COOKIEFILE</b> - <b>empty string</b>; no cookies are loaded, but cookie handling is still
  188. * enabled
  189. *
  190. * - <b>CURLOPT_CONNECTTIMEOUT</b> - <b>10</b>; the number of seconds to wait while trying to connect. use 0
  191. * to wait indefinitely;
  192. *
  193. * - <b>CURLOPT_FOLLOWLOCATION</b> - <b>TRUE</b>; automatically follow any <i>Location:</i> header that the
  194. * server sends as part of the HTTP header (note this is recursive, PHP will
  195. * follow as many <i>Location:</i> headers as specified by the value of
  196. * CURLOPT_MAXREDIRS - see below);
  197. *
  198. * - <b>CURLOPT_HEADER</b> - <b>TRUE</b>; get the response header(s); if set to FALSE the "responses"
  199. * entry of the "headers" attribute of the object given as argument to the
  200. * callback function, will be an empty string; <i>you should leave this
  201. * unaltered!</i>;
  202. *
  203. * - <b>CURLOPT_MAXREDIRS</b> - <b>50</b>; the maximum amount of HTTP redirections to follow; used
  204. * together with CURLOPT_FOLLOWLOCATION;
  205. *
  206. * - <b>CURLOPT_RETURNTRANSFER</b> - <b>TRUE</b>; return the transfer's body as a string instead of outputting
  207. * it directly; if set to FALSE the "body" attribute of the object given as
  208. * argument to a callback function will be an empty string; <b>this will
  209. * always be TRUE and cannot be changed!</b>;
  210. *
  211. * - <b>CURLOPT_SSL_VERIFYHOST</b> - <b>2</b>; check the existence of a common name in the SSL peer certificate
  212. * (for when connecting to HTTPS), and that it matches with the provided
  213. * hostname; see also {@link ssl()};
  214. *
  215. * - <b>CURLOPT_SSL_VERIFYPEER</b> - <b>FALSE</b>; stop cURL from verifying the peer's certificate (which
  216. * would most likely cause the request to fail). see also {@link ssl()};
  217. *
  218. * - <b>CURLOPT_TIMEOUT</b> - <b>10</b>; the maximum number of seconds to allow cURL functions to
  219. * execute;
  220. *
  221. * - <b>CURLOPT_USERAGENT</b> - A (slightly) random user agent (Internet Explorer 9 or 10, on Windows
  222. * Vista, 7 or 8, with other extra strings). Some web services will not
  223. * respond unless a valid user-agent string is provided
  224. *
  225. * @param boolean $htmlentities Instructs the script whether the response body returned by the {@link get()}
  226. * and {@link post()} methods should be run through PHP's
  227. * {@link http://php.net/manual/en/function.htmlentities.php htmlentities()}
  228. * function.
  229. *
  230. * @return void
  231. */
  public function __construct($htmlentities = true)
  {
    // the library cannot work without the cURL extension: raise a user-level fatal error
    // (explicit braces so the guarded statement cannot be confused with the code that follows)
    if (!extension_loaded('curl')) {
      trigger_error('php_curl extension is not loaded!', E_USER_ERROR);
    }
    // set defaults for accessing HTTPS servers
    $this->ssl();
    // no cURL multi handle exists yet, and no per-request information has been collected
    $this->_multi_handle = false;
    $this->_info = array();
    // caching is disabled by default
    $this->cache(false);
    // the default number of parallel, asynchronous, requests to be processed by the library at once
    $this->threads = 10;
    // remember whether response bodies should be run through htmlentities() (TRUE unless the caller says otherwise)
    $this->_htmlentities = $htmlentities;
  }
  248. /**
  249. * Use this method to enable caching for {@link get() get} and {@link header() header} requests.
  250. *
  251. * <i>Caching is only used for {@link get() get} and {@link header() header} requests, and will be ignored for other
  252. * request types even if it is enabled!</i>
  253. *
  254. * <i>Caching is disabled by default!</i>
  255. *
  256. * <code>
  257. * // the callback function to be executed for each and every
  258. * // request, as soon as a request finishes
  259. * // the callback function receives as argument an object with 4 properties
  260. * // (info, header, body and response)
  261. * function mycallback($result) {
  262. *
  263. * // everything went well
  264. * if ($result->response[1] == CURLE_OK) {
  265. *
  266. * // see all the returned data
  267. * print_r('<pre>');
  268. * print_r($result);
  269. *
  270. * // something went wrong
  271. * // ($result still contains all data that could be gathered)
  272. * } else die('An error occurred: ' . $result->response[1]);
  273. *
  274. * }
  275. *
  276. * // include the Zebra_cURL library
  277. * require 'path/to/Zebra_cURL';
  278. *
  279. * // instantiate the Zebra_cURL object
  280. * $curl = new Zebra_cURL();
  281. *
  282. * // cache results in the "cache" folder and for 86400 seconds (24 hours)
  283. * $curl->cache('cache', 86400);
  284. *
  285. * // let's fetch the RSS feeds of some popular websites
  286. * // execute the "mycallback" function for each request, as soon as it finishes
  287. * $curl->get(array(
  288. * 'http://feeds.feedburner.com/alistapart/main',
  289. * 'http://feeds.feedburner.com/TechCrunch',
  290. * 'http://feeds.mashable.com/mashable',
  291. * ), 'mycallback')
  292. * </code>
  293. *
  294. * @param string $path The path where the cache files are to be stored.
  295. *
  296. * Setting this to FALSE will disable caching.
  297. *
  298. * <i>Unless set to FALSE this must point to a writable directory or an error will
  299. * be triggered!</i>
  300. *
  301. * @param integer $lifetime (Optional) The number of seconds after which cache will be considered as expired.
  302. *
  303. * Default is 3600.
  304. *
  305. * @param boolean $compress (Optional) If set to TRUE, cache files will be
  306. * {@link http://php.net/manual/ro/function.gzcompress.php gzcompress}-ed so that
  307. * they occupy less disk space.
  308. *
  309. * Default is TRUE.
  310. *
  311. * @param octal $chmod (Optional) The file system permissions to be set for newly created cache files.
  312. *
  313. * I suggest using the value "0755" (without the quotes) but, if you know what you
  314. * are doing, here is how you can calculate the permission levels:
  315. *
  316. * - 400 Owner Read
  317. * - 200 Owner Write
  318. * - 100 Owner Execute
  319. * - 40 Group Read
  320. * - 20 Group Write
  321. * - 10 Group Execute
  322. * - 4 Global Read
  323. * - 2 Global Write
  324. * - 1 Global Execute
  325. *
  326. * Default is "0755" (without the quotes).
  327. *
  328. * @return null
  329. */
  public function cache($path, $lifetime = 3600, $compress = true, $chmod = 0755)
  {
    // a path that loosely equals FALSE means caching has to be switched off
    if ($path == false) {
      $this->cache = false;
      return;
    }
    // otherwise keep all cache-related settings together for later use
    $settings = array(
      'path' => $path,
      'lifetime' => $lifetime,
      'chmod' => $chmod,
      'compress' => $compress,
    );
    $this->cache = $settings;
  }
  344. /**
  345. * Sets the path and name of the file to save to / retrieve cookies from, for each accessed URL. (cookie name/data
  346. * will be stored in this file on a per-domain basis). Important when cookies need to be stored/restored to maintain
  347. * status/session of the request(s) made to the same domain(s).
  348. *
  349. * This method will automatically set the <b>CURLOPT_COOKIEJAR</b> and <b>CURLOPT_COOKIEFILE</b> options.
  350. *
  351. * @param string $path The path to a file to save to / retrieve cookies from, for each accessed URL.
  352. *
  353. * If file does not exist the library will attempt to create it and if it is unable to
  354. * create it will trigger an error.
  355. *
  356. * @param boolean $keep (Optional) By default, the file to save to / retrieve cookies from is deleted when
  357. * script execution finishes. If you want the file to be preserved, set this argument to
  358. * TRUE.
  359. *
  360. * Default is FALSE.
  361. *
  362. * @return null
  363. */
  public function cookies($path, $keep = false)
  {
    // NOTE(review): $keep is part of the public signature but is not read anywhere in this
    // method; nothing here schedules the cookie file for deletion
    // if the file does not exist yet
    if (!is_file($path)) {
      // attempt to create it (opening in append mode creates the file without truncating anything)
      $handle = fopen($path, 'a');
      if ($handle === false) {
        // if file could not be created, trigger an error
        trigger_error('File "' . $path . '" for storing cookies could not be found nor could it automatically be created! Make sure either that the path to the file points to a writable directory, or create the file yourself and make it writable.', E_USER_ERROR);
      } else {
        // the file could be created; release the handle
        // (only done for a valid handle: if an error handler lets execution continue after the
        // error above, calling fclose() on FALSE would itself fail)
        fclose($handle);
      }
    }
    // use the same file both for reading cookies from and for writing cookies to
    $this->option(array(
      CURLOPT_COOKIEJAR => $path,
      CURLOPT_COOKIEFILE => $path,
    ));
  }
  381. /**
  382. * Downloads one or more files from one or more URLs specified by the <i>$url</i> argument, saves the downloaded
  383. * files (with their original name) to the path specified by the <i>$destination_path</i>, and executes the callback
  384. * function specified by the <i>$callback</i> argument for each and every request, as soon as each request finishes.
  385. *
  386. * Downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary strain from your
  387. * server of reading files into memory first, and then writing them to disk.
  388. *
  389. * This method will automatically set the <b>CURLOPT_BINARYTRANSFER</b> option to TRUE, so you might want to change
  390. * this back to FALSE/0 or "unset" it using the {@link option()} method, before making a {@link get()}, {@link header()}
  391. * or {@link post()} request.
  392. *
  393. * <i>Files are downloaded preserving their name so you may run into trouble when trying to download more images
  394. * having the same name (either from the same, or different servers)!</i>
  395. *
  396. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  397. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  398. * the {@link threads} property.</i>
  399. *
  400. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  401. *
  402. * <code>
  403. * // the callback function to be executed for each and every
  404. * // request, as soon as a request finishes
  405. * // the callback function receives as argument an object with 4 properties
  406. * // (info, header, body and response)
  407. * function mycallback($result) {
  408. *
  409. * // everything went well
  410. * if ($result->response[1] == CURLE_OK) {
  411. *
  412. * // see all the returned data
  413. * print_r('<pre>');
  414. * print_r($result);
  415. *
  416. * // something went wrong
  417. * // ($result still contains all data that could be gathered)
  418. * } else die('An error occurred: ' . $result->response[1]);
  419. *
  420. * }
  421. *
  422. * // include the Zebra_cURL library
  423. * require 'path/to/Zebra_cURL';
  424. *
  425. * // instantiate the Zebra_cURL object
  426. * $curl = new Zebra_cURL();
  427. *
  428. * // download 2 images from 2 different websites, and
  429. * // execute the "mycallback" function for each request, as soon as it finishes
  430. * $curl->download(array(
  431. * 'http://www.somewebsite.com/images/alpha.jpg',
  432. * 'http://www.otherwebsite.com/images/omega.jpg',
  433. * ), 'destination/path/', 'mycallback');
  434. * </code>
  435. *
  436. * @param mixed $url A single or an array of URLs to process.
  437. *
  438. * @param string $destination_path The path to where to save the file(s) to.
  439. *
  440. * If path is not pointing to a directory or is not writable, the library will
  441. * trigger an error.
  442. *
  443. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  444. *
  445. * May be given as a string representing a name of an existing function, as an
  446. * anonymous function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  447. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  448. * closure}.
  449. *
  450. * The callback function receives as first argument <b>an object</b> with <b>4
  451. * properties</b> as described below, while any further arguments passed to the
  452. * {@link download} method will be passed as extra arguments to the callback function:
  453. *
  454. * - <b>info</b> - an associative array containing information about the
  455. * request that just finished, as returned by PHP's
  456. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  457. * function; there's also an extra entry called <i>original_url</i>
  458. * because, as curl_getinfo() only returns information
  459. * about the <b>last</b> request, the original URL may
  460. * be lost otherwise.
  461. *
  462. * - <b>headers</b> - an associative array with 2 items:
  463. *
  464. * <b>- last_request</b> an array with a single entry
  465. * containing the request headers generated by <i>the
  466. * last request</i>; so, remember, if there are redirects
  467. * involved, there will be more requests made, but only
  468. * information from the last one will be available; if
  469. * explicitly disabled via the {@link option()} method
  470. * by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  471. * this will be an empty string;
  472. *
  473. * <b>- responses</b> an array with one or more entries
  474. * (if there are redirects involved) with the response
  475. * headers of all the requests made; if explicitly disabled
  476. * via the {@link option()} method by setting
  477. * <b>CURLOPT_HEADER</b> to 0 or FALSE, this will be an
  478. * empty string;
  479. *
  480. * <i>Unless disabled, each entry in the headers' array
  481. * is an associative array in the form of property =>
  482. * value</i>
  483. *
  484. * - <b>body</b> - the response of the request (the content of the page
  485. * at the URL).
  486. *
  487. * If "body" is explicitly disabled via the {@link option()}
  488. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE,
  489. * this will be an empty string;
  490. *
  491. * - <b>response</b> - the response given by the cURL library as an array
  492. * with 2 entries: the first entry represents the result's
  493. * code, while the second is the textual representation
  494. * of the code; if the request was successful, these
  495. * values will be <i>array(0, CURLE_OK);</i> consult
  496. * {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  497. * this list} to see the possible values of this property;
  498. *
  499. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  500. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  501. *
  502. * @return null
  503. */
  public function download($url, $destination_path, $callback = '')
  {
    // the destination must be an existing directory that can be written to; otherwise raise an error
    if (!(is_dir($destination_path) && is_writable($destination_path))) {
      trigger_error('"' . $destination_path . '" is not a valid path or is not writable!', E_USER_ERROR);
    }
    // remember where to save files, normalized to end in exactly one forward slash
    $normalized_path = rtrim($destination_path, '/\\');
    $this->download_path = $normalized_path . '/';
    // ask the cURL library for a binary transfer
    $this->option(CURLOPT_BINARYTRANSFER, 1);
    // func_get_args() has to be captured in a variable first
    // (prior to PHP 5.3 it cannot be used directly as a function parameter)
    $forwarded = func_get_args();
    // drop the 3 named parameters, keeping only the extra arguments given to "download"...
    $forwarded = array_slice($forwarded, 3);
    // ...and put the URL(s) and the callback in front of them
    // (the destination path is not forwarded; it was stored above)
    array_unshift($forwarded, $url, $callback);
    // process request(s)
    call_user_func_array(array($this, '_process'), $forwarded);
  }
  520. /**
  521. * Works exactly like the {@link download()} method only that downloads are made from an FTP server.
  522. *
  523. * Downloads from an FTP server to which the connection is made using the given <i>$username</i> and <i>$password</i>
  524. * arguments, one or more files specified by the <i>$url</i> argument, saves the downloaded files (with their original
  525. * name) to the path specified by the <i>$destination_path</i>, and executes the callback function specified by the
  526. * <i>$callback</i> argument for each and every request, as soon as each request finishes.
  527. *
  528. * Downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary strain from your
  529. * server of reading files into memory first, and then writing them to disk.
  530. *
  531. * This method will automatically set the <b>CURLOPT_BINARYTRANSFER</b> option to TRUE, so you might want to change
  532. * this back to FALSE/0 or "unset" it using the {@link option()} method, before making a {@link get()}, {@link header()}
  533. * or {@link post()} request.
  534. *
  535. * <i>Files are downloaded preserving their name so you may run into trouble when trying to download more images
  536. * having the same name (either from the same, or different servers)!</i>
  537. *
  538. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  539. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  540. * the {@link threads} property.</i>
  541. *
  542. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  543. *
  544. * <code>
  545. * // the callback function to be executed for each and every
  546. * // request, as soon as a request finishes
  547. * // the callback function receives as argument an object with 4 properties
  548. * // (info, header, body and response)
  549. * function mycallback($result) {
  550. *
  551. * // everything went well
  552. * if ($result->response[1] == CURLE_OK) {
  553. *
  554. * // see all the returned data
  555. * print_r('<pre>');
  556. * print_r($result);
  557. *
  558. * // something went wrong
  559. * // ($result still contains all data that could be gathered)
  560. * } else die('An error occurred: ' . $result->response[1]);
  561. *
  562. * }
  563. *
  564. * // include the Zebra_cURL library
  565. * require 'path/to/Zebra_cURL';
  566. *
  567. * // instantiate the Zebra_cURL object
  568. * $curl = new Zebra_cURL();
  569. *
  570. * // connect to the FTP server using the given credential, download a file to a given location and
  571. * // execute the "mycallback" function for each request, as soon as it finishes
  572. * $curl->ftp_download('ftp://somefile.ext', 'destination/path/', 'username', 'password', 'mycallback');
  573. * </code>
  574. *
  575. * @param mixed $url A single or an array of URLs to process.
  576. *
  577. * @param string $destination_path The path to where to save the file(s) to.
  578. *
  579. * If path is not pointing to a directory or is not writable, the library will
  580. * trigger an error.
  581. *
  582. * @param string $username (Optional) The username to be used to connect to the FTP server (if required).
  583. *
  584. * @param string $password (Optional) The password to be used to connect to the FTP server (if required).
  585. *
  586. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  587. *
  588. * May be given as a string representing a name of an existing function, as an
  589. * anonymous function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  590. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  591. * closure}.
  592. *
  593. * The callback function receives as first argument <b>an object</b> with <b>4
  594. * properties</b> as described below, while any further arguments passed to the
  595. * {@link ftp_download} method will be passed as extra arguments to the callback function:
  596. *
  597. * - <b>info</b> - an associative array containing information about the
  598. * request that just finished, as returned by PHP's
  599. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  600. * function;
  601. *
  602. * - <b>headers</b> - an associative array with 2 items:
  603. *
  604. * <b>- last_request</b> an array with a single entry
  605. * containing the request headers generated by <i>the
  606. * last request</i>; so, remember, if there are redirects
  607. * involved, there will be more requests made, but only
  608. * information from the last one will be available; if
  609. * explicitly disabled via the {@link option()} method
  610. * by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  611. * this will be an empty string;
  612. *
  613. * <b>- responses</b> an array with one or more entries
  614. * (if there are redirects involved) with the response
  615. * headers of all the requests made; if explicitly disabled
  616. * via the {@link option()} method by setting
  617. * <b>CURLOPT_HEADER</b> to 0 or FALSE, this will be an
  618. * empty string;
  619. *
  620. * <i>Unless disabled, each entry in the headers' array
  621. * is an associative array in the form of property =>
  622. * value</i>
  623. *
  624. * - <b>body</b> - the response of the request (the content of the page
  625. * at the URL).
  626. *
  627. * If "body" is explicitly disabled via the {@link option()}
  628. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE,
  629. * this will be an empty string;
  630. *
  631. * - <b>response</b> - the response given by the cURL library as an array
  632. * with 2 entries: the first entry represents the result's
  633. * code, while the second is the textual representation
  634. * of the code; if the request was successful, these
  635. * values will be <i>array(0, CURLE_OK);</i> consult
  636. * {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  637. * this list} to see the possible values of this property;
  638. *
  639. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  640. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  641. *
  642. * @return null
  643. */
  public function ftp_download($url, $destination_path, $username = '', $password = '', $callback = '')
  {
    // credentials are only set when a username was given
    if ($username != '') {
      $credentials = $username . ':' . $password;
      $this->option(CURLOPT_USERPWD, $credentials);
    }
    // func_get_args() has to be captured in a variable first
    // (prior to PHP 5.3 it cannot be used directly as a function parameter)
    $forwarded = func_get_args();
    // keep only the extra arguments that follow the 5 named parameters...
    $forwarded = array_slice($forwarded, 5);
    // ...and prepend what the "download" method expects: URL(s), destination path and callback
    array_unshift($forwarded, $url, $destination_path, $callback);
    // hand over to the "download" method
    call_user_func_array(array($this, 'download'), $forwarded);
  }
  657. /**
  658. * Performs an HTTP <b>GET</b> request to one or more URLs specified by the <i>$url</i> argument and executes the
  659. * callback function specified by the <i>$callback</i> argument for each and every request, as soon as each request
  660. * finishes.
  661. *
  662. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  663. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  664. * the {@link threads} property.</i>
  665. *
  666. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  667. *
  668. * <code>
  669. * // the callback function to be executed for each and every
  670. * // request, as soon as a request finishes
  671. * // the callback function receives as argument an object with 4 properties
  672. * // (info, headers, body and response)
  673. * function mycallback($result) {
  674. *
  675. * // everything went well
  676. * if ($result->response[1] == CURLE_OK) {
  677. *
  678. * // see all the returned data
  679. * print_r('<pre>');
  680. * print_r($result);
  681. *
  682. * // something went wrong
  683. * // ($result still contains all data that could be gathered)
  684. * } else die('An error occurred: ' . $result->response[1]);
  685. *
  686. * }
  687. *
  688. * // include the Zebra_cURL library
  689. * require 'path/to/Zebra_cURL.php';
  690. *
  691. * // instantiate the Zebra_cURL object
  692. * $curl = new Zebra_cURL();
  693. *
  694. * // cache results in the "cache" folder and for 3600 seconds (one hour)
  695. * $curl->cache('cache', 3600);
  696. *
  697. * // let's fetch the RSS feeds of some popular websites
  698. * // execute the "mycallback" function for each request, as soon as it finishes
  699. * $curl->get(array(
  700. * 'http://feeds.feedburner.com/alistapart/main',
  701. * 'http://feeds.feedburner.com/TechCrunch',
  702. * 'http://feeds.mashable.com/mashable',
  703. * ), 'mycallback');
  704. * </code>
  705. *
  706. * @param mixed $url A single or an array of URLs to process.
  707. *
  708. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  709. *
  710. * May be given as a string representing a name of an existing function, as an anonymous
  711. * function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  712. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  713. * closure}.
  714. *
  715. * The callback function receives as first argument <b>an object</b> with <b>4 properties</b>
  716. * as described below, while any further arguments passed to the {@link get} method will
  717. * be passed as extra arguments to the callback function:
  718. *
  719. * - <b>info</b> - an associative array containing information about the request
  720. * that just finished, as returned by PHP's
  721. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  722. * function;
  723. *
  724. * - <b>headers</b> - an associative array with 2 items:
  725. *
  726. * <b>- last_request</b> an array with a single entry containing
  727. * the request headers generated by <i>the last request</i>; so,
  728. * remember, if there are redirects involved, there will be more
  729. * requests made, but only information from the last one will be
  730. * available; if explicitly disabled via the {@link option()}
  731. * method by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  732. * this will be an empty string;
  733. *
  734. * <b>- responses</b> an array with one or more entries (if there
  735. * are redirects involved) with the response headers of all the
  736. * requests made; if explicitly disabled via the {@link option()}
  737. * method by setting <b>CURLOPT_HEADER</b> to 0 or FALSE, this
  738. * will be an empty string;
  739. *
  740. * <i>Unless disabled, each entry in the headers' array is an
  741. * associative array in the form of property => value</i>
  742. *
  743. * - <b>body</b> - the response of the request (the content of the page at the
  744. * URL).
  745. *
  746. * Unless disabled via the {@link __construct() constructor}, all
  747. * applicable characters will be converted to HTML entities via
  748. * PHP's {@link http://php.net/manual/en/function.htmlentities.php htmlentities()}
  749. * function, so remember to use PHP's {@link http://www.php.net/manual/en/function.html-entity-decode.php html_entity_decode()}
  750. * function to reverse this, if needed;
  751. *
  752. * If "body" is explicitly disabled via the {@link option()}
  753. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE, this
  754. * will be an empty string;
  755. *
  756. * - <b>response</b> - the response given by the cURL library as an array with 2
  757. * entries: the first entry represents the result's code, while
  758. * the second is the textual representation of the code; if the
  759. * request was successful, these values will be <i>array(0,
  760. * CURLE_OK);</i> consult {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  761. * this list} to see the possible values of this property;
  762. *
  763. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  764. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  765. *
  766. * @return null
  767. */
  768. public function get($url, $callback = '')
  769. {
  770. // force the HTTP method to GET for the request(s) that follow
  771. $this->option(CURLOPT_HTTPGET, 1);
  772. // before PHP 5.3 the result of func_get_args() cannot be handed directly to another function,
  773. // so it is stored in a variable first
  774. $received = func_get_args();
  775. // pass everything this method received, unchanged, to the "_process" method
  776. call_user_func_array(array($this, '_process'), $received);
  777. }
  778. /**
  779. * Works exactly like the {@link get()} method, the only difference being that this method will automatically set
  780. * the <b>CURLOPT_NOBODY</b> option to TRUE and thus the <i>body</i> property of the result will be an empty string.
  781. * Also, <b>CURLINFO_HEADER_OUT</b> and <b>CURLOPT_HEADER</b> will be set to TRUE and therefore header information
  782. * will be available.
  783. *
  784. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  785. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  786. * the {@link threads} property.</i>
  787. *
  788. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  789. *
  790. * <code>
  791. * // the callback function to be executed for each and every
  792. * // request, as soon as a request finishes
  793. * // the callback function receives as argument an object with 4 properties
  794. * // (info, headers, body and response)
  795. * function mycallback($result) {
  796. *
  797. * // everything went well
  798. * if ($result->response[1] == CURLE_OK) {
  799. *
  800. * // see all the returned data
  801. * print_r('<pre>');
  802. * print_r($result);
  803. *
  804. * // something went wrong
  805. * // ($result still contains all data that could be gathered)
  806. * } else die('An error occurred: ' . $result->response[1]);
  807. *
  808. * }
  809. *
  810. * // include the Zebra_cURL library
  811. * require 'path/to/Zebra_cURL.php';
  812. *
  813. * // instantiate the Zebra_cURL object
  814. * $curl = new Zebra_cURL();
  815. *
  816. * // process the given URL and execute the "mycallback" function for each
  817. * // request, as soon as it finishes
  818. * $curl->header('http://www.somewebsite.com', 'mycallback');
  819. * </code>
  820. *
  821. * @param mixed $url A single or an array of URLs to process.
  822. *
  823. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  824. *
  825. * May be given as a string representing a name of an existing function, as an anonymous
  826. * function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  827. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/ro/function.create-function.php
  828. * closure}.
  829. *
  830. * The callback function receives as first argument <b>an object</b> with <b>4 properties</b>
  831. * as described below, while any further arguments passed to the {@link header} method
  832. * will be passed as extra arguments to the callback function:
  833. *
  834. * - <b>info</b> - an associative array containing information about the request
  835. * that just finished, as returned by PHP's
  836. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  837. * function;
  838. *
  839. * - <b>headers</b> - an associative array with 2 items:
  840. *
  841. * <b>- last_request</b> an array with a single entry containing
  842. * the request headers generated by <i>the last request</i>; so,
  843. * remember, if there are redirects involved, there will be more
  844. * requests made, but only information from the last one will be
  845. * available;
  846. *
  847. * <b>- responses</b> an array with one or more entries (if there
  848. * are redirects involved) with the response headers of all the
  849. * requests made;
  850. *
  851. * <i>Each entry in the headers' arra…

Large files files are truncated, but you can click here to view the full file