PageRenderTime 61ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/Zebra_cURL.php

https://github.com/n4sss/Zebra_cURL
PHP | 1870 lines | 610 code | 211 blank | 1049 comment | 99 complexity | 4dff7500363cdcdccc5aefefcc9b9916 MD5 | raw file
Possible License(s): LGPL-3.0
  1. <?php
  2. /**
  3. * Zebra_cURL, a high performance PHP cURL library
  4. *
  5. * Zebra_cURL is a high performance PHP library acting as a wrapper to PHP's {@link http://www.php.net/manual/en/book.curl.php libcurl library},
  6. * which not only allows the running of multiple requests at once asynchronously, in parallel, but also as soon as one
  7. * thread finishes it can be processed right away without having to wait for the other threads in the queue to finish.
  8. *
  9. * Also, each time a request is completed another one is added to the queue, thus keeping a constant number of threads
  10. * running at all times and eliminating wasted CPU cycles from busy waiting. The result is a faster and more efficient
  11. * way of processing large quantities of cURL requests (like fetching thousands of RSS feeds at once), drastically reducing
  12. * processing time.
  13. *
  14. * This script supports GET and POST requests, basic downloads, downloads from FTP servers, HTTP Authentication, and
  15. * requests through proxy servers.
  16. *
  17. * For maximum efficiency downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary
  18. * strain from the server of having to read files into memory first, and then writing them to disk.
  19. *
  20. * Zebra_cURL requires the {@link http://www.php.net/manual/en/curl.installation.php PHP cURL extension} to be enabled.
  21. *
  22. * The code is heavily commented and generates no warnings/errors/notices when PHP's error reporting level is set to
  23. * {@link http://www.php.net/manual/en/function.error-reporting.php E_ALL}.
  24. *
  25. * Visit {@link http://stefangabos.ro/php-libraries/zebra-curl/} for more information.
  26. *
  27. * For more resources visit {@link http://stefangabos.ro/}
  28. *
  29. * @author Stefan Gabos <contact@stefangabos.ro>
  30. * @version 1.1.0 (last revision: March 22, 2014)
  31. * @copyright (c) 2014 Stefan Gabos
  32. * @license http://www.gnu.org/licenses/lgpl-3.0.txt GNU LESSER GENERAL PUBLIC LICENSE
  33. * @package Zebra_cURL
  34. */
  35. class Zebra_cURL {
  36. /**
  37. * The number of parallel, asynchronous, requests to be processed by the library at once.
  38. *
  39. * <code>
  40. * // allow execution of 30 simultaneous threads
  41. * $curl->threads = 30;
  42. * </code>
  43. *
  44. * Note that the library will keep this number of parallel threads running at all times (unless, of course, there
  45. * are less remaining URLs to process); it's doing this by starting a new thread as soon as another one finishes,
  46. * instead of waiting for each batch to finish, and so on, until there are no more URLs to process, and thus
  47. * greatly decreasing execution time.
  48. *
  49. * Default is 10.
  50. *
  51. * @var integer
  52. */
  53. public $threads;
  54. /**
  55. * Default value is TRUE, can be changed by giving the constructor parameter value false.
  56. *
  57. * Used by the {@link _process()} method to determine if we run response body through PHP's htmlentities function.
  58. *
  59. * @access private
  60. *
  61. */
  62. private $_htmlentities;
  63. /**
  64. * An associative array linked with all the resources, used to store original URL and file pointer resources, used
  65. * for streaming downloads.
  66. *
  67. * @var array
  68. *
  69. * @access private
  70. */
  71. private $_info;
  72. /**
  73. * Used by the {@link _process()} method to keep track of URLs that need to be processed.
  74. *
  75. * @access private
  76. */
  77. private $_queue;
  78. /**
  79. * The cURL multi handle
  80. *
  81. * @var resource
  82. *
  83. * @access private
  84. */
  85. private $_multi_handle;
  86. /**
  87. * Possible values of the "result" attribute in the object passed to the callback function.
  88. *
  89. * @var array
  90. *
  91. * @access private
  92. */
  private $_response_messages = array(
      // keyed by the numeric cURL result code; every code is listed exactly once
      0 => 'CURLE_OK',
      1 => 'CURLE_UNSUPPORTED_PROTOCOL',
      2 => 'CURLE_FAILED_INIT',
      3 => 'CURLE_URL_MALFORMAT',
      4 => 'CURLE_URL_MALFORMAT_USER',
      5 => 'CURLE_COULDNT_RESOLVE_PROXY',
      6 => 'CURLE_COULDNT_RESOLVE_HOST',
      7 => 'CURLE_COULDNT_CONNECT',
      8 => 'CURLE_FTP_WEIRD_SERVER_REPLY',
      9 => 'CURLE_REMOTE_ACCESS_DENIED',
      11 => 'CURLE_FTP_WEIRD_PASS_REPLY',
      13 => 'CURLE_FTP_WEIRD_PASV_REPLY',
      14 => 'CURLE_FTP_WEIRD_227_FORMAT',
      15 => 'CURLE_FTP_CANT_GET_HOST',
      17 => 'CURLE_FTP_COULDNT_SET_TYPE',
      18 => 'CURLE_PARTIAL_FILE',
      19 => 'CURLE_FTP_COULDNT_RETR_FILE',
      21 => 'CURLE_QUOTE_ERROR',
      22 => 'CURLE_HTTP_RETURNED_ERROR',
      23 => 'CURLE_WRITE_ERROR',
      25 => 'CURLE_UPLOAD_FAILED',
      26 => 'CURLE_READ_ERROR',
      27 => 'CURLE_OUT_OF_MEMORY',
      28 => 'CURLE_OPERATION_TIMEDOUT',
      30 => 'CURLE_FTP_PORT_FAILED',
      31 => 'CURLE_FTP_COULDNT_USE_REST',
      33 => 'CURLE_RANGE_ERROR',
      34 => 'CURLE_HTTP_POST_ERROR',
      35 => 'CURLE_SSL_CONNECT_ERROR',
      36 => 'CURLE_BAD_DOWNLOAD_RESUME',
      37 => 'CURLE_FILE_COULDNT_READ_FILE',
      38 => 'CURLE_LDAP_CANNOT_BIND',
      39 => 'CURLE_LDAP_SEARCH_FAILED',
      41 => 'CURLE_FUNCTION_NOT_FOUND',
      42 => 'CURLE_ABORTED_BY_CALLBACK',
      43 => 'CURLE_BAD_FUNCTION_ARGUMENT',
      45 => 'CURLE_INTERFACE_FAILED',
      47 => 'CURLE_TOO_MANY_REDIRECTS',
      48 => 'CURLE_UNKNOWN_TELNET_OPTION',
      49 => 'CURLE_TELNET_OPTION_SYNTAX',
      51 => 'CURLE_PEER_FAILED_VERIFICATION',
      52 => 'CURLE_GOT_NOTHING',
      53 => 'CURLE_SSL_ENGINE_NOTFOUND',
      54 => 'CURLE_SSL_ENGINE_SETFAILED',
      55 => 'CURLE_SEND_ERROR',
      56 => 'CURLE_RECV_ERROR',
      58 => 'CURLE_SSL_CERTPROBLEM',
      59 => 'CURLE_SSL_CIPHER',
      60 => 'CURLE_SSL_CACERT',
      61 => 'CURLE_BAD_CONTENT_ENCODING',
      62 => 'CURLE_LDAP_INVALID_URL',
      63 => 'CURLE_FILESIZE_EXCEEDED',
      64 => 'CURLE_USE_SSL_FAILED',
      65 => 'CURLE_SEND_FAIL_REWIND',
      66 => 'CURLE_SSL_ENGINE_INITFAILED',
      67 => 'CURLE_LOGIN_DENIED',
      68 => 'CURLE_TFTP_NOTFOUND',
      69 => 'CURLE_TFTP_PERM',
      70 => 'CURLE_REMOTE_DISK_FULL',
      71 => 'CURLE_TFTP_ILLEGAL',
      72 => 'CURLE_TFTP_UNKNOWNID',
      73 => 'CURLE_REMOTE_FILE_EXISTS',
      74 => 'CURLE_TFTP_NOSUCHUSER',
      75 => 'CURLE_CONV_FAILED',
      76 => 'CURLE_CONV_REQD',
      77 => 'CURLE_SSL_CACERT_BADFILE',
      78 => 'CURLE_REMOTE_FILE_NOT_FOUND',
      79 => 'CURLE_SSH',
      80 => 'CURLE_SSL_SHUTDOWN_FAILED',
      81 => 'CURLE_AGAIN',
      82 => 'CURLE_SSL_CRL_BADFILE',
      83 => 'CURLE_SSL_ISSUER_ERROR',
      84 => 'CURLE_FTP_PRET_FAILED',
      85 => 'CURLE_RTSP_CSEQ_ERROR',
      86 => 'CURLE_RTSP_SESSION_ERROR',
      87 => 'CURLE_FTP_BAD_FILE_LIST',
      88 => 'CURLE_CHUNK_FAILED',
  );
  173. /**
  174. * Constructor of the class.
  175. *
  176. * Below is the list of default options set for each request, unless these options are specifically changed by one
  177. * of the methods or via the {@link option()} method:
  178. *
  179. * - <b>CURLINFO_HEADER_OUT</b> - <b>TRUE</b>; get the last request header; if set to FALSE the "last_request"
  180. * entry of the "headers" attribute of the object given as argument to the
  181. * callback function, will be an empty string; <i>you should leave this
  182. * unaltered!</i>;
  183. *
  184. * - <b>CURLOPT_AUTOREFERER</b> - <b>TRUE</b>; automatically set the <i>Referer:</i> field in requests
  185. * where it follows a <i>Location:</i> redirect;
  186. *
  187. * - <b>CURLOPT_COOKIEFILE</b> - <b>empty string</b>; no cookies are loaded, but cookie handling is still
  188. * enabled
  189. *
  190. * - <b>CURLOPT_CONNECTTIMEOUT</b> - <b>10</b>; the number of seconds to wait while trying to connect. use 0
  191. * to wait indefinitely;
  192. *
  193. * - <b>CURLOPT_FOLLOWLOCATION</b> - <b>TRUE</b>; automatically follow any <i>Location:</i> header that the
  194. * server sends as part of the HTTP header (note this is recursive, PHP will
  195. * follow as many <i>Location:</i> headers as specified by the value of
  196. * CURLOPT_MAXREDIRS - see below);
  197. *
  198. * - <b>CURLOPT_HEADER</b> - <b>TRUE</b>; get the response header(s); if set to FALSE the "responses"
  199. * entry of the "headers" attribute of the object given as argument to the
  200. * callback function, will be an empty string; <i>you should leave this
  201. * unaltered!</i>;
  202. *
  203. * - <b>CURLOPT_MAXREDIRS</b> - <b>50</b>; the maximum amount of HTTP redirections to follow; used
  204. * together with CURLOPT_FOLLOWLOCATION;
  205. *
  206. * - <b>CURLOPT_RETURNTRANSFER</b> - <b>TRUE</b>; return the transfer's body as a string instead of outputting
  207. * it directly; if set to FALSE the "body" attribute of the object given as
  208. * argument to a callback function will be an empty string; <b>this will
  209. * always be TRUE and cannot be changed!</b>;
  210. *
  211. * - <b>CURLOPT_SSL_VERIFYHOST</b> - <b>2</b>; check the existence of a common name in the SSL peer certificate
  212. * (for when connecting to HTTPS), and that it matches with the provided
  213. * hostname; see also {@link ssl()};
  214. *
  215. * - <b>CURLOPT_SSL_VERIFYPEER</b> - <b>FALSE</b>; stop cURL from verifying the peer's certificate (which
  216. * would most likely cause the request to fail). see also {@link ssl()};
  217. *
  218. * - <b>CURLOPT_TIMEOUT</b> - <b>10</b>; the maximum number of seconds to allow cURL functions to
  219. * execute;
  220. *
  221. * - <b>CURLOPT_USERAGENT</b> - A (slightly) random user agent (Internet Explorer 9 or 10, on Windows
  222. * Vista, 7 or 8, with other extra strings). Some web services will not
  223. * respond unless a valid user-agent string is provided
  224. *
  225. * @param boolean $htmlentities Instructs the script whether the response body returned by the {@link get()}
  226. * and {@link post()} methods should be run through PHP's
  227. * {@link http://php.net/manual/en/function.htmlentities.php htmlentities()}
  228. * function.
  229. *
  230. * @return void
  231. */
  public function __construct($htmlentities = true)
  {
      // Prepares a fresh instance: checks that the cURL extension is present,
      // applies the default HTTPS and caching settings, and resets internal state.
      // $htmlentities is stored unchanged in $_htmlentities (TRUE unless overridden).

      // if the cURL extension is not available, trigger an error and stop execution
      if (!extension_loaded('curl')) {
          trigger_error('php_curl extension is not loaded!', E_USER_ERROR);
      }

      // set defaults for accessing HTTPS servers
      $this->ssl();

      // initialize some internal variables: no multi handle yet, no per-request info
      $this->_multi_handle = false;
      $this->_info = array();

      // caching is disabled by default
      $this->cache(false);

      // the default number of parallel, asynchronous, requests to be processed at once
      $this->threads = 10;

      // remember whether response bodies should be run through htmlentities()
      $this->_htmlentities = $htmlentities;
  }
  248. /**
  249. * Use this method to enable caching for {@link get() get} and {@link header() header} requests.
  250. *
  251. * <i>Caching is only used for {@link get() get} and {@link header() header} requests, and will be ignored for other
  252. * request types even if it is enabled!</i>
  253. *
  254. * <i>Caching is disabled by default!</i>
  255. *
  256. * <code>
  257. * // the callback function to be executed for each and every
  258. * // request, as soon as a request finishes
  259. * // the callback function receives as argument an object with 4 properties
  260. * // (info, headers, body and response)
  261. * function mycallback($result) {
  262. *
  263. * // everything went well
  264. * if ($result->response[1] == CURLE_OK) {
  265. *
  266. * // see all the returned data
  267. * print_r('<pre>');
  268. * print_r($result);
  269. *
  270. * // something went wrong
  271. * // ($result still contains all data that could be gathered)
  272. * } else die('An error occurred: ' . $result->response[1]);
  273. *
  274. * }
  275. *
  276. * // include the Zebra_cURL library
  277. * require 'path/to/Zebra_cURL';
  278. *
  279. * // instantiate the Zebra_cURL object
  280. * $curl = new Zebra_cURL();
  281. *
  282. * // cache results in the "cache" folder and for 86400 seconds (24 hours)
  283. * $curl->cache('cache', 86400);
  284. *
  285. * // let's fetch the RSS feeds of some popular websites
  286. * // execute the "mycallback" function for each request, as soon as it finishes
  287. * $curl->get(array(
  288. * 'http://feeds.feedburner.com/alistapart/main',
  289. * 'http://feeds.feedburner.com/TechCrunch',
  290. * 'http://feeds.mashable.com/mashable',
  291. * ), 'mycallback')
  292. * </code>
  293. *
  294. * @param string $path The path where the cache files are to be stored.
  295. *
  296. * Setting this to FALSE will disable caching.
  297. *
  298. * <i>Unless set to FALSE this must point to a writable directory or an error will
  299. * be triggered!</i>
  300. *
  301. * @param integer $lifetime (Optional) The number of seconds after which cache will be considered as expired.
  302. *
  303. * Default is 3600.
  304. *
  305. * @param boolean $compress (Optional) If set to TRUE, cache files will be
  306. * {@link http://php.net/manual/ro/function.gzcompress.php gzcompress}-ed so that
  307. * they occupy less disk space.
  308. *
  309. * Default is TRUE.
  310. *
  311. * @param octal $chmod (Optional) The file system permissions to be set for newly created cache files.
  312. *
  313. * I suggest using the value "0755" (without the quotes) but, if you know what you
  314. * are doing, here is how you can calculate the permission levels:
  315. *
  316. * - 400 Owner Read
  317. * - 200 Owner Write
  318. * - 100 Owner Execute
  319. * - 40 Group Read
  320. * - 20 Group Write
  321. * - 10 Group Execute
  322. * - 4 Global Read
  323. * - 2 Global Write
  324. * - 1 Global Execute
  325. *
  326. * Default is "0755" (without the quotes).
  327. *
  328. * @return null
  329. */
  public function cache($path, $lifetime = 3600, $compress = true, $chmod = 0755)
  {
      // Stores the cache configuration on the instance, or switches caching off.

      // any value of $path that loosely equals FALSE turns caching off
      if ($path == false) {
          $this->cache = false;
          return;
      }

      // otherwise remember every cache-related setting under its own key
      $settings = array();
      $settings['path'] = $path;
      $settings['lifetime'] = $lifetime;
      $settings['chmod'] = $chmod;
      $settings['compress'] = $compress;

      $this->cache = $settings;
  }
  344. /**
  345. * Sets the path and name of the file to save to / retrieve cookies from, for each accessed URL. (cookie name/data
  346. * will be stored in this file on a per-domain basis). Important when cookies need to be stored/restored to maintain
  347. * status/session of the request(s) made to the same domain(s).
  348. *
  349. * This method will automatically set the <b>CURLOPT_COOKIEJAR</b> and <b>CURLOPT_COOKIEFILE</b> options.
  350. *
  351. * @param string $path The path to a file to save to / retrieve cookies from, for each accessed URL.
  352. *
  353. * If file does not exist the library will attempt to create it and if it is unable to
  354. * create it will trigger an error.
  355. *
  356. * @param boolean $keep (Optional) By default, the file to save to / retrieve cookies from is deleted when
  357. * script execution finishes. If you want the file to be preserved, set this argument to
  358. * TRUE.
  359. *
  360. * Default is FALSE.
  361. *
  362. * @return null
  363. */
  public function cookies($path, $keep = false)
  {
      // Makes sure the cookie file exists (creating it when missing) and points
      // both CURLOPT_COOKIEJAR and CURLOPT_COOKIEFILE at it.
      //
      // NOTE(review): $keep is accepted but never read in this method; whether the
      // file is removed at the end of the script cannot be confirmed from here.

      // file does not exist
      if (!is_file($path)) {

          // attempt to create it
          $handle = fopen($path, 'a');

          if ($handle === false) {

              // if file could not be created, trigger an error
              trigger_error('File "' . $path . '" for storing cookies could not be found nor could it automatically be created! Make sure either that the path to the file points to a writable directory, or create the file yourself and make it writable.', E_USER_ERROR);

          } else {

              // the file was created; release the handle
              // (only done for a valid handle, so that fclose() is never given FALSE
              // when a custom error handler lets execution continue after the error)
              fclose($handle);

          }

      }

      // set these options
      $this->option(array(
          CURLOPT_COOKIEJAR => $path,
          CURLOPT_COOKIEFILE => $path,
      ));
  }
  381. /**
  382. * Downloads one or more files from one or more URLs specified by the <i>$url</i> argument, saves the downloaded
  383. * files (with their original name) to the path specified by the <i>$destination_path</i>, and executes the callback
  384. * function specified by the <i>$callback</i> argument for each and every request, as soon as each request finishes.
  385. *
  386. * Downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary strain from your
  387. * server of reading files into memory first, and then writing them to disk.
  388. *
  389. * This method will automatically set the <b>CURLOPT_BINARYTRANSFER</b> option to TRUE, so you might want to change
  390. * this back to FALSE/0 or "unset" it using the {@link option()} method, before making a {@link get()}, {@link header()}
  391. * or {@link post()} request.
  392. *
  393. * <i>Files are downloaded preserving their name so you may run into trouble when trying to download more images
  394. * having the same name (either from the same, or different servers)!</i>
  395. *
  396. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  397. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  398. * the {@link threads} property.</i>
  399. *
  400. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  401. *
  402. * <code>
  403. * // the callback function to be executed for each and every
  404. * // request, as soon as a request finishes
  405. * // the callback function receives as argument an object with 4 properties
  406. * // (info, headers, body and response)
  407. * function mycallback($result) {
  408. *
  409. * // everything went well
  410. * if ($result->response[1] == CURLE_OK) {
  411. *
  412. * // see all the returned data
  413. * print_r('<pre>');
  414. * print_r($result);
  415. *
  416. * // something went wrong
  417. * // ($result still contains all data that could be gathered)
  418. * } else die('An error occurred: ' . $result->response[1]);
  419. *
  420. * }
  421. *
  422. * // include the Zebra_cURL library
  423. * require 'path/to/Zebra_cURL';
  424. *
  425. * // instantiate the Zebra_cURL object
  426. * $curl = new Zebra_cURL();
  427. *
  428. * // download 2 images from 2 different websites, and
  429. * // execute the "mycallback" function for each request, as soon as it finishes
  430. * $curl->download(array(
  431. * 'http://www.somewebsite.com/images/alpha.jpg',
  432. * 'http://www.otherwebsite.com/images/omega.jpg',
  433. * ), 'destination/path/', 'mycallback');
  434. * </code>
  435. *
  436. * @param mixed $url A single or an array of URLs to process.
  437. *
  438. * @param string $destination_path The path to where to save the file(s) to.
  439. *
  440. * If path is not pointing to a directory or is not writable, the library will
  441. * trigger an error.
  442. *
  443. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  444. *
  445. * May be given as a string representing a name of an existing function, as an
  446. * anonymous function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  447. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  448. * closure}.
  449. *
  450. * The callback function receives as first argument <b>an object</b> with <b>4
  451. * properties</b> as described below, while any further arguments passed to the
  452. * {@link download} method will be passed as extra arguments to the callback function:
  453. *
  454. * - <b>info</b> - an associative array containing information about the
  455. * request that just finished, as returned by PHP's
  456. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  457. * function; there's also an extra entry called <i>original_url</i>
  458. * because, as curl_getinfo() only returns information
  459. * about the <b>last</b> request, the original URL may
  460. * be lost otherwise.
  461. *
  462. * - <b>headers</b> - an associative array with 2 items:
  463. *
  464. * <b>- last_request</b> an array with a single entry
  465. * containing the request headers generated by <i>the
  466. * last request</i>; so, remember, if there are redirects
  467. * involved, there will be more requests made, but only
  468. * information from the last one will be available; if
  469. * explicitly disabled via the {@link option()} method
  470. * by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  471. * this will be an empty string;
  472. *
  473. * <b>- responses</b> an array with one or more entries
  474. * (if there are redirects involved) with the response
  475. * headers of all the requests made; if explicitly disabled
  476. * via the {@link option()} method by setting
  477. * <b>CURLOPT_HEADER</b> to 0 or FALSE, this will be an
  478. * empty string;
  479. *
  480. * <i>Unless disabled, each entry in the headers' array
  481. * is an associative array in the form of property =>
  482. * value</i>
  483. *
  484. * - <b>body</b> - the response of the request (the content of the page
  485. * at the URL).
  486. *
  487. * If "body" is explicitly disabled via the {@link option()}
  488. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE,
  489. * this will be an empty string;
  490. *
  491. * - <b>response</b> - the response given by the cURL library as an array
  492. * with 2 entries: the first entry represents the result's
  493. * code, while the second is the textual representation
  494. * of the code; if the request was successful, these
  495. * values will be <i>array(0, CURLE_OK);</i> consult
  496. * {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  497. * this list} to see the possible values of this property;
  498. *
  499. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  500. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  501. *
  502. * @return null
  503. */
  public function download($url, $destination_path, $callback = '')
  {
      // Validates the destination directory, remembers it on the instance, switches
      // cURL to binary transfers and hands the URL(s) over to _process().

      // the destination must be an existing, writable directory
      $usable = is_dir($destination_path) && is_writable($destination_path);
      if (!$usable) {
          trigger_error('"' . $destination_path . '" is not a valid path or is not writable!', E_USER_ERROR);
      }

      // normalise the download path so that it ends with exactly one forward slash
      $this->download_path = rtrim($destination_path, '/\\') . '/';

      // instruct the cURL library that it has to do a binary transfer
      $this->option(CURLOPT_BINARYTRANSFER, 1);

      // prior to PHP 5.3, func_get_args() cannot be used as a function parameter,
      // hence the intermediary variable
      $received = func_get_args();

      // anything passed after the 3 declared parameters is forwarded as-is,
      // preceded by the URL(s) and the callback (the destination path is not forwarded)
      $forwarded = array_slice($received, 3);
      array_unshift($forwarded, $url, $callback);

      // process request(s)
      call_user_func_array(array($this, '_process'), $forwarded);
  }
  520. /**
  521. * Works exactly like the {@link download()} method only that downloads are made from an FTP server.
  522. *
  523. * Downloads from an FTP server to which the connection is made using the given <i>$username</i> and <i>$password</i>
  524. * arguments, one or more files specified by the <i>$url</i> argument, saves the downloaded files (with their original
  525. * name) to the path specified by the <i>$destination_path</i>, and executes the callback function specified by the
  526. * <i>$callback</i> argument for each and every request, as soon as each request finishes.
  527. *
  528. * Downloads are streamed (bytes downloaded are directly written to disk) removing the unnecessary strain from your
  529. * server of reading files into memory first, and then writing them to disk.
  530. *
  531. * This method will automatically set the <b>CURLOPT_BINARYTRANSFER</b> option to TRUE, so you might want to change
  532. * this back to FALSE/0 or "unset" it using the {@link option()} method, before making a {@link get()}, {@link header()}
  533. * or {@link post()} request.
  534. *
  535. * <i>Files are downloaded preserving their name so you may run into trouble when trying to download more images
  536. * having the same name (either from the same, or different servers)!</i>
  537. *
  538. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  539. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  540. * the {@link threads} property.</i>
  541. *
  542. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  543. *
  544. * <code>
  545. * // the callback function to be executed for each and every
  546. * // request, as soon as a request finishes
  547. * // the callback function receives as argument an object with 4 properties
  548. * // (info, headers, body and response)
  549. * function mycallback($result) {
  550. *
  551. * // everything went well
  552. * if ($result->response[1] == CURLE_OK) {
  553. *
  554. * // see all the returned data
  555. * print_r('<pre>');
  556. * print_r($result);
  557. *
  558. * // something went wrong
  559. * // ($result still contains all data that could be gathered)
  560. * } else die('An error occurred: ' . $result->response[1]);
  561. *
  562. * }
  563. *
  564. * // include the Zebra_cURL library
  565. * require 'path/to/Zebra_cURL';
  566. *
  567. * // instantiate the Zebra_cURL object
  568. * $curl = new Zebra_cURL();
  569. *
  570. * // connect to the FTP server using the given credential, download a file to a given location and
  571. * // execute the "mycallback" function for each request, as soon as it finishes
  572. * $curl->ftp_download('ftp://somefile.ext', 'destination/path/', 'username', 'password', 'mycallback');
  573. * </code>
  574. *
  575. * @param mixed $url A single or an array of URLs to process.
  576. *
  577. * @param string $destination_path The path to where to save the file(s) to.
  578. *
  579. * If path is not pointing to a directory or is not writable, the library will
  580. * trigger an error.
  581. *
  582. * @param string $username (Optional) The username to be used to connect to the FTP server (if required).
  583. *
  584. * @param string $password (Optional) The password to be used to connect to the FTP server (if required).
  585. *
  586. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  587. *
  588. * May be given as a string representing a name of an existing function, as an
  589. * anonymous function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  590. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  591. * closure}.
  592. *
  593. * The callback function receives as first argument <b>an object</b> with <b>4
  594. * properties</b> as described below, while any further arguments passed to the
  595. * {@link ftp_download} method will be passed as extra arguments to the callback function:
  596. *
  597. * - <b>info</b> - an associative array containing information about the
  598. * request that just finished, as returned by PHP's
  599. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  600. * function;
  601. *
  602. * - <b>headers</b> - an associative array with 2 items:
  603. *
  604. * <b>- last_request</b> an array with a single entry
  605. * containing the request headers generated by <i>the
  606. * last request</i>; so, remember, if there are redirects
  607. * involved, there will be more requests made, but only
  608. * information from the last one will be available; if
  609. * explicitly disabled via the {@link option()} method
  610. * by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  611. * this will be an empty string;
  612. *
  613. * <b>- responses</b> an array with one or more entries
  614. * (if there are redirects involved) with the response
  615. * headers of all the requests made; if explicitly disabled
  616. * via the {@link option()} method by setting
  617. * <b>CURLOPT_HEADER</b> to 0 or FALSE, this will be an
  618. * empty string;
  619. *
  620. * <i>Unless disabled, each entry in the headers' array
  621. * is an associative array in the form of property =>
  622. * value</i>
  623. *
  624. * - <b>body</b> - the response of the request (the content of the page
  625. * at the URL).
  626. *
  627. * If "body" is explicitly disabled via the {@link option()}
  628. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE,
  629. * this will be an empty string;
  630. *
  631. * - <b>response</b> - the response given by the cURL library as an array
  632. * with 2 entries: the first entry represents the result's
  633. * code, while the second is the textual representation
  634. * of the code; if the request was successful, these
  635. * values will be <i>array(0, CURLE_OK);</i> consult
  636. * {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  637. * this list} to see the possible values of this property;
  638. *
  639. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  640. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  641. *
  642. * @return null
  643. */
  public function ftp_download($url, $destination_path, $username = '', $password = '', $callback = '')
  {
      // Optionally sets the FTP credentials, then delegates to download().

      // credentials are only set when a username was supplied
      if ($username != '') {
          $this->option(CURLOPT_USERPWD, $username . ':' . $password);
      }

      // prior to PHP 5.3, func_get_args() cannot be used as a function parameter,
      // hence the intermediary variable
      $received = func_get_args();

      // anything passed after the 5 declared parameters is forwarded as-is,
      // preceded by the URL(s), the destination path and the callback
      $forwarded = array_slice($received, 5);
      array_unshift($forwarded, $url, $destination_path, $callback);

      // call the "download" method
      call_user_func_array(array($this, 'download'), $forwarded);
  }
  657. /**
  658. * Performs an HTTP <b>GET</b> request to one or more URLs specified by the <i>$url</i> argument and executes the
  659. * callback function specified by the <i>$callback</i> argument for each and every request, as soon as each request
  660. * finishes.
  661. *
  662. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  663. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  664. * the {@link threads} property.</i>
  665. *
  666. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  667. *
  668. * <code>
  669. * // the callback function to be executed for each and every
  670. * // request, as soon as a request finishes
  671. * // the callback function receives as argument an object with 4 properties
  672. * // (info, header, body and response)
  673. * function mycallback($result) {
  674. *
  675. * // everything went well
  676. * if ($result->response[1] == CURLE_OK) {
  677. *
  678. * // see all the returned data
  679. * print_r('<pre>');
  680. * print_r($result);
  681. *
  682. * // something went wrong
  683. * // ($result still contains all data that could be gathered)
  684. * } else die('An error occurred: ' . $result->response[1]);
  685. *
  686. * }
  687. *
  688. * // include the Zebra_cURL library
  689. * require 'path/to/Zebra_cURL';
  690. *
  691. * // instantiate the Zebra_cURL object
  692. * $curl = new Zebra_cURL();
  693. *
  694. * // cache results in the "cache" folder and for 3600 seconds (one hour)
  695. * $curl->cache('cache', 3600);
  696. *
  697. * // let's fetch the RSS feeds of some popular websites
  698. * // execute the "mycallback" function for each request, as soon as it finishes
  699. * $curl->get(array(
  700. * 'http://feeds.feedburner.com/alistapart/main',
  701. * 'http://feeds.feedburner.com/TechCrunch',
  702. * 'http://feeds.mashable.com/mashable',
  703. * ), 'mycallback')
  704. * </code>
  705. *
  706. * @param mixed $url A single or an array of URLs to process.
  707. *
  708. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  709. *
  710. * May be given as a string representing a name of an existing function, as an anonymous
  711. * function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  712. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  713. * closure}.
  714. *
  715. * The callback function receives as first argument <b>an object</b> with <b>4 properties</b>
  716. * as described below, while any further arguments passed to the {@link get} method will
  717. * be passed as extra arguments to the callback function:
  718. *
  719. * - <b>info</b> - an associative array containing information about the request
  720. * that just finished, as returned by PHP's
  721. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  722. * function;
  723. *
  724. * - <b>headers</b> - an associative array with 2 items:
  725. *
  726. * <b>- last_request</b> an array with a single entry containing
  727. * the request headers generated by <i>the last request</i>; so,
  728. * remember, if there are redirects involved, there will be more
  729. * requests made, but only information from the last one will be
  730. * available; if explicitly disabled via the {@link option()}
  731. * method by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  732. * this will be an empty string;
  733. *
  734. * <b>- responses</b> an array with one or more entries (if there
  735. * are redirects involved) with the response headers of all the
  736. * requests made; if explicitly disabled via the {@link option()}
  737. * method by setting <b>CURLOPT_HEADER</b> to 0 or FALSE, this
  738. * will be an empty string;
  739. *
  740. * <i>Unless disabled, each entry in the headers' array is an
  741. * associative array in the form of property => value</i>
  742. *
  743. * - <b>body</b> - the response of the request (the content of the page at the
  744. * URL).
  745. *
  746. * Unless disabled via the {@link __construct() constructor}, all
  747. * applicable characters will be converted to HTML entities via
  748. * PHP's {@link http://php.net/manual/en/function.htmlentities.php htmlentities()}
  749. * function, so remember to use PHP's {@link http://www.php.net/manual/en/function.html-entity-decode.php html_entity_decode()}
  750. * function to reverse this, if needed;
  751. *
  752. * If "body" is explicitly disabled via the {@link option()}
  753. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE, this
  754. * will be an empty string;
  755. *
  756. * - <b>response</b> - the response given by the cURL library as an array with 2
  757. * entries: the first entry represents the result's code, while
  758. * the second is the textual representation of the code; if the
  759. * request was successful, these values will be <i>array(0,
  760. * CURLE_OK);</i> consult {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  761. * this list} to see the possible values of this property;
  762. *
  763. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  764. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  765. *
  766. * @return null
  767. */
  public function get($url, $callback = '')
  {
      // make sure we perform a GET request
      // options set through option() stay in place between calls on the same instance,
      // so POST settings left behind by an earlier post() call are removed here;
      // otherwise they could still be applied to this request
      $this->option(array(
          CURLOPT_HTTPGET     => 1,
          CURLOPT_POST        => null,
          CURLOPT_POSTFIELDS  => null,
      ));

      // prior to PHP 5.3, func_get_args() cannot be used as a function parameter
      // so we need this intermediary step
      $arguments = func_get_args();

      // process request(s); the URL(s), the callback and any extra arguments are
      // forwarded exactly as received
      call_user_func_array(array($this, '_process'), $arguments);
  }
  778. /**
  779. * Works exactly like the {@link get()} method, the only difference being that this method will automatically set
  780. * the <b>CURLOPT_NOBODY</b> option to TRUE and thus the <i>body</i> property of the result will be an empty string.
  781. * Also, <b>CURLINFO_HEADER_OUT</b> and <b>CURLOPT_HEADER</b> will be set to TRUE and therefore header information
  782. * will be available.
  783. *
  784. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  785. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  786. * the {@link threads} property.</i>
  787. *
  788. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  789. *
  790. * <code>
  791. * // the callback function to be executed for each and every
  792. * // request, as soon as a request finishes
  793. * // the callback function receives as argument an object with 4 properties
  794. * // (info, header, body and response)
  795. * function mycallback($result) {
  796. *
  797. * // everything went well
  798. * if ($result->response[1] == CURLE_OK) {
  799. *
  800. * // see all the returned data
  801. * print_r('<pre>');
  802. * print_r($result);
  803. *
  804. * // something went wrong
  805. * // ($result still contains all data that could be gathered)
  806. * } else die('An error occurred: ' . $result->response[1]);
  807. *
  808. * }
  809. *
  810. * // include the Zebra_cURL library
  811. * require 'path/to/Zebra_cURL';
  812. *
  813. * // instantiate the Zebra_cURL object
  814. * $curl = new Zebra_cURL();
  815. *
  816. * // process the given URL and execute the "mycallback" function for each
  817. * // request, as soon as it finishes
  818. * $curl->header('http://www.somewebsite.com', 'mycallback');
  819. * </code>
  820. *
  821. * @param mixed $url A single or an array of URLs to process.
  822. *
  823. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  824. *
  825. * May be given as a string representing a name of an existing function, as an anonymous
  826. * function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  827. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  828. * closure}.
  829. *
  830. * The callback function receives as first argument <b>an object</b> with <b>4 properties</b>
  831. * as described below, while any further arguments passed to the {@link header} method
  832. * will be passed as extra arguments to the callback function:
  833. *
  834. * - <b>info</b> - an associative array containing information about the request
  835. * that just finished, as returned by PHP's
  836. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  837. * function;
  838. *
  839. * - <b>headers</b> - an associative array with 2 items:
  840. *
  841. * <b>- last_request</b> an array with a single entry containing
  842. * the request headers generated by <i>the last request</i>; so,
  843. * remember, if there are redirects involved, there will be more
  844. * requests made, but only information from the last one will be
  845. * available;
  846. *
  847. * <b>- responses</b> an array with one or more entries (if there
  848. * are redirects involved) with the response headers of all the
  849. * requests made;
  850. *
  851. * <i>Each entry in the headers' array is an associative array
  852. * in the form of property => value</i>
  853. *
  854. * - <b>body</b> - an empty string
  855. *
  856. * - <b>response</b> - the response given by the cURL library as an array with 2
  857. * entries: the first entry represents the result's code, while
  858. * the second is the textual representation of the code; if the
  859. * request was successful, these values will be <i>array(0,
  860. * CURLE_OK);</i> consult {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  861. * this list} to see the possible values of this property;
  862. *
  863. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  864. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  865. *
  866. * @return null
  867. */
  public function header($url, $callback = '')
  {
      // no "body" for header requests but make sure we have the headers
      // options stay in place between calls on the same instance, so POST settings
      // left behind by an earlier post() call are removed as well; otherwise they
      // could still be applied to this request
      $this->option(array(
          CURLINFO_HEADER_OUT => 1,
          CURLOPT_HEADER      => 1,
          CURLOPT_HTTPGET     => 1,
          CURLOPT_NOBODY      => 1,
          CURLOPT_POST        => null,
          CURLOPT_POSTFIELDS  => null,
      ));

      // prior to PHP 5.3, func_get_args() cannot be used as a function parameter
      // so we need this intermediary step
      $arguments = func_get_args();

      // process request(s); the URL(s), the callback and any extra arguments are
      // forwarded exactly as received
      call_user_func_array(array($this, '_process'), $arguments);
  }
  883. /**
  884. * Use this method to make requests to pages that require prior HTTP authentication.
  885. *
  886. * <code>
  887. * // the callback function to be executed for each and every
  888. * // request, as soon as a request finishes
  889. * // the callback function receives as argument an object with 4 properties
  890. * // (info, header, body and response)
  891. * function mycallback($result) {
  892. *
  893. * // everything went well
  894. * if ($result->response[1] == CURLE_OK) {
  895. *
  896. * // see all the returned data
  897. * print_r('<pre>');
  898. * print_r($result);
  899. *
  900. * // something went wrong
  901. * // ($result still contains all data that could be gathered)
  902. * } else die('An error occurred: ' . $result->response[1]);
  903. *
  904. * }
  905. *
  906. * // include the Zebra_cURL library
  907. * require 'path/to/Zebra_cURL';
  908. *
  909. * // instantiate the Zebra_cURL object
  910. * $curl = new Zebra_cURL();
  911. *
  912. * // prepare user name and password
  913. * $curl->http_authentication('username', 'password');
  914. *
  915. * // get content from a page that requires prior HTTP authentication
  916. * $curl->get('http://www.some-page-requiring-prior-http-authentication.com', 'mycallback');
  917. * </code>
  918. *
  919. * @param string $username User name to be used for authentication.
  920. *
  921. * @param string $password Password to be used for authentication.
  922. *
  923. * @param string $type (Optional) The HTTP authentication method(s) to use. The options are:
  924. *
  925. * - <b>CURLAUTH_BASIC</b>
  926. * - <b>CURLAUTH_DIGEST</b>
  927. * - <b>CURLAUTH_GSSNEGOTIATE</b>
  928. * - <b>CURLAUTH_NTLM</b>
  929. * - <b>CURLAUTH_ANY</b>
  930. * - <b>CURLAUTH_ANYSAFE</b>
  931. *
  932. * The bitwise | (or) operator can be used to combine more than one method. If
  933. * this is done, cURL will poll the server to see what methods it supports and
  934. * pick the best one.
  935. *
  936. * <b>CURLAUTH_ANY</b> is an alias for <b>CURLAUTH_BASIC</b> | <b>CURLAUTH_DIGEST</b> |
  937. * <b>CURLAUTH_GSSNEGOTIATE</b> | <b>CURLAUTH_NTLM</b>.
  938. *
  939. * <b>CURLAUTH_ANYSAFE</b> is an alias for <b>CURLAUTH_DIGEST</b> | <b>CURLAUTH_GSSNEGOTIATE</b> |
  940. * <b>CURLAUTH_NTLM</b>.
  941. *
  942. * Default is <b>CURLAUTH_ANY</b>.
  943. *
  944. * @return null
  945. */
  public function http_authentication($username, $password, $type = CURLAUTH_ANY)
  {
      // credentials are stored as a single "username:password" string
      $credentials = $username . ':' . $password;

      // register the authentication method(s) first, then the credentials
      $this->option(CURLOPT_HTTPAUTH, $type);
      $this->option(CURLOPT_USERPWD, $credentials);
  }
  954. /**
  955. * Allows you to set one or more {@link http://php.net/manual/en/function.curl-setopt.php cURL options}.
  956. *
  957. * <code>
  958. * // include the Zebra_cURL library
  959. * require 'path/to/Zebra_cURL';
  960. *
  961. * // instantiate the Zebra_cURL object
  962. * $curl = new Zebra_cURL();
  963. *
  964. * // setting a single option
  965. * $curl->option(CURLOPT_CONNECTTIMEOUT, 10);
  966. *
  967. * // setting multiple options at once
  968. * $curl->option(array(
  969. * CURLOPT_TIMEOUT => 10,
  970. * CURLOPT_CONNECTTIMEOUT => 10,
  971. * ));
  972. *
  973. * // make a request here...
  974. * </code>
  975. *
  976. * @param mixed $option A single option for which to set a value, or an associative array in the form of
  977. * <i>option</i> => <i>value</i> (in case of an array, the <i>$value</i> argument will
  978. * be disregarded).
  979. *
  980. * <i>Setting a value to</i> <b>null</b> <i>will "unset" that option.</i>
  981. *
  982. * @param mixed $value (Optional) If the <i>$option</i> argument is not an array, then this argument represents
  983. * the value to be set for the respective option. If the <i>$option</i> argument is an
  984. * array, then the value of this argument will be ignored.
  985. *
  986. * <i>Setting a value to</i> <b>null</b> <i>will "unset" that option.</i>
  987. *
  988. * @return null
  989. *
  990. */
  public function option($option, $value = '')
  {
      // treat a single option as a one-entry list so that both call styles share one code path
      // (when $option is already an array, $value is disregarded)
      $pairs = is_array($option) ? $option : array($option => $value);

      foreach ($pairs as $name => $setting) {

          // a null value means the option has to be "unset"
          if (is_null($setting)) {
              unset($this->options[$name]);
              continue;
          }

          // any other value is stored for the respective option
          $this->options[$name] = $setting;

      }
  }
  1007. /**
  1008. * Performs an HTTP <b>POST</b> to one or more URLs specified by the <i>$url</i> argument, using the values specified
  1009. * by the <i>$values</i> argument, and executes the callback function specified by the <i>$callback</i> argument for
  1010. * each and every request, as soon as each request finishes.
  1011. *
  1012. * <i>Multiple requests are made asynchronously, in parallel, and the callback function is called for each and every
  1013. * request, as soon as each request finishes. The number of parallel requests to be made at once can be set through
  1014. * the {@link threads} property.</i>
  1015. *
  1016. * <i>Note that in case of multiple URLs, requests may not finish in the same order as initiated!</i>
  1017. *
  1018. * <code>
  1019. * // the callback function to be executed for each and every
  1020. * // request, as soon as a request finishes
  1021. * // the callback function receives as argument an object with 4 properties
  1022. * // (info, header, body and response)
  1023. * function mycallback($result) {
  1024. *
  1025. * // everything went well
  1026. * if ($result->response[1] == CURLE_OK) {
  1027. *
  1028. * // see all the returned data
  1029. * print_r('<pre>');
  1030. * print_r($result);
  1031. *
  1032. * // something went wrong
  1033. * // ($result still contains all data that could be gathered)
  1034. * } else die('An error occurred: ' . $result->response[1]);
  1035. *
  1036. * }
  1037. *
  1038. * // include the Zebra_cURL library
  1039. * require 'path/to/Zebra_cURL';
  1040. *
  1041. * // instantiate the Zebra_cURL object
  1042. * $curl = new Zebra_cURL();
  1043. *
  1044. * // do a POST and execute the "mycallback" function for each
  1045. * // request, as soon as it finishes
  1046. * $curl->post('http://www.somewebsite.com', array(
  1047. * 'field_1' => 'value 1',
  1048. * 'field_2' => 'value 2',
  1049. * ), 'mycallback');
  1050. *
  1051. * // do a POST and execute the "mycallback" function for each
  1052. * // request, as soon as it finishes
  1053. * // note that we're also uploading a file this time
  1054. * // and note that we're prefixing the file name with @
  1055. * $curl->post('http://www.somewebsite.com', array(
  1056. * 'field_1' => 'value 1',
  1057. * 'field_2' => 'value 2',
  1058. * 'upload' => '@absolute/path/to/file.ext',
  1059. * ), 'mycallback');
  1060. * </code>
  1061. *
  1062. * @param mixed $url A single or an array of URLs to which to POST to.
  1063. *
  1064. * @param array $values An associative array in the form of <i>element => value</i> representing the data to
  1065. * post in the HTTP "POST" operation.
  1066. *
  1067. * To post a file, prepend the filename with @ and use the full path. The file type can
  1068. * be explicitly specified by following the filename with the type in the format <b>';type=mimetype'.</b>
  1069. * You should always specify the mime type as most of the times cURL will send the wrong
  1070. * mime type...
  1071. *
  1072. * The <i>Content-Type</i> header will be set to <b>multipart/form-data.</b>
  1073. *
  1074. * @param mixed $callback (Optional) Callback function to be called as soon as a request finishes.
  1075. *
  1076. * May be given as a string representing a name of an existing function, as an anonymous
  1077. * function created on the fly via {@link http://www.php.net/manual/ro/function.create-function.php
  1078. * create_function} or, as of PHP 5.3.0, via a {@link http://www.php.net/manual/en/functions.anonymous.php
  1079. * closure}.
  1080. *
  1081. * The callback function receives as first argument <b>an object</b> with <b>4 properties</b>
  1082. * as described below, while any further arguments passed to the {@link post} method
  1083. * will be passed as extra arguments to the callback function:
  1084. *
  1085. * - <b>info</b> - an associative array containing information about the request
  1086. * that just finished, as returned by PHP's
  1087. * {@link http://php.net/manual/en/function.curl-getinfo.php curl_getinfo()}
  1088. * function;
  1089. *
  1090. * - <b>headers</b> - an associative array with 2 items:
  1091. *
  1092. * <b>- last_request</b> an array with a single entry containing
  1093. * the request headers generated by <i>the last request</i>; so,
  1094. * remember, if there are redirects involved, there will be more
  1095. * requests made, but only information from the last one will be
  1096. * available; if explicitly disabled via the {@link option()}
  1097. * method by setting <b>CURLINFO_HEADER_OUT</b> to 0 or FALSE,
  1098. * this will be an empty string;
  1099. *
  1100. * <b>- responses</b> an array with one or more entries (if there
  1101. * are redirects involved) with the response headers of all the
  1102. * requests made; if explicitly disabled via the {@link option()}
  1103. * method by setting <b>CURLOPT_HEADER</b> to 0 or FALSE, this
  1104. * will be an empty string;
  1105. *
  1106. * <i>Unless disabled, each entry in the headers' array is an
  1107. * associative array in the form of property => value</i>
  1108. *
  1109. * - <b>body</b> - the response of the request (the content of the page at the
  1110. * URL).
  1111. *
  1112. * Unless disabled via the {@link __construct() constructor}, all
  1113. * applicable characters will be converted to HTML entities via
  1114. * PHP's {@link http://php.net/manual/en/function.htmlentities.php htmlentities()}
  1115. * function, so remember to use PHP's {@link http://www.php.net/manual/en/function.html-entity-decode.php html_entity_decode()}
  1116. * function to reverse this, if needed;
  1117. *
  1118. * If "body" is explicitly disabled via the {@link option()}
  1119. * method by setting <b>CURLOPT_NOBODY</b> to 1 or TRUE, this
  1120. * will be an empty string;
  1121. *
  1122. * - <b>response</b> - the response given by the cURL library as an array with 2
  1123. * entries: the first entry represents the result's code, while
  1124. * the second is the textual representation of the code; if the
  1125. * request was successful, these values will be <i>array(0,
  1126. * CURLE_OK);</i> consult {@link http://www.php.net/manual/en/function.curl-errno.php#103128
  1127. * this list} to see the possible values of this property;
  1128. *
  1129. * <samp>If the callback function returns FALSE while {@link cache} is enabled, the library will not cache the
  1130. * respective request, making it easy to retry failed requests without having to clear all cache.</samp>
  1131. *
  1132. * @return null
  1133. */
  public function post($url, $values, $callback = '')
  {
      // if second argument is not an array, trigger an error
      if (!is_array($values)) {

          trigger_error('Second argument to method "post" must be an array!', E_USER_ERROR);

          // a custom error handler may let execution continue; there is nothing sensible to post
          return;

      }

      // prepare cURL for making a POST
      // - the values are sent as a URL-encoded query string; an empty string (not NULL) is passed
      //   as the numeric prefix because passing NULL there is deprecated as of PHP 8.1
      // - CURLOPT_HTTPGET is removed because options stay in place between calls: a value left
      //   behind by an earlier get()/header() call would make the "_process" method treat this
      //   POST as a cacheable GET request
      // NOTE(review): because the values are sent as an encoded string, "@file" uploads described
      // in this method's documentation presumably do not work - confirm
      $this->option(array(
          CURLOPT_POST        => 1,
          CURLOPT_POSTFIELDS  => http_build_query($values, '', '&'),
          CURLOPT_HTTPGET     => null,
      ));

      // prior to PHP 5.3, func_get_args() cannot be used as a function parameter
      // so we need this intermediary step
      $arguments = func_get_args();

      // remove the argument holding the POST values (index 1), so that the "_process" method
      // receives the URL(s) first, the callback second, and any extra arguments after that
      array_splice($arguments, 1, 1);

      // process request(s)
      call_user_func_array(array($this, '_process'), $arguments);
  }
  1149. /**
  1150. * Instruct the library to tunnel all requests through a proxy server.
  1151. *
  1152. * <code>
  1153. * // the callback function to be executed for each and every
  1154. * // request, as soon as a request finishes
  1155. * function mycallback($result) {
  1156. *
  1157. * // everything went well
  1158. * if ($result->response[1] == CURLE_OK) {
  1159. *
  1160. * // see all the returned data
  1161. * print_r('<pre>');
  1162. * print_r($result);
  1163. *
  1164. * // something went wrong
  1165. * // ($result still contains all data that could be gathered)
  1166. * } else die('An error occurred: ' . $result->response[1]);
  1167. *
  1168. * }
  1169. *
  1170. * // include the Zebra_cURL library
  1171. * require 'path/to/Zebra_cURL';
  1172. *
  1173. * // instantiate the class
  1174. * $curl = new Zebra_cURL();
  1175. *
  1176. * // connect to a proxy server
  1177. * // (that's a random one i got from http://www.hidemyass.com/proxy-list/)
  1178. * $curl->proxy('187.63.32.250', '3128');
  1179. *
  1180. * // fetch a page
  1181. * $curl->get('http://www.somewebsite.com/', 'mycallback');
  1182. * </code>
  1183. *
  1184. * @param string $proxy The HTTP proxy to tunnel requests through.
  1185. *
  1186. * Can be an URL or an IP address.
  1187. *
  1188. * <i>This option can also be set using the {@link option()} method and setting </i>
  1189. * <b>CURLOPT_PROXY</b> <i> option to the desired value</i>.
  1190. *
  1191. * Setting this argument to FALSE will "unset" all the proxy-related options.
  1192. *
  1193. * @param string $port (Optional) The port number of the proxy to connect to.
  1194. *
  1195. * Default is 80.
  1196. *
  1197. * <i>This option can also be set using the {@link option()} method and setting </i>
  1198. * <b>CURLOPT_PROXYPORT</b> <i> option to the desired value</i>.
  1199. *
  1200. * @param string $username (Optional) The username to be used for the connection to the proxy (if required
  1201. * by the proxy)
  1202. *
  1203. * Default is "" (an empty string)
  1204. *
  1205. * <i>The username and the password can also be set using the {@link option()} method
  1206. * and setting </i> <b>CURLOPT_PROXYUSERPWD</b> <i> option to the desired value
  1207. * formatted like </i> <b>[username]:[password]</b>.
  1208. *
  1209. * @param string $password (Optional) The password to be used for the connection to the proxy (if required
  1210. * by the proxy)
  1211. *
  1212. * Default is "" (an empty string)
  1213. *
  1214. * <i>The username and the password can also be set using the {@link option()} method
  1215. * and setting </i> <b>CURLOPT_PROXYUSERPWD</b> <i> option to the desired value
  1216. * formatted like </i> <b>[username]:[password]</b>.
  1217. *
  1218. * @return null
  1219. */
  public function proxy($proxy, $port = 80, $username = '', $password = '')
  {
      // if not disabled
      if ($proxy) {

          // set the required options
          $this->option(array(
              CURLOPT_HTTPPROXYTUNNEL => 1,
              CURLOPT_PROXY           => $proxy,
              CURLOPT_PROXYPORT       => $port,
          ));

          // if a username is also specified, set authentication values
          if ($username != '') {
              $this->option(CURLOPT_PROXYUSERPWD, $username . ':' . $password);
          }

      // if disabled
      } else {

          // unset all proxy-related options, including any stored proxy credentials,
          // so that nothing proxy-specific lingers once the proxy is turned off
          $this->option(array(
              CURLOPT_HTTPPROXYTUNNEL => null,
              CURLOPT_PROXY           => null,
              CURLOPT_PROXYPORT       => null,
              CURLOPT_PROXYUSERPWD    => null,
          ));

      }
  }
  1243. /**
  1244. * Requests made to HTTPS servers sometimes require additional configuration, depending on the server. Most of the
  1245. * times {@link __construct() the defaults} set by the library will get you through, but if defaults are not working,
  1246. * you can set specific options using this method.
  1247. *
  1248. * <code>
  1249. * // include the Zebra_cURL library
  1250. * require 'path/to/Zebra_cURL';
  1251. *
  1252. * // instantiate the class
  1253. * $curl = new Zebra_cURL();
  1254. *
  1255. * // instruct the library to verify peer's SSL certificate
  1256. * // (ignored if request is not made through HTTPS)
  1257. * $curl->ssl(true);
  1258. *
  1259. * // fetch a page
  1260. * $curl->get('https://www.somewebsite.com/', create_function('$result', 'print_r("<pre>"); print_r($result);'));
  1261. * </code>
  1262. *
  1263. * @param boolean $verify_peer (Optional) Should the peer's certificate be verified by cURL?
  1264. *
  1265. * Default is FALSE.
  1266. *
  1267. * <i>This option can also be set using the {@link option()} method and
  1268. * setting </i> <b>CURLOPT_SSL_VERIFYPEER</b> <i> option to the desired value</i>.
  1269. *
  1270. * @param integer $verify_host (Optional) Specifies whether or not to check the existence of a common
  1271. * name in the SSL peer certificate and that it matches with the provided
  1272. * hostname.
  1273. *
  1274. * - 1 to check the existence of a common name in the SSL peer certificate;
  1275. * - 2 to check the existence of a common name and also verify that it
  1276. * matches the hostname provided; in production environments the value
  1277. * of this option should be kept at 2;
  1278. *
  1279. * Default is 2
  1280. *
  1281. * <samp>Support for value 1 removed in cURL 7.28.1</samp>
  1282. *
  1283. * <i>This option can also be set using the {@link option()} method and
  1284. * setting </i> <b>CURLOPT_SSL_VERIFYHOST</b> <i> option to the desired value</i>.
  1285. *
  1286. * @param mixed $file (Optional) An absolute path to a file holding one or more certificates to
  1287. * verify the peer with. This only makes sense if <b>CURLOPT_SSL_VERIFYPEER</b>
  1288. * is set to TRUE.
  1289. *
  1290. * Default is FALSE.
  1291. *
  1292. * <i>This option can also be set using the {@link option()} method and
  1293. * setting </i> <b>CURLOPT_CAINFO</b> <i> option to the desired value</i>.
  1294. *
  1295. * @param mixed $path (Optional) An absolute path to a directory that holds multiple CA
  1296. * certificates. This only makes sense if <b>CURLOPT_SSL_VERIFYPEER</b> is
  1297. * set to TRUE.
  1298. *
  1299. * Default is FALSE.
  1300. *
  1301. * <i>This option can also be set using the {@link option()} method and
  1302. * setting </i> <b>CURLOPT_CAPATH</b> <i> option to the desired value</i>.
  1303. *
  1304. * @return null
  1305. */
  public function ssl($verify_peer = false, $verify_host = 2, $file = false, $path = false)
  {
      // peer and host verification settings are always applied
      $verification = array(
          CURLOPT_SSL_VERIFYPEER => $verify_peer,
          CURLOPT_SSL_VERIFYHOST => $verify_host,
      );
      $this->option($verification);

      // a certificate bundle file was given
      if ($file !== false) {

          if (!is_file($file)) {
              // the file must exist
              trigger_error('File "' . $file . '", holding one or more certificates to verify the peer with, was not found!', E_USER_ERROR);
          } else {
              $this->option(CURLOPT_CAINFO, $file);
          }

      }

      // a directory holding CA certificates was given
      if ($path !== false) {

          if (!is_dir($path)) {
              // the directory must exist
              trigger_error('Directory "' . $path . '", holding one or more CA certificates to verify the peer with, was not found!', E_USER_ERROR);
          } else {
              $this->option(CURLOPT_CAPATH, $path);
          }

      }
  }
  1326. /**
  1327. * Returns the set options in "human-readable" format.
  1328. *
  1329. * @return string Returns the set options in "human-readable" format.
  1330. *
  1331. * @access private
  1332. */
  private function _debug()
  {
      $result = '';

      // iterate through the defined constants
      foreach (get_defined_constants() as $name => $number) {

          // only constants whose name starts with "CURLOPT" are of interest
          // (checked once per constant instead of once per constant/option pair)
          if (strncmp($name, 'CURLOPT', 7) !== 0) continue;

          // only integers and strings can be array keys
          if (!is_int($number) && !is_string($number)) continue;

          // skip constants that do not correspond to an option that is currently set
          if (!array_key_exists($number, $this->options)) continue;

          $value = $this->options[$number];

          // scalar values are printed as they are; anything else (an array of headers, a callback)
          // is dumped via print_r() because it cannot be safely concatenated to a string
          $result .= $name . ' => ' . (is_scalar($value) ? $value : print_r($value, true)) . '<br>';

      }

      // return the result
      return $result;
  }
  1345. /**
  1346. * A helper method used by the {@link _process()} method to process request and response headers. It parses a string
  1347. * containing one or more HTTP headers and returns an array of headers where each entry also contains an associative
  1348. * array of <i>name</i> => <i>value</i> for each row of data in the respective header.
  1349. *
  1350. * @param string $headers A string containing one or more HTTP headers, where multiple headers are separated by
  1351. * a blank line.
  1352. *
  1353. * @return mixed Returns an array of headers where each entry also contains an associative array of
  1354. * <i>name</i> => <i>value</i> for each row of data in the respective header.
  1355. *
  1356. * If the given string is empty, this method returns an empty array.
  1357. *
  1358. * @access private
  1359. */
  private function _parse_headers($headers)
  {
      $parsed = array();

      // nothing to parse: return the empty array
      if ($headers == '') return $parsed;

      // the first line of a header has no name of its own, so one is prepended;
      // which name is used depends on whether a second (undeclared) argument was passed
      $first_line_name = func_num_args() == 2 ? 'Request Method: ' : 'Status: ';

      // multiple headers are separated by blank lines
      $blocks = preg_split('/^\s*$/m', trim($headers));

      foreach ($blocks as $position => $block) {

          // collect every "[name]: [value]" line of this header
          preg_match_all('/^(.*?)\:\s(.*)$/m', $first_line_name . trim($block), $found);

          // store each value under its name, for the current header
          foreach ($found[1] as $row => $name) {
              $parsed[$position][$name] = trim($found[2][$row]);
          }

      }

      // return headers as an array
      return $parsed;
  }
  1382. /**
  1383. * Does the actual work.
  1384. *
  1385. * @return null
  1386. *
  1387. * @access private
  1388. */
  private function _process($urls, $callback = '')
  {
      // $urls      a single URL or an array of URLs to process
      // $callback  when not an empty string, a callable run once per URL; it receives the result as its first
      //            argument, followed by any extra arguments that were given to this method after $callback

      // if caching is enabled but path doesn't exist or is not writable
      if ($this->cache !== false && (!is_dir($this->cache['path']) || !is_writable($this->cache['path'])))

          // trigger an error and stop execution
          trigger_error('Cache path does not exists or is not writable!', E_USER_ERROR);

      // if a callback was given but it cannot be called
      if ($callback != '' && !is_callable($callback))

          // trigger an error and stop execution
          trigger_error('Callback function "' . $callback . '" does not exist!', E_USER_ERROR);

      // make sure we are always working with an array of URLs
      $urls = !is_array($urls) ? (array)$urls : $urls;

      // any arguments given after the first 2 are forwarded to the callback function
      $arguments = func_get_args();
      $extra_arguments = array_slice($arguments, 2);

      // only if we're making a GET request, and caching is enabled
      if (isset($this->options[CURLOPT_HTTPGET]) && $this->options[CURLOPT_HTTPGET] == 1 && $this->cache !== false) {

          // iterate through the URLs
          foreach ($urls as $url) {

              // get the path to the cache file associated with the URL
              $cache_path = rtrim($this->cache['path'], '/') . '/' . md5($url);

              // if cache file exists and is not expired
              if (file_exists($cache_path) && filemtime($cache_path) + $this->cache['lifetime'] > time()) {

                  // if we have a callback
                  if ($callback != '') {

                      // NOTE(review): unserialize() is run on whatever is found in the cache file; this is only
                      // safe as long as nothing but this class can write to the cache path - confirm
                      $cached_result = unserialize($this->cache['compress'] ? gzuncompress(file_get_contents($cache_path)) : file_get_contents($cache_path));

                      // feed the cached result, and any additional arguments, to the callback function
                      call_user_func_array($callback, array_merge(array($cached_result), $extra_arguments));

                  }

              // if no cache file, or cache file is expired
              } else $this->_queue[] = $url;

          }

      // if we're not making a GET request or caching is disabled, we don't bother with cache: we need to process all the URLs
      } else $this->_queue = $urls;

      // if there are any URLs to process
      if (!empty($this->_queue)) {

          // initialize the multi handle
          // this will allow us to process multiple cURL handles in parallel
          $this->_multi_handle = curl_multi_init();

          // queue the first batch of URLs
          // (as many as defined by the "threads" property or less if there aren't as many URLs)
          $this->_queue_requests();

          $running = null;

          // loop
          do {

              // becomes TRUE if new handles get added to the multi handle during this pass
              $requeued = false;

              // get status update
              while (($status = curl_multi_exec($this->_multi_handle, $running)) == CURLM_CALL_MULTI_PERFORM);

              // stop if cURL reported anything other than success for the multi handle
              if ($status != CURLM_OK) break;

              // if a request was just completed, we'll have to find out which one
              while ($info = curl_multi_info_read($this->_multi_handle)) {

                  // get handle of the completed request
                  $handle = $info['handle'];

                  // get content associated with the handle
                  $content = curl_multi_getcontent($handle);

                  // get the handle's ID
                  // (for resources this is the same number that is left after removing "Resource id #" from the
                  // handle's string form; handles that are objects, as returned by PHP 8+, cannot be converted
                  // to a string and are identified by their object ID instead)
                  $resource_number = is_object($handle) ? spl_object_id($handle) : (int)$handle;

                  // create a new object in which we will store all the data associated with the handle,
                  // as properties of this object
                  $result = new stdClass();

                  // get information about the request
                  $result->info = curl_getinfo($handle);

                  // extend the "info" property with the original URL
                  $result->info = array('original_url' => $this->_info['fh' . $resource_number]['original_url']) + $result->info;

                  // last request headers
                  $result->headers['last_request'] =

                      (

                          // if CURLINFO_HEADER_OUT is set
                          isset($this->options[CURLINFO_HEADER_OUT]) &&

                          // if CURLINFO_HEADER_OUT is TRUE
                          $this->options[CURLINFO_HEADER_OUT] == 1 &&

                          // if we actually have this information
                          isset($result->info['request_header'])

                      // extract request headers
                      ) ? $this->_parse_headers($result->info['request_header'], true) : '';

                  // remove request headers information from its previous location
                  unset($result->info['request_header']);

                  // get headers (unless we were explicitly told not to)
                  $result->headers['responses'] = (isset($this->options[CURLOPT_HEADER]) && $this->options[CURLOPT_HEADER] == 1) ?

                      $this->_parse_headers(substr($content, 0, $result->info['header_size'])) :

                      '';

                  // get output (unless we were explicitly told not to)
                  $result->body = (!isset($this->options[CURLOPT_NOBODY]) || (isset($this->options[CURLOPT_NOBODY]) && $this->options[CURLOPT_NOBODY] == 0)) ?

                      ((isset($this->options[CURLOPT_HEADER]) && $this->options[CURLOPT_HEADER] == 1) ?

                          substr($content, $result->info['header_size']) :

                          $content) :

                      '';

                  // if we have a body, we're not doing a binary transfer, and _htmlentities is set to TRUE, run htmlentities() on it
                  if (!empty($result->body) && !isset($this->options[CURLOPT_BINARYTRANSFER]) && $this->_htmlentities) $result->body = htmlentities($result->body);

                  // get CURLs response code and associated message
                  $result->response = array($this->_response_messages[$info['result']], $info['result']);

                  // if we have a callback
                  if ($callback != '') {

                      // feed the "result" object, and any additional arguments, to the callback function
                      // and save the callback's response, if any
                      $callback_response = call_user_func_array($callback, array_merge(array($result), $extra_arguments));

                  // if no callback function, we assume the response is TRUE
                  } else $callback_response = true;

                  // if caching is enabled and we're making a GET request *and* the callback function did not return FALSE
                  if ($this->cache !== false && isset($this->options[CURLOPT_HTTPGET]) && $this->options[CURLOPT_HTTPGET] == 1 && $callback_response !== false) {

                      // get the path to the cache file associated with the URL
                      $cache_path = rtrim($this->cache['path'], '/') . '/' . md5($result->info['original_url']);

                      // cache the result
                      file_put_contents($cache_path, $this->cache['compress'] ? gzcompress(serialize($result)) : serialize($result));

                      // set rights on the file
                      chmod($cache_path, intval($this->cache['chmod'], 8));

                  }

                  // if there are more URLs to process, queue more of them
                  if (!empty($this->_queue)) {

                      $this->_queue_requests();

                      // remember that handles were added *after* "$running" was last updated
                      $requeued = true;

                  }

                  // remove the handle that we finished processing
                  // this needs to be done *after* we've already queued a new URL for processing
                  curl_multi_remove_handle($this->_multi_handle, $handle);

                  // make sure the handle gets closed
                  curl_close($handle);

                  // if we're downloading something
                  if (isset($this->options[CURLOPT_BINARYTRANSFER]) && $this->options[CURLOPT_BINARYTRANSFER])

                      // close the associated file pointer
                      fclose($this->_info['fh' . $resource_number]['file_handler']);

                  // remove information associated with this resource
                  unset($this->_info['fh' . $resource_number]);

              }

              // waits until there is activity on any of the handles or until the timeout, whatever happens first
              // perform a usleep if a select returns -1 - workaround for PHP bug: https://bugs.php.net/bug.php?id=61141
              if ($running && curl_multi_select($this->_multi_handle) === -1) usleep(100);

          // as long as there are threads running, or handles were added during this pass
          // ("$running" is reported by curl_multi_exec() before finished requests get replaced with queued ones, so
          // it can be 0 while freshly added handles are still waiting to be executed; without the second check those
          // requests would never be made)
          } while ($running || $requeued);

          // close the multi curl handle
          curl_multi_close($this->_multi_handle);

      }
  }
  1532. /**
  1533. * A helper method used by the {@link _process()} method, which takes care of keeping a constant number of requests
  1534. * queued, so that as soon as one request finishes another one will instantly take its place, thus making sure that
  1535. * the maximum allowed number of parallel threads are running all the time.
  1536. *
  1537. * @return null
  1538. *
  1539. * @access private
  1540. */
  private function _queue_requests()
  {
      // get the length of the queue
      $queue_length = count($this->_queue);

      // queue as many URLs as defined by the "threads" property, or less if there aren't as many URLs
      // NOTE(review): this many URLs are added on *every* call; since _process() calls this method each time a
      // request completes, more than "threads" requests may end up running at once - confirm whether intended
      $batch_size = $queue_length < $this->threads ? $queue_length : $this->threads;

      // iterate through the items in the queue
      for ($i = 0; $i < $batch_size; $i++) {

          // remove first URL from the queue
          $url = array_shift($this->_queue);

          // initialize individual cURL handle with the URL
          $handle = curl_init($url);

          // make sure defaults are set
          $this->_set_defaults();

          // get the handle's ID
          // (for resources this is the same number that is left after removing "Resource id #" from the handle's
          // string form; handles that are objects, as returned by PHP 8+, cannot be converted to a string and
          // are identified by their object ID instead)
          $resource_number = is_object($handle) ? spl_object_id($handle) : (int)$handle;

          // save the original URL
          // (because there may be redirects, and because "curl_getinfo" returns information only about the last
          // request, this can be lost otherwise)
          $this->_info['fh' . $resource_number]['original_url'] = $url;

          // if we're downloading something
          if (isset($this->options[CURLOPT_BINARYTRANSFER]) && $this->options[CURLOPT_BINARYTRANSFER]) {

              // open a file and save the file pointer
              $this->_info['fh' . $resource_number]['file_handler'] = fopen($this->download_path . basename($url), 'w');

              // no headers
              $this->option(CURLOPT_HEADER, 0);

              // tell cURL to use the file for streaming the download
              $this->option(CURLOPT_FILE, $this->_info['fh' . $resource_number]['file_handler']);

          }

          // set options for the handle
          curl_setopt_array($handle, $this->options);

          // add the normal handle to the multi handle
          curl_multi_add_handle($this->_multi_handle, $handle);

      }
  }
  1574. /**
  1575. * A helper method used by the {@link _process()} method, which sets the default cURL options for each request.
  1576. *
  1577. * @return null
  1578. *
  1579. * @access private
  1580. */
  private function _set_defaults()
  {
      // options that get a fallback value only when they have not been explicitly set
      $fallbacks = array(

          // automatically set the "Referer:" field where it follows a "Location:" redirect
          CURLOPT_AUTOREFERER     =>  1,

          // name of the file containing the cookie data; with an empty string no cookies are loaded, but cookie
          // handling is still enabled
          CURLOPT_COOKIEFILE      =>  '',

          // the number of seconds to wait while trying to connect
          CURLOPT_CONNECTTIMEOUT  =>  10,

          // follow any "Location:" header sent by the server (as many as allowed by CURLOPT_MAXREDIRS)
          CURLOPT_FOLLOWLOCATION  =>  1,

          // include the response header(s) as a property of the object given as argument to the callback
          CURLOPT_HEADER          =>  1,

          // include the last request headers as a property of the object given as argument to the callback
          CURLINFO_HEADER_OUT     =>  1,

          // the maximum amount of HTTP redirections to follow; used together with CURLOPT_FOLLOWLOCATION
          CURLOPT_MAXREDIRS       =>  50,

          // the maximum number of seconds to allow cURL functions to execute
          CURLOPT_TIMEOUT         =>  30,

      );

      // apply each fallback, in the order listed above, unless the option already has a value
      foreach ($fallbacks as $option => $fallback) {

          if (isset($this->options[$option])) continue;

          $this->option($option, $fallback);

      }

      // the user agent is generated only when it is actually needed
      // (some services/websites will block the request if there's no/invalid user agent)
      // note that the user agent will change whenever you run the script!
      if (!isset($this->options[CURLOPT_USERAGENT])) {

          $this->option(CURLOPT_USERAGENT, $this->_user_agent());

      }

      // "CURLOPT_RETURNTRANSFER" is always set to TRUE, regardless of any previous value
      // (return the transfer as a string instead of outputting it directly)
      $this->option(CURLOPT_RETURNTRANSFER, 1);
  }
  1617. /**
  1618. * Generates a (slightly) random user agent (Internet Explorer 9 or 10, on Windows Vista, 7 or 8, with other extra
  1619. * strings)
  1620. *
  1621. * Some web services will not respond unless a valid user-agent string is provided.
  1622. *
   * @return string Returns the generated user agent string.
  1624. *
  1625. * @access private
  1626. */
  private function _user_agent()
  {
      // browser version: 9 or 10
      $version = rand(9, 10);

      // windows version; here are the meanings:
      // Windows NT 6.2 -> Windows 8      // can have IE10
      // Windows NT 6.1 -> Windows 7      // can have IE9 or IE10
      // Windows NT 6.0 -> Windows Vista  // can have IE9
      $major_version = 6;

      $minor_version =

          // for IE9 Windows can have "0" or "1" as minor version number (Vista or 7, but not 8)
          $version == 9 ? rand(0, 1) :

          // for IE10 Windows can have "1" or "2" as minor version number (7 or 8, but not Vista)
          rand(1, 2);

      // add some extra information
      // (0 adds nothing, 1 adds "WOW64", 2 adds "Win64; IA64" and 3 adds "Win64; x64")
      $extras = rand(0, 3);

      // return the random user agent string
      return 'Mozilla/5.0 (compatible; MSIE ' . $version . '.0; Windows NT ' . $major_version . '.' . $minor_version . ($extras == 1 ? '; WOW64' : ($extras == 2 ? '; Win64; IA64' : ($extras == 3 ? '; Win64; x64' : ''))) . ')';
  }
  1646. }
  1647. ?>