PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/magmi/inc/remotefilegetter.php

https://gitlab.com/myurd/magmi-git
PHP | 348 lines | 269 code | 40 blank | 39 comment | 49 complexity | 51d71fed2dccae834fd063aace099dcf MD5 | raw file
  1. <?php
  /**
   * Base contract for helpers that probe and download remote files.
   *
   * Holds the state shared by every implementation: optional credentials,
   * an optional logger and the error information recorded by the concrete
   * getter.
   */
  abstract class RemoteFileGetter
  {
      /** Error details stored by concrete getters; stays unset until one writes to it. */
      protected $_errors;
      /** User name given through setCredentials(). */
      protected $_user;
      /** Password given through setCredentials(). */
      protected $_password;
      /** Optional logger; must expose a log($data) method when set. */
      protected $_logger = null;

      /**
       * Tells whether the given URL can be reached.
       *
       * @param string $url
       */
      abstract public function urlExists($url);

      /**
       * Copies the resource behind $url to the local path $dest.
       *
       * @param string $url
       * @param string $dest
       */
      abstract public function copyRemoteFile($url, $dest);

      /**
       * Registers the logger that log() forwards to.
       *
       * @param object $logger
       */
      public function setLogger($logger)
      {
          $this->_logger = $logger;
      }

      /**
       * Forwards $data to the registered logger; does nothing without one.
       *
       * @param mixed $data
       */
      public function log($data)
      {
          if ($this->_logger == null) {
              return;
          }
          $this->_logger->log($data);
      }

      /**
       * Stores the credentials for later use by the concrete getter.
       *
       * @param string|null $user
       * @param string|null $passwd
       */
      public function setCredentials($user = null, $passwd = null)
      {
          $this->_password = $passwd;
          $this->_user = $user;
      }

      /**
       * Returns whatever error information is currently stored.
       */
      public function getErrors()
      {
          return $this->_errors;
      }
  }
  /**
   * Remote file getter backed by the cURL extension.
   *
   * Supports http, https and ftp URLs. Credentials ($_user / $_password) and
   * setCredentials() are inherited from RemoteFileGetter instead of being
   * redeclared here.
   */
  class CURL_RemoteFileGetter extends RemoteFileGetter
  {
      /** Cookie string sent with HTTP(S) downloads, see setCookie(). */
      protected $_cookie;
      protected $_lookup_opts;
      protected $_dl_opts;
      protected $_lookup;
      protected $_protocol;
      protected $_creds;
      /** Base cURL options indexed by scheme, then by mode ('lookup' / 'dl'). */
      protected $_opts;

      /**
       * Precomputes the base cURL options for every supported scheme/mode.
       * ftp has no 'lookup' entry, so urlExists() assumes ftp URLs exist.
       */
      public function __construct()
      {
          $this->_opts = array(
              'http' => array(
                  'lookup' => $this->initBaseOptions('http', 'lookup'),
                  'dl' => $this->initBaseOptions('http', 'dl'),
              ),
              'https' => array(
                  'lookup' => $this->initBaseOptions('https', 'lookup'),
                  'dl' => $this->initBaseOptions('https', 'dl'),
              ),
              'ftp' => array(
                  'dl' => $this->initBaseOptions('ftp', 'dl'),
              ),
          );
      }

      /**
       * Tells whether CURLOPT_FOLLOWLOCATION may be enabled under the current
       * PHP configuration (no open_basedir restriction, safe_mode not active).
       *
       * @return bool
       */
      protected function canFollowLocation()
      {
          // ini_get() yields false when the directive does not exist, which
          // casts to '' and is treated as "off".
          $safeMode = strtolower((string) ini_get('safe_mode'));
          $safeModeOff = in_array($safeMode, array('', '0', 'off', 'false', 'no'), true);

          return ini_get('open_basedir') == '' && $safeModeOff;
      }

      /**
       * Builds the base cURL option array for a protocol and a mode.
       *
       * @param string $protocol 'http', 'https' or 'ftp'
       * @param string $mode     'lookup' (header-only probe) or 'dl' (download)
       *
       * @return array cURL options keyed by CURLOPT_* constants; empty for an
       *               unknown protocol
       */
      public function initBaseOptions($protocol, $mode)
      {
          $curlopts = array();
          switch ($protocol) {
              case 'http':
              case 'https':
                  switch ($mode) {
                      case 'lookup':
                          $curlopts = array(
                              // we want the response
                              CURLOPT_RETURNTRANSFER => true,
                              // we want the headers
                              CURLOPT_HEADER => true,
                              // we don't want the body
                              CURLOPT_NOBODY => true,
                              // some stats on target
                              CURLOPT_FILETIME => true,
                          );
                          break;
                      case 'dl':
                          // CURLOPT_BINARYTRANSFER is intentionally not set: it is a
                          // deprecated no-op in current PHP versions.
                          $curlopts = array(
                              // force get
                              CURLOPT_HTTPGET => true,
                              // no header
                              CURLOPT_HEADER => false,
                              // we want body
                              CURLOPT_NOBODY => false,
                              // handle 100 continue
                              CURLOPT_HTTPHEADER => array('Expect:'),
                              // we don't want the response as we will store it in a file
                              CURLOPT_RETURNTRANSFER => false,
                          );
                          break;
                      default:
                          break;
                  }
                  // only follow redirects when the PHP settings allow it
                  if ($this->canFollowLocation()) {
                      $curlopts[CURLOPT_FOLLOWLOCATION] = 1;
                  }
                  break;
              case 'ftp':
                  $curlopts = array(
                      // longer timeouts for big files
                      CURLOPT_TIMEOUT => 300,
                      CURLOPT_FOLLOWLOCATION => 1,
                      // better compatibility with some FTP servers
                      CURLOPT_FTP_USE_EPSV => 0,
                      // no need to return anything, we'll have a file pointer
                      CURLOPT_RETURNTRANSFER => 0,
                  );
                  break;
          }

          return $curlopts;
      }

      /**
       * Adds authentication options to $opts unless CURLOPT_USERPWD is already set.
       *
       * When $user is null, the credentials stored with setCredentials() are used.
       *
       * @param array       $context context created by createContext()
       * @param array       $opts    cURL options, modified in place
       * @param string|null $user
       * @param string|null $pass
       */
      public function setAuthOptions($context, &$opts, $user = null, $pass = null)
      {
          if ($user == null) {
              $user = $this->_user;
              $pass = $this->_password;
          }
          $creds = "";
          if ($user) {
              $creds = $user . ":";
          }
          if ($pass) {
              $creds .= $pass;
          }
          if ($creds == "" || isset($opts[CURLOPT_USERPWD])) {
              return;
          }
          if (substr($context['scheme'], 0, 4) == "http") {
              $opts[CURLOPT_HTTPAUTH] = CURLAUTH_ANY;
              $opts[CURLOPT_UNRESTRICTED_AUTH] = true;
          }
          $opts[CURLOPT_USERPWD] = $creds;
      }

      /**
       * Creates a cURL context (handle + per-mode options) for a URL.
       *
       * @param string $url
       *
       * @return array keys: 'curlhandle', 'opts', 'scheme' and, when the URL
       *               embeds a user, 'creds' as array(user, pass|null)
       *
       * @throws Exception when the URL cannot be parsed or its scheme is unsupported
       */
      public function createContext($url)
      {
          $comps = parse_url($url);
          // parse_url() omits 'scheme' for relative/scheme-less URLs
          $scheme = (is_array($comps) && isset($comps['scheme'])) ? strtolower($comps['scheme']) : null;
          if ($scheme === null || !isset($this->_opts[$scheme])) {
              throw new Exception("Unsupported URL : $url");
          }
          $ctx = array(
              "curlhandle" => curl_init(),
              "opts" => $this->_opts[$scheme],
              "scheme" => $scheme,
          );
          // inline user/pass if present in the URL (parse_url names the key 'pass')
          if (isset($comps['user'])) {
              $ctx["creds"] = array($comps['user'], isset($comps['pass']) ? $comps['pass'] : null);
          }

          return $ctx;
      }

      /**
       * Releases the cURL handle held by a context.
       *
       * @param array $context context created by createContext()
       */
      public function destroyContext($context)
      {
          curl_close($context["curlhandle"]);
      }

      /**
       * Checks whether a remote URL exists.
       *
       * Schemes without 'lookup' options are assumed to exist. For HTTP(S) a
       * header-only request is made; a 503 or 403 answer is retried once after
       * half a second. A failed transfer counts as "does not exist".
       *
       * @param string $remoteurl
       *
       * @return bool
       *
       * @throws Exception when the URL is unsupported (from createContext())
       */
      public function urlExists($remoteurl)
      {
          $context = $this->createContext($remoteurl);
          // assume existing urls when no lookup is possible for the scheme
          if (!isset($context["opts"]["lookup"])) {
              $this->destroyContext($context);
              return true;
          }
          $ch = $context["curlhandle"];
          $opts = $context["opts"]["lookup"];
          $this->setAuthOptions($context, $opts);
          $this->setURLOptions($remoteurl, $opts);
          curl_setopt_array($ch, $opts);

          $response = curl_exec($ch);
          // a failed transfer never proves that the target exists
          $exists = ($response !== false);
          if ($context['scheme'] == "http" || $context['scheme'] == "https") {
              $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
              if ($httpCode == 503 || $httpCode == 403) {
                  // retry once after half a second
                  usleep(500000);
                  $response = curl_exec($ch);
                  $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
              }
              // the HTTP code stays 0 when no response was received at all
              $exists = ($response !== false && $httpCode < 400);
          }
          $this->destroyContext($context);

          return $exists;
      }

      /**
       * Sets the cookie string sent with HTTP(S) downloads.
       *
       * @param string|null $cookie
       */
      public function setCookie($cookie = null)
      {
          $this->_cookie = $cookie;
      }

      /**
       * Downloads $url to $dest, recording failures instead of throwing.
       *
       * @param string $url
       * @param string $dest
       *
       * @return bool true on success; false on failure, details via getErrors()
       */
      public function copyRemoteFile($url, $dest)
      {
          $result = false;
          $this->_errors = array();
          try {
              // the cookie is the FOURTH argument; the third one is $authmode
              $result = $this->getRemoteFile($url, $dest, null, $this->_cookie);
          } catch (Exception $e) {
              $this->_errors = array("type" => "source error", "message" => $e->getMessage(), "exception" => $e);
          }

          return $result;
      }

      /**
       * Stores the URL in a cURL option array, escaping spaces.
       *
       * @param string $url
       * @param array  $optab cURL options, modified in place
       */
      public function setURLOptions($url, &$optab)
      {
          $optab[CURLOPT_URL] = str_replace(" ", "%20", $url);
      }

      /**
       * Downloads a remote file to a local path.
       *
       * @param string      $url
       * @param string      $dest     local path, opened for writing
       * @param mixed       $authmode not used by this implementation
       * @param string|null $cookies  cookie string, only applied to HTTP(S) URLs
       *
       * @return bool true on success
       *
       * @throws Exception when the URL is unsupported, the destination cannot be
       *                   opened, the transfer fails or the HTTP status is >= 400
       */
      public function getRemoteFile($url, $dest, $authmode = null, $cookies = null)
      {
          $context = $this->createContext($url);
          $ch = $context['curlhandle'];
          $dl_opts = $context['opts']['dl'];
          $isHttp = ($context['scheme'] == 'http' || $context['scheme'] == 'https');
          if ($cookies && $isHttp) {
              $dl_opts[CURLOPT_COOKIE] = $cookies;
          }
          $fp = fopen($dest, "w");
          if ($fp === false) {
              $this->destroyContext($context);
              throw new Exception("Cannot write file:$dest");
          }
          $dl_opts[CURLOPT_FILE] = $fp;
          $this->setURLOptions($url, $dl_opts);
          $this->setAuthOptions($context, $dl_opts);
          curl_setopt_array($ch, $dl_opts);

          $err = null;
          if (!curl_exec($ch)) {
              $curlError = curl_error($ch);
              if ($curlError != "") {
                  $err = "Cannot fetch $url :" . $curlError;
              } else {
                  $err = "CURL Error downloading $url";
              }
          } elseif ($isHttp && curl_getinfo($ch, CURLINFO_HTTP_CODE) >= 400) {
              $err = 'Cannot fetch URL :' . $url;
          }

          // single cleanup path so neither the handle nor the file leaks
          $this->destroyContext($context);
          fclose($fp);
          if ($err !== null) {
              // do not leave a partial or error-page file behind
              if (is_file($dest)) {
                  unlink($dest);
              }
              throw new Exception($err);
          }

          return true;
      }
  }
  /**
   * Remote file getter relying on PHP's URL-aware fopen()/copy().
   */
  class URLFopen_RemoteFileGetter extends RemoteFileGetter
  {
      /**
       * Checks whether $url can be opened for reading.
       *
       * @param string $url
       *
       * @return bool true when fopen() succeeds, false otherwise
       */
      public function urlExists($url)
      {
          // warnings are suppressed on purpose: failure is reported via the return value
          $h = @fopen($url, "r");
          if ($h === false) {
              return false;
          }
          fclose($h);

          return true;
      }

      /**
       * Copies the resource behind $url to $dest.
       *
       * @param string $url
       * @param string $dest
       *
       * @return bool true on success; false on failure, details via getErrors()
       */
      public function copyRemoteFile($url, $dest)
      {
          // start from a clean state so errors of a previous call do not linger
          $this->_errors = array();
          if (!$this->urlExists($url)) {
              $this->_errors = array("type" => "target error", "message" => "URL $url is unreachable");
              return false;
          }
          $ok = @copy($url, $dest);
          if (!$ok) {
              $this->_errors = error_get_last();
          }

          return $ok;
      }
  }
  /**
   * Hands out one shared remote file getter per identifier.
   */
  class RemoteFileGetterFactory
  {
      /** Getters already created, indexed by identifier. */
      private static $__fginsts = array();

      /**
       * Returns the getter registered under $id, creating it on first use.
       *
       * The cURL-based getter is chosen when curl_init() is available,
       * the fopen-based one otherwise.
       *
       * @param string $id
       */
      public static function getFGInstance($id = "default")
      {
          if (isset(self::$__fginsts[$id])) {
              return self::$__fginsts[$id];
          }

          $getter = function_exists("curl_init")
              ? new CURL_RemoteFileGetter()
              : new URLFopen_RemoteFileGetter();
          self::$__fginsts[$id] = $getter;

          return $getter;
      }
  }