PageRenderTime 74ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/public/external/pydio/plugins/access.s3/aS3StreamWrapper/lib/wrapper/aS3StreamWrapper.class.php

https://github.com/costinu/cms
PHP | 1364 lines | 862 code | 97 blank | 405 comment | 126 complexity | 3cec160277cb21c2ccdc6cef6fc04f4f MD5 | raw file
Possible License(s): BSD-2-Clause, Apache-2.0, LGPL-3.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, BSD-3-Clause, LGPL-2.0, AGPL-3.0
  1. <?php
  2. /**
  3. * TODO:
  4. *
  5. * optional buffering on disk rather than in RAM
  6. * Use of ../ within a URL should be allowed (and automatically resolved) for ease of coding
  7. *
  8. * A stream wrapper for Amazon S3 based on Amazon's official PHP API.
  9. * Amazon S3 files can be accessed at s3://bucketname/path/to/object. Unlike other
  10. * wrappers, this wrapper supports opendir/readdir/closedir for any subdirectory level
  11. * (to the limit of S3 key length). Although S3 does not have a concept exactly matching
  12. * subdirectories, it can do prefix matches and return common prefixes, which is just as good.
  13. * Just keep in mind that you don't really have to create or remove directories. mkdir and rmdir
  14. * return success and do nothing (however, rmdir fails if the "directory" is not empty in order to
  15. * better emulate what regular filesystems do since some code may rely on this behavior).
  16. * Any subdirectories returned by readdir() have a trailing / attached to allow both your code
  17. * and the stat() function to distinguish them from files without an expensive network call.
  18. *
  19. * THERE IS A 5GB LIMIT ON EACH FILE, AND YOUR PHP MEMORY LIMIT WILL PROBABLY STOP YOU LONG
  20. * BEFORE YOU GET THERE, since currently everything is read and written as a complete file and
  21. * buffered in its entirety in memory. I'm looking into changing this, but one step at a time.
  22. *
  23. * Usage (with a public ACL so people can see your files via the web):
  24. *
  25. * $wrapper = new aS3StreamWrapper();
  26. * $wrapper->register(array('key' => 'xyz', 'secretKey' => 'abc', 'region' => AmazonS3::REGION_US_E1, 'acl' => AmazonS3::ACL_PUBLIC));
  27. * Now fopen("s3://mybucket/path/to/myobject.txt", "r") and friends work.
  28. *
  29. * You can get buffering of the first 8K of every file and the stat() results in a local cache by passing a
  30. * cache option, which must be an object supporting get($key) and set($key, $value, $lifetime_in_seconds)
  31. * (such as any implementation of sfCache). This cache must be consistently consulted by all
  32. * servers of course or it will not work properly
  33. *
  34. * Built for Apostrophe apostrophenow.com
  35. *
  36. * @class aS3StreamWrapper
  37. * @license BSD
  38. * @author tom@punkave.com Tom Boutell of P'unk Avenue
  39. */
  40. require dirname(__FILE__) . '/../../../aws-sdk/sdk.class.php';
  41. require dirname(__FILE__) . '/aS3StreamWrapperMimeTypes.class.php';
  42. class aS3StreamWrapper
  43. {
  /**
   * Intentionally empty constructor. Create the object with new and then
   * call register() on it.
   *
   * Internal dev note: PHP reportedly only invokes a stream wrapper's
   * constructor as part of stream_open, so no setup logic belongs here.
   */
  public function __construct()
  {
      // Nothing to initialise
  }
  53. /**
  54. * Options specified in the register() call. These can be overridden
  55. * for individual streams using a stream context
  56. */
  57. static protected $protocolOptions;
  58. static protected $registeredInstances = array();
  59. static protected $statCache;
  60. /**
  61. * Stream context, set by fopen and friends if stream_context_create was used.
  62. * We can call stream_context_get_options on this
  63. */
  64. public $context;
  65. /**
  66. * Final set of options arrived at by merging the above
  67. */
  68. protected $options;
  69. /**
  70. * Protocol name. This is usually s3 but you can register more than one, and
  71. * we use this to distinguish sets of options. Usually $this->init() sets this
  72. * from the $path but the rename() operation, which takes two paths, sets it
  73. * on its own
  74. */
  75. protected $protocol;
  /**
   * Look up a single option for the current stream.
   *
   * The effective option set is computed once per wrapper instance: it starts
   * with the options given to register() for $this->protocol and is then
   * overlaid with whatever was passed under the 's3' key of
   * stream_context_create() for this particular stream.
   *
   * @param string $o       Option name
   * @param mixed  $default Value returned when the option is absent (or null)
   * @return mixed
   */
  protected function getOption($o, $default = null)
  {
      if (!isset($this->options))
      {
          // Base layer: whatever register() stored for this protocol, if anything
          $merged = isset(self::$protocolOptions[$this->protocol])
              ? self::$protocolOptions[$this->protocol]
              : array();
          if ($this->context)
          {
              // Per-stream overrides win over the registration-time values
              $contextOptions = stream_context_get_options($this->context);
              if (isset($contextOptions['s3']))
              {
                  $merged = array_merge($merged, $contextOptions['s3']);
              }
          }
          $this->options = $merged;
      }
      return isset($this->options[$o]) ? $this->options[$o] : $default;
  }
  /**
   * Region option for this stream, falling back to AmazonS3::REGION_US_E1
   * when no 'region' option was supplied.
   */
  protected function getRegion()
  {
      $fallback = AmazonS3::REGION_US_E1;
      return $this->getOption('region', $fallback);
  }
  /**
   * Register the stream wrapper. Options passed here are available to
   * the stream wrapper methods at any time via getOption(). You can
   * register several s3 "protocols" with different credentials (the default
   * protocol name is s3).
   *
   * The following options are required, either here or in stream_context_create():
   * 'key', 'secretKey'
   *
   * The optional 'region' option specifies what Amazon S3 region to create
   * new buckets in, if you choose to create buckets with mkdir() calls.
   * It defaults to AmazonS3::REGION_US_E1. See services/s3.class.php in the SDK
   *
   * The optional 'protocol' option changes the name of the protocol from s3 to
   * something else.
   *
   * @param array $options Registration-time options, stored per protocol
   * @return bool Result of stream_wrapper_register(): false when PHP refused
   *              the registration (for example the protocol already has a handler)
   */
  public function register(array $options = array())
  {
      // Every protocol gets its own set of options
      $protocol = isset($options['protocol']) ? $options['protocol'] : 's3';
      self::$protocolOptions[$protocol] = $options;
      self::$registeredInstances[$protocol] = $this;
      // Hand the outcome back instead of discarding it so callers can detect failure
      return stream_wrapper_register($protocol, get_class($this));
  }
  134. /**
  135. * Directory listing data doled out by readdir
  136. */
  137. protected $dirInfo = false;
  138. /**
  139. * Offset into directory data
  140. */
  141. protected $dirOffset = 0;
  142. /**
  143. * Array with protocol, bucket and path keys once init() is called successfully
  144. */
  145. protected $info = false;
  146. /**
  147. * Amazon S3 service objects. Usually just one exists but if you make
  148. * requests with custom credentials via a stream context or multiple
  149. * protocol registrations more than one can be created
  150. */
  151. static protected $services = array();
  /**
   * Split the URL of interest into its parts and remember them on this
   * instance. On success $this->info holds the protocol, bucket and path keys
   * produced by parse() and $this->protocol is set from it.
   *
   * @param string $path Full URL such as s3://bucket/path/to/object
   * @return bool false when the URL could not be parsed
   */
  protected function init($path)
  {
      $parsed = $this->parse($path);
      if ($parsed)
      {
          $this->info = $parsed;
          $this->protocol = $parsed['protocol'];
          return true;
      }
      return false;
  }
  /**
   * Parse a wrapper URL into its protocol, bucket and object key.
   *
   * The URL scheme becomes 'protocol', the host becomes 'bucket' and the URL
   * path, without its leading slash and with runs of slashes collapsed,
   * becomes 'path' ('' for the bucket root).
   *
   * @param string $path Full URL such as s3://bucket/path/to/object
   * @return array|false Array with 'protocol', 'bucket' and 'path' keys, or
   *                     false when the URL is malformed or lacks a scheme or host
   */
  protected function parse($path)
  {
      $parsed = parse_url($path);
      // parse_url() happily succeeds on inputs such as "/just/a/path", which
      // have neither scheme nor host; those cannot address an S3 object
      if (!$parsed || !isset($parsed['scheme']) || !isset($parsed['host']))
      {
          return false;
      }
      // No leading / in S3 (otherwise our public S3 URLs are strange).
      // The cast covers PHP versions where substr() yields false rather than ''
      $key = isset($parsed['path']) ? (string) substr($parsed['path'], 1) : '';
      return array(
          'protocol' => $parsed['scheme'],
          'bucket' => $parsed['host'],
          // Consecutive slashes make no difference in the filesystems we're emulating here
          'path' => preg_replace('/\/+/', '/', $key),
      );
  }
  /**
   * Return the AmazonS3 service object for the current credentials.
   *
   * Service objects are shared through the static self::$services map, keyed
   * by the key/secretKey/token combination, so distinct credentials get
   * distinct objects while identical credentials reuse one.
   *
   * @return AmazonS3
   */
  protected function getService()
  {
      // Keys are named the way the AmazonS3 constructor expects them
      $credentials = array(
          'key' => $this->getOption('key', ''),
          'secret' => $this->getOption('secretKey', ''),
          'token' => $this->getOption('token', ''),
      );
      $id = implode(':', $credentials);
      if (!isset(self::$services[$id]))
      {
          // array_filter() without a callback drops exactly the values empty() rejects
          $serviceOptions = array('certificate_authority' => true) + array_filter($credentials);
          self::$services[$id] = new AmazonS3($serviceOptions);
      }
      return self::$services[$id];
  }
  /**
   * Return the AmazonS3 service object configured for a registered protocol.
   *
   * register() stores the wrapper instance but does not set its protocol, and
   * getOption() resolves registration-time options through $this->protocol.
   * The instance is therefore bound to the requested protocol here (and its
   * merged-option cache reset) before the service is looked up, so the
   * credentials given to register() are actually used.
   *
   * @param string $protocol Protocol name passed to register() (default 's3')
   * @return AmazonS3|null null when nothing was registered under that protocol
   */
  public static function getAWSServiceForProtocol($protocol)
  {
      if (!isset(self::$registeredInstances[$protocol]))
      {
          return null;
      }
      $instance = self::$registeredInstances[$protocol];
      if ($instance->protocol !== $protocol)
      {
          $instance->protocol = $protocol;
          // Force getOption() to rebuild its merged options for this protocol
          $instance->options = null;
      }
      return $instance->getService();
  }
  223. protected $dirResults = null;
  224. protected $dirPosition = 0;
  /**
   * Implements opendir(). Fetches the list of "files" and "directories" at
   * $path from S3 and keeps it so that readdir() can hand entries out one by
   * one. Directories carry a trailing / to distinguish them.
   *
   * @param string $path         URL of the directory to list
   * @param int    $optionsDummy Unused
   * @return bool false when the URL is invalid or the listing failed
   */
  public function dir_opendir ($path, $optionsDummy)
  {
      if (!$this->init($path))
      {
          return false;
      }
      $listing = $this->getDirectoryListing($this->info);
      if ($listing === false)
      {
          $this->dirResults = null;
          return false;
      }
      $this->dirResults = $listing;
      $this->dirPosition = 0;
      return true;
  }
  /**
   * Build the options array for a list_objects call.
   *
   * Recognised keys of $options:
   *  - 'path': key prefix to list; defaults to $this->info['path']
   *  - 'delimited': when present and falsy, no delimiter is sent so every
   *    object under the prefix is returned; otherwise '/' is used so only
   *    the "subdirectories" and "files" at this level come back
   *
   * @param array $options
   * @return array Array with 'prefix' and, usually, 'delimiter'
   */
  protected function getOptionsForDirectory($options = array())
  {
      // Usually the prefix is the single path this operation cares about, but not always
      $prefix = isset($options['path']) ? $options['path'] : $this->info['path'];
      // A non-root prefix must end in / so that only its children match
      if (strlen($prefix) && !preg_match('/\/$/', $prefix))
      {
          $prefix .= '/';
      }
      $listOptions = array('prefix' => $prefix);
      // Explicitly undelimited: e.g. a single-query "is there anything at all here" test
      $undelimited = isset($options['delimited']) && !$options['delimited'];
      if (!$undelimited)
      {
          $listOptions['delimiter'] = '/';
      }
      return $listOptions;
  }
  /**
   * List the entries found under a prefix, following S3's pagination.
   *
   * Names are returned relative to the prefix. With the default delimited
   * listing, "subdirectories" (common prefixes) keep their trailing / so they
   * can be told apart from files without further network calls. The raw body
   * of the most recent page is also stored in self::$statCache under
   * "<bucket>-<path>".
   *
   * @param array|null $info    Result of parse(); defaults to $this->info
   * @param array      $options Extra options for getOptionsForDirectory(),
   *                            e.g. array('delimited' => false)
   * @return array|false List of relative names, or false when a request fails
   *                     or a truncated listing gives no way to continue
   */
  protected function getDirectoryListing($info = null, $options = array())
  {
      if ($info === null)
      {
          $info = $this->info;
      }
      $options = $this->getOptionsForDirectory(array_merge($options, array('path' => $info['path'])));
      $prefixLength = strlen($options['prefix']);
      $results = array();
      // Markers can be fetched more than once according to the spec, don't return them twice,
      // but don't blindly assume we can skip the first result either in case they surprise us
      $have = array();
      do
      {
          $list = $this->getService()->list_objects($info['bucket'], $options);
          if (!$list->isOK())
          {
              return false;
          }
          self::$statCache[$info["bucket"]."-".$info["path"]] = $list->body->to_array();

          // "Subdirectories" (common prefixes)
          $prefixes = $list->body->query('descendant-or-self::Prefix');
          if ($prefixes)
          {
              foreach ($prefixes as $node)
              {
                  $key = (string) $node;
                  if (strlen($key) <= $prefixLength)
                  {
                      // S3 tells us about the directory itself as a prefix, which is not interesting
                      continue;
                  }
                  // results of readdir() do not include the path, just the basename.
                  // The delimiter stays attached so directories can be recognised later
                  $name = substr($key, $prefixLength);
                  if (!isset($have[$name]))
                  {
                      $results[] = $name;
                      $have[$name] = true;
                  }
              }
          }

          // Files
          $lastKey = null;
          $keys = $list->body->query('descendant-or-self::Key');
          if ($keys)
          {
              foreach ($keys as $node)
              {
                  $key = (string) $node;
                  // Remember the full key: it is the pagination fallback below
                  $lastKey = $key;
                  if (strlen($key) <= $prefixLength)
                  {
                      // If something is both a file and a directory - possible in s3 where
                      // directories are virtual - it could show up in its own listing. This tends
                      // to result in nasty infinite loops in recursive delete functions etc.
                      // Defend against this by not returning it
                      continue;
                  }
                  $name = substr($key, $prefixLength);
                  if (!isset($have[$name]))
                  {
                      $results[] = $name;
                      $have[$name] = true;
                  }
              }
          }

          // Pick up where we left off
          $truncated = ((string) $list->body->IsTruncated) === 'true';
          if ($truncated)
          {
              // S3 only supplies NextMarker when a delimiter was sent. For undelimited
              // listings the last key of the page is the marker; without this fallback
              // the same first page would be requested forever
              $marker = (string) $list->body->NextMarker;
              if ($marker === '' && $lastKey !== null)
              {
                  $marker = $lastKey;
              }
              if ($marker === '')
              {
                  // Truncated but nothing to continue from: report failure rather than loop
                  return false;
              }
              $options['marker'] = $marker;
          }
      } while ($truncated);
      return $results;
  }
  /**
   * Implements readdir(): hands out the next entry of the listing prepared by
   * opendir() and advances the position. Subdirectory names end in '/' so they
   * can be told apart from files without further API calls.
   *
   * @return string|false Next entry name, or false when the listing is
   *                      exhausted or opendir() was never called
   */
  public function dir_readdir()
  {
      if (!isset($this->dirResults) || $this->dirPosition >= count($this->dirResults))
      {
          return false;
      }
      $entry = $this->dirResults[$this->dirPosition];
      $this->dirPosition++;
      return $entry;
  }
  /**
   * Implements closedir(): forgets the current listing and resets the read
   * position. The static stat cache is not cleared here.
   *
   * @return bool Always true
   */
  public function dir_closedir()
  {
      $this->dirPosition = 0;
      $this->dirResults = null;
      return true;
  }
  /**
   * Implements rewinddir(): moves back to the first entry of the current
   * listing so readdir() starts again from the top.
   *
   * @return bool false when no directory listing is open
   */
  public function dir_rewinddir()
  {
      if (!isset($this->dirResults))
      {
          return false;
      }
      $this->dirPosition = 0;
      return true;
  }
  /**
   * Implements mkdir for the s3 protocol.
   *
   * If $path is s3://bucketname or s3://bucketname/ with no subdirectory
   * name, we attempt to create that bucket and report whether that worked.
   * Buckets are created in the region given by the region option, defaulting
   * to AmazonS3::REGION_US_E1 (see services/s3.class.php in the SDK).
   *
   * If there is a subdirectory name, a small placeholder object named
   * ".create" is written beneath it and success is always returned; the
   * outcome of that write is not checked. The bucket is assumed to exist for
   * performance reasons (if it doesn't you'll find out soon enough when you
   * try to manipulate files or read directory contents).
   *
   * @param string $path    URL of the bucket or subdirectory
   * @param int    $mode    Unused
   * @param int    $options Unused
   * @return bool
   */
  public function mkdir($path, $mode, $options)
  {
      if (!$this->init($path))
      {
          return false;
      }
      $bucket = $this->info['bucket'];
      $key = $this->info['path'];
      if ($key !== '')
      {
          // Subdirectories are just key prefixes; drop a marker object so the prefix shows up
          $this->getService()->create_object($bucket, $key.'/.create', array("body" => "empty"));
          return true;
      }
      return $this->getService()->create_bucket($bucket, $this->getRegion())->isOK();
  }
  /**
   * Implements rmdir for the s3 protocol.
   *
   * If the URL is s3://bucketname/ or just s3://bucketname we attempt to
   * remove the entire bucket, returning failure if S3 rejects the request
   * (for instance because the bucket is not empty).
   *
   * If the URL has a subdirectory in it, nothing is deleted: S3 has no
   * directory objects, only a prefix/delimiter mechanism for queries. To
   * emulate regular filesystems, which some code relies on as a test, we
   * return false when any object still exists under the prefix and true
   * otherwise.
   *
   * @param string $path    URL of the bucket or subdirectory
   * @param int    $options Unused
   * @return bool
   */
  public function rmdir($path, $options)
  {
      if (!$this->init($path))
      {
          return false;
      }
      if ($this->info['path'] === '')
      {
          // A response object is always truthy, even for an HTTP error, so a plain
          // boolean cast would report success for a rejected delete. Check isOK()
          // and still tolerate a literal false return
          $response = $this->getService()->delete_bucket($this->info['bucket']);
          return is_object($response) && $response->isOK();
      }
      return !$this->hasDirectoryContents();
  }
  /**
   * Whether at least one object exists under the current path prefix.
   * Issues a single undelimited list_objects request limited to one key.
   *
   * @return bool
   */
  protected function hasDirectoryContents()
  {
      $listOptions = array_merge(
          $this->getOptionsForDirectory(array('delimited' => false)),
          array('max-keys' => 1)
      );
      $list = $this->getService()->list_objects($this->info['bucket'], $listOptions);
      $found = $list->body->query('descendant-or-self::Key');
      return count($found) > 0;
  }
  /**
   * Implements unlink() for the s3 protocol. Removes files only, not folders
   * or buckets (see rmdir()). Any cache entry for the object is dropped
   * before the delete request is sent.
   *
   * @param string $path URL of the object to delete
   * @return bool Whether S3 reported the delete as successful
   */
  public function unlink($path)
  {
      if (!$this->init($path))
      {
          return false;
      }
      $this->deleteCache();
      $response = $this->getService()->delete_object($this->info['bucket'], $this->info['path']);
      return $response->isOK();
  }
  /**
   * Implements rename() for the s3 protocol.
   *
   * Rename a file or directory. WARNING: S3 does NOT have a native rename
   * feature, so this method must COPY EVERYTHING INVOLVED. If you rename a
   * bucket, the ENTIRE BUCKET MUST BE COPIED. If you rename a subdirectory,
   * everything in that subdirectory must be copied, etc. That equals a lot of
   * S3 traffic.
   *
   * For safety, nothing at $from is deleted until the copy operation has
   * completely succeeded. If the copy fails part way, the objects copied so
   * far are removed again, and the destination bucket is removed only if this
   * call created it.
   *
   * If, after the copy has completely succeeded, there are errors during the
   * deletion of the source or its contents, this method returns false but the
   * new copy remains in place along with whatever portions of the old copy
   * could not be removed. Otherwise you could be left with no way to recover
   * a portion of your data.
   *
   * THERE IS A 5GB LIMIT ON THE SIZE OF INDIVIDUAL OBJECTS INVOLVED IN A
   * rename() OPERATION. This is a limitation of the copy_object API in
   * Amazon S3.
   *
   * @param string $from Source URL
   * @param string $to   Destination URL (must use the same protocol)
   * @return bool
   */
  public function rename($from, $to)
  {
      $fromInfo = $this->parse($from);
      if (!$fromInfo)
      {
          return false;
      }
      // init() is not used here, so set the protocol that getOption() depends on
      $this->protocol = $fromInfo['protocol'];
      $toInfo = $this->parse($to);
      if (!$toInfo)
      {
          return false;
      }
      if ($fromInfo['protocol'] !== $toInfo['protocol'])
      {
          // You cannot "rename" across protocols
          return false;
      }
      $service = $this->getService();

      // See if this is a simple copy of an object. If $from is an object rather than a bucket
      // or subdirectory then this operation will succeed. Don't try this if either from or to
      // is the root of a bucket
      if (strlen($fromInfo['path']) && strlen($toInfo['path']))
      {
          $copied = $service->copy_object(
              array('bucket' => $fromInfo['bucket'], 'filename' => $fromInfo['path']),
              array('bucket' => $toInfo['bucket'], 'filename' => $toInfo['path']),
              array('acl' => $this->getOption('acl'))
          );
          if ($copied->isOK())
          {
              // Make sure we reset the mime type based on the new file extension.
              // Otherwise added extensions like .tmp tend to mean everything winds up
              // application/octet-stream even after it is renamed to remove .tmp
              if (!$service->change_content_type($toInfo['bucket'], $toInfo['path'], $this->getMimeType($toInfo['path']))->isOK())
              {
                  $service->delete_object($toInfo['bucket'], $toInfo['path']);
                  return false;
              }
              // That worked so delete the original
              $this->deleteCache($fromInfo);
              // If the delete fails the copy still exists; there is no way to be that
              // specific in our return value
              return $service->delete_object($fromInfo['bucket'], $fromInfo['path'])->isOK();
          }
      }

      // Only a bucket created by this call may be cleaned up on failure; a destination
      // bucket that already existed must never be deleted
      $createdBucket = false;
      if ($toInfo['path'] === '')
      {
          // $to is the root of a bucket, so create the bucket
          if (!$service->create_bucket($toInfo['bucket'], $this->getRegion())->isOK())
          {
              return false;
          }
          $createdBucket = true;
      }

      // Get a full list of objects at $from
      $objects = $this->getDirectoryListing($fromInfo, array('delimited' => false));
      if ($objects === false)
      {
          if ($createdBucket)
          {
              $service->delete_bucket($toInfo['bucket']);
          }
          return false;
      }

      // Listing entries are relative to the directory prefix. Trim a trailing slash
      // first so that "dir/" does not produce keys such as "dir//file"
      $fromBase = rtrim($fromInfo['path'], '/');
      $toBase = rtrim($toInfo['path'], '/');
      $fromPrefix = strlen($fromBase) ? $fromBase . '/' : '';
      $toPrefix = strlen($toBase) ? $toBase . '/' : '';
      $fromPaths = array();
      $toPaths = array();
      foreach ($objects as $object)
      {
          $fromPaths[] = $fromPrefix . $object;
          $toPaths[] = $toPrefix . $object;
      }
      $total = count($objects);

      // and copy them all to $to
      for ($i = 0; ($i < $total); $i++)
      {
          // Reset the mime type based on the new file extension, as in the single object case
          $mimeType = $this->getMimeType($toPaths[$i]);
          $ok = $service->copy_object(
              array('bucket' => $fromInfo['bucket'], 'filename' => $fromPaths[$i]),
              array('bucket' => $toInfo['bucket'], 'filename' => $toPaths[$i]),
              array('acl' => $this->getOption('acl'), 'contentType' => $mimeType)
          )->isOK();
          // The response object itself is always truthy, so isOK() must be consulted
          $ok = $ok && $service->change_content_type($toInfo['bucket'], $toPaths[$i], $mimeType)->isOK();
          if (!$ok)
          {
              // Undo everything copied so far, including the object that just failed
              for ($j = 0; ($j <= $i); $j++)
              {
                  $service->delete_object($toInfo['bucket'], $toPaths[$j]);
              }
              if ($createdBucket)
              {
                  $service->delete_bucket($toInfo['bucket']);
              }
              return false;
          }
      }

      // BEGIN DELETION UNDER THE ORIGINAL NAME
      // Once we get started with the deletions of the old copy it is better not to delete the
      // new copy if something goes wrong, because then we have no copies at all.
      for ($i = 0; ($i < $total); $i++)
      {
          $this->deleteCache(array_merge($fromInfo, array('path' => $fromPaths[$i])));
          if (!$service->delete_object($fromInfo['bucket'], $fromPaths[$i])->isOK())
          {
              return false;
          }
      }

      // If $from is the root of a bucket delete the old bucket
      if ($fromInfo['path'] === '')
      {
          $response = $service->delete_bucket($fromInfo['bucket']);
          if (!is_object($response) || !$response->isOK())
          {
              return false;
          }
      }
      return true;
  }
  /**
   * Implements stream_cast(). There is no underlying resource to expose (s3
   * has no select() operation), so the cast is always refused.
   *
   * @param int $cast_as Unused
   * @return bool Always false
   */
  public function stream_cast ($cast_as)
  {
      // No real resource backs an S3 object
      return false;
  }
  622. /**
  623. * Data to be written to or read from a stream. Alas S3's limited semantics pretty much
  624. * require we read or write the entire object at a time (even if it's massive) which leads
  625. * to practical limitations due to memory usage. Possibly we can use multipart upload later
  626. * to ameliorate this in the case of writing big new objects
  627. * https://forums.aws.amazon.com/thread.jspa?threadID=10752&start=25&tstart=0
  628. */
  629. protected $data = null;
  630. /**
  631. * Offset into the data of the seek pointer at this time
  632. */
  633. protected $dataOffset = 0;
  634. /**
  635. * True if the data was modified in any way and therefore we must write on close
  636. */
  637. protected $dirty = false;
  638. /**
  639. * Whether we are expecting read operations
  640. */
  641. protected $read = false;
  642. /**
  643. * Whether we are expecting write operations
  644. */
  645. protected $write = false;
  646. /**
  647. * When a cache is configured, we cache the first 8K block of each file whenever
  648. * possible to avoid unnecessary slow S3 calls for things like getimagesize()
  649. * or exif_read_info() etc. etc. stream_open sets $this->start to that initial
  650. * block of 8K bytes (or less, if the file is smaller than 8K bytes) as retrieved
  651. * from the cache
  652. */
  653. protected $start = null;
  654. /**
  655. * When a cache is configured, we also cache the results of stat() for quick
  656. * access
  657. */
  658. protected $stat = null;
  659. /**
  660. * After the first block is read from $this->start there must be a hint to the
  661. * next stream_read call to call $this->fullRead() and move the pointer to the
  662. * 8K boundary. This is that hint
  663. */
  664. protected $afterStart = false;
  665. /**
  666. * If a stream_seek is attempted to somewhere other than byte 0, we need to
  667. * give up on the start cache - that is, if they actually read from that point
  668. * in the stream. However we don't want to give up right away in case the caller
  669. * is just implementing the fseek(SEEK_END) ... ftell()... fseek(SEEK_SET)
  670. * pattern to measure the length and then come back to the top because they are
  671. * afraid to use stat() (I'm looking at you, exif_read_file)
  672. */
  673. protected $startSeeking = false;
  /**
   * Opens a stream, as in fopen() or file_get_contents().
   *
   * The whole object is buffered in $this->data. Supported base modes:
   *  - r: the object must exist and is loaded
   *  - a: the object is loaded if it exists and the pointer starts at its end;
   *       otherwise an empty object will be created
   *  - w: the buffer starts empty, replacing any existing object
   *  - x: fails if the object already exists, otherwise the buffer starts empty
   *  - c: the object is loaded if it exists (it is not truncated); otherwise an
   *       empty object will be created
   * A '+' additionally enables both reading and writing without changing
   * whether existing contents are loaded. Whenever a new object is implied,
   * the stream is marked dirty so it is uploaded even if nothing is written.
   *
   * For plain read-only opens with a cache configured, a cache hit supplies
   * the stat info and first block and no request is made to S3 here.
   *
   * @param string $path        URL of the object
   * @param string $mode        fopen()-style mode string
   * @param int    $options     Unused
   * @param string $opened_path Unused
   * @return bool
   */
  public function stream_open ($path, $mode, $options, &$opened_path)
  {
      if (!$this->init($path))
      {
          return false;
      }
      // Start the pointer at the end of the existing data (append)
      $end = false;
      // Load the existing object into the buffer
      $load = false;
      // Treat a missing object as an error
      $mustExist = false;
      $modes = array_flip(str_split($mode));
      if (isset($modes['r']))
      {
          $this->read = true;
          $this->write = false;
          $load = true;
          $mustExist = true;
      }
      elseif (isset($modes['a']))
      {
          $this->read = true;
          $this->write = true;
          $load = true;
          $end = true;
      }
      elseif (isset($modes['w']))
      {
          // Read nothing in, get ready to write to the buffer
          $this->read = false;
          $this->write = true;
      }
      elseif (isset($modes['x']))
      {
          $this->read = false;
          $this->write = true;
          $response = $this->getService()->get_object_headers($this->info['bucket'], $this->info['path']);
          if ($response->isOK())
          {
              // x does not allow opening an existing file
              return false;
          }
      }
      elseif (isset($modes['c']))
      {
          $this->read = false;
          $this->write = true;
          $load = true;
      }
      else
      {
          // Unsupported mode
          return false;
      }
      if (isset($modes['+']))
      {
          $this->read = true;
          $this->write = true;
      }
      $this->data = '';
      $this->dataOffset = 0;
      $this->dirty = false;
      if ($this->read && (!$this->write))
      {
          // Read-only operations support an optional cache of the first 8K block so that
          // repeated operations like getimagesize() can succeed quickly. Note that
          // PHP fread()s in 8K blocks
          $cacheInfo = $this->getCacheInfo();
          if ($cacheInfo)
          {
              $this->stat = $cacheInfo['stat'];
              $this->start = $cacheInfo['start'];
              return true;
          }
      }
      if (!$load)
      {
          // w and x (with or without +) always start from an empty buffer. Mark the
          // stream dirty so the object is uploaded even if 0 bytes are written
          $this->dirty = true;
          return true;
      }
      if (!$this->fullRead())
      {
          if ($mustExist)
          {
              // Failure to find an existing object is an error for r and r+
              return false;
          }
          // a and c may start a new object; make sure it gets created even if
          // nothing is written to it
          $this->dirty = true;
          return true;
      }
      if ($end)
      {
          $this->dataOffset = strlen($this->data);
      }
      return true;
  }
  /**
   * Fetch and unserialize the cache entry for the given file, or for the file
   * described by $this->info when none is given.
   *
   * @param array|null $info Result of parse(); defaults to $this->info
   * @return mixed The unserialized entry, or null when no cache is configured
   *               or the cache returned null for the key
   */
  protected function getCacheInfo($info = null)
  {
      if (is_null($info))
      {
          $info = $this->info;
      }
      $cache = $this->getCache();
      if (!$cache)
      {
          return null;
      }
      $serialized = $cache->get($this->getCacheKey($info));
      return is_null($serialized) ? null : unserialize($serialized);
  }
  /**
   * Cache key for a given protocol/bucket/path, joined with colons.
   *
   * @param array|null $info Result of parse(); defaults to $this->info
   * @return string
   */
  protected function getCacheKey($info = null)
  {
      $target = ($info === null) ? $this->info : $info;
      return implode(':', array($target['protocol'], $target['bucket'], $target['path']));
  }
  /**
   * Download the entire current object into $this->data and refresh its
   * cache entry.
   *
   * @return bool false when the object could not be fetched
   */
  protected function fullRead()
  {
      $response = $this->getService()->get_object($this->info['bucket'], $this->info['path']);
      if ($response->isOK())
      {
          $this->data = (string) $response->body;
          // Theoretically redundant, but if S3 files are created by a non-cache-aware
          // tool this lets us gradually roll that information into the cache
          $this->updateCache();
          return true;
      }
      return false;
  }
  /**
   * Store the first 8K of the buffered data and the matching stat() info in
   * the cache, if one is configured. Entries are written with a lifetime of
   * 365 days.
   */
  protected function updateCache()
  {
      $cache = $this->getCache();
      if (!$cache)
      {
          return;
      }
      $cacheKey = $this->getCacheKey();
      $entry = array(
          'start' => substr($this->data, 0, 8192),
          // NOTE(review): getStatInfo() is defined outside this view; the arguments
          // look like (is-directory flag, size, mtime) - confirm
          'stat' => $this->getStatInfo(false, strlen($this->data), time()),
      );
      $cache->set($cacheKey, serialize($entry), 365 * 86400);
  }
  /**
   * Remove the cache entry for the given file, or for the file described by
   * $this->info when none is given. Does nothing without a configured cache.
   *
   * @param array|null $info Result of parse(); defaults to $this->info
   */
  protected function deleteCache($info = null)
  {
      $cache = $this->getCache();
      if (!$cache)
      {
          return;
      }
      $cache->remove($this->getCacheKey($info));
  }
  /**
   * The cache object supplied through the 'cache' option.
   *
   * @return mixed The configured cache, or null when the option is unset or falsy
   */
  protected function getCache()
  {
      $cache = $this->getOption('cache');
      return $cache ? $cache : null;
  }
  /**
   * Close a stream opened with stream_open. Implements fclose() and is also
   * invoked by file_put_contents and the like. Pending data is flushed first
   * and the buffer is then released whether or not the flush worked.
   *
   * If that distresses you, call fflush separately first and make sure it
   * works. That's necessary with any filesystem in principle although we
   * rarely bother to check with the regular filesystem (and then we get
   * busted by "disk full").
   *
   * @return bool false when no stream is open or the flush failed
   */
  public function stream_close()
  {
      if (is_null($this->data))
      {
          // No stream open
          return false;
      }
      $flushed = $this->stream_flush();
      $this->data = null;
      return $flushed;
  }
  /**
   * Flush any unstored data in the buffer to S3. Implements fflush() and is used by stream_close.
   *
   * Nothing is sent unless the stream is writable and the buffer has been modified since
   * the last successful flush.
   *
   * @return boolean true when there was nothing to write or the upload succeeded,
   *                 false (after raising an E_USER_WARNING) when the upload failed
   */
  public function stream_flush()
  {
    if (!($this->write && $this->dirty))
    {
      return true;
    }

    $bucket = $this->info['bucket'];
    $path = $this->info['path'];
    $options = array(
      'body' => $this->data,
      'acl' => $this->getOption('acl'),
      'contentType' => $this->getMimeType($path),
      'headers' => $this->getOption('headers')
    );

    $response = $this->getService()->create_object($bucket, $path, $options);
    if ($response->isOK())
    {
      $this->updateCache();
      $this->dirty = false;

      return true;
    }

    // PHP calls stream_flush when closing a stream (before stream_close) but ignores
    // its return value: PHP bug https://bugs.php.net/bug.php?id=60110
    // Raise a warning so the programmer is not completely in the dark, much like the
    // native file functions do on I/O errors
    trigger_error("Unable to write to bucket " . $bucket . ", path " . $path, E_USER_WARNING);

    return false;
  }
  /**
   * Report whether the current offset sits exactly at the end of the buffered data.
   *
   * @return boolean true when the offset equals the buffer length
   */
  public function stream_eof()
  {
    $bufferLength = strlen($this->data);

    return $this->dataOffset === $bufferLength;
  }
  /**
   * Locking is not supported: an S3 "file" cannot be locked.
   *
   * @param mixed $operation Ignored
   * @return boolean Always false
   */
  public function stream_lock($operation)
  {
    // Every lock request is refused
    return false;
  }
  /**
   * Unlocking is not supported either: an S3 "file" cannot be unlocked.
   *
   * @param mixed $operation Ignored
   * @return boolean Always false
   */
  public function stream_unlock($operation)
  {
    // Every unlock request is refused
    return false;
  }
  927. /**
  928. * Someday: stream_metadata. Doesn't exist in 5.3
  929. */
  /**
   * Read up to the specified number of bytes from the current offset. Implements fread()
   * among other things.
   *
   * When a cached first block ($this->start) is available and the caller asks for exactly
   * 8192 bytes without having seeked away from offset 0, that cached block is returned
   * without loading the object. Any other request abandons the cached block and loads
   * the full object before reading from the buffer.
   *
   * @param integer $bytes Maximum number of bytes to return
   * @return string|boolean The bytes read ('' at end of data), or false when the stream is
   *                        not readable or the object could not be loaded
   */
  public function stream_read($bytes)
  {
    if (!$this->read)
    {
      // Not supposed to be reading
      return false;
    }

    // If we have a cache of the first 8K block and that's what we've been asked for, cough it up
    if (!is_null($this->start))
    {
      if (($bytes === 8192) && (!$this->startSeeking))
      {
        $result = $this->start;
        $this->start = null;
        $this->afterStart = true;
        $this->dataOffset = min(strlen($result), 8192);
        return $result;
      }

      // We were asked for something else. Don't get fancy, just revert to a normal open.
      // The cached block must be cleared with null (not false): this branch is guarded by
      // !is_null(), so any other value would send every later read back through here,
      // re-downloading the whole object each time.
      $this->start = null;
      if (!$this->fullRead())
      {
        return false;
      }
    }

    // The second read call, after the first resulted in returning a cached first block.
    // The caller wants more than just that first 8K, so we need to do a real read now and
    // skip the first 8K of it. TODO: it would be nice to use a byte range here to avoid
    // reading that first 8K from S3
    if ($this->afterStart)
    {
      if (!$this->fullRead())
      {
        return false;
      }
      // Don't try to reset dataOffset here as a seek() right after the
      // first fread() gets lost that way (getimagesize() on certain JPEGs for example)
      $this->afterStart = null;
    }

    $remaining = strlen($this->data) - $this->dataOffset;
    if ($remaining <= 0)
    {
      // At (or somehow past) the end of the buffer: report "no more data" explicitly
      // instead of handing substr() a zero or negative length
      return '';
    }
    if ($bytes > $remaining)
    {
      $bytes = $remaining;
    }

    $result = substr($this->data, $this->dataOffset, $bytes);
    $this->dataOffset += $bytes;

    return $result;
  }
  /**
   * Write the given bytes into the buffer at the current offset. Implements fwrite()
   * among other things.
   *
   * Bytes already in the buffer at the write position are overwritten; bytes beyond the
   * written range are preserved, and the buffer grows when the write runs past its end.
   * Nothing is sent to S3 here: the buffer is only marked dirty for a later stream_flush().
   *
   * @param string $data Bytes to write
   * @return integer Number of bytes written (0 when the stream is not writable)
   */
  public function stream_write($data)
  {
    if (!$this->write)
    {
      // Not supposed to be writing
      return 0;
    }

    $len = strlen($data);

    // An explicit length is required: without it substr_replace() replaces everything
    // from the offset to the end of the buffer, so a write in the middle of existing
    // data would silently truncate whatever followed it
    $this->data = substr_replace((string) $this->data, $data, $this->dataOffset, $len);
    $this->dataOffset += $len;
    $this->dirty = true;

    return $len;
  }
  /**
   * Seek to the specified point in the buffer. Implements fseek(), sort of.
   * PHP will sometimes just adjust its own read buffer instead.
   *
   * Seeks before the start or past the end of the data are rejected.
   *
   * @param integer $offset Offset, interpreted according to $whence
   * @param integer $whence SEEK_SET, SEEK_CUR or SEEK_END
   * @return boolean true when the position was updated, false for an unknown $whence or
   *                 an out-of-range target
   */
  public function stream_seek($offset, $whence)
  {
    // Seeking potentially invalidates the first-block cache, but
    // don't panic unless we actually try to read something after the seek
    if ($this->stat)
    {
      // The stat cache is available only when we are doing read-only operations,
      // and it means that we can calculate this correctly even if we haven't
      // really loaded the full data yet for this file
      $len = $this->stat['size'];
    }
    else
    {
      // In other cases we have the full data because we're writing, or reading
      // and writing
      $len = strlen($this->data);
    }

    if ($whence === SEEK_SET)
    {
      $newOffset = $offset;
    }
    elseif ($whence === SEEK_CUR)
    {
      // Relative to the current position, not to the start of the data
      $newOffset = $this->dataOffset + $offset;
    }
    elseif ($whence === SEEK_END)
    {
      $newOffset = $len + $offset;
    }
    else
    {
      // Unknown whence value
      return false;
    }

    if (($newOffset < 0) || ($newOffset > $len))
    {
      return false;
    }

    $this->dataOffset = $newOffset;
    if (!is_null($this->start))
    {
      // A cached first block is still pending. It can only be served when reading
      // begins at offset 0, so flag any other position; seeking right back to 0
      // (for example after an ftell()) cancels the alert again
      $this->startSeeking = ($this->dataOffset !== 0);
    }

    return true;
  }
  /**
   * Report the current position within the buffer.
   *
   * @return integer The current data offset
   */
  public function stream_tell()
  {
    $position = $this->dataOffset;

    return $position;
  }
  /**
   * Stream options (nonblocking mode and such) are not supported at this time.
   * A read timeout might become possible once files are pulled in chunks rather
   * than all at once.
   *
   * @param mixed $option Ignored
   * @param mixed $arg1 Ignored
   * @param mixed $arg2 Ignored
   * @return boolean Always false
   */
  public function stream_set_option($option, $arg1, $arg2)
  {
    // No option is honored
    return false;
  }
  /**
   * Implements stat($url): initialize this wrapper for the given path and delegate
   * to stream_stat().
   *
   * @param string $path URL to examine
   * @param integer $flags Not used by this implementation
   * @return array|boolean The stat array, or false when init() fails or stream_stat()
   *                       returns false
   */
  public function url_stat($path, $flags)
  {
    return $this->init($path) ? $this->stream_stat() : false;
  }
  /**
   * Implements fstat($resource) - stat on an already opened file. Also used by url_stat().
   *
   * Sources are consulted from cheapest to most expensive:
   *  1. the stat array already held by this instance,
   *  2. the optional cache (getCacheInfo()),
   *  3. structural rules: an existing bucket root and any path ending in "/" are folders,
   *  4. the static listing cache (self::$statCache) for the parent folder,
   *  5. a headers request for the object, falling back to a check for directory contents.
   *
   * @return array|boolean A stat array as built by getStatInfo(), or false when no file is
   *                       open or nothing exists at the path
   */
  public function stream_stat()
  {
    if (is_null($this->info))
    {
      // No file open
      return false;
    }
    if (!is_null($this->stat))
    {
      // stream_open already pulled it in when loading the cache
      return $this->stat;
    }

    $cacheInfo = $this->getCacheInfo();
    if ($cacheInfo)
    {
      $this->stat = $cacheInfo['stat'];
      return $this->stat;
    }

    $bucket = $this->info['bucket'];
    $path = $this->info['path'];
    $dir = false;
    $response = null;

    if ($path === '')
    {
      // We want to know about the bucket
      if ($this->getService()->if_bucket_exists($bucket))
      {
        $dir = true;
      }
    }
    if ((!$dir) && (preg_match('/\/$/', $path)))
    {
      $dir = true;
    }

    // Only look the object up when the path is not already known to be a folder.
    // An existing bucket root must not reach this lookup: it would cost an extra
    // request and could report an existing bucket as missing.
    if (!$dir)
    {
      $parentPath = rtrim(dirname($path), ".");
      $cacheKey = $bucket . "-" . $parentPath;
      if (isset(self::$statCache[$cacheKey]))
      {
        $baseName = basename($path);
        $list = self::$statCache[$cacheKey];
        $keys = array();
        if (isset($list["Contents"]))
        {
          $keys = array_merge($keys, $list["Contents"]);
        }
        if (isset($list["CommonPrefixes"]))
        {
          $keys = array_merge($keys, $list["CommonPrefixes"]);
        }
        foreach ($keys as $entry)
        {
          // Compare as strings with ===: a loose == would treat numeric-looking names
          // such as "1e3" and "1000" as equal
          if (isset($entry["Key"]) && ((string) $entry["Key"] === $baseName))
          {
            $dir = false;
            $mtime = strtotime($entry['LastModified']);
            $size = intval($entry['Size']);
            break;
          }
          elseif (isset($entry["Prefix"]) && ((string) $entry["Prefix"] === $baseName))
          {
            $dir = true;
            $mtime = time();
            $size = 0;
            break;
          }
        }
        if (isset($mtime))
        {
          $this->stat = $this->getStatInfo($dir, $size, $mtime);
          return $this->stat;
        }
      }

      $response = $this->getService()->get_object_headers($bucket, $path);
      if (!$response->isOK())
      {
        // Hmm. Let's take another shot at this possibly being a folder
        if (!$this->hasDirectoryContents())
        {
          return false;
        }
        $dir = true;
      }
    }

    if ($dir)
    {
      // Folders carry no metadata of their own; report "now" and a zero size
      $mtime = time();
      $size = 0;
    }
    else
    {
      $mtime = strtotime($response->header['last-modified']);
      $size = (int) $response->header['content-length'];
    }

    $this->stat = $this->getStatInfo($dir, $size, $mtime);
    return $this->stat;
  }
  /**
   * Fake a stat() response array using the three pieces of information we really have.
   *
   * The result carries the 13 stat fields twice, as stat() does: first under the numeric
   * keys 0-12, then under their names (dev, ino, mode, nlink, uid, gid, rdev, size, atime,
   * mtime, ctime, blksize, blocks).
   *
   * @param boolean $dir Whether the entry is a folder
   * @param integer $size Size in bytes
   * @param integer $mtime Unix timestamp, reported as atime, mtime and ctime alike
   * @return array
   */
  protected function getStatInfo($dir, $size, $mtime)
  {
    // File-type bits (0040000 for a folder, 0100000 for a regular file) plus
    // read/write/execute for everyone. Folders never need to be created explicitly in S3.
    $mode = ($dir ? 0040000 : 0100000) + 0777;

    $fields = array(
      'dev' => 0,          // device number
      'ino' => 0,          // inode number
      'mode' => $mode,     // permissions and file type
      'nlink' => 1,        // number of links; 1 is a reasonable value
      'uid' => 0,          // uid of owner
      'gid' => 0,          // gid of owner
      'rdev' => 0,         // device type, if inode device
      'size' => $size,     // size in bytes
      'atime' => $mtime,   // last access; not tracked separately from mtime
      'mtime' => $mtime,   // last modification
      'ctime' => $mtime,   // last inode change
      'blksize' => -1,     // blocksize of filesystem IO (-1 where not relevant)
      'blocks' => -1       // number of 512-byte blocks allocated (-1 where not relevant)
    );

    // Positional copies first (keys 0-12), then the named entries in the same order
    return array_merge(array_values($fields), $fields);
  }
  /**
   * Determine the MIME type for a path from its extension (the text after the last dot),
   * looked up in aS3StreamWrapperMimeTypes::$mimeTypes. Override me if you hate our mime
   * types list.
   *
   * The extension is tried exactly as written first and then lowercased, so that
   * "PHOTO.JPG" resolves the same way as "photo.jpg" when the table only lists "jpg".
   *
   * @param string $path Object path or file name
   * @return string The matching MIME type, or 'application/octet-stream' when the path has
   *                no dot or the extension is not in the table
   */
  public function getMimeType($path)
  {
    $dot = strrpos($path, '.');
    // The cast covers a trailing dot, where substr() can return false on older PHP
    $extension = ($dot === false) ? '' : (string) substr($path, $dot + 1);

    if (isset(aS3StreamWrapperMimeTypes::$mimeTypes[$extension]))
    {
      return aS3StreamWrapperMimeTypes::$mimeTypes[$extension];
    }

    $lowercased = strtolower($extension);
    if (isset(aS3StreamWrapperMimeTypes::$mimeTypes[$lowercased]))
    {
      return aS3StreamWrapperMimeTypes::$mimeTypes[$lowercased];
    }

    return 'application/octet-stream';
  }
  1267. }