PageRenderTime 182ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 1ms

/amanda/tags/amanda261/device-src/s3.c

#
C | 1879 lines | 1328 code | 267 blank | 284 comment | 288 complexity | 2643b37ce265c04c99d2ecc9b2452bdc MD5 | raw file
  1. /*
  2. * Copyright (c) 2005-2008 Zmanda Inc. All Rights Reserved.
  3. *
  4. * This library is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU Lesser General Public License version 2.1 as
  6. * published by the Free Software Foundation.
  7. *
  8. * This library is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. * License for more details.
  12. *
  13. * You should have received a copy of the GNU Lesser General Public License
  14. * along with this library; if not, write to the Free Software Foundation,
  15. * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  16. *
  17. * Contact information: Zmanda Inc., 465 S Mathlida Ave, Suite 300
  18. * Sunnyvale, CA 94086, USA, or: http://www.zmanda.com
  19. */
  20. /* TODO
  21. * - collect speed statistics
  22. * - debugging mode
  23. */
  24. #ifdef HAVE_CONFIG_H
  25. /* use a relative path here to avoid conflicting with Perl's config.h. */
  26. #include "../config/config.h"
  27. #endif
  28. #include <string.h>
  29. #include "s3.h"
  30. #include "s3-util.h"
  31. #ifdef HAVE_REGEX_H
  32. #include <regex.h>
  33. #endif
  34. #ifdef HAVE_SYS_TYPES_H
  35. #include <sys/types.h>
  36. #endif
  37. #ifdef HAVE_SYS_STAT_H
  38. #include <sys/stat.h>
  39. #endif
  40. #ifdef HAVE_UNISTD_H
  41. #include <unistd.h>
  42. #endif
  43. #ifdef HAVE_DIRENT_H
  44. #include <dirent.h>
  45. #endif
  46. #ifdef HAVE_TIME_H
  47. #include <time.h>
  48. #endif
  49. #ifdef HAVE_UTIL_H
  50. #include "util.h"
  51. #endif
  52. #ifdef HAVE_AMANDA_H
  53. #include "amanda.h"
  54. #endif
  55. #include <curl/curl.h>
  56. /* Constant renamed after version 7.10.7 */
  57. #ifndef CURLINFO_RESPONSE_CODE
  58. #define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE
  59. #endif
  60. /* We don't need OpenSSL's kerberos support, and it's broken in
  61. * RHEL 3 anyway. */
  62. #define OPENSSL_NO_KRB5
  63. #ifdef HAVE_OPENSSL_HMAC_H
  64. # include <openssl/hmac.h>
  65. #else
  66. # ifdef HAVE_CRYPTO_HMAC_H
  67. # include <crypto/hmac.h>
  68. # else
  69. # ifdef HAVE_HMAC_H
  70. # include <hmac.h>
  71. # endif
  72. # endif
  73. #endif
  74. #include <openssl/err.h>
  75. #include <openssl/ssl.h>
  76. #include <openssl/md5.h>
  77. /* Maximum key length as specified in the S3 documentation
  78. * (*excluding* null terminator) */
  79. #define S3_MAX_KEY_LENGTH 1024
  80. #define AMAZON_SECURITY_HEADER "x-amz-security-token"
  81. #define AMAZON_BUCKET_CONF_TEMPLATE "\
  82. <CreateBucketConfiguration>\n\
  83. <LocationConstraint>%s</LocationConstraint>\n\
  84. </CreateBucketConfiguration>"
  /* Parameters for exponential backoff in the face of retriable errors. */

  /* Initial delay: 0.01s.  The expansion is parenthesized so that the macro
   * keeps its value when it appears inside a larger expression (for example
   * as the right-hand operand of '/' or '%'). */
  #define EXPONENTIAL_BACKOFF_START_USEC (G_USEC_PER_SEC/100)

  /* The delay is multiplied by this factor at each retry. */
  #define EXPONENTIAL_BACKOFF_BASE 2

  /* Retry 14 times (for a total of about 3 minutes spent waiting). */
  #define EXPONENTIAL_BACKOFF_MAX_RETRIES 14
  92. /* general "reasonable size" parameters */
  93. #define MAX_ERROR_RESPONSE_LEN (100*1024)
  94. /* Results which should always be retried */
  95. #define RESULT_HANDLING_ALWAYS_RETRY \
  96. { 400, S3_ERROR_RequestTimeout, 0, S3_RESULT_RETRY }, \
  97. { 404, S3_ERROR_NoSuchBucket, 0, S3_RESULT_RETRY }, \
  98. { 409, S3_ERROR_OperationAborted, 0, S3_RESULT_RETRY }, \
  99. { 412, S3_ERROR_PreconditionFailed, 0, S3_RESULT_RETRY }, \
  100. { 500, S3_ERROR_InternalError, 0, S3_RESULT_RETRY }, \
  101. { 501, S3_ERROR_NotImplemented, 0, S3_RESULT_RETRY }, \
  102. { 0, 0, CURLE_COULDNT_CONNECT, S3_RESULT_RETRY }, \
  103. { 0, 0, CURLE_PARTIAL_FILE, S3_RESULT_RETRY }, \
  104. { 0, 0, CURLE_OPERATION_TIMEOUTED, S3_RESULT_RETRY }, \
  105. { 0, 0, CURLE_SEND_ERROR, S3_RESULT_RETRY }, \
  106. { 0, 0, CURLE_RECV_ERROR, S3_RESULT_RETRY }, \
  107. { 0, 0, CURLE_GOT_NOTHING, S3_RESULT_RETRY }
  108. /*
  109. * Data structures and associated functions
  110. */
  111. struct S3Handle {
  112. /* (all strings in this struct are freed by s3_free()) */
  113. char *access_key;
  114. char *secret_key;
  115. char *user_token;
  116. char *bucket_location;
  117. CURL *curl;
  118. gboolean verbose;
  119. gboolean use_ssl;
  120. /* information from the last request */
  121. char *last_message;
  122. guint last_response_code;
  123. s3_error_code_t last_s3_error_code;
  124. CURLcode last_curl_code;
  125. guint last_num_retries;
  126. void *last_response_body;
  127. guint last_response_body_size;
  128. };
  129. typedef struct {
  130. CurlBuffer resp_buf;
  131. s3_write_func write_func;
  132. s3_reset_func reset_func;
  133. gpointer write_data;
  134. gboolean headers_done;
  135. char *etag;
  136. } S3InternalData;
  /* Type of a callback that examines response headers one at a time.
   *
   * @note the signature is the same as that of a CURLOPT_HEADERFUNCTION
   *
   * @param data: the buffer holding the header data to read
   * @param size: the size in bytes of each "element" of the buffer
   * @param nmemb: the number of elements in the buffer; the buffer is
   *               therefore size*nmemb bytes long
   * @param stream: the header_data (an opaque pointer)
   *
   * @return the number of bytes consumed, which should equal the buffer
   *         size, or CURL_WRITEFUNC_PAUSE to pause.
   */
  typedef size_t (*s3_header_func)(void *data,
                                   size_t size,
                                   size_t nmemb,
                                   void *stream);
  152. /*
  153. * S3 errors */
  154. /* (see preprocessor magic in s3.h) */
  155. static char * s3_error_code_names[] = {
  156. #define S3_ERROR(NAME) #NAME
  157. S3_ERROR_LIST
  158. #undef S3_ERROR
  159. };
  160. /* Convert an s3 error name to an error code. This function
  161. * matches strings case-insensitively, and is appropriate for use
  162. * on data from the network.
  163. *
  164. * @param s3_error_code: the error name
  165. * @returns: the error code (see constants in s3.h)
  166. */
  167. static s3_error_code_t
  168. s3_error_code_from_name(char *s3_error_name);
  169. /* Convert an s3 error code to a string
  170. *
  171. * @param s3_error_code: the error code to convert
  172. * @returns: statically allocated string
  173. */
  174. static const char *
  175. s3_error_name_from_code(s3_error_code_t s3_error_code);
  176. /*
  177. * result handling */
  178. /* result handling is specified by a static array of result_handling structs,
  179. * which match based on response_code (from HTTP) and S3 error code. The result
  180. * given for the first match is used. 0 acts as a wildcard for both response_code
  181. * and s3_error_code. The list is terminated with a struct containing 0 for both
  182. * response_code and s3_error_code; the result for that struct is the default
  183. * result.
  184. *
  185. * See RESULT_HANDLING_ALWAYS_RETRY for an example.
  186. */
  /* Outcome assigned to a response by the result-handling tables. */
  typedef enum {
      S3_RESULT_RETRY = -1,   /* try the request again */
      S3_RESULT_FAIL  = 0,    /* give up */
      S3_RESULT_OK    = 1     /* the request succeeded */
  } s3_result_t;
  192. typedef struct result_handling {
  193. guint response_code;
  194. s3_error_code_t s3_error_code;
  195. CURLcode curl_code;
  196. s3_result_t result;
  197. } result_handling_t;
  198. /* Lookup a result in C{result_handling}.
  199. *
  200. * @param result_handling: array of handling specifications
  201. * @param response_code: response code from operation
  202. * @param s3_error_code: s3 error code from operation, if any
  203. * @param curl_code: the CURL error, if any
  204. * @returns: the matching result
  205. */
  206. static s3_result_t
  207. lookup_result(const result_handling_t *result_handling,
  208. guint response_code,
  209. s3_error_code_t s3_error_code,
  210. CURLcode curl_code);
  211. /*
  212. * Precompiled regular expressions */
  static regex_t etag_regex;
  static regex_t error_name_regex;
  static regex_t message_regex;
  static regex_t subdomain_regex;
  static regex_t location_con_regex;
  215. /*
  216. * Utility functions
  217. */
  218. /* Construct the URL for an Amazon S3 REST request.
  219. *
  220. * A new string is allocated and returned; freeing it is the responsibility of the caller.
  221. *
  222. * @param bucket: the bucket being accessed, or NULL for none
  223. * @param key: the key being accessed, or NULL for none
  224. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  225. * @param query: the query string to append (without the initial '?'), or NULL for none
  226. * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
  227. * @param use_ssl: if TRUE, the URL uses the https scheme; otherwise http
  228. */
  229. static char *
  230. build_url(const char *bucket,
  231. const char *key,
  232. const char *subresource,
  233. const char *query,
  234. gboolean use_subdomain,
  235. gboolean use_ssl);
  236. /* Create proper authorization headers for an Amazon S3 REST
  237. * request to C{headers}.
  238. *
  239. * @note: C{X-Amz} headers (in C{headers}) must
  240. * - be in lower-case
  241. * - be in alphabetical order
  242. * - have no spaces around the colon
  243. * (don't yell at me -- see the Amazon Developer Guide)
  244. *
  245. * @param hdl: the S3Handle object
  246. * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
  247. * @param bucket: the bucket being accessed, or NULL for none
  248. * @param key: the key being accessed, or NULL for none
  249. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  250. * @param md5_hash: the MD5 hash of the request body, or NULL for none
  251. * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
  252. */
  253. static struct curl_slist *
  254. authenticate_request(S3Handle *hdl,
  255. const char *verb,
  256. const char *bucket,
  257. const char *key,
  258. const char *subresource,
  259. const char *md5_hash,
  260. gboolean use_subdomain);
  261. /* Interpret the response to an S3 operation, assuming CURL completed its request
  262. * successfully. This function fills in the relevant C{hdl->last*} members.
  263. *
  264. * @param hdl: The S3Handle object
  265. * @param body: the response body
  266. * @param body_len: the length of the response body
  267. * @param etag: The response's ETag header
  268. * @param content_md5: The hex-encoded MD5 hash of the request body,
  269. * which will be checked against the response's ETag header.
  270. * If NULL, the header is not checked.
  271. * If non-NULL, then the body should have the response headers at its beginning.
  272. * @returns: TRUE if the response should be retried (e.g., network error)
  273. */
  274. static gboolean
  275. interpret_response(S3Handle *hdl,
  276. CURLcode curl_code,
  277. char *curl_error_buffer,
  278. gchar *body,
  279. guint body_len,
  280. const char *etag,
  281. const char *content_md5);
  282. /* Perform an S3 operation. This function handles all of the details
  283. * of retrying requests and so on.
  284. *
  285. * The concepts of bucket and keys are defined by the Amazon S3 API.
  286. * See: "Components of Amazon S3" - API Version 2006-03-01 pg. 8
  287. *
  288. * Individual sub-resources are defined in several places. In the REST API,
  289. * they are represented by a "flag" in the "query string".
  290. * See: "Constructing the CanonicalizedResource Element" - API Version 2006-03-01 pg. 60
  291. *
  292. * @param hdl: the S3Handle object
  293. * @param verb: the HTTP request method
  294. * @param bucket: the bucket to access, or NULL for none
  295. * @param key: the key to access, or NULL for none
  296. * @param subresource: the "sub-resource" to request (e.g. "acl") or NULL for none
  297. * @param query: the query string to send (not including the initial '?'),
  298. * or NULL for none
  299. * @param read_func: the callback for reading data
  300. * Will use s3_empty_read_func if NULL is passed in.
  301. * @param read_reset_func: the callback to reset reading data
  302. * @param size_func: the callback to get the number of bytes to upload
  303. * @param md5_func: the callback to get the MD5 hash of the data to upload
  304. * @param read_data: pointer to pass to the above functions
  305. * @param write_func: the callback for writing data.
  306. * Will use s3_counter_write_func if NULL is passed in.
  307. * @param write_reset_func: the callback to reset writing data
  308. * @param write_data: pointer to pass to C{write_func}
  309. * @param progress_func: the callback for progress information
  310. * @param progress_data: pointer to pass to C{progress_func}
  311. * @param result_handling: instructions for handling the results; see above.
  312. * @returns: the result specified by result_handling; details of the response
  313. * are then available in C{hdl->last*}
  314. */
  315. static s3_result_t
  316. perform_request(S3Handle *hdl,
  317. const char *verb,
  318. const char *bucket,
  319. const char *key,
  320. const char *subresource,
  321. const char *query,
  322. s3_read_func read_func,
  323. s3_reset_func read_reset_func,
  324. s3_size_func size_func,
  325. s3_md5_func md5_func,
  326. gpointer read_data,
  327. s3_write_func write_func,
  328. s3_reset_func write_reset_func,
  329. gpointer write_data,
  330. s3_progress_func progress_func,
  331. gpointer progress_data,
  332. const result_handling_t *result_handling);
  333. /*
  334. * a CURLOPT_WRITEFUNCTION to save part of the response in memory and
  335. * call an external function if one was provided.
  336. */
  337. static size_t
  338. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream);
  339. /*
  340. * a function to reset to our internal buffer
  341. */
  342. static void
  343. s3_internal_reset_func(void * stream);
  344. /*
  345. * a CURLOPT_HEADERFUNCTION to save the ETag header only.
  346. */
  347. static size_t
  348. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream);
  349. static gboolean
  350. compile_regexes(void);
  351. /*
  352. * Static function implementations
  353. */
  354. static s3_error_code_t
  355. s3_error_code_from_name(char *s3_error_name)
  356. {
  357. int i;
  358. if (!s3_error_name) return S3_ERROR_Unknown;
  359. /* do a brute-force search through the list, since it's not sorted */
  360. for (i = 0; i < S3_ERROR_END; i++) {
  361. if (g_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
  362. return i;
  363. }
  364. return S3_ERROR_Unknown;
  365. }
  366. static const char *
  367. s3_error_name_from_code(s3_error_code_t s3_error_code)
  368. {
  369. if (s3_error_code >= S3_ERROR_END)
  370. s3_error_code = S3_ERROR_Unknown;
  371. return s3_error_code_names[s3_error_code];
  372. }
  373. gboolean
  374. s3_curl_supports_ssl(void)
  375. {
  376. static int supported = -1;
  377. if (supported == -1) {
  378. #if defined(CURL_VERSION_SSL)
  379. curl_version_info_data *info = curl_version_info(CURLVERSION_NOW);
  380. if (info->features & CURL_VERSION_SSL)
  381. supported = 1;
  382. else
  383. supported = 0;
  384. #else
  385. supported = 0;
  386. #endif
  387. }
  388. return supported;
  389. }
  390. static s3_result_t
  391. lookup_result(const result_handling_t *result_handling,
  392. guint response_code,
  393. s3_error_code_t s3_error_code,
  394. CURLcode curl_code)
  395. {
  396. while (result_handling->response_code
  397. || result_handling->s3_error_code
  398. || result_handling->curl_code) {
  399. if ((result_handling->response_code && result_handling->response_code != response_code)
  400. || (result_handling->s3_error_code && result_handling->s3_error_code != s3_error_code)
  401. || (result_handling->curl_code && result_handling->curl_code != curl_code)) {
  402. result_handling++;
  403. continue;
  404. }
  405. return result_handling->result;
  406. }
  407. /* return the result for the terminator, as the default */
  408. return result_handling->result;
  409. }
  410. static char *
  411. build_url(const char *bucket,
  412. const char *key,
  413. const char *subresource,
  414. const char *query,
  415. gboolean use_subdomain,
  416. gboolean use_ssl)
  417. {
  418. GString *url = NULL;
  419. char *esc_bucket = NULL, *esc_key = NULL;
  420. /* scheme */
  421. url = g_string_new("http");
  422. if (use_ssl)
  423. g_string_append(url, "s");
  424. g_string_append(url, "://");
  425. /* domain */
  426. if (use_subdomain && bucket)
  427. g_string_append_printf(url, "%s.s3.amazonaws.com/", bucket);
  428. else
  429. g_string_append(url, "s3.amazonaws.com/");
  430. /* path */
  431. if (!use_subdomain && bucket) {
  432. esc_bucket = curl_escape(bucket, 0);
  433. if (!esc_bucket) goto cleanup;
  434. g_string_append_printf(url, "%s", esc_bucket);
  435. if (key)
  436. g_string_append(url, "/");
  437. }
  438. if (key) {
  439. esc_key = curl_escape(key, 0);
  440. if (!esc_key) goto cleanup;
  441. g_string_append_printf(url, "%s", esc_key);
  442. }
  443. /* query string */
  444. if (subresource || query)
  445. g_string_append(url, "?");
  446. if (subresource)
  447. g_string_append(url, subresource);
  448. if (subresource && query)
  449. g_string_append(url, "&");
  450. if (query)
  451. g_string_append(url, query);
  452. cleanup:
  453. if (esc_bucket) curl_free(esc_bucket);
  454. if (esc_key) curl_free(esc_key);
  455. return g_string_free(url, FALSE);
  456. }
  457. static struct curl_slist *
  458. authenticate_request(S3Handle *hdl,
  459. const char *verb,
  460. const char *bucket,
  461. const char *key,
  462. const char *subresource,
  463. const char *md5_hash,
  464. gboolean use_subdomain)
  465. {
  466. time_t t;
  467. struct tm tmp;
  468. char date[100];
  469. char *buf = NULL;
  470. HMAC_CTX ctx;
  471. GByteArray *md = NULL;
  472. char *auth_base64 = NULL;
  473. struct curl_slist *headers = NULL;
  474. char *esc_bucket = NULL, *esc_key = NULL;
  475. GString *auth_string = NULL;
  476. /* Build the string to sign, per the S3 spec.
  477. * See: "Authenticating REST Requests" - API Version 2006-03-01 pg 58
  478. */
  479. /* verb */
  480. auth_string = g_string_new(verb);
  481. g_string_append(auth_string, "\n");
  482. /* Content-MD5 header */
  483. if (md5_hash)
  484. g_string_append(auth_string, md5_hash);
  485. g_string_append(auth_string, "\n");
  486. /* Content-Type is empty*/
  487. g_string_append(auth_string, "\n");
  488. /* calculate the date */
  489. t = time(NULL);
  490. #ifdef _WIN32
  491. if (!localtime_s(&tmp, &t)) g_debug("localtime error");
  492. #else
  493. if (!localtime_r(&t, &tmp)) perror("localtime");
  494. #endif
  495. if (!strftime(date, sizeof(date), "%a, %d %b %Y %H:%M:%S %Z", &tmp))
  496. perror("strftime");
  497. g_string_append(auth_string, date);
  498. g_string_append(auth_string, "\n");
  499. if (hdl->user_token) {
  500. g_string_append(auth_string, AMAZON_SECURITY_HEADER);
  501. g_string_append(auth_string, ":");
  502. g_string_append(auth_string, hdl->user_token);
  503. g_string_append(auth_string, ",");
  504. g_string_append(auth_string, STS_PRODUCT_TOKEN);
  505. g_string_append(auth_string, "\n");
  506. }
  507. /* CanonicalizedResource */
  508. g_string_append(auth_string, "/");
  509. if (bucket) {
  510. if (use_subdomain)
  511. g_string_append(auth_string, bucket);
  512. else {
  513. esc_bucket = curl_escape(bucket, 0);
  514. if (!esc_bucket) goto cleanup;
  515. g_string_append(auth_string, esc_bucket);
  516. }
  517. }
  518. if (bucket && (use_subdomain || key))
  519. g_string_append(auth_string, "/");
  520. if (key) {
  521. esc_key = curl_escape(key, 0);
  522. if (!esc_key) goto cleanup;
  523. g_string_append(auth_string, esc_key);
  524. }
  525. if (subresource) {
  526. g_string_append(auth_string, "?");
  527. g_string_append(auth_string, subresource);
  528. }
  529. /* run HMAC-SHA1 on the canonicalized string */
  530. md = g_byte_array_sized_new(EVP_MAX_MD_SIZE+1);
  531. HMAC_CTX_init(&ctx);
  532. HMAC_Init_ex(&ctx, hdl->secret_key, (int) strlen(hdl->secret_key), EVP_sha1(), NULL);
  533. HMAC_Update(&ctx, (unsigned char*) auth_string->str, auth_string->len);
  534. HMAC_Final(&ctx, md->data, &md->len);
  535. HMAC_CTX_cleanup(&ctx);
  536. auth_base64 = s3_base64_encode(md);
  537. /* append the new headers */
  538. if (hdl->user_token) {
  539. /* Devpay headers are included in hash. */
  540. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", hdl->user_token);
  541. headers = curl_slist_append(headers, buf);
  542. g_free(buf);
  543. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", STS_PRODUCT_TOKEN);
  544. headers = curl_slist_append(headers, buf);
  545. g_free(buf);
  546. }
  547. buf = g_strdup_printf("Authorization: AWS %s:%s",
  548. hdl->access_key, auth_base64);
  549. headers = curl_slist_append(headers, buf);
  550. g_free(buf);
  551. if (md5_hash && '\0' != md5_hash[0]) {
  552. buf = g_strdup_printf("Content-MD5: %s", md5_hash);
  553. headers = curl_slist_append(headers, buf);
  554. g_free(buf);
  555. }
  556. buf = g_strdup_printf("Date: %s", date);
  557. headers = curl_slist_append(headers, buf);
  558. g_free(buf);
  559. cleanup:
  560. g_free(esc_bucket);
  561. g_free(esc_key);
  562. g_byte_array_free(md, TRUE);
  563. g_free(auth_base64);
  564. g_string_free(auth_string, TRUE);
  565. return headers;
  566. }
  567. static gboolean
  568. interpret_response(S3Handle *hdl,
  569. CURLcode curl_code,
  570. char *curl_error_buffer,
  571. gchar *body,
  572. guint body_len,
  573. const char *etag,
  574. const char *content_md5)
  575. {
  576. long response_code = 0;
  577. regmatch_t pmatch[2];
  578. char *error_name = NULL, *message = NULL;
  579. char *body_copy = NULL;
  580. gboolean ret = TRUE;
  581. if (!hdl) return FALSE;
  582. if (hdl->last_message) g_free(hdl->last_message);
  583. hdl->last_message = NULL;
  584. /* bail out from a CURL error */
  585. if (curl_code != CURLE_OK) {
  586. hdl->last_curl_code = curl_code;
  587. hdl->last_message = g_strdup_printf("CURL error: %s", curl_error_buffer);
  588. return FALSE;
  589. }
  590. /* CURL seems to think things were OK, so get its response code */
  591. curl_easy_getinfo(hdl->curl, CURLINFO_RESPONSE_CODE, &response_code);
  592. hdl->last_response_code = response_code;
  593. /* check ETag, if present */
  594. if (etag && content_md5 && 200 == response_code) {
  595. if (etag && g_strcasecmp(etag, content_md5))
  596. hdl->last_message = g_strdup("S3 Error: Possible data corruption (ETag returned by Amazon did not match the MD5 hash of the data sent)");
  597. else
  598. ret = FALSE;
  599. return ret;
  600. }
  601. if (200 <= response_code && response_code < 400) {
  602. /* 2xx and 3xx codes won't have a response body we care about */
  603. hdl->last_s3_error_code = S3_ERROR_None;
  604. return FALSE;
  605. }
  606. /* Now look at the body to try to get the actual Amazon error message. Rather
  607. * than parse out the XML, just use some regexes. */
  608. /* impose a reasonable limit on body size */
  609. if (body_len > MAX_ERROR_RESPONSE_LEN) {
  610. hdl->last_message = g_strdup("S3 Error: Unknown (response body too large to parse)");
  611. return FALSE;
  612. } else if (!body || body_len == 0) {
  613. hdl->last_message = g_strdup("S3 Error: Unknown (empty response body)");
  614. return TRUE; /* perhaps a network error; retry the request */
  615. }
  616. /* use strndup to get a zero-terminated string */
  617. body_copy = g_strndup(body, body_len);
  618. if (!body_copy) goto cleanup;
  619. if (!s3_regexec_wrap(&error_name_regex, body_copy, 2, pmatch, 0))
  620. error_name = find_regex_substring(body_copy, pmatch[1]);
  621. if (!s3_regexec_wrap(&message_regex, body_copy, 2, pmatch, 0))
  622. message = find_regex_substring(body_copy, pmatch[1]);
  623. if (error_name) {
  624. hdl->last_s3_error_code = s3_error_code_from_name(error_name);
  625. }
  626. if (message) {
  627. hdl->last_message = message;
  628. message = NULL; /* steal the reference to the string */
  629. }
  630. cleanup:
  631. g_free(body_copy);
  632. g_free(message);
  633. g_free(error_name);
  634. return FALSE;
  635. }
  636. /* a CURLOPT_READFUNCTION to read data from a buffer. */
  637. size_t
  638. s3_buffer_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  639. {
  640. CurlBuffer *data = stream;
  641. guint bytes_desired = (guint) size * nmemb;
  642. /* check the number of bytes remaining, just to be safe */
  643. if (bytes_desired > data->buffer_len - data->buffer_pos)
  644. bytes_desired = data->buffer_len - data->buffer_pos;
  645. memcpy((char *)ptr, data->buffer + data->buffer_pos, bytes_desired);
  646. data->buffer_pos += bytes_desired;
  647. return bytes_desired;
  648. }
  649. size_t
  650. s3_buffer_size_func(void *stream)
  651. {
  652. CurlBuffer *data = stream;
  653. return data->buffer_len;
  654. }
  655. GByteArray*
  656. s3_buffer_md5_func(void *stream)
  657. {
  658. CurlBuffer *data = stream;
  659. GByteArray req_body_gba = {(guint8 *)data->buffer, data->buffer_len};
  660. return s3_compute_md5_hash(&req_body_gba);
  661. }
  662. void
  663. s3_buffer_reset_func(void *stream)
  664. {
  665. CurlBuffer *data = stream;
  666. data->buffer_pos = 0;
  667. }
  668. /* a CURLOPT_WRITEFUNCTION to write data to a buffer. */
  669. size_t
  670. s3_buffer_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  671. {
  672. CurlBuffer * data = stream;
  673. guint new_bytes = (guint) size * nmemb;
  674. guint bytes_needed = data->buffer_pos + new_bytes;
  675. /* error out if the new size is greater than the maximum allowed */
  676. if (data->max_buffer_size && bytes_needed > data->max_buffer_size)
  677. return 0;
  678. /* reallocate if necessary. We use exponential sizing to make this
  679. * happen less often. */
  680. if (bytes_needed > data->buffer_len) {
  681. guint new_size = MAX(bytes_needed, data->buffer_len * 2);
  682. if (data->max_buffer_size) {
  683. new_size = MIN(new_size, data->max_buffer_size);
  684. }
  685. data->buffer = g_realloc(data->buffer, new_size);
  686. data->buffer_len = new_size;
  687. }
  688. if (!data->buffer)
  689. return 0; /* returning zero signals an error to libcurl */
  690. /* actually copy the data to the buffer */
  691. memcpy(data->buffer + data->buffer_pos, ptr, new_bytes);
  692. data->buffer_pos += new_bytes;
  693. /* signal success to curl */
  694. return new_bytes;
  695. }
  696. /* a CURLOPT_READFUNCTION that writes nothing. */
  697. size_t
  698. s3_empty_read_func(G_GNUC_UNUSED void *ptr, G_GNUC_UNUSED size_t size, G_GNUC_UNUSED size_t nmemb, G_GNUC_UNUSED void * stream)
  699. {
  700. return 0;
  701. }
  702. size_t
  703. s3_empty_size_func(G_GNUC_UNUSED void *stream)
  704. {
  705. return 0;
  706. }
  707. GByteArray*
  708. s3_empty_md5_func(G_GNUC_UNUSED void *stream)
  709. {
  710. static const GByteArray empty = {(guint8 *) "", 0};
  711. return s3_compute_md5_hash(&empty);
  712. }
  713. /* a CURLOPT_WRITEFUNCTION to write data that just counts data.
  714. * s3_write_data should be NULL or a pointer to an gint64.
  715. */
  716. size_t
  717. s3_counter_write_func(G_GNUC_UNUSED void *ptr, size_t size, size_t nmemb, void *stream)
  718. {
  719. gint64 *count = (gint64*) stream, inc = nmemb*size;
  720. if (count) *count += inc;
  721. return inc;
  722. }
  723. void
  724. s3_counter_reset_func(void *stream)
  725. {
  726. gint64 *count = (gint64*) stream;
  727. if (count) *count = 0;
  728. }
  729. #ifdef _WIN32
  730. /* a CURLOPT_READFUNCTION to read data from a file. */
  731. size_t
  732. s3_file_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  733. {
  734. HANDLE *hFile = (HANDLE *) stream;
  735. DWORD bytes_read;
  736. ReadFile(hFile, ptr, (DWORD) size*nmemb, &bytes_read, NULL);
  737. return bytes_read;
  738. }
  739. size_t
  740. s3_file_size_func(void *stream)
  741. {
  742. HANDLE *hFile = (HANDLE *) stream;
  743. DWORD size = GetFileSize(hFile, NULL);
  744. if (INVALID_FILE_SIZE == size) {
  745. return -1;
  746. } else {
  747. return size;
  748. }
  749. }
  750. GByteArray*
  751. s3_file_md5_func(void *stream)
  752. {
  753. #define S3_MD5_BUF_SIZE (10*1024)
  754. HANDLE *hFile = (HANDLE *) stream;
  755. guint8 buf[S3_MD5_BUF_SIZE];
  756. DWORD bytes_read;
  757. MD5_CTX md5_ctx;
  758. GByteArray *ret = NULL;
  759. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  760. ret = g_byte_array_sized_new(S3_MD5_HASH_BYTE_LEN);
  761. g_byte_array_set_size(ret, S3_MD5_HASH_BYTE_LEN);
  762. MD5_Init(&md5_ctx);
  763. while (ReadFile(hFile, buf, S3_MD5_BUF_SIZE, &bytes_read, NULL)) {
  764. MD5_Update(&md5_ctx, buf, bytes_read);
  765. }
  766. MD5_Final(ret->data, &md5_ctx);
  767. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  768. return ret;
  769. #undef S3_MD5_BUF_SIZE
  770. }
  771. GByteArray*
  772. s3_file_reset_func(void *stream)
  773. {
  774. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  775. }
  776. /* a CURLOPT_WRITEFUNCTION to write data to a file. */
  777. size_t
  778. s3_file_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  779. {
  780. HANDLE *hFile = (HANDLE *) stream;
  781. DWORD bytes_written;
  782. WriteFile(hFile, ptr, (DWORD) size*nmemb, &bytes_written, NULL);
  783. return bytes_written;
  784. }
  785. #endif
  786. static int
  787. curl_debug_message(CURL *curl G_GNUC_UNUSED,
  788. curl_infotype type,
  789. char *s,
  790. size_t len,
  791. void *unused G_GNUC_UNUSED)
  792. {
  793. char *lineprefix;
  794. char *message;
  795. char **lines, **line;
  796. switch (type) {
  797. case CURLINFO_TEXT:
  798. lineprefix="";
  799. break;
  800. case CURLINFO_HEADER_IN:
  801. lineprefix="Hdr In: ";
  802. break;
  803. case CURLINFO_HEADER_OUT:
  804. lineprefix="Hdr Out: ";
  805. break;
  806. default:
  807. /* ignore data in/out -- nobody wants to see that in the
  808. * debug logs! */
  809. return 0;
  810. }
  811. /* split the input into lines */
  812. message = g_strndup(s, (gsize) len);
  813. lines = g_strsplit(message, "\n", -1);
  814. g_free(message);
  815. for (line = lines; *line; line++) {
  816. if (**line == '\0') continue; /* skip blank lines */
  817. g_debug("%s%s", lineprefix, *line);
  818. }
  819. g_strfreev(lines);
  820. return 0;
  821. }
  822. static s3_result_t
  823. perform_request(S3Handle *hdl,
  824. const char *verb,
  825. const char *bucket,
  826. const char *key,
  827. const char *subresource,
  828. const char *query,
  829. s3_read_func read_func,
  830. s3_reset_func read_reset_func,
  831. s3_size_func size_func,
  832. s3_md5_func md5_func,
  833. gpointer read_data,
  834. s3_write_func write_func,
  835. s3_reset_func write_reset_func,
  836. gpointer write_data,
  837. s3_progress_func progress_func,
  838. gpointer progress_data,
  839. const result_handling_t *result_handling)
  840. {
  841. gboolean use_subdomain;
  842. char *url = NULL;
  843. s3_result_t result = S3_RESULT_FAIL; /* assume the worst.. */
  844. CURLcode curl_code = CURLE_OK;
  845. char curl_error_buffer[CURL_ERROR_SIZE] = "";
  846. struct curl_slist *headers = NULL;
  847. S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, NULL};
  848. gboolean should_retry;
  849. guint retries = 0;
  850. gulong backoff = EXPONENTIAL_BACKOFF_START_USEC;
  851. /* corresponds to PUT, HEAD, GET, and POST */
  852. int curlopt_upload = 0, curlopt_nobody = 0, curlopt_httpget = 0, curlopt_post = 0;
  853. /* do we want to examine the headers */
  854. const char *curlopt_customrequest = NULL;
  855. /* for MD5 calculation */
  856. GByteArray *md5_hash = NULL;
  857. gchar *md5_hash_hex = NULL, *md5_hash_b64 = NULL;
  858. size_t request_body_size = 0;
  859. g_assert(hdl != NULL && hdl->curl != NULL);
  860. s3_reset(hdl);
  861. use_subdomain = hdl->bucket_location? TRUE : FALSE;
  862. url = build_url(bucket, key, subresource, query, use_subdomain, hdl->use_ssl);
  863. if (!url) goto cleanup;
  864. /* libcurl may behave strangely if these are not set correctly */
  865. if (!strncmp(verb, "PUT", 4)) {
  866. curlopt_upload = 1;
  867. } else if (!strncmp(verb, "GET", 4)) {
  868. curlopt_httpget = 1;
  869. } else if (!strncmp(verb, "POST", 5)) {
  870. curlopt_post = 1;
  871. } else if (!strncmp(verb, "HEAD", 5)) {
  872. curlopt_nobody = 1;
  873. } else {
  874. curlopt_customrequest = verb;
  875. }
  876. if (size_func) {
  877. request_body_size = size_func(read_data);
  878. }
  879. if (md5_func) {
  880. md5_hash = md5_func(read_data);
  881. if (md5_hash) {
  882. md5_hash_b64 = s3_base64_encode(md5_hash);
  883. md5_hash_hex = s3_hex_encode(md5_hash);
  884. g_byte_array_free(md5_hash, TRUE);
  885. }
  886. }
  887. if (!read_func) {
  888. /* Curl will use fread() otherwise */
  889. read_func = s3_empty_read_func;
  890. }
  891. if (write_func) {
  892. int_writedata.write_func = write_func;
  893. int_writedata.reset_func = write_reset_func;
  894. int_writedata.write_data = write_data;
  895. } else {
  896. /* Curl will use fwrite() otherwise */
  897. int_writedata.write_func = s3_counter_write_func;
  898. int_writedata.reset_func = s3_counter_reset_func;
  899. int_writedata.write_data = NULL;
  900. }
  901. while (1) {
  902. /* reset things */
  903. if (headers) {
  904. curl_slist_free_all(headers);
  905. }
  906. curl_error_buffer[0] = '\0';
  907. if (read_reset_func) {
  908. read_reset_func(read_data);
  909. }
  910. /* calls write_reset_func */
  911. s3_internal_reset_func(&int_writedata);
  912. /* set up the request */
  913. headers = authenticate_request(hdl, verb, bucket, key, subresource,
  914. md5_hash_b64, hdl->bucket_location? TRUE : FALSE);
  915. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_VERBOSE, hdl->verbose)))
  916. goto curl_error;
  917. if (hdl->verbose) {
  918. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_DEBUGFUNCTION,
  919. curl_debug_message)))
  920. goto curl_error;
  921. }
  922. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_ERRORBUFFER,
  923. curl_error_buffer)))
  924. goto curl_error;
  925. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOPROGRESS, 1)))
  926. goto curl_error;
  927. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_FOLLOWLOCATION, 1)))
  928. goto curl_error;
  929. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_URL, url)))
  930. goto curl_error;
  931. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPHEADER,
  932. headers)))
  933. goto curl_error;
  934. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEFUNCTION, s3_internal_write_func)))
  935. goto curl_error;
  936. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEDATA, &int_writedata)))
  937. goto curl_error;
  938. /* Note: we always have to set this apparently, for consistent "end of header" detection */
  939. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERFUNCTION, s3_internal_header_func)))
  940. goto curl_error;
  941. /* Note: if set, CURLOPT_HEADERDATA seems to also be used for CURLOPT_WRITEDATA ? */
  942. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERDATA, &int_writedata)))
  943. goto curl_error;
  944. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSFUNCTION, progress_func)))
  945. goto curl_error;
  946. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSDATA, progress_data)))
  947. goto curl_error;
  948. #ifdef CURLOPT_INFILESIZE_LARGE
  949. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)request_body_size)))
  950. goto curl_error;
  951. #else
  952. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE, (long)request_body_size)))
  953. goto curl_error;
  954. #endif
  955. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPGET, curlopt_httpget)))
  956. goto curl_error;
  957. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_UPLOAD, curlopt_upload)))
  958. goto curl_error;
  959. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_POST, curlopt_post)))
  960. goto curl_error;
  961. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOBODY, curlopt_nobody)))
  962. goto curl_error;
  963. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CUSTOMREQUEST,
  964. curlopt_customrequest)))
  965. goto curl_error;
  966. if (curlopt_upload) {
  967. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION, read_func)))
  968. goto curl_error;
  969. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA, read_data)))
  970. goto curl_error;
  971. } else {
  972. /* Clear request_body options. */
  973. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION,
  974. NULL)))
  975. goto curl_error;
  976. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA,
  977. NULL)))
  978. goto curl_error;
  979. }
  980. /* Perform the request */
  981. curl_code = curl_easy_perform(hdl->curl);
  982. /* interpret the response into hdl->last* */
  983. curl_error: /* (label for short-circuiting the curl_easy_perform call) */
  984. should_retry = interpret_response(hdl, curl_code, curl_error_buffer,
  985. int_writedata.resp_buf.buffer, int_writedata.resp_buf.buffer_pos, int_writedata.etag, md5_hash_hex);
  986. /* and, unless we know we need to retry, see what we're to do now */
  987. if (!should_retry) {
  988. result = lookup_result(result_handling, hdl->last_response_code,
  989. hdl->last_s3_error_code, hdl->last_curl_code);
  990. /* break out of the while(1) unless we're retrying */
  991. if (result != S3_RESULT_RETRY)
  992. break;
  993. }
  994. if (retries >= EXPONENTIAL_BACKOFF_MAX_RETRIES) {
  995. /* we're out of retries, so annotate hdl->last_message appropriately and bail
  996. * out. */
  997. char *m = g_strdup_printf("Too many retries; last message was '%s'", hdl->last_message);
  998. if (hdl->last_message) g_free(hdl->last_message);
  999. hdl->last_message = m;
  1000. result = S3_RESULT_FAIL;
  1001. break;
  1002. }
  1003. g_usleep(backoff);
  1004. retries++;
  1005. backoff *= EXPONENTIAL_BACKOFF_BASE;
  1006. }
  1007. if (result != S3_RESULT_OK) {
  1008. g_debug(_("%s %s failed with %d/%s"), verb, url,
  1009. hdl->last_response_code,
  1010. s3_error_name_from_code(hdl->last_s3_error_code));
  1011. }
  1012. cleanup:
  1013. g_free(url);
  1014. if (headers) curl_slist_free_all(headers);
  1015. g_free(md5_hash_b64);
  1016. g_free(md5_hash_hex);
  1017. /* we don't deallocate the response body -- we keep it for later */
  1018. hdl->last_response_body = int_writedata.resp_buf.buffer;
  1019. hdl->last_response_body_size = int_writedata.resp_buf.buffer_pos;
  1020. hdl->last_num_retries = retries;
  1021. return result;
  1022. }
  1023. static size_t
  1024. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1025. {
  1026. S3InternalData *data = (S3InternalData *) stream;
  1027. size_t bytes_saved;
  1028. if (!data->headers_done)
  1029. return size*nmemb;
  1030. bytes_saved = s3_buffer_write_func(ptr, size, nmemb, &data->resp_buf);
  1031. if (data->write_func) {
  1032. return data->write_func(ptr, size, nmemb, data->write_data);
  1033. } else {
  1034. return bytes_saved;
  1035. }
  1036. }
  1037. static void
  1038. s3_internal_reset_func(void * stream)
  1039. {
  1040. S3InternalData *data = (S3InternalData *) stream;
  1041. s3_buffer_reset_func(&data->resp_buf);
  1042. data->headers_done = FALSE;
  1043. data->etag = NULL;
  1044. if (data->reset_func) {
  1045. data->reset_func(data->write_data);
  1046. }
  1047. }
  1048. static size_t
  1049. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1050. {
  1051. static const char *final_header = "\r\n";
  1052. char *header;
  1053. regmatch_t pmatch[2];
  1054. S3InternalData *data = (S3InternalData *) stream;
  1055. header = g_strndup((gchar *) ptr, (gsize) size*nmemb);
  1056. if (!s3_regexec_wrap(&etag_regex, header, 2, pmatch, 0))
  1057. data->etag = find_regex_substring(header, pmatch[1]);
  1058. if (!strcmp(final_header, header))
  1059. data->headers_done = TRUE;
  1060. return size*nmemb;
  1061. }
  1062. static gboolean
  1063. compile_regexes(void)
  1064. {
  1065. #ifdef HAVE_REGEX_H
  1066. /* using POSIX regular expressions */
  1067. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1068. {"<Code>[[:space:]]*([^<]*)[[:space:]]*</Code>", REG_EXTENDED | REG_ICASE, &error_name_regex},
  1069. {"^ETag:[[:space:]]*\"([^\"]+)\"[[:space:]]*$", REG_EXTENDED | REG_ICASE | REG_NEWLINE, &etag_regex},
  1070. {"<Message>[[:space:]]*([^<]*)[[:space:]]*</Message>", REG_EXTENDED | REG_ICASE, &message_regex},
  1071. {"^[a-z0-9]((-*[a-z0-9])|(\\.[a-z0-9])){2,62}$", REG_EXTENDED | REG_NOSUB, &subdomain_regex},
  1072. {"(/>)|(>([^<]*)</LocationConstraint>)", REG_EXTENDED | REG_ICASE, &location_con_regex},
  1073. {NULL, 0, NULL}
  1074. };
  1075. char regmessage[1024];
  1076. int size, i;
  1077. int reg_result;
  1078. for (i = 0; regexes[i].str; i++) {
  1079. reg_result = regcomp(regexes[i].regex, regexes[i].str, regexes[i].flags);
  1080. if (reg_result != 0) {
  1081. size = regerror(reg_result, regexes[i].regex, regmessage, sizeof(regmessage));
  1082. g_error(_("Regex error: %s"), regmessage);
  1083. return FALSE;
  1084. }
  1085. }
  1086. #else /* ! HAVE_REGEX_H */
  1087. /* using PCRE via GLib */
  1088. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1089. {"<Code>\\s*([^<]*)\\s*</Code>",
  1090. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1091. &error_name_regex},
  1092. {"^ETag:\\s*\"([^\"]+)\"\\s*$",
  1093. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1094. &etag_regex},
  1095. {"<Message>\\s*([^<]*)\\s*</Message>",
  1096. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1097. &message_regex},
  1098. {"^[a-z0-9]((-*[a-z0-9])|(\\.[a-z0-9])){2,62}$",
  1099. G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE,
  1100. &subdomain_regex},
  1101. {"(/>)|(>([^<]*)</LocationConstraint>)",
  1102. G_REGEX_CASELESS,
  1103. &location_con_regex},
  1104. {NULL, 0, NULL}
  1105. };
  1106. int i;
  1107. GError *err = NULL;
  1108. for (i = 0; regexes[i].str; i++) {
  1109. *(regexes[i].regex) = g_regex_new(regexes[i].str, regexes[i].flags, 0, &err);
  1110. if (err) {
  1111. g_error(_("Regex error: %s"), err->message);
  1112. g_error_free(err);
  1113. return FALSE;
  1114. }
  1115. }
  1116. #endif
  1117. return TRUE;
  1118. }
  1119. /*
  1120. * Public function implementations
  1121. */
  1122. gboolean s3_init(void)
  1123. {
  1124. static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
  1125. static gboolean init = FALSE, ret;
  1126. /* n.b. curl_global_init is called in common-src/glib-util.c:glib_init() */
  1127. g_static_mutex_lock (&mutex);
  1128. if (!init) {
  1129. ret = compile_regexes();
  1130. init = TRUE;
  1131. }
  1132. g_static_mutex_unlock(&mutex);
  1133. return ret;
  1134. }
  1135. gboolean
  1136. s3_curl_location_compat(void)
  1137. {
  1138. curl_version_info_data *info;
  1139. info = curl_version_info(CURLVERSION_NOW);
  1140. return info->version_num > 0x070a02;
  1141. }
  1142. gboolean
  1143. s3_bucket_location_compat(const char *bucket)
  1144. {
  1145. return !s3_regexec_wrap(&subdomain_regex, bucket, 0, NULL, 0);
  1146. }
  1147. S3Handle *
  1148. s3_open(const char *access_key,
  1149. const char *secret_key,
  1150. const char *user_token,
  1151. const char *bucket_location
  1152. ) {
  1153. S3Handle *hdl;
  1154. hdl = g_new0(S3Handle, 1);
  1155. if (!hdl) goto error;
  1156. hdl->verbose = FALSE;
  1157. hdl->use_ssl = s3_curl_supports_ssl();
  1158. g_assert(access_key);
  1159. hdl->access_key = g_strdup(access_key);
  1160. g_assert(secret_key);
  1161. hdl->secret_key = g_strdup(secret_key);
  1162. /* NULL is okay */
  1163. hdl->user_token = g_strdup(user_token);
  1164. /* NULL is okay */
  1165. hdl->bucket_location = g_strdup(bucket_location);
  1166. hdl->curl = curl_easy_init();
  1167. if (!hdl->curl) goto error;
  1168. return hdl;
  1169. error:
  1170. s3_free(hdl);
  1171. return NULL;
  1172. }
  1173. void
  1174. s3_free(S3Handle *hdl)
  1175. {
  1176. s3_reset(hdl);
  1177. if (hdl) {
  1178. g_free(hdl->access_key);
  1179. g_free(hdl->secret_key);
  1180. if (hdl->user_token) g_free(hdl->user_token);
  1181. if (hdl->bucket_location) g_free(hdl->bucket_location);
  1182. if (hdl->curl) curl_easy_cleanup(hdl->curl);
  1183. g_free(hdl);
  1184. }
  1185. }
  1186. void
  1187. s3_reset(S3Handle *hdl)
  1188. {
  1189. if (hdl) {
  1190. /* We don't call curl_easy_reset here, because doing that in curl
  1191. * < 7.16 blanks the default CA certificate path, and there's no way
  1192. * to get it back. */
  1193. if (hdl->last_message) {
  1194. g_free(hdl->last_message);
  1195. hdl->last_message = NULL;
  1196. }
  1197. hdl->last_response_code = 0;
  1198. hdl->last_curl_code = 0;
  1199. hdl->last_s3_error_code = 0;
  1200. hdl->last_num_retries = 0;
  1201. if (hdl->last_response_body) {
  1202. g_free(hdl->last_response_body);
  1203. hdl->last_response_body = NULL;
  1204. }
  1205. hdl->last_response_body_size = 0;
  1206. }
  1207. }
  1208. void
  1209. s3_error(S3Handle *hdl,
  1210. const char **message,
  1211. guint *response_code,
  1212. s3_error_code_t *s3_error_code,
  1213. const char **s3_error_name,
  1214. CURLcode *curl_code,
  1215. guint *num_retries)
  1216. {
  1217. if (hdl) {
  1218. if (message) *message = hdl->last_message;
  1219. if (response_code) *response_code = hdl->last_response_code;
  1220. if (s3_error_code) *s3_error_code = hdl->last_s3_error_code;
  1221. if (s3_error_name) *s3_error_name = s3_error_name_from_code(hdl->last_s3_error_code);
  1222. if (curl_code) *curl_code = hdl->last_curl_code;
  1223. if (num_retries) *num_retries = hdl->last_num_retries;
  1224. } else {
  1225. /* no hdl? return something coherent, anyway */
  1226. if (message) *message = "NULL S3Handle";
  1227. if (response_code) *response_code = 0;
  1228. if (s3_error_code) *s3_error_code = 0;
  1229. if (s3_error_name) *s3_error_name = NULL;
  1230. if (curl_code) *curl_code = 0;
  1231. if (num_retries) *num_retries = 0;
  1232. }
  1233. }
  1234. void
  1235. s3_verbose(S3Handle *hdl, gboolean verbose)
  1236. {
  1237. hdl->verbose = verbose;
  1238. }
  1239. gboolean
  1240. s3_use_ssl(S3Handle *hdl, gboolean use_ssl)
  1241. {
  1242. gboolean ret = TRUE;
  1243. if (use_ssl & !s3_curl_supports_ssl()) {
  1244. ret = FALSE;
  1245. } else {
  1246. hdl->use_ssl = use_ssl;
  1247. }
  1248. return ret;
  1249. }
  1250. char *
  1251. s3_strerror(S3Handle *hdl)
  1252. {
  1253. const char *message;
  1254. guint response_code;
  1255. const char *s3_error_name;
  1256. CURLcode curl_code;
  1257. guint num_retries;
  1258. char s3_info[256] = "";
  1259. char response_info[16] = "";
  1260. char curl_info[32] = "";
  1261. char retries_info[32] = "";
  1262. s3_error(hdl, &message, &response_code, NULL, &s3_error_name, &curl_code, &num_retries);
  1263. if (!message)
  1264. message = "Unkonwn S3 error";
  1265. if (s3_error_name)
  1266. g_snprintf(s3_info, sizeof(s3_info), " (%s)", s3_error_name);
  1267. if (response_code)
  1268. g_snprintf(response_info, sizeof(response_info), " (HTTP %d)", response_code);
  1269. if (curl_code)
  1270. g_snprintf(curl_info, sizeof(curl_info), " (CURLcode %d)", curl_code);
  1271. if (num_retries)
  1272. g_snprintf(retries_info, sizeof(retries_info), " (after %d retries)", num_retries);
  1273. return g_strdup_printf("%s%s%s%s%s", message, s3_info, curl_info, response_info, retries_info);
  1274. }
  1275. /* Perform an upload. When this function returns, KEY and
  1276. * BUFFER remain the responsibility of the caller.
  1277. *
  1278. * @param self: the s3 device
  1279. * @param bucket: the bucket to which the upload should be made
  1280. * @param key: the key to which the upload should be made
  1281. * @param buffer: the data to be uploaded
  1282. * @param buffer_len: the length of the data to upload
  1283. * @returns: false if an error ocurred
  1284. */
  1285. gboolean
  1286. s3_upload(S3Handle *hdl,
  1287. const char *bucket,
  1288. const char *key,
  1289. s3_read_func read_func,
  1290. s3_reset_func reset_func,
  1291. s3_size_func size_func,
  1292. s3_md5_func md5_func,
  1293. gpointer read_data,
  1294. s3_progress_func progress_func,
  1295. gpointer progress_data)
  1296. {
  1297. s3_result_t result = S3_RESULT_FAIL;
  1298. static result_handling_t result_handling[] = {
  1299. { 200, 0, 0, S3_RESULT_OK },
  1300. RESULT_HANDLING_ALWAYS_RETRY,
  1301. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1302. };
  1303. g_assert(hdl != NULL);
  1304. result = perform_request(hdl, "PUT", bucket, key, NULL, NULL,
  1305. read_func, reset_func, size_func, md5_func, read_data,
  1306. NULL, NULL, NULL, progress_func, progress_data,
  1307. result_handling);
  1308. return result == S3_RESULT_OK;
  1309. }
  1310. /* Private structure for our "thunk", which tracks where the user is in the list
  1311. * of keys. */
  1312. struct list_keys_thunk {
  1313. GSList *filename_list; /* all pending filenames */
  1314. gboolean in_contents; /* look for "key" entities in here */
  1315. gboolean in_common_prefixes; /* look for "prefix" entities in here */
  1316. gboolean is_truncated;
  1317. gchar *next_marker;
  1318. gboolean want_text;
  1319. gchar *text;
  1320. gsize text_len;
  1321. };
  1322. /* Functions for a SAX parser to parse the XML from Amazon */
  1323. static void
  1324. list_start_element(GMarkupParseContext *context G_GNUC_UNUSED,
  1325. const gchar *element_name,
  1326. const gchar **attribute_names G_GNUC_UNUSED,
  1327. const gchar **attribute_values G_GNUC_UNUSED,
  1328. gpointer user_data,
  1329. GError **error G_GNUC_UNUSED)
  1330. {
  1331. struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
  1332. thunk->want_text = 0;
  1333. if (g_strcasecmp(element_name, "contents") == 0) {
  1334. thunk->in_contents = 1;
  1335. } else if (g_strcasecmp(element_name, "commonprefixes") == 0) {
  1336. thunk->in_common_prefixes = 1;
  1337. } else if (g_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
  1338. thunk->want_text = 1;
  1339. } else if (g_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
  1340. thunk->want_text = 1;
  1341. } else if (g_strcasecmp(element_name, "istruncated")) {
  1342. thunk->want_text = 1;
  1343. } else if (g_strcasecmp(element_name, "nextmarker")) {
  1344. thunk->want_text = 1;
  1345. }
  1346. }
  1347. static void
  1348. list_end_element(GMarkupParseContext *context G_GNUC_UNUSED,
  1349. const gchar *element_name,
  1350. gpointer user_data,
  1351. GError **error G_GNUC_UNUSED)
  1352. {
  1353. struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
  1354. if (g_strcasecmp(element_name, "contents") == 0) {
  1355. thunk->in_contents = 0;
  1356. } else if (g_strcasecmp(element_name, "commonprefixes") == 0) {
  1357. thunk->in_common_prefixes = 0;
  1358. } else if (g_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
  1359. thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
  1360. thunk->text = NULL;
  1361. } else if (g_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
  1362. thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
  1363. thunk->text = NULL;
  1364. } else if (g_strcasecmp(element_name, "istruncated") == 0) {
  1365. if (thunk->text && g_strncasecmp(thunk->text, "false", 5) != 0)
  1366. thunk->is_truncated = TRUE;
  1367. } else if (g_strcasecmp(element_name, "nextmarker") == 0) {
  1368. if (thunk->next_marker) g_free(thunk->next_marker);
  1369. thunk->next_marker = thunk->text;
  1370. thunk->text = NULL;
  1371. }
  1372. }
  1373. static void
  1374. list_text(GMarkupParseContext *context G_GNUC_UNUSED,
  1375. const gchar *text,
  1376. gsize text_len,
  1377. gpointer user_data,
  1378. GError **error G_GNUC_UNUSED)
  1379. {
  1380. struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
  1381. if (thunk->want_text) {
  1382. if (thunk->text) g_free(thunk->text);
  1383. thunk->text = g_strndup(text, text_len);
  1384. }
  1385. }
  1386. /* Perform a fetch from S3; several fetches may be involved in a
  1387. * single listing operation */
  1388. static s3_result_t
  1389. list_fetch(S3Handle *hdl,
  1390. const char *bucket,
  1391. const char *prefix,
  1392. const char *delimiter,
  1393. const char *marker,
  1394. const char *max_keys)
  1395. {
  1396. s3_result_t result = S3_RESULT_FAIL;
  1397. static result_handling_t result_handling[] = {
  1398. { 200, 0, 0, S3_RESULT_OK },
  1399. RESULT_HANDLING_ALWAYS_RETRY,
  1400. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1401. };
  1402. const char* pos_parts[][2] = {
  1403. {"prefix", prefix},
  1404. {"delimiter", delimiter},
  1405. {"marker", marker},
  1406. {"make-keys", max_keys},
  1407. {NULL, NULL}
  1408. };
  1409. char *esc_value;
  1410. GString *query;
  1411. guint i;
  1412. gboolean have_prev_part = FALSE;
  1413. /* loop over possible parts to build query string */
  1414. query = g_string_new("");
  1415. for (i = 0; pos_parts[i][0]; i++) {
  1416. if (pos_parts[i][1]) {
  1417. if (have_prev_part)
  1418. g_string_append(query, "&");
  1419. else
  1420. have_prev_part = TRUE;
  1421. esc_value = curl_escape(pos_parts[i][1], 0);
  1422. g_string_append_printf(query, "%s=%s", pos_parts[i][0], esc_value);
  1423. curl_free(esc_value);
  1424. }
  1425. }
  1426. /* and perform the request on that URI */
  1427. result = perform_request(hdl, "GET", bucket, NULL, NULL, query->str,
  1428. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  1429. result_handling);
  1430. if (query) g_string_free(query, TRUE);
  1431. return result;
  1432. }
  1433. gboolean
  1434. s3_list_keys(S3Handle *hdl,
  1435. const char *bucket,
  1436. const char *prefix,
  1437. const char *delimiter,
  1438. GSList **list)
  1439. {
  1440. struct list_keys_thunk thunk;
  1441. GMarkupParseContext *ctxt = NULL;
  1442. static GMarkupParser parser = { list_start_element, list_end_element, list_text, NULL, NULL };
  1443. GError *err = NULL;
  1444. s3_result_t result = S3_RESULT_FAIL;
  1445. g_assert(list);
  1446. *list = NULL;
  1447. thunk.filename_list = NULL;
  1448. thunk.text = NULL;
  1449. thunk.next_marker = NULL;
  1450. /* Loop until S3 has given us the entire picture */
  1451. do {
  1452. /* get some data from S3 */
  1453. result = list_fetch(hdl, bucket, prefix, delimiter, thunk.next_marker, NULL);
  1454. if (result != S3_RESULT_OK) goto cleanup;
  1455. /* run the parser over it */
  1456. thunk.in_contents = FALSE;
  1457. thunk.in_common_prefixes = FALSE;
  1458. thunk.is_truncated = FALSE;
  1459. thunk.want_text = FALSE;
  1460. ctxt = g_markup_parse_context_new(&parser, 0, (gpointer)&thunk, NULL);
  1461. if (!g_markup_parse_context_parse(ctxt, hdl->last_response_body,
  1462. hdl->last_response_body_size, &err)) {
  1463. if (hdl->last_message) g_free(hdl->last_message);
  1464. hdl->last_message = g_strdup(err->message);
  1465. result = S3_RESULT_FAIL;
  1466. goto cleanup;
  1467. }
  1468. if (!g_markup_parse_context_end_parse(ctxt, &err)) {
  1469. if (hdl->last_message) g_free(hdl->last_message);
  1470. hdl->last_message = g_strdup(err->message);
  1471. result = S3_RESULT_FAIL;
  1472. goto cleanup;
  1473. }
  1474. g_markup_parse_context_free(ctxt);
  1475. ctxt = NULL;
  1476. } while (thunk.next_marker);
  1477. cleanup:
  1478. if (err) g_error_free(err);
  1479. if (thunk.text) g_free(thunk.text);
  1480. if (thunk.next_marker) g_free(thunk.next_marker);
  1481. if (ctxt) g_markup_parse_context_free(ctxt);
  1482. if (result != S3_RESULT_OK) {
  1483. g_slist_free(thunk.filename_list);
  1484. return FALSE;
  1485. } else {
  1486. *list = thunk.filename_list;
  1487. return TRUE;
  1488. }
  1489. }
  1490. gboolean
  1491. s3_read(S3Handle *hdl,
  1492. const char *bucket,
  1493. const char *key,
  1494. s3_write_func write_func,
  1495. s3_reset_func reset_func,
  1496. gpointer write_data,
  1497. s3_progress_func progress_func,
  1498. gpointer progress_data)
  1499. {
  1500. s3_result_t result = S3_RESULT_FAIL;
  1501. static result_handling_t result_handling[] = {
  1502. { 200, 0, 0, S3_RESULT_OK },
  1503. RESULT_HANDLING_ALWAYS_RETRY,
  1504. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1505. };
  1506. g_assert(hdl != NULL);
  1507. g_assert(write_func != NULL);
  1508. result = perform_request(hdl, "GET", bucket, key, NULL, NULL,
  1509. NULL, NULL, NULL, NULL, NULL, write_func, reset_func, write_data,
  1510. progress_func, progress_data, result_handling);
  1511. return result == S3_RESULT_OK;
  1512. }
  1513. gboolean
  1514. s3_delete(S3Handle *hdl,
  1515. const char *bucket,
  1516. const char *key)
  1517. {
  1518. s3_result_t result = S3_RESULT_FAIL;
  1519. static result_handling_t result_handling[] = {
  1520. { 204, 0, 0, S3_RESULT_OK },
  1521. RESULT_HANDLING_ALWAYS_RETRY,
  1522. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1523. };
  1524. g_assert(hdl != NULL);
  1525. result = perform_request(hdl, "DELETE", bucket, key, NULL, NULL,
  1526. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  1527. result_handling);
  1528. return result == S3_RESULT_OK;
  1529. }
  1530. gboolean
  1531. s3_make_bucket(S3Handle *hdl,
  1532. const char *bucket)
  1533. {
  1534. char *body = NULL;
  1535. s3_result_t result = S3_RESULT_FAIL;
  1536. static result_handling_t result_handling[] = {
  1537. { 200, 0, 0, S3_RESULT_OK },
  1538. RESULT_HANDLING_ALWAYS_RETRY,
  1539. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1540. };
  1541. regmatch_t pmatch[4];
  1542. char *loc_end_open, *loc_content;
  1543. CurlBuffer buf = {NULL, 0, 0, 0}, *ptr = NULL;
  1544. s3_read_func read_func = NULL;
  1545. s3_reset_func reset_func = NULL;
  1546. s3_md5_func md5_func = NULL;
  1547. s3_size_func size_func = NULL;
  1548. g_assert(hdl != NULL);
  1549. if (hdl->bucket_location) {
  1550. if (s3_bucket_location_compat(bucket)) {
  1551. ptr = &buf;
  1552. buf.buffer = g_strdup_printf(AMAZON_BUCKET_CONF_TEMPLATE, hdl->bucket_location);
  1553. buf.buffer_len = (guint) strlen(body);
  1554. buf.buffer_pos = 0;
  1555. buf.max_buffer_size = buf.buffer_len;
  1556. read_func = s3_buffer_read_func;
  1557. reset_func = s3_buffer_reset_func;
  1558. size_func = s3_buffer_size_func;
  1559. md5_func = s3_buffer_md5_func;
  1560. } else {
  1561. hdl->last_message = g_strdup_printf(_(
  1562. "Location constraint given for Amazon S3 bucket, "
  1563. "but the bucket name (%s) is not usable as a subdomain."), bucket);
  1564. return FALSE;
  1565. }
  1566. }
  1567. result = perform_request(hdl, "PUT", bucket, NULL, NULL, NULL,
  1568. read_func, reset_func, size_func, md5_func, ptr,
  1569. NULL, NULL, NULL, NULL, NULL, result_handling);
  1570. if (result == S3_RESULT_OK ||
  1571. (hdl->bucket_location && result != S3_RESULT_OK
  1572. && hdl->last_s3_error_code == S3_ERROR_BucketAlreadyOwnedByYou)) {
  1573. /* verify the that the location constraint on the existing bucket matches
  1574. * the one that's configured.
  1575. */
  1576. result = perform_request(hdl, "GET", bucket, NULL, "location", NULL,
  1577. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  1578. NULL, NULL, result_handling);
  1579. /* note that we can check only one of the three AND conditions above
  1580. * and infer that the others are true
  1581. */
  1582. if (result == S3_RESULT_OK && hdl->bucket_location) {
  1583. /* return to the default state of failure */
  1584. result = S3_RESULT_FAIL;
  1585. if (body) g_free(body);
  1586. /* use strndup to get a null-terminated string */
  1587. body = g_strndup(hdl->last_response_body, hdl->last_response_body_size);
  1588. if (!body) goto cleanup;
  1589. if (!s3_regexec_wrap(&location_con_regex, body, 4, pmatch, 0)) {
  1590. loc_end_open = find_regex_substring(body, pmatch[1]);
  1591. loc_content = find_regex_substring(body, pmatch[3]);
  1592. /* The case of an empty string is special because XML allows
  1593. * "self-closing" tags
  1594. */
  1595. if ('\0' == hdl->bucket_location[0] &&
  1596. '/' != loc_end_open[0] && '\0' != hdl->bucket_location[0])
  1597. hdl->last_message = _("An empty location constraint is "
  1598. "configured, but the bucket has a non-empty location constraint");
  1599. else if (strncmp(loc_content, hdl->bucket_location, strlen(hdl->bucket_location)))
  1600. hdl->last_message = _("The location constraint configured "
  1601. "does not match the constraint currently on the bucket");
  1602. else
  1603. result = S3_RESULT_OK;
  1604. } else {
  1605. hdl->last_message = _("Unexpected location response from Amazon S3");
  1606. }
  1607. }
  1608. }
  1609. cleanup:
  1610. if (body) g_free(body);
  1611. return result == S3_RESULT_OK;
  1612. }