PageRenderTime 92ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/amanda/tags/3_1_0_mac01/device-src/s3.c

#
C | 2013 lines | 1418 code | 287 blank | 308 comment | 306 complexity | 1eb978cedd06654f09aacf1bf6c1cc8f MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Copyright (c) 2008,2009 Zmanda, Inc. All Rights Reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License version 2 as published
  6. * by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  11. * for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. *
  17. * Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  18. * Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  19. */
  20. /* TODO
  21. * - collect speed statistics
  22. * - debugging mode
  23. */
  24. #ifdef HAVE_CONFIG_H
  25. /* use a relative path here to avoid conflicting with Perl's config.h. */
  26. #include "../config/config.h"
  27. #endif
  28. #include <string.h>
  29. #include "s3.h"
  30. #include "s3-util.h"
  31. #ifdef HAVE_REGEX_H
  32. #include <regex.h>
  33. #endif
  34. #ifdef HAVE_SYS_TYPES_H
  35. #include <sys/types.h>
  36. #endif
  37. #ifdef HAVE_SYS_STAT_H
  38. #include <sys/stat.h>
  39. #endif
  40. #ifdef HAVE_UNISTD_H
  41. #include <unistd.h>
  42. #endif
  43. #ifdef HAVE_DIRENT_H
  44. #include <dirent.h>
  45. #endif
  46. #ifdef HAVE_TIME_H
  47. #include <time.h>
  48. #endif
  49. #ifdef HAVE_UTIL_H
  50. #include "util.h"
  51. #endif
  52. #ifdef HAVE_AMANDA_H
  53. #include "amanda.h"
  54. #endif
  55. #include <curl/curl.h>
  56. /* Constant renamed after version 7.10.7 */
  57. #ifndef CURLINFO_RESPONSE_CODE
  58. #define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE
  59. #endif
  60. /* We don't need OpenSSL's kerberos support, and it's broken in
  61. * RHEL 3 anyway. */
  62. #define OPENSSL_NO_KRB5
  63. #ifdef HAVE_OPENSSL_HMAC_H
  64. # include <openssl/hmac.h>
  65. #else
  66. # ifdef HAVE_CRYPTO_HMAC_H
  67. # include <crypto/hmac.h>
  68. # else
  69. # ifdef HAVE_HMAC_H
  70. # include <hmac.h>
  71. # endif
  72. # endif
  73. #endif
  74. #include <openssl/err.h>
  75. #include <openssl/ssl.h>
  76. #include <openssl/md5.h>
  77. /* Maximum key length as specified in the S3 documentation
  78. * (*excluding* null terminator) */
  79. #define S3_MAX_KEY_LENGTH 1024
  80. #define AMAZON_SECURITY_HEADER "x-amz-security-token"
  81. #define AMAZON_BUCKET_CONF_TEMPLATE "\
  82. <CreateBucketConfiguration>\n\
  83. <LocationConstraint>%s</LocationConstraint>\n\
  84. </CreateBucketConfiguration>"
  85. #define AMAZON_WILDCARD_LOCATION "*"
  86. /* parameters for exponential backoff in the face of retriable errors */
  /* start at 0.01s; the expansion is parenthesized so it stays a single
   * operand wherever the macro appears inside a larger expression */
  #define EXPONENTIAL_BACKOFF_START_USEC (G_USEC_PER_SEC/100)
  89. /* double at each retry */
  90. #define EXPONENTIAL_BACKOFF_BASE 2
  91. /* retry 14 times (for a total of about 3 minutes spent waiting) */
  92. #define EXPONENTIAL_BACKOFF_MAX_RETRIES 14
  93. /* general "reasonable size" parameters */
  94. #define MAX_ERROR_RESPONSE_LEN (100*1024)
  95. /* Results which should always be retried */
  96. #define RESULT_HANDLING_ALWAYS_RETRY \
  97. { 400, S3_ERROR_RequestTimeout, 0, S3_RESULT_RETRY }, \
  98. { 409, S3_ERROR_OperationAborted, 0, S3_RESULT_RETRY }, \
  99. { 412, S3_ERROR_PreconditionFailed, 0, S3_RESULT_RETRY }, \
  100. { 500, S3_ERROR_InternalError, 0, S3_RESULT_RETRY }, \
  101. { 501, S3_ERROR_NotImplemented, 0, S3_RESULT_RETRY }, \
  102. { 0, 0, CURLE_COULDNT_CONNECT, S3_RESULT_RETRY }, \
  103. { 0, 0, CURLE_COULDNT_RESOLVE_HOST, S3_RESULT_RETRY }, \
  104. { 0, 0, CURLE_PARTIAL_FILE, S3_RESULT_RETRY }, \
  105. { 0, 0, CURLE_OPERATION_TIMEOUTED, S3_RESULT_RETRY }, \
  106. { 0, 0, CURLE_SEND_ERROR, S3_RESULT_RETRY }, \
  107. { 0, 0, CURLE_RECV_ERROR, S3_RESULT_RETRY }, \
  108. { 0, 0, CURLE_GOT_NOTHING, S3_RESULT_RETRY }
  109. /*
  110. * Data structures and associated functions
  111. */
  112. struct S3Handle { /* state for one S3 connection: credentials, CURL handle, and details of the last request */
  113. /* (all strings in this struct are freed by s3_free()) */
  114. char *access_key; /* placed in the "Authorization: AWS <key>:<sig>" header by authenticate_request() */
  115. char *secret_key; /* HMAC-SHA1 key used by authenticate_request() to sign the request */
  116. char *user_token; /* when non-empty, authenticate_request() adds x-amz-security-token headers */
  117. char *bucket_location; /* when non-empty, perform_request() asks for subdomain-style URLs */
  118. char *ca_info;
  119. CURL *curl; /* must be non-NULL when perform_request() is called (asserted there) */
  120. gboolean verbose;
  121. gboolean use_ssl; /* passed to build_url() to select https over http */
  122. guint64 max_send_speed;
  123. guint64 max_recv_speed;
  124. /* information from the last request */
  125. char *last_message; /* replaced on every interpret_response() call; NULL when there is no message */
  126. guint last_response_code; /* HTTP status reported by CURL, set in interpret_response() */
  127. s3_error_code_t last_s3_error_code; /* parsed from the error body, or S3_ERROR_None for 2xx/3xx */
  128. CURLcode last_curl_code; /* set by interpret_response() when CURL itself failed */
  129. guint last_num_retries;
  130. void *last_response_body;
  131. guint last_response_body_size;
  132. };
  133. typedef struct { /* per-request state; perform_request() creates one as int_writedata */
  134. CurlBuffer resp_buf; /* initialized in perform_request() with MAX_ERROR_RESPONSE_LEN as its last member -- presumably the size cap; confirm against CurlBuffer in s3.h */
  135. s3_write_func write_func;
  136. s3_reset_func reset_func;
  137. gpointer write_data;
  138. gboolean headers_done;
  139. gboolean int_write_done;
  140. char *etag; /* NOTE(review): presumably filled by s3_internal_header_func(), whose body is not shown here -- confirm */
  141. } S3InternalData;
  142. /* Callback function to examine headers one-at-a-time
  143. *
  144. * @note this is the same as CURLOPT_HEADERFUNCTION
  145. *
  146. * @param data: The pointer to read data from
  147. * @param size: The size of each "element" of the data buffer in bytes
  148. * @param nmemb: The number of elements in the data buffer.
  149. * So, the buffer's size is size*nmemb bytes.
  150. * @param stream: the header_data (an opaque pointer)
  151. *
  152. * @return The number of bytes written to the buffer or
  153. * CURL_WRITEFUNC_PAUSE to pause.
  154. * If it's the number of bytes written, it should match the buffer size
  155. */
  156. typedef size_t (*s3_header_func)(void *data, size_t size, size_t nmemb, void *stream);
  157. /*
  158. * S3 errors */
  159. /* (see preprocessor magic in s3.h) */
  160. static char * s3_error_code_names[] = { /* indexed by s3_error_code_t; see s3_error_name_from_code() */
  161. #define S3_ERROR(NAME) #NAME
  162. S3_ERROR_LIST
  163. #undef S3_ERROR
  164. };
  165. /* Convert an s3 error name to an error code. This function
  166. * matches strings case-insensitively, and is appropriate for use
  167. * on data from the network.
  168. *
  169. * @param s3_error_code: the error name
  170. * @returns: the error code (see constants in s3.h)
  171. */
  172. static s3_error_code_t
  173. s3_error_code_from_name(char *s3_error_name);
  174. /* Convert an s3 error code to a string
  175. *
  176. * @param s3_error_code: the error code to convert
  177. * @returns: statically allocated string
  178. */
  179. static const char *
  180. s3_error_name_from_code(s3_error_code_t s3_error_code);
  181. /*
  182. * result handling */
  183. /* result handling is specified by a static array of result_handling structs,
  184. * which match based on response_code (from HTTP) and S3 error code. The result
  185. * given for the first match is used. 0 acts as a wildcard for both response_code
  186. * and s3_error_code. The list is terminated with a struct containing 0 for both
  187. * response_code and s3_error_code; the result for that struct is the default
  188. * result.
  189. *
  190. * See RESULT_HANDLING_ALWAYS_RETRY for an example.
  191. */
  192. typedef enum { /* outcome selected by lookup_result() from a result_handling table */
  193. S3_RESULT_RETRY = -1, /* used by every entry of RESULT_HANDLING_ALWAYS_RETRY */
  194. S3_RESULT_FAIL = 0, /* perform_request() starts from this value ("assume the worst") */
  195. S3_RESULT_OK = 1
  196. } s3_result_t;
  197. typedef struct result_handling { /* one row of a result-handling table; see lookup_result() */
  198. guint response_code; /* HTTP status to match; 0 matches any */
  199. s3_error_code_t s3_error_code; /* S3 error to match; 0 matches any */
  200. CURLcode curl_code; /* CURL error to match; 0 matches any */
  201. s3_result_t result; /* returned when all three fields match; an all-zero row ends the table and supplies the default */
  202. } result_handling_t;
  203. /* Lookup a result in C{result_handling}.
  204. *
  205. * @param result_handling: array of handling specifications
  206. * @param response_code: response code from operation
  207. * @param s3_error_code: s3 error code from operation, if any
  208. * @param curl_code: the CURL error, if any
  209. * @returns: the matching result
  210. */
  211. static s3_result_t
  212. lookup_result(const result_handling_t *result_handling,
  213. guint response_code,
  214. s3_error_code_t s3_error_code,
  215. CURLcode curl_code);
  216. /*
  217. * Precompiled regular expressions */
  218. static regex_t etag_regex, error_name_regex, message_regex, subdomain_regex,
  219. location_con_regex;
  220. /*
  221. * Utility functions
  222. */
  223. /* Check if a string is non-empty
  224. *
  225. * @param str: string to check
  226. * @returns: true iff str is non-NULL and not "\0"
  227. */
  228. static gboolean is_non_empty_string(const char *str);
  229. /* Construct the URL for an Amazon S3 REST request.
  230. *
  231. * A new string is allocated and returned; it is the responsibility of the caller.
  232. *
  232. * @param bucket: the bucket being accessed, or NULL for none
  233. * @param key: the key being accessed, or NULL for none
  234. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  235. * @param query: the query string to append (not including the initial '?'), or NULL for none
  236. * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
  237. * @param use_ssl: if TRUE, the URL uses the https scheme instead of http
  239. */
  240. static char *
  241. build_url(const char *bucket,
  242. const char *key,
  243. const char *subresource,
  244. const char *query,
  245. gboolean use_subdomain,
  246. gboolean use_ssl);
  247. /* Create proper authorization headers for an Amazon S3 REST
  248. * request to C{headers}.
  249. *
  250. * @note: C{X-Amz} headers (in C{headers}) must
  251. * - be in lower-case
  252. * - be in alphabetical order
  253. * - have no spaces around the colon
  254. * (don't yell at me -- see the Amazon Developer Guide)
  255. *
  256. * @param hdl: the S3Handle object
  257. * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
  258. * @param bucket: the bucket being accessed, or NULL for none
  259. * @param key: the key being accessed, or NULL for none
  260. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  261. * @param md5_hash: the MD5 hash of the request body, or NULL for none
  262. * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
  263. */
  264. static struct curl_slist *
  265. authenticate_request(S3Handle *hdl,
  266. const char *verb,
  267. const char *bucket,
  268. const char *key,
  269. const char *subresource,
  270. const char *md5_hash,
  271. gboolean use_subdomain);
  272. /* Interpret the response to an S3 operation, assuming CURL completed its request
  273. * successfully. This function fills in the relevant C{hdl->last*} members.
  274. *
  275. * @param hdl: The S3Handle object
  276. * @param body: the response body
  277. * @param body_len: the length of the response body
  278. * @param etag: The response's ETag header
  279. * @param content_md5: The hex-encoded MD5 hash of the request body,
  280. * which will be checked against the response's ETag header.
  281. * If NULL, the header is not checked.
  282. * If non-NULL, then the body should have the response headers at its beginning.
  283. * @returns: TRUE if the response should be retried (e.g., network error)
  284. */
  285. static gboolean
  286. interpret_response(S3Handle *hdl,
  287. CURLcode curl_code,
  288. char *curl_error_buffer,
  289. gchar *body,
  290. guint body_len,
  291. const char *etag,
  292. const char *content_md5);
  293. /* Perform an S3 operation. This function handles all of the details
  294. * of retrying requests and so on.
  295. *
  296. * The concepts of bucket and keys are defined by the Amazon S3 API.
  297. * See: "Components of Amazon S3" - API Version 2006-03-01 pg. 8
  298. *
  299. * Individual sub-resources are defined in several places. In the REST API,
  300. * they are represented by a "flag" in the "query string".
  301. * See: "Constructing the CanonicalizedResource Element" - API Version 2006-03-01 pg. 60
  302. *
  303. * @param hdl: the S3Handle object
  304. * @param verb: the HTTP request method
  305. * @param bucket: the bucket to access, or NULL for none
  306. * @param key: the key to access, or NULL for none
  307. * @param subresource: the "sub-resource" to request (e.g. "acl") or NULL for none
  308. * @param query: the query string to send (not including the initial '?'),
  309. * or NULL for none
  310. * @param read_func: the callback for reading data
  311. * Will use s3_empty_read_func if NULL is passed in.
  312. * @param read_reset_func: the callback to reset reading data
  313. * @param size_func: the callback to get the number of bytes to upload
  314. * @param md5_func: the callback to get the MD5 hash of the data to upload
  315. * @param read_data: pointer to pass to the above functions
  316. * @param write_func: the callback for writing data.
  317. * Will use s3_counter_write_func if NULL is passed in.
  318. * @param write_reset_func: the callback to reset writing data
  319. * @param write_data: pointer to pass to C{write_func}
  320. * @param progress_func: the callback for progress information
  321. * @param progress_data: pointer to pass to C{progress_func}
  322. * @param result_handling: instructions for handling the results; see above.
  323. * @returns: the result specified by result_handling; details of the response
  324. * are then available in C{hdl->last*}
  325. */
  326. static s3_result_t
  327. perform_request(S3Handle *hdl,
  328. const char *verb,
  329. const char *bucket,
  330. const char *key,
  331. const char *subresource,
  332. const char *query,
  333. s3_read_func read_func,
  334. s3_reset_func read_reset_func,
  335. s3_size_func size_func,
  336. s3_md5_func md5_func,
  337. gpointer read_data,
  338. s3_write_func write_func,
  339. s3_reset_func write_reset_func,
  340. gpointer write_data,
  341. s3_progress_func progress_func,
  342. gpointer progress_data,
  343. const result_handling_t *result_handling);
  344. /*
  345. * a CURLOPT_WRITEFUNCTION to save part of the response in memory and
  346. * call an external function if one was provided.
  347. */
  348. static size_t
  349. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream);
  350. /*
  351. * a function to reset to our internal buffer
  352. */
  353. static void
  354. s3_internal_reset_func(void * stream);
  355. /*
  356. * a CURLOPT_HEADERFUNCTION to save the ETag header only.
  357. */
  358. static size_t
  359. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream);
  360. static gboolean
  361. compile_regexes(void);
  362. /*
  363. * Static function implementations
  364. */
  365. static s3_error_code_t
  366. s3_error_code_from_name(char *s3_error_name)
  367. {
  368. int i;
  369. if (!s3_error_name) return S3_ERROR_Unknown;
  370. /* do a brute-force search through the list, since it's not sorted */
  371. for (i = 0; i < S3_ERROR_END; i++) {
  372. if (g_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
  373. return i;
  374. }
  375. return S3_ERROR_Unknown;
  376. }
  377. static const char *
  378. s3_error_name_from_code(s3_error_code_t s3_error_code)
  379. {
  380. if (s3_error_code >= S3_ERROR_END)
  381. s3_error_code = S3_ERROR_Unknown;
  382. return s3_error_code_names[s3_error_code];
  383. }
  384. gboolean
  385. s3_curl_supports_ssl(void)
  386. {
  387. static int supported = -1;
  388. if (supported == -1) {
  389. #if defined(CURL_VERSION_SSL)
  390. curl_version_info_data *info = curl_version_info(CURLVERSION_NOW);
  391. if (info->features & CURL_VERSION_SSL)
  392. supported = 1;
  393. else
  394. supported = 0;
  395. #else
  396. supported = 0;
  397. #endif
  398. }
  399. return supported;
  400. }
  401. static gboolean
  402. s3_curl_throttling_compat(void)
  403. {
  404. /* CURLOPT_MAX_SEND_SPEED_LARGE added in 7.15.5 */
  405. #if LIBCURL_VERSION_NUM >= 0x070f05
  406. curl_version_info_data *info;
  407. /* check the runtime version too */
  408. info = curl_version_info(CURLVERSION_NOW);
  409. return info->version_num >= 0x070f05;
  410. #else
  411. return FALSE;
  412. #endif
  413. }
  414. static s3_result_t
  415. lookup_result(const result_handling_t *result_handling,
  416. guint response_code,
  417. s3_error_code_t s3_error_code,
  418. CURLcode curl_code)
  419. {
  420. while (result_handling->response_code
  421. || result_handling->s3_error_code
  422. || result_handling->curl_code) {
  423. if ((result_handling->response_code && result_handling->response_code != response_code)
  424. || (result_handling->s3_error_code && result_handling->s3_error_code != s3_error_code)
  425. || (result_handling->curl_code && result_handling->curl_code != curl_code)) {
  426. result_handling++;
  427. continue;
  428. }
  429. return result_handling->result;
  430. }
  431. /* return the result for the terminator, as the default */
  432. return result_handling->result;
  433. }
  434. static gboolean
  435. is_non_empty_string(const char *str)
  436. {
  437. return str && str[0] != '\0';
  438. }
  439. static char *
  440. build_url(const char *bucket,
  441. const char *key,
  442. const char *subresource,
  443. const char *query,
  444. gboolean use_subdomain,
  445. gboolean use_ssl)
  446. {
  447. GString *url = NULL;
  448. char *esc_bucket = NULL, *esc_key = NULL;
  449. /* scheme */
  450. url = g_string_new("http");
  451. if (use_ssl)
  452. g_string_append(url, "s");
  453. g_string_append(url, "://");
  454. /* domain */
  455. if (use_subdomain && bucket)
  456. g_string_append_printf(url, "%s.s3.amazonaws.com/", bucket);
  457. else
  458. g_string_append(url, "s3.amazonaws.com/");
  459. /* path */
  460. if (!use_subdomain && bucket) {
  461. esc_bucket = curl_escape(bucket, 0);
  462. if (!esc_bucket) goto cleanup;
  463. g_string_append_printf(url, "%s", esc_bucket);
  464. if (key)
  465. g_string_append(url, "/");
  466. }
  467. if (key) {
  468. esc_key = curl_escape(key, 0);
  469. if (!esc_key) goto cleanup;
  470. g_string_append_printf(url, "%s", esc_key);
  471. }
  472. /* query string */
  473. if (subresource || query)
  474. g_string_append(url, "?");
  475. if (subresource)
  476. g_string_append(url, subresource);
  477. if (subresource && query)
  478. g_string_append(url, "&");
  479. if (query)
  480. g_string_append(url, query);
  481. cleanup:
  482. if (esc_bucket) curl_free(esc_bucket);
  483. if (esc_key) curl_free(esc_key);
  484. return g_string_free(url, FALSE);
  485. }
  486. static struct curl_slist *
  487. authenticate_request(S3Handle *hdl,
  488. const char *verb,
  489. const char *bucket,
  490. const char *key,
  491. const char *subresource,
  492. const char *md5_hash,
  493. gboolean use_subdomain)
  494. {
  495. time_t t;
  496. struct tm tmp;
  497. char *date = NULL;
  498. char *buf = NULL;
  499. HMAC_CTX ctx;
  500. GByteArray *md = NULL;
  501. char *auth_base64 = NULL;
  502. struct curl_slist *headers = NULL;
  503. char *esc_bucket = NULL, *esc_key = NULL;
  504. GString *auth_string = NULL;
  505. /* From RFC 2616 */
  506. static const char *wkday[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  507. static const char *month[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
  508. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  509. /* Build the string to sign, per the S3 spec.
  510. * See: "Authenticating REST Requests" - API Version 2006-03-01 pg 58
  511. */
  512. /* verb */
  513. auth_string = g_string_new(verb);
  514. g_string_append(auth_string, "\n");
  515. /* Content-MD5 header */
  516. if (md5_hash)
  517. g_string_append(auth_string, md5_hash);
  518. g_string_append(auth_string, "\n");
  519. /* Content-Type is empty*/
  520. g_string_append(auth_string, "\n");
  521. /* calculate the date */
  522. t = time(NULL);
  523. #ifdef _WIN32
  524. if (!gmtime_s(&tmp, &t)) g_debug("localtime error");
  525. #else
  526. if (!gmtime_r(&t, &tmp)) perror("localtime");
  527. #endif
  528. date = g_strdup_printf("%s, %02d %s %04d %02d:%02d:%02d GMT",
  529. wkday[tmp.tm_wday], tmp.tm_mday, month[tmp.tm_mon], 1900+tmp.tm_year,
  530. tmp.tm_hour, tmp.tm_min, tmp.tm_sec);
  531. g_string_append(auth_string, date);
  532. g_string_append(auth_string, "\n");
  533. if (is_non_empty_string(hdl->user_token)) {
  534. g_string_append(auth_string, AMAZON_SECURITY_HEADER);
  535. g_string_append(auth_string, ":");
  536. g_string_append(auth_string, hdl->user_token);
  537. g_string_append(auth_string, ",");
  538. g_string_append(auth_string, STS_PRODUCT_TOKEN);
  539. g_string_append(auth_string, "\n");
  540. }
  541. /* CanonicalizedResource */
  542. g_string_append(auth_string, "/");
  543. if (bucket) {
  544. if (use_subdomain)
  545. g_string_append(auth_string, bucket);
  546. else {
  547. esc_bucket = curl_escape(bucket, 0);
  548. if (!esc_bucket) goto cleanup;
  549. g_string_append(auth_string, esc_bucket);
  550. }
  551. }
  552. if (bucket && (use_subdomain || key))
  553. g_string_append(auth_string, "/");
  554. if (key) {
  555. esc_key = curl_escape(key, 0);
  556. if (!esc_key) goto cleanup;
  557. g_string_append(auth_string, esc_key);
  558. }
  559. if (subresource) {
  560. g_string_append(auth_string, "?");
  561. g_string_append(auth_string, subresource);
  562. }
  563. /* run HMAC-SHA1 on the canonicalized string */
  564. md = g_byte_array_sized_new(EVP_MAX_MD_SIZE+1);
  565. HMAC_CTX_init(&ctx);
  566. HMAC_Init_ex(&ctx, hdl->secret_key, (int) strlen(hdl->secret_key), EVP_sha1(), NULL);
  567. HMAC_Update(&ctx, (unsigned char*) auth_string->str, auth_string->len);
  568. HMAC_Final(&ctx, md->data, &md->len);
  569. HMAC_CTX_cleanup(&ctx);
  570. auth_base64 = s3_base64_encode(md);
  571. /* append the new headers */
  572. if (is_non_empty_string(hdl->user_token)) {
  573. /* Devpay headers are included in hash. */
  574. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", hdl->user_token);
  575. headers = curl_slist_append(headers, buf);
  576. g_free(buf);
  577. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", STS_PRODUCT_TOKEN);
  578. headers = curl_slist_append(headers, buf);
  579. g_free(buf);
  580. }
  581. buf = g_strdup_printf("Authorization: AWS %s:%s",
  582. hdl->access_key, auth_base64);
  583. headers = curl_slist_append(headers, buf);
  584. g_free(buf);
  585. if (md5_hash && '\0' != md5_hash[0]) {
  586. buf = g_strdup_printf("Content-MD5: %s", md5_hash);
  587. headers = curl_slist_append(headers, buf);
  588. g_free(buf);
  589. }
  590. buf = g_strdup_printf("Date: %s", date);
  591. headers = curl_slist_append(headers, buf);
  592. g_free(buf);
  593. cleanup:
  594. g_free(date);
  595. g_free(esc_bucket);
  596. g_free(esc_key);
  597. g_byte_array_free(md, TRUE);
  598. g_free(auth_base64);
  599. g_string_free(auth_string, TRUE);
  600. return headers;
  601. }
  602. static gboolean
  603. interpret_response(S3Handle *hdl,
  604. CURLcode curl_code,
  605. char *curl_error_buffer,
  606. gchar *body,
  607. guint body_len,
  608. const char *etag,
  609. const char *content_md5)
  610. {
  611. long response_code = 0;
  612. regmatch_t pmatch[2];
  613. char *error_name = NULL, *message = NULL;
  614. char *body_copy = NULL;
  615. gboolean ret = TRUE;
  616. if (!hdl) return FALSE;
  617. if (hdl->last_message) g_free(hdl->last_message);
  618. hdl->last_message = NULL;
  619. /* bail out from a CURL error */
  620. if (curl_code != CURLE_OK) {
  621. hdl->last_curl_code = curl_code;
  622. hdl->last_message = g_strdup_printf("CURL error: %s", curl_error_buffer);
  623. return FALSE;
  624. }
  625. /* CURL seems to think things were OK, so get its response code */
  626. curl_easy_getinfo(hdl->curl, CURLINFO_RESPONSE_CODE, &response_code);
  627. hdl->last_response_code = response_code;
  628. /* check ETag, if present */
  629. if (etag && content_md5 && 200 == response_code) {
  630. if (etag && g_strcasecmp(etag, content_md5))
  631. hdl->last_message = g_strdup("S3 Error: Possible data corruption (ETag returned by Amazon did not match the MD5 hash of the data sent)");
  632. else
  633. ret = FALSE;
  634. return ret;
  635. }
  636. if (200 <= response_code && response_code < 400) {
  637. /* 2xx and 3xx codes won't have a response body we care about */
  638. hdl->last_s3_error_code = S3_ERROR_None;
  639. return FALSE;
  640. }
  641. /* Now look at the body to try to get the actual Amazon error message. Rather
  642. * than parse out the XML, just use some regexes. */
  643. /* impose a reasonable limit on body size */
  644. if (body_len > MAX_ERROR_RESPONSE_LEN) {
  645. hdl->last_message = g_strdup("S3 Error: Unknown (response body too large to parse)");
  646. return FALSE;
  647. } else if (!body || body_len == 0) {
  648. hdl->last_message = g_strdup("S3 Error: Unknown (empty response body)");
  649. return TRUE; /* perhaps a network error; retry the request */
  650. }
  651. /* use strndup to get a zero-terminated string */
  652. body_copy = g_strndup(body, body_len);
  653. if (!body_copy) goto cleanup;
  654. if (!s3_regexec_wrap(&error_name_regex, body_copy, 2, pmatch, 0))
  655. error_name = find_regex_substring(body_copy, pmatch[1]);
  656. if (!s3_regexec_wrap(&message_regex, body_copy, 2, pmatch, 0))
  657. message = find_regex_substring(body_copy, pmatch[1]);
  658. if (error_name) {
  659. hdl->last_s3_error_code = s3_error_code_from_name(error_name);
  660. }
  661. if (message) {
  662. hdl->last_message = message;
  663. message = NULL; /* steal the reference to the string */
  664. }
  665. cleanup:
  666. g_free(body_copy);
  667. g_free(message);
  668. g_free(error_name);
  669. return FALSE;
  670. }
  671. /* a CURLOPT_READFUNCTION to read data from a buffer. */
  672. size_t
  673. s3_buffer_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  674. {
  675. CurlBuffer *data = stream;
  676. guint bytes_desired = (guint) size * nmemb;
  677. /* check the number of bytes remaining, just to be safe */
  678. if (bytes_desired > data->buffer_len - data->buffer_pos)
  679. bytes_desired = data->buffer_len - data->buffer_pos;
  680. memcpy((char *)ptr, data->buffer + data->buffer_pos, bytes_desired);
  681. data->buffer_pos += bytes_desired;
  682. return bytes_desired;
  683. }
  684. size_t
  685. s3_buffer_size_func(void *stream)
  686. {
  687. CurlBuffer *data = stream;
  688. return data->buffer_len;
  689. }
  690. GByteArray*
  691. s3_buffer_md5_func(void *stream)
  692. {
  693. CurlBuffer *data = stream;
  694. GByteArray req_body_gba = {(guint8 *)data->buffer, data->buffer_len};
  695. return s3_compute_md5_hash(&req_body_gba);
  696. }
  697. void
  698. s3_buffer_reset_func(void *stream)
  699. {
  700. CurlBuffer *data = stream;
  701. data->buffer_pos = 0;
  702. }
  703. /* a CURLOPT_WRITEFUNCTION to write data to a buffer. */
  704. size_t
  705. s3_buffer_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  706. {
  707. CurlBuffer * data = stream;
  708. guint new_bytes = (guint) size * nmemb;
  709. guint bytes_needed = data->buffer_pos + new_bytes;
  710. /* error out if the new size is greater than the maximum allowed */
  711. if (data->max_buffer_size && bytes_needed > data->max_buffer_size)
  712. return 0;
  713. /* reallocate if necessary. We use exponential sizing to make this
  714. * happen less often. */
  715. if (bytes_needed > data->buffer_len) {
  716. guint new_size = MAX(bytes_needed, data->buffer_len * 2);
  717. if (data->max_buffer_size) {
  718. new_size = MIN(new_size, data->max_buffer_size);
  719. }
  720. data->buffer = g_realloc(data->buffer, new_size);
  721. data->buffer_len = new_size;
  722. }
  723. if (!data->buffer)
  724. return 0; /* returning zero signals an error to libcurl */
  725. /* actually copy the data to the buffer */
  726. memcpy(data->buffer + data->buffer_pos, ptr, new_bytes);
  727. data->buffer_pos += new_bytes;
  728. /* signal success to curl */
  729. return new_bytes;
  730. }
  731. /* a CURLOPT_READFUNCTION that writes nothing. */
  732. size_t
  733. s3_empty_read_func(G_GNUC_UNUSED void *ptr, G_GNUC_UNUSED size_t size, G_GNUC_UNUSED size_t nmemb, G_GNUC_UNUSED void * stream)
  734. {
  735. return 0;
  736. }
  737. size_t
  738. s3_empty_size_func(G_GNUC_UNUSED void *stream)
  739. {
  740. return 0;
  741. }
  742. GByteArray*
  743. s3_empty_md5_func(G_GNUC_UNUSED void *stream)
  744. {
  745. static const GByteArray empty = {(guint8 *) "", 0};
  746. return s3_compute_md5_hash(&empty);
  747. }
  748. /* a CURLOPT_WRITEFUNCTION to write data that just counts data.
  749. * s3_write_data should be NULL or a pointer to an gint64.
  750. */
  751. size_t
  752. s3_counter_write_func(G_GNUC_UNUSED void *ptr, size_t size, size_t nmemb, void *stream)
  753. {
  754. gint64 *count = (gint64*) stream, inc = nmemb*size;
  755. if (count) *count += inc;
  756. return inc;
  757. }
  758. void
  759. s3_counter_reset_func(void *stream)
  760. {
  761. gint64 *count = (gint64*) stream;
  762. if (count) *count = 0;
  763. }
  764. #ifdef _WIN32
  765. /* a CURLOPT_READFUNCTION to read data from a file. */
  766. size_t
  767. s3_file_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  768. {
  769. HANDLE *hFile = (HANDLE *) stream;
  770. DWORD bytes_read;
  771. ReadFile(hFile, ptr, (DWORD) size*nmemb, &bytes_read, NULL);
  772. return bytes_read;
  773. }
  774. size_t
  775. s3_file_size_func(void *stream)
  776. {
  777. HANDLE *hFile = (HANDLE *) stream;
  778. DWORD size = GetFileSize(hFile, NULL);
  779. if (INVALID_FILE_SIZE == size) {
  780. return -1;
  781. } else {
  782. return size;
  783. }
  784. }
  785. GByteArray*
  786. s3_file_md5_func(void *stream)
  787. {
  788. #define S3_MD5_BUF_SIZE (10*1024)
  789. HANDLE *hFile = (HANDLE *) stream;
  790. guint8 buf[S3_MD5_BUF_SIZE];
  791. DWORD bytes_read;
  792. MD5_CTX md5_ctx;
  793. GByteArray *ret = NULL;
  794. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  795. ret = g_byte_array_sized_new(S3_MD5_HASH_BYTE_LEN);
  796. g_byte_array_set_size(ret, S3_MD5_HASH_BYTE_LEN);
  797. MD5_Init(&md5_ctx);
  798. while (ReadFile(hFile, buf, S3_MD5_BUF_SIZE, &bytes_read, NULL)) {
  799. MD5_Update(&md5_ctx, buf, bytes_read);
  800. }
  801. MD5_Final(ret->data, &md5_ctx);
  802. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  803. return ret;
  804. #undef S3_MD5_BUF_SIZE
  805. }
  806. GByteArray*
  807. s3_file_reset_func(void *stream)
  808. {
  809. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  810. }
  811. /* a CURLOPT_WRITEFUNCTION to write data to a file. */
  812. size_t
  813. s3_file_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  814. {
  815. HANDLE *hFile = (HANDLE *) stream;
  816. DWORD bytes_written;
  817. WriteFile(hFile, ptr, (DWORD) size*nmemb, &bytes_written, NULL);
  818. return bytes_written;
  819. }
  820. #endif
  821. static int
  822. curl_debug_message(CURL *curl G_GNUC_UNUSED,
  823. curl_infotype type,
  824. char *s,
  825. size_t len,
  826. void *unused G_GNUC_UNUSED)
  827. {
  828. char *lineprefix;
  829. char *message;
  830. char **lines, **line;
  831. switch (type) {
  832. case CURLINFO_TEXT:
  833. lineprefix="";
  834. break;
  835. case CURLINFO_HEADER_IN:
  836. lineprefix="Hdr In: ";
  837. break;
  838. case CURLINFO_HEADER_OUT:
  839. lineprefix="Hdr Out: ";
  840. break;
  841. default:
  842. /* ignore data in/out -- nobody wants to see that in the
  843. * debug logs! */
  844. return 0;
  845. }
  846. /* split the input into lines */
  847. message = g_strndup(s, (gsize) len);
  848. lines = g_strsplit(message, "\n", -1);
  849. g_free(message);
  850. for (line = lines; *line; line++) {
  851. if (**line == '\0') continue; /* skip blank lines */
  852. g_debug("%s%s", lineprefix, *line);
  853. }
  854. g_strfreev(lines);
  855. return 0;
  856. }
  857. static s3_result_t
  858. perform_request(S3Handle *hdl,
  859. const char *verb,
  860. const char *bucket,
  861. const char *key,
  862. const char *subresource,
  863. const char *query,
  864. s3_read_func read_func,
  865. s3_reset_func read_reset_func,
  866. s3_size_func size_func,
  867. s3_md5_func md5_func,
  868. gpointer read_data,
  869. s3_write_func write_func,
  870. s3_reset_func write_reset_func,
  871. gpointer write_data,
  872. s3_progress_func progress_func,
  873. gpointer progress_data,
  874. const result_handling_t *result_handling)
  875. {
  876. gboolean use_subdomain;
  877. char *url = NULL;
  878. s3_result_t result = S3_RESULT_FAIL; /* assume the worst.. */
  879. CURLcode curl_code = CURLE_OK;
  880. char curl_error_buffer[CURL_ERROR_SIZE] = "";
  881. struct curl_slist *headers = NULL;
  882. S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, FALSE, NULL};
  883. gboolean should_retry;
  884. guint retries = 0;
  885. gulong backoff = EXPONENTIAL_BACKOFF_START_USEC;
  886. /* corresponds to PUT, HEAD, GET, and POST */
  887. int curlopt_upload = 0, curlopt_nobody = 0, curlopt_httpget = 0, curlopt_post = 0;
  888. /* do we want to examine the headers */
  889. const char *curlopt_customrequest = NULL;
  890. /* for MD5 calculation */
  891. GByteArray *md5_hash = NULL;
  892. gchar *md5_hash_hex = NULL, *md5_hash_b64 = NULL;
  893. size_t request_body_size = 0;
  894. g_assert(hdl != NULL && hdl->curl != NULL);
  895. s3_reset(hdl);
  896. use_subdomain = is_non_empty_string(hdl->bucket_location);
  897. url = build_url(bucket, key, subresource, query, use_subdomain, hdl->use_ssl);
  898. if (!url) goto cleanup;
  899. /* libcurl may behave strangely if these are not set correctly */
  900. if (!strncmp(verb, "PUT", 4)) {
  901. curlopt_upload = 1;
  902. } else if (!strncmp(verb, "GET", 4)) {
  903. curlopt_httpget = 1;
  904. } else if (!strncmp(verb, "POST", 5)) {
  905. curlopt_post = 1;
  906. } else if (!strncmp(verb, "HEAD", 5)) {
  907. curlopt_nobody = 1;
  908. } else {
  909. curlopt_customrequest = verb;
  910. }
  911. if (size_func) {
  912. request_body_size = size_func(read_data);
  913. }
  914. if (md5_func) {
  915. md5_hash = md5_func(read_data);
  916. if (md5_hash) {
  917. md5_hash_b64 = s3_base64_encode(md5_hash);
  918. md5_hash_hex = s3_hex_encode(md5_hash);
  919. g_byte_array_free(md5_hash, TRUE);
  920. }
  921. }
  922. if (!read_func) {
  923. /* Curl will use fread() otherwise */
  924. read_func = s3_empty_read_func;
  925. }
  926. if (write_func) {
  927. int_writedata.write_func = write_func;
  928. int_writedata.reset_func = write_reset_func;
  929. int_writedata.write_data = write_data;
  930. } else {
  931. /* Curl will use fwrite() otherwise */
  932. int_writedata.write_func = s3_counter_write_func;
  933. int_writedata.reset_func = s3_counter_reset_func;
  934. int_writedata.write_data = NULL;
  935. }
  936. while (1) {
  937. /* reset things */
  938. if (headers) {
  939. curl_slist_free_all(headers);
  940. }
  941. curl_error_buffer[0] = '\0';
  942. if (read_reset_func) {
  943. read_reset_func(read_data);
  944. }
  945. /* calls write_reset_func */
  946. s3_internal_reset_func(&int_writedata);
  947. /* set up the request */
  948. headers = authenticate_request(hdl, verb, bucket, key, subresource,
  949. md5_hash_b64, is_non_empty_string(hdl->bucket_location));
  950. if (hdl->use_ssl && hdl->ca_info) {
  951. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CAINFO, hdl->ca_info)))
  952. goto curl_error;
  953. }
  954. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_VERBOSE, hdl->verbose)))
  955. goto curl_error;
  956. if (hdl->verbose) {
  957. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_DEBUGFUNCTION,
  958. curl_debug_message)))
  959. goto curl_error;
  960. }
  961. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_ERRORBUFFER,
  962. curl_error_buffer)))
  963. goto curl_error;
  964. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOPROGRESS, 1)))
  965. goto curl_error;
  966. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_FOLLOWLOCATION, 1)))
  967. goto curl_error;
  968. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_URL, url)))
  969. goto curl_error;
  970. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPHEADER,
  971. headers)))
  972. goto curl_error;
  973. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEFUNCTION, s3_internal_write_func)))
  974. goto curl_error;
  975. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEDATA, &int_writedata)))
  976. goto curl_error;
  977. /* Note: we always have to set this apparently, for consistent "end of header" detection */
  978. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERFUNCTION, s3_internal_header_func)))
  979. goto curl_error;
  980. /* Note: if set, CURLOPT_HEADERDATA seems to also be used for CURLOPT_WRITEDATA ? */
  981. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERDATA, &int_writedata)))
  982. goto curl_error;
  983. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSFUNCTION, progress_func)))
  984. goto curl_error;
  985. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSDATA, progress_data)))
  986. goto curl_error;
  987. /* CURLOPT_INFILESIZE_LARGE added in 7.11.0 */
  988. #if LIBCURL_VERSION_NUM >= 0x070b00
  989. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)request_body_size)))
  990. goto curl_error;
  991. #else
  992. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE, (long)request_body_size)))
  993. goto curl_error;
  994. #endif
  995. /* CURLOPT_MAX_{RECV,SEND}_SPEED_LARGE added in 7.15.5 */
  996. #if LIBCURL_VERSION_NUM >= 0x070f05
  997. if (s3_curl_throttling_compat()) {
  998. if (hdl->max_send_speed)
  999. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_send_speed)))
  1000. goto curl_error;
  1001. if (hdl->max_recv_speed)
  1002. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_recv_speed)))
  1003. goto curl_error;
  1004. }
  1005. #endif
  1006. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPGET, curlopt_httpget)))
  1007. goto curl_error;
  1008. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_UPLOAD, curlopt_upload)))
  1009. goto curl_error;
  1010. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_POST, curlopt_post)))
  1011. goto curl_error;
  1012. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOBODY, curlopt_nobody)))
  1013. goto curl_error;
  1014. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CUSTOMREQUEST,
  1015. curlopt_customrequest)))
  1016. goto curl_error;
  1017. if (curlopt_upload) {
  1018. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION, read_func)))
  1019. goto curl_error;
  1020. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA, read_data)))
  1021. goto curl_error;
  1022. } else {
  1023. /* Clear request_body options. */
  1024. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION,
  1025. NULL)))
  1026. goto curl_error;
  1027. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA,
  1028. NULL)))
  1029. goto curl_error;
  1030. }
  1031. /* Perform the request */
  1032. curl_code = curl_easy_perform(hdl->curl);
  1033. /* interpret the response into hdl->last* */
  1034. curl_error: /* (label for short-circuiting the curl_easy_perform call) */
  1035. should_retry = interpret_response(hdl, curl_code, curl_error_buffer,
  1036. int_writedata.resp_buf.buffer, int_writedata.resp_buf.buffer_pos, int_writedata.etag, md5_hash_hex);
  1037. /* and, unless we know we need to retry, see what we're to do now */
  1038. if (!should_retry) {
  1039. result = lookup_result(result_handling, hdl->last_response_code,
  1040. hdl->last_s3_error_code, hdl->last_curl_code);
  1041. /* break out of the while(1) unless we're retrying */
  1042. if (result != S3_RESULT_RETRY)
  1043. break;
  1044. }
  1045. if (retries >= EXPONENTIAL_BACKOFF_MAX_RETRIES) {
  1046. /* we're out of retries, so annotate hdl->last_message appropriately and bail
  1047. * out. */
  1048. char *m = g_strdup_printf("Too many retries; last message was '%s'", hdl->last_message);
  1049. if (hdl->last_message) g_free(hdl->last_message);
  1050. hdl->last_message = m;
  1051. result = S3_RESULT_FAIL;
  1052. break;
  1053. }
  1054. g_usleep(backoff);
  1055. retries++;
  1056. backoff *= EXPONENTIAL_BACKOFF_BASE;
  1057. }
  1058. if (result != S3_RESULT_OK) {
  1059. g_debug(_("%s %s failed with %d/%s"), verb, url,
  1060. hdl->last_response_code,
  1061. s3_error_name_from_code(hdl->last_s3_error_code));
  1062. }
  1063. cleanup:
  1064. g_free(url);
  1065. if (headers) curl_slist_free_all(headers);
  1066. g_free(md5_hash_b64);
  1067. g_free(md5_hash_hex);
  1068. /* we don't deallocate the response body -- we keep it for later */
  1069. hdl->last_response_body = int_writedata.resp_buf.buffer;
  1070. hdl->last_response_body_size = int_writedata.resp_buf.buffer_pos;
  1071. hdl->last_num_retries = retries;
  1072. return result;
  1073. }
  1074. static size_t
  1075. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1076. {
  1077. S3InternalData *data = (S3InternalData *) stream;
  1078. size_t bytes_saved;
  1079. if (!data->headers_done)
  1080. return size*nmemb;
  1081. /* call write on internal buffer (if not full) */
  1082. if (data->int_write_done) {
  1083. bytes_saved = 0;
  1084. } else {
  1085. bytes_saved = s3_buffer_write_func(ptr, size, nmemb, &data->resp_buf);
  1086. if (!bytes_saved) {
  1087. data->int_write_done = TRUE;
  1088. }
  1089. }
  1090. /* call write on user buffer */
  1091. if (data->write_func) {
  1092. return data->write_func(ptr, size, nmemb, data->write_data);
  1093. } else {
  1094. return bytes_saved;
  1095. }
  1096. }
  1097. static void
  1098. s3_internal_reset_func(void * stream)
  1099. {
  1100. S3InternalData *data = (S3InternalData *) stream;
  1101. s3_buffer_reset_func(&data->resp_buf);
  1102. data->headers_done = FALSE;
  1103. data->int_write_done = FALSE;
  1104. data->etag = NULL;
  1105. if (data->reset_func) {
  1106. data->reset_func(data->write_data);
  1107. }
  1108. }
  1109. static size_t
  1110. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1111. {
  1112. static const char *final_header = "\r\n";
  1113. char *header;
  1114. regmatch_t pmatch[2];
  1115. S3InternalData *data = (S3InternalData *) stream;
  1116. header = g_strndup((gchar *) ptr, (gsize) size*nmemb);
  1117. if (!s3_regexec_wrap(&etag_regex, header, 2, pmatch, 0))
  1118. data->etag = find_regex_substring(header, pmatch[1]);
  1119. if (!strcmp(final_header, header))
  1120. data->headers_done = TRUE;
  1121. return size*nmemb;
  1122. }
  1123. static gboolean
  1124. compile_regexes(void)
  1125. {
  1126. #ifdef HAVE_REGEX_H
  1127. /* using POSIX regular expressions */
  1128. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1129. {"<Code>[[:space:]]*([^<]*)[[:space:]]*</Code>", REG_EXTENDED | REG_ICASE, &error_name_regex},
  1130. {"^ETag:[[:space:]]*\"([^\"]+)\"[[:space:]]*$", REG_EXTENDED | REG_ICASE | REG_NEWLINE, &etag_regex},
  1131. {"<Message>[[:space:]]*([^<]*)[[:space:]]*</Message>", REG_EXTENDED | REG_ICASE, &message_regex},
  1132. {"^[a-z0-9](-*[a-z0-9]){2,62}$", REG_EXTENDED | REG_NOSUB, &subdomain_regex},
  1133. {"(/>)|(>([^<]*)</LocationConstraint>)", REG_EXTENDED | REG_ICASE, &location_con_regex},
  1134. {NULL, 0, NULL}
  1135. };
  1136. char regmessage[1024];
  1137. int size, i;
  1138. int reg_result;
  1139. for (i = 0; regexes[i].str; i++) {
  1140. reg_result = regcomp(regexes[i].regex, regexes[i].str, regexes[i].flags);
  1141. if (reg_result != 0) {
  1142. size = regerror(reg_result, regexes[i].regex, regmessage, sizeof(regmessage));
  1143. g_error(_("Regex error: %s"), regmessage);
  1144. return FALSE;
  1145. }
  1146. }
  1147. #else /* ! HAVE_REGEX_H */
  1148. /* using PCRE via GLib */
  1149. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1150. {"<Code>\\s*([^<]*)\\s*</Code>",
  1151. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1152. &error_name_regex},
  1153. {"^ETag:\\s*\"([^\"]+)\"\\s*$",
  1154. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1155. &etag_regex},
  1156. {"<Message>\\s*([^<]*)\\s*</Message>",
  1157. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1158. &message_regex},
  1159. {"^[a-z0-9]((-*[a-z0-9])|(\\.[a-z0-9])){2,62}$",
  1160. G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE,
  1161. &subdomain_regex},
  1162. {"(/>)|(>([^<]*)</LocationConstraint>)",
  1163. G_REGEX_CASELESS,
  1164. &location_con_regex},
  1165. {NULL, 0, NULL}
  1166. };
  1167. int i;
  1168. GError *err = NULL;
  1169. for (i = 0; regexes[i].str; i++) {
  1170. *(regexes[i].regex) = g_regex_new(regexes[i].str, regexes[i].flags, 0, &err);
  1171. if (err) {
  1172. g_error(_("Regex error: %s"), err->message);
  1173. g_error_free(err);
  1174. return FALSE;
  1175. }
  1176. }
  1177. #endif
  1178. return TRUE;
  1179. }
  1180. /*
  1181. * Public function implementations
  1182. */
  1183. gboolean s3_init(void)
  1184. {
  1185. static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
  1186. static gboolean init = FALSE, ret;
  1187. /* n.b. curl_global_init is called in common-src/glib-util.c:glib_init() */
  1188. g_static_mutex_lock (&mutex);
  1189. if (!init) {
  1190. ret = compile_regexes();
  1191. init = TRUE;
  1192. }
  1193. g_static_mutex_unlock(&mutex);
  1194. return ret;
  1195. }
  1196. gboolean
  1197. s3_curl_location_compat(void)
  1198. {
  1199. curl_version_info_data *info;
  1200. info = curl_version_info(CURLVERSION_NOW);
  1201. return info->version_num > 0x070a02;
  1202. }
  1203. gboolean
  1204. s3_bucket_location_compat(const char *bucket)
  1205. {
  1206. return !s3_regexec_wrap(&subdomain_regex, bucket, 0, NULL, 0);
  1207. }
  1208. S3Handle *
  1209. s3_open(const char *access_key,
  1210. const char *secret_key,
  1211. const char *user_token,
  1212. const char *bucket_location,
  1213. const char *ca_info
  1214. ) {
  1215. S3Handle *hdl;
  1216. hdl = g_new0(S3Handle, 1);
  1217. if (!hdl) goto error;
  1218. hdl->verbose = FALSE;
  1219. hdl->use_ssl = s3_curl_supports_ssl();
  1220. g_assert(access_key);
  1221. hdl->access_key = g_strdup(access_key);
  1222. g_assert(secret_key);
  1223. hdl->secret_key = g_strdup(secret_key);
  1224. /* NULL is okay */
  1225. hdl->user_token = g_strdup(user_token);
  1226. /* NULL is okay */
  1227. hdl->bucket_location = g_strdup(bucket_location);
  1228. /* NULL is okay */
  1229. hdl->ca_info = g_strdup(ca_info);
  1230. hdl->curl = curl_easy_init();
  1231. if (!hdl->curl) goto error;
  1232. return hdl;
  1233. error:
  1234. s3_free(hdl);
  1235. return NULL;
  1236. }
  1237. void
  1238. s3_free(S3Handle *hdl)
  1239. {
  1240. s3_reset(hdl);
  1241. if (hdl) {
  1242. g_free(hdl->access_key);
  1243. g_free(hdl->secret_key);
  1244. if (hdl->user_token) g_free(hdl->user_token);
  1245. if (hdl->bucket_location) g_free(hdl->bucket_location);
  1246. if (hdl->curl) curl_easy_cleanup(hdl->curl);
  1247. g_free(hdl);
  1248. }
  1249. }
  1250. void
  1251. s3_reset(S3Handle *hdl)
  1252. {
  1253. if (hdl) {
  1254. /* We don't call curl_easy_reset here, because doing that in curl
  1255. * < 7.16 blanks the default CA certificate path, and there's no way
  1256. * to get it back. */
  1257. if (hdl->last_message) {
  1258. g_free(hdl->last_message);
  1259. hdl->last_message = NULL;
  1260. }
  1261. hdl->last_response_code = 0;
  1262. hdl->last_curl_code = 0;
  1263. hdl->last_s3_error_code = 0;
  1264. hdl->last_num_retries = 0;
  1265. if (hdl->last_response_body) {
  1266. g_free(hdl->last_response_body);
  1267. hdl->last_response_body = NULL;
  1268. }
  1269. hdl->last_response_body_size = 0;
  1270. }
  1271. }
  1272. void
  1273. s3_error(S3Handle *hdl,
  1274. const char **message,
  1275. guint *response_code,
  1276. s3_error_code_t *s3_error_code,
  1277. const char **s3_error_name,
  1278. CURLcode *curl_code,
  1279. guint *num_retries)
  1280. {
  1281. if (hdl) {
  1282. if (message) *message = hdl->last_message;
  1283. if (response_code) *response_code = hdl->last_response_code;
  1284. if (s3_error_code) *s3_error_code = hdl->last_s3_error_code;
  1285. if (s3_error_name) *s3_error_name = s3_error_name_from_code(hdl->last_s3_error_code);
  1286. if (curl_code) *curl_code = hdl->last_curl_code;
  1287. if (num_retries) *num_retries = hdl->last_num_retries;
  1288. } else {
  1289. /* no hdl? return something coherent, anyway */
  1290. if (message) *message = "NULL S3Handle";
  1291. if (response_code) *response_code = 0;
  1292. if (s3_error_code) *s3_error_code = 0;
  1293. if (s3_error_name) *s3_error_name = NULL;
  1294. if (curl_code) *curl_code = 0;
  1295. if (num_retries) *num_retries = 0;
  1296. }
  1297. }
  1298. void
  1299. s3_verbose(S3Handle *hdl, gboolean verbose)
  1300. {
  1301. hdl->verbose = verbose;
  1302. }
  1303. gboolean
  1304. s3_set_max_send_speed(S3Handle *hdl, guint64 max_send_speed)
  1305. {
  1306. if (!s3_curl_throttling_compat())
  1307. return FALSE;
  1308. hdl->max_send_speed = max_send_speed;
  1309. return TRUE;
  1310. }
  1311. gboolean
  1312. s3_set_max_recv_speed(S3Handle *hdl, guint64 max_recv_speed)
  1313. {
  1314. if (!s3_curl_throttling_compat())
  1315. return FALSE;
  1316. hdl->max_recv_speed = max_recv_speed;
  1317. return TRUE;
  1318. }
  1319. gboolean
  1320. s3_use_ssl(S3Handle *hdl, gboolean use_ssl)
  1321. {
  1322. gboolean ret = TRUE;
  1323. if (use_ssl & !s3_curl_supports_ssl()) {
  1324. ret = FALSE;
  1325. } else {
  1326. hdl->use_ssl = use_ssl;
  1327. }
  1328. return ret;
  1329. }
  1330. char *
  1331. s3_strerror(S3Handle *hdl)
  1332. {
  1333. const char *message;
  1334. guint response_code;
  1335. const char *s3_error_name;
  1336. CURLcode curl_code;
  1337. guint num_retries;
  1338. char s3_info[256] = "";
  1339. char response_info[16] = "";
  1340. char curl_info[32] = "";
  1341. char retries_info[32] = "";
  1342. s3_error(hdl, &message, &response_code, NULL, &s3_error_name, &curl_code, &num_retries);
  1343. if (!message)
  1344. message = "Unknown S3 error";
  1345. if (s3_error_name)
  1346. g_snprintf(s3_info, sizeof(s3_info), " (%s)", s3_error_name);
  1347. if (response_code)
  1348. g_snprintf(response_info, sizeof(response_info), " (HTTP %d)", response_code);
  1349. if (curl_code)
  1350. g_snprintf(curl_info, sizeof(curl_info), " (CURLcode %d)", curl_code);
  1351. if (num_retries)
  1352. g_snprintf(retries_info, sizeof(retries_info), " (after %d retries)", num_retries);
  1353. return g_strdup_printf("%s%s%s%s%s", message, s3_info, curl_info, response_info, retries_info);
  1354. }
  1355. /* Perform an upload. When this function returns, KEY and
  1356. * BUFFER remain the responsibility of the caller.
  1357. *
  1358. * @param self: the s3 device
  1359. * @param bucket: the bucket to which the upload should be made
  1360. * @param key: the key to which the upload should be made
  1361. * @param buffer: the data to be uploaded
  1362. * @param buffer_len: the length of the data to upload
  1363. * @returns: false if an error ocurred
  1364. */
  1365. gboolean
  1366. s3_upload(S3Handle *hdl,
  1367. const char *bucket,
  1368. const char *key,
  1369. s3_read_func read_func,
  1370. s3_reset_func reset_func,
  1371. s3_size_func size_func,
  1372. s3_md5_func md5_func,
  1373. gpointer read_data,
  1374. s3_progress_func progress_func,
  1375. gpointer progress_data)
  1376. {
  1377. s3_result_t result = S3_RESULT_FAIL;
  1378. static result_handling_t result_handling[] = {
  1379. { 200, 0, 0, S3_RESULT_OK },
  1380. RESULT_HANDLING_ALWAYS_RETRY,
  1381. { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
  1382. };
  1383. g_assert(hdl != NULL);
  1384. result = perform_request(hdl, "PUT", bucket, key, NULL, NULL,
  1385. read_func, reset_func, size_func, md5_func, read_data,
  1386. NULL, NULL, NULL, progress_func, progress_data,
  1387. result_handling);
  1388. return result == S3_RESULT_OK;
  1389. }
  1390. /* Private structure for our "thunk", which tracks where the user is in the list
  1391. * of keys. */
  1392. struct list_keys_thunk {
  1393. GSList *filename_list; /* all pending filenames */
  1394. gboolean in_contents; /* look for "key" entities in here */
  1395. gboolean in_common_prefixes; /* look for …

Large files files are truncated, but you can click here to view the full file