PageRenderTime 58ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/amanda/tags/3_3_0_qa07/device-src/s3.c

#
C | 2089 lines | 1476 code | 296 blank | 317 comment | 320 complexity | b54e45a7e68b19e7a1f6b34d96353f17 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * Copyright (c) 2008, 2009, 2010 Zmanda, Inc. All Rights Reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License version 2 as published
  6. * by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  11. * for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. *
  17. * Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
  18. * Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
  19. */
  20. /* TODO
  21. * - collect speed statistics
  22. * - debugging mode
  23. */
  24. #ifdef HAVE_CONFIG_H
  25. /* use a relative path here to avoid conflicting with Perl's config.h. */
  26. #include "../config/config.h"
  27. #endif
  28. #include <string.h>
  29. #include "s3.h"
  30. #include "s3-util.h"
  31. #ifdef HAVE_REGEX_H
  32. #include <regex.h>
  33. #endif
  34. #ifdef HAVE_SYS_TYPES_H
  35. #include <sys/types.h>
  36. #endif
  37. #ifdef HAVE_SYS_STAT_H
  38. #include <sys/stat.h>
  39. #endif
  40. #ifdef HAVE_UNISTD_H
  41. #include <unistd.h>
  42. #endif
  43. #ifdef HAVE_DIRENT_H
  44. #include <dirent.h>
  45. #endif
  46. #ifdef HAVE_TIME_H
  47. #include <time.h>
  48. #endif
  49. #ifdef HAVE_UTIL_H
  50. #include "util.h"
  51. #endif
  52. #ifdef HAVE_AMANDA_H
  53. #include "amanda.h"
  54. #endif
  55. #include <curl/curl.h>
  56. /* Constant renamed after version 7.10.7 */
  57. #ifndef CURLINFO_RESPONSE_CODE
  58. #define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE
  59. #endif
  60. /* We don't need OpenSSL's kerberos support, and it's broken in
  61. * RHEL 3 anyway. */
  62. #define OPENSSL_NO_KRB5
  63. #ifdef HAVE_OPENSSL_HMAC_H
  64. # include <openssl/hmac.h>
  65. #else
  66. # ifdef HAVE_CRYPTO_HMAC_H
  67. # include <crypto/hmac.h>
  68. # else
  69. # ifdef HAVE_HMAC_H
  70. # include <hmac.h>
  71. # endif
  72. # endif
  73. #endif
  74. #include <openssl/err.h>
  75. #include <openssl/ssl.h>
  76. #include <openssl/md5.h>
  77. /* Maximum key length as specified in the S3 documentation
  78. * (*excluding* null terminator) */
  79. #define S3_MAX_KEY_LENGTH 1024
  80. #define AMAZON_SECURITY_HEADER "x-amz-security-token"
  81. #define AMAZON_BUCKET_CONF_TEMPLATE "\
  82. <CreateBucketConfiguration>\n\
  83. <LocationConstraint>%s</LocationConstraint>\n\
  84. </CreateBucketConfiguration>"
  85. #define AMAZON_STORAGE_CLASS_HEADER "x-amz-storage-class"
  86. #define AMAZON_WILDCARD_LOCATION "*"
  87. /* parameters for exponential backoff in the face of retriable errors */
  88. /* start at 0.01s */
  89. #define EXPONENTIAL_BACKOFF_START_USEC G_USEC_PER_SEC/100
  90. /* double at each retry */
  91. #define EXPONENTIAL_BACKOFF_BASE 2
  92. /* retry 14 times (for a total of about 3 minutes spent waiting) */
  93. #define EXPONENTIAL_BACKOFF_MAX_RETRIES 14
  94. /* general "reasonable size" parameters */
  95. #define MAX_ERROR_RESPONSE_LEN (100*1024)
  96. /* Results which should always be retried */
  97. #define RESULT_HANDLING_ALWAYS_RETRY \
  98. { 400, S3_ERROR_RequestTimeout, 0, S3_RESULT_RETRY }, \
  99. { 403, S3_ERROR_RequestTimeTooSkewed,0, S3_RESULT_RETRY }, \
  100. { 409, S3_ERROR_OperationAborted, 0, S3_RESULT_RETRY }, \
  101. { 412, S3_ERROR_PreconditionFailed, 0, S3_RESULT_RETRY }, \
  102. { 500, S3_ERROR_InternalError, 0, S3_RESULT_RETRY }, \
  103. { 501, S3_ERROR_NotImplemented, 0, S3_RESULT_RETRY }, \
  104. { 0, 0, CURLE_COULDNT_CONNECT, S3_RESULT_RETRY }, \
  105. { 0, 0, CURLE_COULDNT_RESOLVE_HOST, S3_RESULT_RETRY }, \
  106. { 0, 0, CURLE_PARTIAL_FILE, S3_RESULT_RETRY }, \
  107. { 0, 0, CURLE_OPERATION_TIMEOUTED, S3_RESULT_RETRY }, \
  108. { 0, 0, CURLE_SSL_CONNECT_ERROR, S3_RESULT_RETRY }, \
  109. { 0, 0, CURLE_SEND_ERROR, S3_RESULT_RETRY }, \
  110. { 0, 0, CURLE_RECV_ERROR, S3_RESULT_RETRY }, \
  111. { 0, 0, CURLE_GOT_NOTHING, S3_RESULT_RETRY }
  112. /*
  113. * Data structures and associated functions
  114. */
/* Per-connection state for talking to S3: credentials, endpoint settings,
 * the CURL handle, and the outcome of the most recent request. */
struct S3Handle {
    /* (all strings in this struct are freed by s3_free()) */

    /* access_key is sent in the Authorization header; secret_key is the
     * HMAC-SHA1 signing key (see authenticate_request) */
    char *access_key;
    char *secret_key;
    /* when non-empty, this is signed and sent as x-amz-security-token */
    char *user_token;

    /* attributes for new objects */
    char *bucket_location;
    /* when non-empty, this is signed and sent as x-amz-storage-class */
    char *storage_class;

    /* endpoint description; service_path, when set, is prepended to the
     * resource path that gets signed */
    char *host;
    char *service_path;
    /* when TRUE, the bucket is signed unescaped and always followed by '/'
     * NOTE(review): presumably also selects bucket.host style URLs -- confirm */
    gboolean use_subdomain;

    /* NOTE(review): presumably CA certificate information for CURL -- confirm */
    char *ca_info;

    /* the CURL handle; interpret_response reads the HTTP status from it */
    CURL *curl;

    /* NOTE(review): request options; their use is not visible in this part
     * of the file */
    gboolean verbose;
    gboolean use_ssl;
    guint64 max_send_speed;
    guint64 max_recv_speed;

    /* information from the last request (last_message, last_response_code,
     * last_s3_error_code and last_curl_code are written by
     * interpret_response) */
    char *last_message;
    guint last_response_code;
    s3_error_code_t last_s3_error_code;
    CURLcode last_curl_code;
    guint last_num_retries;
    void *last_response_body;
    guint last_response_body_size;

    /* offset with s3: added to time(NULL) when the Date header is built */
    time_t time_offset_with_s3;
};
/* Bookkeeping shared by the internal CURL write/header callbacks.
 * NOTE(review): the callbacks that use this struct are defined outside the
 * part of the file reviewed here; the field notes below are assumptions
 * to confirm against s3_internal_write_func / s3_internal_header_func. */
typedef struct {
    /* presumably holds the part of the response kept in memory */
    CurlBuffer resp_buf;
    /* presumably the caller-supplied write/reset callbacks and their data */
    s3_write_func write_func;
    s3_reset_func reset_func;
    gpointer write_data;

    gboolean headers_done;
    gboolean int_write_done;
    /* presumably the ETag header captured from the response */
    char *etag;
    /* Points to current handle: Added to get hold of s3 offset */
    struct S3Handle *hdl;
} S3InternalData;
/* Callback function to examine headers one at a time.
 *
 * @note this has the same signature as a CURLOPT_HEADERFUNCTION callback
 *
 * @param data: The pointer to read data from
 * @param size: The size of each "element" of the data buffer in bytes
 * @param nmemb: The number of elements in the data buffer.
 *               So, the buffer's size is size*nmemb bytes.
 * @param stream: the header_data (an opaque pointer)
 *
 * @return The number of bytes written to the buffer or
 *         CURL_WRITEFUNC_PAUSE to pause.
 *         If it's the number of bytes written, it should match the buffer size
 */
typedef size_t (*s3_header_func)(void *data, size_t size, size_t nmemb, void *stream);
  169. /*
  170. * S3 errors */
  171. /* (see preprocessor magic in s3.h) */
/* Table of S3 error names, built by stringifying each entry of
 * S3_ERROR_LIST. It is indexed by s3_error_code_t: s3_error_name_from_code
 * reads entry [code], and s3_error_code_from_name returns the index of the
 * matching entry. */
static char * s3_error_code_names[] = {
#define S3_ERROR(NAME) #NAME
    S3_ERROR_LIST
#undef S3_ERROR
};
  177. /* Convert an s3 error name to an error code. This function
  178. * matches strings case-insensitively, and is appropriate for use
  179. * on data from the network.
  180. *
  181. * @param s3_error_name: the error name
  182. * @returns: the error code (see constants in s3.h)
  183. */
  184. static s3_error_code_t
  185. s3_error_code_from_name(char *s3_error_name);
  186. /* Convert an s3 error code to a string
  187. *
  188. * @param s3_error_code: the error code to convert
  189. * @returns: statically allocated string
  190. */
  191. static const char *
  192. s3_error_name_from_code(s3_error_code_t s3_error_code);
  193. /*
  194. * result handling */
  195. /* result handling is specified by a static array of result_handling structs,
  196. * which match based on response_code (from HTTP) and S3 error code. The result
  197. * given for the first match is used. 0 acts as a wildcard for both response_code
  198. * and s3_error_code. The list is terminated with a struct containing 0 for both
  199. * response_code and s3_error_code; the result for that struct is the default
  200. * result.
  201. *
  202. * See RESULT_HANDLING_ALWAYS_RETRY for an example.
  203. */
/* Result assigned to a response by a result_handling table
 * (see lookup_result). */
typedef enum {
    S3_RESULT_RETRY = -1, /* used by RESULT_HANDLING_ALWAYS_RETRY for results that should be retried */
    S3_RESULT_FAIL = 0,
    S3_RESULT_OK = 1
} s3_result_t;
/* One row of a result-handling table.
 *
 * In lookup_result a zero in response_code, s3_error_code or curl_code acts
 * as a wildcard for that field, and a row with all three fields zero
 * terminates the table; that row's result is the default. */
typedef struct result_handling {
    guint response_code;            /* HTTP response code to match, or 0 */
    s3_error_code_t s3_error_code;  /* S3 error code to match, or 0 */
    CURLcode curl_code;             /* CURL error code to match, or 0 */
    s3_result_t result;             /* result returned when this row matches */
} result_handling_t;
  215. /* Lookup a result in C{result_handling}.
  216. *
  217. * @param result_handling: array of handling specifications
  218. * @param response_code: response code from operation
  219. * @param s3_error_code: s3 error code from operation, if any
  220. * @param curl_code: the CURL error, if any
  221. * @returns: the matching result
  222. */
  223. static s3_result_t
  224. lookup_result(const result_handling_t *result_handling,
  225. guint response_code,
  226. s3_error_code_t s3_error_code,
  227. CURLcode curl_code);
  228. /*
  229. * Precompiled regular expressions */
/* error_name_regex and message_regex are run against error response bodies
 * in interpret_response; capture group 1 of each is the extracted value.
 * NOTE(review): the remaining regexes are not used in this part of the file,
 * and all of them are presumably compiled by compile_regexes() -- confirm. */
static regex_t etag_regex, error_name_regex, message_regex, subdomain_regex,
    location_con_regex, date_sync_regex;
  232. /*
  233. * Utility functions
  234. */
  235. /* Check if a string is non-empty
  236. *
  237. * @param str: string to check
  238. * @returns: true iff str is non-NULL and not "\0"
  239. */
  240. static gboolean is_non_empty_string(const char *str);
  241. /* Construct the URL for an Amazon S3 REST request.
  242. *
  243. * A new string is allocated and returned; it is the responsibility of the caller.
  244. *
  245. * @param hdl: the S3Handle object
  246. * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
  247. * @param host: the host name to connect to, 's3.amazonaws.com'
  248. * @param service_path: A path to add in the URL, or NULL for none.
  249. * @param bucket: the bucket being accessed, or NULL for none
  250. * @param key: the key being accessed, or NULL for none
  251. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  252. * @param use_subdomain: if TRUE, a subdomain of 'host' will be used
  253. * @param use_ssl: if TRUE, use 'https'
  254. *
  255. * !use_subdomain: http://host/service_path/bucket/key
  256. * use_subdomain : http://bucket.host/service_path/key
  257. *
  258. */
  259. static char *
  260. build_url(
  261. const char *host,
  262. const char *service_path,
  263. const char *bucket,
  264. const char *key,
  265. const char *subresource,
  266. const char *query,
  267. gboolean use_subdomain,
  268. gboolean use_ssl);
  269. /* Create proper authorization headers for an Amazon S3 REST
  270. * request to C{headers}.
  271. *
  272. * @note: C{X-Amz} headers (in C{headers}) must
  273. * - be in lower-case
  274. * - be in alphabetical order
  275. * - have no spaces around the colon
  276. * (don't yell at me -- see the Amazon Developer Guide)
  277. *
  278. * @param hdl: the S3Handle object
  279. * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
  280. * @param bucket: the bucket being accessed, or NULL for none
  281. * @param key: the key being accessed, or NULL for none
  282. * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
  283. * @param md5_hash: the MD5 hash of the request body, or NULL for none
  284. */
  285. static struct curl_slist *
  286. authenticate_request(S3Handle *hdl,
  287. const char *verb,
  288. const char *bucket,
  289. const char *key,
  290. const char *subresource,
  291. const char *md5_hash);
  292. /* Interpret the response to an S3 operation, assuming CURL completed its request
  293. * successfully. This function fills in the relevant C{hdl->last*} members.
  294. *
  295. * @param hdl: The S3Handle object
  296. * @param body: the response body
  297. * @param body_len: the length of the response body
  298. * @param etag: The response's ETag header
  299. * @param content_md5: The hex-encoded MD5 hash of the request body,
  300. * which will be checked against the response's ETag header.
  301. * If NULL, the header is not checked.
  302. * If non-NULL, then the body should have the response headers at its beginning.
  303. * @returns: TRUE if the response should be retried (e.g., network error)
  304. */
  305. static gboolean
  306. interpret_response(S3Handle *hdl,
  307. CURLcode curl_code,
  308. char *curl_error_buffer,
  309. gchar *body,
  310. guint body_len,
  311. const char *etag,
  312. const char *content_md5);
  313. /* Perform an S3 operation. This function handles all of the details
  314. * of retrying requests and so on.
  315. *
  316. * The concepts of bucket and keys are defined by the Amazon S3 API.
  317. * See: "Components of Amazon S3" - API Version 2006-03-01 pg. 8
  318. *
  319. * Individual sub-resources are defined in several places. In the REST API,
  320. * they are represented by a "flag" in the "query string".
  321. * See: "Constructing the CanonicalizedResource Element" - API Version 2006-03-01 pg. 60
  322. *
  323. * @param hdl: the S3Handle object
  324. * @param verb: the HTTP request method
  325. * @param bucket: the bucket to access, or NULL for none
  326. * @param key: the key to access, or NULL for none
  327. * @param subresource: the "sub-resource" to request (e.g. "acl") or NULL for none
  328. * @param query: the query string to send (not including the initial '?'),
  329. * or NULL for none
  330. * @param read_func: the callback for reading data
  331. * Will use s3_empty_read_func if NULL is passed in.
  332. * @param read_reset_func: the callback to reset reading data
  333. * @param size_func: the callback to get the number of bytes to upload
  334. * @param md5_func: the callback to get the MD5 hash of the data to upload
  335. * @param read_data: pointer to pass to the above functions
  336. * @param write_func: the callback for writing data.
  337. * Will use s3_counter_write_func if NULL is passed in.
  338. * @param write_reset_func: the callback to reset writing data
  339. * @param write_data: pointer to pass to C{write_func}
  340. * @param progress_func: the callback for progress information
  341. * @param progress_data: pointer to pass to C{progress_func}
  342. * @param result_handling: instructions for handling the results; see above.
  343. * @returns: the result specified by result_handling; details of the response
  344. * are then available in C{hdl->last*}
  345. */
  346. static s3_result_t
  347. perform_request(S3Handle *hdl,
  348. const char *verb,
  349. const char *bucket,
  350. const char *key,
  351. const char *subresource,
  352. const char *query,
  353. s3_read_func read_func,
  354. s3_reset_func read_reset_func,
  355. s3_size_func size_func,
  356. s3_md5_func md5_func,
  357. gpointer read_data,
  358. s3_write_func write_func,
  359. s3_reset_func write_reset_func,
  360. gpointer write_data,
  361. s3_progress_func progress_func,
  362. gpointer progress_data,
  363. const result_handling_t *result_handling);
  364. /*
  365. * a CURLOPT_WRITEFUNCTION to save part of the response in memory and
  366. * call an external function if one was provided.
  367. */
  368. static size_t
  369. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream);
  370. /*
  371. * a function to reset to our internal buffer
  372. */
  373. static void
  374. s3_internal_reset_func(void * stream);
  375. /*
  376. * a CURLOPT_HEADERFUNCTION to save the ETag header only.
  377. */
  378. static size_t
  379. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream);
  380. static gboolean
  381. compile_regexes(void);
  382. /*
  383. * Static function implementations
  384. */
  385. static s3_error_code_t
  386. s3_error_code_from_name(char *s3_error_name)
  387. {
  388. int i;
  389. if (!s3_error_name) return S3_ERROR_Unknown;
  390. /* do a brute-force search through the list, since it's not sorted */
  391. for (i = 0; i < S3_ERROR_END; i++) {
  392. if (g_ascii_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
  393. return i;
  394. }
  395. return S3_ERROR_Unknown;
  396. }
  397. static const char *
  398. s3_error_name_from_code(s3_error_code_t s3_error_code)
  399. {
  400. if (s3_error_code >= S3_ERROR_END)
  401. s3_error_code = S3_ERROR_Unknown;
  402. return s3_error_code_names[s3_error_code];
  403. }
  404. gboolean
  405. s3_curl_supports_ssl(void)
  406. {
  407. static int supported = -1;
  408. if (supported == -1) {
  409. #if defined(CURL_VERSION_SSL)
  410. curl_version_info_data *info = curl_version_info(CURLVERSION_NOW);
  411. if (info->features & CURL_VERSION_SSL)
  412. supported = 1;
  413. else
  414. supported = 0;
  415. #else
  416. supported = 0;
  417. #endif
  418. }
  419. return supported;
  420. }
  421. static gboolean
  422. s3_curl_throttling_compat(void)
  423. {
  424. /* CURLOPT_MAX_SEND_SPEED_LARGE added in 7.15.5 */
  425. #if LIBCURL_VERSION_NUM >= 0x070f05
  426. curl_version_info_data *info;
  427. /* check the runtime version too */
  428. info = curl_version_info(CURLVERSION_NOW);
  429. return info->version_num >= 0x070f05;
  430. #else
  431. return FALSE;
  432. #endif
  433. }
  434. static s3_result_t
  435. lookup_result(const result_handling_t *result_handling,
  436. guint response_code,
  437. s3_error_code_t s3_error_code,
  438. CURLcode curl_code)
  439. {
  440. while (result_handling->response_code
  441. || result_handling->s3_error_code
  442. || result_handling->curl_code) {
  443. if ((result_handling->response_code && result_handling->response_code != response_code)
  444. || (result_handling->s3_error_code && result_handling->s3_error_code != s3_error_code)
  445. || (result_handling->curl_code && result_handling->curl_code != curl_code)) {
  446. result_handling++;
  447. continue;
  448. }
  449. return result_handling->result;
  450. }
  451. /* return the result for the terminator, as the default */
  452. return result_handling->result;
  453. }
  454. static gboolean
  455. is_non_empty_string(const char *str)
  456. {
  457. return str && str[0] != '\0';
  458. }
  459. static char *
  460. build_url(
  461. const char *host,
  462. const char *service_path,
  463. const char *bucket,
  464. const char *key,
  465. const char *subresource,
  466. const char *query,
  467. gboolean use_subdomain,
  468. gboolean use_ssl)
  469. {
  470. GString *url = NULL;
  471. char *esc_bucket = NULL, *esc_key = NULL;
  472. /* scheme */
  473. url = g_string_new("http");
  474. if (use_ssl)
  475. g_string_append(url, "s");
  476. g_string_append(url, "://");
  477. /* domain */
  478. if (use_subdomain && bucket)
  479. g_string_append_printf(url, "%s.%s", bucket, host);
  480. else
  481. g_string_append_printf(url, "%s", host);
  482. if (service_path) {
  483. g_string_append_printf(url, "%s/", service_path);
  484. } else {
  485. g_string_append(url, "/");
  486. }
  487. /* path */
  488. if (!use_subdomain && bucket) {
  489. esc_bucket = curl_escape(bucket, 0);
  490. if (!esc_bucket) goto cleanup;
  491. g_string_append_printf(url, "%s", esc_bucket);
  492. if (key)
  493. g_string_append(url, "/");
  494. }
  495. if (key) {
  496. esc_key = curl_escape(key, 0);
  497. if (!esc_key) goto cleanup;
  498. g_string_append_printf(url, "%s", esc_key);
  499. }
  500. /* query string */
  501. if (subresource || query)
  502. g_string_append(url, "?");
  503. if (subresource)
  504. g_string_append(url, subresource);
  505. if (subresource && query)
  506. g_string_append(url, "&");
  507. if (query)
  508. g_string_append(url, query);
  509. cleanup:
  510. if (esc_bucket) curl_free(esc_bucket);
  511. if (esc_key) curl_free(esc_key);
  512. return g_string_free(url, FALSE);
  513. }
  514. static struct curl_slist *
  515. authenticate_request(S3Handle *hdl,
  516. const char *verb,
  517. const char *bucket,
  518. const char *key,
  519. const char *subresource,
  520. const char *md5_hash)
  521. {
  522. time_t t;
  523. struct tm tmp;
  524. char *date = NULL;
  525. char *buf = NULL;
  526. HMAC_CTX ctx;
  527. GByteArray *md = NULL;
  528. char *auth_base64 = NULL;
  529. struct curl_slist *headers = NULL;
  530. char *esc_bucket = NULL, *esc_key = NULL;
  531. GString *auth_string = NULL;
  532. /* From RFC 2616 */
  533. static const char *wkday[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  534. static const char *month[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
  535. "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  536. /* Build the string to sign, per the S3 spec.
  537. * See: "Authenticating REST Requests" - API Version 2006-03-01 pg 58
  538. */
  539. /* verb */
  540. auth_string = g_string_new(verb);
  541. g_string_append(auth_string, "\n");
  542. /* Content-MD5 header */
  543. if (md5_hash)
  544. g_string_append(auth_string, md5_hash);
  545. g_string_append(auth_string, "\n");
  546. /* Content-Type is empty*/
  547. g_string_append(auth_string, "\n");
  548. /* calculate the date */
  549. t = time(NULL);
  550. /* sync clock with amazon s3 */
  551. t = t + hdl->time_offset_with_s3;
  552. #ifdef _WIN32
  553. if (!gmtime_s(&tmp, &t)) g_debug("localtime error");
  554. #else
  555. if (!gmtime_r(&t, &tmp)) perror("localtime");
  556. #endif
  557. date = g_strdup_printf("%s, %02d %s %04d %02d:%02d:%02d GMT",
  558. wkday[tmp.tm_wday], tmp.tm_mday, month[tmp.tm_mon], 1900+tmp.tm_year,
  559. tmp.tm_hour, tmp.tm_min, tmp.tm_sec);
  560. g_string_append(auth_string, date);
  561. g_string_append(auth_string, "\n");
  562. /* CanonicalizedAmzHeaders, sorted lexicographically */
  563. if (is_non_empty_string(hdl->user_token)) {
  564. g_string_append(auth_string, AMAZON_SECURITY_HEADER);
  565. g_string_append(auth_string, ":");
  566. g_string_append(auth_string, hdl->user_token);
  567. g_string_append(auth_string, ",");
  568. g_string_append(auth_string, STS_PRODUCT_TOKEN);
  569. g_string_append(auth_string, "\n");
  570. }
  571. if (is_non_empty_string(hdl->storage_class)) {
  572. g_string_append(auth_string, AMAZON_STORAGE_CLASS_HEADER);
  573. g_string_append(auth_string, ":");
  574. g_string_append(auth_string, hdl->storage_class);
  575. g_string_append(auth_string, "\n");
  576. }
  577. /* CanonicalizedResource */
  578. if (hdl->service_path) {
  579. g_string_append(auth_string, hdl->service_path);
  580. }
  581. g_string_append(auth_string, "/");
  582. if (bucket) {
  583. if (hdl->use_subdomain)
  584. g_string_append(auth_string, bucket);
  585. else {
  586. esc_bucket = curl_escape(bucket, 0);
  587. if (!esc_bucket) goto cleanup;
  588. g_string_append(auth_string, esc_bucket);
  589. }
  590. }
  591. if (bucket && (hdl->use_subdomain || key))
  592. g_string_append(auth_string, "/");
  593. if (key) {
  594. esc_key = curl_escape(key, 0);
  595. if (!esc_key) goto cleanup;
  596. g_string_append(auth_string, esc_key);
  597. }
  598. if (subresource) {
  599. g_string_append(auth_string, "?");
  600. g_string_append(auth_string, subresource);
  601. }
  602. /* run HMAC-SHA1 on the canonicalized string */
  603. md = g_byte_array_sized_new(EVP_MAX_MD_SIZE+1);
  604. HMAC_CTX_init(&ctx);
  605. HMAC_Init_ex(&ctx, hdl->secret_key, (int) strlen(hdl->secret_key), EVP_sha1(), NULL);
  606. HMAC_Update(&ctx, (unsigned char*) auth_string->str, auth_string->len);
  607. HMAC_Final(&ctx, md->data, &md->len);
  608. HMAC_CTX_cleanup(&ctx);
  609. auth_base64 = s3_base64_encode(md);
  610. /* append the new headers */
  611. if (is_non_empty_string(hdl->user_token)) {
  612. /* Devpay headers are included in hash. */
  613. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", hdl->user_token);
  614. headers = curl_slist_append(headers, buf);
  615. g_free(buf);
  616. buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", STS_PRODUCT_TOKEN);
  617. headers = curl_slist_append(headers, buf);
  618. g_free(buf);
  619. }
  620. if (is_non_empty_string(hdl->storage_class)) {
  621. buf = g_strdup_printf(AMAZON_STORAGE_CLASS_HEADER ": %s", hdl->storage_class);
  622. headers = curl_slist_append(headers, buf);
  623. g_free(buf);
  624. }
  625. buf = g_strdup_printf("Authorization: AWS %s:%s",
  626. hdl->access_key, auth_base64);
  627. headers = curl_slist_append(headers, buf);
  628. g_free(buf);
  629. if (md5_hash && '\0' != md5_hash[0]) {
  630. buf = g_strdup_printf("Content-MD5: %s", md5_hash);
  631. headers = curl_slist_append(headers, buf);
  632. g_free(buf);
  633. }
  634. buf = g_strdup_printf("Date: %s", date);
  635. headers = curl_slist_append(headers, buf);
  636. g_free(buf);
  637. cleanup:
  638. g_free(date);
  639. g_free(esc_bucket);
  640. g_free(esc_key);
  641. g_byte_array_free(md, TRUE);
  642. g_free(auth_base64);
  643. g_string_free(auth_string, TRUE);
  644. return headers;
  645. }
  646. static gboolean
  647. interpret_response(S3Handle *hdl,
  648. CURLcode curl_code,
  649. char *curl_error_buffer,
  650. gchar *body,
  651. guint body_len,
  652. const char *etag,
  653. const char *content_md5)
  654. {
  655. long response_code = 0;
  656. regmatch_t pmatch[2];
  657. char *error_name = NULL, *message = NULL;
  658. char *body_copy = NULL;
  659. gboolean ret = TRUE;
  660. if (!hdl) return FALSE;
  661. if (hdl->last_message) g_free(hdl->last_message);
  662. hdl->last_message = NULL;
  663. /* bail out from a CURL error */
  664. if (curl_code != CURLE_OK) {
  665. hdl->last_curl_code = curl_code;
  666. hdl->last_message = g_strdup_printf("CURL error: %s", curl_error_buffer);
  667. return FALSE;
  668. }
  669. /* CURL seems to think things were OK, so get its response code */
  670. curl_easy_getinfo(hdl->curl, CURLINFO_RESPONSE_CODE, &response_code);
  671. hdl->last_response_code = response_code;
  672. /* check ETag, if present */
  673. if (etag && content_md5 && 200 == response_code) {
  674. if (etag && g_ascii_strcasecmp(etag, content_md5))
  675. hdl->last_message = g_strdup("S3 Error: Possible data corruption (ETag returned by Amazon did not match the MD5 hash of the data sent)");
  676. else
  677. ret = FALSE;
  678. return ret;
  679. }
  680. if (200 <= response_code && response_code < 400) {
  681. /* 2xx and 3xx codes won't have a response body we care about */
  682. hdl->last_s3_error_code = S3_ERROR_None;
  683. return FALSE;
  684. }
  685. /* Now look at the body to try to get the actual Amazon error message. Rather
  686. * than parse out the XML, just use some regexes. */
  687. /* impose a reasonable limit on body size */
  688. if (body_len > MAX_ERROR_RESPONSE_LEN) {
  689. hdl->last_message = g_strdup("S3 Error: Unknown (response body too large to parse)");
  690. return FALSE;
  691. } else if (!body || body_len == 0) {
  692. hdl->last_message = g_strdup("S3 Error: Unknown (empty response body)");
  693. return TRUE; /* perhaps a network error; retry the request */
  694. }
  695. /* use strndup to get a zero-terminated string */
  696. body_copy = g_strndup(body, body_len);
  697. if (!body_copy) goto cleanup;
  698. if (!s3_regexec_wrap(&error_name_regex, body_copy, 2, pmatch, 0))
  699. error_name = find_regex_substring(body_copy, pmatch[1]);
  700. if (!s3_regexec_wrap(&message_regex, body_copy, 2, pmatch, 0))
  701. message = find_regex_substring(body_copy, pmatch[1]);
  702. if (error_name) {
  703. hdl->last_s3_error_code = s3_error_code_from_name(error_name);
  704. }
  705. if (message) {
  706. hdl->last_message = message;
  707. message = NULL; /* steal the reference to the string */
  708. }
  709. cleanup:
  710. g_free(body_copy);
  711. g_free(message);
  712. g_free(error_name);
  713. return FALSE;
  714. }
  715. /* a CURLOPT_READFUNCTION to read data from a buffer. */
  716. size_t
  717. s3_buffer_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  718. {
  719. CurlBuffer *data = stream;
  720. guint bytes_desired = (guint) size * nmemb;
  721. /* check the number of bytes remaining, just to be safe */
  722. if (bytes_desired > data->buffer_len - data->buffer_pos)
  723. bytes_desired = data->buffer_len - data->buffer_pos;
  724. memcpy((char *)ptr, data->buffer + data->buffer_pos, bytes_desired);
  725. data->buffer_pos += bytes_desired;
  726. return bytes_desired;
  727. }
  728. size_t
  729. s3_buffer_size_func(void *stream)
  730. {
  731. CurlBuffer *data = stream;
  732. return data->buffer_len;
  733. }
  734. GByteArray*
  735. s3_buffer_md5_func(void *stream)
  736. {
  737. CurlBuffer *data = stream;
  738. GByteArray req_body_gba = {(guint8 *)data->buffer, data->buffer_len};
  739. return s3_compute_md5_hash(&req_body_gba);
  740. }
  741. void
  742. s3_buffer_reset_func(void *stream)
  743. {
  744. CurlBuffer *data = stream;
  745. data->buffer_pos = 0;
  746. }
  747. /* a CURLOPT_WRITEFUNCTION to write data to a buffer. */
  748. size_t
  749. s3_buffer_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  750. {
  751. CurlBuffer * data = stream;
  752. guint new_bytes = (guint) size * nmemb;
  753. guint bytes_needed = data->buffer_pos + new_bytes;
  754. /* error out if the new size is greater than the maximum allowed */
  755. if (data->max_buffer_size && bytes_needed > data->max_buffer_size)
  756. return 0;
  757. /* reallocate if necessary. We use exponential sizing to make this
  758. * happen less often. */
  759. if (bytes_needed > data->buffer_len) {
  760. guint new_size = MAX(bytes_needed, data->buffer_len * 2);
  761. if (data->max_buffer_size) {
  762. new_size = MIN(new_size, data->max_buffer_size);
  763. }
  764. data->buffer = g_realloc(data->buffer, new_size);
  765. data->buffer_len = new_size;
  766. }
  767. if (!data->buffer)
  768. return 0; /* returning zero signals an error to libcurl */
  769. /* actually copy the data to the buffer */
  770. memcpy(data->buffer + data->buffer_pos, ptr, new_bytes);
  771. data->buffer_pos += new_bytes;
  772. /* signal success to curl */
  773. return new_bytes;
  774. }
  775. /* a CURLOPT_READFUNCTION that writes nothing. */
  776. size_t
  777. s3_empty_read_func(G_GNUC_UNUSED void *ptr, G_GNUC_UNUSED size_t size, G_GNUC_UNUSED size_t nmemb, G_GNUC_UNUSED void * stream)
  778. {
  779. return 0;
  780. }
  781. size_t
  782. s3_empty_size_func(G_GNUC_UNUSED void *stream)
  783. {
  784. return 0;
  785. }
  786. GByteArray*
  787. s3_empty_md5_func(G_GNUC_UNUSED void *stream)
  788. {
  789. static const GByteArray empty = {(guint8 *) "", 0};
  790. return s3_compute_md5_hash(&empty);
  791. }
  792. /* a CURLOPT_WRITEFUNCTION to write data that just counts data.
  793. * s3_write_data should be NULL or a pointer to an gint64.
  794. */
  795. size_t
  796. s3_counter_write_func(G_GNUC_UNUSED void *ptr, size_t size, size_t nmemb, void *stream)
  797. {
  798. gint64 *count = (gint64*) stream, inc = nmemb*size;
  799. if (count) *count += inc;
  800. return inc;
  801. }
  802. void
  803. s3_counter_reset_func(void *stream)
  804. {
  805. gint64 *count = (gint64*) stream;
  806. if (count) *count = 0;
  807. }
  808. #ifdef _WIN32
  809. /* a CURLOPT_READFUNCTION to read data from a file. */
  810. size_t
  811. s3_file_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
  812. {
  813. HANDLE *hFile = (HANDLE *) stream;
  814. DWORD bytes_read;
  815. ReadFile(hFile, ptr, (DWORD) size*nmemb, &bytes_read, NULL);
  816. return bytes_read;
  817. }
  818. size_t
  819. s3_file_size_func(void *stream)
  820. {
  821. HANDLE *hFile = (HANDLE *) stream;
  822. DWORD size = GetFileSize(hFile, NULL);
  823. if (INVALID_FILE_SIZE == size) {
  824. return -1;
  825. } else {
  826. return size;
  827. }
  828. }
  829. GByteArray*
  830. s3_file_md5_func(void *stream)
  831. {
  832. #define S3_MD5_BUF_SIZE (10*1024)
  833. HANDLE *hFile = (HANDLE *) stream;
  834. guint8 buf[S3_MD5_BUF_SIZE];
  835. DWORD bytes_read;
  836. MD5_CTX md5_ctx;
  837. GByteArray *ret = NULL;
  838. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  839. ret = g_byte_array_sized_new(S3_MD5_HASH_BYTE_LEN);
  840. g_byte_array_set_size(ret, S3_MD5_HASH_BYTE_LEN);
  841. MD5_Init(&md5_ctx);
  842. while (ReadFile(hFile, buf, S3_MD5_BUF_SIZE, &bytes_read, NULL)) {
  843. MD5_Update(&md5_ctx, buf, bytes_read);
  844. }
  845. MD5_Final(ret->data, &md5_ctx);
  846. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  847. return ret;
  848. #undef S3_MD5_BUF_SIZE
  849. }
  850. GByteArray*
  851. s3_file_reset_func(void *stream)
  852. {
  853. g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
  854. }
  855. /* a CURLOPT_WRITEFUNCTION to write data to a file. */
  856. size_t
  857. s3_file_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
  858. {
  859. HANDLE *hFile = (HANDLE *) stream;
  860. DWORD bytes_written;
  861. WriteFile(hFile, ptr, (DWORD) size*nmemb, &bytes_written, NULL);
  862. return bytes_written;
  863. }
  864. #endif
  865. static int
  866. curl_debug_message(CURL *curl G_GNUC_UNUSED,
  867. curl_infotype type,
  868. char *s,
  869. size_t len,
  870. void *unused G_GNUC_UNUSED)
  871. {
  872. char *lineprefix;
  873. char *message;
  874. char **lines, **line;
  875. switch (type) {
  876. case CURLINFO_TEXT:
  877. lineprefix="";
  878. break;
  879. case CURLINFO_HEADER_IN:
  880. lineprefix="Hdr In: ";
  881. break;
  882. case CURLINFO_HEADER_OUT:
  883. lineprefix="Hdr Out: ";
  884. break;
  885. default:
  886. /* ignore data in/out -- nobody wants to see that in the
  887. * debug logs! */
  888. return 0;
  889. }
  890. /* split the input into lines */
  891. message = g_strndup(s, (gsize) len);
  892. lines = g_strsplit(message, "\n", -1);
  893. g_free(message);
  894. for (line = lines; *line; line++) {
  895. if (**line == '\0') continue; /* skip blank lines */
  896. g_debug("%s%s", lineprefix, *line);
  897. }
  898. g_strfreev(lines);
  899. return 0;
  900. }
  901. static s3_result_t
  902. perform_request(S3Handle *hdl,
  903. const char *verb,
  904. const char *bucket,
  905. const char *key,
  906. const char *subresource,
  907. const char *query,
  908. s3_read_func read_func,
  909. s3_reset_func read_reset_func,
  910. s3_size_func size_func,
  911. s3_md5_func md5_func,
  912. gpointer read_data,
  913. s3_write_func write_func,
  914. s3_reset_func write_reset_func,
  915. gpointer write_data,
  916. s3_progress_func progress_func,
  917. gpointer progress_data,
  918. const result_handling_t *result_handling)
  919. {
  920. char *url = NULL;
  921. s3_result_t result = S3_RESULT_FAIL; /* assume the worst.. */
  922. CURLcode curl_code = CURLE_OK;
  923. char curl_error_buffer[CURL_ERROR_SIZE] = "";
  924. struct curl_slist *headers = NULL;
  925. /* Set S3Internal Data */
  926. S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, FALSE, NULL, hdl};
  927. gboolean should_retry;
  928. guint retries = 0;
  929. gulong backoff = EXPONENTIAL_BACKOFF_START_USEC;
  930. /* corresponds to PUT, HEAD, GET, and POST */
  931. int curlopt_upload = 0, curlopt_nobody = 0, curlopt_httpget = 0, curlopt_post = 0;
  932. /* do we want to examine the headers */
  933. const char *curlopt_customrequest = NULL;
  934. /* for MD5 calculation */
  935. GByteArray *md5_hash = NULL;
  936. gchar *md5_hash_hex = NULL, *md5_hash_b64 = NULL;
  937. size_t request_body_size = 0;
  938. g_assert(hdl != NULL && hdl->curl != NULL);
  939. s3_reset(hdl);
  940. url = build_url(hdl->host, hdl->service_path, bucket, key, subresource,
  941. query, hdl->use_subdomain, hdl->use_ssl);
  942. if (!url) goto cleanup;
  943. /* libcurl may behave strangely if these are not set correctly */
  944. if (!strncmp(verb, "PUT", 4)) {
  945. curlopt_upload = 1;
  946. } else if (!strncmp(verb, "GET", 4)) {
  947. curlopt_httpget = 1;
  948. } else if (!strncmp(verb, "POST", 5)) {
  949. curlopt_post = 1;
  950. } else if (!strncmp(verb, "HEAD", 5)) {
  951. curlopt_nobody = 1;
  952. } else {
  953. curlopt_customrequest = verb;
  954. }
  955. if (size_func) {
  956. request_body_size = size_func(read_data);
  957. }
  958. if (md5_func) {
  959. md5_hash = md5_func(read_data);
  960. if (md5_hash) {
  961. md5_hash_b64 = s3_base64_encode(md5_hash);
  962. md5_hash_hex = s3_hex_encode(md5_hash);
  963. g_byte_array_free(md5_hash, TRUE);
  964. }
  965. }
  966. if (!read_func) {
  967. /* Curl will use fread() otherwise */
  968. read_func = s3_empty_read_func;
  969. }
  970. if (write_func) {
  971. int_writedata.write_func = write_func;
  972. int_writedata.reset_func = write_reset_func;
  973. int_writedata.write_data = write_data;
  974. } else {
  975. /* Curl will use fwrite() otherwise */
  976. int_writedata.write_func = s3_counter_write_func;
  977. int_writedata.reset_func = s3_counter_reset_func;
  978. int_writedata.write_data = NULL;
  979. }
  980. while (1) {
  981. /* reset things */
  982. if (headers) {
  983. curl_slist_free_all(headers);
  984. }
  985. curl_error_buffer[0] = '\0';
  986. if (read_reset_func) {
  987. read_reset_func(read_data);
  988. }
  989. /* calls write_reset_func */
  990. s3_internal_reset_func(&int_writedata);
  991. /* set up the request */
  992. headers = authenticate_request(hdl, verb, bucket, key, subresource,
  993. md5_hash_b64);
  994. if (hdl->use_ssl && hdl->ca_info) {
  995. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CAINFO, hdl->ca_info)))
  996. goto curl_error;
  997. }
  998. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_VERBOSE, hdl->verbose)))
  999. goto curl_error;
  1000. if (hdl->verbose) {
  1001. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_DEBUGFUNCTION,
  1002. curl_debug_message)))
  1003. goto curl_error;
  1004. }
  1005. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_ERRORBUFFER,
  1006. curl_error_buffer)))
  1007. goto curl_error;
  1008. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOPROGRESS, 1)))
  1009. goto curl_error;
  1010. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_FOLLOWLOCATION, 1)))
  1011. goto curl_error;
  1012. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_URL, url)))
  1013. goto curl_error;
  1014. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPHEADER,
  1015. headers)))
  1016. goto curl_error;
  1017. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEFUNCTION, s3_internal_write_func)))
  1018. goto curl_error;
  1019. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEDATA, &int_writedata)))
  1020. goto curl_error;
  1021. /* Note: we always have to set this apparently, for consistent "end of header" detection */
  1022. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERFUNCTION, s3_internal_header_func)))
  1023. goto curl_error;
  1024. /* Note: if set, CURLOPT_HEADERDATA seems to also be used for CURLOPT_WRITEDATA ? */
  1025. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERDATA, &int_writedata)))
  1026. goto curl_error;
  1027. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSFUNCTION, progress_func)))
  1028. goto curl_error;
  1029. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSDATA, progress_data)))
  1030. goto curl_error;
  1031. /* CURLOPT_INFILESIZE_LARGE added in 7.11.0 */
  1032. #if LIBCURL_VERSION_NUM >= 0x070b00
  1033. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)request_body_size)))
  1034. goto curl_error;
  1035. #else
  1036. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE, (long)request_body_size)))
  1037. goto curl_error;
  1038. #endif
  1039. /* CURLOPT_MAX_{RECV,SEND}_SPEED_LARGE added in 7.15.5 */
  1040. #if LIBCURL_VERSION_NUM >= 0x070f05
  1041. if (s3_curl_throttling_compat()) {
  1042. if (hdl->max_send_speed)
  1043. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_send_speed)))
  1044. goto curl_error;
  1045. if (hdl->max_recv_speed)
  1046. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_recv_speed)))
  1047. goto curl_error;
  1048. }
  1049. #endif
  1050. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPGET, curlopt_httpget)))
  1051. goto curl_error;
  1052. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_UPLOAD, curlopt_upload)))
  1053. goto curl_error;
  1054. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_POST, curlopt_post)))
  1055. goto curl_error;
  1056. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOBODY, curlopt_nobody)))
  1057. goto curl_error;
  1058. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CUSTOMREQUEST,
  1059. curlopt_customrequest)))
  1060. goto curl_error;
  1061. if (curlopt_upload) {
  1062. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION, read_func)))
  1063. goto curl_error;
  1064. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA, read_data)))
  1065. goto curl_error;
  1066. } else {
  1067. /* Clear request_body options. */
  1068. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION,
  1069. NULL)))
  1070. goto curl_error;
  1071. if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA,
  1072. NULL)))
  1073. goto curl_error;
  1074. }
  1075. /* Perform the request */
  1076. curl_code = curl_easy_perform(hdl->curl);
  1077. /* interpret the response into hdl->last* */
  1078. curl_error: /* (label for short-circuiting the curl_easy_perform call) */
  1079. should_retry = interpret_response(hdl, curl_code, curl_error_buffer,
  1080. int_writedata.resp_buf.buffer, int_writedata.resp_buf.buffer_pos, int_writedata.etag, md5_hash_hex);
  1081. /* and, unless we know we need to retry, see what we're to do now */
  1082. if (!should_retry) {
  1083. result = lookup_result(result_handling, hdl->last_response_code,
  1084. hdl->last_s3_error_code, hdl->last_curl_code);
  1085. /* break out of the while(1) unless we're retrying */
  1086. if (result != S3_RESULT_RETRY)
  1087. break;
  1088. }
  1089. if (retries >= EXPONENTIAL_BACKOFF_MAX_RETRIES) {
  1090. /* we're out of retries, so annotate hdl->last_message appropriately and bail
  1091. * out. */
  1092. char *m = g_strdup_printf("Too many retries; last message was '%s'", hdl->last_message);
  1093. if (hdl->last_message) g_free(hdl->last_message);
  1094. hdl->last_message = m;
  1095. result = S3_RESULT_FAIL;
  1096. break;
  1097. }
  1098. g_usleep(backoff);
  1099. retries++;
  1100. backoff *= EXPONENTIAL_BACKOFF_BASE;
  1101. }
  1102. if (result != S3_RESULT_OK) {
  1103. g_debug(_("%s %s failed with %d/%s"), verb, url,
  1104. hdl->last_response_code,
  1105. s3_error_name_from_code(hdl->last_s3_error_code));
  1106. }
  1107. cleanup:
  1108. g_free(url);
  1109. if (headers) curl_slist_free_all(headers);
  1110. g_free(md5_hash_b64);
  1111. g_free(md5_hash_hex);
  1112. /* we don't deallocate the response body -- we keep it for later */
  1113. hdl->last_response_body = int_writedata.resp_buf.buffer;
  1114. hdl->last_response_body_size = int_writedata.resp_buf.buffer_pos;
  1115. hdl->last_num_retries = retries;
  1116. return result;
  1117. }
  1118. static size_t
  1119. s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1120. {
  1121. S3InternalData *data = (S3InternalData *) stream;
  1122. size_t bytes_saved;
  1123. if (!data->headers_done)
  1124. return size*nmemb;
  1125. /* call write on internal buffer (if not full) */
  1126. if (data->int_write_done) {
  1127. bytes_saved = 0;
  1128. } else {
  1129. bytes_saved = s3_buffer_write_func(ptr, size, nmemb, &data->resp_buf);
  1130. if (!bytes_saved) {
  1131. data->int_write_done = TRUE;
  1132. }
  1133. }
  1134. /* call write on user buffer */
  1135. if (data->write_func) {
  1136. return data->write_func(ptr, size, nmemb, data->write_data);
  1137. } else {
  1138. return bytes_saved;
  1139. }
  1140. }
  1141. static void
  1142. s3_internal_reset_func(void * stream)
  1143. {
  1144. S3InternalData *data = (S3InternalData *) stream;
  1145. s3_buffer_reset_func(&data->resp_buf);
  1146. data->headers_done = FALSE;
  1147. data->int_write_done = FALSE;
  1148. data->etag = NULL;
  1149. if (data->reset_func) {
  1150. data->reset_func(data->write_data);
  1151. }
  1152. }
  1153. static size_t
  1154. s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream)
  1155. {
  1156. static const char *final_header = "\r\n";
  1157. time_t remote_time_in_sec,local_time;
  1158. char *header;
  1159. regmatch_t pmatch[2];
  1160. S3InternalData *data = (S3InternalData *) stream;
  1161. header = g_strndup((gchar *) ptr, (gsize) size*nmemb);
  1162. if (!s3_regexec_wrap(&etag_regex, header, 2, pmatch, 0))
  1163. data->etag = find_regex_substring(header, pmatch[1]);
  1164. if (!strcmp(final_header, header))
  1165. data->headers_done = TRUE;
  1166. /* If date header is found */
  1167. if (!s3_regexec_wrap(&date_sync_regex, header, 2, pmatch, 0)){
  1168. char *date = find_regex_substring(header, pmatch[1]);
  1169. /* Remote time is always in GMT: RFC 2616 */
  1170. /* both curl_getdate and time operate in UTC, so no timezone math is necessary */
  1171. if ( (remote_time_in_sec = curl_getdate(date, NULL)) < 0 ){
  1172. g_debug("Error: Conversion of remote time to seconds failed.");
  1173. data->hdl->time_offset_with_s3 = 0;
  1174. }else{
  1175. local_time = time(NULL);
  1176. /* Offset time */
  1177. data->hdl->time_offset_with_s3 = remote_time_in_sec - local_time;
  1178. if (data->hdl->verbose)
  1179. g_debug("Time Offset (remote - local) :%ld",(long)data->hdl->time_offset_with_s3);
  1180. }
  1181. g_free(date);
  1182. }
  1183. g_free(header);
  1184. return size*nmemb;
  1185. }
  1186. static gboolean
  1187. compile_regexes(void)
  1188. {
  1189. #ifdef HAVE_REGEX_H
  1190. /* using POSIX regular expressions */
  1191. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1192. {"<Code>[[:space:]]*([^<]*)[[:space:]]*</Code>", REG_EXTENDED | REG_ICASE, &error_name_regex},
  1193. {"^ETag:[[:space:]]*\"([^\"]+)\"[[:space:]]*$", REG_EXTENDED | REG_ICASE | REG_NEWLINE, &etag_regex},
  1194. {"<Message>[[:space:]]*([^<]*)[[:space:]]*</Message>", REG_EXTENDED | REG_ICASE, &message_regex},
  1195. {"^[a-z0-9](-*[a-z0-9]){2,62}$", REG_EXTENDED | REG_NOSUB, &subdomain_regex},
  1196. {"(/>)|(>([^<]*)</LocationConstraint>)", REG_EXTENDED | REG_ICASE, &location_con_regex},
  1197. {"^Date:(.*)\r",REG_EXTENDED | REG_ICASE | REG_NEWLINE, &date_sync_regex},
  1198. {NULL, 0, NULL}
  1199. };
  1200. char regmessage[1024];
  1201. int i;
  1202. int reg_result;
  1203. for (i = 0; regexes[i].str; i++) {
  1204. reg_result = regcomp(regexes[i].regex, regexes[i].str, regexes[i].flags);
  1205. if (reg_result != 0) {
  1206. regerror(reg_result, regexes[i].regex, regmessage, sizeof(regmessage));
  1207. g_error(_("Regex error: %s"), regmessage);
  1208. return FALSE;
  1209. }
  1210. }
  1211. #else /* ! HAVE_REGEX_H */
  1212. /* using PCRE via GLib */
  1213. struct {const char * str; int flags; regex_t *regex;} regexes[] = {
  1214. {"<Code>\\s*([^<]*)\\s*</Code>",
  1215. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1216. &error_name_regex},
  1217. {"^ETag:\\s*\"([^\"]+)\"\\s*$",
  1218. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1219. &etag_regex},
  1220. {"<Message>\\s*([^<]*)\\s*</Message>",
  1221. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1222. &message_regex},
  1223. {"^[a-z0-9]((-*[a-z0-9])|(\\.[a-z0-9])){2,62}$",
  1224. G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE,
  1225. &subdomain_regex},
  1226. {"(/>)|(>([^<]*)</LocationConstraint>)",
  1227. G_REGEX_CASELESS,
  1228. &location_con_regex},
  1229. {"^Date:(.*)\\r",
  1230. G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
  1231. &date_sync_regex},
  1232. {NULL, 0, NULL}
  1233. };
  1234. int i;
  1235. GError *err = NULL;
  1236. for (i = 0; regexes[i].str; i++) {
  1237. *(regexes[i].regex) = g_regex_new(regexes[i].str, regexes[i].flags, 0, &err);
  1238. if (err) {
  1239. g_error(_("Regex error: %s"), err->message);
  1240. g_error_free(err);
  1241. return FALSE;
  1242. }
  1243. }
  1244. #endif
  1245. return TRUE;
  1246. }
  1247. /*
  1248. * Public function implementations
  1249. */
  1250. gboolean s3_init(void)
  1251. {
  1252. static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
  1253. static gboolean init = FALSE, ret;
  1254. /* n.b. curl_global_init is called in common-src/glib-util.c:glib_init() */
  1255. g_static_mutex_lock (&mutex);
  1256. if (!init) {
  1257. ret = compile_regexes();
  1258. init = TRUE;
  1259. }
  1260. g_static_mutex_unlock(&mutex);
  1261. return ret;
  1262. }
  1263. gboolean
  1264. s3_curl_location_compat(void)
  1265. {
  1266. curl_version_info_data *info;
  1267. info = curl_version_info(CURLVERSION_NOW);
  1268. return info->version_num > 0x070a02;
  1269. }
  1270. gboolean
  1271. s3_bucket_location_compat(const char *bucket)
  1272. {
  1273. return !s3_regexec_wrap(&subdomain_regex, bucket, 0, NULL, 0);
  1274. }
  1275. S3Handle *
  1276. s3_open(const char *access_key,
  1277. const char *secret_key,
  1278. const char *host,
  1279. const char *service_path,
  1280. const gboolean use_subdomain,
  1281. const char *user_token,
  1282. const char *bucket_location,
  1283. const char *storage_class,
  1284. const char *ca_info
  1285. )
  1286. {
  1287. S3Handle *hdl;
  1288. hdl = g_new0(S3Handle, 1);
  1289. if (!hdl) goto error;
  1290. hdl->verbose = FALSE;
  1291. hdl->use_ssl = s3_curl_supports_ssl();
  1292. g_assert(access_key);
  1293. hdl->access_key = g_strdup(access_key);
  1294. g_assert(secret_key);
  1295. hdl->secret_key = g_strdup(secret_key);
  1296. /* NULL is okay */
  1297. hdl->user_token = g_strdup(user_token);
  1298. /* NULL is okay */
  1299. hdl->bucket_location = g_strdup(bucket_location);
  1300. /* NULL is ok */
  1301. hdl->storage_class = g_strdup(storage_class);
  1302. /* NULL is okay */
  1303. hdl->ca_info = g_strdup(ca_info);
  1304. if (!is_non_empty_string(host))
  1305. host = "s3.amazonaws.com";
  1306. hdl->host = g_strdup(host);
  1307. hdl->use_subdomain = use_subdomain ||
  1308. (strcmp(host, "s3.amazonaws.com") == 0 &&
  1309. is_non_empty_string(hdl->bucket_location));
  1310. if (service_path) {
  1311. if (service_path[0] != '/')
  1312. hdl->service_path = g_strdup_printf("/%s", service_path);
  1313. else
  1314. hdl->service_path = g_strdup(service_path);
  1315. } else {
  1316. hdl->service_path = NULL;
  1317. }
  1318. hdl->curl = curl_easy_init();
  1319. if (!hdl->curl) goto error;
  1320. return hdl;
  1321. error:
  1322. s3_free(hdl);
  1323. return NULL;
  1324. }
  1325. void
  1326. s3_free(S3Handle *hdl)
  1327. {
  1328. s3_reset(hdl);
  1329. if (hdl) {
  1330. g_free(hdl->access_key);
  1331. g_free(hdl->secret_key);
  1332. if (hdl->user_token) g_free(hdl->user_token);
  1333. if (hdl->bucket_location) g_free(hdl->bucket_location);
  1334. if (hdl->storage_class) g_free(hdl->storage_class);
  1335. if (hdl->host) g_free(hdl->host);
  1336. if (hdl->service_path) g_free(hdl->service_path);
  1337. if (hdl->curl) curl_easy_cleanup(hdl->curl);
  1338. g_free(hdl);
  1339. }
  1340. }
  1341. void
  1342. s3_reset(S3Handle *hdl)
  1343. {
  1344. if (hdl) {
  1345. /* We don't call curl_easy_reset here, because doing that in curl
  1346. * < 7.16 blanks the default CA certificate path, and there's no way
  1347. * to get it back. */
  1348. if (hdl->last_message) {
  1349. g_free(hdl->last_message);
  1350. hdl->last_message = NULL;
  1351. }
  1352. hdl->last_response_code = 0;
  1353. hdl->last_curl_code = 0;
  1354. hdl->last_s3_error_code = 0;
  1355. hdl->last_num_retries = 0;
  1356. if (hdl->last_response_body) {
  1357. g_free(hdl->last_response_body);
  1358. hdl->last_response_body = NULL;
  1359. }
  1360. hdl->last_response_body_size = 0;
  1361. }
  1362. }
  1363. void
  1364. s3_error(S3Handle *hdl,
  1365. const char **message,
  1366. guint *response_code,
  1367. s3_error_code_t *s3_error_code,
  1368. const char **s3_error_name,
  1369. CURLcode *curl_code,
  1370. guint *num_retries)
  1371. {
  1372. if (hdl) {
  1373. if (message) *message = hdl->last_message;
  1374. if (response_code) *response_code = hdl->last_response_code;
  1375. if (s3_error_code) *s3_error_code = hdl->last_s3_error_code;
  1376. if (s3_error_name) *s3_error_name = s3_error_name_from_code(hdl->last_s3_error_code);
  1377. if (curl_code) *curl_code = hdl->last_curl_code;
  1378. if (num_retries) *num_retries = hdl->last_num_retries;
  1379. } else {
  1380. /* no hdl? return something coherent, anyway */
  1381. if (message) *message = "NULL S3Hand…

Large files files are truncated, but you can click here to view the full file