/src/server/pfs/s3/multipart.go

https://github.com/pachyderm/pachyderm · Go · 401 lines · 319 code · 76 blank · 6 comment · 113 complexity · 256f8ceef2c2fa0fa6a4f26d9f0fd9bb MD5 · raw file

  1. package s3
import (
	"fmt"
	"io"
	"net/http"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"

	"github.com/gogo/protobuf/types"
	"github.com/pachyderm/pachyderm/src/client"
	pfsClient "github.com/pachyderm/pachyderm/src/client/pfs"
	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
	pfsServer "github.com/pachyderm/pachyderm/src/server/pfs"
	"github.com/pachyderm/pachyderm/src/server/pkg/errutil"
	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
	"github.com/pachyderm/s2"
)
  19. var multipartChunkPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/(\d+)`)
  20. var multipartKeepPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/\.keep`)
  21. func multipartChunkArgs(path string) (repo string, branch string, key string, uploadID string, partNumber int, err error) {
  22. match := multipartChunkPathMatcher.FindStringSubmatch(path)
  23. if len(match) == 0 {
  24. err = errors.New("invalid file path found in multipath bucket")
  25. return
  26. }
  27. repo = match[1]
  28. branch = match[2]
  29. key = match[3]
  30. uploadID = match[4]
  31. partNumber, err = strconv.Atoi(match[5])
  32. if err != nil {
  33. err = errors.Wrapf(err, "invalid file path found in multipath bucket")
  34. return
  35. }
  36. return
  37. }
  38. func multipartKeepArgs(path string) (repo string, branch string, key string, uploadID string, err error) {
  39. match := multipartKeepPathMatcher.FindStringSubmatch(path)
  40. if len(match) == 0 {
  41. err = errors.New("invalid file path found in multipath bucket")
  42. return
  43. }
  44. repo = match[1]
  45. branch = match[2]
  46. key = match[3]
  47. uploadID = match[4]
  48. return
  49. }
  50. func parentDirPath(repo, branch, key, uploadID string) string {
  51. return path.Join(repo, branch, key, uploadID)
  52. }
  53. func chunkPath(repo, branch, key, uploadID string, partNumber int) string {
  54. return path.Join(parentDirPath(repo, branch, key, uploadID), strconv.Itoa(partNumber))
  55. }
  56. func keepPath(repo, branch, key, uploadID string) string {
  57. return path.Join(parentDirPath(repo, branch, key, uploadID), ".keep")
  58. }
  59. func (c *controller) ensureRepo(pc *client.APIClient) error {
  60. _, err := pc.InspectBranch(c.repo, "master")
  61. if err != nil {
  62. err = pc.UpdateRepo(c.repo)
  63. if err != nil {
  64. return err
  65. }
  66. err = pc.CreateBranch(c.repo, "master", "", nil)
  67. if err != nil {
  68. return err
  69. }
  70. }
  71. return nil
  72. }
  73. func (c *controller) ListMultipart(r *http.Request, bucketName, keyMarker, uploadIDMarker string, maxUploads int) (*s2.ListMultipartResult, error) {
  74. c.logger.Debugf("ListMultipart: bucketName=%+v, keyMarker=%+v, uploadIDMarker=%+v, maxUploads=%+v", bucketName, keyMarker, uploadIDMarker, maxUploads)
  75. pc, err := c.requestClient(r)
  76. if err != nil {
  77. return nil, err
  78. }
  79. if err = c.ensureRepo(pc); err != nil {
  80. return nil, err
  81. }
  82. bucket, err := c.driver.bucket(pc, r, bucketName)
  83. if err != nil {
  84. return nil, err
  85. }
  86. result := s2.ListMultipartResult{
  87. Uploads: []*s2.Upload{},
  88. }
  89. globPattern := path.Join(bucket.Repo, bucket.Commit, "*", "*", ".keep")
  90. err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error {
  91. _, _, key, uploadID, err := multipartKeepArgs(fileInfo.File.Path)
  92. if err != nil {
  93. return nil
  94. }
  95. if key <= keyMarker || uploadID <= uploadIDMarker {
  96. return nil
  97. }
  98. if len(result.Uploads) >= maxUploads {
  99. if maxUploads > 0 {
  100. result.IsTruncated = true
  101. }
  102. return errutil.ErrBreak
  103. }
  104. timestamp, err := types.TimestampFromProto(fileInfo.Committed)
  105. if err != nil {
  106. return err
  107. }
  108. result.Uploads = append(result.Uploads, &s2.Upload{
  109. Key: key,
  110. UploadID: uploadID,
  111. Initiator: defaultUser,
  112. StorageClass: globalStorageClass,
  113. Initiated: timestamp,
  114. })
  115. return nil
  116. })
  117. return &result, err
  118. }
  119. func (c *controller) InitMultipart(r *http.Request, bucketName, key string) (string, error) {
  120. c.logger.Debugf("InitMultipart: bucketName=%+v, key=%+v", bucketName, key)
  121. pc, err := c.requestClient(r)
  122. if err != nil {
  123. return "", err
  124. }
  125. if err = c.ensureRepo(pc); err != nil {
  126. return "", err
  127. }
  128. bucket, err := c.driver.bucket(pc, r, bucketName)
  129. if err != nil {
  130. return "", err
  131. }
  132. bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket)
  133. if err != nil {
  134. return "", err
  135. }
  136. if !bucketCaps.writable {
  137. return "", s2.NotImplementedError(r)
  138. }
  139. uploadID := uuid.NewWithoutDashes()
  140. _, err = pc.PutFileOverwrite(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID), strings.NewReader(""), 0)
  141. if err != nil {
  142. return "", err
  143. }
  144. return uploadID, nil
  145. }
  146. func (c *controller) AbortMultipart(r *http.Request, bucketName, key, uploadID string) error {
  147. c.logger.Debugf("AbortMultipart: bucketName=%+v, key=%+v, uploadID=%+v", bucketName, key, uploadID)
  148. pc, err := c.requestClient(r)
  149. if err != nil {
  150. return err
  151. }
  152. if err = c.ensureRepo(pc); err != nil {
  153. return err
  154. }
  155. bucket, err := c.driver.bucket(pc, r, bucketName)
  156. if err != nil {
  157. return err
  158. }
  159. _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
  160. if err != nil {
  161. return s2.NoSuchUploadError(r)
  162. }
  163. err = pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID))
  164. if err != nil {
  165. return s2.InternalError(r, err)
  166. }
  167. return nil
  168. }
  169. func (c *controller) CompleteMultipart(r *http.Request, bucketName, key, uploadID string, parts []*s2.Part) (*s2.CompleteMultipartResult, error) {
  170. c.logger.Debugf("CompleteMultipart: bucketName=%+v, key=%+v, uploadID=%+v, parts=%+v", bucketName, key, uploadID, parts)
  171. pc, err := c.requestClient(r)
  172. if err != nil {
  173. return nil, err
  174. }
  175. if err = c.ensureRepo(pc); err != nil {
  176. return nil, err
  177. }
  178. bucket, err := c.driver.bucket(pc, r, bucketName)
  179. if err != nil {
  180. return nil, err
  181. }
  182. bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket)
  183. if err != nil {
  184. return nil, err
  185. }
  186. if !bucketCaps.writable {
  187. return nil, s2.NotImplementedError(r)
  188. }
  189. _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
  190. if err != nil {
  191. if pfsServer.IsFileNotFoundErr(err) {
  192. return nil, s2.NoSuchUploadError(r)
  193. }
  194. return nil, err
  195. }
  196. // check if the destination file already exists, and if so, delete it
  197. _, err = pc.InspectFile(bucket.Repo, bucket.Commit, key)
  198. if err != nil && !pfsServer.IsFileNotFoundErr(err) && !pfsServer.IsNoHeadErr(err) {
  199. return nil, err
  200. } else if err == nil {
  201. err = pc.DeleteFile(bucket.Repo, bucket.Commit, key)
  202. if err != nil {
  203. if errutil.IsWriteToOutputBranchError(err) {
  204. return nil, writeToOutputBranchError(r)
  205. }
  206. return nil, err
  207. }
  208. }
  209. for i, part := range parts {
  210. srcPath := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, part.PartNumber)
  211. fileInfo, err := pc.InspectFile(c.repo, "master", srcPath)
  212. if err != nil {
  213. if pfsServer.IsFileNotFoundErr(err) {
  214. return nil, s2.InvalidPartError(r)
  215. }
  216. return nil, err
  217. }
  218. // Only verify the ETag when it's of the same length as PFS file
  219. // hashes. This is because s3 clients will generally use md5 for
  220. // ETags, and would otherwise fail.
  221. expectedETag := fmt.Sprintf("%x", fileInfo.Hash)
  222. if len(part.ETag) == len(expectedETag) && part.ETag != expectedETag {
  223. return nil, s2.InvalidPartError(r)
  224. }
  225. if i < len(parts)-1 && fileInfo.SizeBytes < 5*1024*1024 {
  226. // each part, except for the last, is expected to be at least 5mb
  227. // in s3
  228. return nil, s2.EntityTooSmallError(r)
  229. }
  230. err = pc.CopyFile(c.repo, "master", srcPath, bucket.Repo, bucket.Commit, key, false)
  231. if err != nil {
  232. if errutil.IsWriteToOutputBranchError(err) {
  233. return nil, writeToOutputBranchError(r)
  234. }
  235. return nil, err
  236. }
  237. }
  238. err = pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID))
  239. if err != nil {
  240. return nil, err
  241. }
  242. fileInfo, err := pc.InspectFile(bucket.Repo, bucket.Commit, key)
  243. if err != nil && !pfsServer.IsOutputCommitNotFinishedErr(err) {
  244. return nil, err
  245. }
  246. result := s2.CompleteMultipartResult{Location: globalLocation}
  247. if fileInfo != nil {
  248. result.ETag = fmt.Sprintf("%x", fileInfo.Hash)
  249. result.Version = fileInfo.File.Commit.ID
  250. }
  251. return &result, nil
  252. }
  253. func (c *controller) ListMultipartChunks(r *http.Request, bucketName, key, uploadID string, partNumberMarker, maxParts int) (*s2.ListMultipartChunksResult, error) {
  254. c.logger.Debugf("ListMultipartChunks: bucketName=%+v, key=%+v, uploadID=%+v, partNumberMarker=%+v, maxParts=%+v", bucketName, key, uploadID, partNumberMarker, maxParts)
  255. pc, err := c.requestClient(r)
  256. if err != nil {
  257. return nil, err
  258. }
  259. if err = c.ensureRepo(pc); err != nil {
  260. return nil, err
  261. }
  262. bucket, err := c.driver.bucket(pc, r, bucketName)
  263. if err != nil {
  264. return nil, err
  265. }
  266. result := s2.ListMultipartChunksResult{
  267. Initiator: &defaultUser,
  268. Owner: &defaultUser,
  269. StorageClass: globalStorageClass,
  270. Parts: []*s2.Part{},
  271. }
  272. globPattern := path.Join(parentDirPath(bucket.Repo, bucket.Commit, key, uploadID), "*")
  273. err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error {
  274. _, _, _, _, partNumber, err := multipartChunkArgs(fileInfo.File.Path)
  275. if err != nil {
  276. return nil
  277. }
  278. if partNumber <= partNumberMarker {
  279. return nil
  280. }
  281. if len(result.Parts) >= maxParts {
  282. if maxParts > 0 {
  283. result.IsTruncated = true
  284. }
  285. return errutil.ErrBreak
  286. }
  287. result.Parts = append(result.Parts, &s2.Part{
  288. PartNumber: partNumber,
  289. ETag: fmt.Sprintf("%x", fileInfo.Hash),
  290. })
  291. return nil
  292. })
  293. return &result, err
  294. }
  295. func (c *controller) UploadMultipartChunk(r *http.Request, bucketName, key, uploadID string, partNumber int, reader io.Reader) (string, error) {
  296. c.logger.Debugf("UploadMultipartChunk: bucketName=%+v, key=%+v, uploadID=%+v partNumber=%+v", bucketName, key, uploadID, partNumber)
  297. pc, err := c.requestClient(r)
  298. if err != nil {
  299. return "", err
  300. }
  301. if err = c.ensureRepo(pc); err != nil {
  302. return "", err
  303. }
  304. bucket, err := c.driver.bucket(pc, r, bucketName)
  305. if err != nil {
  306. return "", err
  307. }
  308. _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
  309. if err != nil {
  310. if pfsServer.IsFileNotFoundErr(err) {
  311. return "", s2.NoSuchUploadError(r)
  312. }
  313. return "", err
  314. }
  315. path := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, partNumber)
  316. _, err = pc.PutFileOverwrite(c.repo, "master", path, reader, 0)
  317. if err != nil {
  318. return "", err
  319. }
  320. fileInfo, err := pc.InspectFile(c.repo, "master", path)
  321. if err != nil {
  322. return "", err
  323. }
  324. return fmt.Sprintf("%x", fileInfo.Hash), nil
  325. }