/bagman/validator.go

https://github.com/APTrust/bagman · Go · 180 lines · 137 code · 15 blank · 28 comment · 41 complexity · 1ea123cc054aa22dd2306c1fdb2f0f10 MD5 · raw file

  1. // validator.go contains functions to allow partner institutions
  2. // to validate bags before they send them. This code is run by
  3. // users at our partner institutions, on their desktops and laptops.
  4. // It's not intended to run on APTrust servers.
  5. package bagman
  6. import (
  7. "fmt"
  8. "os"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. )
  // Kinds of item a Validator can be pointed at, as reported by
  // Validator.FileType.
  const (
  	// VAL_TYPE_TAR: the item to validate is a tar file.
  	VAL_TYPE_TAR = iota + 1
  	// VAL_TYPE_DIR: the item to validate is a directory.
  	VAL_TYPE_DIR
  	// VAL_TYPE_ERR: the item is something we can't validate.
  	VAL_TYPE_ERR
  )
  21. type Validator struct {
  22. PathToFile string
  23. TarResult *TarResult
  24. BagReadResult *BagReadResult
  25. ErrorMessage string
  26. }
  27. // Returns a new Validator suitable for partners to validate
  28. // bags before sending. For server-side use, use IngestHelper.
  29. func NewValidator(pathToFile string) (*Validator, error) {
  30. absPath, err := filepath.Abs(pathToFile)
  31. if err != nil {
  32. return nil, fmt.Errorf("Cannot determine absolute path from '%s': %v",
  33. pathToFile, err)
  34. }
  35. return &Validator{
  36. PathToFile: absPath,
  37. }, nil
  38. }
  39. func (validator *Validator) IsValid() (bool) {
  40. domain, err := validator.InstitutionDomain()
  41. if err != nil {
  42. validator.ErrorMessage = err.Error()
  43. return false
  44. }
  45. if validator.LooksLikeMultipart() && !validator.IsValidMultipartName() {
  46. validator.ErrorMessage = "This looks like a multipart bag, but it does not conform to " +
  47. "naming conventions. Multipart bags should end with a suffix like '.b01.of12.tar'. " +
  48. "See the APTrust BagIt specification for details."
  49. return false
  50. }
  51. fileType, err := validator.FileType()
  52. if err != nil {
  53. validator.ErrorMessage = err.Error()
  54. return false
  55. }
  56. untarredDirExisted := FileExists(validator.UntarredDir())
  57. weUntarredThisFile := false
  58. if fileType == VAL_TYPE_TAR {
  59. validator.TarResult = Untar(validator.PathToFile, domain,
  60. validator.TarFileName(), false)
  61. if validator.TarResult.ErrorMessage != "" {
  62. if untarredDirExisted == false {
  63. // Untar failed, but we just created a directory and possibly
  64. // several files inside it. We don't want to leave a bunch of
  65. // trash hanging around, so clean up!
  66. os.RemoveAll(validator.UntarredDir())
  67. }
  68. validator.ErrorMessage = validator.TarResult.ErrorMessage
  69. return false
  70. }
  71. weUntarredThisFile = true
  72. }
  73. validator.BagReadResult = ReadBag(validator.UntarredDir())
  74. if weUntarredThisFile == true && untarredDirExisted == false {
  75. // Clean up the files we untarred.
  76. os.RemoveAll(validator.UntarredDir())
  77. }
  78. if validator.BagReadResult.ErrorMessage != "" {
  79. validator.ErrorMessage = validator.BagReadResult.ErrorMessage
  80. // Augment some of the more vague messages from the underlying
  81. // bag parsing library.
  82. if strings.Contains(validator.BagReadResult.ErrorMessage, "Unable to parse a manifest") {
  83. validator.ErrorMessage = "Required checksum file manifest-md5.txt is missing."
  84. } else if strings.Contains(validator.BagReadResult.ErrorMessage, "Payload directory does not exist") {
  85. validator.ErrorMessage = "Bag is missing the data directory, which should contain the payload files."
  86. }
  87. return false
  88. }
  89. return true
  90. }
  91. // Returns the path to the directory that holds the untarred
  92. // contents of the bag.
  93. func (validator *Validator) UntarredDir() (string) {
  94. re := regexp.MustCompile("\\.tar$")
  95. return re.ReplaceAllString(validator.PathToFile, "")
  96. }
  97. // Get the instution domain from the file, or return a descriptive
  98. // error if the file doesn't include the institution name.
  99. func (validator *Validator) InstitutionDomain() (string, error) {
  100. if validator.PathToFile == "" {
  101. return "", fmt.Errorf("You must specify the tar file or directory to validate.")
  102. }
  103. base := filepath.Base(validator.PathToFile)
  104. parts := strings.Split(base, ".")
  105. if len(parts) < 3 || len(parts) == 3 && parts[2] == "tar" {
  106. message := fmt.Sprintf(
  107. "Bag name '%s' should start with your institution's " +
  108. "domain name,\n followed by a period and the object name.\n" +
  109. "For example, 'university.edu.my_archive.tar' " +
  110. "for a tar file,\n" +
  111. "or 'university.edu.my_archive' for a directory.",
  112. base)
  113. return "", fmt.Errorf(message)
  114. }
  115. instName := fmt.Sprintf("%s.%s", parts[0], parts[1])
  116. return instName, nil
  117. }
  118. // Returns true if the bag name looks like a multipart bag.
  119. // This catches both correct multipart bag names and some
  120. // common incorrect variants, such as "bag1of2"
  121. func (validator *Validator) LooksLikeMultipart() (bool) {
  122. reMisnamedMultiPartBag := regexp.MustCompile(`\.b\d+\.?of\d+$|\.bag\d+\.?of\d+$`)
  123. return reMisnamedMultiPartBag.MatchString(validator.UntarredDir())
  124. }
  125. // Returns true if the bag has a valid multipart bag name.
  126. func (validator *Validator) IsValidMultipartName() (bool) {
  127. _, err := validator.InstitutionDomain()
  128. return err == nil && MultipartSuffix.MatchString(validator.UntarredDir())
  129. }
  130. // Returns the name of the tar file that the user wants to validate.
  131. // If this is a directory, returns the name of the directory with a
  132. // .tar suffix.
  133. func (validator *Validator) TarFileName() (string) {
  134. base := filepath.Base(validator.PathToFile)
  135. if !strings.HasSuffix(base, ".tar") {
  136. base += ".tar"
  137. }
  138. return base
  139. }
  140. // Returns either VAL_TYPE_TAR, VAL_TYPE_DIR or VAL_TYPE_ERR
  141. // to describe what type of item the user wants to validate.
  142. func (validator *Validator) FileType() (int, error) {
  143. if validator.PathToFile == "" {
  144. return VAL_TYPE_ERR, fmt.Errorf("You must specify the tar file or directory to validate.")
  145. }
  146. f, err := os.Open(validator.PathToFile)
  147. if err != nil {
  148. return VAL_TYPE_ERR, err
  149. }
  150. fileInfo, err := f.Stat()
  151. if err != nil {
  152. return VAL_TYPE_ERR, err
  153. }
  154. mode := fileInfo.Mode()
  155. if mode.IsDir() {
  156. return VAL_TYPE_DIR, nil
  157. }
  158. base := filepath.Base(validator.PathToFile)
  159. if strings.HasSuffix(base, ".tar") {
  160. return VAL_TYPE_TAR, nil
  161. }
  162. return VAL_TYPE_ERR, fmt.Errorf(
  163. "Bag '%s' must be either a tar file or a directory",
  164. validator.PathToFile)
  165. }