// validator.go contains functions to allow partner institutions // to validate bags before they send them. This code is run by // users at our partner institutions, on their desktops and laptops. // It's not intended to run on APTrust servers. package bagman import ( "fmt" "os" "path/filepath" "regexp" "strings" ) const ( // Item to validate is a tar file VAL_TYPE_TAR = 1 // Item to validate is a directory VAL_TYPE_DIR = 2 // Item is something we can't validate VAL_TYPE_ERR = 3 ) type Validator struct { PathToFile string TarResult *TarResult BagReadResult *BagReadResult ErrorMessage string } // Returns a new Validator suitable for partners to validate // bags before sending. For server-side use, use IngestHelper. func NewValidator(pathToFile string) (*Validator, error) { absPath, err := filepath.Abs(pathToFile) if err != nil { return nil, fmt.Errorf("Cannot determine absolute path from '%s': %v", pathToFile, err) } return &Validator{ PathToFile: absPath, }, nil } func (validator *Validator) IsValid() (bool) { domain, err := validator.InstitutionDomain() if err != nil { validator.ErrorMessage = err.Error() return false } if validator.LooksLikeMultipart() && !validator.IsValidMultipartName() { validator.ErrorMessage = "This looks like a multipart bag, but it does not conform to " + "naming conventions. Multipart bags should end with a suffix like '.b01.of12.tar'. " + "See the APTrust BagIt specification for details." return false } fileType, err := validator.FileType() if err != nil { validator.ErrorMessage = err.Error() return false } untarredDirExisted := FileExists(validator.UntarredDir()) weUntarredThisFile := false if fileType == VAL_TYPE_TAR { validator.TarResult = Untar(validator.PathToFile, domain, validator.TarFileName(), false) if validator.TarResult.ErrorMessage != "" { if untarredDirExisted == false { // Untar failed, but we just created a directory and possibly // several files inside it. We don't want to leave a bunch of // trash hanging around, so clean up! os.RemoveAll(validator.UntarredDir()) } validator.ErrorMessage = validator.TarResult.ErrorMessage return false } weUntarredThisFile = true } validator.BagReadResult = ReadBag(validator.UntarredDir()) if weUntarredThisFile == true && untarredDirExisted == false { // Clean up the files we untarred. os.RemoveAll(validator.UntarredDir()) } if validator.BagReadResult.ErrorMessage != "" { validator.ErrorMessage = validator.BagReadResult.ErrorMessage // Augment some of the more vague messages from the underlying // bag parsing library. if strings.Contains(validator.BagReadResult.ErrorMessage, "Unable to parse a manifest") { validator.ErrorMessage = "Required checksum file manifest-md5.txt is missing." } else if strings.Contains(validator.BagReadResult.ErrorMessage, "Payload directory does not exist") { validator.ErrorMessage = "Bag is missing the data directory, which should contain the payload files." } return false } return true } // Returns the path to the directory that holds the untarred // contents of the bag. func (validator *Validator) UntarredDir() (string) { re := regexp.MustCompile("\\.tar$") return re.ReplaceAllString(validator.PathToFile, "") } // Get the instution domain from the file, or return a descriptive // error if the file doesn't include the institution name. func (validator *Validator) InstitutionDomain() (string, error) { if validator.PathToFile == "" { return "", fmt.Errorf("You must specify the tar file or directory to validate.") } base := filepath.Base(validator.PathToFile) parts := strings.Split(base, ".") if len(parts) < 3 || len(parts) == 3 && parts[2] == "tar" { message := fmt.Sprintf( "Bag name '%s' should start with your institution's " + "domain name,\n followed by a period and the object name.\n" + "For example, 'university.edu.my_archive.tar' " + "for a tar file,\n" + "or 'university.edu.my_archive' for a directory.", base) return "", fmt.Errorf(message) } instName := fmt.Sprintf("%s.%s", parts[0], parts[1]) return instName, nil } // Returns true if the bag name looks like a multipart bag. // This catches both correct multipart bag names and some // common incorrect variants, such as "bag1of2" func (validator *Validator) LooksLikeMultipart() (bool) { reMisnamedMultiPartBag := regexp.MustCompile(`\.b\d+\.?of\d+$|\.bag\d+\.?of\d+$`) return reMisnamedMultiPartBag.MatchString(validator.UntarredDir()) } // Returns true if the bag has a valid multipart bag name. func (validator *Validator) IsValidMultipartName() (bool) { _, err := validator.InstitutionDomain() return err == nil && MultipartSuffix.MatchString(validator.UntarredDir()) } // Returns the name of the tar file that the user wants to validate. // If this is a directory, returns the name of the directory with a // .tar suffix. func (validator *Validator) TarFileName() (string) { base := filepath.Base(validator.PathToFile) if !strings.HasSuffix(base, ".tar") { base += ".tar" } return base } // Returns either VAL_TYPE_TAR, VAL_TYPE_DIR or VAL_TYPE_ERR // to describe what type of item the user wants to validate. func (validator *Validator) FileType() (int, error) { if validator.PathToFile == "" { return VAL_TYPE_ERR, fmt.Errorf("You must specify the tar file or directory to validate.") } f, err := os.Open(validator.PathToFile) if err != nil { return VAL_TYPE_ERR, err } fileInfo, err := f.Stat() if err != nil { return VAL_TYPE_ERR, err } mode := fileInfo.Mode() if mode.IsDir() { return VAL_TYPE_DIR, nil } base := filepath.Base(validator.PathToFile) if strings.HasSuffix(base, ".tar") { return VAL_TYPE_TAR, nil } return VAL_TYPE_ERR, fmt.Errorf( "Bag '%s' must be either a tar file or a directory", validator.PathToFile) }