/backup-manager-purge
Perl | 337 lines | 160 code | 42 blank | 135 comment | 21 complexity | 0f0af2429fa7bb24e4ee3c4495fc6afd MD5 | raw file
Possible License(s): GPL-2.0
1#!/usr/bin/perl 2# Copyright Š 2005-2016 The Backup Manager Authors 3# 4# See the AUTHORS file for details. 5# 6# This program is free software; you can redistribute it and/or 7# modify it under the terms of the GNU General Public License 8# as published by the Free Software Foundation; either version 2 9# of the License, or (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# You should have received a copy of the GNU General Public License 17# along with this program; if not, write to the Free Software 18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 20use strict; 21use warnings; 22 23=pod 24 25=head1 NAME 26 27backup-manager-purge - backup-manager's wrapper for outdating files 28 29=head1 SYNOPSIS 30 31backup-manager-purge [TTL] <options> 32 33=head1 DESCRIPTION 34 35B<backup-manager-purge> is the only authorized entity that can say if an archive 36should be purged or not. Any tasks used by backup-manager may have to know if 37an archive is deprecated (eg: the purging phase of an upload method). This tool 38is here to fulfill that need. 39 40Given a I<time to live (TTL)> and a list of archives, B<backup-manager-purge> 41will return another list of archives, corresponding to the ones that are 42outdated by the TTL. 43 44=head1 REQUIRED ARGS 45 46=over 4 47 48=item B<--ttl=>I<time-to-live> 49 50Specify the time to live (in days) for the archives. Any archive that is older 51than I<ttl> days will be outdated. 52 53=back 54 55=head1 OPTIONAL ARGS 56 57=over 4 58 59=item B<--files-from=>file 60 61A file containing a list of archives to parse, one archive per line. 62If this option is not used, STDIN will be used for catching the files to parse. 63 64=back 65 66=head1 RETURN 67 68B<backup-manager-purge> will return the list of outdated files on STDOUT, one 69file per line. 70 71=head1 ERROR CODES 72 73If an error occurs, it will print the error message on stderr and will exit with 74an error code greater than 0. 75 76Here are the possible error codes: 77 78=over 4 79 80=item bad command line (wrong arguments) : 10 81 82=item internal error (should be reported as a bug) : 20 83 84=back 85 86=head1 SEE ALSO 87 88backup-manager(8) backup-manager-upload(8) 89 90=head1 AUTHORS 91 92Concept and design by Alexis Sukrieh and Jan Metzger. 93 94=cut 95 96############################################################## 97# Uses 98############################################################## 99use BackupManager::Config; 100use BackupManager::Logger; 101use BackupManager::Dialog; 102use POSIX qw(strftime); 103use File::Basename; 104use Data::Dumper; 105 106############################################################## 107# Constants 108############################################################## 109use constant E_SUCCESS => 0; 110use constant E_INVALID => 10; 111use constant E_INTERNAL => 20; 112use constant TRUE => 1; 113use constant FALSE => 0; 114use constant DIALOG_VERBOSE => 0; 115use constant MSG_INTERNAL => "Internal system error, please report the bug."; 116 117############################################################## 118# Global variables 119############################################################## 120my $g_ttl = undef; 121my $g_filelist = undef; 122my @g_archives = (); 123my @g_outdated = (); 124my $g_fh = *STDIN; 125my $g_rh_archives = {}; 126 127############################################################## 128# Command line parsing 129############################################################## 130BackupManager::Config::getopt("$0 -ttl=<TTL> --files-from=<FILE>\n 131--ttl|-t: the time to live for outdating files 132--files-from|-f: a file that contains the list of archives to process", 133'ttl|t=s' => \$g_ttl, 134'files-from|f=s' => \$g_filelist, 135); 136 137############################################################## 138# Subs 139############################################################## 140 141# Takes an archive an returns all meta-data contained in its name 142sub parse_archive ($) 143{ 144 my ($archive) = @_; 145 unless (defined $archive) { 146 print_error MSG_INTERNAL; 147 exit E_INTERNAL; 148 } 149 my ($prefix, $name, $date, $master, $filetype); 150 $archive = basename ($archive); 151 152 if ($archive =~ m/^\s*($ENV{BM_ARCHIVE_PREFIX})-?(\S+)?\.?(\d{8})\.(master\.)?(\S+)\s*$/) { 153 ($prefix, $name, $date, $master, $filetype) = ($1, $2, $3, $4, $5); 154 $master = $master ? 1 : 0; 155 $name = "$prefix-md5" if $filetype eq 'md5' and not $name; 156 } 157 158 # The archive pattern 159 elsif ($archive =~ /^\s*([^-]+)-(\S+)\.(\d{8})\.(\S+)\s*$/) { 160 $prefix = $1; 161 $name = $2; 162 $date = $3; 163 my $suffix = $4; 164 if ($suffix =~ /master\.(\S+)/) { 165 $master = 1; 166 $filetype = $1; 167 } 168 elsif ($suffix =~ /\.?(.+)/) { 169 $master = 0; 170 $filetype = $1; 171 } 172 } 173 174 # The md5 file pattern 175 elsif ($archive =~ /^\s*([^-]+)-(\d{8})\.md5\s*$/) { 176 $prefix = $1; 177 $name = "$prefix-md5"; 178 $date = $2; 179 $filetype = "md5"; 180 $master = 0; 181 } 182 183 # Unknown pattern 184 else { 185 return undef; 186 } 187 188 return { prefix => $prefix, 189 name => $name, 190 date => $date, 191 master => $master, 192 filetype => $filetype}; 193} 194 195# Takes a file handle and an array ref, parse the file's content 196# and store in the array exiting filenames. 197sub read_archives($$) 198{ 199 my ($ra_archives, $fh) = @_; 200 201 my $archive = ""; 202 while (<$fh>) { 203 chomp(); 204 205 if (/^\s*(\S+)\s*$/) { 206 $archive = $1; 207 } 208 209 my $rh_data = parse_archive ($archive); 210 next unless defined $rh_data; 211 next unless defined $rh_data->{date}; 212 213 if ($rh_data->{master}) { 214 $g_rh_archives->{$rh_data->{name}}{pathByDateMasters}{$rh_data->{date}} = $archive; 215 } 216 $g_rh_archives->{$rh_data->{name}}{pathByDate}{$rh_data->{date}} = $archive; 217 $g_rh_archives->{dataByPath}{$archive} = $rh_data; 218 219 push @{$ra_archives}, $archive; 220 } 221} 222 223# Takes two array refs. Reads from the first one the list of archives 224# to process, and push in the second one the outdated archives. 225sub outdate_archives($$) 226{ 227 my ($ra_archives, $ra_outdated) = @_; 228 unless (defined $ra_archives and 229 defined $ra_outdated) { 230 exit E_INTERNAL; 231 } 232 233 my $purge_date = strftime ('%Y%m%d', 234 localtime(time() - $g_ttl * 24 * 3600)); 235 print_info "Outdating archives made before $purge_date"; 236 237 my %outdated = (); # set of outdated archives. Will be converted to a list 238 # at the end of this function 239 my %seen = (); 240 my $outdate_master_notmaster = sub 241 { 242 my $do_master = shift; 243 244 245 ARCHIVE_LOOP: 246 foreach my $archive (sort @{$ra_archives}) { 247 my $data = $g_rh_archives->{dataByPath}{$archive}; 248 next unless defined $data; 249 next unless defined $data->{date}; 250 251 # if the date of the archive is older than $purge_date, we may have to outdate it 252 # unless, nothing to do for that archive. 253 next if ($data->{date} > $purge_date); 254 255 # We can outdate a master only if a younger master exists 256 if ($data->{master} && $do_master) { 257 next if $seen{$archive}; 258 $seen{$archive} = 1; 259 260 my $pathByDateMasters = $g_rh_archives->{$data->{name}}{pathByDateMasters}; 261 foreach my $master_date ( keys %$pathByDateMasters) { 262 if ($master_date > $data->{date}) { 263 $outdated{$archive} = 1; 264 last; 265 } 266 } 267 } 268 269 # here the archive is deprecated, its date is < to $purge_date 270 if (!$data->{master} && !$do_master) { 271 272 next if $seen{$archive}; 273 $seen{$archive} = 1; 274 275 # An incremental archive should not be deleted if its master is 276 # still around. At this point I looked through all the masters 277 # and I know which ones I'm keeping. Any archive younger then 278 # the oldest master is kept 279 my $pathByDateMasters = $g_rh_archives->{$data->{name}}{pathByDateMasters}; 280 foreach my $master_date ( keys %$pathByDateMasters) { 281 if ($master_date < $data->{date} && !$outdated{$pathByDateMasters->{$master_date}}) { 282 # I found an older master that I decided to keep. This 283 # archive should thus be kept as well 284 next ARCHIVE_LOOP; 285 } 286 } 287 288 # if BM_ARCHIVE_STRICTPURGE is true, we can only purge 289 # an archive prefixed with BM_ARCHIVE_PREFIX 290 next if (($ENV{BM_ARCHIVE_STRICTPURGE} eq "true") and 291 ($data->{prefix} ne $ENV{BM_ARCHIVE_PREFIX})); 292 293 # now, we're sure we can outdate the archive 294 $outdated{$archive} = 1; 295 } 296 } 297 }; 298 299 300 $outdate_master_notmaster->(1); # masters first 301 $outdate_master_notmaster->(0); # then the others 302 303 push @{$ra_outdated}, sort keys %outdated; 304} 305 306############################################################## 307# Main 308############################################################## 309 310# Init 311init_dialog (DIALOG_VERBOSE); 312 313# Args check 314unless (defined $g_ttl) { 315 print_error "No TTL given"; 316 exit E_INVALID; 317} 318 319# In 320if (defined $g_filelist and -f $g_filelist) { 321 print_info "Reading archives from $g_filelist"; 322 open $g_fh, $g_filelist or die "Unable to open $g_filelist"; 323} 324else { 325 print_info "Reading archives from STDIN"; 326} 327read_archives (\@g_archives, $g_fh); 328 329# Process 330outdate_archives (\@g_archives, \@g_outdated); 331 332# Out 333foreach my $archive (@g_outdated) { 334 print "$archive\n"; 335} 336 337exit E_SUCCESS;