#!/usr/bin/perl -w
#
# archived_data.pl
#
# Compare data on disks to the archived data on tape.
# (Originally committed by hadaq / Sergey, 2010-11-29.)
#
# The script queries an archive with `gstore query`, collects the hld files
# matching /data*/data/<prefix>*.hld on the local disks, and prints which of
# the disk files are already on tape / in cache (same or different size) and
# which exist on the disks only.  The files that are on tape with identical
# size are additionally written to /tmp/Files_on_TAPE_can_be_removed.txt;
# such a list can later be fed back with -r to delete those files.

use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
use File::stat;
use Time::Local;
use FileHandle;
use File::Basename;

my $opt_help   = 0;
my $opt_prefix = "be";
my $opt_startDate;
my $opt_endDate;
my $opt_arch;
my $opt_output = "tape";    # tape|disk|all
my $opt_rm;

# 't' is accepted as an alias of 'e' because earlier versions of the help
# text advertised -t for the end of the time interval.
GetOptions(
    'h|help'     => \$opt_help,
    's|start=s'  => \$opt_startDate,
    'e|t|end=s'  => \$opt_endDate,
    'p|prefix=s' => \$opt_prefix,
    'a|arch=s'   => \$opt_arch,
    'o|out=s'    => \$opt_output,
    'r|rm=s'     => \$opt_rm,
) or do {
    help();
    exit(128);
};

if ($opt_help) {
    help();
    exit(0);
}

# name => { size, status } for every matching file reported by gstore
my %gstore_hash;
my $gstore_href = \%gstore_hash;

# name => { size, path } for every matching file found on the disks
my %eb_hash;
my $eb_href = \%eb_hash;

# Optional time window (epoch seconds); undef means "no bound".
my $startSec = date2sec($opt_startDate);
my $endSec   = date2sec($opt_endDate);

# The prefix is interpolated as a pattern (as it always was); compile it once
# here so that a malformed prefix is reported instead of dying mid-run.
my $hld_name_re = eval { qr/$opt_prefix\d+\.hld/ };
if (!defined $hld_name_re) {
    print STDERR "\nError! Invalid prefix pattern \'$opt_prefix\'. Exit.\n";
    exit(128);
}

if (defined $opt_rm) {
    rm_files();
}
else {
    if (!defined $opt_arch || $opt_arch eq '') {
        print STDERR "\nError! No archive name given (option -a|--arch). Exit.\n";
        exit(128);
    }
    if ($opt_output !~ /\A(?:tape|disk|all)\z/) {
        print STDERR "\nError! Unknown output format \'$opt_output\' (tape|disk|all). Exit.\n";
        exit(128);
    }

    read_hld_tape();
    read_hld_disk();
    cmp_files();
}

exit(0);

########################### END OF MAIN ############################

# Print the usage text to STDOUT.
sub help
{
    print "\n";
    print <<'EOF';
archived_data.pl

This script prints two hld file lists:
1. The list of files which were archived already and can be deleted.
2. The list of files which were not archived yet.

Usage:

   Command line:  archived_data.pl
   [-h|--help]                : Show this help.
   [-s|--start <date_time>]   : Beginning of time interval
                                (format: YYYY-MM-DD_hh:mm:ss).
   [-e|--end <date_time>]     : End of time interval
                                (format: YYYY-MM-DD_hh:mm:ss).
   [-p|--prefix <prefix>]     : Archive only these prefixes.
   [-a|--arch <name>]         : Name of archive.
   [-o|--out <tape|disk|all>] : Print output format:
                                tape : files on tape|cache
                                disk : files on EB disks only
                                all  : all files
   [-r|--rm <path>]           : File path with hld files to be removed
                                from the disks of EB.

Examples:

   Check all hld files on tape from archive hadesoct10raw with prefix 'be':
   archived_data.pl -a hadesoct10raw -p be -o tape

EOF
    return;
}

# Convert a "YYYY-MM-DD_hh:mm:ss" string to epoch seconds (local time zone).
#
# Returns undef when no string is given.  A string that does not have the
# expected format, or that names an impossible date/time, is reported on
# STDERR and terminates the script with exit code 128.
sub date2sec
{
    my ($date_time) = @_;

    return unless defined $date_time;

    if ($date_time =~ /\A(\d{4})-(\d{2})-(\d{2})_(\d{2}):(\d{2}):(\d{2})\z/) {
        my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);

        #- timelocal() wants the month as 0..11 and croaks on out-of-range
        #- values, hence the eval.
        my $sec_epoch = eval {
            timelocal($sec, $min, $hour, $mday, $mon - 1, $year);
        };
        return $sec_epoch if defined $sec_epoch;
    }

    print STDERR "Wrong format: $date_time\nExit.\n";
    exit(128);
}

# Fill %gstore_hash from the output of `gstore query "*" <archive> "*"`.
#
# Each output line is split on whitespace; field 1 is taken as the file path,
# field 5 as the status and field 6 as the size (same columns the script has
# always used).  Only files whose base name matches <prefix><digits>.hld are
# recorded.
sub read_hld_tape
{
    #- List form of the pipe open: no shell is involved, so the archive name
    #- is passed to gstore verbatim and the "*" arguments are not expanded.
    my $gstore_fh;
    if (!open($gstore_fh, '-|', 'gstore', 'query', '*', $opt_arch, '*')) {
        print STDERR "\nError! Could not run gstore: $!. Exit.\n";
        exit(128);
    }

    while (my $line = <$gstore_fh>) {
        my ($path, $status, $size) = (split(/\s+/, $line))[1, 5, 6];

        #- Skip short lines that do not carry all needed columns.
        next unless defined $path && defined $status && defined $size;

        my ($name) = fileparse($path);
        next unless $name =~ $hld_name_re;

        $gstore_href->{$name}->{'size'}   = $size;
        $gstore_href->{$name}->{'status'} = $status;
    }

    if (!close($gstore_fh)) {
        print STDERR "Warning: gstore query did not finish cleanly (status $?).\n";
    }

    return;
}

# Fill %eb_hash with the hld files found under /data*/data/.
#
# Files are located with glob() and inspected with stat() rather than by
# parsing `ls -l` output, whose column layout depends on the date format.
# When -s and/or -e were given, only files whose modification time lies
# strictly inside the requested interval are kept.
sub read_hld_disk
{
    foreach my $path (glob("/data*/data/" . $opt_prefix . "*.hld")) {
        #- The file may have vanished between glob() and stat().
        my $st = stat($path);
        next unless $st;

        #- Check time interval for the files on disks
        my $mtime = $st->mtime;
        next if defined $startSec && $mtime <= $startSec;
        next if defined $endSec   && $mtime >= $endSec;

        my ($name) = fileparse($path);
        next unless $name =~ $hld_name_re;

        $eb_href->{$name}->{'size'} = $st->size;
        $eb_href->{$name}->{'path'} = $path;
    }

    return;
}

# Compare the disk files with the gstore listing and print the result lists.
#
# Disk files smaller than 1000 bytes are ignored.  Depending on -o the
# tape/cache lists ("tape"), the disk-only list ("disk") or both ("all") are
# printed.  The list of files that are on TAPE with identical size is also
# written to /tmp/Files_on_TAPE_can_be_removed.txt.
sub cmp_files
{
    my @tape_list;             # file on tape
    my @tape_diffsize_list;    # on tape but different size
    my @cach_list;             # file in cache
    my @cach_diffsize_list;    # in cache but different size
    my %other_hash;            # file with other status
    my %other_diffsize_hash;   # other status and different size
    my @disk_list;             # file is only on EB disks

    my %same_size_list_for = (TAPE => \@tape_list,
                              CACHE => \@cach_list);
    my %diff_size_list_for = (TAPE => \@tape_diffsize_list,
                              CACHE => \@cach_diffsize_list);

    foreach my $eb_file (sort keys %$eb_href) {
        my $eb_size = $eb_href->{$eb_file}->{'size'};
        my $eb_path = $eb_href->{$eb_file}->{'path'};

        #- Look only at the files above 1kByte
        next if $eb_size < 1000;

        my $gstore_entry = $gstore_href->{$eb_file};

        #- If the file is not on tape
        if (!defined $gstore_entry) {
            push(@disk_list, $eb_path);
            next;
        }

        my $status    = $gstore_entry->{'status'};
        my $same_size = ($gstore_entry->{'size'} == $eb_size);
        my $list_for  = $same_size ? \%same_size_list_for
                                   : \%diff_size_list_for;

        if (my $list = $list_for->{$status}) {
            push(@$list, $eb_path);
        }
        elsif ($same_size) {
            $other_hash{$eb_path} = $status;
        }
        else {
            $other_diffsize_hash{$eb_path} = $status;
        }
    }

    #- Print all the lists
    if ($opt_output eq "all" || $opt_output eq "tape") {
        #- NOTE(review): fixed, predictable path in /tmp; kept because the
        #- -r workflow refers to it.
        my $file2rm = "/tmp/Files_on_TAPE_can_be_removed.txt";
        print "Files on TAPE ($file2rm):\n";

        my $fh = FileHandle->new($file2rm, "w");
        if (!$fh) {
            my $txt = "\nError! Could not open file \"$file2rm\" for output. Exit.\n";
            print STDERR $txt;
            exit(128);
        }
        foreach my $file (@tape_list) {
            print "$file\n";
            print {$fh} "$file\n";
        }
        #- Buffered write errors only show up on close.
        if (!$fh->close()) {
            print STDERR "\nError! Could not write file \"$file2rm\": $!. Exit.\n";
            exit(128);
        }
        print "\n\n";

        _print_file_list("Files on TAPE have different size:",  \@tape_diffsize_list);
        _print_file_list("Files in CACHE:",                     \@cach_list);
        _print_file_list("Files in CACHE have different size:", \@cach_diffsize_list);

        _print_status_list("Files with other status:", \%other_hash);
        _print_status_list("Files with other status and different size:",
                           \%other_diffsize_hash);
    }

    if ($opt_output eq "all" || $opt_output eq "disk") {
        _print_file_list("Files on EB disks only:", \@disk_list);
    }

    return;
}

# Print a title line, one path per line, and two empty lines.
sub _print_file_list
{
    my ($title, $files_aref) = @_;

    print "$title\n";
    print "$_\n" for @$files_aref;
    print "\n\n";

    return;
}

# Print a title line, "path status" per line (sorted by path), and two
# empty lines.
sub _print_status_list
{
    my ($title, $status_href) = @_;

    print "$title\n";
    foreach my $file (sort keys %$status_href) {
        print "$file $status_href->{$file}\n";
    }
    print "\n\n";

    return;
}

# Delete every file named in the list file given with -r (one path per line)
# after an explicit confirmation by the user.
#
# Paths are removed with unlink(); no shell is involved, so a line is never
# expanded or interpreted as a command.  Blank lines are skipped and failed
# removals are reported on STDERR.
sub rm_files
{
    print "Remove all hld files from $opt_rm...\n";
    askUser();

    my $fh = FileHandle->new($opt_rm, "r");

    isItDefined($fh, $opt_rm);

    while (my $path = <$fh>) {
        chomp($path);
        $path =~ s/\A\s+//;
        $path =~ s/\s+\z//;
        next if $path eq '';

        print "cmd: rm $path\n";
        if (!unlink($path)) {
            print STDERR "Could not remove \'$path\': $!\n";
        }
    }

    $fh->close();

    return;
}

# Terminate with exit code 128 (message on STDERR and STDOUT) when the given
# file handle is false, i.e. the file $name could not be opened.
# Returns 0 otherwise.
sub isItDefined
{
    my ($fh, $name) = @_;

    if (!$fh) {
        my $txt = "\nError! Could not open file \'$name\'. Exit.\n";
        print STDERR $txt;
        print $txt;
        exit(128);
    }

    return 0;
}

# Ask the user for confirmation.  Only an explicit "yes" or "y" (any case)
# continues; every other answer - including just pressing <enter> or end of
# input - exits, because the caller is about to delete files.
sub askUser
{
    my $answer = lc(promptUser("Continue?", "yes/no"));

    if ($answer eq "yes" || $answer eq "y") {
        print "Continue...\n";
        return;
    }

    print "Exit.\n";
    exit(0);
}

# Prompt on STDOUT and read one line from STDIN.
#
# Arguments: $promptString and an optional $defaultValue, which is shown in
# square brackets after the prompt.
# Returns the user's input without the trailing newline.  If the input is
# empty (or STDIN is at end of file) and a default was given, the default is
# returned; without a default the empty string is returned and the calling
# routine has to deal with that.
sub promptUser
{
    my ($promptString, $defaultValue) = @_;

    my $has_default = (defined $defaultValue && $defaultValue ne '');

    $| = 1;    # force a flush after our print

    if ($has_default) {
        print $promptString, "[", $defaultValue, "]: ";
    }
    else {
        print $promptString, ": ";
    }

    my $input = <STDIN>;    # get the input from STDIN (presumably the keyboard)
    $input = '' unless defined $input;    # end of file: treat as empty answer

    # remove the newline character from the end of the input the user gave us
    chomp($input);

    # compare against the empty string so that an answer of "0" is kept
    if ($has_default && $input eq '') {
        return $defaultValue;
    }

    return $input;
}