--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long;
+use Data::Dumper;
+use File::stat;
+use Time::Local;
+use FileHandle;
+use File::Basename;
+
+#- Command line options and their defaults
+my $opt_help = 0;
+my $opt_prefix = "be";    # -p: prefix of the hld file names
+my $opt_startDate;        # -s: begin of the time interval (optional)
+my $opt_endDate;          # -e: end of the time interval (optional)
+my $opt_arch;             # -a: name of the archive to query
+my $opt_output = "tape";  # -o: tape|disk|all
+my $opt_rm;               # -r: list file with hld paths to be removed
+
+#- GetOptions returns false after an unknown or malformed option (it has
+#- already warned on STDERR). Stop here instead of running with a partly
+#- parsed command line.
+my $opt_ok = GetOptions ('h|help'     => \$opt_help,
+                         's|start=s'  => \$opt_startDate,
+                         'e|end=s'    => \$opt_endDate,
+                         'p|prefix=s' => \$opt_prefix,
+                         'a|arch=s'   => \$opt_arch,
+                         'o|out=s'    => \$opt_output,
+                         'r|rm=s'     => \$opt_rm
+                         );
+
+if( !$opt_ok ){
+    &help();
+    exit(1);
+}
+
+#- A help request short-circuits everything else
+if( $opt_help ){ &help(); exit(0); }
+
+#- Tables keyed by hld file name; the subs below access them through
+#- the two references only.
+my %gstore_hash;                     # files known to the archive
+my %eb_hash;                         # files found on the EB disks
+my $gstore_href = \%gstore_hash;
+my $eb_href     = \%eb_hash;
+
+#- Time interval in epoch seconds (undef when the option was not given)
+my ($startSec, $endSec) = ( scalar &date2sec($opt_startDate),
+                            scalar &date2sec($opt_endDate) );
+
+#- Removal mode: delete the listed files and leave
+if( defined $opt_rm ){
+    &rm_files();
+    exit(0);
+}
+
+#- Report mode: collect both file tables, then compare them
+&read_hld_tape();
+&read_hld_disk();
+&cmp_files();
+
+exit(0);
+
+########################### END OF MAIN ############################
+
+#- Print the usage text of the script on STDOUT.
+#- Takes no arguments; the return value is not meaningful.
+sub help()
+{
+    print "\n";
+    #- Non-interpolating here-document: the text is printed verbatim
+    print <<'EOF';
+archived_data.pl
+
+This script prints two hld file lists:
+1. The list of files which were archived already and can be deleted.
+2. The list of files which were not archived yet.
+
+Usage:
+
+ Command line: archived_data.pl
+ [-h|--help] : Show this help.
+ [-s|--start <date_time>] : Beginning of time interval.
+ [-e|--end <date_time>] : End of time interval.
+ <date_time> format: YYYY-MM-DD_hh:mm:ss
+ [-p|--prefix <prefix>] : Archive only these prefixes.
+ [-a|--arch <archive>] : Name of archive.
+ [-o|--out <tape|disk|all>] : Print output format:
+ tape : files on tape|cache
+ disk : files on EB disks only
+ all : all files
+ [-r|--rm <file>] : File path with hld files to be removed
+ from the disks of EB.
+
+Examples:
+
+ Check all hld files on tape from archive hadesoct10raw with prefix 'be':
+ archived_data.pl -a hadesoct10raw -p be -o tape
+
+EOF
+}
+
+#- Convert a time stamp of the form YYYY-MM-DD_hh:mm:ss into epoch
+#- seconds in the local time zone.
+#-
+#- Argument: the time stamp string, or undef when the option was not given.
+#- Returns:  the epoch seconds, or undef for an undefined argument.
+#- Exits with status 1, after a message on STDERR, when the string does
+#- not have the expected layout or is not a valid calendar date.
+#-
+#- Declared without a prototype: the sub takes one argument, which an
+#- empty prototype "()" would contradict.
+sub date2sec
+{
+    my ($date_time) = @_;
+
+    #- Explicit undef, so the caller gets exactly one value in list context too
+    return undef unless( defined $date_time );
+
+    #- Anchored on both ends so that surrounding garbage is rejected
+    my @field = $date_time =~ /\A(\d{4})-(\d{2})-(\d{2})_(\d{2}):(\d{2}):(\d{2})\z/;
+
+    my $sec_epoch;
+    if( @field ){
+        my ($year, $mon, $mday, $hour, $min, $sec) = @field;
+        $mon = $mon - 1;   # timelocal() counts months 0..11
+
+        #- timelocal() dies on out-of-range fields (e.g. month 13);
+        #- catch that and report it like any other malformed input.
+        $sec_epoch = eval { timelocal($sec, $min, $hour, $mday, $mon, $year) };
+    }
+
+    if( !defined $sec_epoch ){
+        print STDERR "Wrong format: $date_time\nExit.\n";
+        exit(1);
+    }
+
+    return $sec_epoch;
+}
+
+#- Fill $gstore_href with the hld files known to the archive.
+#-
+#- Runs "gstore query * <archive> *" and stores, per file name matching
+#- <prefix><digits>.hld, the 'size' and 'status' columns of the listing.
+#- Reads $opt_arch and $opt_prefix; takes no arguments.
+#-
+#- NOTE(review): the column positions (2nd = path, 6th = status,
+#- 7th = size after splitting on white space) are taken over from the
+#- previous implementation; the gstore output layout is not visible here.
+sub read_hld_tape()
+{
+    #- Without an archive name the query is meaningless
+    if( !defined $opt_arch ){
+        print STDERR "Warning: no archive name given (-a), archive is not queried.\n";
+        return;
+    }
+
+    #- List form of the pipe open: the arguments reach gstore verbatim,
+    #- no shell is involved and nothing in $opt_arch can be interpreted.
+    my $gstore_fh;
+    if( !open($gstore_fh, '-|', 'gstore', 'query', '*', $opt_arch, '*') ){
+        print STDERR "Warning: could not run 'gstore query': $!\n";
+        return;
+    }
+
+    #- The prefix is user input: match it literally
+    my $name_re = qr/\Q$opt_prefix\E\d+\.hld/;
+
+    while( my $line = <$gstore_fh> ){
+        my (undef, $path, undef, undef, undef, $status, $size) = split(/\s+/, $line);
+
+        #- Blank or short lines carry no path; fileparse() would croak on undef
+        next unless( defined $path );
+
+        my ($name) = fileparse($path);
+        next unless( $name =~ $name_re );
+
+        $gstore_href->{$name}->{'size'} = $size;
+        $gstore_href->{$name}->{'status'} = $status;
+    }
+
+    close($gstore_fh);
+    return;
+}
+
+#- Fill $eb_href with the hld files found on the EB disks.
+#-
+#- Expands /data*/data/<prefix>*.hld and stores, per file name matching
+#- <prefix><digits>.hld, the 'size' in bytes and the full 'path'.
+#- Reads $opt_prefix, $startSec and $endSec; takes no arguments.
+#-
+#- When $startSec and/or $endSec are defined, only files whose
+#- modification time lies strictly inside the given bound(s) are kept;
+#- each bound is applied on its own.
+#-
+#- The files are inspected with glob() and stat() instead of parsing the
+#- text printed by "ls -l", whose column layout depends on the locale.
+sub read_hld_disk()
+{
+    #- The prefix is user input: match it literally
+    my $name_re = qr/\Q$opt_prefix\E\d+\.hld/;
+
+    my @found;   # [path, File::stat object] per existing file
+    foreach my $path (glob("/data*/data/" . $opt_prefix . "*.hld")){
+        #- stat() is the File::stat version; it returns undef when the
+        #- file vanished between the expansion and this call.
+        my $st = stat($path);
+        next unless( defined $st );
+        push(@found, [$path, $st]);
+    }
+
+    #- Oldest first, as "ls -ltr" delivered them: when the same file name
+    #- exists on several disks the most recently modified copy wins.
+    foreach my $entry (sort { $a->[1]->mtime <=> $b->[1]->mtime } @found){
+        my ($path, $st) = @$entry;
+        my $mtime = $st->mtime;
+
+        #- Check time interval for the files on disks
+        next if( defined $startSec && $mtime <= $startSec );
+        next if( defined $endSec && $mtime >= $endSec );
+
+        my ($name) = fileparse($path);
+        next unless( $name =~ $name_re );
+
+        $eb_href->{$name}->{'size'} = $st->size;
+        $eb_href->{$name}->{'path'} = $path;
+    }
+
+    return;
+}
+
+#- Compare the files found on the EB disks ($eb_href) with the archive
+#- listing ($gstore_href) and print the resulting file lists on STDOUT.
+#-
+#- Disk files below 1000 bytes are ignored. Every other disk file is
+#- sorted by archive status (TAPE, CACHE, anything else) and by whether
+#- the archived size equals the size on disk; files unknown to the
+#- archive form the "EB disks only" list.
+#-
+#- $opt_output selects what is printed: "tape", "disk" or "all".
+#- With "tape" or "all" the paths of the files on TAPE with matching
+#- size are also written to /tmp/Files_on_TAPE_can_be_removed.txt.
+#- Exits with status 128 when that file cannot be opened or written.
+#-
+#- NOTE(review): the list file has a fixed name in /tmp and is
+#- overwritten on every run.
+sub cmp_files()
+{
+    my @tape_list;           # file on tape
+    my @tape_diffsize_list;  # on tape but different size
+    my @cach_list;           # file in cache
+    my @cach_diffsize_list;  # in cache but different size
+
+    my %other_hash;          # file with other status
+    my $other_href = \%other_hash;
+    my %other_diffsize_hash; # but different size
+    my $other_diffsize_href = \%other_diffsize_hash;
+
+    my @disk_list;           # file is only on EB disks
+
+    #- Target list per archive status, for equal and for different size
+    my %same_size_for = ( 'TAPE' => \@tape_list,          'CACHE' => \@cach_list );
+    my %diff_size_for = ( 'TAPE' => \@tape_diffsize_list, 'CACHE' => \@cach_diffsize_list );
+
+    foreach my $eb_file (sort keys %$eb_href){
+        my $eb_size = $eb_href->{$eb_file}->{'size'};
+        my $eb_path = $eb_href->{$eb_file}->{'path'};
+
+        #- Look only at the files above 1kByte
+        next if($eb_size < 1000);
+
+        my $archived = $gstore_href->{$eb_file};
+
+        #- If the file is not on tape
+        if( !defined $archived ){
+            push(@disk_list, $eb_path);
+            next;
+        }
+
+        #- A listing line with missing columns leaves size or status
+        #- undefined; treat that as "different size" / "other status"
+        #- without tripping over undef.
+        my $gstore_size   = $archived->{'size'};
+        my $gstore_status = defined $archived->{'status'} ? $archived->{'status'} : '';
+        my $same_size     = ( defined $gstore_size && $gstore_size == $eb_size );
+
+        my ($list_for, $rest_href) = $same_size
+            ? (\%same_size_for, $other_href)
+            : (\%diff_size_for, $other_diffsize_href);
+
+        if( exists $list_for->{$gstore_status} ){
+            push(@{ $list_for->{$gstore_status} }, $eb_path);
+        }
+        else{
+            $rest_href->{$eb_path} = $gstore_status;
+        }
+    }
+
+    my $want_tape = ( $opt_output eq "all" || $opt_output eq "tape" );
+    my $want_disk = ( $opt_output eq "all" || $opt_output eq "disk" );
+
+    #- An unknown format would otherwise print nothing at all, silently
+    if( !$want_tape && !$want_disk ){
+        print STDERR "Warning: unknown output format \'$opt_output\' (use tape, disk or all).\n";
+    }
+
+    #- Print all the lists
+    if( $want_tape ){
+        my $file2rm = "/tmp/Files_on_TAPE_can_be_removed.txt";
+        print "Files on TAPE ($file2rm):\n";
+        my $fh = FileHandle->new($file2rm, "w");
+        if(!$fh) {
+            my $txt = "\nError! Could not open file \"$file2rm\" for output. Exit.\n";
+            print STDERR $txt;
+            exit(128);
+        }
+        foreach my $file (@tape_list){
+            print "$file\n";
+            print $fh "$file\n";
+        }
+        #- Buffered write errors only show up at close; an incomplete
+        #- removal list must not go unnoticed.
+        if( !$fh->close() ){
+            print STDERR "\nError! Could not write file \"$file2rm\": $!. Exit.\n";
+            exit(128);
+        }
+        print "\n\n";
+
+        &_print_file_list("Files on TAPE have different size:", \@tape_diffsize_list);
+        &_print_file_list("Files in CACHE:", \@cach_list);
+        &_print_file_list("Files in CACHE have different size:", \@cach_diffsize_list);
+        &_print_status_list("Files with other status:", $other_href);
+        &_print_status_list("Files with other status and different size:", $other_diffsize_href);
+    }
+
+    if( $want_disk ){
+        &_print_file_list("Files on EB disks only:", \@disk_list);
+    }
+
+    return;
+}
+
+#- Print a title line, one path per line and two closing newlines.
+sub _print_file_list
+{
+    my ($title, $list_aref) = @_;
+
+    print "$title\n";
+    foreach my $file (@$list_aref){
+        print "$file\n";
+    }
+    print "\n\n";
+    return;
+}
+
+#- Print a title line, "path status" per line (sorted by path) and two
+#- closing newlines.
+sub _print_status_list
+{
+    my ($title, $status_href) = @_;
+
+    print "$title\n";
+    foreach my $file (sort keys %$status_href){
+        my $status = $status_href->{$file};
+        print "$file $status\n";
+    }
+    print "\n\n";
+    return;
+}
+
+#- Remove every file named in the list file $opt_rm (one path per line).
+#-
+#- Asks for confirmation first (askUser exits when the user declines)
+#- and exits through isItDefined when the list file cannot be opened.
+#- Takes no arguments.
+sub rm_files()
+{
+    print "Remove all hld files from $opt_rm...\n";
+    &askUser();
+
+    my $fh = FileHandle->new($opt_rm, "r");
+
+    &isItDefined($fh, $opt_rm);
+
+    my $failed = 0;
+
+    while( my $file = <$fh> ){
+        #- Drop the line end and surrounding white space (also a stray CR)
+        $file =~ s/\A\s+//;
+        $file =~ s/\s+\z//;
+
+        #- A blank line would run "rm" without any operand
+        next if( $file eq '' );
+
+        print "cmd: rm $file\n";
+
+        #- List form: the path reaches rm as one single argument and is
+        #- never interpreted by a shell; "--" protects names starting
+        #- with a dash.
+        if( system('rm', '--', $file) != 0 ){
+            $failed++;
+        }
+    }
+
+    $fh->close();
+
+    if( $failed ){
+        print STDERR "Warning: $failed file(s) could not be removed.\n";
+    }
+
+    return;
+}
+
+#- Make sure a file could be opened.
+#-
+#- Arguments: the handle returned by the open attempt and the file name
+#-            used for the error message.
+#- Returns:   0 when the handle is usable.
+#- Otherwise the error text goes to STDERR and to STDOUT and the script
+#- exits with status 128.
+sub isItDefined()
+{
+    my ($handle, $name) = @_;
+
+    #- Usable handle: nothing to complain about
+    return 0 if( $handle );
+
+    my $message = "\nError! Could not open file \'$name\'. Exit.\n";
+    print STDERR $message;
+    print $message;
+    exit(128);
+}
+
+#- Ask the user whether to go on; exit with status 0 unless the answer
+#- is an explicit yes.
+#-
+#- This guards the removal of files, so anything but "y" or "yes" (in
+#- any letter case) stops the script. That includes just pressing Enter:
+#- promptUser then hands back the hint string "yes/no", which is not a
+#- confirmation.
+sub askUser()
+{
+    my $answer = &promptUser("Continue?", "yes/no");
+
+    if( defined $answer && $answer =~ /\A\s*(?:y|yes)\s*\z/i ){
+        print "Continue...\n";
+    }
+    else{
+        print "Exit.\n";
+        exit(0);
+    }
+}
+
+#- Print a prompt and read one line from STDIN.
+#-
+#- Arguments: $promptString - text of the prompt
+#-            $defaultValue - optional; shown in brackets after the
+#-                            prompt and returned when the user enters
+#-                            nothing
+#- Returns:   the line typed by the user without its line end, or the
+#-            default when there is one and the input is empty. Without
+#-            a default an empty string is returned for empty input;
+#-            the caller has to deal with that.
+#-
+#- Side effect: autoflush stays switched on for the selected output handle.
+sub promptUser {
+
+    my ($promptString,$defaultValue) = @_;
+
+    #- "Has a value" means defined and not empty, so that a plain "0"
+    #- counts as a value, both as default and as input.
+    my $has_default = ( defined $defaultValue && $defaultValue ne '' );
+
+    $| = 1;               # make sure the prompt is visible before we wait
+
+    if ($has_default) {
+        print $promptString, "[", $defaultValue, "]: ";
+    } else {
+        print $promptString, ": ";
+    }
+
+    my $input = <STDIN>;  # get the input from STDIN (presumably the keyboard)
+
+    #- End of input (e.g. Ctrl-D or a closed pipe) counts as empty input
+    $input = '' unless( defined $input );
+
+    # remove the newline character from the end of the input the user gave us
+    chomp($input);
+
+    if ($has_default && $input eq '') {
+        return $defaultValue;
+    }
+
+    return $input;
+}