From 384066cb26913fd3ed19540af3f8a7b0cc34bab3 Mon Sep 17 00:00:00 2001 From: hadaq Date: Thu, 22 Jul 2010 16:03:42 +0000 Subject: [PATCH] Independent cleanup of disks. Sergey. --- disks/cleanup.pl | 260 ++++++++++++++++++++++++----------------------- 1 file changed, 134 insertions(+), 126 deletions(-) diff --git a/disks/cleanup.pl b/disks/cleanup.pl index a9b77ad..f258d5d 100755 --- a/disks/cleanup.pl +++ b/disks/cleanup.pl @@ -7,6 +7,7 @@ use threads; use threads::shared; use IO::Socket; use IO::Select; +use File::stat; # Assume that there are four (or less) EB processes running per server. # Each process writes to disks about 40 MB/s. @@ -25,19 +26,47 @@ use IO::Select; # 0.5 TB / 2 GB = 250 files to move # -my $threshold = 20; # free disk space in % +my $opt_help = 0; my $opt_test = 0; my $opt_verb = 0; +my $thr_warn = 7; # free disk space in % +my $thr_crit = 4; # free disk space in % +my $rm_persent = 5; # remove 5% of oldest files from total disk space (100GB for 2TB disk) +my $sleep = 300; # secs +my $opt_daemon = 0; GetOptions ('t|test' => \$opt_test, - 'v|verb' => \$opt_verb); + 'w|warn=s' => \$thr_warn, + 'c|crit=s' => \$thr_crit, + 'r|rm=s' => \$rm_persent, + 's|sleep=i' => \$sleep, + 'd|daemon' => \$opt_daemon, + 'v|verb' => \$opt_verb, + 'h|help' => \$opt_help); + +if($opt_help){ + &help(); + exit(0); +} my %disks_hash; # Hash of disks with their available free space my $disks_href = \%disks_hash; +#- Daemonize +if($opt_daemon){ + open(STDIN, '>/dev/null'); + open(STDOUT, '>/dev/null'); + open(STDERR, '>/dev/null'); +} + +#- POSIX signal handlers: see signal(7) or kill(1) for available signals +foreach my $signal ( qw(HUP INT QUIT ILL ABRT FPE SEGV TERM USR1 USR2) ){ + $SIG{$signal} = sub { &finishAndExit( $signal ); }; +} + +my $ExitCode : shared = -1; my $status : shared = "OK"; my @screenPID; - our $server_port = '50501'; our $protocol = 'tcp'; @@ -47,121 +76,127 @@ while(1){ &checkDisks(); &cleanup(); - sleep(600); # 10 minutes + sleep($sleep); # 5 minutes } exit(0); ########################### END OF MAIN ######################### -sub cleanup(){ +sub help() +{ + print "\n"; + print << 'EOF'; +cleanup.pl + + This script checks disk space of the disks of + the Event Builder and removes old hld files if + the used disk space exceedes the limit. + +Usage: + Command line: cleanup.pl + [-h|--help] Print this help. + [-v|--verb] More verbouse. + [-t|--test] Run in test mode. + [-w|--warn %] Minimum free disk space (%) for warning status. + [-c|--crit %] Minimum free disk space (%) for critical status. + [-r|--rm %] Amount of disk space (%) to cleaned up. + [-s|--sleep sec] Sleep time between cleanup actions. + +EOF +} - my $tot_space = 0; - my $free_space = 0; +sub checkDisks() +{ + print "Check disks...\n" if($opt_verb); - #- Loop over disk numbers - foreach my $num (1..22){ - $tot_space = $tot_space + $disks_href->{$num}->{'tot'}; - $free_space = $free_space + $disks_href->{$num}->{'free'}; - } + my @df_info = `df -m`; # in MBytes - my $free_all = int(100*$free_space/$tot_space); + foreach my $line (@df_info){ - $status = ""; + my ($fsys, $tot, $used, $avail, $pers, $mount) = split(/ +/, $line); - print "Total free disk space: $free_all\n" if( $opt_verb ); + chomp($mount); - if( $free_all < $threshold ){ - $status = "WARNING - cleaning up, total free disk space: $free_all"; - &rmData("/data*/data/*", 500); - } - else{ - $status = "OK - total free disk space: $free_all"; + #- Loop over disk numbers + foreach my $num (1..22){ + my $diskName = sprintf("/data%02d", $num); + + if($diskName eq $mount){ + #- Same free space for given '/dataxx' disk + $disks_href->{$num}->{'tot'} = $tot; + $disks_href->{$num}->{'free'} = $avail; + } + } } +} - # There are special data disks, where - # EB procs start to write their first files before EB procs - # got disk numbers from daq_disks process. - # The numbers of these disks are equal to the numbers of - # shared mem segments of EB procs. - # We have to take special care of cleaning these disks up. +sub cleanup() +{ + my $min_free_num = 1; + my $min_free_space = $disks_href->{1}->{'free'}; - my @eb_list; + foreach my $num (1..22){ + my $tot_space = $disks_href->{$num}->{'tot'}; # MBytes + my $free_space = $disks_href->{$num}->{'free'}; # MBytes + + if( $min_free_space > $free_space ){ + $min_free_space = $free_space; + $min_free_num = $num; + } - #- Get EB numbers from shared mem segment names - foreach my $num (1..16){ - my $shmem = "/dev/shm/daq_evtbuild" . $num . ".shm"; + #- Check if it is time to cleanup + my $persent = (100 * $free_space) / $tot_space; - #- If the shared memory segment exists - if( -e $shmem ){ - push(@eb_list, $num); - } + if( $persent < $thr_warn ){ + my $datapath = sprintf("/data%02d/data", $num); + my $nrOfMB2rm = $rm_persent * $tot_space / 100; # Number of MB to remove + &rmData($datapath, $nrOfMB2rm); + } } - #- Check free disk space on the disks with the numbers from @eb_list - foreach my $ebnum (@eb_list){ - my $free = $disks_href->{$ebnum}->{'free'}; - my $tot = $disks_href->{$ebnum}->{'tot'}; - - #- If free space is below 20% - if( 100 * $free / $tot < 20 ){ - my $path = sprintf("/data%02d/data/*", $ebnum); + my $datadisk = sprintf("/data%02d", $min_free_num); - &rmData("/data*/data/*", 100); - } + my $persent = int( (100 * $min_free_space) / $disks_href->{$min_free_num}->{'tot'} ); + + if( $persent < $thr_crit ){ + $status = "CRITICAL - $datadisk free space: $min_free_space MB ($persent%)"; + } + elsif( $persent < $thr_warn ){ + $status = "WARNING - $datadisk free space: $min_free_space MB ($persent%), cleaning up..."; } + else{ + $status = "OK - $datadisk free space: $min_free_space MB ($persent%)"; + } + + print "$status\n" if($opt_verb); } sub rmData() { #- Remove old data + my ($path, $nrOfMB2rm) = @_; - my ($path, $numOfFiles) = @_; + print "Remove $nrOfMB2rm MB of old data in $path\n" if($opt_verb); - my @data = glob($path); + my @data = glob("$path/*.hld"); - #- Get the files sorted by size. File with largest size comes first. + #- Get the files sorted by date. my @sorted_data = sort {-M $b <=> -M $a} @data; - my $file_counter = 0; + my $total_rm_size = 0; foreach my $hldfile (@sorted_data){ + my $cmd = "rm $hldfile"; - print "exe: $cmd\n" if( $opt_verb ); + #print "exe: $cmd, total RM size: $total_rm_size MB\n" if( $opt_verb ); #system($cmd); - if($file_counter >= $numOfFiles){ - last; - } - - $file_counter++; - - sleep(1); - } - - exit(0); -} - -sub checkDisks() -{ - - my @df_info = `df -m`; - - foreach my $line (@df_info){ + #- Convert to MBytes + $total_rm_size = $total_rm_size + int((stat($hldfile)->size) / 1024 / 1024); - my ($fsys, $tot, $used, $avail, $pers, $mount) = split(/ +/, $line); - - chomp($mount); - - #- Loop over disk numbers - foreach my $num (1..22){ - my $diskName = sprintf("/data%02d", $num); - - if($diskName eq $mount){ - #- Same free space for given '/dataxx' disk - $disks_href->{$num}->{'tot'} = $tot; - $disks_href->{$num}->{'free'} = $avail; - } + if($total_rm_size >= $nrOfMB2rm){ + last; } } } @@ -201,57 +236,30 @@ sub statusServer{ my $server_socket; } } } + + if( $ExitCode ne "-1" ){ + print "Exit status server thread.\n"; + close( $server_socket ); + return; + } } } -sub mvData() +sub finishAndExit() { - #- Move old data - # - # This is required for special data disks, where - # EB procs start to write their first files before EB procs - # got disk numbers from daq_disks process. - - my @eb_list; - - #- Get EB numbers from shared mem segment names - foreach my $num (1..16){ - my $shmem = "/dev/shm/daq_evtbuild" . $num . ".shm"; - - #- If the shared memory segment exists - if( -e $shmem ){ - push(@eb_list, $num); - } - } - - #- Check free disk space on the disks with the numbers from @eb_list - foreach my $ebnum (@eb_list){ - my $free = $disks_href->{$ebnum}->{'free'}; - my $tot = $disks_href->{$ebnum}->{'tot'}; - - #- If free space is below 20% - if( 100 * $free / $tot < 20 ){ - - my $diskName = sprintf("/data%02d", $ebnum); + # don't allow nested signal handling + return if ($ExitCode ne "-1"); - my @data = glob("$diskName/data/*"); + # this will stop the treads, too + $ExitCode = shift; - #- In this sorting, oldest file comes first - my @sorted_data = sort {-M $b <=> -M $a} @data; + print "cleanup.pl exited (signal/exit code: $ExitCode).\n"; - #- In this sorting, disk with largest free space comes first - my @disk_nums = sort { $disks_hash{$b}{'free'} cmp $disks_hash{$a}{'free'} } keys %$disks_href; + # wait until all threads ended - don't join the main thread or ourselves + foreach my $thread (threads->list()){ + $thread->join() + if ($thread->tid() && !threads::equal( $thread, threads->self() )); + } - #- Move 0.5 TB (250 files of 2 GB size) to other disks - my $i_file = 0; - my $i_disk = 0; - - foreach my $file (@sorted_data){ - - my $free_space = $disks_href->{$disk_nums[$i_disk]}->{'free'}; - - #print "empty_disk: $empty_disk, $fspace\n"; - } - } - } + exit(1); } -- 2.43.0