#!/usr/bin/perl -w
#
#   watchdog - Check disk space and load, send mails if specified numbers
#              are exceeded.
#
#   Author:    Jochen Wiedmann
#              Am Eisteich 9
#              72555 Metzingen
#              Germany
#
#              E-Mail: joe@ispsoft.de
#
############################################################################

use strict;


#
#   Configurable section
#
my $MAX_LOAD = 3;        # Send mail, if this load is exceeded
my $MAX_DISC = 80;       # Send mail, if a partition's usage in percent
                         # exceeds this number
my $MAX_DISC_INC = 10;   # Send mail, if a partition's usage rises by at
                         # least this many percentage points between two
                         # checks; 0 disables the increment check
my $ADMIN = 'root';      # Send mail to this address
my $HOST = 'monitor.wuestenrot.de';  # Local host name


############################################################################
#
#   Features:
#
#   - Mail will be sent only once for a specified event until the
#     event happens to be fixed.
#
#   These features depend on the existence of the cache file configured
#   below.
#
############################################################################

# Per-OS settings, keyed by the value of $^O. Adding support for another
# operating system means adding one entry here.
#
#   cache_file  File used to remember the state of the previous run
#   mail        Command to use for sending mail; must read the mail body
#               from stdin. The literal placeholders $subject and $to are
#               replaced (shell-quoted) by Mail().
#   df          Command to determine disk usage; must print to stdout
#   dfi         Command to determine inode usage; must print to stdout
#   uptime      Command to determine the load; must print to stdout
#   use_locks   Whether flock() is used to protect the cache file
my %OS_SETTINGS = (
    'linux' => {
        'cache_file' => '/var/log/watchdog.log',
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -vk',
        'dfi'        => '/bin/df -i',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 1,
    },
    'sco3.2v5.0' => {
        'cache_file' => '/var/adm/watchdog.log',
        'mail'       => '/usr/bin/mail -s $subject $to',
        'df'         => '/bin/df -Bk',
        'dfi'        => '/bin/df -I',
        'uptime'     => '/usr/bin/uptime',
        'use_locks'  => 0,
    },
);
my $os_settings = $OS_SETTINGS{$^O} or die "Unknown OS: $^O";

my $CACHE_FILE = $os_settings->{'cache_file'};
my $MAIL       = $os_settings->{'mail'};
my $DF         = $os_settings->{'df'};
my $DFI        = $os_settings->{'dfi'};
my $UPTIME     = $os_settings->{'uptime'};
my $USE_LOCKS  = $os_settings->{'use_locks'};

# If you want to specify a different capacity limit for some drive, you
# can do it here. By default $MAX_DISC will be used.
my %CAPACITIES = (
    # Example:
    # '/dev/hda1' => 75,
    # '/dev/hda2' => 70
);


use vars qw($debug $verbose);


############################################################################
#
#   We use *no* external modules here.
#   This command will be executed from within cron, thus it should be
#   small and not use much resources.
#
############################################################################

# This code stolen from the "Symbol" module. See "perldoc Symbol".
package Symbol;
{
    my $genpkg = "Symbol::";
    my $genseq = 0;

    # Returns a reference to a fresh anonymous glob, usable as a file
    # handle. The glob is removed from the symbol table right away, so
    # the handle is closed as soon as the last reference to it goes away.
    sub gensym () {
        my $name = "GEN" . $genseq++;
        no strict 'refs';
        my $ref = \*{$genpkg . $name};
        delete $$genpkg{$name};
        $ref;
    }
}


# This code similar to Data::Dumper. We save a complex hash ref
#   { var1 => 'val1',
#     var2 => 'val2',
#     var3 => { var4 => 'val4',
#               var5 => 'val5' }
#   }
# into the following format:
#   var1=val1
#   var2=val2
#   var3__var4=val4
#   var3__var5=val5
#
package Dump;

# Recursive worker of new().
#   $proto   class name or object, used for the recursive call
#   $ref     hash ref to serialize
#   $prefix  flattened name of the enclosing hash ('' at the top level)
# Returns the serialized text, one "name=value" line per scalar.
sub _new {
    my($proto, $ref, $prefix) = @_;
    my $dump = '';
    # Sorted keys make the cache file deterministic across runs.
    foreach my $var (sort keys %$ref) {
        my $val = $ref->{$var};
        # Join with "__" only below the top level; a top-level scalar used
        # to be written as "__var=val", which Read() could not map back.
        my $name = (defined($prefix) && $prefix ne '')
            ? "${prefix}__$var" : $var;
        if (ref($val)) {
            $dump .= $proto->_new($val, $name);
        } else {
            $val = '' unless defined($val);
            $dump .= "$name=$val\n";
        }
    }
    $dump;
}

# Constructor: serializes the hash ref $ref and returns an object that
# wraps the resulting string.
sub new {
    my($proto, $ref) = @_;
    my $dump = $proto->_new($ref, '');
    my $self = \$dump;
    bless($self, (ref($proto) || $proto));
}

# Returns the serialized text held by the object.
sub Dump {
    my $str = shift;
    $$str;
}

# Class method: parses the file $file written via new()/Dump() and returns
# the reconstructed hash ref. An unreadable file yields an empty hash ref
# and a message on STDERR; unparsable lines are reported and skipped.
sub Read {
    my($proto, $file) = @_;
    my $ref = {};
    my $num = 0;
    my $fh = Symbol::gensym();
    if (!open($fh, '<', $file)) {
        print STDERR "Failed to open file $file: $!\n";
        return $ref;
    }
    while (defined(my $line = <$fh>)) {
        ++$num;
        # Ignore comments and empty lines
        next if $line =~ /^\s*$/ || $line =~ /^\s*\#/;
        if (my($var, $val) = ($line =~ /^(\w+)=(.*)/)) {
            my $r = $ref;
            my @vars = split(/__/, $var);
            print "Dump::Read: Setting ", join("->", @vars), " to $val.\n"
                if $main::verbose;
            while (defined(my $v = shift @vars)) {
                if (@vars) {
                    # Replace anything that is not a hash; a stray scalar
                    # here would otherwise die under "strict refs".
                    $r->{$v} = {} unless ref($r->{$v}) eq 'HASH';
                    $r = $r->{$v};
                } else {
                    $r->{$v} = $val;
                }
            }
        } else {
            print STDERR "Invalid line $num in file $file.\n";
        }
    }
    close($fh);
    $ref;
}


package main;

# Sends a mail by piping $body into the command configured in $MAIL.
#   $subject  subject line
#   $body     mail text
#   $to       recipient, defaults to $ADMIN
# In verbose mode the command and body are printed; in debug mode nothing
# is sent. Returns 1 on success, 0 on failure (reported on STDERR) and
# nothing in debug mode.
sub Mail ($$;$) {
    my($subject, $body, $to) = @_;

    # $to defaults to $ADMIN
    $to ||= $ADMIN;

    # Substitute both placeholders in a single pass, so that text inserted
    # for one placeholder is never rescanned for the other one.
    my %value_of = ('subject' => $subject, 'to' => $to);
    my $command = $MAIL;
    $command =~ s/\$(subject|to)\b/quotemeta($value_of{$1})/eg;

    if ($verbose) {
        print "Sending mail via command: $command\n$body\n";
    }
    return if $debug;

    # A mail command that exits early must not kill us with SIGPIPE.
    local $SIG{'PIPE'} = 'IGNORE';
    my $ph = Symbol::gensym();
    if (!open($ph, "| $command")) {
        print STDERR "Failed to open pipe to mail command $command: $!\n";
        return 0;
    }
    my $printed = print {$ph} $body;
    my $closed = close($ph);
    if (!($printed && $closed)) {
        print STDERR "Failed to send mail via command $command",
            " (error: $!, exit status: $?)\n";
        return 0;
    }
    return 1;
}


# Runs a df-like command and checks every reported file system.
#   $df_command    command to run; its output is parsed line by line
#   $max_disc      default maximum usage in percent (see %CAPACITIES)
#   $max_disc_inc  maximum rise in percentage points between two runs;
#                  0 disables this check
#   $cache         hash ref holding the state of the previous run; updated
#   $key           prefix for the cache keys, keeps the results of
#                  different commands (blocks, inodes) apart
# Sends a mail once per event. Returns 0 on success, 1 if the command
# could not be started.
sub DiskUsage ($$$$$) {
    my($df_command, $max_disc, $max_disc_inc, $cache, $key) = @_;

    my $ph = Symbol::gensym();
    if (!open($ph, "$df_command |")) {
        print STDERR "Failed to open pipe to command $df_command: $!\n";
        return 1;
    }

    while (defined(my $line = <$ph>)) {
        my($device, $total, $used, $avail, $capacity, $mount) =
            ($line =~ /^(\S+)                 # Device
                       \s+(\d+)               # Blocks total
                       \s+(\d+)               # Blocks used
                       \s+(\d+)               # Blocks available
                       \s+(\d+(?:\.\d+)?)\%   # Capacity (in percent)
                       \s+(\S.*)/x)           # Mount point
            or next;

        # The cache key must consist of word characters only and must not
        # contain "__", otherwise Dump::Read cannot restore it and the
        # state of devices like /dev/mapper/vg-root is lost on every run.
        # NOTE(review): the key is built from the device name only, so
        # file systems sharing a device name (e.g. "tmpfs") share state.
        my $name = $key . $device;
        $name =~ s/\W+/_/g;
        $name =~ s/_{2,}/_/g;

        my $entry = ($cache->{'diskusage'}->{$name} ||= {});
        my $old_capacity = $entry->{'capacity'};
        # Ignore a damaged cache value instead of comparing with garbage.
        undef $old_capacity
            unless defined($old_capacity)
                && $old_capacity =~ /^\d+(?:\.\d+)?$/;
        $entry->{'capacity'} = $capacity;

        print "Capacity of $mount ($device) is $capacity\%.\n" if $verbose;
        my $cap = exists($CAPACITIES{$device})
            ? $CAPACITIES{$device} : $max_disc;

        if ($capacity > $cap) {
            print "=> Exceeds allowed capacity of $cap\%.\n" if $verbose;
            if ($entry->{'mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity exceeded on $HOST", <<"EOF");
Drive $mount ($device) exceeds its allowed disk capacity of $cap \%.
The current parameters are:

    Mount point:   $mount
    Device:        $device
    Blocks total:  $total
           used:   $used
           avail:  $avail
    Capacity:      $capacity %
    Allowed:       $cap %

This message is generated by the script $0.
EOF
                $entry->{'mail_sent'} = 1;
            }
        } else {
            $entry->{'mail_sent'} = 0;
            print "=> Doesn't exceed allowed capacity of $cap\%.\n"
                if $verbose;
        }

        printf("Checking increment for %s: %s <=> %s.\n",
               $device, $capacity,
               defined($old_capacity) ? $old_capacity : "undef")
            if $verbose;

        # The increment check needs a previous value and a non-zero limit.
        # (A limit of 0 used to report an excess on every run.)
        my $inc_exceeded = defined($old_capacity)
            && $max_disc_inc
            && $capacity >= $old_capacity + $max_disc_inc;

        if ($inc_exceeded) {
            print "=> Exceeds allowed increment $max_disc_inc\%.\n"
                if $verbose;
            if ($entry->{'inc_mail_sent'}) {
                print "Mail already sent, ignoring.\n" if $verbose;
            } else {
                Mail("Disk capacity raised on $HOST", <<"EOF");
Drive $mount ($device) has raised its capacity from $old_capacity
to $capacity since the last check. The current parameters are:

    Mount point:   $mount
    Device:        $device
    Blocks total:  $total
           used:   $used
           avail:  $avail
    Capacity:      $capacity %
    Allowed:       $cap %
    Old capacity:  $old_capacity %

This message is generated by the script $0.
EOF
                $entry->{'inc_mail_sent'} = 1;
            }
        } else {
            print "=> Doesn't exceed allowed increment of $max_disc_inc.\n"
                if $verbose;
            $entry->{'inc_mail_sent'} = 0;
        }
    }

    # The exit status is deliberately ignored: the lines that could be
    # parsed have already been processed.
    close($ph);

    return 0;
}


# Runs the command in $UPTIME and checks the 1, 5 and 15 minute load
# averages against $max_load.
#   $max_load  maximum allowed load average
#   $cache     hash ref holding the state of the previous run; updated
# Sends a mail once per event. Returns 0 on success, 1 if the command
# could not be run or its output could not be parsed.
sub Uptime ($$) {
    my($max_load, $cache) = @_;

    my $ph = Symbol::gensym();
    if (!open($ph, "$UPTIME |")) {
        print STDERR "Failed to open pipe to load command $UPTIME: $!\n";
        return 1;
    }
    my $line = <$ph>;
    if (!$line) {
        print STDERR "Uptime command $UPTIME returned empty output.\n";
        return 1;
    }
    undef $ph;    # Dropping the last reference closes the pipe

    my($one, $five, $fifteen) =
        ($line =~ /load\s+average\:\s+
                   (\d+(?:\.\d+)?)       # 1 Minute average
                   ,\s+(\d+(?:\.\d+)?)   # 5 Minutes average
                   ,\s+(\d+(?:\.\d+)?)   # 15 Minutes average
                   $/x);
    if (!defined($fifteen)) {
        # $! carries no information here, show the offending line instead.
        chomp($line);
        print STDERR
            "Failed to parse output of Uptime command $UPTIME: $line\n";
        return 1;
    }

    printf("Detected average loads %s, %s, %s (1, 5 and 15 minutes)\n",
           $one, $five, $fifteen) if $verbose;

    if ($one > $max_load || $five > $max_load || $fifteen > $max_load) {
        print "=> Exceeds allowed maximum of $max_load.\n" if $verbose;
        if ($cache->{'uptime'}->{'mail_sent'}) {
            print "Mail already sent, ignoring.\n" if $verbose;
        } else {
            Mail("Maximum load exceeded on $HOST", <<"EOF");
The maximum load of $max_load is exceeded on host $HOST. The average
loads are:

    Last minute:      $one
    Last 5 minutes:   $five
    Last 15 minutes:  $fifteen

This message is generated by the script $0.
EOF
            $cache->{'uptime'}->{'mail_sent'} = 1;
        }
    } else {
        print "=> Doesn't exceed allowed maximum of $max_load.\n"
            if $verbose;
        $cache->{'uptime'}->{'mail_sent'} = 0;
    }

    return 0;
}


# Prints the usage message to STDERR and exits with status 1.
sub Usage {
    print STDERR <<"EOF";
Usage: $0 [options]

Possible options are:

  --cache-file=<file>       Store results in the given file; will be used
                            for watching increments and to prevent sending
                            more than one mail for a certain event.
                            Defaults to $CACHE_FILE. Use "none" to run
                            without a cache file.
  --max-load=<load>         Set maximum load; defaults to $MAX_LOAD
  --max-disc=<percent>      Set maximum disk capacity in percent; defaults
                            to $MAX_DISC.
  --max-disc-inc=<percent>  Set the maximum increment of the disk capacity
                            between two checks. Defaults to $MAX_DISC_INC.
  --debug                   Enable debugging mode (implies --verbose)
  --verbose                 Enable verbose mode
  --help                    Print this message

EOF
    exit 1;
}


# Loads the cache file $file into the hash referenced by $ref.
# Nothing is loaded if $file is empty, "none" or not an existing file.
# Returns 0 on success, 1 if the lock could not be obtained.
sub LoadCacheFile ($$) {
    my($file, $ref) = @_;

    return 0 unless ($file and $file ne "none" and -f $file);

    print "Loading cache file $file.\n" if $verbose;

    # The lock handle must live until the file has been read: the lock is
    # released as soon as the handle goes out of scope.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 1 = LOCK_SH
        if (!open($lfh, '>>', "$file.lock") || !flock($lfh, 1)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }

    %$ref = %{Dump->Read($file)};
    return 0;
}


# Saves the hash referenced by $ref into the cache file $file. The data is
# written to "$file.new" first, the previous cache file is kept as
# "$file.bak". In debug mode, or if $file is empty or "none", nothing is
# written. Returns 0 on success, 1 on failure (reported on STDERR).
sub SaveCacheFile ($$) {
    my($file, $ref) = @_;

    my $dump = Dump->new($ref)->Dump();
    my $time = localtime();
    $dump = <<"EOF";
#
#   Automatically generated by $0 at $time.
#   Do not edit.
#
$dump
EOF

    print "Saving cache:\n$dump\n" if $verbose;

    return 0 if $debug;
    # Same convention as in LoadCacheFile: no file name, no cache.
    return 0 unless ($file and $file ne "none");

    # The lock handle must live until the file has been replaced: the lock
    # is released as soon as the handle goes out of scope.
    my $lfh;
    if ($USE_LOCKS) {
        $lfh = Symbol::gensym();
        # 2 = LOCK_EX
        if (!open($lfh, '>>', "$file.lock") || !flock($lfh, 2)) {
            print STDERR "Failed to open lock file $file.lock: $!\n";
            return 1;
        }
    }

    my $cfh = Symbol::gensym();
    if (!open($cfh, '>', "$file.new")
        or !(print {$cfh} $dump)
        or !close($cfh)) {
        print STDERR "Failed to write cache file $file.new: $!\n";
        return 1;
    }
    if (-f "$file.bak" && !unlink("$file.bak")) {
        print STDERR "Failed to remove backup file $file.bak: $!\n";
        return 1;
    }
    if (-f $file && !rename($file, "$file.bak")) {
        print STDERR "Failed to rename $file to $file.bak: $!\n";
        return 1;
    }
    if (!rename("$file.new", $file)) {
        print STDERR "Failed to rename $file.new to $file: $!\n";
        return 1;
    }
    return 0;
}


############################################################################
#
#   This is main().
#
############################################################################

{
    my $max_load = $MAX_LOAD;
    my $max_disc = $MAX_DISC;
    my $max_disc_inc = $MAX_DISC_INC;
    my $cache_file = $CACHE_FILE;

    # Options taking a value accept both "--opt=value" and "--opt value".
    while (defined(my $arg = shift @ARGV)) {
        if ($arg =~ /^\-\-cache-file(?:=(.*))?$/) {
            $cache_file = defined $1 ? $1 : shift(@ARGV);
            Usage() unless defined $cache_file;
        } elsif ($arg =~ /^\-\-max\-load(?:=(.*))?$/) {
            $max_load = defined $1 ? $1 : shift(@ARGV);
            Usage() unless defined $max_load;
        } elsif ($arg =~ /^\-\-max\-disc-inc(?:=(.*))?$/) {
            $max_disc_inc = defined $1 ? $1 : shift(@ARGV);
            Usage() unless defined $max_disc_inc;
        } elsif ($arg =~ /^\-\-max\-disc(?:=(.*))?$/) {
            $max_disc = defined $1 ? $1 : shift(@ARGV);
            Usage() unless defined $max_disc;
        } elsif ($arg eq "--debug") {
            $debug = $verbose = 1;
        } elsif ($arg eq "--verbose") {
            $verbose = 1;
        } elsif ($arg eq "--help") {
            Usage();
        } else {
            Usage();
        }
    }

    # Each step runs only if all previous steps succeeded.
    my %cache;
    my $status = 0;
    $status ||= LoadCacheFile($cache_file, \%cache);
    $status ||= DiskUsage($DF, $max_disc, $max_disc_inc, \%cache, "f");
    $status ||= DiskUsage($DFI, $max_disc, $max_disc_inc, \%cache, "i");
    $status ||= Uptime($max_load, \%cache);
    $status ||= SaveCacheFile($cache_file, \%cache);
    exit $status;
}