#!/usr/bin/perl
# heartbeat.pl by stbaram [ Amulet, Inc. ] 2004.2.19
#
# Cluster heartbeat daemon.  Once started it loops forever: every $interval
# seconds it checks network reachability, load average, swap usage, EXT3
# errors, PostgreSQL and the quorum time stamp.  Each failing check bumps a
# counter in %hoststat; when a counter exceeds $fatal_count the failover
# routine for the local host is run.  Problems are appended to $tmpfile and
# mailed to $mailto; everything is also appended to $logfile.

use strict;
use warnings;

sleep(60);                              # wait for all interfaces coming up
unlink glob('/var/log/heartbeat.*');    # clean up old log files

my $logfile     = "/var/log/heartbeatlog";
my $quorumfile  = "/var/quorum/dat/quorum";
my $tmpfile     = "/var/log/heartbeat.$$";
my $interval    = 300;                  # sleep time in second.
my $fatal_count = 2;                    # error count limit.
my $mailto      = 'target@maintain.company';
my $mailfrom    = 'root@local.servername';
my $program     = '/root/admin/heartbeat.pl';
my $hostname    = gethostname();
my $node        = node_name($hostname); # short name used in %hoststat keys

# Consecutive-failure counters, keyed "<node>/<check>".
my %hoststat = map { $_ => 0 } qw(
    seki_lb1/ping_up  seki_lb1/ping_down  seki_lb1/cpu  seki_lb1/swap
    seki_lb2/ping_up  seki_lb2/ping_down  seki_lb2/cpu  seki_lb2/swap
    seki_ap1/ping     seki_ap1/cpu        seki_ap1/swap
    seki_ap2/ping     seki_ap2/cpu        seki_ap2/swap
    seki_db1/ping     seki_db1/cpu        seki_db1/swap
    seki_db1/raid     seki_db1/pg_start   seki_db1/quorum
    seki_db2/ping_up  seki_db2/ping_down  seki_db2/cpu  seki_db2/swap
    seki_db2/raid     seki_db2/quorum
);

main();

# Main loop: kill older instances, then check / mail / fail over forever.
sub main {
    clean_up();
    while (1) {
        my $error = 0;
        $error += check_network();
        $error += check_cpuload();
        $error += check_swap();
        $error += check_raid();
        $error += check_pgsql();
        $error += check_quorum();
        mail2admin() if $error > 0;

        # Iterate over a snapshot of the keys: the failover routines call
        # ping_stat(), which may insert new keys, and inserting into a hash
        # while walking it with each() is unspecified.
        for my $key (keys %hoststat) {
            next unless $hoststat{$key} > $fatal_count;
            run_failover();
        }
        sleep($interval);
    }
}

# Call the failover routine that belongs to the local host.
sub run_failover {
    if    ($hostname =~ /seki_lb1/) { failover_lb1(); }
    elsif ($hostname =~ /seki_lb2/) { failover_lb2(); }
    elsif ($hostname =~ /seki_ap1/) { failover_ap1(); }
    elsif ($hostname =~ /seki_ap2/) { failover_ap2(); }
    elsif ($hostname =~ /seki_db1/) { failover_db1(); }
    elsif ($hostname =~ /seki_db2/) { failover_db2(); }
    return;
}

# Return the first known node name contained in $host, or $host itself when
# none matches.  The order is the same as in the hostname if/elsif chains.
sub node_name {
    my ($host) = @_;
    for my $name (qw(seki_lb1 seki_lb2 seki_ap1 seki_ap2 seki_db1 seki_db2)) {
        return $name if index($host, $name) >= 0;
    }
    return $host;
}

# True when the counter stored under $key exceeds $fatal_count.
sub over_limit {
    my ($key) = @_;
    return ($hoststat{$key} || 0) > $fatal_count;
}

# Write $message to both the mail report ($tmpfile) and the log ($logfile).
sub report {
    my ($message) = @_;
    errorlog($message);
    printlog($message);
    return;
}

# If the counter $key is over the limit: report $message, reset the counter
# and return 1.  Otherwise do nothing and return 0.
sub warn_if_fatal {
    my ($key, $message) = @_;
    return 0 unless over_limit($key);
    report($message);
    $hoststat{$key} = 0;
    return 1;
}

# Kill (SIGKILL) every other running copy of $program found in `ps auxw`.
sub clean_up {
    open(my $ps, '-|', "/bin/ps auxw | /bin/grep $program | /bin/grep -v /bin/grep")
        or die "$0: $!: Cannot read ps command\n";
    while (my $line = <$ps>) {
        print "$line";
        next unless $line =~ /\Q$program\E/;
        my @field = split(/[ \t\n\r]+/, $line);
        my $pid   = $field[1];          # second column of ps auxw
        next unless defined $pid && $pid =~ /^\d+$/;
        next if $pid == $$;             # never kill ourselves
        print "Stopping $$: /bin/kill -9 $pid\n";
        system('/bin/kill', '-9', $pid);
    }
    close($ps);
    return;
}

# Ping the peers of the local host.  Returns 1 (and logs) only when the
# hostname is not one of the known nodes; ping results are recorded in
# %hoststat by ping_stat() and do not affect the return value.
sub check_network {
    if ($hostname =~ /seki_lb1/) {
        ping_stat('seki_lb1/ping_up',   'ping.to.out.test');
        ping_stat('seki_lb1/ping_down', '192.168.0.2');
    }
    elsif ($hostname =~ /seki_lb2/) {
        ping_stat('seki_lb2/ping_up',   'ping.to.out.test');
        ping_stat('seki_lb2/ping_down', '192.168.0.1');
    }
    elsif ($hostname =~ /seki_ap1/) {
        ping_stat('seki_ap1/ping', '192.168.0.4');
    }
    elsif ($hostname =~ /seki_ap2/) {
        ping_stat('seki_ap2/ping', '192.168.0.3');
    }
    elsif ($hostname =~ /seki_db1/) {
        ping_stat('seki_db1/ping', '192.168.0.6');
    }
    elsif ($hostname =~ /seki_db2/) {
        ping_stat('seki_db2/ping_up',   '192.168.0.5');
        ping_stat('seki_db2/ping_down', '172.16.1.3');
    }
    else {
        report("Hostname($hostname) is NG!");
        return 1;
    }
    return 0;
}

# Send one ping to $ping_to.  On failure the counter $stat_name is
# incremented and the failure is reported; on success it is reset to 0.
# Returns the raw system() status (0 means the ping succeeded).
sub ping_stat {
    my ($stat_name, $ping_to) = @_;
    my $status = system("/bin/ping -c 1 -w 2 $ping_to > /dev/null 2>&1");
    if ($status != 0) {
        report("ping to $ping_to status was: $status NG!");
        $hoststat{$stat_name} += 1;
    }
    else {
        $hoststat{$stat_name} = 0;
    }
    return $status;
}

# Check the 1-minute load average reported by uptime.  Returns 1 and bumps
# "<node>/cpu" when it is above 3, otherwise resets the counter and returns 0.
sub check_cpuload {
    open(my $uptime, '-|', '/usr/bin/uptime')
        or die "$0: $!: Cannot read uptime.\n";
    my $line = <$uptime>;
    close($uptime);

    # Take the first number after "load average:"; an unparsable line counts
    # as load 0.
    my $load = 0;
    if (defined $line && $line =~ /load average:\s*([0-9.]+)/) {
        $load = $1;
    }

    if ($load > 3) {
        report("Load average $load is NG!");
        $hoststat{"$node/cpu"} += 1;
        return 1;
    }
    $hoststat{"$node/cpu"} = 0;
    return 0;
}

# Check swap usage from the last line of `free`.  Returns 1 and bumps
# "<node>/swap" when more than half of the swap is in use.
sub check_swap {
    open(my $free, '-|', '/usr/bin/free | tail -1')
        or die "$0: $!: Cannot read swap space.\n";
    my $line = <$free>;
    close($free);

    # Fields: label, total, used, ...
    my @field = defined $line ? split(/[ \t\n\r]+/, $line) : ();
    my $total = defined $field[1] ? $field[1] : 0;
    my $used  = defined $field[2] ? $field[2] : 0;

    if ($used > ($total / 2)) {
        report("Swap usage $used of $total is NG!");
        $hoststat{"$node/swap"} += 1;
        return 1;
    }
    $hoststat{"$node/swap"} = 0;
    return 0;
}

# On the DB hosts, look for 'EXT3-fs error' lines in /var/log/messages.
# Returns 1 when errors are found (they are appended to $tmpfile and
# "<node>/raid" is incremented), otherwise 0.  Other hosts always return 0.
sub check_raid {
    my $status = 0;
    return 0 if ($hostname !~ /seki_db1/) && ($hostname !~ /seki_db2/);

    my $prevlog;
    if (-r "/tmp/heartbeat.raid") {
        open(my $prev, '<', "/tmp/heartbeat.raid") or return 0;
        # NOTE(review): this loop ends with $prevlog undefined (the read at
        # EOF overwrites the last line), so the "previous error" branch
        # below is never taken.  It looks like the last line was meant to
        # be remembered; confirm the intended behaviour before changing it,
        # because enabling that branch changes what gets reported.
        while ($prevlog = <$prev>) { }
        close($prev);
    }

    # Presumably walks the shared file systems so that pending EXT3 errors
    # get logged before we grep for them -- TODO confirm.
    system("/usr/bin/find /var/database/pgsql -print > /dev/null 2>&1");
    system("/usr/bin/find /var/backup -print > /dev/null 2>&1");

    my $fname = "/tmp/heartbeat.fsck.$$";
    system("/bin/grep 'EXT3-fs error' /var/log/messages > $fname");

    my $errors            = 0;
    my $prev_error_exists = 0;
    open(my $found, '<', $fname) or die "$0: $!: Cannot read $fname.\n";
    while (my $line = <$found>) {
        ++$errors;
        ++$prev_error_exists if defined $prevlog && $line eq $prevlog;
    }
    close($found);

    if ($prev_error_exists > 0) {
        # Keep only the errors that follow the previously seen line.
        open(my $in, '<', $fname) or die "$0: $!: Cannot read $fname\n";
        open(my $out, '>', "/tmp/heartbeat.raid")
            or die "$0: $!: Cannot write /tmp/heartbeat.raid\n";
        while (my $line = <$in>) {
            last if $line eq $prevlog;
        }
        while (my $line = <$in>) {
            $status = 1;
            print {$out} $line;
        }
        close($in);
        close($out);
    }
    elsif ($errors > 0) {
        system("/bin/cat $fname >> $tmpfile");
        $hoststat{"$node/raid"} += 1;
        $status = 1;
    }
    else {
        $hoststat{"$node/raid"} = 0;
    }
    unlink($fname);
    return $status;
}

# Check PostgreSQL on the DB hosts and try to restart it when it is down.
# db1 checks only while 192.168.0.1 answers; db2 checks only while
# 192.168.0.6 does NOT answer.  Returns 0 when nothing was wrong or nothing
# was checked, otherwise the system() status of the restart attempt.
sub check_pgsql {
    my $status_cmd = "su - postgres -c 'pg_ctl -D /var/database/pgsql status'";
    my $status;

    if ($hostname =~ /seki_db1/) {
        my $ping_status = ping_stat('seki_db2/dummy', '192.168.0.1');
        $hoststat{'seki_db2/dummy'} = 0;
        return 0 if $ping_status != 0;
        $status = system($status_cmd);
    }
    elsif ($hostname =~ /seki_db2/) {
        my $ping_status = ping_stat('seki_db2/dummy', '192.168.0.6');
        $hoststat{'seki_db2/dummy'} = 0;
        return 0 if $ping_status == 0;  # 192.168.0.6 is alive: do not check
        $status = system($status_cmd);
    }
    else {
        return 0;
    }

    return $status if $status == 0;     # PostgreSQL works OK

    report("PostgreSQL error...");
    system("/bin/mount /dev/sda1 /var/database");
    system("/bin/mount /dev/sda2 /var/backup");
    system("/bin/mount /dev/sda3 /var/quorum");
    system("ssh 192.168.0.3 /sbin/service tomcat stop");
    system("ssh 192.168.0.4 /sbin/service tomcat stop");
    $status = system("su - postgres -c 'pg_ctl -w -D /var/database/pgsql start'");
    if ($status == 0) {
        report("Starting PostgreSQL succeeded...");
        system("ssh 192.168.0.3 /sbin/service tomcat start");
        system("ssh 192.168.0.4 /sbin/service tomcat start");
        $hoststat{"$node/pg_start"} = 0;
    }
    else {
        report("Could not start PostgreSQL...");
        $hoststat{"$node/pg_start"} += 1;
    }
    return $status;
}

# Quorum time stamp handling on the DB hosts.
#   db1: writes the current epoch time to $quorumfile and tars it to
#        /dev/sda4.  Returns 1 when the stamp cannot be written, else 0.
#   db2: extracts the stamp from /dev/sda4 and compares it with the clock.
#        Returns 0 when the stamp is fresh (or sda1 is mounted rw locally),
#        2 when the stamp is older than 2 * $interval, 1 when the quorum
#        file cannot be read.
# Other hosts always return 0.
sub check_quorum {
    my $status = 0;
    return 0 if ($hostname !~ /seki_db1/) && ($hostname !~ /seki_db2/);

    if ($hostname =~ /seki_db1/) {
        # Result is not used here; reset the scratch counter so it cannot
        # grow past $fatal_count and trigger pointless failover calls.
        ping_stat('seki_db1/dummy', '192.168.0.1');
        $hoststat{'seki_db1/dummy'} = 0;

        $status = system("/bin/date +%s > $quorumfile && /bin/sync");
        if ($status != 0) {
            report("# Cannot write quorum time.");
            $hoststat{"$node/quorum"} += 1;
            $status = 1;
        }
        else {
            system("cd /var/quorum/dat && tar cf /dev/sda4 ./quorum && cd /root/admin/");
            printlog("# Write new quorum time.");
            $hoststat{"$node/quorum"} = 0;
        }
        return $status;
    }

    # seki_db2
    system("cd /var/quorum/dat && tar xf /dev/sda4 && cd /root/admin/");

    my $qf;
    unless (-e $quorumfile && open($qf, '<', $quorumfile)) {
        # An unreadable file must not be mistaken for an old stamp: status 2
        # is what allows mount_raid() to take over the shared disk.
        report("# Cannot read quorum file.");
        return 1;
    }
    my $line = <$qf>;
    close($qf);

    # Numeric prefix of the first line; an empty file counts as time 0.
    my ($stamp) = defined $line ? $line =~ /^\s*(\d+)/ : ();
    $stamp = 0 unless defined $stamp;

    if (time() > $stamp + $interval * 2) {
        open(my $mount, '-|', '/bin/mount')
            or die "$0: $!: Cannot read mount result\n";
        my $ng = '';
        while (my $entry = <$mount>) {
            $ng .= $entry if $entry =~ /sda1/ && $entry =~ /\(rw\)/;
        }
        close($mount);

        if (length($ng) > 0) {
            # sda1 is mounted read-write on this host.
            printlog("Do not check quorum.");
            $status = 0;
        }
        else {
            report("# quorum stamp is old.");
            $status = 2;
        }
    }
    else {
        printlog("# quorum stamp is OK.");
        $status = 0;
    }
    return $status;
}

# Append one time-stamped line to $file; dies when the file cannot be opened.
sub append_log {
    my ($file, $message) = @_;
    my $logdate = getdaytime();
    open(my $out, '>>', $file) or die "$0: $!: Cannot write $file\n";
    print {$out} "$logdate $hostname $message\n";
    close($out);
    return;
}

# Append $input to the pending mail report ($tmpfile).
sub errorlog {
    my ($input) = @_;
    append_log($tmpfile, $input);
    return;
}

# Mail the pending report ($tmpfile) to $mailto and remove it.  Does nothing
# when there is no report file.
sub mail2admin {
    return unless -e $tmpfile;
    system("/usr/bin/mhmail -s \"Heartbeat Error on $hostname.\" -from $mailfrom $mailto < $tmpfile");
    unlink($tmpfile);
    return;
}

# Append $input to the permanent log ($logfile).
sub printlog {
    my ($input) = @_;
    append_log($logfile, $input);
    return;
}

# Return the local time formatted as YYYY/MM/DD:hh:mm:ss.
sub getdaytime {
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    $mon  += 1;
    $year += 1900 if $year < 1900;
    return sprintf("%4.4d/%2.2d/%2.2d:%2.2d:%2.2d:%2.2d",
                   $year, $mon, $mday, $hour, $min, $sec);
}

# Return the output of /bin/hostname without line terminators.
sub gethostname {
    open(my $cmd, '-|', '/bin/hostname')
        or die "$0: $!: Cannot read hostname\n";
    my $name = <$cmd>;
    close($cmd);
    $name = '' unless defined $name;
    $name =~ s/[\n\r]//g;
    return $name;
}

# lb1 never fails over by itself; it only reports counters over the limit.
sub failover_lb1 {
    my $do_mail = 0;
    $do_mail += warn_if_fatal('seki_lb1/ping_up',   "# lb1 stops only when lb2 ordered.");
    $do_mail += warn_if_fatal('seki_lb1/ping_down', "# lb1 stops only when lb2 ordered.");
    $do_mail += warn_if_fatal('seki_lb1/cpu',       "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_lb1/swap',      "Too much swap is used. Please check!");
    mail2admin() if $do_mail > 0;
    return;
}

# Try to shut lb1 down over ssh via $addr.  $stat_key is the counter that
# tracks pings to $addr.  Returns 1 when lb2 should take over (lb1 was shut
# down, or $addr does not answer), 0 when the ssh shutdown failed.
sub stop_lb1_via {
    my ($stat_key, $addr) = @_;
    my $status = ping_stat($stat_key, $addr);
    if ($status != 0) {
        report("ping to seki_lb1 FAILED. lb1 may be DEAD!");
        $hoststat{$stat_key} = 0;
        return 1;
    }
    $status = system("/usr/bin/ssh $addr /sbin/shutdown -h now");
    if ($status != 0) {
        report("Stopping seki_lb1 through $addr FAILED!");
        return 0;
    }
    report("Stopping seki_lb1 through $addr succeeded.");
    return 1;
}

# Failover on lb2: when lb1 is unreachable (and our own network works),
# stop lb1 if possible and take over its addresses and httpd.
sub failover_lb2 {
    my $failover = 0;
    my $do_mail  = 0;

    # Make sure our own network is usable before blaming lb1.
    my $mynet = ping_stat('seki_lb2/dummy', 'ping.to.out.test');
    $mynet += ping_stat('seki_lb2/dummy', '192.168.0.5');
    $hoststat{'seki_lb2/dummy'} = 0;

    if ($mynet > 0) {
        report("** Not doing failover because my network is DEAD!");
        $hoststat{'seki_lb2/ping_up'}   = 0;
        $hoststat{'seki_lb2/ping_down'} = 0;
        ++$do_mail;
    }
    elsif (over_limit('seki_lb2/ping_up')) {
        $failover += stop_lb1_via('seki_lb2/ping_down', '192.168.0.1');
        $hoststat{'seki_lb2/ping_up'} = 0;
        ++$do_mail;
    }
    elsif (over_limit('seki_lb2/ping_down')) {
        $failover += stop_lb1_via('seki_lb2/ping_up', 'ping.to.out.test');
        $hoststat{'seki_lb2/ping_down'} = 0;
        ++$do_mail;
    }

    $do_mail += warn_if_fatal('seki_lb2/cpu',  "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_lb2/swap', "Too much swap is used. Please check!");

    if ($failover > 0) {
        report("Doing FAILOVER on seki_lb2!");
        system('/sbin/ifconfig eth0 down');
        system('/sbin/ifconfig eth1 down');
        system('/sbin/ifconfig eth0 out.to.wan.interface netmask 255.255.255.248 up');
        system('/sbin/ifconfig eth1 192.168.0.1 netmask 255.255.255.0 up');
        system('/sbin/route add default gw 211.19.97.41 eth0');
        system('/etc/init.d/httpd start');
        # system('/etc/init.d/iptables restart');
    }
    mail2admin() if $do_mail > 0;
    return;
}

# ap1 only reports counters over the limit.
sub failover_ap1 {
    my $do_mail = 0;
    $do_mail += warn_if_fatal('seki_ap1/ping', "192.168.0.4 is not alive. Please check!");
    $do_mail += warn_if_fatal('seki_ap1/cpu',  "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_ap1/swap', "Too much swap is used. Please check!");
    mail2admin() if $do_mail > 0;
    return;
}

# ap2 only reports counters over the limit.
sub failover_ap2 {
    my $do_mail = 0;
    $do_mail += warn_if_fatal('seki_ap2/ping', "192.168.0.3 is not alive. Please check!");
    $do_mail += warn_if_fatal('seki_ap2/cpu',  "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_ap2/swap', "Too much swap is used. Please check!");
    mail2admin() if $do_mail > 0;
    return;
}

# Take db1 out of service: bring eth0 down, stop smb and PostgreSQL, and
# unmount the shared file systems.
sub release_db1 {
    report("Doing FAILOVER on seki_db1!");
    system('/sbin/ifconfig eth0 down');
    system('/etc/init.d/smb stop');
    my $status = system("su - postgres -c 'pg_ctl -m f -D /var/database/pgsql stop'");
    if ($status == 0) {
        report("Stopping PostgreSQL succeeded...");
    }
    else {
        report("Could not stop PostgreSQL...");
    }
    sleep(1);
    system('/bin/umount -r /var/database');
    system('/bin/umount -r /var/backup');
    system('/bin/umount -r /var/quorum');
    return;
}

# Failover on db1: step aside when our own network is dead or the quorum
# stamp cannot be written, give up after repeated PostgreSQL start
# failures, and report cpu / swap / raid counters over the limit.
sub failover_db1 {
    my $do_mail = 0;

    if (over_limit('seki_db1/ping')) {
        my $mynet = ping_stat('seki_db1/dummy', '192.168.0.1');
        $hoststat{'seki_db1/dummy'} = 0;
        if ($mynet == 0) {
            report("** Not doing failover because my network is ALIVE!");
        }
        else {
            release_db1();
        }
        $hoststat{'seki_db1/ping'} = 0;
        ++$do_mail;
    }

    if (over_limit('seki_db1/pg_start')) {
        report("Giving up... Shutting down eth0...");
        system('/sbin/ifconfig eth0 down');
        $hoststat{'seki_db1/pg_start'} = 0;
        ++$do_mail;
    }

    if (over_limit('seki_db1/quorum')) {
        release_db1();
        $hoststat{'seki_db1/ping'}   = 0;
        $hoststat{'seki_db1/quorum'} = 0;
        ++$do_mail;
    }

    $do_mail += warn_if_fatal('seki_db1/cpu',  "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_db1/swap', "Too much swap is used. Please check!");
    $do_mail += warn_if_fatal('seki_db1/raid', "Something wrong with shared disk. Please check!");
    mail2admin() if $do_mail > 0;
    return;
}

# Failover on db2: when db1 is unreachable (and our own network works),
# mount the shared disk, take over 192.168.0.5 and start smb / PostgreSQL.
# Returns 0 early when the shared disk cannot be mounted safely.
sub failover_db2 {
    my $do_mail = 0;

    my $mynet = ping_stat('seki_db2/dummy', '192.168.0.1');
    $hoststat{'seki_db2/dummy'} = 0;

    if ($mynet > 0) {
        report("** Not doing failover because my network is DEAD!");
        $hoststat{'seki_db2/ping_up'} = 0;
        ++$do_mail;
    }
    elsif (over_limit('seki_db2/ping_up')) {
        report("Doing FAILOVER on $hostname!");
        if (mount_raid() > 0) {
            report("Giving up mount RAID5 directories!");
            mail2admin();
            return 0;
        }
        system('/sbin/ifconfig eth0 down');
        system('/sbin/ifconfig eth0 192.168.0.5 netmask 255.255.255.0 up');
        system('/sbin/route add default gw 192.168.0.1');
        sleep(3);
        system("ssh 192.168.0.3 /sbin/service tomcat stop");
        system("ssh 192.168.0.4 /sbin/service tomcat stop");
        system("/etc/init.d/smb start");
        # Remove a pid file left on the shared disk, if any, before starting.
        if (-e "/var/database/pgsql/postmaster.pid") {
            unlink("/var/database/pgsql/postmaster.pid");
        }
        my $status = system("su - postgres -c 'pg_ctl -w -D /var/database/pgsql start'");
        if ($status == 0) {
            report("Starting PostgreSQL succeeded...");
            system("ssh 192.168.0.3 /sbin/service tomcat start");
            system("ssh 192.168.0.4 /sbin/service tomcat start");
        }
        else {
            report("Could not start PostgreSQL...");
        }
        $hoststat{'seki_db2/ping_up'} = 0;
        ++$do_mail;
    }

    $do_mail += warn_if_fatal('seki_db2/ping_down', "172.16.1.3 is not alive. Please check!");
    $do_mail += warn_if_fatal('seki_db2/cpu',       "Heavy process running. Please check!");
    $do_mail += warn_if_fatal('seki_db2/swap',      "Too much swap is used. Please check!");
    $do_mail += warn_if_fatal('seki_db2/raid',      "Something wrong with shared disk. Please check!");
    mail2admin() if $do_mail > 0;
    return;
}

# Mount the shared database and backup partitions on this host.
sub mount_shared_disks {
    system('/bin/mount /dev/sda1 /var/database');
    system('/bin/mount /dev/sda2 /var/backup');
    return;
}

# Decide whether the shared disk may be mounted on this host, and mount it
# if so.  Returns 0 when the disk was mounted, 1 when it was left alone.
sub mount_raid {
    sleep(10);
    my $check  = ping_stat('seki_db1/ping_down', '172.16.1.3');
    my $quorum = check_quorum();

    if ($check > 0) {
        # 172.16.1.3 does not answer: rely on the quorum stamp alone.
        if ($quorum == 2) {
            report("No ANSWER from DB1!");
            mount_shared_disks();
            return 0;
        }
        if ($quorum == 0) {
            report("DB1 may access Shared DISK!");
            return 1;
        }
        report("Check FAILED!");
        return 1;
    }

    # 172.16.1.3 answers: ask it which file systems it has mounted rw.
    open(my $remote, '-|', 'ssh 172.16.1.3 /bin/mount')
        or die "$0: $!: Cannot read mount result\n";
    my $ng = '';    # shared partitions still mounted rw on the other host
    my $ok = '';    # its hda2 line, used as evidence that ssh produced output
    while (my $line = <$remote>) {
        next unless $line =~ /\(rw\)/;
        if ($line =~ /sda1/ || $line =~ /sda2/) {
            $ng .= $line;
        }
        elsif ($line =~ /hda2/) {
            $ok .= $line;
        }
    }
    close($remote);

    if (length($ng) < 1 && length($ok) > 1) {
        printlog("SSH succeeded. DB1 unmount OK.");
        mount_shared_disks();
        return 0;
    }
    if (length($ng) > 1 && length($ok) > 1) {
        report("SSH succeeded. But DB1 still mount shared Disk.");
        return 1;
    }
    if ($quorum == 2) {
        report("SSH Failed. But quorum is old!");
        mount_shared_disks();
        return 0;
    }
    report("Device is busy!");
    return 1;
}