#!/usr/bin/perl -w

# check_zfs Nagios plugin for monitoring ZFS on Linux zpools
# Copyright (c) 2007 Written by Nathan Butcher
# Reworked for ZFS on Linux by Sander Smeenk / BIT

# Released under the GNU Public License

# Verbose levels:-
# 1 - Only alert us of zpool health and size stats
# 2 - ...also alert us of failed devices when things go bad
# 3 - ...alert us of the status of all devices regardless of health
#
# Usage:   check_zfs <zpool> <verbose level 1-3> <minspace in bytes>
# Example: check_zfs zeepool 1 1048576000
#	OK:ZPOOL zeepool : ONLINE {Size:3.97G Used:183K Avail:3.97G Cap:0%}

use strict;

## Nagios plugin exit codes, keyed by state name.
my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
## Current plugin state and message. Both start out pessimistic so that
## every early exit below reports UNKNOWN.
my $state="UNKNOWN";
my $msg="FAILURE";

## Command line: <zpool> <verbose level 1-3> <minspace>
## A defined() test is used instead of '||' so that a literal 0 is kept as
## a value rather than being mistaken for a missing argument.
my $pool=defined $ARGV[0] ? $ARGV[0] : "";
my $verbose=defined $ARGV[1] ? $ARGV[1] : "";
my $min_reservable_space=defined $ARGV[2] ? $ARGV[2] : "";

if ($pool eq "" or $verbose eq "" or $min_reservable_space eq "") {
    print "Argument errors\n";
    exit $ERRORS{$state};
}

## The pool name ends up in external command lines and in regular
## expressions, so only accept the characters a zpool name may contain
## (a leading letter, then alphanumerics, '_', '.', ':' and '-').
## Names containing whitespace are rejected.
if ($pool !~ /\A[A-Za-z][A-Za-z0-9_.:-]*\z/) {
    print "Invalid zpool name '$pool'\n";
    exit $ERRORS{$state};
}

## The threshold is compared numerically later on; refuse anything that is
## not a plain non-negative number (integer, decimal or exponent form).
if ($min_reservable_space !~ /\A\d+(?:\.\d+)?(?:[eE]\+?\d+)?\z/) {
    print "Minimum space must be a non-negative number of bytes\n";
    exit $ERRORS{$state};
}

## Values parsed from 'zpool list', plus the accumulated device detail text.
my $size="";
my $used="";
my $avail="";
my $cap="";
my $health="";
my $dmge="";

## Match the level textually: a numeric range test would let "2.5" or
## "1abc" through and emit "isn't numeric" warnings under -w.
if ($verbose !~ /\A[123]\z/) {
	print "Verbose levels range from 1-3\n";
	exit $ERRORS{$state};
}

## Ask zpool for exactly the columns we need instead of parsing the default
## table, whose column set differs between ZFS releases (an extra column
## shifts every positional capture and yields a bogus health value).
##   -H  scripted mode: no header line, fields separated by a single tab
##   -o  explicit, ordered list of properties to print
my @listcommand=('zpool', 'list', '-H', '-o', 'name,size,allocated,free,capacity,health', $pool);

## Printable form of the command, used in error messages.
my $statcommand=join(' ', @listcommand);

## List-form pipe open: each argument is handed to zpool as-is, so the pool
## name never passes through a shell.
my $list_fh;
if (! open($list_fh, '-|', @listcommand)) {
	print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
	exit $ERRORS{$state};
}

while (my $list_line = <$list_fh>) {
	chomp $list_line;
	my ($list_name, @list_fields) = split /\t/, $list_line;

	## Only accept the row for our pool, compared as a plain string.
	next unless defined $list_name && $list_name eq $pool;
	## Ignore short rows rather than leaving some values undefined.
	next unless @list_fields >= 5;

	($size, $used, $avail, $cap, $health) = @list_fields;
}
## The exit status of zpool is deliberately not checked here: a failed
## query leaves $health empty.
close($list_fh);

## No health value means 'zpool list' produced no usable row for this pool:
## report that straight away and stop.
unless ($health) {
	$state = "CRITICAL";
	$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
	print "$state $msg";
	exit $ERRORS{$state};
}

## Map pool health onto a Nagios state:
##   ONLINE   -> OK
##   DEGRADED -> WARNING
##   anything else -> CRITICAL
$state = $health eq "ONLINE"   ? "OK"
       : $health eq "DEGRADED" ? "WARNING"
       :                         "CRITICAL";

## Get more detail on possible device failure by walking the device table
## printed by 'zpool status'. Device descriptions are appended to $dmge and
## $state may be raised from OK to WARNING.
##
## $poolfind tracks where we are in the output:
##   0 - outside the device table
##   1 - the previous line was the "NAME STATE READ WRITE CKSUM" header
##   2 - the pool's own row has been seen; following rows are its devices
my $poolfind=0;

## Printable form of the command, used in the error message below.
$statcommand="zpool status $pool";

## List-form pipe open: the pool name is passed as a single argument and
## never goes through a shell.
my $status_fh;
if (! open($status_fh, '-|', 'zpool', 'status', $pool)) {
	$state = 'CRITICAL';
	print ("$state '$statcommand' command returns no result! NOTE: This plugin needs OS support for ZFS, and execution with root privileges.\n");
	exit $ERRORS{$state};
}

## Pool name with regex metacharacters escaped ('.' is legal in pool names).
my $pool_re=quotemeta($pool);

## go through zpool status output to find the pool's vdevs and devices
while(<$status_fh>) {
	chomp;

	## The pool's own row must directly follow the table header; require
	## whitespace or end of line after the name so a longer name sharing
	## the same prefix is not accepted.
	if (/^\s+${pool_re}(?:\s|\z)/ && $poolfind==1) {
		$poolfind=2;
		next;
	} elsif ( $poolfind==1 ) {
		$poolfind=0;
	}

	if (/NAME\s+STATE\s+READ\s+WRITE\s+CKSUM/) {
		$poolfind=1;
	}

	## a blank line ends the device table
	if ( /^$/ ) {
		$poolfind=0;
	}

	if ($poolfind == 2) {

		## special cases pertaining to full verbose
		## NOTE(review): the fixed indentation widths below assume the
		## layout of 'zpool status' (one leading whitespace character,
		## then two more per nesting level) - confirm against the
		## deployed ZFS version.
		if (/^\sspares/) {
			next unless $verbose == 3;
			$dmge=$dmge . "[SPARES]:- ";
			next;
		}
		if (/^\s{5}spare\s/) {
			next unless $verbose == 3;
			my ($sta) = /spare\s+(\S+)/;
			$dmge=$dmge . "[SPARE:${sta}]:- ";
			next;
		}
		if (/^\s{5}replacing\s/) {
			next unless $verbose == 3;
			my $perc;
			my ($sta) = /^\s+\S+\s+(\S+)/;
			if (/%/) {
				($perc) = /([0-9]+%)/;
			} else {
				$perc = "working";
			}
			$dmge=$dmge . "[REPLACING:${sta} (${perc})]:- ";
			next;
		}
		if (/^\s+cache\s*$/) {
			next;
		}

		## other cases
		my ($dev, $sta) = /^\s+(\S+)\s+(\S+)/;

		## Rows without a STATE column (a bare section header such as
		## "logs") describe no device; skip them instead of comparing and
		## printing undefined values.
		next unless defined $sta;

		## pool online, not degraded thanks to dead/corrupted disk
		if ($state eq "OK" && $sta eq "UNAVAIL") {
			$state="WARNING";

			## switching to verbose level 2 to explain weirdness
			if ($verbose == 1) {
				$verbose =2;
			}
		}

		## no display for verbose level 1
		next if ($verbose==1);
		## don't display working devices for verbose level 2
		next if ($verbose==2 && $state eq "OK");
		next if ($verbose==2 && ($sta eq "ONLINE" || $sta eq "AVAIL" || $sta eq "INUSE"));

		## show everything else; the bracket style reflects how deeply
		## the row is indented
		if (/^\s{3}(\S+)/) {
			$dmge=$dmge . "<" . $dev . ":" . $sta . "> ";
		} elsif (/^\s{7}(\S+)/) {
			$dmge=$dmge . "(" . $dev . ":" . $sta . ") ";
		} else {
			$dmge=$dmge . $dev . ":" . $sta . " ";
		}
	}
}
## Release the pipe and reap the zpool child process.
close($status_fh);

##
## Test for enough room for reservations.
## 'df -B1 /<pool>' is parsed for the row whose first column is the pool
## name; the third numeric column of that row is taken as the number of
## bytes still available and compared with the <minspace> argument.
my $to_be_reserved = 0;

## List-form pipe open keeps the pool name away from the shell. A failure
## to start df is reported as a Nagios UNKNOWN on stdout instead of dying
## with an arbitrary exit status.
my $df_fh;
if (! open($df_fh, '-|', 'df', '-B1', "/$pool")) {
    $state = 'UNKNOWN';
    print "$state 'df -B1 /$pool' could not be run: $!\n";
    exit $ERRORS{$state};
}

## Pool name with regex metacharacters escaped.
my $df_pool_re = quotemeta($pool);
while (my $df_line = <$df_fh>) {
    ## df pads its columns, so allow any amount of whitespace between them.
    if ($df_line =~ /^${df_pool_re}\s+\d+\s+\d+\s+(\d+)\s/) {
        $to_be_reserved = $1;
        last;
    }
}
close($df_fh);

## Zero (or no matching df row at all) is always treated as a failure.
if (not $to_be_reserved or $to_be_reserved < $min_reservable_space) {
    ## Report the threshold that was actually configured, and keep any
    ## device details already collected instead of discarding them.
    my $space_alert = "UNRESERVED POOL SPACE BELOW $min_reservable_space bytes ($to_be_reserved bytes)";
    $dmge = $dmge eq "" ? $space_alert : "$space_alert $dmge";
    $state = "CRITICAL";
}

## Final report: one line of the form "<STATE>:ZPOOL <pool> : <health> {...} <details>",
## then exit with the Nagios code that belongs to the state.
my @report_fields = ($pool, $health, $size, $used, $avail, $cap, $dmge);
$msg = sprintf "ZPOOL %s : %s {Size:%s Used:%s Avail:%s Cap:%s} %s\n", @report_fields;
print "$state:$msg";
exit $ERRORS{$state};
