#!/bin/bash
#
# Nagios check for a single Gluster volume. It verifies that the gluster
# daemons are running, sums the pending self-heal entries, counts the bricks
# that are online and (optionally) compares the smallest per-brick free disk
# space against warning/critical thresholds given in GB.
#
# This Nagios script was written against versions 3.3 and 3.4 of Gluster. Older
# versions will most likely not work at all with this monitoring script.
#
# Gluster currently requires elevated permissions to do anything. In order to
# accommodate this, you need to allow your Nagios user some additional
# permissions via sudo. The lines you want to add will look something like the
# following in /etc/sudoers (or something equivalent):
#
# Defaults:nagios !requiretty
# nagios ALL=(root) NOPASSWD:/usr/sbin/gluster volume status [[\:graph\:]]* detail,/usr/sbin/gluster volume heal [[\:graph\:]]* info
#
# That should give us all the access we need to check the status of any
# currently defined peers and volumes.
#
# Inspired by a script of Mark Nipper
#
# 2013, Mark Ruys, mark.ruys@peercode.nl
#
# Modified by Steve Thomas
#

PATH=/sbin:/bin:/usr/sbin:/usr/bin

PROGNAME=$(basename -- "$0")
# dirname yields "." when $0 contains no slash, so the relative lookup of
# utils.sh below also works when the script is started as "bash check_x".
PROGPATH=$(dirname -- "$0")
REVISION="1.0.0"

# utils.sh is expected to provide the STATE_* exit codes used below
# (NOTE(review): it is not part of this file -- confirm it defines STATE_OK,
# STATE_WARNING, STATE_CRITICAL and STATE_UNKNOWN).
UTILS="$PROGPATH/../utils.sh"
if [[ ! -r "$UTILS" ]]; then
  # Without the STATE_* values every later exit would be meaningless, so stop
  # here with the conventional Nagios UNKNOWN code.
  echo "UNKNOWN: cannot read $UTILS"
  exit 3
fi
# shellcheck source=/dev/null
. "$UTILS"

#######################################
# Print the command line synopsis and exit with the UNKNOWN state.
# Globals:   PROGNAME (read), STATE_UNKNOWN (read)
# Arguments: none
# Outputs:   usage text on stdout
#######################################
usage () {
  echo ""
  echo "USAGE: "
  echo " $PROGNAME -v VOLUME -n BRICKS [-w GB -c GB]"
  echo " -n BRICKS: number of bricks"
  echo " -w and -c values in GB"
  exit "${STATE_UNKNOWN:-3}"
}

#######################################
# Print "<STATE>: <message>" on one line and terminate the plugin.
# Globals:   STATE_<name> (read, via indirect expansion)
# Arguments: $1 - state name without prefix (OK, WARNING, CRITICAL, UNKNOWN)
#            $2 - message text
# Outputs:   the status line on stdout
#######################################
Exit () {
  local status="STATE_$1"
  printf '%s: %s\n' "$1" "$2"
  # Fall back to 3 (UNKNOWN) if the STATE_ variable is not defined.
  exit "${!status:-3}"
}

#######################################
# Convert a size such as "22.1GB" or "1.8TB" to a number of GB.
# Arguments: $1 - number immediately followed by a two letter unit
# Outputs:   the value in GB on stdout (GB input is echoed unchanged,
#            other units are rounded to one decimal)
# Returns:   1 when the number or the unit is not recognised
#######################################
size_to_gb () {
  local size=$1
  local unit=${size: -2}
  local value=${size%"$unit"}
  local power

  [[ "$value" =~ ^[0-9]+([.][0-9]+)?$ ]] || return 1

  # NOTE(review): assumes gluster's human readable sizes are 1024 based --
  # confirm against the gluster version in use.
  case "$unit" in
    KB) power=-2 ;;
    MB) power=-1 ;;
    GB) printf '%s\n' "$value"; return 0 ;;
    TB) power=1 ;;
    PB) power=2 ;;
    *) return 1 ;;
  esac
  awk -v v="$value" -v p="$power" 'BEGIN { printf "%.1f\n", v * 1024 ^ p }'
}

# parse command line
while getopts "v:n:w:c:" opt; do
  case "$opt" in
    v) VOLUME=${OPTARG} ;;
    n) BRICKS=${OPTARG} ;;
    w) WARN=${OPTARG} ;;
    c) CRIT=${OPTARG} ;;
    *) usage ;;
  esac
done

if [[ -z "${VOLUME}" || -z "${BRICKS}" ]]; then
  usage
fi

# BRICKS is compared numerically and the thresholds are handed to bc, so
# reject anything that is not a plain non-negative number up front.
if [[ ! "${BRICKS}" =~ ^[0-9]+$ ]]; then
  usage
fi
for threshold in "${WARN}" "${CRIT}"; do
  if [[ -n "$threshold" && ! "$threshold" =~ ^([0-9]+([.][0-9]+)?|[.][0-9]+)$ ]]; then
    usage
  fi
done

# check for commands
for cmd in basename dirname bc awk sudo pidof gluster; do
  if ! command -v "$cmd" > /dev/null; then
    Exit UNKNOWN "$cmd not found"
  fi
done

# check for glusterd (management daemon)
if ! pidof glusterd > /dev/null; then
  Exit CRITICAL "glusterd management daemon not running"
fi

# check for glusterfsd (brick daemon)
if ! pidof glusterfsd > /dev/null; then
  Exit CRITICAL "glusterfsd brick daemon not running"
fi

# Problems that lead to a WARNING are collected here and reported together.
errors=()

# get volume heal status: add up every numeric "Number of entries: N" line
heal=$(sudo gluster volume heal "${VOLUME}" info \
  | awk '/^Number of entries: /{ if ($4 ~ /^[0-9]+$/) total += $4 }
         END { print total + 0 }')
if (( heal > 0 )); then
  errors+=("$heal unsynched entries")
fi

# get volume status
bricksfound=0
freegb=""   # smallest free space seen so far, empty until a brick reports one
brick=""

# "read -a" splits each line into words without any pathname expansion.
while read -r -a field || (( ${#field[@]} > 0 )); do
  case "${field[0]}" in
    Brick)
      # Remember the brick description for a possible "offline" message.
      brick="${field[*]:2}"
      ;;
    Disk)
      if [[ "${field[*]:0:3}" == "Disk Space Free" ]]; then
        freeunit="${field[*]:4}"
        freeunit=${freeunit// /}
        # NOTE(review): gluster appears to print "N/A" for bricks that are
        # down; such a brick is already reported through its Online line, so
        # it must not abort the whole check here -- confirm the exact output.
        if [[ "$freeunit" == "N/A" ]]; then
          continue
        fi
        if ! free=$(size_to_gb "$freeunit"); then
          Exit UNKNOWN "Unknown disk space size $freeunit"
        fi
        # bc is used because the sizes are decimal fractions.
        if [[ -z "$freegb" ]] || (( $(bc <<< "${free} < ${freegb}") == 1 )); then
          freegb=$free
        fi
      fi
      ;;
    Online)
      if [[ "${field[*]:2}" == "Y" ]]; then
        bricksfound=$((bricksfound + 1))
      else
        errors+=("$brick offline")
      fi
      ;;
  esac
done < <(sudo gluster volume status "${VOLUME}" detail)

if (( bricksfound == 0 )); then
  Exit CRITICAL "No bricks found"
elif [ "$bricksfound" -lt "$BRICKS" ]; then
  errors+=("found $bricksfound bricks, expected $BRICKS")
fi

# Thresholds are only evaluated when both -w and -c were given. They describe
# free space, so the critical value has to be the smaller one.
if [[ -n "$CRIT" && -n "$WARN" ]]; then
  if (( $(bc <<< "${CRIT} > ${WARN}") == 1 )); then
    Exit UNKNOWN "Critical threshold must be below warning threshold"
  elif [[ -z "$freegb" ]]; then
    errors+=("Free space not reported")
  elif (( $(bc <<< "${freegb} < ${CRIT}") == 1 )); then
    Exit CRITICAL "Free space ${freegb}GB"
  elif (( $(bc <<< "${freegb} < ${WARN}") == 1 )); then
    errors+=("Free space ${freegb}GB")
  fi
fi

# exit with warning if errors
if (( ${#errors[@]} > 0 )); then
  msg=$(printf '; %s' "${errors[@]}")
  Exit WARNING "${msg#; }"
fi

# exit with no errors
if [[ -n "$freegb" ]]; then
  space="${freegb}GB"
else
  space="unknown"
fi
Exit OK "${bricksfound} bricks; free space ${space}"