#!/bin/bash
# vim: cindent:shiftwidth=4:tabstop=4:smarttab:textwidth=100

# Strict mode: abort on a failing command, on a failing pipeline stage and on unset variables.
set -o errexit -o pipefail -o nounset
#set -o xtrace

#$title$ PHY network interface bonding configuration
#$check$ sufficient number of active bond members, primary/secondary reversion, LACP partner communication, logical interface status
#$ref$ KB:InterfaceBonding, /proc/net/bonding
#$author$ Rafal Rzeczkowski
#$version$ 0.7.0

# Schedule selection: changelog entry 0.7.0 records that this check runs on the short schedule.
# NOTE(review): level_check is not defined in this file - presumably supplied by whatever
# runs this plugin; confirm what it does when the current level does not match.
level_check short

#CHANGELOG
#0.50	initial based on nic-phy
#0.51	active-backup mode support
#0.52	LACP mode support
#0.53	do not run when there is no bonding support
#0.54	remove the temporary STATUS file
#0.55	BUG: set status as "warning" not "warn"
#0.56	awk code cleanup
#0.57	also check PHY slave status using 'ip link'
#0.58	explain how to change active slave interface
#0.59	use UNKNOWN exit code when no bonding support
#0.60	metadata headers, help messages
#0.61	restyle according to https://kb.clearcable.ca/KB/ProgrammingStyleStandards
#0.62	publish the reason for setting unknown status
#0.7.0	run on short schedule

# Directory holding one status file per bond; the rest of the script works relative to it.
declare -r BONDING_STATUS_DIR='/proc/net/bonding'

# Guard: without the directory there is nothing to inspect, so report "unknown" and stop.
if [[ ! -d $BONDING_STATUS_DIR ]]; then
	unknown "missing $BONDING_STATUS_DIR - no bonds defined"
	exit
fi
cd "$BONDING_STATUS_DIR"

for BOND in *; do
	# An unmatched glob leaves the literal pattern in BOND: the status directory exists
	# but contains no bond, so there is nothing to judge.
	if [[ ! -e $BOND ]]; then
		unknown "no bonds defined in $BONDING_STATUS_DIR"
		exit
	fi

	# Find the first "MII Status" line (bond-level or per-slave) that is not "up".
	# awk exits non-zero on that line, or when the status file cannot be read. Its output
	# is captured directly, so no temporary file exists that could be left behind.
	# A per-slave finding is prefixed with the slave name so the message says which link is bad.
	if ! STATUS=$(awk -F': ' '
		$1 == "Slave Interface" { slave = $2 }
		$1 == "MII Status" && $2 != "up" {
			print (slave == "" ? $0 : "slave " slave " " $0)
			exit 1
		}' "$BOND")
	then
		fail warning "$BOND: $STATUS"
		helpmsg 'ensure that all cables are connected and switch ports are configured'
		exit
	fi

	# Mode description: the text following "Bonding Mode: " in the bond status file.
	BONDING_MODE=$(awk -F': ' '$1 == "Bonding Mode" { print $2 }' "$BOND")

	# Slave interface names in the order the status file lists them, plus their count.
	declare -a INTERFACES
	INTERFACES=($(awk -F': ' '$1 == "Slave Interface" { print $2 }' "$BOND"))
	INT_COUNT_CUR=${#INTERFACES[@]}

	# Mode-specific checks; every failing check reports a warning and ends the plugin.
	case $BONDING_MODE in
	'fault-tolerance (active-backup)'*)
		# Trailing wildcard: the kernel appends " (fail_over_mac <policy>)" to the mode
		# name when that option is enabled.
		MODE='active-backup'

		INT_COUNT_EXP=2
		if [[ $INT_COUNT_CUR -ne $INT_COUNT_EXP ]]; then
			fail warning "$BOND: $MODE with $INT_COUNT_CUR links, expected $INT_COUNT_EXP"
			exit
		fi

		# The first slave listed in the status file is expected to be the active one.
		ACTIVE_EXP=${INTERFACES[0]}
		ACTIVE_CUR=$(awk -F': ' '$1 == "Currently Active Slave" { print $2 }' "$BOND")
		# Right-hand side quoted so the comparison is literal, not a glob match.
		if [[ $ACTIVE_EXP != "$ACTIVE_CUR" ]]; then
			fail warning "$BOND: $MODE primary interface is $ACTIVE_CUR, expected $ACTIVE_EXP"
			helpmsg "use {ifenslave --change-active $BOND $ACTIVE_EXP} to change"
			exit
		fi
		echodebug "$BOND $MODE OK for [${INTERFACES[*]}]"
		;;

	'IEEE 802.3ad Dynamic link aggregation')
		MODE='LACP'

		# The line is indented in the status file, hence the regex match on $1.
		# Only the first match is used; a missing line counts as 0 ports.
		PORT_COUNT=$(awk -F': ' '$1 ~ "Number of ports" { print $2; exit }' "$BOND")
		PORT_COUNT=${PORT_COUNT:-0}

		PORT_COUNT_MIN=2
		if [[ $PORT_COUNT -lt $PORT_COUNT_MIN ]]; then
			fail warning "$BOND: $MODE PORT_COUNT is $PORT_COUNT, expected at least $PORT_COUNT_MIN"
			helpmsg 'check local LACP configuration and check PHY'
			exit
		fi

		# Partner MAC with the colons stripped (first match only).
		MAC_ADDRESS=$(awk -F': ' '$1 ~ "Partner Mac Address" { gsub(":", "", $2); print $2; exit }' "$BOND")
		# No partner when the MAC consists of zeros only or is absent. A string test is
		# used because hex arithmetic on an empty value aborts the script with a syntax error.
		if [[ -z ${MAC_ADDRESS//0/} ]]; then
			fail warning "$BOND: $MODE has no link partner (remote MAC=0)"
			helpmsg 'check LACP configuration on the switch'
			exit
		fi
		echodebug "$BOND $MODE OK for [${INTERFACES[*]}] - link partner $MAC_ADDRESS"
		;;

	*)
		fail warning "$BOND: unknown BONDING_MODE $BONDING_MODE"
		helpmsg 'consult plugin maintainer'
		exit
		;;
	esac

	# explicit interface link status check : problems may not show up in /proc/net/bonding/* status files
	# The flag list printed by `ip link` between "<" and ">" must match this string exactly.
	INTERFACE_STATUS_GOOD='BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP'
	for INTERFACE in "${INTERFACES[@]}"; do
		# A failing `ip` (e.g. the device vanished) would otherwise abort the plugin through
		# errexit/pipefail without any status; treat it as an empty, i.e. bad, flag list.
		INTERFACE_STATUS=$(ip -oneline link show dev "$INTERFACE" | awk -F'<|>' '{ print $2 }') \
			|| INTERFACE_STATUS=''
		# Right-hand side quoted so the comparison is literal, not a glob match.
		if [[ $INTERFACE_STATUS != "$INTERFACE_STATUS_GOOD" ]]; then
			fail warning "$BOND: slave device $INTERFACE has bad status: {$INTERFACE_STATUS} expected {$INTERFACE_STATUS_GOOD}"
			helpmsg 'was the interface state altered manually outside of bonding control?'
			exit
		fi
	done
done
# Reached only when no check above exited early, i.e. every bond passed all checks.
# NOTE(review): ok is not defined in this file - presumably it publishes the success status; confirm.
ok
