#!/bin/bash
# vim: cindent:shiftwidth=4:tabstop=4:smarttab:textwidth=100

# Strict mode: POSIX-conformant shell behaviour, abort on any unhandled
# command failure (including a failure in any stage of a pipeline) and on
# expansion of an unset variable.
set -o posix -o errexit -o pipefail -o nounset
# Uncomment to trace every executed command while debugging.
#set -o xtrace

#$title$ physical network interface errors
#$check$ error counters (RX_ERR RX_DRP RX_OVR TX_ERR TX_DRP TX_OVR) are not incrementing
#$ref$ KB:InterfaceBonding, /proc/net/dev
#$author$ Rafal Rzeczkowski
#$version$ 0.8.1

# NOTE(review): level_check is not defined in this file; presumably it is
# provided by the framework that runs this plugin and declares the check as
# belonging to the "short" run level - confirm against the framework.
level_check short

#CHANGELOG
#0.5.0	initial
#0.5.1	check only PHY and bonding interfaces
#0.6.0	re-factored design for data collection and display
#0.6.1	save current state right after loading the old one
#0.6.2	metadata headers, help message
#0.7.0	refactored to use check_error_counter_list()
#0.7.1	consider only physical Ethernet interfaces (not bondX)
#0.7.2	ignore RX_DRP counter on backup bond member (Ubuntu Bug #1041070)
#0.7.3	ignore RX_DRP counter on interfaces not in bond
#0.7.4	more robust logic for 0.7.3
#0.7.5	restyle according to https://kb.clearcable.ca/KB/ProgrammingStyleStandards
#0.7.6	use /proc/net/dev as the data source (Debian 10+ compatibility)
#0.7.7	enumerate interfaces based on their (PCI) bus connectivity, not eth* name
#0.7.8	link TX_OVR stat to Transmit_fifo not Transmit_drop
#0.8.0	blacklist Intel 82576
#0.8.1	improve robustness of udevadm parser

#$ cat /proc/net/dev
#Inter-|   Receive                                                |  Transmit
# face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed
#xenbr22: 10837347   84419    0    0    0     0          0         0  2675335   14100    0    0    0     0       0          0
#vlan22: 10908892   84167    0    0    0     0          0     62159  3881263   27300    0    0    0     0       0          0
#  eth3:       0       0    0    0    0     0          0         0        0       0    0    0    0     0       0          0
#vif1.1:       0       0    0    0    0     0          0         0    28806     415    0    0    0     0       0          0
#  eth0: 16279866  146154    0 1918    0     0          0    120989  3963357   28097    0    0    0     0       0          0
#xenbr23: 1597218   29137    0    0    0     0          0         0    33616     312    0    0    0     0       0          0
#  eth2:       0       0    0    0    0     0          0         0        0       0    0    0    0     0       0          0
#vlan23: 1603712   29167    0    0    0     0          0     29109    33616     312    0    0    0     0       0          0
#vif1.0: 1022230   13209    0    0    0     0          0         0  7278644   67846    0    0    0     0       0          0
#  eth1:       0       0    0    0    0     0          0         0        0       0    0    0    0     0       0          0
#    lo:    2832      14    0    0    0     0          0         0     2832      14    0    0    0     0       0          0

# Print "<ID_VENDOR_ID>/<ID_MODEL_ID>" (e.g. "0x8086/0x10c9") for the device
# described by `udevadm info` text read from stdin.
#   stdin  - udevadm output; properties are lines of the form "E: KEY=VALUE"
#   stdout - one line "vendor/model"; a property that is absent yields an
#            empty string, so a device with neither prints "/"
# Property values are only ever handled as data (no eval), so quotes, "$" or
# backticks in any property cannot break the script, and nothing leaks from
# one interface to the next.
udev_pci_id() {
	local line vendor='' model=''
	# "|| [ -n ... ]" also handles a final line without trailing newline.
	while IFS= read -r line || [ -n "$line" ]; do
		case "$line" in
			'E: ID_VENDOR_ID='*) vendor=${line#*=};;
			'E: ID_MODEL_ID='*) model=${line#*=};;
		esac
	done
	printf '%s/%s\n' "$vendor" "$model"
}

# Names of all interfaces whose /sys/class/net symlink points below a PCI
# device, one per line, sorted.
pci_nics_all=$(find /sys/class/net -type l -lname '*/devices/pci*' -printf '%f\n' | sort)

# pci_nics      - array of the PCI interfaces that passed the blacklist
# pci_nics_flat - the same names, each followed by ":" (e.g. "eth0:eth1:"),
#                 empty when no interface qualified
declare -a pci_nics=()
pci_nics_flat=''
# Unquoted on purpose: word splitting turns the newline-separated list into
# individual names (relies on names containing no whitespace).
for iface in $pci_nics_all; do
	# Best effort: when udevadm fails the id is left as printed (normally
	# "/"), which matches no blacklist entry, so the interface is kept.
	pci_id=$(udevadm info "/sys/class/net/$iface" | udev_pci_id) || true

	# process blacklist
	echodebug "$iface $pci_id"
	case "$pci_id" in
		'0x8086/0x10c9') continue;;	# Intel 82576 (see changelog 0.8.0)
		*)
			pci_nics+=("$iface")
			pci_nics_flat+="$iface:"
			;;
	esac
done

# Inputs for check_error_counter_list (defined outside this file):
#   STAT_FILE   - kernel per-interface statistics table
#   OBJECT_DESC - human readable name of the monitored object type
#   FIELD_LIST  - names of the counters, in the column order of STATE_CUR
STAT_FILE='/proc/net/dev'
OBJECT_DESC='physical network interface'
FIELD_LIST=(RX_ERR RX_DRP RX_OVR TX_ERR TX_DRP TX_OVR)

# Print the error counters of selected interfaces, one tab-separated row per
# interface: name, RX_ERR, RX_DRP, RX_OVR, TX_ERR, TX_DRP, TX_OVR.
#   $1 - statistics file in /proc/net/dev format (two header lines, then
#        "<name>:<16 counters>" rows)
#   $2 - interface names to report, separated by ":" (empty entries ignored)
# Counter sources: RX_ERR=receive errs, RX_OVR=receive fifo, TX_ERR=transmit
# errs, TX_DRP=transmit drop, TX_OVR=transmit fifo.
# RX_DRP is always reported as the placeholder "U" instead of receive drop:
#   https://www.novell.com/support/kb/doc.php?id=7007165
#   "Now, the rx_dropped counter shows statistics for dropped frames because
#   of: Bad / Unintended VLAN tags"
# NOTE(review): "U" presumably means "unknown" and is skipped by
# check_error_counter_list - confirm.
nic_error_counters() {
	# -v is the portable spelling of gawk's --assign.
	awk -v PCI_NICS="$2" '
BEGIN {
	OFS = "\t"
	n = split(PCI_NICS, names, ":")
	for (i = 1; i <= n; i++)
		if (names[i] != "")
			wanted[names[i]] = 1
}
NR > 2 {
	# Turn "name:counters" into whitespace separated fields; this also
	# copes with rows that have no blank after the colon.
	sub(/:/, " ")
	# $1 name; $2..$9 receive bytes packets errs drop fifo frame
	# compressed multicast; $10..$17 transmit bytes packets errs drop
	# fifo colls carrier compressed
	if ($1 in wanted)
		print $1, $4, "U", $6, $12, $13, $14
}' "$1"
}

STATE_CUR=$(nic_error_counters "$STAT_FILE" "$pci_nics_flat")
HELPMSG='verify {PHY and low level MAC settings}; replace cable; change switch port'

# Debug dump of what was collected: the source file name, a header line and
# the rows held in STATE_CUR (echodebug is defined outside this file).
echodebug "$STAT_FILE:"
# IFS is switched to a tab so that "${FIELD_LIST[*]}" below joins the field
# names with tabs, matching the tab-separated rows in STATE_CUR. The
# assignment is global and therefore still in effect when
# check_error_counter_list runs.
# NOTE(review): presumably check_error_counter_list relies on the tab IFS to
# split STATE_CUR rows - confirm before restoring or scoping IFS here.
IFS=$'\t'
echodebug "dev${IFS}${FIELD_LIST[*]}"
echodebug "$STATE_CUR"

# NOTE(review): defined outside this file; presumably reads the variables
# set above (STAT_FILE, OBJECT_DESC, FIELD_LIST, STATE_CUR, HELPMSG) and
# compares the counters with a previously saved state - confirm.
check_error_counter_list
