#!/usr/bin/bash
# vim: cindent:shiftwidth=4:tabstop=4:smarttab:textwidth=100

# Shell options for the whole script.
# Run bash in POSIX-conformant mode.
set -o posix
# Stop as soon as an unhandled command exits with a non-zero status.
set -o errexit
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail
# Expanding an unset variable is an error.
set -o nounset
# Uncomment to trace every command while debugging.
#set -o xtrace

# Globs that match nothing expand to an empty list instead of the literal pattern.
shopt -s nullglob

#$title$ system and service manager
#$check$ overall system state
#$ref$ systemd(1)
#$author$ Rafal Rzeczkowski
#$version$ 0.6.7

# NOTE(review): level_check is not defined in this file; presumably the check framework provides
# it and the arguments "short settling" select when this check may run - confirm there.
level_check short settling

#CHANGELOG
#0.5.0	initial
#0.5.1	output failed counters under debug only
#0.6.0	detect broken symbolic links in /etc/systemd
#0.6.1	avoid running right after boot
#0.6.2	list failed services when SystemState=degraded
#0.6.3	detect local customizations to service definitions
#0.6.4	scan for defined timers that are not scheduled
#0.6.5	filter out timers associated with services in activating state
#0.6.6	filter out local customizations marked as permanent
#0.6.7	filter out all timers with on_boot designation

# Problem identifiers this check can raise; each one is passed to `problem` further down.
declare -r -a PROBLEMS=(STATE_DEGRADED STATE_INVALID BROKEN_SYMLINK OVERRIDE INVALID_TIMERS)
# Sanity check: the list must not be empty. The former `test -n ${#PROBLEMS[@]}` could never
# fail, because even a count of 0 is a non-empty string.
[[ ${#PROBLEMS[@]} -gt 0 ]]

declare -r SYSTEMD_CONFIG_DIR='/etc/systemd'

# Import every property of the systemd manager ("systemctl show" prints one NAME=VALUE pair per
# line) as a global shell variable named after the property.
# Values are assigned with printf -v instead of being passed through eval, so quotes, "$",
# backticks, backslashes or glob characters inside a value are stored literally and are never
# interpreted by the shell.
# Capturing the output first lets errexit stop the script when systemctl itself fails.
manager_properties=$(systemctl show --no-pager)
while IFS= read -r property_line; do
	property_name=${property_line%%=*}
	# skip anything that is not NAME=VALUE with NAME being a valid shell identifier
	[[ $property_line == *=* && $property_name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
	printf -v "$property_name" '%s' "${property_line#*=}"
done <<< "$manager_properties"
unset manager_properties property_line property_name

echodebug "systemd Version $Version"

# https://www.freedesktop.org/software/systemd/man/latest/org.freedesktop.systemd1.html
# SystemState contains the current state of the system manager. The possible values are:
# "initializing"	The system is booting, and basic.target has not been reached yet.
# "starting"	The system is booting, and basic.target has been reached.
# "running"		The system has finished booting, and no units are in the failed state.
# "degraded"	The system has finished booting, but some units are in the failed state.
# "maintenance"	The system has finished booting, but it has been put in rescue or maintenance mode.
# "stopping"	The system is shutting down.
# Report on the overall state of the system manager: "running" is the only value that lets the
# check continue, "degraded" additionally lists the failed units, and any other state is
# reported as is. Both failure branches end the script.
if [[ "$SystemState" = 'running' ]]; then
	echodebug "SystemState $SystemState"
elif [[ "$SystemState" = 'degraded' ]]; then
	# --plain drops the status glyph in the first column (its shape depends on the locale) and
	# --no-legend drops header and footer, so the unit name is always the first field.
	mapfile -t failed_units < <(
		systemctl --no-pager --plain --no-legend list-units --state=failed | awk '{print $1}'
	)
	fail warning "SystemState $SystemState: ${failed_units[*]}"
	# quoted like the other problem calls in this script, so the list is always passed as one word
	problem STATE_DEGRADED "$(printf '%s;' "${failed_units[@]}")"
	exit
else
	fail warning "SystemState $SystemState"
	problem STATE_INVALID "$SystemState"
	exit
fi

# Counters reported by the manager; shown in debug output only.
echodebug "NFailedUnits $NFailedUnits"
echodebug "NFailedJobs $NFailedJobs"

# Collect the broken symbolic links below $SYSTEMD_CONFIG_DIR (find -xtype l); the NUL-separated
# output copes with any character in a path. Any hit is reported and ends the script.
declare -a dangling_links=()
while IFS= read -r -d '' dangling_link; do
	dangling_links+=("$dangling_link")
done < <(find "$SYSTEMD_CONFIG_DIR" -xtype l -print0)
dangling_count=${#dangling_links[@]}
if (( dangling_count > 0 )); then
	link_noun=$(plural_text "$dangling_count" link)
	fail caution "$dangling_count broken symbolic $link_noun in $SYSTEMD_CONFIG_DIR"
	problem BROKEN_SYMLINK "$(printf '%s;' "${dangling_links[@]}")"
	exit
fi

# Detect local customizations: every *.d entry (drop-in directory) in the system unit directory
# is suspicious unless its owning group is "adm".
# NOTE(review): the "adm" group looks like the marker for customizations accepted as permanent
# (changelog entry 0.6.6) - confirm.
pushd "$SYSTEMD_CONFIG_DIR/system" >/dev/null
suspicious_local_customizations=(*.d)
declare -a local_customizations=()
# Expansions are quoted so that names containing whitespace or glob characters stay intact.
for drop_in_directory in "${suspicious_local_customizations[@]}"; do
	# "--" keeps a name starting with "-" from being parsed as an option
	group_name_of_owner=$(stat --printf='%G' -- "$drop_in_directory")
	if [[ $group_name_of_owner != 'adm' ]]; then
		local_customizations+=("$drop_in_directory")
	fi
done
if [[ ${#local_customizations[@]} -gt 0 ]]; then
	fail caution "local customizations: ${local_customizations[*]}"
	problem OVERRIDE "$(printf '%s;' "${local_customizations[@]}")"
	exit
fi
popd >/dev/null

# Scan for timers that are defined but not scheduled: such a timer shows "-" in the first or
# second output field (the NEXT and LEFT columns of list-timers). The last field names the unit
# the timer activates; units whose name ends in "-on_boot.service" are skipped.
mapfile -t suspicious_timers < <(systemctl --no-pager --full list-timers |
	awk '{
		if ($1 == "-" || $2 == "-")
			if (substr($NF,length($NF)-15) != "-on_boot.service")
				print $NF
	}')
declare -a invalid_timers=()
for service_name in "${suspicious_timers[@]}"; do
	# is-active exits non-zero for every state other than "active"; without "|| true" errexit
	# would end the script right here, before the reported state could be examined.
	is_active=$(systemctl is-active "$service_name") || true
	# a unit that is currently activating is being started, so its timer is not reported
	if [[ $is_active != 'activating' ]]; then
		invalid_timers+=("${service_name%.service}")
	fi
done
if [[ ${#invalid_timers[@]} -gt 0 ]]; then
	fail caution "invalid timers: ${invalid_timers[*]}"
	problem INVALID_TIMERS "$(printf '%s;' "${invalid_timers[@]}")"
	exit
fi

# Reached only when none of the checks above ended the script.
ok
