#! /bin/bash
# vim: cindent:shiftwidth=4:tabstop=4:smarttab:textwidth=100

# MySQL database backup
# dump all databases to .sql files
# compress them and create differentials

# [restore procedure]
# lzop -d roundcube.2012-07-19.sql.lzop
# lz4 -d roundcube.2012-07-19.sql.lz4
# xdelta3 -d roundcube.2012-07-20.sql.vcdiff
# mysql roundcube < roundcube.2012-07-20.sql

set -o errexit
set -o nounset
set -o pipefail
test ${SUDO_USER+xxx} && set -o xtrace

#CHANGELOG
#0.5.0	initial
#0.5.1	cleanup using xargs
#0.5.2	only backup schema for selected (cache) databases
#0.5.3	by default add table locks for faster restores
#0.5.4	use 'BEGIN SQL' for InnoDB roundcube database (concurrent access)
#0.6.0	differential backups support
#0.6.1	differential backups bugfixes
#0.6.2	differential backups QA testing
#0.6.3	reduced default MAX_FULL to 2
#0.6.4	dump ARCHIVE tables separately
#0.6.5	run xdelta3 and their decompression pipelines under nice/ionice
#0.6.6	exclude information_schema and lost+found databases
#0.6.7	properly clear ARCHIVE_ table list for each database
#0.6.8	SOE4.2: initialize new arrays via zero-member assignment
#0.6.9	SOE4.2: exclude 'performance_schema' internal table
#0.7.0	restyle according to https://kb.clearcable.ca/KB/ProgrammingStyleStandards
#0.7.1	exclude radius data from backups
#0.7.2	cleanup leftover files in the DST_DIR backup directory
#0.7.3	exclude observium/eventlog table
#0.8.0	select LZ4 compression algorithm if available

# restrict permissions to protect data
umask 'u=r,g=r,o='

# compression algorithm selection
if [[ -x '/usr/bin/lz4' ]]; then
	declare -r COMPRESSION='lz4'
elif [[ -x '/usr/bin/lzop' ]]; then
	declare -r COMPRESSION='lzop'
else
	echo 'no supported compression programs found'
	exit 1
fi

NICE='nice ionice -c3'
DATE=$(date --rfc-3339=date)
MAX_FULL=2	#number of full backups to keep

if [[ -f /etc/default/mysql ]]; then
    . /etc/default/mysql
fi

TMP_DIR=$(mktemp -d)
DST_DIR="${BACKUP_DIR:-/var/cache/mysql}"
cd $DST_DIR

DBs=($(mysql --batch --skip-column-names --execute='SHOW DATABASES'))
for DB_NAME in ${DBs[*]}; do
	#options for specific databases
	OPTIONS='--add-locks'	# default
	case $DB_NAME in
		spamassassin_bayes) OPTIONS='--no-data';;
		radius) OPTIONS='--no-data';;
		roundcube) OPTIONS="$OPTIONS --single-transaction";;
		information_schema) continue;; # read-only table views
		performance_schema) continue;; # server execution monitoring
		*lost+found) continue;;	# ext2 pre-allocation container
	esac

	# list of full backups
	FULLs=($(ls -1 --sort=time $DB_NAME.*.$COMPRESSION || true))

	IFS_ORIG="$IFS"
	IFS=$'\n'
	TABLEs=($(mysql --batch --skip-column-names --execute="SELECT TABLE_NAME, ENGINE FROM information_schema.TABLES where TABLE_SCHEMA = \"$DB_NAME\";"))

	ARCHIVE_EXCLUDEs=()
	ARCHIVE_INCLUDEs=()
	for (( i=0; $i<${#TABLEs[*]}; $((i++)) ))
	do
		IFS=$'\t' TABLE_DATA=(${TABLEs[$i]})
		TBL_NAME=${TABLE_DATA[0]}
		TBL_ENGINE=${TABLE_DATA[1]}
		if [[ $TBL_ENGINE = 'ARCHIVE' ]]; then
			ARCHIVE_EXCLUDEs+=("--ignore-table=$DB_NAME.$TBL_NAME")
			ARCHIVE_INCLUDEs+=($TBL_NAME)
		fi
		if [[ $DB_NAME = 'observium' && $TBL_NAME = 'eventlog' ]]; then
			ARCHIVE_EXCLUDEs+=("--ignore-table=$DB_NAME.$TBL_NAME")
		fi
	done
	IFS="$IFS_ORIG"
	if [[ ${#ARCHIVE_EXCLUDEs[*]} -gt 0 ]]; then
		TABLE_EXCLUDE="${ARCHIVE_EXCLUDEs[*]}"
	else
		TABLE_EXCLUDE=""
	fi

	DST_FILE="$DB_NAME.$DATE.sql"
	$NICE mysqldump $OPTIONS $TABLE_EXCLUDE $DB_NAME | $NICE $COMPRESSION > "$DST_FILE".tmp
	if [[ ${#ARCHIVE_INCLUDEs[*]} -gt 0 ]]; then
		$NICE mysqldump $OPTIONS $DB_NAME ${ARCHIVE_INCLUDEs[*]} | $NICE $COMPRESSION >> "$DST_FILE".tmp
	fi
	mv --force "$DST_FILE".tmp "$DST_FILE".$COMPRESSION

	day_of_week=$(date +%w) #(0..6); 0 is Sunday
	if [[ $day_of_week -gt 0 && ${#FULLs[*]} -gt 0 ]]; then
		# convert to differential
		# on days other than Sunday
		LAST_FULL=${FULLs[0]}
		XS=$TMP_DIR/${LAST_FULL%.*}
		XD=$TMP_DIR/$DST_FILE
		mkfifo $XS $XD
		$NICE $COMPRESSION --decompress < $LAST_FULL > $XS &
		$NICE $COMPRESSION --decompress < $DST_FILE.$COMPRESSION > $XD &
		$NICE xdelta3 -s $XS $XD > $DST_FILE.vcdiff
		rm $DST_FILE.$COMPRESSION
	fi

	# cleanup old full backups
	i=$MAX_FULL
	while [[ $i -lt ${#FULLs[*]} ]]; do
		rm ${FULLs[$i]}
		i=$[$i+1]
	done
	# rescan the list of full backups
	# and cleanup old partial backups older than the oldest full backup
	FULLs=($(ls -1 --sort=time $DB_NAME.*.$COMPRESSION))
	find . -xdev -maxdepth 1 -type f -name '*.vcdiff' -not -newer ${FULLs[$[${#FULLs[*]}-1]]} -print0 |
		xargs --null --no-run-if-empty rm --
done

# cleanup old backups (deleted databases, etc.)
find $DST_DIR -xdev -maxdepth 1 -type f -mtime +$((MAX_FULL*7*2)) -print0 |
	xargs --null --no-run-if-empty rm --verbose --

rm --recursive $TMP_DIR
