#!/bin/bash
# FN: pgSnapBack3.generic
# AB: creates and mounts snapshot of $PGDATA;
#		rsyncs to backup host;
#		unmounts and removes snapshot;
#		reinitializes database on backup server
# DT: 2003-03-05
# AU: S. Murthy Kambhampaty


PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
# What is this script called and where is it running:
# both names are cut at the first "." (pgSnapBack3.generic -> pgSnapBack3,
# host.example.com -> host).
# Parameter expansion is used instead of "basename $0 | awk" so that an
# invocation path containing whitespace cannot be split into several words.
ThisCMD=${0##*/}
ThisCMD=${ThisCMD%%.*}
pgHost=$(hostname)
pgHost=${pgHost%%.*}
# PID of this run; used to give snapshot volumes and mount points unique names
ThisSession=$$

# The postgresql superuser: the backup host is reached over ssh as this
# user, the lock and log files are written under /home/$pgSuper, and error
# reports are mailed to this user.
# (If you want to mail error logs somewhere else, either
# set your mail server to forward pgSuper's mail, or add a pgMonitor
# variable and change the sendmail command line in ExitOnError.)
pgSuper=postgres
# Host that receives the rsync mirror and on which the backup copy is test-started
BackupHost=bkhost
# Mount point of the filesystem holding the database; it is frozen with
# xfs_freeze while the snapshot is created
DataFS=/home/db
# The PostgreSQL data directory (may be a symlink; it is resolved below).
# Phase 2 maps this path into the snapshot by replacing the $DataFS prefix,
# so it is expected to lie under $DataFS.
pgData=/home/db/pgsql/pgdata

RsyncDM="mirrors_$pgHost" # Name of the rsync module on $BackupHost that receives this backup
#
# As long as you have installed postgresql in
# "/usr/local/pgsql-M.m/", where M.m=$(echo -n $(cat $PGDATA/PG_VERSION)),
# on both the production server and the backup server,
# you shouldn't have to edit beyond this point
#


# If $pgData is a symlink, redefine pgData as the link's target.
# readlink prints the target exactly as it is stored in the link, so a target
# containing whitespace survives intact (parsing "ls -l" output did not).
# NOTE(review): a relative link target is still used as-is, i.e. relative to
# the current directory rather than to the link's directory - confirm that
# only absolute targets are used.
[[ -L "$pgData" ]] && pgData=$(readlink "$pgData")
# Contents of PG_VERSION; selects /usr/local/pgsql-$pgMinor on the backup host
pgMinor=$(<"$pgData/PG_VERSION")

# A function to execute commands on the backup server:
# 	Requires passwordless logins to $pgSuper@$BackupHost using SSH
#	See, for example,
# 	http://www.onlamp.com/pub/a/bsd/2002/11/14/FreeBSD_Basics.html
# Arguments: $1 - the complete remote command line, as ONE string
# Globals:   pgSuper, BackupHost (read)
# Outputs:   whatever the remote command writes; stdin is passed through
# Returns:   the exit status of ssh
RMTExec () {
	# The destination is quoted so that it always reaches ssh as a single
	# word, whatever IFS is set to and whatever characters the names contain.
	ssh -i "/home/$pgSuper/.ssh/id_rsa" "$pgSuper@$BackupHost" "$1"
}


# A function to do the rsync
# 	The rsync daemon on the backup server must be configured
#	(/etc/rsyncd.conf): a module named mirrors_$pgHost must point to a
#	directory on the backup server. Within that directory a subdirectory
# 	named pgdata must exist, and will contain the backup
#	copy of the database
RsyncLockFile="/home/$pgSuper/$ThisCMD.lock"
# rsync options used below:
#	-a  archive mode, -v  verbose, -W  copy whole files (no delta transfer)
#	--stats            print transfer statistics into the log
#	--delete --force   remove files/directories that vanished from the source
#	--exclude          never copy postmaster.pid to the mirror
# The command below does not pass -B (block size); it only matters when -W
# is dropped and changes are transferred as deltas.
# NOTE: give the -n option to rsync until sure that it
#	is configured properly. rsync is not forgiving.
#
# Arguments: $1 - source directory; it is expanded UNQUOTED on purpose, so a
#                 caller may prefix extra rsync options separated by blanks
#                 (e.g. "--exclude=pg_xlog/ /mnt/snap/pgdata")
#            $2 - optional sub-path below pgdata/ in the module
# Returns:   the exit status of rsync
RsyncCMD () {
	local -a SrcWords DestWords
	# Unquoted expansions: word splitting here is part of the contract
	SrcWords=( $1/ )
	DestWords=( $BackupHost::$RsyncDM/pgdata/$2 )
	rsync -avW --stats --delete --force --exclude='postmaster.pid' \
		"${SrcWords[@]}" "${DestWords[@]}"
}
# Data directory used when the postmaster is started on the backup server:
#	the "path" entry of the $RsyncDM module in the backup host's
#	/etc/rsyncd.conf (module section read up to the next empty line),
#	with "/pgdata" appended
pgDataBK="$(RMTExec "cat /etc/rsyncd.conf" \
	| sed -n "/$RsyncDM/,/^\$/p" \
	| awk -F' = ' '/path/{print $2}')/pgdata"


# Print a timestamp for log lines: "YYYY-MM-DD <locale time>" followed by
# two spaces and NO newline, so it can prefix a message.
# Always returns 0.
DateCMD () {
	# The two blanks at the end of the format are intentional
	printf '%s %s  ' "$(date +%Y-%m-%d)" "$(date +%X)"
	return 0
}


# A function to call when exiting this script with errors.
# Arguments: $1 - exit code (defaults to 1 when missing, so an error exit can
#                 never end with status 0 by accident; note that the shell
#                 reports only the low 8 bits of the value)
#            $2 - error message; used in the status file, on stderr and as
#                 the mail subject
# Globals:   RsyncLockFile, DataVG, SnapNM, xlSnapNM, ThisSession, STASFile,
#            LogFile, pgSuper (all read)
# Requires:  file descriptors 6 and 7 hold the saved stdout/stderr
# Does not return: removes lock files and snapshots, mails $LogFile.err to
# $pgSuper and exits.
ExitOnError () {
	rm -f "$RsyncLockFile"
	RMTExec "rm -f ~/RsyncLockFile"
	# Only remove snapshots that this run can have created. Without the
	# guards an empty name would hand the bare "$DataVG/" to lvremove -f.
	[[ -n "$SnapNM" ]] && lvremove -f "$DataVG/$SnapNM"
	[[ -n "$xlSnapNM" ]] && lvremove -f "$DataVG/$xlSnapNM"
	find /mnt/ -maxdepth 1 -type d -name "*snap_$ThisSession" -exec rm -rf {} \;
	# Report to the status file and to stderr (still the error log here)
	echo $'\n\n'"$(DateCMD) $2"$'\n\t'\
		"Notified superuser"\
		| tee -a "$STASFile" >&2
	# Restore the original stdout/stderr and close the saved copies
	exec  >&6 6>&-
	exec 2>&7 7>&-
	# Mail a short header followed by the error log
	cat <(echo "Subject: ATTN: $2"\
			$'\n'"X-Priority: 2 (high)"\
			$'\n'"!"\
			$'\n\t'"Review $(echo -n "$LogFile"), the command log"\
			$'\n\t\t'"Error report listed below:" $'\n')\
		"$LogFile.err"\
		| sendmail "$pgSuper"
	exit "${1:-1}"
}


# A function for LVM snapshot creation and mounting
#	The snapshot is mounted read-only on /mnt/$SnapNM,
#	a directory which DoSnap creates itself
# Device mounted on $DataFS (the configured data filesystem is used here
# rather than a literal path, so changing DataFS keeps this lookup in step
# with the freeze and mount-option lookups done later)
DataLV="$(mount | grep "$DataFS\>[^/]" | awk '{print $1}')"
DataVG="$(dirname "$DataLV")" # Please keep the xlogLV and DataLV in the same VG
SnapNM="snap_$ThisSession"
# Device mounted on $pgData/pg_xlog; empty when pg_xlog is not a separate mount
xlogLV="$(mount | grep "$pgData/pg_xlog\>[^/]" | awk '{print $1}')"
# A separate xlog volume gets its own 512 MB snapshot
if [[ -n "$xlogLV" ]]; then
	xlSnapMB=512
	xlSnapNM="xlsnap_$ThisSession"
fi
# Create LVM snapshot(s) of the data volume (and of the xlog volume, when
# pg_xlog is a separate mount) and mount them read-only below /mnt.
# Globals:   DataVG, DataLV, DataFS, pgData, xlogLV, xlSnapMB, SnapNM,
#            xlSnapNM, ThisSession (all read)
# Outputs:   progress reports on stdout, fatal messages on stderr
# Returns:   0 on success; on any failure it calls ExitOnError, which exits
#            the script (code 991<xlog lvcreate status><data lvcreate status>
#            for creation failures, 992 for mount failures)
DoSnap () {
	local AvailMB LogDev SnapExists= MountOpts=nouuid,ro
	local SnapTest= xlSnapTest= MountTest= xlMountTest=

	# Free space in the VG: field 16 times field 13 of "vgdisplay --colon",
	# divided by 2048 (2048 512-byte blocks per MB). int() keeps the value
	# usable in shell arithmetic when the division is not exact.
	AvailMB="$(echo -n $(vgdisplay --colon "$DataVG") | awk 'BEGIN{FS=":"}{print int($16 *$13/2048)}')"

	# Is there already a snapshot of one of our volumes? The xlog volume is
	# only tested when there is one: an empty name would make the pattern
	# match every snapshot on the system.
	[[ -n "$(lvscan | grep Snapshot | grep "$DataLV$")" ]] && SnapExists=1
	[[ -n "$xlogLV" && -n "$(lvscan | grep Snapshot | grep "$xlogLV$")" ]] && SnapExists=1

	if [[ -n "$SnapExists" ]]; then
		echo $'\n\t'"FATAL: Snapshot of $DataLV or $xlogLV already exists." >&2
		lvscan | grep Snapshot
		echo $'\n\t'"Terminating ..." >&2
		SnapTest=32 # lvcreate error: "32 snapshot already exists"; sort of ...
	elif ((-xlSnapMB +AvailMB <=0)); then
		# (variables need no '$' inside arithmetic; empty ones count as 0)
		echo $'\n\t'"FATAL: No space left in $DataVG for snapshot creation. Terminating ..." >&2
		SnapTest=19 # lvcreate error: "19 not enough space available to create logical volume"
	else
		# Use all the free space available in $DataVG for the snapshot!
		# The filesystems are frozen while the snapshots are taken and are
		# thawed again whether or not lvcreate succeeded.
		[[ -n "$xlogLV" ]] && xfs_freeze -f "$pgData/pg_xlog"
		xfs_freeze -f "$DataFS"
		[[ -n "$xlogLV" ]] && { lvcreate -s -L "$xlSnapMB"M -n "$xlSnapNM" "$xlogLV"; xlSnapTest=$?; }
		lvcreate -s -L "$((-xlSnapMB +AvailMB))"M -n "$SnapNM" "$DataLV"
		SnapTest=$?
		xfs_freeze -u "$DataFS"
		[[ -n "$xlogLV" ]] && xfs_freeze -u "$pgData/pg_xlog"
	fi

	# If snapshot creation failed, bail (ExitOnError does not return)
	if ((xlSnapTest +SnapTest >0)); then
		ExitOnError "991$xlSnapTest$SnapTest" "pgBackup: Snapshot creation failed"
	fi

	echo $'\n'"$(DateCMD) Successfully created snapshot(s):"
	lvscan | grep "snap_$ThisSession" # Report snapshot volumes list

	# If /mnt/ subfolders named "*snap_$ThisSession" exist, delete them,
	# then create fresh mount points
	find /mnt/ -maxdepth 1 -type d -name "*snap_$ThisSession" -exec rm -rf {} \;
	mkdir "/mnt/$SnapNM"
	if [[ -n "$xlogLV" ]]; then
		mkdir "/mnt/$xlSnapNM"
		mount -t xfs "$DataVG/$xlSnapNM" "/mnt/$xlSnapNM" -o nouuid,ro
		xlMountTest=$?
	fi
	# When the data filesystem is mounted with logdev=..., the same option
	# is passed when mounting its snapshot
	LogDev="$(grep "$DataFS\>[^/]" /etc/mtab | awk 'BEGIN{FS="logdev="}{print $2}' | awk 'BEGIN{FS=","}{print $1}')"
	[[ -n "$LogDev" ]] && MountOpts="$MountOpts,logdev=$LogDev"
	mount -t xfs "$DataVG/$SnapNM" "/mnt/$SnapNM" -o "$MountOpts"
	MountTest=$?

	# If a snapshot mount failed, undo everything and bail
	if ((xlMountTest +MountTest >0)); then
		if [[ -n "$xlogLV" ]]; then
			umount "$DataVG/$xlSnapNM" &> /dev/null
			lvremove -f "$DataVG/$xlSnapNM"
		fi
		umount "$DataVG/$SnapNM" &> /dev/null
		lvremove -f "$DataVG/$SnapNM"
		ExitOnError 992 "pgBackup; Snapshot mount failed"
	fi

	echo $'\n'"$(DateCMD) Successfully mounted snapshot(s):"
	mount | grep "snap_$ThisSession" # Report the mounts
	return 0 # Only returns on success
}


# Setup logging, and get going:
# STASFile receives the short per-phase progress report, LogFile everything
# written to stdout and LogFile.err everything written to stderr.
STASFile="$pgData/pgSnapBack.status"
LogFile="/home/$pgSuper/$ThisCMD-$(date +%Y%m%d-%H%M)"
# Keep copies of the original stdout/stderr on fds 6 and 7 (they are
# restored before the script exits), then redirect both into the log files.
exec 6>&1; exec  > "$LogFile"
exec 7>&2; exec 2> "$LogFile.err"
# tee without -a: the status file is started afresh for this run
echo "$(DateCMD) Backup initiated"\
	| tee "$STASFile"


# Phase 1 rsync: copy the live data directory to the mirror, so that the
# second rsync (from the snapshot) has little left to transfer.
# If the previous backup is still running, wait a bit:
# (the lock file is polled every 5 minutes, without a timeout)
while [[ -f "$RsyncLockFile" ]]; do
		# NOTE(review): this message goes to stderr, i.e. into $LogFile.err;
		# a non-empty $LogFile.err makes the final mop-up step report the
		# run as failed - confirm that this is intended.
		echo "$(DateCMD) previous Rsync still running"$'\n'\
		"Waiting for completion" >&2
		sleep 300
done
# and now that it's not, proceed
echo $'\n\n'\
	"$(DateCMD) Phase 1 - rsync from pgdata to mirror on $BackupHost"\
	| tee -a "$STASFile"
# Before you start the backup, make sure that no postmaster is running on the backup folder:
# 	(test cribbed from the pg_ctl script)
#	A readable, non-empty postmaster.pid in the backup data directory is
#	taken to mean that a postmaster is running there.
while [[ -n $(RMTExec "sed -n 1p $pgDataBK/postmaster.pid 2>/dev/null") ]]; do
		echo "$(DateCMD) A postmaster is running at the remote \
			location; waiting 5 minutes ..."
		sleep 300
done
# You don't want to start simultaneous rsyncs, and you don't want to
# start the postmaster on the "reflection" while an rsync is running;
# so put down some lock files (there was something on linux-xfs
# about xfs' not recovering inodes allocated to empty files that
# are deleted, so don't just "touch" a lockfile, put something in it).
echo "$(DateCMD) Phase 1 - rsync" > "$RsyncLockFile"
# Copy the same line into ~/RsyncLockFile on the backup host
tail -n1 "$RsyncLockFile" | RMTExec "cat - > ~/RsyncLockFile"
# A failing phase 1 rsync only suppresses the "complete" message; the
# script carries on with the snapshot either way.
RsyncCMD "$pgData/" &&
	echo $'\n'"$(DateCMD) Rsync complete"$'\n\n'

# Now create and mount the snapshot volume
# (DoSnap only returns on success; on failure it ends the script)
echo $'\n\n'"$(DateCMD) Snapshot creation and mounting"\
	| tee -a "$STASFile"
DoSnap


# Phase 2 rsync: copy from the mounted snapshot(s) to the mirror
echo $'\n\n'"$(DateCMD) Phase 2 - rsync from snapshot to mirror on $BackupHost"
# Refresh the local and remote lock files with the current phase
echo "$(DateCMD) Phase 2 rsync" > "$RsyncLockFile"
tail -n1 "$RsyncLockFile" | RMTExec "cat - > ~/RsyncLockFile"
# The sed call rewrites the live path into the snapshot path by replacing
# the $DataFS prefix of $pgData with /mnt/$SnapNM.
# With a separate xlog volume, pg_xlog is excluded from the first rsync
# (the option and the path travel in ONE argument, which RsyncCMD splits)
# and is then copied from its own snapshot into pgdata/pg_xlog/.
( if [[ -n "$xlogLV" ]]; then
	RsyncCMD "--exclude=pg_xlog/ $(echo $pgData | sed -e "s|$DataFS|/mnt/$SnapNM|")/" &&
	RsyncCMD "/mnt/$xlSnapNM/" "pg_xlog/"
else
	RsyncCMD "$(echo $pgData | sed -e "s|$DataFS|/mnt/$SnapNM|")/"
fi ) &&
	echo $'\n\t'"$(DateCMD) Phase 2 rsync complete"$'\n'\
	| tee -a "$STASFile"


# Now remove the snapshot
# The local and remote rsync lock files go first; then every snapshot of
# this run is unmounted and removed (the xlog snapshot first, when pg_xlog
# is a separate volume), and finally the mount points below /mnt are deleted.
rm -f "$RsyncLockFile"
RMTExec "rm -f ~/RsyncLockFile"
for SnapVol in ${xlogLV:+"$xlSnapNM"} "$SnapNM"; do
	umount "$DataVG/$SnapVol"
	lvremove -f "$DataVG/$SnapVol"
done
find /mnt/ -maxdepth 1 -type d -name "*snap_$ThisSession" -exec rm -rf {} \;


# "Reinitialize" the snapshot of the database on the backup server
echo $'\n\n'"$(DateCMD) Start postmaster on $BackupHost, capture output, and shutdown"
# Start a postmaster on the mirrored data directory, in the background over
# ssh (-p55432 is the port given to it). Its stderr is merged into stdout on
# the remote side, so its messages end up in $LogFile.
RMTExec "/usr/local/pgsql-$pgMinor/bin/postmaster -B4096 -p55432\
	-D $pgDataBK 2>&1" &
sleep 300 # give the postmaster time for recovery
# Shut it down again. The outcome is not evaluated here; the log has to be
# read to see whether recovery succeeded.
# NOTE(review): the script never waits for the background ssh job - confirm
# that its output is complete by the time the log is inspected.
RMTExec "/usr/local/pgsql-$pgMinor/bin/pg_ctl stop -D $pgDataBK -m immediate" &&
	echo $'\n\n'"$(DateCMD) Attempted to reinitialize backup; check for errors above"


# Mop up
# Anything at all in the error log counts as a failed run: ExitOnError
# cleans up, mails the log and exits, so it does not come back here.
if [[ -s "$LogFile.err" ]]; then
	ExitOnError 999 "pgBackup; Backup encountered errors"
fi

# Clean run: put some text into the (empty) error log before deleting it,
# then report success and hand back the original stdout/stderr.
echo "Completed, no errors" > "$LogFile.err"
rm -f "$LogFile.err"
echo $'\n\n'"$(DateCMD) Backup completed."\
	| tee -a "$STASFile"
echo "$(DateCMD) Verify postmaster-messages, listed above, for success."
exec  >&6 6>&-
exec 2>&7 7>&-
exit 0
