#!/bin/bash
# pg-zfs-snapshot.sh - PostgreSQL-aware ZFS snapshot script for standby servers
# Uses zfs-auto-snapshot naming conventions for consistency

set -euo pipefail

# ---------------------------------------------------------------------------
# Configuration
#
# Every value below is a default. If the file named by CONF exists it is
# sourced afterwards, so any of these variables can be overridden there.
# ---------------------------------------------------------------------------
PG_VERSION="17"
PG_DATA="/var/lib/postgresql/${PG_VERSION}/main"
PG_USER="postgres"
POOL="zfspool"
DATASETS=(
    "zfspool/encrypted/pgdata"
    "zfspool/encrypted/postgres-wal"
)
# Directory where backup labels are written when running on a primary
SNAPSHOT_LABEL_DIR="/etc/postgres/snapshot_labels"
CONF="/etc/pg-zfs-snapshot.conf"

# Default number of snapshots retained per label
declare -A KEEP_DEFAULTS=()
KEEP_DEFAULTS["frequent"]=4
KEEP_DEFAULTS["hourly"]=24
KEEP_DEFAULTS["daily"]=31
KEEP_DEFAULTS["weekly"]=8
KEEP_DEFAULTS["monthly"]=12
KEEP_DEFAULTS["quarterly"]=28  # 7 years * 4 quarters
KEEP_DEFAULTS["yearly"]=3

# Optional site-specific overrides
if [[ -f "$CONF" ]]; then
    . "$CONF"
fi
# Decide once whether stdout is a terminal. When it is not (cron, pipes),
# INTERACTIVE is "false" and the colour variables are empty so that log
# lines carry no escape sequences.
if [[ -t 1 ]]; then
    INTERACTIVE="true"
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    NC='\033[0m'
else
    INTERACTIVE="false"
    RED=''
    GREEN=''
    YELLOW=''
    NC=''
fi

# Functions
# Print the command-line help text to stdout.
# Reads: $0 (shown as the program name) and SNAPSHOT_LABEL_DIR.
usage() {
    local prog="$0"

    # One printf argument per output line; lines that need expansion are
    # double-quoted, everything else is a literal.
    printf '%s\n' \
        "Usage: ${prog} -l LABEL [-k KEEP] [-n] [-v] [-q] [-h]" \
        '' \
        'PostgreSQL-aware ZFS snapshot for primary and standby servers' \
        '' \
        'Options:' \
        '    -l LABEL    Snapshot label (frequent|hourly|daily|weekly|monthly|quarterly|yearly)' \
        '    -k KEEP     Number of snapshots to keep (default: varies by label)' \
        '    -n          Dry run - show what would be done' \
        '    -v          Verbose output' \
        '    -q          Quiet mode - suppress all output except errors' \
        '    -h          Show this help message' \
        '' \
        'Examples:' \
        "    ${prog} -l hourly                    # Create hourly snapshot, keep 24" \
        "    ${prog} -l daily -k 7                # Create daily snapshot, keep 7" \
        "    ${prog} -l monthly -k 6 -v           # Create monthly snapshot, keep 6, verbose" \
        '' \
        'Cron examples (runs quietly by default):' \
        '    */15 * * * * /usr/local/bin/pg-zfs-snapshot.sh -l frequent' \
        '    0 * * * * /usr/local/bin/pg-zfs-snapshot.sh -l hourly' \
        '    0 0 * * * /usr/local/bin/pg-zfs-snapshot.sh -l daily' \
        '    0 0 * * 0 /usr/local/bin/pg-zfs-snapshot.sh -l weekly' \
        '    0 0 1 * * /usr/local/bin/pg-zfs-snapshot.sh -l monthly' \
        '    0 0 1 1 * /usr/local/bin/pg-zfs-snapshot.sh -l yearly' \
        '' \
        'Note: Monthly snapshots on Jan 1, Apr 1, Jul 1, Oct 1 are automatically ' \
        '      converted to quarterly snapshots and kept for 7 years.' \
        '' \
        "Primary server backup labels are stored in: ${SNAPSHOT_LABEL_DIR}"
}

# Emit an informational message.
# The message is printed to stdout only when the run is interactive or
# verbose AND quiet mode is off; it is always forwarded to syslog.
# Arguments: the message words (joined with "$*").
log() {
    local show="false"

    if [[ "$INTERACTIVE" == "true" ]] || [[ "$VERBOSE" == "true" ]]; then
        # Quiet mode wins over interactive/verbose for console output
        [[ "$QUIET" == "true" ]] || show="true"
    fi

    if [[ "$show" == "true" ]]; then
        echo -e "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $*"
    fi

    syslog "info" "$*"
}

# Report an error: always written to stderr (regardless of quiet mode)
# and forwarded to syslog with priority "error".
# Arguments: the message words (joined with "$*").
error() {
    local message="$*"
    local stamp
    # A failing `date` must not prevent the error itself from being reported
    stamp=$(date '+%Y-%m-%d %H:%M:%S') || true

    >&2 echo -e "${RED}[${stamp}] ERROR:${NC} ${message}"
    syslog "error" "${message}"
}

# Emit a warning.
# The message is printed to stderr only when the run is interactive or
# verbose AND quiet mode is off; it is always forwarded to syslog.
# Arguments: the message words (joined with "$*").
warning() {
    local show="false"

    if [[ "$INTERACTIVE" == "true" ]] || [[ "$VERBOSE" == "true" ]]; then
        # Quiet mode suppresses console warnings
        [[ "$QUIET" == "true" ]] || show="true"
    fi

    if [[ "$show" == "true" ]]; then
        >&2 echo -e "${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')] WARNING:${NC} $*"
    fi

    syslog "warning" "$*"
}

# Forward one message to syslog via logger(1), tagged "pg-zfs-snapshot"
# on facility local0.
# Arguments:
#   $1 - priority name appended to "local0." (callers pass info/warning/error)
#   $2 - message text
syslog() {
    local level="$1" text="$2"
    local facility="local0.${level}"

    logger -t "pg-zfs-snapshot" -p "${facility}" "${text}"
}

# Check if today is the start of a quarter (Jan 1, Apr 1, Jul 1 or Oct 1).
# Returns: 0 when today is the first day of a quarter, 1 otherwise
#          (also 1 if the current date cannot be determined).
is_quarter_start() {
    local month_day

    # Read month and day with a single date call so that both values come
    # from the same instant; two separate calls could straddle midnight.
    month_day=$(date +%m%d) || return 1

    case "$month_day" in
        0101|0401|0701|1001)
            return 0
            ;;
    esac
    return 1
}

# Detect server role (primary or standby) by asking PostgreSQL whether it
# is in recovery.
# Globals:  PG_USER (read), SERVER_ROLE (written: "standby" or "primary")
# Exits:    with status 1 when psql fails or returns anything other than
#           "t"/"f". An unrecognised answer must never be treated as
#           "primary", because the primary path starts a backup.
detect_server_role() {
    local in_recovery

    if ! in_recovery=$(sudo -u "$PG_USER" psql -tAc "SELECT pg_is_in_recovery();" 2>/dev/null); then
        error "Could not connect to PostgreSQL"
        exit 1
    fi

    case "$in_recovery" in
        t)
            SERVER_ROLE="standby"
            log "Detected server role: STANDBY"
            ;;
        f)
            SERVER_ROLE="primary"
            log "Detected server role: PRIMARY"
            ;;
        *)
            error "Unexpected pg_is_in_recovery() result: '${in_recovery}'"
            exit 1
            ;;
    esac
}

# Primary server: start backup mode.
# Arguments: $1 - backup label passed to pg_backup_start()
# Globals:   PG_USER (read), BACKUP_START_INFO (written: psql output,
#            stdout and stderr combined)
# Returns:   0 on success, 1 when psql reports failure.
#
# NOTE(review): this runs pg_backup_start() in a psql process that exits
# right after the call, and pg_backup_stop() is later issued from a
# different psql process. PostgreSQL's low-level backup API documentation
# says the connection that called pg_backup_start must stay open until
# pg_backup_stop is called, otherwise the backup is aborted. Confirm
# against the server version in use; a fix would need one long-lived session
# spanning the snapshot.
start_primary_backup() {
    local backup_name="$1"
    log "Starting PostgreSQL backup mode..."

    # Test the assignment directly: a separate `$?` check is never reached
    # under `set -e` unless the caller happens to be inside a condition.
    if ! BACKUP_START_INFO=$(sudo -u "$PG_USER" psql -tAc "SELECT pg_backup_start('$backup_name', fast => true);" 2>&1); then
        error "Failed to start backup mode: $BACKUP_START_INFO"
        return 1
    fi

    log "Backup mode started successfully"
    return 0
}

# Primary server: stop backup mode and save the returned backup label.
# Arguments: $1 - snapshot name; the label is stored as
#                 ${SNAPSHOT_LABEL_DIR}/<snapshot name>.label
# Globals:   PG_USER, SNAPSHOT_LABEL_DIR (read),
#            BACKUP_STOP_INFO (written: labelfile text from pg_backup_stop())
# Returns:   0 when the label was written, 1 on any failure.
#
# NOTE(review): pg_backup_stop() is issued from a new psql session.
# PostgreSQL's low-level backup API documentation says it must be called
# on the same connection that ran pg_backup_start(); confirm this path
# works on the server version in use.
stop_primary_backup() {
    local snapshot_name="$1"
    local stderr_file stderr_text label_file
    log "Stopping PostgreSQL backup mode..."

    # psql diagnostics (NOTICE/WARNING lines) arrive on stderr. Keep them
    # out of BACKUP_STOP_INFO so they never end up inside the label file.
    if ! stderr_file=$(mktemp); then
        error "Failed to create temporary file for psql diagnostics"
        return 1
    fi

    if ! BACKUP_STOP_INFO=$(sudo -u "$PG_USER" psql -tAc "SELECT labelfile FROM pg_backup_stop();" 2>"$stderr_file"); then
        stderr_text=$(<"$stderr_file")
        rm -f -- "$stderr_file"
        error "Failed to stop backup mode: ${stderr_text:-$BACKUP_STOP_INFO}"
        return 1
    fi
    rm -f -- "$stderr_file"

    if [[ -z "$BACKUP_STOP_INFO" ]]; then
        error "Failed to stop backup mode: pg_backup_stop() returned an empty label"
        return 1
    fi

    # Create label directory if it doesn't exist
    if ! mkdir -p "$SNAPSHOT_LABEL_DIR"; then
        error "Failed to create label directory: $SNAPSHOT_LABEL_DIR"
        return 1
    fi

    # Save backup label info; check the write itself rather than only the
    # file's existence (a stale or truncated file would also "exist").
    label_file="${SNAPSHOT_LABEL_DIR}/${snapshot_name}.label"
    if ! printf '%s\n' "$BACKUP_STOP_INFO" > "$label_file"; then
        error "Failed to save backup label to $label_file"
        return 1
    fi

    log "Backup mode stopped. Label saved to $label_file"
    return 0
}

# Standby server: pause WAL replay and wait until the pause is in effect.
# Globals:  PG_USER (read)
# Returns:  0 once pg_is_wal_replay_paused() reports "t",
#           1 if the pause request fails or does not take effect within
#           30 seconds (in which case a resume is attempted so the standby
#           is not left with a pending pause).
pause_standby_replay() {
    log "Pausing WAL recovery for consistent snapshot..."
    if ! sudo -u "$PG_USER" psql -c "SELECT pg_wal_replay_pause();" >/dev/null 2>&1; then
        # error() already forwards to syslog; no second syslog call needed
        error "Failed to pause WAL recovery"
        return 1
    fi

    # Wait for pause to take effect, polling once per second
    local max_wait=30
    local waited=0
    local is_paused
    while (( waited < max_wait )); do
        # A failed poll counts as "not paused yet" rather than aborting
        is_paused=$(sudo -u "$PG_USER" psql -tAc "SELECT pg_is_wal_replay_paused();" 2>/dev/null) || is_paused=""
        if [[ "$is_paused" == "t" ]]; then
            log "WAL recovery paused successfully"
            return 0
        fi
        sleep 1
        # Not ((waited++)): that returns status 1 when waited is 0
        waited=$((waited + 1))
    done

    error "WAL recovery did not pause within ${max_wait} seconds"
    # The pause was requested above; undo it so replay does not stall later
    resume_standby_replay || true
    return 1
}

# Standby server: resume WAL replay.
# Globals:  PG_USER (read)
# Returns:  0 when pg_wal_replay_resume() succeeded, 1 otherwise.
resume_standby_replay() {
    log "Resuming WAL recovery..."
    if ! sudo -u "$PG_USER" psql -c "SELECT pg_wal_replay_resume();" >/dev/null 2>&1; then
        # warning() already forwards to syslog; a second syslog call here
        # would only duplicate the entry.
        warning "Failed to resume WAL recovery - manual intervention may be required"
        return 1
    fi
    log "WAL recovery resumed"
    return 0
}

# Print the pool name of a ZFS dataset path, i.e. its first path component.
# Arguments: $1 - dataset path (one optional leading "/" is ignored)
# Outputs:   pool name on stdout
get_pool_name() {
    # Drop a single leading slash, if any
    local path="${1#/}"
    # Everything up to (not including) the first remaining slash
    local pool="${path%%/*}"

    echo "$pool"
}

# Create one snapshot of every dataset in DATASETS, named
# "zfs-auto-snap_<label>-<timestamp>", while PostgreSQL is quiesced
# (backup mode on a primary, WAL replay paused on a standby).
#
# Arguments: $1 - requested label; "monthly" is promoted to "quarterly"
#                 when is_quarter_start succeeds
# Globals:   DATASETS, SERVER_ROLE, DRY_RUN, INTERACTIVE, PG_USER (read),
#            EFFECTIVE_LABEL (written on success)
# Returns:   0 on success (or dry run), 1 on failure.
#
# All datasets of a pool are passed to ONE `zfs snapshot` invocation so the
# snapshots of that pool are taken together instead of one after another.
# In dry-run mode nothing is changed: PostgreSQL is not quiesced, pools are
# not synced, no snapshot is taken.
create_snapshots() {
    local label="$1"
    local effective_label="$label"
    local timestamp dataset pool_name zfs_output i
    local snapshot_failed=0
    local -a names=() name_pools=() pools=() batch=()
    local -A seen_pools=()

    timestamp=$(date +%Y-%m-%d-%H%M%S)

    # Check if this is a monthly snapshot on a quarter start
    if [[ "$label" == "monthly" ]] && is_quarter_start; then
        effective_label="quarterly"
        log "First day of quarter detected - creating quarterly snapshot instead of monthly"
    fi

    local snapshot_name="zfs-auto-snap_${effective_label}-${timestamp}"

    # Work out which pool each dataset lives in before touching PostgreSQL,
    # so configuration errors never leave the database quiesced.
    for dataset in "${DATASETS[@]}"; do
        pool_name=$(get_pool_name "$dataset")
        if [[ -z "$pool_name" ]]; then
            error "Cannot determine pool for dataset: '$dataset'"
            return 1
        fi
        names+=("$dataset")
        name_pools+=("$pool_name")
        if [[ -z "${seen_pools[$pool_name]+x}" ]]; then
            seen_pools[$pool_name]=1
            pools+=("$pool_name")
        fi
    done

    if (( ${#names[@]} == 0 )); then
        error "No datasets configured for snapshotting"
        return 1
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        for dataset in "${names[@]}"; do
            log "[DRY RUN] Would create snapshot: ${dataset}@${snapshot_name}"
        done
        EFFECTIVE_LABEL="$effective_label"
        return 0
    fi

    # Prepare for snapshot based on server role
    if [[ "$SERVER_ROLE" == "primary" ]]; then
        if ! start_primary_backup "$snapshot_name"; then
            return 1
        fi
    else
        if ! pause_standby_replay; then
            return 1
        fi
    fi

    # Per pool: flush pending writes, then snapshot all of its datasets in
    # a single zfs invocation.
    for pool_name in "${pools[@]}"; do
        batch=()
        for (( i = 0; i < ${#names[@]}; i++ )); do
            if [[ "${name_pools[$i]}" == "$pool_name" ]]; then
                batch+=("${names[$i]}@${snapshot_name}")
                log "Creating snapshot: ${names[$i]}@${snapshot_name}"
            fi
        done

        if ! zpool sync "$pool_name"; then
            warning "zpool sync failed for pool: $pool_name"
        fi

        if ! zfs_output=$(zfs snapshot "${batch[@]}" 2>&1); then
            error "Failed to create snapshot: ${batch[*]}${zfs_output:+ ($zfs_output)}"
            snapshot_failed=1
            break
        fi
    done

    # Cleanup based on server role; always attempted, even after a failure
    if [[ "$SERVER_ROLE" == "primary" ]]; then
        if (( snapshot_failed == 0 )); then
            # stop_primary_backup reports its own errors; the snapshots
            # exist at this point, so a missing label is surfaced as a
            # warning rather than failing the whole run.
            if ! stop_primary_backup "$snapshot_name"; then
                warning "Snapshot $snapshot_name was created but its backup label could not be saved"
            fi
        else
            # Try to stop backup mode even on failure
            sudo -u "$PG_USER" psql -c "SELECT pg_backup_stop();" >/dev/null 2>&1 || true
        fi
    else
        # resume_standby_replay emits its own warning on failure
        resume_standby_replay || true
    fi

    if (( snapshot_failed == 1 )); then
        return 1
    fi

    # Record successful creation when running non-interactively (cron)
    if [[ "$INTERACTIVE" == "false" ]]; then
        if [[ "$effective_label" != "$label" ]]; then
            log "Successfully created quarterly snapshots on $SERVER_ROLE (requested: $label)"
        else
            log "Successfully created $label snapshots on $SERVER_ROLE"
        fi
    fi

    # Store the effective label for cleanup
    EFFECTIVE_LABEL="$effective_label"

    return 0
}
# Check whether some other process matching SCRIPT_NAME is running.
# Globals:  SCRIPT_NAME (read; matched by `pgrep -f` against full command lines)
# Returns:  0 if pgrep reports at least one PID other than this shell's ($$),
#           1 otherwise (including when pgrep finds nothing or fails).
is_script_running() {
    local pid

    while IFS= read -r pid; do
        # Skip blank lines and our own PID; anything else is another instance
        if [[ -n "$pid" ]] && [[ "$pid" != "$$" ]]; then
            return 0
        fi
    done < <(pgrep -f "$SCRIPT_NAME" 2>/dev/null || true)

    return 1
}

# Clean old snapshots: for every dataset keep only the newest <keep>
# "zfs-auto-snap_<label>-*" snapshots and destroy the rest (oldest first).
# On a primary, also remove backup label files whose snapshot no longer
# exists on any dataset.
#
# Arguments: $1 - label, $2 - number of snapshots to keep
# Globals:   DATASETS, EFFECTIVE_LABEL, KEEP_DEFAULTS, DRY_RUN, VERBOSE,
#            SERVER_ROLE, SNAPSHOT_LABEL_DIR, INTERACTIVE (read)
# Returns:   0 normally, 1 when the keep value is not a non-negative integer.
cleanup_snapshots() {
    local label="$1"
    local keep="$2"
    local total_removed=0
    local list_failed=0
    local dataset listing snapshot count to_delete i marker
    local -a snapshots=()
    local -A surviving=()   # snapshot names (part after "@") still present

    # Use the effective label if it was changed (monthly -> quarterly)
    if [[ -n "${EFFECTIVE_LABEL:-}" ]]; then
        label="$EFFECTIVE_LABEL"
        # For quarterly snapshots, always use 7 years retention
        if [[ "$label" == "quarterly" ]]; then
            keep="${KEEP_DEFAULTS[quarterly]}"
            log "Using quarterly retention: $keep snapshots (7 years)"
        fi
    fi

    # A non-numeric value would be evaluated as 0 by a numeric comparison
    # and destroy every snapshot; refuse it outright. Force base 10 so
    # values like "08" are not parsed as invalid octal.
    if ! [[ "$keep" =~ ^[0-9]+$ ]]; then
        error "Invalid keep value for $label snapshots: '$keep'"
        return 1
    fi
    keep=$((10#$keep))

    marker="@zfs-auto-snap_${label}-"

    for dataset in "${DATASETS[@]}"; do
        # Snapshots of this dataset, oldest first
        if ! listing=$(zfs list -t snapshot -o name -s creation -H "$dataset" 2>/dev/null); then
            warning "Could not list snapshots for $dataset"
            list_failed=1
            continue
        fi

        snapshots=()
        while IFS= read -r snapshot; do
            if [[ "$snapshot" == *"$marker"* ]]; then
                snapshots+=("$snapshot")
            fi
        done <<< "$listing"

        count=${#snapshots[@]}
        to_delete=0
        if (( count > keep )); then
            to_delete=$((count - keep))
            log "Found $count $label snapshots for $dataset, removing $to_delete old ones"
        elif [[ "$VERBOSE" == "true" ]]; then
            log "Found $count $label snapshots for $dataset, keeping all (limit: $keep)"
        fi

        for (( i = 0; i < count; i++ )); do
            snapshot="${snapshots[$i]}"

            if (( i < to_delete )); then
                if [[ "$DRY_RUN" == "true" ]]; then
                    log "[DRY RUN] Would destroy snapshot: $snapshot"
                else
                    if [[ "$VERBOSE" == "true" ]]; then
                        log "Destroying old snapshot: $snapshot"
                    fi
                    if zfs destroy "$snapshot" 2>/dev/null; then
                        total_removed=$((total_removed + 1))
                        continue
                    fi
                    warning "Failed to destroy snapshot: $snapshot"
                fi
            fi

            # Kept, dry-run, or failed to destroy: the snapshot still exists
            surviving["${snapshot#*@}"]=1
        done
    done

    # Clean up backup labels for primary server. A label is removed only
    # when its snapshot is gone from every dataset; file age is not used
    # because it says nothing about whether the snapshot still exists.
    if [[ "$SERVER_ROLE" == "primary" ]] && [[ "$DRY_RUN" != "true" ]] && [[ -d "$SNAPSHOT_LABEL_DIR" ]]; then
        if (( list_failed )); then
            warning "Skipping backup label cleanup: snapshot listing was incomplete"
        else
            log "Cleaning old backup label files from $SNAPSHOT_LABEL_DIR"
            local label_file snap_name
            for label_file in "$SNAPSHOT_LABEL_DIR"/zfs-auto-snap_"${label}"-*.label; do
                # An unmatched glob stays literal; skip anything not a file
                if [[ ! -f "$label_file" ]]; then
                    continue
                fi
                snap_name="${label_file##*/}"
                snap_name="${snap_name%.label}"
                if [[ -z "${surviving[$snap_name]+x}" ]]; then
                    if ! rm -f -- "$label_file"; then
                        warning "Failed to remove backup label: $label_file"
                    fi
                fi
            done
        fi
    fi

    # Record the cleanup when running non-interactively and snapshots were removed
    if [[ "$INTERACTIVE" == "false" ]] && [[ $total_removed -gt 0 ]] && [[ "$DRY_RUN" != "true" ]]; then
        log "Cleaned up $total_removed old $label snapshots on $SERVER_ROLE"
    fi
}

# Main script: parse options, validate them, take the instance lock,
# detect the server role, create the snapshots and prune old ones.
#
# Arguments: the script's command-line arguments (see usage)
# Globals:   INTERACTIVE, KEEP_DEFAULTS (read);
#            DRY_RUN, VERBOSE, QUIET, EFFECTIVE_LABEL, SERVER_ROLE,
#            LOCKFILE, SCRIPT_NAME (written)
# Exits:     0 for -h or when another instance holds the lock,
#            1 on invalid arguments, when zfs_error_repair is running,
#            or when snapshot creation fails.
#
# LOCKFILE may be preset (e.g. in the config file); otherwise it defaults
# to /var/run/<script name>.lock.
main() {
    local label=""
    local keep=""
    local opt
    local OPTIND=1
    DRY_RUN="false"
    VERBOSE="false"
    QUIET="false"
    EFFECTIVE_LABEL=""
    SERVER_ROLE=""

    # Default to quiet mode if running from cron
    if [[ "$INTERACTIVE" == "false" ]]; then
        QUIET="true"
    fi

    # Parse arguments. The leading ":" selects getopts' silent mode, which
    # sets OPTARG to the offending option letter; without it OPTARG is
    # unset on errors and `set -u` aborts before the message is printed.
    while getopts ":l:k:nvqh" opt; do
        case "$opt" in
            l)
                label="$OPTARG"
                ;;
            k)
                keep="$OPTARG"
                ;;
            n)
                DRY_RUN="true"
                ;;
            v)
                VERBOSE="true"
                QUIET="false"  # Verbose overrides quiet
                ;;
            q)
                QUIET="true"
                ;;
            h)
                usage
                exit 0
                ;;
            :)
                error "Option -$OPTARG requires an argument"
                usage
                exit 1
                ;;
            \?)
                error "Invalid option: -$OPTARG"
                usage
                exit 1
                ;;
        esac
    done

    # Validate label
    if [[ -z "$label" ]]; then
        error "Label is required"
        usage
        exit 1
    fi

    if [[ ! "${KEEP_DEFAULTS[$label]+exists}" ]]; then
        error "Invalid label: $label"
        error "Valid labels: ${!KEEP_DEFAULTS[*]}"
        exit 1
    fi

    # Set keep value if not provided
    if [[ -z "$keep" ]]; then
        keep="${KEEP_DEFAULTS[$label]}"
    fi

    # Validate keep value
    if ! [[ "$keep" =~ ^[0-9]+$ ]]; then
        error "Keep value must be a positive integer"
        exit 1
    fi
    # Force base 10: "08"/"09" would otherwise be rejected as invalid octal
    # by every later numeric comparison.
    keep=$((10#$keep))
    if (( keep < 1 )); then
        error "Keep value must be a positive integer"
        exit 1
    fi

    # Try to acquire exclusive lock, exit if can't. Done after argument
    # handling so -h and usage errors never depend on the lock.
    LOCKFILE="${LOCKFILE:-/var/run/$(basename "$0").lock}"
    exec 200>"$LOCKFILE"
    if ! flock -n 200; then
        echo "Another instance is already running"
        exit 0
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        log "DRY RUN MODE - No changes will be made"
    fi

    # Do not snapshot while the zfs_error_repair tool is running
    SCRIPT_NAME="zfs_error_repair"
    if is_script_running; then
        log "running: $SCRIPT_NAME"
        exit 1
    fi
    SCRIPT_NAME="$(basename "$0")"

    # Detect if we're on primary or standby
    detect_server_role

    log "Starting $label snapshot (keep=$keep) for PostgreSQL $SERVER_ROLE"

    # Create snapshots with appropriate method
    if ! create_snapshots "$label"; then
        error "Snapshot creation failed"
        exit 1
    fi

    # Cleanup old snapshots
    cleanup_snapshots "$label" "$keep"

    log "Snapshot operation completed successfully on $SERVER_ROLE"
}

# Refuse to run without root privileges (EUID 0)
if (( EUID != 0 )); then
    error "This script must be run as root"
    exit 1
fi

# Hand the original command-line arguments to main
main "$@"