#!/usr/bin/env bash
#
# Back up Agrarian development VMs on Unraid.
#
# For every selected VM this script saves the libvirt domain XML, the UEFI
# NVRAM file (when the domain defines one) and a compressed qcow2 copy of each
# file-backed disk into a timestamped snapshot directory, writes a manifest
# plus SHA256SUMS, and prunes snapshots older than the retention period.
#
# Environment overrides:
#   AGRARIAN_VM_BACKUP_ROOT               snapshot parent directory
#   AGRARIAN_VM_BACKUP_LOCK               lock file used to prevent overlap
#   AGRARIAN_VM_SHUTDOWN_TIMEOUT_SECONDS  max wait for a graceful shutdown
#   AGRARIAN_VM_BACKUP_RETENTION_DAYS     age after which snapshots are pruned
set -Eeuo pipefail
umask 077

VM_NAMES_DEFAULT=("Windows-Builder" "Ubuntu-Codex")
BACKUP_ROOT="${AGRARIAN_VM_BACKUP_ROOT:-/mnt/user/backups/agrarian-game/vms}"
SNAPSHOT_ROOT="$BACKUP_ROOT/snapshots"
LOCK_FILE="${AGRARIAN_VM_BACKUP_LOCK:-/var/lock/agrarian-vm-backup.lock}"
SHUTDOWN_TIMEOUT_SECONDS="${AGRARIAN_VM_SHUTDOWN_TIMEOUT_SECONDS:-900}"
RETENTION_DAYS="${AGRARIAN_VM_BACKUP_RETENTION_DAYS:-120}"
ALLOW_SHUTDOWN=0
DRY_RUN=0
START_AFTER=1
VM_NAMES=("${VM_NAMES_DEFAULT[@]}")
# VMs this script asked to shut down; the EXIT trap starts them again.
STARTED_BY_SCRIPT=()
BACKED_UP_COUNT=0

# Print a timestamped message on stdout.
log() {
  printf '[%s] %s\n' "$(date -Is)" "$*"
}

# Log an error and terminate the script with status 1.
die() {
  log "ERROR: $*"
  exit 1
}

# Print the command-line help text.
usage() {
  cat <<'USAGE'
Usage: agrarian_vm_backup_unraid.sh [--shutdown-running] [--no-start-after] [--dry-run] [--vm NAME ...]

Backs up Agrarian development VMs on Unraid to /mnt/user/backups.

Default behavior is safe for scheduled runs: running VMs are skipped.
Use --shutdown-running during a maintenance window to gracefully stop running
VMs, back them up, and start them again after a successful backup.
USAGE
}

# Parse command-line flags into the global option variables.
# Any --vm arguments replace the default VM list.
parse_args() {
  local custom_vms=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --shutdown-running)
        ALLOW_SHUTDOWN=1
        ;;
      --no-start-after)
        START_AFTER=0
        ;;
      --dry-run)
        DRY_RUN=1
        ;;
      --vm)
        shift
        [[ $# -gt 0 ]] || die "--vm requires a VM name"
        custom_vms+=("$1")
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        die "Unknown argument: $1"
        ;;
    esac
    shift
  done
  if [[ "${#custom_vms[@]}" -gt 0 ]]; then
    VM_NAMES=("${custom_vms[@]}")
  fi
}

# Reject non-numeric environment overrides before they reach arithmetic
# evaluation (wait_for_shutdown) or find -mtime (prune_old_snapshots).
validate_config() {
  local integer_re='^(0|[1-9][0-9]*)$'
  [[ "$SHUTDOWN_TIMEOUT_SECONDS" =~ $integer_re ]] \
    || die "AGRARIAN_VM_SHUTDOWN_TIMEOUT_SECONDS must be a non-negative integer: $SHUTDOWN_TIMEOUT_SECONDS"
  [[ "$RETENTION_DAYS" =~ $integer_re ]] \
    || die "AGRARIAN_VM_BACKUP_RETENTION_DAYS must be a non-negative integer: $RETENTION_DAYS"
}

# Abort unless every external command the script invokes is available.
# flock is included so that a missing binary is not misreported by main() as
# "another backup is already running".
require_tools() {
  local tool
  for tool in virsh qemu-img sha256sum find flock awk sed sort xargs; do
    command -v "$tool" >/dev/null || die "$tool is required"
  done
}

# Print the first line of `virsh domstate` for VM $1, or an empty line when
# the lookup fails. Always returns 0: with pipefail/errexit a non-zero status
# here would terminate the caller's `state="$(vm_state ...)"` assignment
# before it could report "VM not found".
vm_state() {
  local state
  state="$(virsh domstate "$1" 2>/dev/null)" || state=""
  printf '%s\n' "${state%%$'\n'*}"
}

# Poll VM $1 every 5 seconds until it reports "shut off"; die once
# SHUTDOWN_TIMEOUT_SECONDS have been waited.
wait_for_shutdown() {
  local vm="$1"
  local waited=0
  while [[ "$(vm_state "$vm")" != "shut off" ]]; do
    if (( waited >= SHUTDOWN_TIMEOUT_SECONDS )); then
      die "$vm did not shut down within ${SHUTDOWN_TIMEOUT_SECONDS}s"
    fi
    sleep 5
    waited=$((waited + 5))
  done
}

# Make VM $1 (currently in state $2) safe to copy.
# Returns 0 when the backup may proceed, 1 when the VM must be skipped.
# NOTE: backup_vm calls this inside `if ! ...`, where errexit is suppressed,
# so every command whose failure matters is checked explicitly.
shutdown_vm_for_backup() {
  local vm="$1"
  local state="$2"

  case "$state" in
    "shut off")
      return 0
      ;;
    running)
      ;;
    *)
      # Paused, shutting-down, etc.: the disks are not known to be released.
      log "Skipping VM in state '$state' (not running or shut off): $vm"
      return 1
      ;;
  esac

  if [[ "$ALLOW_SHUTDOWN" != "1" ]]; then
    log "Skipping running VM without --shutdown-running: $vm"
    return 1
  fi

  log "Gracefully shutting down $vm for backup"
  if [[ "$DRY_RUN" == "1" ]]; then
    log "Dry run: would shut down $vm"
    return 0
  fi

  # Register before requesting the shutdown so the EXIT trap can restart the
  # VM even if the wait below times out and the guest powers off later.
  STARTED_BY_SCRIPT+=("$vm")
  virsh shutdown "$vm" || die "Failed to request shutdown of $vm"
  wait_for_shutdown "$vm"
  return 0
}

# EXIT trap: start every VM this script shut down that is still "shut off".
# A failed start is logged and does not prevent the remaining VMs from being
# started.
restart_vms_started_by_script() {
  [[ "$START_AFTER" == "1" ]] || return 0
  (( ${#STARTED_BY_SCRIPT[@]} > 0 )) || return 0
  local vm
  for vm in "${STARTED_BY_SCRIPT[@]}"; do
    if [[ "$(vm_state "$vm")" == "shut off" ]]; then
      log "Starting $vm after backup"
      virsh start "$vm" || log "ERROR: failed to start $vm after backup"
    fi
  done
}

# Print "<target>TAB<source>" for each file-backed disk of VM $1.
# The source is taken as everything after the third column so that image
# paths containing spaces are not truncated.
disk_sources_for_vm() {
  local vm="$1"
  virsh domblklist "$vm" --details \
    | awk '
        $1 == "file" && $2 == "disk" && $4 != "-" {
          source = $0
          sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", source)
          sub(/[ \t]+$/, "", source)
          print $3 "\t" source
        }'
}

# Copy the NVRAM file referenced by the domain XML of VM $1 into $2/nvram.
# Does nothing when the XML has no <nvram> path or the file does not exist.
copy_nvram_if_present() {
  local vm="$1"
  local dest="$2"
  local nvram
  nvram="$(virsh dumpxml "$vm" \
    | sed -n 's:.*<nvram[^>]*>\(.*\)</nvram>.*:\1:p' \
    | head -n 1)"
  [[ -n "$nvram" && -f "$nvram" ]] || return 0
  mkdir -p "$dest/nvram"
  cp -a -- "$nvram" "$dest/nvram/"
}

# Back up VM $1 into snapshot directory $2: domain XML, NVRAM and a
# compressed qcow2 copy of every file-backed disk. Skipped VMs return 0
# without incrementing BACKED_UP_COUNT.
backup_vm() {
  local vm="$1"
  local snapshot_dir="$2"
  local state
  state="$(vm_state "$vm")"
  [[ -n "$state" ]] || die "VM not found: $vm"

  log "Preparing VM backup: $vm ($state)"
  if ! shutdown_vm_for_backup "$vm" "$state"; then
    return 0
  fi

  local vm_dir="$snapshot_dir/vms/$vm"
  mkdir -p "$vm_dir/disks" "$vm_dir/xml"

  log "Saving VM XML: $vm"
  virsh dumpxml "$vm" > "$vm_dir/xml/$vm.xml"
  copy_nvram_if_present "$vm" "$vm_dir"

  # Capture the list first: a process substitution would hide a failing
  # `virsh domblklist` and silently produce a snapshot without disks.
  local disk_list
  disk_list="$(disk_sources_for_vm "$vm")" || die "Failed to list disks for $vm"

  local disk_target disk_source disk_dest disk_base
  while IFS=$'\t' read -r disk_target disk_source; do
    [[ -n "$disk_source" ]] || continue
    [[ -f "$disk_source" ]] || die "Disk source not found for $vm: $disk_source"
    disk_base="$(basename "$disk_source")"
    disk_dest="$vm_dir/disks/${disk_target}-${disk_base}.qcow2"
    log "Converting disk for $vm: $disk_source -> $disk_dest"
    if [[ "$DRY_RUN" == "1" ]]; then
      log "Dry run: would qemu-img convert $disk_source"
      continue
    fi
    # stdin is detached so the commands cannot consume the disk list.
    qemu-img convert -p -O qcow2 -c "$disk_source" "$disk_dest" </dev/null
    qemu-img info "$disk_dest" > "$disk_dest.info.txt" </dev/null
  done <<< "$disk_list"

  BACKED_UP_COUNT=$((BACKED_UP_COUNT + 1))
}

# Write MANIFEST.txt, a best-effort copy of the Unraid VM/share configuration
# and SHA256SUMS (covering every file except itself) into snapshot dir $1.
# $2 is the backup timestamp recorded in the manifest.
write_manifest() {
  local snapshot_dir="$1"
  local timestamp="$2"
  {
    echo "backup_timestamp=$timestamp"
    echo "host=$(hostname)"
    echo "backup_root=$BACKUP_ROOT"
    echo "allow_shutdown=$ALLOW_SHUTDOWN"
    echo "start_after=$START_AFTER"
    echo
    echo "[vms]"
    printf '%s\n' "${VM_NAMES[@]}"
    echo
    echo "[virsh_list]"
    virsh list --all
    echo
    echo "[disk_usage]"
    df -h /mnt/user /mnt/cache 2>/dev/null || true
  } > "$snapshot_dir/MANIFEST.txt"

  # Best effort: these files may be absent, which must not fail the backup.
  mkdir -p "$snapshot_dir/unraid-config"
  cp -a /boot/config/domain.cfg "$snapshot_dir/unraid-config/" 2>/dev/null || true
  cp -a /boot/config/vfio-pci.cfg "$snapshot_dir/unraid-config/" 2>/dev/null || true
  cp -a /boot/config/shares "$snapshot_dir/unraid-config/" 2>/dev/null || true

  (
    cd "$snapshot_dir"
    find . -type f ! -name SHA256SUMS -print0 | sort -z | xargs -0 sha256sum
  ) > "$snapshot_dir/SHA256SUMS"
}

# Re-check every file in snapshot dir $1 against SHA256SUMS; die on mismatch.
# Skipped in dry-run mode.
verify_snapshot() {
  local snapshot_dir="$1"
  [[ "$DRY_RUN" == "1" ]] && return 0
  (
    # `&&` instead of relying on errexit, which is off on the left of `||`.
    cd "$snapshot_dir" && sha256sum -c SHA256SUMS >/dev/null
  ) || die "Snapshot verification failed: $snapshot_dir"
}

# Delete first-level directories under SNAPSHOT_ROOT whose mtime is older than
# RETENTION_DAYS days. Skipped in dry-run mode.
prune_old_snapshots() {
  [[ "$DRY_RUN" == "1" ]] && return 0
  [[ -d "$SNAPSHOT_ROOT" ]] || return 0
  find "$SNAPSHOT_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime "+$RETENTION_DAYS" -print0 \
    | while IFS= read -r -d '' old; do
        log "Pruning old VM backup: $old"
        rm -rf -- "${old:?}"
      done
}

# Entry point: take the lock, back up each VM into an ".incomplete-*"
# directory, then publish it by renaming and update LATEST.txt.
main() {
  parse_args "$@"
  validate_config
  require_tools

  exec 9>"$LOCK_FILE"
  flock -n 9 || {
    log "Another Agrarian VM backup is already running; skipping"
    exit 0
  }

  trap restart_vms_started_by_script EXIT

  local timestamp incomplete_dir final_dir
  timestamp="$(date +'%Y%m%d-%H%M%S')"
  incomplete_dir="$SNAPSHOT_ROOT/.incomplete-$timestamp"
  final_dir="$SNAPSHOT_ROOT/$timestamp"

  mkdir -p "$SNAPSHOT_ROOT"
  rm -rf -- "$incomplete_dir" "$final_dir"
  mkdir -p "$incomplete_dir"

  log "Starting Agrarian VM backup: $timestamp"
  local vm
  for vm in "${VM_NAMES[@]}"; do
    backup_vm "$vm" "$incomplete_dir"
  done

  if [[ "$BACKED_UP_COUNT" == "0" ]]; then
    log "No VMs were eligible for backup; no snapshot published"
    rm -rf -- "$incomplete_dir"
    return 0
  fi

  write_manifest "$incomplete_dir" "$timestamp"
  verify_snapshot "$incomplete_dir"

  if [[ "$DRY_RUN" == "1" ]]; then
    log "Dry run complete; no snapshot published"
    rm -rf -- "$incomplete_dir"
    return 0
  fi

  mv "$incomplete_dir" "$final_dir"
  printf '%s\n' "$timestamp" > "$BACKUP_ROOT/LATEST.txt"
  prune_old_snapshots
  log "Agrarian VM backup completed: $final_dir"
}

main "$@"