Implement Unraid VM backup job
This commit is contained in:
Executable
+300
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env bash
#
# Backs up Agrarian development VMs on Unraid (libvirt/QEMU) into timestamped
# snapshot directories below BACKUP_ROOT.
#
# Environment overrides:
#   AGRARIAN_VM_BACKUP_ROOT               backup destination root
#   AGRARIAN_VM_BACKUP_LOCK               lock file guarding against overlapping runs
#   AGRARIAN_VM_SHUTDOWN_TIMEOUT_SECONDS  how long to wait for a graceful shutdown
#   AGRARIAN_VM_BACKUP_RETENTION_DAYS     age after which snapshots are pruned

# -E lets ERR traps propagate into functions; -e/-u/pipefail make failures fatal.
set -Eeuo pipefail

# Everything created by this script is readable by its owner only.
umask 077

# VMs backed up when no --vm option is given.
VM_NAMES_DEFAULT=("Windows-Builder" "Ubuntu-Codex")

BACKUP_ROOT="${AGRARIAN_VM_BACKUP_ROOT:-/mnt/user/backups/agrarian-game/vms}"
SNAPSHOT_ROOT="$BACKUP_ROOT/snapshots"
LOCK_FILE="${AGRARIAN_VM_BACKUP_LOCK:-/var/lock/agrarian-vm-backup.lock}"
SHUTDOWN_TIMEOUT_SECONDS="${AGRARIAN_VM_SHUTDOWN_TIMEOUT_SECONDS:-900}"
RETENTION_DAYS="${AGRARIAN_VM_BACKUP_RETENTION_DAYS:-120}"

# Runtime switches; parse_args flips them from the command line.
ALLOW_SHUTDOWN=0 # 1 = running VMs may be shut down for the backup
DRY_RUN=0        # 1 = log the actions instead of converting disks
START_AFTER=1    # 1 = restart VMs that this script shut down

# Mutable run state.
VM_NAMES=("${VM_NAMES_DEFAULT[@]}") # VMs selected for this run
STARTED_BY_SCRIPT=()                # VMs this script shut down and should restart
BACKED_UP_COUNT=0                   # number of VMs processed by backup_vm
|
||||
|
||||
# Writes one log line to stdout, prefixed with an ISO-8601 timestamp.
# Arguments: the message words; they are joined with single spaces.
log() {
  printf '[%s] %s\n' "$(date -Is)" "$*"
}
|
||||
|
||||
# Logs a fatal error and terminates the script with status 1.
# Arguments: the error message words.
# The message is sent to stderr so it is not mixed into regular stdout output.
die() {
  log "ERROR: $*" >&2
  exit 1
}
|
||||
|
||||
# Prints the command-line help text to stdout.
usage() {
  cat <<'USAGE'
Usage: agrarian_vm_backup_unraid.sh [--shutdown-running] [--no-start-after] [--dry-run] [--vm NAME ...]

Backs up Agrarian development VMs on Unraid to /mnt/user/backups.

Default behavior is safe for scheduled runs: running VMs are skipped. Use
--shutdown-running during a maintenance window to gracefully stop running VMs,
back them up, and start them again after a successful backup.
USAGE
}
|
||||
|
||||
# Parses the command line into the global run switches.
# Arguments: the script's command-line arguments.
# Globals written: ALLOW_SHUTDOWN, START_AFTER, DRY_RUN, VM_NAMES.
# Exits via die on an unknown argument or a missing/empty --vm value;
# prints usage and exits 0 for -h/--help.
parse_args() {
  local custom_vms=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --shutdown-running)
        ALLOW_SHUTDOWN=1
        ;;
      --no-start-after)
        START_AFTER=0
        ;;
      --dry-run)
        DRY_RUN=1
        ;;
      --vm)
        shift
        # An empty name can never match a VM and would later be used as a
        # directory name, so reject it together with a missing value.
        [[ $# -gt 0 && -n "$1" ]] || die "--vm requires a VM name"
        custom_vms+=("$1")
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        die "Unknown argument: $1"
        ;;
    esac
    shift
  done

  # Explicit --vm options replace the default VM list; they do not extend it.
  if [[ "${#custom_vms[@]}" -gt 0 ]]; then
    VM_NAMES=("${custom_vms[@]}")
  fi
}
|
||||
|
||||
# Verifies that every external command the script relies on is available.
# Exits via die, naming the first missing tool.
# flock is included on purpose: main treats a failing flock call as "another
# backup is running" and exits 0, so a missing flock would otherwise silently
# skip every backup.
require_tools() {
  local tool
  for tool in virsh qemu-img sha256sum find flock awk sed sort xargs; do
    command -v "$tool" >/dev/null || die "$tool is required"
  done
}
|
||||
|
||||
# Prints the libvirt state of a VM (first line of `virsh domstate`).
# Arguments: $1 - VM name.
# Outputs:   the state on stdout; nothing when virsh fails or prints nothing.
# Returns:   always 0. Callers detect an unknown VM through the empty output;
#            a non-zero status here would make `state="$(vm_state ...)"` abort
#            the script under `set -e` with pipefail before they can report it.
vm_state() {
  local state
  state="$(virsh domstate "$1" 2>/dev/null)" || state=""
  # Keep only the first line of the output.
  state="${state%%$'\n'*}"
  if [[ -n "$state" ]]; then
    printf '%s\n' "$state"
  fi
  return 0
}
|
||||
|
||||
# Blocks until a VM reports the state "shut off".
# Arguments: $1 - VM name.
# Globals read: SHUTDOWN_TIMEOUT_SECONDS.
# Polls vm_state every 5 seconds and exits via die once the accumulated wait
# reaches the timeout.
wait_for_shutdown() {
  local vm="$1"
  local waited=0

  while [[ "$(vm_state "$vm")" != "shut off" ]]; do
    if ((waited >= SHUTDOWN_TIMEOUT_SECONDS)); then
      die "$vm did not shut down within ${SHUTDOWN_TIMEOUT_SECONDS}s"
    fi
    sleep 5
    waited=$((waited + 5))
  done
}
|
||||
|
||||
# Makes sure a VM is stopped before its disks are copied.
# Arguments: $1 - VM name, $2 - state previously reported for the VM.
# Globals:   ALLOW_SHUTDOWN, DRY_RUN (read); STARTED_BY_SCRIPT (appended).
# Returns:   0 when the backup may proceed (VM was not running, was shut down,
#            or this is a dry run); 1 when the VM is running and shutdown is
#            not allowed, i.e. the VM must be skipped.
# Exits via die when the shutdown request fails or the VM does not stop in time.
shutdown_vm_for_backup() {
  local vm="$1"
  local state="$2"

  [[ "$state" == "running" ]] || return 0

  if [[ "$ALLOW_SHUTDOWN" != "1" ]]; then
    log "Skipping running VM without --shutdown-running: $vm"
    return 1
  fi

  log "Gracefully shutting down $vm for backup"

  if [[ "$DRY_RUN" == "1" ]]; then
    log "Dry run: would shut down $vm"
    return 0
  fi

  # Checked explicitly: callers invoke this function inside `if !`, where
  # `set -e` does not stop on a failing command.
  virsh shutdown "$vm" || die "Failed to request shutdown of $vm"

  # Record the VM before waiting. If the wait times out and the script exits,
  # the restart handler still knows about this VM and can start it again
  # should it have reached "shut off" by then.
  STARTED_BY_SCRIPT+=("$vm")
  wait_for_shutdown "$vm"
}
|
||||
|
||||
# Starts every VM recorded in STARTED_BY_SCRIPT that is currently "shut off".
# Globals read: START_AFTER, STARTED_BY_SCRIPT.
# Does nothing when START_AFTER is not 1 or no VM was recorded.
# A VM that fails to start is logged and the remaining VMs are still
# attempted, so one failure cannot leave the other VMs powered off.
# Returns: always 0.
restart_vms_started_by_script() {
  [[ "$START_AFTER" == "1" ]] || return 0
  # Also avoids expanding an empty array under `set -u` on older bash versions.
  ((${#STARTED_BY_SCRIPT[@]} > 0)) || return 0

  local vm
  for vm in "${STARTED_BY_SCRIPT[@]}"; do
    if [[ "$(vm_state "$vm")" == "shut off" ]]; then
      log "Starting $vm after backup"
      virsh start "$vm" || log "ERROR: failed to start $vm after backup"
    fi
  done
  return 0
}
|
||||
|
||||
# Lists the file-backed disks of a VM.
# Arguments: $1 - VM name.
# Outputs:   one "<target><TAB><source path>" line per row of
#            `virsh domblklist --details` whose type is "file", device is
#            "disk" and source is not "-".
# The source path is taken as the rest of the row after the third column, so
# paths that contain spaces are reported in full rather than cut at the first
# space.
disk_sources_for_vm() {
  local vm="$1"

  virsh domblklist "$vm" --details \
    | awk '
        $1 == "file" && $2 == "disk" && $4 != "-" {
          src = $0
          # Drop the Type, Device and Target columns plus their padding.
          sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", src)
          sub(/[ \t]+$/, "", src)
          print $3 "\t" src
        }'
}
|
||||
|
||||
# Copies the VM's NVRAM file into the backup, when the VM has one.
# Arguments: $1 - VM name, $2 - destination directory for this VM.
# The path is taken from the first <nvram>...</nvram> element found in the
# output of `virsh dumpxml`. Returns 0 without copying when no such element
# exists or the referenced file is missing.
copy_nvram_if_present() {
  local vm="$1"
  local dest="$2"
  local nvram

  nvram="$(virsh dumpxml "$vm" \
    | sed -n 's:.*<nvram[^>]*>\(.*\)</nvram>.*:\1:p' \
    | head -n 1)"
  [[ -n "$nvram" && -f "$nvram" ]] || return 0

  mkdir -p "$dest/nvram"
  # `--` keeps a path that starts with a dash from being read as an option.
  cp -a -- "$nvram" "$dest/nvram/"
}
|
||||
|
||||
# Backs up one VM into a snapshot directory.
# Arguments: $1 - VM name, $2 - snapshot directory being assembled.
# Globals:   DRY_RUN (read), BACKED_UP_COUNT (incremented per processed VM).
# Writes <snapshot>/vms/<vm>/xml/<vm>.xml, an optional nvram copy, and one
# compressed qcow2 image plus a `qemu-img info` report per file-backed disk.
# Returns 0 without backing up when the VM has to be skipped: it is running
# and shutdown is not allowed, or it is in a state other than "running" or
# "shut off".
# Exits via die when the VM is unknown, its disks cannot be listed, or a disk
# source file is missing.
backup_vm() {
  local vm="$1"
  local snapshot_dir="$2"
  local state

  # `|| true`: a failing lookup must reach the explicit check below instead of
  # aborting the script through `set -e`.
  state="$(vm_state "$vm")" || true
  [[ -n "$state" ]] || die "VM not found: $vm"

  log "Preparing VM backup: $vm ($state)"

  # Only a stopped VM has disks that are safe to copy. A running VM can be
  # shut down first; any other state (paused, in shutdown, ...) is skipped
  # rather than copied while the disks may still be in use.
  case "$state" in
    "running" | "shut off") ;;
    *)
      log "Skipping VM in unsupported state '$state': $vm"
      return 0
      ;;
  esac

  if ! shutdown_vm_for_backup "$vm" "$state"; then
    return 0
  fi

  local vm_dir="$snapshot_dir/vms/$vm"
  mkdir -p "$vm_dir/disks" "$vm_dir/xml"

  log "Saving VM XML: $vm"
  virsh dumpxml "$vm" > "$vm_dir/xml/$vm.xml"
  copy_nvram_if_present "$vm" "$vm_dir"

  # Collect the disk list up front so a failing listing is fatal; feeding the
  # loop from a process substitution would hide that failure.
  local disk_list
  disk_list="$(disk_sources_for_vm "$vm")" || die "Could not list disks for $vm"

  local disk_target disk_source disk_dest disk_base
  while IFS=$'\t' read -r disk_target disk_source; do
    [[ -n "$disk_source" ]] || continue
    [[ -f "$disk_source" ]] || die "Disk source not found for $vm: $disk_source"

    disk_base="$(basename "$disk_source")"
    disk_dest="$vm_dir/disks/${disk_target}-${disk_base}.qcow2"

    log "Converting disk for $vm: $disk_source -> $disk_dest"

    if [[ "$DRY_RUN" == "1" ]]; then
      log "Dry run: would qemu-img convert $disk_source"
      continue
    fi

    qemu-img convert -p -O qcow2 -c "$disk_source" "$disk_dest"
    qemu-img info "$disk_dest" > "$disk_dest.info.txt"
  done <<< "$disk_list"

  BACKED_UP_COUNT=$((BACKED_UP_COUNT + 1))
}
|
||||
|
||||
# Writes the snapshot metadata: MANIFEST.txt, a copy of selected Unraid
# configuration files, and SHA256SUMS covering every file in the snapshot.
# Arguments: $1 - snapshot directory, $2 - backup timestamp.
# Globals read: BACKUP_ROOT, ALLOW_SHUTDOWN, START_AFTER, VM_NAMES.
write_manifest() {
  local snapshot_dir="$1"
  local timestamp="$2"

  {
    echo "backup_timestamp=$timestamp"
    echo "host=$(hostname)"
    echo "backup_root=$BACKUP_ROOT"
    echo "allow_shutdown=$ALLOW_SHUTDOWN"
    echo "start_after=$START_AFTER"
    echo
    echo "[vms]"
    printf '%s\n' "${VM_NAMES[@]}"
    echo
    echo "[virsh_list]"
    virsh list --all
    echo
    echo "[disk_usage]"
    # Best effort: a missing mount point must not fail the backup.
    df -h /mnt/user /mnt/cache 2>/dev/null || true
  } > "$snapshot_dir/MANIFEST.txt"

  # Best effort: these files are copied only if they exist on this host.
  mkdir -p "$snapshot_dir/unraid-config"
  cp -a /boot/config/domain.cfg "$snapshot_dir/unraid-config/" 2>/dev/null || true
  cp -a /boot/config/vfio-pci.cfg "$snapshot_dir/unraid-config/" 2>/dev/null || true
  cp -a /boot/config/shares "$snapshot_dir/unraid-config/" 2>/dev/null || true

  # Checksums use paths relative to the snapshot so the directory can be
  # renamed or moved and still be verified. SHA256SUMS itself is excluded.
  # LC_ALL=C gives the same file order regardless of the caller's locale.
  (
    cd "$snapshot_dir" || exit 1
    find . -type f ! -name SHA256SUMS -print0 | LC_ALL=C sort -z | xargs -0 sha256sum
  ) > "$snapshot_dir/SHA256SUMS"
}
|
||||
|
||||
# Re-checks every file of a snapshot against its SHA256SUMS file.
# Arguments: $1 - snapshot directory.
# Globals read: DRY_RUN (verification is skipped in a dry run).
# Returns: 0 when all checksums match or on a dry run; otherwise logs an
#          error to stderr and returns 1.
verify_snapshot() {
  local snapshot_dir="$1"

  if [[ "$DRY_RUN" == "1" ]]; then
    return 0
  fi

  # The subshell keeps the directory change local to the check.
  if ! (cd "$snapshot_dir" && sha256sum -c SHA256SUMS >/dev/null); then
    log "ERROR: checksum verification failed for $snapshot_dir" >&2
    return 1
  fi
}
|
||||
|
||||
# Deletes snapshot directories directly below SNAPSHOT_ROOT whose
# modification time is more than RETENTION_DAYS days old.
# Globals read: DRY_RUN, SNAPSHOT_ROOT, RETENTION_DAYS.
# Does nothing on a dry run or when SNAPSHOT_ROOT does not exist.
# Exits via die when RETENTION_DAYS is not a non-negative integer, so a
# malformed setting can never reach `find -mtime`.
prune_old_snapshots() {
  if [[ "$DRY_RUN" == "1" ]]; then
    return 0
  fi
  [[ -d "$SNAPSHOT_ROOT" ]] || return 0
  [[ "$RETENTION_DAYS" =~ ^[0-9]+$ ]] \
    || die "Retention must be a non-negative number of days: $RETENTION_DAYS"

  local old
  # NUL-delimited so directory names with unusual characters are handled.
  find "$SNAPSHOT_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime "+$RETENTION_DAYS" -print0 \
    | while IFS= read -r -d '' old; do
      log "Pruning old VM backup: $old"
      rm -rf -- "$old"
    done
}
|
||||
|
||||
# Runs one backup: parse arguments, take the lock, back up each selected VM
# into a hidden ".incomplete-<timestamp>" directory, write and verify the
# manifest, then publish the directory under its timestamp and prune old
# snapshots.
# Arguments: the script's command-line arguments.
# Globals:   LOCK_FILE, SNAPSHOT_ROOT, BACKUP_ROOT, VM_NAMES, DRY_RUN,
#            BACKED_UP_COUNT (read); INCOMPLETE_SNAPSHOT_DIR (written).
# Exits 0 without doing anything when another run already holds the lock.
main() {
  parse_args "$@"
  require_tools

  # File descriptor 9 stays open for the life of the script, so the lock is
  # released automatically when the script exits.
  exec 9>"$LOCK_FILE"
  if ! flock -n 9; then
    log "Another Agrarian VM backup is already running; skipping"
    exit 0
  fi

  # Set before the trap is installed so the handler can always read it.
  INCOMPLETE_SNAPSHOT_DIR=""
  trap cleanup_on_exit EXIT

  local timestamp incomplete_dir final_dir
  timestamp="$(date +'%Y%m%d-%H%M%S')"
  incomplete_dir="$SNAPSHOT_ROOT/.incomplete-$timestamp"
  final_dir="$SNAPSHOT_ROOT/$timestamp"

  mkdir -p "$SNAPSHOT_ROOT"
  rm -rf -- "$incomplete_dir" "$final_dir"
  mkdir -p "$incomplete_dir"
  INCOMPLETE_SNAPSHOT_DIR="$incomplete_dir"

  log "Starting Agrarian VM backup: $timestamp"

  local vm
  for vm in "${VM_NAMES[@]}"; do
    backup_vm "$vm" "$incomplete_dir"
  done

  if [[ "$BACKED_UP_COUNT" == "0" ]]; then
    log "No VMs were eligible for backup; no snapshot published"
    rm -rf -- "$incomplete_dir"
    return 0
  fi

  write_manifest "$incomplete_dir" "$timestamp"
  verify_snapshot "$incomplete_dir"

  if [[ "$DRY_RUN" == "1" ]]; then
    log "Dry run complete; no snapshot published"
    rm -rf -- "$incomplete_dir"
    return 0
  fi

  # Publishing is a rename, so a snapshot only appears under its final name
  # once it is complete and verified.
  mv "$incomplete_dir" "$final_dir"
  INCOMPLETE_SNAPSHOT_DIR=""
  printf '%s\n' "$timestamp" > "$BACKUP_ROOT/LATEST.txt"
  prune_old_snapshots

  log "Agrarian VM backup completed: $final_dir"
}

# EXIT handler installed by main. Restarts the VMs this script shut down,
# then removes the unfinished snapshot directory of a failed run so partial
# disk images do not pile up below SNAPSHOT_ROOT.
cleanup_on_exit() {
  # VMs come first: getting them running again matters more than disk space.
  restart_vms_started_by_script \
    || log "ERROR: restarting VMs after backup reported a failure"

  if [[ -n "${INCOMPLETE_SNAPSHOT_DIR:-}" && -d "$INCOMPLETE_SNAPSHOT_DIR" ]]; then
    log "Removing incomplete snapshot: $INCOMPLETE_SNAPSHOT_DIR"
    rm -rf -- "$INCOMPLETE_SNAPSHOT_DIR" \
      || log "ERROR: could not remove $INCOMPLETE_SNAPSHOT_DIR"
  fi
}
|
||||
|
||||
# Script entry point: pass every command-line argument through to main.
main "$@"
|
||||
Reference in New Issue
Block a user