#!/bin/bash
# pvmig - Proxmox VM Migration Tool (Cluster)
# Using zynclib for ZFS replication
VERSION="0.2.0"
set -euo pipefail

# Identifiers presumably consumed by daemon_lib (log prefix, lock names, ...)
# — confirm in daemon_lib.
TOOL_NAME="pvmig"
TOOL_LOGNAME="Migration"

# daemon_lib is expected to provide main(), log(), error(), ssh_src/ssh_dst
# and to set ZYNCLIB: none of these are defined in this file, and under
# 'set -u' the second 'source' would fail if ZYNCLIB were left unset.
DAEMON_LIB="${DAEMON_LIB:-/usr/lib/pvmzync/daemon_lib}"
source "$DAEMON_LIB" || { echo "ERROR: daemon_lib not found" >&2; exit 1; }
source "$ZYNCLIB"     || { echo "ERROR: zynclib not found" >&2; exit 1; }

# Print the CLI help text to stdout. The heredoc delimiter is intentionally
# unquoted; the text contains no '$' so no expansion can occur.
show_usage() {
  cat <<EOF
USAGE:
  pvmig <VMID> <REMOTE_NODE> [MODE] [OPTIONS]

DESCRIPTION:
  A specialized tool for safe VM/LXC migration between Proxmox cluster nodes.
  Utilizes 'rpool/migrate' as a staging area for ZFS datasets to ensure 
  data integrity and easy rollback before final cutover.

  By default pushes VM from local to remote. Use --pull to migrate a VM
  from the remote node to the local one.

CONSTRAINTS:
  - Supports ZFS-backed local disks and Shared Storage.

FEATURES:
  - Auto-Bootstrap: Automatically creates 'rpool/migrate' on both source 
    and target nodes if it does not exist (with canmount=noauto).
  - Safety: Keeps old data in 'rpool/migrate' on the source node after 
    migration for instant recovery if needed.

MODES:
  prepare   (Default) Performs initial synchronization. Creates snapshots 
            and sends ZFS datasets to the target node into 'rpool/migrate/'.
            The VM remains running on the source node.

  migrate   Executes the final cutover:
            1. Shuts down the VM/Container.
            2. Performs a final incremental ZFS sync.
            3. Swaps datasets: Renames 'rpool/migrate' to production paths 
               on target, and moves old source disks to 'rpool/migrate'.
            4. Relocates the PVE config file to the target node.
            5. Starts the VM on the target node if it was previously running.

  cleanup   Removes migration artifacts (staged datasets in 'rpool/migrate' 
            and temporary snapshots) for the specified VMID on both nodes.

OPTIONS:
  --pull           Pull VM from remote node to local (default: push).
  -f, --force      Override safety checks (e.g., overwrite existing staged data).
  --savelogs       Save all logs to pvmig_logs_<timestamp>.tar.gz in current directory.
  -h, --help       Display this help message.

EXAMPLES:
  pvmig 105 pve-02           # Stage disks for VM 105 (push to pve-02)
  pvmig 105 pve-02 migrate   # Perform final move and start on pve-02
  pvmig 105 pve-02 cleanup   # Purge old data after successful migration
  pvmig 105 pve-03 --pull    # Pull VM 105 from pve-03 to this node
  pvmig                      # Monitor running migration

SEE ALSO:
  pvmcp(1) - Copy VMs between non-cluster nodes
  pdscp(1) - Copy arbitrary ZFS datasets
EOF
}

# --- Migration helpers ---

# Prepare the @omix_migrate snapshot used for the final incremental sync.
# Reads globals: dev, src_ds, dst_ds (populated by disk_paths_cb).
# Drops any stale @omix_migrate on both sides, then snapshots the source.
create_final_snap() {
  local src_snap="${src_ds}@omix_migrate"
  local dst_snap="${dst_ds}@omix_migrate"
  log "Final snap for $dev"

  # Remove a leftover snapshot from a previous run before re-creating it.
  ! ssh_src "zfs list $src_snap &>/dev/null" \
    || ssh_src "zfs destroy $src_snap" \
    || error "Cant destroy old snapshot $src_snap on source"
  ssh_src "zfs snapshot $src_snap" || error "Cant create snapshot $src_snap on source"

  # The target must not hold an old @omix_migrate either — presumably it
  # would clash with the incoming incremental stream (confirm in zynclib).
  ! ssh_dst "zfs list $dst_snap &>/dev/null" \
    || ssh_dst "zfs destroy $dst_snap" \
    || error "Cant destroy old snapshot $dst_snap on target"
}

# Remove the temporary @omix_migrate snapshot from both nodes once the
# final sync is done. Failures are logged, not fatal — the snapshot is
# only a sync artifact at this point.
# Reads globals: dev, src_ds, dst_ds (populated by disk_paths_cb).
destroy_final_snap() {
  local src_snap="${src_ds}@omix_migrate"
  local dst_snap="${dst_ds}@omix_migrate"
  log "Destroy final snap for $dev"

  ! ssh_src "zfs list $src_snap &>/dev/null" \
    || ssh_src "zfs destroy $src_snap" \
    || log "WARNING: Cant destroy snapshot $src_snap on source"
  ! ssh_dst "zfs list $dst_snap &>/dev/null" \
    || ssh_dst "zfs destroy $dst_snap" \
    || log "WARNING: Cant destroy snapshot $dst_snap on target"
}

# --- Callbacks for sync_vm_disks ---

# Hook invoked (presumably by sync_vm_disks in zynclib — confirm there) for a
# disk on a storage type that cannot be replicated; aborts the whole run.
# Reads globals: dev, stg_vol (set by the caller).
disk_unsupported_cb() {
  error "$dev: migrate storage '$stg_vol' is not supported. Cant migrate"
}

# Resolve the source/target dataset paths for one disk and make sure the
# staging hierarchy '<pool>/migrate/<vmid>' exists on both nodes.
# Reads globals:  src_zpath, volname, vmid, dev (set by the sync driver).
# Writes globals: zpool, src_ds, dst_ds (consumed by the snapshot helpers)
# and appends "src_ds dst_ds" to volpathmap for the later rename passes.
disk_paths_cb() {
  local host_cmd   # was leaking into global scope before
  zpool="${src_zpath%%/*}"
  src_ds="${src_zpath}/${volname}"
  local migrate_ds="${zpool}/migrate"
  local vmid_ds="${migrate_ds}/${vmid}"
  for host_cmd in ssh_src ssh_dst; do
    # canmount=noauto keeps the staging datasets from being mounted at boot.
    # Fail loudly: previously a failed 'zfs create' aborted the script via
    # 'set -e' with no explanation.
    "$host_cmd" "zfs list $migrate_ds &>/dev/null || zfs create -o canmount=noauto $migrate_ds" \
      || error "Cant create staging dataset $migrate_ds ($host_cmd)"
    "$host_cmd" "zfs list $vmid_ds &>/dev/null   || zfs create -o canmount=noauto $vmid_ds" \
      || error "Cant create staging dataset $vmid_ds ($host_cmd)"
  done
  dst_ds="${vmid_ds}/${dev}"
  volpathmap+="$src_ds $dst_ds"$'\n'
}

# Per-disk hook run before each send: in migrate mode, refresh the
# @omix_migrate snapshot used for the final incremental sync.
# NOTE: uses 'if' instead of '[[ ... ]] && cmd' — as the last statement of
# the function, the bare test would make the callback return 1 in prepare
# mode and kill the script under 'set -e' at the caller's invocation.
disk_pre_sync_cb() {
  if [[ $mode = migrate ]]; then
    create_final_snap
  fi
}

# Per-disk hook run after each send: in migrate mode, drop the temporary
# @omix_migrate snapshots now that the final sync is complete.
# NOTE: uses 'if' instead of '[[ ... ]] && cmd' — as the last statement of
# the function, the bare test would make the callback return 1 in prepare
# mode and kill the script under 'set -e' at the caller's invocation.
disk_post_sync_cb() {
  if [[ $mode = migrate ]]; then
    destroy_final_snap
  fi
}

# Record whether the guest is currently running (global vm_state:
# running|stopped) and shut it down via qm/pct if needed. vm_state is
# consulted later to restart the VM on the target (or on rollback).
shutdown() {
  vm_state=stopped
  if ssh_src "$pvectl status $vmid" | grep -q running; then
    vm_state=running
  fi

  log "VM state: $vm_state"
  [[ $vm_state = running ]] || return 0

  log "Shutting down VM..."
  ssh_src "$pvectl shutdown $vmid" || error "Cant shutdown VM."
}

# ERR-trap handler for mode_migrate: restart the VM on the source if it was
# running, and move any already-renamed datasets on the target back into the
# migrate staging area.
on_error_exit() {
  local workpath migpath
  if [[ $vm_state = running ]]; then ssh_src "$pvectl start $vmid"; fi
  # Quoted here-string + 'read -r' so entries survive splitting/backslashes.
  while read -r workpath migpath; do
    # volpathmap ends with a newline, so the loop sees a final blank line;
    # skip it instead of issuing 'zfs rename' with empty arguments.
    [[ -n $workpath ]] || continue
    ssh_dst "zfs rename $workpath $migpath &>/dev/null" || log "WARNING: Failed to rename $workpath on target"
  done <<<"$volpathmap"
}

# Map the pseudo node name "local" to this machine's hostname; pass any
# other name through unchanged. Used to build /etc/pve/nodes/<node>/ paths.
# Fixed: the old '[[ ... ]] && hostname || echo $1' also ran the echo branch
# if 'hostname' failed, and unquoted 'echo $1' word-split the argument.
_lh() {
  if [[ $1 == "local" ]]; then
    hostname
  else
    printf '%s\n' "$1"
  fi
}

# --- Mode functions ---

# Initial staging pass: replicate every ZFS-backed disk into the migrate
# buffer on the target while the VM keeps running on the source.
mode_prepare() {
  log "=== PREPARE MODE ==="
  volpathmap=""
  sync_vm_disks
  log "=== PREPARE COMPLETE ==="

  # Suggest the exact follow-up command, preserving the push/pull direction.
  local hint=""
  if [[ $direction == pull ]]; then
    hint=" --pull"
  fi
  log "Next: pvmig $vmid $remote_host migrate$hint"
}

# Final cutover: stop the VM, run a last incremental sync, swap datasets
# into place on the target, relocate the PVE config, and restart the VM if
# it was running. On any error the ERR trap rolls back the target renames
# and restarts the VM on the source.
mode_migrate() {
  log "=== MIGRATE MODE ==="
  trap 'on_error_exit' ERR

  local downtime_start downtime_end downtime workpath migpath
  downtime_start=$(date +%s)
  shutdown
  volpathmap=
  sync_vm_disks

  log "Renaming datasets on target..."
  # Quoted here-string + 'read -r'; volpathmap ends with a newline, so the
  # loop sees a final blank line — skip it. Previously that blank line fed
  # 'zfs rename' empty arguments and the CRITICAL error aborted an
  # otherwise-successful cutover.
  while read -r workpath migpath; do
    [[ -n $workpath ]] || continue
    ssh_dst "zfs rename $migpath $workpath" || error "CRITICAL: Failed to rename $migpath on target"
  done <<<"$volpathmap"

  # Move PVE config to target node (pmxcfs is cluster-wide, accessible from either side)
  local pathpart=qemu-server; [[ "$vm_type" == "lxc" ]] && pathpart=lxc
  ssh_src "mv /etc/pve/nodes/$(_lh $src_host)/${pathpart}/${vmid}.conf /etc/pve/nodes/$(_lh $dst_host)/${pathpart}/${vmid}.conf" \
    || error "Cant move $vmconf"

  if [[ $vm_state = running ]]; then
    ssh_dst "$pvectl start $vmid" || error "Cant start $vmid on target"
  fi
  trap - ERR

  # Park the old source disks in the migrate buffer (kept for rollback and
  # removed later by cleanup mode). Failures here are non-fatal.
  while read -r workpath migpath; do
    [[ -n $workpath ]] || continue
    ssh_src "zfs rename $workpath $migpath" || log "WARNING: Failed to rename $workpath on source"
  done <<<"$volpathmap"

  downtime_end=$(date +%s)
  downtime=$((downtime_end - downtime_start))

  log "=== MIGRATION COMPLETE ==="
  log "VM $vmid is now on $dst_host node"
  log "Downtime: ${downtime}s"
  log "Old data in migrate buffer on source"
  log "Next: pvmig $vmid $remote_host cleanup$([[ $direction == pull ]] && echo ' --pull')"
}

# Purge the staged/parked datasets for this VMID from the migrate buffer on
# both nodes. Safe to run repeatedly.
mode_cleanup() {
  log "=== CLEANUP MODE ==="

  local zpath migds
  # Remove datasets in migrate buffer on both sides
  for zpath in "${storage_map[@]}"; do
    [[ "$zpath" == "#shared" ]] && continue
    [[ -z "$zpath" ]] && continue

    zpool="${zpath%%/*}"
    migds="${zpool:?Pool is empty}/migrate/${vmid:?VMID is empty}"

    # '|| true': the remote chain exits non-zero when the dataset is absent
    # (already cleaned, or never staged on that side), which previously
    # aborted the whole script under 'set -e' before cleanup finished.
    ssh_src "zfs list $migds &>/dev/null && zfs destroy -r $migds 2>/dev/null" || true
    ssh_dst "zfs list $migds &>/dev/null && zfs destroy -r $migds 2>/dev/null" || true
  done

  log "=== CLEANUP COMPLETE ==="
}

# --- Argument parsing ---

# Parse CLI arguments into globals. Positionals in order: VMID, REMOTE_NODE,
# then an optional mode keyword (prepare|migrate|cleanup); flags may appear
# anywhere. Exits via error() on invalid input.
parse_args() {
  # Fixed: 'direction' and 'SAVELOGS' were never initialized, so any run
  # without --pull/--savelogs crashed on the first "$direction" reference
  # ("unbound variable" under 'set -u').
  declare -g vmid remote_host vmconf vm_type vm_state \
    mode="prepare" direction="push" FORCE=false SAVELOGS=false \
    pvectl volpathmap zpool

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        exit 0
        ;;
      --pull)
        direction="pull"
        shift
        ;;
      -f|--force)
        FORCE=true
        shift
        ;;
      --savelogs)
        SAVELOGS=true
        shift
        ;;
      -*)
        error "Unknown option: $1"
        ;;
      *)
        # Positional arguments: first VMID, then remote node, then mode.
        if [[ -z "${vmid:-}" ]]; then
          vmid="$1"
        elif [[ -z "${remote_host:-}" ]]; then
          remote_host="$1"
        elif [[ "$1" =~ ^(prepare|migrate|cleanup)$ ]]; then
          mode="$1"
        else
          error "Unknown argument: $1"
        fi
        shift
        ;;
    esac
  done

  # Validate (error() exits, so these never fall through on failure)
  [[ -z "$vmid" ]] && error "VMID is required"
  [[ -z "$remote_host" ]] && error "REMOTE_NODE is required"
  [[ ! "$vmid" =~ ^[0-9]+$ ]] && error "VMID must be numeric"
  return 0
}

# --- Main tool logic ---

# Tool entry point, presumably invoked by daemon_lib's main() after argument
# parsing and logging setup — confirm in daemon_lib.
tool_main() {
  # Locate the guest config on the source node; whichever path survives the
  # 'ls' tells us whether this is a QEMU VM (qemu-server) or a container (lxc).
  vmconf=$(ssh_src "ls /etc/pve/qemu-server/${vmid}.conf /etc/pve/lxc/${vmid}.conf 2>/dev/null" || true)
  [[ -z "$vmconf" ]] && error "VM $vmid not found on source node"

  vm_type="kvm"
  [[ "$vmconf" =~ /lxc/ ]] && vm_type="lxc"
  # Pick the matching Proxmox control CLI: qm for VMs, pct for containers.
  pvectl=qm; [[ "$vm_type" == "lxc" ]] && pvectl=pct

  # parse_storage/parse_vmconf come from zynclib; they appear to populate
  # storage_map, vm_name and vm_disks from storage.cfg and the guest config
  # (defined outside this file — verify in zynclib).
  parse_storage "$(ssh_src "ls /etc/pve/nodes -m | sed 's/ //g'")" < <(ssh_src "cat /etc/pve/storage.cfg")
  parse_vmconf < <(ssh_src "cat $vmconf")
  log "VM: $vm_name ($vmid) | Direction: $direction"
  log "Disks: ${!vm_disks[@]}"

  # Dispatch; parse_args restricts mode to exactly these three values.
  case "$mode" in
    prepare) mode_prepare ;;
    migrate) mode_migrate ;;
    cleanup) mode_cleanup ;;
  esac
}

# main() is provided by daemon_lib (sourced above) — presumably it handles
# daemon/log setup and dispatches to parse_args/tool_main defined here.
main "$@"
