#!/bin/bash
#
# NAME
#
#   btrbk-verify - check latest btrbk snapshot/backup pairs
#
#
# SYNOPSIS
#
#   btrbk-verify [options] <command> [filter...]
#
#
# DESCRIPTION
#
#   Compare btrbk backups. Reads all files and attributes, and
#   compares checksums of source and target. Uses rsync(1) as backend,
#   in dry-run mode with all preserve options enabled.
#
#   Resolves snapshot/backup pairs by evaluating the output of
#   "btrbk list latest [filter...]". The filter argument is passed
#   directly to btrbk, see btrbk(1) FILTER STATEMENTS.
#
#   Restrictions:
#    - ".d..t...... ./" lines are ignored by default:
#      Root folder timestamps always differ.
#    - "cd+++++++++ .*" lines are ignored by default:
#      Nested subvolumes appear as new empty directories.
#    - btrbk raw targets are skipped
#    - rsync needs root in most cases (see --ssh-* options)
#
#   NOTE: Depending on your setup (hardware, btrfs mount options),
#   btrbk-verify may eat all your CPU power and use high bandwidth!
#   Consider nice(1), ionice(1).
#
#   Incomplete resource eater list:
#    - rsync: checksums, heavy disk I/O
#    - btrfs: decompression, encryption
#    - ssh: compression, encryption
#
#
# EXAMPLES
#
#   btrbk-verify latest /mnt/btr_pool
#
#     Verify latest backups from targets configured in
#     /etc/btrbk/btrbk.conf, matching the "/mnt/btr_pool" filter.
#
#   btrbk-verify all
#
#     Verify ALL backups from targets in /etc/btrbk/btrbk.conf.
#     NOTE: This really re-checksums ALL files FOR EACH BACKUP,
#     even if they were not touched between backups!
#
#   btrbk-verify latest -n -v -v
#
#     Print detailed log as well as command executed by this script,
#     without actually executing rsync commands (-n, --dry-run).
#
#   btrbk-verify --ssh-agent --ssh-user root --ssh-identity /etc/btrbk/ssh/id_ed25519 latest
#
#     Use "ssh -i /etc/btrbk/ssh/id_ed25519 -l root" for rsync rsh
#     (override settings from btrbk.conf), start an ssh-agent(1) for
#     this session and verify all latest snapshot / backups.
#
#
# SEE ALSO
#
#   btrbk(1), btrbk.conf(5), rsync(1), nice(1), ionice(1)
#
#
# AUTHOR
#
#   Axel Burri <axel@tty0.ch>
#

# strict mode: abort on unset variables (-u), on failing commands (-e)
# and on failures in any stage of a pipeline (pipefail)
set -euo pipefail

# btrbk version named in the hint printed when "btrbk list" output
# cannot be parsed
btrbk_version_min='0.32.0'

# rsync diffs which are tolerated by default (cleared by --strict):
# new empty directories (nested subvolumes), root folder timestamp
ignore_nested_subvolume_dir=1
ignore_root_folder_timestamp=1

# ssh overrides from the command line (empty: not overridden)
ssh_identity=
ssh_user=
ssh_start_agent=

# output control: verbosity level, rsync stats dump, dry-run mode
verbose=0
stats_enabled=
dryrun=

# Print the usage message to stderr and terminate the script.
#
# Arguments: $1 - exit status (optional, defaults to 0)
print_usage()
{
    # quoted delimiter: the help text is emitted literally
#80-----------------------------------------------------------------------------
    cat >&2 <<'EOF'
usage: btrbk-verify [options] <command> [btrbk-list-options...] [filter...]

options:
   -h, --help            display this help message
   -c, --config FILE     specify btrbk configuration file
   -n, --dry-run         perform a trial run without verifying subvolumes
   -v, --verbose         be verbose (set twice for debug loglevel)
   --stats               print rsync stats to stderr (--info=stats2)
   --strict              treat all rsync diffs as errors
   --ignore-acls         ignore acls when verifying subvolumes
   --ignore-xattrs       ignore xattrs when verifying subvolumes
   --ssh-identity FILE   override ssh_identity from btrbk.conf(5) with FILE,
                         and clear all other ssh_* options (use with --ssh-user)
   --ssh-user USER       override ssh_user from btrbk.conf(5) with USER, and
                         clear all other ssh_* options(use with --ssh-identity)
   --ssh-agent           start ssh-agent(1) and add identity

commands:
   latest    verify most recent snapshots and backups (btrbk list latest)
   all       verify all snapshots and backups (btrbk list backups)

For additional information, see <https://digint.ch/btrbk/>
EOF
#80-----------------------------------------------------------------------------
    exit "${1:-0}"
}

# btrbk "list" subcommand selected by the <command> argument
list_subcommand=
# arguments handed over to btrbk unchanged (filter, -c, -v, ...)
btrbk_args=()
# rsync options: dry-run (-n) with itemized output, checksums and all
# preserve flags enabled; "--ignore-xxx" removes "--xxx" from this list
rsync_args=(-n --itemize-changes --checksum -a --delete --numeric-ids --hard-links --acls --xattrs --devices --specials)

# Remove every element which is exactly equal to $1 from the global
# rsync_args array.
#
# Arguments: $1 - rsync option to remove (e.g. "--acls")
# Returns:   0 if at least one element was removed, 1 otherwise
rsync_args_remove()
{
    local flag=$1
    local arg
    local status=1
    local kept=()

    # ${arr[@]+"${arr[@]}"}: expands to nothing for an empty array,
    # also under "set -u" in bash versions before 4.4
    for arg in ${rsync_args[@]+"${rsync_args[@]}"}; do
        if [[ "$arg" == "$flag" ]]; then
            status=0
        else
            kept+=("$arg")
        fi
    done
    rsync_args=(${kept[@]+"${kept[@]}"})
    return "$status"
}

# Parse the command line. Everything which is not recognized here is
# passed on to btrbk.
while [[ "$#" -ge 1 ]]; do
    key="$1"
    case "$key" in
      latest)
          if [[ -n "$list_subcommand" ]]; then print_usage 2; fi
          list_subcommand="latest"
          ;;
      all)
          if [[ -n "$list_subcommand" ]]; then print_usage 2; fi
          list_subcommand="backups"
          ;;
      -n|--dry-run)
          dryrun=1
          ;;
      --stats)
          # enable rsync stats2 (transfer statistics)
          rsync_args+=(--info=stats2)
          stats_enabled=1
          ;;
      --strict)
          # treat all rsync diffs as errors:
          #  - empty directories (nested subvolumes)
          #  - root folder timestamp mismatch
          ignore_nested_subvolume_dir=
          ignore_root_folder_timestamp=
          ;;
      --ignore-*)  # --ignore-acls, --ignore-xattrs, --ignore-devices, ...
          # remove the "--xxx" flag from rsync_args for --ignore-xxx.
          # Only a complete option is removed: a partial name must not
          # leave a mangled argument behind ("--devices" -> "s").
          if ! rsync_args_remove "--${key#--ignore-}"; then
              echo "btrbk-verify: WARNING: no rsync option \"--${key#--ignore-}\" to remove for: $key" 1>&2
          fi
          ;;
      --ssh-identity)
          # use different ssh identity (-i option) for rsync rsh.
          # NOTE: this overrides all btrbk ssh_* options
          [[ "$#" -ge 2 ]] || print_usage 2
          ssh_identity="$2"
          shift
          ;;
      --ssh-user)
          # use different ssh user (-l option) for rsync rsh
          # NOTE: this overrides all btrbk ssh_* options
          [[ "$#" -ge 2 ]] || print_usage 2
          ssh_user="$2"
          shift
          ;;
      --ssh-agent)
          ssh_start_agent=1
          ;;
      -v|--verbose)
          verbose=$((verbose+1))
          btrbk_args+=("-v")
          ;;
      -h|--help)
          print_usage 0
          ;;
      *)
          # all other args are passed to btrbk (filter, -c,--config=FILE)
          btrbk_args+=("$key")
          ;;
    esac
    shift
done

# Logging helpers. All log output goes to stderr.

# Print all arguments, joined by single spaces, as one line.
log_line()
{
    local IFS=' '
    # printf instead of echo: a message starting with "-n" or "-e" is
    # printed literally and not parsed as an echo option
    printf '%s\n' "$*" 1>&2
}
# Print only if the matching switch is set: --stats, -v (once), -v
# (twice). These always return 0, so that they are safe to call as
# last command under "set -e".
log_stats ()  { [[ -n "$stats_enabled" ]] && log_line "$@" ; return 0; }
log_verbose() { [[ $verbose -ge 1 ]] && log_line "$@" ; return 0; }
log_debug()   { [[ $verbose -ge 2 ]] && log_line "$@" ; return 0; }
# Log a command line (all arguments) at debug level, marked with
# "(dryrun)" if dry-run mode is enabled.
log_cmd()
{
    local IFS=' '
    local prefix=""
    [[ -n "$dryrun" ]] && prefix="(dryrun) "
    # "$*": join the command words into a single string
    log_debug "### ${prefix}$*"
}

# Append one line to the transaction log (global tlog_text), using the
# same layout as the btrbk transaction log.
#
# Globals:   dryrun, target, source (read), tlog_text (appended)
# Arguments: $1 - status ("starting" is logged as "dryrun_starting"
#                 in dry-run mode)
#            $2 - comment (optional), appended after " # "
tlog()
{
    local status=$1
    local comment=${2:-}
    local timestamp line

    if [[ -n "$dryrun" ]] && [[ "$status" == "starting" ]]; then
        status="dryrun_starting"
    fi
    # best effort: keep logging (with empty timestamp) if date(1) fails
    timestamp=$(date --iso-8601=seconds) || true
    line="${timestamp} verify-rsync ${status} ${target} ${source} - -"
    if [[ -n "$comment" ]]; then
        line="$line # $comment"
    fi
    # terminate with a real newline: the log text is printed verbatim,
    # backslashes in subvolume paths must not be interpreted
    tlog_text+="${line}"$'\n'
    log_debug "$line"
}
# Print the collected transaction log, preceded by a header.
tlog_print()
{
    # tlog goes to stdout
    printf '\nTRANSACTION LOG\n---------------\n%s\n' "${tlog_text:-}"
}

# Parse "rsync -i,--itemize-changes" output from stdin.
# Prints the number of differences (ndiffs) to stdout, and detailed
# log messages to stderr. Lines which cannot be parsed are counted as
# differences. Everything following the first empty line is treated
# as "--info=stats2" output and is only logged (if --stats is set).
#
# Globals: ignore_root_folder_timestamp, ignore_nested_subvolume_dir (read)
# Returns: always 0
count_rsync_diffs()
{
    local ndiffs=0
    # itemized line: 11 flag characters, a space, then the path
    local itemize_match='^(...........) (.*)$'
    local stats_section=
    local rsync_line flags note

    # empty IFS: no word splitting, no trimming (read literal line).
    # The "|| [[ -n ... ]]" part also processes a final line which is
    # not terminated by a newline, instead of silently dropping it.
    while IFS= read -r rsync_line || [[ -n "$rsync_line" ]]; do
        if [[ -n "$stats_section" ]]; then
            # inside the stats dump: pass the line on to the stats log
            log_stats "${rsync_line}"
        elif [[ -z "$rsync_line" ]]; then
            # empty line denotes start of --info=stats output
            stats_section=1
            log_stats "--- BEGIN rsync stats2 dump ---"
        elif [[ "$rsync_line" =~ $itemize_match ]]; then
            flags="${BASH_REMATCH[1]}"
            if [[ -n "$ignore_root_folder_timestamp" ]] && [[ "$rsync_line" == ".d..t...... ./" ]]; then
                # timestamp of the root folder differs between source and target
                note="   # IGNORE reason=ignore_root_folder_timestamp"
            elif [[ -n "$ignore_nested_subvolume_dir" ]] && [[ "$flags" == "cd+++++++++" ]]; then
                # nested subvolumes appear as new empty directories ("cd+++++++++") in rsync (btrfs bug?)
                note="   # IGNORE reason=ignore_nested_subvolume_dir"
            else
                ndiffs=$((ndiffs+1))
                note="   # FAIL ndiffs=$ndiffs"
            fi
            log_verbose "[rsync] ${rsync_line}${note}"
        else
            ndiffs=$((ndiffs+1))
            log_line "btrbk-verify: ERROR: failed to parse rsync line: ${rsync_line}"
        fi
    done
    [[ -n "$stats_section" ]] && log_stats "--- END rsync stats2 dump ---"
    printf '%s\n' "$ndiffs"
    return 0
}

# Convert the rsh command resolved by btrbk into a remote shell
# command for rsync, printed to stdout.
#
# btrbk v0.27.0 sets source_rsh="ssh [flags...] ssh_user@ssh_host";
# this prints "ssh [flags...] -l ssh_user". If --ssh-user and/or
# --ssh-identity were given, a fresh "ssh -q ..." command is printed
# instead. Prints nothing if $1 is empty; exits with status 1 if $1
# cannot be parsed.
#
# Globals:   ssh_user, ssh_identity (read)
# Arguments: $1 - rsh command from btrbk (may be empty)
rsync_rsh()
{
    local btrbk_rsh=$1
    local user_at_host_match="(.*) ([a-z0-9_-]+)@([a-zA-Z0-9.-]+)$"
    local override

    # no rsh: nothing to print
    [[ -n "$btrbk_rsh" ]] || return 0

    if [[ -n "${ssh_user}${ssh_identity}" ]]; then
        # override btrbk.conf from command line arguments
        log_debug "Overriding all ssh_* options from btrbk.conf"
        override="ssh -q"
        if [[ -n "$ssh_identity" ]]; then
            override+=" -i '$ssh_identity'"
        fi
        if [[ -n "$ssh_user" ]]; then
            override+=" -l '$ssh_user'"
        fi
        printf '%s\n' "$override"
        return 0
    fi

    if [[ $btrbk_rsh =~ $user_at_host_match ]]; then
        # keep the flags, replace "user@host" by "-l user"
        printf '%s -l %s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
        return 0
    fi

    log_line "btrbk-verify: ERROR: failed to parse source_rsh: $btrbk_rsh"
    exit 1
}

# Announce the shutdown on stdout, then stop the agent by evaluating
# the shell commands printed by "ssh-agent -k".
kill_ssh_agent()
{
    printf '%s\n' "Stopping SSH agent"
    eval "$(ssh-agent -k)"
}

# Start an ssh-agent(1) for this session and add the identity given by
# --ssh-identity. Prints an error and the usage (exit status 2) if no
# identity was specified.
#
# Globals: ssh_identity (read), ssh_agent_running (set to 1)
start_ssh_agent()
{
    if [[ -z "$ssh_identity" ]]; then
        log_line "btrbk-verify: ERROR: no SSH identity specified for agent"
        print_usage 2
    fi

    printf '%s\n' "Starting SSH agent"
    # import the agent environment printed by ssh-agent into this shell
    eval "$(ssh-agent -s)"
    ssh_agent_running=1
    # install the exit handler before adding the key
    trap 'exit_trap_action' EXIT
    ssh-add "$ssh_identity"
}


# Extract key='value' pairs from a line of "btrbk list --format=raw"
# output and assign the values to the variables "<prefix><key>".
#
# All variables are reset to the empty string first. Values are
# expected in single quotes, with embedded single quotes written as
# '\'' (shell quoting); they are unquoted before being assigned. The
# values are assigned as plain data (printf -v), nothing is evaluated.
#
# Arguments: $1 - line to parse
#            $2 - prefix of the variable names to set (e.g. "R_")
#            $3 - space separated list of required keys
# Returns:   0 on success, 1 if a required key is missing
eval_btrbk_resolved_line()
{
    local line=" $1"
    local prefix=$2
    local required_keys=$3
    local key match value
    local escaped_quote="'\\''"
    local single_quote="'"

    # reset all variables first
    # (word splitting of the key list is intended)
    for key in $required_keys; do
        printf -v "${prefix}${key}" '%s' ""
    done
    for key in $required_keys; do
        # basic input validation: quoted value, in which single quotes
        # only occur as part of the '\'' sequence
        match=" ${key}='([^']*('\\\\''[^']*)*)'"
        if [[ $line =~ $match ]] ; then
            value=${BASH_REMATCH[1]}
            # undo shell quoting: '\'' stands for a single quote
            value=${value//"$escaped_quote"/$single_quote}
            printf -v "${prefix}${key}" '%s' "$value" || return 1
        else
            log_line "btrbk-verify: ERROR: Missing variable \"${key}\""
            return 1
        fi
    done
}

# EXIT trap handler: stop the ssh-agent if this script has started
# one, then print the transaction log if at least one -v was given.
exit_trap_action()
{
    if [[ -n "${ssh_agent_running:-}" ]]; then
        kill_ssh_agent
    fi
    [[ $verbose -gt 0 ]] && tlog_print
}

# a command is mandatory. Check this first, so that no ssh-agent is
# started just to print the usage message.
if [[ -z "$list_subcommand" ]]; then
    print_usage 2
fi

# start ssh-agent(1)
if [[ -n "$ssh_start_agent" ]]; then
    start_ssh_agent
fi

# run "btrbk list"
log_verbose "Resolving btrbk $list_subcommand"
# NOTE: ${arr[@]+"${arr[@]}"} expands to nothing for an empty array.
# A plain "${arr[@]}" is an "unbound variable" error under "set -u" in
# bash versions before 4.4.
btrbk_cmd=("btrbk" "list" "$list_subcommand" "--format=raw" "-q" ${btrbk_args[@]+"${btrbk_args[@]}"})
log_debug "### ${btrbk_cmd[*]}"
# NOTE: the exit status is captured using "||". With a plain
# assignment, a failing command substitution triggers "set -e" and
# terminates the script before the error is reported below.
btrbk_list_exitstatus=0
btrbk_list=$("${btrbk_cmd[@]}") || btrbk_list_exitstatus=$?
if [[ $btrbk_list_exitstatus -ne 0 ]]; then
    log_line "btrbk-verify: ERROR: Command execution failed (status=$btrbk_list_exitstatus): ${btrbk_cmd[*]}"
    exit 1
fi
log_debug "--- BEGIN btrbk list $list_subcommand ---"
log_debug "$btrbk_list"
log_debug "--- END btrbk list $list_subcommand ---"

# transaction log text and exit status of this script, both updated
# while the snapshot/backup pairs are verified
tlog_text=""
exitstatus=0
# trap on EXIT (includes all signals)
trap 'exit_trap_action' EXIT

# Main loop: one iteration for every line of the "btrbk list" output
# held in $btrbk_list. Every usable snapshot/backup pair is compared
# using rsync; the outcome is added to the transaction log (tlog) and
# reflected in $exitstatus:
#    1: a line of the btrbk output could not be parsed (loop aborted)
#   10: rsync failed, or reported differences, for at least one pair
# Skipped tasks are logged, and leave $exitstatus untouched.
while read -r btrbk_list_line; do
    # set R_xxx variables from format=raw line (table format "resolved")
    log_debug "Evaluating [btrbk list] line: $btrbk_list_line"
    [[ -z "$btrbk_list_line" ]] && continue
    if ! eval_btrbk_resolved_line "$btrbk_list_line" \
         "R_" "snapshot_subvolume target_subvolume source_host target_host target_type source_rsh target_rsh"
    then
        log_line "btrbk-verify: ERROR: Parse error of command output: ${btrbk_cmd[@]}"
        log_line "Make sure to have >=btrbk-${btrbk_version_min} installed!"
        exitstatus=1
        break
    fi

    # rsync arguments: "path/" for local, "host:path/" for remote subvolumes
    source="${R_snapshot_subvolume}/"
    target="${R_target_subvolume}/"
    [[ -n "$R_source_host" ]] && source="${R_source_host}:${source}"
    [[ -n "$R_target_host" ]] && target="${R_target_host}:${target}"

    # skip lines which do not describe a verifiable snapshot/backup pair
    if [[ -z "$R_snapshot_subvolume" ]]; then
        log_line "WARNING: Skipping task (missing snapshot): target=$target"
    elif [[ -z "$R_target_subvolume" ]]; then
        log_line "Skipping task (no target): source=$source"
    elif [[ "$R_target_type" != "send-receive" ]]; then
        log_line "Skipping task (target_type=$R_target_type): source=$source, target=$target"
    elif [[ -n "$R_source_rsh" ]] && [[ -n "$R_target_rsh" ]]; then
        log_line "WARNING: Skipping task (SSH for both source and target is not supported): target=$target"
    else
        log_line "Comparing [rsync] $source $target"

        # rsync rsh is either source_rsh or target_rsh or empty
        eff_rsh="$R_source_rsh"
        [[ -z "$eff_rsh" ]] && eff_rsh="$R_target_rsh"
        eff_rsh=$(rsync_rsh "$eff_rsh")

        # assemble the rsync command line: options, optional remote
        # shell (-e), source and target
        rsync_cmd=("rsync" "${rsync_args[@]}")
        [[ -n "$eff_rsh" ]] && rsync_cmd+=("-e" "$eff_rsh")
        rsync_cmd+=("${source}" "${target}")
        log_cmd "${rsync_cmd[@]}"
        # dry-run: the rsync command is logged above, but replaced by a
        # command producing no output
        [[ -n "$dryrun" ]] && rsync_cmd=("cat" "/dev/null")

        #rsync_cmd=("echo" '........... SHOULD/FAIL/');   # simulate failure
        #rsync_cmd=("echo" 'cd+++++++++ SHOULD/IGNORE/'); # simulate ignored

        # execute rsync
        # NOTE: "set -e" is disabled while the pipeline runs, in order
        # to get hold of its exit status instead of terminating here.
        # As "pipefail" is set, the status is non-zero if rsync fails.
        tlog "starting"
        set +e
        ndiffs=$("${rsync_cmd[@]}" | count_rsync_diffs)
        rsync_exitstatus=$?
        set -e

        # evaluate the result: command failure, differences found, or success
        if [[ $rsync_exitstatus -ne 0 ]] || [[ -z "$ndiffs" ]]; then
            log_line "btrbk-verify: ERROR: Command execution failed (status=$rsync_exitstatus): ${rsync_cmd[@]}"
            tlog "ERROR"
            exitstatus=10
        elif [[ $ndiffs -gt 0 ]]; then
            log_line "VERIFY FAIL (ndiffs=$ndiffs): ${source} ${target}"
            tlog "fail" "ndiffs=$ndiffs"
            exitstatus=10
        else
            log_verbose "Compare success (ndiffs=$ndiffs)"
            tlog "success"
        fi
    fi
done <<< "$btrbk_list"
#done < <(echo "$btrbk_list")  # more posix'ish

# NOTE: this triggers exit_trap_action()
exit $exitstatus
