backup2mdisc/backup2mdisc.sh
first e0d1777965 Update backup2mdisc.sh
Signed-off-by: first <first@noreply.git.r21.io>
2025-01-24 06:46:04 +00:00

177 lines
5 KiB
Bash

#!/usr/bin/env bash
#
# backup2mdisc.sh
#
# Purpose:
# Creates multiple self-contained 100GB (default) backup archives, each encrypted
# independently. Useful for writing to large-capacity M-Discs where you want
# each disc to be decryptable on its own.
#
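# Requirements:
# - bash
# - gpg (for encryption)
# - lz4 (for fast compression)
# - tar
# - find with -printf support (GNU findutils)
# - numfmt (GNU coreutils; converts sizes such as 100G to bytes)
# - sha256sum (or 'shasum -a 256' on macOS/FreeBSD)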
#
# Usage:
# ./backup2mdisc.sh /path/to/source /path/to/destination [chunk_size]
#
# Example:
# ./backup2mdisc.sh /home/user/documents /mnt/backup 100G
#
# Tips:
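# - If you want to burn these archives to disc afterward, you can:
# genisoimage -udf -allow-limited-size -o chunk_001.iso chunk_001.tar.lz4.gpg
# (plain ISO9660 cannot represent a single file this large, hence the UDF options)
# Then burn the ISO using growisofs or hdiutil, etc.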
#
# - Each chunk is standalone. If chunk #3 is lost, the other chunks can still
# be decrypted and extracted; you lose only the files stored in chunk #3.
#
# - A single file larger than 'chunk_size' is not split: it ends up in a chunk
# of its own that exceeds 'chunk_size' (and so may not fit on one disc).
# You'd need to adjust the chunk size or handle such files differently.
#
# Exit on the first unhandled command failure (errexit).
set -o errexit
# Chunk size used when no third command-line argument is given.
DEFAULT_CHUNK_SIZE='100G'
#####################################
# HELPER FUNCTIONS #
#####################################
# Prints the SHA-256 checksum line ("<hash>  <path>") of the file given as $1.
# Uses sha256sum when it is on PATH and falls back to `shasum -a 256`
# (macOS/FreeBSD) otherwise. Returns the exit status of the tool that ran.
compute_sha256() {
  if ! command -v sha256sum >/dev/null 2>&1; then
    # macOS/FreeBSD fallback.
    shasum -a 256 "$1"
    return
  fi
  sha256sum "$1"
}
# Prints the command-line synopsis and an example to stderr, then terminates
# the script with exit status 1. It is only called for invalid invocations,
# so the text is a diagnostic and must not pollute stdout.
usage() {
  {
    echo "Usage: $0 /path/to/source /path/to/destination [chunk_size]"
    echo "Example: $0 /data /backup 100G"
  } >&2
  exit 1
}
#####################################
# MAIN PROGRAM #
#####################################
# Parse arguments: source directory, destination directory and an optional
# chunk size (falls back to DEFAULT_CHUNK_SIZE).
SOURCE_DIR="${1:-}"
DEST_DIR="${2:-}"
CHUNK_SIZE="${3:-$DEFAULT_CHUNK_SIZE}"
if [[ -z "$SOURCE_DIR" || -z "$DEST_DIR" ]]; then
  usage
fi
# All diagnostics go to stderr so they are not mixed into regular output.
if [[ ! -d "$SOURCE_DIR" ]]; then
  echo "ERROR: Source directory does not exist: $SOURCE_DIR" >&2
  exit 1
fi
if [[ ! -d "$DEST_DIR" ]]; then
  echo "ERROR: Destination directory does not exist: $DEST_DIR" >&2
  exit 1
fi
# Reject an unparsable chunk size up front, before the passphrase prompt and
# before any files are created. numfmt prints its own explanation to stderr.
if ! numfmt --from=iec -- "$CHUNK_SIZE" >/dev/null; then
  echo "ERROR: Invalid chunk size (expected a number with an optional K/M/G/T suffix, e.g. 100G): $CHUNK_SIZE" >&2
  exit 1
fi
# Prompt for GPG passphrase (do not store in script).
# IFS= and -r make read keep the input verbatim: without them, backslashes are
# interpreted and leading/trailing whitespace is stripped, so the archives
# would be encrypted with a different passphrase than the one typed.
printf '%s' "Enter GPG passphrase (will not be displayed): "
IFS= read -rs GPG_PASSPHRASE
echo
if [[ -z "$GPG_PASSPHRASE" ]]; then
  echo "ERROR: GPG passphrase must not be empty." >&2
  exit 1
fi
# Every run gets its own timestamped working directory below the destination.
WORK_DIR="${DEST_DIR}/individual_chunks_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$WORK_DIR"
# The manifest starts with a short header describing this run; an entry with
# the chunk's file name and checksum is appended for every chunk created later.
MANIFEST_FILE="${WORK_DIR}/manifest_individual_chunks.txt"
cat > "$MANIFEST_FILE" <<EOF
Manifest for individual-chunk backup
Source: $SOURCE_DIR
Timestamp: $(date)
Chunk size: $CHUNK_SIZE

EOF
# Temporary files: FILE_LIST holds every regular file below SOURCE_DIR,
# TMP_FILELIST holds the paths collected for the chunk currently being filled.
FILE_LIST=$(mktemp)
TMP_FILELIST=$(mktemp)
# Remove both temporary files on every exit path, including early failures
# under `set -e` (the variables are expanded when the trap fires).
trap 'rm -f -- "$FILE_LIST" "$TMP_FILELIST"' EXIT
# One "<size-in-bytes> <path>" line per file, sorted by size ascending.
# The size column is required by the chunking loop, so keep the -printf format
# even if you change the ordering (e.g. `sort -k2` to order by path instead).
find "$SOURCE_DIR" -type f -printf "%s %p\n" | sort -n > "$FILE_LIST"
# Chunk numbering starts at 1; CURRENT_CHUNK_SIZE accumulates the byte sizes of
# the files collected for the current chunk.
CHUNK_INDEX=1
CURRENT_CHUNK_SIZE=0
# Resets the accumulators for the next chunk: empties the per-chunk file list
# and sets the running byte count back to zero.
# Globals: TMP_FILELIST (read; the file is emptied), CURRENT_CHUNK_SIZE (written)
start_new_chunk() {
  # Truncate in place instead of rm + touch: the file keeps the owner-only
  # permissions it was created with, and its path never disappears between
  # two commands.
  : > "$TMP_FILELIST"
  CURRENT_CHUNK_SIZE=0
}
#######################################
# Packs the files listed in TMP_FILELIST into one encrypted chunk
# (tar -> lz4 -> gpg) inside WORK_DIR and appends the chunk's name and
# checksum to the manifest. Exits the script if a pipeline stage fails.
# Globals:   CHUNK_INDEX, TMP_FILELIST, WORK_DIR, MANIFEST_FILE,
#            GPG_PASSPHRASE (all read)
# Arguments: $1 - progress message printed before archiving starts
#######################################
finalize_chunk() {
  local progress_message=$1
  local chunk_name chunk_path chunk_sha256
  local -a stage_status

  printf -v chunk_name 'chunk_%03d.tar.lz4.gpg' "$CHUNK_INDEX"
  chunk_path="${WORK_DIR}/${chunk_name}"
  echo "$progress_message"

  # errexit is suspended around the pipeline so the exit status of every
  # stage can be inspected; with plain `set -e` only the last stage (gpg)
  # would be noticed and a failing tar or lz4 would go undetected.
  # The passphrase is handed to gpg on file descriptor 3 instead of on its
  # command line, where it would be visible in the process list.
  set +e
  tar -cf - -T "$TMP_FILELIST" \
    | lz4 -c \
    | gpg --batch --yes --cipher-algo AES256 --passphrase-fd 3 -c \
      3< <(printf '%s\n' "$GPG_PASSPHRASE") \
      > "$chunk_path"
  stage_status=("${PIPESTATUS[@]}")
  set -e

  # GNU tar uses exit status 1 for "some files changed while being archived";
  # the archive is still written, so that case is only reported as a warning.
  if (( stage_status[0] > 1 || stage_status[1] != 0 || stage_status[2] != 0 )); then
    echo "ERROR: Failed to create ${chunk_name} (exit status: tar=${stage_status[0]}, lz4=${stage_status[1]}, gpg=${stage_status[2]})" >&2
    # Do not leave a truncated chunk behind that could be mistaken for a backup.
    rm -f -- "$chunk_path"
    exit 1
  fi
  if (( stage_status[0] == 1 )); then
    echo "WARNING: tar reported non-fatal problems while creating ${chunk_name}" >&2
  fi

  # Compute checksum & record
  chunk_sha256=$(compute_sha256 "$chunk_path")
  {
    echo "Chunk #$CHUNK_INDEX -> ${chunk_name}"
    printf '%s\n' "$chunk_sha256"
    echo
  } >> "$MANIFEST_FILE"
}

# Remove the temporary file lists on every exit path, including failures.
trap 'rm -f -- "$FILE_LIST" "$TMP_FILELIST"' EXIT

# Convert the human-readable chunk size (e.g. "100G") to bytes once, instead of
# running numfmt for every file, and refuse values that are not a positive
# integer number of bytes.
CHUNK_LIMIT_BYTES=$(numfmt --from=iec -- "$CHUNK_SIZE") || CHUNK_LIMIT_BYTES=""
if [[ ! "$CHUNK_LIMIT_BYTES" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: Invalid chunk size: $CHUNK_SIZE" >&2
  exit 1
fi

# Initialize chunk
start_new_chunk

# Each line of FILE_LIST is "<size> <path>". IFS= and -r keep the line verbatim
# (trailing whitespace and backslashes in file names are preserved).
while IFS= read -r line; do
  FILE_SIZE=${line%% *}
  FILE_PATH=${line#* }

  # The size is used in arithmetic below, so it must be purely numeric. Lines
  # that do not match (e.g. the continuation of a file name containing a
  # newline) are skipped instead of being evaluated.
  if [[ ! "$FILE_SIZE" =~ ^[0-9]+$ || "$FILE_PATH" == "$line" ]]; then
    echo "WARNING: Skipping malformed file list entry: $line" >&2
    continue
  fi
  FILE_SIZE=$(( 10#$FILE_SIZE ))

  # If adding this file exceeds chunk size, finalize the current chunk first.
  # An empty chunk is never written: a file that alone exceeds the limit is
  # simply placed into the (still empty) current chunk.
  if (( CURRENT_CHUNK_SIZE + FILE_SIZE > CHUNK_LIMIT_BYTES )) && [[ -s "$TMP_FILELIST" ]]; then
    finalize_chunk "==> Creating chunk #$CHUNK_INDEX with the collected files..."
    CHUNK_INDEX=$(( CHUNK_INDEX + 1 ))
    start_new_chunk
  fi

  if (( FILE_SIZE > CHUNK_LIMIT_BYTES )); then
    echo "WARNING: File is larger than the chunk size ($CHUNK_SIZE); its chunk will be oversized: $FILE_PATH" >&2
  fi

  # Add current file to the chunk
  printf '%s\n' "$FILE_PATH" >> "$TMP_FILELIST"
  CURRENT_CHUNK_SIZE=$(( CURRENT_CHUNK_SIZE + FILE_SIZE ))
done < "$FILE_LIST"

# If TMP_FILELIST still has leftover files, finalize the last chunk
if [[ -s "$TMP_FILELIST" ]]; then
  finalize_chunk "==> Creating final chunk #$CHUNK_INDEX..."
fi

echo "=== All Chunks Created ==="
echo "Chunks and manifest are located in: $WORK_DIR"
echo "Manifest file: $MANIFEST_FILE"
# Cleanup (the EXIT trap above covers the failure paths as well)
rm -f -- "$FILE_LIST" "$TMP_FILELIST"
exit 0