#!/bin/bash
###
# Part of a stupid script to backup some stuff
#
# This bootstrap.sh file does nothing by itself but loads useful stuff.
#
# This file is loaded from 'backup-everything.sh' or 'rotate.sh'
#
# Author: 2020-2024 Valerio Bozzolan, contributors
# License: MIT
##

# Provenance: this text was recovered from a unified diff
#   diff --git a/bootstrap.sh b/bootstrap.sh (index 58009c0..cdeef5e, mode 100644)
# and shows bootstrap.sh as it reads with that patch applied.

# directory containing this file (falls back to the working directory
# when the path of the sourced file has no directory component)
export DIR="${BASH_SOURCE[0]%/*}"
if [[ ! -d "$DIR" ]]; then DIR="$PWD"; fi

# INTERACTIVE is 1 when the standard input is a terminal, empty otherwise
export INTERACTIVE=
if [ -t 0 ]; then
	INTERACTIVE=1
fi

#
# Check if this is the quiet mode
#
# Default - not quiet.
#
# Actually we are in quiet mode if it's not interactive.
# This lazy behavior is to avoid stupid emails from the crontab
# without the need to specify some --quiet etc.
# I've not created a --quiet flag because nobody is needing it.
#
# NOTE(review): an older comment here said that WARN and ERROR messages
# are still shown in quiet mode, but printthis() in this file checks only
# QUIET and not the severity - confirm which behavior is wanted.
#
# Edit your options - do not edit here.
#
#QUIET=

#
# Eventually write a log file
#
# Default - write a log file.
#
# The default is applied only when WRITELOG is not already set, so a
# caller that assigns WRITELOG before sourcing this file (rotate.sh sets
# WRITELOG=0) keeps its choice instead of being silently overwritten.
#
# Edit your options - do not edit here.
#
export WRITELOG="${WRITELOG-1}"

# path to the instructions file
export INSTRUCTIONS="$DIR/backup-instructions.conf"

# path to the configuration file
export CONFIG="$DIR/options.conf"

# no config no party
if [ ! -f "$CONFIG" ]; then
	# this is a diagnostic, so it goes to the standard error
	echo "missing options expected in $CONFIG" >&2
	exit 1
fi

# default mysql commands
#   --batch:  avoid fancy columns (auto-enabled, but better to specify it)
#   --silent: avoid the column name to be included
export MYSQL="mysql --batch --silent"
export MYSQLDUMP="mysqldump --routines --triggers"

# default rsync command
#   --archive: Try to keep all the properties
#   --fuzzy:   Try to check if a file was renamed instead of delete and download a new one
#              It's efficient for example with log rotated files.
#   --delete:        Delete the destination files if not present in the source
#                    NOTE: we want this behaviour but it's not a good idea together with --fuzzy
#                    that's why we do not use --delete but we use the next flags
#   --delay-updates  Put all updated files into place at end (useful with fuzzy and delete modes)
#   --delete-delay   Delete after everything (useful with fuzzy and delete modes)
#                    NOTE: sometime some data is kept in damn .~tmp~ directories
#                    So we are deprecating --delete-delay, and going back to --delete
#                    and so removing --fuzzy
#   --hard-links     Try to look for hard links during the transfer to do not copy separate files
#RSYNC="rsync --archive --fuzzy --delay-updates --delete-delay --hard-links"

# default rsync command
#   --archive:          Try to keep all the properties
#   --delete:           Delete the destination files if not present in the source
#   --hard-links        Try to look for hard links during the transfer to do not copy separate files
#   --no-inc-recursive  Disables the new incremental recursion algorithm of the --recursive option.
#                       This makes rsync scan the full file list before it begins to transfer files.
#                       This may improve how hard links are copied successfully.
export RSYNC="rsync --archive --delete --hard-links --no-inc-recursive"

# rsync used in remote transfers
#   --compress  Use more CPU to save network bandwidth
export RSYNC_REMOTE="$RSYNC --compress"

# default base backup directory for all backups
export BASE="/home/backups"

# default box name
BOX="$(hostname)"
export BOX

# set to 1 to avoid any disservice (e.g. systemctl stop/start)
export NO_DISSERVICE=

# set to 1 to do nothing
export PORCELAIN=

# How many hours should pass between each execution.
# This is just a sane default to avoid daylight saving issues.
export HOURS_INTERVAL=12

# include the configuration to eventually override some options
# shellcheck source=config.sh
. "$CONFIG"

# as default, if not interactive, set quiet mode
if [ -z "$QUIET" ] && [ "$INTERACTIVE" != 1 ]; then
	QUIET=1
fi

# full pathnames to the backup directories
export BASEBOX="$BASE/$BOX"
export DAILY="$BASEBOX/daily"
export DAILY_FILES="$DAILY/files"
export DAILY_DATABASES="$DAILY/databases"
export DAILY_LASTLOG="$DAILY/last.log"
export DAILY_LASTTIME="$DAILY/last.timestamp"
export DAILY_STARTTIME="$DAILY/start.timestamp"

# apply the porcelain to the rsync command
if [ "$PORCELAIN" = 1 ]; then
	RSYNC="$RSYNC --dry-run"
	RSYNC_REMOTE="$RSYNC_REMOTE --dry-run"
fi

# set default backup_last_log() lines
if [ -z "$BACKUP_LAST_LOG_LINES" ]; then
	BACKUP_LAST_LOG_LINES=8000
fi

##
# Receive in input a file path, and a number of hours, and check whether
# enough time (in hours) has passed or not.
#
# If the file was never created, we assume that enough time has passed.
#
# Exits with status 2 when the hours argument is missing.
#
# @param string timestamp_file
# @param int    hours
# @return 0 when enough time has passed, non-zero otherwise
#
function are_enough_hours_passed() {

	# No args, no party.
	# Note that the file argument is checked by are_enough_seconds_passed().
	local timestamp_file="$1"
	local expected_hours="$2"
	if [ -z "$expected_hours" ]; then
		echo "Error: Missing argument expected hours." >&2
		exit 2
	fi

	local expected_seconds=$((expected_hours * 3600))

	are_enough_seconds_passed "$timestamp_file" "$expected_seconds"
}

##
# Receive in input a file path, and a number of seconds, and check whether
# enough time (in seconds) has passed or not.
#
# If the file was never created, we assume that enough time has passed.
#
# Exits with status 2 when an argument is missing or when the file does
# not contain a plausible Unix timestamp.
#
# @param string timestamp_file
# @param int    seconds
# @return 0 when enough time has passed, non-zero otherwise
#
function are_enough_seconds_passed() {

	# No args, no party.
	# The threshold is kept in a local variable: the comparison below must
	# use the argument and not whatever "expected_seconds" the caller has.
	local timestamp_file="$1"
	local expected_seconds="$2"
	if [ -z "$timestamp_file" ]; then
		echo "Error: Missing argument timestamp file." >&2
		exit 2
	fi
	if [ -z "$expected_seconds" ]; then
		echo "Error: Missing argument expected seconds." >&2
		exit 2
	fi

	# The file doesn't exist. Nothing special to check: enough time has passed.
	if [ ! -f "$timestamp_file" ]; then
		return 0
	fi

	# Read the file, if it makes sense.
	# It must be made of digits only, otherwise the arithmetic below
	# would operate on garbage.
	local last_timestamp
	last_timestamp=$(<"$timestamp_file")
	if ! [[ "$last_timestamp" =~ ^[0-9]+$ ]] || [ "$last_timestamp" -lt 1000 ]; then
		echo "Error: Bad format in file $timestamp_file" >&2
		exit 2
	fi

	local current_timestamp
	current_timestamp=$(date +%s)
	local diff_seconds=$((current_timestamp - last_timestamp))

	# If enough time has passed, return true.
	[ "$diff_seconds" -ge "$expected_seconds" ]
}

##
# Receive in input a file path, and write there the current timestamp.
#
# Exits with status 1 when the argument is missing.
#
# @param string timestamp_file
#
function write_timestamp() {

	# No arg, no party.
	local timestamp_file="$1"
	if [ -z "$timestamp_file" ]; then
		echo "Error: Missing timestamp file argument." >&2
		exit 1
	fi

	# Write the current Unix timestamp.
	date +%s > "$timestamp_file"
}

###
# Print something
#
# The message is printed on the standard output unless QUIET is 1.
# It is also appended to $DAILY_LASTLOG when that file already exists
# and WRITELOG is 1.
#
# @param string severity
# @param string message
#
function printthis() {
	local msg
	msg="[$(date)][$1] $2"

	# print to standard output if it's not in quiet mode
	if [ "$QUIET" != 1 ]; then
		printf "%s\n" "$msg"
	fi

	# put in the log file if possible
	if [ -f "$DAILY_LASTLOG" ] && [ "$WRITELOG" = 1 ]; then
		printf "%s\n" "$msg" >> "$DAILY_LASTLOG"
	fi
}

###
# Run an rsync archive copy
#
# Does nothing (apart from logging) when PORCELAIN is 1.
#
# @param string source
# @param string destination
#
function copy() {

	# show what we are doing
	log "copy $*"

	# run the rsync command
	if [ "$PORCELAIN" != 1 ]; then
		# $RSYNC holds the command and its flags, so it must be split in words.
		# The arguments are quoted instead, to survive spaces in pathnames.
		# shellcheck disable=SC2086
		$RSYNC "$@"
	fi
}

###
# Run an rsync archive copy,
# creating hard-links into the destination,
# instead of copying files.
# This saves a lot of space but you MUST pay attention
# to never touch the source files, or you will overwrite
# destination files.
#
# Exits with status 3 on wrong number of arguments and with status 4
# when the source for hard links is not an existing directory.
# Does nothing (apart from logging) when PORCELAIN is 1.
#
# @param string source
# @param string destination
# @param string source_for_hardlinks
#
function copy_using_hard_links() {

	if [ "$#" != 3 ]; then
		# report the function name: $0 would be the name of the script
		echo "Bad usage of ${FUNCNAME[0]}. Must have 3 arguments. Current arguments: $*" >&2
		exit 3
	fi

	local source="$1"
	local dest="$2"
	local source_hardlinks="$3"

	# No source for hardlinks, no party
	# rsync wants this as absolute path, or it is resolved from the destination.
	local source_hardlinks_abs
	source_hardlinks_abs=$(realpath "$source_hardlinks")
	if ! [ -d "$source_hardlinks_abs" ]; then
		echo "Source for hardlinks not existing: $source_hardlinks_abs given from source_hardlinks from current path $(pwd)" >&2
		exit 4
	fi

	# Show what we are doing, without being verbose.
	log "copy using hard links from $source to $dest (source for hard links: $source_hardlinks)"
	log "$RSYNC --link-dest=$source_hardlinks_abs $source $dest"

	# run the rsync command
	if [ "$PORCELAIN" != 1 ]; then
		# $RSYNC holds the command and its flags, so it must be split in words.
		# shellcheck disable=SC2086
		$RSYNC --link-dest="$source_hardlinks_abs" "$source" "$dest"
	fi
}

###
# Remove one or more pathnames
#
# Does nothing (apart from logging) when PORCELAIN is 1.
#
# @param string... pathnames
#
function drop() {

	# show what we are doing
	log "drop $*"

	# well, proceed... finger crossed... with some protections
	if [ "$PORCELAIN" != 1 ]; then
		# quoted, so a pathname with spaces is never split in unrelated pathnames
		rm --recursive --force --one-file-system --preserve-root -- "$@"
	fi
}

###
# Move something somewhere
#
# Does nothing (apart from logging) when PORCELAIN is 1.
#
# @param string source
# @param string destination
#
function move() {

	# show what we are doing
	log "move $*"

	if [ "$PORCELAIN" != 1 ]; then
		mv --force "$@"
	fi
}

###
# Print an information message
#
# @param msg Message
#
function log() {
	printthis INFO "$1"
}

###
# Print a warning message
#
# @param msg Message
#
function warn() {
	printthis WARN "$1"
}

###
# Print an error message
#
# @param msg Message
#
function error() {
	printthis ERROR "$1"
}

# Provenance: what follows was recovered from a unified diff
#   diff --git a/rotate.sh b/rotate.sh (index fae3520..f7f86f3, mode 100755)
# and shows rotate.sh as it reads with that patch applied.
#!/bin/bash
###
# Stupid script to rotate backup,
# creating incremental backups thanks to
# hard-links.
#
# Author: Valerio B.
# Date: Wed 4 Aug 2020
# License: CC 0 - public domain
##

# do not proceed in case of errors
set -e

# current directory
MYDIR="$(dirname "$(realpath "$0")")"

# as default don't be quiet while rotating
QUIET=0

# as default don't write in the log while rotating
WRITELOG=0

#
# Maximum time that your rotation could last
#
# Right now this should be a good default since it doesn't make much sense
# for a rotation to take more than this number of hours.
#
# If the script takes longer than this, the next rotation may not run.
#
# Note: at the moment this must be shorter than a single day.
#
# Current default: 6 hours (6 * 60 * 60 = 21600 seconds)
MAX_ROTATE_SECONDS=21600

# include all the stuff and useful functions
# shellcheck source=bootstrap.sh
. "$MYDIR"/bootstrap.sh

# arguments
place="$1"
days="$2"
max="$3"

# expected file containing last timestamp
last_timestamp_file="$place.timestamp"

###
# Show the usage
#
function show_help_rotate() {
	echo "USAGE"
	echo " $0 PATH DAYS MAX_ROTATIONS"
	echo "EXAMPLE"
	echo " $0 /home/backups 1 30"
}

###
# Harden a rotation, giving it to root with mode 600
#
# Note that non-privileged users should be able to push their last copy,
# but MUST not in any way be able to touch older copies.
#
# Exits with status 2 when the argument is missing.
# Does nothing (apart from logging) when PORCELAIN is 1, since in that
# mode the pathname may not even exist because nothing was moved or copied.
#
# @param string harden_path
#
function harden() {

	local harden_path="$1"

	# no path no party
	if [ -z "$harden_path" ]; then
		echo "Wrong usage of harden" >&2
		exit 2
	fi

	if [ "$PORCELAIN" = 1 ]; then
		log "harden $harden_path"
		return 0
	fi

	chown root:root "$harden_path"
	chmod 600 "$harden_path"
}

# all the arguments must exist (just check the last one)
if [ -z "$max" ]; then
	echo "Bad usage" >&2
	show_help_rotate >&2
	exit 1
fi

# the place to be rotated must exist
if [ ! -e "$place" ]; then
	error "unexisting directory '$place'"
	exit 2
fi

# validate days parameter
# It is used in an arithmetic expansion, so it must be made of digits only.
if ! [[ "$days" =~ ^[0-9]+$ ]]; then
	echo "The DAYS parameter must be a non-negative integer" >&2
	show_help_rotate >&2
	exit 3
fi

# validate max parameter
# A non-numeric value would make the test fail with an error,
# that inside an "if" is the same as being valid. So check the digits first.
if ! [[ "$max" =~ ^[0-9]+$ ]] || [ "$max" -lt 2 ]; then
	echo "The MAX parameter must be greater than 1" >&2
	show_help_rotate >&2
	exit 3
fi

# force base 10, so leading zeros are not parsed as octal numbers
days=$((10#$days))
max=$((10#$max))

# expected seconds from the last rotation before continuing
# NOTE: do not use expr here, since expr exits with status 1 when the result is zero,
#       and that aborts the script because of "set -e"
expected_seconds=$((days * 86400))

# check if the duration in seconds is a day or more
if [ "$expected_seconds" -ge 86400 ]; then

	# the expected time since the last execution is never exactly the number of days in seconds
	# Solution: remove the maximum duration of a rotation from the expected time
	expected_seconds=$((expected_seconds - MAX_ROTATE_SECONDS))
fi

# do not proceed if not enough time passed since last execution on that directory
# this avoids daylight saving time change problems
# this also avoids race conditions when starting parallel executions by mistake
if ! are_enough_seconds_passed "$last_timestamp_file" "$expected_seconds"; then
	warn "doing nothing: last rotation was executed too recently on $place: now-last $(date +%s)-$(< "$last_timestamp_file") - expected at least $expected_seconds seconds"
	exit 0
fi

# save the last timestamp before rotating everything
# this will avoid even parallel rotations
# In porcelain mode nothing is rotated, so the timestamp must not change,
# or the next real rotation would be refused.
if [ "$PORCELAIN" != 1 ]; then
	write_timestamp "$last_timestamp_file"
fi

# eventually drop the last backup step
# if it does not exist, don't care
max_path="$place.$max"
drop "$max_path"

# shift all the backups
after="$max"
while [[ "$after" -gt 1 ]]; do

	before=$((after - 1))

	# do not process the root directory for no reason in the world if you type that by mistake
	# the --preserve-root is already implicit but... let's be sure!
	before_path="$place.$before"
	after_path="$place.$after"

	# the source must exist
	if [ -e "$before_path" ]; then

		# the trailing slash is given to the source of the move
		move "$before_path/" "$after_path"

		harden "$after_path"
	fi

	# next
	after="$before"
done

# at the end, move the base forward
# the trailing slash means: copy files and not just the directory
# this should be able to create a copy that is more lightweight than the original if few
# things changed. So later "rdfind" has less work to do.
# Micro-optimization: if we already have an older backup, use that as source for hard links.
# This dramatically reduces I/O on disk and saves A LOT of disk space,
# if you know what you are doing, and as long as you consider ".2" as read-only.
if [ -d "$place.2" ]; then
	#                     Source     Destin      SourceForHardLinks
	copy_using_hard_links "$place/"  "$place.1"  "$place.2"
else
	#    Source     Destin
	copy "$place/"  "$place.1"
fi

# Make sure that other users cannot see this path.
# This is useful since you may want to preserve original context in YOUR files,
# but having a strict parent context in OUR generated files.
harden "$place.1"

#
# De-duplicate the first rotations.
# This saves ~1TB of data for 20 rotations of 100 computers in my use-case :)
#
# - Why not de-duplicating "$place"?
#   We avoid to touch the source, since the source can be written and that's an additional risk.
#   I'm 9999.99% sure that we could also write the original source "$place",
#   but again, it's an unnecessary risk.
#   So this is a security mitigation.
# - Why de-duplicating only "$place.1" and "$place.2"?
#   It's not useful to de-duplicate ALL rotations since the 1+2 will be rotated.
#   It's rare that you generate a duplicate file between rotation 1 and rotation 30,
#   So this is a micro-optimization.
#
# Do these paths exist?
# In porcelain mode the de-duplication is skipped, since it would replace
# files with hard links while we promised to do nothing.
if [ "$PORCELAIN" = 1 ]; then
	log "de-duplication skipped on $place.1 and $place.2"
elif [ -d "$place.1" ] && [ -d "$place.2" ]; then

	# Check that rdfind is installed in your system.
	# "command -v" is a shell builtin, so it works even where "which" is missing.
	if command -v rdfind > /dev/null; then

		# rdfind arguments:
		#   -makehardlinks:    this does the de-duplication trick while keeping data consistency.
		#                      Yuppie!
		#   -removeidentinode: this sounds scary for backups, and was disabled.
		#                      I don't even know why it is not auto-disabled with the next one.
		#   -makeresultsfile   For some reasons this script creates a report. We don't want it.
		#                      We are already happy with its output.
		#   -checksum:         the default is sha1, but it's not a secure method nowadays,
		#                      so, adopting sha256 is better, slower but more safe.
		#                      The risk with sha1 is that an attacker can push exact copies
		#                      with same size and same hash and same initial and final bytes,
		#                      to try to compromise future rotations.
		#                      -> please avoid sha1
		#                      -> please adopt sha256 or bigger (but think about performance)
		#   -minsize           Under this size in bytes, files are skipped.
		#                      The default is just 1 bytes, to skip empty files.
		#                      It may be really inconvenient to process billions of tiny files,
		#                      so, I suggest to keep this at least at some megabytes,
		#                      To receive some visible benefits in terms of saved disk space,
		#                      and do not cause intensive read operations to run sha256 (or whatever)
		#                      over millions of files.
		#   -sleep:            This is just a sleep between each file, to do not over-heat your drives.
		#                      This is just to extend the life of your backup drives.
		#                      This is probably reasonable since this backup is supposed to be executed
		#                      on daily basis, so 1 millisecond at least between each file is probably
		#                      a good default.
		log "de-duplication started on $place.1 and $place.2"
		rdfind \
			-makehardlinks    true \
			-removeidentinode false \
			-makeresultsfile  false \
			-minsize          1000000 \
			-checksum         sha256 \
			-sleep            1ms \
			"$place.1" \
			"$place.2"
		log "de-duplication concluded"
	else
		warn "rdfind is not installed in this system so we cannot de-duplicate your backup rotations"
	fi
fi

# yeah!
log "rotation concluded"