#!/bin/bash
###
# Stupid script to rotate backup,
# creating incremental backups thanks to
# hard-links.
#
# USAGE: rotate.sh PATH DAYS MAX_ROTATIONS
#
# Author: Valerio B.
# Date: Wed 4 Ago 2020
# License: CC 0 - public domain
##

# do not proceed in case of errors
set -e

# current directory
MYDIR="$(dirname "$(realpath "$0")")"

# as default don't be quiet while rotating
QUIET=0

# as default don't write in the log while rotating
WRITELOG=0

#
# Maximum time that your rotation could last
#
# Right now this should be a good default since it doesn't make much sense
# for a rotation to take more than this number of hours.
#
# If the script takes longer than this, the next rotation may not run.
#
# Note: at the moment this must be shorter than a single day.
#
# Current default: 6 hours (6 * 60 * 60 = 21600 seconds)
MAX_ROTATE_SECONDS=21600

# include all the stuff and useful functions
# (presumably provides: log, warn, error, drop, move, copy,
#  copy_using_hard_links, is_btrfs, are_enough_seconds_passed,
#  write_timestamp — TODO confirm against bootstrap.sh)
. "$MYDIR"/bootstrap.sh

# arguments
place="$1"
days="$2"
max="$3"

# expected file containing last timestamp
last_timestamp_file="$place.timestamp"

# show usage
function show_help_rotate() {
	echo "USAGE"
	echo " $0 PATH DAYS MAX_ROTATIONS"
	echo "EXAMPLE"
	echo " $0 /home/backups 1 30"
}

# Restrict ownership/permissions of a rotated copy so that only root
# can touch it.
function harden() {
	local harden_path="$1"

	# no path no party
	if [ -z "$harden_path" ]; then
		echo "Wrong usage of harden"
		exit 2
	fi

	# Harden rotations
	#
	# Note that non-privileged users should be able to push their last copy,
	# but MUST not in any way be able to touch older copies
	#
	# NOTE(review): mode 600 on a *directory* drops the execute (search) bit,
	# so even listing/traversing it requires root — confirm this is intended
	# (700 would keep the directory usable by root tools without the caveat).
	chown root:root "$harden_path"
	chmod 600 "$harden_path"
}

# all the arguments must exist (just check the last one)
if [ -z "$max" ]; then
	echo "Bad usage"
	show_help_rotate
	exit 1
fi

# the place to be rotated must exist
if [ ! -e "$place" ]; then
	error "unexisting directory '$place'"
	exit 2
fi

# validate max parameter
if [ "$max" -lt 2 ]; then
	echo "The MAX parameter must be greater than 1"
	show_help_rotate
	exit 3
fi

# expected seconds from the last rotation before continuing
#
# NOTE: arithmetic expansion replaces the old 'expr "$days" "*" 86400':
# it spawns no process, needs no escaped star and — unlike 'expr', which
# exits non-zero whenever its result is 0 — does not kill the script
# under 'set -e' when DAYS=0.
expected_seconds=$(( days * 86400 ))

# check if the duration in seconds is a day or more
if [ "$expected_seconds" -ge 86400 ]; then
	# the expected time since the last execution is never exactly the number
	# of days in seconds
	# Solution: remove few hours from the expected (just to say, uhm, 5 hours)
	expected_seconds=$(( expected_seconds - MAX_ROTATE_SECONDS ))
fi

# do not proceed if not enough time passed since last execution on that directory
# this avoids daylight saving time change problems
# this also avoids race conditions when starting parallel executions by mistake
if ! are_enough_seconds_passed "$last_timestamp_file" "$expected_seconds"; then
	warn "doing nothing: last rotation was executed too recently on $place: now-last $(date +%s)-$(< "$last_timestamp_file") - expected at least $expected_seconds seconds"
	exit 0
fi

# save the last timestamp before rotating everything
# this will avoid even parallel rotations
write_timestamp "$last_timestamp_file"

# eventually drop the last backup step
# if it does not exist, don't care
max_path="$place.$max"
drop "$max_path"

# shift all the backups: N-1 -> N, N-2 -> N-1, ... down to 1 -> 2
after="$max"
while [[ "$after" -gt 1 ]]; do
	before=$(( after - 1 ))

	# do not process the root directory for no reason in the world if you
	# type that by mistake
	# the --preserve-root is already implicit but... let's be sure!
	before_path="$place.$before"
	after_path="$place.$after"

	# the source must exist.
	if [ -e "$before_path" ]; then
		# the trailing slash means: copy files and not just the directory
		move "$before_path/" "$after_path"
		harden "$after_path"
	fi

	# next
	after="$before"
done

# at the end, move the base forward
# the trailing slash means: copy files and not just the directory.
# We were trying to adopt "hard links" copy but it does not work very well.
if [ -d "$place.2" ]; then
	# Try to create the "$place.1" minimizing I/O and disk usage.
	copy_using_hard_links "$place/" "$place.1" "$place.2"
else
	# Create "$place.1" in the traditional way.
	copy "$place/" "$place.1"
fi

# Make sure that other users cannot see this path.
# This is usually since you may want to preserve original context in YOUR files,
# but having a strict parent context in OUR generated files.
harden "$place.1"

#
# De-duplicate the first rotations.
# This saves ~1TB of data for 20 rotations of 100 computers in my use-case :)
#
# - Why not de-duplicating "$place"?
#   We avoid to touch the source, since the source can be wrote and that's an
#   additional risk.
#   I'm 9999.99% sure that we could also write the original source "$place",
#   but again, it's an unnecessary risk.
#   So this is a security mitigation.
# - Why de-duplicating only "$place.1" and "$place.2"?
#   It's not useful to rotate ALL rotations since the 1+2 will be rotated.
#   It's rare that you generate a duplicate file between rotation 1 and
#   rotation 30, so this is a micro-optimization.
#
# Do these path exists?
if [ -d "$place.1" ] && [ -d "$place.2" ] && [ -d "$place.3" ]; then

	# If we have BTRFS on board
	if is_btrfs "$place.1"; then
		log "$place.1 is btrfs"

		# Check if this peculiar BTRFS de-duplication tool is installed in your system.
		# https://btrfs.readthedocs.io/en/latest/Deduplication.html
		# ('command -v' replaces non-standard 'which': POSIX, builtin, no fork.)
		if command -v duperemove > /dev/null; then
			log "de-duplication with duperemove started on $place.1 and $place.2 and $place.3 (and nothing more for performance reasons)"
			duperemove -rdh \
				"$place.1" \
				"$place.2" \
				"$place.3"
		else
			warn "duperemove is not installed in this system so we cannot de-duplicate your backup rotations"
		fi
	else
		log "$place.1 is not btrfs"

		# Check that rdfind is installed in your system.
		if command -v rdfind > /dev/null; then

			# rdfind arguments:
			# -makehardlinks: this does the de-duplication trick while keeping
			#     data consistency. Yuppie!
			# -removeidentinode: this sounds scary for backups, and was disabled.
			#     I don't even know why it is not auto-disabled with the next one.
			# -makeresultsfile: for some reasons this script creates a report.
			#     We don't want it. We are already happy with its output.
			# -checksum: the default is sha1, but it's not a secure method
			#     nowadays, so adopting sha256 is better: slower but more safe.
			#     The risk with sha1 is that an attacker can push exact copies
			#     with same size and same hash and same initial and final bytes,
			#     to try to compromise future rotations.
			#     -> please avoid sha1
			#     -> please adopt sha256 or bigger (but think about performance)
			# -minsize: under this size in bytes, files are skipped.
			#     The default is just 1 byte, to skip empty files.
			#     It may be really inconvenient to process billions of tiny
			#     files, so keep this at least at some megabytes to receive
			#     visible benefits in saved disk space without intensive reads
			#     to checksum millions of files.
			# -sleep: a sleep between each file, to not over-heat your drives
			#     and extend their life. NOTE(review): described here but not
			#     currently passed below — confirm whether it should be added.
			log "de-duplication started with rdfind on $place.1 and $place.2 and $place.3 (and nothing more for performance reasons)"
			rdfind \
				-makehardlinks true \
				-removeidentinode false \
				-makeresultsfile false \
				-minsize 1000000 \
				-checksum sha256 \
				"$place.1" \
				"$place.2" \
				"$place.3"

			log "de-duplication concluded"
		else
			warn "rdfind is not installed in this system so we cannot de-duplicate your backup rotations"
		fi
	fi
fi

# yeah!
log "rotation concluded"