From 063298dd4642676900a50c24158fb915a48f5811 Mon Sep 17 00:00:00 2001 From: Lee Ockert Date: Mon, 23 Oct 2023 00:31:34 -0400 Subject: [PATCH] Enhance UI, preserve timestamps Removed my oh-so-clever "shadowdir" and "shadowfile" concepts for the directory to be examined for potential deduplication. Instead we call them subject directories and subject files, because they are subject to our process. Now creates a temporary file, removed on exit, as our temporary access and modification time holder. When replacing a file with a hard link, we first copy the atime/mtime of its directory to the temporary file. THEN we perform the link. After the link, we restore the original atime/mtime of the enclosing directory from the temporary file. This is useful for backups, where we do not want the directories to have newer timestamps (timestamps matching the date/time that we run deduplication, because creating the link counts as a modification of the directory). --- dirdedupe.sh | 86 ++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/dirdedupe.sh b/dirdedupe.sh index 7b525b3..3c98600 100755 --- a/dirdedupe.sh +++ b/dirdedupe.sh @@ -1,11 +1,6 @@ #!/usr/bin/env bash -# BITSC LICENSE NOTICE (MODIFIED ISC LICENSE) -# -# DIRECTORY DE-DUPLICATION ("Dirty Dupe") -# -# Copyright (c) 2022 Lee Ockert -# https://github.com/torstenvl +# Copyright (c) 2023 Joshua Lee Ockert # # THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTY OF ANY KIND. THE IMPLIED # WARRANTIES OF MERCHANTABILITY, FITNESS, NON-INFRINGEMENT, AND TITLE ARE @@ -25,22 +20,19 @@ DIRECTORY DE-DUPLICATOR USAGE - dirdedupe.sh [--execute] masterdir shadowdir + dirdedupe.sh [--execute] masterdir subjectdir DESCRIPTION - For each file in shadowdir, replace it with a hard link to the matching file - (if any) in masterdir. A file will be considered a match if, and only if, - it shares the same file name, relative path, and contents. 
+ For each file in subjectdir, replace it with a hard link to the matching + file (if any) in masterdir. A file will be considered a match if, and + only if, it shares the same file name, relative path, and contents. OPTIONS --execute Actually remove and link duplicate files. By default, this program runs in test mode. -MISC - - The name of this utility is pronounced "dirty dupe." " } @@ -52,8 +44,8 @@ MISC # CHECK FOR THE EXECUTE FLAG (DEFAULT IS TESTING-ONLY MODE) REALLYRUN=0 if [ "${1}" == "--execute" ]; then - REALLYRUN=1 - shift + REALLYRUN=1 + shift fi # MAKE SURE WE HAVE THE RIGHT NUMBER OF ARGUMENTS AND THEY'RE VALID @@ -61,41 +53,55 @@ if [ ! $# -eq 2 ]; then echo && echo "Wrong number of arguments!" && printusage && exit else masterdir=$1 - shadowdir=$2 + subjectdir=$2 if [ ! -d "${masterdir}" ]; then echo && echo "${masterdir} is not a directory!" && printusage && exit - elif [ ! -d "${shadowdir}" ]; then - echo && echo "${shadowdir} is not a directory!" && printusage && exit + elif [ ! -d "${subjectdir}" ]; then + echo && echo "${subjectdir} is not a directory!" && printusage && exit fi fi +############################################################################ +## MAKE A TEMPORARY FILE FOR PRESERVING TIMESTAMPS ## +############################################################################ +TEMPFILE=$(mktemp) || { echo "Cannot create temporary file!" >&2; exit 1; } +trap 'rm -f -- "${TEMPFILE}"' EXIT ############################################################################ ## HARDLINK THE DUPLICATES (OR NOT) ## ############################################################################ -find "${shadowdir}" -print0 | while read -d $'\0' shadowfile +find "${subjectdir}" -print0 | while IFS= read -r -d '' subjectfile do - if [ -f "${shadowfile}" ]; then - masterfile="${shadowfile/#${shadowdir}/${masterdir}}" - if [ -f "${masterfile}" ]; then - if [ "${shadowfile}" -ef "${masterfile}" ]; then - echo "ID \"${masterfile}\" <-> \"${shadowfile}\"" - else - cmp -s "${masterfile}" "${shadowfile}" - if [ $? 
-eq 0 ]; then - if [ $REALLYRUN -gt 0 ]; then - echo "LINK \"${masterfile}\" <-- \"${shadowfile}\"" - ln -Pf "${masterfile}" "${shadowfile}" - else - echo "HYPO \"${masterfile}\" <~~ \"${shadowfile}\"" - fi - else - echo "MOD \"${masterfile}\" \"${shadowfile}\"" - fi # end check for file equality - fi # end check for inode equality - else - echo "NEW \"${shadowfile}\" " - fi # end check if master file exists + if [ -f "${subjectfile}" ]; then + masterfile="${masterdir}${subjectfile#"${subjectdir}"}" + if [ -f "${masterfile}" ]; then + if [ ! "${subjectfile}" -ef "${masterfile}" ]; then + cmp -s "${masterfile}" "${subjectfile}" + if [ $? -eq 0 ]; then + if [ $REALLYRUN -gt 0 ]; then + echo "LINK \"${masterfile}\" <-- \"${subjectfile}\"" + # Store the mtime/atime of subject file's directory + TEMPSUBJDIR=$(dirname "${subjectfile}") + touch -r "${TEMPSUBJDIR}" "${TEMPFILE}" + # Link the subject file to the corresponding file in + # the master directory + ln -Pf "${masterfile}" "${subjectfile}" + # Restore the mtime/atime of subject file's directory + touch -r "${TEMPFILE}" "${TEMPSUBJDIR}" + else + echo "HYPO \"${masterfile}\" <~~ \"${subjectfile}\"" + TEMPSUBJDIR=$(dirname "${subjectfile}") + echo " Saving atime/mtime of |${TEMPSUBJDIR}|" + fi + #else + #echo "MOD \"${masterfile}\" \"${subjectfile}\"" + fi #END check for files being the same + #else + #echo "ID \"${masterfile}\" <-> \"${subjectfile}\"" + fi # END check for inode equality + #else + #echo "NEW \"${subjectfile}\" " + fi # END check if master file exists fi done