Enhance UI, preserve timestamps

Removed my oh-so-clever "shadowdir" and "shadowfile" names for
    the directory, and the files within it, to be examined for potential
    deduplication. Instead we call them the subject directory and subject
    files, because they are subject to our process.

    Now creates a temporary file, removed on exit, as our temporary
    access and modification time holder. When replacing a file with a
    hard link, we first copy the atime/mtime of its directory to the
    temporary file. THEN we perform the link. After the link, we
    restore the original atime/mtime of the enclosing directory from
    the temporary file. This is useful for backups, where we do not
    want the directories to pick up newer timestamps: creating the link
    counts as a modification of the directory, so without this step
    each directory would end up with the date/time at which we ran
    deduplication.
This commit is contained in:
Lee Ockert
2023-10-23 00:31:34 -04:00
parent efdf9717db
commit 063298dd46

View File

@ -1,11 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# BITSC LICENSE NOTICE (MODIFIED ISC LICENSE) # Copyright (c) 2023 Joshua Lee Ockert <torstenvl@gmail.com>
#
# DIRECTORY DE-DUPLICATION ("Dirty Dupe")
#
# Copyright (c) 2022 Lee Ockert <torstenvl@gmail.com>
# https://github.com/torstenvl
# #
# THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTY OF ANY KIND. THE IMPLIED # THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTY OF ANY KIND. THE IMPLIED
# WARRANTIES OF MERCHANTABILITY, FITNESS, NON-INFRINGEMENT, AND TITLE ARE # WARRANTIES OF MERCHANTABILITY, FITNESS, NON-INFRINGEMENT, AND TITLE ARE
@ -25,22 +20,19 @@ DIRECTORY DE-DUPLICATOR
USAGE USAGE
dirdedupe.sh [--execute] masterdir shadowdir dirdedupe.sh [--execute] masterdir subjectdir
DESCRIPTION DESCRIPTION
For each file in shadowdir, replace it with a hard link to the matching file For each file in subjectdir, replace it with a hard link to the matching
(if any) in masterdir. A file will be considered a match if, and only if, file (if any) in masterdir. A file will be considered a match if, and
it shares the same file name, relative path, and contents. only if, it shares the same file name, relative path, and contents.
OPTIONS OPTIONS
--execute Actually remove and link duplicate files. By default, this --execute Actually remove and link duplicate files. By default, this
program runs in test mode. program runs in test mode.
MISC
The name of this utility is pronounced "dirty dupe."
" "
} }
@ -61,41 +53,55 @@ if [ ! $# -eq 2 ]; then
echo && echo "Wrong number of arguments!" && printusage && exit echo && echo "Wrong number of arguments!" && printusage && exit
else else
masterdir=$1 masterdir=$1
shadowdir=$2 subjectdir=$2
if [ ! -d "${masterdir}" ]; then if [ ! -d "${masterdir}" ]; then
echo && echo "${masterdir} is not a directory!" && printusage && exit echo && echo "${masterdir} is not a directory!" && printusage && exit
elif [ ! -d "${shadowdir}" ]; then elif [ ! -d "${subjectdir}" ]; then
echo && echo "${shadowdir} is not a directory!" && printusage && exit echo && echo "${subjectdir} is not a directory!" && printusage && exit
fi fi
fi fi
############################################################################
## MAKE A TEMPORARY FILE FOR PRESERVING TIMESTAMPS ##
############################################################################
# Scratch file intended to hold a directory's atime/mtime while a hard link
# is created inside that directory. Note: an assignment must not have spaces
# around '=', otherwise bash tries to run a command named "TEMPFILE" and the
# variable stays empty.
TEMPFILE=$(mktemp) || { echo "Could not create temporary file!" >&2; exit 1; }
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a path containing whitespace intact. '--' guards against a
# path that begins with a dash.
trap 'rm -f -- "${TEMPFILE}"' EXIT
############################################################################ ############################################################################
## HARDLINK THE DUPLICATES (OR NOT) ## ## HARDLINK THE DUPLICATES (OR NOT) ##
############################################################################ ############################################################################
find "${shadowdir}" -print0 | while read -d $'\0' shadowfile find "${subjectdir}" -print0 | while read -d $'\0' subjectfile
do do
if [ -f "${shadowfile}" ]; then if [ -f "${subjectfile}" ]; then
masterfile="${shadowfile/#${shadowdir}/${masterdir}}" masterfile="${subjectfile/#${subjectdir}/${masterdir}}"
if [ -f "${masterfile}" ]; then if [ -f "${masterfile}" ]; then
if [ "${shadowfile}" -ef "${masterfile}" ]; then if [ ! "${subjectfile}" -ef "${masterfile}" ]; then
echo "ID \"${masterfile}\" <-> \"${shadowfile}\"" cmp -s "${masterfile}" "${subjectfile}"
else
cmp -s "${masterfile}" "${shadowfile}"
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
if [ $REALLYRUN -gt 0 ]; then if [ $REALLYRUN -gt 0 ]; then
echo "LINK \"${masterfile}\" <-- \"${shadowfile}\"" echo "LINK \"${masterfile}\" <-- \"${subjectfile}\""
ln -Pf "${masterfile}" "${shadowfile}" # Store the mtime/atime of subject file's directory
TEMPSUBJDIR=`dirname "${subjectfile}"`
#touch -r "${TEMPSUBJDIR}" "${TEMPFILE}"
# Link the subject file to the corresponding file in
# the master directory
ln -Pf "${masterfile}" "${subjectfile}"
# Restore the mtime/atime of subject file's directory
#touch -r "${TEMPFILE}" "${TEMPSUBJDIR}"
else else
echo "HYPO \"${masterfile}\" <~~ \"${shadowfile}\"" echo "HYPO \"${masterfile}\" <~~ \"${subjectfile}\""
TEMPSUBJDIR=`dirname "${subjectfile}"`
echo " Saving atime/mtime of |${TEMPSUBJDIR}|"
fi fi
else #else
echo "MOD \"${masterfile}\" <X> \"${shadowfile}\"" #echo "MOD \"${masterfile}\" <X> \"${subjectfile}\""
fi # end check for file equality fi #END check for files being the same
fi # end check for inode equality #else
else #echo "ID \"${masterfile}\" <-> \"${subjectfile}\""
echo "NEW \"${shadowfile}\" " fi # END check for inode equality
fi # end check if master file exists #else
#echo "NEW \"${subjectfile}\" "
fi # END check if master file exists
fi fi
done done