bin: extend scripts for duplicate detection

This commit is contained in:
xeruf 2023-01-31 18:13:32 +01:00
parent 7470358536
commit 99053ecd32
2 changed files with 25 additions and 4 deletions

17
.local/bin/scripts/dupcheck Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
# Check the first file against all other ones given and print duplicates.
# Checks first size then diff.
# Usage: dupcheck <target> <candidate>...
# Output: every candidate that is a regular file, does not resolve to the
#   same path as the target and has identical content, one per line.
# Exit status: 0 if at least one duplicate was printed, 1 if none,
#   2 on usage error or if the target cannot be examined.
# Depends: realpath, stat (GNU --format), diff
# TODO diff initial bytes
dupcheck() {
  # Comparing needs the target plus at least one candidate.
  if test $# -lt 2
  then echo "Need at least 2 files to compare!" >&2
    return 2
  fi
  target="$1"
  shift
  # Properties of the target do not change between candidates: resolve once.
  # stat reports its own error on stderr if the target is missing.
  target_size="$(stat --format %s -- "$target")" || return 2
  target_path="$(realpath -- "$target")"
  found=1
  for existing
  do
    # Cheap checks first: file type, identity, size; content comparison last.
    test -f "$existing" &&
      test "$target_path" != "$(realpath -- "$existing")" &&
      test "$target_size" -eq "$(stat --format %s -- "$existing")" &&
      diff -q -- "$target" "$existing" >/dev/null &&
      printf '%s\n' "$existing" &&
      found=0
    # Alternative via checksum:
    #test "$(md5sum "$existing" | cut -d\  -f1)" = "$(md5sum "$target" | cut -d\  -f1)" &&
  done
  return "$found"
}

dupcheck "$@"

View File

@ -1,11 +1,15 @@
#!/bin/sh
# Delete files under current or given path
# which exist elsewhere as listed by locate.
# Only files larger than the threshold (default 50 MB) are considered,
# and every removal is confirmed interactively (rm -i).
# Args: [threshold (MB)] <filepaths...>
# Depends: locate, dupcheck (size and content comparison), highlight
# OptDepends: synct (for checking against original filename)
case $1 in ([0-9]|[0-9][0-9]) threshold=$1; shift;; esac
# The file path is handed to the inner shell as $1 rather than being
# substituted into the script text, so unusual file names cannot be
# interpreted as shell code.
find "$@" -size "+${threshold:-50}M" -type f -exec sh -c '
  # Split the locate output on newlines only and do not glob-expand it.
  IFS="
"
  set -f
  filepath="$1"
  # Name to look up; falls back to the base name of the file itself because
  # locate -b matches base names only.
  # NOTE(review): assumes synct-unarchive prints a bare file name - confirm.
  target="$(synct-unarchive "$filepath" || printf "%s\n" "${filepath##*/}")"
  highlight "$filepath"
  for existing in $(locate -b "$target")
  do
    # Compare the file that would actually be removed, and only offer
    # removal once an identical copy at a different path is confirmed.
    if test -n "$(dupcheck "$filepath" "$existing")"
    then
      echo "Duplicate found at $existing"
      rm -vi -- "$filepath" && break
    fi
  done' sh {} \;