bin: extend scripts for duplicate detection
This commit is contained in:
parent
7470358536
commit
99053ecd32
|
@ -0,0 +1,17 @@
|
|||
#!/bin/sh
# Check the first file against all other ones given and print duplicates.
# Checks first size then diff.
# TODO diff initial bytes
#
# Args: <target> <candidate>...
# Output: the path of every candidate that is a regular file, is not the
#         target itself (compared via realpath) and has identical content.
# Exit status: 2 if fewer than two files are given; otherwise the status of
#              the last comparison that was carried out.

main() {
	# At least a target and one candidate are required.
	if test $# -lt 2
	then echo "Need at least 2 files to compare!" >&2
		return 2
	fi
	target="$1"
	shift
	# Properties of the target do not change between iterations, so resolve
	# them once instead of once per candidate.
	target_real="$(realpath -- "$target")"
	target_size="$(stat --format %s -- "$target")"
	for existing
	do test -f "$existing" &&
		test "$target_real" != "$(realpath -- "$existing")" &&
		# Cheap size comparison first; only equally sized files are diffed.
		test "$target_size" = "$(stat --format %s -- "$existing")" &&
		diff -q -- "$target" "$existing" >/dev/null &&
		printf '%s\n' "$existing"
		# Disabled checksum-based alternative:
		#test "$(md5sum "$existing" | cut -d\  -f1)" = "$(md5sum "$target" | cut -d\  -f1)" &&
	done
}

main "$@"
|
|
@ -1,11 +1,15 @@
|
|||
#!/bin/sh
# Delete files under current or given path
# which exist elsewhere as listed by locate.
# Args: [threshold (MB)] <filepaths...>
# OptDepends: synct (for checking against original filename)
#
# Only files larger than the threshold (default 50 MB) are considered.
# Each candidate is looked up by name via locate and confirmed with dupcheck
# before rm asks interactively whether to delete it.

# A leading one- or two-digit argument is the size threshold in MB.
case $1 in ([0-9]|[0-9][0-9]) threshold=$1; shift;; esac
# The filename is handed to the inner shell as $1 rather than spliced into
# the script text, so quotes or $ in filenames cannot alter the code.
find "$@" -size "+${threshold:-50}M" -type f -exec sh -c '
# Split the locate output on newlines only and do not glob the results.
IFS="
"
set -f
filepath="$1"
# Fall back to the path itself when synct-unarchive is unavailable or fails.
target="$(synct-unarchive "$filepath" || echo "$filepath")"
highlight "$filepath"
# NOTE(review): dupcheck is given $target, not $filepath - confirm that
# synct-unarchive prints a path dupcheck can actually read.
for existing in $(locate -b "$target")
do test -n "$(dupcheck "$target" "$existing")" &&
	echo "Duplicate found at $existing" &&
	rm -vi -- "$filepath" && break
done' sh {} \;
|
||||
|
|
Loading…
Reference in New Issue