#!/bin/sh
# rem-dups - Finds duplicate files, puts them in rem-duplicates.sh for removal
#
# Usage: rem-dups [DIR...]
#
# To use this script, change to the directory you want to check and run
# 'rem-dups'. Every regular file below the given directories (default: the
# current directory) is hashed with md5sum; files whose hashes match are
# listed, one group per blank-line-separated paragraph, in the generated
# script 'rem-duplicates.sh'. Each candidate appears there as a commented-out
# '#rm <file>' line: delete the '#' in front of the files you want removed,
# then run rem-duplicates.sh.
#
# Requires GNU-style tools: 'uniq -w/--all-repeated' and 'sed -r'.
# NOTE: the pipeline is line-oriented, so file names containing newlines
# are not handled.
#
# 2003apr06, Jarno Elonen, released rem-dups to Public Domain
# http://elonen.iki.fi/code/misc-notes/remove-duplicate-files/
#

# Name of the generated removal script (created in the current directory).
OUTF=rem-duplicates.sh

# main [DIR...] - write $OUTF listing duplicate files found under DIR...
# Returns non-zero if the header of $OUTF cannot be written; otherwise the
# status of the final 'ls -l'.
main() {
  # Wrap the working directory in single quotes (embedded ' becomes '\'')
  # so that spaces or shell metacharacters in the path cannot break, or
  # inject commands into, the generated script.
  quoted_cwd=$(pwd | sed "s/'/'\\\\''/g")

  {
    printf '%s\n' "#! /bin/sh"
    printf '# File created by %s %s %s\n' "$0" "$1" "$2"
    # Abort the generated script if the directory has vanished, so that
    # un-commented rm lines never run against some other directory.
    printf "cd '%s' || exit 1\n" "$quoted_cwd"
  } > "$OUTF" || return 1

  # Not every find implementation supplies a default start path.
  [ "$#" -gt 0 ] || set -- .

  # 1. hash every regular file ('{} +' batches many files per md5sum run)
  # 2. sort so identical hashes become adjacent
  # 3. keep only lines whose first 32 characters (the MD5 hex digest)
  #    repeat, separating groups with a blank line
  # 4. strip the digest, backslash-escape every character outside
  #    [a-zA-Z0-9./_-], and turn each non-empty line into '#rm <file>'
  find "$@" -type f -exec md5sum {} + |
    sort --key=1,32 |
    uniq -w 32 -d --all-repeated=separate |
    sed -r 's/^[0-9a-f]*( )*//;s/([^a-zA-Z0-9./_-])/\\\1/g;s/(.+)/#rm \1/' \
      >> "$OUTF"

  chmod a+x "$OUTF"
  ls -l "$OUTF"
}

main "$@"