#!/bin/sh
# rem-dups - Finds duplicate files, puts them in rem-duplicates.sh for removal
#
# To use this script, open a command line, change to the directory you want
# to check, and run 'rem-dups'. The script finds all files with identical
# contents and writes the results to a shell script, 'rem-duplicates.sh'.
# Every duplicate is listed there as a commented-out rm command; delete the
# '#' in front of the rm command for each file you want to remove, then run
# 'rem-duplicates.sh'.
#
# 2003apr06, Jarno Elonen, released rem-dups to Public Domain
#		http://elonen.iki.fi/code/misc-notes/remove-duplicate-files/
#
# Name of the generated removal script, created in the current directory.
OUTF=rem-duplicates.sh

# quote_sq - print $1 wrapped in single quotes so that it can be pasted into
# sh code verbatim. Embedded single quotes are rewritten as '\''.
quote_sq() {
  printf "'%s'\n" "$(printf '%s\n' "$1" | sed "s/'/'\\\\''/g")"
}

# main - write $OUTF, listing every file with duplicated contents as a
# commented-out rm command.
# Arguments: passed straight to find(1) as starting points / expressions;
#            with none, GNU find searches the current directory.
# Outputs:   $OUTF (made executable) and an 'ls -l' line for it on stdout.
# Returns:   1 if $OUTF cannot be written, otherwise the status of 'ls'.
main() {
  {
    printf '%s\n' '#! /bin/sh'
    # Record all arguments; newlines are flattened so that an argument can
    # never spill out of this comment line into the generated script.
    printf '# File created by %s\n' "$(printf '%s' "$0 $*" | tr '\n' ' ')"
    # The rm paths below are relative to this directory, so quote it and make
    # the generated script stop if it cannot get there.
    printf 'cd %s || exit 1\n' "$(quote_sq "$(pwd)")"
  } > "$OUTF" || {
    printf '%s: cannot write %s\n' "$0" "$OUTF" >&2
    return 1
  }

  # Pipeline stages:
  #   find | md5sum  one "<32 hex digits><space><space or *><path>" line per
  #                  file; '{} +' batches many files into each md5sum run.
  #   sort           byte-wise, so equal checksums become adjacent.
  #   uniq           keep only lines whose first 32 characters repeat, with a
  #                  blank line between groups.
  #   sed            drop the checksum and its two separator characters,
  #                  backslash-escape every character outside [a-zA-Z0-9./_-],
  #                  and turn each non-empty line into a '#rm' line.
  # NOTE: md5sum prefixes the line with '\' for names containing a backslash
  # or newline; such names are not handled and must be dealt with by hand.
  find "$@" -type f -exec md5sum {} + |
    LC_ALL=C sort |
    uniq -w 32 -d --all-repeated=separate |
    sed -r 's/^[0-9a-f]{32} [ *]//;s/([^a-zA-Z0-9./_-])/\\\1/g;s/(.+)/#rm \1/' >> "$OUTF"

  chmod a+x "$OUTF"
  ls -l "$OUTF"
}

main "$@"
