通过文件大小和MD5校验识别重复文件

来源:互联网 发布:2345好压软件 编辑:程序博客网 时间:2024/05/14 14:04
#!/bin/sh
#
# md5-sum.sh - group the files below a directory by (size, MD5 digest) so that
# byte-identical files end up listed on the same line.
#
# Usage: md5-sum.sh directory
#
# Files written to the current working directory (replaced on every run):
#   file_tmp.txt     one "path,size,md5" record per regular file found
#   file_result.txt  one "size * md5 = path[,path...]" line per distinct
#                    (size, md5) pair; a line with more than one path is a
#                    set of duplicates
#
# Size is the file length in bytes.
#
# Limitations:
#   - paths containing a newline are not supported (records are line based);
#   - run the script from outside the scanned tree, otherwise its own output
#     files are picked up by the scan.

set -u

# Wrong argument count is a usage error: report on stderr, fail the exit code.
if [ "$#" -ne 1 ]; then
  echo "Syntax:md5-sum.sh directory" >&2
  exit 1
fi

# An explicit help request is not an error.
if [ "$1" = "--help" ]; then
  echo "Syntax:md5-sum.sh directory"
  exit 0
fi

root=$1
# A leading '-' would be parsed as an option by find and the other tools.
case "$root" in
  -*) root="./$root" ;;
esac

if [ ! -e "$root" ]; then
  printf 'md5-sum.sh: no such file or directory: %s\n' "$root" >&2
  exit 1
fi

if ! command -v md5sum >/dev/null 2>&1; then
  echo "md5-sum.sh: md5sum not found in PATH" >&2
  exit 1
fi

echo "Begin"

# Start from a clean slate; results of a previous run must not leak in.
rm -rf -- file_tmp.txt file_result.txt

# Stream the paths one per line instead of word-splitting a variable, so that
# names containing spaces or glob characters are handled correctly.
find "$root" | while IFS= read -r item; do
  # Keep regular files only (symlinks to regular files are followed).
  [ -f "$item" ] || continue

  # Hash from stdin: when given a file name that needs escaping, GNU md5sum
  # prefixes the digest with a backslash, which would corrupt the key.
  md5str=$({ md5sum < "$item"; } 2>/dev/null | awk '{print $1}')
  # Exact length in bytes; tr removes the padding some wc versions emit.
  filesize=$({ wc -c < "$item"; } 2>/dev/null | tr -d '[:space:]')

  # An unreadable file has no digest; recording it with an empty hash would
  # group it with every other unreadable file of the same size.
  if [ -z "$md5str" ] || [ -z "$filesize" ]; then
    printf 'md5-sum.sh: skipping unreadable file: %s\n' "$item" >&2
    continue
  fi

  printf '%s,%s,%s\n' "$item" "$filesize" "$md5str"
done > file_tmp.txt
# The redirection above creates file_tmp.txt even when nothing was found, so
# the awk step below always has an input file.

echo "End"

# Collect the paths of every (size, md5) pair. Size and digest are taken from
# the last two fields and the path is rebuilt from the remaining ones, so a
# path that itself contains commas is kept intact.
awk '
BEGIN { FS = "," }
NF >= 3 {
  digest = $NF
  size = $(NF - 1)
  path = $1
  for (i = 2; i <= NF - 2; i++) {
    path = path "," $i
  }
  if ((size, digest) in groups) {
    groups[size, digest] = groups[size, digest] "," path
  } else {
    groups[size, digest] = path
  }
}
END {
  for (key in groups) {
    split(key, part, SUBSEP)
    print part[1], "*", part[2], "=", groups[key]
  }
}' file_tmp.txt > file_result.txt


0 0