python练习:对两个文件夹下的文件名进行比较

来源:互联网 发布:漫画控无法连接网络 编辑:程序博客网 时间:2024/04/28 12:39

新代码的引擎有问题,引擎有若干版本源代码,不知道哪个版本是正确的。写个python程序,从git版本管理上每个版本取一次,自动比较,直到找到最相近的为止。

练习雏形

在/etc/profile 尾部添加我的shell脚本路径。所有有用的脚本都放到该路径下,以后调用这些脚本的时候就像调用系统命令一样,不用再输入路径了。比如我的脚本路径是/home/wulong/program/myshell,就在/etc/profile尾部添加如下内容:

# TODO(wulong): personal shell-script directory, appended to PATH
export myshell_path=/home/wulong/program/myshell
export PATH="$PATH:$myshell_path"
# end of wulong's additions


compare_folder.sh和compare_folder.py文件都放在myshell路径下

比如要比较 ./src  与 ./old_src 。初版只比较文件数目,后面再逐步添加:比较文件名 ---> 比较文件内容 ---> 合并文件内容

调用方法如下 

$compare_folder.sh  ./src  ./old_src


compare_folder.sh内容如下

#!/bin/bash
# Compare two directories with compare_folder.py.
#
# Usage: compare_folder.sh DIR_ONE DIR_TWO
#
# compare_folder.py must live in the same directory as this script.

if [ $# -eq 2 ]
then
    parent_path=$(pwd)

    # Resolve both arguments to absolute paths.  The `&&` chain matters: with
    # `cd A ; cd B ; pwd` a missing directory would silently resolve to the
    # current directory and the wrong tree would be compared.
    one_path=$(cd "$parent_path" && cd "$1" && pwd) || exit 1
    two_path=$(cd "$parent_path" && cd "$2" && pwd) || exit 1

    # Locate the Python helper next to this script.  $0 works whether the
    # script was found through PATH or started with an explicit path.
    sh_path=$(cd "$(dirname "$0")" && pwd) || exit 1

    python "$sh_path/compare_folder.py" "$one_path" "$two_path"
else
    echo "need two params"
    exit 1
fi

compare_folder.py内容如下

#!/usr/bin/python
"""Compare two directory trees by the number of files they contain.

Usage: compare_folder.py FIRST_DIR SECOND_DIR

Written so that it runs unchanged under Python 2 and Python 3.
"""
import os
import sys


def get_file_list(path):
    """Return the paths of all files below *path*, recursing into sub-directories.

    Entries are built as ``path + "/" + name``.  When *path* is not a
    directory it is returned as the only element of the list.
    """
    if not os.path.isdir(path):
        return [path]
    f_l = []
    for name in os.listdir(path):
        child = path + "/" + name
        if os.path.isdir(child):
            f_l.extend(get_file_list(child))
        else:
            f_l.append(child)
    return f_l


def filter_path_name_list(path_list, parent_path):
    """Return *path_list* with the leading *parent_path* removed from each entry.

    Only a leading occurrence is stripped, so a path that happens to contain
    the parent path again further down is left intact.
    """
    prefix_len = len(parent_path)
    return [
        p[prefix_len:] if p.startswith(parent_path) else p
        for p in path_list
    ]


def main(argv):
    """Run the comparison for *argv* (same layout as ``sys.argv``).

    Prints the two directories followed by
    ``<difference> ,  <count in first> ,  <count in second>``.

    Returns the exit status: 0 on success, 1 when an argument is not a
    directory, 2 when the number of arguments is wrong.
    """
    if len(argv) != 3:
        sys.stderr.write("usage: compare_folder.py FIRST_DIR SECOND_DIR\n")
        return 2

    andengine_path = argv[1]
    baofen_path = argv[2]
    print("%d ,  %s ,  %s" % (len(argv), argv[1], argv[2]))
    print(andengine_path)
    print(baofen_path)

    if not (os.path.isdir(andengine_path) and os.path.isdir(baofen_path)):
        print("param is not directory ")
        return 1

    andengine_len = len(get_file_list(andengine_path))
    baofen_len = len(get_file_list(baofen_path))
    print("%d ,  %d ,  %d" % (andengine_len - baofen_len, andengine_len, baofen_len))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))



代码已用git做版本管理,想把所有commit的ID过滤出来,放到同一个文件中,然后挨个检出各个版本的代码,再用工具进行比较。

$git  log  > log_list.txt

wulong@wulong-Aspire-4750:/mnt/d/work/android.dooyogame/dy_baofen/test/and_git$ git log
commit c61bb629609d6731f44bc286b8241a09db486c8f
Merge: 2055e9e 958b3a6
Author: Nicolas Gramlich <ngramlich@zynga.com>
Date:   Fri Nov 15 00:32:17 2013 -0800

    Merge pull request #218 from ebbybeh/patch-1

    Fixed a bug in ManhattanHeuristic.

commit 2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49
Author: Nicolas Gramlich <ngramlich@zynga.com>
Date:   Sat Jun 8 17:06:53 2013 -0600

    Added crashlytics xml file to gitignore.
我只想要后面的id号2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49 统一写入一个文件commit_list.txt

commit 2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49

代码如下:

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Extract the commit ids from saved ``git log`` output.

Reads ``log_list.txt`` (produced with ``git log > log_list.txt``) and writes
the ids, one per line, to ``commit_list.txt``.

Written so that it runs unchanged under Python 2 and Python 3.
"""
import os
import sys


def filter_middle_space(buf):
    """Collapse every run of spaces in *buf* to a single space.

    Leading and trailing spaces are dropped as well.  Only the space
    character is treated as a separator.
    """
    return " ".join(part for part in buf.split(" ") if part)


def extract_commit_ids(lines):
    """Return the ids found on the ``commit <id>`` lines of *lines*.

    A line counts only when it starts with ``"commit "`` and consists of
    exactly two space-separated fields, so indented commit-message text and
    lines with extra fields are ignored.
    """
    commit_list = []
    for line in lines:
        if not line.startswith("commit "):
            continue
        fields = filter_middle_space(line.strip()).split(" ")
        if len(fields) == 2:
            commit_list.append(fields[1])
    return commit_list


def main(log_file="log_list.txt", commit_list_file="commit_list.txt"):
    """Write the commit ids found in *log_file* to *commit_list_file*.

    The input is read completely before the output is opened, so a missing
    or unreadable log does not destroy an existing commit list.  Returns the
    number of ids written.
    """
    with open(log_file, "r") as r_f:
        commit_list = extract_commit_ids(r_f)

    # "\n" rather than os.linesep: text mode already translates line endings.
    with open(commit_list_file, "w") as w_f:
        w_f.write("\n".join(commit_list))

    count = len(commit_list)
    print("splite over cout  %r \n count =  %d" % (commit_list, count))
    return count


if __name__ == "__main__":
    main()


最后生成文件内容如下,每个commit  id占一行

c61bb629609d6731f44bc286b8241a09db486c8f
2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49
8440e9ab27d657ed5c68ffe939410ca2436b22c9
958b3a6d522b3a8ee9f9922f9af7f26e50882cc9
2bc477d1cfab6b7b9568e5d114bec09be6f18a9c
ba00560e775ea75755f10dd6480d106649928f9a
c7ad23890cce706643f244734f1f7899dac49246
55ded8f40d965d73f0c84e108c7727dcf21a6c36
665d86bc0f9f61e24706fce69444a1a6f9afbf77


根据commit_list.txt检出and_git仓库中每个版本的源代码,拷贝到and_parent下,新文件夹命名为 "andengine_" + "commit id号"。比如

commit 2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49 就拷贝为  andengine_2055e9eb4bcc80140d07f90f2ae00f0f1e5c0d49

共有1759个版本的源代码。

copy_andengine_src_by_commit_id.sh代码如下:


#!/bin/bash
# Check out every commit listed in commit_list.txt into its own directory:
#
#   and_git (untouched clone)  ->  and_parent/andengine_<commit id>
#
# Run from the directory that contains and_git/ and commit_list.txt.

root_path=$(pwd)
src_parent="$root_path/and_parent"
bak_path="$root_path/and_git"
src_folder="andengine"
work_dir="$root_path/$src_folder"

mkdir -p "$src_parent" || exit 1

# Commit ids are plain hex strings, so word-splitting the file content is safe.
for id_name in $(cat "$root_path/commit_list.txt")
do
    echo "commit id = $id_name"

    new_path="$src_parent/${src_folder}_$id_name"
    # On a re-run `mv` would nest the copy inside the existing directory.
    if [ -e "$new_path" ]
    then
        echo "skip, already exists: $new_path" >&2
        continue
    fi

    # Start from a fresh copy of the clone for every commit.
    rm -rf "$work_dir"
    cp -rf "$bak_path" "$work_dir" || exit 1

    # Run the reset in a subshell and only after a successful cd, so that
    # `git reset --hard` can never hit a repository other than the copy.
    if ! (cd "$work_dir" && git reset --hard "$id_name")
    then
        echo "checkout failed for $id_name" >&2
        continue
    fi

    mv "$work_dir" "$new_path"
done



第二版:

git_log_filter.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Extract the commit ids from saved ``git log`` output.

Reads ``log_list.txt`` (produced with ``git log > log_list.txt``) and writes
the ids, one per line, to ``commit_list.txt``.

Written so that it runs unchanged under Python 2 and Python 3.
"""
import os
import sys


def filter_middle_space(buf):
    """Collapse every run of spaces in *buf* to a single space.

    Leading and trailing spaces are dropped as well.  Only the space
    character is treated as a separator.
    """
    return " ".join(part for part in buf.split(" ") if part)


def extract_commit_ids(lines):
    """Return the ids found on the ``commit <id>`` lines of *lines*.

    A line counts only when it starts with ``"commit "`` and consists of
    exactly two space-separated fields, so indented commit-message text and
    lines with extra fields are ignored.
    """
    commit_list = []
    for line in lines:
        if not line.startswith("commit "):
            continue
        fields = filter_middle_space(line.strip()).split(" ")
        if len(fields) == 2:
            commit_list.append(fields[1])
    return commit_list


def main(log_file="log_list.txt", commit_list_file="commit_list.txt"):
    """Write the commit ids found in *log_file* to *commit_list_file*.

    The input is read completely before the output is opened, so a missing
    or unreadable log does not destroy an existing commit list.  Returns the
    number of ids written.
    """
    with open(log_file, "r") as r_f:
        commit_list = extract_commit_ids(r_f)

    # "\n" rather than os.linesep: text mode already translates line endings.
    with open(commit_list_file, "w") as w_f:
        w_f.write("\n".join(commit_list))

    count = len(commit_list)
    print("splite over cout  %r \n count =  %d" % (commit_list, count))
    return count


if __name__ == "__main__":
    main()

copy_andengine_src_by_commit_id.sh

#!/bin/bash
# Check out every commit listed in commit_list.txt into its own directory:
#
#   and_git (untouched clone)  ->  and_parent/andengine_<commit id>
#
# Run from the directory that contains and_git/ and commit_list.txt.

root_path=$(pwd)
src_parent="$root_path/and_parent"
bak_path="$root_path/and_git"
src_folder="andengine"
work_dir="$root_path/$src_folder"

mkdir -p "$src_parent" || exit 1

# Commit ids are plain hex strings, so word-splitting the file content is safe.
for id_name in $(cat "$root_path/commit_list.txt")
do
    echo "commit id = $id_name"

    new_path="$src_parent/${src_folder}_$id_name"
    # On a re-run `mv` would nest the copy inside the existing directory.
    if [ -e "$new_path" ]
    then
        echo "skip, already exists: $new_path" >&2
        continue
    fi

    # Start from a fresh copy of the clone for every commit.
    rm -rf "$work_dir"
    cp -rf "$bak_path" "$work_dir" || exit 1

    # Run the reset in a subshell and only after a successful cd, so that
    # `git reset --hard` can never hit a repository other than the copy.
    if ! (cd "$work_dir" && git reset --hard "$id_name")
    then
        echo "checkout failed for $id_name" >&2
        continue
    fi

    mv "$work_dir" "$new_path"
done

compare_list.sh

#!/bin/bash
# Run compare_folder.sh for every checked-out version under and_parent/
# against the reference tree and append the results to log.txt.
#
# Run from the directory that contains and_parent/.

root_path=$(pwd)

# Abort when the reference tree is missing; with `cd ...; pwd` the current
# directory would silently be used as the reference instead.
dst_path=$(cd ../dy_baofen_sandayijava_init_init/src/org/anddev && pwd) || exit 1

cd "$root_path" || exit 1

# and_parent/<version>/src/org/anddev is exactly four levels below and_parent.
find and_parent -mindepth 4 -maxdepth 4 -type d -name "anddev" |
while IFS= read -r src_path
do
    echo "commit id = $src_path" >> log.txt
    echo "$src_path"

    # stdin is redirected so the command cannot consume the list being read.
    result=$(compare_folder.sh "$src_path" "$dst_path" < /dev/null)

    # Deliberately unquoted: the result is folded onto one line with single
    # spaces, which keeps log.txt at two lines per version.
    echo $result >> log.txt
    echo $result
done

compare_folder.sh

#!/bin/bash
# Compare two directories with compare_folder.py.
#
# Usage: compare_folder.sh DIR_ONE DIR_TWO
#
# compare_folder.py must live in the same directory as this script.

if [ $# -eq 2 ]
then
    parent_path=$(pwd)

    # Resolve both arguments to absolute paths.  The `&&` chain matters: with
    # `cd A ; cd B ; pwd` a missing directory would silently resolve to the
    # current directory and the wrong tree would be compared.
    one_path=$(cd "$parent_path" && cd "$1" && pwd) || exit 1
    two_path=$(cd "$parent_path" && cd "$2" && pwd) || exit 1

    # Locate the Python helper next to this script.  $0 works whether the
    # script was found through PATH or started with an explicit path.
    sh_path=$(cd "$(dirname "$0")" && pwd) || exit 1

    python "$sh_path/compare_folder.py" "$one_path" "$two_path"
else
    echo "need two params"
    exit 1
fi


compare_folder.py

#!/usr/bin/python
"""Compare two directory trees by file count and by relative file names.

Usage: compare_folder.py FIRST_DIR SECOND_DIR

Written so that it runs unchanged under Python 2 and Python 3.
"""
import os
import sys


def get_file_list(path):
    """Return the paths of all files below *path*, recursing into sub-directories.

    Entries are built as ``path + os.sep + name``.  When *path* is not a
    directory it is returned as the only element of the list.
    """
    if not os.path.isdir(path):
        return [path]
    f_l = []
    for name in os.listdir(path):
        child = path + os.sep + name
        if os.path.isdir(child):
            f_l.extend(get_file_list(child))
        else:
            f_l.append(child)
    return f_l


def filter_delete_parent_path(path_list, parent_path):
    """Return *path_list* with the leading *parent_path* removed from each entry.

    Only a leading occurrence is stripped, so a path that happens to contain
    the parent path again further down is left intact.
    """
    prefix_len = len(parent_path)
    return [
        p[prefix_len:] if p.startswith(parent_path) else p
        for p in path_list
    ]


def filter_not_in_first_list(first_list, second_list):
    """Return the entries of *second_list* that do not occur in *first_list*.

    The order of *second_list* is kept and duplicates are reported once.
    """
    # One set serves both purposes: names to exclude and names already reported.
    excluded = set(first_list)
    ret = []
    for f_n in second_list:
        if f_n not in excluded:
            excluded.add(f_n)
            ret.append(f_n)
    return ret


def compare_folders(first_path, second_path):
    """Compare the files below two directories by their relative names.

    Returns ``(first_len, second_len, only_in_first, only_in_second)``: the
    two file counts and the relative names found on one side only.
    """
    first_file_list = filter_delete_parent_path(get_file_list(first_path), first_path)
    second_file_list = filter_delete_parent_path(get_file_list(second_path), second_path)
    only_in_first = filter_not_in_first_list(second_file_list, first_file_list)
    only_in_second = filter_not_in_first_list(first_file_list, second_file_list)
    return len(first_file_list), len(second_file_list), only_in_first, only_in_second


def main(argv):
    """Run the comparison for *argv* (same layout as ``sys.argv``).

    Prints one line: the count difference, both file counts, and how many
    names exist only in the first and only in the second directory.

    Returns the exit status: 0 on success, 1 when an argument is missing or
    is not a directory.
    """
    first_path = ""
    second_path = ""
    if len(argv) == 3:
        # str.strip() returns a new string; the result has to be kept.
        first_path = argv[1].strip()
        second_path = argv[2].strip()

    if not (os.path.isdir(first_path) and os.path.isdir(second_path)):
        print("param is not directory ")
        return 1

    first_len, second_len, only_in_first, only_in_second = compare_folders(
        first_path, second_path)
    print("less:  %d ,  %d ,  %d , %d , %d" % (
        first_len - second_len, first_len, second_len,
        len(only_in_first), len(only_in_second)))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))