Python:在文件夹内的所有文件中搜索关键词并保存行号

来源:互联网 发布:围墙设计图纸含数据 编辑:程序博客网 时间:2024/05/22 15:23

main(DirPath , KeyWord ,OutputPath ,THREAD_NUM)

在 DirPath 文件夹(含子文件夹)内的所有文件中搜索 KeyWord,并将搜索结果(每个匹配文件的路径、匹配行数以及 KeyWord 所在的行号)保存在 OutputPath 中。

采用多线程实现,线程数可通过参数 THREAD_NUM 自行设置。

#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Search every file under a directory for a keyword and save the line numbers.

``main(DirPath, KeyWord, OutputPath, THREAD_NUM)`` walks ``DirPath``
recursively, looks for ``KeyWord`` in each file and writes, for every file
that contains it, the file path, the number of matching lines and the
1-based numbers of those lines to ``OutputPath``.  The per-file searches are
spread over ``THREAD_NUM`` worker threads.
"""
import os
import sys
import time
import threading

# Legacy flag that used to act as a hand-rolled spin lock around the output
# file.  It is kept only so the module still exposes the name; the actual
# synchronization is done by _OUTPUT_LOCK below.
FileState = 0

# Serializes appends to the result file so that the report of one file is
# never interleaved with the report of another.
_OUTPUT_LOCK = threading.Lock()


def GetAllFilesList(dirPath):
    """Return the paths of all files found under ``dirPath``, recursively.

    Each path is ``os.path.join(root, name)`` for the ``root`` directory and
    file ``name`` yielded by ``os.walk(dirPath)``.
    """
    fileList = []
    for root, _dirs, files in os.walk(dirPath):
        for fileName in files:
            fileList.append(os.path.join(root, fileName))
    return fileList


def searchStrInFile(FilePath, KeyWord, OutputPath):
    """Append to ``OutputPath`` the line numbers of ``FilePath`` containing ``KeyWord``.

    Nothing is written when no line matches.  Otherwise the appended record
    is a newline, ``"<FilePath>: <match count>"``, a newline, and then every
    1-based matching line number prefixed by four spaces.

    Errors raised while opening or reading ``FilePath`` propagate to the
    caller.
    """
    # Iterate the file lazily instead of loading every line into memory.
    with open(FilePath, "r") as source:
        searchRecordList = [
            lineNo for lineNo, text in enumerate(source, 1) if KeyWord in text
        ]

    if not searchRecordList:
        return

    # Build the whole record first so the lock is held only for one write.
    record = (
        "\n" + FilePath + ": " + str(len(searchRecordList)) + "\n"
        + "".join("    " + str(lineNo) for lineNo in searchRecordList)
    )
    with _OUTPUT_LOCK:
        with open(OutputPath, "a") as fp_output:
            fp_output.write(record)


def main(DirPath, KeyWord, OutputPath, THREAD_NUM):
    """Search all files under ``DirPath`` for ``KeyWord`` using worker threads.

    ``OutputPath`` is (re)created with a header line naming the keyword, the
    directory and the current local time; each file containing the keyword
    then gets one record appended by ``searchStrInFile``.  The call returns
    only after every file has been processed.

    ``THREAD_NUM`` is the number of worker threads; values below 1 are
    treated as 1.  A file that cannot be opened or decoded is reported on
    stderr and skipped so the remaining files are still searched.
    """
    FileList = GetAllFilesList(DirPath)

    with open(OutputPath, "w") as fp_output:
        fp_output.write(
            "search " + KeyWord + " in " + DirPath + "  "
            + time.strftime('%Y-%m-%d %X', time.localtime())
        )

    # Workers pull paths from one shared iterator; the lock makes each
    # "take the next path" step atomic.
    pending = iter(FileList)
    pendingLock = threading.Lock()

    def worker():
        while True:
            with pendingLock:
                filePath = next(pending, None)
            if filePath is None:
                return
            try:
                searchStrInFile(filePath, KeyWord, OutputPath)
            except (IOError, OSError, UnicodeError) as err:
                # One unreadable file must not take the worker down.
                sys.stderr.write("skip " + filePath + ": " + str(err) + "\n")

    workers = [
        threading.Thread(target=worker) for _ in range(max(1, THREAD_NUM))
    ]
    for thread in workers:
        thread.start()
    # Wait for completion so the result file is final when main() returns.
    for thread in workers:
        thread.join()


if __name__ == "__main__":
    DirPath = "D:\\ResultData\\0_100M"
    KeyWord = "computer"
    OutputPath = os.path.join(os.getcwd(), "search_result_muti.txt")
    THREAD_NUM = 5
    main(DirPath, KeyWord, OutputPath, THREAD_NUM)






1 0