Python 文件内容去重(删除重复行)

来源:互联网 发布:美国飓风网络路径 编辑:程序博客网 时间:2024/05/22 11:47


1. 方法一:用 strip() 去掉换行符,再用 set() 去重

def modi_File(filename, src_dir="out", dst_dir="out1"):
    """Write a de-duplicated copy of ``src_dir/filename`` to ``dst_dir/filename``.

    Every input line has its newline stripped and is written at most once,
    in order of first occurrence.  After the unique lines a footer of the
    form ``数据总量:<N>`` is appended (no trailing newline), where ``N`` is
    the number of unique lines written.

    Args:
        filename: Name of the file, relative to both directories.
        src_dir: Directory holding the input file (default ``"out"``).
        dst_dir: Directory receiving the output file (default ``"out1"``).

    Raises:
        IOError/OSError: If the input cannot be opened for reading or the
            output cannot be opened for writing.
    """
    import os  # local import so the snippet stays self-contained

    # join(dir, "") only guarantees a trailing separator, so the resulting
    # path is exactly the old ``"out/" + filename`` concatenation.
    src_path = os.path.join(src_dir, "") + filename
    dst_path = os.path.join(dst_dir, "") + filename

    seen = set()
    unique_lines = []
    # ``open`` + ``with`` works on Python 2 and 3 (``file`` is gone in 3)
    # and closes the handle even when reading raises.
    with open(src_path, "r") as src:
        for raw in src:
            line = raw.rstrip("\n")
            if line not in seen:
                seen.add(line)
                unique_lines.append(line)

    with open(dst_path, "w") as dst:
        for line in unique_lines:
            dst.write(line + "\n")
        dst.write("数据总量:%s" % len(unique_lines))

2. 方法二:逐行读取,用 set() 记录已出现的行,边读边写(保留原有顺序)

#!/usr/bin/python2
# coding: utf-8


def modi(filename, directory='/tmp'):
    """Stream ``directory/filename`` into ``directory/_filename`` without duplicates.

    Lines are read one at a time; the first occurrence of each line is
    written to the output in its original position and later repeats are
    skipped.  A footer ``数据总量:<N>`` (``N`` = number of unique lines) is
    written last, without a trailing newline.

    Args:
        filename: Name of the input file inside ``directory``.
        directory: Directory holding the input and receiving the output
            (default ``'/tmp'``).

    Raises:
        IOError/OSError: If either file cannot be opened.
    """
    import os  # local import so the snippet stays self-contained

    # join(dir, '') only guarantees a trailing separator, so the paths are
    # exactly the old ``'/tmp/' + filename`` / ``'/tmp/' + '_' + filename``.
    prefix = os.path.join(directory, '')
    in_path = prefix + filename
    out_path = prefix + '_' + filename

    uniq = set()
    # ``with`` closes both handles even if reading or writing raises.
    with open(in_path) as fhi, open(out_path, 'w') as fho:
        for line in fhi:
            # A final line lacking its newline must still compare equal to
            # an identical earlier line, and must not have the footer glued
            # onto it.
            if not line.endswith('\n'):
                line += '\n'
            if line in uniq:
                continue
            uniq.add(line)
            fho.write(line)
        fho.write("数据总量:%s" % len(uniq))


# Run the demo only when executed as a script, so importing this module
# does not touch the filesystem.
if __name__ == "__main__":
    modi('xyz')



0 0