生信脚本练习(8)合并文件 ①

来源:互联网 发布:语音广告制作软件 编辑:程序博客网 时间:2024/05/21 09:15

这道题有难度,要把这三个文件合并到一起。

文件一:
1   161514631   T   C|0.132632|(272,140,47,16)
2   222301193   A   C|0.078624|(260,115,23,9)
3   89259567    A   C|0.043716|(200,150,8,8)
4   55979552    G   A|0.211921|(236,121,59,37)
6   26032069    C   G|0.078212|(247,83,23,5)
6   93973579    A   G|0.066667|(96,30,5,4)
7   87168642    G   C|0.062016|(84,37,4,4)

文件二:
2   222301193   A   C|0.181507|(312,166,78,28)
6   93973579    A   G|0.168981|(282,77,52,21)
14  102551631   T   G|0.291411|(195,36,72,23)
19  54410004    C   T|0.486056|(96,33,70,52)
20  30671857    C   A|0.130909|(336,142,42,30)

文件三:
2   222301193   A   C|0.181507|(312,166,78,28)
17  78867580    C   T|0.223464|(296,121,83,37)
19  2207712     C   T|0.093567|(89,66,8,8)
19  54410004    C   T|0.486056|(96,33,70,52)
20  30671857    C   A|0.130909|(336,142,42,30)

要求合并成这样子:
Chr Position    Ref test03_1    test03_2    test03_3
1   161514631   T   C|0.132632|(272,140,47,16)      -   -
2   222301193   A   C|0.078624|(260,115,23,9)       C|0.181507|(312,166,78,28)  C|0.181507|(312,166,78,28)
3   89259567    A   C|0.043716|(200,150,8,8)        -   -
4   55979552    G   A|0.211921|(236,121,59,37)      -   -
6   26032069    C   G|0.078212|(247,83,23,5)        -   -
6   93973579    A   G|0.066667|(96,30,5,4)      G|0.168981|(282,77,52,21)   -
7   87168642    G   C|0.062016|(84,37,4,4)      -   -
14  102551631   T   -       G|0.291411|(195,36,72,23)   -
19  54410004    C   -       T|0.486056|(96,33,70,52)    T|0.486056|(96,33,70,52)
20  30671857    C   -       A|0.130909|(336,142,42,30)  A|0.130909|(336,142,42,30)
17  78867580    C   -       -   T|0.223464|(296,121,83,37)
19  2207712     C   -       -   T|0.093567|(89,66,8,8)

我用了逐个合并的方法,合并一次生成一个字典。
像吃糖葫芦,一个一个吞

# Input files, one per sample, in the column order of the merged table.
file1 = 'c:/Test3_1.info'
file2 = 'c:/Test3_2.info'
file3 = 'c:/Test3_3.info'


def read(file):
    """Parse one tab-separated variant file into a dict.

    Each usable line holds at least four tab-separated fields:
    chromosome, position, reference allele and the sample's value.

    Args:
        file: Path of the file to read.

    Returns:
        A dict whose keys are the first three fields joined by tabs and
        whose values are the fourth field. When a key occurs more than
        once in the file, the last occurrence wins.
    """
    dictt = {}
    with open(file, "r") as f:
        for line in f:
            arr = line.strip().split("\t")
            # Blank or truncated lines carry no sample value; skip them
            # instead of failing with IndexError.
            if len(arr) < 4:
                continue
            dictt["\t".join(arr[:3])] = arr[3]
    return dictt


def merge(tables, missing="-"):
    """Combine per-file dicts into one row per distinct key.

    Keys are matched by exact equality (not by substring), so a key such
    as chromosome 6 can never pick up the value of chromosome 16.

    Args:
        tables: Sequence of dicts as returned by read(), one per file.
        missing: Placeholder used where a file has no entry for a key.

    Returns:
        A dict mapping each key to a list with one value per table.
        Keys appear in first-seen order: all keys of the first table,
        then the new keys of the second table, and so on.
    """
    tables = list(tables)
    keys = {}
    for table in tables:
        for key in table:
            keys.setdefault(key, None)
    return {key: [table.get(key, missing) for table in tables] for key in keys}


def write_merged(merged, out_path, sample_names):
    """Write the merged table as tab-separated text with a header line.

    Every row has exactly one tab between columns so that it lines up
    with the header; the earlier chained-key approach left an extra empty
    column after the first sample.

    Args:
        merged: Dict as returned by merge().
        out_path: Path of the output file (overwritten if it exists).
        sample_names: Column titles for the sample columns.
    """
    lines = ["\t".join(["Chr", "Position", "Ref"] + list(sample_names))]
    lines.extend("\t".join([key] + values) for key, values in merged.items())
    with open(out_path, "w") as f:
        f.write("\n".join(lines) + "\n")


def main():
    """Merge the three input files, echo the rows and write t3.txt."""
    merged = merge([read(name) for name in (file1, file2, file3)])
    for key, values in merged.items():
        print("\t".join([key] + values))
    write_merged(merged, "t3.txt", ["test03_1", "test03_2", "test03_3"])


if __name__ == "__main__":
    main()

8.14 更新更好的解法,函数式编程
但是感觉还是不够模块化
以后能不能用class写一下?

import os
import re

# Directory that holds the Test3_<n>.info inputs.
# NOTE(review): the path literal was truncated in the original listing;
# point this at the real data directory before running.
path = 'c:/Users'

# Compiled once; the dot is escaped so only a literal ".info" suffix matches.
_INFO_NAME = re.compile(r'Test3_\d\.info')


def find_file(road):
    """Select the input files from a directory listing.

    Args:
        road: Iterable of file names (e.g. the result of os.listdir).

    Returns:
        Sorted list of the names that are exactly "Test3_<digit>.info".
    """
    return sorted(name for name in road if _INFO_NAME.fullmatch(name))


def read(file):
    """Read one tab-separated variant file.

    Args:
        file: Path of the file to read.

    Returns:
        A two-item list [lines, table]: the raw lines of the file, and a
        dict mapping the first three fields joined by tabs to the fourth
        field. Lines with fewer than four fields are left out of the dict.
    """
    with open(file, "r") as f:
        line = f.readlines()
    dictt = {}
    for raw in line:
        arr = raw.strip().split("\t")
        if len(arr) < 4:
            continue
        dictt["\t".join(arr[:3])] = arr[3]
    return [line, dictt]


def _blank_rows(tables):
    """Return every key of the given tables mapped to '', in first-seen order."""
    rows = {}
    for table in tables:
        for key in table:
            rows.setdefault(key, '')
    return rows


def update(file):
    """Collect the keys of all input files, each with an empty row.

    Args:
        file: A single input path or an iterable of input paths.

    Returns:
        Dict mapping every key found in the files to ''.
    """
    if isinstance(file, str):
        file = [file]
    return _blank_rows(read(name)[1] for name in file)


def add(road):
    """Build the merged rows for the input files found in a listing.

    Args:
        road: Iterable of file names; filtered through find_file().

    Returns:
        Dict mapping each key to a string holding one tab-prefixed value
        per input file, with '-' where a file has no entry for the key.
    """
    # Each file is read once; the tables are reused for keys and values.
    tables = [read(name)[1] for name in find_file(road)]
    dict_all = _blank_rows(tables)
    for table in tables:
        for key in dict_all:
            dict_all[key] += '\t' + table.get(key, '-')
    return dict_all


def write_table(rows, file_list, out_path="tt3.txt"):
    """Write the header and the merged rows as tab-separated text.

    Args:
        rows: Dict as returned by add().
        file_list: Input file names; the part before the first '.' of each
            becomes a column title.
        out_path: Path of the output file (overwritten if it exists).
    """
    header = ["Chr", "Position", "Ref"] + [name.split('.')[0] for name in file_list]
    with open(out_path, "w") as f:
        f.write("\t".join(header) + "\n")
        # Row values already start with a tab, so key + value is a full line.
        f.writelines(key + value + "\n" for key, value in rows.items())


def main():
    """Merge every Test3_<n>.info file under `path` into tt3.txt."""
    os.chdir(path)
    road = os.listdir()
    write_table(add(road), find_file(road))


if __name__ == "__main__":
    main()

感觉代码可以维护了!

原创粉丝点击