计算CDS中密码子的数量

来源:互联网 发布:烟花算法 编辑:程序博客网 时间:2024/04/30 07:10

看到一个现金求助的题目:

http://www.timedoo.com/task-id-1194.html

代码如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count codon usage for every CDS record in a FASTA file.

Reads ``cds.fa`` and writes two tab-separated tables that share one header
(``name`` followed by the 64 codons):

* ``cds.count.txt``     - raw number of occurrences of each codon
* ``cds.frequency.txt`` - each count divided by the record's total number
  of recognised codons, formatted with two decimals

Only records whose sequence starts with ``ATG`` and contains none of the
ambiguity letters N, Y, K, R, M, S, W are reported.  The total codon count
of every reported record is also echoed to stdout, separated by spaces.

The script runs unchanged on Python 2 and Python 3.
"""
import sys
from collections import OrderedDict

__author__ = "Sheng-Wei Ma"

# Column order of both output tables (after the leading "name" column).
CODONS = (
    'TTT', 'TTC', 'TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG',
    'ATT', 'ATC', 'ATA', 'ATG', 'GTT', 'GTC', 'GTA', 'GTG',
    'TAT', 'TAC', 'TAA', 'TAG', 'CAT', 'CAC', 'CAA', 'CAG',
    'AAT', 'AAC', 'AAA', 'AAG', 'GAT', 'GAC', 'GAA', 'GAG',
    'TCT', 'TCC', 'TCA', 'TCG', 'CCT', 'CCC', 'CCA', 'CCG',
    'ACT', 'ACC', 'ACA', 'ACG', 'GCT', 'GCC', 'GCA', 'GCG',
    'TGT', 'TGC', 'TGA', 'TGG', 'CGT', 'CGC', 'CGA', 'CGG',
    'AGT', 'AGC', 'AGA', 'AGG', 'GGT', 'GGC', 'GGA', 'GGG',
)

# A record containing any of these letters is skipped entirely.
# NOTE(review): other ambiguity letters (e.g. B, D, H, V) are not screened;
# codons containing them are simply left uncounted by count_codons().
AMBIGUOUS_BASES = 'NYKRMSW'


def is_countable(sequence):
    """Return True if *sequence* starts with ATG and has no screened letter.

    The comparison is case-sensitive, so lower-case sequences are rejected.
    """
    if not sequence.startswith('ATG'):
        return False
    return not any(base in sequence for base in AMBIGUOUS_BASES)


def count_codons(sequence):
    """Count the codons of *sequence*, read in frame from position 0.

    Returns an OrderedDict mapping every codon in CODONS (in that order) to
    its number of occurrences.  Triplets that are not one of the 64 codons,
    and a trailing fragment shorter than three letters, are ignored.
    """
    counts = OrderedDict((codon, 0) for codon in CODONS)
    for start in range(0, len(sequence), 3):
        codon = sequence[start:start + 3]
        if codon in counts:
            counts[codon] += 1
    return counts


def write_codon_tables(records, count_file, fre_file):
    """Write the count and frequency tables for *records*.

    records    -- iterable of objects exposing ``id`` and ``seq``
                  (``seq`` is converted with ``str()`` once per record)
    count_file -- writable text file object receiving the raw counts
    fre_file   -- writable text file object receiving the frequencies

    Each line holds exactly 65 tab-separated fields and ends with a newline;
    data rows carry no trailing tab, so they line up with the header.
    """
    header = '\t'.join(('name',) + CODONS) + '\n'
    count_file.write(header)
    fre_file.write(header)

    for record in records:
        sequence = str(record.seq)
        if not is_countable(sequence):
            continue

        counts = count_codons(sequence)
        # Never zero here: the leading ATG required above is always counted.
        total = sum(counts.values())
        sys.stdout.write('%s ' % total)

        name = '%s' % record.id
        count_row = [name] + ['%s' % n for n in counts.values()]
        fre_row = [name] + ['%.2f' % (n / float(total))
                            for n in counts.values()]
        count_file.write('\t'.join(count_row) + '\n')
        fre_file.write('\t'.join(fre_row) + '\n')


def main(fasta_path='cds.fa', count_path='cds.count.txt',
         frequency_path='cds.frequency.txt'):
    """Parse *fasta_path* and write the two codon tables."""
    # Imported here so the helpers above stay importable without Biopython.
    from Bio import SeqIO

    records = SeqIO.parse(fasta_path, 'fasta')
    # Context managers close both outputs even if parsing fails midway.
    with open(count_path, 'w') as count_file:
        with open(frequency_path, 'w') as fre_file:
            write_codon_tables(records, count_file, fre_file)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击