9月28号Python生物信息学数据管理

来源:互联网 发布:同花顺手机炒股软件wp 编辑:程序博客网 时间:2024/05/16 00:27

第五章笔记

1,range(start, stop[, step]),不包括stop。

2,字典的形式{键:值},键必须是不可变对象(数字,字符串,元组)。值可以是任意对象,也可以修改。

定义字典的方法:a,直接定义: d = {'b':'n','c':10}。 b,逐个分配:>>>d = {} >>>d['b'] = 'n'。(变量名不要用 dict,否则会覆盖内置的 dict 类型。)

3,if 条件语句(不是循环),第二个分支用elif,不是if。

4,列表转化为字符串用''.join(list)(列表中的元素必须都是字符串)。

字符串转化为列表:用list(s),例如 list('abc') 得到 ['a', 'b', 'c']。


5,使用input()函数时,括号里的提示字符串只用于显示,不会包含在返回值中;把返回值赋给变量后,提示字符不计入该变量的字符数。

自测题

1,

# Exercise 1: map a handful of codons to a label and look one of them up.
# NOTE(review): the labels are spelled/cased inconsistently ('stop' vs 'STOP',
# 'Glycin'); they are kept exactly as written because they are runtime output.
codon_aa = {
    'UAA': 'stop',
    'UAG': 'stop',
    'UGA': 'STOP',
    'AUG': 'Start',
    'GGG': 'Glycin',
}

print(codon_aa['UAA'])
2,
# Exercise 2: count start and stop codons in each of the three forward
# reading frames of an RNA sequence stored in a FASTA file.

# Codon -> one-letter amino acid, or the markers 'STOP' / 'START'.
# NOTE(review): AUG, GUG and UUG are all mapped to 'START' here, so they have
# no amino-acid entry of their own in this table.
codon_table = {
    'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'AGA': 'R', 'AGG': 'R',
    'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', 'AGU': 'S', 'AGC': 'S',
    'AUU': 'I', 'AUC': 'I', 'AUA': 'I',
    'UUA': 'L', 'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L',
    'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    'GUU': 'V', 'GUC': 'V', 'GUA': 'V',
    'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'AAU': 'N', 'AAC': 'N',
    'GAU': 'D', 'GAC': 'D',
    'UGU': 'C', 'UGC': 'C',
    'CAA': 'Q', 'CAG': 'Q',
    'GAA': 'E', 'GAG': 'E',
    'CAU': 'H', 'CAC': 'H',
    'AAA': 'K', 'AAG': 'K',
    'UUU': 'F', 'UUC': 'F',
    'UAU': 'Y', 'UAC': 'Y',
    'UGG': 'W',
    'UAG': 'STOP', 'UGA': 'STOP', 'UAA': 'STOP',
    'AUG': 'START', 'GUG': 'START', 'UUG': 'START',
}


def read_rna_sequence(lines):
    """Join the sequence lines of FASTA-formatted text into one string.

    Args:
        lines: iterable of text lines (e.g. an open file). Lines starting
            with '>' are treated as headers and skipped; every other line is
            stripped of surrounding whitespace and concatenated.

    Returns:
        The concatenated sequence (all records are merged into one string).
    """
    return ''.join(line.strip() for line in lines if not line.startswith('>'))


def translate_frame(rna, frame):
    """Translate one reading frame and count its start and stop codons.

    Args:
        rna: RNA sequence string.
        frame: 0-based offset at which the first codon begins.

    Returns:
        A tuple ``(prot, count_start, count_end)``. In ``prot`` a stop codon
        is written as '*', a start codon as '#', and anything not found in
        ``codon_table`` (including a trailing incomplete codon) as '-'.
    """
    residues = []
    count_start = 0
    count_end = 0
    for pos in range(frame, len(rna), 3):
        symbol = codon_table.get(rna[pos:pos + 3])
        if symbol is None:
            residues.append('-')
        elif symbol == 'STOP':
            residues.append('*')
            count_end += 1
        elif symbol == 'START':
            residues.append('#')
            count_start += 1
        else:
            residues.append(symbol)
    return ''.join(residues), count_start, count_end


def count_start_stop_codons(path='A06662-RNA.fasta'):
    """Print the start/stop codon counts for the three frames of *path*."""
    # The context manager closes the file even if reading fails.
    with open(path) as handle:
        rna = read_rna_sequence(handle)
    for frame in range(3):
        print('Reading frame' + str(frame + 1))
        _prot, count_start, count_end = translate_frame(rna, frame)
        print('起始密码子:', count_start)
        print('终止密码子:', count_end)


if __name__ == '__main__':
    count_start_stop_codons()

3,

# Exercise 3: report whether two words both occur, only one occurs, or
# neither occurs in a text file.


def report_matches(text, first='MA', second='OSS'):
    """Return the message describing how many of the two words occur in *text*.

    Args:
        text: the text to search.
        first: first substring to look for.
        second: second substring to look for.

    Returns:
        '找到了' when both substrings occur, '找到一个' when exactly one
        occurs, and '都没找到' when neither occurs.
    """
    hits = (first in text) + (second in text)
    if hits == 2:
        return '找到了'
    if hits == 1:
        return '找到一个'
    return '都没找到'


def search_text_file(path='text1.txt'):
    """Read *path* and print the search result for the default words."""
    # Read inside a context manager so the file is always closed.
    with open(path) as handle:
        print(report_matches(handle.read()))


if __name__ == '__main__':
    search_text_file()
4,
# Exercise 4: turn a per-residue pair of scores into a one-letter class
# ('H', 'E' or 'L') and print the class string for sequences typed by the user.

# Residue -> [first score, second score].
# NOTE(review): presumably helix and strand propensities — confirm against the
# exercise text; the code only shows that the first value drives 'H' and the
# second drives 'E'.
propensities = {
    'A': [1.45, 0.97], 'C': [0.77, 1.30], 'D': [0.98, 0.80], 'E': [1.53, 0.26],
    'F': [1.12, 1.28], 'G': [0.53, 0.81], 'H': [1.24, 0.71], 'I': [1.00, 1.60],
    'K': [1.07, 0.74], 'L': [1.34, 1.22], 'M': [1.20, 1.67], 'N': [0.73, 0.65],
    'P': [0.59, 0.62], 'Q': [1.17, 1.23], 'R': [0.79, 0.90], 'S': [0.79, 0.72],
    'T': [0.82, 1.20], 'V': [1.14, 1.65], 'W': [1.14, 1.19], 'Y': [0.61, 1.29],
}


def classify_residue(h_score, e_score):
    """Return 'H', 'E' or 'L' for one pair of scores.

    'H' when the first score is at least 1 and larger than the second;
    'E' when the second score is at least 1 and larger than the first;
    'L' otherwise (including ties).
    """
    if h_score >= 1 and e_score < h_score:
        return 'H'
    if e_score >= 1 and e_score > h_score:
        return 'E'
    return 'L'


# Residue -> class letter. Built as a new dict instead of overwriting the
# score table in place, so the raw scores stay available.
dict_stru = {aa: classify_residue(*scores) for aa, scores in propensities.items()}


def predict_structure(sequence):
    """Return the class string for *sequence*.

    Letters are matched case-insensitively; any character without an entry in
    ``dict_stru`` is written as '-' instead of raising KeyError.
    """
    return ''.join(dict_stru.get(aa, '-') for aa in sequence.upper())


def prompt_for_sequences():
    """Repeatedly prompt for a sequence and print its class string.

    The loop ends on an empty line or on end-of-input.
    """
    while True:
        try:
            in_put = input('请输入氨基酸:')
        except EOFError:
            break
        in_put = in_put.strip()
        if not in_put:
            break
        # End with a newline so the next prompt starts on its own line.
        print(predict_structure(in_put))


if __name__ == '__main__':
    prompt_for_sequences()


5,
# Exercise 5: read a protein sequence from a FASTA file and rewrite it with
# each residue upper-cased when its score exceeds 0.7 and lower-cased otherwise.

# Residue -> score.
# NOTE(review): the meaning of the score is not visible in this file (presumably
# a per-residue accessibility/propensity value) — confirm against the exercise.
dict_table = {
    'A': 0.48, 'R': 0.84, 'D': 0.81, 'N': 0.82, 'C': 0.32,
    'E': 0.93, 'Q': 0.81, 'G': 0.51, 'H': 0.66, 'I': 0.39,
    'L': 0.41, 'K': 0.93, 'M': 0.44, 'F': 0.42, 'P': 0.78,
    'S': 0.70, 'T': 0.71, 'W': 0.49, 'Y': 0.67, 'V': 0.40,
}


def read_protein_sequence(lines):
    """Join the sequence lines of FASTA-formatted text into one string.

    Lines starting with '>' are skipped; all other lines are stripped of
    surrounding whitespace and concatenated.
    """
    return ''.join(line.strip() for line in lines if not line.startswith('>'))


def recase_by_score(sequence, threshold=0.7):
    """Re-case every residue of *sequence* according to its score.

    Args:
        sequence: protein sequence string.
        threshold: residues scoring strictly above this value become upper
            case; all other known residues become lower case.

    Returns:
        A tuple ``(output_seq, unrecognized)`` where ``unrecognized`` lists,
        in order, the characters that have no entry in ``dict_table`` (these
        are left out of ``output_seq``).
    """
    recased = []
    unrecognized = []
    for aa in sequence:
        score = dict_table.get(aa)
        if score is None:
            unrecognized.append(aa)
        elif score > threshold:
            recased.append(aa.upper())
        else:
            recased.append(aa.lower())
    return ''.join(recased), unrecognized


def process_protein_file(in_path='proteinseq.fasta', out_path='result.txt'):
    """Print the input and re-cased sequences and write the latter to a file."""
    with open(in_path) as handle:
        input_seq = read_protein_sequence(handle)
    print(input_seq)
    output_seq, unrecognized = recase_by_score(input_seq)
    for aa in unrecognized:
        print('unrecognized character:', aa)
    print(output_seq)
    # Closing the file explicitly guarantees the result is flushed to disk.
    with open(out_path, 'w') as out:
        out.write(output_seq)


if __name__ == '__main__':
    process_protein_file()


 
原创粉丝点击