酵母同义和非同义的snp的程序

来源：互联网发布：淘宝卖家加入放心淘编辑：程序博客网时间：2024/04/28 19:48
use strict;use warnings;my $filename;my @filename;my $i;#氨基酸hashmy(%genetic_code) = (                  'TCA' => 'S',    # Serine         'TCC' => 'S',    # Serine         'TCG' => 'S',    # Serine         'TCT' => 'S',    # Serine         'TTC' => 'F',    # Phenylalanine         'TTT' => 'F',    # Phenylalanine         'TTA' => 'L',    # Leucine         'TTG' => 'L',    # Leucine         'TAC' => 'Y',    # Tyrosine          'TAT' => 'Y',    # Tyrosine         'TAA' => '_',    # Stop         'TAG' => '_',    # Stop         'TGC' => 'C',    # Cysteine         'TGT' => 'C',    # Cysteine         'TGA' => '_',    # Stop         'TGG' => 'W',    # Tryptophan         'CTA' => 'L',    # Leucine         'CTC' => 'L',    # Leucine         'CTG' => 'L',    # Leucine         'CTT' => 'L',    # Leucine         'CCA' => 'P',    # Proline         'CCC' => 'P',    # Proline         'CCG' => 'P',    # Proline         'CCT' => 'P',    # Proline         'CAC' => 'H',    # Histidine         'CAT' => 'H',    # Histidine         'CAA' => 'Q',    # Glutamine         'CAG' => 'Q',    # Glutamine         'CGA' => 'R',    # Arginine         'CGC' => 'R',    # Arginine         'CGG' => 'R',    # Arginine         'CGT' => 'R',    # Arginine         'ATA' => 'I',    # Isoleucine         'ATC' => 'I',    # Isoleucine         'ATT' => 'I',    # Isoleucine         'ATG' => 'M',    # Methionine         'ACA' => 'T',    # Threonine         'ACC' => 'T',    # Threonine         'ACG' => 'T',    # Threonine         'ACT' => 'T',    # Threonine         'AAC' => 'N',    # Asparagine         'AAT' => 'N',    # Asparagine         'AAA' => 'K',    # Lysine         'AAG' => 'K',    # Lysine         'AGC' => 'S',    # Serine         'AGT' => 'S',    # Serine         'AGA' => 'R',    # Arginine         'AGG' => 'R',    # Arginine         'GTA' => 'V',    # Valine         'GTC' => 'V',    # Valine         'GTG' => 'V',    # Valine         'GTT' => 'V',    # Valine         'GCA' => 'A',    # Alanine         'GCC' => 'A',    # Alanine         'GCG' => 'A',    # Alanine         'GCT' => 'A',    # Alanine             'GAC' => 'D',    # Aspartic Acid         'GAT' => 'D',    # Aspartic Acid         'GAA' => 'E',    # Glutamic Acid         'GAG' => 'E',    # Glutamic Acid         'GGA' => 'G',    # Glycine         'GGC' => 'G',    # Glycine         'GGG' => 'G',    # Glycine         'GGT' => 'G',    # Glycine         );     my $jianji_number=0;my %hash;my $DNA;my @DNA;my $jianji;my $first;my $second;my $base;my @information;my $key1;my $key2;my $key3;my $rever_first;my $rever_second;my $yushu;my $position;#一下程序对每一个染色体做一个hash@filename=qw/I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI/;foreach $filename (@filename){$jianji_number=0;undef(@DNA);open(IN1,"chr$filename")||die("can not open");$i=<IN1>;@DNA=<IN1>;$DNA=join('',@DNA);$DNA=~s/\s//g;for($position=0;$position<length $DNA;++$position)#hash中后面一个数字就是碱基的位置。本来少1个，但是我们人为的加上了1{$base=substr($DNA,$position,1);$hash{$filename}{$position+1}="$base";}}open(OUT,">TY_and_FTY.txt")||die("can not open");open(IN,"information_of_tongyi_or_not.txt")||die("can not open");while(<IN>){chomp;@information=split/\s+/,$_;if($information[8] eq "+"){    $key1=$hash{$information[0]}{$information[4]};    $key2=$hash{$information[0]}{$information[4]+1};    $key3=$hash{$information[0]}{$information[4]+2};    if($key1 eq "A" && $key2 eq "T" && $key3 eq "G")    {    if($information[7]==0)    {    $first=join('',$information[2],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});    $second=join('',$information[3],$hash{$information[0]}{$information[1]+1},$hash{$information[0]}{$information[1]+2});    if($genetic_code{$first} eq $genetic_code{$second})    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$first} TY0\n";    }    else    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";    }    }    elsif($information[7]==1)    {    $first=join('',$hash{$information[0]}{$information[1]-1},$information[2],$hash{$information[0]}{$information[1]+1});    $second=join('',$hash{$information[0]}{$information[1]-1},$information[3],$hash{$information[0]}{$information[1]+1});    if($genetic_code{$first} eq $genetic_code{$second})    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";    }    else    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";    }    }    elsif($information[7]==2)    {    $first=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[2]);    $second=join('',$hash{$information[0]}{$information[1]-2},$hash{$information[0]}{$information[1]-1},$information[3]);    if($genetic_code{$first} eq $genetic_code{$second})    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} TY\n";    }    else    {    print OUT "@information[0..8] $genetic_code{$first} $genetic_code{$second} FTY\n";    }    }    }    else    {    next;    }    }elsif($information[8] eq "-"){    $key1=$hash{$information[0]}{$information[5]};    $key2=$hash{$information[0]}{$information[5]-1};    $key3=$hash{$information[0]}{$information[5]-2};    if($key1 eq "T" && $key2 eq "A" && $key3 eq "C")#因为这里是负链，所以对应的也应该是ATC的互补链，下面的翻译过程也要注意，所有的要先对应到正链的位置上，然后在进行翻译。{    print "T\n";$yushu=($information[5]+1-$information[1])%3;if($yushu==1)                #当反向的时候余数为1，那么就是氨基酸的第一位，余数为2，那么是氨基酸的第二位，余数为o为氨基酸的第三位{$first=join('',$information[2],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});$second=join('',$information[3],$hash{$information[0]}{$information[1]-1},$hash{$information[0]}{$information[1]-2});$rever_first=revcom($first);$rever_second=revcom($second);if($genetic_code{$rever_first} eq $genetic_code{$rever_second}){print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";}else{print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";}}elsif($yushu==2){$first=join('',$hash{$information[0]}{$information[1]+1},$information[2],$hash{$information[0]}{$information[1]-1});$second=join('',$hash{$information[0]}{$information[1]+1},$information[3],$hash{$information[0]}{$information[1]-1});$rever_first=revcom($first);$rever_second=revcom($second);if($genetic_code{$rever_first} eq $genetic_code{$rever_second}){print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";}else{print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";}}elsif($yushu==0){$first=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[2]);$second=join('',$hash{$information[0]}{$information[1]+2},$hash{$information[0]}{$information[1]+1},$information[3]);$rever_first=revcom($first);$rever_second=revcom($second);if($genetic_code{$rever_first} eq $genetic_code{$rever_second}){print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} TY\n";}else{print OUT "@information[0..8] $genetic_code{$rever_first} $genetic_code{$rever_second} FTY\n";}}}}else {next;}}#子程序：获得互补序列sub revcom  {      # A subroutine to compute the reverse complement of DNA sequence       # 一个获取DNA互补序列的子程序      my($dna)=@_;      print $dna."\n";      my ($revcom)=reverse($dna);      $revcom=~tr/ACGTacgt/TGCAtgca/;      return $revcom;  }