远程blast的Perl脚本

来源:互联网 发布:软件系统 编辑:程序博客网 时间:2024/05/18 13:27

问题描述

之前用blast2go太慢,自己写了个Perl脚本进行远程blast,可以将fasta文件拆分后并行运行。最后生成XML文件可以直接导入blast2go软件使用。速度比直接利用blast2go-basic快些。

代码块

#!/usr/bin/perl# huangliangbo 2015-3-30# 利用该脚本进行远程blast,生成XML格式(m7)文件;# 需要安装XML::SAX模块use v5.16;use warnings;use Bio::Tools::Run::RemoteBlast;use Getopt::Long;use File::Basename;my $basename=basename($0);my %opt;GetOptions(\%opt,"program:s","database:s","file:s","evalue","v:i","b:i","gi","help");my $help=<<USAGE;Please enter parameters:-help           print help information-program        string [ blastp, blastn, blastx, tblastn, tblastx ;default: blastp ]-database       string [ swissprot, nr, nt, etc... ]-file           string [ fasta, contains one or more fasta format sequences ]-evalue         string [ default: '1e-5' ]-v              Integer[ Number of database sequences to show one-line descriptions for (V); default:10 ]-b              Integer[ Number of database sequence to show alignments for (B); default:10 ]-gi             [T/F]  [ Show GI in deflines,default:T ]usage:perl $basename -p program -d database -f file [-e e-value] [-b 5] [-v 5]USAGEif($opt{help} or keys %opt < 3){    say $help;    exit;}my $prog = $opt{program};my $db   = $opt{database};my $file = $opt{file};my $e_val= $opt{evalue}||'1e-5';my $description= $opt{v}||10;my $alignment= $opt{b}||10;my $gi= $opt{gi}||'T';my @params = ( '-prog'      => $prog,               '-data'      => $db,               '-expect'    => $e_val,               '-readmethod' => 'xml' );my $factory = Bio::Tools::Run::RemoteBlast->new(@params);#$factory->retrieve_parameter('FORMAT_TYPE', 'XML');#change a query paramter#$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';#$Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'PAM30';#$Bio::Tools::Run::RemoteBlast::HEADER{'GAPCOSTS'} = '9 1';#$Bio::Tools::Run::RemoteBlast::HEADER{'WORD_SIZE'} = '2';#Have to request the blast with the right amount of alignments,#$Bio::Tools::Run::RemoteBlast::HEADER{'ALIGNMENTS'} = '1000';#$Bio::Tools::Run::RemoteBlast::HEADER{'DESCRIPTIONS'} = '1000';#change a retrieval parameter$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'DESCRIPTIONS'} = $description;$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'NCBI_GI'} = $gi=~/f/i?'no':'yes';$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'ALIGNMENTS'} = $alignment;$Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'FORMAT_TYPE'} = 'XML';#remove a parameter#delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};#$v is just to turn on and off the messages#$v is just a way to either print out messages or not. So you can toggle whether you want the messages like "waiting ..."my $v = 1;my $str = Bio::SeqIO->new(-file=>$file , -format => 'fasta' );my $num = 1;while (my $input = $str->next_seq()){    #Blast a sequence against a database:    #Alternatively, you could  pass in a file with many    #sequences rather than loop through sequence one at a time    #Remove the loop starting 'while (my $input = $str->next_seq())'    #and swap the two lines below for an example of that.    my $r = $factory->submit_blast($input);    print STDERR "\n$num: waiting...\n" if( $v > 0 );    $num ++;    while ( my @rids = $factory->each_rid ) {        foreach my $rid ( @rids ) {            my $rc = $factory->retrieve_blast($rid);            if( !ref($rc) ) {                if( $rc < 0 ) {                    $factory->remove_rid($rid);                }                print STDERR "." if ( $v > 0 );                sleep 5;            } else {                my $result = $rc->next_result();                #save the output                my $filename = $result->query_name()."\.xml";                $factory->save_output($filename);                `cat $filename >> blastRemoteResult.xml; rm $filename`;                $factory->remove_rid($rid);            }        }    }}
1 0