PageRenderTime 56ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/fls_fastq_screen/perlmodules/perl5/modules/BioPerl-1.6.923/share/perl/5.14.2/Bio/DB/WebDBSeqI.pm

https://gitlab.com/pooja043/Globus_Docker_3
Perl | 878 lines | 661 code | 169 blank | 48 comment | 113 complexity | 870116b755e6fd04a0917926c0595ee9 MD5 | raw file
  1. #
  2. # BioPerl module for Bio::DB::WebDBSeqI
  3. #
  4. # Please direct questions and support issues to <bioperl-l@bioperl.org>
  5. #
  6. # Cared for by Jason Stajich <jason@bioperl.org>
  7. #
  8. # Copyright Jason Stajich
  9. #
  10. # You may distribute this module under the same terms as perl itself
  11. #
  12. # POD documentation - main docs before the code
  13. #
  14. =head1 NAME
  15. Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases
  16. for retrieving sequences
  17. =head1 SYNOPSIS
  18. # get a WebDBSeqI object somehow
  19. # assuming it is a nucleotide db
  20. my $seq = $db->get_Seq_by_id('ROA1_HUMAN')
  21. =head1 DESCRIPTION
  22. Provides core set of functionality for connecting to a web based
  23. database for retriving sequences.
  24. Users wishing to add another Web Based Sequence Dabatase will need to
  25. extend this class (see L<Bio::DB::SwissProt> or L<Bio::DB::NCBIHelper> for
  26. examples) and implement the get_request method which returns a
  27. HTTP::Request for the specified uids (accessions, ids, etc depending
  28. on what query types the database accepts).
  29. =head1 FEEDBACK
  30. =head2 Mailing Lists
  31. User feedback is an integral part of the
  32. evolution of this and other Bioperl modules. Send
  33. your comments and suggestions preferably to one
  34. of the Bioperl mailing lists. Your participation
  35. is much appreciated.
  36. bioperl-l@bioperl.org - General discussion
  37. http://bioperl.org/wiki/Mailing_lists - About the mailing lists
  38. =head2 Support
  39. Please direct usage questions or support issues to the mailing list:
  40. I<bioperl-l@bioperl.org>
  41. rather than to the module maintainer directly. Many experienced and
  42. reponsive experts will be able look at the problem and quickly
  43. address it. Please include a thorough description of the problem
  44. with code and data examples if at all possible.
  45. =head2 Reporting Bugs
  46. Report bugs to the Bioperl bug tracking system to
  47. help us keep track the bugs and their resolution.
  48. Bug reports can be submitted via the web.
  49. https://redmine.open-bio.org/projects/bioperl/
  50. =head1 AUTHOR - Jason Stajich
  51. Email E<lt> jason@bioperl.org E<gt>
  52. =head1 APPENDIX
  53. The rest of the documentation details each of the
  54. object methods. Internal methods are usually
  55. preceded with a _
  56. =cut
  57. # Let the code begin...
  58. package Bio::DB::WebDBSeqI;
  59. use strict;
  60. use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE
  61. $DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES);
  62. use Bio::SeqIO;
  63. use Bio::Root::IO;
  64. use LWP::UserAgent;
  65. use POSIX 'setsid';
  66. use HTTP::Request::Common;
  67. use HTTP::Response;
  68. use File::Spec;
  69. use IO::Pipe;
  70. use IO::String;
  71. use Bio::Root::Root;
  72. use base qw(Bio::DB::RandomAccessI);
  73. BEGIN {
  74. $MODVERSION = '0.8';
  75. %RETRIEVAL_TYPES = ('io_string' => 1,
  76. 'tempfile' => 1,
  77. 'pipeline' => 1,
  78. );
  79. $DEFAULT_RETRIEVAL_TYPE = 'pipeline';
  80. $DEFAULTFORMAT = 'fasta';
  81. $LAST_INVOCATION_TIME = 0;
  82. }
  83. sub new {
  84. my ($class, @args) = @_;
  85. my $self = $class->SUPER::new(@args);
  86. my ($baseaddress, $params, $ret_type, $format,$delay,$db) =
  87. $self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)],
  88. @args);
  89. $ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type);
  90. $baseaddress && $self->url_base_address($baseaddress);
  91. $params && $self->url_params($params);
  92. $db && $self->db($db);
  93. $ret_type && $self->retrieval_type($ret_type);
  94. $delay = $self->delay_policy unless defined $delay;
  95. $self->delay($delay);
  96. # insure we always have a default format set for retrieval
  97. # even though this will be immedietly overwritten by most sub classes
  98. $format = $self->default_format unless ( defined $format &&
  99. $format ne '' );
  100. $self->request_format($format);
  101. my $ua = LWP::UserAgent->new(env_proxy => 1);
  102. $ua->agent(ref($self) ."/$MODVERSION");
  103. $self->ua($ua);
  104. $self->{'_authentication'} = [];
  105. return $self;
  106. }
  107. # from Bio::DB::RandomAccessI
  108. =head2 get_Seq_by_id
  109. Title : get_Seq_by_id
  110. Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
  111. Function: Gets a Bio::Seq object by its name
  112. Returns : a Bio::Seq object
  113. Args : the id (as a string) of a sequence
  114. Throws : "id does not exist" exception
  115. =cut
  116. sub get_Seq_by_id {
  117. my ($self,$seqid) = @_;
  118. $self->_sleep;
  119. my $seqio = $self->get_Stream_by_id([$seqid]);
  120. $self->throw("id does not exist") if( !defined $seqio ) ;
  121. if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
  122. $self->warn("When complexity is set to 0, use get_Stream_by_id\n".
  123. "Returning Bio::SeqIO object");
  124. return $seqio;
  125. }
  126. my @seqs;
  127. while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  128. $self->throw("id '$seqid' does not exist") unless @seqs;
  129. if( wantarray ) { return @seqs } else { return shift @seqs }
  130. }
  131. =head2 get_Seq_by_acc
  132. Title : get_Seq_by_acc
  133. Usage : $seq = $db->get_Seq_by_acc('X77802');
  134. Function: Gets a Bio::Seq object by accession number
  135. Returns : A Bio::Seq object
  136. Args : accession number (as a string)
  137. Throws : "acc does not exist" exception
  138. =cut
  139. sub get_Seq_by_acc {
  140. my ($self,$seqid) = @_;
  141. $self->_sleep;
  142. my $seqio = $self->get_Stream_by_acc($seqid);
  143. $self->throw("acc '$seqid' does not exist") if( ! defined $seqio );
  144. if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
  145. $self->warn("When complexity is set to 0, use get_Stream_by_acc\n".
  146. "Returning Bio::SeqIO object");
  147. return $seqio;
  148. }
  149. my @seqs;
  150. while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  151. $self->throw("acc $seqid does not exist") unless @seqs;
  152. if( wantarray ) { return @seqs } else { return shift @seqs }
  153. }
  154. =head2 get_Seq_by_gi
  155. Title : get_Seq_by_gi
  156. Usage : $seq = $db->get_Seq_by_gi('405830');
  157. Function: Gets a Bio::Seq object by gi number
  158. Returns : A Bio::Seq object
  159. Args : gi number (as a string)
  160. Throws : "gi does not exist" exception
  161. =cut
  162. sub get_Seq_by_gi {
  163. my ($self,$seqid) = @_;
  164. $self->_sleep;
  165. my $seqio = $self->get_Stream_by_gi($seqid);
  166. $self->throw("gi does not exist") if( !defined $seqio );
  167. if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
  168. $self->warn("When complexity is set to 0, use get_Stream_by_gi\n".
  169. "Returning Bio::SeqIO object");
  170. return $seqio;
  171. }
  172. my @seqs;
  173. while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  174. $self->throw("gi does not exist") unless @seqs;
  175. if( wantarray ) { return @seqs } else { return shift @seqs }
  176. }
  177. =head2 get_Seq_by_version
  178. Title : get_Seq_by_version
  179. Usage : $seq = $db->get_Seq_by_version('X77802.1');
  180. Function: Gets a Bio::Seq object by sequence version
  181. Returns : A Bio::Seq object
  182. Args : accession.version (as a string)
  183. Throws : "acc.version does not exist" exception
  184. =cut
  185. sub get_Seq_by_version {
  186. my ($self,$seqid) = @_;
  187. $self->_sleep;
  188. my $seqio = $self->get_Stream_by_version($seqid);
  189. $self->throw("accession.version does not exist") if( !defined $seqio );
  190. if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
  191. $self->warn("When complexity is set to 0, use get_Stream_by_version\n".
  192. "Returning Bio::SeqIO object");
  193. return $seqio;
  194. }
  195. my @seqs;
  196. while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  197. $self->throw("accession.version does not exist") unless @seqs;
  198. if( wantarray ) { return @seqs } else { return shift @seqs }
  199. }
  200. # implementing class must define these
  201. =head2 get_request
  202. Title : get_request
  203. Usage : my $url = $self->get_request
  204. Function: returns a HTTP::Request object
  205. Returns :
  206. Args : %qualifiers = a hash of qualifiers (ids, format, etc)
  207. =cut
  208. sub get_request {
  209. my ($self) = @_;
  210. my $msg = "Implementing class must define method get_request in class WebDBSeqI";
  211. $self->throw($msg);
  212. }
  213. # class methods
  214. =head2 get_Stream_by_id
  215. Title : get_Stream_by_id
  216. Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
  217. Function: Gets a series of Seq objects by unique identifiers
  218. Returns : a Bio::SeqIO stream object
  219. Args : $ref : a reference to an array of unique identifiers for
  220. the desired sequence entries
  221. =cut
  222. sub get_Stream_by_id {
  223. my ($self, $ids) = @_;
  224. my ($webfmt,$localfmt) = $self->request_format;
  225. return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single',
  226. '-format' => $webfmt);
  227. }
  228. *get_Stream_by_batch = sub {
  229. my $self = shift;
  230. $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
  231. $self->get_Stream_by_id(@_)
  232. };
  233. =head2 get_Stream_by_acc
  234. Title : get_Stream_by_acc
  235. Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
  236. Function: Gets a series of Seq objects by accession numbers
  237. Returns : a Bio::SeqIO stream object
  238. Args : $ref : a reference to an array of accession numbers for
  239. the desired sequence entries
  240. Note : For GenBank, this just calls the same code for get_Stream_by_id()
  241. =cut
  242. sub get_Stream_by_acc {
  243. my ($self, $ids ) = @_;
  244. return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
  245. }
  246. =head2 get_Stream_by_gi
  247. Title : get_Stream_by_gi
  248. Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]);
  249. Function: Gets a series of Seq objects by gi numbers
  250. Returns : a Bio::SeqIO stream object
  251. Args : $ref : a reference to an array of gi numbers for
  252. the desired sequence entries
  253. Note : For GenBank, this just calls the same code for get_Stream_by_id()
  254. =cut
  255. sub get_Stream_by_gi {
  256. my ($self, $ids ) = @_;
  257. return $self->get_seq_stream('-uids' => $ids, '-mode' => 'gi');
  258. }
  259. =head2 get_Stream_by_version
  260. Title : get_Stream_by_version
  261. Usage : $seq = $db->get_Stream_by_version([$version1, $version2]);
  262. Function: Gets a series of Seq objects by accession.versions
  263. Returns : a Bio::SeqIO stream object
  264. Args : $ref : a reference to an array of accession.version strings for
  265. the desired sequence entries
  266. Note : For GenBank, this is implemeted in NCBIHelper
  267. =cut
  268. sub get_Stream_by_version {
  269. my ($self, $ids ) = @_;
  270. # $self->throw("Implementing class should define this method!");
  271. return $self->get_seq_stream('-uids' => $ids, '-mode' => 'version'); # how it should work
  272. }
  273. =head2 get_Stream_by_query
  274. Title : get_Stream_by_query
  275. Usage : $stream = $db->get_Stream_by_query($query);
  276. Function: Gets a series of Seq objects by way of a query string or oject
  277. Returns : a Bio::SeqIO stream object
  278. Args : $query : A string that uses the appropriate query language
  279. for the database or a Bio::DB::QueryI object. It is suggested
  280. that you create the Bio::DB::Query object first and interrogate
  281. it for the entry count before you fetch a potentially large stream.
  282. =cut
  283. sub get_Stream_by_query {
  284. my ($self, $query ) = @_;
  285. return $self->get_seq_stream('-query' => $query, '-mode'=>'query');
  286. }
  287. =head2 default_format
  288. Title : default_format
  289. Usage : my $format = $self->default_format
  290. Function: Returns default sequence format for this module
  291. Returns : string
  292. Args : none
  293. =cut
  294. sub default_format {
  295. return $DEFAULTFORMAT;
  296. }
  297. # sorry, but this is hacked in because of BioFetch problems...
  298. sub db {
  299. my $self = shift;
  300. my $d = $self->{_db};
  301. $self->{_db} = shift if @_;
  302. $d;
  303. }
  304. =head2 request_format
  305. Title : request_format
  306. Usage : my ($req_format, $ioformat) = $self->request_format;
  307. $self->request_format("genbank");
  308. $self->request_format("fasta");
  309. Function: Get/Set sequence format retrieval. The get-form will normally not
  310. be used outside of this and derived modules.
  311. Returns : Array of two strings, the first representing the format for
  312. retrieval, and the second specifying the corresponding SeqIO format.
  313. Args : $format = sequence format
  314. =cut
  315. sub request_format {
  316. my ($self, $value) = @_;
  317. if( defined $value ) {
  318. $self->{'_format'} = [ $value, $value];
  319. }
  320. return @{$self->{'_format'}};
  321. }
  322. =head2 get_seq_stream
  323. Title : get_seq_stream
  324. Usage : my $seqio = $self->get_seq_stream(%qualifiers)
  325. Function: builds a url and queries a web db
  326. Returns : a Bio::SeqIO stream capable of producing sequence
  327. Args : %qualifiers = a hash qualifiers that the implementing class
  328. will process to make a url suitable for web querying
  329. =cut
  330. sub get_seq_stream {
  331. my ($self, %qualifiers) = @_;
  332. my ($rformat, $ioformat) = $self->request_format();
  333. my $seen = 0;
  334. foreach my $key ( keys %qualifiers ) {
  335. if( $key =~ /format/i ) {
  336. $rformat = $qualifiers{$key};
  337. $seen = 1;
  338. }
  339. }
  340. $qualifiers{'-format'} = $rformat if( !$seen);
  341. ($rformat, $ioformat) = $self->request_format($rformat);
  342. # These parameters are implemented for Bio::DB::GenBank objects only
  343. if($self->isa('Bio::DB::GenBank')) {
  344. $self->seq_start() && ($qualifiers{'-seq_start'} = $self->seq_start());
  345. $self->seq_stop() && ($qualifiers{'-seq_stop'} = $self->seq_stop());
  346. $self->strand() && ($qualifiers{'-strand'} = $self->strand());
  347. defined $self->complexity() && ($qualifiers{'-complexity'} = $self->complexity());
  348. }
  349. my $request = $self->get_request(%qualifiers);
  350. $request->proxy_authorization_basic($self->authentication)
  351. if ( $self->authentication);
  352. $self->debug("request is ". $request->as_string(). "\n");
  353. # workaround for MSWin systems
  354. $self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/;
  355. if ($self->retrieval_type =~ /pipeline/) {
  356. # Try to create a stream using POSIX fork-and-pipe facility.
  357. # this is a *big* win when fetching thousands of sequences from
  358. # a web database because we can return the first entry while
  359. # transmission is still in progress.
  360. # Also, no need to keep sequence in memory or in a temporary file.
  361. # If this fails (Windows, MacOS 9), we fall back to non-pipelined access.
  362. # fork and pipe: _stream_request()=><STREAM>
  363. my ($result,$stream) = $self->_open_pipe();
  364. if (defined $result) {
  365. $DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugge
  366. if (!$result) { # in child process
  367. $self->_stream_request($request,$stream);
  368. POSIX::_exit(0); #prevent END blocks from executing in this forked child
  369. }
  370. else {
  371. return Bio::SeqIO->new('-verbose' => $self->verbose,
  372. '-format' => $ioformat,
  373. '-fh' => $stream);
  374. }
  375. }
  376. else {
  377. $self->retrieval_type('io_string');
  378. }
  379. }
  380. if ($self->retrieval_type =~ /temp/i) {
  381. my $dir = $self->io->tempdir( CLEANUP => 1);
  382. my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
  383. close $fh;
  384. my $resp = $self->_request($request, $tmpfile);
  385. if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
  386. $self->throw("WebDBSeqI Error - check query sequences!\n");
  387. }
  388. $self->postprocess_data('type' => 'file',
  389. 'location' => $tmpfile);
  390. # this may get reset when requesting batch mode
  391. ($rformat,$ioformat) = $self->request_format();
  392. if( $self->verbose > 0 ) {
  393. open(my $ERR, "<", $tmpfile);
  394. while(<$ERR>) { $self->debug($_);}
  395. }
  396. return Bio::SeqIO->new('-verbose' => $self->verbose,
  397. '-format' => $ioformat,
  398. '-file' => $tmpfile);
  399. }
  400. if ($self->retrieval_type =~ /io_string/i ) {
  401. my $resp = $self->_request($request);
  402. my $content = $resp->content_ref;
  403. $self->debug( "content is $$content\n");
  404. if (!$resp->is_success() || length($$content) == 0) {
  405. $self->throw("WebDBSeqI Error - check query sequences!\n");
  406. }
  407. ($rformat,$ioformat) = $self->request_format();
  408. $self->postprocess_data('type'=> 'string',
  409. 'location' => $content);
  410. $self->debug( "str is $$content\n");
  411. return Bio::SeqIO->new('-verbose' => $self->verbose,
  412. '-format' => $ioformat,
  413. '-fh' => new IO::String($$content));
  414. }
  415. # if we got here, we don't know how to handle the retrieval type
  416. $self->throw("retrieval type " . $self->retrieval_type .
  417. " unsupported\n");
  418. }
  419. =head2 url_base_address
  420. Title : url_base_address
  421. Usage : my $address = $self->url_base_address or
  422. $self->url_base_address($address)
  423. Function: Get/Set the base URL for the Web Database
  424. Returns : Base URL for the Web Database
  425. Args : $address - URL for the WebDatabase
  426. =cut
  427. sub url_base_address {
  428. my $self = shift;
  429. my $d = $self->{'_baseaddress'};
  430. $self->{'_baseaddress'} = shift if @_;
  431. $d;
  432. }
  433. =head2 proxy
  434. Title : proxy
  435. Usage : $httpproxy = $db->proxy('http') or
  436. $db->proxy(['http','ftp'], 'http://myproxy' )
  437. Function: Get/Set a proxy for use of proxy
  438. Returns : a string indicating the proxy
  439. Args : $protocol : an array ref of the protocol(s) to set/get
  440. $proxyurl : url of the proxy to use for the specified protocol
  441. $username : username (if proxy requires authentication)
  442. $password : password (if proxy requires authentication)
  443. =cut
  444. sub proxy {
  445. my ($self,$protocol,$proxy,$username,$password) = @_;
  446. return if ( !defined $self->ua || !defined $protocol
  447. || !defined $proxy );
  448. $self->authentication($username, $password)
  449. if ($username && $password);
  450. return $self->ua->proxy($protocol,$proxy);
  451. }
  452. =head2 authentication
  453. Title : authentication
  454. Usage : $db->authentication($user,$pass)
  455. Function: Get/Set authentication credentials
  456. Returns : Array of user/pass
  457. Args : Array or user/pass
  458. =cut
  459. sub authentication{
  460. my ($self,$u,$p) = @_;
  461. if( defined $u && defined $p ) {
  462. $self->{'_authentication'} = [ $u,$p];
  463. }
  464. return @{$self->{'_authentication'}};
  465. }
  466. =head2 retrieval_type
  467. Title : retrieval_type
  468. Usage : $self->retrieval_type($type);
  469. my $type = $self->retrieval_type
  470. Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile)
  471. Returns : string representing retrieval type
  472. Args : $value - the value to store
  473. This setting affects how the data stream from the remote web server is
  474. processed and passed to the Bio::SeqIO layer. Three types of retrieval
  475. types are currently allowed:
  476. pipeline Perform a fork in an attempt to begin streaming
  477. while the data is still downloading from the remote
  478. server. Disk, memory and speed efficient, but will
  479. not work on Windows or MacOS 9 platforms.
  480. io_string Store downloaded database entry(s) in memory. Can be
  481. problematic for batch downloads because entire set
  482. of entries must fit in memory. Alll entries must be
  483. downloaded before processing can begin.
  484. tempfile Store downloaded database entry(s) in a temporary file.
  485. All entries must be downloaded before processing can
  486. begin.
  487. The default is pipeline, with automatic fallback to io_string if
  488. pipelining is not available.
  489. =cut
  490. sub retrieval_type {
  491. my ($self, $value) = @_;
  492. if( defined $value ) {
  493. $value = lc $value;
  494. if( ! $RETRIEVAL_TYPES{$value} ) {
  495. $self->warn("invalid retrieval type $value must be one of (" .
  496. join(",", keys %RETRIEVAL_TYPES), ")");
  497. $value = $DEFAULT_RETRIEVAL_TYPE;
  498. }
  499. $self->{'_retrieval_type'} = $value;
  500. }
  501. return $self->{'_retrieval_type'};
  502. }
  503. =head2 url_params
  504. Title : url_params
  505. Usage : my $params = $self->url_params or
  506. $self->url_params($params)
  507. Function: Get/Set the URL parameters for the Web Database
  508. Returns : url parameters for Web Database
  509. Args : $params - parameters to be appended to the URL for the WebDatabase
  510. =cut
  511. sub url_params {
  512. my ($self, $value) = @_;
  513. if( defined $value ) {
  514. $self->{'_urlparams'} = $value;
  515. }
  516. }
  517. =head2 ua
  518. Title : ua
  519. Usage : my $ua = $self->ua or
  520. $self->ua($ua)
  521. Function: Get/Set a LWP::UserAgent for use
  522. Returns : reference to LWP::UserAgent Object
  523. Args : $ua - must be a LWP::UserAgent
  524. =cut
  525. sub ua {
  526. my ($self, $ua) = @_;
  527. if( defined $ua && $ua->isa("LWP::UserAgent") ) {
  528. $self->{'_ua'} = $ua;
  529. }
  530. return $self->{'_ua'};
  531. }
  532. =head2 postprocess_data
  533. Title : postprocess_data
  534. Usage : $self->postprocess_data ( 'type' => 'string',
  535. 'location' => \$datastr);
  536. Function: process downloaded data before loading into a Bio::SeqIO
  537. Returns : void
  538. Args : hash with two keys - 'type' can be 'string' or 'file'
  539. - 'location' either file location or string
  540. reference containing data
  541. =cut
  542. sub postprocess_data {
  543. my ( $self, %args) = @_;
  544. return;
  545. }
  546. # private methods
  547. sub _request {
  548. my ($self, $url,$tmpfile) = @_;
  549. my ($resp);
  550. if( defined $tmpfile && $tmpfile ne '' ) {
  551. $resp = $self->ua->request($url, $tmpfile);
  552. } else {
  553. $resp = $self->ua->request($url);
  554. }
  555. if( $resp->is_error ) {
  556. $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
  557. }
  558. return $resp;
  559. }
  560. #mod_perl-safe replacement for the open(BLEH,'-|') call. if running
  561. #under mod_perl, detects it and closes the child's STDIN and STDOUT
  562. #handles
  563. sub _open_pipe {
  564. my ($self) = @_;
  565. # is mod_perl running? Which API?
  566. my $mp = $self->mod_perl_api;
  567. if($mp and ! our $loaded_apache_sp) {
  568. my $load_api = ($mp == 1) ? 'use Apache::SubProcess': 'use Apache2::SubProcess';
  569. eval $load_api;
  570. $@ and $self->throw("$@\n$load_api module required for running under mod_perl");
  571. $loaded_apache_sp = 1;
  572. }
  573. my $pipe = IO::Pipe->new();
  574. local $SIG{CHLD} = 'IGNORE';
  575. defined(my $pid = fork)
  576. or $self->throw("Couldn't fork: $!");
  577. unless($pid) {
  578. #CHILD
  579. $pipe->writer();
  580. #if we're running under mod_perl, clean up some things after this fork
  581. if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) {
  582. $r->cleanup_for_exec;
  583. #don't read or write the mod_perl parent's tied filehandles
  584. close STDIN; close STDOUT;
  585. setsid() or $self->throw('Could not detach from parent');
  586. }
  587. } else {
  588. #PARENT
  589. $pipe->reader();
  590. }
  591. return ( $pid, $pipe );
  592. }
  593. # send web request to specified filehandle, or stdout, for streaming purposes
  594. sub _stream_request {
  595. my $self = shift;
  596. my $request = shift;
  597. my $dest_fh = shift || \*STDOUT;
  598. # fork so as to pipe output of fetch process through to
  599. # postprocess_data method call.
  600. my ($child,$fetch) = $self->_open_pipe();
  601. if ($child) {
  602. #PARENT
  603. local ($/) = "//\n"; # assume genbank/swiss format
  604. $| = 1;
  605. my $records = 0;
  606. while (my $record = <$fetch>) {
  607. $records++;
  608. $self->postprocess_data('type' => 'string',
  609. 'location' => \$record);
  610. print $dest_fh $record;
  611. }
  612. $/ = "\n"; # reset to be safe;
  613. close $dest_fh; #must explicitly close here, because the hard
  614. #exits don't cloes them for us
  615. }
  616. else {
  617. #CHILD
  618. $| = 1;
  619. my $resp = $self->ua->request($request,
  620. sub { print $fetch $_[0] }
  621. );
  622. if( $resp->is_error ) {
  623. $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
  624. }
  625. close $fetch; #must explicitly close here, because the hard exists
  626. #don't close them for us
  627. POSIX::_exit(0);
  628. }
  629. }
  630. sub io {
  631. my ($self,$io) = @_;
  632. if(defined($io) || (! exists($self->{'_io'}))) {
  633. $io = Bio::Root::IO->new() unless $io;
  634. $self->{'_io'} = $io;
  635. }
  636. return $self->{'_io'};
  637. }
  638. =head2 delay
  639. Title : delay
  640. Usage : $secs = $self->delay([$secs])
  641. Function: get/set number of seconds to delay between fetches
  642. Returns : number of seconds to delay
  643. Args : new value
  644. NOTE: the default is to use the value specified by delay_policy().
  645. This can be overridden by calling this method, or by passing the
  646. -delay argument to new().
  647. =cut
  648. sub delay {
  649. my $self = shift;
  650. my $d = $self->{'_delay'};
  651. $self->{'_delay'} = shift if @_;
  652. $d;
  653. }
  654. =head2 delay_policy
  655. Title : delay_policy
  656. Usage : $secs = $self->delay_policy
  657. Function: return number of seconds to delay between calls to remote db
  658. Returns : number of seconds to delay
  659. Args : none
  660. NOTE: The default delay policy is 0s. Override in subclasses to
  661. implement delays. The timer has only second resolution, so the delay
  662. will actually be +/- 1s.
  663. =cut
  664. sub delay_policy {
  665. my $self = shift;
  666. return 0;
  667. }
  668. =head2 _sleep
  669. Title : _sleep
  670. Usage : $self->_sleep
  671. Function: sleep for a number of seconds indicated by the delay policy
  672. Returns : none
  673. Args : none
  674. NOTE: This method keeps track of the last time it was called and only
  675. imposes a sleep if it was called more recently than the delay_policy()
  676. allows.
  677. =cut
  678. sub _sleep {
  679. my $self = shift;
  680. my $last_invocation = $LAST_INVOCATION_TIME;
  681. if (time - $LAST_INVOCATION_TIME < $self->delay) {
  682. my $delay = $self->delay - (time - $LAST_INVOCATION_TIME);
  683. warn "sleeping for $delay seconds\n" if $self->verbose > 0;
  684. sleep $delay;
  685. }
  686. $LAST_INVOCATION_TIME = time;
  687. }
  688. =head2 mod_perl_api
  689. Title : mod_perl_api
  690. Usage : $version = self->mod_perl_api
  691. Function: Returns API version of mod_perl being used based on set env. variables
  692. Returns : mod_perl API version; if mod_perl isn't loaded, returns 0
  693. Args : none
  694. =cut
  695. sub mod_perl_api {
  696. my $self = shift;
  697. my $v = $ENV{MOD_PERL} ?
  698. ( exists $ENV{MOD_PERL_API_VERSION} && $ENV{MOD_PERL_API_VERSION} >= 2 ) ?
  699. 2 :
  700. 1
  701. : 0;
  702. return $v;
  703. }
  704. 1;