PageRenderTime 54ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/WormBase/API/Object/Sequence.pm

https://bitbucket.org/tharris/wormbase/
Perl | 1829 lines | 1090 code | 582 blank | 157 comment | 79 complexity | 9523819335766a0939d271b2460a335d MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
  1. package WormBase::API::Object::Sequence;
  2. use Moose;
  3. extends 'WormBase::API::Object';
  4. with 'WormBase::API::Role::Object';
  5. with 'WormBase::API::Role::Position';
  6. use Bio::Graphics::Browser2::Markup;
  7. =pod
  8. =head1 NAME
  9. WormBase::API::Object::Sequence
  10. =head1 SYNPOSIS
  11. Model for the Ace ?Sequence class.
  12. =head1 URL
  13. http://wormbase.org/species/sequence
  14. =cut
  15. has 'type' => (
  16. is => 'ro',
  17. lazy_build => 1,
  18. );
  19. has 'length' => (
  20. is => 'rw',
  21. );
  22. has 'gff' => (
  23. is => 'ro',
  24. lazy => 1,
  25. default => sub {
  26. my ($self) = @_;
  27. return $self->gff_dsn;
  28. }
  29. );
  30. has 'sequence' => (
  31. is => 'ro',
  32. lazy => 1,
  33. default => sub {
  34. my ($self) = @_;
  35. return $self ~~ 'Sequence';
  36. }
  37. );
  38. # Role?
  39. has '_method' => (
  40. is => 'ro',
  41. lazy => 1,
  42. default => sub {
  43. my ($self) = @_;
  44. return $self ~~ 'Method';
  45. },
  46. );
  47. has 'genes' => (
  48. is => 'ro',
  49. lazy => 1,
  50. default => sub {
  51. my ($self) = @_;
  52. my %seen;
  53. return [grep {!$seen{$_}++} @{$self ~~ '@Locus'}];
  54. },
  55. );
  56. sub _build_type {
  57. my ($self) = @_;
  58. my $s = $self->object;
  59. # figure out where this sequence comes from
  60. # should rearrange in order of probability
  61. my $type;
  62. if ($s =~ /^cb\d+\.fpc\d+$/) {
  63. $type = 'C. briggsae draft contig';
  64. }
  65. elsif (_is_gap($s)) {
  66. $type = 'gap in genomic sequence -- for accounting purposes';
  67. }
  68. elsif (eval { $s->Genomic_canonical(0) }) {
  69. $type = 'genomic';
  70. }
  71. elsif ($self->_method eq 'Vancouver_fosmid') {
  72. $type = 'genomic -- fosmid';
  73. }
  74. elsif (eval { $s->Pseudogene(0) }) {
  75. $type = 'pseudogene';
  76. }
  77. elsif (eval { $s->RNA_Pseudogene(0) }) {
  78. $type = 'RNA_pseudogene';
  79. }
  80. elsif (eval { $s->Locus }) {
  81. $type = 'confirmed gene';
  82. }
  83. elsif (eval { $s->Coding }) {
  84. $type = 'predicted coding sequence';
  85. }
  86. elsif ($s->get('cDNA')) {
  87. ($type) = $s->get('cDNA');
  88. }
  89. elsif ($self->_method eq 'EST_nematode') {
  90. $type = 'non-Elegans nematode EST sequence';
  91. }
  92. elsif (eval { $s->AC_number }) {
  93. $type = 'external sequence';
  94. }
  95. elsif (eval{_is_merged($s)}) {
  96. $type = 'merged sequence entry';
  97. }
  98. elsif ($self->_method eq 'NDB') {
  99. $type = 'GenBank/EMBL Entry';
  100. # This is going to need more robust processing to traverse object structure
  101. }
  102. elsif (eval { $s->RNA} ) {
  103. $type = eval {$s->RNA} . ' ' . eval {$s->RNA->right};
  104. }
  105. else {
  106. $type = eval {$s->Properties(1)};
  107. }
  108. $type ||= 'unknown';
  109. return $type;
  110. }
  111. #######################################
  112. #
  113. # CLASS METHODS
  114. #
  115. #######################################
  116. =head1 CLASS LEVEL METHODS/URIs
  117. =cut
  118. #######################################
  119. #
  120. # INSTANCE METHODS
  121. #
  122. #######################################
  123. =head1 INSTANCE LEVEL METHODS/URIs
  124. =cut
  125. ############################################################
  126. #
  127. # The Overview widget
  128. #
  129. ############################################################
  130. =head2
  131. =cut
  132. # sub name {}
  133. # Supplied by Role; POD will automatically be inserted here.
  134. # << include name >>
  135. # sub taxonomy {}
  136. # Supplied by Role; POD will automatically be inserted here.
  137. # << include taxonomy >>
  138. # sub description { }
  139. # Supplied by Role; POD will automatically be inserted here.
  140. # << include description >>
  141. =head3 sequence_type
  142. This method will return a data structure containing
  143. which type of sequence the requested object is.
  144. =over
  145. =item PERL API
  146. $data = $model->sequence_type();
  147. =item REST API
  148. B<Request Method>
  149. GET
  150. B<Requires Authentication>
  151. No
  152. B<Parameters>
  153. A Sequence ID (eg JC8.10a)
  154. B<Returns>
  155. =over 4
  156. =item *
  157. 200 OK and JSON, HTML, or XML
  158. =item *
  159. 404 Not Found
  160. =back
  161. B<Request example>
  162. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/sequence_type
  163. B<Response example>
  164. <div class="response-example"></div>
  165. =back
  166. =cut
  167. sub sequence_type {
  168. my ($self) = @_;
  169. return {
  170. description => 'the general type of the sequence',
  171. data => $self->type,
  172. };
  173. }
  174. =head3 identity
  175. This method will return a data structure containing
  176. the brief identity of the requested object.
  177. =over
  178. =item PERL API
  179. $data = $model->identity();
  180. =item REST API
  181. B<Request Method>
  182. GET
  183. B<Requires Authentication>
  184. No
  185. B<Parameters>
  186. A Sequence ID (eg JC8.10a)
  187. B<Returns>
  188. =over 4
  189. =item *
  190. 200 OK and JSON, HTML, or XML
  191. =item *
  192. 404 Not Found
  193. =back
  194. B<Request example>
  195. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/identity
  196. B<Response example>
  197. <div class="response-example"></div>
  198. =back
  199. =cut
  200. sub identity {
  201. my ($self) = @_;
  202. # Cull a brief identification from each gene. Redundant with gene page and
  203. # not necessarily accurate if we are looking at a splice variant.
  204. my $data = join(', ', @{$self->genes}, $self ~~ 'Brief_identification' || ());
  205. $data .= ' (pseudogene)' if $data && $self->type eq 'pseudogene';
  206. return { description => 'the identity of the sequence',
  207. data => $data || undef };
  208. }
  209. # sub method {}
  210. # Supplied by Role; POD will automatically be inserted here.
  211. # << include method >>
  212. # sub remarks {}
  213. # Supplied by Role; POD will automatically be inserted here.
  214. # << include remarks >>
  215. ############################################################
  216. #
  217. # The External Links widget
  218. #
  219. ############################################################
  220. =head2 External Links
  221. =cut
  222. # sub xrefs {}
  223. # Supplied by Role; POD will automatically be inserted here.
  224. # << include xrefs >>
  225. ############################################################
  226. #
  227. # The Region (Genomic) widget.
  228. # Sort of redundant with location....
  229. #
  230. ############################################################
  231. =head2 Region
  232. =head3 corresponding_gene
  233. This method will return a data structure containing
  234. the corresponding gene of the sequence object.
  235. =over
  236. =item PERL API
  237. $data = $model->corresponding_gene();
  238. =item REST API
  239. B<Request Method>
  240. GET
  241. B<Requires Authentication>
  242. No
  243. B<Parameters>
  244. A Sequence ID (eg JC8.10a)
  245. B<Returns>
  246. =over 4
  247. =item *
  248. 200 OK and JSON, HTML, or XML
  249. =item *
  250. 404 Not Found
  251. =back
  252. B<Request example>
  253. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/corresponding_gene
  254. B<Response example>
  255. <div class="response-example"></div>
  256. =back
  257. =cut
  258. sub corresponding_gene {
  259. my $self = shift;
  260. my $object = $self->object;
  261. my @genes = map { $self->_pack_obj($_) } $object->Gene;
  262. return { description => 'corresponding gene of the sequence, if known',
  263. data => @genes? \@genes : undef };
  264. }
  265. =head3 matching_transcript
  266. This method will return a data structure containing
  267. transcripts that match the current sequence.
  268. =over
  269. =item PERL API
  270. $data = $model->matching_transcript();
  271. =item REST API
  272. B<Request Method>
  273. GET
  274. B<Requires Authentication>
  275. No
  276. B<Parameters>
  277. A Sequence ID (eg JC8.10a)
  278. B<Returns>
  279. =over 4
  280. =item *
  281. 200 OK and JSON, HTML, or XML
  282. =item *
  283. 404 Not Found
  284. =back
  285. B<Request example>
  286. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/matching_transcript
  287. B<Response example>
  288. <div class="response-example"></div>
  289. =back
  290. =cut
  291. sub matching_transcript {
  292. my $self = shift;
  293. my $object = $self->object;
  294. my @transcripts = map { $self->_pack_obj($_) } $object->Matching_transcript // undef;
  295. return { description => 'matching transcripts of the sequence',
  296. data => @transcripts ? \@transcripts : undef };
  297. }
  298. =head3 matching_cds
  299. This method will return a data structure containing
  300. matching CDSs of the sequence.
  301. =over
  302. =item PERL API
  303. $data = $model->matching_cds();
  304. =item REST API
  305. B<Request Method>
  306. GET
  307. B<Requires Authentication>
  308. No
  309. B<Parameters>
  310. A Sequence ID (eg JC8.10a)
  311. B<Returns>
  312. =over 4
  313. =item *
  314. 200 OK and JSON, HTML, or XML
  315. =item *
  316. 404 Not Found
  317. =back
  318. B<Request example>
  319. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/matching_cds
  320. B<Response example>
  321. <div class="response-example"></div>
  322. =back
  323. =cut
  324. sub matching_cds {
  325. my $self = shift;
  326. my $object = $self->object;
  327. my @cds = map { $self->_pack_obj($_) } $object->Matching_CDS;
  328. return { description => 'matching CDSs of the sequence',
  329. data => @cds ? \@cds : undef };
  330. }
  331. =head3 corresponding_protein
  332. This method will return a data structure containing
  333. proteins corresponding to the requested object.
  334. =over
  335. =item PERL API
  336. $data = $model->corresponding_protein();
  337. =item REST API
  338. B<Request Method>
  339. GET
  340. B<Requires Authentication>
  341. No
  342. B<Parameters>
  343. A Sequence ID (eg JC8.10a)
  344. B<Returns>
  345. =over 4
  346. =item *
  347. 200 OK and JSON, HTML, or XML
  348. =item *
  349. 404 Not Found
  350. =back
  351. B<Request example>
  352. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/corresponding_protein
  353. B<Response example>
  354. <div class="response-example"></div>
  355. =back
  356. =cut
  357. sub corresponding_protein {
  358. my ($self) = @_;
  359. my @proteins = map { $self->_pack_obj($_) } map { $_->Corresponding_protein } @{$self ~~ '@Matching_CDS'};
  360. return { description => 'the corresponding protein of the sequence',
  361. data => @proteins ? \@proteins : undef };
  362. }
  363. =head3 matching_cdnas
  364. This method will return a data structure containing
  365. CDNAs that match the requested object.
  366. =over
  367. =item PERL API
  368. $data = $model->matching_cdnas();
  369. =item REST API
  370. B<Request Method>
  371. GET
  372. B<Requires Authentication>
  373. No
  374. B<Parameters>
  375. A Sequence ID (eg JC8.10a)
  376. B<Returns>
  377. =over 4
  378. =item *
  379. 200 OK and JSON, HTML, or XML
  380. =item *
  381. 404 Not Found
  382. =back
  383. B<Request example>
  384. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/matching_cdnas
  385. B<Response example>
  386. <div class="response-example"></div>
  387. =back
  388. =cut
  389. sub matching_cdnas {
  390. my ($self) = @_;
  391. my @cDNA = map { $self->_pack_obj($_) } @{$self ~~ '@Matching_cDNA'};
  392. return { description => 'cDNAs that match the sequence',
  393. data => @cDNA ? \@cDNA : undef,
  394. };
  395. }
  396. =head3 transcripts
  397. This method will return a data structure containing
  398. transcripts corresponding to the requested object.
  399. =over
  400. =item PERL API
  401. $data = $model->transcripts();
  402. =item REST API
  403. B<Request Method>
  404. GET
  405. B<Requires Authentication>
  406. No
  407. B<Parameters>
  408. A Sequence ID (eg JC8.10a)
  409. B<Returns>
  410. =over 4
  411. =item *
  412. 200 OK and JSON, HTML, or XML
  413. =item *
  414. 404 Not Found
  415. =back
  416. B<Request example>
  417. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/transcripts
  418. B<Response example>
  419. <div class="response-example"></div>
  420. =back
  421. =cut
  422. sub transcripts {
  423. my ($self) = @_;
  424. my @transcripts;
  425. if (($self ~~ 'Structure' || $self->_method eq 'Vancouver_fosmid') &&
  426. $self->type =~ /genomic|confirmed gene|predicted coding sequence/) {
  427. @transcripts = map { $self->_pack_obj($_) } sort {$a cmp $b } map {$_->info}
  428. map { $_->features('Transcript:Coding_transcript') }
  429. @{$self->_segments};
  430. }
  431. return { description => 'Transcripts in this region of the sequence',
  432. data => @transcripts ? \@transcripts : undef, #class Sequence
  433. };
  434. }
  435. ############################################################
  436. #
  437. # The Location Widget
  438. #
  439. ############################################################
  440. =head2 Location
  441. =cut
  442. # sub genomic_position { }
  443. # Supplied by Role; POD will automatically be inserted here.
  444. # << include genomic_position >>
  445. sub _build_genomic_position {
  446. my ($self) = @_;
  447. my @positions = $self->_genomic_position($self->_segments);
  448. return {
  449. description => 'The genomic location of the sequence',
  450. data => @positions ? \@positions : undef,
  451. };
  452. }
  453. # sub tracks {}
  454. # Supplied by Role; POD will automatically be inserted here.
  455. # << include tracks >>
  456. sub _build_tracks {
  457. my ($self) = @_;
  458. return {
  459. description => 'tracks to display in GBrowse',
  460. data => $self->_parsed_species =~ /elegans/ ? [qw(NG CG CDS PG PCR SNP TcI MOS CLO)] : undef,
  461. };
  462. }
  463. # sub genomic_image { }
  464. # Supplied by Role; POD will automatically be inserted here.
  465. # << include genomic_image >>
  466. # note for AD:
  467. # this one needs some reworking. it currently fetches the first segment
  468. # in $self->segments, recomputes the start & stop, fetches more segments
  469. # if the seq is a CDS or Transcript, and if more than 1 seg, selects the
  470. # first one that matches the start and stop (or just the first one).
  471. # throwing that segment back into genomic_position just wraps it up
  472. sub _build_genomic_image {
  473. my ($self) = @_;
  474. my $seq = $self->object;
  475. return unless(defined $self->_segments && $self->_segments->[0]->length< 100_0000);
  476. my $source = $self->_parsed_species;
  477. my $segment = $self->_segments->[0];
  478. my $ref = $segment->ref;
  479. my $start = $segment->start;
  480. my $stop = $segment->stop;
  481. # add another 10% to left and right
  482. $start = int($start - 0.05*($stop-$start));
  483. $stop = int($stop + 0.05*($stop-$start));
  484. my @segments;
  485. if ($seq->class eq 'CDS' or $seq->class eq 'Transcript') {
  486. my $gene = eval { $seq->Gene;} || $seq;
  487. @segments = $self->gff->segment(-class=>'Coding_transcript',-name=>$gene);
  488. @segments = grep {$_->method eq 'wormbase_cds'} $self->gff->fetch_group(CDS => $seq)
  489. unless @segments; # CB discontinuity
  490. }
  491. # In cases where more than one segment is retrieved
  492. # (ie with EST or OST mappings)
  493. # choose that which matches the original segment.
  494. # This is slightly bizarre but expedient fix.
  495. my $new_segment;
  496. if (@segments > 1) {
  497. foreach (@segments) {
  498. if ($_->start == $start && $_->stop == $stop) {
  499. $new_segment = $_;
  500. last;
  501. }
  502. }
  503. }
  504. my ($position) = $self->_genomic_position([$new_segment || $segment || ()]);
  505. return {
  506. description => 'The genomic location of the sequence to be displayed by GBrowse',
  507. data => $position,
  508. };
  509. }
  510. # sub genetic_position {}
  511. # Supplied by Role; POD will automatically be inserted here.
  512. # << include genetic_position >>
  513. ############################################################
  514. #
  515. # The Origin widget
  516. #
  517. ############################################################
  518. =head2 Origin
  519. =cut
  520. # sub laboratory { }
  521. # Supplied by Role; POD will automatically be inserted here.
  522. # << include laboratory >>
  523. =head3 available_from
  524. This method will return a data structure containing
  525. available sources for the sequence.
  526. =over
  527. =item PERL API
  528. $data = $model->available_from();
  529. =item REST API
  530. B<Request Method>
  531. GET
  532. B<Requires Authentication>
  533. No
  534. B<Parameters>
  535. A Sequence ID (eg JC8.10a)
  536. B<Returns>
  537. =over 4
  538. =item *
  539. 200 OK and JSON, HTML, or XML
  540. =item *
  541. 404 Not Found
  542. =back
  543. B<Request example>
  544. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/available_from
  545. B<Response example>
  546. <div class="response-example"></div>
  547. =back
  548. =cut
  549. sub available_from {
  550. my $self = shift;
  551. my $object = $self->object;
  552. my $data = $self->_method eq 'Vancouver_fosmid' && {
  553. label => 'GeneService',
  554. class => 'Geneservice_fosmids',
  555. };
  556. return { description => 'availability of clones of the sequence',
  557. data => "$data" || undef };
  558. }
  559. =head3 analysis
  560. This method will return a data structure containing
  561. the source analysis of the sequence.
  562. =over
  563. =item PERL API
  564. $data = $model->analysis();
  565. =item REST API
  566. B<Request Method>
  567. GET
  568. B<Requires Authentication>
  569. No
  570. B<Parameters>
  571. A Sequence ID (eg JC8.10a)
  572. B<Returns>
  573. =over 4
  574. =item *
  575. 200 OK and JSON, HTML, or XML
  576. =item *
  577. 404 Not Found
  578. =back
  579. B<Request example>
  580. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/analysis
  581. B<Response example>
  582. <div class="response-example"></div>
  583. =back
  584. =cut
  585. sub analysis {
  586. my ($self) = @_;
  587. my %so_data;
  588. if (my $analysis = $self ~~ 'Analysis') {
  589. $so_data{'Object'} = $analysis;
  590. $so_data{'Description'} = $analysis->Description;
  591. $so_data{'DB Info'} = $analysis->DB_info;
  592. $so_data{'WB Release'} = $analysis->Based_on_WB_Release;
  593. $so_data{'DB Release'} = $analysis->Based_on_DB_Release;
  594. $so_data{'Sample'} = $analysis->Sample;
  595. $so_data{'Paper'} = $analysis->Reference;
  596. $so_data{'Conducted y'} = $analysis->Conducted_by;
  597. $so_data{'Url'} = $analysis->URL;
  598. }
  599. return { description => 'The Analysis info of the sequence',
  600. data => %so_data ? \%so_data : undef };
  601. }
  602. ############################################################
  603. #
  604. # The Reagents Widget
  605. #
  606. ############################################################
  607. =head2 Reagents
  608. =head3 orfeome_assays
  609. This method will return a data structure containing
  610. orfeome_assays related to the sequence.
  611. =over
  612. =item PERL API
  613. $data = $model->orfeome_assays();
  614. =item REST API
  615. B<Request Method>
  616. GET
  617. B<Requires Authentication>
  618. No
  619. B<Parameters>
  620. A Sequence ID (eg JC8.10a)
  621. B<Returns>
  622. =over 4
  623. =item *
  624. 200 OK and JSON, HTML, or XML
  625. =item *
  626. 404 Not Found
  627. =back
  628. B<Request example>
  629. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/orfeome_assays
  630. B<Response example>
  631. <div class="response-example"></div>
  632. =back
  633. =cut
  634. sub orfeome_assays {
  635. my ($self) = @_;
  636. my (@orfeome,@pcr);
  637. if ($self->type =~ /gene|coding sequence|cDNA/) {
  638. @pcr = map {$_->info} map { $_->features('PCR_product:GenePair_STS',
  639. 'structural:PCR_product') }
  640. @{$self->_segments} if @{$self->_segments};
  641. @orfeome = grep {/^mv_/} @pcr;
  642. }
  643. my %data;
  644. foreach my $id (@orfeome) {
  645. $data{id} = $id;
  646. $data{label} = $id. " (".($id->Amplified(1) ? "PCR assay amplified"
  647. : font({-color=>'red'},"PCR assay did NOT amplify")).")";
  648. $data{class} ='pcr';
  649. }
  650. return {
  651. description => 'The ORFeome Assays of the sequence',
  652. data => %data ? \%data : undef,
  653. };
  654. }
  655. =head3 microarray_assays
  656. This method will return a data structure containing
  657. microarray assays related to the requested object.
  658. =over
  659. =item PERL API
  660. $data = $model->microarray_assays();
  661. =item REST API
  662. B<Request Method>
  663. GET
  664. B<Requires Authentication>
  665. No
  666. B<Parameters>
  667. A Sequence ID (eg JC8.10a)
  668. B<Returns>
  669. =over 4
  670. =item *
  671. 200 OK and JSON, HTML, or XML
  672. =item *
  673. 404 Not Found
  674. =back
  675. B<Request example>
  676. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/microarray_assays
  677. B<Response example>
  678. <div class="response-example"></div>
  679. =back
  680. =cut
  681. sub microarray_assays {
  682. my ($self) = @_;
  683. my @microarrays;
  684. if (($self ~~ 'Structure' || $self->_method eq 'Vancouver_fosmid') &&
  685. $self->type =~ /genomic|confirmed gene|predicted coding sequence/) {
  686. @microarrays = map {$self->_pack_obj($_)} sort {$a cmp $b } map {$_->info}
  687. map { $_->features('reagent:Oligo_set') } @{$self->_segments};
  688. }
  689. return {
  690. description => 'The Microarray assays in this region of the sequence',
  691. data => @microarrays ? \@microarrays : undef, #class Oligo_set
  692. };
  693. }
  694. =head3 source_clone
  695. This method will return a data structure containing
  696. the source clone of the sequence.
  697. =over
  698. =item PERL API
  699. $data = $model->source_clone();
  700. =item REST API
  701. B<Request Method>
  702. GET
  703. B<Requires Authentication>
  704. No
  705. B<Parameters>
  706. A Sequence ID (eg JC8.10a)
  707. B<Returns>
  708. =over 4
  709. =item *
  710. 200 OK and JSON, HTML, or XML
  711. =item *
  712. 404 Not Found
  713. =back
  714. B<Request example>
  715. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/source_clone
  716. B<Response example>
  717. <div class="response-example"></div>
  718. =back
  719. =cut
  720. sub source_clone {
  721. my ($self) = @_;
  722. my $clone = $self ~~ 'Clone' || $self->sequence ? $self->sequence->Clone : undef;
  723. return { description => 'The Source clone of the sequence',
  724. data => $clone ? map {$self->_pack_obj($_)} $clone : undef };
  725. }
  726. ############################################################
  727. #
  728. # The Sequence Widget
  729. #
  730. ############################################################
  731. =head2 Sequence
  732. =head3 print_blast
  733. This method will return a data structure containing
  734. links to blast resources.
  735. =over
  736. =item PERL API
  737. $data = $model->print_blast();
  738. =item REST API
  739. B<Request Method>
  740. GET
  741. B<Requires Authentication>
  742. No
  743. B<Parameters>
  744. A Sequence ID (eg JC8.10a)
  745. B<Returns>
  746. =over 4
  747. =item *
  748. 200 OK and JSON, HTML, or XML
  749. =item *
  750. 404 Not Found
  751. =back
  752. B<Request example>
  753. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/print_blast
  754. B<Response example>
  755. <div class="response-example"></div>
  756. =back
  757. =cut
  758. sub print_blast {
  759. my ($self) = @_;
  760. my @target = ('Elegans genome');
  761. push @target,"Elegans protein" if ($self ~~ 'Coding');
  762. return { description => 'links to BLAST analyses',
  763. data => { source => $self ~~ 'name',
  764. target => \@target,
  765. },
  766. };
  767. }
  768. =head3 print_sequence
  769. This method will return a data structure containing
  770. the sequence of the sequence. Um, yeah.
  771. =over
  772. =item PERL API
  773. $data = $model->print_sequence();
  774. =item REST API
  775. B<Request Method>
  776. GET
  777. B<Requires Authentication>
  778. No
  779. B<Parameters>
  780. A Sequence ID (eg JC8.10a)
  781. B<Returns>
  782. =over 4
  783. =item *
  784. 200 OK and JSON, HTML, or XML
  785. =item *
  786. 404 Not Found
  787. =back
  788. B<Request example>
  789. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/print_sequence
  790. B<Response example>
  791. <div class="response-example"></div>
  792. =back
  793. =cut
  794. # TODO: REWRITE THIS. This is very gory code. Some of it doesn't do what
  795. # one would expect due to some Perl details...
  796. sub print_sequence {
  797. my ($self) = @_;
  798. my $s = $self->object;
  799. my %hash;
  800. my $gff = $self->gff;
  801. my $seq_obj;
  802. if ($self->_parsed_species =~ /briggsae/) {
  803. ($seq_obj) = sort {$b->length<=>$a->length}
  804. $self->type =~ /^(genomic|confirmed gene|predicted coding sequence)$/i
  805. ? grep {$_->method eq 'wormbase_cds'} $gff->fetch_group(Transcript => $s),
  806. : '';
  807. }
  808. else {
  809. ($seq_obj) = sort {$b->length<=>$a->length}
  810. # grep {$_->method eq 'full_transcript'} $gff->fetch_group(Transcript => $s);
  811. grep {$_->method eq 'Transcript'} $gff->fetch_group(Transcript => $s);
  812. # BLECH! If provided with a gene ID and alt splices are present just guess
  813. # and fetch the first CDS or Transcript
  814. # We really should display a list for all of these.
  815. ($seq_obj) = $seq_obj ? ($seq_obj) : sort {$b->length<=>$a->length}
  816. # grep {$_->method eq 'full_transcript'} $gff->fetch_group(Transcript => "$s.a");
  817. grep {$_->method eq 'Transcript'} $gff->fetch_group(Transcript => "$s.a");
  818. ($seq_obj) = $seq_obj ? ($seq_obj) : sort {$b->length<=>$a->length}
  819. # grep {$_->method eq 'full_transcript'} $gff->fetch_group(Transcript => "$s.1");
  820. grep {$_->method eq 'Transcript'} $gff->fetch_group(Transcript => "$s.1");
  821. }
  822. ($seq_obj) ||= $gff->fetch_group(Pseudogene => $s);
  823. # Haven't fetched a GFF segment? Try Ace.
  824. if (!$seq_obj || length($seq_obj->dna) < 2) { # miserable broken workaround
  825. # try to use acedb
  826. if (my $fasta = $s->asDNA) {
  827. $hash{dna} = { header=>"FASTA Sequence",
  828. content=>"$fasta\n"
  829. }; ##$fasta;
  830. $self->length(length $fasta);
  831. }
  832. else {
  833. $hash{dna} = "<p>Sequence unavailable. If this is a cDNA, try searching for $s.5 or $s.3</p>";
  834. }
  835. goto END;
  836. }
  837. # print_genomic_position($s,$type);
  838. $hash{est} = "name=$s;class=CDS";
  839. if (eval { $s->Properties eq 'cDNA'} ) {
  840. # try to use acedb
  841. if (my $fasta = $s->asDNA) {
  842. $hash{dna} = $fasta;
  843. }
  844. goto END;
  845. }
  846. my $unspliced = lc $seq_obj->dna;
  847. my $length = length($unspliced);
  848. if (eval { $s->Coding_pseudogene } || eval {$s->Coding} || eval {$s->Corresponding_CDS}) {
  849. my $markup = Bio::Graphics::Browser2::Markup->new;
  850. $markup->add_style('utr' => 'FGCOLOR gray');
  851. $markup->add_style('cds' => 'BGCOLOR cyan');
  852. $markup->add_style('cds0' => 'BGCOLOR yellow');
  853. $markup->add_style('cds1' => 'BGCOLOR orange');
  854. $markup->add_style('uc' => 'UPPERCASE');
  855. $markup->add_style('newline' => "\n");
  856. $markup->add_style('space' => ' ');
  857. my %seenit;
  858. my @features;
  859. if ($s->Species =~ /briggsae/) {
  860. $seq_obj->ref($seq_obj); # local coordinates
  861. @features = sort {$a->start <=> $b->start}
  862. grep { $_->info eq $s && !$seenit{$_->start}++ }
  863. $seq_obj->features('coding_exon:curated','UTR');
  864. }
  865. else {
  866. $seq_obj->ref($seq_obj); # local coordinates
  867. # Is the genefinder specific formatting cruft?
  868. @features =
  869. sort {$a->start <=> $b->start}
  870. grep { $_->info eq $s && !$seenit{$_->start}++ }
  871. ($s->Method eq 'Genefinder') ?
  872. $seq_obj->features('coding_exon:' . $s->Method,'five_prime_UTR','three_prime_UTR')
  873. :
  874. $seq_obj->features(qw/five_prime_UTR:Coding_transcript exon:Pseudogene coding_exon:Coding_transcript three_prime_UTR:Coding_transcript/);
  875. }
  876. $hash{unspliced} = _print_unspliced($markup,$seq_obj,$unspliced,@features);
  877. $hash{spliced} = _print_spliced($markup,@features);
  878. $hash{protein} = _print_protein($markup,\@features) unless eval { $s->Coding_pseudogene };
  879. }
  880. else {
  881. # Otherwise we've got genomic DNA here
  882. $hash{dna} = _to_fasta($s,$unspliced);
  883. }
  884. $self->length($length);
  885. END:
  886. return { description => 'the sequence of the sequence',
  887. data => %hash ? \%hash : undef };
  888. }
  889. =head3 print_homologies
  890. This method will return a data structure containing
  891. homologies of the requested object.
  892. =over
  893. =item PERL API
  894. $data = $model->print_homologies();
  895. =item REST API
  896. B<Request Method>
  897. GET
  898. B<Requires Authentication>
  899. No
  900. B<Parameters>
  901. A Sequence ID (eg JC8.10a)
  902. B<Returns>
  903. =over 4
  904. =item *
  905. 200 OK and JSON, HTML, or XML
  906. =item *
  907. 404 Not Found
  908. =back
  909. B<Request example>
  910. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/print_homologies
  911. B<Response example>
  912. <div class="response-example"></div>
  913. =back
  914. =cut
  915. sub print_homologies {
  916. my ($self) = @_;
  917. my $seq = $self->object;
  918. # Restructuring into the ?CDS class partially kills this query
  919. # Transcripts are not sequence features any more...
  920. my $gff = $self->ace_dsn->raw_query("gif seqget $seq -coords 1 ".$self->length." ; seqfeatures")
  921. if $self->length > 0;
  922. return unless $gff;
  923. my ($origin,$extent,%HITS);
  924. foreach (split("\n",$gff)) {
  925. next if m!^//!; # ignore comments
  926. next if m!^\0!; # ignore ACEDB noise/grunge
  927. if (/^\#\#sequence-region \S+ (\d+) (\d+)/) {
  928. ($origin,$extent) = ($1,$2);
  929. next;
  930. }
  931. next if /^\#/;
  932. my ($junk,$description,$type,@data) = split("\t"); # parse
  933. # This might be broken with WS121 restructuring
  934. next unless $type eq 'similarity';
  935. push @{$HITS{$description}},\@data;
  936. }
  937. my (%hit_objs);
  938. my ($dnas,$proteins); # jalview flags
  939. my @rows;
  940. for my $type (sort keys %HITS) {
  941. my $label = $type=~ /hmmfs/ ? 'Motif' : $type;
  942. # without stepping through whole array, figure out whether
  943. # we have any protein or DNA alignments to display.
  944. my ($class) = $HITS{$type}->[0]->[$#{$HITS{$type}->[0]}] =~ /^([^:]+)/;
  945. $dnas++ if $class eq 'Sequence';
  946. $proteins++ if $class eq 'Protein';
  947. for my $hit (sort {$a->[0] <=> $b->[0] } @{$HITS{$type}}) {
  948. my ($s_start,$s_end,$score,$strand,$frame,$packed_stuff) = @$hit;
  949. my (undef,$xref,$t_start,$t_end) = split(/\s+/,$packed_stuff);
  950. # obscure feature in gff 1a format: the name of the hit
  951. # is preceded by the name of the class and a colon
  952. my $class;
  953. ($class,$xref) = $xref =~ /"([^:]+):(.+)"/;
  954. $class ||= 'Homol';
  955. $s_start -= ($origin - 1);
  956. $s_end -= ($origin - 1);
  957. my ($title);
  958. my $obj = $hit_objs{"$class:$xref"} ||= $self->ace_dsn->fetch(-class=>$class,-name=>$xref,-fill=>1);
  959. if (ref($obj)) {
  960. $title = $obj->get(Title=>1) ||
  961. $obj->get(DB_remark=>1) ||
  962. $obj->get(Remark=>1);
  963. }
  964. $title ||= 'Genomic' if $type =~ /brig/i;
  965. $title ||= 'EST' if $type =~ /EST/;
  966. $title ||= 'Genomic' if $type =~ /cosmid/i;
  967. $title ||= 'Protein' if $type =~ /blastx/i;
  968. $title ||= '&nbsp;';
  969. push @rows, { method => $label,
  970. similarity => ref($obj) ? {label => $obj, id=>$obj, class=>$obj->class}: $obj,
  971. type => $title,
  972. score => $score,
  973. genomic_region => "$s_start&nbsp;-&nbsp;$s_end",
  974. hit_region => "$t_start&nbsp;-&nbsp;$t_end",
  975. strand => $strand,
  976. frame => $frame,
  977. };
  978. }
  979. }
  980. return { description => 'homologous sequences',
  981. data => @rows ? \@rows : undef };
  982. }
  983. =head3 print_feature
  984. This method will return a data structure listing
  985. features contained within the sequence.
  986. =over
  987. =item PERL API
  988. $data = $model->print_feature();
  989. =item REST API
  990. B<Request Method>
  991. GET
  992. B<Requires Authentication>
  993. No
  994. B<Parameters>
  995. A Sequence ID (eg JC8.10a)
  996. B<Returns>
  997. =over 4
  998. =item *
  999. 200 OK and JSON, HTML, or XML
  1000. =item *
  1001. 404 Not Found
  1002. =back
  1003. B<Request example>
  1004. curl -H content-type:application/json http://api.wormbase.org/rest/field/sequence/JC8.10a/print_feature
  1005. B<Response example>
  1006. <div class="response-example"></div>
  1007. =back
  1008. =cut
  1009. sub print_feature {
  1010. my ($self) = @_;
  1011. my $s = $self->object;
  1012. my %hash;
  1013. # NB: This is not completely functional - it doesn't display cloned, named genes
  1014. # (The pre-WS116 version didn't either).
  1015. # That is, transcripts like JC8.10 are not listed under Transcripts in Ace WS116
  1016. if (my @genes = $s->get('Transcript')) {
  1017. print h3('Predicted Genes & Transcriptional Units');
  1018. my %data = map {$_=>$_} $s->follow(-tag=>'Transcript',-filled=>1);
  1019. my @rows;
  1020. foreach (sort {$a->right <=> $b->right} @genes) {
  1021. my $gene = $data{$_};
  1022. # my $href = a({ -href=>Object2URL($gene) },$gene);
  1023. next unless defined $gene;
  1024. my $CDS = $gene->Corresponding_CDS;
  1025. # Fetch the information from the CDS if it exists, else from the transcript
  1026. my $class = ($CDS) ? $CDS : $gene;
  1027. my $locus = eval { $class->Locus };
  1028. my ($desc) = $class->Brief_identification;
  1029. ($desc) ||= $class->Remark ;
  1030. ($desc) ||= $class->DB_remark ;
  1031. # this sounds like important information - why is it undef'd?
  1032. # undef $desc if $desc =~ /possible trans-splice site at \d+/;
  1033. $desc ||= '&nbsp;';
  1034. my ($start,$end)=$_->right->row;
  1035. push @rows, { start=>$start,
  1036. end=>$end,
  1037. name=>{ label => $gene, id=>$gene, class=>$gene->class},
  1038. gene=>$locus ? {label => $locus, id=>$locus, class=>$locus->class} : '-',
  1039. predicted_type=>=> $gene || '?',
  1040. comment=>$desc,
  1041. };
  1042. }
  1043. $hash{predicted_units}{rows}=\@rows;
  1044. }
  1045. if (my @exons = $s->get('Source_Exons')) {
  1046. my ($start,$orientation,$parent) = $self->_get_parent_coords($s);
  1047. # This is just 1 or -1. Should have better formatting.
  1048. # print p("orientation is $orientation");
  1049. my $index = 1;
  1050. my $last;
  1051. my @rows;
  1052. foreach (@exons) {
  1053. my ($es,$ee) = $_->row;
  1054. my $as = $orientation >= 0 ? $start+$es-1 : $start-$es+1;
  1055. my $ae = $orientation >= 0 ? $start+$ee-1 : $start-$ee+1;
  1056. my $last = $ee;
  1057. push @rows, { no=>$index++,
  1058. start=>$es,
  1059. end=>$ee,
  1060. ref_start=>$as,
  1061. ref_end=>=> $ae,
  1062. };
  1063. }
  1064. $hash{exons}={ rows=>\@rows, parent=>$parent, orientation=>$orientation};
  1065. }
  1066. my @feature = $s->get('Feature');
  1067. if (@feature) {
  1068. print h3("Other features");
  1069. my @rows;
  1070. for my $f (@feature) {
  1071. (my $label = $f) =~ s/(inverted|tandem)/$1 repeat/;
  1072. for my $i ($f->col) {
  1073. my @fields = $i->row;
  1074. push @rows, {
  1075. start=>$fields[0],
  1076. end=>$fields[1],
  1077. score=>$fields[2],
  1078. comment=>=> $fields[3],
  1079. };
  1080. }
  1081. $hash{features}={ rows=>\@rows, label =>$label};
  1082. }
  1083. }
  1084. return { description => 'features contained within the sequence',
  1085. data => %hash ? \%hash : undef };
  1086. }
  1087. ############################################################
  1088. #
  1089. # PRIVATE METHODS
  1090. #
  1091. ############################################################
  1092. sub _is_gap {
  1093. return shift =~ /(\b|_)GAP(\b|_)/i;
  1094. }
  1095. sub _is_merged {
  1096. return shift =~ /LINK|CHROMOSOME/i;
  1097. }
  1098. sub _build__segments {
  1099. my ($self) = @_;
  1100. my $object = $self->object;
  1101. # special case: return the union of 3' and 5' EST if possible
  1102. if ($self->type =~ /EST/) {
  1103. if ($object =~ /(.+)\.[35]$/) {
  1104. my $base = $1;
  1105. my ($seg_start) = $self->gff->segment(Sequence => "$base.3");
  1106. my ($seg_stop) = $self->gff->segment(Sequence => "$base.5");
  1107. if ($seg_start && $seg_stop) {
  1108. my $union = $seg_start->union($seg_stop);
  1109. return $union if $union;
  1110. }
  1111. }
  1112. }
  1113. return [map {$_->absolute(1);$_} sort {$b->length<=>$a->length} $self->gff->segment($object->class => $object)];
  1114. }
  1115. sub _print_unspliced {
  1116. my ($markup,$seq_obj,$unspliced,@features) = @_;
  1117. my $name = $seq_obj->info . ' (' . $seq_obj->start . '-' . $seq_obj->stop . ')';
  1118. my $length = length $unspliced;
  1119. if ($length > 0) {
  1120. # mark up the feature locations
  1121. my @markup;
  1122. my $offset = $seq_obj->start;
  1123. my $counter = 0;
  1124. for my $feature (@features) {
  1125. my $start = $feature->start - $offset;
  1126. my $length = $feature->length;
  1127. my $style = $feature->method eq 'CDS' ? 'cds'.$counter++%2
  1128. : $feature->method =~ /exon/ ? 'cds'.$counter++%2
  1129. : $feature->method =~ 'UTR' ? 'utr' : '';
  1130. push @markup,[$style,$start,$start+$length];
  1131. push @markup,['uc',$start,$start+$length] unless $style eq 'utr';
  1132. }
  1133. push @markup,map {['space',10*$_]} (1..length($unspliced)/10);
  1134. push @markup,map {['newline',80*$_]} (1..length($unspliced)/80);
  1135. my $download = _to_fasta("$name|unspliced + UTR - $length bp",$unspliced);
  1136. $markup->markup(\$unspliced,\@markup);
  1137. return {
  1138. #download => $download,
  1139. header=>"unspliced + UTR - $length bp",
  1140. content=>">$name (unspliced + UTR - $length bp)\n".$unspliced,
  1141. };
  1142. }
  1143. }
  1144. # Fetch and markup the spliced DNA
  1145. # markup alternative exons
  1146. sub _print_spliced {
  1147. my ($markup,@features) = @_;
  1148. my $spliced = join('',map {$_->dna} @features);
  1149. my $splen = length $spliced;
  1150. my $last = 0;
  1151. my $counter = 0;
  1152. my @markup = ();
  1153. my $prefasta = $spliced;
  1154. for my $feature (@features) {
  1155. my $length = $feature->stop - $feature->start + 1;
  1156. my $style = $feature->method =~ /UTR/i ? 'utr' : 'cds' . $counter++ %2;
  1157. my $end = $last + $length;
  1158. push @markup,[$style,$last,$end];
  1159. push @markup,['uc',$last,$end] if $feature->method =~ /exon/;
  1160. $last += $length;
  1161. }
  1162. push @markup,map {['space',10*$_]} (1..length($spliced)/10);
  1163. push @markup,map {['newline',80*$_]} (1..length($spliced)/80);
  1164. my $name = eval { $features[0]->refseq->name } ;
  1165. my $download=_to_fasta("$name|spliced + UTR - $splen bp",$spliced);
  1166. $markup->markup(\$spliced,\@markup);
  1167. return { # download => $download ,
  1168. header=>"spliced + UTR - $splen bp",
  1169. content=>">$name (spliced + UTR - $splen bp)\n".$spliced,
  1170. } if $name;
  1171. }
  1172. sub _print_protein {
  1173. my ($markup,$features,$genetic_code) = @_;
  1174. my @markup;
  1175. my $trimmed = join('',map {$_->dna} grep {$_->method eq 'coding_exon'} @$features);
  1176. return unless $trimmed; # Hack for mRNA
  1177. my $peptide = Bio::Seq->new(-seq=>$trimmed)->translate->seq;
  1178. my $change = $peptide =~/\w+\*$/ ? 1 : 0;
  1179. my $plen = length($peptide) - $change;
  1180. @markup = map {['space',10*$_]} (1..length($peptide)/10);
  1181. push @markup,map {['newline',80*$_]} (1..length($peptide)/80);
  1182. my $name = eval { $features->[0]->refseq->name };
  1183. my $download=_to_fasta("$name|conceptual translation - $plen aa",$peptide);
  1184. $markup->markup(\$peptide,\@markup);
  1185. $peptide =~ s/^\s+//;
  1186. return { # download => $download,
  1187. header=>"conceptual translation - $plen aa",
  1188. content=>">$name (conceptual translation - $plen aa)\n".$peptide,
  1189. };
  1190. }
  1191. ##use this or template to format sequence?
  1192. sub _to_fasta {
  1193. my ($name,$dna) = @_;
  1194. $dna ||= '';
  1195. my @markup;
  1196. for (my $i=0; $i < length $dna; $i += 10) {
  1197. push (@markup,[$i,$i % 80 ? ' ':"\n"]);
  1198. }
  1199. _markup(\$dna,\@markup);
  1200. $dna =~ s/^\s+//;
  1201. $dna =~ s/\*$//;
  1202. return { header=>"Genomic Sequence",
  1203. content=>"&gt;$name\n$dna"
  1204. };
  1205. }
  1206. # insert HTML tags into a string without disturbing order
  1207. sub _markup {
  1208. my $string = shift;
  1209. my $markups = shift;
  1210. for my $m (sort {$b->[0]<=>$a->[0]} @$markups) { #insert later tags first so position remains correct
  1211. my ($position,$markup) = @$m;
  1212. next unless $position <= length $$string;
  1213. substr($$string,$position,0) = $markup;
  1214. }
  1215. }
  1216. # get coordinates of parent for exons etc
  1217. sub _get_parent_coords {
  1218. my ($self,$s) = @_;
  1219. my ($parent) = $self->sequence;
  1220. return unless $parent;
  1221. # my $subseq = $parent->get('Subsequence'); # prevent automatic dereferencing
  1222. # Escape the sequence name for fetching
  1223. $s =~ s/\./\\./g;
  1224. # We may be dealing with transcripts, too.
  1225. my $se;
  1226. foreach my $tag (qw/CDS_child Transcript/) {
  1227. my $subseq = $parent->get($tag); # prevent automatic dereferencing
  1228. if ($subseq) {
  1229. $se = $subseq->at($s);
  1230. if ($se) {
  1231. my ($start,$stop) = $se->right->row;
  1232. my $orientation = $start <=> $stop;
  1233. return ($start,-$orientation,$parent);
  1234. }
  1235. }
  1236. }
  1237. return;
  1238. }
  1239. __PACKAGE__->meta->make_immutable;
  1240. 1;