PageRenderTime 64ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/hgvs-web-service

https://bitbucket.org/reece/bio-hgvs-perl
Perl | 381 lines | 354 code | 10 blank | 17 comment | 0 complexity | bd2ed36fdc98985a5b7f834b1c0c0656 MD5 | raw file
Possible License(s): Apache-2.0
  1. #!/usr/bin/perl
  2. # N.B. This script is intended for low-volume connections.
  3. # There is no attempt to make the script thread-safe or to optimize for
  4. # concurrency.
  5. # How to start:
  6. #
  7. # sudo -u locusadm -i
  8. # source /locus/opt/ensembl/config
  9. # hgvs-web-service [-l log] [-p port]
  10. #
  11. # By default, the port is 8000+ensembl version number, but may be overridden.
  12. #
  13. # Then test with:
  14. # curl 'http://localhost:7777/hgvs/translate/NP_003218.2:p.Val713Met'
  15. #
  16. use strict;
  17. use warnings;
  18. use FindBin;
  19. use lib "$FindBin::RealBin/../lib";
  20. use Bio::HGVS;
  21. use File::Basename;
  22. use Getopt::Long qw(:config gnu_getopt);
  23. use HTTP::Daemon;
  24. use HTTP::Status;
  25. use IO::Pipe;
  26. use Log::Log4perl;
  27. use URI::Escape;
  28. use XML::LibXML;
  29. use TryCatch;
  30. use Bio::HGVS::EnsemblConnection;
  31. use Bio::HGVS::Errors;
  32. use Bio::HGVS::Parser;
  33. use Bio::HGVS::Translator;
  34. use Bio::HGVS::utils;
  35. my $FORMAT_VERSION = 2.1;
  36. my $root = dirname( $FindBin::RealBin );
  37. my $jemappelle = basename( $0 );
  38. my $about_xml = about_xml();
  39. my @path_handlers = (
  40. [ qr%/version(?:/|$)% , \&version_handler ],
  41. [ qr%/chr-slice/% , \&chr_slice_handler ],
  42. [ qr%/hgvs/genome-map/% , \&genome_map_handler ],
  43. [ qr%/hgvs/translate/% , \&translate_handler ],
  44. #[ qr%/rs/lookup/% , \&rs_lookup ],
  45. #[ qr%/hgvs/validate/% , \&validate_handler ],
  46. );
  47. my %opts = (
  48. xml_format => 2, # indented, with newlines
  49. sleeptime => 10,
  50. n_start_attempts => 10,
  51. port => $ENV{HGVS_WS_PORT},
  52. queue_size => 5,
  53. log => undef,
  54. log_level => 'INFO',
  55. );
  56. GetOptions(\%opts,
  57. 'log|l=s',
  58. 'sleeptime|s=i',
  59. 'port|p=i'
  60. )
  61. or die("$jemappelle: You've got usage issues, homeboy\n");
  62. my $conf = log4conf(\%opts);
  63. Log::Log4perl::init( \$conf );
  64. my $logger = Log::Log4perl->get_logger($jemappelle);
  65. $logger->info("$jemappelle starting...");
  66. if (defined $opts{log}) {
  67. print(STDERR "logging to $opts{log}\n");
  68. }
  69. my %conn_info = %Bio::HGVS::EnsemblConnection::defaults;
  70. my ($ens,$vm) = connect_to_db(%conn_info);
  71. my $vp = Bio::HGVS::Parser->new();
  72. if (not defined $opts{port}) {
  73. # port = 8000 + ensembl version, e.g., 8065
  74. my $eversion = Bio::HGVS::EnsemblConnection::api_version();
  75. $opts{port} = 8000 + $eversion;
  76. $logger->info("port not specified; selected port $opts{port} for e! $eversion")
  77. }
  78. my $daemon;
  79. try {
  80. $daemon = start_daemon( $opts{n_start_attempts} );
  81. } catch ($e) {
  82. $logger->error($e);
  83. exit(1);
  84. };
  85. $logger->info("$jemappelle available at ", $daemon->url, "\n" );
  86. while( my $c = $daemon->accept ) {
  87. my $ref = sprintf('%s:%s', $c->peerhost, $c->peerport);
  88. $logger->debug(sprintf('[%s]: connection received',$ref));
  89. eval { process_connection($c) };
  90. $logger->debug(sprintf('[%s]: %s', $ref, $@)) if ($@);
  91. $c->close;
  92. $logger->debug(sprintf('[%s]: connection closed', $ref));
  93. }
  94. exit;
  95. sub connect_to_db {
  96. my %conn_info = @_;
  97. $logger->info(sprintf('connecting to Ensembl version %s (%s@%s:%s)...',
  98. Bio::HGVS::EnsemblConnection::api_version(),
  99. @{conn_info{qw(user host port)}}));
  100. $ens = Bio::HGVS::EnsemblConnection->new(%conn_info);
  101. $vm = Bio::HGVS::Translator->new( ens_conn => $ens );
  102. $logger->info(sprintf('Ensembl connection established'));
  103. return ($ens,$vm);
  104. }
  105. sub log4conf {
  106. my $opts = shift;
  107. if (defined $opts->{log}) {
  108. return <<EOF;
  109. log4perl.rootLogger = $opts->{log_level}, Logfile
  110. log4perl.appender.Logfile = Log::Log4perl::Appender::File
  111. log4perl.appender.Logfile.filename = $opts->{log}
  112. log4perl.appender.Logfile.layout = Log::Log4perl::Layout::PatternLayout
  113. log4perl.appender.Logfile.layout.ConversionPattern = %d %c %F:%L %m%n
  114. EOF
  115. }
  116. return <<EOF;
  117. log4perl.rootLogger = $opts->{log_level}, Screen
  118. log4perl.appender.Screen = Log::Log4perl::Appender::Screen
  119. log4perl.appender.Screen.layout = Log::Log4perl::Layout::SimpleLayout
  120. EOF
  121. }
  122. sub start_daemon {
  123. my ($ntries) = @_;
  124. $daemon = HTTP::Daemon->new(
  125. LocalPort => $opts{port},
  126. Listen => $opts{queue_size},
  127. );
  128. (defined $daemon)
  129. || Bio::HGVS::Error->throw(
  130. sprintf("failed to start daemon: $!")
  131. );
  132. return $daemon;
  133. }
  134. sub process_connection {
  135. my ($c) = @_;
  136. my $ref = sprintf('%s:%s', $c->peerhost, $c->peerport);
  137. while ( my $req = $c->get_request ) { # HTTP::Request
  138. $logger->debug(sprintf('[%s]: request: %s %s', $ref, $req->method, $req->uri->path));
  139. if ($req->method ne 'GET') {
  140. $c->send_error(RC_FORBIDDEN);
  141. }
  142. my @handler_matches = grep { $req->uri->path =~ m%^($_->[0])% } @path_handlers;
  143. if (not @handler_matches) {
  144. $c->send_error(RC_FORBIDDEN);
  145. return;
  146. }
  147. if ($#handler_matches > 0) {
  148. $logger->error( sprintf('[%s]: %s matches more than one path handler; using first',
  149. $ref, $req->uri->path) );
  150. }
  151. my $ph = $handler_matches[0];
  152. if (my ($path,$query) = $req->uri->path =~ m%($ph->[0])(.*)%) {
  153. my $xml = XML::LibXML::Element->new("$jemappelle-response");
  154. $xml->setAttribute('path', $path);
  155. $xml->setAttribute('query', $query);
  156. $xml->appendChild($about_xml);
  157. my $xml_result;
  158. try {
  159. $xml_result = $ph->[1]($ref,$query);
  160. } catch ($e) {
  161. $logger->error("[$ref]: $e");
  162. if ($e and not ref $e and $e =~ m/^DBD::/) {
  163. connect_to_db(%conn_info);
  164. $logger->error("[$ref]: reissuing request");
  165. eval { $xml_result = $ph->[1]($ref,$query); };
  166. }
  167. if (not defined $xml_result) {
  168. # if still not defined after possibly reissuing
  169. if (ref($e) and $e->can('toXML')) {
  170. $xml_result = $e->toXML();
  171. } else {
  172. $xml_result = Bio::HGVS::Error->new($e)->toXML();
  173. }
  174. }
  175. };
  176. $xml->appendChild( $xml_result ) if defined $xml_result;
  177. $c->send_response( HTTP::Response->new(
  178. 200,undef,undef,$xml->toString($opts{xml_format})."\n")
  179. );
  180. }
  181. }
  182. }
  183. sub version_handler {
  184. # This used to return version info. Now that version info is returned in
  185. # all responses, this is just a stub.
  186. return;
  187. }
  188. sub chr_slice_handler {
  189. my ($ref,$query) = @_;
  190. my $q = uri_unescape($query);
  191. my ($chr) = $q =~ m/chr=(\d+|[XY])/;
  192. my ($start) = $q =~ m/start=(\d+)/;
  193. if (not (defined $chr and defined $start)) {
  194. Bio::HGVS::Error->throw(
  195. 'Invalid request; query args like .../chr-slice/?chr=12&start=34&end=56 (end optional)'
  196. );
  197. }
  198. my ($stop) = $q =~ m/end=(\d+)/;
  199. $stop ||= $start;
  200. my $slice = $ens->{sa}->fetch_by_region('chromosome', $chr, $start, $stop);
  201. my $e = XML::LibXML::Element->new('chr-slice');
  202. $e->setAttribute('chromosome',$chr);
  203. $e->setAttribute('start',$start);
  204. $e->setAttribute('end',$stop);
  205. $e->setAttribute('sequence',$slice->seq);
  206. return $e;
  207. }
  208. sub genome_map_handler {
  209. my ($ref,$query) = @_;
  210. my $hgvs = uri_unescape($query);
  211. my $v = $vp->parse($hgvs);
  212. $logger->debug(sprintf('[%s]: %s parsed okay; type=%s', $ref, $hgvs, $v->type));
  213. if ($v->type ne 'c') {
  214. Bio::HGVS::Error->throw(
  215. 'moltype ',$v->type, ' not supported (yet)'
  216. );
  217. }
  218. my @g = $vm->convert_cds_to_chr($v);
  219. return join(
  220. '',
  221. "<Coordinates>\n",
  222. (map { _formatter($_) } @g),
  223. "</Coordinates>\n"
  224. );
  225. }
  226. sub _formatter {
  227. my ($g) = shift;
  228. my $chr = $Bio::HGVS::Translator::nc_to_chr{$g->ref} || '?';
  229. sprintf(" <ChromosomalPosition chromosome=\"%d\" start=\"%d\" end=\"%d\" hgvs=\"%s\"/>\n",
  230. $chr, $g->loc->start, $g->loc->end, "$g");
  231. }
  232. sub coordinate_xml {
  233. my ($g) = @_;
  234. my $chr = $Bio::HGVS::Translator::nc_to_chr{$g->ref} || '?';
  235. my $e = XML::LibXML::Element->new('genomic-coordinates');
  236. $e->setAttribute('chromosome',$chr);
  237. $e->setAttribute('start',$g->loc->start->position);
  238. $e->setAttribute('end',$g->loc->end->position);
  239. return $e;
  240. }
  241. sub translate_handler {
  242. my ($ref,$query) = @_;
  243. my $hgvs = uri_unescape($query);
  244. my $v = $vp->parse($hgvs);
  245. $logger->debug(sprintf('[%s]: %s parsed okay; type=%s', $ref, $hgvs, $v->type));
  246. my $xml = XML::LibXML::Element->new('translation-results');
  247. $xml->setAttribute('query',$hgvs);
  248. if ($v->type eq 'g') {
  249. $xml->appendChild( coordinate_xml($v) );
  250. foreach my $c ($vm->convert_chr_to_cds($v)) {
  251. my $xc = XML::LibXML::Element->new('cds-variant');
  252. $xc->setAttribute('hgvs',"$c");
  253. if ($c->loc->is_simple) {
  254. foreach my $p ($vm->convert_cds_to_pro($c)) {
  255. my $xp = XML::LibXML::Element->new('protein-variant');
  256. $xp->setAttribute('hgvs',"$p");
  257. $xc->appendChild($xp);
  258. }
  259. }
  260. $xml->appendChild($xc);
  261. }
  262. } elsif ($v->type eq 'c') {
  263. foreach my $g ($vm->convert_cds_to_chr($v)) {
  264. my $xg = XML::LibXML::Element->new('genomic-variant');
  265. $xg->setAttribute('hgvs',"$g");
  266. $xg->appendChild( coordinate_xml($g) );
  267. $xml->appendChild($xg);
  268. }
  269. if ($v->loc->is_simple) {
  270. foreach my $p ($vm->convert_cds_to_pro($v)) {
  271. my $xp = XML::LibXML::Element->new('protein-variant');
  272. $xp->setAttribute('hgvs',"$p");
  273. $xml->appendChild($xp);
  274. }
  275. }
  276. } elsif ($v->type eq 'p') {
  277. foreach my $c ($vm->convert_pro_to_cds($v)) {
  278. my $xc = XML::LibXML::Element->new('cds-variant');
  279. $xc->setAttribute('hgvs',"$c");
  280. foreach my $g ($vm->convert_cds_to_chr($c)) {
  281. my $xg = XML::LibXML::Element->new('genomic-variant');
  282. $xg->setAttribute('hgvs',"$g");
  283. $xg->appendChild( coordinate_xml($g) );
  284. $xc->appendChild($xg);
  285. }
  286. $xml->appendChild($xc);
  287. }
  288. } else {
  289. Bio::HGVS::TypeError->throw(
  290. 'Only g, c, and p variant types are supported; your variant is type '.$v->type
  291. );
  292. }
  293. return $xml;
  294. }
  295. sub about_xml {
  296. my %hg = Bio::HGVS::utils::fetch_hg_info();
  297. my $xml = XML::LibXML::Element->new('about');
  298. $xml->setAttribute('jemappelle',$jemappelle);
  299. $xml->setAttribute('ensembl-version', Bio::HGVS::EnsemblConnection::api_version() );
  300. $xml->setAttribute('format-version',$FORMAT_VERSION);
  301. $xml->setAttribute($_,$hg{$_}) for qw(changeset tag date);
  302. return $xml;
  303. }
  304. sub _xmlify_exception {
  305. my ($e) = @_;
  306. my $xml = XML::LibXML::Element->new('error');
  307. $xml->setAttribute('message', $e->error);
  308. return $xml;
  309. }