PageRenderTime 21ms CodeModel.GetById 8ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/human_genome_variation/disease_ontology_gene_fuzzy_selector.pl

https://bitbucket.org/cistrome/cistrome-harvard/
Perl | 64 lines | 48 code | 6 blank | 10 comment | 5 complexity | 8b32834c5ba17367acb889451c6d46c5 MD5 | raw file
 1#!/usr/bin/env perl
 2
 3use strict;
 4use warnings;
 5
 6##################################################################
 7# Select genes that are associated with the diseases listed in the
 8# disease ontology.
 9# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page
10# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/
11# Sept 2010, switch to doLite
12# input: build outfile sourceFileLoc.loc term or partial term
13##################################################################
14
# ----------------------------------------------------------------
# Command line: build outfile sourceFile.loc term
#   build      - build key compared against column 1 of the .loc file
#   outfile    - file the selected rows are written to (overwritten)
#   sourceFile - tab-separated .loc file; the row whose first column is
#                the build and whose second column is the literal
#                'disease associated genes' names the data file in its
#                third column (the last such row wins)
#   term       - one or more whitespace-separated words; a data row is
#                selected when its 7th column contains any of the words
#                (case-insensitive, literal substring match).  The
#                single word 'disease' selects every row.
# Only the first term argument is read; further arguments are ignored.
# Diagnostics are printed to STDOUT and the script then exits with the
# default status, exactly as before, so existing callers see the same
# messages.
# ----------------------------------------------------------------
if (@ARGV < 3) {
   print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n";
   exit;
}

my ($build, $out, $in, $term) = @ARGV;
if (defined $term) {
   # Remove the single quotes that protect the term from the shell.
   # Guarded so a missing term does not raise "uninitialized" warnings;
   # the missing term is reported further down, after the build lookup.
   $term =~ s/^'//;
   $term =~ s/'$//;
}

# Look up the data file registered for this build in the .loc file.
my $data;
open(my $loc_fh, '<', $in) or die "Couldn't open $in, $!\n";
while (my $line = <$loc_fh>) {
   chomp $line;
   next if $line =~ /^\s*#/;     # comment line
   my @f = split(/\t/, $line);
   next if @f < 3;               # blank/short rows cannot name a data file
   if ($f[0] eq $build and $f[1] eq 'disease associated genes') {
      $data = $f[2];
   }
}
close $loc_fh or die "Couldn't close $in, $!\n";
if (!$data) {
   print "Error $build not found in $in\n";
   exit;
}

# split ' ' skips leading whitespace and never yields empty words, so a
# padded term can no longer produce an empty alternative that would
# match every row.
my @words = defined $term ? split(' ', $term) : ();
if (!@words) {
   print "No disease term entered\n";
   exit;
}

# The lone word 'disease' means "print all rows".
my $select_all = (@words == 1 && $words[0] eq 'disease');

# Use OR between words.  Each word is escaped so that regex
# metacharacters typed by the user are matched literally instead of
# aborting the script with a pattern compilation error.
my $alternation = join('|', map { quotemeta } @words);
my $term_re = qr/$alternation/i;

#start with just fuzzy term matches
open(my $out_fh, '>', $out) or die "Couldn't open $out, $!\n";
open(my $data_fh, '<', $data) or die "Couldn't open data file $data, $!\n";
while (my $line = <$data_fh>) {
   chomp $line;
   my @f = split(/\t/, $line); #chrom start end strand geneName geneID disease
   my $disease = $f[6];          # undefined on rows with fewer than 7 columns
   if ($select_all or (defined $disease and $disease =~ $term_re)) {
      print {$out_fh} join("\t", @f), "\n";
   }
}
close $data_fh or die "Couldn't close data file $data, $!\n";
close $out_fh or die "Couldn't close $out, $!\n";

exit;