# PageRenderTime 25ms CodeModel.GetById 18ms app.highlight 5ms RepoModel.GetById 1ms app.codeStats 0ms
#
# /scripts/taxonomy/processTaxonomy.sh
#
# https://bitbucket.org/cistrome/cistrome-harvard/
# Shell | 18 lines | 18 code | 0 blank | 0 comment | 0 complexity | b8a83454ffaeb5202ebf5cd19d5e8fbd MD5 | raw file
 1echo "Getting files from NCBI..."
 2wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
 3wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_nucl.dmp.gz
 4wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/gi_taxid_prot.dmp.gz
 5echo "Unzipping untarring..."
 6gunzip -c taxdump.tar.gz | tar xvf -
 7gunzip gi_taxid_nucl.dmp.gz
 8gunzip gi_taxid_prot.dmp.gz
 9cat gi_taxid_nucl.dmp gi_taxid_prot.dmp > gi_taxid_all.dmp
10echo "Sorting gi2tax files..."
11sort -n -k 1 gi_taxid_all.dmp > gi_taxid_sorted.txt
12rm gi_taxid_nucl.dmp gi_taxid_prot.dmp gi_taxid_all.dmp
13echo "Removing parenthesis from names.dmp"
14cat names.dmp | sed s/[\(\)\'\"]/_/g > names.temporary
15mv names.dmp names.dmp.orig
16mv names.temporary names.dmp 
17
18