PageRenderTime 33ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/taxonomy/processTaxonomy.sh

https://bitbucket.org/cistrome/cistrome-harvard/
Shell | 18 lines | 18 code | 0 blank | 0 comment | 0 complexity | b8a83454ffaeb5202ebf5cd19d5e8fbd MD5 | raw file
  # Download the NCBI taxonomy dump and the GI-to-taxid maps, merge the two
  # maps into one table sorted numerically on its first column, and replace
  # parentheses and quote characters in names.dmp.
  #
  # Files left in the current directory:
  #   gi_taxid_sorted.txt  nucleotide + protein GI/taxid lines, sorted
  #   names.dmp            copy of names.dmp with ( ) ' " replaced by _
  #   names.dmp.orig       names.dmp as it was before the replacement
  #   taxdump.tar.gz and the files extracted from it
  #
  # Only POSIX sh features are used, so the script runs under sh or bash.

  # Stop at the first failing command or unset variable, so a broken download
  # never leads to deleting inputs or overwriting names.dmp with partial data.
  set -eu

  ncbi_taxonomy_url="ftp://ftp.ncbi.nih.gov/pub/taxonomy"

  echo "Getting files from NCBI..."
  # -O forces the target name: without it, wget saves to "<name>.1" when a
  # file from an earlier run exists and the stale copy would be processed.
  for archive in taxdump.tar.gz gi_taxid_nucl.dmp.gz gi_taxid_prot.dmp.gz; do
    wget -O "$archive" "$ncbi_taxonomy_url/$archive"
  done

  echo "Unzipping untarring..."
  # Check the tarball first: in a plain sh pipeline only tar's exit status is
  # seen, so a gunzip failure could otherwise go unnoticed.
  gunzip -t taxdump.tar.gz
  gunzip -c taxdump.tar.gz | tar xvf -
  # -f overwrites .dmp files left behind by a previously aborted run.
  gunzip -f gi_taxid_nucl.dmp.gz
  gunzip -f gi_taxid_prot.dmp.gz

  echo "Sorting gi2tax files..."
  # sort reads both inputs itself; no concatenated temporary file is needed.
  sort -n -k 1 gi_taxid_nucl.dmp gi_taxid_prot.dmp > gi_taxid_sorted.txt
  rm gi_taxid_nucl.dmp gi_taxid_prot.dmp

  echo "Removing parenthesis from names.dmp"
  # The sed program is quoted so the shell cannot glob or re-interpret it;
  # sed receives: s/[()'"]/_/g
  sed "s/[()'\"]/_/g" names.dmp > names.temporary
  mv names.dmp names.dmp.orig
  mv names.temporary names.dmp