/test/unit/bio/db/test_nexus.rb

https://github.com/domthu/bioruby · Ruby · 366 lines · 292 code · 58 blank · 16 comment · 4 complexity · b3a30ced63439669044c3a53ef9cb8fc MD5 · raw file

  1. #
  2. # = test/unit/bio/db/test_nexus.rb - Unit test for Bio::Nexus
  3. #
  4. # Copyright:: Copyright (C) 2006 Christian M Zmasek <cmzmasek@yahoo.com>
  5. #
  6. # License:: The Ruby License
  7. #
  8. # $Id:$
  9. #
  10. # == Description
  11. #
  12. # This file contains unit tests for Bio::Nexus.
  13. #
  14. # loading helper routine for testing bioruby
  15. require 'pathname'
  16. load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
  17. 'bioruby_test_helper.rb')).cleanpath.to_s
  18. # libraries needed for the tests
  19. require 'test/unit'
  20. require 'bio/db/nexus'
  21. module Bio
  22. class TestNexus < Test::Unit::TestCase
  # Input fixture handed to Bio::Nexus.new in test_nexus.
  #
  # The document holds eight blocks: Taxa, Characters, two Trees blocks,
  # Distances, Data and two Private1 blocks. It deliberately includes
  # nested and empty [bracket comments], single- and double-quoted taxon
  # labels containing spaces, and commands, tree strings and matrix rows
  # that are broken across several lines.
  #
  # NOTE(review): `<<-` keeps the leading whitespace of the heredoc body,
  # so the exact indentation of these lines is part of the fixture --
  # confirm against the upstream file before reformatting.
  23. NEXUS_STRING_1 = <<-END_OF_NEXUS_STRING
  24. #NEXUS
  25. Begin Taxa;
  26. Dimensions [[comment]] ntax=4;
  27. TaxLabels "hag fish" [comment] 'african frog'
  28. [lots of different comment follow]
  29. [] [a] [[a]] [ a ] [[ a ]] [ [ a ] ] [a ]
  30. [[a ]] [ [a ] ] [ a] [[ a]] [ [ a] ] [ ]
  31. [[ ]] [ [ ] ] [ a b ] [[ a b ]] [ [ a b ] ]
  32. [x[ x [x[ x[[x[[xx[x[ x]] ]x ] []]][x]]x]]]
  33. [comment_1 comment_3] "rat snake" 'red
  34. mouse';
  35. End;
  36. [yet another comment End; ]
  37. Begin Characters;
  38. Dimensions nchar=20
  39. ntax=4;
  40. [ ntax=1000; ]
  41. Format DataType=DNA
  42. Missing=x
  43. Gap=- MatchChar=.;
  44. Matrix [comment]
  45. fish ACATA GAGGG
  46. TACCT CTAAG
  47. frog ACTTA GAGGC TACCT CTAGC
  48. snake ACTCA CTGGG TACCT TTGCG
  49. mouse ACTCA GACGG TACCT TTGCG;
  50. End;
  51. Begin Trees;
  52. [comment]
  53. Tree best=(fish,(frog,(snake,mo
  54. use)));
  55. [some long comment]
  56. Tree
  57. other=(snake,
  58. (frog,(fish,mo
  59. use
  60. )));
  61. End;
  62. Begin Trees;
  63. [comment]
  64. Tree worst=(A,(B,(C,D
  65. )));
  66. Tree bad=(a,
  67. (b,(c , d
  68. )
  69. ) );
  70. End;
  71. Begin Distances;
  72. Dimensions nchar=20 ntax=5;
  73. Format Triangle=Both;
  74. Matrix
  75. taxon_1 0.0 1.0
  76. 2.0 4.0 7.0
  77. taxon_2 1.0
  78. 0.0 3.0 5.0 8.0
  79. taxon_3 3.0 4.0 0.0 6.0 9.0
  80. taxon_4 7.0 3.0 2.0 0.0 9.5
  81. taxon_5 1.2 1.3 1.4 1.5 0.0;
  82. End;
  83. Begin Data;
  84. Dimensions ntax=5 nchar=14;
  85. Format Datatype=RNA gap=# MISSING=x MatchChar=^;
  86. TaxLabels ciona
  87. cow [comment1 commentX] ape
  88. 'purple urchin' "green lizard";
  89. Matrix
  90. [ comment [old comment] ]
  91. taxon_1 A- CCGTCGA-GTTA
  92. taxon_2 T- CCG-CGA-GATC
  93. taxon_3 A- C-GTCGA-GATG
  94. taxon_4 A- C C TC G
  95. A - -G T T
  96. T
  97. taxon_5
  98. T-CGGTCGT-CTTA;
  99. End;
  100. Begin Private1;
  101. Something foo=5 bar=20;
  102. Format Datatype=DNA;
  103. Matrix
  104. taxon_1 1111 1111111111
  105. taxon_2 2222 2222222222
  106. taxon_3 3333 3333333333
  107. taxon_4 4444 4444444444
  108. taxon_5 5555 5555555555;
  109. End;
  110. Begin Private1;
  111. some [boring]
  112. interesting [
  113. outdated
  114. ] data be here
  115. End;
  116. END_OF_NEXUS_STRING
  # Expected text of the Data block from NEXUS_STRING_1 after it has been
  # parsed and written back out; test_nexus compares it verbatim with the
  # result of data_block.to_nexus().
  #
  # Relative to the input, keyword spelling is normalised (NTax, NChar,
  # DataType, Missing, Gap), quoted labels appear with underscores, and each
  # matrix row is a single gap-free-of-whitespace character string.
  #
  # NOTE(review): the comparison is exact, and `<<-` does not strip leading
  # whitespace from the body -- confirm the indentation against the
  # upstream file before touching these lines.
  117. DATA_BLOCK_OUTPUT_STRING = <<-DATA_BLOCK_OUTPUT_STRING
  118. Begin Data;
  119. Dimensions NTax=5 NChar=14;
  120. Format DataType=RNA Missing=x Gap=# MatchChar=^;
  121. TaxLabels ciona cow ape purple_urchin green_lizard;
  122. Matrix
  123. taxon_1 A-CCGTCGA-GTTA
  124. taxon_2 T-CCG-CGA-GATC
  125. taxon_3 A-C-GTCGA-GATG
  126. taxon_4 A-CCTCGA--GTTT
  127. taxon_5 T-CGGTCGT-CTTA;
  128. End;
  129. DATA_BLOCK_OUTPUT_STRING
  # Parses NEXUS_STRING_1 and verifies every block the parser returns:
  # the number of blocks of each kind, then the content of the Taxa,
  # Characters, Trees, Distances, Data and generic (Private1) blocks.
  #
  # All assertions use Test::Unit's documented argument order,
  # assert_equal(expected, actual), so that failure messages label the
  # two values correctly.
  def test_nexus
    nexus = Bio::Nexus.new(NEXUS_STRING_1)
    assert_equal(8, nexus.get_blocks.size)

    private_blocks   = nexus.get_blocks_by_name("private1")
    data_blocks      = nexus.get_data_blocks
    character_blocks = nexus.get_characters_blocks
    trees_blocks     = nexus.get_trees_blocks
    distances_blocks = nexus.get_distances_blocks
    taxa_blocks      = nexus.get_taxa_blocks

    assert_equal(2, private_blocks.size)
    assert_equal(1, data_blocks.size)
    assert_equal(1, character_blocks.size)
    assert_equal(2, trees_blocks.size)
    assert_equal(1, distances_blocks.size)
    assert_equal(1, taxa_blocks.size)

    check_taxa_block(taxa_blocks[0])
    check_characters_block(character_blocks[0])
    check_trees_blocks(trees_blocks[0], trees_blocks[1])
    check_distances_block(distances_blocks[0])
    check_data_block(data_blocks[0])
    check_generic_blocks(private_blocks[0], private_blocks[1])
  end # test_nexus

  private

  # Asserts that the block yields expected[i] for every index i.
  # +label+ is only used to make failure messages point at the item.
  def assert_items(expected, label)
    expected.each_with_index do |value, index|
      assert_equal(value, yield(index), "#{label}[#{index}]")
    end
  end

  # Checks the Taxa block: taxon count and the four normalised labels.
  def check_taxa_block(taxa_block)
    assert_equal(4, taxa_block.get_number_of_taxa.to_i)
    assert_items(%w[hag_fish african_frog rat_snake red_mouse], "taxa") do |i|
      taxa_block.get_taxa[i]
    end
  end

  # Checks shared by the Characters and Data blocks.
  # +rows+ is an ordered array of [row_name, characters] pairs, one per
  # matrix row. For each row this verifies the raw matrix cells, the
  # character strings (looked up by name and by index) and the sequence
  # objects (looked up by name and by index).
  def check_matrix_rows(block, rows)
    rows.each_with_index do |(name, chars), row|
      matrix = block.get_matrix
      assert_equal(name, matrix.get_value(row, 0), "row #{row} name")
      # Column 0 holds the row name, so the last character sits in the
      # column whose index equals the number of characters.
      assert_equal(chars[-1, 1], matrix.get_value(row, chars.size),
                   "row #{row} last character")

      assert_equal(chars, block.get_characters_strings_by_name(name)[0])
      assert_equal(chars, block.get_characters_string(row))

      by_name = block.get_sequences_by_name(name)[0]
      assert_equal(chars.downcase, by_name.seq.to_s.downcase)
      assert_equal(name, by_name.definition)

      by_index = block.get_sequence(row)
      assert_equal(chars.downcase, by_index.seq.to_s.downcase)
      assert_equal(name, by_index.definition)
    end
  end

  # Checks the Characters block: dimensions, format settings and matrix.
  def check_characters_block(chars_block)
    assert_equal(4, chars_block.get_number_of_taxa.to_i)
    assert_equal(20, chars_block.get_number_of_characters.to_i)
    assert_equal("DNA", chars_block.get_datatype)
    assert_equal(".", chars_block.get_match_character)
    assert_equal("x", chars_block.get_missing)
    assert_equal("-", chars_block.get_gap_character)

    rows = [
      ["fish",  "ACATAGAGGGTACCTCTAAG"],
      ["frog",  "ACTTAGAGGCTACCTCTAGC"],
      ["snake", "ACTCACTGGGTACCTTTGCG"],
      ["mouse", "ACTCAGACGGTACCTTTGCG"]
    ]
    check_matrix_rows(chars_block, rows)
    assert_equal("frog", chars_block.get_row_name(1))
  end

  # Returns how many ancestors the node called +node_name+ has in +tree+.
  def ancestor_count(tree, node_name)
    node = tree.nodes.find { |candidate| candidate.name == node_name }
    tree.ancestors(node).size
  end

  # Checks both Trees blocks: tree names, the re-joined tree strings of
  # the first block, and the topology of all four parsed trees.
  def check_trees_blocks(tree_block_0, tree_block_1)
    assert_equal("best", tree_block_0.get_tree_names[0])
    assert_equal("other", tree_block_0.get_tree_names[1])
    assert_equal("(fish,(frog,(snake,mouse)));",
                 tree_block_0.get_tree_strings_by_name("best")[0])
    assert_equal("(snake,(frog,(fish,mouse)));",
                 tree_block_0.get_tree_strings_by_name("other")[0])

    best_tree  = tree_block_0.get_trees_by_name("best")[0]
    other_tree = tree_block_0.get_trees_by_name("other")[0]
    worst_tree = tree_block_1.get_tree(0)
    bad_tree   = tree_block_1.get_tree(1)

    assert_equal(6, best_tree.descendents(best_tree.root).size)
    assert_equal(4, best_tree.leaves.size)
    assert_equal(6, other_tree.descendents(other_tree.root).size)
    assert_equal(4, other_tree.leaves.size)

    assert_equal(1, ancestor_count(best_tree, 'fish'))
    assert_equal(3, ancestor_count(other_tree, 'fish'))
    assert_equal(1, ancestor_count(worst_tree, 'A'))
    assert_equal(3, ancestor_count(bad_tree, 'c'))
  end

  # Checks the Distances block: dimensions, triangle format, and the
  # first (name) and last (column 5) cell of every matrix row.
  def check_distances_block(dist_block)
    assert_equal(5, dist_block.get_number_of_taxa.to_i)
    assert_equal(20, dist_block.get_number_of_characters.to_i)
    assert_equal("Both", dist_block.get_triangle)

    rows = [
      ["taxon_1", 7.0],
      ["taxon_2", 8.0],
      ["taxon_3", 9.0],
      ["taxon_4", 9.5],
      ["taxon_5", 0.0]
    ]
    rows.each_with_index do |(name, last_distance), row|
      assert_equal(name, dist_block.get_matrix.get_value(row, 0))
      assert_equal(last_distance, dist_block.get_matrix.get_value(row, 5).to_f)
    end
  end

  # Checks the Data block: dimensions, format settings, taxon labels,
  # matrix rows, row names and the serialised NEXUS output.
  def check_data_block(data_block)
    assert_equal(5, data_block.get_number_of_taxa.to_i)
    assert_equal(14, data_block.get_number_of_characters.to_i)
    assert_equal("RNA", data_block.get_datatype)
    assert_equal("^", data_block.get_match_character)
    assert_equal("x", data_block.get_missing)
    assert_equal("#", data_block.get_gap_character)

    assert_items(%w[ciona cow ape purple_urchin green_lizard], "taxa") do |i|
      data_block.get_taxa[i]
    end

    rows = [
      ["taxon_1", "A-CCGTCGA-GTTA"],
      ["taxon_2", "T-CCG-CGA-GATC"],
      ["taxon_3", "A-C-GTCGA-GATG"],
      ["taxon_4", "A-CCTCGA--GTTT"],
      ["taxon_5", "T-CGGTCGT-CTTA"]
    ]
    check_matrix_rows(data_block, rows)
    assert_items(rows.map { |name, _chars| name }, "row name") do |i|
      data_block.get_row_name(i)
    end

    assert_equal(DATA_BLOCK_OUTPUT_STRING, data_block.to_nexus)
  end

  # Checks the leading tokens of the two generic Private1 blocks; the
  # second list confirms that bracketed comments are not returned as tokens.
  def check_generic_blocks(generic_0, generic_1)
    first_tokens = %w[Something foo 5 bar 20 Format Datatype DNA Matrix taxon_1 1111]
    assert_items(first_tokens, "private1 #0 token") { |i| generic_0.get_tokens[i] }

    second_tokens = %w[some interesting data be here]
    assert_items(second_tokens, "private1 #1 token") { |i| generic_1.get_tokens[i] }
  end
  305. end # class TestNexus
  306. end # module Bio