PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/test/unit/bio/db/test_gff.rb

https://github.com/nmb/bioruby
Ruby | 1255 lines | 1143 code | 89 blank | 23 comment | 7 complexity | 3231fd8e96db98291db4b49d4736378b MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. #
  2. # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF
  3. #
  4. # Copyright:: Copyright (C) 2005, 2008
  5. # Mitsuteru Nakao <n@bioruby.org>
  6. # Naohisa Goto <ng@bioruby.org>
  7. # License:: The Ruby License
  8. #
  9. # $Id:$
  10. #
  11. # loading helper routine for testing bioruby
  12. require 'pathname'
  13. load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3,
  14. 'bioruby_test_helper.rb')).cleanpath.to_s
  15. # libraries needed for the tests
  16. require 'test/unit'
  17. require 'digest/sha1'
  18. require 'bio/db/gff'
  19. module Bio
  # Tests for the generic Bio::GFF parser: a multi-line GFF text is
  # parsed once per test and the resulting record list is inspected.
  class TestGFF < Test::Unit::TestCase

    # Builds @obj from an eight-line GFF sample.
    # NOTE(review): GFF columns are normally tab-separated; confirm that
    # the field separators in the sample below are the intended ones.
    def setup
      gff_text = <<END_OF_DATA
I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1\; Chromosome I Centromere"
I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene "TSV115" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3\; transcription factor tau (TFIIIC) subunit 138"
I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
END_OF_DATA
      @obj = Bio::GFF.new(gff_text)
    end

    # One record is expected per input line.
    def test_records
      record_count = @obj.records.size
      assert_equal(8, record_count)
    end

    # Parsed entries are expected to be Bio::GFF::Record objects.
    def test_record_class
      first_record = @obj.records[0]
      assert_equal(Bio::GFF::Record, first_record.class)
    end

  end # class TestGFF
  # Tests for Bio::GFF::Record: every accessor is checked against a
  # single parsed GFF line.
  class TestGFFRecord < Test::Unit::TestCase

    # Parses one GFF line into @obj before each test.
    def setup
      data =<<END_OF_DATA
I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"
END_OF_DATA
      @obj = Bio::GFF::Record.new(data)
    end

    def test_seqname
      assert_equal('I', @obj.seqname)
    end

    def test_source
      assert_equal('sgd', @obj.source)
    end

    def test_feature
      assert_equal('gene', @obj.feature)
    end

    # The expected coordinates here are strings, not integers.
    def test_start
      assert_equal('151453', @obj.start)
    end

    def test_end
      assert_equal('151591', @obj.end)
    end

    def test_score
      assert_equal('.', @obj.score)
    end

    def test_strand
      assert_equal('+', @obj.strand)
    end

    def test_frame
      assert_equal('.', @obj.frame)
    end

    # Attribute values are expected with their surrounding double quotes.
    def test_attributes
      at = {"Note"=>'"Chromosome I Centromere"', "Gene"=>'"CEN1"'}
      assert_equal(at, @obj.attributes)
    end

    # The sample line carries no trailing comment.
    def test_comment
      # assert_nil states the intent directly instead of comparing with nil.
      assert_nil(@obj.comment)
    end

  end # class TestGFFRecord
  # Tests building a Bio::GFF object by hand instead of parsing text.
  class TestGFFRecordConstruct < Test::Unit::TestCase

    # Starts every test from a Bio::GFF created without input data.
    def setup
      @obj = Bio::GFF.new
    end

    # A record created from an empty string can be given a seqname and
    # appended to the record list.
    def test_add_seqname
      expected_name = "test"
      blank_record = Bio::GFF::Record.new("")
      blank_record.seqname = expected_name
      @obj.records << blank_record
      stored_record = @obj.records[0]
      assert_equal(expected_name, stored_record.seqname)
    end

  end # class TestGFFRecordConstruct
  # Tests for Bio::GFF::GFF2: version handling, metadata lines, record
  # counting and serialization of a complete GFF2 document.
  class TestGFF2 < Test::Unit::TestCase

    # Parses a GFF2 document with two "##" header lines and eight
    # feature lines into @obj.
    # NOTE(review): GFF columns are normally tab-separated; confirm that
    # the field separators in the sample below are the intended ones.
    def setup
      gff2_text = <<END_OF_DATA
##gff-version 2
##date 2008-09-22
I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1; Chromosome I Centromere"
I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene "TSV115" ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
END_OF_DATA
      @obj = Bio::GFF::GFF2.new(gff2_text)
    end

    def test_const_version
      assert_equal(2, Bio::GFF::GFF2::VERSION)
    end

    # The version read from the document is expected as a string.
    def test_gff_version
      assert_equal('2', @obj.gff_version)
    end

    # Only the "##date" line is expected in the metadata list.
    def test_metadata_size
      assert_equal(1, @obj.metadata.size)
    end

    def test_metadata
      expected_meta = Bio::GFF::GFF2::MetaData.new('date', '2008-09-22')
      assert_equal(expected_meta, @obj.metadata[0])
    end

    def test_records_size
      assert_equal(8, @obj.records.size)
    end

    # In the expected output the single-word attribute values (CEN1, TFC3, ...)
    # are unquoted, while the free-text Note values stay quoted.
    def test_to_s
      expected_text = <<END_OF_DATA
##gff-version 2
##date 2008-09-22
I sgd CEN 151453 151591 . + . CEN CEN1 ; Note "CEN1; Chromosome I Centromere"
I sgd gene 147591 151163 . - . Gene TFC3 ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene FUN24 ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd gene 147591 151163 . - . Gene TSV115 ; Note "transcription factor tau (TFIIIC) subunit 138"
I sgd ORF 147591 151163 . - . ORF YAL001C ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
I sgd gene 143998 147528 . + . Gene VPS8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene FUN15 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
I sgd gene 143998 147528 . + . Gene VPT8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
END_OF_DATA
      assert_equal(expected_text, @obj.to_s)
    end

  end #class TestGFF2
  # Tests for Bio::GFF::GFF2::Record: field accessors, serialization,
  # equality, and the attribute query / set / replace / delete / sort API.
  #
  # The fixture line has one Target, one E_value, two Align, one Comment,
  # one Note and three Misc attributes, so both single-valued and
  # multi-valued tags are covered.
  class TestGFF2Record < Test::Unit::TestCase

    # Parses the fixture line into @obj before each test.
    def setup
      str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget \"HBA_HUMAN\" 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\x1a\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\"; Misc IdString; Misc \"free text\"; Misc 5678 "
      @obj = Bio::GFF::GFF2::Record.new(str)
    end

    # The expected output differs from the input: single-word values are
    # unquoted, "\x1a" appears as "\032", and separators are " ; ".
    def test_to_s
      str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget HBA_HUMAN 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\032\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\" ; Misc IdString ; Misc \"free text\" ; Misc 5678\n"
      assert_equal(str, @obj.to_s)
    end

    # A record re-parsed from its own to_s output must compare equal.
    def test_eqeq
      obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
      assert_equal(true, @obj == obj2)
    end

    def test_eqeq_false
      obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
      obj2.seqname = 'seq2'
      assert_equal(false, @obj == obj2)
    end

    def test_comment_only?
      assert_equal(false, @obj.comment_only?)
    end

    def test_seqname
      assert_equal('seq1', @obj.seqname)
    end

    def test_source
      assert_equal('BLASTX', @obj.source)
    end

    def test_feature
      assert_equal('similarity', @obj.feature)
    end

    # Coordinates, score and frame are expected as numbers here.
    def test_start
      assert_equal(101, @obj.start)
    end

    def test_end
      assert_equal(235, @obj.end)
    end

    def test_score
      assert_equal(87.1, @obj.score)
    end

    def test_strand
      assert_equal('+', @obj.strand)
    end

    def test_frame
      assert_equal(0, @obj.frame)
    end

    # For tags that occur more than once (Align, Misc) the expected hash
    # holds the first value only.
    def test_attributes_to_hash
      hash = {
        'Target' =>
        Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']),
        'E_value' => '0.0003',
        'Align' =>
        Bio::GFF::GFF2::Record::Value.new(['101', '11']),
        'Comment' =>
        Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]),
        'Note' => '',
        'Misc' => 'IdString'
      }
      assert_equal(hash, @obj.attributes_to_hash)
    end

    # The full attribute list keeps every tag/value pair in input order.
    def test_attributes
      attributes =
        [ [ 'Target',
            Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ],
          [ 'E_value', '0.0003' ],
          [ 'Align',
            Bio::GFF::GFF2::Record::Value.new(['101', '11']) ],
          [ 'Align',
            Bio::GFF::GFF2::Record::Value.new(['179', '36']) ],
          [ 'Comment',
            Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ],
          [ 'Note', '' ],
          [ 'Misc', 'IdString' ],
          [ 'Misc', 'free text' ],
          [ 'Misc', '5678' ]
        ]
      assert_equal(attributes, @obj.attributes)
    end

    # attribute(tag) is expected to give the first value for the tag.
    def test_attribute
      val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
      assert_equal(val_Target, @obj.attribute('Target'))
      assert_equal('0.0003', @obj.attribute('E_value'))
      val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
      assert_equal(val_Align0, @obj.attribute('Align'))
      val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
      assert_equal(val_Comment, @obj.attribute('Comment'))
      assert_equal('', @obj.attribute('Note'))
      assert_equal('IdString', @obj.attribute('Misc'))
    end

    def test_attribute_nonexistent
      assert_nil(@obj.attribute('NonExistent'))
    end

    # get_attribute(tag) is checked with the same expectations as
    # attribute(tag).
    def test_get_attribute
      val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
      assert_equal(val_Target, @obj.get_attribute('Target'))
      assert_equal('0.0003', @obj.get_attribute('E_value'))
      val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
      assert_equal(val_Align0, @obj.get_attribute('Align'))
      val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
      assert_equal(val_Comment, @obj.get_attribute('Comment'))
      assert_equal('', @obj.get_attribute('Note'))
      assert_equal('IdString', @obj.get_attribute('Misc'))
    end

    def test_get_attribute_nonexistent
      assert_nil(@obj.get_attribute('NonExistent'))
    end

    # get_attributes(tag) is expected to give every value for the tag,
    # in input order, as an array.
    def test_get_attributes
      val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
      assert_equal([ val_Target ], @obj.get_attributes('Target'))
      assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
      val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
      val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
      assert_equal([ val_Align0, val_Align1 ],
                   @obj.get_attributes('Align'))
      val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
      assert_equal([ val_Comment ], @obj.get_attributes('Comment'))
      assert_equal([ '' ], @obj.get_attributes('Note'))
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    def test_get_attributes_nonexistent
      assert_equal([], @obj.get_attributes('NonExistent'))
    end

    # set_attribute is expected to return the new value.
    def test_set_attribute
      assert_equal('0.0003', @obj.attribute('E_value'))
      assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10'))
      assert_equal('1e-10', @obj.attribute('E_value'))
    end

    # With a repeated tag only the first value is expected to change.
    def test_set_attribute_multiple
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal('Replaced',
                   @obj.set_attribute('Misc', 'Replaced'))
      assert_equal([ 'Replaced', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    def test_set_attribute_nonexistent
      assert_nil(@obj.attribute('NonExistent'))
      assert_equal('test', @obj.set_attribute('NonExistent', 'test'))
      assert_equal('test', @obj.attribute('NonExistent'))
    end

    # replace_attributes is expected to return the record itself.
    def test_replace_attributes
      assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
      assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10'))
      assert_equal([ '1e-10' ], @obj.get_attributes('E_value'))
    end

    # One existing value replaced by three.
    def test_replace_attributes_single_multiple
      assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
      assert_equal(@obj, @obj.replace_attributes('E_value',
                                                 '1e-10', '3.14', '2.718'))
      assert_equal([ '1e-10', '3.14', '2.718' ],
                   @obj.get_attributes('E_value'))
    end

    # Three existing values replaced by one.
    def test_replace_attributes_multiple_single
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal(@obj,
                   @obj.replace_attributes('Misc', 'Replaced_All'))
      assert_equal([ 'Replaced_All' ],
                   @obj.get_attributes('Misc'))
    end

    # Three existing values replaced by two.
    def test_replace_attributes_multiple_multiple_two
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal(@obj,
                   @obj.replace_attributes('Misc',
                                           'Replaced', 'test2'))
      assert_equal([ 'Replaced', 'test2' ],
                   @obj.get_attributes('Misc'))
    end

    # Three existing values replaced by three.
    def test_replace_attributes_multiple_multiple_same
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal(@obj,
                   @obj.replace_attributes('Misc',
                                           'Replaced', 'test2', 'test3'))
      assert_equal([ 'Replaced', 'test2', 'test3' ],
                   @obj.get_attributes('Misc'))
    end

    # Three existing values replaced by four.
    def test_replace_attributes_multiple_multiple_over
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal(@obj,
                   @obj.replace_attributes('Misc',
                                           'Replaced', 'test2', 'test3', '4'))
      assert_equal([ 'Replaced', 'test2', 'test3', '4' ],
                   @obj.get_attributes('Misc'))
    end

    def test_replace_attributes_nonexistent
      assert_nil(@obj.attribute('NonExistent'))
      assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test'))
      assert_equal([ 'test' ], @obj.get_attributes('NonExistent'))
    end

    def test_replace_attributes_nonexistent_multiple
      assert_nil(@obj.attribute('NonExistent'))
      assert_equal(@obj,
                   @obj.replace_attributes('NonExistent',
                                           'test', 'gff2', 'attr'))
      assert_equal([ 'test', 'gff2', 'attr' ],
                   @obj.get_attributes('NonExistent'))
    end

    # delete_attribute(tag, value) is expected to return the removed value.
    def test_delete_attribute
      assert_equal('0.0003', @obj.attribute('E_value'))
      assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003'))
      assert_nil(@obj.attribute('E_value'))
    end

    # A value that does not match leaves the attribute untouched.
    def test_delete_attribute_nil
      assert_equal('0.0003', @obj.attribute('E_value'))
      assert_nil(@obj.delete_attribute('E_value', '3'))
      assert_equal('0.0003', @obj.attribute('E_value'))
    end

    def test_delete_attribute_multiple
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal('free text',
                   @obj.delete_attribute('Misc', 'free text'))
      assert_equal([ 'IdString', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    def test_delete_attribute_multiple2
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal('IdString',
                   @obj.delete_attribute('Misc', 'IdString'))
      assert_equal([ 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal('5678',
                   @obj.delete_attribute('Misc', '5678'))
      assert_equal([ 'free text' ],
                   @obj.get_attributes('Misc'))
    end

    def test_delete_attribute_multiple_nil
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_nil(@obj.delete_attribute('Misc', 'test'))
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    def test_delete_attribute_nonexistent
      assert_nil(@obj.attribute('NonExistent'))
      assert_nil(@obj.delete_attribute('NonExistent', 'test'))
      assert_equal([], @obj.get_attributes('NonExistent'))
    end

    # delete_attributes(tag) is expected to return the record itself when
    # the tag exists.
    def test_delete_attributes
      assert_equal('0.0003', @obj.attribute('E_value'))
      assert_equal(@obj, @obj.delete_attributes('E_value'))
      assert_nil(@obj.attribute('E_value'))
    end

    def test_delete_attributes_multiple
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
      assert_equal(@obj, @obj.delete_attributes('Misc'))
      assert_equal([], @obj.get_attributes('Misc'))
    end

    # When the tag does not exist, nil is expected instead of the record.
    def test_delete_attributes_nonexistent
      assert_nil(@obj.attribute('NonExistent'))
      assert_nil(@obj.delete_attributes('NonExistent'))
      assert_equal([], @obj.get_attributes('NonExistent'))
    end

    # Listed tags are expected first, in the given order; tags that are
    # not listed follow in their original relative order.
    def test_sort_attributes_by_tag!
      tags = %w( Comment Align E_value Note )
      assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
      assert_equal(%w( Comment Align Align E_value Note Target
                       Misc Misc Misc ),
                   @obj.attributes.collect { |x| x[0] })
      # check if the order of 'Misc' is not changed
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    def test_sort_attributes_by_tag_bang_test2
      tags = %w( E_value Misc Note Target )
      assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
      assert_equal(%w( E_value Misc Misc Misc Note Target
                       Align Align Comment ),
                   @obj.attributes.collect { |x| x[0] })
      # check if the order of 'Misc' is not changed
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

    # With a block, the block's comparison decides the tag order.
    def test_sort_attributes_by_tag_bang_with_block
      assert_equal(@obj,
                   @obj.sort_attributes_by_tag! { |x, y|
                     x <=> y
                   })
      assert_equal(%w( Align Align Comment E_value Misc Misc Misc
                       Note Target ),
                   @obj.attributes.collect { |x| x[0] })
      # check if the order of 'Misc' is not changed
      assert_equal([ 'IdString', 'free text', '5678' ],
                   @obj.get_attributes('Misc'))
    end

  end #class TestGFF2Record
  # Tests for a Bio::GFF::GFF2::Record created from a line that holds
  # nothing but a "#" comment.
  class TestGFF2RecordEmpty < Test::Unit::TestCase

    # Builds the comment-only record used by every test.
    def setup
      @obj = Bio::GFF::GFF2::Record.new('# test comment')
    end

    def test_comment_only?
      assert_equal(true, @obj.comment_only?)
    end

    # Once a field is filled in, the record is no longer comment-only.
    def test_comment_only_false
      @obj.seqname = 'test'
      assert_equal(false, @obj.comment_only?)
    end

    def test_to_s
      assert_equal("# test comment\n", @obj.to_s)
    end

    # After fields are set, the expected output has "." for the unset
    # columns and the comment appended after a tab.
    def test_to_s_not_empty
      @obj.seqname = 'test'
      @obj.feature = 'region'
      @obj.start = 1
      @obj.end = 100
      without_attributes = "test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n"
      assert_equal(without_attributes, @obj.to_s)

      @obj.add_attribute('Gene', 'unknown')
      with_attribute = "test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n"
      assert_equal(with_attribute, @obj.to_s)
    end

    # The expected comment text is everything after the "#", including
    # the leading space.
    def test_comment
      assert_equal(' test comment', @obj.comment)
    end

    def test_comment_eq
      new_comment = 'changed the comment'
      assert_equal(new_comment, @obj.comment = 'changed the comment')
    end

  end #class TestGFF2RecordEmpty
  # Tests GFF2 attribute parsing on lines with quoted free text,
  # embedded semicolons and escaped quotes.
  class TestGFF2ComplexAttributes < Test::Unit::TestCase

    # The test string comes from the Poplar genome annotation from the JGI.
    # ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz
    # Thanks to Tomoaki NISHIYAMA, who picked out the example line.
    def test_attributes_case1
      line = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n"
      expected = [
        [ "name", "grail3.0116000101" ],
        [ "proteinId", "639579" ],
        [ "exonNumber", "3" ]
      ]
      parsed = Bio::GFF::GFF2::Record.new(line)
      assert_equal(expected, parsed.attributes)
    end

    # The test string is modified from that of test_attributes_case1:
    # two free-text attributes with semicolons and escaped quotes are added.
    def test_attributes_case2
      line = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n"
      expected = [
        [ "name", "grail3.0116000101" ],
        [ "proteinId", "639579" ],
        [ "exonNumber", "3" ],
        [ "Note", "Semicolons ; and ;, and quote \" can be OK" ],
        [ "Comment", "This is the \"comment\"" ]
      ]
      parsed = Bio::GFF::GFF2::Record.new(line)
      assert_equal(expected, parsed.attributes)
    end

    def test_attributes_incompatible_backslash_semicolon
      # No special treatments for backslash-semicolon outside the free text:
      # "Illegal a\;b c d" is expected to split into tag "Illegal" with value
      # "a\" and a separate tag "b" with values "c" and "d".
      line = <<END_OF_DATA
I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"; Semicolon a "b;c" d "e;f;g" h; Illegal a\\;b c d; Comment "a ; b"
END_OF_DATA
      expected = [
        [ 'Gene', 'CEN1' ],
        [ 'Note', 'Chromosome I Centromere' ],
        [ 'Semicolon',
          Bio::GFF::GFF2::Record::Value.new(['a', 'b;c', 'd', 'e;f;g', 'h']) ],
        [ 'Illegal', "a\\" ],
        [ 'b', Bio::GFF::GFF2::Record::Value.new(['c', 'd']) ],
        [ 'Comment', 'a ; b' ]
      ]
      parsed = Bio::GFF::GFF2::Record.new(line)
      assert_equal(expected, parsed.attributes)
    end

  end #class TestGFF2ComplexAttributes
  # Tests for Bio::GFF::GFF2::MetaData, the object that represents a
  # "##directive data" line.
  class TestGFF2MetaData < Test::Unit::TestCase

    # Creates the metadata object directly from a directive and its data.
    def setup
      @data = Bio::GFF::GFF2::MetaData.new('date', '2008-09-22')
    end

    # Parsing the textual form must give an object equal to the one
    # built directly.
    def test_parse
      parsed = Bio::GFF::GFF2::MetaData.parse('##date 2008-09-22')
      assert_equal(@data, parsed)
    end

    def test_directive
      assert_equal('date', @data.directive)
    end

    def test_data
      assert_equal('2008-09-22', @data.data)
    end

  end #class TestGFF2MetaData
  # Tests for Bio::GFF::GFF3 on a complete document: header directives,
  # feature records, the embedded ##FASTA section and round-trip output.
  class TestGFF3 < Test::Unit::TestCase

    # Keeps the raw text in @data (test_to_s compares against it) and
    # the parsed document in @gff3.
    # NOTE(review): GFF columns are normally tab-separated; confirm that
    # the field separators in the sample below are the intended ones.
    def setup
      @data = <<END_OF_DATA
##gff-version 3
##sequence-region test01 1 400
test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
##FASTA
>test01
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
END_OF_DATA
      @gff3 = Bio::GFF::GFF3.new(@data)
    end

    def test_const_version
      assert_equal(3, Bio::GFF::GFF3::VERSION)
    end

    # The "##sequence-region" directive is expected as one
    # SequenceRegion with integer bounds.
    def test_sequence_regions
      expected_region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400)
      assert_equal([ expected_region ], @gff3.sequence_regions)
    end

    def test_gff_version
      assert_equal('3', @gff3.gff_version)
    end

    # Checks the record count and compares four of the seven records
    # (indexes 0, 1, 3 and 6) with records built field by field.
    # A "." column in the input corresponds to nil in the expected record.
    def test_records
      assert_equal(7, @gff3.records.size)

      contig = Bio::GFF::GFF3::Record.new('test01',
                                          'RANDOM',
                                          'contig',
                                          1, 400, nil, '+', nil,
                                          [ ['ID', 'test01'],
                                            ['Note', 'this is test'] ])
      assert_equal(contig, @gff3.records[0])

      mrna = Bio::GFF::GFF3::Record.new('test01',
                                        nil,
                                        'mRNA',
                                        101, 230, nil, '+', nil,
                                        [ ['ID', 'mrna01'],
                                          ['Name', 'testmRNA'],
                                          ['Note', 'this is test mRNA'] ])
      assert_equal(mrna, @gff3.records[1])

      # "Parent=mrna01,mrna01a" is expected as two separate Parent pairs.
      exon = Bio::GFF::GFF3::Record.new('test01',
                                        nil,
                                        'exon',
                                        101, 160, nil, '+', nil,
                                        [ ['ID', 'exon01'],
                                          ['Name', 'exon01'],
                                          ['Alias', 'exon 1'],
                                          ['Parent', 'mrna01'],
                                          ['Parent', 'mrna01a'] ])
      assert_equal(exon, @gff3.records[3])

      # Target and Gap values are expected as dedicated objects.
      match_target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21)
      match_gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
      match = Bio::GFF::GFF3::Record.new('test01',
                                         nil,
                                         'Match',
                                         101, 123, nil, nil, nil,
                                         [ ['ID', 'match01'],
                                           ['Name', 'match01'],
                                           ['Target', match_target],
                                           ['Gap', match_gap] ])
      assert_equal(match, @gff3.records[6])
    end

    # The FASTA section is expected as one sequence; its content is
    # verified through a SHA1 digest instead of a literal comparison.
    def test_sequences
      assert_equal(1, @gff3.sequences.size)
      fasta_entry = @gff3.sequences[0]
      assert_equal('test01', fasta_entry.entry_id)
      assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4',
                   Digest::SHA1.hexdigest(fasta_entry.seq.to_s))
    end

    # Serializing the parsed document must reproduce the input text.
    def test_to_s
      assert_equal(@data, @gff3.to_s)
    end

  end #class TestGFF3
  # Tests for Bio::GFF::GFF3::Record on one line whose attributes use
  # percent-escapes, comma-separated multiple values and Target entries.
  class TestGFF3Record < Test::Unit::TestCase

    # Parses the fixture line into @obj before each test.
    # NOTE(review): GFF columns are normally tab-separated; confirm that
    # the field separators in the sample below are the intended ones.
    def setup
      data =<<END_OF_DATA
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome%20I%20centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
END_OF_DATA
      @obj = Bio::GFF::GFF3::Record.new(data)
    end

    def test_seqname
      assert_equal('chrI', @obj.seqname)
    end

    def test_source
      assert_equal('SGD', @obj.source)
    end

    def test_feature
      assert_equal('centromere', @obj.feature)
    end

    # Coordinates are expected as integers.
    def test_start
      assert_equal(151467, @obj.start)
    end

    def test_end
      assert_equal(151584, @obj.end)
    end

    # The score column is "." in the fixture; nil is expected.
    def test_score
      # assert_nil states the intent directly instead of comparing with nil.
      assert_nil(@obj.score)
    end

    def test_strand
      assert_equal('+', @obj.strand)
    end

    # The frame column is "." in the fixture; nil is expected.
    def test_frame
      assert_nil(@obj.frame)
    end

    # Expected values are unescaped (%3B -> ";", %20 -> " ", %2C -> ",",
    # %3D -> "=", %25 -> "%", %09 -> tab), comma-separated values become
    # separate pairs, and Target values become Target objects.
    def test_attributes
      attr = [
        ['ID', 'CEN1'],
        ['Name', 'CEN1'],
        ['gene', 'CEN1'],
        ['Alias', 'CEN1'],
        ['Alias', 'test;0001'],
        ['Note', 'Chromosome I centromere'],
        ['dbxref', 'SGD:S000006463'],
        ['Target',
         Bio::GFF::GFF3::Record::Target.new('test 02', 123, 456, '-')],
        ['Target',
         Bio::GFF::GFF3::Record::Target.new('test,03', 159, 314)],
        ['memo=test;attr', "99.9%\tmatch"]
      ]
      assert_equal(attr, @obj.attributes)
    end

    def test_id
      assert_equal('CEN1', @obj.id)
    end

    # The expected output matches the input except that the Note value
    # has plain spaces where the input had "%20".
    def test_to_s
      str =<<END_OF_DATA
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome I centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
END_OF_DATA
      assert_equal(str, @obj.to_s)
    end

    # After sorting by the given tag list, the unlisted "memo=test;attr"
    # attribute is expected last.
    def test_to_s_attr_order_changed
      str = <<END_OF_STR
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;Alias=CEN1,test%3B0001;Target=test%2002 123 456 -,test%2C03 159 314;Note=Chromosome I centromere;dbxref=SGD:S000006463;gene=CEN1;memo%3Dtest%3Battr=99.9%25%09match
END_OF_STR
      keys = [ 'ID', 'Name', 'Alias', 'Target', 'Note', 'dbxref', 'gene' ]
      @obj.sort_attributes_by_tag!(keys)
      assert_equal(str, @obj.to_s)
    end

  end #class TestGFF3Record
  676. class TestGFF3RecordMisc < Test::Unit::TestCase
  677. def test_attributes_none
  678. # test blank with tab
  679. data =<<END_OF_DATA
  680. I sgd gene 151453 151591 . + .
  681. END_OF_DATA
  682. obj = Bio::GFF::GFF3::Record.new(data)
  683. assert_equal([], obj.attributes)
  684. # test blank with no tab at end
  685. data =<<END_OF_DATA
  686. I sgd gene 151453 151591 . + .
  687. END_OF_DATA
  688. obj = Bio::GFF::GFF3::Record.new(data)
  689. assert_equal([], obj.attributes)
  690. end
  691. def test_attributes_one
  692. data =<<END_OF_DATA
  693. I sgd gene 151453 151591 . + . ID=CEN1
  694. END_OF_DATA
  695. obj = Bio::GFF::GFF3::Record.new(data)
  696. at = [ ["ID", 'CEN1'] ]
  697. assert_equal(at, obj.attributes)
  698. end
  699. def test_attributes_with_escaping
  700. data =<<END_OF_DATA
  701. I sgd gene 151453 151591 . + . ID=CEN1;gene=CEN1%3Boh;Note=Chromosome I Centromere
  702. END_OF_DATA
  703. obj = Bio::GFF::GFF3::Record.new(data)
  704. at = [ ['ID', 'CEN1'],
  705. ["gene", 'CEN1;oh'],
  706. ["Note", 'Chromosome I Centromere']
  707. ]
  708. assert_equal(at, obj.attributes)
  709. end
  710. def test_score
  711. data =<<END_OF_DATA
  712. ctg123 src match 456 788 1e-10 - . ID=test01
  713. END_OF_DATA
  714. obj = Bio::GFF::GFF3::Record.new(data)
  715. assert_equal(1e-10, obj.score)
  716. obj.score = 0.5
  717. assert_equal(0.5, obj.score)
  718. end
  719. def test_phase
  720. data =<<END_OF_DATA
  721. ctg123 src CDS 456 788 . - 2 ID=test02
  722. END_OF_DATA
  723. obj = Bio::GFF::GFF3::Record.new(data)
  724. assert_equal(2, obj.phase)
  725. assert_equal(2, obj.frame)
  726. obj.phase = 1
  727. assert_equal(1, obj.phase)
  728. assert_equal(1, obj.frame)
  729. end
  730. def test_id_replace
  731. data =<<END_OF_DATA
  732. ctg123 src CDS 456 788 1e-10 - 2 ID=test03
  733. END_OF_DATA
  734. obj = Bio::GFF::GFF3::Record.new(data)
  735. assert_equal('test03', obj.id)
  736. assert_equal('test_id', obj.id = 'test_id')
  737. assert_equal('test_id', obj.id)
  738. end
  # When a record has no ID attribute, #id is nil; #id= then sets the ID,
  # and a second #id= replaces the value set by the first.
  def test_id_set
    # GFF3 columns are tab-delimited; spell the tabs out explicitly.
    # The only attribute is NAME, so the record starts without an ID.
    data = "ctg123\tsrc\tCDS\t456\t788\t1e-10\t-\t2\tNAME=test03\n"
    obj = Bio::GFF::GFF3::Record.new(data)
    assert_nil(obj.id)
    assert_equal('test_id', obj.id = 'test_id')
    assert_equal('test_id', obj.id)
    assert_equal('next_test', obj.id = 'next_test')
    assert_equal('next_test', obj.id)
  end
  # An ID attribute carrying two comma-separated values is exposed as two
  # ['ID', value] pairs; #id reads, and #id= replaces, only the first one.
  def test_id_multiple
    # Note: Two ID attributes in a record is illegal in GFF3.
    # GFF3 columns are tab-delimited; the fixture uses the same explicit
    # \t form as the expected #to_s output below.
    data = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test03,test04\n"
    obj = Bio::GFF::GFF3::Record.new(data)
    assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
                 obj.attributes)
    assert_equal('test03', obj.id)
    assert_equal('test_id', obj.id = 'test_id')
    assert_equal('test_id', obj.id)
    assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
                 obj.attributes)
    str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
    assert_equal(str, obj.to_s)
  end
  # Two separate "ID=" entries are exposed as two ['ID', value] pairs;
  # #id reads, and #id= replaces, only the first one.
  def test_id_multiple2
    # Note: Two ID attributes in a record is illegal in GFF3.
    # GFF3 columns are tab-delimited; the fixture uses the same explicit
    # \t form as the expected #to_s output below.
    data = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test03;ID=test04\n"
    obj = Bio::GFF::GFF3::Record.new(data)
    assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
                 obj.attributes)
    assert_equal('test03', obj.id)
    assert_equal('test_id', obj.id = 'test_id')
    assert_equal('test_id', obj.id)
    assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
                 obj.attributes)
    # The "XXX=test03;XXX=test04" is automatically changed to
    # "XXX=test03,test04", as defined in the GFF3 spec.
    str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
    assert_equal(str, obj.to_s)
  end
  # Builds a record from nine separate arguments (one per GFF3 column)
  # instead of a text line, and checks that the first one becomes the seqid.
  def test_initialize_9
    attrs = [ ['ID', 'test01'], ['Note', 'this is test'] ]
    record = Bio::GFF::GFF3::Record.new('test01', 'testsrc', 'exon',
                                        1, 400, nil, '+', nil,
                                        attrs)
    assert_equal('test01', record.seqid)
  end
  # A record created without any arguments serializes to nine "." columns
  # separated by tabs and terminated by a newline.
  def test_to_s_void
    void_line = ".\t.\t.\t.\t.\t.\t.\t.\t.\n"
    assert_equal(void_line, Bio::GFF::GFF3::Record.new.to_s)
  end
  797. end #class TestGFF3RecordMisc
  # Tests for the percent-encoding helpers provided by
  # Bio::GFF::GFF3::Escape, mixed into a plain Object for the occasion.
  class TestGFF3RecordEscape < Test::Unit::TestCase

    # Sample text containing a tab, "%", ";", "=", "," and a space.
    def setup
      @obj = Object.new.extend(Bio::GFF::GFF3::Escape)
      @str = "A>B\tC=100%;d=e,f,g h"
    end

    # escape: only the tab and "%" are encoded.
    def test_escape
      assert_equal('A>B%09C=100%25;d=e,f,g h',
                   invoke(:escape, @str))
    end

    # escape_attribute: additionally encodes "=", ";" and ",".
    def test_escape_attribute
      assert_equal('A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h',
                   invoke(:escape_attribute, @str))
    end

    # escape_seqid: additionally encodes ">" and the space.
    def test_escape_seqid
      assert_equal('A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h',
                   invoke(:escape_seqid, @str))
    end

    # unescape: decodes every percent-encoded sequence back to the sample.
    def test_unescape
      encoded = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h'
      assert_equal(@str, invoke(:unescape, encoded))
    end

    private

    # Calls the named helper on the mixin host.  __send__ is used so the
    # call works whatever the helper's visibility is.
    def invoke(helper, text)
      @obj.__send__(helper, text)
    end
  end #class TestGFF3RecordEscape
  # Tests for Bio::GFF::GFF3::Record::Target, the value of the GFF3
  # "Target" attribute (target id, start, end and an optional strand).
  class TestGFF3RecordTarget < Test::Unit::TestCase

    # Three fixtures: a fully specified target, a target whose id needs
    # percent-encoding and has no strand, and an all-nil target.
    def setup
      @target =
        [ Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'),
          Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90),
          Bio::GFF::GFF3::Record::Target.new(nil, nil, nil),
        ]
    end

    # Target.parse must rebuild each fixture from its textual form.
    def test_parse
      strings =
        [ 'ABCD1234 123 456 +',
          '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
          ''
        ]
      # Pair fixtures with their text instead of shifting a side array,
      # so the pairing is explicit and nothing is mutated while iterating.
      @target.zip(strings) do |target, str|
        assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str))
      end
    end

    def test_target_id
      assert_equal('ABCD1234', @target[0].target_id)
      assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id)
      # assert_nil, like the other nil checks in this class.
      assert_nil(@target[2].target_id)
    end

    def test_start
      assert_equal(123, @target[0].start)
      assert_equal(78, @target[1].start)
      assert_nil(@target[2].start)
    end

    def test_end
      assert_equal(456, @target[0].end)
      assert_equal(90, @target[1].end)
      assert_nil(@target[2].end)
    end

    def test_strand
      assert_equal('+', @target[0].strand)
      assert_nil(@target[1].strand)
      assert_nil(@target[2].strand)
    end

    # to_s percent-encodes the target id and prints "." for nil fields.
    def test_to_s
      assert_equal('ABCD1234 123 456 +', @target[0].to_s)
      assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s)
      assert_equal('. . .', @target[2].to_s)
    end
  end #class TestGFF3RecordTarget
  # Tests for Bio::GFF::GFF3::Record::Gap, the value of the GFF3 "Gap"
  # attribute.
  #
  # Layout of the alignment strings used below: in nucleotide-vs-protein
  # alignments every amino acid (or "-" gap) on the protein line is written
  # as the residue followed by two spaces, and each frameshift marker
  # (">" or "<") takes one column.  This makes the protein line exactly as
  # long as the nucleotide line it is paired with.
  class TestGFF3RecordGap < Test::Unit::TestCase

    def setup
      # examples taken from http://song.sourceforge.net/gff3.shtml
      @gaps_src = [ 'M8 D3 M6 I1 M6',
                    'M3 I1 M2 F1 M4',
                    'M3 I1 M2 R1 M4' ]
      @gaps = @gaps_src.map { |x| Bio::GFF::GFF3::Record::Gap.new(x) }
    end

    # Each Gap must serialize back to the string it was built from.
    def test_to_s
      # zip keeps @gaps intact instead of draining it with shift.
      @gaps_src.zip(@gaps) do |src, gap|
        assert_equal(src, gap.to_s)
      end
    end

    def test_eqeq
      gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
      assert(gap == @gaps[0])
      assert_equal(false, gap == @gaps[1])
    end

    def test_process_sequences_na
      ref = 'CAAGACCTAAACTGGATTCCAAT'
      tgt = 'CAAGACCTCTGGATATCCAAT'
      ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
      tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'
      assert_equal([ ref_aligned, tgt_aligned ],
                   @gaps[0].process_sequences_na(ref, tgt))
    end

    # Sequences shorter than the gap description requires must raise.
    def test_process_sequences_na_tooshort
      ref = 'CAAGACCTAAACTGGATTCCAA'
      tgt = 'CAAGACCTCTGGATATCCAA'
      assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
      ref = 'c'
      tgt = 'c'
      assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
    end

    def test_process_sequences_na_aa
      ref1 = 'atgaaggaggttattgaatgtcggcggt'
      tgt1 = 'MKEVVINVGG'
      ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt'
      tgt1_aligned = 'M  K  E  V  V  I  >N  V  G  G  '
      assert_equal([ ref1_aligned, tgt1_aligned ],
                   @gaps[1].process_sequences_na_aa(ref1, tgt1))
    end

    def test_process_sequences_na_aa_reverse_frameshift
      ref2 = 'atgaaggaggttataatgtcggcggt'
      tgt2 = 'MKEVVINVGG'
      ref2_aligned = 'atgaaggag---gttat<aatgtcggcggt'
      tgt2_aligned = 'M  K  E  V  V  I  N  V  G  G  '
      assert_equal([ ref2_aligned, tgt2_aligned ],
                   @gaps[2].process_sequences_na_aa(ref2, tgt2))
    end

    def test_process_sequences_na_aa_reverse_frameshift_more
      gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
      ref = 'atgaagattaatgtc'
      tgt = 'MKIINV'
      ref_aligned = 'atgaag<<<attaatgtc'
      tgt_aligned = 'M  K  I  I  N  V  '
      assert_equal([ ref_aligned, tgt_aligned ],
                   gap.process_sequences_na_aa(ref, tgt))
    end

    # Sequences shorter than the gap description requires must raise.
    def test_process_sequences_na_aa_tooshort
      ref2 = 'atgaaggaggttataatgtcggcgg'
      tgt2 = 'MKEVVINVG'
      assert_raise(RuntimeError) do
        @gaps[2].process_sequences_na_aa(ref2, tgt2)
      end
      ref2 = 'atg'
      tgt2 = 'M'
      assert_raise(RuntimeError) do
        @gaps[2].process_sequences_na_aa(ref2, tgt2)
      end
    end

    # __scan_gap is called through instance_eval, so the strings are
    # captured as locals visible inside the block.
    def test___scan_gap
      str1 = 'CAAGACCT---CTGGATATCCAAT'
      str2 = '-aaaaaaa-a-a---ggag--'
      c = Bio::GFF::GFF3::Record::Gap::Code
      data1 = [ c.new(:M, 8), c.new(:I, 3), c.new(:M, 13) ]
      data2 = [ c.new(:I, 1), c.new(:M, 7), c.new(:I, 1),
                c.new(:M, 1), c.new(:I, 1), c.new(:M, 1),
                c.new(:I, 3), c.new(:M, 4), c.new(:I, 2) ]
      assert_equal(data1, @gaps[0].instance_eval { __scan_gap(str1) })
      assert_equal(data2, @gaps[0].instance_eval { __scan_gap(str2) })
    end

    def test_new_from_sequences_na
      ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
      tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'
      assert_equal(@gaps[0],
                   Bio::GFF::GFF3::Record::Gap.new_from_sequences_na(ref_aligned,
                                                                     tgt_aligned))
    end

    def test_new_from_sequences_na_aa
      ref = 'atgaaggag---gttattgaatgtcggcggt'
      tgt = 'M  K  E  V  V  I  >N  V  G  G  '
      assert_equal(@gaps[1],
                   Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                        tgt))
    end

    def test_new_from_sequences_na_aa_reverse_frameshift
      ref = 'atgaaggag---gttat<aatgtcggcggt'
      tgt = 'M  K  E  V  V  I  N  V  G  G  '
      assert_equal(@gaps[2],
                   Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                        tgt))
    end

    def test_new_from_sequences_na_aa_reverse_frameshift_more
      gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
      ref = 'atgaag<<<attaatgtc'
      tgt = 'M  K  I  I  N  V  '
      assert_equal(gap,
                   Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                        tgt))
    end

    # Gaps located at the very beginning or end of either sequence.
    def test_new_from_sequences_na_aa_boundary_gap
      g = Bio::GFF::GFF3::Record::Gap

      ref = '---atgatg'
      tgt = 'K  M  M  '
      assert_equal(Bio::GFF::GFF3::Record::Gap.new('I1 M2'),
                   g.new_from_sequences_na_aa(ref, tgt))

      ref = 'atgatg---'
      tgt = 'M  M  K  '
      assert_equal(Bio::GFF::GFF3::Record::Gap.new('M2 I1'),
                   g.new_from_sequences_na_aa(ref, tgt))

      ref = 'atgatgatg'
      tgt = '-  M  M  '
      assert_equal(Bio::GFF::GFF3::Record::Gap.new('D1 M2'),
                   g.new_from_sequences_na_aa(ref, tgt))

      ref = 'atgatgatg'
      tgt = 'M  M  -  '
      assert_equal(Bio::GFF::GFF3::Record::Gap.new('M2 D1'),
                   g.new_from_sequences_na_aa(ref, tgt))
    end

    # Two different textual alignments must yield the same Gap.
    def test_new_from_sequences_na_aa_example
      gap = Bio::GFF::GFF3::Record::Gap.new('M2 R1 M1 F2 M1')
      ref1 = 'atgg-taagac-att'
      tgt1 = 'M  V  K  -  I  '
      ref2 = 'atggt<aagacatt'
      tgt2 = 'M  V  K  >>I  '
      gap1 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref1, tgt1)
      assert_equal(gap, gap1)
      gap2 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref2, tgt2)
      assert_equal(gap, gap2)
    end
  end #class TestGFF3RecordGap
  # Tests for Bio::GFF::GFF3::SequenceRegion, which represents a
  # "##sequence-region" directive line (seqid, start, end).
  class TestGFF3SequenceRegion < Test::Unit::TestCase

    # Three fixtures: a plain region, a region whose seqid needs
    # percent-encoding, and an all-nil region.
    def setup
      @data =
        [ Bio::GFF::GFF3::SequenceRegion.new('ABCD1234', 123, 456),
          Bio::GFF::GFF3::SequenceRegion.new(">X Y=Z;P%,Q\tR", 78, 90),
          Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil),
        ]
    end

    # SequenceRegion.parse must rebuild each fixture from its directive line.
    def test_parse
      strings =
        [ '##sequence-region ABCD1234 123 456',
          '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
          '##sequence-region'
        ]
      # Pair fixtures with their text instead of shifting a side array,
      # so the pairing is explicit and nothing is mutated while iterating.
      @data.zip(strings) do |reg, str|
        assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str))
      end
    end

    def test_seqid
      assert_equal('ABCD1234', @data[0].seqid)
      assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid)
      # assert_nil, like the other nil checks in this class.
      assert_nil(@data[2].seqid)
    end

    def test_start
      assert_equal(123, @data[0].start)
      assert_equal(78, @data[1].start)
      assert_nil(@data[2].start)
    end

    def test_end
      assert_equal(456, @data[0].end)
      assert_equal(90, @data[1].end)
      assert_nil(@data[2].end)
    end

    # to_s percent-encodes the seqid, prints "." for nil fields and ends
    # the line with a newline.
    def test_to_s
      assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s)
      assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n",
                   @data[1].to_s)
      assert_equal("##sequence-region . . .\n", @data[2].to_s)
    end
  end #class TestGFF3SequenceRegion
  # Tests for Bio::GFF::GFF3::MetaData, a generic "##directive data" line.
  class TestGFF3MetaData < Test::Unit::TestCase

    # Data part of the sample "##feature-ontology" directive.
    ONTOLOGY_URL = 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'

    def setup
      @data = Bio::GFF::GFF3::MetaData.new('feature-ontology', ONTOLOGY_URL)
    end

    # Parsing the directive line must give an object equal to the fixture.
    def test_parse
      line = "##feature-ontology #{ONTOLOGY_URL}"
      parsed = Bio::GFF::GFF3::MetaData.parse(line)
      assert_equal(@data, parsed)
    end

    def test_directive
      assert_equal('feature-ontology', @data.directive)
    end

    def test_data
      assert_equal(ONTOLOGY_URL, @data.data)
    end
  end #class TestGFF3MetaData
  1068. end #module Bio