PageRenderTime 72ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/test/unit/bio/db/embl/test_sptr.rb

https://github.com/nmb/bioruby
Ruby | 1810 lines | 1497 code | 200 blank | 113 comment | 22 complexity | c16f93b60739c6dd39bcbe3c15d5cf42 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. #
  2. # test/unit/bio/db/embl/test_sptr.rb - Unit test for Bio::SPTR
  3. #
  4. # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
  5. # License:: The Ruby License
  6. #
  7. # $Id:$
  8. #
  9. # loading helper routine for testing bioruby
  10. require 'pathname'
  11. load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
  12. 'bioruby_test_helper.rb')).cleanpath.to_s
  13. # libraries needed for the tests
  14. require 'test/unit'
  15. require 'bio/db/embl/sptr'
  16. module Bio
  # Unit tests for the Bio::SPTR accessors.
  #
  # Every test runs against a freshly parsed copy of the
  # 'uniprot/p53_human.uniprot' fixture (entry P53_HUMAN) found under
  # BioRubyTestDataPath.
  class TestSPTR < Test::Unit::TestCase
    # Reads the fixture file and parses it into @obj before each test.
    def setup
      data = File.read(File.join(BioRubyTestDataPath,
                                 'uniprot', 'p53_human.uniprot'))
      @obj = Bio::SPTR.new(data)
    end

    # --- ID line -----------------------------------------------------------

    def test_id_line
      assert(@obj.id_line)
    end

    def test_id_line_entry_name
      assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
    end

    def test_id_line_data_class
      assert_equal('STANDARD', @obj.id_line('DATA_CLASS'))
    end

    def test_id_line_molecule_type
      assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
    end

    def test_id_line_sequence_length
      assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
    end

    # entry, entry_name and entry_id are expected to agree.
    def test_entry
      entry = 'P53_HUMAN'
      assert_equal(entry, @obj.entry)
      assert_equal(entry, @obj.entry_name)
      assert_equal(entry, @obj.entry_id)
    end

    def test_molecule
      assert_equal('PRT', @obj.molecule)
      assert_equal('PRT', @obj.molecule_type)
    end

    def test_sequence_length
      seqlen = 393
      assert_equal(seqlen, @obj.sequence_length)
      assert_equal(seqlen, @obj.aalen)
    end

    # --- AC line -----------------------------------------------------------

    def test_ac
      acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
             "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1",
             "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
             "Q9NZD0", "Q9UBI2", "Q9UQ61"]
      assert_equal(acs, @obj.ac)
      assert_equal(acs, @obj.accessions)
    end

    def test_accession
      assert_equal('P04637', @obj.accession)
    end

    # --- DR line -----------------------------------------------------------

    def test_dr
      assert_equal(17, @obj.dr.size)
      assert_equal(27, @obj.dr['GO'].size)
      assert_equal([["IPR002117", "P53"],
                    ["IPR011615", "P53_DNA_bd"],
                    ["IPR012346", "P53_RUNT_DNA_bd"],
                    ["IPR010991", "p53_tetrameristn"]],
                   @obj.dr['InterPro'])
    end

    def test_dr_with_key
      pfam = [
        { " " => "1",
          "Version" => "P53",
          "Accession" => "PF00870",
          "Molecular Type" => nil
        },
        { " " => "1",
          "Version" => "P53_tetramer",
          "Accession" => "PF07710",
          "Molecular Type" => nil
        }
      ]
      assert_equal(pfam, @obj.dr('Pfam'))
      embl3 = {
        " " => "JOINED",
        "Version" => "AAA59987.1",
        "Accession" => "M13113",
        "Molecular Type" => "Genomic_DNA"
      }
      assert_equal(embl3, @obj.dr('EMBL')[3])
    end

    def test_dr_with_key_empty
      assert_equal([], @obj.dr('NOT_A_DATABASE'))
    end

    # --- DT line -----------------------------------------------------------

    def test_dt
      assert(@obj.dt)
    end

    def test_dt_created
      assert_equal('13-AUG-1987 (Rel. 05, Created)', @obj.dt('created'))
    end

    def test_dt_sequence
      assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)',
                   @obj.dt('sequence'))
    end

    def test_dt_annotation
      assert_equal('13-SEP-2005 (Rel. 48, Last annotation update)',
                   @obj.dt('annotation'))
    end

    # --- DE line -----------------------------------------------------------

    def test_de
      assert(@obj.de)
    end

    def test_protein_name
      assert_equal("Cellular tumor antigen p53", @obj.protein_name)
    end

    def test_synonyms
      ary = ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]
      assert_equal(ary, @obj.synonyms)
    end

    # --- GN line -----------------------------------------------------------

    def test_gn
      assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
                   @obj.gn)
    end

    # gn_uniprot_parser is invoked through send so the call works even if
    # the method is not public.
    def test_gn_uniprot_parser
      assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
                   @obj.send(:gn_uniprot_parser))
    end

    # gn_old_parser is invoked through send for the same reason as above.
    def test_gn_old_parser
      assert_equal([["Name=TP53; Synonyms=P53;"]],
                   @obj.send(:gn_old_parser))
    end

    def test_gene_names
      assert_equal(["TP53"], @obj.gene_names)
    end

    def test_gene_name
      assert_equal('TP53', @obj.gene_name)
    end

    # --- OS / OG / OC / OX lines -------------------------------------------

    def test_os
      assert(@obj.os)
    end

    def test_os_access
      assert_equal("Homo sapiens (Human)", @obj.os(0))
    end

    def test_os_access2
      assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0])
    end

    def test_og_1
      assert_og(['Plastid', 'Chloroplast'], "OG Plastid; Chloroplast.")
    end

    def test_og_2
      assert_og(['Mitochondrion'], "OG Mitochondrion.")
    end

    def test_og_3
      assert_og(["Plasmid sym pNGR234a"], "OG Plasmid sym pNGR234a.")
    end

    def test_og_4
      assert_og(['Plastid', 'Cyanelle'], "OG Plastid; Cyanelle.")
    end

    def test_og_5
      assert_og(["Plasmid pSymA (megaplasmid 1)"],
                "OG Plasmid pSymA (megaplasmid 1).")
    end

    def test_og_6
      assert_og(['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'],
                "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.")
    end

    def test_oc
      assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
                    "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
                    "Euarchontoglires", "Primates", "Catarrhini", "Hominidae",
                    "Homo"],
                   @obj.oc)
    end

    def test_ox
      assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
    end

    # --- References and comments -------------------------------------------

    def test_ref # Bio::SPTR#ref
      assert_instance_of(Array, @obj.ref)
    end

    def test_cc
      assert_instance_of(Hash, @obj.cc)
    end

    def test_cc_database
      db = [{"NAME" => "IARC TP53 mutation database",
             "WWW" => "http://www.iarc.fr/p53/",
             "FTP" => nil, "NOTE" => "IARC db of somatic p53 mutations"},
            {"NAME" => "Tokyo p53",
             "WWW" => "http://p53.genome.ad.jp/", "FTP" => nil,
             "NOTE" => "University of Tokyo db of p53 mutations"},
            {"NAME" => "p53 web site at the Institut Curie",
             "WWW" => "http://p53.curie.fr/", "FTP" => nil, "NOTE" => nil},
            {"NAME" => "Atlas Genet. Cytogenet. Oncol. Haematol.",
             "WWW" => "http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html",
             "FTP" => nil, "NOTE" => nil}]
      assert_equal(db, @obj.cc('DATABASE'))
    end

    def test_cc_alternative_products
      ap = {"Comment" => "",
            "Named isoforms" => "2",
            "Variants" => [{"IsoId" => ["P04637-1"],
                            "Name" => "1",
                            "Synonyms" => [],
                            "Sequence" => ["Displayed"]},
                           {"IsoId" => ["P04637-2"],
                            "Name" => "2",
                            "Synonyms" => ["I9RET"],
                            "Sequence" => ["VSP_006535", "VSP_006536"]}],
            "Event" => ["Alternative splicing"]}
      assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
    end

    # The fixture is expected to yield nil for MASS SPECTROMETRY.
    def test_cc_mass_spectrometry
      assert_nil(@obj.cc('MASS SPECTROMETRY'))
    end

    # --- KW / FT lines -----------------------------------------------------

    def test_kw
      keywords = ["3D-structure", "Acetylation", "Activator",
                  "Alternative splicing", "Anti-oncogene",
                  "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding",
                  "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding",
                  "Nuclear protein", "Phosphorylation", "Polymorphism",
                  "Transcription", "Transcription regulation", "Zinc"]
      assert_equal(keywords, @obj.kw)
    end

    def test_ft
      assert(@obj.ft)
      name = 'DNA_BIND'
      assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292,
                     "Description"=>"",
                     "original" => ['DNA_BIND', '102', '292', '', '']}],
                   @obj.ft[name])
    end

    # --- SQ line and sequence ----------------------------------------------

    def test_sq
      assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653},
                   @obj.sq)
    end

    def test_sq_crc64
      assert_equal("AD5C149FD8106131", @obj.sq('CRC64'))
    end

    # 'mw', 'molecular' and 'weight' are all expected to select the weight.
    def test_sq_mw
      mw = 43653
      assert_equal(mw, @obj.sq('mw'))
      assert_equal(mw, @obj.sq('molecular'))
      assert_equal(mw, @obj.sq('weight'))
    end

    # 'len', 'length' and 'AA' are all expected to select the length.
    def test_sq_len
      length = 393
      assert_equal(length, @obj.sq('len'))
      assert_equal(length, @obj.sq('length'))
      assert_equal(length, @obj.sq('AA'))
    end

    def test_seq
      seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
      assert_equal(seq, @obj.seq)
      assert_equal(seq, @obj.aaseq)
    end

    private

    # Replaces the 'OG' value held in the parsed entry's @orig with
    # +og_line+ and asserts that Bio::SPTR#og then returns +expected+.
    #
    # The line is dup'ed so the object under test never holds (or mutates)
    # the caller's string literal.
    def assert_og(expected, og_line)
      @obj.instance_variable_get(:@orig)['OG'] = og_line.dup
      assert_equal(expected, @obj.og)
    end
  end # class TestSPTR
  276. class TestSPTRCC < Test::Unit::TestCase
  # ALLERGEN topic: information relevant to allergenic proteins.
  # Hash-style access (cc[...]) and the topic-argument call (cc(...)) are
  # both expected to give the same one-element array.
  def test_allergen
    expected = ['Causes an allergic reaction in human.']
    entry = Bio::SPTR.new('CC -!- ALLERGEN: Causes an allergic reaction in human.')
    assert_equal(expected, entry.cc['ALLERGEN'])
    assert_equal(expected, entry.cc('ALLERGEN'))
  end
  # Hash-style access to ALTERNATIVE PRODUCTS is expected to return the
  # CC lines of the topic joined into a single string inside an array.
  def test_alternative_products_access_as_hash
    cc_lines = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative initiation; Named isoforms=2;
CC Name=Long;
CC IsoId=P68250-1; Sequence=Displayed;
CC Name=Short;
CC IsoId=P68250-2; Sequence=VSP_018631;
CC Note=Contains a N-acetylmethionine at position 1 (By
CC similarity);"
    joined = "Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['ALTERNATIVE PRODUCTS'])
  end
  # ALTERNATIVE PRODUCTS topic, "Alternative initiation" event.
  #
  # The topic describes related protein sequences produced by alternative
  # splicing of the same gene, alternative promoter usage, ribosomal
  # frameshifting or the use of alternative initiation codons (see 3.21.15).
  # Possible events: Alternative promoter usage, Alternative splicing,
  # Alternative initiation, Ribosomal frameshifting.
  def test_alternative_products_ai
    cc_lines = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative initiation; Named isoforms=2;
CC Name=Long;
CC IsoId=P68250-1; Sequence=Displayed;
CC Name=Short;
CC IsoId=P68250-2; Sequence=VSP_018631;
CC Note=Contains a N-acetylmethionine at position 1 (By
CC similarity);"
    long_isoform = {"IsoId"=>["P68250-1"],
                    "Name"=>"Long",
                    "Synonyms" => [],
                    "Sequence"=>["Displayed"]}
    short_isoform = {"IsoId"=>["P68250-2"],
                     "Name"=>"Short",
                     "Synonyms" => [],
                     "Sequence"=>["VSP_018631"]}
    expected = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[long_isoform, short_isoform],
                "Event"=>["Alternative initiation"]}
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # ALTERNATIVE PRODUCTS topic, "Alternative splicing" event with a
  # synonym and a multi-valued Sequence field on the second isoform.
  def test_alternative_products_as
    cc_lines = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=2;
CC Name=1;
CC IsoId=P04637-1; Sequence=Displayed;
CC Name=2; Synonyms=I9RET;
CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;
CC Note=Seems to be non-functional. Expressed in quiescent
CC lymphocytes;"
    isoform_one = {"Name"=>"1",
                   "IsoId"=>["P04637-1"],
                   "Synonyms"=>[],
                   "Sequence"=>["Displayed"]}
    isoform_two = {"IsoId"=>["P04637-2"],
                   "Name"=>"2",
                   "Synonyms"=>["I9RET"],
                   "Sequence"=>["VSP_006535", "VSP_006536"]}
    expected = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[isoform_one, isoform_two],
                "Event"=>["Alternative splicing"]}
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # ALTERNATIVE PRODUCTS topic with two events (promoter usage and
  # splicing), a Comment field and five named isoforms.
  def test_alternative_products_apu
    cc_lines = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;
CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;
CC Name=1; Synonyms=AAT-1M;
CC IsoId=Q7Z4T9-1; Sequence=Displayed;
CC Name=2;
CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;
CC Note=No experimental confirmation available;
CC Name=3;
CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;
CC Name=4; Synonyms=AAT1-alpha;
CC IsoId=Q7Z4T9-4; Sequence=VSP_014908;
CC Note=May be produced by alternative promoter usage;
CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;
CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;
CC Note=May be produced by alternative promoter usage;"
    variants = [
      {"Name"=>"1",
       "IsoId"=>["Q7Z4T9-1"],
       "Synonyms"=>["AAT-1M"],
       "Sequence"=>["Displayed"]},
      {"Name"=>"2",
       "IsoId"=>["Q7Z4T9-2"],
       "Synonyms" => [],
       "Sequence"=>["VSP_014910", "VSP_014911"]},
      {"Name"=>"3",
       "IsoId"=>["Q7Z4T9-3"],
       "Synonyms" => [],
       "Sequence"=>["VSP_014907", "VSP_014912"]},
      {"Name"=>"4",
       "IsoId"=>["Q7Z4T9-4"],
       "Synonyms"=>["AAT1-alpha"],
       "Sequence"=>["VSP_014908"]},
      {"Name"=>"5",
       "IsoId"=>["Q7Z4T9-5"],
       "Synonyms"=>["AAT1-beta", "AAT1-gamma"],
       "Sequence"=>["VSP_014909"]}
    ]
    expected = {"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist",
                "Named isoforms"=>"5",
                "Variants"=>variants,
                "Event"=>["Alternative promoter usage", "Alternative splicing"]}
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # An entry built from an empty string is expected to give an empty
  # hash for ALTERNATIVE PRODUCTS.
  def test_alternative_products_rf
    entry = Bio::SPTR.new("")
    parsed = entry.cc('ALTERNATIVE PRODUCTS')
    assert_equal({}, parsed)
  end
  # BIOPHYSICOCHEMICAL PROPERTIES topic: biophysical and physicochemical
  # data such as pH dependence, temperature dependence, kinetic
  # parameters, redox potentials and maximal absorption (see 3.21.8).
  #
  # Two inputs are checked: a concrete example, then the generic syntax
  # template of the topic (3.12.2).
  def test_biophysicochemical_properties
    example_lines = 'CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC Kinetic parameters:
CC KM=45 uM for AdoMet;
CC Vmax=32 uM/h/mg enzyme;
CC pH dependence:
CC Optimum pH is 8.2;'
    example = Bio::SPTR.new(example_lines)
    example_joined = ["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"]
    assert_equal(example_joined, example.cc['BIOPHYSICOCHEMICAL PROPERTIES'])
    example_parsed = {"Redox potential" => "",
                      "Temperature dependence" => "",
                      "Kinetic parameters" => {"KM" => "45 uM for AdoMet",
                                               "Vmax" => "32 uM/h/mg enzyme"},
                      "Absorption" => {},
                      "pH dependence" => "Optimum pH is 8.2"}
    assert_equal(example_parsed, example.cc('BIOPHYSICOCHEMICAL PROPERTIES'))

    # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
    template_lines = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC Absorption:
CC Abs(max)=xx nm;
CC Note=free_text;
CC Kinetic parameters:
CC KM=xx unit for substrate [(free_text)];
CC Vmax=xx unit enzyme [free_text];
CC Note=free_text;
CC pH dependence:
CC free_text;
CC Redox potential:
CC free_text;
CC Temperature dependence:
CC free_text;"
    template = Bio::SPTR.new(template_lines)
    template_parsed = {"Redox potential"=>"free_text",
                       "Temperature dependence"=>"free_text",
                       "Kinetic parameters"=>
                         {"KM"=>"xx unit for substrate [(free_text)]",
                          "Note"=>"free_text",
                          "Vmax"=>"xx unit enzyme [free_text]"},
                       "Absorption"=>{"Note"=>"free_text", "Abs(max)"=>"xx nm"},
                       "pH dependence"=>"free_text"}
    assert_equal(template_parsed, template.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
  end
  # BIOTECHNOLOGY topic: use of a specific protein in a biotechnological
  # process. The wrapped CC lines are expected back as one joined string.
  def test_biotechnology
    cc_lines = 'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in
CC improved ripening tomato by Monsanto. ACC is the immediate
CC precursor of the phytohormone ethylene which is involved in the
CC control of ripening. ACC deaminase reduces ethylene biosynthesis
CC and thus extends the shelf life of fruits and vegetables.'
    joined = "Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['BIOTECHNOLOGY'])
  end
  # CATALYTIC ACTIVITY topic: reaction(s) catalyzed by an enzyme [1].
  # Words hyphenated across CC lines are expected to be rejoined.
  def test_catalytic_activity
    cc_lines = 'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-
CC methyladenine, 3-methylguanine, 7-methylguanine and 7-
CC methyladenine.'
    joined = "Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['CATALYTIC ACTIVITY'])
  end
  # CAUTION topic: warning about possible errors and/or grounds for
  # confusion. cc[...] is expected to wrap the text in an array while
  # cc(...) is expected to return the bare string.
  def test_caution
    cc_lines = 'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon
CC in position 480 which was translated as a stop codon to shorten
CC the sequence.'
    text = "Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([text], entry.cc['CAUTION'])
    assert_equal(text, entry.cc('CAUTION'))
  end
  # COFACTOR topic: any non-protein substance required by an enzyme for
  # its catalytic activity. Two COFACTOR blocks are expected to give two
  # array elements through either accessor form.
  def test_cofactor
    cc_lines = 'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.
CC -!- COFACTOR: Mg(2+).'
    expected = ["Cl(-). Is unique in requiring Cl(-) for its activity.",
                "Mg(2+)."]
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc['COFACTOR'])
    assert_equal(expected, entry.cc('COFACTOR'))
  end
  # DEVELOPMENTAL STAGE topic: developmentally-specific expression of
  # mRNA or protein. cc[...] is expected to give an array, cc(...) the
  # bare string.
  def test_developmental_stage
    cc_lines = 'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35
CC with higher levels detected at day 56. Isoform 1 is not detected
CC in males of any age.'
    text = "In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([text], entry.cc['DEVELOPMENTAL STAGE'])
    assert_equal(text, entry.cc('DEVELOPMENTAL STAGE'))
  end
  # DISEASE topic: disease(s) associated with a deficiency of a protein.
  # cc[...] is expected to give an array, cc(...) the bare string.
  def test_disease
    cc_lines = 'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral
CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This
CC disorder is characterized by amyloid deposits in cerebral vessels.
CC The principal clinical characteristics are recurring cerebral
CC hemorrhages, sometimes preceded by migrainous headaches or mental
CC cleavage. Various types of HCHWAD are known. They differ in onset
CC and aggressiveness of the disease. The Iowa type demonstrated no
CC cerebral hemorrhaging but is characterized by progressive
CC cognitive decline. Beta-APP40 is the predominant form of
CC cerebrovascular amyloid.'
    text = "Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([text], entry.cc['DISEASE'])
    assert_equal(text, entry.cc('DISEASE'))
  end
  # DOMAIN topic: domain structure of a protein. Two DOMAIN blocks are
  # expected to give the same two-element array through either accessor.
  def test_domain
    cc_lines = 'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for
CC sorting of membrane proteins to the basolateral surface of
CC epithelial cells.
CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine-
CC phosphorylated proteins is required for the specific binding of
CC the PID domain. However, additional amino acids either N- or C-
CC terminal to the NPXY motif are often required for complete
CC interaction. The PID domain-containing proteins which bind APP
CC require the YENPTY motif for full interaction. These interactions
CC are independent of phosphorylation on the terminal tyrosine
CC residue. The NPXY site is also involved in clathrin-mediated
CC endocytosis (By similarity).'
    sorting_signal = "The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells."
    npxy_motif = "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."
    expected = [sorting_signal, npxy_motif]
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc['DOMAIN'])
    assert_equal(expected, entry.cc('DOMAIN'))
  end
  # ENZYME REGULATION topic: an enzyme regulatory mechanism.
  # cc[...] is expected to give an array, cc(...) the bare string.
  def test_enzyme_regulation
    cc_lines = 'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated
CC by the G protein beta and gamma subunit complex.'
    text = "Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([text], entry.cc['ENZYME REGULATION'])
    assert_equal(text, entry.cc('ENZYME REGULATION'))
  end
  # FUNCTION topic: general description of the function(s) of a protein.
  # cc[...] is expected to give an array, cc(...) the bare string.
  def test_function
    cc_lines = 'CC -!- FUNCTION: May play a fundamental role in situations where fine
CC interplay between intracellular calcium and cAMP determines the
CC cellular function. May be a physiologically relevant docking site
CC for calcineurin (By similarity).'
    text = "May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([text], entry.cc['FUNCTION'])
    assert_equal(text, entry.cc('FUNCTION'))
  end
  # INDUCTION topic: compound(s) or condition(s) that regulate gene
  # expression. cc[...] is expected to give an array, cc(...) the bare
  # string.
  def test_induction
    text = "By pheromone (alpha-factor)."
    entry = Bio::SPTR.new('CC -!- INDUCTION: By pheromone (alpha-factor).')
    assert_equal([text], entry.cc['INDUCTION'])
    assert_equal(text, entry.cc('INDUCTION'))
  end
  # INTERACTION topic: binary protein-protein interaction data (3.21.12).
  # cc[...] is expected to give the joined text; cc(...) is expected to
  # give one hash per interaction partner.
  def test_interaction
    cc_lines = 'CC -!- INTERACTION:
CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;
CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
    joined = "P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"
    first_partner = {'SP_Ac' => 'P62158',
                     'identifier' => 'CALM1',
                     'optional_identifier' => '(xeno)',
                     'NbExp' => '1',
                     'IntAct' => ['EBI-457011', 'EBI-397435']}
    second_partner = {'SP_Ac' => 'P62155',
                      'identifier' => 'calm1',
                      'optional_identifier' => '(xeno)',
                      'NbExp' => '1',
                      'IntAct' => ['EBI-457011', 'EBI-397568']}
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['INTERACTION'])
    assert_equal([first_partner, second_partner], entry.cc('INTERACTION'))
  end
  # MASS SPECTROMETRY topic: exact molecular weight of a protein (or part
  # of one) as determined by mass spectrometric methods (see 3.21.23).
  # The second block has no MW_ERR field; nil is expected for it.
  def test_mass_spectrometry
    cc_lines = "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;
CC NOTE=Ref.1.
CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;
CC NOTE=Ref.2."
    joined = ["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
              "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."]
    maldi = {'MW' => '2894.9',
             'MW_ERR' => '3',
             'METHOD' => 'MALDI',
             'RANGE' => '1-29',
             'NOTE' => 'Ref.1'}
    electrospray = {'MW' => '2892.2',
                    'METHOD' => 'Electrospray',
                    'MW_ERR' => nil,
                    'RANGE' => '1-29',
                    'NOTE' => 'Ref.2'}
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(joined, entry.cc['MASS SPECTROMETRY'])
    assert_equal([maldi, electrospray], entry.cc('MASS SPECTROMETRY'))
  end
  # MISCELLANEOUS topic: any comment which does not belong to any of the
  # other defined topics.
  def test_miscellaneous
    cc_lines = 'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a
CC mitochondrial one.'
    joined = "There are two isozymes; a cytoplasmic one and a mitochondrial one."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['MISCELLANEOUS'])
  end
  # PATHWAY topic: metabolic pathway(s) with which a protein is
  # associated. cc[...] is expected to give the joined text; cc(...) is
  # expected to give the pathway split into its individual parts.
  def test_pathway
    cc_lines = 'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-
CC phosphate and glycerone phosphate from D-glucose: step 4.'
    joined = "Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."
    parts = ["Carbohydrate degradation",
             'glycolysis',
             'D-glyceraldehyde 3-phosphate',
             'glycerone phosphate from D-glucose',
             'step 4']
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['PATHWAY'])
    assert_equal(parts, entry.cc('PATHWAY'))
  end
  # PHARMACEUTICAL topic: use of a protein as a pharmaceutical drug.
  def test_pharmaceutical
    cc_lines = 'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),
CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm
CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic
CC function.'
    joined = "Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([joined], entry.cc['PHARMACEUTICAL'])
  end
  # POLYMORPHISM topic: description of polymorphism(s). Two blocks are
  # expected to give two joined strings.
  def test_polymorphism
    cc_lines = 'CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific
CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.
CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP).
CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem
CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-
CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown
CC here) contains one repeat starting at position 415, allele C
CC contains two repeats, allele B contains three repeats and allele A
CC contains four repeats.'
    siba = "Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP)."
    tandem_repeats = "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."
    entry = Bio::SPTR.new(cc_lines)
    assert_equal([siba, tandem_repeats], entry.cc['POLYMORPHISM'])
  end
  # PTM topic: any chemical alteration of a polypeptide (proteolytic
  # cleavage, amino acid modifications including crosslinks). The topic
  # complements the feature table, or records modifications for which
  # position-specific data is not available.
  def test_ptm
    cc_lines = 'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked
CC carbohydrate.
CC -!- PTM: Palmitoylated.'
    expected = ["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
                "Palmitoylated."]
    entry = Bio::SPTR.new(cc_lines)
    assert_equal(expected, entry.cc['PTM'])
  end
  # Checks both accessors for the RNA EDITING topic:
  # * cc['RNA EDITING'] is expected to give the raw, joined comment text;
  # * cc('RNA EDITING') is expected to give a Hash with the
  #   "Modified_positions" list and the free-text "Note".
  670. def test_rna_editing
  671. # RNA EDITING Description of any type of RNA editing that leads to one or more amino acid changes
  672. data = 'CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
  673. CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
  674. CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at
  675. CC positions 50, 78, 104, 260 and 264 are modified to sense codons.'
  # NOTE(review): the fixture above is overwritten by the assignment below
  # before it is ever parsed, so only the GLUR2 example is actually tested.
  676. data = 'CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the
  677. CC brain. Heteromerically expressed edited GLUR2 (R) receptor
  678. CC complexes are impermeable to calcium, whereas the unedited (Q)
  679. CC forms are highly permeable to divalent ions (By similarity).'
  680. sp = Bio::SPTR.new(data)
  681. assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
  682. sp.cc['RNA EDITING'])
  683. assert_equal({"Modified_positions" => ['607'],
  684. "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."},
  685. sp.cc('RNA EDITING'))
  686. end
  # Parses two single-line SIMILARITY comment blocks and checks that
  # cc['SIMILARITY'] yields them as two separate strings.
  687. def test_similarity
  688. # SIMILARITY Description of the similarity(ies) (sequence or structural) of a protein with other proteins
  689. data = 'CC -!- SIMILARITY: Contains 1 protein kinase domain.
  690. CC -!- SIMILARITY: Contains 1 RGS domain.'
  691. sp = Bio::SPTR.new(data)
  692. assert_equal(["Contains 1 protein kinase domain.", "Contains 1 RGS domain."],
  693. sp.cc['SIMILARITY'])
  694. end
  # Checks both accessors for the SUBCELLULAR LOCATION topic:
  # * cc['SUBCELLULAR LOCATION'] is expected to give the raw, joined text;
  # * cc('SUBCELLULAR LOCATION') is expected to give nested arrays: one inner
  #   array per location sentence, holding its semicolon-separated parts.
  695. def test_subcellular_location
  696. # SUBCELLULAR LOCATION Description of the subcellular location of the mature protein
  # NOTE(review): `data` is assigned six times in a row below; each assignment
  # replaces the previous one, so only the last (Plastid) fixture is parsed
  # and asserted on. The first five are sample inputs that are never tested.
  697. data = 'CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
  698. CC secreted by a non-classical secretory pathway.'
  699. data = "CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
  700. CC classical secretory pathway (By similarity)."
  701. data = "CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
  702. CC to the nerve terminals."
  703. data = "CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
  704. CC cell wall."
  705. data = "CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
  706. CC membrane; single-pass type I membrane protein. Lysosome; lysosomal
  707. CC membrane; single-pass type I membrane protein. Localizes to late
  708. CC endocytic compartment. Associates with lysosome membranes."
  709. data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
  710. CC peripheral membrane protein. Plastid; chloroplast; chloroplast
  711. CC stroma."
  712. sp = Bio::SPTR.new(data)
  713. assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
  714. sp.cc['SUBCELLULAR LOCATION'])
  715. assert_equal([["Plastid",
  716. "chloroplast",
  717. "chloroplast membrane",
  718. "peripheral membrane protein"],
  719. ["Plastid", "chloroplast",
  720. "chloroplast stroma"]],
  721. sp.cc('SUBCELLULAR LOCATION'))
  722. end
  # Parses a single-line SUBUNIT comment and checks that cc['SUBUNIT'] yields
  # it as a one-element array.
  723. def test_subunit
  724. # SUBUNIT Description of the quaternary structure of a protein and any kind of interactions with other proteins or protein complexes; except for receptor-ligand interactions, which are described in the topic FUNCTION.
  # NOTE(review): the first (two-line) fixture is overwritten by the
  # "Homotetramer." fixture before it is parsed, so it is never tested.
  725. data = 'CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
  726. CC MAPK9, MAPK10 and MAPK12.'
  727. data = 'CC -!- SUBUNIT: Homotetramer.'
  728. sp = Bio::SPTR.new(data)
  729. assert_equal(["Homotetramer."],
  730. sp.cc['SUBUNIT'])
  731. end
  # Parses a four-line TISSUE SPECIFICITY comment and checks that
  # cc['TISSUE SPECIFICITY'] yields it as a single joined string.
  732. def test_tissue_specificity
  733. # TISSUE SPECIFICITY Description of the tissue-specific expression of mRNA or protein
  # NOTE(review): `data` is assigned three times; only the last fixture
  # (Isoform 2 ...) is parsed, the first two are never tested.
  734. data = "CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria."
  735. data = "CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
  736. CC thymus, testis, embryo and proliferating blood lymphocytes."
  737. data = "CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,
  738. CC heart, spleen, kidney and blood. Isoform 2 is expressed (at
  739. CC protein level) in the spleen, skeletal muscle and gastrointestinal
  740. CC epithelia."
  741. sp = Bio::SPTR.new(data)
  742. assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
  743. sp.cc['TISSUE SPECIFICITY'])
  744. end
  # TOXIC DOSE: description of the lethal dose (LD), paralytic dose (PD) or
  # effective dose of a protein.
  #
  # Feeds a single-line TOXIC DOSE comment to the parser and expects
  # cc['TOXIC DOSE'] to hand the text back as a one-element array.
  def test_toxic_dose
    cc_line = 'CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.'
    toxic_dose = Bio::SPTR.new(cc_line).cc['TOXIC DOSE']
    assert_equal(["LD(50) is 12 mg/kg by intraperitoneal injection."], toxic_dose)
  end
  # Checks both accessors for the WEB RESOURCE topic using three blocks:
  # * cc['WEB RESOURCE'] is expected to give one joined string per block,
  #   still containing the NAME=/URL= syntax and the quoted URL;
  # * cc('WEB RESOURCE') is expected to give one Hash per block with 'NAME',
  #   the unquoted 'URL', and 'NOTE' set to nil (no NOTE in the fixtures).
  752. def test_web_resource
  753. # WEB RESOURCE Description of a cross-reference to a network database/resource for a specific protein; see 3.21.34
  754. data = 'CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;
  755. CC URL="http://www.molgen.ua.ac.be/CMTMutations/".
  756. CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;
  757. CC URL="http://www.crg.es/deafness/".
  758. CC -!- WEB RESOURCE: NAME=GeneReviews;
  759. CC URL="http://www.genetests.org/query?gene=GJB1".'
  760. sp = Bio::SPTR.new(data)
  761. assert_equal(['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
  762. 'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
  763. 'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'],
  764. sp.cc['WEB RESOURCE'])
  765. assert_equal([{'NAME' => "Inherited peripheral neuropathies mutation db",
  766. 'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/', 'NOTE' => nil},
  767. {'NAME' => "Connexin-deafness homepage",
  768. 'URL' => 'http://www.crg.es/deafness/', 'NOTE' => nil},
  769. {'NAME' => "GeneReviews",
  770. 'URL' => 'http://www.genetests.org/query?gene=GJB1', 'NOTE' => nil}],
  771. sp.cc('WEB RESOURCE'))
  772. end
  773. end # class TestSPTRCC
  774. # http://br.expasy.org/sprot/userman.html#Ref_line
  # Tests for the reference (R*) lines. A single reference record containing
  # RN, RP, RC, RX, RG, RA, RT and RL lines is parsed in setup; test_ref checks
  # the whole Hash returned by ref.first, and the remaining tests check each
  # key of that Hash individually.
  775. class TestSPTRRef < Test::Unit::TestCase
  # Builds @obj from one reference record only (no ID or other line types).
  776. def setup
  777. data = 'RN [1]
  778. RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION
  779. RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL
  780. RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.
  781. RC STRAIN=Bristol N2;
  782. RX PubMed=11134024; DOI=10.1074/jbc.M008990200;
  783. RG The mouse genome sequencing consortium;
  784. RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,
  785. RA Cozzone A.J., Cortay J.-C.;
  786. RT "A novel adapter protein employs a phosphotyrosine binding domain and
  787. RT exceptionally basic N-terminal domains to capture and localize an
  788. RT atypical protein kinase C: characterization of Caenorhabditis elegans
  789. RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";
  790. RL J. Biol. Chem. 276:10463-10475(2001).'
  791. @obj = SPTR.new(data)
  792. end
  # Expected shape of one parsed reference: RT/RL/RA/RN are strings, RX is a
  # Hash (MEDLINE is nil because the fixture has no MEDLINE id), RC is an
  # array of Token/Text hashes, RP and RG are arrays of strings.
  793. def test_ref
  794. res = {"RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
  795. "RL" => "J. Biol. Chem. 276:10463-10475(2001).",
  796. "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
  797. "RX" => {"MEDLINE" => nil,
  798. "DOI" => "10.1074/jbc.M008990200",
  799. "PubMed" => "11134024"},
  800. "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}],
  801. "RN" => "[1]",
  802. "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
  803. "FUNCTION",
  804. "INTERACTION WITH PKC-3",
  805. "SUBCELLULAR LOCATION",
  806. "TISSUE SPECIFICITY",
  807. "DEVELOPMENTAL STAGE",
  808. "MUTAGENESIS OF PHE-175 AND PHE-221"],
  809. "RG" => ["The mouse genome sequencing consortium"]}
  810. assert_equal(res, @obj.ref.first)
  811. end
  812. def test_RN
  813. assert_equal("[1]", @obj.ref.first['RN'])
  814. end
  # The comma-separated RP text (ending in ", AND ...") is expected as a list
  # of individual positions without the "AND" and the trailing period.
  815. def test_RP
  816. assert_equal(["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
  817. "FUNCTION", "INTERACTION WITH PKC-3",
  818. "SUBCELLULAR LOCATION",
  819. "TISSUE SPECIFICITY",
  820. "DEVELOPMENTAL STAGE",
  821. "MUTAGENESIS OF PHE-175 AND PHE-221"],
  822. @obj.ref.first['RP'])
  823. end
  824. def test_RC
  825. assert_equal([{"Text"=>"Bristol N2", "Token"=>"STRAIN"}],
  826. @obj.ref.first['RC'])
  827. end
  828. def test_RX
  829. assert_equal({'MEDLINE' => nil,
  830. 'PubMed' => '11134024',
  831. 'DOI' => '10.1074/jbc.M008990200'},
  832. @obj.ref.first['RX'])
  833. end
  834. def test_RG
  835. assert_equal(["The mouse genome sequencing consortium"],
  836. @obj.ref.first['RG'])
  837. end
  # The two RA lines are expected joined into one string, without the
  # terminating semicolon.
  838. def test_RA
  839. assert_equal("Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
  840. @obj.ref.first['RA'])
  841. end
  # The four RT lines are expected joined into one string, without the
  # surrounding double quotes and the terminating semicolon.
  842. def test_RT
  843. assert_equal("A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
  844. @obj.ref.first['RT'])
  845. end
  846. def test_RL
  847. assert_equal("J. Biol. Chem. 276:10463-10475(2001).",
  848. @obj.ref.first['RL'])
  849. end
  850. end # class TestSPTRRef
  851. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
  # Regression test for the Swiss-Prot release 41.0 format change listed
  # below (RP text spanning several lines).
  852. class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase
  853. # Progress in the conversion of Swiss-Prot to mixed-case characters
  854. # Multiple RP lines
  # An RP statement wrapped over two RP lines is expected to come back as a
  # list of its comma-separated items, without "AND" and the final period.
  855. def test_multiple_RP_lines
  856. data = "RN [1]
  857. RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND
  858. RP CHARACTERIZATION."
  859. sp = SPTR.new(data)
  860. assert_equal(['SEQUENCE FROM N.A.',
  861. 'SEQUENCE OF 23-42 AND 351-365',
  862. 'CHARACTERIZATION'],
  863. sp.ref.first['RP'])
  864. end
  865. end
  866. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
  # Regression tests for the Swiss-Prot release 41.1 syntax of the
  # ALTERNATIVE PRODUCTS comment topic, with and without accompanying
  # VARSPLIC feature-table lines.
  867. class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
  868. # New syntax of the CC line topic ALTERNATIVE PRODUCTS
  # Expects cc('ALTERNATIVE PRODUCTS') to return a Hash with "Event",
  # "Named isoforms", "Comment" and a "Variants" array (one Hash per Name=).
  # NOTE(review): the fixture contains three Event= entries and a Note= per
  # isoform, but the expected Hash lists only "Alternative promoter" under
  # "Event" and has no "Note" keys -- confirm this is the intended contract.
  869. def test_alternative_products
  870. data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
  871. CC -!- ALTERNATIVE PRODUCTS:
  872. CC Event=Alternative promoter;
  873. CC Comment=Free text;
  874. CC Event=Alternative splicing; Named isoforms=2;
  875. CC Comment=Optional free text;
  876. CC Name=Isoform_1; Synonyms=Synonym_1;
  877. CC IsoId=Isoform_identifier_1;
  878. CC Sequence=Displayed;
  879. CC Note=Free text;
  880. CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;
  881. CC IsoId=Isoform_identifier_1, Isoform_identifer_2;
  882. CC Sequence=VSP_identifier_1, VSP_identifier_2;
  883. CC Note=Free text;
  884. CC Event=Alternative initiation;
  885. CC Comment=Free text;"
  886. sp = SPTR.new(data)
  887. res = {"Comment" => "Free text",
  888. "Named isoforms" => "2",
  889. "Variants" => [{"Name" => "Isoform_1",
  890. "Synonyms" => ["Synonym_1"],
  891. "IsoId" => ["Isoform_identifier_1"],
  892. "Sequence" => ["Displayed"] },
  893. {"Name" => "Isoform_2",
  894. "Synonyms" => ["Synonym_1", "Synonym_2"],
  895. "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"],
  896. "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}],
  897. "Event" => ["Alternative promoter"]}
  898. assert_equal(res,
  899. sp.cc('ALTERNATIVE PRODUCTS'))
  900. end
  # Same topic with six named isoforms and no Comment/Synonyms (expected as
  # "" and [] respectively), followed by five VARSPLIC features. For each
  # feature ft['VARSPLIC'] is expected to give From/To as integers, the joined
  # Description, the FTId without "/FTId=" and the final period, the
  # untouched field list under "original", and a "diff" pair: the two
  # sequences around "->" (with the line-wrap space removed) or [nil, nil]
  # for "Missing" features.
  901. def test_alternative_products_with_ft
  902. data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
  903. CC -!- ALTERNATIVE PRODUCTS:
  904. CC Event=Alternative splicing; Named isoforms=6;
  905. CC Name=1;
  906. CC IsoId=Q15746-4; Sequence=Displayed;
  907. CC Name=2;
  908. CC IsoId=Q15746-5; Sequence=VSP_000040;
  909. CC Name=3A;
  910. CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043;
  911. CC Name=3B;
  912. CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;
  913. CC Name=4;
  914. CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;
  915. CC Name=del-1790;
  916. CC IsoId=Q15746-9; Sequence=VSP_000044;
  917. FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA
  918. FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in
  919. FT isoform 2 and isoform 3B).
  920. FT /FTId=VSP_004791.
  921. FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A,
  922. FT isoform 3B and isoform 4).
  923. FT /FTId=VSP_004792.
  924. FT VARSPLIC 1473 1545 Missing (in isoform 4).
  925. FT /FTId=VSP_004793.
  926. FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B).
  927. FT /FTId=VSP_004794.
  928. FT VARSPLIC 1790 1790 Missing (in isoform Del-1790).
  929. FT /FTId=VSP_004795."
  930. sp = SPTR.new(data)
  931. assert_equal({"Comment" => "",
  932. "Named isoforms" => "6",
  933. "Variants" => [{"IsoId"=>["Q15746-4"],
  934. "Name"=>"1",
  935. "Synonyms"=>[],
  936. "Sequence"=>["Displayed"]},
  937. {"IsoId"=>["Q15746-5"],
  938. "Name"=>"2",
  939. "Synonyms"=>[],
  940. "Sequence"=>["VSP_000040"]},
  941. {"IsoId"=>["Q15746-6"],
  942. "Name"=>"3A",
  943. "Synonyms"=>[],
  944. "Sequence"=>["VSP_000041", "VSP_000043"]},
  945. {"IsoId"=>["Q15746-7"],
  946. "Name"=>"3B",
  947. "Synonyms"=>[],
  948. "Sequence"=>["VSP_000040", "VSP_000041", "VSP_000042"]},
  949. {"IsoId"=>["Q15746-8"],
  950. "Name"=>"4",
  951. "Synonyms"=>[],
  952. "Sequence"=>["VSP_000041", "VSP_000042"]},
  953. {"IsoId"=>["Q15746-9"],
  954. "Name"=>"del-1790",
  955. "Synonyms"=>[],
  956. "Sequence"=>["VSP_000044"]}],
  957. "Event"=>["Alternative splicing"]},
  958. sp.cc('ALTERNATIVE PRODUCTS'))
  959. assert_equal([{"FTId"=>"VSP_004791",
  960. "From"=>437,
  961. "To"=>506,
  962. "Description"=>"VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).",
  963. "diff"=> ["VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKARTRDSGTYSCTASNAQGQVSCSWTLQVER", "G"],
  964. "original"=> ["VARSPLIC", "437", "506", "VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).", "/FTId=VSP_004791."]},
  965. {"FTId"=>"VSP_004792",
  966. "From"=>1433,
  967. "diff"=>["DEVEVSD", "MKWRCQT"],
  968. "To"=>1439,
  969. "original"=> ["VARSPLIC", "1433", "1439", "DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4).", "/FTId=VSP_004792."],
  970. "Description"=>"DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4)."},
  971. {"FTId"=>"VSP_004793",
  972. "From"=>1473,
  973. "diff"=>[nil, nil],
  974. "To"=>1545,
  975. "original"=> ["VARSPLIC", "1473", "1545", "Missing (in isoform 4).", "/FTId=VSP_004793."], "Description"=>"Missing (in isoform 4)."},
  976. {"FTId"=>"VSP_004794",
  977. "From"=>1655,
  978. "diff"=>[nil, nil],
  979. "To"=>1705,
  980. "original"=> ["VARSPLIC", "1655", "1705", "Missing (in isoform 3A and isoform 3B).", "/FTId=VSP_004794."],
  981. "Description"=>"Missing (in isoform 3A and isoform 3B)."},
  982. {"FTId"=>"VSP_004795",
  983. "From"=>1790,
  984. "diff"=>[nil, nil],
  985. "To"=>1790,
  986. "original"=>["VARSPLIC", "1790", "1790", "Missing (in isoform Del-1790).", "/FTId=VSP_004795."],
  987. "Description"=>"Missing (in isoform Del-1790)."}],
  988. sp.ft['VARSPLIC'])
  989. end
  990. end
  991. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.10
  # Regression test for the Swiss-Prot release 41.10 change that lets a
  # single RC topic span several RC lines.
  992. class TestSPTRSwissProtRel41_10 < Test::Unit::TestCase
  993. # Reference Comment (RC) line topics may span lines
  # One STRAIN= topic lists 22 strains over three RC lines. The expected
  # result is one {"Token"=>"STRAIN", "Text"=>...} Hash per strain, in input
  # order, with the ", and " before the last strain and the final semicolon
  # removed.
  994. def test_RC_lines
  995. data = "RN [1]
  996. RC STRAIN=AZ.026, DC.005, GA.039, GA2181, IL.014, IN.018, KY.172, KY2.37,
  997. RC LA.013, MN.001, MNb027, MS.040, NY.016, OH.036, TN.173, TN2.38,
  998. RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;"
  999. sp = SPTR.new(data)
  1000. assert_equal([{"Text"=>"AZ.026", "Token"=>"STRAIN"},
  1001. {"Text"=>"DC.005", "Token"=>"STRAIN"},
  1002. {"Text"=>"GA.039", "Token"=>"STRAIN"},
  1003. {"Text"=>"GA2181", "Token"=>"STRAIN"},
  1004. {"Text"=>"IL.014", "Token"=>"STRAIN"},
  1005. {"Text"=>"IN.018", "Token"=>"STRAIN"},
  1006. {"Text"=>"KY.172", "Token"=>"STRAIN"},
  1007. {"Text"=>"KY2.37", "Token"=>"STRAIN"},
  1008. {"Text"=>"LA.013", "Token"=>"STRAIN"},
  1009. {"Text"=>"MN.001", "Token"=>"STRAIN"},
  1010. {"Text"=>"MNb027", "Token"=>"STRAIN"},
  1011. {"Text"=>"MS.040", "Token"=>"STRAIN"},
  1012. {"Text"=>"NY.016", "Token"=>"STRAIN"},
  1013. {"Text"=>"OH.036", "Token"=>"STRAIN"},
  1014. {"Text"=>"TN.173", "Token"=>"STRAIN"},
  1015. {"Text"=>"TN2.38", "Token"=>"STRAIN"},
  1016. {"Text"=>"UT.002", "Token"=>"STRAIN"},
  1017. {"Text"=>"AL.012", "Token"=>"STRAIN"},
  1018. {"Text"=>"AZ.180", "Token"=>"STRAIN"},
  1019. {"Text"=>"MI.035", "Token"=>"STRAIN"},
  1020. {"Text"=>"VA.015", "Token"=>"STRAIN"},
  1021. {"Text"=>"IL2.17", "Token"=>"STRAIN"}],
  1022. sp.ref.first['RC'])
  1023. end
  1024. end
  1025. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.20
  # Regression tests for two Swiss-Prot release 41.20 changes: the reworded
  # "Submitted ..." RL line and the new ALLERGEN comment topic.
  1026. class TestSPTRSwissProtRel41_20 < Test::Unit::TestCase
  1027. # Case and wording change for submissions to Swiss-Prot in reference location (RL) lines
  # NOTE(review): the fixture consists of an RL line only (no RN line) and
  # the asserted 'RL' value is the empty string, not the submission text --
  # confirm that '' is the intended result rather than a pinned limitation.
  1028. def test_RL_lines
  1029. data = "RL Submitted (MAY-2002) to the SWISS-PROT data bank."
  1030. sp = SPTR.new(data)
  1031. assert_equal('',
  1032. sp.ref.first['RL'])
  1033. end
  1034. # New comment line (CC) topic ALLERGEN
  # cc("ALLERGEN") is expected to return the three-line comment as a single
  # joined string inside an array.
  1035. def test_CC_allergen
  1036. data = "CC -!- ALLERGEN: Causes an allergic reaction in human. Binds IgE. It is a
  1037. CC partially heat-labile allergen that may cause both respiratory and
  1038. CC food-allergy symptoms in patients with the bird-egg syndrome."
  1039. sp = SPTR.new(data)
  1040. assert_equal(["Causes an allergic reaction in human. Binds IgE. It is a partially heat-labile allergen that may cause both respiratory and food-allergy symptoms in patients with the bird-egg syndrome."],
  1041. sp.cc("ALLERGEN"))
  1042. end
  1043. end
  1044. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel42.6
  # Regression test for the RNA EDITING comment topic introduced in
  # Swiss-Prot release 42.6.
  1045. class TestSPTRSwissProtRel42_6 < Test::Unit::TestCase
  1046. # New comment line (CC) topic RNA EDITING
  # Parses three fixtures in turn and checks the Hash from cc("RNA EDITING"):
  # 1. positions only  -> "Note" is "" and positions are a list of strings;
  # 2. positions + multi-line Note -> Note joined, without the final period;
  # 3. Modified_positions=Not_applicable -> kept as the single list element.
  1047. def test_CC_rna_editing
  1048. data = "CC -!- RNA EDITING: Modified_positions=393, 431, 452, 495."
  1049. sp = SPTR.new(data)
  1050. assert_equal({"Note"=>"",
  1051. "Modified_positions"=>['393', '431', '452', '495']},
  1052. sp.cc("RNA EDITING"))
  1053. data = "CC -!- RNA EDITING: Modified_positions=59, 78, 94, 98, 102, 121; Note=The
  1054. CC stop codon at position 121 is created by RNA editing. The nonsense
  1055. CC codon at position 59 is modified to a sense codon."
  1056. sp = SPTR.new(data)
  1057. assert_equal({"Note"=>"The stop codon at position 121 is created by RNA editing. The nonsense codon at position 59 is modified to a sense codon.",
  1058. "Modified_positions"=>['59', '78', '94', '98', '102', '121']},
  1059. sp.cc("RNA EDITING"))
  1060. data = "CC -!- RNA EDITING: Modified_positions=Not_applicable; Note=Some
  1061. CC positions are modified by RNA editing via nucleotide insertion or
  1062. CC deletion. The initiator methionine is created by RNA editing."
  1063. sp = SPTR.new(data)
  1064. assert_equal({'Modified_positions' => ['Not_applicable'],
  1065. 'Note' => "Some positions are modified by RNA editing via nucleotide insertion or deletion. The initiator methionine is created by RNA editing."},
  1066. sp.cc("RNA EDITING"))
  1067. end
  1068. end
  1069. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel1_12
  # Regression tests for two UniProt release 1.12 changes: the DOI field in
  # RX lines and the new RG (Reference Group) line type.
  1070. class TestSPTRUniProtRel1_12 < Test::Unit::TestCase
  1071. # Digital Object Identifier (DOI) in the RX line
  # An RX line carrying all three identifiers is expected to produce a Hash
  # with 'MEDLINE', 'PubMed' and 'DOI' string values.
  1072. def test_DOI_in_RX_line
  1073. # RX [MEDLINE=Medline_identifier; ][PubMed=Pubmed_identifier; ][DOI=Digital_object_identifier;]
  1074. data = "
  1075. RN [1]
  1076. RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;"
  1077. sp = SPTR.new(data)
  1078. assert_equal({'MEDLINE' => '97291283',
  1079. 'PubMed' => '9145897',
  1080. 'DOI' => '10.1007/s00248-002-2038-4'},
  1081. sp.ref.first['RX'])
  1082. end
  1083. # New line type: RG (Reference Group)
  # Two RG lines in one reference are expected as a two-element array, each
  # group name without its terminating semicolon.
  1084. def test_RG_line
  1085. data = "
  1086. RN [1]
  1087. RG The C. elegans sequencing consortium;
  1088. RG The Brazilian network for HIV isolation and characterization;"
  1089. sp = SPTR.new(data)
  1090. assert_equal(['The C. elegans sequencing consortium',
  1091. 'The Brazilian network for HIV isolation and characterization'],
  1092. sp.ref.first['RG'])
  1093. end
  1094. end
  1095. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_0
  # Regression test for the UniProt release 2.0 structured GN (Gene Name)
  # line format.
  1096. class TestSPTRUniProtRel2_0 < Test::Unit::TestCase
  1097. # New format for the GN (Gene Name) line
  1098. # GN Name=<name>; Synonyms=<name1>[, <name2>...]; OrderedLocusNames=<name1>[, <name2>...];
  1099. # GN ORFNames=<name1>[, <name2>...];
  # Parses three fixtures in turn and checks the array returned by gn, which
  # holds one Hash per gene with the symbol keys :name, :synonyms, :loci and
  # :orfs:
  # 1. one gene spread over two GN lines;
  # 2. ORFNames only -> :name is '' and the other lists are empty;
  # 3. two genes separated by a "GN and" line -> two Hashes.
  1100. def test_GN_line
  1101. data = "GN Name=atpG; Synonyms=uncG, papC;
  1102. GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"
  1103. sp = SPTR.new(data)
  1104. assert_equal([{:orfs => [],
  1105. :loci => ["b3733", "c4659", "z5231", "ECs4675", "SF3813", "S3955"],
  1106. :name => "atpG",
  1107. :synonyms => ["uncG", "papC"]}],
  1108. sp.gn)
  1109. data = "GN ORFNames=SPAC1834.11c;"
  1110. sp = SPTR.new(data)
  1111. assert_equal([{:orfs => ['SPAC1834.11c'],
  1112. :loci => [],
  1113. :name => '',
  1114. :synonyms => []}],
  1115. sp.gn)
  1116. data = "GN Name=cysA1; Synonyms=cysA; OrderedLocusNames=Rv3117, MT3199;
  1117. GN ORFNames=MTCY164.27;
  1118. GN and
  1119. GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;"
  1120. sp = SPTR.new(data)
  1121. assert_equal([{:orfs => ["MTCY164.27"],
  1122. :loci => ["Rv3117", "MT3199"],
  1123. :name => "cysA1",
  1124. :synonyms => ["cysA"]},
  1125. {:orfs => ["MTV043.07c"],
  1126. :loci => ["Rv0815c", "MT0837"],
  1127. :name => "cysA2",
  1128. :synonyms => []}],
  1129. sp.gn)
  1130. end
  1131. end
  1132. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_1
  # Regression test for the UniProt release 2.1 format of the
  # MASS SPECTROMETRY comment topic.
  1133. class TestSPTRUniProtRel2_1 < Test::Unit::TestCase
  1134. # Format change in the comment line (CC) topic: MASS SPECTROMETRY
  # cc("MASS SPECTROMETRY") is expected to return one Hash per comment with
  # the keys MW, MW_ERR, METHOD, RANGE and NOTE. In this fixture MW_ERR is
  # absent (expected nil), and the expected RANGE is "1-284" without the
  # "(Isoform 3)" qualifier that follows it in the input.
  1135. def test_CC_mass_spectrometry
  1136. data = "CC -!- MASS SPECTROMETRY: MW=32875.93; METHOD=MALDI;
  1137. CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."
  1138. sp = SPTR.new(data)
  1139. assert_equal([{"RANGE"=>"1-284",
  1140. "METHOD"=>"MALDI",
  1141. "MW_ERR"=>nil,
  1142. "NOTE"=>"Ref.6",
  1143. "MW"=>"32875.93"}],
  1144. sp.cc("MASS SPECTROMETRY"))
  1145. end
  1146. end
  1147. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_3
  # Regression tests for two UniProt release 2.3 changes: the RL line
  # structure used for submissions and the reworked PDB cross-reference.
  class TestSPTRUniProtRel2_3 < Test::Unit::TestCase
    # New RL line structure for electronic publications.
    # A lone "Submitted ..." RL line is expected to give an empty 'RL' value
    # on the first parsed reference.
    def test_RL_line
      entry = SPTR.new("RL Submitted (XXX-YYYY) to the HIV data bank.")
      location = entry.ref.first['RL']
      assert_equal('', location)
    end

    # Format change in the cross-reference to PDB.
    # dr['PDB'] is expected to hold one row per DR line, split at "; " with
    # the database name and the final period removed.
    def test_DR_PDB
      entry = SPTR.new("DR PDB; 1NB3; X-ray; A/B/C/D=116-335, P/R/S/T=98-105.")
      expected_rows = [["1NB3", "X-ray", "A/B/C/D=116-335, P/R/S/T=98-105"]]
      assert_equal(expected_rows, entry.dr['PDB'])
    end
  end
  1164. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_4
  # Regression tests for two UniProt release 3.4 changes: the extended RP
  # (Reference Position) wording and the new BIOPHYSICOCHEMICAL PROPERTIES
  # comment topic.
  1165. class TestSPTRUniProtRel3_4 < Test::Unit::TestCase
  1166. # Changes in the RP (Reference Position) line
  # Two fixtures: a three-line RP statement whose items contain brackets,
  # parentheses and semicolons (expected split at the commas only), and a
  # single-item RP line (expected as a one-element array).
  1167. def test_RP_line
  1168. data = "
  1169. RN [1]
  1170. RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1), PROTEIN SEQUENCE
  1171. RP OF 108-131; 220-231 AND 349-393, CHARACTERIZATION, AND MUTAGENESIS OF
  1172. RP ARG-336."
  1173. sp = SPTR.new(data)
  1174. assert_equal(['NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)',
  1175. 'PROTEIN SEQUENCE OF 108-131; 220-231 AND 349-393',
  1176. 'CHARACTERIZATION',
  1177. 'MUTAGENESIS OF ARG-336'],
  1178. sp.ref.first['RP'])
  1179. data = "
  1180. RN [1]
  1181. RP NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]."
  1182. sp = SPTR.new(data)
  1183. assert_equal(['NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]'],
  1184. sp.ref.first['RP'])
  1185. end
  1186. # New comment line (CC) topic: BIOPHYSICOCHEMICAL PROPERTIES
  # In all three cases the expected Hash always carries the same five keys
  # ("Absorption", "Kinetic parameters", "pH dependence", "Redox potential",
  # "Temperature dependence"); sections missing from the input are expected
  # as {} or "".
  1187. def test_CC_biophysiochemical_properties
  1188. data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
  1189. CC Absorption:
  1190. CC Abs(max)=395 nm;
  1191. CC Note=Exhibits a smaller absorbance peak at 470 nm. The
  1192. CC fluorescence emission spectrum peaks at 509 nm with a shoulder
  1193. CC at 540 nm;"
  1194. sp = SPTR.new(data)
  1195. assert_equal({"Redox potential" => "",
  1196. "Temperature dependence" => "",
  1197. "Kinetic parameters" => {},
  1198. "Absorption" => {"Note" => "Exhibits a smaller absorbance peak at 470 nm. The fluorescence emission spectrum peaks at 509 nm with a shoulder at 540 nm",
  1199. "Abs(max)" => "395 nm"},
  1200. "pH dependence" => ""},
  1201. sp.cc("BIOPHYSICOCHEMICAL PROPERTIES"))
  1202. data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
  1203. CC Kinetic parameters:
  1204. CC KM=62 mM for glucose;
  1205. CC KM=90 mM for maltose;
  1206. CC Vmax=0.20 mmol/min/mg enzyme with glucose as substrate;
  1207. CC Vmax=0.11 mmol/min/mg enzyme with maltose as substrate;
  1208. CC Note=Acetylates glucose, maltose, mannose, galactose, and
  1209. CC fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07,
  1210. CC 0.04;"
  1211. sp = SPTR.new(data)
  # NOTE(review): with repeated keys the expected value merges both KM
  # entries into one string ("62 mM ...; KM=90 mM ...") but keeps only the
  # first Vmax entry -- confirm this asymmetry is intended.
  1212. assert_equal({"Redox potential" => "",
  1213. "Temperature dependence" => "",
  1214. "Kinetic parameters" => {"KM" => "62 mM for glucose; KM=90 mM for maltose",
  1215. "Note" => "Acetylates glucose, maltose, mannose, galactose, and fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07, 0.04",
  1216. "Vmax" => "0.20 mmol/min/mg enzyme with glucose as substrate"},
  1217. "Absorption" => {},
  1218. "pH dependence" => ""},
  1219. sp.cc("BIOPHYSICOCHEMICAL PROPERTIES"))
  1220. data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
  1221. CC Kinetic parameters:
  1222. CC KM=1.76 uM for chlorophyll;
  1223. CC pH dependence:
  1224. CC Optimum pH is 7.5. Active from pH 5.0 to 9.0;
  1225. CC Temperature dependence:
  1226. CC Optimum temperature is 45 degrees Celsius. Active from 30 to 60
  1227. CC degrees Celsius;"
  1228. sp = SPTR.new(data)
  # NOTE(review): the fixture has a "Kinetic parameters" section with a KM
  # value, yet the expected Hash has "Kinetic parameters" => {} -- confirm
  # whether this pins intended behavior or a parser limitation.
  1229. assert_equal({"Redox potential" => "",
  1230. "Temperature dependence" => "Optimum temperature is 45 degrees Celsius. Active from 30 to 60 degrees Celsius",
  1231. "Kinetic parameters" => {},
  1232. "Absorption" => {},
  1233. "pH dependence" => "Optimum pH is 7.5. Active from pH 5.0 to 9.0"},
  1234. sp.cc("BIOPHYSICOCHEMICAL PROPERTIES"))
  1235. end
  1236. end
  1237. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_5
  # Placeholder for the UniProt release 3.5 entry-name format change; the
  # only test method has no body yet.
  1238. class TestSPTRUniProtRel3_5 < Test::Unit::TestCase
  1239. # Extension of the Swiss-Prot entry name format
  # Contains no assertions, so it passes without checking anything.
  1240. def test_entry_name_format
  1241. # TBD
  1242. end
  1243. end
  1244. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel4_0
  # Regression tests for the INTERACTION comment topic introduced in UniProt
  # release 4.0. Every test expects cc("INTERACTION") to return one Hash per
  # interaction line with the keys "SP_Ac", "identifier",
  # "optional_identifier", "NbExp" (a string) and "IntAct" (a list of ids).
  1245. class TestSPTRUniProtRel4_0 < Test::Unit::TestCase
  1246. # Extension of the TrEMBL entry name format
  1247. # Change of the entry name in many Swiss-Prot entries
  1248. # New comment line (CC) topic: INTERACTION
  # Plain "accession:gene" partner.
  1249. def test_CC_interaction
  1250. data = "CC -!- INTERACTION:
  1251. CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;"
  1252. sp = SPTR.new(data)
  1253. assert_equal([{"SP_Ac" => "P11450",
  1254. "identifier" => "fcp3c",
  1255. "optional_identifier" => nil,
  1256. "NbExp" => "1",
  1257. "IntAct" => ["EBI-126914", "EBI-159556"]}],
  1258. sp.cc("INTERACTION"))
  1259. end
  # Partner accession with an isoform suffix ("-1"), expected to be kept.
  1260. def test_CC_interaction_isoform
  1261. data = "CC -!- INTERACTION:
  1262. CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;"
  1263. sp = SPTR.new(data)
  1264. assert_equal([{"SP_Ac" => 'Q9W1K5-1',
  1265. "identifier" => 'cg11299',
  1266. "optional_identifier" => nil,
  1267. "NbExp" => "1",
  1268. "IntAct" => ["EBI-133844", "EBI-212772"]}],
  1269. sp.cc("INTERACTION"))
  1270. end
  # Partner without a gene name: the "-" placeholder is expected verbatim.
  1271. def test_CC_interaction_no_gene_name
  1272. data = "CC -!- INTERACTION:
  1273. CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;"
  1274. sp = SPTR.new(data)
  1275. assert_equal([{"SP_Ac" => 'Q8NI08',
  1276. "identifier" => '-',
  1277. "optional_identifier" => nil,
  1278. "NbExp" => "1",
  1279. "IntAct" => ["EBI-80809", "EBI-80799"]}],
  1280. sp.cc("INTERACTION"))
  1281. end
  # "Self" partner: both "SP_Ac" and "identifier" are expected to be the
  # entry name taken from the ID line of the fixture (TEST_ENTRY).
  1282. def test_CC_interaction_self_association
  1283. data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
  1284. CC -!- INTERACTION:
  1285. CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;"
  1286. sp = SPTR.new(data)
  1287. assert_equal([{"SP_Ac" => 'TEST_ENTRY',
  1288. "identifier" => 'TEST_ENTRY',
  1289. "optional_identifier" => nil,
  1290. "NbExp" => "1",
  1291. "IntAct" => ["EBI-123485", "EBI-123485"]}],
  1292. sp.cc("INTERACTION"))
  1293. end
  # Partner from another organism: the "(xeno)" marker is expected under
  # "optional_identifier", parentheses included.
  1294. def test_CC_interaction_The_source_organisms_of_the_interacting_proteins_are_different
  1295. data = "CC -!- INTERACTION:
  1296. CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;"
  1297. sp = SPTR.new(data)
  1298. assert_equal([{"SP_Ac" => 'Q8C1S0',
  1299. "identifier" => '2410018m14rik',
  1300. "optional_identifier" => '(xeno)',
  1301. "NbExp" => "1",
  1302. "IntAct" => ["EBI-394562", "EBI-398761"]}],
  1303. sp.cc("INTERACTION"))
  1304. end
  # Two lines naming the same partner with different IntAct ids are expected
  # as two separate Hashes, in input order.
  1305. def test_CC_interaction_Different_isoforms_of_the_current_protein_are_shown_to_interact_with_the_same_protein
  1306. data = "CC -!- INTERACTION:
  1307. CC P51617:irak1; NbExp=1; IntAct=EBI-448466, EBI-358664;
  1308. CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"
  1309. sp = SPTR.new(data)
  1310. assert_equal([{"SP_Ac" => "P51617",
  1311. "identifier" => "irak1",
  1312. "optional_identifier" => nil,
  1313. "NbExp" => "1",
  1314. "IntAct" => ["EBI-448466", "EBI-358664"]},
  1315. {"SP_Ac" => "P51617",
  1316. "identifier" => "irak1",
  1317. "optional_identifier" => nil,
  1318. "NbExp" => "1",
  1319. "IntAct" => ["EBI-448472", "EBI-358664"]}],
  1320. sp.cc("INTERACTION"))
  1321. end
  1322. end
  1323. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_0
  # Regression test for the UniProt release 5.0 DR (database cross-reference)
  # line format with up to four identifiers.
  1324. class TestSPTRUniProtRel5_0 < Test::Unit::TestCase
  1325. # Format change in the DR line
  1326. # DR DATABASE_IDENTIFIER; PRIMARY_IDENTIFIER; SECONDARY_IDENTIFIER[; TERTIARY_IDENTIFIER][; QUATERNARY_IDENTIFIER].
  # Checks both accessors on two EMBL cross-references:
  # * dr['EMBL'] is expected to give one array of fields per DR line;
  # * dr('EMBL') is expected to give one Hash per DR line with the keys
  #   "Accession", "Version", "Molecular Type" and a whitespace-only key
  #   holding the "-" field.
  # NOTE(review): the whitespace-only key is written as " " here; verify its
  # exact spelling against the parser before relying on it.
  1327. def test_DR_line
  1328. data = "
  1329. DR EMBL; M68939; AAA26107.1; -; Genomic_DNA.
  1330. DR EMBL; U56386; AAB72034.1; -; mRNA."
  1331. sp = SPTR.new(data)
  1332. assert_equal([["M68939", "AAA26107.1", "-", "Genomic_DNA"],
  1333. ["U56386", "AAB72034.1", "-", "mRNA"]],
  1334. sp.dr['EMBL'])
  1335. assert_equal([{" "=>"-",
  1336. "Version"=>"AAA26107.1",
  1337. "Accession"=>"M68939",
  1338. "Molecular Type"=>"Genomic_DNA"},
  1339. {" "=>"-",
  1340. "Version"=>"AAB72034.1",
  1341. "Accession"=>"U56386",
  1342. "Molecular Type"=>"mRNA"}],
  1343. sp.dr('EMBL'))
  1344. end
  1345. # New feature (FT) keys and redefinition of existing FT keys
  # (no test for this release note is defined in this class)
  1346. end
  1347. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_4
  # Regression test for UniProt release 5.4, which allows several COFACTOR
  # comment blocks in one entry.
  1348. class TestSPTRUniProtRel5_4 < Test::Unit::TestCase
  1349. # Multiple comment line (CC) topics COFACTOR
  # Both accessors, cc['COFACTOR'] and cc('COFACTOR'), are expected to return
  # the same two-element array, one string per COFACTOR block.
  1350. def test_multiple_cofactors
  1351. data = "CC -!- COFACTOR: Binds 1 2Fe-2S cluster per subunit (By similarity).
  1352. CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."
  1353. sp = SPTR.new(data)
  1354. assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).",
  1355. "Binds 1 Fe(2+) ion per subunit (By similarity)."],
  1356. sp.cc['COFACTOR'])
  1357. assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).",
  1358. "Binds 1 Fe(2+) ion per subunit (By similarity)."],
  1359. sp.cc('COFACTOR'))
  1360. end
  1361. end
  1362. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_0
  # Tests for the OG (OrGanelle) line vocabulary of UniProt release 6.0.
  class TestSPTRUniProtRel6_0 < Test::Unit::TestCase

    # Changes in the OG (OrGanelle) line
    #
    # Each case pairs a raw OG line with the list of organelle terms that
    # SPTR#og is expected to return for it.
    def test_OG_line
      cases = [
        ["OG Plastid.",                             ['Plastid']],
        ["OG Plastid; Apicoplast.",                 ['Plastid', 'Apicoplast']],
        ["OG Plastid; Chloroplast.",                ['Plastid', 'Chloroplast']],
        ["OG Plastid; Cyanelle.",                   ['Plastid', 'Cyanelle']],
        ["OG Plastid; Non-photosynthetic plastid.", ['Plastid', 'Non-photosynthetic plastid']]
      ]

      # An ordered array (not a hash) keeps the assertions in the same
      # sequence as the original straight-line test.
      cases.each do |og_line, organelles|
        assert_equal(organelles, SPTR.new(og_line).og)
      end
    end
  end
  1383. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_1
  # Tests for the METAL feature annotation change of UniProt release 6.1.
  class TestSPTRUniProtRel6_1 < Test::Unit::TestCase

    # Annotation changes concerning the feature key METAL
    #
    # The old style lists both metals in one FT line; the new style uses one
    # FT line per metal at the same position.
    def test_FT_metal
      old_entry = SPTR.new("FT METAL 61 61 Copper and zinc.")
      assert_equal([metal_feature_at_61('Copper and zinc.')],
                   old_entry.ft['METAL'])

      new_data = ["FT METAL 61 61 Copper.",
                  "FT METAL 61 61 Zinc."].join("\n")
      new_entry = SPTR.new(new_data)
      assert_equal([metal_feature_at_61('Copper.'),
                    metal_feature_at_61('Zinc.')],
                   new_entry.ft['METAL'])
    end

    private

    # Builds the expected feature hash for a METAL site spanning residue 61
    # with the given description text.
    def metal_feature_at_61(description)
      {'From' => 61,
       'To' => 61,
       'Description' => description,
       'FTId' => '',
       'diff' => [],
       'original' => ["METAL", "61", "61", description, ""]}
    end
  end
  1414. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_5
  # Tests for the HI line format of UniProt release 6.5 (keywlist.txt).
  class TestSPTRUniProtRel6_5 < Test::Unit::TestCase

    # Changes in the keywlist.txt file
    # * Modification of the HI line format:
    #
    # HI Category: Keyword_1; ...; Keyword_n; Described_Keyword.
    # The first term listed in an HI line is a category. It is followed by a
    # hierarchical list of keywords of that category and ends with the
    # described keyword. There can be more than one HI line of the same
    # category in one keyword entry.
    def test_HI_line
      data = ["HI Molecular function: Ionic channel; Calcium channel.",
              "HI Biological process: Transport; Ion transport; Calcium transport; Calcium channel.",
              "HI Ligand: Calcium; Calcium channel."].join("\n")
      entry = SPTR.new(data)

      # Every line describes the same keyword; only the category and the
      # hierarchy leading up to it differ.
      hierarchies = [
        ['Molecular function', ['Ionic channel']],
        ['Biological process', ['Transport', 'Ion transport', 'Calcium transport']],
        ['Ligand',             ['Calcium']]
      ]
      expected = hierarchies.map do |category, ancestors|
        {'Category' => category,
         'Keywords' => ancestors,
         'Keyword' => 'Calcium channel'}
      end

      assert_equal(expected, entry.hi)
    end
  end
  1437. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.0
  # Tests for the DT line wording and the CC copyright block of UniProt
  # release 7.0.
  class TestSPTRUniProtRel7_0 < Test::Unit::TestCase

    # Changes concerning dates and versions numbers (DT lines)
    #
    # Covers one Swiss-Prot style and one TrEMBL style triple of DT lines.
    # Each triple is given as [created, sequence, annotation] texts.
    def test_DT_line
      triples = [
        ["01-JAN-1998, integrated into UniProtKB/Swiss-Prot.",
         "15-OCT-2001, sequence version 3.",
         "01-APR-2004, entry version 14."],
        ["01-FEB-1999, integrated into UniProtKB/TrEMBL.",
         "15-OCT-2000, sequence version 2.",
         "15-DEC-2004, entry version 5."]
      ]

      triples.each do |created, sequence, annotation|
        data = ["DT #{created}",
                "DT #{sequence}",
                "DT #{annotation}"].join("\n")
        entry = SPTR.new(data)
        assert_equal({"sequence" => sequence,
                      "annotation" => annotation,
                      "created" => created},
                     entry.dt)
      end
    end

    # Addition of a feature (FT) key CHAIN over the whole sequence length

    # Changes concerning the copyright statement
    #
    # An entry containing only the copyright block is expected to yield an
    # empty comment hash.
    def test_CC_copyright_statement
      rule = "CC -----------------------------------------------------------------------"
      data = [rule,
              "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms",
              "CC Distributed under the Creative Commons Attribution-NoDerivs License",
              rule].join("\n")
      entry = SPTR.new(data)
      assert_equal({}, entry.cc)
    end
  end
  1469. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.6
  # Tests for very long sequences as allowed since UniProt release 7.6.
  class TestSPTRUniProtRel7_6 < Test::Unit::TestCase

    # Sequences with over 10000 amino acids in UniProtKB/Swiss-Prot
    #
    # The fixture repeats one 60-residue sequence line 200 times, so the
    # sequence body holds 60 * 200 = 12000 residues. The SQ header still
    # declares "393 AA"; only the parsed sequence length is checked here.
    def test_10000aa
      data = ["SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64;\n",
              " MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200,
              "//\n"].join
      sp = SPTR.new(data)
      # assert_equal, not assert: assert(12000, size) treats 12000 as the
      # (always truthy) tested value and the size as the failure message,
      # so it could never fail.
      assert_equal(12000, sp.seq.size)
    end
  end
  1481. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel8.0
  # Tests for the format changes of UniProt release 8.0: VAR_SEQ feature key,
  # ALTERNATIVE PRODUCTS comment syntax, WEB RESOURCE topic and OH lines.
  class TestSPTRUniProtRel8_0 < Test::Unit::TestCase

    # Replacement of the feature key VARSPLIC by VAR_SEQ
    def test_FT_VER_SEQ
      entry = SPTR.new(var_seq_lines.join("\n"))
      assert_equal(expected_var_seq, entry.ft('VAR_SEQ'))
    end

    # Syntax modification of the comment line (CC) topic ALTERNATIVE PRODUCTS
    def test_CC_alternative_products
      # CC -!- ALTERNATIVE PRODUCTS:
      # CC Event=Event(, Event)*; Named isoforms=Number_of_isoforms;
      # (CC Comment=Free_text;)?
      # (CC Name=Isoform_name;( Synonyms=Synonym(, Synonym)*;)?
      # CC IsoId=Isoform_identifier(, Isoform_identifer)*;
      # CC Sequence=(Displayed|External|Not described|Feature_identifier(, Feature_identifier)*);
      # (CC Note=Free_text;)?)+
      # Note: Variable values are represented in italics. Perl-style
      # multipliers indicate whether a pattern (as delimited by parentheses)
      # is optional (?), may occur 0 or more times (*), or 1 or more times
      # (+). Alternative values are separated by a pipe symbol (|).
      cc_lines = [
        "CC -!- ALTERNATIVE PRODUCTS:",
        "CC Event=Alternative splicing, Alternative initiation; Named isoforms=3;",
        "CC Comment=Isoform 1 and isoform 2 arise due to the use of two",
        "CC alternative first exons joined to a common exon 2 at the same",
        "CC acceptor site but in different reading frames, resulting in two",
        "CC completely different isoforms;",
        "CC Name=1; Synonyms=p16INK4a;",
        "CC IsoId=O77617-1; Sequence=Displayed;",
        "CC Name=3;",
        "CC IsoId=O77617-2; Sequence=VSP_004099;",
        "CC Note=Produced by alternative initiation at Met-35 of isoform 1;",
        "CC Name=2; Synonyms=p19ARF;",
        "CC IsoId=O77618-1; Sequence=External;"
      ]
      # The entry also carries the VAR_SEQ feature that isoform 3 refers to.
      entry = SPTR.new((cc_lines + var_seq_lines).join("\n"))

      isoforms = [
        {"IsoId" => ["O77617-1"],
         "Name" => "1",
         "Synonyms" => ["p16INK4a"],
         "Sequence" => ["Displayed"]},
        {"IsoId" => ["O77617-2"],
         "Name" => "3",
         "Synonyms" => [],
         "Sequence" => ["VSP_004099"]},
        {"IsoId" => ["O77618-1"],
         "Name" => "2",
         "Synonyms" => ["p19ARF"],
         "Sequence" => ["External"]}
      ]
      products = {
        "Comment" => "Isoform 1 and isoform 2 arise due to the use of two alternative first exons joined to a common exon 2 at the same acceptor site but in different reading frames, resulting in two completely different isoforms",
        "Named isoforms" => "3",
        "Variants" => isoforms,
        "Event" => ["Alternative splicing", "Alternative initiation"]
      }

      assert_equal(products, entry.cc("ALTERNATIVE PRODUCTS"))
      assert_equal(expected_var_seq, entry.ft("VAR_SEQ"))
    end

    # Replacement of the comment line (CC) topic DATABASE by WEB RESOURCE
    def test_CC_web_resource
      # CC -!- DATABASE: NAME=ResourceName[; NOTE=FreeText][; WWW=WWWAddress][; FTP=FTPAddress].
      # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText]; URL=WWWAddress.
      # The length of these lines may exceed 75 characters because long URL
      # addresses are not wrapped into multiple lines.
      #
      # Placeholder: no parser behaviour is exercised yet.
      assert(true)
    end

    # Introduction of the new line type OH (Organism Host) for viral hosts
    def test_OH_lines
      # [NCBI taxonomy id, host name] for every OH line, in input order.
      hosts = [
        ['4681',   'Allium porrum (Leek)'],
        ['4045',   'Apium graveolens (Celery)'],
        ['161934', 'Beta vulgaris (Sugar beet)'],
        ['38871',  'Fraxinus (ash trees)'],
        ['4236',   'Lactuca sativa (Garden lettuce)'],
        ['4081',   'Lycopersicon esculentum (Tomato)'],
        ['39639',  'Narcissus pseudonarcissus (Daffodil)'],
        ['3885',   'Phaseolus vulgaris (Kidney bean) (French bean)'],
        ['35938',  'Robinia pseudoacacia (Black locust)'],
        ['23216',  'Rubus (bramble)'],
        ['4113',   'Solanum tuberosum (Potato)'],
        ['13305',  'Tulipa'],
        ['3603',   'Vitis']
      ]

      organism_lines = [
        'OS Tomato black ring virus (strain E) (TBRV).',
        'OC Viruses; ssRNA positive-strand viruses, no DNA stage; Comoviridae;',
        'OC Nepovirus; Subgroup B.',
        'OX NCBI_TaxID=12277;'
      ]
      host_lines = hosts.map { |tax_id, name| "OH NCBI_TaxID=#{tax_id}; #{name}." }
      data = (organism_lines + host_lines).join("\n")

      expected = hosts.map do |tax_id, name|
        {'NCBI_TaxID' => tax_id, 'HostName' => name}
      end

      entry = SPTR.new(data)
      assert_equal(expected, entry.oh)
    end

    # A malformed OH line (here "23216x:" in place of "23216;") is expected
    # to make SPTR#oh raise ArgumentError.
    def test_OH_line_exception
      data = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
              "OH NCBI_TaxID=23216x: Rubus (bramble)."].join("\n")
      entry = SPTR.new(data)
      assert_raise(ArgumentError) { entry.oh }
    end

    private

    # Raw FT lines describing the VAR_SEQ feature used by the VAR_SEQ and
    # ALTERNATIVE PRODUCTS tests.
    def var_seq_lines
      ["FT VAR_SEQ 1 34 Missing (in isoform 3).",
       "FT /FTId=VSP_004099."]
    end

    # Expected ft('VAR_SEQ') result for an entry built from var_seq_lines.
    def expected_var_seq
      [{'From' => 1,
        'To' => 34,
        'Description' => 'Missing (in isoform 3).',
        'diff' => ['', nil],
        'FTId' => 'VSP_004099',
        'original' => ["VAR_SEQ", "1", "34", "Missing (in isoform 3).",
                       "/FTId=VSP_004099."]}]
    end
  end
  # Regression tests for OS (organism species) line parsing, named after the
  # UniProt entries that exposed each case.
  class TestOSLine < Test::Unit::TestCase

    # Organism name that starts with a lower-case letter.
    def test_uncapitalized_letter_Q32725_9POAL
      organism = SPTR.new("OS unknown cyperaceous sp.\n").os.first
      assert_equal('unknown cyperaceous sp.', organism['os'])
    end

    # Organism name containing a period ("sp.") before its end; the full
    # text including the final period is expected back.
    def test_period_trancation_O63147
      organism = SPTR.new("OS Hippotis sp. Clark and Watts 825.\n").os.first
      assert_equal('Hippotis sp. Clark and Watts 825.', organism['os'])
    end
  end
  1610. end # module Bio