PageRenderTime 52ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/test/unit/bio/db/embl/test_sptr_rel201107.rb

https://github.com/phylogenomics/bioruby
Ruby | 1852 lines | 1540 code | 198 blank | 114 comment | 22 complexity | e5c09fbc9f5a3dea51063976603b3fa8 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. #
  2. # test/unit/bio/db/embl/test_sptr_rel201107.rb - Unit test for Bio::SPTR201107
  3. #
  4. # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
  5. # 2011 The Regents of the University of California
  6. # License:: The Ruby License
  7. #
  8. # $Id:$
  9. #
  10. # loading helper routine for testing bioruby
  11. require 'pathname'
  12. load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
  13. 'bioruby_test_helper.rb')).cleanpath.to_s
  14. # libraries needed for the tests
  15. require 'test/unit'
  16. require 'bio/db/embl/sptr201107'
  17. module Bio
  # Unit tests for Bio::SPTR201107, driven by the p53_human_2011_07.uniprot
  # fixture from the test data directory.  Every test checks one accessor
  # of the parsed entry against the value expected for that fixture.
  class TestSPTR201107 < Test::Unit::TestCase

    # Parses the fixture entry into @obj before each test.
    def setup
      data = File.read(File.join(BioRubyTestDataPath,
                                 'uniprot', 'p53_human_2011_07.uniprot'))
      @obj = Bio::SPTR201107.new(data)
    end

    def test_id_line
      assert(@obj.id_line)
    end

    def test_id_line_entry_name
      assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
    end

    def test_id_line_data_class
      assert_equal('Reviewed', @obj.id_line('DATA_CLASS'))
    end

    def test_id_line_sequence_length
      assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
    end

    # entry, entry_name and entry_id must all report the same identifier.
    def test_entry
      entry = 'P53_HUMAN'
      assert_equal(entry, @obj.entry)
      assert_equal(entry, @obj.entry_name)
      assert_equal(entry, @obj.entry_id)
    end

    # sequence_length and aalen must agree.
    def test_sequence_length
      seqlen = 393
      assert_equal(seqlen, @obj.sequence_length)
      assert_equal(seqlen, @obj.aalen)
    end

    # ac and accessions must return the same list.
    def test_ac
      acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
             "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q2XN98",
             "Q3LRW1", "Q3LRW2", "Q3LRW3", "Q3LRW4", "Q3LRW5", "Q86UG1",
             "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
             "Q9NZD0", "Q9UBI2", "Q9UQ61"]
      assert_equal(acs, @obj.ac)
      assert_equal(acs, @obj.accessions)
    end

    def test_accession
      assert_equal('P04637', @obj.accession)
    end

    def test_dr
      assert_equal(48, @obj.dr.size)
      assert_equal(74, @obj.dr['GO'].size)
      assert_equal([["IPR008967", "p53-like_TF_DNA-bd"],
                    ["IPR012346", "p53/RUNT-type_TF_DNA-bd"],
                    ["IPR011615", "p53_DNA-bd"],
                    ["IPR010991", "p53_tetrameristn"],
                    ["IPR013872", "p53_transactivation_domain"],
                    ["IPR002117", "p53_tumour_suppressor"]],
                   @obj.dr['InterPro'])
    end

    # dr(key) is expected to return an array of hashes, one per DR line.
    def test_dr_with_key
      pfam = [
        { " " => "1",
          "Version" => "P53",
          "Accession" => "PF00870",
          "Molecular Type" => nil
        },
        { " " => "1",
          "Version" => "P53_TAD",
          "Accession" => "PF08563",
          "Molecular Type" => nil
        },
        { " " => "1",
          "Version" => "P53_tetramer",
          "Accession" => "PF07710",
          "Molecular Type" => nil
        }
      ]
      assert_equal(pfam, @obj.dr('Pfam'))
      embl3 = {
        " " => "JOINED",
        "Version" => "AAA59987.1",
        "Accession" => "M13113",
        "Molecular Type" => "Genomic_DNA"
      }
      assert_equal(embl3, @obj.dr('EMBL')[3])
    end

    def test_dr_with_key_empty
      assert_equal([], @obj.dr('NOT_A_DATABASE'))
    end

    def test_dt
      assert(@obj.dt)
    end

    def test_dt_created
      assert_equal('13-AUG-1987, integrated into UniProtKB/Swiss-Prot.', @obj.dt('created'))
    end

    def test_dt_sequence
      assert_equal('24-NOV-2009, sequence version 4.',
                   @obj.dt('sequence'))
    end

    def test_dt_annotation
      assert_equal('31-MAY-2011, entry version 186.',
                   @obj.dt('annotation'))
    end

    def test_de
      assert(@obj.de)
    end

    def test_protein_name
      assert_equal("Cellular tumor antigen p53", @obj.protein_name)
    end

    def test_synonyms
      ary = ["Antigen NY-CO-13", "Phosphoprotein p53", "Tumor suppressor p53"]
      assert_equal(ary, @obj.synonyms)
    end

    def test_gn
      assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
                   @obj.gn)
    end

    # gn_uniprot_parser is invoked via __send__ so the call works whether or
    # not the method is private.
    def test_gn_uniprot_parser
      assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
                   @obj.__send__(:gn_uniprot_parser))
    end

    # gn_old_parser is invoked via __send__ for the same reason as above.
    def test_gn_old_parser
      assert_equal([["Name=TP53; Synonyms=P53;"]],
                   @obj.__send__(:gn_old_parser))
    end

    def test_gene_names
      assert_equal(["TP53"], @obj.gene_names)
    end

    def test_gene_name
      assert_equal('TP53', @obj.gene_name)
    end

    def test_os
      assert(@obj.os)
    end

    def test_os_access
      assert_equal("Homo sapiens (Human)", @obj.os(0))
    end

    def test_os_access2
      assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0])
    end

    def test_og_1
      assert_og(['Plastid', 'Chloroplast'], "OG Plastid; Chloroplast.")
    end

    def test_og_2
      assert_og(['Mitochondrion'], "OG Mitochondrion.")
    end

    def test_og_3
      assert_og(["Plasmid sym pNGR234a"], "OG Plasmid sym pNGR234a.")
    end

    def test_og_4
      assert_og(['Plastid', 'Cyanelle'], "OG Plastid; Cyanelle.")
    end

    def test_og_5
      assert_og(["Plasmid pSymA (megaplasmid 1)"],
                "OG Plasmid pSymA (megaplasmid 1).")
    end

    def test_og_6
      assert_og(['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'],
                "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.")
    end

    def test_oc
      assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
                    "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
                    "Euarchontoglires", "Primates", "Haplorrhini", "Catarrhini",
                    "Hominidae", "Homo"],
                   @obj.oc)
    end

    def test_ox
      assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
    end

    def test_ref # Bio::SPTR#ref
      assert_instance_of(Array, @obj.ref)
    end

    def test_cc
      assert_instance_of(Hash, @obj.cc)
    end

    def test_cc_database
      wr = [{"NAME"=>"IARC TP53 mutation database",
             "NOTE"=>"Somatic and germline TP53 mutations in human cancers",
             "URL"=>"http://www-p53.iarc.fr/"},
            {"NAME"=>"p53 web site at the Institut Curie",
             "NOTE"=>nil,
             "URL"=>"http://p53.free.fr/"},
            {"NAME"=>"Atlas of Genetics and Cytogenetics in Oncology and Haematology",
             "NOTE"=>nil,
             "URL"=>"http://atlasgeneticsoncology.org/Genes/P53ID88.html"},
            {"NAME"=>"GeneReviews",
             "NOTE"=>nil,
             "URL"=>"http://www.ncbi.nlm.nih.gov/sites/GeneTests/lab/gene/TP53"},
            {"NAME"=>"NIEHS-SNPs",
             "NOTE"=>nil,
             "URL"=>"http://egp.gs.washington.edu/data/tp53/"},
            {"NAME"=>"SHMPD",
             "NOTE"=>"The Singapore human mutation and polymorphism database",
             "URL"=>"http://shmpd.bii.a-star.edu.sg/gene.php?genestart=A&genename=TP53"},
            {"NAME"=>"Wikipedia",
             "NOTE"=>"P53 entry",
             "URL"=>"http://en.wikipedia.org/wiki/P53"}]
      assert_equal(wr, @obj.cc('WEB RESOURCE'))
    end

    def test_cc_alternative_products
      ap = {"Event"=>["Alternative promoter usage", "Alternative splicing"],
            "Named isoforms"=>"9",
            "Comment"=>"",
            "Variants"=>
            [{"Name"=>"1",
              "Synonyms"=>["p53", "p53alpha"],
              "IsoId"=>["P04637-1"],
              "Sequence"=>["Displayed"]},
             {"Name"=>"2",
              "Synonyms"=>["I9RET", "p53beta"],
              "IsoId"=>["P04637-2"],
              "Sequence"=>["VSP_006535", "VSP_006536"]},
             {"Name"=>"3",
              "Synonyms"=>["p53gamma"],
              "IsoId"=>["P04637-3"],
              "Sequence"=>["VSP_040560", "VSP_040561"]},
             {"Name"=>"4",
              "Synonyms"=>["Del40-p53", "Del40-p53alpha", "p47"],
              "IsoId"=>["P04637-4"],
              "Sequence"=>["VSP_040832"]},
             {"Name"=>"5",
              "Synonyms"=>["Del40-p53beta"],
              "IsoId"=>["P04637-5"],
              "Sequence"=>["VSP_040832", "VSP_006535", "VSP_006536"]},
             {"Name"=>"6",
              "Synonyms"=>["Del40-p53gamma"],
              "IsoId"=>["P04637-6"],
              "Sequence"=>["VSP_040832", "VSP_040560", "VSP_040561"]},
             {"Name"=>"7",
              "Synonyms"=>["Del133-p53", "Del133-p53alpha"],
              "IsoId"=>["P04637-7"],
              "Sequence"=>["VSP_040833"]},
             {"Name"=>"8",
              "Synonyms"=>["Del133-p53beta"],
              "IsoId"=>["P04637-8"],
              "Sequence"=>["VSP_040833", "VSP_006535", "VSP_006536"]},
             {"Name"=>"9",
              "Synonyms"=>["Del133-p53gamma"],
              "IsoId"=>["P04637-9"],
              "Sequence"=>["VSP_040833", "VSP_040560", "VSP_040561"]}]}
      assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
    end

    # The fixture is expected to yield nil for the MASS SPECTROMETRY topic.
    def test_cc_mass_spectrometry
      assert_nil(@obj.cc('MASS SPECTROMETRY'))
    end

    def test_kw
      keywords = ["3D-structure", "Acetylation", "Activator",
                  "Alternative promoter usage", "Alternative splicing", "Apoptosis",
                  "Cell cycle", "Complete proteome", "Cytoplasm",
                  "Disease mutation", "DNA-binding", "Endoplasmic reticulum",
                  "Glycoprotein", "Host-virus interaction", "Isopeptide bond",
                  "Li-Fraumeni syndrome", "Metal-binding", "Methylation",
                  "Nucleus", "Phosphoprotein", "Polymorphism",
                  "Transcription", "Transcription regulation", "Tumor suppressor",
                  "Ubl conjugation", "Zinc"]
      assert_equal(keywords, @obj.kw)
    end

    def test_ft
      assert(@obj.ft)
      name = 'DNA_BIND'
      assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292,
                     "Description"=>"",
                     "original" => ['DNA_BIND', '102', '292', '', '']}],
                   @obj.ft[name])
    end

    def test_sq
      assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653},
                   @obj.sq)
    end

    def test_sq_crc64
      assert_equal("AD5C149FD8106131", @obj.sq('CRC64'))
    end

    # 'mw', 'molecular' and 'weight' must all select the molecular weight.
    def test_sq_mw
      mw = 43653
      assert_equal(mw, @obj.sq('mw'))
      assert_equal(mw, @obj.sq('molecular'))
      assert_equal(mw, @obj.sq('weight'))
    end

    # 'len', 'length' and 'AA' must all select the sequence length.
    def test_sq_len
      length = 393
      assert_equal(length, @obj.sq('len'))
      assert_equal(length, @obj.sq('length'))
      assert_equal(length, @obj.sq('AA'))
    end

    # seq and aaseq must return the same sequence.
    def test_seq
      seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
      assert_equal(seq, @obj.seq)
      assert_equal(seq, @obj.aaseq)
    end

    private

    # Stores +og_line+ in @orig['OG'] of the parsed entry and checks that
    # #og then returns +expected+.
    def assert_og(expected, og_line)
      # Write through the instance variable directly rather than building
      # Ruby source from the fixture text and eval'ing it.
      @obj.instance_variable_get(:@orig)['OG'] = og_line
      assert_equal(expected, @obj.og)
    end
  end # class TestSPTR201107
  320. class TestSPTRCC < Test::Unit::TestCase
  # ALLERGEN: information relevant to allergenic proteins.
  # Checks that both cc['ALLERGEN'] and cc('ALLERGEN') give the comment
  # text wrapped in a one-element array.
  def test_allergen
    expected = ['Causes an allergic reaction in human.']
    entry = Bio::SPTR201107.new('CC -!- ALLERGEN: Causes an allergic reaction in human.')
    assert_equal(expected, entry.cc['ALLERGEN'])
    assert_equal(expected, entry.cc('ALLERGEN'))
  end
  # Checks that hash-style access (cc['ALTERNATIVE PRODUCTS']) returns the
  # continuation lines of the topic joined into a single string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_alternative_products_access_as_hash
    flatfile = [
      "CC -!- ALTERNATIVE PRODUCTS:",
      "CC Event=Alternative initiation; Named isoforms=2;",
      "CC Name=Long;",
      "CC IsoId=P68250-1; Sequence=Displayed;",
      "CC Name=Short;",
      "CC IsoId=P68250-2; Sequence=VSP_018631;",
      "CC Note=Contains a N-acetylmethionine at position 1 (By",
      "CC similarity);"
    ].join("\n")
    expected = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['ALTERNATIVE PRODUCTS'])
  end
  # ALTERNATIVE PRODUCTS: description of the existence of related protein
  # sequence(s) produced by alternative splicing of the same gene,
  # alternative promoter usage, ribosomal frameshifting or by the use of
  # alternative initiation codons; see 3.21.15.
  # Possible events: Alternative promoter usage, Alternative splicing,
  # Alternative initiation, Ribosomal frameshifting.
  # This case covers an "Alternative initiation" event with two isoforms.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_alternative_products_ai
    flatfile = [
      "CC -!- ALTERNATIVE PRODUCTS:",
      "CC Event=Alternative initiation; Named isoforms=2;",
      "CC Name=Long;",
      "CC IsoId=P68250-1; Sequence=Displayed;",
      "CC Name=Short;",
      "CC IsoId=P68250-2; Sequence=VSP_018631;",
      "CC Note=Contains a N-acetylmethionine at position 1 (By",
      "CC similarity);"
    ].join("\n")
    long_isoform = {"IsoId"=>["P68250-1"],
                    "Name"=>"Long",
                    "Synonyms" => [],
                    "Sequence"=>["Displayed"]}
    short_isoform = {"IsoId"=>["P68250-2"],
                     "Name"=>"Short",
                     "Synonyms" => [],
                     "Sequence"=>["VSP_018631"]}
    expected = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[long_isoform, short_isoform],
                "Event"=>["Alternative initiation"]}
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # ALTERNATIVE PRODUCTS with an "Alternative splicing" event: two isoforms,
  # the second carrying a synonym and two sequence identifiers.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_alternative_products_as
    flatfile = [
      "CC -!- ALTERNATIVE PRODUCTS:",
      "CC Event=Alternative splicing; Named isoforms=2;",
      "CC Name=1;",
      "CC IsoId=P04637-1; Sequence=Displayed;",
      "CC Name=2; Synonyms=I9RET;",
      "CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;",
      "CC Note=Seems to be non-functional. Expressed in quiescent",
      "CC lymphocytes;"
    ].join("\n")
    isoform1 = {"Name"=>"1",
                "IsoId"=>["P04637-1"],
                "Synonyms"=>[],
                "Sequence"=>["Displayed"]}
    isoform2 = {"IsoId"=>["P04637-2"],
                "Name"=>"2",
                "Synonyms"=>["I9RET"],
                "Sequence"=>["VSP_006535", "VSP_006536"]}
    expected = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[isoform1, isoform2],
                "Event"=>["Alternative splicing"]}
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # ALTERNATIVE PRODUCTS with two events (alternative promoter usage and
  # alternative splicing), a Comment field and five named isoforms.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_alternative_products_apu
    flatfile = [
      "CC -!- ALTERNATIVE PRODUCTS:",
      "CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;",
      "CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;",
      "CC Name=1; Synonyms=AAT-1M;",
      "CC IsoId=Q7Z4T9-1; Sequence=Displayed;",
      "CC Name=2;",
      "CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;",
      "CC Note=No experimental confirmation available;",
      "CC Name=3;",
      "CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;",
      "CC Name=4; Synonyms=AAT1-alpha;",
      "CC IsoId=Q7Z4T9-4; Sequence=VSP_014908;",
      "CC Note=May be produced by alternative promoter usage;",
      "CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;",
      "CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;",
      "CC Note=May be produced by alternative promoter usage;"
    ].join("\n")
    variants = [
      {"Name"=>"1",
       "IsoId"=>["Q7Z4T9-1"],
       "Synonyms"=>["AAT-1M"],
       "Sequence"=>["Displayed"]},
      {"Name"=>"2",
       "IsoId"=>["Q7Z4T9-2"],
       "Synonyms" => [],
       "Sequence"=>["VSP_014910", "VSP_014911"]},
      {"Name"=>"3",
       "IsoId"=>["Q7Z4T9-3"],
       "Synonyms" => [],
       "Sequence"=>["VSP_014907", "VSP_014912"]},
      {"Name"=>"4",
       "IsoId"=>["Q7Z4T9-4"],
       "Synonyms"=>["AAT1-alpha"],
       "Sequence"=>["VSP_014908"]},
      {"Name"=>"5",
       "IsoId"=>["Q7Z4T9-5"],
       "Synonyms"=>["AAT1-beta", "AAT1-gamma"],
       "Sequence"=>["VSP_014909"]}
    ]
    expected = {"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist",
                "Named isoforms"=>"5",
                "Variants"=>variants,
                "Event"=>["Alternative promoter usage", "Alternative splicing"]}
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # An entry built from an empty string is expected to give an empty hash
  # for cc('ALTERNATIVE PRODUCTS').
  def test_alternative_products_rf
    entry = Bio::SPTR201107.new("")
    assert_equal({}, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  # BIOPHYSICOCHEMICAL PROPERTIES: description of the information relevant
  # to biophysical and physicochemical data and information on pH
  # dependence, temperature dependence, kinetic parameters, redox
  # potentials, and maximal absorption; see 3.21.8.
  # Two fixtures are checked: a partial one (kinetic parameters and pH
  # dependence only) and the full syntax template of the topic.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_biophysicochemical_properties
    partial = [
      'CC -!- BIOPHYSICOCHEMICAL PROPERTIES:',
      'CC Kinetic parameters:',
      'CC KM=45 uM for AdoMet;',
      'CC Vmax=32 uM/h/mg enzyme;',
      'CC pH dependence:',
      'CC Optimum pH is 8.2;'
    ].join("\n")
    partial_entry = Bio::SPTR201107.new(partial)
    raw_expected = ["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"]
    assert_equal(raw_expected, partial_entry.cc['BIOPHYSICOCHEMICAL PROPERTIES'])
    parsed_expected = {"Redox potential" => "",
                       "Temperature dependence" => "",
                       "Kinetic parameters" => {"KM" => "45 uM for AdoMet",
                                                "Vmax" => "32 uM/h/mg enzyme"},
                       "Absorption" => {},
                       "pH dependence" => "Optimum pH is 8.2"}
    assert_equal(parsed_expected, partial_entry.cc('BIOPHYSICOCHEMICAL PROPERTIES'))

    # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
    template = [
      "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
      "CC Absorption:",
      "CC Abs(max)=xx nm;",
      "CC Note=free_text;",
      "CC Kinetic parameters:",
      "CC KM=xx unit for substrate [(free_text)];",
      "CC Vmax=xx unit enzyme [free_text];",
      "CC Note=free_text;",
      "CC pH dependence:",
      "CC free_text;",
      "CC Redox potential:",
      "CC free_text;",
      "CC Temperature dependence:",
      "CC free_text;"
    ].join("\n")
    template_entry = Bio::SPTR201107.new(template)
    template_expected = {"Redox potential"=>"free_text",
                         "Temperature dependence"=>"free_text",
                         "Kinetic parameters"=>
                         {"KM"=>"xx unit for substrate [(free_text)]",
                          "Note"=>"free_text",
                          "Vmax"=>"xx unit enzyme [free_text]"},
                         "Absorption"=>{"Note"=>"free_text", "Abs(max)"=>"xx nm"},
                         "pH dependence"=>"free_text"}
    assert_equal(template_expected, template_entry.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
  end
  # BIOTECHNOLOGY: description of the use of a specific protein in a
  # biotechnological process.  Checks that the wrapped comment lines come
  # back from cc['BIOTECHNOLOGY'] as one joined string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_biotechnology
    flatfile = [
      'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in',
      'CC improved ripening tomato by Monsanto. ACC is the immediate',
      'CC precursor of the phytohormone ethylene which is involved in the',
      'CC control of ripening. ACC deaminase reduces ethylene biosynthesis',
      'CC and thus extends the shelf life of fruits and vegetables.'
    ].join("\n")
    expected = ["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['BIOTECHNOLOGY'])
  end
  # CATALYTIC ACTIVITY: description of the reaction(s) catalyzed by an
  # enzyme [1].  The expected text has words hyphenated across line breaks
  # rejoined without a space.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_catalytic_activity
    flatfile = [
      'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-',
      'CC methyladenine, 3-methylguanine, 7-methylguanine and 7-',
      'CC methyladenine.'
    ].join("\n")
    expected = ["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['CATALYTIC ACTIVITY'])
  end
  # CAUTION: warning about possible errors and/or grounds for confusion.
  # cc['CAUTION'] is expected to give the text inside an array, while
  # cc('CAUTION') is expected to give the bare string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_caution
    flatfile = [
      'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon',
      'CC in position 480 which was translated as a stop codon to shorten',
      'CC the sequence.'
    ].join("\n")
    text = "Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal([text], entry.cc['CAUTION'])
    assert_equal(text, entry.cc('CAUTION'))
  end
  # COFACTOR: description of any non-protein substance required by an
  # enzyme for its catalytic activity.  Two COFACTOR comments are expected
  # to give a two-element array from both access styles.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_cofactor
    flatfile = [
      'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.',
      'CC -!- COFACTOR: Mg(2+).'
    ].join("\n")
    expected = ["Cl(-). Is unique in requiring Cl(-) for its activity.",
                "Mg(2+)."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['COFACTOR'])
    assert_equal(expected, entry.cc('COFACTOR'))
  end
  # DEVELOPMENTAL STAGE: description of the developmentally-specific
  # expression of mRNA or protein.  Hash-style access is expected to give
  # an array, keyed access the bare string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_developmental_stage
    flatfile = [
      'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35',
      'CC with higher levels detected at day 56. Isoform 1 is not detected',
      'CC in males of any age.'
    ].join("\n")
    text = "In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal([text], entry.cc['DEVELOPMENTAL STAGE'])
    assert_equal(text, entry.cc('DEVELOPMENTAL STAGE'))
  end
  # DISEASE: description of the disease(s) associated with a deficiency of
  # a protein.  Hash-style access is expected to give an array, keyed
  # access the bare string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_disease
    flatfile = [
      'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral',
      'CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This',
      'CC disorder is characterized by amyloid deposits in cerebral vessels.',
      'CC The principal clinical characteristics are recurring cerebral',
      'CC hemorrhages, sometimes preceded by migrainous headaches or mental',
      'CC cleavage. Various types of HCHWAD are known. They differ in onset',
      'CC and aggressiveness of the disease. The Iowa type demonstrated no',
      'CC cerebral hemorrhaging but is characterized by progressive',
      'CC cognitive decline. Beta-APP40 is the predominant form of',
      'CC cerebrovascular amyloid.'
    ].join("\n")
    text = "Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal([text], entry.cc['DISEASE'])
    assert_equal(text, entry.cc('DISEASE'))
  end
  # DOMAIN: description of the domain structure of a protein.  Two DOMAIN
  # comments are expected to give the same two-element array from both
  # access styles.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_domain
    flatfile = [
      'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for',
      'CC sorting of membrane proteins to the basolateral surface of',
      'CC epithelial cells.',
      'CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine-',
      'CC phosphorylated proteins is required for the specific binding of',
      'CC the PID domain. However, additional amino acids either N- or C-',
      'CC terminal to the NPXY motif are often required for complete',
      'CC interaction. The PID domain-containing proteins which bind APP',
      'CC require the YENPTY motif for full interaction. These interactions',
      'CC are independent of phosphorylation on the terminal tyrosine',
      'CC residue. The NPXY site is also involved in clathrin-mediated',
      'CC endocytosis (By similarity).'
    ].join("\n")
    expected = ["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
                "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['DOMAIN'])
    assert_equal(expected, entry.cc('DOMAIN'))
  end
  # ENZYME REGULATION: description of an enzyme regulatory mechanism.
  # Hash-style access is expected to give an array, keyed access the bare
  # string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_enzyme_regulation
    flatfile = [
      'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated',
      'CC by the G protein beta and gamma subunit complex.'
    ].join("\n")
    text = "Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal([text], entry.cc['ENZYME REGULATION'])
    assert_equal(text, entry.cc('ENZYME REGULATION'))
  end
  # FUNCTION: general description of the function(s) of a protein.
  # Hash-style access is expected to give an array, keyed access the bare
  # string.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_function
    flatfile = [
      'CC -!- FUNCTION: May play a fundamental role in situations where fine',
      'CC interplay between intracellular calcium and cAMP determines the',
      'CC cellular function. May be a physiologically relevant docking site',
      'CC for calcineurin (By similarity).'
    ].join("\n")
    text = "May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal([text], entry.cc['FUNCTION'])
    assert_equal(text, entry.cc('FUNCTION'))
  end
  # INDUCTION: description of the compound(s) or condition(s) that
  # regulate gene expression.  Hash-style access is expected to give an
  # array, keyed access the bare string.
  def test_induction
    text = "By pheromone (alpha-factor)."
    entry = Bio::SPTR201107.new('CC -!- INDUCTION: By pheromone (alpha-factor).')
    assert_equal([text], entry.cc['INDUCTION'])
    assert_equal(text, entry.cc('INDUCTION'))
  end
  # INTERACTION: conveys information relevant to binary protein-protein
  # interaction; see 3.21.12.  Hash-style access is expected to give the
  # raw joined text; keyed access is expected to give one hash per
  # interaction partner.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_interaction
    flatfile = [
      'CC -!- INTERACTION:',
      'CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;',
      'CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
    ].join("\n")
    entry = Bio::SPTR201107.new(flatfile)
    raw_expected = ["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"]
    assert_equal(raw_expected, entry.cc['INTERACTION'])
    partners = [
      {'SP_Ac' => 'P62158',
       'identifier' => 'CALM1',
       'optional_identifier' => '(xeno)',
       'NbExp' => '1',
       'IntAct' => ['EBI-457011', 'EBI-397435']},
      {'SP_Ac' => 'P62155',
       'identifier' => 'calm1',
       'optional_identifier' => '(xeno)',
       'NbExp' => '1',
       'IntAct' => ['EBI-457011', 'EBI-397568']}
    ]
    assert_equal(partners, entry.cc('INTERACTION'))
  end
  # MASS SPECTROMETRY: reports the exact molecular weight of a protein or
  # part of a protein as determined by mass spectrometric methods; see
  # 3.21.23.  The second comment has no MW_ERR field, for which nil is
  # expected in the parsed hash.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_mass_spectrometry
    flatfile = [
      "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;",
      "CC NOTE=Ref.1.",
      "CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;",
      "CC NOTE=Ref.2."
    ].join("\n")
    entry = Bio::SPTR201107.new(flatfile)
    raw_expected = ["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
                    "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."]
    assert_equal(raw_expected, entry.cc['MASS SPECTROMETRY'])
    measurements = [
      {'MW' => '2894.9',
       'MW_ERR' => '3',
       'METHOD' => 'MALDI',
       'RANGE' => '1-29',
       'NOTE' => 'Ref.1'},
      {'MW' => '2892.2',
       'METHOD' => 'Electrospray',
       'MW_ERR' => nil,
       'RANGE' => '1-29',
       'NOTE' => 'Ref.2'}
    ]
    assert_equal(measurements, entry.cc('MASS SPECTROMETRY'))
  end
  # MISCELLANEOUS: any comment which does not belong to any of the other
  # defined topics.  Checks the joined text from cc['MISCELLANEOUS'].
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_miscellaneous
    flatfile = [
      'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a',
      'CC mitochondrial one.'
    ].join("\n")
    expected = ["There are two isozymes; a cytoplasmic one and a mitochondrial one."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['MISCELLANEOUS'])
  end
  # PATHWAY: description of the metabolic pathway(s) with which a protein
  # is associated.  Hash-style access is expected to give the raw joined
  # text; keyed access is expected to give the pathway split into its
  # components.
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_pathway
    flatfile = [
      'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-',
      'CC phosphate and glycerone phosphate from D-glucose: step 4.'
    ].join("\n")
    entry = Bio::SPTR201107.new(flatfile)
    raw_expected = ["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."]
    assert_equal(raw_expected, entry.cc['PATHWAY'])
    components = ["Carbohydrate degradation",
                  'glycolysis',
                  'D-glyceraldehyde 3-phosphate',
                  'glycerone phosphate from D-glucose',
                  'step 4']
    assert_equal(components, entry.cc('PATHWAY'))
  end
  # PHARMACEUTICAL: description of the use of a protein as a
  # pharmaceutical drug.  Checks the joined text from cc['PHARMACEUTICAL'].
  # NOTE(review): fixture spacing is kept exactly as found; confirm it against the UniProt flat-file column layout.
  def test_pharmaceutical
    flatfile = [
      'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),',
      'CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm',
      'CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic',
      'CC function.'
    ].join("\n")
    expected = ["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."]
    entry = Bio::SPTR201107.new(flatfile)
    assert_equal(expected, entry.cc['PHARMACEUTICAL'])
  end
  # CC topic POLYMORPHISM: description of polymorphism(s).
  #
  # The fixture holds two separate POLYMORPHISM comments; the test expects
  # cc['POLYMORPHISM'] to list them as two merged strings, in input order.
  def test_polymorphism
    lines = ['CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific',
             'CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.',
             'CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP).',
             'CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem',
             'CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-',
             'CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown',
             'CC here) contains one repeat starting at position 415, allele C',
             'CC contains two repeats, allele B contains three repeats and allele A',
             'CC contains four repeats.']
    entry = Bio::SPTR201107.new(lines.join("\n"))

    first_comment = 'Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).'
    second_comment = 'Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats.'
    assert_equal([first_comment, second_comment], entry.cc['POLYMORPHISM'])
  end
  # CC topic PTM: description of any chemical alteration of a polypeptide
  # (proteolytic cleavage, amino acid modifications including crosslinks).
  # This topic complements information given in the feature table or
  # indicates polypeptide modifications for which position-specific data is
  # not available.
  #
  # Two PTM comments go in; cc['PTM'] is expected to return two strings.
  def test_ptm
    lines = ['CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked',
             'CC carbohydrate.',
             'CC -!- PTM: Palmitoylated.']
    entry = Bio::SPTR201107.new(lines.join("\n"))

    expected = ['N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.',
                'Palmitoylated.']
    assert_equal(expected, entry.cc['PTM'])
  end
  # CC topic RNA EDITING: description of any type of RNA editing that leads
  # to one or more amino acid changes.
  #
  # Checks both cc['RNA EDITING'] (merged comment text) and
  # cc('RNA EDITING') (hash with "Modified_positions" and "Note").
  #
  # Only the single-position fixture below is parsed.  A second,
  # multi-position sample used to be assigned to the same local and was
  # overwritten before reaching the parser; that dead store has been removed
  # and the sample is kept here purely as reference data:
  #
  #   CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
  #   CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
  #   CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at
  #   CC positions 50, 78, 104, 260 and 264 are modified to sense codons.
  def test_rna_editing
    data = ['CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the',
            'CC brain. Heteromerically expressed edited GLUR2 (R) receptor',
            'CC complexes are impermeable to calcium, whereas the unedited (Q)',
            'CC forms are highly permeable to divalent ions (By similarity).'].join("\n")
    sp = Bio::SPTR201107.new(data)

    note = 'Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity).'

    assert_equal(["Modified_positions=607; Note=#{note}"],
                 sp.cc['RNA EDITING'])
    assert_equal({ 'Modified_positions' => ['607'], 'Note' => note },
                 sp.cc('RNA EDITING'))
  end
  # CC topic SIMILARITY: description of the similarity(ies) (sequence or
  # structural) of a protein with other proteins.
  #
  # Two one-line SIMILARITY comments are expected back as two strings.
  def test_similarity
    lines = ['CC -!- SIMILARITY: Contains 1 protein kinase domain.',
             'CC -!- SIMILARITY: Contains 1 RGS domain.']
    entry = Bio::SPTR201107.new(lines.join("\n"))

    expected = ['Contains 1 protein kinase domain.', 'Contains 1 RGS domain.']
    assert_equal(expected, entry.cc['SIMILARITY'])
  end
  # CC topic SUBCELLULAR LOCATION: description of the subcellular location
  # of the mature protein.
  #
  # Checks cc['SUBCELLULAR LOCATION'] (merged comment text) and
  # cc('SUBCELLULAR LOCATION') (one array of location terms per sentence).
  #
  # Only the "Plastid" fixture below is parsed.  Five further samples used
  # to be assigned to the same local and overwritten one after another, so
  # none of them ever reached the parser; those dead stores have been
  # removed and the samples are kept here purely as reference data:
  #
  #   CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
  #   CC secreted by a non-classical secretory pathway.
  #
  #   CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
  #   CC classical secretory pathway (By similarity).
  #
  #   CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
  #   CC to the nerve terminals.
  #
  #   CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
  #   CC cell wall.
  #
  #   CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
  #   CC membrane; single-pass type I membrane protein. Lysosome; lysosomal
  #   CC membrane; single-pass type I membrane protein. Localizes to late
  #   CC endocytic compartment. Associates with lysosome membranes.
  def test_subcellular_location
    data = ['CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;',
            'CC peripheral membrane protein. Plastid; chloroplast; chloroplast',
            'CC stroma.'].join("\n")
    sp = Bio::SPTR201107.new(data)

    assert_equal(['Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma.'],
                 sp.cc['SUBCELLULAR LOCATION'])
    assert_equal([['Plastid',
                   'chloroplast',
                   'chloroplast membrane',
                   'peripheral membrane protein'],
                  ['Plastid',
                   'chloroplast',
                   'chloroplast stroma']],
                 sp.cc('SUBCELLULAR LOCATION'))
  end
  # CC topic SUBUNIT: description of the quaternary structure of a protein
  # and any kind of interactions with other proteins or protein complexes;
  # except for receptor-ligand interactions, which are described in the
  # topic FUNCTION.
  #
  # Only the one-line "Homotetramer." fixture is parsed.  A two-line sample
  # used to be assigned to the same local and overwritten before use; that
  # dead store has been removed and the sample is kept here purely as
  # reference data:
  #
  #   CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
  #   CC MAPK9, MAPK10 and MAPK12.
  def test_subunit
    data = 'CC -!- SUBUNIT: Homotetramer.'
    sp = Bio::SPTR201107.new(data)
    assert_equal(['Homotetramer.'], sp.cc['SUBUNIT'])
  end
  # CC topic TISSUE SPECIFICITY: description of the tissue-specific
  # expression of mRNA or protein.
  #
  # Only the "Isoform 2" fixture below is parsed.  Two further samples used
  # to be assigned to the same local and overwritten before use; those dead
  # stores have been removed and the samples are kept here purely as
  # reference data:
  #
  #   CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria.
  #
  #   CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
  #   CC thymus, testis, embryo and proliferating blood lymphocytes.
  def test_tissue_specificity
    data = ['CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,',
            'CC heart, spleen, kidney and blood. Isoform 2 is expressed (at',
            'CC protein level) in the spleen, skeletal muscle and gastrointestinal',
            'CC epithelia.'].join("\n")
    sp = Bio::SPTR201107.new(data)

    assert_equal(['Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia.'],
                 sp.cc['TISSUE SPECIFICITY'])
  end
  # CC topic TOXIC DOSE: description of the lethal dose (LD), paralytic dose
  # (PD) or effective dose of a protein.
  #
  # A single-line comment is expected back unchanged (minus the line
  # prefix) as the only element of cc['TOXIC DOSE'].
  def test_toxic_dose
    entry = Bio::SPTR201107.new('CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.')

    expected = ['LD(50) is 12 mg/kg by intraperitoneal injection.']
    assert_equal(expected, entry.cc['TOXIC DOSE'])
  end
  # CC topic WEB RESOURCE: description of a cross-reference to a network
  # database/resource for a specific protein; see 3.21.34.
  #
  # Three WEB RESOURCE comments are parsed.  cc['WEB RESOURCE'] is expected
  # to give the merged text of each, and cc('WEB RESOURCE') one hash per
  # comment with the keys 'NAME', 'URL' and 'NOTE' ('NOTE' is nil for all
  # three fixtures).
  def test_web_resource
    lines = ['CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;',
             'CC URL="http://www.molgen.ua.ac.be/CMTMutations/".',
             'CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;',
             'CC URL="http://www.crg.es/deafness/".',
             'CC -!- WEB RESOURCE: NAME=GeneReviews;',
             'CC URL="http://www.genetests.org/query?gene=GJB1".']
    entry = Bio::SPTR201107.new(lines.join("\n"))

    merged = ['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
              'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
              'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".']
    assert_equal(merged, entry.cc['WEB RESOURCE'])

    neuropathies = { 'NAME' => 'Inherited peripheral neuropathies mutation db',
                     'URL'  => 'http://www.molgen.ua.ac.be/CMTMutations/',
                     'NOTE' => nil }
    deafness     = { 'NAME' => 'Connexin-deafness homepage',
                     'URL'  => 'http://www.crg.es/deafness/',
                     'NOTE' => nil }
    gene_reviews = { 'NAME' => 'GeneReviews',
                     'URL'  => 'http://www.genetests.org/query?gene=GJB1',
                     'NOTE' => nil }
    assert_equal([neuropathies, deafness, gene_reviews], entry.cc('WEB RESOURCE'))
  end
  817. end # class TestSPTRCC
  818. # http://br.expasy.org/sprot/userman.html#Ref_line
  # Tests for the reference block (RN/RP/RC/RX/RG/RA/RT/RL lines).
  #
  # Every test parses the same single-reference fixture built in #setup and
  # inspects the first element of SPTR201107#ref, either as a whole
  # (test_ref) or one key at a time (test_RN ... test_RL).
  class TestSPTRRef < Test::Unit::TestCase
    def setup
      lines = ['RN [1]',
               'RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION',
               'RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL',
               'RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.',
               'RC STRAIN=Bristol N2;',
               'RX PubMed=11134024; DOI=10.1074/jbc.M008990200;',
               'RG The mouse genome sequencing consortium;',
               'RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,',
               'RA Cozzone A.J., Cortay J.-C.;',
               'RT "A novel adapter protein employs a phosphotyrosine binding domain and',
               'RT exceptionally basic N-terminal domains to capture and localize an',
               'RT atypical protein kinase C: characterization of Caenorhabditis elegans',
               'RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";',
               'RL J. Biol. Chem. 276:10463-10475(2001).']
      @obj = SPTR201107.new(lines.join("\n"))
    end

    # The complete hash for the first reference.
    def test_ref
      expected = {
        'RN' => '[1]',
        'RP' => ['NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)',
                 'FUNCTION',
                 'INTERACTION WITH PKC-3',
                 'SUBCELLULAR LOCATION',
                 'TISSUE SPECIFICITY',
                 'DEVELOPMENTAL STAGE',
                 'MUTAGENESIS OF PHE-175 AND PHE-221'],
        'RC' => [{ 'Text' => 'Bristol N2', 'Token' => 'STRAIN' }],
        'RX' => { 'MEDLINE' => nil,
                  'DOI' => '10.1074/jbc.M008990200',
                  'PubMed' => '11134024' },
        'RG' => ['The mouse genome sequencing consortium'],
        'RA' => 'Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.',
        'RT' => 'A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.',
        'RL' => 'J. Biol. Chem. 276:10463-10475(2001).'
      }
      assert_equal(expected, first_reference)
    end

    def test_RN
      assert_equal('[1]', first_reference['RN'])
    end

    def test_RP
      positions = ['NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)',
                   'FUNCTION',
                   'INTERACTION WITH PKC-3',
                   'SUBCELLULAR LOCATION',
                   'TISSUE SPECIFICITY',
                   'DEVELOPMENTAL STAGE',
                   'MUTAGENESIS OF PHE-175 AND PHE-221']
      assert_equal(positions, first_reference['RP'])
    end

    def test_RC
      comments = [{ 'Text' => 'Bristol N2', 'Token' => 'STRAIN' }]
      assert_equal(comments, first_reference['RC'])
    end

    def test_RX
      cross_refs = { 'MEDLINE' => nil,
                     'PubMed' => '11134024',
                     'DOI' => '10.1074/jbc.M008990200' }
      assert_equal(cross_refs, first_reference['RX'])
    end

    def test_RG
      assert_equal(['The mouse genome sequencing consortium'], first_reference['RG'])
    end

    def test_RA
      authors = 'Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.'
      assert_equal(authors, first_reference['RA'])
    end

    def test_RT
      title = 'A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.'
      assert_equal(title, first_reference['RT'])
    end

    def test_RL
      assert_equal('J. Biol. Chem. 276:10463-10475(2001).', first_reference['RL'])
    end

    private

    # First entry of the parsed reference list of the fixture from #setup.
    def first_reference
      @obj.ref.first
    end
  end # class TestSPTRRef
  895. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
  # Format changes introduced with Swiss-Prot release 41.0:
  # progress in the conversion of Swiss-Prot to mixed-case characters.
  class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase
    # Multiple RP lines: an RP statement wrapped over two lines is expected
    # to be returned as one list of three items under the 'RP' key of the
    # first reference.
    def test_multiple_RP_lines
      lines = ['RN [1]',
               'RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND',
               'RP CHARACTERIZATION.']
      entry = SPTR201107.new(lines.join("\n"))

      expected = ['SEQUENCE FROM N.A.',
                  'SEQUENCE OF 23-42 AND 351-365',
                  'CHARACTERIZATION']
      assert_equal(expected, entry.ref.first['RP'])
    end
  end
  910. # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
  911. class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
  912. # New syntax of the CC line topic ALTERNATIVE PRODUCTS
  # Parses an ALTERNATIVE PRODUCTS comment written in the release 41.1
  # syntax (Event / Comment / Name / Synonyms / IsoId / Sequence / Note
  # fields) and compares cc('ALTERNATIVE PRODUCTS') with the expected hash
  # of "Event", "Named isoforms", "Comment" and "Variants".
  def test_alternative_products
    lines = ['ID TEST_ENTRY STANDARD; PRT; 393 AA.',
             'CC -!- ALTERNATIVE PRODUCTS:',
             'CC Event=Alternative promoter;',
             'CC Comment=Free text;',
             'CC Event=Alternative splicing; Named isoforms=2;',
             'CC Comment=Optional free text;',
             'CC Name=Isoform_1; Synonyms=Synonym_1;',
             'CC IsoId=Isoform_identifier_1;',
             'CC Sequence=Displayed;',
             'CC Note=Free text;',
             'CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;',
             'CC IsoId=Isoform_identifier_1, Isoform_identifer_2;',
             'CC Sequence=VSP_identifier_1, VSP_identifier_2;',
             'CC Note=Free text;',
             'CC Event=Alternative initiation;',
             'CC Comment=Free text;']
    entry = SPTR201107.new(lines.join("\n"))

    isoform_1 = { 'Name' => 'Isoform_1',
                  'Synonyms' => ['Synonym_1'],
                  'IsoId' => ['Isoform_identifier_1'],
                  'Sequence' => ['Displayed'] }
    isoform_2 = { 'Name' => 'Isoform_2',
                  'Synonyms' => ['Synonym_1', 'Synonym_2'],
                  'IsoId' => ['Isoform_identifier_1', 'Isoform_identifer_2'],
                  'Sequence' => ['VSP_identifier_1', 'VSP_identifier_2'] }
    expected = { 'Event' => ['Alternative promoter'],
                 'Named isoforms' => '2',
                 'Comment' => 'Free text',
                 'Variants' => [isoform_1, isoform_2] }

    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end
  945. def test_alternative_products_with_ft
  946. data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
  947. CC -!- ALTERNATIVE PRODUCTS:
  948. CC Event=Alternative splicing; Named isoforms=6;
  949. CC Name=1;
  950. CC IsoId=Q15746-4; Sequence=Displayed;
  951. CC Name=2;
  952. CC IsoId=Q15746-5; Sequence=VSP_000040;
  953. CC Name=3A;
  954. CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043;
  955. CC Name=3B;
  956. CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;
  957. CC Name=4;
  958. CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;
  959. CC Name=del-1790;
  960. CC IsoId=Q15746-9; Sequence=VSP_000044;
  961. FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA
  962. FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in
  963. FT isoform 2 and isoform 3B).
  964. FT /FTId=VSP_004791.
  965. FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A,
  966. FT isoform 3B and isoform 4).
  967. FT /FTId=VSP_004792.
  968. FT VARSPLIC 1473 1545 Missing (in isoform 4).
  969. FT /FTId=VSP_004793.
  970. FT VARSPLIC 1655 1705 M

Large files files are truncated, but you can click here to view the full file