/test/unit/bio/db/embl/test_sptr.rb
Ruby | 1810 lines | 1497 code | 200 blank | 113 comment | 22 complexity | c16f93b60739c6dd39bcbe3c15d5cf42 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
Large files are truncated, but you can click here to view the full file
- #
- # test/unit/bio/db/embl/test_sptr.rb - Unit test for Bio::SPTR
- #
- # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
- # License:: The Ruby License
- #
- # $Id:$
- #
- # loading helper routine for testing bioruby
- require 'pathname'
- load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
- 'bioruby_test_helper.rb')).cleanpath.to_s
- # libraries needed for the tests
- require 'test/unit'
- require 'bio/db/embl/sptr'
- module Bio
# Unit tests for Bio::SPTR, driven by the p53_human UniProt fixture.
#
# #setup parses a fresh copy of the fixture into @obj, so the tests that
# overwrite a raw field (the OG tests) work on their own entry object.
class TestSPTR < Test::Unit::TestCase
  # Reads the fixture file and parses it into @obj.
  def setup
    data = File.read(File.join(BioRubyTestDataPath,
                               'uniprot', 'p53_human.uniprot'))
    @obj = Bio::SPTR.new(data)
  end

  # --- ID line ---------------------------------------------------------

  def test_id_line
    assert(@obj.id_line)
  end

  def test_id_line_entry_name
    assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
  end

  def test_id_line_data_class
    assert_equal('STANDARD', @obj.id_line('DATA_CLASS'))
  end

  def test_id_line_molecule_type
    assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
  end

  def test_id_line_sequence_length
    assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
  end

  # entry, entry_name and entry_id are expected to agree.
  def test_entry
    entry = 'P53_HUMAN'
    assert_equal(entry, @obj.entry)
    assert_equal(entry, @obj.entry_name)
    assert_equal(entry, @obj.entry_id)
  end

  def test_molecule
    assert_equal('PRT', @obj.molecule)
    assert_equal('PRT', @obj.molecule_type)
  end

  def test_sequence_length
    seqlen = 393
    assert_equal(seqlen, @obj.sequence_length)
    assert_equal(seqlen, @obj.aalen)
  end

  # --- AC line ---------------------------------------------------------

  def test_ac
    acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
           "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1",
           "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
           "Q9NZD0", "Q9UBI2", "Q9UQ61"]
    assert_equal(acs, @obj.ac)
    assert_equal(acs, @obj.accessions)
  end

  def test_accession
    assert_equal('P04637', @obj.accession)
  end

  # --- DR line ---------------------------------------------------------

  def test_dr
    assert_equal(17, @obj.dr.size)
    assert_equal(27, @obj.dr['GO'].size)
    assert_equal([["IPR002117", "P53"],
                  ["IPR011615", "P53_DNA_bd"],
                  ["IPR012346", "P53_RUNT_DNA_bd"],
                  ["IPR010991", "p53_tetrameristn"]],
                 @obj.dr['InterPro'])
  end

  # dr(key) returns one hash per cross-reference of that database.
  def test_dr_with_key
    pfam = [
      { " " => "1",
        "Version" => "P53",
        "Accession" => "PF00870",
        "Molecular Type" => nil
      },
      { " " => "1",
        "Version" => "P53_tetramer",
        "Accession" => "PF07710",
        "Molecular Type" => nil
      }
    ]
    assert_equal(pfam, @obj.dr('Pfam'))
    embl3 = {
      " " => "JOINED",
      "Version" => "AAA59987.1",
      "Accession" => "M13113",
      "Molecular Type" => "Genomic_DNA"
    }
    assert_equal(embl3, @obj.dr('EMBL')[3])
  end

  def test_dr_with_key_empty
    assert_equal([], @obj.dr('NOT_A_DATABASE'))
  end

  # --- DT line ---------------------------------------------------------

  def test_dt
    assert(@obj.dt)
  end

  def test_dt_created
    assert_equal('13-AUG-1987 (Rel. 05, Created)', @obj.dt('created'))
  end

  def test_dt_sequence
    assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)',
                 @obj.dt('sequence'))
  end

  def test_dt_annotation
    assert_equal('13-SEP-2005 (Rel. 48, Last annotation update)',
                 @obj.dt('annotation'))
  end

  # --- DE line ---------------------------------------------------------

  def test_de
    assert(@obj.de)
  end

  def test_protein_name
    assert_equal("Cellular tumor antigen p53", @obj.protein_name)
  end

  def test_synonyms
    ary = ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]
    assert_equal(ary, @obj.synonyms)
  end

  # --- GN line ---------------------------------------------------------

  def test_gn
    assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
                 @obj.gn)
  end

  # Calls gn_uniprot_parser in the receiver's context (block-form
  # instance_eval, no string evaluation).
  def test_gn_uniprot_parser
    assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
                 @obj.instance_eval { gn_uniprot_parser })
  end

  # Calls gn_old_parser in the receiver's context (block-form
  # instance_eval, no string evaluation).
  def test_gn_old_parser
    assert_equal([["Name=TP53; Synonyms=P53;"]],
                 @obj.instance_eval { gn_old_parser })
  end

  def test_gene_names
    assert_equal(["TP53"], @obj.gene_names)
  end

  def test_gene_name
    assert_equal('TP53', @obj.gene_name)
  end

  # --- OS line ---------------------------------------------------------

  def test_os
    assert(@obj.os)
  end

  def test_os_access
    assert_equal("Homo sapiens (Human)", @obj.os(0))
  end

  def test_os_access2
    assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0])
  end

  # --- OG line ---------------------------------------------------------
  # Each OG test replaces the raw OG field of the fixture entry and checks
  # the parsed list of organelles.

  def test_og_1
    assert_og(['Plastid', 'Chloroplast'], "OG Plastid; Chloroplast.")
  end

  def test_og_2
    assert_og(['Mitochondrion'], "OG Mitochondrion.")
  end

  def test_og_3
    assert_og(["Plasmid sym pNGR234a"], "OG Plasmid sym pNGR234a.")
  end

  def test_og_4
    assert_og(['Plastid', 'Cyanelle'], "OG Plastid; Cyanelle.")
  end

  def test_og_5
    assert_og(["Plasmid pSymA (megaplasmid 1)"],
              "OG Plasmid pSymA (megaplasmid 1).")
  end

  def test_og_6
    assert_og(['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'],
              "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.")
  end

  # --- OC / OX lines ---------------------------------------------------

  def test_oc
    assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
                  "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
                  "Euarchontoglires", "Primates", "Catarrhini", "Hominidae",
                  "Homo"],
                 @obj.oc)
  end

  def test_ox
    assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
  end

  # --- References and comments ------------------------------------------

  def test_ref # Bio::SPTR#ref
    assert_instance_of(Array, @obj.ref)
  end

  def test_cc
    assert_instance_of(Hash, @obj.cc)
  end

  def test_cc_database
    db = [{"NAME" => "IARC TP53 mutation database",
           "WWW" => "http://www.iarc.fr/p53/",
           "FTP" => nil, "NOTE" => "IARC db of somatic p53 mutations"},
          {"NAME" => "Tokyo p53",
           "WWW" => "http://p53.genome.ad.jp/", "FTP" => nil,
           "NOTE" => "University of Tokyo db of p53 mutations"},
          {"NAME" => "p53 web site at the Institut Curie",
           "WWW" => "http://p53.curie.fr/", "FTP" => nil, "NOTE" => nil},
          {"NAME" => "Atlas Genet. Cytogenet. Oncol. Haematol.",
           "WWW" => "http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html",
           "FTP" => nil, "NOTE" => nil}]
    assert_equal(db, @obj.cc('DATABASE'))
  end

  def test_cc_alternative_products
    ap = {"Comment" => "",
          "Named isoforms" => "2",
          "Variants" => [{"IsoId" => ["P04637-1"],
                          "Name" => "1",
                          "Synonyms" => [],
                          "Sequence" => ["Displayed"]},
                         {"IsoId" => ["P04637-2"],
                          "Name" => "2",
                          "Synonyms" => ["I9RET"],
                          "Sequence" => ["VSP_006535", "VSP_006536"]}],
          "Event" => ["Alternative splicing"]}
    assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
  end

  # The fixture entry is expected to yield nil for MASS SPECTROMETRY.
  def test_cc_mass_spectrometry
    assert_nil(@obj.cc('MASS SPECTROMETRY'))
  end

  # --- KW / FT lines ---------------------------------------------------

  def test_kw
    keywords = ["3D-structure", "Acetylation", "Activator",
                "Alternative splicing", "Anti-oncogene",
                "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding",
                "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding",
                "Nuclear protein", "Phosphorylation", "Polymorphism",
                "Transcription", "Transcription regulation", "Zinc"]
    assert_equal(keywords, @obj.kw)
  end

  def test_ft
    assert(@obj.ft)
    name = 'DNA_BIND'
    assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292,
                   "Description"=>"",
                   "original" => ['DNA_BIND', '102', '292', '', '']}],
                 @obj.ft[name])
  end

  # --- SQ line and sequence --------------------------------------------

  def test_sq
    assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653},
                 @obj.sq)
  end

  def test_sq_crc64
    assert_equal("AD5C149FD8106131", @obj.sq('CRC64'))
  end

  # 'mw', 'molecular' and 'weight' are all expected to select the weight.
  def test_sq_mw
    mw = 43653
    assert_equal(mw, @obj.sq('mw'))
    assert_equal(mw, @obj.sq('molecular'))
    assert_equal(mw, @obj.sq('weight'))
  end

  # 'len', 'length' and 'AA' are all expected to select the length.
  def test_sq_len
    length = 393
    assert_equal(length, @obj.sq('len'))
    assert_equal(length, @obj.sq('length'))
    assert_equal(length, @obj.sq('AA'))
  end

  def test_seq
    seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
    assert_equal(seq, @obj.seq)
    assert_equal(seq, @obj.aaseq)
  end

  private

  # Replaces the raw OG field of @obj with +og_line+ and asserts that
  # @obj.og parses it into +expected+.
  #
  # The block form of instance_eval assigns the string object directly,
  # so the line is never spliced into evaluated source code and may
  # contain quotes or other special characters.
  def assert_og(expected, og_line)
    @obj.instance_eval { @orig['OG'] = og_line }
    assert_equal(expected, @obj.og)
  end
end # class TestSPTR
- class TestSPTRCC < Test::Unit::TestCase
# ALLERGEN: information relevant to allergenic proteins.
# Hash access (cc[...]) and topic access (cc(...)) are both expected to
# return the comment text wrapped in an array.
def test_allergen
  entry = Bio::SPTR.new('CC -!- ALLERGEN: Causes an allergic reaction in human.')
  expected = ['Causes an allergic reaction in human.']
  assert_equal(expected, entry.cc['ALLERGEN'])
  assert_equal(expected, entry.cc('ALLERGEN'))
end
-
# Hash access to ALTERNATIVE PRODUCTS is expected to return the
# continuation lines joined into one string inside an array.
def test_alternative_products_access_as_hash
  joined = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"]
  raw_cc = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative initiation; Named isoforms=2;
CC Name=Long;
CC IsoId=P68250-1; Sequence=Displayed;
CC Name=Short;
CC IsoId=P68250-2; Sequence=VSP_018631;
CC Note=Contains a N-acetylmethionine at position 1 (By
CC similarity);"
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(joined, entry.cc['ALTERNATIVE PRODUCTS'])
end
# ALTERNATIVE PRODUCTS: existence of related protein sequence(s) produced
# by alternative splicing of the same gene, alternative promoter usage,
# ribosomal frameshifting or by the use of alternative initiation codons;
# see 3.21.15.
# Events: Alternative promoter usage, Alternative splicing, Alternative
# initiation, Ribosomal frameshifting.
#
# This case covers an "Alternative initiation" record with two isoforms.
def test_alternative_products_ai
  raw_cc = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative initiation; Named isoforms=2;
CC Name=Long;
CC IsoId=P68250-1; Sequence=Displayed;
CC Name=Short;
CC IsoId=P68250-2; Sequence=VSP_018631;
CC Note=Contains a N-acetylmethionine at position 1 (By
CC similarity);"
  long_isoform = { "IsoId" => ["P68250-1"],
                   "Name" => "Long",
                   "Synonyms" => [],
                   "Sequence" => ["Displayed"] }
  short_isoform = { "IsoId" => ["P68250-2"],
                    "Name" => "Short",
                    "Synonyms" => [],
                    "Sequence" => ["VSP_018631"] }
  expected = { "Comment" => "",
               "Named isoforms" => "2",
               "Variants" => [long_isoform, short_isoform],
               "Event" => ["Alternative initiation"] }
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
# ALTERNATIVE PRODUCTS with an "Alternative splicing" event, one isoform
# carrying a synonym and two sequence variants.
def test_alternative_products_as
  raw_cc = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=2;
CC Name=1;
CC IsoId=P04637-1; Sequence=Displayed;
CC Name=2; Synonyms=I9RET;
CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;
CC Note=Seems to be non-functional. Expressed in quiescent
CC lymphocytes;"
  isoform_one = { "Name" => "1",
                  "IsoId" => ["P04637-1"],
                  "Synonyms" => [],
                  "Sequence" => ["Displayed"] }
  isoform_two = { "IsoId" => ["P04637-2"],
                  "Name" => "2",
                  "Synonyms" => ["I9RET"],
                  "Sequence" => ["VSP_006535", "VSP_006536"] }
  expected = { "Comment" => "",
               "Named isoforms" => "2",
               "Variants" => [isoform_one, isoform_two],
               "Event" => ["Alternative splicing"] }
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
# ALTERNATIVE PRODUCTS with two events (alternative promoter usage and
# alternative splicing), a Comment field and five named isoforms.
def test_alternative_products_apu
  raw_cc = "CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;
CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;
CC Name=1; Synonyms=AAT-1M;
CC IsoId=Q7Z4T9-1; Sequence=Displayed;
CC Name=2;
CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;
CC Note=No experimental confirmation available;
CC Name=3;
CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;
CC Name=4; Synonyms=AAT1-alpha;
CC IsoId=Q7Z4T9-4; Sequence=VSP_014908;
CC Note=May be produced by alternative promoter usage;
CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;
CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;
CC Note=May be produced by alternative promoter usage;"
  variants = [
    { "Name" => "1",
      "IsoId" => ["Q7Z4T9-1"],
      "Synonyms" => ["AAT-1M"],
      "Sequence" => ["Displayed"] },
    { "Name" => "2",
      "IsoId" => ["Q7Z4T9-2"],
      "Synonyms" => [],
      "Sequence" => ["VSP_014910", "VSP_014911"] },
    { "Name" => "3",
      "IsoId" => ["Q7Z4T9-3"],
      "Synonyms" => [],
      "Sequence" => ["VSP_014907", "VSP_014912"] },
    { "Name" => "4",
      "IsoId" => ["Q7Z4T9-4"],
      "Synonyms" => ["AAT1-alpha"],
      "Sequence" => ["VSP_014908"] },
    { "Name" => "5",
      "IsoId" => ["Q7Z4T9-5"],
      "Synonyms" => ["AAT1-beta", "AAT1-gamma"],
      "Sequence" => ["VSP_014909"] }
  ]
  expected = { "Comment" => "Additional isoforms (AAT-1L and AAT-1S) may exist",
               "Named isoforms" => "5",
               "Variants" => variants,
               "Event" => ["Alternative promoter usage", "Alternative splicing"] }
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
# An entry built from an empty string is expected to yield an empty hash
# for ALTERNATIVE PRODUCTS.
# NOTE(review): the "_rf" suffix suggests a ribosomal-frameshifting sample
# was intended here — confirm.
def test_alternative_products_rf
  entry = Bio::SPTR.new("")
  assert_equal({}, entry.cc('ALTERNATIVE PRODUCTS'))
end
-
# BIOPHYSICOCHEMICAL PROPERTIES: biophysical and physicochemical data and
# information on pH dependence, temperature dependence, kinetic
# parameters, redox potentials, and maximal absorption; see 3.21.8.
#
# Two records are checked: a real-looking one with only kinetic and pH
# data, and the syntax template from section 3.12.2 with every subsection.
def test_biophysicochemical_properties
  partial_cc = 'CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC Kinetic parameters:
CC KM=45 uM for AdoMet;
CC Vmax=32 uM/h/mg enzyme;
CC pH dependence:
CC Optimum pH is 8.2;'
  partial = Bio::SPTR.new(partial_cc)
  assert_equal(["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"],
               partial.cc['BIOPHYSICOCHEMICAL PROPERTIES'])
  partial_parsed = { "Redox potential" => "",
                     "Temperature dependence" => "",
                     "Kinetic parameters" => { "KM" => "45 uM for AdoMet",
                                               "Vmax" => "32 uM/h/mg enzyme" },
                     "Absorption" => {},
                     "pH dependence" => "Optimum pH is 8.2" }
  assert_equal(partial_parsed, partial.cc('BIOPHYSICOCHEMICAL PROPERTIES'))

  # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
  template_cc = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC Absorption:
CC Abs(max)=xx nm;
CC Note=free_text;
CC Kinetic parameters:
CC KM=xx unit for substrate [(free_text)];
CC Vmax=xx unit enzyme [free_text];
CC Note=free_text;
CC pH dependence:
CC free_text;
CC Redox potential:
CC free_text;
CC Temperature dependence:
CC free_text;"
  template = Bio::SPTR.new(template_cc)
  template_parsed = { "Redox potential" => "free_text",
                      "Temperature dependence" => "free_text",
                      "Kinetic parameters" =>
                        { "KM" => "xx unit for substrate [(free_text)]",
                          "Note" => "free_text",
                          "Vmax" => "xx unit enzyme [free_text]" },
                      "Absorption" => { "Note" => "free_text", "Abs(max)" => "xx nm" },
                      "pH dependence" => "free_text" }
  assert_equal(template_parsed, template.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
end
# BIOTECHNOLOGY: use of a specific protein in a biotechnological process.
# The five wrapped CC lines are expected to come back as one string.
def test_biotechnology
  raw_cc = 'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in
CC improved ripening tomato by Monsanto. ACC is the immediate
CC precursor of the phytohormone ethylene which is involved in the
CC control of ripening. ACC deaminase reduces ethylene biosynthesis
CC and thus extends the shelf life of fruits and vegetables.'
  expected = ["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['BIOTECHNOLOGY'])
end
# CATALYTIC ACTIVITY: the reaction(s) catalyzed by an enzyme [1].
# Words hyphenated across a line break ("3-" / "methyladenine") are
# expected to be rejoined without a space.
def test_catalytic_activity
  raw_cc = 'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-
CC methyladenine, 3-methylguanine, 7-methylguanine and 7-
CC methyladenine.'
  expected = ["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['CATALYTIC ACTIVITY'])
end
# CAUTION: warning about possible errors and/or grounds for confusion.
# Hash access is expected to wrap the text in an array; topic access is
# expected to return the bare string.
def test_caution
  raw_cc = 'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon
CC in position 480 which was translated as a stop codon to shorten
CC the sequence.'
  text = "Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."
  entry = Bio::SPTR.new(raw_cc)
  assert_equal([text], entry.cc['CAUTION'])
  assert_equal(text, entry.cc('CAUTION'))
end
# COFACTOR: any non-protein substance required by an enzyme for its
# catalytic activity. Two COFACTOR comments are expected to give two
# array elements through both access styles.
def test_cofactor
  raw_cc = 'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.
CC -!- COFACTOR: Mg(2+).'
  cofactors = ["Cl(-). Is unique in requiring Cl(-) for its activity.",
               "Mg(2+)."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(cofactors, entry.cc['COFACTOR'])
  assert_equal(cofactors, entry.cc('COFACTOR'))
end
# DEVELOPMENTAL STAGE: developmentally-specific expression of mRNA or
# protein. Hash access is expected to give an array, topic access the
# bare string.
def test_developmental_stage
  raw_cc = 'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35
CC with higher levels detected at day 56. Isoform 1 is not detected
CC in males of any age.'
  text = "In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."
  entry = Bio::SPTR.new(raw_cc)
  assert_equal([text], entry.cc['DEVELOPMENTAL STAGE'])
  assert_equal(text, entry.cc('DEVELOPMENTAL STAGE'))
end
# DISEASE: the disease(s) associated with a deficiency of a protein.
# A ten-line comment is expected to come back as one string (array-wrapped
# for hash access, bare for topic access).
def test_disease
  raw_cc = 'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral
CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This
CC disorder is characterized by amyloid deposits in cerebral vessels.
CC The principal clinical characteristics are recurring cerebral
CC hemorrhages, sometimes preceded by migrainous headaches or mental
CC cleavage. Various types of HCHWAD are known. They differ in onset
CC and aggressiveness of the disease. The Iowa type demonstrated no
CC cerebral hemorrhaging but is characterized by progressive
CC cognitive decline. Beta-APP40 is the predominant form of
CC cerebrovascular amyloid.'
  text = "Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."
  entry = Bio::SPTR.new(raw_cc)
  assert_equal([text], entry.cc['DISEASE'])
  assert_equal(text, entry.cc('DISEASE'))
end
# DOMAIN: the domain structure of a protein. Two DOMAIN comments are
# expected to yield a two-element array through both access styles.
# Note the expected text contains "N-or C-terminal": the space after the
# first hyphen in the source line does not survive joining.
def test_domain
  raw_cc = 'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for
CC sorting of membrane proteins to the basolateral surface of
CC epithelial cells.
CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine-
CC phosphorylated proteins is required for the specific binding of
CC the PID domain. However, additional amino acids either N- or C-
CC terminal to the NPXY motif are often required for complete
CC interaction. The PID domain-containing proteins which bind APP
CC require the YENPTY motif for full interaction. These interactions
CC are independent of phosphorylation on the terminal tyrosine
CC residue. The NPXY site is also involved in clathrin-mediated
CC endocytosis (By similarity).'
  domains = ["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
             "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(domains, entry.cc['DOMAIN'])
  assert_equal(domains, entry.cc('DOMAIN'))
end
# ENZYME REGULATION: an enzyme regulatory mechanism. Hash access is
# expected to give an array, topic access the bare string.
def test_enzyme_regulation
  raw_cc = 'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated
CC by the G protein beta and gamma subunit complex.'
  text = "Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."
  entry = Bio::SPTR.new(raw_cc)
  assert_equal([text], entry.cc['ENZYME REGULATION'])
  assert_equal(text, entry.cc('ENZYME REGULATION'))
end
-
# FUNCTION: general description of the function(s) of a protein. Hash
# access is expected to give an array, topic access the bare string.
def test_function
  raw_cc = 'CC -!- FUNCTION: May play a fundamental role in situations where fine
CC interplay between intracellular calcium and cAMP determines the
CC cellular function. May be a physiologically relevant docking site
CC for calcineurin (By similarity).'
  text = "May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."
  entry = Bio::SPTR.new(raw_cc)
  assert_equal([text], entry.cc['FUNCTION'])
  assert_equal(text, entry.cc('FUNCTION'))
end
# INDUCTION: the compound(s) or condition(s) that regulate gene
# expression. Hash access is expected to give an array, topic access the
# bare string.
def test_induction
  entry = Bio::SPTR.new('CC -!- INDUCTION: By pheromone (alpha-factor).')
  text = "By pheromone (alpha-factor)."
  assert_equal([text], entry.cc['INDUCTION'])
  assert_equal(text, entry.cc('INDUCTION'))
end
# INTERACTION: information relevant to binary protein-protein
# interaction; see 3.21.12. Topic access is expected to split each
# interaction line into accession, identifiers, NbExp and IntAct ids.
def test_interaction
  raw_cc = 'CC -!- INTERACTION:
CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;
CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
  joined = ["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"]
  parsed = [
    { 'SP_Ac' => 'P62158',
      'identifier' => 'CALM1',
      'optional_identifier' => '(xeno)',
      'NbExp' => '1',
      'IntAct' => ['EBI-457011', 'EBI-397435'] },
    { 'SP_Ac' => 'P62155',
      'identifier' => 'calm1',
      'optional_identifier' => '(xeno)',
      'NbExp' => '1',
      'IntAct' => ['EBI-457011', 'EBI-397568'] }
  ]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(joined, entry.cc['INTERACTION'])
  assert_equal(parsed, entry.cc('INTERACTION'))
end
# MASS SPECTROMETRY: the exact molecular weight of a protein or part of a
# protein as determined by mass spectrometric methods; see 3.21.23.
# The second record has no MW_ERR field, which is expected as nil.
def test_mass_spectrometry
  raw_cc = "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;
CC NOTE=Ref.1.
CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;
CC NOTE=Ref.2."
  joined = ["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
            "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."]
  parsed = [
    { 'MW' => '2894.9',
      'MW_ERR' => '3',
      'METHOD' => 'MALDI',
      'RANGE' => '1-29',
      'NOTE' => 'Ref.1' },
    { 'MW' => '2892.2',
      'METHOD' => 'Electrospray',
      'MW_ERR' => nil,
      'RANGE' => '1-29',
      'NOTE' => 'Ref.2' }
  ]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(joined, entry.cc['MASS SPECTROMETRY'])
  assert_equal(parsed, entry.cc('MASS SPECTROMETRY'))
end
# MISCELLANEOUS: any comment which does not belong to any of the other
# defined topics.
def test_miscellaneous
  raw_cc = 'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a
CC mitochondrial one.'
  expected = ["There are two isozymes; a cytoplasmic one and a mitochondrial one."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['MISCELLANEOUS'])
end
# PATHWAY: the metabolic pathway(s) with which a protein is associated.
# Topic access is expected to break the comment into its pathway steps.
def test_pathway
  raw_cc = 'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-
CC phosphate and glycerone phosphate from D-glucose: step 4.'
  joined = ["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."]
  steps = ["Carbohydrate degradation",
           'glycolysis',
           'D-glyceraldehyde 3-phosphate',
           'glycerone phosphate from D-glucose',
           'step 4']
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(joined, entry.cc['PATHWAY'])
  assert_equal(steps, entry.cc('PATHWAY'))
end
# PHARMACEUTICAL: the use of a protein as a pharmaceutical drug.
def test_pharmaceutical
  raw_cc = 'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),
CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm
CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic
CC function.'
  expected = ["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['PHARMACEUTICAL'])
end
# POLYMORPHISM: description of polymorphism(s). Two POLYMORPHISM comments
# are expected to yield two joined strings.
def test_polymorphism
  raw_cc = 'CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific
CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.
CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP).
CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem
CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-
CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown
CC here) contains one repeat starting at position 415, allele C
CC contains two repeats, allele B contains three repeats and allele A
CC contains four repeats.'
  expected = ["Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).",
              "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['POLYMORPHISM'])
end
# PTM: any chemical alteration of a polypeptide (proteolytic cleavage,
# amino acid modifications including crosslinks). This topic complements
# information given in the feature table or indicates polypeptide
# modifications for which position-specific data is not available.
def test_ptm
  raw_cc = 'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked
CC carbohydrate.
CC -!- PTM: Palmitoylated.'
  expected = ["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
              "Palmitoylated."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['PTM'])
end
# RNA EDITING: any type of RNA editing that leads to one or more amino
# acid changes.
#
# Only the single-position record below is asserted. A multi-position
# sample was previously assigned to the same local and overwritten before
# use, so it never reached the parser; it is kept here for reference:
#
#   CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
#   CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
#   CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at
#   CC positions 50, 78, 104, 260 and 264 are modified to sense codons.
def test_rna_editing
  data = 'CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the
CC brain. Heteromerically expressed edited GLUR2 (R) receptor
CC complexes are impermeable to calcium, whereas the unedited (Q)
CC forms are highly permeable to divalent ions (By similarity).'
  sp = Bio::SPTR.new(data)
  # Hash access: the joined comment text.
  assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
               sp.cc['RNA EDITING'])
  # Topic access: positions as an array of strings plus the free-text note.
  assert_equal({"Modified_positions" => ['607'],
                "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."},
               sp.cc('RNA EDITING'))
end
# SIMILARITY: the similarities (sequence or structural) of a protein with
# other proteins. Two SIMILARITY comments are expected to give two strings.
def test_similarity
  raw_cc = 'CC -!- SIMILARITY: Contains 1 protein kinase domain.
CC -!- SIMILARITY: Contains 1 RGS domain.'
  expected = ["Contains 1 protein kinase domain.", "Contains 1 RGS domain."]
  entry = Bio::SPTR.new(raw_cc)
  assert_equal(expected, entry.cc['SIMILARITY'])
end
-
# SUBCELLULAR LOCATION: the subcellular location of the mature protein.
#
# Only the chloroplast record below is asserted. Five other samples were
# previously assigned to the same local and overwritten before use, so
# they never reached the parser; they are kept here for reference:
#
#   CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
#   CC secreted by a non-classical secretory pathway.
#
#   CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
#   CC classical secretory pathway (By similarity).
#
#   CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
#   CC to the nerve terminals.
#
#   CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
#   CC cell wall.
#
#   CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
#   CC membrane; single-pass type I membrane protein. Lysosome; lysosomal
#   CC membrane; single-pass type I membrane protein. Localizes to late
#   CC endocytic compartment. Associates with lysosome membranes.
def test_subcellular_location
  data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
CC peripheral membrane protein. Plastid; chloroplast; chloroplast
CC stroma."
  sp = Bio::SPTR.new(data)
  # Hash access: the joined comment text.
  assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
               sp.cc['SUBCELLULAR LOCATION'])
  # Topic access: one array per sentence, split on "; ".
  assert_equal([["Plastid",
                 "chloroplast",
                 "chloroplast membrane",
                 "peripheral membrane protein"],
                ["Plastid", "chloroplast",
                 "chloroplast stroma"]],
               sp.cc('SUBCELLULAR LOCATION'))
end
- def test_subunit # Checks that a single-line SUBUNIT comment is returned as a one-element array.
- # SUBUNIT topic: description of the quaternary structure of a protein and any kind of interactions with other proteins or protein complexes, except for receptor-ligand interactions, which are described in the topic FUNCTION.
- data = 'CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
- CC MAPK9, MAPK10 and MAPK12.'
- data = 'CC -!- SUBUNIT: Homotetramer.'
- sp = Bio::SPTR.new(data) # only the second (Homotetramer) sample is parsed; the multi-line sample above is overwritten
- assert_equal(["Homotetramer."],
- sp.cc['SUBUNIT']) # cc is called without arguments and its result is indexed by topic name
- end
- def test_tissue_specificity # Checks that a four-line TISSUE SPECIFICITY comment is returned as one joined string.
- # TISSUE SPECIFICITY topic: description of the tissue-specific expression of mRNA or protein.
- data = "CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria."
- data = "CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
- CC thymus, testis, embryo and proliferating blood lymphocytes."
- data = "CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,
- CC heart, spleen, kidney and blood. Isoform 2 is expressed (at
- CC protein level) in the spleen, skeletal muscle and gastrointestinal
- CC epithelia."
- sp = Bio::SPTR.new(data) # only the last (Isoform 2) sample is parsed; the two earlier samples are overwritten
- assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
- sp.cc['TISSUE SPECIFICITY']) # cc is called without arguments and its result is indexed by topic name
- end
- def test_toxic_dose # Checks that a TOXIC DOSE comment is returned verbatim as a one-element array.
- # TOXIC DOSE topic: description of the lethal dose (LD), paralytic dose (PD) or effective dose of a protein.
- data = 'CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.'
- sp = Bio::SPTR.new(data) # the input is a single CC line, not a full entry
- assert_equal(["LD(50) is 12 mg/kg by intraperitoneal injection."],
- sp.cc['TOXIC DOSE']) # cc is called without arguments and its result is indexed by topic name
- end
- def test_web_resource # Verifies the raw-text and the structured form of three WEB RESOURCE comments.
- # WEB RESOURCE topic: description of a cross-reference to a network database/resource for a specific protein; see 3.21.34 (presumably a section of the Swiss-Prot user manual - confirm).
- data = 'CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;
- CC URL="http://www.molgen.ua.ac.be/CMTMutations/".
- CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;
- CC URL="http://www.crg.es/deafness/".
- CC -!- WEB RESOURCE: NAME=GeneReviews;
- CC URL="http://www.genetests.org/query?gene=GJB1".'
- sp = Bio::SPTR.new(data) # three WEB RESOURCE blocks, each with a NAME and a URL field and no NOTE field
- assert_equal(['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
- 'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
- 'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'],
- sp.cc['WEB RESOURCE']) # cc without arguments, indexed by topic: one joined string per block, quotes and final period kept
- assert_equal([{'NAME' => "Inherited peripheral neuropathies mutation db",
- 'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/', 'NOTE' => nil},
- {'NAME' => "Connexin-deafness homepage",
- 'URL' => 'http://www.crg.es/deafness/', 'NOTE' => nil},
- {'NAME' => "GeneReviews",
- 'URL' => 'http://www.genetests.org/query?gene=GJB1', 'NOTE' => nil}],
- sp.cc('WEB RESOURCE')) # cc with the topic as argument: one hash per block, URL unquoted, NOTE expected nil since the input has none
- end
- end # class TestSPTRCC
- # http://br.expasy.org/sprot/userman.html#Ref_line
- class TestSPTRRef < Test::Unit::TestCase # Tests for the reference (RN/RP/RC/RX/RG/RA/RT/RL) lines of a single citation.
- def setup # Builds @obj from one complete reference block; every test below reads @obj.ref.first.
- data = 'RN [1]
- RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION
- RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL
- RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.
- RC STRAIN=Bristol N2;
- RX PubMed=11134024; DOI=10.1074/jbc.M008990200;
- RG The mouse genome sequencing consortium;
- RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,
- RA Cozzone A.J., Cortay J.-C.;
- RT "A novel adapter protein employs a phosphotyrosine binding domain and
- RT exceptionally basic N-terminal domains to capture and localize an
- RT atypical protein kinase C: characterization of Caenorhabditis elegans
- RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";
- RL J. Biol. Chem. 276:10463-10475(2001).'
- @obj = SPTR.new(data) # unqualified SPTR resolves to Bio::SPTR because the file wraps its test classes in module Bio
- end
- def test_ref # Compares the whole parsed reference hash at once; the tests below check each key separately.
- res = {"RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
- "RL" => "J. Biol. Chem. 276:10463-10475(2001).",
- "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
- "RX" => {"MEDLINE" => nil,
- "DOI" => "10.1074/jbc.M008990200",
- "PubMed" => "11134024"},
- "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}],
- "RN" => "[1]",
- "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
- "FUNCTION",
- "INTERACTION WITH PKC-3",
- "SUBCELLULAR LOCATION",
- "TISSUE SPECIFICITY",
- "DEVELOPMENTAL STAGE",
- "MUTAGENESIS OF PHE-175 AND PHE-221"],
- "RG" => ["The mouse genome sequencing consortium"]}
- assert_equal(res, @obj.ref.first)
- end
- def test_RN # RN: the reference number is expected with its square brackets kept.
- assert_equal("[1]", @obj.ref.first['RN'])
- end
-
- def test_RP # RP: expected as a list of items, with the final AND and the trailing period absent.
- assert_equal(["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
- "FUNCTION", "INTERACTION WITH PKC-3",
- "SUBCELLULAR LOCATION",
- "TISSUE SPECIFICITY",
- "DEVELOPMENTAL STAGE",
- "MUTAGENESIS OF PHE-175 AND PHE-221"],
- @obj.ref.first['RP'])
- end
- def test_RC # RC: the STRAIN=Bristol N2; pair is expected as a Token/Text hash inside an array.
- assert_equal([{"Text"=>"Bristol N2", "Token"=>"STRAIN"}],
- @obj.ref.first['RC'])
- end
- def test_RX # RX: the MEDLINE key is expected present but nil, since the RX line carries only PubMed and DOI.
- assert_equal({'MEDLINE' => nil,
- 'PubMed' => '11134024',
- 'DOI' => '10.1074/jbc.M008990200'},
- @obj.ref.first['RX'])
- end
- def test_RG # RG: the group name is expected in an array, without the trailing semicolon.
- assert_equal(["The mouse genome sequencing consortium"],
- @obj.ref.first['RG'])
- end
- def test_RA # RA: the two author lines are expected joined into one string, without the trailing semicolon.
- assert_equal("Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
- @obj.ref.first['RA'])
- end
- def test_RT # RT: the four title lines are expected joined, without the surrounding quotes and trailing semicolon.
- assert_equal("A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
- @obj.ref.first['RT'])
- end
- def test_RL # RL: the citation location is expected unchanged, including its final period.
- assert_equal("J. Biol. Chem. 276:10463-10475(2001).",
- @obj.ref.first['RL'])
- end
-
- end # class TestSPTRRef
- # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
- class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase # Covers format changes listed for Swiss-Prot release 41.0.
- # Release note item: progress in the conversion of Swiss-Prot to mixed-case characters (no test in this class targets it).
- # Release note item: an RP record may span multiple RP lines.
- def test_multiple_RP_lines # Checks that an RP record wrapped over two lines is parsed as one list of three items.
- data = "RN [1]
- RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND
- RP CHARACTERIZATION."
- sp = SPTR.new(data) # unqualified SPTR resolves to Bio::SPTR because the file wraps its test classes in module Bio
- assert_equal(['SEQUENCE FROM N.A.',
- 'SEQUENCE OF 23-42 AND 351-365',
- 'CHARACTERIZATION'],
- sp.ref.first['RP']) # the AND that ends the first RP line and the final period are expected to be absent
- end
- end
- # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
- class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
- # New syntax of the CC line topic ALTERNATIVE PRODUCTS
- def test_alternative_products # Checks the structured form of an ALTERNATIVE PRODUCTS comment with two named isoforms.
- data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
- CC -!- ALTERNATIVE PRODUCTS:
- CC Event=Alternative promoter;
- CC Comment=Free text;
- CC Event=Alternative splicing; Named isoforms=2;
- CC Comment=Optional free text;
- CC Name=Isoform_1; Synonyms=Synonym_1;
- CC IsoId=Isoform_identifier_1;
- CC Sequence=Displayed;
- CC Note=Free text;
- CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;
- CC IsoId=Isoform_identifier_1, Isoform_identifer_2;
- CC Sequence=VSP_identifier_1, VSP_identifier_2;
- CC Note=Free text;
- CC Event=Alternative initiation;
- CC Comment=Free text;"
- sp = SPTR.new(data) # the input has an ID line plus one CC block holding three Event sections
- res = {"Comment" => "Free text",
- "Named isoforms" => "2",
- "Variants" => [{"Name" => "Isoform_1",
- "Synonyms" => ["Synonym_1"],
- "IsoId" => ["Isoform_identifier_1"],
- "Sequence" => ["Displayed"] },
- {"Name" => "Isoform_2",
- "Synonyms" => ["Synonym_1", "Synonym_2"],
- "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"],
- "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}],
- "Event" => ["Alternative promoter"]}
- assert_equal(res, # NOTE(review): res lists only the first Event and has no entry for the isoform Note fields in the input - confirm this is the intended parser behavior
- sp.cc('ALTERNATIVE PRODUCTS')) # cc with the topic as argument returns the structured hash compared above
- end
- def test_alternative_products_with_ft
- data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
- CC -!- ALTERNATIVE PRODUCTS:
- CC Event=Alternative splicing; Named isoforms=6;
- CC Name=1;
- CC IsoId=Q15746-4; Sequence=Displayed;
- CC Name=2;
- CC IsoId=Q15746-5; Sequence=VSP_000040;
- CC Name=3A;
- CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043;
- CC Name=3B;
- CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;
- CC Name=4;
- CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;
- CC Name=del-1790;
- CC IsoId=Q15746-9; Sequence=VSP_000044;
- FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA
- FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in
- FT isoform 2 and isoform 3B).
- FT /FTId=VSP_004791.
- FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A,
- FT isoform 3B and isoform 4).
- FT /FTId=VSP_004792.
- FT VARSPLIC 1473 1545 Missing (in isoform 4).
- FT /FTId=VSP_004793.
- FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B).
- FT /FTId=VSP_004794.
- FT VARSPLIC 1790 1790 Missing (in isoform Del-1790).
- FT /FTId=VSP_004795."
- sp = SPTR.new(data)
-
- assert_equal({"Comment" => "",
- "Named isoforms" => "6",
- "Variants" => [{"IsoId"=>["Q15746-4"],
- "Name"=>"1",
- "Synonyms"=>[],
- "Sequence"=>["Displayed"]},
- {"IsoId"=>["Q15746-5"],
- "Name"=>"2",
- "Synonyms"=>[],
- "Sequence"=>["VSP_000040"]},
- {"IsoId"=>["Q15746-6"],
- "Name"=>"3A",
- "Synonyms"=>[],
- "Sequence"=>["VSP_000041", "VSP_000043"]},
- {"IsoId"=>["Q15746-7"],
- "Name"=>"3B",
- "Synonyms"=>[],
- "Sequence"=>["VSP_000040", "VSP_000041", "VSP_000042"]},
- {"IsoId"=>["Q15746-8"],
- "Name"=>"4",
- "Synonyms"=>[],
- "Sequence"=>["VSP_000041", "VSP_000042"]},
- {"IsoId"=>["Q15746-9"],
- "Name"=>"del-1790",
- "Synonyms"=>[],
- "Sequence"=>["VSP_000044"]}],
- "Event"=>["Alternative splicing"]},
- sp.cc('ALTERNATIVE PRODUCTS'))
- assert_equal([{"FTId"=>"VSP_004791",
- "From"=>437,
- "To"=>506,
- "Description"=>"VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).",
- "diff"=> ["VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKARTRDSGTYSCTASNAQGQVSCSWTLQVER", "G"],
- "original"=> ["VARSPLIC", "437", "506", "VSGIPKPEVAWFLEGTPVRRQE…
Large files files are truncated, but you can click here to view the full file