test_sptr_rel201107.rb - Copyright::: Copyright (C) 2005 Mi…

/test/unit/bio/db/embl/test_sptr_rel201107.rb

https://github.com/phylogenomics/bioruby · Ruby · 1852 lines · 1540 code · 198 blank · 114 comment · 22 complexity · e5c09fbc9f5a3dea51063976603b3fa8 MD5 · raw file
Large files are truncated click here to view the full file

#
# test/unit/bio/db/embl/test_sptr_rel201107.rb - Unit test for Bio::SPTR201107
#
# Copyright:::  Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
#                             2011 The Regents of the University of California
# License::     The Ruby License
#
#  $Id:$
#

# loading helper routine for testing bioruby
require 'pathname'
load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
                            'bioruby_test_helper.rb')).cleanpath.to_s

# libraries needed for the tests
require 'test/unit'
require 'bio/db/embl/sptr201107'

module Bio
  # Unit tests for Bio::SPTR201107.
  #
  # Every test runs against a fresh parser object that #setup builds from
  # the 'uniprot/p53_human_2011_07.uniprot' fixture found under
  # BioRubyTestDataPath.
  class TestSPTR201107 < Test::Unit::TestCase

    # Reads the fixture entry and parses it into @obj before each test.
    def setup
      data = File.read(File.join(BioRubyTestDataPath,
                                 'uniprot', 'p53_human_2011_07.uniprot'))
      @obj = Bio::SPTR201107.new(data)
    end

    def test_id_line
      assert(@obj.id_line)
    end

    def test_id_line_entry_name
      assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
    end

    def test_id_line_data_class
      assert_equal('Reviewed', @obj.id_line('DATA_CLASS'))
    end

    def test_id_line_sequence_length
      assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
    end

    # entry, entry_name and entry_id are all expected to give the same value.
    def test_entry
      entry = 'P53_HUMAN'
      assert_equal(entry, @obj.entry)
      assert_equal(entry, @obj.entry_name)
      assert_equal(entry, @obj.entry_id)
    end

    # sequence_length and aalen are expected to agree.
    def test_sequence_length
      seqlen = 393
      assert_equal(seqlen, @obj.sequence_length)
      assert_equal(seqlen, @obj.aalen)
    end

    # ac and accessions are expected to return the same list.
    def test_ac
      acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
             "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q2XN98",
             "Q3LRW1", "Q3LRW2", "Q3LRW3", "Q3LRW4", "Q3LRW5", "Q86UG1",
             "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
             "Q9NZD0", "Q9UBI2", "Q9UQ61"]
      assert_equal(acs, @obj.ac)
      assert_equal(acs, @obj.accessions)
    end

    def test_accession
      assert_equal('P04637', @obj.accession)
    end

    # dr without arguments is indexed by database name.
    def test_dr
      assert_equal(48, @obj.dr.size)
      assert_equal(74, @obj.dr['GO'].size)
      assert_equal([["IPR008967", "p53-like_TF_DNA-bd"],
                    ["IPR012346", "p53/RUNT-type_TF_DNA-bd"],
                    ["IPR011615", "p53_DNA-bd"],
                    ["IPR010991", "p53_tetrameristn"],
                    ["IPR013872", "p53_transactivation_domain"],
                    ["IPR002117", "p53_tumour_suppressor"]],
                   @obj.dr['InterPro'])
    end

    # dr(key) is expected to return one hash per cross-reference.
    def test_dr_with_key
      pfam = [
              { " "              => "1",
                "Version"        => "P53",
                "Accession"      => "PF00870",
                "Molecular Type" => nil
              },
              { " "              => "1",
                "Version"        => "P53_TAD",
                "Accession"      => "PF08563",
                "Molecular Type" => nil
              },
              { " "              => "1",
                "Version"        => "P53_tetramer",
                "Accession"      => "PF07710",
                "Molecular Type" => nil
              }
             ]
      assert_equal(pfam, @obj.dr('Pfam'))
      embl3 = {
        " "              => "JOINED",
        "Version"        => "AAA59987.1",
        "Accession"      => "M13113",
        "Molecular Type" => "Genomic_DNA"
      }
      assert_equal(embl3, @obj.dr('EMBL')[3])
    end

    def test_dr_with_key_empty
      assert_equal([], @obj.dr('NOT_A_DATABASE'))
    end

    def test_dt
      assert(@obj.dt)
    end

    def test_dt_created
      assert_equal('13-AUG-1987, integrated into UniProtKB/Swiss-Prot.', @obj.dt('created'))
    end

    def test_dt_sequence
      assert_equal('24-NOV-2009, sequence version 4.',
                   @obj.dt('sequence'))
    end

    def test_dt_annotation
      assert_equal('31-MAY-2011, entry version 186.',
                   @obj.dt('annotation'))
    end

    def test_de
      assert(@obj.de)
    end

    def test_protein_name
      assert_equal("Cellular tumor antigen p53", @obj.protein_name)
    end

    def test_synonyms
      ary = ["Antigen NY-CO-13", "Phosphoprotein p53", "Tumor suppressor p53"]
      assert_equal(ary, @obj.synonyms)
    end

    def test_gn
      assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
                   @obj.gn)
    end

    # gn_uniprot_parser is invoked via send so that it can be reached even
    # when it is not public.
    def test_gn_uniprot_parser
      assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
                   @obj.send(:gn_uniprot_parser))
    end

    # gn_old_parser is invoked via send so that it can be reached even
    # when it is not public.
    def test_gn_old_parser
      assert_equal([["Name=TP53; Synonyms=P53;"]],
                   @obj.send(:gn_old_parser))
    end

    def test_gene_names
      assert_equal(["TP53"], @obj.gene_names)
    end

    def test_gene_name
      assert_equal('TP53', @obj.gene_name)
    end

    def test_os
      assert(@obj.os)
    end

    def test_os_access
      assert_equal("Homo sapiens (Human)", @obj.os(0))
    end

    def test_os_access2
      assert_equal({"name"=>"(Human)", "os"=>"Homo sapiens"}, @obj.os[0])
    end

    def test_og_1
      assert_og(['Plastid', 'Chloroplast'],
                "OG   Plastid; Chloroplast.")
    end

    def test_og_2
      assert_og(['Mitochondrion'],
                "OG   Mitochondrion.")
    end

    def test_og_3
      assert_og(["Plasmid sym pNGR234a"],
                "OG   Plasmid sym pNGR234a.")
    end

    def test_og_4
      assert_og(['Plastid', 'Cyanelle'],
                "OG   Plastid; Cyanelle.")
    end

    def test_og_5
      assert_og(["Plasmid pSymA (megaplasmid 1)"],
                "OG   Plasmid pSymA (megaplasmid 1).")
    end

    def test_og_6
      assert_og(['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'],
                "OG   Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.")
    end

    def test_oc
      assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
                    "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
                    "Euarchontoglires", "Primates",  "Haplorrhini", "Catarrhini",
                    "Hominidae", "Homo"],
                   @obj.oc)
    end

    def test_ox
      assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
    end

    def test_ref # Bio::SPTR201107#ref
      assert_equal(Array, @obj.ref.class)
    end

    def test_cc
      assert_equal(Hash, @obj.cc.class)
    end

    def test_cc_database
      wr = [{"NAME"=>"IARC TP53 mutation database",
            "NOTE"=>"Somatic and germline TP53 mutations in human cancers",
            "URL"=>"http://www-p53.iarc.fr/"},
           {"NAME"=>"p53 web site at the Institut Curie",
            "NOTE"=>nil,
            "URL"=>"http://p53.free.fr/"},
           {"NAME"=>"Atlas of Genetics and Cytogenetics in Oncology and Haematology",
            "NOTE"=>nil,
            "URL"=>"http://atlasgeneticsoncology.org/Genes/P53ID88.html"},
           {"NAME"=>"GeneReviews",
            "NOTE"=>nil,
            "URL"=>"http://www.ncbi.nlm.nih.gov/sites/GeneTests/lab/gene/TP53"},
           {"NAME"=>"NIEHS-SNPs",
            "NOTE"=>nil,
            "URL"=>"http://egp.gs.washington.edu/data/tp53/"},
           {"NAME"=>"SHMPD",
            "NOTE"=>"The Singapore human mutation and polymorphism database",
            "URL"=>"http://shmpd.bii.a-star.edu.sg/gene.php?genestart=A&genename=TP53"},
           {"NAME"=>"Wikipedia",
            "NOTE"=>"P53 entry",
            "URL"=>"http://en.wikipedia.org/wiki/P53"}]
      assert_equal(wr, @obj.cc('WEB RESOURCE'))
    end

    def test_cc_alternative_products
      ap = {"Event"=>["Alternative promoter usage", "Alternative splicing"],
            "Named isoforms"=>"9",
            "Comment"=>"",
            "Variants"=>
             [{"Name"=>"1",
               "Synonyms"=>["p53", "p53alpha"],
               "IsoId"=>["P04637-1"],
               "Sequence"=>["Displayed"]},
              {"Name"=>"2",
               "Synonyms"=>["I9RET", "p53beta"],
               "IsoId"=>["P04637-2"],
               "Sequence"=>["VSP_006535", "VSP_006536"]},
              {"Name"=>"3",
               "Synonyms"=>["p53gamma"],
               "IsoId"=>["P04637-3"],
               "Sequence"=>["VSP_040560", "VSP_040561"]},
              {"Name"=>"4",
               "Synonyms"=>["Del40-p53", "Del40-p53alpha", "p47"],
               "IsoId"=>["P04637-4"],
               "Sequence"=>["VSP_040832"]},
              {"Name"=>"5",
               "Synonyms"=>["Del40-p53beta"],
               "IsoId"=>["P04637-5"],
               "Sequence"=>["VSP_040832", "VSP_006535", "VSP_006536"]},
              {"Name"=>"6",
               "Synonyms"=>["Del40-p53gamma"],
               "IsoId"=>["P04637-6"],
               "Sequence"=>["VSP_040832", "VSP_040560", "VSP_040561"]},
              {"Name"=>"7",
               "Synonyms"=>["Del133-p53", "Del133-p53alpha"],
               "IsoId"=>["P04637-7"],
               "Sequence"=>["VSP_040833"]},
              {"Name"=>"8",
               "Synonyms"=>["Del133-p53beta"],
               "IsoId"=>["P04637-8"],
               "Sequence"=>["VSP_040833", "VSP_006535", "VSP_006536"]},
              {"Name"=>"9",
               "Synonyms"=>["Del133-p53gamma"],
               "IsoId"=>["P04637-9"],
               "Sequence"=>["VSP_040833", "VSP_040560", "VSP_040561"]}]}
      assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
    end

    # The test expects nil from cc('MASS SPECTROMETRY') for this entry.
    def test_cc_mass_spectrometry
      assert_nil(@obj.cc('MASS SPECTROMETRY'))
    end


    def test_kw
      keywords = ["3D-structure", "Acetylation", "Activator",
                  "Alternative promoter usage", "Alternative splicing", "Apoptosis",
                  "Cell cycle", "Complete proteome", "Cytoplasm",
                  "Disease mutation", "DNA-binding", "Endoplasmic reticulum",
                  "Glycoprotein", "Host-virus interaction", "Isopeptide bond",
                  "Li-Fraumeni syndrome", "Metal-binding", "Methylation",
                  "Nucleus", "Phosphoprotein", "Polymorphism",
                  "Transcription", "Transcription regulation", "Tumor suppressor",
                  "Ubl conjugation", "Zinc"]
      assert_equal(keywords, @obj.kw)
    end

    def test_ft
      assert(@obj.ft)
      name = 'DNA_BIND'
      assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292,
                     "Description"=>"",
                     "original" => ['DNA_BIND', '102', '292', '', '']}],
                   @obj.ft[name])
    end

    def test_sq
      assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653},
                   @obj.sq)
    end

    def test_sq_crc64
      assert_equal("AD5C149FD8106131", @obj.sq('CRC64'))
    end

    # 'mw', 'molecular' and 'weight' are all expected to select the weight.
    def test_sq_mw
      mw = 43653
      assert_equal(mw, @obj.sq('mw'))
      assert_equal(mw, @obj.sq('molecular'))
      assert_equal(mw, @obj.sq('weight'))
    end

    # 'len', 'length' and 'AA' are all expected to select the length.
    def test_sq_len
      length = 393
      assert_equal(length, @obj.sq('len'))
      assert_equal(length, @obj.sq('length'))
      assert_equal(length, @obj.sq('AA'))
    end

    # seq and aaseq are expected to return the same sequence.
    def test_seq
      seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
      assert_equal(seq, @obj.seq)
      assert_equal(seq, @obj.aaseq)
    end

    private

    # Shared body of the test_og_* cases: replaces the raw OG line stored in
    # the parser's @orig table with +og_line+, then checks that #og returns
    # +expected+. The instance variable is reached directly because the
    # tests overwrite parser state that has no public writer in view here.
    def assert_og(expected, og_line)
      @obj.instance_variable_get(:@orig)['OG'] = og_line
      assert_equal(expected, @obj.og)
    end

  end # class TestSPTR201107



  class TestSPTRCC < Test::Unit::TestCase
    def test_allergen
      # ALLERGEN topic: information relevant to allergenic proteins.
      # Both cc['ALLERGEN'] and cc('ALLERGEN') are expected to give the
      # comment text wrapped in an array.
      entry = Bio::SPTR201107.new('CC   -!- ALLERGEN: Causes an allergic reaction in human.')
      expected = ['Causes an allergic reaction in human.']
      assert_equal(expected, entry.cc['ALLERGEN'])
      assert_equal(expected, entry.cc('ALLERGEN'))
    end
    
    def test_alternative_products_access_as_hash
      # Hash-style access is expected to give the topic's continuation lines
      # joined into one string, wrapped in an array.
      cc_lines = "CC   -!- ALTERNATIVE PRODUCTS:
CC       Event=Alternative initiation; Named isoforms=2;
CC       Name=Long;
CC         IsoId=P68250-1; Sequence=Displayed;
CC       Name=Short;
CC         IsoId=P68250-2; Sequence=VSP_018631;
CC         Note=Contains a N-acetylmethionine at position 1 (By
CC         similarity);"
      entry = Bio::SPTR201107.new(cc_lines)

      joined = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"]
      assert_equal(joined, entry.cc['ALTERNATIVE PRODUCTS'])
    end

    def test_alternative_products_ai
      # ALTERNATIVE PRODUCTS: description of the existence of related protein
      # sequence(s) produced by alternative splicing of the same gene,
      # alternative promoter usage, ribosomal frameshifting or by the use of
      # alternative initiation codons; see 3.21.15.
      # Events: Alternative promoter usage, Alternative splicing,
      # Alternative initiation, Ribosomal frameshifting.
      # This case covers "Alternative initiation".
      cc_lines = "CC   -!- ALTERNATIVE PRODUCTS:
CC       Event=Alternative initiation; Named isoforms=2;
CC       Name=Long;
CC         IsoId=P68250-1; Sequence=Displayed;
CC       Name=Short;
CC         IsoId=P68250-2; Sequence=VSP_018631;
CC         Note=Contains a N-acetylmethionine at position 1 (By
CC         similarity);"
      entry = Bio::SPTR201107.new(cc_lines)

      long_isoform = {"IsoId"=>["P68250-1"],
                      "Name"=>"Long",
                      "Synonyms" => [],
                      "Sequence"=>["Displayed"]}
      short_isoform = {"IsoId"=>["P68250-2"],
                       "Name"=>"Short",
                       "Synonyms" => [],
                       "Sequence"=>["VSP_018631"]}
      parsed = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[long_isoform, short_isoform],
                "Event"=>["Alternative initiation"]}
      assert_equal(parsed, entry.cc('ALTERNATIVE PRODUCTS'))
    end
    def test_alternative_products_as
      # ALTERNATIVE PRODUCTS with Event "Alternative splicing".
      cc_lines = "CC   -!- ALTERNATIVE PRODUCTS:
CC       Event=Alternative splicing; Named isoforms=2;
CC       Name=1;
CC         IsoId=P04637-1; Sequence=Displayed;
CC       Name=2; Synonyms=I9RET;
CC         IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;
CC         Note=Seems to be non-functional. Expressed in quiescent
CC         lymphocytes;"
      entry = Bio::SPTR201107.new(cc_lines)

      isoform1 = {"Name"=>"1",
                  "IsoId"=>["P04637-1"],
                  "Synonyms"=>[],
                  "Sequence"=>["Displayed"]}
      isoform2 = {"IsoId"=>["P04637-2"],
                  "Name"=>"2",
                  "Synonyms"=>["I9RET"],
                  "Sequence"=>["VSP_006535", "VSP_006536"]}
      parsed = {"Comment"=>"",
                "Named isoforms"=>"2",
                "Variants"=>[isoform1, isoform2],
                "Event"=>["Alternative splicing"]}
      assert_equal(parsed, entry.cc('ALTERNATIVE PRODUCTS'))
    end
    def test_alternative_products_apu
      # ALTERNATIVE PRODUCTS with two events (promoter usage and splicing),
      # a Comment field and five named isoforms.
      cc_lines = "CC   -!- ALTERNATIVE PRODUCTS:
CC       Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;
CC         Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;
CC       Name=1; Synonyms=AAT-1M;
CC         IsoId=Q7Z4T9-1; Sequence=Displayed;
CC       Name=2;
CC         IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;
CC         Note=No experimental confirmation available;
CC       Name=3;
CC         IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;
CC       Name=4; Synonyms=AAT1-alpha;
CC         IsoId=Q7Z4T9-4; Sequence=VSP_014908;
CC         Note=May be produced by alternative promoter usage;
CC       Name=5; Synonyms=AAT1-beta, AAT1-gamma;
CC         IsoId=Q7Z4T9-5; Sequence=VSP_014909;
CC         Note=May be produced by alternative promoter usage;"
      entry = Bio::SPTR201107.new(cc_lines)

      isoforms = [
        {"Name"=>"1",
         "IsoId"=>["Q7Z4T9-1"],
         "Synonyms"=>["AAT-1M"],
         "Sequence"=>["Displayed"]},
        {"Name"=>"2",
         "IsoId"=>["Q7Z4T9-2"],
         "Synonyms" => [],
         "Sequence"=>["VSP_014910", "VSP_014911"]},
        {"Name"=>"3",
         "IsoId"=>["Q7Z4T9-3"],
         "Synonyms" => [],
         "Sequence"=>["VSP_014907", "VSP_014912"]},
        {"Name"=>"4",
         "IsoId"=>["Q7Z4T9-4"],
         "Synonyms"=>["AAT1-alpha"],
         "Sequence"=>["VSP_014908"]},
        {"Name"=>"5",
         "IsoId"=>["Q7Z4T9-5"],
         "Synonyms"=>["AAT1-beta", "AAT1-gamma"],
         "Sequence"=>["VSP_014909"]}
      ]
      parsed = {"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist",
                "Named isoforms"=>"5",
                "Variants"=>isoforms,
                "Event"=>["Alternative promoter usage", "Alternative splicing"]}
      assert_equal(parsed, entry.cc('ALTERNATIVE PRODUCTS'))
    end
    def test_alternative_products_rf
      # With empty input the topic is expected to parse to an empty hash.
      entry = Bio::SPTR201107.new("")
      parsed = entry.cc('ALTERNATIVE PRODUCTS')
      assert_equal({}, parsed)
    end
    
    def test_biophysicochemical_properties
      # BIOPHYSICOCHEMICAL PROPERTIES: description of the information relevant
      # to biophysical and physicochemical data and information on pH
      # dependence, temperature dependence, kinetic parameters, redox
      # potentials, and maximal absorption; see 3.21.8

      # Case 1: only kinetic parameters and pH dependence are present.
      partial_lines = 'CC   -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC       Kinetic parameters:
CC         KM=45 uM for AdoMet;
CC         Vmax=32 uM/h/mg enzyme;
CC       pH dependence:
CC         Optimum pH is 8.2;'
      partial_entry = Bio::SPTR201107.new(partial_lines)

      joined = ["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"]
      assert_equal(joined, partial_entry.cc['BIOPHYSICOCHEMICAL PROPERTIES'])

      partial_parsed = {"Redox potential" => "",
                        "Temperature dependence" => "",
                        "Kinetic parameters" => {"KM" => "45 uM for AdoMet",
                                                 "Vmax" => "32 uM/h/mg enzyme"},
                        "Absorption" => {},
                        "pH dependence" => "Optimum pH is 8.2"}
      assert_equal(partial_parsed, partial_entry.cc('BIOPHYSICOCHEMICAL PROPERTIES'))

      # Case 2: every sub-section filled in, following
      # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
      template_lines = "CC   -!- BIOPHYSICOCHEMICAL PROPERTIES:
CC       Absorption:
CC         Abs(max)=xx nm;
CC         Note=free_text;
CC       Kinetic parameters:
CC         KM=xx unit for substrate [(free_text)];
CC         Vmax=xx unit enzyme [free_text];
CC         Note=free_text;
CC       pH dependence:
CC         free_text;
CC       Redox potential:
CC         free_text;
CC       Temperature dependence:
CC         free_text;"
      template_entry = Bio::SPTR201107.new(template_lines)

      template_parsed = {"Redox potential"=>"free_text",
                         "Temperature dependence"=>"free_text",
                         "Kinetic parameters"=>
                         {"KM"=>"xx unit for substrate [(free_text)]",
                          "Note"=>"free_text",
                          "Vmax"=>"xx unit enzyme [free_text]"},
                         "Absorption"=>{"Note"=>"free_text", "Abs(max)"=>"xx nm"},
                         "pH dependence"=>"free_text"}
      assert_equal(template_parsed, template_entry.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
    end


    def test_biotechnology
      # BIOTECHNOLOGY: description of the use of a specific protein in a
      # biotechnological process.
      cc_lines = 'CC   -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in
CC       improved ripening tomato by Monsanto. ACC is the immediate
CC       precursor of the phytohormone ethylene which is involved in the
CC       control of ripening. ACC deaminase reduces ethylene biosynthesis
CC       and thus extends the shelf life of fruits and vegetables.'
      entry = Bio::SPTR201107.new(cc_lines)

      joined = ["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."]
      assert_equal(joined, entry.cc['BIOTECHNOLOGY'])
    end

    def test_catalytic_activity
      # CATALYTIC ACTIVITY: description of the reaction(s) catalyzed by an
      # enzyme [1]. The expected text has words hyphenated across line
      # breaks joined without a space.
      cc_lines = 'CC   -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-
CC       methyladenine, 3-methylguanine, 7-methylguanine and 7-
CC       methyladenine.'
      entry = Bio::SPTR201107.new(cc_lines)

      joined = ["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."]
      assert_equal(joined, entry.cc['CATALYTIC ACTIVITY'])
    end

    def test_caution
      # CAUTION: warning about possible errors and/or grounds for confusion.
      # cc['CAUTION'] is expected to wrap the text in an array, while
      # cc('CAUTION') is expected to give the bare string.
      cc_lines = 'CC   -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon
CC       in position 480 which was translated as a stop codon to shorten
CC       the sequence.'
      entry = Bio::SPTR201107.new(cc_lines)

      text = "Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."
      assert_equal([text], entry.cc['CAUTION'])
      assert_equal(text, entry.cc('CAUTION'))
    end

    def test_cofactor
      # COFACTOR: description of any non-protein substance required by an
      # enzyme for its catalytic activity. Two COFACTOR blocks are given and
      # both accessors are expected to return one array element per block.
      cc_lines = 'CC   -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.
CC   -!- COFACTOR: Mg(2+).'
      entry = Bio::SPTR201107.new(cc_lines)

      cofactors = ["Cl(-). Is unique in requiring Cl(-) for its activity.",
                   "Mg(2+)."]
      assert_equal(cofactors, entry.cc['COFACTOR'])
      assert_equal(cofactors, entry.cc('COFACTOR'))
    end

    def test_developmental_stage
      # DEVELOPMENTAL STAGE: description of the developmentally-specific
      # expression of mRNA or protein.
      cc_lines = 'CC   -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35
CC       with higher levels detected at day 56. Isoform 1 is not detected
CC       in males of any age.'
      entry = Bio::SPTR201107.new(cc_lines)

      text = "In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."
      assert_equal([text], entry.cc['DEVELOPMENTAL STAGE'])
      assert_equal(text, entry.cc('DEVELOPMENTAL STAGE'))
    end

    def test_disease
      # DISEASE: description of the disease(s) associated with a deficiency
      # of a protein.
      cc_lines = 'CC   -!- DISEASE: Defects in APP are a cause of hereditary cerebral
CC       hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This
CC       disorder is characterized by amyloid deposits in cerebral vessels.
CC       The principal clinical characteristics are recurring cerebral
CC       hemorrhages, sometimes preceded by migrainous headaches or mental
CC       cleavage. Various types of HCHWAD are known. They differ in onset
CC       and aggressiveness of the disease. The Iowa type demonstrated no
CC       cerebral hemorrhaging but is characterized by progressive
CC       cognitive decline. Beta-APP40 is the predominant form of
CC       cerebrovascular amyloid.'
      entry = Bio::SPTR201107.new(cc_lines)

      text = "Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."
      assert_equal([text], entry.cc['DISEASE'])
      assert_equal(text, entry.cc('DISEASE'))
    end

    def test_domain
      # DOMAIN: description of the domain structure of a protein. Two DOMAIN
      # blocks are given; both accessors are expected to return one array
      # element per block.
      cc_lines = 'CC   -!- DOMAIN: The basolateral sorting signal (BaSS) is required for
CC       sorting of membrane proteins to the basolateral surface of
CC       epithelial cells.
CC   -!- DOMAIN: The NPXY sequence motif found in many tyrosine-
CC       phosphorylated proteins is required for the specific binding of
CC       the PID domain. However, additional amino acids either N- or C-
CC       terminal to the NPXY motif are often required for complete
CC       interaction. The PID domain-containing proteins which bind APP
CC       require the YENPTY motif for full interaction. These interactions
CC       are independent of phosphorylation on the terminal tyrosine
CC       residue. The NPXY site is also involved in clathrin-mediated
CC       endocytosis (By similarity).'
      entry = Bio::SPTR201107.new(cc_lines)

      domains = [
        "The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
        "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."
      ]
      assert_equal(domains, entry.cc['DOMAIN'])
      assert_equal(domains, entry.cc('DOMAIN'))
    end

    def test_enzyme_regulation
      # ENZYME REGULATION: description of an enzyme regulatory mechanism.
      cc_lines = 'CC   -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated
CC       by the G protein beta and gamma subunit complex.'
      entry = Bio::SPTR201107.new(cc_lines)

      text = "Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."
      assert_equal([text], entry.cc['ENZYME REGULATION'])
      assert_equal(text, entry.cc('ENZYME REGULATION'))
    end
    
    def test_function
      # FUNCTION: general description of the function(s) of a protein.
      cc_lines = 'CC   -!- FUNCTION: May play a fundamental role in situations where fine
CC       interplay between intracellular calcium and cAMP determines the
CC       cellular function. May be a physiologically relevant docking site
CC       for calcineurin (By similarity).'
      entry = Bio::SPTR201107.new(cc_lines)

      text = "May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."
      assert_equal([text], entry.cc['FUNCTION'])
      assert_equal(text, entry.cc('FUNCTION'))
    end

    def test_induction
      # INDUCTION: description of the compound(s) or condition(s) that
      # regulate gene expression.
      entry = Bio::SPTR201107.new('CC   -!- INDUCTION: By pheromone (alpha-factor).')

      text = "By pheromone (alpha-factor)."
      assert_equal([text], entry.cc['INDUCTION'])
      assert_equal(text, entry.cc('INDUCTION'))
    end

    def test_interaction
      # INTERACTION: conveys information relevant to binary protein-protein
      # interaction 3.21.12. Hash-style access is expected to give the joined
      # text; call-style access is expected to give one hash per partner.
      cc_lines = 'CC   -!- INTERACTION:
CC       P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;
CC       P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
      entry = Bio::SPTR201107.new(cc_lines)

      joined = ["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"]
      assert_equal(joined, entry.cc['INTERACTION'])

      partners = [
        {'SP_Ac' => 'P62158',
         'identifier' => 'CALM1',
         'optional_identifier' => '(xeno)',
         'NbExp' => '1',
         'IntAct' => ['EBI-457011', 'EBI-397435']},
        {'SP_Ac' => 'P62155',
         'identifier' => 'calm1',
         'optional_identifier' => '(xeno)',
         'NbExp' => '1',
         'IntAct' => ['EBI-457011', 'EBI-397568']}
      ]
      assert_equal(partners, entry.cc('INTERACTION'))
    end

    def test_mass_spectrometry
      # MASS SPECTROMETRY: reports the exact molecular weight of a protein or
      # part of a protein as determined by mass spectrometric methods; see
      # 3.21.23. The second block has no MW_ERR field, for which nil is
      # expected in the parsed hash.
      cc_lines = "CC   -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;
CC       NOTE=Ref.1.
CC   -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;
CC       NOTE=Ref.2."
      entry = Bio::SPTR201107.new(cc_lines)

      joined = ["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
                "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."]
      assert_equal(joined, entry.cc['MASS SPECTROMETRY'])

      measurements = [
        {'MW' => '2894.9',
         'MW_ERR' => '3',
         'METHOD' => 'MALDI',
         'RANGE' => '1-29',
         'NOTE' => 'Ref.1'},
        {'MW' => '2892.2',
         'METHOD' => 'Electrospray',
         'MW_ERR' => nil,
         'RANGE' => '1-29',
         'NOTE' => 'Ref.2'}
      ]
      assert_equal(measurements, entry.cc('MASS SPECTROMETRY'))
    end

    def test_miscellaneous
      # MISCELLANEOUS: any comment that does not belong to one of the other
      # defined topics. The two physical CC lines are expected to come back
      # as a single space-joined string.
      record = ['CC   -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a',
                'CC       mitochondrial one.'].join("\n")
      raw_expected = ["There are two isozymes; a cytoplasmic one and a mitochondrial one."]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['MISCELLANEOUS'])
    end

    def test_pathway
      # PATHWAY: the metabolic pathway(s) with which a protein is associated.
      # The fixture breaks a line after "3-"; the expected raw text joins
      # that break without inserting a space ("3-phosphate").
      record = ['CC   -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-',
                'CC       phosphate and glycerone phosphate from D-glucose: step 4.'].join("\n")

      raw_expected = ["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."]
      steps_expected = ["Carbohydrate degradation",
                        'glycolysis',
                        'D-glyceraldehyde 3-phosphate',
                        'glycerone phosphate from D-glucose',
                        'step 4']

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['PATHWAY'])
      assert_equal(steps_expected, entry.cc('PATHWAY'))
    end

    def test_pharmaceutical
      # PHARMACEUTICAL: use of a protein as a pharmaceutical drug.
      # Four physical CC lines are expected back as one joined string.
      record = ['CC   -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),',
                'CC       Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm',
                'CC       (Serono). Used in evaluating hypothalamic-pituitary gonadotropic',
                'CC       function.'].join("\n")
      raw_expected = ["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['PHARMACEUTICAL'])
    end

    def test_polymorphism
      # POLYMORPHISM: description of polymorphism(s).
      # Two separate comment blocks are supplied and two strings are expected
      # back; the "mucin-" line break is expected to be joined without a
      # space ("mucin-like").
      record = ['CC   -!- POLYMORPHISM: Position 161 is associated with platelet-specific',
                'CC       alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.',
                'CC       Siba is involved in neonatal alloimmune thrombocytopenia (NATP).',
                'CC   -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem',
                'CC       13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-',
                'CC       like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown',
                'CC       here) contains one repeat starting at position 415, allele C',
                'CC       contains two repeats, allele B contains three repeats and allele A',
                'CC       contains four repeats.'].join("\n")

      siba_comment = "Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP)."
      repeat_comment = "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."

      entry = Bio::SPTR201107.new(record)
      assert_equal([siba_comment, repeat_comment], entry.cc['POLYMORPHISM'])
    end

    def test_ptm
      # PTM: any chemical alteration of a polypeptide (proteolytic cleavage,
      # amino acid modifications including crosslinks). This topic complements
      # the feature table or covers modifications for which position-specific
      # data is not available.
      record = ['CC   -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked',
                'CC       carbohydrate.',
                'CC   -!- PTM: Palmitoylated.'].join("\n")
      raw_expected = ["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
                      "Palmitoylated."]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['PTM'])
    end

    def test_rna_editing
      # RNA EDITING: any type of RNA editing that leads to one or more amino
      # acid changes. Verifies the raw joined text (cc['RNA EDITING']) and the
      # parsed hash (cc('RNA EDITING')), where Modified_positions is expected
      # as an array of strings and Note as a single string.
      #
      # Further sample record, kept for reference only. It is NOT parsed or
      # asserted by this test (it was previously assigned to `data` and
      # overwritten before use, which made it look covered):
      #
      #   CC   -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
      #   CC       146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
      #   CC       252, 260, 264, 277, 285, 295; Note=The nonsense codons at
      #   CC       positions 50, 78, 104, 260 and 264 are modified to sense codons.
      data = 'CC   -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the
CC       brain. Heteromerically expressed edited GLUR2 (R) receptor
CC       complexes are impermeable to calcium, whereas the unedited (Q)
CC       forms are highly permeable to divalent ions (By similarity).'
      sp = Bio::SPTR201107.new(data)
      assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
                   sp.cc['RNA EDITING'])
      assert_equal({"Modified_positions" => ['607'], 
                    "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."},
                   sp.cc('RNA EDITING'))
    end

    def test_similarity
      # SIMILARITY: sequence or structural similarities of a protein with
      # other proteins. Two comment blocks in, two strings expected out.
      record = ['CC   -!- SIMILARITY: Contains 1 protein kinase domain.',
                'CC   -!- SIMILARITY: Contains 1 RGS domain.'].join("\n")
      raw_expected = ["Contains 1 protein kinase domain.", "Contains 1 RGS domain."]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['SIMILARITY'])
    end
    
    def test_subcellular_location
      # SUBCELLULAR LOCATION: subcellular location of the mature protein.
      # Verifies the raw joined text (cc['SUBCELLULAR LOCATION']) and the
      # parsed form (cc('SUBCELLULAR LOCATION')), which is expected to be one
      # array of location terms per sentence of the comment.
      #
      # Further sample records, kept for reference only. They are NOT parsed
      # or asserted by this test (each was previously assigned to `data` and
      # overwritten before use, which made them look covered):
      #
      #   CC   -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
      #   CC       secreted by a non-classical secretory pathway.
      #
      #   CC   -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
      #   CC       classical secretory pathway (By similarity).
      #
      #   CC   -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
      #   CC       to the nerve terminals.
      #
      #   CC   -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
      #   CC       cell wall.
      #
      #   CC   -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
      #   CC       membrane; single-pass type I membrane protein. Lysosome; lysosomal
      #   CC       membrane; single-pass type I membrane protein. Localizes to late
      #   CC       endocytic compartment. Associates with lysosome membranes.
      data = "CC   -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
CC       peripheral membrane protein. Plastid; chloroplast; chloroplast
CC       stroma."
      sp = Bio::SPTR201107.new(data)
      assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
                   sp.cc['SUBCELLULAR LOCATION'])
      assert_equal([["Plastid",
                     "chloroplast", 
                     "chloroplast membrane", 
                     "peripheral membrane protein"], 
                    ["Plastid", "chloroplast", 
                     "chloroplast stroma"]],
                   sp.cc('SUBCELLULAR LOCATION'))
    end

    def test_subunit
      # SUBUNIT: quaternary structure of a protein and any kind of interaction
      # with other proteins or protein complexes, except receptor-ligand
      # interactions, which are described in the topic FUNCTION.
      #
      # Further sample record, kept for reference only. It is NOT parsed or
      # asserted by this test (it was previously assigned to `data` and
      # overwritten before use, which made it look covered):
      #
      #   CC   -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
      #   CC       MAPK9, MAPK10 and MAPK12.
      data = 'CC   -!- SUBUNIT: Homotetramer.'
      sp = Bio::SPTR201107.new(data)
      assert_equal(["Homotetramer."],
                   sp.cc['SUBUNIT'])
    end

    def test_tissue_specificity
      # TISSUE SPECIFICITY: tissue-specific expression of mRNA or protein.
      # Four physical CC lines are expected back as one joined string.
      #
      # Further sample records, kept for reference only. They are NOT parsed
      # or asserted by this test (each was previously assigned to `data` and
      # overwritten before use, which made them look covered):
      #
      #   CC   -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria.
      #
      #   CC   -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
      #   CC       thymus, testis, embryo and proliferating blood lymphocytes.
      data = "CC   -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,
CC       heart, spleen, kidney and blood. Isoform 2 is expressed (at
CC       protein level) in the spleen, skeletal muscle and gastrointestinal
CC       epithelia."
      sp = Bio::SPTR201107.new(data)
      assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
                   sp.cc['TISSUE SPECIFICITY'])
    end

    def test_toxic_dose
      # TOXIC DOSE: lethal dose (LD), paralytic dose (PD) or effective dose
      # of a protein.
      record = 'CC   -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.'
      raw_expected = ["LD(50) is 12 mg/kg by intraperitoneal injection."]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['TOXIC DOSE'])
    end

    def test_web_resource
      # WEB RESOURCE: cross-reference to a network database/resource for a
      # specific protein (topic reference 3.21.34). The parsed form is
      # expected to strip the quotes and trailing period from the URL and to
      # carry a nil NOTE when none is given.
      record = ['CC   -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;',
                'CC       URL="http://www.molgen.ua.ac.be/CMTMutations/".',
                'CC   -!- WEB RESOURCE: NAME=Connexin-deafness homepage;',
                'CC       URL="http://www.crg.es/deafness/".',
                'CC   -!- WEB RESOURCE: NAME=GeneReviews;',
                'CC       URL="http://www.genetests.org/query?gene=GJB1".'].join("\n")

      raw_expected = [
        'NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
        'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
        'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'
      ]
      parsed_expected = [
        {'NAME' => "Inherited peripheral neuropathies mutation db",
         'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/',
         'NOTE' => nil},
        {'NAME' => "Connexin-deafness homepage",
         'URL' => 'http://www.crg.es/deafness/',
         'NOTE' => nil},
        {'NAME' => "GeneReviews",
         'URL' => 'http://www.genetests.org/query?gene=GJB1',
         'NOTE' => nil}
      ]

      entry = Bio::SPTR201107.new(record)
      assert_equal(raw_expected, entry.cc['WEB RESOURCE'])
      assert_equal(parsed_expected, entry.cc('WEB RESOURCE'))
    end

  end # class TestSPTRCC

  # http://br.expasy.org/sprot/userman.html#Ref_line
  class TestSPTRRef < Test::Unit::TestCase

    # Parses a single reference block (RN/RP/RC/RX/RG/RA/RT/RL lines) so that
    # every test below can inspect the first parsed reference.
    def setup
      record = ['RN   [1]',
                'RP   NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION',
                'RP   WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL',
                'RP   STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.',
                'RC   STRAIN=Bristol N2;',
                'RX   PubMed=11134024; DOI=10.1074/jbc.M008990200;',
                'RG   The mouse genome sequencing consortium;',
                'RA   Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,',
                'RA   Cozzone A.J., Cortay J.-C.;',
                'RT   "A novel adapter protein employs a phosphotyrosine binding domain and',
                'RT   exceptionally basic N-terminal domains to capture and localize an',
                'RT   atypical protein kinase C: characterization of Caenorhabditis elegans',
                'RT   C kinase adapter 1, a protein that avidly binds protein kinase C3.";',
                'RL   J. Biol. Chem. 276:10463-10475(2001).'].join("\n")
      @obj = SPTR201107.new(record)
    end

    # The whole first reference, compared as one hash.
    def test_ref
      expected = {
        "RN" => "[1]",
        "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
                 "FUNCTION",
                 "INTERACTION WITH PKC-3",
                 "SUBCELLULAR LOCATION",
                 "TISSUE SPECIFICITY",
                 "DEVELOPMENTAL STAGE",
                 "MUTAGENESIS OF PHE-175 AND PHE-221"],
        "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}],
        "RX" => {"MEDLINE" => nil,
                 "DOI" => "10.1074/jbc.M008990200",
                 "PubMed" => "11134024"},
        "RG" => ["The mouse genome sequencing consortium"],
        "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
        "RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
        "RL" => "J. Biol. Chem. 276:10463-10475(2001)."
      }
      assert_equal(expected, first_reference)
    end

    # RN: reference number, kept with its brackets.
    def test_RN
      assert_equal("[1]", first_reference['RN'])
    end

    # RP: reference positions, one array element per comma-separated item;
    # the expected list has no leading "AND" and no trailing period.
    def test_RP
      positions = ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
                   "FUNCTION", "INTERACTION WITH PKC-3",
                   "SUBCELLULAR LOCATION",
                   "TISSUE SPECIFICITY",
                   "DEVELOPMENTAL STAGE",
                   "MUTAGENESIS OF PHE-175 AND PHE-221"]
      assert_equal(positions, first_reference['RP'])
    end

    # RC: reference comments as Token/Text pairs.
    def test_RC
      comments = [{"Text"=>"Bristol N2", "Token"=>"STRAIN"}]
      assert_equal(comments, first_reference['RC'])
    end

    # RX: cross-references; MEDLINE is absent from the record and expected
    # to be nil.
    def test_RX
      xrefs = {'MEDLINE' => nil,
               'PubMed' => '11134024',
               'DOI' => '10.1074/jbc.M008990200'}
      assert_equal(xrefs, first_reference['RX'])
    end

    # RG: reference group, without the trailing semicolon.
    def test_RG
      assert_equal(["The mouse genome sequencing consortium"],
                   first_reference['RG'])
    end

    # RA: authors joined across both RA lines, without the trailing semicolon.
    def test_RA
      authors = "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C."
      assert_equal(authors, first_reference['RA'])
    end

    # RT: title joined across lines, without the surrounding quotes and
    # trailing semicolon.
    def test_RT
      title = "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3."
      assert_equal(title, first_reference['RT'])
    end

    # RL: reference location, returned verbatim.
    def test_RL
      assert_equal("J. Biol. Chem. 276:10463-10475(2001).",
                   first_reference['RL'])
    end

    private

    # First parsed reference of the fixture built in setup.
    def first_reference
      @obj.ref.first
    end

  end # class TestSPTRRef


  # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
  class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase
    # Progress in the conversion of Swiss-Prot to mixed-case characters

    # Multiple RP lines: the reference position text spans two RP lines and
    # is expected back as three items, without the leading "AND" or the
    # trailing period.
    def test_multiple_RP_lines
      record = ['RN    [1]',
                'RP   SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND',
                'RP   CHARACTERIZATION.'].join("\n")
      positions = ['SEQUENCE FROM N.A.',
                   'SEQUENCE OF 23-42 AND 351-365',
                   'CHARACTERIZATION']

      entry = SPTR201107.new(record)
      assert_equal(positions, entry.ref.first['RP'])
    end
  end


  # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
  class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
    # New syntax of the CC line topic ALTERNATIVE PRODUCTS
    def test_alternative_products
      # Fixture lines are joined with newlines; the IsoId line of Isoform_2
      # deliberately keeps its trailing space.
      record = ['ID   TEST_ENTRY      STANDARD;      PRT;   393 AA.',
                'CC   -!- ALTERNATIVE PRODUCTS:',
                'CC       Event=Alternative promoter;',
                'CC         Comment=Free text;',
                'CC       Event=Alternative splicing; Named isoforms=2;',
                'CC         Comment=Optional free text;',
                'CC       Name=Isoform_1; Synonyms=Synonym_1;',
                'CC         IsoId=Isoform_identifier_1;',
                'CC         Sequence=Displayed;',
                'CC         Note=Free text;',
                'CC       Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;',
                'CC         IsoId=Isoform_identifier_1, Isoform_identifer_2; ',
                'CC         Sequence=VSP_identifier_1, VSP_identifier_2;',
                'CC         Note=Free text;',
                'CC       Event=Alternative initiation;',
                'CC         Comment=Free text;'].join("\n")

      isoform_1 = {"Name" => "Isoform_1",
                   "Synonyms" => ["Synonym_1"],
                   "IsoId" => ["Isoform_identifier_1"],
                   "Sequence" => ["Displayed"]}
      isoform_2 = {"Name" => "Isoform_2",
                   "Synonyms" => ["Synonym_1", "Synonym_2"],
                   "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"],
                   "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}
      expected = {"Event" => ["Alternative promoter"],
                  "Named isoforms" => "2",
                  "Comment" => "Free text",
                  "Variants" => [isoform_1, isoform_2]}

      entry = SPTR201107.new(record)
      assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
    end

    def test_alternative_products_with_ft
data = "ID   TEST_ENTRY      STANDARD;      PRT;   393 AA.
CC   -!- ALTERNATIVE PRODUCTS:
CC       Event=Alternative splicing; Named isoforms=6;
CC       Name=1;
CC         IsoId=Q15746-4; Sequence=Displayed;
CC       Name=2;
CC         IsoId=Q15746-5; Sequence=VSP_000040;
CC       Name=3A;
CC         IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043; 
CC       Name=3B;
CC         IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;
CC       Name=4;
CC         IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;
CC       Name=del-1790;
CC         IsoId=Q15746-9; Sequence=VSP_000044;
FT   VARSPLIC    437    506       VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA
FT                                RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in
FT                                isoform 2 and isoform 3B).
FT                                /FTId=VSP_004791.
FT   VARSPLIC   1433   1439       DEVEVSD -> MKWRCQT (in isoform 3A,
FT                                isoform 3B and isoform 4).
FT                                /FTId=VSP_004792.
FT   VARSPLIC   1473   1545       Missing (in isoform 4).
FT                                /FTId=VSP_004793.
FT   VARSPLIC   1655   1705       M…
Alerts (4)

Complexity hotspot; lines 647 to 648 (total complexity: 4)
647 648
Complexity hotspot; lines 650 to 651 (total complexity: 4)
650 651