PageRenderTime 28ms CodeModel.GetById 9ms app.highlight 15ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/xml.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 118 lines | 106 code | 4 blank | 8 comment | 5 complexity | ed8adc0fcd5cf77747dd859be6a3edc3 MD5 | raw file
  1"""
  2XML format classes
  3"""
  4import data
  5import logging
  6from galaxy.datatypes.sniff import *
  7import dataproviders
  8
  9log = logging.getLogger(__name__)
 10
 11@dataproviders.decorators.has_dataproviders
 12class GenericXml( data.Text ):
 13    """Base format class for any XML file."""
 14    file_ext = "xml"
 15
 16    def set_peek( self, dataset, is_multi_byte=False ):
 17        """Set the peek and blurb text"""
 18        if not dataset.dataset.purged:
 19            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
 20            dataset.blurb = 'XML data'
 21        else:
 22            dataset.peek = 'file does not exist'
 23            dataset.blurb = 'file purged from disk'
 24
 25    def sniff( self, filename ):
 26        """
 27        Determines whether the file is XML or not
 28
 29        >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' )
 30        >>> GenericXml().sniff( fname )
 31        True
 32        >>> fname = get_test_fname( 'interval.interval' )
 33        >>> GenericXml().sniff( fname )
 34        False
 35        """
 36        #TODO - Use a context manager on Python 2.5+ to close handle
 37        handle = open(filename)
 38        line = handle.readline()
 39        handle.close()
 40
 41        #TODO - Is there a more robust way to do this?
 42        return line.startswith('<?xml ')
 43
 44    def merge(split_files, output_file):
 45        """Merging multiple XML files is non-trivial and must be done in subclasses."""
 46        if len(split_files) > 1:
 47            raise NotImplementedError("Merging multiple XML files is non-trivial and must be implemented for each XML type")
 48        #For one file only, use base class method (move/copy)
 49        data.Text.merge(split_files, output_file)
 50    merge = staticmethod(merge)
 51
 52    @dataproviders.decorators.dataprovider_factory( 'xml', dataproviders.hierarchy.XMLDataProvider.settings )
 53    def xml_dataprovider( self, dataset, **settings ):
 54        dataset_source = dataproviders.dataset.DatasetDataProvider( dataset )
 55        return dataproviders.hierarchy.XMLDataProvider( dataset_source, **settings )
 56
 57
 58class MEMEXml( GenericXml ):
 59    """MEME XML Output data"""
 60    file_ext = "memexml"
 61
 62    def set_peek( self, dataset, is_multi_byte=False ):
 63        """Set the peek and blurb text"""
 64        if not dataset.dataset.purged:
 65            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
 66            dataset.blurb = 'MEME XML data'
 67        else:
 68            dataset.peek = 'file does not exist'
 69            dataset.blurb = 'file purged from disk'
 70    def sniff( self, filename ):
 71        return False
 72
 73
 74class CisML( GenericXml ):
 75    """CisML XML data""" #see: http://www.ncbi.nlm.nih.gov/pubmed/15001475
 76    file_ext = "cisml"
 77
 78    def set_peek( self, dataset, is_multi_byte=False ):
 79        """Set the peek and blurb text"""
 80        if not dataset.dataset.purged:
 81            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
 82            dataset.blurb = 'CisML data'
 83        else:
 84            dataset.peek = 'file does not exist'
 85            dataset.blurb = 'file purged from disk'
 86    def sniff( self, filename ):
 87        return False
 88
 89
 90class Phyloxml( GenericXml ):
 91    """Format for defining phyloxml data http://www.phyloxml.org/"""
 92    file_ext = "phyloxml"
 93    def set_peek( self, dataset, is_multi_byte=False ):
 94        """Set the peek and blurb text"""
 95        if not dataset.dataset.purged:
 96            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
 97            dataset.blurb = 'Phyloxml data'
 98        else:
 99            dataset.peek = 'file does not exist'
100            dataset.blurb = 'file purged from disk'
101
102    def sniff( self, filename ):
103        """"Checking for keyword - 'phyloxml' always in lowercase in the first few lines"""
104
105        f = open( filename, "r" )
106        firstlines = "".join( f.readlines(5) )
107        f.close()
108
109        if "phyloxml" in firstlines:
110            return True
111        return False
112
113    def get_visualizations( self, dataset ):
114        """
115        Returns a list of visualizations for datatype.
116        """
117
118        return [ 'phyloviz' ]