/lib/galaxy/tools/parameters/sanitize.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 167 lines · 155 code · 3 blank · 9 comment · 0 complexity · 827d8599548eba91a1f9efa46a41f3c1 MD5 · raw file

  1. """
  2. Tool Parameter specific sanitizing.
  3. """
  4. import logging
  5. import string
  6. import galaxy.util
  7. log = logging.getLogger( __name__ )
  8. class ToolParameterSanitizer( object ):
  9. """
  10. Handles tool parameter specific sanitizing.
  11. >>> from elementtree.ElementTree import XML
  12. >>> sanitizer = ToolParameterSanitizer.from_element( XML(
  13. ... '''
  14. ... <sanitizer invalid_char="">
  15. ... <valid initial="string.letters"/>
  16. ... </sanitizer>
  17. ... ''' ) )
  18. >>> sanitizer.sanitize_param( ''.join( sorted( [ c for c in string.printable ] ) ) ) == ''.join( sorted( [ c for c in string.letters ] ) )
  19. True
  20. >>> slash = chr( 92 )
  21. >>> sanitizer = ToolParameterSanitizer.from_element( XML(
  22. ... '''
  23. ... <sanitizer>
  24. ... <valid initial="none">
  25. ... <add preset="string.printable"/>
  26. ... <remove value="&quot;"/>
  27. ... <remove value="%s"/>
  28. ... </valid>
  29. ... <mapping initial="none">
  30. ... <add source="&quot;" target="%s&quot;"/>
  31. ... <add source="%s" target="%s%s"/>
  32. ... </mapping>
  33. ... </sanitizer>
  34. ... ''' % ( slash, slash, slash, slash, slash ) ) )
  35. >>> text = '%s"$rm&#!' % slash
  36. >>> [ c for c in sanitizer.sanitize_param( text ) ] == [ slash, slash, slash, '"', '$', 'r', 'm', '&', '#', '!' ]
  37. True
  38. """
  39. VALID_PRESET = { 'default':( string.letters + string.digits +" -=_.()/+*^,:?!" ), 'none':'' }
  40. MAPPING_PRESET = { 'default':galaxy.util.mapped_chars, 'none':{} }
  41. DEFAULT_INVALID_CHAR = 'X'
  42. #class methods
  43. @classmethod
  44. def from_element( cls, elem ):
  45. """Loads the proper filter by the type attribute of elem"""
  46. #TODO: Add ability to generically specify a method to use for sanitizing input via specification in tool XML
  47. rval = ToolParameterSanitizer()
  48. rval._invalid_char = elem.get( 'invalid_char', cls.DEFAULT_INVALID_CHAR )
  49. rval.sanitize = galaxy.util.string_as_bool( elem.get( 'sanitize', 'True' ) )
  50. for valid_elem in elem.findall( 'valid' ):
  51. rval._valid_chars = rval.get_valid_by_name( valid_elem.get( 'initial', 'default' ) )
  52. for action_elem in valid_elem:
  53. preset = rval.get_valid_by_name( action_elem.get( 'preset', 'none' ) )
  54. valid_value = [ val for val in action_elem.get( 'value', [] ) ]
  55. if action_elem.tag.lower() == 'add':
  56. for val in ( preset + valid_value ):
  57. if val not in rval._valid_chars:
  58. rval._valid_chars.append( val )
  59. elif action_elem.tag.lower() == 'remove':
  60. for val in ( preset + valid_value ):
  61. while val in rval._valid_chars:
  62. rval._valid_chars.remove( val )
  63. else:
  64. log.debug( 'Invalid action tag in valid: %s' % action_elem.tag )
  65. for mapping_elem in elem.findall( 'mapping' ):
  66. rval._mapped_chars = rval.get_mapping_by_name( mapping_elem.get( 'initial', 'default' ) )
  67. for action_elem in mapping_elem:
  68. map_source = action_elem.get( 'source', None )
  69. map_target = action_elem.get( 'target', None )
  70. preset = rval.get_mapping_by_name( action_elem.get( 'preset', 'none' ) )
  71. if action_elem.tag.lower() == 'add':
  72. rval._mapped_chars.update( preset )
  73. if None not in [ map_source, map_target ]:
  74. rval._mapped_chars[ map_source ] = map_target
  75. elif action_elem.tag.lower() == 'remove':
  76. for map_key in preset.keys():
  77. if map_key in rval._mapped_chars:
  78. del rval._mapped_chars[ map_key ]
  79. if map_source is not None and map_key in rval._mapped_chars:
  80. del rval._mapped_chars[ map_key ]
  81. else:
  82. log.debug( 'Invalid action tag in mapping: %s' % action_elem.tag )
  83. return rval
  84. @classmethod
  85. def get_valid_by_name( cls, name ):
  86. rval = []
  87. for split_name in name.split( ',' ):
  88. split_name = split_name.strip()
  89. value = []
  90. if split_name.startswith( 'string.' ):
  91. try:
  92. value = eval( split_name )
  93. except NameError, e:
  94. log.debug( 'Invalid string preset specified: %s' % e )
  95. elif split_name in cls.VALID_PRESET:
  96. value = cls.VALID_PRESET[ split_name ]
  97. else:
  98. log.debug( 'Invalid preset name specified: %s' % split_name )
  99. rval.extend( [ val for val in value if val not in rval ] )
  100. return rval
  101. @classmethod
  102. def get_mapping_by_name( cls, name ):
  103. rval = {}
  104. for split_name in name.split( ',' ):
  105. split_name = split_name.strip()
  106. if split_name in cls.MAPPING_PRESET:
  107. rval.update( cls.MAPPING_PRESET[ split_name ] )
  108. else:
  109. log.debug( 'Invalid preset name specified: %s' % split_name )
  110. return rval
  111. #end class methods
  112. def __init__( self ):
  113. self._valid_chars = [] #List of valid characters
  114. self._mapped_chars = {} #Replace a char with a any number of characters
  115. self._invalid_char = self.DEFAULT_INVALID_CHAR #Replace invalid characters with this character
  116. self.sanitize = True #Simply pass back the passed in value
  117. def restore_text( self, text ):
  118. """Restores sanitized text"""
  119. if self.sanitize:
  120. for key, value in self._mapped_chars.iteritems():
  121. text = text.replace( value, key )
  122. return text
  123. def restore_param( self, value ):
  124. if self.sanitize:
  125. if isinstance( value, basestring ):
  126. return self.restore_text( value )
  127. elif isinstance( value, list ):
  128. return map( self.restore_text, value )
  129. else:
  130. raise Exception, 'Unknown parameter type (%s:%s)' % ( type( value ), value )
  131. return value
  132. def sanitize_text( self, text ):
  133. """Restricts the characters that are allowed in a text"""
  134. if not self.sanitize:
  135. return text
  136. rval = []
  137. for c in text:
  138. if c in self._valid_chars:
  139. rval.append( c )
  140. elif c in self._mapped_chars:
  141. rval.append( self._mapped_chars[ c ] )
  142. else:
  143. rval.append( self._invalid_char )
  144. return ''.join( rval )
  145. def sanitize_param( self, value ):
  146. """Clean incoming parameters (strings or lists)"""
  147. if not self.sanitize:
  148. return value
  149. if isinstance( value, basestring ):
  150. return self.sanitize_text( value )
  151. elif isinstance( value, list ):
  152. return map( self.sanitize_text, value )
  153. else:
  154. raise Exception, 'Unknown parameter type (%s:%s)' % ( type( value ), value )