PageRenderTime 61ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/websubmit/lib/websubmit_file_metadata.py

https://github.com/gardenunez/invenio
Python | 369 lines | 268 code | 15 blank | 86 comment | 31 complexity | acc3b1e25ccda3288b7b1065b97ec779 MD5 | raw file
Possible License(s): GPL-2.0
  1. # -*- coding: utf-8 -*-
  2. ##
  3. ## This file is part of Invenio.
  4. ## Copyright (C) 2009, 2010, 2011 CERN.
  5. ##
  6. ## Invenio is free software; you can redistribute it and/or
  7. ## modify it under the terms of the GNU General Public License as
  8. ## published by the Free Software Foundation; either version 2 of the
  9. ## License, or (at your option) any later version.
  10. ##
  11. ## Invenio is distributed in the hope that it will be useful, but
  12. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ## General Public License for more details.
  15. ##
  16. ## You should have received a copy of the GNU General Public License
  17. ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
  18. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  19. """
  20. This is the metadata reader and writer module. Contains the proper
  21. plugin containers in order to read/write metadata from images or other
  22. files.
  23. Public APIs:
  24. - read_metadata()
  25. - write_metadata()
  26. """
# Plugin API version string. It is handed as ``api_version`` to every
# PluginContainer built in this module (presumably so that the container
# can reject plugins written for another API -- confirm in
# invenio.pluginutils).
__required_plugin_API_version__ = "WebSubmit File Metadata Plugin API 1.0"
  28. import os, sys
  29. import traceback
  30. from optparse import OptionParser
  31. from invenio.pluginutils import PluginContainer
  32. from invenio.config import CFG_PYLIBDIR
  33. from invenio.bibdocfile import decompose_file
  34. from invenio.websubmit_config import InvenioWebSubmitFileMetadataRuntimeError
  35. def read_metadata(inputfile, force=None, remote=False,
  36. loginpw=None, verbose=0):
  37. """
  38. Returns metadata extracted from given file as dictionary.
  39. Availability depends on input file format and installed plugins
  40. (return C{TypeError} if unsupported file format).
  41. @param inputfile: path to a file
  42. @type inputfile: string
  43. @param verbose: verbosity
  44. @type verbose: int
  45. @param force: name of plugin to use, to skip plugin auto-discovery
  46. @type force: string
  47. @param remote: if the file is accessed remotely or not
  48. @type remote: boolean
  49. @param loginpw: credentials to access secure servers (username:password)
  50. @type loginpw: string
  51. @return: dictionary of metadata tags as keys, and (interpreted)
  52. value as value
  53. @rtype: dict
  54. @raise TypeError: if file format is not supported.
  55. @raise RuntimeError: if required library to process file is missing.
  56. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read.
  57. """
  58. metadata = None
  59. # Check file type (0 base, 1 name, 2 ext)
  60. ext = decompose_file(inputfile)[2]
  61. if verbose > 5:
  62. print ext.lower(), 'extension to extract from'
  63. # Load plugins
  64. metadata_extractor_plugins = PluginContainer(
  65. os.path.join(CFG_PYLIBDIR,
  66. 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
  67. plugin_builder=plugin_builder_function,
  68. api_version=__required_plugin_API_version__)
  69. # Loop through the plugins to find a good one for given file
  70. for plugin_name, plugin in metadata_extractor_plugins.iteritems():
  71. # Local file
  72. if plugin.has_key('can_read_local') and \
  73. plugin['can_read_local'](inputfile) and not remote and \
  74. (not force or plugin_name == force):
  75. if verbose > 5:
  76. print 'Using ' + plugin_name
  77. fetched_metadata = plugin['read_metadata_local'](inputfile,
  78. verbose)
  79. if not metadata:
  80. metadata = fetched_metadata
  81. else:
  82. metadata.update(fetched_metadata)
  83. # Remote file
  84. elif remote and plugin.has_key('can_read_remote') and \
  85. plugin['can_read_remote'](inputfile) and \
  86. (not force or plugin_name == force):
  87. if verbose > 5:
  88. print 'Using ' + plugin_name
  89. fetched_metadata = plugin['read_metadata_remote'](inputfile,
  90. loginpw,
  91. verbose)
  92. if not metadata:
  93. metadata = fetched_metadata
  94. else:
  95. metadata.update(fetched_metadata)
  96. # Return in case we have something
  97. if metadata is not None:
  98. return metadata
  99. # Case of no plugin found, raise
  100. raise TypeError, 'Unsupported file type'
  101. def write_metadata(inputfile, outputfile, metadata_dictionary,
  102. force=None, verbose=0):
  103. """
  104. Writes metadata to given file.
  105. Availability depends on input file format and installed plugins
  106. (return C{TypeError} if unsupported file format).
  107. @param inputfile: path to a file
  108. @type inputfile: string
  109. @param outputfile: path to the resulting file.
  110. @type outputfile: string
  111. @param verbose: verbosity
  112. @type verbose: int
  113. @param metadata_dictionary: keys and values of metadata to update.
  114. @type metadata_dictionary: dict
  115. @param force: name of plugin to use, to skip plugin auto-discovery
  116. @type force: string
  117. @return: output of the plugin
  118. @rtype: string
  119. @raise TypeError: if file format is not supported.
  120. @raise RuntimeError: if required library to process file is missing.
  121. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated.
  122. """
  123. # Check file type (0 base, 1 name, 2 ext)
  124. ext = decompose_file(inputfile)[2]
  125. if verbose > 5:
  126. print ext.lower(), 'extension to write to'
  127. # Plugins
  128. metadata_extractor_plugins = PluginContainer(
  129. os.path.join(CFG_PYLIBDIR,
  130. 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
  131. plugin_builder=plugin_builder_function,
  132. api_version=__required_plugin_API_version__
  133. )
  134. # Loop through the plugins to find a good one to ext
  135. for plugin_name, plugin in metadata_extractor_plugins.iteritems():
  136. if plugin.has_key('can_write_local') and \
  137. plugin['can_write_local'](inputfile) and \
  138. (not force or plugin_name == force):
  139. if verbose > 5:
  140. print 'Using ' + plugin_name
  141. return plugin['write_metadata_local'](inputfile,
  142. outputfile,
  143. metadata_dictionary,
  144. verbose)
  145. # Case of no plugin found, raise
  146. raise TypeError, 'Unsupported file type'
  147. def metadata_info(verbose=0):
  148. """Shows information about the available plugins"""
  149. print 'Plugin APIs version: %s' % str(__required_plugin_API_version__)
  150. # Plugins
  151. print 'Available plugins:'
  152. metadata_extractor_plugins = PluginContainer(
  153. os.path.join(CFG_PYLIBDIR,
  154. 'invenio', 'websubmit_file_metadata_plugins', 'wsm_*.py'),
  155. plugin_builder=plugin_builder_function,
  156. api_version=__required_plugin_API_version__
  157. )
  158. # Print each operation on each plugin
  159. for plugin_name, plugin_funcs in metadata_extractor_plugins.iteritems():
  160. if len(plugin_funcs) > 0:
  161. print '-- Name: ' + plugin_name
  162. print ' Supported operation%s: ' % \
  163. (len(plugin_funcs) > 1 and 's' or '') + \
  164. ', '.join(plugin_funcs)
  165. # Are there any unloaded plugins?
  166. broken_plugins = metadata_extractor_plugins.get_broken_plugins()
  167. if len(broken_plugins.keys()) > 0:
  168. print 'Could not load the following plugin%s:' % \
  169. (len(broken_plugins.keys()) > 1 and 's' or '')
  170. for broken_plugin_name, broken_plugin_trace_info in broken_plugins.iteritems():
  171. print '-- Name: ' + broken_plugin_name
  172. if verbose > 5:
  173. formatted_traceback = \
  174. traceback.format_exception(broken_plugin_trace_info[0],
  175. broken_plugin_trace_info[1],
  176. broken_plugin_trace_info[2])
  177. print ' ' + ''.join(formatted_traceback).replace('\n', '\n ')
  178. elif verbose > 0:
  179. print ' ' + str(broken_plugin_trace_info[1])
  180. def print_metadata(metadata):
  181. """
  182. Pretty-prints metadata returned by the plugins to standard output.
  183. @param metadata: object returned by the plugins when reading metadata
  184. @type metadata: dict
  185. """
  186. if metadata:
  187. max_key_length = max([len(key) for key in metadata.keys()])
  188. for key, value in metadata.iteritems():
  189. print key, "." * (max_key_length - len(key)), str(value)
  190. else:
  191. print '(No metadata)'
  192. def plugin_builder_function(plugin_name, plugin_code):
  193. """
  194. Internal function used to build the plugin container, so it behaves as a
  195. dictionary.
  196. @param plugin_name: plugin_name
  197. @param plugin_code: plugin_code
  198. @return: the plugin container
  199. @rtype: dict
  200. """
  201. ret = {}
  202. for funct_name in ('can_read_local',
  203. 'can_read_remote',
  204. 'can_write_local',
  205. 'read_metadata_local',
  206. 'write_metadata_local',
  207. 'read_metadata_remote'):
  208. funct = getattr(plugin_code, funct_name, None)
  209. if funct is not None:
  210. ret[funct_name] = funct
  211. return ret
  212. def main():
  213. """
  214. Manages the arguments, in order to call the proper metadata
  215. handling function
  216. """
  217. def dictionary_callback(option, opt, value, parser, *args, **kwargs):
  218. """callback function used to get strings from command line
  219. of the type tag=value and push it into a dictionary
  220. @param parameters: optparse parameters"""
  221. if '=' in value:
  222. key, val = value.split('=', 1)
  223. if getattr(parser.values, 'metadata', None) is None:
  224. parser.values.metadata = {}
  225. parser.values.metadata[key] = val
  226. return
  227. else:
  228. raise ValueError("%s is not in the form key=value" % value)
  229. # Parse arguments
  230. parser = OptionParser(usage="websubmit_file_metadata {-e | -u | -i} " + \
  231. "[-f arg2] [-v] [-d tag=value] [-r] [-l arg3] " + \
  232. "/path/to/file")
  233. parser.add_option("-e", "--extract", dest="extract", action='store_true',
  234. help="extract metadata from file", default=False)
  235. parser.add_option("-u", "--update", dest="update", action='store_true',
  236. help="update file metadata", default=False)
  237. parser.add_option("-o", "--output-file", dest="output_file",
  238. help="Place to save updated file (when --update). Default is same as input file",
  239. type="string", default=None)
  240. parser.add_option("-f", "--force", dest="force_plugin",
  241. help="Plugin we want to be used", type="string",
  242. default=None)
  243. parser.add_option('-v', '--verbose', type="int",
  244. dest='verbose', help='shows detailed information',
  245. default=1)
  246. parser.add_option('-r', '--remote', action='store_true',
  247. dest='remote', help='working with remote file',
  248. default=False)
  249. parser.add_option('-d', '--dictionary-entry',
  250. action="callback",
  251. callback=dictionary_callback, type="string",
  252. help='metadata to update [-d tag=value]')
  253. parser.add_option('-i', '--info', action='store_true',
  254. dest='info', help='shows plugin information',
  255. default=False)
  256. parser.add_option("-l", "--loginpw", dest="loginpw",
  257. help="Login and password to access remote server [login:pw]",
  258. type="string", default=None)
  259. (options, args) = parser.parse_args()
  260. ## Get the input file from the arguments list (it should be the
  261. ## first argument):
  262. input_file = None
  263. if len(args) > 0:
  264. input_file = args[0]
  265. # If there is no option -d, we avoid metadata option being undefined
  266. if getattr(parser.values, 'metadata', None) is None:
  267. parser.values.metadata = {}
  268. # Is output file specified?
  269. if options.update and not options.output_file:
  270. if options.verbose > 5:
  271. print "Option --output-file not specified. Updating input file."
  272. options.output_file = input_file
  273. elif options.extract and options.output_file:
  274. print "Option --output-file cannot be used with --extract."
  275. print parser.get_usage()
  276. sys.exit(1)
  277. # Make sure there is not extract / write / info at the same time
  278. if (options.extract and options.update) or \
  279. (options.extract and options.info) or \
  280. (options.info and options.update):
  281. print "Choose either --extract, --update or --info"
  282. print parser.get_usage()
  283. sys.exit(1)
  284. elif (options.extract and not input_file) or \
  285. (options.update and not input_file):
  286. print "Input file is missing"
  287. print parser.get_usage()
  288. sys.exit(1)
  289. # Function call based on args
  290. if options.extract:
  291. try:
  292. metadata = read_metadata(input_file,
  293. options.force_plugin,
  294. options.remote,
  295. options.loginpw,
  296. options.verbose)
  297. print_metadata(metadata)
  298. except TypeError, err:
  299. print err
  300. return 1
  301. except RuntimeError, err:
  302. print err
  303. return 1
  304. except InvenioWebSubmitFileMetadataRuntimeError, err:
  305. print err
  306. return 1
  307. elif options.update:
  308. try:
  309. write_metadata(input_file,
  310. options.output_file,
  311. options.metadata,
  312. options.force_plugin,
  313. options.verbose)
  314. except TypeError, err:
  315. print err
  316. return 1
  317. except RuntimeError, err:
  318. print err
  319. return 1
  320. except InvenioWebSubmitFileMetadataRuntimeError, err:
  321. print err
  322. return 1
  323. elif options.info:
  324. try:
  325. metadata_info(options.verbose)
  326. except TypeError:
  327. print 'Problem retrieving plugin information\n'
  328. return 1
  329. else:
  330. parser.error("Incorrect number of arguments\n")
  331. if __name__ == "__main__":
  332. main()