PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/invenio/legacy/websubmit/file_metadata.py

https://github.com/MSusik/invenio
Python | 366 lines | 239 code | 27 blank | 100 comment | 69 complexity | 4440357a61d47d1512cf9f7c53685c10 MD5 | raw file
Possible License(s): GPL-2.0
  1. # -*- coding: utf-8 -*-
  2. ##
  3. ## This file is part of Invenio.
  4. ## Copyright (C) 2009, 2010, 2011 CERN.
  5. ##
  6. ## Invenio is free software; you can redistribute it and/or
  7. ## modify it under the terms of the GNU General Public License as
  8. ## published by the Free Software Foundation; either version 2 of the
  9. ## License, or (at your option) any later version.
  10. ##
  11. ## Invenio is distributed in the hope that it will be useful, but
  12. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ## General Public License for more details.
  15. ##
  16. ## You should have received a copy of the GNU General Public License
  17. ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
  18. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  19. """
  20. This is the metadata reader and writer module. Contains the proper
  21. plugin containers in order to read/write metadata from images or other
  22. files.
  23. from __future__ import print_function
  24. Public APIs:
  25. - read_metadata()
  26. - write_metadata()
  27. """
  28. __required_plugin_API_version__ = "WebSubmit File Metadata Plugin API 1.0"
  29. import sys
  30. from optparse import OptionParser
  31. from six import iteritems
  32. from invenio.legacy.bibdocfile.api import decompose_file
  33. from invenio.legacy.websubmit.config import InvenioWebSubmitFileMetadataRuntimeError
  34. from invenio.utils.datastructures import LazyDict
  35. from invenio.base.utils import import_submodules_from_packages
  36. metadata_extractor_plugins = LazyDict(lambda: dict(filter(None, map(
  37. plugin_builder_function,
  38. import_submodules_from_packages('file_metadata_plugins',
  39. packages=['invenio.legacy.websubmit'])))))
  40. def read_metadata(inputfile, force=None, remote=False,
  41. loginpw=None, verbose=0):
  42. """
  43. Returns metadata extracted from given file as dictionary.
  44. Availability depends on input file format and installed plugins
  45. (return C{TypeError} if unsupported file format).
  46. @param inputfile: path to a file
  47. @type inputfile: string
  48. @param verbose: verbosity
  49. @type verbose: int
  50. @param force: name of plugin to use, to skip plugin auto-discovery
  51. @type force: string
  52. @param remote: if the file is accessed remotely or not
  53. @type remote: boolean
  54. @param loginpw: credentials to access secure servers (username:password)
  55. @type loginpw: string
  56. @return: dictionary of metadata tags as keys, and (interpreted)
  57. value as value
  58. @rtype: dict
  59. @raise TypeError: if file format is not supported.
  60. @raise RuntimeError: if required library to process file is missing.
  61. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be read.
  62. """
  63. metadata = None
  64. # Check file type (0 base, 1 name, 2 ext)
  65. ext = decompose_file(inputfile)[2]
  66. if verbose > 5:
  67. print(ext.lower(), 'extension to extract from')
  68. # Loop through the plugins to find a good one for given file
  69. for plugin_name, plugin in iteritems(metadata_extractor_plugins):
  70. # Local file
  71. if 'can_read_local' in plugin and \
  72. plugin['can_read_local'](inputfile) and not remote and \
  73. (not force or plugin_name == force):
  74. if verbose > 5:
  75. print('Using ' + plugin_name)
  76. fetched_metadata = plugin['read_metadata_local'](inputfile,
  77. verbose)
  78. if not metadata:
  79. metadata = fetched_metadata
  80. else:
  81. metadata.update(fetched_metadata)
  82. # Remote file
  83. elif remote and 'can_read_remote' in plugin and \
  84. plugin['can_read_remote'](inputfile) and \
  85. (not force or plugin_name == force):
  86. if verbose > 5:
  87. print('Using ' + plugin_name)
  88. fetched_metadata = plugin['read_metadata_remote'](inputfile,
  89. loginpw,
  90. verbose)
  91. if not metadata:
  92. metadata = fetched_metadata
  93. else:
  94. metadata.update(fetched_metadata)
  95. # Return in case we have something
  96. if metadata is not None:
  97. return metadata
  98. # Case of no plugin found, raise
  99. raise TypeError, 'Unsupported file type'
  100. def write_metadata(inputfile, outputfile, metadata_dictionary,
  101. force=None, verbose=0):
  102. """
  103. Writes metadata to given file.
  104. Availability depends on input file format and installed plugins
  105. (return C{TypeError} if unsupported file format).
  106. @param inputfile: path to a file
  107. @type inputfile: string
  108. @param outputfile: path to the resulting file.
  109. @type outputfile: string
  110. @param verbose: verbosity
  111. @type verbose: int
  112. @param metadata_dictionary: keys and values of metadata to update.
  113. @type metadata_dictionary: dict
  114. @param force: name of plugin to use, to skip plugin auto-discovery
  115. @type force: string
  116. @return: output of the plugin
  117. @rtype: string
  118. @raise TypeError: if file format is not supported.
  119. @raise RuntimeError: if required library to process file is missing.
  120. @raise InvenioWebSubmitFileMetadataRuntimeError: when metadata cannot be updated.
  121. """
  122. # Check file type (0 base, 1 name, 2 ext)
  123. ext = decompose_file(inputfile)[2]
  124. if verbose > 5:
  125. print(ext.lower(), 'extension to write to')
  126. # Loop through the plugins to find a good one to ext
  127. for plugin_name, plugin in iteritems(metadata_extractor_plugins):
  128. if 'can_write_local' in plugin and \
  129. plugin['can_write_local'](inputfile) and \
  130. (not force or plugin_name == force):
  131. if verbose > 5:
  132. print('Using ' + plugin_name)
  133. return plugin['write_metadata_local'](inputfile,
  134. outputfile,
  135. metadata_dictionary,
  136. verbose)
  137. # Case of no plugin found, raise
  138. raise TypeError, 'Unsupported file type'
  139. def metadata_info(verbose=0):
  140. """Shows information about the available plugins"""
  141. print('Plugin APIs version: %s' % str(__required_plugin_API_version__))
  142. # Plugins
  143. print('Available plugins:')
  144. # Print each operation on each plugin
  145. for plugin_name, plugin_funcs in iteritems(metadata_extractor_plugins):
  146. if len(plugin_funcs) > 0:
  147. print('-- Name: ' + plugin_name)
  148. print(' Supported operation%s: ' % \
  149. (len(plugin_funcs) > 1 and 's' or '') + \
  150. ', '.join(plugin_funcs))
  151. # Are there any unloaded plugins?
  152. # broken_plugins = metadata_extractor_plugins.get_broken_plugins()
  153. # if len(broken_plugins.keys()) > 0:
  154. # print 'Could not load the following plugin%s:' % \
  155. # (len(broken_plugins.keys()) > 1 and 's' or '')
  156. # for broken_plugin_name, broken_plugin_trace_info in iteritems(broken_plugins):
  157. # print '-- Name: ' + broken_plugin_name
  158. # if verbose > 5:
  159. # formatted_traceback = \
  160. # traceback.format_exception(broken_plugin_trace_info[0],
  161. # broken_plugin_trace_info[1],
  162. # broken_plugin_trace_info[2])
  163. # print ' ' + ''.join(formatted_traceback).replace('\n', '\n ')
  164. # elif verbose > 0:
  165. # print ' ' + str(broken_plugin_trace_info[1])
  166. def print_metadata(metadata):
  167. """
  168. Pretty-prints metadata returned by the plugins to standard output.
  169. @param metadata: object returned by the plugins when reading metadata
  170. @type metadata: dict
  171. """
  172. if metadata:
  173. max_key_length = max([len(key) for key in metadata.keys()])
  174. for key, value in iteritems(metadata):
  175. print(key, "." * (max_key_length - len(key)), str(value))
  176. else:
  177. print('(No metadata)')
  178. def plugin_builder_function(plugin):
  179. """
  180. Internal function used to build the plugin container, so it behaves as a
  181. dictionary.
  182. @param plugin_name: plugin_name
  183. @param plugin_code: plugin_code
  184. @return: the plugin container
  185. @rtype: dict
  186. """
  187. name = plugin.__name__.split('.')[-1]
  188. if not name.startswith('wsm_'):
  189. return
  190. ## Let's check for API version.
  191. api_version = getattr(plugin, '__plugin_version__', None)
  192. if api_version != __required_plugin_API_version__:
  193. raise Exception("Plugin version mismatch."
  194. " Expected %s, found %s" % (__required_plugin_API_version__,
  195. api_version))
  196. ret = {}
  197. for funct_name in ('can_read_local',
  198. 'can_read_remote',
  199. 'can_write_local',
  200. 'read_metadata_local',
  201. 'write_metadata_local',
  202. 'read_metadata_remote'):
  203. funct = getattr(plugin, funct_name, None)
  204. if funct is not None:
  205. ret[funct_name] = funct
  206. return name, ret
  207. def main():
  208. """
  209. Manages the arguments, in order to call the proper metadata
  210. handling function
  211. """
  212. def dictionary_callback(option, opt, value, parser, *args, **kwargs):
  213. """callback function used to get strings from command line
  214. of the type tag=value and push it into a dictionary
  215. @param parameters: optparse parameters"""
  216. if '=' in value:
  217. key, val = value.split('=', 1)
  218. if getattr(parser.values, 'metadata', None) is None:
  219. parser.values.metadata = {}
  220. parser.values.metadata[key] = val
  221. return
  222. else:
  223. raise ValueError("%s is not in the form key=value" % value)
  224. # Parse arguments
  225. parser = OptionParser(usage="websubmit_file_metadata {-e | -u | -i} " + \
  226. "[-f arg2] [-v] [-d tag=value] [-r] [-l arg3] " + \
  227. "/path/to/file")
  228. parser.add_option("-e", "--extract", dest="extract", action='store_true',
  229. help="extract metadata from file", default=False)
  230. parser.add_option("-u", "--update", dest="update", action='store_true',
  231. help="update file metadata", default=False)
  232. parser.add_option("-o", "--output-file", dest="output_file",
  233. help="Place to save updated file (when --update). Default is same as input file",
  234. type="string", default=None)
  235. parser.add_option("-f", "--force", dest="force_plugin",
  236. help="Plugin we want to be used", type="string",
  237. default=None)
  238. parser.add_option('-v', '--verbose', type="int",
  239. dest='verbose', help='shows detailed information',
  240. default=1)
  241. parser.add_option('-r', '--remote', action='store_true',
  242. dest='remote', help='working with remote file',
  243. default=False)
  244. parser.add_option('-d', '--dictionary-entry',
  245. action="callback",
  246. callback=dictionary_callback, type="string",
  247. help='metadata to update [-d tag=value]')
  248. parser.add_option('-i', '--info', action='store_true',
  249. dest='info', help='shows plugin information',
  250. default=False)
  251. parser.add_option("-l", "--loginpw", dest="loginpw",
  252. help="Login and password to access remote server [login:pw]",
  253. type="string", default=None)
  254. (options, args) = parser.parse_args()
  255. ## Get the input file from the arguments list (it should be the
  256. ## first argument):
  257. input_file = None
  258. if len(args) > 0:
  259. input_file = args[0]
  260. # If there is no option -d, we avoid metadata option being undefined
  261. if getattr(parser.values, 'metadata', None) is None:
  262. parser.values.metadata = {}
  263. # Is output file specified?
  264. if options.update and not options.output_file:
  265. if options.verbose > 5:
  266. print("Option --output-file not specified. Updating input file.")
  267. options.output_file = input_file
  268. elif options.extract and options.output_file:
  269. print("Option --output-file cannot be used with --extract.")
  270. print(parser.get_usage())
  271. sys.exit(1)
  272. # Make sure there is not extract / write / info at the same time
  273. if (options.extract and options.update) or \
  274. (options.extract and options.info) or \
  275. (options.info and options.update):
  276. print("Choose either --extract, --update or --info")
  277. print(parser.get_usage())
  278. sys.exit(1)
  279. elif (options.extract and not input_file) or \
  280. (options.update and not input_file):
  281. print("Input file is missing")
  282. print(parser.get_usage())
  283. sys.exit(1)
  284. # Function call based on args
  285. if options.extract:
  286. try:
  287. metadata = read_metadata(input_file,
  288. options.force_plugin,
  289. options.remote,
  290. options.loginpw,
  291. options.verbose)
  292. print_metadata(metadata)
  293. except TypeError as err:
  294. print(err)
  295. return 1
  296. except RuntimeError as err:
  297. print(err)
  298. return 1
  299. except InvenioWebSubmitFileMetadataRuntimeError as err:
  300. print(err)
  301. return 1
  302. elif options.update:
  303. try:
  304. write_metadata(input_file,
  305. options.output_file,
  306. options.metadata,
  307. options.force_plugin,
  308. options.verbose)
  309. except TypeError as err:
  310. print(err)
  311. return 1
  312. except RuntimeError as err:
  313. print(err)
  314. return 1
  315. except InvenioWebSubmitFileMetadataRuntimeError as err:
  316. print(err)
  317. return 1
  318. elif options.info:
  319. try:
  320. metadata_info(options.verbose)
  321. except TypeError:
  322. print('Problem retrieving plugin information\n')
  323. return 1
  324. else:
  325. parser.error("Incorrect number of arguments\n")
  326. if __name__ == "__main__":
  327. main()