PageRenderTime 44ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/websubmit/lib/wsm_pyexiv2_plugin.py

https://github.com/gardenunez/invenio
Python | 397 lines | 208 code | 40 blank | 149 comment | 54 complexity | 7d5a7a36762be1323d9d854958b5ae4d MD5 | raw file
Possible License(s): GPL-2.0
  1. ## This file is part of Invenio.
  2. ## Copyright (C) 2010, 2011 CERN.
  3. ##
  4. ## Invenio is free software; you can redistribute it and/or
  5. ## modify it under the terms of the GNU General Public License as
  6. ## published by the Free Software Foundation; either version 2 of the
  7. ## License, or (at your option) any later version.
  8. ##
  9. ## Invenio is distributed in the hope that it will be useful, but
  10. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ## General Public License for more details.
  13. ##
  14. ## You should have received a copy of the GNU General Public License
  15. ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
  16. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  17. """
  18. WebSubmit Metadata Plugin - This is a plugin to extract/update
  19. metadata from images.
  20. Dependencies: Exiv2
  21. """
  22. __plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"
  23. import os
  24. import base64
  25. import httplib
  26. import tempfile
  27. import shutil
  28. import pyexiv2
  29. from invenio.bibdocfile import decompose_file
  30. from invenio.config import CFG_TMPDIR
  31. from invenio.websubmit_config import InvenioWebSubmitFileMetadataRuntimeError
  32. def can_read_local(inputfile):
  33. """
  34. Checks if inputfile is among metadata-readable file types
  35. @param inputfile: path to the image
  36. @type inputfile: string
  37. @rtype: boolean
  38. @return: True if file can be processed
  39. """
  40. # Check file type (0 base, 1 name, 2 ext)
  41. ext = decompose_file(inputfile)[2]
  42. return ext.lower() in ['.jpg', '.tiff', '.jpeg', 'jpe',
  43. '.jfif', '.jfi', '.jif']
  44. def can_read_remote(inputfile):
  45. """Checks if inputfile is among metadata-readable
  46. file types
  47. @param inputfile: (string) path to the image
  48. @type inputfile: string
  49. @rtype: boolean
  50. @return: true if extension casn be handled"""
  51. # Check file type (0 base, 1 name, 2 ext)
  52. ext = decompose_file(inputfile)[2]
  53. return ext.lower() in ['.jpg', '.jpeg', 'jpe',
  54. '.jfif', '.jfi', '.jif']
  55. def can_write_local(inputfile):
  56. """
  57. Checks if inputfile is among metadata-writable file types
  58. @param inputfile: path to the image
  59. @type inputfile: string
  60. @rtype: boolean
  61. @return: True if file can be processed
  62. """
  63. # Check file type (0 base, 1 name, 2 ext)
  64. ext = decompose_file(inputfile)[2]
  65. return ext.lower() in ['.jpg', '.tiff', '.jpeg', 'jpe',
  66. '.jfif', '.jfi', '.jif']
  67. def read_metadata_local(inputfile, verbose):
  68. """
  69. EXIF and IPTC metadata extraction and printing from images
  70. @param inputfile: path to the image
  71. @type inputfile: string
  72. @param verbose: verbosity
  73. @type verbose: int
  74. @rtype: dict
  75. @return: dictionary with metadata
  76. """
  77. # Load the image
  78. image = pyexiv2.Image(inputfile)
  79. # Read the metadata
  80. image.readMetadata()
  81. image_info = {}
  82. # EXIF metadata
  83. for key in image.exifKeys():
  84. image_info[key] = image.interpretedExifValue(key)
  85. # IPTC metadata
  86. for key in image.iptcKeys():
  87. image_info[key] = repr(image[key])
  88. # Return the dictionary
  89. return image_info
  90. def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
  91. """
  92. EXIF and IPTC metadata writing, previous tag printing, to
  93. images. If some tag not set, it is auto-added, but be a valid exif
  94. or iptc tag.
  95. @param inputfile: path to the image
  96. @type inputfile: string
  97. @param outputfile: path to the resulting image
  98. @type outputfile: string
  99. @param verbose: verbosity
  100. @type verbose: int
  101. @param metadata_dictionary: metadata information to update inputfile
  102. @rtype: dict
  103. """
  104. if inputfile != outputfile:
  105. # Create copy of inputfile
  106. try:
  107. shutil.copy2(inputfile, outputfile)
  108. except Exception, err:
  109. raise InvenioWebSubmitFileMetadataRuntimeError(err)
  110. # Load the image
  111. image = pyexiv2.Image(inputfile)
  112. # Read the metadata
  113. image.readMetadata()
  114. # Main Case: Dictionary received through option -d
  115. if metadata_dictionary:
  116. for tag in metadata_dictionary:
  117. if tag in image.exifKeys() or tag in image.iptcKeys():
  118. # Updating
  119. if verbose > 0:
  120. print "Updating %(tag)s from <%(old_value)s> to <%(new_value)s>" % \
  121. {'tag': tag,
  122. 'old_value': image[tag],
  123. 'new_value': metadata_dictionary[tag]}
  124. else:
  125. # Adding
  126. if verbose > 0:
  127. print "Adding %(tag)s with value <%(new_value)s>" % \
  128. {'tag': tag,
  129. 'new_value': metadata_dictionary[tag]}
  130. try:
  131. image[tag] = metadata_dictionary[tag]
  132. image.writeMetadata()
  133. except Exception:
  134. print 'Tag or Value incorrect'
  135. # Alternative way: User interaction
  136. else:
  137. data_modified = False
  138. user_input = 'user_input'
  139. print "Entering interactive mode. Choose what you want to do:"
  140. while (user_input):
  141. if not data_modified:
  142. try:
  143. user_input = raw_input('[w]rite / [q]uit\n')
  144. except:
  145. print "Aborting"
  146. return
  147. else:
  148. try:
  149. user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
  150. except:
  151. print "Aborting"
  152. return
  153. if user_input == 'q':
  154. if not data_modified:
  155. return
  156. break
  157. elif user_input == 'w':
  158. try:
  159. tag = raw_input('Tag to update (Any valid Exif or Iptc Tag):\n')
  160. value = raw_input('With value:\n')
  161. data_modified = True
  162. except:
  163. print "Aborting"
  164. return
  165. try:
  166. image[tag] = value
  167. except Exception, err:
  168. print 'Tag or Value incorrect'
  169. elif user_input == 'a':
  170. return
  171. else:
  172. print "Invalid option: "
  173. try:
  174. image.writeMetadata()
  175. except Exception, err:
  176. raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata: " + err)
  177. def read_metadata_remote(inputfile, loginpw, verbose):
  178. """
  179. EXIF and IPTC metadata extraction and printing from remote images
  180. @param inputfile: path to the remote image
  181. @type inputfile: string
  182. @param verbose: verbosity
  183. @type verbose: int
  184. @param loginpw: credentials to access secure servers (username:password)
  185. @type loginpw: string
  186. @return: dictionary with metadata
  187. @rtype: dict
  188. """
  189. # Check that inputfile is an URL
  190. secure = False
  191. pos = inputfile.lower().find('http://')
  192. if pos < 0:
  193. secure = True
  194. pos = inputfile.lower().find('https://')
  195. if pos < 0:
  196. raise InvenioWebSubmitFileMetadataRuntimeError("Inputfile (" + inputfile + ") is " + \
  197. "not an URL, nor remote resource.")
  198. # Check if there is login and password
  199. if loginpw != None:
  200. (userid, passwd) = loginpw.split(':')
  201. # Make HTTPS Connection
  202. domain = inputfile.split('/')[2]
  203. if verbose > 3:
  204. print 'Domain: ', domain
  205. url = inputfile.split(domain)[1]
  206. if verbose > 3:
  207. print 'URL: ', url
  208. # Establish headers
  209. if loginpw != None:
  210. _headers = {"Accept": "*/*",
  211. "Authorization": "Basic " + \
  212. base64.encodestring(userid + ':' + passwd).strip()}
  213. else:
  214. _headers = {"Accept": "*/*"}
  215. conn = None
  216. # Establish connection
  217. # Case HTTPS
  218. if secure:
  219. try:
  220. conn = httplib.HTTPSConnection(domain)
  221. ## Request a connection
  222. conn.request("GET", url,
  223. headers = _headers)
  224. except Exception:
  225. # Cannot connect
  226. print 'Could not connect'
  227. # Case HTTP
  228. else:
  229. try:
  230. conn = httplib.HTTPConnection(domain)
  231. ## Request a connection
  232. conn.request("GET", url,
  233. headers = _headers)
  234. except Exception:
  235. # Cannot connect
  236. print 'Could not connect'
  237. # Get response
  238. if verbose > 5:
  239. print "Fetching data from remote server."
  240. response = conn.getresponse()
  241. if verbose > 2:
  242. print response.status, response.reason
  243. if response.status == 401:
  244. # Authentication required
  245. raise InvenioWebSubmitFileMetadataRuntimeError("URL requires authentication. Use --loginpw option")
  246. # Read first marker from image
  247. data = response.read(2)
  248. # Check if it is a valid image
  249. if data[0:2] != '\xff\xd8':
  250. raise InvenioWebSubmitFileMetadataRuntimeError("URL does not brings to a valid image file.")
  251. else:
  252. if verbose > 5:
  253. print 'Valid JPEG Standard-based image'
  254. # Start the fake image
  255. path_to_fake = fake_image_init(verbose)
  256. # Continue reading
  257. data = response.read(2)
  258. # Check if we find metadata (EXIF or IPTC)
  259. while data[0:2] != '\xff\xdb':
  260. if data[0:2] == '\xff\xe1' or data[0:2] == '\xff\xed':
  261. marker = data
  262. if verbose > 5:
  263. print 'Metadata Marker->', repr(marker), '\nGetting data'
  264. size = response.read(2)
  265. length = ord(size[0]) * 256 + ord(size[1])
  266. meta = response.read(length-2)
  267. insert_metadata(path_to_fake, marker, size, meta, verbose)
  268. break
  269. else:
  270. data = response.read(2)
  271. # Close connection
  272. conn.close()
  273. # Close fake image
  274. fake_image_close(path_to_fake, verbose)
  275. # Extract metadata once fake image is done
  276. return read_metadata_local(path_to_fake, verbose)
  277. def fake_image_init(verbose):
  278. """
  279. Initializes the fake image
  280. @param verbose: verbosity
  281. @type verbose: int
  282. @rtype: string
  283. @return: path to fake image
  284. """
  285. # Create temp file for fake image
  286. (fd, path_to_fake) = tempfile.mkstemp(prefix='wsm_image_plugin_img_',
  287. dir=CFG_TMPDIR)
  288. os.close(fd)
  289. # Open fake image and write head to it
  290. fake_image = open(path_to_fake, 'a')
  291. image_head = '\xff\xd8\xff\xe0\x00\x10\x4a\x46\x49\x46\x00' + \
  292. '\x01\x01\x01\x00\x48\x00\x48\x00\x00'
  293. fake_image.write(image_head)
  294. fake_image.close()
  295. return path_to_fake
  296. def fake_image_close(path_to_fake, verbose):
  297. """
  298. Closes the fake image
  299. @param path_to_fake: path to the fake image
  300. @type path_to_fake: string
  301. @param verbose: verbosity
  302. @type verbose: int
  303. """
  304. # Open fake image and write image structure info
  305. # (Huffman table[s]...) to it
  306. fake_image = open(path_to_fake, 'a')
  307. image_tail = '\xff\xdb\x00\x43\x00\x05\x03\x04\x04\x04\x03\x05' + \
  308. '\x04\x04\x04\x05\x05\x05\x06\x07\x0c\x08\x07\x07' + \
  309. '\x07\x07\x0f\x0b\x0b\x09\x0c\x11\x0f\x12\x12\x11' + \
  310. '\x0f\x11\x11\x13\x16\x1c\x17\x13\x14\x1a\x15\x11' + \
  311. '\x11\x18\x21\x18\x1a\x1d\x1d\x1f\x1f\x1f\x13\x17' + \
  312. '\x22\x24\x22\x1e\x24\x1c\x1e\x1f\x1e\xff\xdb\x00' + \
  313. '\x43\x01\x05\x05\x05\x07\x06\x07\x0e\x08\x08\x0e' + \
  314. '\x1e\x14\x11\x14\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
  315. '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
  316. '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
  317. '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
  318. '\x1e\x1e\x1e\x1e\x1e\x1e\xff\xc0\x00\x11\x08\x00' + \
  319. '\x01\x00\x01\x03\x01\x22\x00\x02\x11\x01\x03\x11' + \
  320. '\x01\xff\xc4\x00\x15\x00\x01\x01\x00\x00\x00\x00' + \
  321. '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08' + \
  322. '\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00' + \
  323. '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4' + \
  324. '\x00\x14\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00' + \
  325. '\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14' + \
  326. '\x11\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
  327. '\x00\x00\x00\x00\x00\x00\xff\xda\x00\x0c\x03\x01' + \
  328. '\x00\x02\x11\x03\x11\x00\x3f\x00\xb2\xc0\x07\xff\xd9'
  329. fake_image.write(image_tail)
  330. fake_image.close()
  331. def insert_metadata(path_to_fake, marker, size, meta, verbose):
  332. """
  333. Insert metadata into the fake image
  334. @param path_to_fake: path to the fake image
  335. @type path_to_fake: string
  336. @param marker: JPEG marker
  337. @type marker: string
  338. @param size: size of a JPEG block
  339. @type size: string
  340. @param meta: metadata information
  341. @type meta: string
  342. """
  343. # Metadata insertion
  344. fake_image = open(path_to_fake, 'a')
  345. fake_image.write(marker)
  346. fake_image.write(size)
  347. fake_image.write(meta)
  348. fake_image.close()