invenio /modules/websubmit/lib/wsm_pyexiv2_plugin.py

Language Python Lines 398
MD5 Hash 7d5a7a36762be1323d9d854958b5ae4d
Repository https://github.com/gardenunez/invenio.git View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebSubmit Metadata Plugin - This is a plugin to extract/update
EXIF and IPTC metadata from images.

Dependencies: pyexiv2 (Python binding to the Exiv2 library)
"""

__plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"

import os
import base64
import httplib
import tempfile
import shutil
import pyexiv2
from invenio.bibdocfile import decompose_file
from invenio.config import CFG_TMPDIR
from invenio.websubmit_config import InvenioWebSubmitFileMetadataRuntimeError

def can_read_local(inputfile):
    """
    Checks if inputfile is among metadata-readable file types

    Only the extension of the file name is inspected, in a
    case-insensitive way; the file itself is never opened.

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    # Every entry is written with its leading dot; the '.jpe' entry
    # used to lack it, unlike all the others, so it never matched.
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', '.jpe',
                           '.jfif', '.jfi', '.jif']

def can_read_remote(inputfile):
    """
    Checks if inputfile is among the remote metadata-readable file
    types

    Only the extension is inspected, in a case-insensitive way; no
    connection is made. Unlike the local check, '.tiff' is not in the
    list: only JPEG-style extensions are accepted.

    @param inputfile: path (URL) to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if the extension can be handled
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    # Every entry is written with its leading dot; the '.jpe' entry
    # used to lack it, unlike all the others, so it never matched.
    return ext.lower() in ['.jpg', '.jpeg', '.jpe',
                           '.jfif', '.jfi', '.jif']

def can_write_local(inputfile):
    """
    Checks if inputfile is among metadata-writable file types

    Only the extension of the file name is inspected, in a
    case-insensitive way; the file itself is never opened.

    @param inputfile: path to the image
    @type inputfile: string
    @rtype: boolean
    @return: True if file can be processed
    """
    # Check file type (0 base, 1 name, 2 ext)
    ext = decompose_file(inputfile)[2]
    # Every entry is written with its leading dot; the '.jpe' entry
    # used to lack it, unlike all the others, so it never matched.
    return ext.lower() in ['.jpg', '.tiff', '.jpeg', '.jpe',
                           '.jfif', '.jfi', '.jif']

def read_metadata_local(inputfile, verbose):
    """
    Collect the EXIF and IPTC metadata of a local image

    EXIF entries are stored with their interpreted (human readable)
    value, IPTC entries with the repr() of their value.

    @param inputfile: path to the image
    @type inputfile: string
    @param verbose: verbosity (not used by this function)
    @type verbose: int
    @rtype: dict
    @return: dictionary with metadata, keyed by tag name
    """
    # Open the image and let pyexiv2 parse its metadata
    picture = pyexiv2.Image(inputfile)
    picture.readMetadata()

    # EXIF tags first ...
    metadata = dict((exif_tag, picture.interpretedExifValue(exif_tag))
                    for exif_tag in picture.exifKeys())

    # ... then IPTC tags on top of them
    metadata.update((iptc_tag, repr(picture[iptc_tag]))
                    for iptc_tag in picture.iptcKeys())

    return metadata

def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    EXIF and IPTC metadata writing, previous tag printing, to
    images. If some tag not set, it is auto-added, but be a valid exif
    or iptc tag.

    @param inputfile: path to the image
    @type inputfile: string
    @param outputfile: path to the resulting image
    @type outputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param metadata_dictionary: metadata information to update inputfile
    @rtype: dict
    """
    if inputfile != outputfile:
        # Create copy of inputfile
        try:
            shutil.copy2(inputfile, outputfile)
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError(err)

    # Load the image
    image = pyexiv2.Image(inputfile)

    # Read the metadata
    image.readMetadata()

    # Main Case: Dictionary received through option -d
    if metadata_dictionary:
        for tag in metadata_dictionary:
            if tag in image.exifKeys() or tag in image.iptcKeys():
                # Updating
                if verbose > 0:
                    print "Updating %(tag)s from <%(old_value)s> to <%(new_value)s>" % \
                          {'tag': tag,
                           'old_value': image[tag],
                           'new_value': metadata_dictionary[tag]}
            else:
                # Adding
                if verbose > 0:
                    print "Adding %(tag)s with value <%(new_value)s>" % \
                          {'tag': tag,
                           'new_value': metadata_dictionary[tag]}
            try:
                image[tag] = metadata_dictionary[tag]
                image.writeMetadata()
            except Exception:
                print 'Tag or Value incorrect'

    # Alternative way: User interaction
    else:
        data_modified = False
        user_input = 'user_input'
        print "Entering interactive mode. Choose what you want to do:"
        while (user_input):
            if not data_modified:
                try:
                    user_input = raw_input('[w]rite / [q]uit\n')
                except:
                    print "Aborting"
                    return
            else:
                try:
                    user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
                except:
                    print "Aborting"
                    return

            if user_input == 'q':
                if not data_modified:
                    return
                break
            elif user_input == 'w':
                try:
                    tag = raw_input('Tag to update (Any valid Exif or Iptc Tag):\n')
                    value = raw_input('With value:\n')
                    data_modified = True
                except:
                    print "Aborting"
                    return
                try:
                    image[tag] = value
                except Exception, err:
                    print 'Tag or Value incorrect'
            elif user_input == 'a':
                return
            else:
                print "Invalid option: "
        try:
            image.writeMetadata()
        except Exception, err:
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata: " + err)

def read_metadata_remote(inputfile, loginpw, verbose):
    """
    EXIF and IPTC metadata extraction and printing from remote images

    @param inputfile: path to the remote image
    @type inputfile: string
    @param verbose: verbosity
    @type verbose: int
    @param loginpw: credentials to access secure servers (username:password)
    @type loginpw: string
    @return: dictionary with metadata
    @rtype: dict
    """
    # Check that inputfile is an URL
    secure = False
    pos = inputfile.lower().find('http://')
    if pos < 0:
        secure = True
        pos = inputfile.lower().find('https://')
    if pos < 0:
        raise InvenioWebSubmitFileMetadataRuntimeError("Inputfile (" + inputfile + ") is " + \
                                                       "not an URL, nor remote resource.")

    # Check if there is login and password
    if loginpw != None:
        (userid, passwd) = loginpw.split(':')

    # Make HTTPS Connection
    domain = inputfile.split('/')[2]
    if verbose > 3:
        print 'Domain: ', domain
    url = inputfile.split(domain)[1]
    if verbose > 3:
        print 'URL: ', url

    # Establish headers
    if loginpw != None:
        _headers = {"Accept": "*/*",
                    "Authorization": "Basic " + \
                    base64.encodestring(userid + ':' + passwd).strip()}
    else:
        _headers = {"Accept": "*/*"}

    conn = None

    # Establish connection
    # Case HTTPS
    if secure:
        try:
            conn = httplib.HTTPSConnection(domain)
            ## Request a connection
            conn.request("GET", url,
                  headers = _headers)
        except Exception:
            # Cannot connect
            print 'Could not connect'
    # Case HTTP
    else:
        try:
            conn = httplib.HTTPConnection(domain)
            ## Request a connection
            conn.request("GET", url,
                  headers = _headers)
        except Exception:
            # Cannot connect
            print 'Could not connect'

    # Get response
    if verbose > 5:
        print "Fetching data from remote server."
    response = conn.getresponse()
    if verbose > 2:
        print response.status, response.reason

    if response.status == 401:
        # Authentication required
        raise InvenioWebSubmitFileMetadataRuntimeError("URL requires authentication. Use --loginpw option")

    # Read first marker from image
    data = response.read(2)

    # Check if it is a valid image
    if data[0:2] != '\xff\xd8':
        raise InvenioWebSubmitFileMetadataRuntimeError("URL does not brings to a valid image file.")
    else:
        if verbose > 5:
            print 'Valid JPEG Standard-based image'

    # Start the fake image
    path_to_fake = fake_image_init(verbose)

    # Continue reading
    data = response.read(2)

    # Check if we find metadata (EXIF or IPTC)
    while data[0:2] != '\xff\xdb':
        if data[0:2] == '\xff\xe1' or data[0:2] == '\xff\xed':
            marker = data
            if verbose > 5:
                print 'Metadata Marker->', repr(marker), '\nGetting data'
            size = response.read(2)
            length = ord(size[0]) * 256 + ord(size[1])
            meta = response.read(length-2)
            insert_metadata(path_to_fake, marker, size, meta, verbose)
            break
        else:
            data = response.read(2)

    # Close connection
    conn.close()

    # Close fake image
    fake_image_close(path_to_fake, verbose)

    # Extract metadata once fake image is done
    return read_metadata_local(path_to_fake, verbose)

def fake_image_init(verbose):
    """
    Initializes the fake image

    Creates a temporary file in CFG_TMPDIR and writes the beginning
    of a JPEG file to it: the start-of-image marker followed by a
    JFIF (APP0) header segment.

    @param verbose: verbosity (not used by this function)
    @type verbose: int
    @rtype: string
    @return: path to fake image
    """
    # Create temp file for fake image
    (fd, path_to_fake) = tempfile.mkstemp(prefix='wsm_image_plugin_img_',
                                             dir=CFG_TMPDIR)

    # Start of image (ff d8) + APP0 segment (ff e0) with 'JFIF' header
    image_head = '\xff\xd8\xff\xe0\x00\x10\x4a\x46\x49\x46\x00' + \
                 '\x01\x01\x01\x00\x48\x00\x48\x00\x00'

    # Write head through the descriptor returned by mkstemp, in binary
    # mode so that no byte of the JPEG data can be translated.
    fake_image = os.fdopen(fd, 'wb')
    try:
        fake_image.write(image_head)
    finally:
        fake_image.close()

    return path_to_fake

def fake_image_close(path_to_fake, verbose):
    """
    Closes the fake image

    Appends the rest of a minimal JPEG file to the fake image, so
    that the result is a complete 1x1 pixel picture.

    @param path_to_fake: path to the fake image
    @type path_to_fake: string
    @param verbose: verbosity (not used by this function)
    @type verbose: int
    """
    # Image structure info: two quantization tables (ff db), the
    # frame header of a 1x1 image (ff c0), four Huffman tables
    # (ff c4), the scan (ff da) and the end-of-image marker (ff d9)
    image_tail = '\xff\xdb\x00\x43\x00\x05\x03\x04\x04\x04\x03\x05' + \
                 '\x04\x04\x04\x05\x05\x05\x06\x07\x0c\x08\x07\x07' + \
                 '\x07\x07\x0f\x0b\x0b\x09\x0c\x11\x0f\x12\x12\x11' + \
                 '\x0f\x11\x11\x13\x16\x1c\x17\x13\x14\x1a\x15\x11' + \
                 '\x11\x18\x21\x18\x1a\x1d\x1d\x1f\x1f\x1f\x13\x17' + \
                 '\x22\x24\x22\x1e\x24\x1c\x1e\x1f\x1e\xff\xdb\x00' + \
                 '\x43\x01\x05\x05\x05\x07\x06\x07\x0e\x08\x08\x0e' + \
                 '\x1e\x14\x11\x14\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
                 '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
                 '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
                 '\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e' + \
                 '\x1e\x1e\x1e\x1e\x1e\x1e\xff\xc0\x00\x11\x08\x00' + \
                 '\x01\x00\x01\x03\x01\x22\x00\x02\x11\x01\x03\x11' + \
                 '\x01\xff\xc4\x00\x15\x00\x01\x01\x00\x00\x00\x00' + \
                 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08' + \
                 '\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00' + \
                 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4' + \
                 '\x00\x14\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00' + \
                 '\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14' + \
                 '\x11\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
                 '\x00\x00\x00\x00\x00\x00\xff\xda\x00\x0c\x03\x01' + \
                 '\x00\x02\x11\x03\x11\x00\x3f\x00\xb2\xc0\x07\xff\xd9'

    # Append in binary mode so that no byte of the JPEG data can be
    # translated, and make sure the file gets closed in any case.
    fake_image = open(path_to_fake, 'ab')
    try:
        fake_image.write(image_tail)
    finally:
        fake_image.close()

def insert_metadata(path_to_fake, marker, size, meta, verbose):
    """
    Insert metadata into the fake image

    The three pieces are appended to the file in the given order:
    marker, size and meta.

    @param path_to_fake: path to the fake image
    @type path_to_fake: string
    @param marker: JPEG marker
    @type marker: string
    @param size: size of a JPEG block
    @type size: string
    @param meta: metadata information
    @type meta: string
    @param verbose: verbosity (not used by this function)
    @type verbose: int
    """
    # Metadata insertion, in binary mode so that no byte can be
    # translated; the file is closed even if a write fails.
    fake_image = open(path_to_fake, 'ab')
    try:
        fake_image.write(marker)
        fake_image.write(size)
        fake_image.write(meta)
    finally:
        fake_image.close()
Back to Top