invenio /modules/websubmit/lib/wsm_pdftk_plugin.py

Language Python Lines 209
MD5 Hash fb2223ca288830a2ce5227f77b4f7df1
Repository https://github.com/gardenunez/invenio.git View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
WebSubmit Metadata Plugin - This is the plugin to update metadata from
PDF files.

Dependencies: pdftk
"""

__plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"

import os
import shutil
import tempfile
from invenio.shellutils import run_shell_command
from invenio.bibdocfile import decompose_file
from invenio.config import CFG_PATH_PDFTK, CFG_TMPDIR
from invenio.websubmit_config import InvenioWebSubmitFileMetadataRuntimeError

# Without a configured pdftk binary nothing in this module can work, so
# importing it fails right away.
if not CFG_PATH_PDFTK:
    raise ImportError("Path to PDFTK is not set in CFG_PATH_PDFTK")

def can_read_local(inputfile):
    """
    Tell whether this plugin can read metadata from inputfile.

    Only the extension reported by decompose_file() is considered; it
    is compared case-insensitively against '.pdf'.

    @param inputfile: path to the file to check
    @type inputfile: string
    @rtype: boolean
    @return: True if the file extension is '.pdf'
    """
    # decompose_file() result is used as (0 base, 1 name, 2 ext)
    extension = decompose_file(inputfile)[2]
    return extension.lower() == '.pdf'

def can_write_local(inputfile):
    """
    Tell whether this plugin can write metadata into inputfile.

    Only the extension found by os.path.splitext() is considered; it
    is compared case-insensitively against '.pdf'.

    @param inputfile: path to the file to check
    @type inputfile: string
    @rtype: boolean
    @return: True if the file extension is '.pdf'
    """
    _root, extension = os.path.splitext(inputfile)
    return extension.lower() == '.pdf'

def read_metadata_local(inputfile, verbose):
    """
    Extract the metadata of a PDF file by parsing the output of
    'pdftk <inputfile> dump_data'.

    Lines are interpreted as 'name: value' pairs, split on the first
    colon:
      - an 'InfoKey' line followed by an 'InfoValue' line yields one
        entry (the InfoKey value maps to the InfoValue value);
      - any other line containing a colon is stored as is, keyed by
        the text before the colon;
      - lines without a colon are ignored.

    The exit status and error output of pdftk are not inspected: if
    nothing parsable was printed the result is simply an empty dict.

    @param inputfile: path to the PDF file
    @type inputfile: string
    @param verbose: verbosity (not used by this function)
    @type verbose: int
    @rtype: dict
    @return: dictionary with metadata
    """
    cmd = CFG_PATH_PDFTK + ' %s dump_data'
    (dummy_exit_status, output_std, dummy_output_err) = \
                      run_shell_command(cmd, args=(inputfile,))
    metadata_dict = {}
    # Value of the last 'InfoKey' line still waiting for its 'InfoValue'
    key = None
    for metadata_line in output_std.splitlines():
        if ':' not in metadata_line:
            # Most probably not a relevant line
            continue
        name, value = metadata_line.split(':', 1)
        name = name.strip()
        value = value.strip()
        if name.startswith("InfoKey"):
            key = value
        elif name.startswith("InfoValue"):
            # FIXME: Interpret the "ModDate" and "CreationDate" values
            # (format looks like "D:%Y%m%d%H%M%S%Z")? They are kept as
            # raw strings for now.
            if key:
                # An InfoValue without a preceding (non-empty) InfoKey
                # has no name to be stored under and is dropped.
                metadata_dict[key] = value
                key = None
        else:
            metadata_dict[name] = value

    return metadata_dict

def _discard_file(path):
    """
    Delete a temporary file, ignoring the case where it cannot be
    removed (for instance because it has already been moved away).

    @param path: path of the file to delete
    @type path: string
    """
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: a leftover temporary file must not hide
        # the real outcome of the metadata update.
        pass

def _ask_metadata_interactively(info_file):
    """
    Prompt the user for tag/value pairs and append each of them to
    info_file as an 'InfoKey: <tag>' / 'InfoValue: <value>' pair.

    @param info_file: open, writable info file for pdftk
    @type info_file: file
    @rtype: boolean
    @return: True if the update should now be applied; False if the
        user quit before entering anything, aborted, or the input
        could not be read
    """
    data_modified = False
    user_input = 'user_input'
    print("Entering interactive mode. Choose what you want to do:")
    # NOTE: an empty answer ends the loop, after which the update is
    # applied with whatever has been entered so far.
    while user_input:
        if data_modified:
            prompt = '[w]rite / [q]uit and apply / [a]bort \n'
        else:
            prompt = '[w]rite / [q]uit\n'
        try:
            user_input = raw_input(prompt)
        except (Exception, KeyboardInterrupt):
            # Ctrl-C, end of input or unusable stdin: give up, but let
            # SystemExit propagate.
            print("Aborting")
            return False
        if user_input == 'q':
            # Quitting with nothing entered means there is nothing to
            # apply.
            return data_modified
        elif user_input == 'w':
            try:
                tag = raw_input('Tag to update:\n')
                value = raw_input('With value:\n')
            except (Exception, KeyboardInterrupt):
                print("Aborting")
                return False
            # Write to info file
            info_file.write('InfoKey: ' + tag + '\nInfoValue: ' + value + '\n')
            data_modified = True
        elif user_input == 'a':
            return False
        else:
            print("Invalid option: ")
    return True

def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
    """
    Metadata write method, takes the .pdf as input and creates a new
    one with the new info.

    The new metadata is written to a temporary pdftk info file, then
    'pdftk <inputfile> update_info <info file> output <temporary pdf>'
    is run and the resulting PDF is moved to outputfile. Both
    temporary files are removed before returning, whatever the outcome.

    When metadata_dictionary is empty the tags and values are asked
    interactively on the terminal; if the user quits without entering
    anything, or aborts, the function returns without creating
    outputfile.

    @param inputfile: path to the pdf
    @type inputfile: string
    @param outputfile: path to the resulting pdf
    @type outputfile: string
    @param metadata_dictionary: metadata information to update inputfile
        (tag -> value, both strings)
    @type metadata_dictionary: dict
    @param verbose: verbosity
    @type verbose: int
    @raise InvenioWebSubmitFileMetadataRuntimeError: if pdftk did not
        produce a PDF or if the result could not be moved to outputfile
    """
    # Take the file name (0 base, 1 name, 2 ext)
    filename = decompose_file(inputfile)[1]

    # Print pdf metadata
    if verbose > 1:
        print('Metadata information in the PDF file ' + filename + ': \n')
        try:
            # The path is passed through run_shell_command's args, as
            # read_metadata_local() does, instead of being concatenated
            # into a shell command line.
            (dummy_status, dump_std, dump_err) = \
                  run_shell_command(CFG_PATH_PDFTK + ' %s dump_data',
                                    args=(inputfile,))
            print(dump_std)
            if dump_err:
                print(dump_err)
        except Exception:
            print('Problem with inputfile to PDFTK')

    # Info file for pdftk
    (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
                                             dir=CFG_TMPDIR)
    os.close(fd)
    pdf_temp_path = None
    try:
        file_in = open(path_to_info, 'w')
        try:
            if verbose > 5:
                print("Saving PDFTK info file to %s" % path_to_info)

            # User interaction to form the info file
            # Main Case: Dictionary received through option -d
            if metadata_dictionary:
                for tag in metadata_dictionary:
                    line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
                           metadata_dictionary[tag] + '\n'
                    if verbose > 0:
                        print(line)
                    file_in.write(line)
            elif not _ask_metadata_interactively(file_in):
                # Nothing to apply
                return
        finally:
            # Closed on every path so that pdftk sees the full content
            # and no handle is leaked on early return.
            file_in.close()

        (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
                                                  dir=CFG_TMPDIR)
        os.close(fd)

        # Now we call pdftk tool to update the info on a pdf
        cmd_pdftk = '%s %s update_info %s output %s'
        (dummy_exit_status, output_std, output_err) = \
                      run_shell_command(cmd_pdftk,
                                        args=(CFG_PATH_PDFTK, inputfile,
                                              path_to_info, pdf_temp_path))
        if verbose > 5:
            print("%s %s" % (output_std, output_err))

        # mkstemp() has already created pdf_temp_path, so its mere
        # existence proves nothing: only a non-empty file means that
        # pdftk actually wrote a PDF.
        if os.path.exists(pdf_temp_path) and \
               os.path.getsize(pdf_temp_path) > 0:
            # Move to final destination
            try:
                shutil.move(pdf_temp_path, outputfile)
            except Exception:
                raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
                                                               (pdf_temp_path, outputfile))
        else:
            # Something bad happened
            raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata " + output_err)
    finally:
        _discard_file(path_to_info)
        if pdf_temp_path is not None:
            # Already gone after a successful move; still present
            # (empty or partial) after a failure.
            _discard_file(pdf_temp_path)
Back to Top