PageRenderTime 58ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/modules/websubmit/lib/wsm_pdftk_plugin.py

https://github.com/lbjay/cds-invenio
Python | 208 lines | 118 code | 15 blank | 75 comment | 33 complexity | 4b7f08c4f14b279c99605554b53dafbf MD5 | raw file
Possible License(s): GPL-2.0
  1. ## This file is part of CDS Invenio.
  2. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
  3. ##
  4. ## CDS Invenio is free software; you can redistribute it and/or
  5. ## modify it under the terms of the GNU General Public License as
  6. ## published by the Free Software Foundation; either version 2 of the
  7. ## License, or (at your option) any later version.
  8. ##
  9. ## CDS Invenio is distributed in the hope that it will be useful, but
  10. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ## General Public License for more details.
  13. ##
  14. ## You should have received a copy of the GNU General Public License
  15. ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
  16. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  17. """
  18. WebSubmit Metadata Plugin - This is the plugin to update metadata from
  19. PDF files.
  20. Dependencies: pdftk
  21. """
  22. __plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"
  23. import os
  24. import shutil
  25. import tempfile
  26. from invenio.shellutils import run_shell_command
  27. from invenio.bibdocfile import decompose_file
  28. from invenio.config import CFG_PATH_PDFTK, CFG_TMPDIR
  29. from invenio.websubmit_config import InvenioWebSubmitFileMetadataRuntimeError
  30. if not CFG_PATH_PDFTK:
  31. raise ImportError, "Path to PDFTK is not set in CFG_PATH_PDFTK"
  32. def can_read_local(inputfile):
  33. """
  34. Checks if inputfile is among metadata-readable file types
  35. @param inputfile: path to the image
  36. @type inputfile: string
  37. @rtype: boolean
  38. @return: True if file can be processed
  39. """
  40. # Check file type (0 base, 1 name, 2 ext)
  41. ext = decompose_file(inputfile)[2]
  42. return ext.lower() in ['.pdf']
  43. def can_write_local(inputfile):
  44. """
  45. Checks if inputfile is among metadata-writable file types (pdf)
  46. @param inputfile: path to the image
  47. @type inputfile: string
  48. @rtype: boolean
  49. @return: True if file can be processed
  50. """
  51. ext = os.path.splitext(inputfile)[1]
  52. return ext.lower() in ['.pdf']
  53. def read_metadata_local(inputfile, verbose):
  54. """
  55. Metadata extraction from many kind of files
  56. @param inputfile: path to the image
  57. @type inputfile: string
  58. @param verbose: verbosity
  59. @type verbose: int
  60. @rtype: dict
  61. @return: dictionary with metadata
  62. """
  63. cmd = CFG_PATH_PDFTK + ' %s dump_data'
  64. (exit_status, output_std, output_err) = \
  65. run_shell_command(cmd, args=(inputfile,))
  66. metadata_dict = {}
  67. key = None
  68. value = None
  69. for metadata_line in output_std.splitlines():
  70. if metadata_line.strip().startswith("InfoKey"):
  71. key = metadata_line.split(':', 1)[1].strip()
  72. elif metadata_line.strip().startswith("InfoValue"):
  73. value = metadata_line.split(':', 1)[1].strip()
  74. if key in ["ModDate", "CreationDate"]:
  75. # FIXME: Interpret these dates?
  76. try:
  77. pass
  78. #value = datetime.strptime(value, "D:%Y%m%d%H%M%S%Z")
  79. except:
  80. pass
  81. if key:
  82. metadata_dict[key] = value
  83. key = None
  84. else:
  85. try:
  86. custom_key, custom_value = metadata_line.split(':', 1)
  87. metadata_dict[custom_key.strip()] = custom_value.strip()
  88. except:
  89. # Most probably not relevant line
  90. pass
  91. return metadata_dict
  92. def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
  93. """
  94. Metadata write method, takes the .pdf as input and creates a new
  95. one with the new info.
  96. @param inputfile: path to the pdf
  97. @type inputfile: string
  98. @param outputfile: path to the resulting pdf
  99. @type outputfile: string
  100. @param verbose: verbosity
  101. @type verbose: int
  102. @param metadata_dictionary: metadata information to update inputfile
  103. @type metadata_dictionary: dict
  104. """
  105. # Take the file name (0 base, 1 name, 2 ext)
  106. filename = decompose_file(inputfile)[1]
  107. # Print pdf metadata
  108. if verbose > 1:
  109. print 'Metadata information in the PDF file ' + filename + ': \n'
  110. try:
  111. os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
  112. except Exception:
  113. print 'Problem with inputfile to PDFTK'
  114. # Info file for pdftk
  115. (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
  116. dir=CFG_TMPDIR)
  117. os.close(fd)
  118. file_in = open(path_to_info, 'w')
  119. if verbose > 5:
  120. print "Saving PDFTK info file to %s" % path_to_info
  121. # User interaction to form the info file
  122. # Main Case: Dictionary received through option -d
  123. if not metadata_dictionary == {}:
  124. for tag in metadata_dictionary:
  125. line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
  126. metadata_dictionary[tag] + '\n'
  127. if verbose > 0:
  128. print line
  129. file_in.writelines(line)
  130. else:
  131. data_modified = False
  132. user_input = 'user_input'
  133. print "Entering interactive mode. Choose what you want to do:"
  134. while (user_input):
  135. if not data_modified:
  136. try:
  137. user_input = raw_input('[w]rite / [q]uit\n')
  138. except:
  139. print "Aborting"
  140. return
  141. else:
  142. try:
  143. user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
  144. except:
  145. print "Aborting"
  146. return
  147. if user_input == 'q':
  148. if not data_modified:
  149. return
  150. break
  151. elif user_input == 'w':
  152. try:
  153. tag = raw_input('Tag to update:\n')
  154. value = raw_input('With value:\n')
  155. except:
  156. print "Aborting"
  157. return
  158. # Write to info file
  159. line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
  160. data_modified = True
  161. file_in.writelines(line)
  162. elif user_input == 'a':
  163. return
  164. else:
  165. print "Invalid option: "
  166. file_in.close()
  167. (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
  168. dir=CFG_TMPDIR)
  169. os.close(fd)
  170. # Now we call pdftk tool to update the info on a pdf
  171. #try:
  172. cmd_pdftk = '%s %s update_info %s output %s'
  173. (exit_status, output_std, output_err) = \
  174. run_shell_command(cmd_pdftk,
  175. args=(CFG_PATH_PDFTK, inputfile,
  176. path_to_info, pdf_temp_path))
  177. if verbose > 5:
  178. print output_std, output_err
  179. if os.path.exists(pdf_temp_path):
  180. # Move to final destination if exist
  181. try:
  182. shutil.move(pdf_temp_path, outputfile)
  183. except Exception, err:
  184. raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
  185. (pdf_temp_path, outputfile))
  186. else:
  187. # Something bad happened
  188. raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata " + output_err)