PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/invenio/legacy/websubmit/file_metadata_plugins/pdftk_plugin.py

https://github.com/MSusik/invenio
Python | 210 lines | 118 code | 15 blank | 77 comment | 33 complexity | 4b4d0c17386f76d7b58522586fd6e84c MD5 | raw file
Possible License(s): GPL-2.0
  1. ## This file is part of Invenio.
  2. ## Copyright (C) 2010, 2011 CERN.
  3. ##
  4. ## Invenio is free software; you can redistribute it and/or
  5. ## modify it under the terms of the GNU General Public License as
  6. ## published by the Free Software Foundation; either version 2 of the
  7. ## License, or (at your option) any later version.
  8. ##
  9. ## Invenio is distributed in the hope that it will be useful, but
  10. ## WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ## General Public License for more details.
  13. ##
  14. ## You should have received a copy of the GNU General Public License
  15. ## along with Invenio; if not, write to the Free Software Foundation, Inc.,
  16. ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  17. """
  18. WebSubmit Metadata Plugin - This is the plugin to update metadata from
  19. PDF files.
  20. from __future__ import print_function
  21. Dependencies: pdftk
  22. """
  23. __plugin_version__ = "WebSubmit File Metadata Plugin API 1.0"
  24. import os
  25. import shutil
  26. import tempfile
  27. from invenio.utils.shell import run_shell_command
  28. from invenio.legacy.bibdocfile.api import decompose_file
  29. from invenio.config import CFG_PATH_PDFTK, CFG_TMPDIR
  30. from invenio.legacy.websubmit.config import InvenioWebSubmitFileMetadataRuntimeError
  31. if not CFG_PATH_PDFTK:
  32. raise ImportError, "Path to PDFTK is not set in CFG_PATH_PDFTK"
  33. def can_read_local(inputfile):
  34. """
  35. Checks if inputfile is among metadata-readable file types
  36. @param inputfile: path to the image
  37. @type inputfile: string
  38. @rtype: boolean
  39. @return: True if file can be processed
  40. """
  41. # Check file type (0 base, 1 name, 2 ext)
  42. ext = decompose_file(inputfile)[2]
  43. return ext.lower() in ['.pdf']
  44. def can_write_local(inputfile):
  45. """
  46. Checks if inputfile is among metadata-writable file types (pdf)
  47. @param inputfile: path to the image
  48. @type inputfile: string
  49. @rtype: boolean
  50. @return: True if file can be processed
  51. """
  52. ext = os.path.splitext(inputfile)[1]
  53. return ext.lower() in ['.pdf']
  54. def read_metadata_local(inputfile, verbose):
  55. """
  56. Metadata extraction from many kind of files
  57. @param inputfile: path to the image
  58. @type inputfile: string
  59. @param verbose: verbosity
  60. @type verbose: int
  61. @rtype: dict
  62. @return: dictionary with metadata
  63. """
  64. cmd = CFG_PATH_PDFTK + ' %s dump_data'
  65. (exit_status, output_std, output_err) = \
  66. run_shell_command(cmd, args=(inputfile,))
  67. metadata_dict = {}
  68. key = None
  69. value = None
  70. for metadata_line in output_std.splitlines():
  71. if metadata_line.strip().startswith("InfoKey"):
  72. key = metadata_line.split(':', 1)[1].strip()
  73. elif metadata_line.strip().startswith("InfoValue"):
  74. value = metadata_line.split(':', 1)[1].strip()
  75. if key in ["ModDate", "CreationDate"]:
  76. # FIXME: Interpret these dates?
  77. try:
  78. pass
  79. #value = datetime.strptime(value, "D:%Y%m%d%H%M%S%Z")
  80. except:
  81. pass
  82. if key:
  83. metadata_dict[key] = value
  84. key = None
  85. else:
  86. try:
  87. custom_key, custom_value = metadata_line.split(':', 1)
  88. metadata_dict[custom_key.strip()] = custom_value.strip()
  89. except:
  90. # Most probably not relevant line
  91. pass
  92. return metadata_dict
  93. def write_metadata_local(inputfile, outputfile, metadata_dictionary, verbose):
  94. """
  95. Metadata write method, takes the .pdf as input and creates a new
  96. one with the new info.
  97. @param inputfile: path to the pdf
  98. @type inputfile: string
  99. @param outputfile: path to the resulting pdf
  100. @type outputfile: string
  101. @param verbose: verbosity
  102. @type verbose: int
  103. @param metadata_dictionary: metadata information to update inputfile
  104. @type metadata_dictionary: dict
  105. """
  106. # Take the file name (0 base, 1 name, 2 ext)
  107. filename = decompose_file(inputfile)[1]
  108. # Print pdf metadata
  109. if verbose > 1:
  110. print('Metadata information in the PDF file ' + filename + ': \n')
  111. try:
  112. os.system(CFG_PATH_PDFTK + ' ' + inputfile + ' dump_data')
  113. except Exception:
  114. print('Problem with inputfile to PDFTK')
  115. # Info file for pdftk
  116. (fd, path_to_info) = tempfile.mkstemp(prefix="wsm_pdf_plugin_info_", \
  117. dir=CFG_TMPDIR)
  118. os.close(fd)
  119. file_in = open(path_to_info, 'w')
  120. if verbose > 5:
  121. print("Saving PDFTK info file to %s" % path_to_info)
  122. # User interaction to form the info file
  123. # Main Case: Dictionary received through option -d
  124. if not metadata_dictionary == {}:
  125. for tag in metadata_dictionary:
  126. line = 'InfoKey: ' + tag + '\nInfoValue: ' + \
  127. metadata_dictionary[tag] + '\n'
  128. if verbose > 0:
  129. print(line)
  130. file_in.writelines(line)
  131. else:
  132. data_modified = False
  133. user_input = 'user_input'
  134. print("Entering interactive mode. Choose what you want to do:")
  135. while (user_input):
  136. if not data_modified:
  137. try:
  138. user_input = raw_input('[w]rite / [q]uit\n')
  139. except:
  140. print("Aborting")
  141. return
  142. else:
  143. try:
  144. user_input = raw_input('[w]rite / [q]uit and apply / [a]bort \n')
  145. except:
  146. print("Aborting")
  147. return
  148. if user_input == 'q':
  149. if not data_modified:
  150. return
  151. break
  152. elif user_input == 'w':
  153. try:
  154. tag = raw_input('Tag to update:\n')
  155. value = raw_input('With value:\n')
  156. except:
  157. print("Aborting")
  158. return
  159. # Write to info file
  160. line = 'InfoKey: ' + tag + '\nInfoValue: ' + value + '\n'
  161. data_modified = True
  162. file_in.writelines(line)
  163. elif user_input == 'a':
  164. return
  165. else:
  166. print("Invalid option: ")
  167. file_in.close()
  168. (fd, pdf_temp_path) = tempfile.mkstemp(prefix="wsm_pdf_plugin_pdf_", \
  169. dir=CFG_TMPDIR)
  170. os.close(fd)
  171. # Now we call pdftk tool to update the info on a pdf
  172. #try:
  173. cmd_pdftk = '%s %s update_info %s output %s'
  174. (exit_status, output_std, output_err) = \
  175. run_shell_command(cmd_pdftk,
  176. args=(CFG_PATH_PDFTK, inputfile,
  177. path_to_info, pdf_temp_path))
  178. if verbose > 5:
  179. print(output_std, output_err)
  180. if os.path.exists(pdf_temp_path):
  181. # Move to final destination if exist
  182. try:
  183. shutil.move(pdf_temp_path, outputfile)
  184. except Exception as err:
  185. raise InvenioWebSubmitFileMetadataRuntimeError("Could not move %s to %s" % \
  186. (pdf_temp_path, outputfile))
  187. else:
  188. # Something bad happened
  189. raise InvenioWebSubmitFileMetadataRuntimeError("Could not update metadata " + output_err)