PageRenderTime 48ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/batch-bin/generate_pdf_on_submission.py

https://bitbucket.org/btingle/voro
Python | 228 lines | 208 code | 5 blank | 15 comment | 8 complexity | 89a598d3558d777505f05a05f6589128 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. '''
  2. This program is a wrapper for the pdf_gen OAC_EADtoPDFGenerator and is called by process.cgi when a contributor submits an ead finding aid.
  3. It first attempts to generate the PDF with a 64 second timeout. If this succeeds, it returns an HTML formatted success message which is put into the report file by process.cgi.
  4. If the attempt fails, it returns an error message to process.cgi.
  5. If this first attempt times out, the program forks. The parent returns an HTML formatted message stating that the generation is continuing. The forked child attempts to generate a PDF with no timeout set. After the attempt is over, it emails results to the user & oacops and modifies the report file.
  6. '''
  7. import os, sys
  8. import datetime
  9. import logging
  10. import StringIO
  11. import tempfile
  12. from BeautifulSoup import BeautifulSoup
  13. import pdf_gen
  14. def msg_header():
  15. return '\n<div id="pdf_results"><h2>PDF Generation Results</h2>\n'
  16. def msg_footer():
  17. return '\n</div>\n'
  18. def msg_success_html(completed):
  19. ''' Return a nicely formatted success message for the completed file.
  20. HTML format.
  21. '''
  22. msg = msg_header()
  23. msg += '<h3>PDF Created</h3>PDF created for finding aid.'
  24. msg += ' ' + str(completed[0])
  25. msg += msg_footer()
  26. return msg
  27. def msg_skipped_html(skipped):
  28. ''' Return a nicely formatted success message for the completed file.
  29. HTML format.
  30. '''
  31. msg = msg_header()
  32. msg += '<h3>PDF Creation Skipped</h3>.'
  33. msg += ' ' + str(skipped[0])
  34. msg += msg_footer()
  35. return msg
  36. def msg_error_html(errors, useremail, errorLog):
  37. msg = msg_header()
  38. msg += ''.join(['<h3><font color=indianred>PDF Generation Errors</font></h3>There were problems with the generation of the PDF from your xml ead finding aid. OAC operations has been notified and should contact you at: ',
  39. useremail,
  40. '<div>',
  41. unicode(errors),
  42. '</div><div>',
  43. unicode(errorLog)
  44. ]
  45. )
  46. msg += msg_footer()
  47. return msg
  48. def msg_timeout_html(timeouts, useremail):
  49. ''' Return a nicely formatted success message for the completed file.
  50. HTML format.
  51. '''
  52. msg = msg_header()
  53. msg += '<h3>PDF Generation Timed Out</h3>The generation of a pdf from the finding aid took longer than expected. OAC operations has been notified and the file has been restarted for pdf generation. You will recieve an email at: ' + useremail + ' when pdf processing is completed.'
  54. msg += msg_footer()
  55. return msg
  56. def get_status(completed, timeouts, errors, skipped, useremail, errorLog):
  57. ''' Returns status code of OK, ERROR or TIMEOUT
  58. '''
  59. status = 'ERROR'
  60. if len(completed) > 0:
  61. status = 'OK'
  62. elif len(skipped) > 0:
  63. status = 'SKIPPED'
  64. elif len(timeouts) > 0:
  65. status = 'TIMEOUT'
  66. return status
  67. def msg_results_html(completed, timeouts, errors, skipped, useremail, errorLog):
  68. ''' Return appropriate message for result
  69. '''
  70. msg = 'Unknown status for pdf generation'
  71. status = get_status(completed, timeouts, errors, skipped, useremail, errorLog)
  72. if status == 'OK':
  73. msg = msg_success_html(completed)
  74. elif status == 'SKIPPED':
  75. msg = msg_skipped_html(skipped)
  76. elif status == 'ERROR':
  77. msg = msg_error_html(errors, useremail, errorLog)
  78. elif status == 'TIMEOUT':
  79. msg = msg_timeout_html(timeouts, useremail)
  80. return msg
  81. def modify_results_file(resultsfile, inputfile, completed, timeouts, errors, skipped, useremail, errorLog):
  82. if not os.path.exists(resultsfile):
  83. return
  84. #read the results file & soup it
  85. f = open(resultsfile)
  86. html = f.read()
  87. f.close()
  88. # use soup to modify tag
  89. soup = BeautifulSoup(html)
  90. div_pdf = soup.find('div', attrs={'id':'pdf_results'})
  91. div_pdf.replaceWith(msg_results_html(completed, timeouts, errors, skipped, useremail, errorLog))
  92. #open tempfile and save, then rename
  93. tempfilename = resultsfile + '.tmp'
  94. f = open(tempfilename, 'w')
  95. try:
  96. f.write(unicode(soup))
  97. f.close()
  98. os.rename(tempfilename, resultsfile)
  99. finally:
  100. f.close()
  101. if os.path.exists(tempfilename):
  102. os.remove(tempfilename)
  103. def email_results(useremail, inputfile, completed, timeouts, errors, errorLog):
  104. import smtplib
  105. server = 'localhost'
  106. sender = "OAC Operations Team <oacops\@cdlib.org>"
  107. to = useremail
  108. reply_to = 'oacops@cdlib.org'
  109. subject = "voroEAD: PDF File Generation Results";
  110. if len(errors) > 0 or errorLog:
  111. msg = 'Error while generating PDF for finding aid ' + inputfile
  112. msg += ' The OAC Operations team has been notified.'
  113. msg += "ERROR LOG: " + errorLog
  114. elif len(completed) > 0:
  115. msg = 'PDF Generated for finding aid: ' + inputfile
  116. elif len(timeouts) > 0:
  117. msg = "PDF Generation Failed due to timeouts"
  118. else:
  119. msg = "PDF Generation Failed for unknown reason"
  120. mailMsgText= "From:" + sender + "\nTo:" + to + "\nReply-to:" + reply_to + "\nSubject:" + subject + "\n\n" + msg + "\n\n"
  121. server=smtplib.SMTP(server)
  122. ehlo=server.ehlo()
  123. snd=server.sendmail(sender,to,mailMsgText)
  124. server.quit()
  125. def main(args):
  126. #setup our processor
  127. inputfile = args[1]
  128. inputfile = os.path.normpath(inputfile)
  129. if '/apps/dsc/data/in/oac-ead/prime2002/' not in inputfile:
  130. print inputfile
  131. raise Exception
  132. resultsfile = args[2]
  133. useremail = args[3]
  134. # any tempfiles will be here
  135. tempfile.tempdir = "/apps/dsc/log/pdf/"
  136. log_stringio = StringIO.StringIO()
  137. logging.basicConfig(level=logging.ERROR,
  138. stream=log_stringio)
  139. generator = pdf_gen.OAC_EADtoPDFGenerator(os.path.join(os.environ.get("PATH_TO_PROGS", '/dsc/branches/production/oac-ead-to-pdf'), 'oac4_to_pdf.xslt'))
  140. (completed, timeouts, errors, skipped) = generator.pdf_gen_file(inputfile,
  141. timeoutSecs=64,
  142. outdir_option='parallel',
  143. data_root='/apps/dsc/data/in/oac-ead/prime2002/',
  144. force=True,
  145. )
  146. status = get_status(completed, timeouts, errors, skipped, useremail, log_stringio.getvalue())
  147. # This returns to process cgi
  148. print msg_results_html(completed, timeouts, errors, skipped, useremail, log_stringio.getvalue())
  149. if status == 'OK':
  150. sys.exit(0)
  151. if status == 'ERROR':
  152. sys.exit(1)
  153. if status != 'TIMEOUT':
  154. sys.exit(2)
  155. else:
  156. # try again with no timeout
  157. # in a subprocess
  158. sys.stdout.flush() # flush here, else close in forked processes
  159. # will output any previous stuff
  160. logging.shutdown()
  161. sys.stdin.close()
  162. sys.stderr.close()
  163. sys.stdout.close()
  164. # the trick to REALLY release std filedescriptors
  165. os.close(0)
  166. os.close(1)
  167. os.close(2)
  168. pid = os.fork()
  169. if pid:
  170. sys.exit(0)
  171. else:
  172. # want to put log into file
  173. (fd, filename) = tempfile.mkstemp()
  174. os.close(fd)
  175. logging.basicConfig(level=logging.ERROR,
  176. filename=filename)
  177. # in child, run generator again with no time out
  178. nice = os.nice(19)
  179. (completed, timeouts, errors, skipped) = generator.pdf_gen_file(inputfile,
  180. timeoutSecs=0,
  181. outdir_option='parallel',
  182. data_root='/apps/dsc/data/in/oac-ead/prime2002/',
  183. force=True,
  184. )
  185. #READ LOG FILE and delete after results file modified
  186. logging.shutdown()
  187. f = open(filename, 'r')
  188. try:
  189. errorLog = f.read()
  190. finally:
  191. f.close()
  192. os.remove(filename)
  193. # if completed, modify results file (replace the div id pdf_results
  194. # and email the user
  195. email_results(useremail, inputfile, completed, timeouts, errors, errorLog)
  196. # email oac ops
  197. # email_results('oacops@cdlib.org', inputfile, completed, timeouts, errors, errorLog)
  198. import time
  199. time.sleep(64)
  200. modify_results_file(resultsfile, inputfile, completed, timeouts, errors, skipped, useremail, log_stringio.getvalue())
  201. sys.exit(0)
  202. if __name__=="__main__":
  203. main(sys.argv)