PageRenderTime 58ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/djvubind/encode.py

https://code.google.com/p/djvubind/
Python | 320 lines | 201 code | 48 blank | 71 comment | 52 complexity | e3b1db69611a3602541b2665be004fca MD5 | raw file
  1. #! /usr/bin/env python3
  2. # This program is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License as published by
  4. # the Free Software Foundation; either version 3 of the License, or
  5. # (at your option) any later version.
  6. #
  7. # This program is distributed in the hope that it will be useful,
  8. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. # GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License
  13. # along with this program; if not, write to the Free Software
  14. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  15. # MA 02110-1301, USA.
  16. """
  17. Contains code relevant to encoding images and metadata into a djvu format.
  18. """
  19. import glob
  20. import os
  21. import shutil
  22. import sys
  23. from . import utils
  24. class Encoder:
  25. """
  26. An intelligent djvu super-encoder that can work with numerous djvu encoders.
  27. """
  28. def __init__(self, opts):
  29. self.opts = opts
  30. self.dep_check()
  31. def progress(self):
  32. pass
  33. def _c44(self, infile, outfile, dpi):
  34. """
  35. Encode files with c44.
  36. """
  37. # Make sure that the image is in a format acceptable for c44
  38. extension = infile.split('.')[-1]
  39. if extension not in ['pgm', 'ppm', 'jpg', 'jpeg']:
  40. utils.execute('convert {0} {1}'.format(infile, 'temp.ppm'))
  41. infile = 'temp.ppm'
  42. # Encode
  43. cmd = 'c44 -dpi {0} {1} "{2}" "{3}"'.format(dpi, self.opts['c44_options'], infile, outfile)
  44. utils.execute(cmd)
  45. # Check that the outfile has been created.
  46. if not os.path.isfile(outfile):
  47. msg = 'err: encode.Encoder._c44(): No encode errors, but "{0}" does not exist!'.format(outfile)
  48. print(msg, file=sys.stderr)
  49. sys.exit(1)
  50. # Cleanup
  51. if (infile == 'temp.ppm') and (os.path.isfile('temp.ppm')):
  52. os.remove('temp.ppm')
  53. return None
  54. def _cjb2(self, infile, outfile, dpi):
  55. """
  56. Encode files with cjb2.
  57. """
  58. cmd = 'cjb2 -dpi {0} {1} "{2}" "{3}"'.format(dpi, self.opts['cjb2_options'], infile, outfile)
  59. utils.execute(cmd)
  60. # Check that the outfile has been created.
  61. if not os.path.isfile(outfile):
  62. msg = 'err: encode.Encoder._cpaldjvu(): No encode errors, but "{0}" does not exist!'.format(outfile)
  63. print(msg, file=sys.stderr)
  64. sys.exit(1)
  65. return None
  66. def _cpaldjvu(self, infile, outfile, dpi):
  67. """
  68. Encode files with cpaldjvu.
  69. """
  70. # Make sure that the image is in a format acceptable for cpaldjvu
  71. extension = infile.split('.')[-1]
  72. if extension not in ['ppm']:
  73. utils.execute('convert {0} {1}'.format(infile, 'temp.ppm'))
  74. infile = 'temp.ppm'
  75. # Encode
  76. cmd = 'cpaldjvu -dpi {0} {1} "{2}" "{3}"'.format(dpi, self.opts['cpaldjvu_options'], infile, outfile)
  77. utils.execute(cmd)
  78. # Check that the outfile has been created.
  79. if not os.path.isfile(outfile):
  80. msg = 'err: encode.Encoder._cpaldjvu(): No encode errors, but "{0}" does not exist!'.format(outfile)
  81. print(msg, file=sys.stderr)
  82. sys.exit(1)
  83. # Cleanup
  84. if (infile == 'temp.ppm') and (os.path.isfile('temp.ppm')):
  85. os.remove('temp.ppm')
  86. return None
  87. def _csepdjvu(self, infile, outfile, dpi):
  88. """
  89. Encode files with csepdjvu.
  90. """
  91. # Separate the bitonal text (scantailor's mixed mode) from everything else.
  92. utils.execute('convert -opaque black "{0}" "temp_graphics.tif"'.format(infile))
  93. utils.execute('convert +opaque black "{0}" "temp_textual.tif"'.format(infile))
  94. # Encode the bitonal image.
  95. self._cjb2('temp_textual.tif', 'enc_bitonal_out.djvu', dpi)
  96. # Encode with color with bitonal via csepdjvu
  97. utils.execute('ddjvu -format=rle -v "enc_bitonal_out.djvu" "temp_textual.rle"')
  98. utils.execute('convert temp_graphics.tif temp_graphics.ppm')
  99. with open('temp_merge.mix', 'wb') as mix:
  100. with open('temp_textual.rle', 'rb') as rle:
  101. buffer = rle.read(1024)
  102. while buffer:
  103. mix.write(buffer)
  104. buffer = rle.read(1024)
  105. with open('temp_graphics.ppm', 'rb') as ppm:
  106. buffer = ppm.read(1024)
  107. while buffer:
  108. mix.write(buffer)
  109. buffer = ppm.read(1024)
  110. utils.execute('csepdjvu -d {0} {1} "temp_merge.mix" "temp_final.djvu"'.format(dpi, self.opts['csepdjvu_options']))
  111. if (not os.path.isfile(outfile)):
  112. shutil.move('temp_final.djvu', outfile)
  113. else:
  114. utils.execute('djvm -i {0} "temp_final.djvu"'.format(outfile))
  115. # Clean up
  116. for tempfile in glob.glob('temp_*'):
  117. os.remove(tempfile)
  118. os.remove('enc_bitonal_out.djvu')
  119. return None
  120. def _minidjvu(self, infiles, outfile, dpi):
  121. """
  122. Encode files with minidjvu.
  123. N.B., minidjvu is the only encoder function that expects a list a filenames
  124. and not a string with a single filename. This is because minidjvu gains
  125. better compression with a shared dictionary across multiple images.
  126. """
  127. # Specify filenames that will be used.
  128. tempfile = 'enc_temp.djvu'
  129. # Minidjvu has to worry about the length of the command since all the filenames are
  130. # listed.
  131. cmds = utils.split_cmd('minidjvu -d {0} {1}'.format(dpi, self.opts['minidjvu_options']), infiles, tempfile)
  132. # Execute each command, adding each result into a single, multipage djvu.
  133. for cmd in cmds:
  134. utils.execute(cmd)
  135. self.djvu_insert(tempfile, outfile)
  136. os.remove(tempfile)
  137. return None
  138. def dep_check(self):
  139. """
  140. Check for ocr engine availability.
  141. """
  142. if not utils.is_executable(self.opts['bitonal_encoder']):
  143. msg = 'err: encoder "{0}" is not installed.'.format(self.opts['bitonal_encoder'])
  144. print(msg, file=sys.stderr)
  145. sys.exit(1)
  146. if not utils.is_executable(self.opts['color_encoder']):
  147. msg = 'err: encoder "{0}" is not installed.'.format(self.opts['color_encoder'])
  148. print(msg, file=sys.stderr)
  149. sys.exit(1)
  150. return None
  151. def djvu_insert(self, infile, djvufile, page_num=None):
  152. """
  153. Insert a single page djvu file into a multipage djvu file. By default it will be
  154. placed at the end, unless page_num is specified.
  155. """
  156. if (not os.path.isfile(djvufile)):
  157. shutil.copy(infile, djvufile)
  158. elif page_num is None:
  159. utils.execute('djvm -i "{0}" "{1}"'.format(djvufile, infile))
  160. else:
  161. utils.execute('djvm -i "{0}" "{1}" {2}'.format(djvufile, infile, int(page_num)))
  162. def enc_book(self, book, outfile):
  163. """
  164. Encode pages, metadata, etc. contained within a organizer.Book() class.
  165. """
  166. tempfile = 'temp.djvu'
  167. # Encode bitonal images first, mainly because of minidjvu needing to do
  168. # them all at once.
  169. if self.opts['bitonal_encoder'] == 'minidjvu':
  170. bitonals = []
  171. for page in book.pages:
  172. if page.bitonal:
  173. filepath = os.path.split(page.path)[1]
  174. bitonals.append(filepath)
  175. if len(bitonals) > 0:
  176. if self.opts['bitonal_encoder'] == 'minidjvu':
  177. self._minidjvu(bitonals, tempfile, book.dpi)
  178. self.djvu_insert(tempfile, outfile)
  179. os.remove(tempfile)
  180. self.progress()
  181. elif self.opts['bitonal_encoder'] == 'cjb2':
  182. for page in book.pages:
  183. if page.bitonal:
  184. self._cjb2(page.path, tempfile, page.dpi)
  185. self.djvu_insert(tempfile, outfile)
  186. os.remove(tempfile)
  187. self.progress()
  188. else:
  189. for page in book.pages:
  190. if not page.bitonal:
  191. msg = 'wrn: Invalid bitonal encoder. Bitonal pages will be omitted.'
  192. msg = utils.color(msg, 'red')
  193. print(msg, file=sys.stderr)
  194. break
  195. # Encode and insert non-bitonal
  196. if self.opts['color_encoder'] == 'csepdjvu':
  197. for page in book.pages:
  198. if not page.bitonal:
  199. page_number = book.pages.index(page) + 1
  200. self._csepdjvu(page.path, tempfile, page.dpi)
  201. self.djvu_insert(tempfile, outfile, page_number)
  202. os.remove(tempfile)
  203. self.progress()
  204. elif self.opts['color_encoder'] == 'c44':
  205. for page in book.pages:
  206. if not page.bitonal:
  207. page_number = book.pages.index(page) + 1
  208. self._c44(page.path, tempfile, page.dpi)
  209. self.djvu_insert(tempfile, outfile, page_number)
  210. os.remove(tempfile)
  211. self.progress()
  212. elif self.opts['color_encoder'] == 'cpaldjvu':
  213. for page in book.pages:
  214. if not page.bitonal:
  215. page_number = book.pages.index(page) + 1
  216. self._cpaldjvu(page.path, tempfile, page.dpi)
  217. self.djvu_insert(tempfile, outfile, page_number)
  218. os.remove(tempfile)
  219. self.progress()
  220. else:
  221. for page in book.pages:
  222. if not page.bitonal:
  223. msg = 'wrn: Invalid color encoder. Colored pages will be omitted.'
  224. msg = utils.color(msg, 'red')
  225. print(msg, file=sys.stderr)
  226. break
  227. # Add ocr data
  228. if self.opts['ocr']:
  229. for page in book.pages:
  230. handle = open('ocr.txt', 'w', encoding="utf8")
  231. handle.write(page.text)
  232. handle.close()
  233. page_number = book.pages.index(page) + 1
  234. utils.simple_exec('djvused -e "select {0}; remove-txt; set-txt \'ocr.txt\'; save" "{1}"'.format(page_number, outfile))
  235. os.remove('ocr.txt')
  236. # Insert front/back covers, metadata, and bookmarks
  237. if book.suppliments['cover_front'] is not None:
  238. dpi = int(utils.execute('identify -ping -format %x "{0}"'.format(book.suppliments['cover_front']), capture=True).decode('ascii').split(' ')[0])
  239. self._c44(book.suppliments['cover_front'], tempfile, dpi)
  240. self.djvu_insert(tempfile, outfile, 1)
  241. utils.execute('djvused -e "select 1; set-page-title cover; save" "{0}"'.format(outfile))
  242. if book.suppliments['cover_back'] is not None:
  243. dpi = int(utils.execute('identify -ping -format %x "{0}"'.format(book.suppliments['cover_back']), capture=True).decode('ascii').split(' ')[0])
  244. self._c44(book.suppliments['cover_back'], tempfile, dpi)
  245. self.djvu_insert(tempfile, outfile, -1)
  246. if book.suppliments['metadata'] is not None:
  247. utils.simple_exec('djvused -e "set-meta {0}; save" "{1}"'.format(book.suppliments['metadata'], outfile))
  248. if book.suppliments['bookmarks'] is not None:
  249. utils.simple_exec('djvused -e "set-outline {0}; save" "{1}"'.format(book.suppliments['bookmarks'], outfile))
  250. script = ''
  251. index = 1
  252. if book.suppliments['cover_front'] is not None:
  253. script += 'select '+str(index)+'; set-page-title "cover";\n'
  254. index = index + 1
  255. for page in book.pages:
  256. if page.title is None:
  257. index = index + 1
  258. else:
  259. script += 'select '+str(index)+'; set-page-title "'+str(page.title)+'";\n'
  260. index = index + 1
  261. if book.suppliments['cover_back'] is not None:
  262. script += 'select '+str(index)+'; set-page-title "back cover";\n'
  263. script += 'save'
  264. with open('titles', 'w') as handle:
  265. handle.write(script)
  266. utils.simple_exec('djvused -f titles "{0}"'.format(outfile))
  267. os.remove('titles')
  268. if os.path.isfile(tempfile):
  269. os.remove(tempfile)
  270. return None