PageRenderTime 34ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/fb2utils/recovery.py

http://fb2utils.googlecode.com/
Python | 195 lines | 131 code | 22 blank | 42 comment | 31 complexity | 04cc07fddcbf5989382d349efc33bac4 MD5 | raw file
Possible License(s): GPL-3.0
  1. #!/usr/bin/env python
  2. # -*- mode: python; coding: utf-8; -*-
  3. # (c) Lankier mailto:lankier@gmail.com
  4. import sys
  5. import os
  6. import time
  7. from optparse import OptionParser, make_option
  8. import zipfile
  9. import traceback
  10. from lxml import etree
  11. from parser import FB2Parser
  12. from validation import validate, check_tags
  13. from edition import add_desc, norm_desc
  14. from utils import prog_version, read_file, walk, print_log, LogOptions, check_xml
  15. ##
  16. ## Processing
  17. ##
  18. def parse(data):
  19. # return values:
  20. # 0 - good file
  21. # 1 - fixed
  22. # -1 - not fixed
  23. # pre validation
  24. ## if options.pre_fb2lint:
  25. ## if validate(data, 'fb2', 'PRE') is not None:
  26. ## return 0
  27. ## else:
  28. if not options.force and validate(data, 'xml', 'PRE') is not None:
  29. return 0
  30. # parsing and recovery
  31. try:
  32. soup = FB2Parser(data, convertEntities='xml')
  33. except:
  34. traceback.print_exc()
  35. print_log('FATAL: exception', level=3)
  36. return -1
  37. ret = 1
  38. # post validation #1
  39. xml = validate(str(soup.FictionBook), 'xml', 'POST')
  40. if xml is None:
  41. print_log('FATAL: not fixed', level=3)
  42. return -1
  43. # post processing
  44. ##add_desc(soup)
  45. ##norm_desc(soup)
  46. # post validation #2
  47. #check_tags(soup)
  48. ## if options.post_fb2lint:
  49. ## if validate(data, 'fb2', 'POST') is None:
  50. ## ret = -1
  51. if options.check_only:
  52. return ret
  53. data = etree.tostring(xml, encoding=options.output_encoding,
  54. xml_declaration=True)
  55. # save result
  56. def add_suffix(fn):
  57. root, ext = os.path.splitext(fn)
  58. return root+'-fixed'+ext
  59. newfilename = None
  60. zipfilename = None
  61. z = False # zip result ?
  62. # filenames
  63. if not options.nozip:
  64. if options.z_filename or options.zip:
  65. z = True
  66. newfilename = add_suffix(options.filename)
  67. if z:
  68. if not newfilename.endswith('.zip'):
  69. newfilename = add_suffix(newfilename+'.zip')
  70. newfilename = add_suffix(newfilename)
  71. if options.z_filename:
  72. zipfilename = options.z_filename
  73. else:
  74. zipfilename = options.filename
  75. zipfilename = os.path.basename(zipfilename)
  76. else:
  77. if options.z_filename:
  78. d = os.path.dirname(options.filename)
  79. newfilename = add_suffix(os.path.join(d, options.z_filename))
  80. if options.outfile:
  81. newfilename = options.outfile
  82. if options.dest_dir:
  83. f = os.path.basename(newfilename)
  84. newfilename = os.path.join(options.dest_dir, f)
  85. if os.path.exists(newfilename):
  86. print_log('FATAL: could not save file, file exists:',
  87. newfilename, level=3)
  88. return
  89. if newfilename == '-':
  90. z = False
  91. # save
  92. if options.verbose:
  93. print_log('save:', newfilename)
  94. if z:
  95. zf = zipfile.ZipFile(newfilename, 'w')
  96. zf.writestr(zipfilename, data)
  97. else:
  98. if newfilename == '-':
  99. sys.stdout.write(data)
  100. else:
  101. open(newfilename, 'w').write(data)
  102. total_files = 0
  103. def process_file(filename):
  104. global total_files
  105. options.filename = os.path.abspath(filename)
  106. LogOptions.filename = os.path.abspath(filename)
  107. for file_format, z_filename, data in read_file(filename):
  108. options.file_format = file_format
  109. options.z_filename = z_filename
  110. LogOptions.z_filename = z_filename
  111. total_files += 1
  112. if file_format == 'error':
  113. print_log('FATAL: read file error', level=3)
  114. continue
  115. if not check_xml(data):
  116. continue
  117. # run parsing
  118. parse(data)
  119. ##
  120. ## Main
  121. ##
  122. options = None
  123. def main():
  124. # parsing command-line options
  125. global options
  126. option_list = [
  127. make_option("-o", "--out", dest="outfile",
  128. help="write result to FILE", metavar="FILE"),
  129. make_option("-d", "--dest-dir", dest="dest_dir",
  130. help="save result files to DIR", metavar="DIR"),
  131. make_option("-z", "--zip", dest="zip", action="store_true",
  132. default=False, help="zip result file"),
  133. make_option("-n", "--no-zip", dest="nozip", action="store_true",
  134. default=False, help="don't zip result file"),
  135. make_option("-c", "--check-only", dest="check_only",
  136. action="store_true", default=False,
  137. help="check only, do not save result"),
  138. make_option("-f", "--force", dest="force", action="store_true",
  139. default=False, help="don't validate XML"),
  140. ## make_option("-b", "--pre-fb2-lint", dest="pre_fb2lint",
  141. ## action="store_true", default=False,
  142. ## help="pre process FB2 validation"),
  143. ## make_option("-a", "--post-fb2-lint", dest="post_fb2lint",
  144. ## action="store_true", default=False,
  145. ## help="post process FB2 validation"),
  146. make_option("-e", "--output-encoding", dest="output_encoding",
  147. default = 'utf-8', metavar="ENC",
  148. help="fb2 output encoding"),
  149. make_option("-v", "--verbose", dest="verbose", action="store_true",
  150. default=False, help="more info"),
  151. make_option("-q", "--quiet", dest="quiet", action="store_true",
  152. default=False, help="less info"),
  153. ]
  154. parser = OptionParser(option_list=option_list,
  155. usage="usage: %prog [options] files|dirs",
  156. version="%prog "+prog_version)
  157. options, args = parser.parse_args()
  158. if options.verbose:
  159. LogOptions.level = 0
  160. elif options.quiet:
  161. LogOptions.level = 2
  162. starttime = time.time()
  163. # walk a files
  164. for filename in walk(args):
  165. process_file(filename)
  166. # print stats
  167. if options.verbose:
  168. et = time.time() - starttime
  169. print 'elapsed time: %.2f secs' % et
  170. print 'average: %.3f secs' % (et/total_files)
  171. ##
  172. if __name__ == '__main__':
  173. main()