PageRenderTime 61ms CodeModel.GetById 31ms RepoModel.GetById 1ms app.codeStats 0ms

/src/openaccess_epub/navigation/__init__.py

https://github.com/SavinaRoja/OpenAccess_EPUB
Python | 349 lines | 296 code | 16 blank | 37 comment | 17 complexity | 0a624a22e43316328f5d1511129d43a0 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """
  3. openaccess_epub.navigation provides facilities for producing EPUB navigation
  4. documents.
  5. The Navigation Document is a required component of an EPUB document in both
  6. EPUB2 and EPUB3. OpenAccess_EPUB provides support for both EPUB versions with
  7. this module. The Navigation class can work with either a single article or with
  8. multiple articles. The processing of articles for important navigation mapping
  9. is currently publisher agnostic; however, since this document also utilizes (a
  10. very limited subset of) metadata, there will be some amount of
  11. publisher-specific metadata methods required.
  12. """
  13. #Standard Library modules
  14. from collections import namedtuple
  15. import logging
  16. import os
  17. #Non-Standard Library modules
  18. from lxml import etree
  19. #OpenAccess_EPUB modules
  20. from openaccess_epub.utils import OrderedSet
  21. import openaccess_epub.utils.element_methods as element_methods
  22. from openaccess_epub._version import __version__
  23. log = logging.getLogger('openaccess_epub.navigation')
  24. navpoint = namedtuple('navpoint', 'id, label, playOrder, source, children')
  25. class Navigation(object):
  26. def __init__(self, collection=False, title=''):
  27. self.collection = collection
  28. #Special navigation structures: List of Equations/Figures/Tables
  29. self.equations_list = []
  30. self.figures_list = []
  31. self.tables_list = []
  32. self.article = None
  33. self.article_doi = None
  34. self.all_dois = [] # Used to create UID
  35. #These are the limited forms of metadata that might make it in to the
  36. #navigation document. Both are used for EPUB2, only the title is used
  37. #for EPUB3
  38. self.title = title
  39. self.contributors = OrderedSet()
  40. #The nav structure is a list of navpoint trees. Each navpoint may have
  41. #children navpoints. This structure will be converted to the appropriate
  42. #xml/xhtml structure and written to file when required.
  43. self.nav = []
  44. self.nav_depth = 0
  45. self._play_order = 0
  46. self._auto_id = 0
  47. def process(self, article):
  48. """
  49. Ingests an Article to create navigation structures and parse global
  50. metadata.
  51. """
  52. if self.article is not None and not self.collection:
  53. log.warning('Could not process additional article. Navigation only \
  54. handles one article unless collection mode is set.')
  55. return False
  56. if article.publisher is None:
  57. log.error('''Navigation cannot be generated for an Article \
  58. without a publisher!''')
  59. return
  60. self.article = article
  61. self.article_doi = self.article.doi.split('/')[1]
  62. self.all_dois.append(self.article.doi)
  63. if self.collection:
  64. pass
  65. else:
  66. self.title = self.article.publisher.nav_title()
  67. for author in self.article.publisher.nav_contributors():
  68. self.contributors.add(author)
  69. #Analyze the structure of the article to create internal mapping
  70. self.map_navigation()
  71. def map_navigation(self):
  72. """
  73. This is a wrapper for depth-first recursive analysis of the article
  74. """
  75. #All articles should have titles
  76. title_id = 'titlepage-{0}'.format(self.article_doi)
  77. title_label = self.article.publisher.nav_title()
  78. title_source = 'main.{0}.xhtml#title'.format(self.article_doi)
  79. title_navpoint = navpoint(title_id, title_label, self.play_order,
  80. title_source, [])
  81. self.nav.append(title_navpoint)
  82. #When processing a collection of articles, we will want all subsequent
  83. #navpoints for this article to be located under the title
  84. if self.collection:
  85. nav_insertion = title_navpoint.children
  86. else:
  87. nav_insertion = self.nav
  88. #If the article has a body, we'll need to parse it for navigation
  89. if self.article.body is not None:
  90. #Here is where we invoke the recursive parsing!
  91. for nav_pt in self.recursive_article_navmap(self.article.body):
  92. nav_insertion.append(nav_pt)
  93. #Add a navpoint to the references if appropriate
  94. if self.article.root.xpath('./back/ref'):
  95. ref_id = 'references-{0}'.format(self.article_doi)
  96. ref_label = 'References'
  97. ref_source = 'biblio.{0}.xhtml#references'.format(self.article_doi)
  98. ref_navpoint = navpoint(ref_id, ref_label, self.play_order,
  99. ref_source, [])
  100. nav_insertion.append(ref_navpoint)
  101. def recursive_article_navmap(self, src_element, depth=0, first=True):
  102. """
  103. This function recursively traverses the content of an input article to
  104. add the correct elements to the NCX file's navMap and Lists.
  105. """
  106. if depth > self.nav_depth:
  107. self.nav_depth = depth
  108. navpoints = []
  109. tagnames = ['sec', 'fig', 'table-wrap']
  110. for child in src_element:
  111. try:
  112. tagname = child.tag
  113. except AttributeError:
  114. continue
  115. else:
  116. if tagname not in tagnames:
  117. continue
  118. #Safely handle missing id attributes
  119. if 'id' not in child.attrib:
  120. child.attrib['id'] = self.auto_id
  121. #If in collection mode, we'll prepend the article DOI to avoid
  122. #collisions
  123. if self.collection:
  124. child_id = '-'.join([self.article_doi,
  125. child.attrib['id']])
  126. else:
  127. child_id = child.attrib['id']
  128. #Attempt to infer the correct text as a label
  129. #Skip the element if we cannot
  130. child_title = child.find('title')
  131. if child_title is None:
  132. continue # If there is no immediate title, skip this element
  133. label = element_methods.all_text(child_title)
  134. if not label:
  135. continue # If no text in the title, skip this element
  136. source = 'main.{0}.xhtml#{1}'.format(self.article_doi,
  137. child.attrib['id'])
  138. if tagname == 'sec':
  139. children = self.recursive_article_navmap(child, depth=depth + 1)
  140. navpoints.append(navpoint(child_id,
  141. label,
  142. self.play_order,
  143. source,
  144. children))
  145. #figs and table-wraps do not have children
  146. elif tagname == 'fig': # Add navpoints to list_of_figures
  147. self.figures_list.append(navpoint(child.attrib['id'],
  148. label,
  149. None,
  150. source,
  151. []))
  152. elif tagname == 'table-wrap': # Add navpoints to list_of_tables
  153. self.tables_list.append(navpoint(child.attrib['id'],
  154. label,
  155. None,
  156. source,
  157. []))
  158. return navpoints
  159. def render_EPUB2(self, location):
  160. """
  161. Creates the NCX specified file for EPUB2
  162. """
  163. def make_navlabel(text):
  164. """
  165. Creates and returns a navLabel element with the supplied text.
  166. """
  167. navlabel = etree.Element('navLabel')
  168. navlabel_text = etree.SubElement(navlabel, 'text')
  169. navlabel_text.text = text
  170. return navlabel
  171. def make_navMap(nav=None):
  172. if nav is None:
  173. nav_element = etree.Element('navMap')
  174. for nav_point in self.nav:
  175. nav_element.append(make_navMap(nav=nav_point))
  176. else:
  177. nav_element = etree.Element('navPoint')
  178. nav_element.attrib['id'] = nav.id
  179. nav_element.attrib['playOrder'] = nav.playOrder
  180. nav_element.append(make_navlabel(nav.label))
  181. content_element = etree.SubElement(nav_element, 'content')
  182. content_element.attrib['src'] = nav.source
  183. for child in nav.children:
  184. nav_element.append(make_navMap(nav=child))
  185. return nav_element
  186. root = etree.XML('''\
  187. <?xml version="1.0"?>\
  188. <ncx version="2005-1" xmlns="http://www.daisy.org/z3986/2005/ncx/">\
  189. <head>\
  190. <meta name="dtb:uid" content="{uid}"/>\
  191. <meta name="dtb:depth" content="{depth}"/>\
  192. <meta name="dtb:totalPageCount" content="0"/>\
  193. <meta name="dtb:maxPageNumber" content="0"/>\
  194. <meta name="dtb:generator" content="OpenAccess_EPUB {version}"/>\
  195. </head>\
  196. </ncx>'''.format(**{'uid': ','.join(self.all_dois),
  197. 'depth': self.nav_depth,
  198. 'version': __version__}))
  199. document = etree.ElementTree(root)
  200. ncx = document.getroot()
  201. #Create the docTitle element
  202. doctitle = etree.SubElement(ncx, 'docTitle')
  203. doctitle_text = etree.SubElement(doctitle, 'text')
  204. doctitle_text.text = self.title
  205. #Create the docAuthor elements
  206. for contributor in self.contributors:
  207. if contributor.role == 'author':
  208. docauthor = etree.SubElement(ncx, 'docAuthor')
  209. docauthor_text = etree.SubElement(docauthor, 'text')
  210. docauthor_text.text = contributor.name
  211. #Create the navMap element
  212. ncx.append(make_navMap())
  213. if self.figures_list:
  214. navlist = etree.SubElement(ncx, 'navList')
  215. navlist.append(make_navlabel('List of Figures'))
  216. for nav_pt in self.figures_list:
  217. navtarget = etree.SubElement(navlist, 'navTarget')
  218. navtarget.attrib['id'] = nav_pt.id
  219. navtarget.append(self.make_navlabel(nav_pt.label))
  220. content = etree.SubElement(navtarget, 'content')
  221. content.attrib['src'] = nav_pt.source
  222. if self.tables_list:
  223. navlist = etree.SubElement(ncx, 'navList')
  224. navlist.append(make_navlabel('List of Tables'))
  225. for nav_pt in self.tables_list:
  226. navtarget = etree.SubElement(navlist, 'navTarget')
  227. navtarget.attrib['id'] = nav_pt.id
  228. navtarget.append(self.make_navlabel(nav_pt.label))
  229. content = etree.SubElement(navtarget, 'content')
  230. content.attrib['src'] = nav_pt.source
  231. with open(os.path.join(location, 'EPUB', 'toc.ncx'), 'wb') as output:
  232. output.write(etree.tostring(document, encoding='utf-8', pretty_print=True))
  233. def render_EPUB3(self, location):
  234. def make_nav(nav=None):
  235. if nav is None:
  236. nav_element = etree.Element('ol')
  237. for nav_point in self.nav:
  238. nav_element.append(make_nav(nav=nav_point))
  239. else:
  240. nav_element = etree.Element('li')
  241. a = etree.SubElement(nav_element, 'a')
  242. a.attrib['href'] = nav.source
  243. a.text = nav.label
  244. if nav.children:
  245. ol = etree.SubElement(nav_element, 'ol')
  246. for child in nav.children:
  247. ol.append(make_nav(nav=child))
  248. return nav_element
  249. root = etree.XML('''\
  250. <?xml version="1.0"?>\
  251. <!DOCTYPE html>\
  252. <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">\
  253. <head>\
  254. <link rel="stylesheet" type="text/css" href="css/default.css" />\
  255. </head>\
  256. </html>''')
  257. document = etree.ElementTree(root)
  258. html = document.getroot()
  259. title = etree.SubElement(html[0], 'title')
  260. title.text = self.title
  261. body = etree.SubElement(html, 'body') # Create the body element
  262. #h1 = etree.SubElement(body, 'h1')
  263. #h1.text = self.title
  264. #Create the prinary nav element
  265. nav = etree.SubElement(body, 'nav')
  266. nav.attrib['{http://www.idpf.org/2007/ops}type'] = 'toc'
  267. nav.attrib['id'] = 'toc'
  268. #Create the title
  269. h2 = etree.SubElement(nav, 'h2')
  270. h2.text = 'Table of Contents'
  271. #Stuff
  272. nav.append(make_nav())
  273. if self.figures_list:
  274. nav = etree.SubElement(body, 'nav')
  275. h2 = etree.SubElement(nav, 'h2')
  276. h2.text = 'List of Figures'
  277. ol = etree.SubElement(nav, 'ol')
  278. for nav_pt in self.figures_list:
  279. li = etree.SubElement(ol, 'li')
  280. a = etree.SubElement(li, 'a')
  281. a.attrib['href'] = nav_pt.source
  282. a.text = nav_pt.label
  283. if self.tables_list:
  284. nav = etree.SubElement(body, 'nav')
  285. h2 = etree.SubElement(nav, 'h2')
  286. h2.text = 'List of Tables'
  287. ol = etree.SubElement(nav, 'ol')
  288. for nav_pt in self.figures_list:
  289. li = etree.SubElement(ol, 'li')
  290. a = etree.SubElement(li, 'a')
  291. a.attrib['href'] = nav_pt.source
  292. a.text = nav_pt.label
  293. with open(os.path.join(location, 'EPUB', 'nav.xhtml'), 'wb') as output:
  294. output.write(etree.tostring(document, encoding='utf-8', pretty_print=True))
  295. @property
  296. def play_order(self):
  297. self._play_order += 1
  298. return str(self._play_order)
  299. @property
  300. def auto_id(self):
  301. self._auto_id += 1
  302. id_gen = 'OAE-{0}'.format(self._auto_id)
  303. log.debug('Navigation element missing ID: assigned {0}'.format(id_gen))
  304. return id_gen