PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/src/openaccess_epub/utils/__init__.py

https://github.com/SavinaRoja/OpenAccess_EPUB
Python | 337 lines | 281 code | 32 blank | 24 comment | 25 complexity | 7dc95a4abf159f40baf684b347d3ddf0 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """
  3. Common utility functions
  4. """
  5. #Standard Library modules
import collections
import collections.abc
import logging
import os
import platform
import shutil
import subprocess
import sys
  13. #Non-Standard Library modules
  14. #OpenAccess_EPUB modules
  15. from openaccess_epub.utils.inputs import doi_input, url_input
  16. log = logging.getLogger('openaccess_epub.utils')
  17. Identifier = collections.namedtuple('Identifer', 'id, type')
  18. #Python documentation refers to this recipe for an OrderedSet
  19. #http://code.activestate.com/recipes/576694/
  20. class OrderedSet(collections.MutableSet):
  21. def __init__(self, iterable=None):
  22. self.end = end = []
  23. end += [None, end, end] # sentinel node for doubly linked list
  24. self.map = {} # key --> [key, prev, next]
  25. if iterable is not None:
  26. self |= iterable
  27. def __len__(self):
  28. return len(self.map)
  29. def __contains__(self, key):
  30. return key in self.map
  31. def add(self, key):
  32. if key not in self.map:
  33. end = self.end
  34. curr = end[1]
  35. curr[2] = end[1] = self.map[key] = [key, curr, end]
  36. def discard(self, key):
  37. if key in self.map:
  38. key, prev, next = self.map.pop(key)
  39. prev[2] = next
  40. next[1] = prev
  41. def __iter__(self):
  42. end = self.end
  43. curr = end[2]
  44. while curr is not end:
  45. yield curr[0]
  46. curr = curr[2]
  47. def __reversed__(self):
  48. end = self.end
  49. curr = end[1]
  50. while curr is not end:
  51. yield curr[0]
  52. curr = curr[1]
  53. def pop(self, last=True):
  54. if not self:
  55. raise KeyError('set is empty')
  56. key = self.end[1][0] if last else self.end[2][0]
  57. self.discard(key)
  58. return key
  59. def __repr__(self):
  60. if not self:
  61. return '%s()' % (self.__class__.__name__,)
  62. return '%s(%r)' % (self.__class__.__name__, list(self))
  63. def __eq__(self, other):
  64. if isinstance(other, OrderedSet):
  65. return len(self) == len(other) and list(self) == list(other)
  66. return set(self) == set(other)
  67. def cache_location():
  68. '''Cross-platform placement of cached files'''
  69. plat = platform.platform()
  70. log.debug('Platform read as: {0}'.format(plat))
  71. if plat.startswith('Windows'):
  72. log.debug('Windows platform detected')
  73. return os.path.join(os.environ['APPDATA'], 'OpenAccess_EPUB')
  74. elif plat.startswith('Darwin'):
  75. log.debug('Mac platform detected')
  76. elif plat.startswith('Linux'):
  77. log.debug('Linux platform detected')
  78. else:
  79. log.warning('Unhandled platform for cache_location')
  80. #This code is written for Linux and Mac, don't expect success for others
  81. path = os.path.expanduser('~')
  82. if path == '~':
  83. path = os.path.expanduser('~user')
  84. if path == '~user':
  85. log.critical('Could not resolve the correct cache location')
  86. sys.exit('Could not resolve the correct cache location')
  87. cache_loc = os.path.join(path, '.OpenAccess_EPUB')
  88. log.debug('Cache located: {0}'.format(cache_loc))
  89. return cache_loc
  90. def config_location():
  91. """
  92. Returns the expected location of the config file
  93. """
  94. return os.path.join(cache_location(), 'config.py')
  95. def base_epub_location():
  96. """
  97. Returns the expected location of the base_epub directory
  98. """
  99. return os.path.join(cache_location(), 'base_epub')
  100. def publisher_plugin_location():
  101. """
  102. Returns the expected location of the publisher_plugins directory.
  103. """
  104. return os.path.join(cache_location(), 'publisher_plugins')
  105. def load_config_module():
  106. """
  107. If the config.py file exists, import it as a module. If it does not exist,
  108. call sys.exit() with a request to run oaepub configure.
  109. """
  110. import imp
  111. config_path = config_location()
  112. try:
  113. config = imp.load_source('config', config_path)
  114. except IOError:
  115. log.critical('Config file not found. oaepub exiting...')
  116. sys.exit('Config file not found. Please run \'oaepub configure\'')
  117. else:
  118. log.debug('Config file loaded from {0}'.format(config_path))
  119. return config
  120. def mkdir_p(dir):
  121. if os.path.isdir(dir):
  122. return
  123. os.makedirs(dir)
  124. def evaluate_relative_path(working=os.getcwd(), relative=''):
  125. """
  126. This function receives two strings representing system paths. The first is
  127. the working directory and it should be an absolute path. The second is the
  128. relative path and it should not be absolute. This function will render an
  129. OS-appropriate absolute path, which is the normalized path from working
  130. to relative.
  131. """
  132. return os.path.normpath(os.path.join(working, relative))
  133. def get_absolute_path(some_path):
  134. """
  135. This function will return an appropriate absolute path for the path it is
  136. given. If the input is absolute, it will return unmodified; if the input is
  137. relative, it will be rendered as relative to the current working directory.
  138. """
  139. if os.path.isabs(some_path):
  140. return some_path
  141. else:
  142. return evaluate_relative_path(os.getcwd(), some_path)
  143. def get_output_directory(args):
  144. """
  145. Determination of the directory for output placement involves possibilities
  146. for explicit user instruction (absolute path or relative to execution) and
  147. implicit default configuration (absolute path or relative to input) from
  148. the system global configuration file. This function is responsible for
  149. reliably returning the appropriate output directory which will contain any
  150. log(s), ePub(s), and unzipped output of OpenAccess_EPUB.
  151. It utilizes the parsed args, passed as an object, and is self-sufficient in
  152. accessing the config file.
  153. All paths returned by this function are absolute.
  154. """
  155. #Import the global config file as a module
  156. import imp
  157. config_path = os.path.join(cache_location(), 'config.py')
  158. try:
  159. config = imp.load_source('config', config_path)
  160. except IOError:
  161. print('Could not find {0}, please run oae-quickstart'.format(config_path))
  162. sys.exit()
  163. #args.output is the explicit user instruction, None if unspecified
  164. if args.output:
  165. #args.output may be an absolute path
  166. if os.path.isabs(args.output):
  167. return args.output # return as is
  168. #or args.output may be a relative path, relative to cwd
  169. else:
  170. return evaluate_relative_path(relative=args.output)
  171. #config.default_output for default behavior without explicit instruction
  172. else:
  173. #config.default_output may be an absolute_path
  174. if os.path.isabs(config.default_output):
  175. return config.default_output
  176. #or config.default_output may be a relative path, relative to input
  177. else:
  178. if args.input: # The case of single input
  179. if 'http://www' in args.input:
  180. #Fetched from internet by URL
  181. raw_name = url_input(args.input, download=False)
  182. abs_input_path = os.path.join(os.getcwd(), raw_name+'.xml')
  183. elif args.input[:4] == 'doi:':
  184. #Fetched from internet by DOI
  185. raw_name = doi_input(args.input, download=False)
  186. abs_input_path = os.path.join(os.getcwd(), raw_name+'.xml')
  187. else:
  188. #Local option, could be anywhere
  189. abs_input_path = get_absolute_path(args.input)
  190. abs_input_parent = os.path.split(abs_input_path)[0]
  191. return evaluate_relative_path(abs_input_parent, config.default_output)
  192. elif args.batch: # The case of Batch Mode
  193. #Batch should only work on a supplied directory
  194. abs_batch_path = get_absolute_path(args.batch)
  195. return abs_batch_path
  196. elif args.zip:
  197. #Zip is a local-only option, behaves just like local xml
  198. abs_input_path = get_absolute_path(args.zip)
  199. abs_input_parent = os.path.split(abs_input_path)[0]
  200. return evaluate_relative_path(abs_input_parent, config.default_output)
  201. elif args.collection:
  202. return os.getcwd()
  203. else: # Un-handled or currently unsupported options
  204. print('The output location could not be determined...')
  205. sys.exit()
  206. def file_root_name(name):
  207. """
  208. Returns the root name of a file from a full file path.
  209. It will not raise an error if the result is empty, but an warning will be
  210. issued.
  211. """
  212. base = os.path.basename(name)
  213. root = os.path.splitext(base)[0]
  214. if not root:
  215. warning = 'file_root_name returned an empty root name from \"{0}\"'
  216. log.warning(warning.format(name))
  217. return root
  218. def files_with_ext(extension, directory='.', recursive=False):
  219. """
  220. Generator function that will iterate over all files in the specified
  221. directory and return a path to the files which possess a matching extension.
  222. You should include the period in your extension, and matching is not case
  223. sensitive: '.xml' will also match '.XML' and vice versa.
  224. An empty string passed to extension will match extensionless files.
  225. """
  226. if recursive:
  227. log.info('Recursively searching {0} for files with extension "{1}"'.format(directory, extension))
  228. for dirname, subdirnames, filenames in os.walk(directory):
  229. for filename in filenames:
  230. filepath = os.path.join(dirname, filename)
  231. _root, ext = os.path.splitext(filepath)
  232. if extension.lower() == ext.lower():
  233. yield filepath
  234. else:
  235. log.info('Looking in {0} for files with extension: "{1}"'.format(directory, extension))
  236. for name in os.listdir(directory):
  237. filepath = os.path.join(directory, name)
  238. if not os.path.isfile(filepath): # Skip non-files
  239. continue
  240. _root, ext = os.path.splitext(filepath)
  241. if extension.lower() == ext.lower():
  242. yield filepath
  243. def epubcheck(epubname, config=None):
  244. """
  245. This method takes the name of an epub file as an argument. This name is
  246. the input for the java execution of a locally installed epubcheck-.jar. The
  247. location of this .jar file is configured in config.py.
  248. """
  249. if config is None:
  250. config = load_config_module()
  251. r, e = os.path.splitext(epubname)
  252. if not e:
  253. log.warning('Missing file extension, appending ".epub"')
  254. e = '.epub'
  255. epubname = r + e
  256. elif not e == '.epub':
  257. log.warning('File does not have ".epub" extension, appending it')
  258. epubname += '.epub'
  259. subprocess.call(['java', '-jar', config.epubcheck_jarfile, epubname])
  260. def dir_exists(directory):
  261. """
  262. If a directory already exists that will be overwritten by some action, this
  263. will ask the user whether or not to continue with the deletion.
  264. If the user responds affirmatively, then the directory will be removed. If
  265. the user responds negatively, then the process will abort.
  266. """
  267. log.info('Directory exists! Asking the user')
  268. reply = input('''The directory {0} already exists.
  269. It will be overwritten if the operation continues.
  270. Replace? [Y/n]'''.format(directory))
  271. if reply.lower() in ['y', 'yes', '']:
  272. shutil.rmtree(directory)
  273. os.makedirs(directory)
  274. else:
  275. log.critical('Aborting process, user declined overwriting {0}'.format(directory))
  276. sys.exit('Aborting process!')
  277. suggested_article_types = ['abstract', 'addendum', 'announcement',
  278. 'article-commentary', 'book-review', 'books-received', 'brief-report',
  279. 'calendar', 'case-report', 'collection', 'correction', 'discussion',
  280. 'dissertation', 'editorial', 'in-brief', 'introduction', 'letter',
  281. 'meeting-report', 'news', 'obituary', 'oration', 'partial-retraction',
  282. 'product-review', 'rapid-communication', 'rapid-communication', 'reply',
  283. 'reprint', 'research-article', 'retraction', 'review-article',
  284. 'translation']