PageRenderTime 640ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/geeknote/editor.py

https://gitlab.com/dannywillems/geeknote
Python | 259 lines | 237 code | 14 blank | 8 comment | 6 complexity | 190aa2a5f08a04c1cf077ab314041a74 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import sys
  4. import tempfile
  5. from bs4 import BeautifulSoup, NavigableString
  6. import threading
  7. import hashlib
  8. import html2text as html2text
  9. import markdown2 as markdown
  10. import tools
  11. import out
  12. import re
  13. import config
  14. from storage import Storage
  15. from log import logging
  16. from xml.sax.saxutils import escape, unescape
  17. class EditorThread(threading.Thread):
  18. def __init__(self, editor):
  19. threading.Thread.__init__(self)
  20. self.editor = editor
  21. def run(self):
  22. self.editor.edit()
  23. class Editor(object):
  24. # escape() and unescape() take care of &, < and >.
  25. @staticmethod
  26. def getHtmlEscapeTable():
  27. return {'"': "&quot;",
  28. "'": "&apos;",
  29. '\n': "<br />"}
  30. @staticmethod
  31. def getHtmlUnescapeTable():
  32. return {v:k for k, v in Editor.getHtmlEscapeTable().items()}
  33. @staticmethod
  34. def HTMLEscape(text):
  35. return escape(text, Editor.getHtmlEscapeTable())
  36. @staticmethod
  37. def HTMLUnescape(text):
  38. return unescape(text, Editor.getHtmlUnescapeTable())
  39. @staticmethod
  40. def checklistInENMLtoSoup(soup):
  41. '''
  42. Transforms Evernote checklist elements to github `* [ ]` task list style
  43. '''
  44. transform_tags = ['p','div']
  45. # soup.select cant be used with dashes: https://bugs.launchpad.net/beautifulsoup/+bug/1276211
  46. for todo in soup.find_all('en-todo'):
  47. parent = todo.parent
  48. transform = parent.find() == todo and parent.name in transform_tags
  49. checked = todo.attrs.get('checked',None) == "true"
  50. todo.replace_with("[x] " if checked else "[ ] ")
  51. # EN checklist can appear anywhere, but if they appear at the beggining
  52. # of a block element, transform it so it ressembles github markdown syntax
  53. if transform:
  54. content = ''.join(unicode(child) for child in parent.children
  55. if isinstance(child, NavigableString)
  56. ).strip()
  57. new_tag = soup.new_tag("li")
  58. new_tag.string = content
  59. parent.replace_with(new_tag)
  60. @staticmethod
  61. def ENMLtoText(contentENML):
  62. soup = BeautifulSoup(contentENML.decode('utf-8'))
  63. for section in soup.select('li > p'):
  64. section.replace_with( section.contents[0] )
  65. for section in soup.select('li > br'):
  66. if section.next_sibling:
  67. next_sibling = section.next_sibling.next_sibling
  68. if next_sibling:
  69. if next_sibling.find('li'):
  70. section.extract()
  71. else:
  72. section.extract()
  73. Editor.checklistInENMLtoSoup(soup)
  74. for section in soup.findAll('en-todo', checked='true'):
  75. section.replace_with('[x]')
  76. for section in soup.findAll('en-todo'):
  77. section.replace_with('[ ]')
  78. content = html2text.html2text(str(soup).decode('utf-8'), '', 0)
  79. content = re.sub(r' *\n', os.linesep, content)
  80. return content.encode('utf-8')
  81. @staticmethod
  82. def wrapENML(contentHTML):
  83. body = '<?xml version="1.0" encoding="UTF-8"?>\n'\
  84. '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">\n'\
  85. '<en-note>%s</en-note>' % contentHTML
  86. return body
  87. @staticmethod
  88. def checklistInSoupToENML(soup):
  89. '''
  90. Transforms github style checklists `* [ ]` in the BeautifulSoup tree to
  91. enml.
  92. '''
  93. checktodo_re = re.compile(r'\[(.)\]')
  94. # To be more github compatible, if in a list all elements begins with `[ ]``
  95. # transform it to normal `[ ]` evernote elements
  96. for ul in soup.find_all('ul'):
  97. tasks = []; istodo = True
  98. for li in ul.find_all('li'):
  99. task = soup.new_tag('div')
  100. todo_tag = soup.new_tag('en-todo')
  101. reg = checktodo_re.match(li.get_text())
  102. istodo = istodo and reg
  103. character = reg.group(1) if reg else None
  104. if character == "x": todo_tag['checked']="true"
  105. task.append(todo_tag)
  106. if reg: task.append(NavigableString(li.get_text()[3:].strip()))
  107. tasks.append(task)
  108. if istodo:
  109. for task in tasks: ul.insert_after(task)
  110. ul.extract()
  111. # For the rest of elements just replace `[ ]` with the appropriate element
  112. for todo in soup.find_all(text=checktodo_re):
  113. str_re = re.match(r'(.*)\[(.)\](.*)',todo)
  114. pre = str_re.group(1)
  115. post = str_re.group(3)
  116. todo_tag = soup.new_tag('en-todo')
  117. if str_re.group(2) == "x": todo_tag['checked']="true"
  118. todo.replace_with(todo_tag)
  119. todo_tag.insert_before(pre)
  120. todo_tag.insert_after(post)
  121. @staticmethod
  122. def textToENML(content, raise_ex=False, format='markdown'):
  123. """
  124. Create an ENML format of note.
  125. """
  126. if not isinstance(content, str):
  127. content = ""
  128. try:
  129. content = unicode(content, "utf-8")
  130. # add 2 space before new line in paragraph for creating br tags
  131. content = re.sub(r'([^\r\n])([\r\n])([^\r\n])', r'\1 \n\3', content)
  132. if format=='markdown':
  133. contentHTML = markdown.markdown(content)
  134. soup = BeautifulSoup(contentHTML, 'html.parser')
  135. Editor.checklistInSoupToENML(soup)
  136. # Non-Pretty HTML output
  137. contentHTML = str(soup)
  138. #
  139. # For the 'pre' format, simply wrap the content with a 'pre' tag. Do
  140. # perform any parsing/mutation.
  141. #
  142. elif format=='pre':
  143. contentHTML = u''.join(('<pre>', content, '</pre>')).encode("utf-8")
  144. else:
  145. contentHTML = Editor.HTMLEscape(content)
  146. contentHTML = contentHTML.replace('[x]','<en-todo checked="true"></en-todo>')
  147. contentHTML = contentHTML.replace('[ ]','<en-todo></en-todo>')
  148. return Editor.wrapENML(contentHTML)
  149. except:
  150. import traceback
  151. traceback.print_exc()
  152. if raise_ex:
  153. raise Exception("Error while parsing text to html."
  154. " Content must be an UTF-8 encode.")
  155. logging.error("Error while parsing text to html. "
  156. "Content must be an UTF-8 encode.")
  157. out.failureMessage("Error while parsing text to html. "
  158. "Content must be an UTF-8 encode.")
  159. return tools.exitErr()
  160. def __init__(self, content):
  161. if not isinstance(content, str):
  162. raise Exception("Note content must be an instance "
  163. "of string, '%s' given." % type(content))
  164. (tempfileHandler, tempfileName) = tempfile.mkstemp(suffix=".markdown")
  165. os.write(tempfileHandler, self.ENMLtoText(content))
  166. os.close(tempfileHandler)
  167. self.content = content
  168. self.tempfile = tempfileName
  169. def getTempfileChecksum(self):
  170. with open(self.tempfile, 'rb') as fileHandler:
  171. checksum = hashlib.md5()
  172. while True:
  173. data = fileHandler.read(8192)
  174. if not data:
  175. break
  176. checksum.update(data)
  177. return checksum.hexdigest()
  178. def edit(self):
  179. """
  180. Call the system editor, that types as a default in the system.
  181. Editing goes in markdown format, and then the markdown
  182. converts into HTML, before uploading to Evernote.
  183. """
  184. # Try to find default editor in the system.
  185. storage = Storage()
  186. editor = storage.getUserprop('editor')
  187. if not editor:
  188. editor = os.environ.get("editor")
  189. if not editor:
  190. editor = os.environ.get("EDITOR")
  191. if not editor:
  192. # If default editor is not finded, then use nano as a default.
  193. if sys.platform == 'win32':
  194. editor = config.DEF_WIN_EDITOR
  195. else:
  196. editor = config.DEF_UNIX_EDITOR
  197. # Make a system call to open file for editing.
  198. logging.debug("launch system editor: %s %s" % (editor, self.tempfile))
  199. out.preloader.stop()
  200. os.system(editor + " " + self.tempfile)
  201. out.preloader.launch()
  202. newContent = open(self.tempfile, 'r').read()
  203. return newContent