PageRenderTime 81ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/views.py

https://bitbucket.org/kevindunn/ucommentapp/
Python | 2674 lines | 2231 code | 135 blank | 308 comment | 152 complexity | 7c74b08b2d1d2375c4fd12459027fdd9 MD5 | raw file
  1. """
  2. :copyright: Copyright 2010, by Kevin Dunn
  3. :license: BSD, see LICENSE file for details.
  4. TO ADD:
  5. ---------
  6. Add a time-out to comment compiling (5 seconds): then force a return
  7. Deferred processing of comments using a message queue:
  8. http://www.turnkeylinux.org/blog/django-celery-rabbitmq
  9. Why? Because we have to do a repo checkout, update the files, commit
  10. CAPTCHA
  11. Use PostgreSQL instead
  12. Handle the case where commit fails because a user name is not present.
  13. FUTURE
  14. ======
  15. Create a web-interface to approve or reject comments; allowing the comment admin
  16. to pick various reasons to reject comment and append extra info to the poster.
  17. Also provide option NOT to send an email at all (simply reject the posting).
  18. """
  19. # Standard library imports
  20. import os, sys, random, subprocess, pickle, re, logging.handlers, datetime
  21. import smtplib, time, shutil
  22. from collections import defaultdict, namedtuple
  23. from StringIO import StringIO
  24. # Settings for the ucomment application
  25. from conf import settings as conf
  26. # Django and Jinja imports
  27. from django import forms, template
  28. from django.shortcuts import render_to_response
  29. from django.contrib import auth as django_auth
  30. from django.core import cache as django_cache
  31. from django.core import serializers
  32. from django.core.context_processors import csrf
  33. from django.core.mail import send_mail, BadHeaderError
  34. from django.http import HttpResponse, HttpResponseRedirect
  35. from django.core.urlresolvers import reverse as django_reverse
  36. from django.utils import simplejson # used for XHR returns
  37. from django.utils import html as django_html # used for clean search results
  38. from jinja2 import Template # Jinja2 is Sphinx dependency; should be available
  39. from jinja2.exceptions import TemplateSyntaxError
  40. # Sphinx import
  41. from sphinx.util.osutil import ensuredir
  42. from sphinx.application import Sphinx, SphinxError
# Bind ``dvcs`` to the wrapper module for the configured version control
# system and hand it the executable and local repository path from settings.
# NOTE(review): the original indentation of this listing was lost; the two
# assignments are assumed to belong to the ``if`` body -- confirm.
# NOTE(review): for any ``conf.repo_DVCS_type`` other than 'hg' the name
# ``dvcs`` is not bound here, although later functions call ``dvcs.*``.
if conf.repo_DVCS_type == 'hg':
    import hgwrapper as dvcs
    dvcs.executable = conf.repo_DVCS_exec
    dvcs.local_repo_physical_dir = conf.local_repo_physical_dir

# Import the application's models, without knowing the application name:
# ``conf.app_dirname`` names the package whose ``models`` submodule is used.
models = getattr(__import__(conf.app_dirname, None, None, ['models']),'models')
  49. # The ucomment directive:
  50. COMMENT_DIRECTIVE = '.. ucomment::'
  51. # These words will be removed from any search
  52. # Taken from: http://www.ranks.nl/resources/stopwords.html
  53. STOP_WORDS = ['I', 'a', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'for',
  54. 'from', 'how', 'in', 'is', 'it', 'of', 'on', 'that', 'the',
  55. 'this', 'to', 'was', 'what', 'when', 'who', 'will', 'with',
  56. 'www']
# Code begins from here
# ---------------------
# Module-wide logger named 'ucomment'; records at INFO level and above are
# written to the file given by ``conf.log_filename``.
log_file = logging.getLogger('ucomment')
log_file.setLevel(logging.INFO)
# Size-based rotation: roll over at 5,000,000 bytes and keep 10 old files.
fh = logging.handlers.RotatingFileHandler(conf.log_filename,
                                          maxBytes=5000000,
                                          backupCount=10)
# Each record: timestamp - logger name - level - message.
formatter = logging.Formatter(('%(asctime)s - %(name)s '
                               '- %(levelname)s - %(message)s'))
fh.setFormatter(formatter)
log_file.addHandler(fh)
  68. class UcommentError(Exception):
  69. """ A generic error inside this Django application. Will log the error,
  70. and email the site administrator.
  71. This class must be initiated an exception object:
  72. * UcommentError(exc_object)
  73. But an optional string can also be provided, to give the log and email
  74. message some extra information:
  75. * UcommentError(exc_object, err_msg)
  76. This will figure out where the error was raised and provide some
  77. source code lines in the email.
  78. An alternative way to call this class is to just with a string input
  79. * UcommentError(err_msg)
  80. But this will only email and log the given error message string.
  81. The exception will not be raised again here: it is the calling function's
  82. choise whether to reraise it (and possibly interrupt the user's experience),
  83. or whether to just let the application continue.
  84. """
  85. def __init__(self, error, extra=''):
  86. if isinstance(error, Exception):
  87. exc = sys.exc_info()
  88. from inspect import getframeinfo
  89. emsg = 'The error that was raised: ' + str(exc[1]) + '\n\n'
  90. if extra:
  91. emsg += 'This additional information was provided: "%s"' % extra
  92. emsg += '\n\n'
  93. emsg += self.format_frame(getframeinfo(exc[2], context=5))
  94. else:
  95. # Handles an older syntax
  96. emsg = 'The following error was raised: ' + error.__repr__()
  97. if isinstance(error, UcommentError) and error.raised:
  98. return
  99. self.raised = True # prevent errors with emailing from cycling again.
  100. log_file.error(emsg)
  101. alert_system_admin(emsg)
  102. def format_frame(self, traceback):
  103. """ Receives a named tuple, ``traceback``, created by the ``inspect``
  104. module's ``getframeinfo()`` function. Formats it to string output.
  105. """
  106. out = '*\tfilename = %s\n' % os.path.abspath(traceback.filename)
  107. out += '*\tline number = %s\n' % traceback.lineno
  108. out += '*\tfunction = %s(...)\n' % traceback.function
  109. out += '*\tsource code where error occurred:\n'
  110. for idx, line in enumerate(traceback.code_context):
  111. out += '\t\t%s' % line.rstrip()
  112. if idx == traceback.index:
  113. out += ' <--- error occurred here\n'
  114. else:
  115. out += '\n'
  116. return out + '\n'
  117. def create_codes_ID(num):
  118. """
  119. Creates a new comment identifier; these appear in the source code for
  120. each page; they must be short, not confusing, and low chance of collisions.
  121. Intentionally does not include "i", "I", "l" "D", "o", "O", "Z", "0" to
  122. avoid visual confusion with similar-looking characters. We (ab)use this
  123. fact to create the orphan comment reference with name = '_ORFN_'.
  124. 53 characters, N=4 combinations = 53^4 = many comment ID's
  125. """
  126. valid_letters = 'abcdefghjkmnpqrstuvwxyzABCEFGHJKLMNPQRSTUVWXY23456789'
  127. return ''.join([random.choice(valid_letters) for i in range(num)])
  128. def convert_web_name_to_link_name(page_name, prefix=''):
  129. """
  130. Converts the web page name over to the ``link_name`` used in the Django
  131. model for ``Page``.
  132. If a prefix is provided (e.g. the fully web address), then this is stripped
  133. off first, then the rest is used as the page_name.
  134. """
  135. if prefix:
  136. page_name = page_name.split(prefix)[1]
  137. if '?' in page_name:
  138. page_name = page_name.split('?')[0]
  139. if conf.url_views_prefix:
  140. return page_name.split('/'+conf.url_views_prefix+'/')[1].rstrip('/')
  141. else:
  142. return page_name.lstrip('/').rstrip('/')
  143. # TODO(KGD): remove ``conf.url_views_prefix``, and the need for this
  144. # function. Can we not use the reverse(..) function?
  145. def get_site_url(request, add_path=True, add_views_prefix=False):
  146. """
  147. Utility function: returns the URL from which this Django application is
  148. served. E.g. when receiving a Django ``request`` object from the user
  149. on comment submission, their URL might be:
  150. https://site.example.com/document/_submit-comment/
  151. >>> get_site_url(request)
  152. 'https://site.example.com/document/_submit-comment/'
  153. """
  154. # TODO(KGD): Consider using ``request.build_absolute_uri()`` instead
  155. out = 'http://'
  156. if request.is_secure():
  157. out = 'https://'
  158. out += request.get_host()
  159. if add_path:
  160. out += request.path
  161. if add_views_prefix:
  162. if conf.url_views_prefix:
  163. out += '/' + conf.url_views_prefix + '/'
  164. else:
  165. out += '/'
  166. return out
  167. def get_IP_address(request):
  168. """
  169. Returns the visitor's IP address as a string.
  170. """
  171. # Catchs the case when the user is on a proxy
  172. try:
  173. ip = request.META['HTTP_X_FORWARDED_FOR']
  174. except KeyError:
  175. ip = ''
  176. else:
  177. # HTTP_X_FORWARDED_FOR is a comma-separated list; take first IP:
  178. ip = ip.split(',')[0]
  179. if ip == '' or ip.lower() == 'unkown':
  180. ip = request.META['REMOTE_ADDR'] # User is not on a proxy
  181. return ip
  182. # Comment preview and submission functions
  183. # ----------------------------------------
  184. class CommentForm(forms.Form):
  185. """ Comment form as seen on the server """
  186. email = forms.EmailField(required=False)
  187. comment_raw = forms.CharField(min_length=conf.comment_min_length,
  188. max_length=conf.comment_max_length)
  189. def valid_form(p_email, comment_raw):
  190. """ Verifies if a valid form was filled out.
  191. Returns an empty string if the form is valid.
  192. Returns a string, containing an HTML error message if the form is not valid.
  193. """
  194. # Ignore empty email addresses; email field is optional anyway, but we do
  195. # want to alert the user if the email is invalid.
  196. if p_email.strip() == '':
  197. p_email = 'no.email@example.com'
  198. user_form = CommentForm({'email': p_email,
  199. 'comment_raw': comment_raw})
  200. if not user_form.is_valid():
  201. error_dict = user_form.errors
  202. errors = ['<ul class="ucomment-error">']
  203. if 'email' in error_dict:
  204. errors.append(('<li> Your email address is not in the correct '
  205. 'format.</li>'))
  206. if 'comment_raw' in error_dict:
  207. errors.append(('<li> Comments must have between %i and %i '
  208. 'characters.</li>' % (conf.comment_min_length,
  209. conf.comment_max_length)))
  210. errors.append('</ul>')
  211. log_file.info('Returning with these errors:' + str(errors))
  212. return ''.join(errors)
  213. else:
  214. return ''
  215. def initial_comment_check(request):
  216. """
  217. Provides a preliminary check of the comment submission.
  218. * Must be a POST request not at a GET request.
  219. * Must have a valid email address.
  220. * Comment length must be appropriate (see conf/settings.py file).
  221. """
  222. if request.method == 'POST':
  223. c_comment_RST = request.POST['comment']
  224. p_email = request.POST['email']
  225. errors = valid_form(p_email, c_comment_RST)
  226. if errors:
  227. web_response = HttpResponse(errors, status=200)
  228. web_response['Ucomment'] = 'Preview-Invalid input'
  229. return False, web_response
  230. else:
  231. return True, c_comment_RST
  232. elif request.method == 'GET':
  233. return False, HttpResponse('N/A', status=404)
  234. elif request.method == 'OPTIONS':
  235. # Handles a Firefox probe before the POST request is received.
  236. web_response = HttpResponse(status=404)
  237. web_response['Access-Control-Allow-Origin'] = '*'
  238. return False, web_response
  239. else:
  240. log_file.warn(request.method + ' received; not handled; return 400.')
  241. return False, HttpResponse(status=400)
  242. def preview_comment(request):
  243. """
  244. User has clicked the "Preview comment" button in browser. Using XHR, the
  245. comment is POSTed here, extracted, and compiled.
  246. """
  247. # ``Success``: means the submission was validated, or was processed.
  248. # If it failed, then ``response`` will contain an appropriate HttpResponse
  249. # that can be returned right away.
  250. success, response = initial_comment_check(request)
  251. if success:
  252. web_response = HttpResponse(status=200)
  253. try:
  254. compiled_comment_HTML = compile_comment(response)
  255. web_response['Ucomment'] = 'Preview-OK'
  256. except Exception as err:
  257. # Should an error occur while commenting, log it, but respond to
  258. # the user.
  259. UcommentError(err, ('An exception occurred while generating a '
  260. 'comment preview for the user.'))
  261. compiled_comment_HTML = ('<p>An error occurred while processing '
  262. 'your comment. The error has been '
  263. 'reported to the website administrator.'
  264. '<p>UTC time of error: %s' % \
  265. datetime.datetime.utcnow().ctime())
  266. web_response['Ucomment'] = 'Preview-Exception'
  267. log_file.info('COMPILE: from IP=%s; comment: "%s"' % \
  268. (get_IP_address(request), response))
  269. web_response.write(compiled_comment_HTML)
  270. return web_response
  271. else:
  272. return response
  273. def compile_comment(comment):
  274. """
  275. First scans the ``comment`` string, then compiles the RST to HTML.
  276. """
  277. # The Javascript XHR request have a timeout value (set to 5 seconds).
  278. # set a timer on the compile time? If more than 5 seconds to
  279. # compile, then log the comment, return a response back to the user.
  280. start_time = time.time()
  281. comment = compile_RST_to_HTML(comment)
  282. end_time = time.time()
  283. if (end_time-start_time) > 3:
  284. log_file.warning(('Comment compile time exceeded 3 seconds; server'
  285. 'load too high?'))
  286. return comment
  287. def call_sphinx_to_compile(working_dir):
  288. """
  289. Changes to the ``working_dir`` directory and compiles the RST files to
  290. pickle files, according to settings in the conf.py file.
  291. Returns nothing, but logs if an error occurred.
  292. """
  293. build_dir = os.path.abspath(working_dir + os.sep + '_build')
  294. ensuredir(working_dir)
  295. ensuredir(build_dir)
  296. status = StringIO()
  297. warning = StringIO()
  298. try:
  299. app = Sphinx(srcdir=working_dir, confdir=working_dir,
  300. outdir = build_dir + os.sep + 'pickle',
  301. doctreedir = build_dir + os.sep + 'doctrees',
  302. buildername = 'pickle',
  303. status = status,
  304. warning = warning,
  305. freshenv = True,
  306. warningiserror = False,
  307. tags = [])
  308. # Call the ``pickle`` builder
  309. app.build()
  310. except SphinxError as e:
  311. if warning.tell():
  312. warning.seek(0)
  313. for line in warning.readlines():
  314. log_file.warn('COMMENT: ' + line)
  315. msg = ('Sphinx error occurred when compiling comment (error type = %s): '
  316. '%s' % (e.category, str(e)))
  317. UcommentError(err, msg)
  318. if app.statuscode == 0:
  319. log_file.info("COMMENT: Successfully compiled the reader's comment.")
  320. else:
  321. log_file.error("COMMENT: Non-zero status code when compiling.")
  322. def convert_raw_RST(raw_RST):
  323. """
  324. Performs any sanitizing of the user's input.
  325. Currently performs:
  326. * converts '\\' to '\\\\': i.e. single slash converted to double-slash,
  327. because Sphinx converts is back to a single slash
  328. """
  329. out = raw_RST.replace('\\', '\\\\')
  330. # You can perform any other filtering here, if required.
  331. return out
  332. def compile_RST_to_HTML(raw_RST):
  333. """ Compiles the RST string, ``raw_RST`, to HTML. Performs no
  334. further checking on the RST string.
  335. If it is a comment, then we don't modify the HTML with extra class info.
  336. But we do filter comments to disable hyperlinks.
  337. Also copy over generated MATH media to the correct directory on the server.
  338. """
  339. ensuredir(conf.comment_compile_area)
  340. modified_RST = convert_raw_RST(raw_RST)
  341. with open(conf.comment_compile_area + os.sep + 'index.rst', 'w') as fhand:
  342. fhand.write(modified_RST)
  343. try:
  344. conf_file = conf.comment_compile_area + os.sep + 'conf.py'
  345. f = file(conf_file, 'r')
  346. except IOError:
  347. # Store a fresh copy of the "conf.py" file, found in
  348. # ../sphinx-extensions/ucomment-conf.py; copy it to comment destination.
  349. this_file = os.path.abspath(__file__).rstrip(os.sep)
  350. parent = this_file[0:this_file.rfind(os.sep)]
  351. src = os.sep.join([parent, 'sphinx-extensions', 'ucomment-conf.py'])
  352. shutil.copyfile(src, conf.comment_compile_area + os.sep + 'conf.py')
  353. else:
  354. f.close()
  355. # Compile the comment
  356. call_sphinx_to_compile(conf.comment_compile_area)
  357. pickle_f = ''.join([conf.comment_compile_area, os.sep, '_build', os.sep,
  358. 'pickle', os.sep, 'index.fpickle'])
  359. with open(pickle_f, 'r') as fhand:
  360. obj = pickle.load(fhand)
  361. html_body = obj['body'].encode('utf-8')
  362. # Any equations in the HTML? Transfer these images to the media directory
  363. # and rewrite the URL's in the HTML.
  364. return transfer_html_media(html_body)
  365. def transfer_html_media(html_body):
  366. """
  367. Any media files referred to in the HTML comment are transferred to a
  368. sub-directory on the webserver.
  369. The links are rewritten to refer to the updated location.
  370. """
  371. mathdir = ''.join([conf.comment_compile_area, os.sep, '_build', os.sep,
  372. 'pickle', os.sep, '_images', os.sep, 'math', os.sep])
  373. ensuredir(mathdir)
  374. dst_dir = conf.MEDIA_ROOT + 'comments' + os.sep
  375. ensuredir(dst_dir)
  376. for mathfile in os.listdir(mathdir):
  377. shutil.copyfile(mathdir + mathfile, dst_dir + mathfile)
  378. src_prefix = 'src="'
  379. math_prefix = '_images' + os.sep + 'math' + os.sep
  380. replacement_text = ''.join([src_prefix, conf.MEDIA_URL,
  381. 'comments', os.sep])
  382. html_body = re.sub(src_prefix + math_prefix, replacement_text, html_body)
  383. return html_body
  384. def create_poster(request):
  385. """
  386. Creates a new ``CommentPoster`` object from a web submission, ``request``.
  387. """
  388. p_name = request.POST['name'].strip() or 'Anonymous contributor'
  389. p_email = request.POST['email'].strip()
  390. # The default (unchecked box) is for opt-in = False
  391. p_opted_in = True
  392. try:
  393. # Fails if unchecked (default); succeeds if checked: caught in "else"
  394. p_opted_in = request.POST['updates'] == 'get_updates'
  395. except KeyError:
  396. p_opted_in = False
  397. # Get the poster entry, or create a new one. Always create a new poster
  398. # entry for anonymous posters.
  399. c_IP_address = get_IP_address(request)
  400. c_UA_string = request.META.get('HTTP_USER_AGENT', '')[0:499]
  401. p_default = {'name' : p_name,
  402. 'long_name': p_name + '__' + c_IP_address + '__' + c_UA_string,
  403. 'email': p_email,
  404. 'number_of_approved_comments': 0,
  405. 'avatar_link': '', # add this functionality later?
  406. 'auto_approve_comments': False,
  407. 'opted_in': False}
  408. if p_email:
  409. if p_name.lower() not in ('anonymous', 'anonymous contributor', ''):
  410. p_default['long_name'] = p_name
  411. poster, _ = models.CommentPoster.objects.get_or_create(email=p_email,
  412. defaults=p_default)
  413. poster.opted_in = poster.opted_in or p_opted_in
  414. poster.save()
  415. else:
  416. poster, _ = models.CommentPoster.objects.get_or_create(\
  417. defaults=p_default)
  418. # Change settings for all posters:
  419. if poster.number_of_approved_comments >= conf.number_before_auto_approval:
  420. poster.auto_approve_comments = True
  421. poster.number_of_approved_comments += 1
  422. poster.save()
  423. log_file.info('POSTER: Created/updated poster: ' + str(poster))
  424. return poster
  425. def submit_and_store_comment(request):
  426. """
  427. The user has typed in a comment and previewed it. Now store it and queue
  428. it for approval.
  429. ``Comment`` objects have 3 ForeignKeys which must already exist prior:
  430. * ``page``: the page name on which the comment appears
  431. * ``poster``: an object representing the person making the comment
  432. * ``reference``: a comment reference that facilitates making the comment
  433. """
  434. start_time = time.time()
  435. # Response back to user, if everything goes OK
  436. response = HttpResponse(status=200)
  437. response['Ucomment'] = 'Submission-OK'
  438. try:
  439. html_template = Template(conf.once_submitted_HTML_template)
  440. except TemplateSyntaxError as err:
  441. # Log the error, but don't disrupt the response to the user.
  442. html_template = Template('Thank you for your submission.')
  443. UcommentError(err, "Error in 'once_submitted_HTML_template'.")
  444. # Note about variable names: ``p_`` prefix refers to poster objects in the
  445. # database, while ``c_`` = refers to comment objects.
  446. # ForeignKey: comment object (setup only; will be created at the end)
  447. # --------------------------
  448. success, c_comment_RST = initial_comment_check(request)
  449. if not success:
  450. return c_comment_RST
  451. else:
  452. c_comment_HTML = compile_comment(c_comment_RST)
  453. # Only get the comment reference via its root:
  454. ref = models.CommentReference.objects.filter(\
  455. comment_root=request.POST['comment_root'])
  456. if len(ref):
  457. c_reference = ref[0]
  458. else:
  459. # One possibility to consider is to create the comment reference
  460. # right here. However, it is quite hard to do this properly, because
  461. # do not know all the field properties for a CommentReference object:
  462. # such as the line number, page_link_name, node_type, and others.
  463. # This will only occur in the exceptional case when the document
  464. # has been republished, and the user still has a previous version in
  465. # their browser. Hence the page reload request.
  466. response.write(('<p>A minor error occurred while processing your '
  467. 'comment.<p>The only way to correct it is to reload '
  468. 'the page you are on, and to resubmit your comment. '
  469. '<p>Sorry for the inconvenience.'))
  470. log_file.warn('COMMENT: User posting a comment from an older page.')
  471. return response
  472. # Below the comment reference appears an unique node:
  473. used_nodes = []
  474. c_node = create_codes_ID(conf.short_node_length)
  475. for comment in c_reference.comment_set.all():
  476. used_nodes.append(comment.node)
  477. while c_node in used_nodes:
  478. c_node = create_codes_ID(conf.short_node_length)
  479. # ForeignKey: page object; get the page on which the comment appears
  480. # -----------------------
  481. link_name = convert_web_name_to_link_name(request.POST['page_name'])
  482. c_page = models.Page.objects.filter(link_name=link_name)[0]
  483. # ForeignKey: comment poster objects
  484. # -----------------------------------
  485. poster = create_poster(request)
  486. # We can update the response as soon as we have created the poster object
  487. response.write(html_template.render(settings=conf, poster=poster))
  488. if poster.number_of_approved_comments >= conf.number_before_auto_approval:
  489. c_is_approved = True
  490. c_node_for_RST = c_node
  491. else:
  492. c_node_for_RST = c_node + '*' # indicates comment is not approved yet
  493. # Do all the work here of adding the comment to the RST sources
  494. revision_changeset, c_root = commit_comment_to_sources(\
  495. c_reference,
  496. c_node_for_RST,
  497. update_RST_with_comment)
  498. # NOTE: the line numbers for any comment references that might appear below
  499. # the current comment reference will be incorrect - they will be too
  500. # low. However, their line numbers will be rectified once the
  501. # document is republished (comment references are updated).
  502. # An error occurred:
  503. if revision_changeset == False:
  504. # Technically the submission is NOT OK, but the comment admin has been
  505. # emailed about the problem and can manually enter the comment into
  506. # the database and RST source files.
  507. return response
  508. # Update the ``comment_root_is_used`` field in the comment reference, since
  509. # this root can never be used again.
  510. c_reference.comment_root_is_used = True
  511. # Also update the changeset information. In the future we will update
  512. # comments for this node from this newer repository.
  513. c_reference.revision_changeset = revision_changeset
  514. c_reference.save()
  515. # Create the comment object
  516. c_datetime_submitted = c_datetime_approved = datetime.datetime.now()
  517. c_IP_address = get_IP_address(request)
  518. c_UA_string = request.META.get('HTTP_USER_AGENT', '')[0:499]
  519. c_approval_code = create_codes_ID(conf.approval_code_length)
  520. c_rejection_code = create_codes_ID(conf.approval_code_length)
  521. c_is_approved = c_is_rejected = False
  522. # For now all comments have no parent (they are descendents of the comment
  523. # root). Later, perhaps, we can add threading functionality, so that users
  524. # can respond to previous comments. Then the parent of a new comment will
  525. # be given by: ``c_root + ':' + parent_comment.node``
  526. c_parent = c_root
  527. the_comment, _ = models.Comment.objects.get_or_create(
  528. page = c_page,
  529. poster = poster,
  530. reference = c_reference,
  531. node = c_node,
  532. parent = c_parent,
  533. UA_string = c_UA_string,
  534. IP_address = c_IP_address,
  535. datetime_submitted = c_datetime_submitted,
  536. datetime_approved = c_datetime_approved,
  537. approval_code = c_approval_code,
  538. rejection_code = c_rejection_code,
  539. comment_HTML = c_comment_HTML,
  540. comment_RST = c_comment_RST,
  541. is_rejected = c_is_rejected,
  542. is_approved = c_is_approved)
  543. log_file.info('COMMENT: Submitted comment now saved in the database.')
  544. # Send emails to the poster and comment admin regarding the new comment
  545. # TODO(KGD): queue it
  546. emails_after_submission(poster, the_comment, request)
  547. total_time = str(round(time.time() - start_time, 1))
  548. log_file.info(('COMMENT: Emails to poster and admin sent successfully; '
  549. "returning response back to user's browser. Total time to "
  550. ' process comment = %s secs.') % total_time)
  551. return response
  552. def approve_reject_comment(request, code):
  553. """
  554. Either approves or rejects the comment, depending on the code received.
  555. Approved comments:
  556. - The # part after the comment node is removed in the RST file
  557. - The comment is marked as approved in the database and will appear on
  558. the next page refresh.
  559. - The poster is notified by email (if an email was supplied)
  560. Rejected comments:
  561. - The # part after the comment node is changed to a * in the RST file
  562. - The comment is marked as rejected in the database.
  563. - The poster is notified by email (if an email was supplied)
  564. """
  565. # Response back to user, if everything goes OK
  566. response = HttpResponse(status=200)
  567. approve = models.Comment.objects.filter(approval_code=code)
  568. reject = models.Comment.objects.filter(rejection_code=code)
  569. # Settings used to approve the comment: we remove the '*'
  570. if len(approve) == 1:
  571. verb = 'approved'
  572. symbol = '\*' # escaped, because it will be used in a regular expressn
  573. replace = ''
  574. comment = approve[0]
  575. comment.is_approved = True
  576. comment.is_rejected = False
  577. email_func = email_poster_approved
  578. # Settings used to reject the comment: we change the '*' to a '#'
  579. elif len(reject) == 1:
  580. verb = 'rejected'
  581. symbol = '\*'
  582. replace = '#*'
  583. comment = reject[0]
  584. comment.is_approved = False
  585. comment.is_rejected = True
  586. email_func = email_poster_rejected
  587. # Bad approve/reject code given: don't mention anything; just return a 404.
  588. else:
  589. return HttpResponse('', status=404)
  590. revision_changeset, _ = commit_comment_to_sources(comment.reference,
  591. comment.node,
  592. update_RST_comment_status,
  593. additional={'search': symbol,
  594. 'replace': replace})
  595. if revision_changeset == False:
  596. # An error occurred while committing the comment. An email has already
  597. # been sent. Return a message to the user:
  598. response.write(('An error occurred while approving/rejecting the '
  599. 'comment. Please check the log files and/or email for '
  600. 'the site administrator.'))
  601. return response
  602. comment.reference.comment_root_is_used = True
  603. # Also update the changeset information. In the future we will update
  604. # comments for this node from this newer repository.
  605. comment.reference.revision_changeset = revision_changeset
  606. comment.reference.save()
  607. if verb == 'approved':
  608. comment.poster.number_of_approved_comments += 1
  609. elif verb == 'rejected':
  610. comment.poster.number_of_approved_comments -= 1
  611. comment.poster.number_of_approved_comments = max(0,
  612. comment.poster.number_of_approved_comments)
  613. comment.poster.save()
  614. comment.datetime_approved = datetime.datetime.now()
  615. comment.save()
  616. # Remove the comment count cache for the page on which this comment appears
  617. cache_key = 'counts_for__' + comment.page.link_name
  618. django_cache.cache.delete(cache_key)
  619. # Send an email the comment poster: rejected or approved
  620. email_func(comment.poster, comment)
  621. approve_reject_template = Template(('<pre>'
  622. 'The comment was {{action}}.\n\n'
  623. '\t* Comment root = {{reference.comment_root}}\n'
  624. '\t* Comment node = {{comment.node}}\n'
  625. '\t* At line number = {{reference.line_number}}\n'
  626. '\t* In file name = {{filename}}\n'
  627. '\t* Committed as changeset = {{changeset}}\n\n</pre>'))
  628. output = approve_reject_template.render(action=verb.upper(),
  629. reference = comment.reference,
  630. comment = comment,
  631. filename = os.path.split(\
  632. comment.reference.file_name)[1],
  633. changeset = revision_changeset)
  634. response.write(output)
  635. return response
  636. # Repository manipulation functions
  637. # ---------------------------------
  638. def update_local_repo(rev='tip'):
  639. """
  640. Updates the local repository from the remote repository and must be used
  641. before performing any write operations on files in the repo.
  642. If the local repository does not exist, it will first create a full clone
  643. from the remote repository.
  644. Then it does the equivalent of:
  645. * hg pull (pulls in changes from the remote repository)
  646. * hg update (takes our repo up to tip)
  647. * hg merge (merges any changes that might be required)
  648. Then if the optional input ``rev`` is provided, it will revert the
  649. repository to that revision, given by a string, containing the hexadecimal
  650. indicator for the required revision.
  651. This function returns the hexadecimal changeset for the local repo as
  652. it has been left after all this activity.
  653. """
  654. # First check if the local repo exists; if not, create a clone from
  655. # the remote repo.
  656. try:
  657. try:
  658. ensuredir(conf.local_repo_physical_dir)
  659. except OSError as err:
  660. msg = ('The local repository location does not exist, or cannot '
  661. 'be created.')
  662. raise UcommentError(err, msg)
  663. hex_str = dvcs.get_revision_info()
  664. except dvcs.DVCSError:
  665. try:
  666. dvcs.clone_repo(conf.remote_repo_URL, conf.local_repo_URL)
  667. except dvcs.DVCSError as error_remote:
  668. msg = ('The remote repository does not exist, or is '
  669. 'badly specified in the settings file.')
  670. raise UcommentError(error_remote, msg)
  671. log_file.info('Created a clone of the remote repo in the local path')
  672. # Update the local repository to rev='tip' from the source repo first
  673. try:
  674. dvcs.pull_update_and_merge()
  675. except dvcs.DVCSError as err:
  676. raise UcommentError(err, 'Repository update and merge error')
  677. hex_str = dvcs.get_revision_info()
  678. if rev != 'tip' and isinstance(rev, basestring):
  679. hex_str = dvcs.check_out(rev=rev)
  680. return hex_str
  681. def commit_to_repo_and_push(commit_message):
  682. """
  683. Use this after performing any write operations on files.
  684. Commits to the local repository; pushes updates from the local repository
  685. to the remote repository.
  686. Optionally, it will also update the remote repository to the tip (the
  687. default is not to do this).
  688. Returns the changeset code for the local repo on completion.
  689. """
  690. hex_str = dvcs.commit_and_push_updates(commit_message)
  691. # Merge failed! Log it and email the site admin
  692. if not(hex_str):
  693. raise UcommentError(('Repo merging failed (conflicts?) when trying to '
  694. 'commit. Commit message was: %s' % commit_message))
  695. # Check that changeset and revision matches the remote repo numbers
  696. return hex_str
  697. def commit_comment_to_sources(reference, node, func, additional=None):
  698. """
  699. Commits or updates a comment in the RST sources.
  700. ``reference``: is a comment reference object from the database and tells
  701. us how and where to add the comment
  702. ``node``: is the commment noded (string) that is added to the RST source.
  703. ``func``: does the work of either adding or updating the RST sources.
  704. ``additional``: named keywords and values in a dict that will be passed
  705. to ``func``.
  706. On successful completion it will return:
  707. ``revision_changeset``: string identifier for the updated repository
  708. ``comment_root``: a string of the comment root that was added/updated
  709. """
  710. # This part is sensitive to errors occurring when writing to the
  711. # RST source files.
  712. try:
  713. # Get the RST file to the revision required for adding the comment:
  714. hex_str = update_local_repo(reference.revision_changeset)
  715. f_handle = file(reference.file_name, 'r')
  716. RST_source = f_handle.readlines()
  717. f_handle.close()
  718. # Add the comment to the RST source; send the comment reference
  719. # which has all the necessary input information in it.
  720. try:
  721. if additional == None:
  722. additional = {}
  723. c_root = func(comment_ref = reference,
  724. comment_node = node,
  725. RST_source = RST_source, **additional)
  726. except Exception as err:
  727. # will be caught in outer try-except
  728. # TODO(KGD): test that this works as expected: what happens after?
  729. raise UcommentError(err, ('General error while adding or updating '
  730. 'comment in the RST sources.'))
  731. # Write the update list of strings, RST_source, back to the file
  732. f_handle = file(reference.file_name, 'w')
  733. f_handle.writelines(RST_source)
  734. f_handle.close()
  735. short_filename = os.path.split(reference.file_name)[1]
  736. commit_message = ('COMMIT: Automatic comment [comment_root=%s, '
  737. 'node=%s, line=%s, file=%s]; repo_id=%s') % \
  738. (c_root, node, str(reference.line_number),
  739. short_filename, hex_str)
  740. hex_str = commit_to_repo_and_push(commit_message)
  741. log_file.info(commit_message)
  742. return hex_str, c_root
  743. except (UcommentError, dvcs.DVCSError) as err:
  744. UcommentError(err)
  745. return False, False
def update_RST_with_comment(comment_ref, comment_node, RST_source):
    """
    Appends the ``comment_node`` string (usually a 2-character string), to the
    appropriate line in the RST_source (a list of strings).

    ``comment_ref`` provides the line number and node type which will be
    commented.  We always use an existing comment root, if there is one,
    otherwise we create a new comment root according to these rules:

    Paragraphs, titles, literal_block (source code blocks), tables and figures
    will have their ucomment appended in the second blank line after the node.

    List items (bullet points and ordered lists) will have their ucomment
    appended in the very next line, always.

    In all cases, an existing ucomment directive for that node will be searched
    for and added to.

    The simplest example possible: comment_node='2s' and comment_root='sR4fa4':

    RST_source before (1 line)
        Here is a paragraph of text.

    RST_source after (3 lines):
        Here is a paragraph of text.

        .. ucomment:: sR4fa4: 2s

    Inputs read from ``comment_ref``: ``line_number`` (1-based), ``node_type``
    and ``comment_root``.  For ``literal_block`` nodes introduced by a
    trailing ``::``, ``comment_ref.line_number`` is increased by 2 (unless
    ``comment_ref`` is a tuple, as used in the unit tests).

    Raises ``UcommentError`` if ``comment_ref.node_type`` is not one of the
    known node types.

    Output and side effects:
        * Returns the comment_root as the only output.
        * Modifies the list of strings, ``RST_source`` in place.
    """
    # We can only handle these nodes: anything else will raise an error
    KNOWN_NODES = set(('paragraph', 'title', 'literal_block', 'table',
                       'image', 'list_item', 'displaymath'))

    # Regular expression, which when matched AT THE START OF A LINE, indicate
    # list items in the RST syntax. See the full RST specification:
    # http://docutils.sourceforge.net/docs/user/rst/quickstart.html
    RST_LIST_ITEMS_AS_RE = re.compile(r'''(\s*)
        ( # group all list item types
        (\-)|(\*)|(\+)| # any bullet list items
        (\#\.)| # auto-enumerate: "#."
        (\w*\.)| # enumerated: "12." or "A." or "i."
        (\(\w*\))| # enumerated: "(23)" or "(B)"
        (\w\)) # enumerated: "12)" or "iii)"
        ) # end of all list item types
        (?P<space>\s*) # catch trailing spaces at end.''', \
        re.X)

    # Maps ``node_type`` to RST directives
    NODE_DIRECTIVE_MAP = {'displaymath': ['math'],
                          'image': ['image', 'figure'],
                          'table': ['table', 'csv-table', 'list_table'],
                          'literal_block': ['code-block', 'literalinclude'],}

    # Nodes given by the ``keys`` in NODE_DIRECTIVE_MAP allow blank lines
    # within in their content, so determining where to place the ucomment
    # directive cannot rely purely on finding the next blank line. Example:
    #
    #    |Before                        |After
    #    |--------------------------------------------------------------
    #  1 |.. figure:: the_figure.png    |.. figure:: the_figure.png
    #  2 |    :scale: 100%              |    :scale: 100%
    #  3 |                              |
    #  4 |    Figure caption goes here. |    Figure caption goes here.
    #  5 |                              |
    #  6 |Next paragraph begins here.   |..ucomment:: ABCDEF: 2b
    #  7 |                              |
    #  8 |                              |Next paragraph begins here.
    #
    # It would be wrong to place the ucomment directive at/around line 3
    # as this would cut off the figure's caption.  What we do instead is to
    # find the end of the node and insert the comment at that point.

    def wrap_return(RST_source, insert, prefix):
        """
        Adds the new ucomment directive and return the updated RST_source,
        or, appends to the existing comment.

        ``insert``: index into ``RST_source`` of the existing directive line
                    (when ``prefix`` is None), or the index at which the new
                    directive line is inserted.
        ``prefix``: None to append to an existing directive; otherwise the
                    leading whitespace string placed before the new directive.

        Uses ``comment_ref``, ``comment_node`` and ``dir_len`` from the
        enclosing function.  Returns the comment root.
        """
        if prefix is None:
            comment = RST_source[insert]
            # The root is the ``conf.root_node_length`` characters that
            # follow the directive text and one separating character.
            c_root = comment.strip()[dir_len+1:dir_len+1+conf.root_node_length]
            # Always add a comma after the last comment
            if not comment.rstrip().endswith(','):
                suffix = ', '
            else:
                suffix = ' '
            # ``comment[0:-1]`` drops the final character of the line
            # (presumably the newline) before appending the new node.
            RST_source[insert] = comment[0:-1] + suffix + comment_node + ',\n'
        else:
            c_root = comment_ref.comment_root
            line_to_add = prefix + COMMENT_DIRECTIVE + ' ' + c_root + ': ' + \
                          comment_node + ',\n'
            # The blank line is inserted first, so that the directive line,
            # inserted at the same index afterwards, ends up above it.
            if comment_ref.node_type in KNOWN_NODES:
                RST_source.insert(insert, '\n')
            RST_source.insert(insert, line_to_add)

        # Pop off the last line of text that was artificially added.
        RST_source.pop()
        return c_root

    # A few corner cases are solved if we ensure the file ends with a blank line
    if RST_source[-1].strip() != '':
        RST_source.append('\n')

    # Force an unrelated line at the end of the file to avoid coding
    # specifically for end-effects.
    RST_source.extend(['___END__OF___FILE___\n'])

    # The comment reference line numbers are 1-based; we need 0-based numbers
    line_num = comment_ref.line_number - 1
    dir_len = len(COMMENT_DIRECTIVE)

    if comment_ref.node_type not in KNOWN_NODES:
        raise UcommentError('Unknown node type: "%s"' % str(comment_ref))

    # To find any spaces at the start of a line
    prefix_re = re.compile('^\s*')
    prefix_match = prefix_re.match(RST_source[line_num])
    prefix = prefix_match.group()

    # There is one exception though: source code blocks marked with '::'
    # While the node's line number refers to the first line of code, the correct
    # prefix is the amount of space at the start of the line containing the
    # double colons. Search backwards to find them.
    # If we can't find them, then this literal_block is assumed to be a
    # 'code-block', or 'literalinclude' node (i.e. no double colons).
    if comment_ref.node_type == 'literal_block':
        double_colon = re.compile(r'(?P<space>\s*)(.*)::(\s*)')
        directive_re = r'^(\s*)\.\. '
        for directive in NODE_DIRECTIVE_MAP['literal_block']:
            directive_re += '(' + directive + ')|'
        # Drop the trailing '|' before adding the remainder of the pattern
        directive_re = directive_re[0:-1] + r'::(\s*)(.*)'
        directive = re.compile(directive_re)
        # Tracks the index in ``RST_source`` of the line under examination
        # while walking backwards from ``line_num``.
        double_colon_line = line_num + 1
        for line in RST_source[line_num::-1]:
            double_colon_line -= 1
            if directive.search(line):
                break  # it is one of the other directives
            if double_colon.match(line):
                prefix = double_colon.match(line).group('space')

                # Do some surgery on the RST source if the author is using
                # double colons.  By example
                #
                # |Below is some code, the line ends with a double colon::
                # |
                # |    >>> a = 'some source code'
                # |
                #
                # Replace it as follows.
                #
                # |Below is some code, the line ends with a double colon:
                # |
                # |::
                # |
                # |    >>> a = 'some source code'
                # |
                if line.strip() != '::':
                    dci = line.rindex('::')
                    # Remove one of the two colons from the original line
                    RST_source[double_colon_line] = line[0:dci] + line[dci+1:]
                    RST_source.insert(double_colon_line+1, '\n')
                    RST_source.insert(double_colon_line+2, prefix + '::\n')
                    # This will get saved in ``submit_and_store_comment(...)``
                    if not isinstance(comment_ref, tuple):
                        # Minor technicality: we used named tuples in unit tests
                        comment_ref.line_number = comment_ref.line_number + 2
                    # Two lines were inserted above the node: shift the index
                    line_num += 2
                break

    # The point where the ucomment directive will be inserted
    insert = line_num + 1

    # We are *always* given the top line number of the node: so we only have to
    # search for the insertion point below that.  We will start examining from
    # the first line below that.
    finished = False
    next_line = ''
    idx_next = 0
    for idx, line in enumerate(RST_source[line_num+1:]):
        insert += 1  # insert = line_num + idx + 1
        # Offset from ``line_num`` at which the look-ahead scan below starts
        bias = idx + 2
        if line.strip() == '' or comment_ref.node_type == 'list_item':
            if comment_ref.node_type == 'list_item':
                bias -= 1
            # Keep looking further down for an existing ucomment directive
            for idx_next, next_line in enumerate(RST_source[line_num+bias:]):
                if next_line.strip() != '':
                    if next_line.lstrip()[0:dir_len] == COMMENT_DIRECTIVE:
                        # Found an existing directive: append to it
                        insert = line_num + bias + idx_next
                        prefix = None
                        return wrap_return(RST_source, insert, prefix)
                    # First non-blank line is not a ucomment directive
                    finished = True
                    break

        if finished:
            next_prefix = prefix_re.match(next_line.rstrip('\n')).group()

            # Certain nodes cannot rely purely on blank lines to mark their end
            if comment_ref.node_type in NODE_DIRECTIVE_MAP.keys():
                # Break if a non-blank line has the same, or lower indentation
                # level than the environment's level (``prefix``)
                if len(next_prefix.expandtabs()) <= len(prefix.expandtabs()):
                    break
                else:
                    finished = False

            # ``list_item``s generally are commented on the very next line, but
            # first ensure the next line is in fact another list_item.
            # If the next line is a continuation of the current list_item, then
            # set ``finished`` to False, and keep searching.
            elif comment_ref.node_type == 'list_item':
                cleaned = next_line[prefix_match.end():]
                # Most list items will break on this criterion (that the next
                # line contains a list item)
                if RST_LIST_ITEMS_AS_RE.match(cleaned):
                    insert = insert + (bias - 2) + idx_next
                    # Subtract off extra lines to handle multiline items
                    if bias > 1:
                        insert -= (bias -1)
                    break
                # but the final entry in a list will break on this criterion
                elif len(next_prefix.expandtabs()) <= len(prefix.expandtabs()):
                    #insert = insert - 1 # commented out: passes bullet_11
                    break
                # It wasn't really the end of the current item (the one being
                # commented on).  It's just that this item is written over
                # multiple lines.
                else:
                    finished = False
            else:
                break

    # Lastly, list items require a bit more work to handle.  What we want:
    #
    #    |Before                          |After
    #    |--------------------------------------------------------------
    #    |#.  This is a list item         |#.  This is a list item
    #    |<no blank line originally here> |    .. ucomment:: ABCDEF: 2a,
    #    |#.  Next list item              |#.  Next list item
    if comment_ref.node_type == 'list_item':
        # list_item's need to have a different level of indentation
        # If the ``RST_source[line_num]`` is ``____#.\tTwo.\n``, then
        # (note that _ represents a space)
        # * remainder = '#.\tTwo.\n'   i.e. removed upfront spaces
        # * the_list_item = '#.'       i.e. what defines the list
        # * prefix = '______\t'        i.e. what to put before '.. ucomment'
        # * list_item.group('space')='\t' i.e. the tab that appears after '#.'
        remainder = RST_source[line_num][prefix_match.end():]
        list_item = RST_LIST_ITEMS_AS_RE.match(remainder)
        the_list_item = list_item.group().rstrip(list_item.group('space'))
        prefix = prefix + ' ' * len(the_list_item) + list_item.group('space')
        c_root = wrap_return(RST_source, insert, prefix)

        # There was no space between the list_items
        if idx_next == 0 and finished:
            RST_source.insert(insert, '\n')
        return c_root
    else:
        return wrap_return(RST_source, insert, prefix)
  978. def update_RST_comment_status(comment_ref, comment_node, RST_source, \
  979. search, replace):
  980. """
  981. Searches for the existing ``comment_ref`` and ``comment_node`` in the list
  982. of strings given by ``RST_source``. Finds the ``search`` character and
  983. replaces it with the ``replace`` character (indicating whether the comment
  984. was approved or rejected).
  985. The ``RST_source`` is a list of strings; this list will be updated in place.
  986. """
  987. comment_re = re.compile(r'(\s*)\.\. ' + COMMENT_DIRECTIVE.strip(' .:') + \
  988. r'::(\s*)(?P<root>\w*)(\s*):(\s*)(?P<nodes>.+)')
  989. idx = -1
  990. line = ''
  991. for idx, line in enumerate(RST_source[comment_ref.line_number-1:]):
  992. rematch = comment_re.match(line)
  993. if rematch:
  994. if rematch.group('root') == comment_ref.comment_root:
  995. break
  996. if idx < 0:
  997. log_file.error(('Comment (%s) was to be changed, but was not found in'
  998. 'the RST_sources.') % str(comment_ref))
  999. return RST_source
  1000. # We have the correct node. Now edit the code.
  1001. nodes = rematch.group('nodes')
  1002. nodes = re.sub(comment_node + search, comment_node + replace, nodes)
  1003. to_replace = line[0:rematch.start('nodes')] + nodes
  1004. RST_source[comment_ref.line_number - 1 + idx] = to_replace + '\n'
  1005. return comment_ref.comment_root
  1006. # Emailing functions
  1007. # ------------------
  1008. def send_email(from_address, to_addresses, subject, message):
  1009. """
  1010. Basic function to send email according to the four required string inputs.
  1011. Let Django send the message; it takes care of opening and closing the
  1012. connection, as well as locking for thread safety.
  1013. """
  1014. if subject and message and from_address:
  1015. try:
  1016. send_mail(subject, message, from_address, to_addresses,
  1017. fail_silently=True)
  1018. except (BadHeaderError, smtplib.SMTPException) as err:
  1019. # This will log the error, and hopefully email the admin
  1020. UcommentError(err, 'When sending email')
  1021. except Exception as err:
  1022. # Only log the error, incase we are returned back here
  1023. log_file.error('EMAIL ERROR: ' + str(err))
  1024. log_file.info('EMAIL: sent to: ' + ', '.join(to_addresses))
  1025. def email_poster_pending(poster, comment):
  1026. """ Sends an email to the poster to let them know their comment is in the
  1027. queue for approval. Give a time-frame, and tell them the number of comments
  1028. left before their future comments are automatically approved.
  1029. """
  1030. try:
  1031. pending_template = Template(conf.once_submitted_template)
  1032. except TemplateSyntaxError as err:
  1033. # Log the error, but don't email the poster.
  1034. UcommentError(err, "Error in 'once_submitted_template'.")
  1035. return
  1036. message = pending_template.render(settings=conf, poster=poster,
  1037. comment=comment)
  1038. if conf.once_submitted_subject:
  1039. send_email(from_address = conf.email_from,
  1040. to_addresses = [poster.email],
  1041. subject = conf.once_submitted_subject,
  1042. message = message)
  1043. def email_poster_approved(poster, comment):
  1044. """ Sends an email to the poster to let them know their comment has been
  1045. approved. Give a link?
  1046. """
  1047. try:
  1048. approved_template = Template(conf.once_approved_template)
  1049. except TemplateSyntaxError as err:
  1050. # Log the error, but don't email the poster.
  1051. UcommentError(err, "Error in 'once_approved_template'.")
  1052. return
  1053. message = approved_template.render(settings = conf,
  1054. poster = poster,
  1055. comment = comment)
  1056. send_email(from_address = conf.email_from,
  1057. to_addresses = [poster.email],
  1058. subject = conf.once_approved_template,
  1059. message = message)
  1060. def email_approval_confirmation(poster, comment, web_root):
  1061. """
  1062. Sends email to the ``conf.email_comment_administrators_list`` with special
  1063. links to either approve or reject a new comment.
  1064. """
  1065. try:
  1066. approval_template = Template(conf.email_for_approval)
  1067. except TemplateSyntaxError as err:
  1068. # Log the error, but send a bare-bones email
  1069. UcommentError(err, "Error in 'comment-approved' template")
  1070. approval_template = Template(('THIS IS A DEFAULT EMAIL.\n\n'
  1071. 'A new comment was received on your '
  1072. 'ucomment-enabled website.\n\nHowever an error in your settings '
  1073. 'template ("email_for_approval" template) prevented a correctly '
  1074. 'formatted email from being sent to you. Please check your '
  1075. 'template settings carefully.\n\nThe comment was '
  1076. 'recorded in the database.\n\nClick this link to ACCEPT the '
  1077. 'comment: {{comment.approval_code}}\nTo REJECT the comment, click '
  1078. 'here: {{comment.rejection_code}}\n'))
  1079. comment.approval_code = web_root + '_approve-or-reject/' + \
  1080. comment.approval_code
  1081. comment.rejection_code = web_root + '_approve-or-reject/' + \
  1082. comment.rejection_code
  1083. msg = approval_template.render(email_from = conf.email_from,
  1084. poster = poster,
  1085. comment = comment,
  1086. reference = comment.reference,
  1087. webpage = web_root + comment.page.link_name,
  1088. settings = conf)
  1089. send_email(from_address = conf.email_from,
  1090. to_addresses = conf.email_comment_administrators_list,
  1091. subject = conf.email_for_approval_subject + ': %s, node %s' %\
  1092. (comment.reference.comment_root, comment.node),
  1093. message = msg)
  1094. def email_poster_rejected(poster, extra_info=''):
  1095. """
  1096. Sends the poster an email saying their comment was not suitable. If any
  1097. extra text is provided in ``extra_info``, add that to the email.
  1098. """
  1099. # TODO(KGD): add function to reject posting
  1100. pass
  1101. def alert_system_admin(error_msg):
  1102. """ An error occurred: more information contained in the ``error`` string.
  1103. Send an email to the comment administrator.
  1104. """
  1105. msg = ('The following error was logged on your ucomment-enabled '
  1106. 'website at %s: \n\n') % (str(datetime.datetime.now()))
  1107. msg = msg + str(error_msg)
  1108. send_email(conf.email_from, conf.email_system_administrators,
  1109. conf.email_system_administrators_subject, msg)
  1110. def emails_after_submission(poster, comment, request):
  1111. """ Email the poster once the comment has been submitted to the website,
  1112. """
  1113. # Don't bother if no email address
  1114. if poster.email != '':
  1115. # The comment was auto-approved
  1116. if comment.is_approved:
  1117. email_poster_approved(poster, comment)
  1118. # The comment is waiting for approval
  1119. else:
  1120. email_poster_pending(poster, comment)
  1121. # Let the comment administrator know
  1122. email_approval_confirmation(poster, comment, get_site_url(request,
  1123. add_path=False, add_views_prefix=True))
  1124. # Web output functions (HTTP and XHR)
  1125. # -----------------------------------
  1126. def render_page_for_web(page, request, search_value=''):
  1127. """
  1128. Renders a ``page`` object to be displayed in the user's browser.
  1129. We must supply the original ``request`` object so we can add a CSRF token.
  1130. The optional ``search_value`` gives a string with which to pre-fill the
  1131. search box.
  1132. """
  1133. try:
  1134. toc_page = models.Page.objects.filter(is_toc=True).filter(
  1135. prev_link=None)[0]
  1136. toc_link = models.Link(link=django_reverse('ucomment-root'),
  1137. title='Table of contents')
  1138. except IndexError:
  1139. # We only reach here if there is no TOC page in the DB.
  1140. toc_page = ''
  1141. toc_link = models.Link()
  1142. # Build up the navigation links: e.g. "Previous|Up|Table of Contents|Next"
  1143. try:
  1144. nav_template = Template(conf.html_navigation_template.replace('\n',''))
  1145. except TemplateSyntaxError as err:
  1146. # Log the error, but don't disrupt the response to the user.
  1147. UcommentError(err, 'Error in the page navigation template.')
  1148. nav_template = Template('')
  1149. nav_links = nav_template.render(prev=page.prev_link, next=page.next_link,
  1150. parent=page.parent_link, home=toc_link)
  1151. root_link = models.Link.objects.filter(link = '___TOC___')[0]
  1152. root_link.link = toc_link.link
  1153. page_body = ''.join(['\n<!-- page output starts -->\n',
  1154. page.body,
  1155. '<!-- /page output ends -->\n'])
  1156. # Replace any {{IMAGE_LOCATION}} markers in the page.body with a direct
  1157. # link to the media.
  1158. page_body = page_body.replace(r'src="/{{IMAGE_LOCATION}}/',
  1159. r'src="' + conf.media_url)
  1160. # If user is visiting TOC, but is being referred, show where they came from:
  1161. full_referrer = request.META.get('HTTP_REFERER', '')
  1162. log_file.debug('REFERER = %s' % full_referrer)
  1163. referrer_str = ''
  1164. if full_referrer:
  1165. # First, make sure the referrer is hosted on the same website as ours
  1166. if full_referrer.find(request.get_host()) > 0:
  1167. current_URL = request.build_absolute_uri()
  1168. referrer = full_referrer.split(current_URL)
  1169. if len(referrer) > 1:
  1170. referrer_str = referrer[1]
  1171. else:
  1172. referrer_str = referrer[0]
  1173. else:
  1174. referrer = []
  1175. else:
  1176. referrer = []
  1177. if page.is_toc and referrer:
  1178. if page == toc_page and len(referrer) == 1:
  1179. # Strip off the last part of ``current_URL`` and the rest is the
  1180. # base part of the hosting website.
  1181. idx = 0
  1182. for idx, part in enumerate(reversed(current_URL.split('/'))):
  1183. if part != '':
  1184. break
  1185. break_off = '/'.join(current_URL.split('/')[0:-idx-1])
  1186. referrer_str = referrer_str.lstrip(break_off)
  1187. # We are coming from a link "Up one level"
  1188. elif len(referrer) == 2:
  1189. pass
  1190. try:
  1191. # At most one split: parts of the referrer can appear multiple time,
  1192. # so only split once.
  1193. before, after = page_body.split(referrer_str, 1)
  1194. except ValueError:
  1195. # "referrer" not in the page_body (happens when going to the TOC
  1196. # from the "Search" page.
  1197. pass
  1198. else:
  1199. breakpoint = after.find('</a>')
  1200. prefix = after[0:breakpoint]
  1201. suffix = after[breakpoint+4:]
  1202. to_add = ('</a><span id="ucomment-toc-referrer">&lArr; You arrived '
  1203. 'from here</span>')
  1204. page_body = before + referrer_str + prefix + to_add + suffix
  1205. # Create a page hit entry in the database
  1206. page_hit = models.Hit(UA_string = request.META.get('HTTP_USER_AGENT', ''),
  1207. IP_address = get_IP_address(request),
  1208. page_hit = page.html_title, # was ``page.link_name``
  1209. referrer = referrer_str or full_referrer)
  1210. page_hit.save()
  1211. # Was highlighting requested?
  1212. highlight = request.GET.get('highlight', '')
  1213. if highlight:
  1214. search_value = highlight
  1215. # Apply highlighting, using <span> elements
  1216. with_case = request.GET.get('with_case', 'False')
  1217. with_case = with_case.lower() == 'true'
  1218. highlight = [word for word in highlight.split() \
  1219. if (word not in STOP_WORDS and len(word)>3)]
  1220. highlight = []
  1221. # TODO(KGD): turn highlighting off, tentatively. Note that the
  1222. # highlighting code below can break the HTML by inserting span
  1223. # elements, especially if the search term is likely an HTML element
  1224. # in the ``page_body``. Mitigated somewhat by the fact that we search
  1225. # only for whole words.
  1226. for word in highlight:
  1227. if with_case:
  1228. word_re = re.compile(r'\b(%s)\b'%word, re.U + re.L)
  1229. else:
  1230. word_re = re.compile(r'\b(%s)\b'%word, re.I + re.U + re.L)
  1231. page_body = word_re.sub(
  1232. r'<span id="ucomment-highlight-word">\1</span>', page_body)
  1233. # Build up the navigation links: e.g. "Previous|Up|Table of Contents|Next"
  1234. try:
  1235. local_toc_template = Template(conf.side_bar_local_toc_template)
  1236. except TemplateSyntaxError as err:
  1237. # Log the error, but don't disrupt the response to the user.
  1238. UcommentError(err, 'Error in the local sidebar TOC template.')
  1239. local_toc_template = Template('')
  1240. # Modify the page's local TOC to strip out the rendundant one-and-only <li>
  1241. # Render this local TOC to display in the sidebar.
  1242. if page.local_toc.strip() != '':
  1243. local_toc = page.local_toc
  1244. keepthis = re.match('^<ul>(.*?)<li>(.*?)<ul>(?P<keepthis>.*)',local_toc,
  1245. re.DOTALL)
  1246. cleanup = None
  1247. if keepthis:
  1248. keepthis = keepthis.group('keepthis')
  1249. cleanup = re.match('(?P<keepthis>.*?)</ul></li></ul>$',
  1250. keepthis.replace('\n',''))
  1251. if cleanup:
  1252. # This additional cleanup only works on document where we are
  1253. # splitting the major sections across multiple HTML pages.
  1254. page.local_toc = cleanup.group('keepthis')
  1255. sidebar_local_toc = local_toc_template.render(page=page)
  1256. # If the page is the main TOC, and user option is set, then style the <li>
  1257. # so they can expand (uses Javascript). Will still display the page
  1258. # properly even if there is no Javascript.
  1259. if page == toc_page:
  1260. page_body = re.sub('toctree-l1', 'ucomment-toctree-l1', page_body)
  1261. css_body_class = 'ucomment-root'
  1262. else:
  1263. css_body_class = 'ucomment-page'
  1264. # Finally, send this all off to the template for rendering.
  1265. # NOTE: any additional fields added to this dictionary must also be added
  1266. # to the named tuple for the search page: ``def search_document(...)``
  1267. page_content = {'html_title': page.html_title,
  1268. 'body_html': page_body,
  1269. 'nav_links': nav_links,
  1270. 'root_link': root_link,
  1271. 'stylesheet_link': conf.stylesheet_link,
  1272. 'prefix_html': conf.html_prefix_text,
  1273. 'suffix_html': conf.html_suffix_text,
  1274. 'search_value': search_value,
  1275. 'local_TOC': sidebar_local_toc,
  1276. 'sidebar_html': page.sidebar,
  1277. 'css_body_class': css_body_class,
  1278. 'about_commenting_system': conf.html_about_commenting,
  1279. 'page_hits': page.number_of_HTML_visits,
  1280. 'updated_on': page.updated_on}
  1281. page_content.update(csrf(request)) # Handle the search form's CSRF
  1282. # TODO(KGD): redirect to /_search/search terms/AND/True if required
  1283. return render_to_response('document-page.html', page_content)
  1284. def display_page(page_requested):
  1285. """
  1286. Displays the HTML for a page, including all necessary navigation links.
  1287. Must also handle the case of http://example.com/page#subsection to got
  1288. to subsection links within a page.
  1289. """
  1290. start_time = time.time()
  1291. link_name = convert_web_name_to_link_name(page_requested.path)
  1292. log_file.debug('Page requested = %s' % page_requested.path)
  1293. ip_address = get_IP_address(page_requested)
  1294. item = models.Page.objects.filter(link_name=link_name)
  1295. if not item:
  1296. # Requested the master_doc (main document)
  1297. if link_name == '':
  1298. toc_page = models.Page.objects.filter(is_toc=True).filter(\
  1299. prev_link=None)
  1300. if toc_page:
  1301. resp = django_reverse('ucomment-root') + toc_page[0].link_name
  1302. return HttpResponseRedirect(resp)
  1303. else:
  1304. emsg = ('A master page does not exist in the ucomment database.'
  1305. '<p>Have you correctly specified the settings in this '
  1306. 'file? <pre>%sconf/settings.py</pre>'
  1307. '<p>Also visit <a href="%s">the administration page</a>'
  1308. ' to start compiling your document.' % (
  1309. conf.application_path,
  1310. django_reverse('ucomment-admin-signin')))
  1311. return HttpResponse(emsg)
  1312. elif models.Page.objects.filter(link_name=link_name + '/index'):
  1313. # To accommodate a peculiar Sphinx settings for PickleHTMLBuilder
  1314. # It may lead to broken links for images that are included on
  1315. # this page.
  1316. item = models.Page.objects.filter(link_name=link_name + '/index')
  1317. else:
  1318. # TODO(KGD): return a 404 page template: how to do that?
  1319. log_file.debug('Unknown page requested "%s" from %s. Full request '
  1320. 'was %s' % (link_name, ip_address,
  1321. page_requested.path))
  1322. return HttpResponse('Page not found', status=404)
  1323. page = item[0]
  1324. page.number_of_HTML_visits += 1
  1325. page.save()
  1326. result = render_page_for_web(page, page_requested)
  1327. log_file.info('REQUEST: page = %s from IP=%s; rendered in %f secs.' % (
  1328. link_name, ip_address, time.time()-start_time))
  1329. return result
  1330. def format_comments_for_web(comments):
  1331. """
  1332. Received a list of comment objects from the database; must format these
  1333. comments into appropriate HTML string output to be rendered in the browser.
  1334. """
  1335. resp = ''
  1336. for item in comments:
  1337. if not(item.is_approved):
  1338. continue
  1339. date_str = item.datetime_submitted.strftime("%Y-%m-%d at %H:%M")
  1340. resp += '<li id="%s"><dl><dt><span id="ucomment-author">%s</span>' \
  1341. % (item.node, item.poster.name)
  1342. resp += (('<span class="ucomment-meta">%s</span></dt>'
  1343. '<dd>%s</dd></dl></li>') % (date_str, item.comment_HTML))
  1344. resp += '\n'
  1345. return resp
  1346. def retrieve_comment_HTML(request):
  1347. """
  1348. Retrieves any comments associated with a comment root and returns the
  1349. HTML in a JSON container back to the user.
  1350. http://www.b-list.org/weblog/2006/jul/31/django-tips-simple-ajax-example-part-1/
  1351. """
  1352. if request.method == 'POST':
  1353. # If comment reading/writing is disabled: return nothing
  1354. if not(conf.enable_comments):
  1355. return HttpResponse('', status=200)
  1356. root = request.POST.get('comment_root', '')
  1357. sort_order = request.POST.get('order', 'forward')
  1358. response = ''
  1359. ref = models.CommentReference.objects.filter(comment_root=root)
  1360. if len(ref):
  1361. ref = ref[0]
  1362. associated_comments = ref.comment_set.order_by("datetime_submitted")
  1363. if sort_order == 'reverse':
  1364. associated_comments = reversed(associated_comments)
  1365. response = format_comments_for_web(associated_comments)
  1366. log_file.info('COMMENT: Request HTML for %s from IP=%s' %\
  1367. (root, get_IP_address(request)))
  1368. return HttpResponse(response, status=200)
  1369. else:
  1370. log_file.warn(('A user requested comment reference = %s which did '
  1371. 'exist; this is not too serious; you have probably '
  1372. 'just updated the document and they are accessing '
  1373. 'a prior version.') % root)
  1374. return HttpResponse('', status=200)
  1375. elif request.method == 'GET':
  1376. return HttpResponse('N/A', status=404)
  1377. else:
  1378. log_file.warn((request.method + ' method for comment HTML received; '
  1379. 'not handled; return 400.'))
  1380. return HttpResponse(status=400)
  1381. def retrieve_comment_counts(request):
  1382. """
  1383. Given the list of nodes, it returns a list with the number of comments
  1384. associated with each node.
  1385. """
  1386. start_time = time.time()
  1387. def process_counts(comment_roots, cache_key):
  1388. """
  1389. Accepts a list of comment_roots and populates the ``response_dict``
  1390. with the number of comments associated with each ``comment_root``.
  1391. Also supply a ``cache_key`` so that counts can be cached for a while.
  1392. """
  1393. response_dict = {}
  1394. try:
  1395. if not conf.enable_comments:
  1396. return HttpResponse(simplejson.dumps(response_dict),
  1397. mimetype='application/javascript')
  1398. if cache_key in django_cache.cache:
  1399. log_file.info('COUNTS: returned cached result.')
  1400. response_dict = django_cache.cache.get(cache_key)
  1401. else:
  1402. for key in comment_roots:
  1403. num = 0
  1404. ref = models.CommentReference.objects.filter(\
  1405. comment_root=key)
  1406. if len(ref) > 0:
  1407. ref = ref[0]
  1408. associated_comments = ref.comment_set.all()
  1409. for comment in associated_comments:
  1410. if comment.is_approved:
  1411. num += 1
  1412. # Every key must return a result, even if it is zero
  1413. response_dict[key] = num
  1414. log_file.debug('COUNTS: for %d nodes retrieved in %f secs' %\
  1415. (len(comment_roots), time.time()-start_time))
  1416. # Should we cache the result for future?
  1417. if (time.time()-start_time) > conf.cache_count_duration:
  1418. django_cache.cache.set(cache_key, response_dict,
  1419. timeout=conf.cache_count_timout)
  1420. log_file.info('COUNTS: %s will be cached for %f secs.' % \
  1421. (cache_key, conf.cache_count_timout))
  1422. except Exception as err:
  1423. # only log the error, don't break the app
  1424. UcommentError(err, 'While retrieving comment counts')
  1425. return response_dict
  1426. log_file.debug('COUNTS: request received with method = %s' % request.method)
  1427. if request.method == 'POST':
  1428. comment_roots = sorted(request.POST.keys())
  1429. comment_roots.pop(comment_roots.index('_page_name_'))
  1430. cache_key = 'counts_for__' + convert_web_name_to_link_name(
  1431. request.POST.get('_page_name_', ''))
  1432. response_dict = process_counts(comment_roots, cache_key)
  1433. return HttpResponse(simplejson.dumps(response_dict),
  1434. mimetype='application/javascript')
  1435. elif request.method == 'GET':
  1436. return HttpResponse('N/A', status=404)
  1437. else:
  1438. log_file.info((request.method + ' method for comment counts '
  1439. 'received; not handled; return 400.'))
  1440. return HttpResponse(status=400)
  1441. def retrieve_page_name(request):
  1442. """
  1443. Returns the page title given the page hyperlink in the request (POST), it .
  1444. """
  1445. # TODO(KGD): add section name and sub-page name
  1446. # e.g. Design and analysis of experiments: Fractional factorial designs
  1447. if request.method == 'POST':
  1448. page_name = request.POST.get('_page_name_', '')
  1449. link_name = convert_web_name_to_link_name(page_name)
  1450. item = models.Page.objects.filter(link_name=link_name)
  1451. if not item:
  1452. # Requested the master_doc (main document)
  1453. if link_name == '':
  1454. toc_page = models.Page.objects.filter(is_toc=True).filter(\
  1455. prev_link=None)
  1456. item = toc_page
  1457. #elif models.Page.objects.filter(link_name=link_name + '/index'):
  1458. ## To accommodate a peculiar Sphinx settings for PickleHTMLBuilder
  1459. ## It may lead to broken links for images that are included on
  1460. ## this page.
  1461. #item = models.Page.objects.filter(link_name=link_name + '/index')
  1462. else:
  1463. log_file.debug('NAME request: unknown page "%s"' % link_name)
  1464. return HttpResponse('', status=404)
  1465. page = item[0]
  1466. log_file.debug('NAME request = %s; returned %s' % (link_name, page_name))
  1467. return HttpResponse(page.html_title)
  1468. elif request.method == 'GET':
  1469. return HttpResponse('', status=404)
  1470. else:
  1471. log_file.info((request.method + ' method for comment counts '
  1472. 'received; not handled; return 400.'))
  1473. return HttpResponse(status=400)
  1474. # Publishing update functions
  1475. # ----------------------------
  1476. def publish_update_document(request):
  1477. """
  1478. After pages have been remotely updated and checked back in; the author
  1479. must trigger an update. This compiles the code to HTML for the changed
  1480. files, created database entries for each node, regenerates the PDF output.
  1481. """
  1482. if not request.user.is_authenticated():
  1483. return HttpResponseRedirect(django_reverse('ucomment-admin-signin'))
  1484. msg = call_sphinx_to_publish()
  1485. # An empty message, msg, indicates no problems. Any problems that may
  1486. # have occurred have already been emailed and logged to the admin user.
  1487. if msg:
  1488. return HttpResponse(msg, status=404)
  1489. # TODO(KGD): Convert any changed images to JPG from PNG.
  1490. # Compile PDF here, or even earlier.
  1491. # Update search index tables in Sphinx search
  1492. msg = 'PUBLISH: Update and publish operation successfully completed'
  1493. log_file.info(msg)
  1494. msg += ('<br><p>View your document <a href="%s">from this link</a>'
  1495. '.</p>') % (django_reverse('ucomment-root'))
  1496. return HttpResponse(msg, status=200)
  1497. def call_sphinx_to_publish():
  1498. """ Does the work of publishing the latest version of the document.
  1499. Pulls in the latest revision from the DVCS, publishes the document.
  1500. """
  1501. # TODO(KGD): can we show a list of changed files to the author before
  1502. # s/he clicks "Publish": you will have to dig into Sphinx's
  1503. # internals to see that.
  1504. revision_changeset = update_local_repo()
  1505. log_file.info('PUBLISH: the document with revision changeset = %s' % \
  1506. revision_changeset)
  1507. # Copy over the ucomment extension to the local repo: that way the author
  1508. # does not have to include it in his/her repo of the document.
  1509. srcdir = os.path.join(conf.application_path, 'sphinx-extensions') + os.sep
  1510. if os.name == 'posix':
  1511. try:
  1512. os.symlink(srcdir + 'ucomment-extension.py',
  1513. conf.local_repo_physical_dir+os.sep+'ucomment-extension.py')
  1514. except OSError as err:
  1515. if err.errno == 17: # File already exists
  1516. pass
  1517. else:
  1518. UcommentError(err, ('When creating symlink for ucomment '
  1519. 'extension'))
  1520. else:
  1521. try:
  1522. shutil.copy(srcdir + 'ucomment-extension.py',
  1523. conf.local_repo_physical_dir)
  1524. except shutil.Error:
  1525. UcommentError(err, ('When copying the ucomment extension - not '
  1526. 'found'))
  1527. # When Sphinx is called to compile the document, it is expected that the
  1528. # document's ``conf.py`` has the correct path to the extensions.
  1529. status = StringIO()
  1530. warning = StringIO()
  1531. # TODO(KGD): can we send this to the logfile instead of status and warning?
  1532. # will allow us to track compiling of large documents via logfile
  1533. # TODO(KGD): investigate using http://ajaxpatterns.org/Periodic_Refresh
  1534. # also see: http://www.ajaxprojects.com/ajax/tutorialdetails.php?itemid=9
  1535. # The code below simulates the command-line call
  1536. # $ sphinx-build -a -b pickle -d _build/doctrees . _build/pickle
  1537. # The ``conf.local_repo_physical_dir`` must not have a trailing slash,
  1538. # and must be "clean", otherwise lookups later with ``file_linkname_map``
  1539. # will fail.
  1540. conf.local_repo_physical_dir = os.path.abspath(conf.local_repo_physical_dir)
  1541. build_dir = os.path.abspath(conf.local_repo_physical_dir+os.sep + '_build')
  1542. ensuredir(build_dir)
  1543. # TODO(KGD): make this setting a choice in the web before publishing
  1544. # Note: FRESHENV: if True: we must delete all previous comment references,
  1545. # to avoid an accumulation of references in the database.
  1546. conf.use_freshenv = False
  1547. try:
  1548. app = Sphinx(srcdir=conf.local_repo_physical_dir,
  1549. confdir=conf.local_repo_physical_dir,
  1550. outdir = build_dir + os.sep + 'pickle',
  1551. doctreedir = build_dir + os.sep + 'doctrees',
  1552. buildername = 'pickle',
  1553. status = status,
  1554. warning = warning,
  1555. freshenv = conf.use_freshenv,
  1556. warningiserror = False,
  1557. tags = [])
  1558. if app.builder.name != 'pickle':
  1559. emsg = ('Please use the Sphinx "pickle" builder to compile the '
  1560. 'RST files.')
  1561. log_file.error(emsg)
  1562. # TODO(KGD): return HttpResponse object still
  1563. return
  1564. # We also want to compile the documents using the text builder (search).
  1565. # But rather than calling Sphinx from the start, just create a text
  1566. # builder and run it right after the pickle builder. Any drawbacks?
  1567. text_builder_cls = getattr(__import__('sphinx.builders.text', None,
  1568. None, ['TextBuilder']), 'TextBuilder')
  1569. text_builder = text_builder_cls(app)
  1570. pickle_builder = app.builder
  1571. if 'ucomment' not in app.env.config:
  1572. emsg = ('The document was not published: please ensure the '
  1573. "``ucomment`` dictionary appears in your document's"
  1574. '`conf.py`` file.')
  1575. UcommentError(emsg)
  1576. return emsg
  1577. # Call the ``pickle`` builder
  1578. app.env.config.ucomment['revision_changeset'] = revision_changeset
  1579. app.env.config.ucomment['skip-cleanup'] = True
  1580. app.build()
  1581. # Log any warnings to the logfile.
  1582. log_file.info('PUBLISH: Sphinx compiling HTML (pickle) successfully.')
  1583. if warning.tell():
  1584. warning.seek(0)
  1585. for line in warning.readlines():
  1586. log_file.warn('PUBLISH: ' + line)
  1587. # Now switch to the text builder (to create the search index)
  1588. app.env.config.ucomment['skip-cleanup'] = False
  1589. app.builder = text_builder
  1590. try:
  1591. app.build()
  1592. except SphinxError as e:
  1593. log_file.warn(('PUBLISH: could not successfully publish the text-'
  1594. 'based version of the document (used for searching).'
  1595. 'Error reported = %s') % str(e))
  1596. # TODO(KGD): defer clean-up to after RST files are used as search
  1597. log_file.debug('PUBLISH: Sphinx compiling TEXT version successfully.')
  1598. if warning.tell():
  1599. warning.seek(0)
  1600. for line in warning.readlines():
  1601. log_file.warn('PUBLISH WARNING: ' + line.strip())
  1602. # Switch back to the pickle builder (we need this when doing the
  1603. # database commits)
  1604. app.builder = pickle_builder
  1605. except SphinxError as e:
  1606. msg = 'A Sphinx error occurred (error type = %s): %s' % \
  1607. (e.category, str(e))
  1608. log_file.error(msg)
  1609. alert_system_admin(msg)
  1610. return msg
  1611. if app.statuscode == 0:
  1612. commit_updated_document_to_database(app)
  1613. else:
  1614. log_file.error(('The Sphinx status code was non-zero. Please check '
  1615. 'lines in the log file above this one for more info.'))
  1616. return ''
  1617. def commit_updated_document_to_database(app):
  1618. """
  1619. Two types of objects must be commited to the database to complete the
  1620. publishing of the document:
  1621. 1. Each (web)pages in the document
  1622. 2. All the comment references
  1623. We need to take some extra care with the comment references: remove unused
  1624. comment references, find and take care of references that were orphaned
  1625. (see below), and add new comment references.
  1626. """
  1627. sphinx_settings = app.env.config.ucomment
  1628. # Used to convert ``class="reference internal"`` to
  1629. # ``class="ucomment-internal-reference"``
  1630. local_toc_re = re.compile(r'class="reference internal"')
  1631. replace_toc = 'class="ucomment-internal-reference"'
  1632. # First, generate a dictionary of page_name -> next_page_name
  1633. # The first page = TOC = app.env.config.master_doc
  1634. # The last page has no next page link.
  1635. # E.g" {'toc': 'page1', 'page2': 'page3', 'page1': 'page2', 'page3': None}
  1636. all_files = app.env.found_docs
  1637. document_order = {}
  1638. for fname in list(all_files):
  1639. is_toc = False
  1640. if fname == app.env.config.master_doc:
  1641. is_toc = True
  1642. name = app.builder.outdir + os.sep + fname + app.builder.out_suffix
  1643. try:
  1644. f = file(name, 'r')
  1645. page_info = pickle.load(f)
  1646. except IOError:
  1647. raise IOError('An IOError occurred when processing %s' % name)
  1648. finally:
  1649. f.close()
  1650. # What is the page's HTML title?
  1651. link_name = page_info['current_page_name']
  1652. has_next = False
  1653. for item in page_info['rellinks']:
  1654. if item[3] == 'next':
  1655. has_next = True
  1656. next_section = item[0]
  1657. break
  1658. if not has_next:
  1659. next_section = None
  1660. document_order[link_name] = next_section
  1661. # Next, order the pages. The ``ordered_names`` list will grow in size
  1662. # TODO(KGD): The problem comes from how the document is split in
  1663. # "LVM" section index.rst
  1664. page_names = document_order.keys()
  1665. ordered_names = [app.env.config.master_doc]
  1666. for idx in xrange(len(page_names)):
  1667. if ordered_names[idx] is not None:
  1668. ordered_names.append(document_order[ordered_names[idx]])
  1669. else:
  1670. break
  1671. # The last ``None`` element designates the end of the document
  1672. ordered_names.pop()
  1673. # Check if there were docs not included in the toctree: add them at the end
  1674. ordered_names.extend(set(page_names) - set(ordered_names))
  1675. # Now commit each (web)page to the DB in order
  1676. # ---------------------------------------------
  1677. prior_pages = models.Page.objects.all()
  1678. file_linkname_map = {}
  1679. for fname in reversed(ordered_names):
  1680. is_toc = is_chapter_index = False
  1681. if fname in app.env.config.ucomment['toc_docs']:
  1682. is_chapter_index = True
  1683. if fname == app.env.config.master_doc:
  1684. is_toc = True
  1685. is_chapter_index = False
  1686. name = app.builder.outdir + os.sep + fname + app.builder.out_suffix
  1687. try:
  1688. f = file(name, 'r')
  1689. page_info = pickle.load(f)
  1690. except IOError:
  1691. raise IOError('An IOError occurred when processing %s' % name)
  1692. finally:
  1693. f.close()
  1694. # Aim: get a text version of each page to generate a search index
  1695. # Get the RST source code, clean it, and store that in the database.
  1696. # TOC and chapter indicies are not to be indexed for the search engine.
  1697. src = app.builder.srcdir + os.sep + fname + app.config.source_suffix
  1698. try:
  1699. unsplit_source_name = sphinx_settings['split_sources'][src]
  1700. except KeyError:
  1701. unsplit_source_name = src
  1702. if is_toc or is_chapter_index:
  1703. # Good side-effect: TOC pages will never show up in search results
  1704. search_text = ''
  1705. else:
  1706. try:
  1707. f = file(src, 'r')
  1708. search_text = ''.join(f.readlines())
  1709. search_text = sanitize_search_text(search_text)
  1710. except IOError:
  1711. raise IOError(('An IOError occurred when processing RST '
  1712. 'source file: %s' % src))
  1713. finally:
  1714. f.close()
  1715. # By what name will the HTML page be accessed?
  1716. link_name = page_info['current_page_name']
  1717. # Now get some link information to add to the page. Not every
  1718. # page has a parent; for those pages, set the parent to be the TOC.
  1719. # The parent link for the TOC is the TOC
  1720. try:
  1721. parent_link, _ = models.Link.objects.get_or_create(
  1722. link = page_info['parents'][0]['link'],
  1723. title = page_info['parents'][0]['title'])
  1724. except IndexError:
  1725. # The highest level TOC does not have a parent: this is used
  1726. # to correctly output the navigation bar.
  1727. if is_toc:
  1728. parent_link = None
  1729. else:
  1730. parent_link, _ = models.Link.objects.get_or_create(
  1731. link = u'../',
  1732. title = u'')
  1733. try:
  1734. next_link, _ = models.Link.objects.get_or_create(
  1735. link = page_info['next']['link'],
  1736. title = page_info['next']['title'])
  1737. except TypeError:
  1738. # Only the last section in the document won't have a next link
  1739. next_link = None
  1740. try:
  1741. prev_link, _ = models.Link.objects.get_or_create(
  1742. link = page_info['prev']['link'],
  1743. title = page_info['prev']['title'])
  1744. except TypeError:
  1745. # Only the TOC won't have a previous link. We rely on this fact to
  1746. # filter the pages to locate the root TOC.
  1747. if is_toc:
  1748. prev_link = None
  1749. else:
  1750. # This is for pages that happened to be compiled, but don't
  1751. # fall in the document structure. For example, we have RST
  1752. # files, but they were not included in any toctree, yet they
  1753. # were compiled by Sphinx.
  1754. prev_link, _ = models.Link.objects.get_or_create(
  1755. link = u'../',
  1756. title = u'')
  1757. # While we are here, create a "root TOC" link with the appropriate
  1758. # title: use the ``project`` setting from the Sphinx conf.py file.
  1759. # The actual link will be determined on page request.
  1760. models.Link.objects.get_or_create(link = '___TOC___',
  1761. title = app.env.config.project)
  1762. # Generate a "local" table of contents: useful for long pages; will not
  1763. # be generated if there is only one subsection on the page, nor will
  1764. # it be generated for pages that are primarily an index page.
  1765. if is_toc or is_chapter_index:
  1766. # Good side-effect: TOC pages will never show up in search results
  1767. local_toc = ''
  1768. else:
  1769. local_toc = page_info['toc']
  1770. local_toc, number = local_toc_re.subn(replace_toc, local_toc)
  1771. if number == 1:
  1772. local_toc = ''
  1773. # TODO(KGD): take a look at the ``app.env.resolve_toctree`` function
  1774. # in Sphinx.
  1775. # Use the Project's name for the master_doc (i.e. the main TOC page)
  1776. # for the document.
  1777. if is_toc and not(is_chapter_index):
  1778. page_info['title'] = app.env.config.project
  1779. # If a page with the same link (an unique field) is found, then update
  1780. # the page. Do not delete the page, because that will remove any
  1781. # associated comments. See the ``models.py`` file for ``Comment``
  1782. # definition -- the ``Page`` objects are a ForeignKey.
  1783. existing_page = prior_pages.filter(link_name=link_name)
  1784. if existing_page:
  1785. page = existing_page[0]
  1786. if page.search_text.encode('utf-8') != search_text:
  1787. # If the content has changed, only then change ``updated_on``
  1788. page.updated_on = datetime.datetime.now()
  1789. if is_toc or is_chapter_index:
  1790. if page.body != '\n' + page_info['body'] + '\n':
  1791. page.updated_on = datetime.datetime.now()
  1792. page.revision_changeset = sphinx_settings['revision_changeset']
  1793. page.html_title = page_info['title']
  1794. page.is_toc = is_toc or is_chapter_index
  1795. page.source_name = unsplit_source_name
  1796. page.PDF_file_name = 'STILL_TO_COME.pdf'
  1797. page.body = '\n' + page_info['body'] + '\n'
  1798. page.search_text = search_text
  1799. page.parent_link = parent_link
  1800. page.next_link = next_link
  1801. page.prev_link = prev_link
  1802. page.local_toc = local_toc
  1803. page.save()
  1804. else:
  1805. defaults = {'revision_changeset': \
  1806. sphinx_settings['revision_changeset'],
  1807. 'link_name': link_name,
  1808. 'html_title': page_info['title'],
  1809. 'is_toc': is_toc or is_chapter_index,
  1810. 'source_name': unsplit_source_name,
  1811. 'PDF_file_name': 'STILL_TO_COME.pdf',
  1812. 'number_of_HTML_visits': 0,
  1813. 'body': '\n' + page_info['body'] + '\n',
  1814. 'search_text': search_text,
  1815. 'parent_link': parent_link,
  1816. 'next_link': next_link,
  1817. 'prev_link': prev_link,
  1818. 'local_toc': local_toc,}
  1819. created = models.Page.objects.create(**defaults)
  1820. file_linkname_map[app.srcdir + os.sep + fname + \
  1821. app.env.config.source_suffix] = link_name
  1822. log_file.info('PUBLISH: pages saved to the database.')
  1823. # Next, deal with the comment references
  1824. # ---------------------------------------------
  1825. to_update = []
  1826. to_remove = []
  1827. orphans = []
  1828. prior_references = models.CommentReference.objects.all()
  1829. # Only if we used a fresh environment. Because then all the comment
  1830. # references are regenerated.
  1831. if conf.use_freshenv:
  1832. for item in prior_references:
  1833. orphans.append(item.comment_root)
  1834. for item in sphinx_settings['comment_refs']:
  1835. # First check whether this comment reference exists in the database;
  1836. # If not, add it. If it does exist, add it to the list of references
  1837. # to update next.
  1838. defaults={'revision_changeset': sphinx_settings['revision_changeset'],
  1839. 'file_name': item.source,
  1840. 'page_link_name': file_linkname_map[item.link_name],
  1841. 'node_type': item.node,
  1842. 'line_number': item.line,
  1843. 'comment_root': item.root,
  1844. 'comment_root_is_used': False}
  1845. ref, created = models.CommentReference.objects.get_or_create(
  1846. comment_root=item.root, # comment_root is a unique field
  1847. defaults=defaults)
  1848. if not created:
  1849. try:
  1850. orphans.remove(item.root)
  1851. except ValueError:
  1852. pass
  1853. to_update.append(item)
  1854. # Update the references that already exist in the DB. In most cases these
  1855. # references are used as ForeignKeys in ``Comment`` objects.
  1856. # The (very unusual) case when they don't exist in the DB is when the RST
  1857. # repo is processed the first time and there happen to be ucomment
  1858. # directives in the RST source. In this case we would have created a
  1859. # comment reference in the code above (using ``get_or_create()``)
  1860. for item in to_update:
  1861. ref = prior_references.filter(comment_root=item.root)[0]
  1862. ref.revision_changeset = sphinx_settings['revision_changeset']
  1863. ref.file_name = item.source
  1864. ref.node_type = item.node
  1865. ref.line_number = item.line
  1866. ref.date_added = datetime.datetime.now()
  1867. ref.save()
  1868. # The above code is quite useful: if the author ever happens to move the
  1869. # ucomment directives around, even to a different file, the comments
  1870. # associated with that reference will still appear at the new location.
  1871. # Orphans occur if the user removed the ucomment directive from the RST
  1872. # source.
  1873. # They are problematic only if they happen to have an associated ``Comment``
  1874. # object in the database (which is expected, since a CommentReference is
  1875. # created the same time ).
  1876. for item in orphans[:]:
  1877. # Remove comment references from the list that don't have comments.
  1878. ref = prior_references.filter(comment_root=item)[0]
  1879. if ref.comment_set.exists() == False:
  1880. orphans.remove(item)
  1881. to_remove.append(item)
  1882. # It is safe to remove these references, because they do not have any
  1883. # comments associated with them (CommentReference objects only appear as
  1884. # ForeignKeys in ``Comment`` objects.
  1885. for remove_root in to_remove:
  1886. to_remove = prior_references.filter(comment_root=remove_root)
  1887. if to_remove:
  1888. to_remove[0].delete()
  1889. for orphan_id in orphans:
  1890. # These arise when comment references are removed from the text by the
  1891. # author. But, these references still have comments associated with
  1892. # them in the database, but are not made available on any page,
  1893. # nor do they have a valid comment reference.
  1894. orphan = prior_references.filter(comment_root = orphan_id)[0]
  1895. # Create an unreachable page (starts with '_')
  1896. defaults = {'revision_changeset': sphinx_settings['revision_changeset'],
  1897. 'link_name': '_orphans_',
  1898. 'html_title': '_orphans_',
  1899. 'is_toc': False,
  1900. 'source_name': '_orphans_',
  1901. 'PDF_file_name': '_orphans_',
  1902. 'number_of_HTML_visits': 0,
  1903. 'body': '_orphans_',
  1904. 'search_text': '',
  1905. 'parent_link': None,
  1906. 'next_link': None,
  1907. 'prev_link': None,
  1908. 'local_toc': '',}
  1909. orphan_page, created = models.Page.objects.get_or_create(
  1910. link_name='_orphans_',
  1911. defaults=defaults)
  1912. # Create an comment reference that would not normally be created
  1913. defaults = {'revision_changeset': '-1',
  1914. 'file_name': '_orphans_',
  1915. 'node_type': '_orphan_',
  1916. 'line_number': 0,
  1917. 'comment_root': '_ORFN_',
  1918. 'comment_root_is_used': True}
  1919. orphan_ref, created = models.CommentReference.objects.get_or_create(
  1920. comment_root='_ORFN_',
  1921. defaults=defaults)
  1922. # Try to de-orphan any comments on subsequent republishing (author may
  1923. # have realized the mistake and brought the node back).
  1924. if orphan_id == '_ORFN_':
  1925. # It's a little hard to go back from the orphaned comment to find
  1926. # its original reference. But we will use the fact the re-created
  1927. # reference's comment_root will be the same as the orphaned
  1928. # comment's parent (or at least the first ``conf.root_node_length``
  1929. # characters of the parent).
  1930. for comment in orphan.comment_set.all():
  1931. former_parent = comment.parent[0:conf.root_node_length]
  1932. new_parent = prior_references.filter(comment_root=former_parent)
  1933. if len(new_parent):
  1934. comment.reference = new_parent[0]
  1935. # Now find the page on which that comment reference is used
  1936. all_pages = models.Page.objects.filter(
  1937. link_name=new_parent[0].page_link_name)
  1938. if len(all_pages):
  1939. comment.page = all_pages[0]
  1940. comment.save()
  1941. log_file.warn(('PUBLISH: re-parented the orphan comment '
  1942. 'reference %s; now appears on page "%s".')%\
  1943. (comment.reference.comment_root,
  1944. new_parent[0].page_link_name))
  1945. n_orphans = 0
  1946. for comment in orphan.comment_set.all():
  1947. comment.reference = orphan_ref
  1948. comment.page = orphan_page
  1949. comment.save()
  1950. n_orphans += 1
  1951. log_file.warn(('PUBLISH: dealt with comment reference orphan: %s; '
  1952. 'was orphaned between revision %s (last known use) '
  1953. 'and revision %s (current). Has %d associated '
  1954. 'comments.') % (orphan_id, orphan.revision_changeset,
  1955. sphinx_settings['revision_changeset'], n_orphans))
  1956. # Dumping and loading fixtures
  1957. # ----------------------------
  1958. def dump_relevent_fixtures(request):
  1959. """
  1960. Dumps certain model types to fixture files.
  1961. """
  1962. if not request.user.is_authenticated():
  1963. return HttpResponseRedirect(django_reverse('ucomment-admin-signin'))
  1964. style = 'xml'
  1965. fixtures = ( (models.Comment, 'Comment.'+style),
  1966. (models.CommentReference, 'CommentReference.'+style),
  1967. (models.CommentPoster, 'CommentPoster.'+style),
  1968. (models.Hit, 'Hit.'+style),
  1969. )
  1970. log_file.debug('FIXTURES: About to dump fixtures to file.')
  1971. for model, filename in fixtures:
  1972. data = serializers.serialize(style, model.objects.all(), indent=2,
  1973. use_natural_keys = True)
  1974. try:
  1975. ensuredir(conf.django_fixtures_dir)
  1976. full_filename = os.path.join(conf.django_fixtures_dir, filename)
  1977. f = file(full_filename, 'w')
  1978. try:
  1979. f.write(data)
  1980. finally:
  1981. f.close()
  1982. log_file.info('FIXTURES: Dumped %s objects to %s' %\
  1983. (model, full_filename))
  1984. except IOError:
  1985. return HttpResponse(('An IOError occurred while writing %s '
  1986. 'objects to the fixture file: %s' % \
  1987. (model, filename)), status=200)
  1988. return HttpResponse(('All fixtures successfully saved to %s' % \
  1989. conf.django_fixtures_dir), status=200)
  1990. def load_from_fixtures(request):
  1991. pass
  1992. #TODO(KGD): still to come
  1993. # Searching the document text
  1994. # ---------------------------
  1995. def sanitize_search_text(text):
  1996. """
  1997. Cleans the RST source code to remove:
  1998. * .. ucomment:: directives
  1999. * Underlines for headings: e.g. "-----"
  2000. * inline math roles :math:`....` (leaves the part inside the role beind
  2001. * cross-references: e.g. .. _my-cross-reference:
  2002. * .. rubric:: XYZ is left just as XYZ
  2003. TODO(KGD):
  2004. * table_row = re.compile(r'(={2,})|(\+-{2,})')
  2005. * figure and image directives; and the options that go with them
  2006. """
  2007. text_list = text.split('\n')
  2008. ucomment_lines = re.compile(r'^\s*\.\. ucomment::\s*(.*?):')
  2009. VALID_TITLES = ['!', '"', '#', '$', '%', "'", '(', ')', '*', '+',
  2010. ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
  2011. '@', '[', '\\', ']', '^', '_', '`', r'{', '|',
  2012. '}', '~']
  2013. div_re_str = r'^'
  2014. for entry in VALID_TITLES:
  2015. if entry in ['^', '*', '+', '$', '(', ')', '-', '.', '?', '[', ']',
  2016. '\\', '{', '}', '|']:
  2017. entry = '\\' + entry
  2018. div_re_str += entry + r'{3,}|'
  2019. div_re_str = div_re_str[0:-1] + '$'
  2020. div_re = re.compile(div_re_str)
  2021. crossref_re = re.compile(r'^\s*\.\. _(.*?):')
  2022. rubric_re = re.compile(r'^\s*(\.\. rubric:: )(.*?)')
  2023. # Remove the :math:`...` part, leaving only the ... portion behind.
  2024. math_role = re.compile(r'(:math:`)(.*?)(`)')
  2025. out = []
  2026. for line in text_list:
  2027. line = math_role.sub('\g<2>', line)
  2028. line = rubric_re.sub('\g<2>', line)
  2029. out.append(line)
  2030. if ucomment_lines.match(line):
  2031. out.pop()
  2032. continue
  2033. if div_re.match(line):
  2034. out.pop()
  2035. continue
  2036. if crossref_re.match(line):
  2037. out.pop()
  2038. continue
  2039. return '\n'.join(out)
  2040. def format_search_pages_for_web(pages, context, with_case):
  2041. """
  2042. Receives a dictionary. The keys are ``Page`` objects, and the corresponding
  2043. values are the list of words that appear on that page.
  2044. Will format these into appropriate HTML string output that is sent to the
  2045. user.
  2046. Uses ``context`` number of characters around the search word to display
  2047. in the results.
  2048. The ``with_case`` input will either ``True`` (indicating case-sensitive
  2049. search was requested), or ``False``.
  2050. """
  2051. def common_span(spanA, spanB):
  2052. """
  2053. Takes two tuples (containing 2 integers) and returns the largest region
  2054. spanned, provided they overlap. If no overlap, then return two spans
  2055. exactly as they were received.
  2056. Can be improved later on to avoid really large spans by taking
  2057. some sort of compromised span.
  2058. """
  2059. # First sort them
  2060. swapped = False
  2061. if spanA[0] > spanB[0]:
  2062. spanA, spanB = spanB, spanA
  2063. swapped = True
  2064. # If they overlap, return the intersection of the spans
  2065. if spanA[1] > spanB[0]:
  2066. return ((spanA[0], max(spanA[1], spanB[1])), )
  2067. elif swapped:
  2068. return (spanB, spanA)
  2069. else:
  2070. return (spanA, spanB)
  2071. def clean_search_output_for_web(text):
  2072. """
  2073. Returns a cleaned version of ``text`` suitable for display in a browser.
  2074. * Removes newline markers
  2075. * Converts the string to web-safe output
  2076. """
  2077. text = re.sub('\n', ' ', text)
  2078. #text = html.strip_tags(text)
  2079. text = django_html.escape(text)
  2080. # replace('&', '&amp;')
  2081. #.replace('<', '&lt;')
  2082. #.replace('>', '&gt;')
  2083. #.replace('"', '&quot;')
  2084. #.replace("'", '&#39;'))
  2085. return text
  2086. results = defaultdict(list)
  2087. page_counts = {}
  2088. n_pages = 0
  2089. for page, search_words in pages.iteritems():
  2090. # For each word, find the first N (use N=3) appearances in the text.
  2091. # Get the context to the left and right of the word, store the ranges
  2092. # in a list.
  2093. page_text = page.search_text
  2094. maxlen = len(page_text)
  2095. N_instances = 3
  2096. all_spans = []
  2097. page_counts[page] = 0
  2098. for word in search_words:
  2099. # Find these words, ensuring they are whole words only, where the
  2100. # definition is locale (re.L) and unicode (re.U) dependent.
  2101. if with_case:
  2102. word_iter = re.finditer(r'\b' + word + r'\b', page_text,
  2103. re.L + re.U)
  2104. else:
  2105. word_iter = re.finditer(r'\b' + word + r'\b' , page_text,
  2106. re.I + re.L + re.U)
  2107. for idx, reobj in enumerate(word_iter):
  2108. if idx >= N_instances:
  2109. # How many entries were there ?
  2110. page_counts[page] += len(list(word_iter)) + 1
  2111. break
  2112. span = reobj.span()
  2113. span = (max(0, span[0]-context), min(maxlen, span[1]+context))
  2114. all_spans.append(span)
  2115. page_counts[page] += 1
  2116. # We don't always find the text (i.e. a false result) when using sqlite
  2117. # databases and requesting a case-sensitive search. Just skip over these
  2118. # pages.
  2119. if len(all_spans) == 0:
  2120. continue
  2121. # Now combine all the ranges together, consolidating overlapping regions
  2122. final_spans = {all_spans[0]: None} # Use a dict to ensure unique spans
  2123. for span in all_spans[1:]:
  2124. keys = final_spans.keys()
  2125. for key in keys:
  2126. overlap = common_span(key, span)
  2127. if len(overlap) == 1:
  2128. del final_spans[key]
  2129. final_spans[overlap[0]] = None
  2130. else:
  2131. final_spans[overlap[1]] = None
  2132. # Extract the text within the range
  2133. display = ''
  2134. startend = '...'
  2135. for span in sorted(final_spans.keys()):
  2136. display += startend + page_text[span[0]:span[1]]
  2137. display += startend
  2138. # Clean-up the text for web rendering
  2139. display = clean_search_output_for_web(display)
  2140. # Finally, highlight the search terms inside ``<span>`` brackets
  2141. for word in search_words:
  2142. if with_case:
  2143. word_re = re.compile(r'(%s)'%word)
  2144. else:
  2145. word_re = re.compile(r'(%s)'%word, re.I)
  2146. display = word_re.sub(\
  2147. r'<span id="ucomment-search-term">\1</span>', display)
  2148. results[page].append('<li><a href="%s">%s</a>' % (\
  2149. django_reverse('ucomment-root') + page.link_name +\
  2150. '/?highlight=' + ' '.join(search_words) + \
  2151. '&with_case=' + str(with_case), page.html_title))
  2152. if page_counts[page] > 1:
  2153. results[page].append(('<span id="ucomment-search-count">'
  2154. '[%d times]</span>') % page_counts[page])
  2155. else:
  2156. results[page].append(('<span id="ucomment-search-count">'
  2157. '[%d time]</span>') % page_counts[page])
  2158. results[page].append('<div id="ucomment-search-result-context">')
  2159. results[page].append('%s</div></li>' % display)
  2160. n_pages += 1
  2161. resp = ['<div id=ucomment-search-results>\n<h2>Search results</h2>']
  2162. if n_pages == 0:
  2163. resp.append('<p>There were no pages matching your query.</p></div>')
  2164. return ''.join(resp)
  2165. elif n_pages == 1:
  2166. resp.append(('Found 1 page matching your search query.'))
  2167. else:
  2168. resp.append(('Found %d pages matching your search query.')%len(pages))
  2169. # How to sort by relevance: crude metric is to sort the pages in order
  2170. # of number of counts from high to low.
  2171. out = sorted(zip(page_counts.values(), page_counts.keys()), reverse=True)
  2172. entries = []
  2173. for item in out:
  2174. # access the dictionary by ``page`` and get the contextual output string
  2175. entries.append('\n'.join(results[item[1]]))
  2176. resp.append('\n\t<ul>\n' + '\t\t\n'.join(entries) + '\t</ul>\n</div>')
  2177. return ''.join(resp)
  2178. def search_document(request, search_terms='', search_type='AND',
  2179. with_case=False):
  2180. """ Will search the document for words within the string ``search_terms``.
  2181. The results will be returned as hyperlinks containing ``CONTEXT`` number of
  2182. characters around the search terms.
  2183. By default and "OR"-based search is performed, so that pages containing
  2184. one or more of the search terms will be returned. The other alternative is
  2185. to use ``search_type`` as "AND", requiring that all search terms must
  2186. appear on the page.
  2187. By default the search is case-insensitive (``with_case`` is False).
  2188. """
  2189. CONTEXT = 90 # characters around the search term
  2190. if request.method == 'GET':
  2191. search = str(search_terms)
  2192. search_type = str(search_type).strip('/').upper()
  2193. if search_type == '':
  2194. search_type = 'AND'
  2195. with_case = str(with_case).strip('/').lower()=='true'
  2196. if with_case == '':
  2197. with_case = False
  2198. elif request.method == 'POST':
  2199. # This seemingly messy code redirects back to a GET request so that
  2200. # the user can see how the search URL is formed. Search can be done
  2201. # from the URL: e.g. ``_search/python guido/AND/case=False`` at the end
  2202. # of the URL will search for "python" AND "guido" ignoring case.
  2203. search = request.POST['search_terms']
  2204. search_type = str(request.POST.get('search_type', search_type)).upper()
  2205. with_case = str(request.POST.get('with_case',
  2206. with_case)).lower()=='true'
  2207. return HttpResponseRedirect(\
  2208. django_reverse('ucomment-search-document') + \
  2209. search +'/'+ search_type +'/'+ 'case=' + str(with_case))
  2210. start_time = time.time()
  2211. search_for = search.split()
  2212. results = defaultdict(list)
  2213. n_search_words = 0
  2214. for word in search_for:
  2215. # Filter out certain stop words
  2216. if word not in STOP_WORDS:
  2217. if with_case:
  2218. pages = models.Page.objects.filter(search_text__icontains=word)
  2219. else:
  2220. pages = models.Page.objects.filter(
  2221. search_text__icontains=word.lower())
  2222. n_search_words += 1
  2223. for page in pages:
  2224. results[page].append(word)
  2225. # If it's an "or" search then we simply display all pages that appear as
  2226. # keys in ``results``. For an "AND" search, we only display pages that
  2227. # have ``n_search_words`` entries in the corresponding dictionary value.
  2228. if search_type == 'AND':
  2229. out = {}
  2230. for page, found_words in results.iteritems():
  2231. if len(found_words) == n_search_words:
  2232. out[page] = found_words
  2233. results = out
  2234. web_output = format_search_pages_for_web(results, CONTEXT, with_case)
  2235. # Create a psuedo-"Page" object containing the search results and return
  2236. # that to the user. It is infact a named tuple, which has the same
  2237. # behaviour as a ``Page`` object
  2238. page = namedtuple('Page', ('revision_changeset next_link prev_link sidebar '
  2239. 'parent_link html_title body local_toc link_name is_toc '
  2240. 'number_of_HTML_visits updated_on'))
  2241. search_output = page(revision_changeset='',
  2242. next_link = None,
  2243. prev_link = None,
  2244. parent_link = models.Link(),
  2245. html_title = 'Search results',
  2246. body = web_output,
  2247. local_toc = '',
  2248. is_toc = True, # prevents sidebar
  2249. sidebar = '', # but still set it to empty
  2250. number_of_HTML_visits = 0,
  2251. updated_on = datetime.datetime.now(),
  2252. link_name = request.path.lstrip(\
  2253. django_reverse('ucomment-root')[0:-1]))
  2254. log_file.info('SEARCH: "%s" :: took %f secs' % (search,
  2255. time.time() - start_time))
  2256. return render_page_for_web(search_output, request, search)
  2257. def admin_signin(request):
  2258. """
  2259. Perform administrator/author features for the application.
  2260. * (Re)publish the document to the web
  2261. * Dump all fixtures to disk - for backup purposes.
  2262. """
  2263. if request.user.is_authenticated():
  2264. msg = ('<ul>'
  2265. '<li><a href="%s">Table of Contents for your document</a>'
  2266. '<li><a href="%s">The Django admin page for your site</a>'
  2267. '<li><a href="%s">Publish or update the document</a>'
  2268. '<li>Backup your application by <a href="%s">dumping objects '
  2269. 'to fixtures</a>') % \
  2270. (django_reverse('ucomment-root'),
  2271. django_reverse('admin:index'),
  2272. django_reverse('ucomment-publish-update-document'),
  2273. django_reverse('ucomment-dump-fixtures'))
  2274. return HttpResponse(msg, status=200)
  2275. elif request.method == 'GET':
  2276. log_file.info('Entering the admin section; IP = %s' % \
  2277. (get_IP_address(request)))
  2278. context = {}
  2279. context.update(csrf(request))
  2280. msg = ( '<p>Please sign-in first with your Django (admin) credentials:'
  2281. r'<form action="%s" method="POST">{%% csrf_token %%}'
  2282. '<label for="username">Username:</label>'
  2283. ' <input type="text" name="username" /><br />'
  2284. '<label for="password">Password:</label>'
  2285. ' <input type="password" name="password" />'
  2286. '<input type="submit" value="Log in"></form>') % (request.path)
  2287. resp = template.Template(msg)
  2288. html_body = resp.render(template.Context(context))
  2289. return HttpResponse(html_body, status=200)
  2290. elif request.method == 'POST':
  2291. username = request.POST.get('username', '')
  2292. password = request.POST.get('password', '')
  2293. user = django_auth.authenticate(username=username, password=password)
  2294. if user is not None and user.is_active:
  2295. django_auth.login(request, user)
  2296. return HttpResponseRedirect(request.path)
  2297. else:
  2298. # TODO(KGD): respond that user does not exist, or that password
  2299. # is incorrect; redirect to log in page again.
  2300. return HttpResponseRedirect(request.path)