views.py | searchcode

/views.py

https://bitbucket.org/kevindunn/ucommentapp/
Python | 2674 lines | 2231 code | 135 blank | 308 comment | 152 complexity | 7c74b08b2d1d2375c4fd12459027fdd9 MD5 | raw file

"""
:copyright: Copyright 2010, by Kevin Dunn
:license: BSD, see LICENSE file for details.

TO ADD:
 ---------
 Add a time-out to comment compiling (5 seconds): then force a return
 Deferred processing of comments using a message queue:
    http://www.turnkeylinux.org/blog/django-celery-rabbitmq
    Why?  Because we have to do a repo checkout, update the files, commit

 CAPTCHA
 Use PostgreSQL instead
 Handle the case where commit fails because a user name is not present.

FUTURE
======
Create a web-interface to approve or reject comments; allowing the comment admin
to pick various reasons to reject comment and append extra info to the poster.
Also provide option NOT to send an email at all (simply reject the posting).
"""

# Standard library imports
import os, sys, random, subprocess, pickle, re, logging.handlers, datetime
import smtplib, time, shutil
from collections import defaultdict, namedtuple
from StringIO import StringIO

# Settings for the ucomment application
from conf import settings as conf

# Django and Jinja import imports
from django import forms, template
from django.shortcuts import render_to_response
from django.contrib import auth as django_auth
from django.core import cache as django_cache
from django.core import serializers
from django.core.context_processors import csrf
from django.core.mail import send_mail, BadHeaderError
from django.http import HttpResponse, HttpResponseRedirect
from django.core.urlresolvers import reverse as django_reverse
from django.utils import simplejson            # used for XHR returns
from django.utils import html as django_html   # used for clean search results

from jinja2 import Template  # Jinja2 is Sphinx dependency; should be available
from jinja2.exceptions import TemplateSyntaxError

# Sphinx import
from sphinx.util.osutil import ensuredir
from sphinx.application import Sphinx, SphinxError

if conf.repo_DVCS_type == 'hg':
    import hgwrapper as dvcs
    dvcs.executable = conf.repo_DVCS_exec
    dvcs.local_repo_physical_dir = conf.local_repo_physical_dir

# Import the application's models, without knowing the application name.
models = getattr(__import__(conf.app_dirname, None, None, ['models']),'models')

# The ucomment directive:
COMMENT_DIRECTIVE = '.. ucomment::'

# These words will be removed from any search
# Taken from: http://www.ranks.nl/resources/stopwords.html
STOP_WORDS = ['I', 'a', 'an', 'are', 'as', 'at', 'be', 'by', 'com', 'for',
              'from', 'how', 'in', 'is', 'it', 'of', 'on', 'that', 'the',
              'this', 'to', 'was', 'what', 'when', 'who', 'will', 'with',
              'www']

# Code begins from here
# ---------------------
log_file = logging.getLogger('ucomment')
log_file.setLevel(logging.INFO)
fh = logging.handlers.RotatingFileHandler(conf.log_filename,
                                          maxBytes=5000000,
                                          backupCount=10)
formatter = logging.Formatter(('%(asctime)s - %(name)s '
                               '- %(levelname)s - %(message)s'))
fh.setFormatter(formatter)
log_file.addHandler(fh)

class UcommentError(Exception):
    """ A generic error inside this Django application.  Will log the error,
    and email the site administrator.

    This class must be initiated an exception object:
        * UcommentError(exc_object)

    But an optional string can also be provided, to give the log and email
    message some extra information:
        * UcommentError(exc_object, err_msg)

    This will figure out where the error was raised and provide some
    source code lines in the email.

    An alternative way to call this class is to just with a string input
        * UcommentError(err_msg)

    But this will only email and log the given error message string.

    The exception will not be raised again here: it is the calling function's
    choise whether to reraise it (and possibly interrupt the user's experience),
    or whether to just let the application continue.
    """
    def __init__(self, error, extra=''):
        if isinstance(error, Exception):
            exc = sys.exc_info()
            from inspect import getframeinfo
            emsg = 'The error that was raised: ' + str(exc[1]) + '\n\n'
            if extra:
                emsg += 'This additional information was provided: "%s"' % extra
                emsg += '\n\n'
            emsg += self.format_frame(getframeinfo(exc[2], context=5))
        else:
            # Handles an older syntax
            emsg = 'The following error was raised: ' + error.__repr__()
        if isinstance(error, UcommentError) and error.raised:
            return
        self.raised = True  # prevent errors with emailing from cycling again.
        log_file.error(emsg)
        alert_system_admin(emsg)

    def format_frame(self, traceback):
        """ Receives a named tuple, ``traceback``, created by the ``inspect``
        module's ``getframeinfo()`` function.  Formats it to string output.
        """
        out =  '*\tfilename    = %s\n' % os.path.abspath(traceback.filename)
        out += '*\tline number = %s\n' % traceback.lineno
        out += '*\tfunction    = %s(...)\n' % traceback.function
        out += '*\tsource code where error occurred:\n'
        for idx, line in enumerate(traceback.code_context):
            out += '\t\t%s' % line.rstrip()
            if idx == traceback.index:
                out += '    <--- error occurred here\n'
            else:
                out += '\n'
        return out + '\n'


def create_codes_ID(num):
    """
    Creates a new comment identifier; these appear in the source code for
    each page; they must be short, not confusing, and low chance of collisions.

    Intentionally does not include "i", "I", "l" "D", "o", "O", "Z", "0" to
    avoid visual confusion with similar-looking characters.  We (ab)use this
    fact to create the orphan comment reference with name = '_ORFN_'.

    53 characters, N=4 combinations = 53^4 = many comment ID's
    """
    valid_letters = 'abcdefghjkmnpqrstuvwxyzABCEFGHJKLMNPQRSTUVWXY23456789'
    return ''.join([random.choice(valid_letters) for i in range(num)])

def convert_web_name_to_link_name(page_name, prefix=''):
    """
    Converts the web page name over to the ``link_name`` used in the Django
    model for ``Page``.

    If a prefix is provided (e.g. the fully web address), then this is stripped
    off first, then the rest is used as the page_name.
    """
    if prefix:
        page_name = page_name.split(prefix)[1]
    if '?' in page_name:
        page_name = page_name.split('?')[0]
    if conf.url_views_prefix:
        return page_name.split('/'+conf.url_views_prefix+'/')[1].rstrip('/')
    else:
        return page_name.lstrip('/').rstrip('/')

    # TODO(KGD): remove ``conf.url_views_prefix``, and the need for this
    #            function.  Can we not use the reverse(..) function?

def get_site_url(request, add_path=True, add_views_prefix=False):
    """
    Utility function: returns the URL from which this Django application is
    served.  E.g. when receiving a Django ``request`` object from the user
    on comment submission, their URL might be:
        https://site.example.com/document/_submit-comment/

    >>> get_site_url(request)
    'https://site.example.com/document/_submit-comment/'
    """
    # TODO(KGD): Consider using ``request.build_absolute_uri()`` instead
    out = 'http://'
    if request.is_secure():
        out = 'https://'
    out += request.get_host()
    if add_path:
        out += request.path
    if add_views_prefix:
        if conf.url_views_prefix:
            out += '/' + conf.url_views_prefix + '/'
        else:
            out += '/'
    return out

def get_IP_address(request):
    """
    Returns the visitor's IP address as a string.
    """
    # Catchs the case when the user is on a proxy
    try:
        ip = request.META['HTTP_X_FORWARDED_FOR']
    except KeyError:
        ip = ''
    else:
        # HTTP_X_FORWARDED_FOR is a comma-separated list; take first IP:
        ip = ip.split(',')[0]

    if ip == '' or ip.lower() == 'unkown':
        ip = request.META['REMOTE_ADDR']      # User is not on a proxy
    return ip

# Comment preview and submission functions
# ----------------------------------------
class CommentForm(forms.Form):
    """ Comment form as seen on the server """
    email = forms.EmailField(required=False)
    comment_raw = forms.CharField(min_length=conf.comment_min_length,
                                  max_length=conf.comment_max_length)

def valid_form(p_email, comment_raw):
    """ Verifies if a valid form was filled out.

    Returns an empty string if the form is valid.
    Returns a string, containing an HTML error message if the form is not valid.
    """
    # Ignore empty email addresses; email field is optional anyway, but we do
    # want to alert the user if the email is invalid.
    if p_email.strip() == '':
        p_email = 'no.email@example.com'
    user_form = CommentForm({'email': p_email,
                             'comment_raw': comment_raw})

    if not user_form.is_valid():
        error_dict = user_form.errors
        errors = ['<ul class="ucomment-error">']
        if 'email' in error_dict:
            errors.append(('<li> Your email address is not in the correct '
                           'format.</li>'))
        if 'comment_raw' in error_dict:
            errors.append(('<li> Comments must have between %i and %i '
                           'characters.</li>' % (conf.comment_min_length,
                                                 conf.comment_max_length)))
        errors.append('</ul>')
        log_file.info('Returning with these errors:' + str(errors))
        return ''.join(errors)
    else:
        return ''

def initial_comment_check(request):
    """
    Provides a preliminary check of the comment submission.
    * Must be a POST request not at a GET request.
    * Must have a valid email address.
    * Comment length must be appropriate (see conf/settings.py file).
    """

    if request.method == 'POST':
        c_comment_RST = request.POST['comment']
        p_email = request.POST['email']
        errors = valid_form(p_email, c_comment_RST)
        if errors:
            web_response = HttpResponse(errors, status=200)
            web_response['Ucomment'] = 'Preview-Invalid input'
            return False, web_response
        else:
            return True, c_comment_RST
    elif request.method == 'GET':
        return False, HttpResponse('N/A', status=404)
    elif request.method == 'OPTIONS':
        # Handles a Firefox probe before the POST request is received.
        web_response = HttpResponse(status=404)
        web_response['Access-Control-Allow-Origin'] = '*'
        return False, web_response
    else:
        log_file.warn(request.method + ' received; not handled; return 400.')
        return False, HttpResponse(status=400)

def preview_comment(request):
    """
    User has clicked the "Preview comment" button in browser. Using XHR, the
    comment is POSTed here, extracted, and compiled.
    """
    # ``Success``: means the submission was validated, or was processed.
    # If it failed, then ``response`` will contain an appropriate HttpResponse
    # that can be returned right away.
    success, response = initial_comment_check(request)
    if success:
        web_response = HttpResponse(status=200)
        try:
            compiled_comment_HTML = compile_comment(response)
            web_response['Ucomment'] = 'Preview-OK'
        except Exception as err:
            # Should an error occur while commenting, log it, but respond to
            # the user.
            UcommentError(err, ('An exception occurred while generating a '
                                'comment preview for the user.'))
            compiled_comment_HTML = ('<p>An error occurred while processing '
                                     'your comment.  The error has been '
                                     'reported to the website administrator.'
                                     '<p>UTC time of error: %s' % \
                                     datetime.datetime.utcnow().ctime())
            web_response['Ucomment'] = 'Preview-Exception'

        log_file.info('COMPILE: from IP=%s; comment: "%s"' % \
                        (get_IP_address(request), response))
        web_response.write(compiled_comment_HTML)
        return web_response
    else:
        return response

def compile_comment(comment):
    """
    First scans the ``comment`` string, then compiles the RST to HTML.
    """
    # The Javascript XHR request have a timeout value (set to 5 seconds).
    # set a timer on the compile time?  If more than 5 seconds to
    # compile, then log the comment, return a response back to the user.
    start_time = time.time()
    comment = compile_RST_to_HTML(comment)
    end_time = time.time()
    if (end_time-start_time) > 3:
        log_file.warning(('Comment compile time exceeded 3 seconds; server'
                          'load too high?'))
    return comment

def call_sphinx_to_compile(working_dir):
    """
    Changes to the ``working_dir`` directory and compiles the RST files to
    pickle files, according to settings in the conf.py file.

    Returns nothing, but logs if an error occurred.
    """
    build_dir = os.path.abspath(working_dir + os.sep + '_build')
    ensuredir(working_dir)
    ensuredir(build_dir)
    status = StringIO()
    warning = StringIO()
    try:
        app = Sphinx(srcdir=working_dir, confdir=working_dir,
                     outdir = build_dir + os.sep + 'pickle',
                     doctreedir = build_dir + os.sep + 'doctrees',
                     buildername = 'pickle',
                     status = status,
                     warning = warning,
                     freshenv = True,
                     warningiserror = False,
                     tags = [])

        # Call the ``pickle`` builder
        app.build()

    except SphinxError as e:
        if warning.tell():
            warning.seek(0)
            for line in warning.readlines():
                log_file.warn('COMMENT: ' + line)

        msg = ('Sphinx error occurred when compiling comment (error type = %s): '
               '%s'  % (e.category, str(e)))
        UcommentError(err, msg)

    if app.statuscode == 0:
        log_file.info("COMMENT: Successfully compiled the reader's comment.")
    else:
        log_file.error("COMMENT: Non-zero status code when compiling.")

def convert_raw_RST(raw_RST):
    """
    Performs any sanitizing of the user's input.

    Currently performs:
    * converts '\\' to '\\\\': i.e. single slash converted to double-slash,
                               because Sphinx converts is back to a single slash
    """
    out = raw_RST.replace('\\', '\\\\')
    # You can perform any other filtering here, if required.
    return out

def compile_RST_to_HTML(raw_RST):
    """ Compiles the RST string, ``raw_RST`, to HTML.  Performs no
    further checking on the RST string.

    If it is a comment, then we don't modify the HTML with extra class info.
    But we do filter comments to disable hyperlinks.

    Also copy over generated MATH media to the correct directory on the server.
    """

    ensuredir(conf.comment_compile_area)
    modified_RST = convert_raw_RST(raw_RST)
    with open(conf.comment_compile_area + os.sep + 'index.rst', 'w') as fhand:
        fhand.write(modified_RST)

    try:
        conf_file = conf.comment_compile_area + os.sep + 'conf.py'
        f = file(conf_file, 'r')
    except IOError:
        # Store a fresh copy of the "conf.py" file, found in
        # ../sphinx-extensions/ucomment-conf.py; copy it to comment destination.
        this_file = os.path.abspath(__file__).rstrip(os.sep)
        parent = this_file[0:this_file.rfind(os.sep)]
        src = os.sep.join([parent, 'sphinx-extensions', 'ucomment-conf.py'])
        shutil.copyfile(src, conf.comment_compile_area + os.sep + 'conf.py')
    else:
        f.close()

    # Compile the comment
    call_sphinx_to_compile(conf.comment_compile_area)

    pickle_f = ''.join([conf.comment_compile_area, os.sep, '_build', os.sep,
                        'pickle', os.sep, 'index.fpickle'])
    with open(pickle_f, 'r') as fhand:
        obj = pickle.load(fhand)

    html_body = obj['body'].encode('utf-8')

    # Any equations in the HTML?  Transfer these images to the media directory
    # and rewrite the URL's in the HTML.
    return transfer_html_media(html_body)

def transfer_html_media(html_body):
    """
    Any media files referred to in the HTML comment are transferred to a
    sub-directory on the webserver.

    The links are rewritten to refer to the updated location.
    """
    mathdir = ''.join([conf.comment_compile_area, os.sep, '_build', os.sep,
                       'pickle', os.sep, '_images', os.sep, 'math', os.sep])
    ensuredir(mathdir)
    dst_dir = conf.MEDIA_ROOT + 'comments' + os.sep
    ensuredir(dst_dir)

    for mathfile in os.listdir(mathdir):
        shutil.copyfile(mathdir + mathfile, dst_dir + mathfile)

    src_prefix = 'src="'
    math_prefix = '_images' + os.sep + 'math' + os.sep
    replacement_text = ''.join([src_prefix, conf.MEDIA_URL,
                                'comments', os.sep])
    html_body = re.sub(src_prefix + math_prefix, replacement_text, html_body)
    return html_body


def create_poster(request):
    """
    Creates a new ``CommentPoster`` object from a web submission, ``request``.
    """
    p_name = request.POST['name'].strip() or 'Anonymous contributor'
    p_email = request.POST['email'].strip()

    # The default (unchecked box) is for opt-in = False
    p_opted_in = True
    try:
        # Fails if unchecked (default); succeeds if checked: caught in "else"
        p_opted_in = request.POST['updates'] == 'get_updates'
    except KeyError:
        p_opted_in = False

    # Get the poster entry, or create a new one.  Always create a new poster
    # entry for anonymous posters.
    c_IP_address = get_IP_address(request)
    c_UA_string = request.META.get('HTTP_USER_AGENT', '')[0:499]
    p_default = {'name' : p_name,
                 'long_name': p_name + '__' + c_IP_address + '__' + c_UA_string,
                 'email': p_email,
                 'number_of_approved_comments': 0,
                 'avatar_link': '', # add this functionality later?
                 'auto_approve_comments': False,
                 'opted_in': False}

    if p_email:
        if p_name.lower() not in ('anonymous', 'anonymous contributor', ''):
            p_default['long_name'] = p_name
        poster, _ = models.CommentPoster.objects.get_or_create(email=p_email,
                                                        defaults=p_default)
        poster.opted_in = poster.opted_in or p_opted_in
        poster.save()
    else:
        poster, _ = models.CommentPoster.objects.get_or_create(\
                                                            defaults=p_default)

    # Change settings for all posters:
    if poster.number_of_approved_comments >= conf.number_before_auto_approval:
        poster.auto_approve_comments = True
        poster.number_of_approved_comments += 1
        poster.save()


    log_file.info('POSTER: Created/updated poster: ' + str(poster))
    return poster

def submit_and_store_comment(request):
    """
    The user has typed in a comment and previewed it. Now store it and queue
    it for approval.

    ``Comment`` objects have 3 ForeignKeys which must already exist prior:
    * ``page``: the page name on which the comment appears
    * ``poster``: an object representing the person making the comment
    * ``reference``: a comment reference that facilitates making the comment
    """

    start_time = time.time()

    # Response back to user, if everything goes OK
    response = HttpResponse(status=200)
    response['Ucomment'] = 'Submission-OK'
    try:
        html_template = Template(conf.once_submitted_HTML_template)
    except TemplateSyntaxError as err:
        # Log the error, but don't disrupt the response to the user.
        html_template = Template('Thank you for your submission.')
        UcommentError(err, "Error in 'once_submitted_HTML_template'.")

    # Note about variable names: ``p_`` prefix refers to poster objects in the
    # database, while ``c_`` = refers to comment objects.

    # ForeignKey: comment object (setup only; will be created at the end)
    # --------------------------
    success, c_comment_RST = initial_comment_check(request)
    if not success:
        return c_comment_RST
    else:
        c_comment_HTML = compile_comment(c_comment_RST)

    # Only get the comment reference via its root:
    ref = models.CommentReference.objects.filter(\
                                     comment_root=request.POST['comment_root'])
    if len(ref):
        c_reference = ref[0]
    else:
        # One possibility to consider is to create the comment reference
        # right here.  However, it is quite hard to do this properly, because
        # do not know all the field properties for a CommentReference object:
        # such as the line number, page_link_name, node_type, and others.

        # This will only occur in the exceptional case when the document
        # has been republished, and the user still has a previous version in
        # their browser.  Hence the page reload request.
        response.write(('<p>A minor error occurred while processing your '
                        'comment.<p>The only way to correct it is to reload '
                        'the page you are on, and to resubmit your comment. '
                        '<p>Sorry for the inconvenience.'))
        log_file.warn('COMMENT: User posting a comment from an older page.')
        return response

    # Below the comment reference appears an unique node:
    used_nodes = []
    c_node = create_codes_ID(conf.short_node_length)
    for comment in c_reference.comment_set.all():
        used_nodes.append(comment.node)
    while c_node in used_nodes:
        c_node = create_codes_ID(conf.short_node_length)


    # ForeignKey: page object; get the page on which the comment appears
    # -----------------------
    link_name = convert_web_name_to_link_name(request.POST['page_name'])
    c_page = models.Page.objects.filter(link_name=link_name)[0]

    # ForeignKey: comment poster objects
    # -----------------------------------
    poster = create_poster(request)
    # We can update the response as soon as we have created the poster object
    response.write(html_template.render(settings=conf, poster=poster))

    if poster.number_of_approved_comments >= conf.number_before_auto_approval:
        c_is_approved = True
        c_node_for_RST = c_node
    else:
        c_node_for_RST = c_node + '*'    # indicates comment is not approved yet

    # Do all the work here of adding the comment to the RST sources
    revision_changeset, c_root = commit_comment_to_sources(\
                                                    c_reference,
                                                    c_node_for_RST,
                                                    update_RST_with_comment)

    # NOTE: the line numbers for any comment references that might appear below
    #       the current comment reference will be incorrect - they will be too
    #       low.  However, their line numbers will be rectified once the
    #       document is republished (comment references are updated).

    # An error occurred:
    if revision_changeset == False:
        # Technically the submission is NOT OK, but the comment admin has been
        # emailed about the problem and can manually enter the comment into
        # the database and RST source files.
        return response

    # Update the ``comment_root_is_used`` field in the comment reference, since
    # this root can never be used again.
    c_reference.comment_root_is_used = True
    # Also update the changeset information.  In the future we will update
    # comments for this node from this newer repository.
    c_reference.revision_changeset = revision_changeset
    c_reference.save()

    # Create the comment object
    c_datetime_submitted = c_datetime_approved = datetime.datetime.now()
    c_IP_address = get_IP_address(request)
    c_UA_string = request.META.get('HTTP_USER_AGENT', '')[0:499]
    c_approval_code = create_codes_ID(conf.approval_code_length)
    c_rejection_code = create_codes_ID(conf.approval_code_length)
    c_is_approved = c_is_rejected = False

    # For now all comments have no parent (they are descendents of the comment
    # root). Later, perhaps, we can add threading functionality, so that users
    # can respond to previous comments.  Then the parent of a new comment will
    # be given by:  ``c_root + ':' + parent_comment.node``
    c_parent = c_root

    the_comment, _ = models.Comment.objects.get_or_create(
        page = c_page,
        poster = poster,
        reference = c_reference,
        node = c_node,
        parent = c_parent,
        UA_string = c_UA_string,
        IP_address = c_IP_address,
        datetime_submitted = c_datetime_submitted,
        datetime_approved = c_datetime_approved,
        approval_code = c_approval_code,
        rejection_code = c_rejection_code,
        comment_HTML = c_comment_HTML,
        comment_RST = c_comment_RST,
        is_rejected = c_is_rejected,
        is_approved = c_is_approved)

    log_file.info('COMMENT: Submitted comment now saved in the database.')

    # Send emails to the poster and comment admin regarding the new comment
    # TODO(KGD): queue it
    emails_after_submission(poster, the_comment, request)

    total_time = str(round(time.time() - start_time, 1))
    log_file.info(('COMMENT: Emails to poster and admin sent successfully; '
                    "returning response back to user's browser.  Total time to "
                    ' process comment = %s secs.') % total_time)
    return response

def approve_reject_comment(request, code):
    """
    Either approves or rejects the comment, depending on the code received.

    Approved comments:
    - The # part after the comment node is removed in the RST file
    - The comment is marked as approved in the database and will appear on
      the next page refresh.
    - The poster is notified by email (if an email was supplied)

    Rejected comments:
    - The # part after the comment node is changed to a * in the RST  file
    - The comment is marked as rejected in the database.
    - The poster is notified by email (if an email was supplied)
    """
    # Response back to user, if everything goes OK
    response = HttpResponse(status=200)
    approve = models.Comment.objects.filter(approval_code=code)
    reject = models.Comment.objects.filter(rejection_code=code)

    # Settings used to approve the comment: we remove the '*'
    if len(approve) == 1:
        verb = 'approved'
        symbol = '\*'  # escaped, because it will be used in a regular expressn
        replace = ''
        comment = approve[0]
        comment.is_approved = True
        comment.is_rejected = False
        email_func = email_poster_approved

    # Settings used to reject the comment: we change the '*' to a 	'#'
    elif len(reject) == 1:
        verb = 'rejected'
        symbol = '\*'
        replace = '#*'
        comment = reject[0]
        comment.is_approved = False
        comment.is_rejected = True
        email_func = email_poster_rejected

    # Bad approve/reject code given: don't mention anything; just return a 404.
    else:
        return HttpResponse('', status=404)

    revision_changeset, _ = commit_comment_to_sources(comment.reference,
                                                comment.node,
                                                update_RST_comment_status,
                                                additional={'search': symbol,
                                                            'replace': replace})

    if revision_changeset == False:
        # An error occurred while committing the comment.  An email has already
        # been sent.  Return a message to the user:
        response.write(('An error occurred while approving/rejecting the '
                        'comment.  Please check the log files and/or email for '
                        'the site administrator.'))
        return response

    comment.reference.comment_root_is_used = True
    # Also update the changeset information.  In the future we will update
    # comments for this node from this newer repository.
    comment.reference.revision_changeset = revision_changeset
    comment.reference.save()
    if verb == 'approved':
        comment.poster.number_of_approved_comments += 1
    elif verb == 'rejected':
        comment.poster.number_of_approved_comments -= 1
        comment.poster.number_of_approved_comments = max(0,
                                    comment.poster.number_of_approved_comments)
    comment.poster.save()
    comment.datetime_approved = datetime.datetime.now()
    comment.save()

    # Remove the comment count cache for the page on which this comment appears
    cache_key = 'counts_for__' + comment.page.link_name
    django_cache.cache.delete(cache_key)

    # Send an email the comment poster: rejected or approved
    email_func(comment.poster, comment)

    approve_reject_template = Template(('<pre>'
                        'The comment was {{action}}.\n\n'
                        '\t* Comment root = {{reference.comment_root}}\n'
                        '\t* Comment node = {{comment.node}}\n'
                        '\t* At line number = {{reference.line_number}}\n'
                        '\t* In file name = {{filename}}\n'
                        '\t* Committed as changeset = {{changeset}}\n\n</pre>'))
    output = approve_reject_template.render(action=verb.upper(),
                                            reference = comment.reference,
                                            comment = comment,
                                            filename = os.path.split(\
                                            comment.reference.file_name)[1],
                                            changeset = revision_changeset)
    response.write(output)

    return response


# Repository manipulation functions
# ---------------------------------
def update_local_repo(rev='tip'):
    """
    Updates the local repository from the remote repository and must be used
    before performing any write operations on files in the repo.

    If the local repository does not exist, it will first create a full clone
    from the remote repository.

    Then it does the equivalent of:
    * hg pull   (pulls in changes from the remote repository)
    * hg update (takes our repo up to tip)
    * hg merge  (merges any changes that might be required)

    Then if the optional input ``rev`` is provided, it will revert the
    repository to that revision, given by a string, containing the hexadecimal
    indicator for the required revision.

    This function returns the hexadecimal changeset for the local repo as
    it has been left after all this activity.
    """
    # First check if the local repo exists; if not, create a clone from
    # the remote repo.
    try:
        try:
            ensuredir(conf.local_repo_physical_dir)
        except OSError as err:
            msg = ('The local repository location does not exist, or cannot '
                   'be created.')
            raise UcommentError(err, msg)

        hex_str = dvcs.get_revision_info()
    except dvcs.DVCSError:
        try:
            dvcs.clone_repo(conf.remote_repo_URL, conf.local_repo_URL)
        except dvcs.DVCSError as error_remote:
            msg = ('The remote repository does not exist, or is '
                   'badly specified in the settings file.')
            raise UcommentError(error_remote, msg)

        log_file.info('Created a clone of the remote repo in the local path')

    # Update the local repository to rev='tip' from the source repo first
    try:
        dvcs.pull_update_and_merge()
    except dvcs.DVCSError as err:
        raise UcommentError(err, 'Repository update and merge error')

    hex_str = dvcs.get_revision_info()
    if rev != 'tip' and isinstance(rev, basestring):
        hex_str = dvcs.check_out(rev=rev)
    return hex_str

def commit_to_repo_and_push(commit_message):
    """
    Use this after performing any write operations on files.

    Commits to the local repository; pushes updates from the local repository
    to the remote repository.

    Optionally, it will also update the remote repository to the tip (the
    default is not to do this).

    Returns the changeset code for the local repo on completion.
    """
    hex_str = dvcs.commit_and_push_updates(commit_message)

    # Merge failed!  Log it and email the site admin
    if not(hex_str):
        raise UcommentError(('Repo merging failed (conflicts?) when trying to '
                             'commit. Commit message was: %s' % commit_message))

    # Check that changeset and revision matches the remote repo numbers
    return hex_str

def commit_comment_to_sources(reference, node, func, additional=None):
    """
    Commits or updates a comment in the RST sources.

    ``reference``: is a comment reference object from the database and tells
                   us how and where to add the comment
    ``node``: is the commment noded (string) that is added to the RST source.
    ``func``: does the work of either adding or updating the RST sources.
    ``additional``: named keywords and values in a dict that will be passed
                    to ``func``.

    On successful completion it will return:
    ``revision_changeset``: string identifier for the updated repository
    ``comment_root``: a string of the comment root that was added/updated
    """
    # This part is sensitive to errors occurring when writing to the
    # RST source files.
    try:

        # Get the RST file to the revision required for adding the comment:
        hex_str = update_local_repo(reference.revision_changeset)

        f_handle = file(reference.file_name, 'r')
        RST_source = f_handle.readlines()
        f_handle.close()

        # Add the comment to the RST source; send the comment reference
        # which has all the necessary input information in it.
        try:
            if additional == None:
                additional = {}
            c_root = func(comment_ref = reference,
                          comment_node = node,
                          RST_source = RST_source, **additional)
        except Exception as err:
            # will be caught in outer try-except
            # TODO(KGD): test that this works as expected: what happens after?
            raise UcommentError(err, ('General error while adding or updating '
                                      'comment in the RST sources.'))

        # Write the update list of strings, RST_source, back to the file
        f_handle = file(reference.file_name, 'w')
        f_handle.writelines(RST_source)
        f_handle.close()

        short_filename = os.path.split(reference.file_name)[1]
        commit_message = ('COMMIT: Automatic comment [comment_root=%s, '
                          'node=%s, line=%s, file=%s]; repo_id=%s') % \
                       (c_root, node, str(reference.line_number),
                        short_filename, hex_str)
        hex_str = commit_to_repo_and_push(commit_message)
        log_file.info(commit_message)
        return hex_str, c_root
    except (UcommentError, dvcs.DVCSError) as err:
        UcommentError(err)
        return False, False


def update_RST_with_comment(comment_ref, comment_node, RST_source):
    """
    Appends the ``comment_node`` string (usually a 2-character string), to the
    appropriate line in the RST_source (a list of strings).

    ``comment_ref`` provides the line number and node type which will be
    commented.  We always use an existing comment root, if there is one,
    otherwise we create a new comment root according to these rules:

    Paragraphs, titles, literal_block (source code blocks), tables and figures
    will have their ucomment appended in the second blank line after the node.

    List items (bullet points and ordered lists) will have their ucomment
    appended in the very next line, always.

    In all cases, an existing ucomment directive for that node will be searched
    for and added to.

    The simplest example possible: comment_node='2s' and comment_root='sR4fa4':

    RST_source before (1 line)

        Here is a paragraph of text.

    RST_source after (3 lines):

        Here is a paragraph of text.

        .. ucomment:: sR4fa4: 2s

    Output and side effects:
        * Returns the comment_root as the only output.
        * Modifies the list of strings, ``RST_source`` in place.
    """
    # We can only handle these nodes: anything else will raise an error
    KNOWN_NODES = set(('paragraph', 'title', 'literal_block', 'table',
                       'image', 'list_item', 'displaymath'))

    # Regular expression, which when matched AT THE START OF A LINE, indicate
    # list items in the RST syntax. See the full RST specification:
    # http://docutils.sourceforge.net/docs/user/rst/quickstart.html
    RST_LIST_ITEMS_AS_RE = re.compile(r'''(\s*)
                        (                 # group all list item types
                        (\-)|(\*)|(\+)|   # any bullet list items
                        (\#\.)|           # auto-enumerate: "#."
                        (\w*\.)|          # enumerated: "12." or "A." or "i."
                        (\(\w*\))|        # enumerated: "(23)" or "(B)"
                        (\w\))            # enumerated: "12)" or "iii)"
                        )                 # end of all list item types
                        (?P<space>\s*)    # catch trailing spaces at end.''', \
                                                                        re.X)
    # Maps ``node_type`` to RST directives
    NODE_DIRECTIVE_MAP = {'displaymath': ['math'],
                          'image': ['image', 'figure'],
                          'table': ['table', 'csv-table', 'list_table'],
                          'literal_block': ['code-block', 'literalinclude'],}

    # Nodes given by the ``keys`` in NODE_DIRECTIVE_MAP allow blank lines
    # within in their content, so determining where to place the ucomment
    # directive cannot rely purely on finding the next blank line.  Example:
    #
    #   |Before                         |After
    #   |--------------------------------------------------------------
    # 1 |.. figure:: the_figure.png     |.. figure:: the_figure.png
    # 2 |    :scale: 100%               |    :scale: 100%
    # 3 |                               |
    # 4	|    Figure caption goes here.  |    Figure caption goes here.
    # 5 |                               |
    # 6 |Next paragraph begins here.    |..ucomment:: ABCDEF: 2b
    # 7 |                               |
    # 8 |                               |Next paragraph begins here.
    #
    # It would be wrong to place the ucomment directive at/around line 3
    # as this would cut of the figure's caption.  What we do instead is to
    # find the end of the node and insert the comment at that point.


    def wrap_return(RST_source, insert, prefix):
        """
        Adds the new ucomment directive and return the updated RST_source,
        or, appends to the existing comment.
        """

        if prefix is None:
            comment = RST_source[insert]
            c_root = comment.strip()[dir_len+1:dir_len+1+conf.root_node_length]
            # Always add a comma after the last comment
            if not comment.rstrip().endswith(','):
                suffix = ', '
            else:
                suffix = ' '
            RST_source[insert] = comment[0:-1] + suffix + comment_node + ',\n'

        else:
            c_root = comment_ref.comment_root
            line_to_add = prefix + COMMENT_DIRECTIVE + ' ' + c_root + ': ' + \
                        comment_node + ',\n'

            if comment_ref.node_type in KNOWN_NODES:
                RST_source.insert(insert, '\n')

            RST_source.insert(insert, line_to_add)

        # Pop off the last line of text that was artifically added.
        RST_source.pop()

        return c_root

    # A few corner cases are solved if we ensure the file ends with a blank line
    if RST_source[-1].strip() != '':
        RST_source.append('\n')
    # Force an unrelated line at the end of the file to avoid coding
    # specifically for end-effects.
    RST_source.extend(['___END__OF___FILE___\n'])

    # The comment reference line numbers are 1-based; we need 0-based numbers
    line_num = comment_ref.line_number - 1
    dir_len = len(COMMENT_DIRECTIVE)

    if comment_ref.node_type not in KNOWN_NODES:
        raise UcommentError('Unknown node type: "%s"' % str(comment_ref))

    # To find any spaces at the start of a line
    prefix_re = re.compile('^\s*')
    prefix_match = prefix_re.match(RST_source[line_num])
    prefix = prefix_match.group()

    # There is one exception though: source code blocks marked with '::'
    # While the node's line number refers to the first line of code, the correct
    # prefix is the amount of space at the start of the line containing the '
    # double colons.  Search backwards to find them.
    # If we can't find them, then this literal_block is assumed to be a
    # 'code-block', or 'literalinclude' node (i.e. no double colons).
    if comment_ref.node_type == 'literal_block':
        double_colon = re.compile(r'(?P<space>\s*)(.*)::(\s*)')
        directive_re = r'^(\s*)\.\. '
        for directive in NODE_DIRECTIVE_MAP['literal_block']:
            directive_re += '(' + directive + ')|'
        directive_re = directive_re[0:-1] + r'::(\s*)(.*)'
        directive = re.compile(directive_re)

        double_colon_line = line_num + 1
        for line in RST_source[line_num::-1]:
            double_colon_line -= 1
            if directive.search(line):
                break # it is one of the other directives
            if double_colon.match(line):
                prefix = double_colon.match(line).group('space')

                # Do some surgery on the RST source if the author is using
                # double colons.  By example
                #
                #   |Below is some code, the line ends with a double colon::
                #   |
                #   |    >>> a = 'some source code'
                #   |
                # Replace it as follows.
                #
                #   |Below is some code, the line ends with a double colon:
                #   |
                #   |::
                #   |
                #   |    >>> a = 'some source code'
                #   |

                if line.strip() != '::':
                    dci = line.rindex('::')
                    RST_source[double_colon_line] = line[0:dci] + line[dci+1:]
                    RST_source.insert(double_colon_line+1, '\n')
                    RST_source.insert(double_colon_line+2, prefix + '::\n')
                    # This will get saved in ``submit_and_store_comment(...)``
                    if not isinstance(comment_ref, tuple):
                        # Minor technicality: we used named tuples in unit tests
                        comment_ref.line_number = comment_ref.line_number + 2
                    line_num += 2
                break

    # The point where the ucomment directive will be inserted
    insert = line_num + 1

    # We are *always* given the top line number of the node: so we only have to
    # search for the insertion point below that.  We will start examining from
    # the first line below that.
    finished = False
    next_line = ''
    idx_next = 0
    for idx, line in enumerate(RST_source[line_num+1:]):
        insert += 1  # insert = line_num + idx + 1
        bias = idx + 2
        if line.strip() == '' or comment_ref.node_type == 'list_item':
            if comment_ref.node_type == 'list_item':
                bias -= 1

            # Keep looking further down for an existing ucomment directive
            for idx_next, next_line in enumerate(RST_source[line_num+bias:]):
                if next_line.strip() != '':
                    if next_line.lstrip()[0:dir_len] == COMMENT_DIRECTIVE:
                        insert = line_num + bias + idx_next
                        prefix = None
                        return wrap_return(RST_source, insert, prefix)
                    finished = True
                    break

        if finished:
            next_prefix = prefix_re.match(next_line.rstrip('\n')).group()

            # Certain nodes cannot rely purely on blank lines to mark their end
            if comment_ref.node_type in NODE_DIRECTIVE_MAP.keys():
                # Break if a non-blank line has the same, or lower indentation
                # level than the environment's level (``prefix``)
                if len(next_prefix.expandtabs()) <= len(prefix.expandtabs()):
                    break
                else:
                    finished = False

            # ``list_item``s generally are commented on the very next line, but
            # first ensure the next line is in fact another list_item.
            # If the next line is a continuation of the current list_item, then
            # set ``finished`` to False, and keep searching.
            # blank or not, but
            elif comment_ref.node_type == 'list_item':
                cleaned = next_line[prefix_match.end():]


                # Most list items will break on this criterion (that the next
                # line contains a list item)
                if RST_LIST_ITEMS_AS_RE.match(cleaned):
                    insert = insert + (bias - 2) + idx_next

                    # Subtract off extra lines to handle multiline items
                    if bias > 1:
                        insert -= (bias -1)
                    break

                # but the final entry in a list will break on this criterion
                elif len(next_prefix.expandtabs()) <= len(prefix.expandtabs()):
                    #insert = insert - 1  # commented out: passes bullet_11
                    break

                # It wasn't really the end of the current item (the one being
                # commented on).  It's just that this item is written over
                # multiple lines.
                else:
                    finished = False

            else:
                break

    # Lastly, list items require a bit more work to handle.  What we want:
    #
    #   |Before                          |After
    #   |--------------------------------------------------------------
    #   |#.  This is a list item         |#.  This is a list item
    #   |<no blank line originally here> |    .. ucomment:: ABCDEF: 2a,
    #   |#.  Next list item              |#.  Next list item

    if comment_ref.node_type == 'list_item':
        # list_item's need to have a different level of indentation
        # If the ``RST_source[line_num]`` is  ``____#.\tTwo.\n``, then
        # (note that _ represents a space)
        # * remainder     = '#.\tTwo.\n'  i.e. removed upfront spaces
        # * the_list_item = '#.'          i.e. what defines the list
        # * prefix        = '______\t'    i.e. what to put before '.. ucomment'
        # * list_item.group('space')='\t' i.e. the tab that appears after '#.'
        remainder = RST_source[line_num][prefix_match.end():]
        list_item = RST_LIST_ITEMS_AS_RE.match(remainder)
        the_list_item = list_item.group().rstrip(list_item.group('space'))
        prefix = prefix + ' ' * len(the_list_item) + list_item.group('space')
        c_root = wrap_return(RST_source, insert, prefix)

        # There was no spaced between the list_items
        if idx_next == 0 and finished:
            RST_source.insert(insert, '\n')

        return c_root
    else:
        return wrap_return(RST_source, insert, prefix)


def update_RST_comment_status(comment_ref, comment_node, RST_source, \
                                  search, replace):
    """
    Searches for the existing ``comment_ref`` and ``comment_node`` in the list
    of strings given by ``RST_source``.  Finds the ``search`` character and
    replaces it with the ``replace`` character (indicating whether the comment
    was approved or rejected).

    The ``RST_source`` is a list of strings; this list will be updated in place.
    """
    comment_re = re.compile(r'(\s*)\.\. ' + COMMENT_DIRECTIVE.strip(' .:') + \
                            r'::(\s*)(?P<root>\w*)(\s*):(\s*)(?P<nodes>.+)')
    idx = -1
    line = ''
    for idx, line in enumerate(RST_source[comment_ref.line_number-1:]):
        rematch = comment_re.match(line)
        if rematch:
            if rematch.group('root') == comment_ref.comment_root:
                break

    if idx < 0:
        log_file.error(('Comment (%s) was to be changed, but was not found in'
                       'the RST_sources.') % str(comment_ref))
        return RST_source

    # We have the correct node.  Now edit the code.
    nodes = rematch.group('nodes')
    nodes = re.sub(comment_node + search, comment_node + replace, nodes)
    to_replace = line[0:rematch.start('nodes')] + nodes
    RST_source[comment_ref.line_number - 1 + idx] = to_replace + '\n'
    return comment_ref.comment_root

# Emailing functions
# ------------------
def send_email(from_address, to_addresses, subject, message):
    """
    Basic function to send email according to the four required string inputs.
    Let Django send the message; it takes care of opening and closing the
    connection, as well as locking for thread safety.
    """
    if subject and message and from_address:
        try:
            send_mail(subject, message, from_address, to_addresses,
                      fail_silently=True)
        except (BadHeaderError, smtplib.SMTPException) as err:
            # This will log the error, and hopefully email the admin
            UcommentError(err, 'When sending email')
        except Exception as err:
            # Only log the error, incase we are returned back here
            log_file.error('EMAIL ERROR: ' + str(err))

        log_file.info('EMAIL: sent to: ' + ', '.join(to_addresses))

def email_poster_pending(poster, comment):
    """ Sends an email to the poster to let them know their comment is in the
    queue for approval.  Give a time-frame, and tell them the number of comments
    left before their future comments are automatically approved.
    """
    try:
        pending_template = Template(conf.once_submitted_template)
    except TemplateSyntaxError as err:
        # Log the error, but don't email the poster.
        UcommentError(err, "Error in 'once_submitted_template'.")
        return

    message = pending_template.render(settings=conf, poster=poster,
                                      comment=comment)

    if conf.once_submitted_subject:
        send_email(from_address = conf.email_from,
                   to_addresses = [poster.email],
                   subject = conf.once_submitted_subject,
                   message = message)

def email_poster_approved(poster, comment):
    """ Sends an email to the poster to let them know their comment has been
    approved.  Give a link?
    """
    try:
        approved_template = Template(conf.once_approved_template)
    except TemplateSyntaxError as err:
        # Log the error, but don't email the poster.
        UcommentError(err, "Error in 'once_approved_template'.")
        return

    message = approved_template.render(settings = conf,
                                      poster = poster,
                                      comment = comment)

    send_email(from_address = conf.email_from,
               to_addresses = [poster.email],
               subject = conf.once_approved_template,
               message = message)


def email_approval_confirmation(poster, comment, web_root):
    """
    Sends email to the ``conf.email_comment_administrators_list`` with special
    links to either approve or reject a new comment.
    """
    try:
        approval_template = Template(conf.email_for_approval)
    except TemplateSyntaxError as err:
        # Log the error, but send a bare-bones email
        UcommentError(err, "Error in 'comment-approved' template")
        approval_template = Template(('THIS IS A DEFAULT EMAIL.\n\n'
            'A new comment was received on your '
            'ucomment-enabled website.\n\nHowever an error in your settings '
            'template ("email_for_approval" template) prevented a correctly '
            'formatted email from being sent to you.  Please check your '
            'template settings carefully.\n\nThe comment was '
            'recorded in the database.\n\nClick this link to ACCEPT the '
            'comment: {{comment.approval_code}}\nTo REJECT the comment, click '
            'here: {{comment.rejection_code}}\n'))

    comment.approval_code = web_root + '_approve-or-reject/' + \
                                                        comment.approval_code
    comment.rejection_code = web_root + '_approve-or-reject/' + \
                                                        comment.rejection_code

    msg = approval_template.render(email_from = conf.email_from,
                                   poster = poster,
                                   comment = comment,
                                   reference = comment.reference,
                                   webpage = web_root + comment.page.link_name,
                                   settings = conf)

    send_email(from_address = conf.email_from,
               to_addresses = conf.email_comment_administrators_list,
               subject = conf.email_for_approval_subject + ': %s, node %s' %\
                            (comment.reference.comment_root, comment.node),
               message = msg)

def email_poster_rejected(poster, extra_info=''):
    """
    Sends the poster an email saying their comment was not suitable.  If any
    extra text is provided in ``extra_info``, add that to the email.
    """
    # TODO(KGD): add function to reject posting
    pass

def alert_system_admin(error_msg):
    """ An error occurred: more information contained in the ``error`` string.
    Send an email to the comment administrator.
    """
    msg = ('The following error was logged on your ucomment-enabled '
           'website at %s: \n\n') % (str(datetime.datetime.now()))
    msg = msg + str(error_msg)

    send_email(conf.email_from, conf.email_system_administrators,
               conf.email_system_administrators_subject, msg)



def emails_after_submission(poster, comment, request):
    """ Email the poster once the comment has been submitted to the website,
    """
    # Don't bother if no email address
    if poster.email != '':

        # The comment was auto-approved
        if comment.is_approved:
            email_poster_approved(poster, comment)

        # The comment is waiting for approval
        else:
            email_poster_pending(poster, comment)

    # Let the comment administrator know
    email_approval_confirmation(poster, comment, get_site_url(request,
                                    add_path=False, add_views_prefix=True))





# Web output functions (HTTP and XHR)
# -----------------------------------
def render_page_for_web(page, request, search_value=''):
    """
    Renders a ``page`` object to be displayed in the user's browser.

    We must supply the original ``request`` object so we can add a CSRF token.

    The optional ``search_value`` gives a string with which to pre-fill the
    search box.
    """
    try:
        toc_page = models.Page.objects.filter(is_toc=True).filter(
                                                            prev_link=None)[0]
        toc_link = models.Link(link=django_reverse('ucomment-root'),
                               title='Table of contents')
    except IndexError:
        # We only reach here if there is no TOC page in the DB.
        toc_page = ''
        toc_link = models.Link()

    # Build up the navigation links: e.g. "Previous|Up|Table of Contents|Next"
    try:
        nav_template = Template(conf.html_navigation_template.replace('\n',''))
    except TemplateSyntaxError as err:
        # Log the error, but don't disrupt the response to the user.
        UcommentError(err, 'Error in the page navigation template.')
        nav_template = Template('')
    nav_links = nav_template.render(prev=page.prev_link, next=page.next_link,
                                    parent=page.parent_link, home=toc_link)
    root_link = models.Link.objects.filter(link = '___TOC___')[0]
    root_link.link = toc_link.link

    page_body = ''.join(['\n<!-- page output starts -->\n',
                         page.body,
                         '<!-- /page output ends -->\n'])

    # Replace any {{IMAGE_LOCATION}} markers in the page.body with a direct
    # link to the media.
    page_body = page_body.replace(r'src="/{{IMAGE_LOCATION}}/',
                                  r'src="' + conf.media_url)

    # If user is visiting TOC, but is being referred, show where they came from:
    full_referrer = request.META.get('HTTP_REFERER', '')
    log_file.debug('REFERER = %s' % full_referrer)
    referrer_str = ''
    if full_referrer:
        # First, make sure the referrer is hosted on the same website as ours
        if full_referrer.find(request.get_host()) > 0:
            current_URL = request.build_absolute_uri()
            referrer = full_referrer.split(current_URL)
            if len(referrer) > 1:
                referrer_str = referrer[1]
            else:
                referrer_str = referrer[0]
        else:
            referrer = []
    else:
        referrer = []

    if page.is_toc and referrer:
        if page == toc_page and len(referrer) == 1:
            # Strip off the last part of ``current_URL`` and the rest is the
            # base part of the hosting website.
            idx = 0
            for idx, part in enumerate(reversed(current_URL.split('/'))):
                if part != '':
                    break
            break_off = '/'.join(current_URL.split('/')[0:-idx-1])
            referrer_str = referrer_str.lstrip(break_off)

        # We are coming from a link "Up one level"
        elif len(referrer) == 2:
            pass
        try:
            # At most one split: parts of the referrer can appear multiple time,
            # so only split once.
            before, after = page_body.split(referrer_str, 1)
        except ValueError:
            # "referrer" not in the page_body (happens when going to the TOC
            # from the "Search" page.
            pass
        else:
            breakpoint = after.find('</a>')
            prefix = after[0:breakpoint]
            suffix = after[breakpoint+4:]
            to_add = ('</a><span id="ucomment-toc-referrer">&lArr; You arrived '
                      'from here</span>')
            page_body = before + referrer_str + prefix + to_add + suffix

    # Create a page hit entry in the database
    page_hit = models.Hit(UA_string = request.META.get('HTTP_USER_AGENT', ''),
                   IP_address = get_IP_address(request),
                   page_hit = page.html_title,   # was ``page.link_name``
                   referrer = referrer_str or full_referrer)
    page_hit.save()

    # Was highlighting requested?
    highlight = request.GET.get('highlight', '')
    if highlight:
        search_value = highlight
        # Apply highlighting, using <span> elements
        with_case = request.GET.get('with_case', 'False')
        with_case = with_case.lower() == 'true'
        highlight = [word for word in highlight.split() \
                                if (word not in STOP_WORDS and len(word)>3)]

        highlight = []
        # TODO(KGD): turn highlighting off, tentatively. Note that the
        # highlighting code below can break the HTML by inserting span
        # elements, especially if the search term is likely an HTML element
        # in the ``page_body``.  Mitigated somewhat by the fact that we search
        # only for whole words.
        for word in highlight:
            if with_case:
                word_re = re.compile(r'\b(%s)\b'%word, re.U + re.L)
            else:
                word_re = re.compile(r'\b(%s)\b'%word, re.I + re.U + re.L)
            page_body = word_re.sub(
                    r'<span id="ucomment-highlight-word">\1</span>', page_body)

     # Build up the navigation links: e.g. "Previous|Up|Table of Contents|Next"
    try:
        local_toc_template = Template(conf.side_bar_local_toc_template)
    except TemplateSyntaxError as err:
        # Log the error, but don't disrupt the response to the user.
        UcommentError(err, 'Error in the local sidebar TOC template.')
        local_toc_template = Template('')

    # Modify the page's local TOC to strip out the rendundant one-and-only <li>
    # Render this local TOC to display in the sidebar.
    if page.local_toc.strip() != '':
        local_toc = page.local_toc
        keepthis = re.match('^<ul>(.*?)<li>(.*?)<ul>(?P<keepthis>.*)',local_toc,
                                                                      re.DOTALL)
        cleanup = None
        if keepthis:
            keepthis = keepthis.group('keepthis')
            cleanup = re.match('(?P<keepthis>.*?)</ul></li></ul>$',
                                                    keepthis.replace('\n',''))
        if cleanup:
            # This additional cleanup only works on document where we are
            # splitting the major sections across multiple HTML pages.
            page.local_toc = cleanup.group('keepthis')
    sidebar_local_toc = local_toc_template.render(page=page)

    # If the page is the main TOC, and user option is set, then style the <li>
    # so they can expand (uses Javascript).  Will still display the page
    # properly even if there is no Javascript.
    if page == toc_page:
        page_body = re.sub('toctree-l1', 'ucomment-toctree-l1', page_body)
        css_body_class = 'ucomment-root'
    else:
        css_body_class = 'ucomment-page'

    # Finally, send this all off to the template for rendering.
    # NOTE: any additional fields added to this dictionary must also be added
    #       to the named tuple for the search page: ``def search_document(...)``
    page_content = {'html_title': page.html_title,
                    'body_html': page_body,
                    'nav_links': nav_links,
                    'root_link': root_link,
                    'stylesheet_link': conf.stylesheet_link,
                    'prefix_html': conf.html_prefix_text,
                    'suffix_html': conf.html_suffix_text,
                    'search_value': search_value,
                    'local_TOC': sidebar_local_toc,
                    'sidebar_html': page.sidebar,
                    'css_body_class': css_body_class,
                    'about_commenting_system': conf.html_about_commenting,
                    'page_hits': page.number_of_HTML_visits,
                    'updated_on': page.updated_on}
    page_content.update(csrf(request))  # Handle the search form's CSRF

    # TODO(KGD): redirect to /_search/search terms/AND/True if required

    return render_to_response('document-page.html', page_content)


def display_page(page_requested):
    """
    Displays the HTML for a page, including all necessary navigation links.

    Must also handle the case of http://example.com/page#subsection to got
    to subsection links within a page.
    """
    start_time = time.time()
    link_name = convert_web_name_to_link_name(page_requested.path)
    log_file.debug('Page requested = %s' % page_requested.path)
    ip_address = get_IP_address(page_requested)
    item = models.Page.objects.filter(link_name=link_name)
    if not item:
        # Requested the master_doc (main document)
        if link_name == '':
            toc_page = models.Page.objects.filter(is_toc=True).filter(\
                                                            prev_link=None)
            if toc_page:
                resp = django_reverse('ucomment-root') + toc_page[0].link_name
                return HttpResponseRedirect(resp)
            else:
                emsg = ('A master page does not exist in the ucomment database.'
                        '<p>Have you correctly specified the settings in this '
                        'file? <pre>%sconf/settings.py</pre>'
                        '<p>Also visit <a href="%s">the administration page</a>'
                        ' to start compiling your document.' % (
                                       conf.application_path,
                                       django_reverse('ucomment-admin-signin')))
                return HttpResponse(emsg)
        elif models.Page.objects.filter(link_name=link_name + '/index'):
            # To accommodate a peculiar Sphinx settings for PickleHTMLBuilder
            # It may lead to broken links for images that are included on
            # this page.
            item = models.Page.objects.filter(link_name=link_name + '/index')
        else:
            # TODO(KGD): return a 404 page template: how to do that?
            log_file.debug('Unknown page requested "%s" from %s.  Full request '
                           'was %s' % (link_name, ip_address,
                                       page_requested.path))
            return HttpResponse('Page not found', status=404)

    page = item[0]
    page.number_of_HTML_visits += 1
    page.save()

    result = render_page_for_web(page, page_requested)
    log_file.info('REQUEST: page = %s from IP=%s; rendered in %f secs.' % (
        link_name, ip_address, time.time()-start_time))
    return result

def format_comments_for_web(comments):
    """
    Received a list of comment objects from the database; must format these
    comments into appropriate HTML string output to be rendered in the browser.
    """
    resp = ''
    for item in comments:
        if not(item.is_approved):
            continue

        date_str = item.datetime_submitted.strftime("%Y-%m-%d at %H:%M")
        resp += '<li id="%s"><dl><dt><span id="ucomment-author">%s</span>' \
             % (item.node, item.poster.name)
        resp += (('<span class="ucomment-meta">%s</span></dt>'
                 '<dd>%s</dd></dl></li>') % (date_str, item.comment_HTML))

    resp += '\n'
    return resp
def retrieve_comment_HTML(request):
    """
    Retrieves any comments associated with a comment root and returns the
    HTML in a JSON container back to the user.

    http://www.b-list.org/weblog/2006/jul/31/django-tips-simple-ajax-example-part-1/
    """
    if request.method == 'POST':
        # If comment reading/writing is disabled: return nothing
        if not(conf.enable_comments):
            return HttpResponse('', status=200)

        root = request.POST.get('comment_root', '')
        sort_order = request.POST.get('order', 'forward')
        response = ''
        ref = models.CommentReference.objects.filter(comment_root=root)
        if len(ref):
            ref = ref[0]
            associated_comments = ref.comment_set.order_by("datetime_submitted")
            if sort_order == 'reverse':
                associated_comments = reversed(associated_comments)

            response = format_comments_for_web(associated_comments)
            log_file.info('COMMENT: Request HTML for %s from IP=%s' %\
                          (root, get_IP_address(request)))
            return HttpResponse(response, status=200)
        else:
            log_file.warn(('A user requested comment reference = %s which did '
                           'exist; this is not too serious; you have probably '
                           'just updated the document and they are accessing '
                           'a prior version.') % root)
            return HttpResponse('', status=200)


    elif request.method == 'GET':
        return HttpResponse('N/A', status=404)
    else:
        log_file.warn((request.method + ' method for comment HTML received; '
                        'not handled; return 400.'))
        return HttpResponse(status=400)



def retrieve_comment_counts(request):
    """
    Given the list of nodes, it returns a list with the number of comments
    associated with each node.
    """
    start_time = time.time()

    def process_counts(comment_roots, cache_key):
        """
        Accepts a list of comment_roots and populates the ``response_dict``
        with the number of comments associated with each ``comment_root``.

        Also supply a ``cache_key`` so that counts can be cached for a while.
        """
        response_dict = {}
        try:
            if not conf.enable_comments:
                return HttpResponse(simplejson.dumps(response_dict),
                                            mimetype='application/javascript')

            if cache_key in django_cache.cache:
                log_file.info('COUNTS: returned cached result.')
                response_dict = django_cache.cache.get(cache_key)
            else:
                for key in comment_roots:
                    num = 0
                    ref = models.CommentReference.objects.filter(\
                                                            comment_root=key)
                    if len(ref) > 0:
                        ref = ref[0]
                        associated_comments = ref.comment_set.all()
                        for comment in associated_comments:
                            if comment.is_approved:
                                num += 1

                    # Every key must return a result, even if it is zero
                    response_dict[key] = num

                log_file.debug('COUNTS: for %d nodes retrieved in %f secs' %\
                         (len(comment_roots), time.time()-start_time))

            # Should we cache the result for future?
            if (time.time()-start_time) > conf.cache_count_duration:
                django_cache.cache.set(cache_key, response_dict,
                                       timeout=conf.cache_count_timout)
                log_file.info('COUNTS: %s will be cached for %f secs.' % \
                                    (cache_key, conf.cache_count_timout))
        except Exception as err:
            # only log the error, don't break the app
            UcommentError(err, 'While retrieving comment counts')

        return response_dict

    log_file.debug('COUNTS: request received with method = %s' % request.method)
    if request.method == 'POST':
        comment_roots = sorted(request.POST.keys())
        comment_roots.pop(comment_roots.index('_page_name_'))
        cache_key = 'counts_for__' + convert_web_name_to_link_name(
                                       request.POST.get('_page_name_', ''))
        response_dict = process_counts(comment_roots, cache_key)
        return HttpResponse(simplejson.dumps(response_dict),
                                mimetype='application/javascript')
    elif request.method == 'GET':
        return HttpResponse('N/A', status=404)
    else:
        log_file.info((request.method + ' method for comment counts '
                        'received; not handled; return 400.'))
        return HttpResponse(status=400)

def retrieve_page_name(request):
    """
    Returns the page title given the page hyperlink in the request (POST), it .
    """
    # TODO(KGD): add section name and sub-page name
    # e.g. Design and analysis of experiments: Fractional factorial designs
    if request.method == 'POST':
        page_name = request.POST.get('_page_name_', '')
        link_name = convert_web_name_to_link_name(page_name)
        item = models.Page.objects.filter(link_name=link_name)
        if not item:
            # Requested the master_doc (main document)
            if link_name == '':
                toc_page = models.Page.objects.filter(is_toc=True).filter(\
                                                                prev_link=None)
                item = toc_page
            #elif models.Page.objects.filter(link_name=link_name + '/index'):
                ## To accommodate a peculiar Sphinx settings for PickleHTMLBuilder
                ## It may lead to broken links for images that are included on
                ## this page.
                #item = models.Page.objects.filter(link_name=link_name + '/index')
            else:
                log_file.debug('NAME request: unknown page "%s"' % link_name)
                return HttpResponse('', status=404)

        page = item[0]
        log_file.debug('NAME request = %s; returned %s' % (link_name, page_name))
        return HttpResponse(page.html_title)
    elif request.method == 'GET':
        return HttpResponse('', status=404)
    else:
        log_file.info((request.method + ' method for comment counts '
                        'received; not handled; return 400.'))
        return HttpResponse(status=400)


# Publishing update functions
# ----------------------------
def publish_update_document(request):
    """
    After pages have been remotely updated and checked back in; the author
    must trigger an update.  This compiles the code to HTML for the changed
    files, created database entries for each node, regenerates the PDF output.
    """
    if not request.user.is_authenticated():
        return HttpResponseRedirect(django_reverse('ucomment-admin-signin'))

    msg = call_sphinx_to_publish()

    # An empty message, msg, indicates no problems.  Any problems that may
    # have occurred have already been emailed and logged to the admin user.
    if msg:
        return HttpResponse(msg, status=404)

    # TODO(KGD):  Convert any changed images to JPG from PNG.
    #             Compile PDF here, or even earlier.
    #             Update search index tables in Sphinx search
    msg = 'PUBLISH: Update and publish operation successfully completed'
    log_file.info(msg)
    msg += ('<br><p>View your document <a href="%s">from this link</a>'
        '.</p>') % (django_reverse('ucomment-root'))
    return HttpResponse(msg, status=200)


def call_sphinx_to_publish():
    """ Does the work of publishing the latest version of the document.

    Pulls in the latest revision from the DVCS, publishes the document.
    """
    # TODO(KGD): can we show a list of changed files to the author before
    #            s/he clicks "Publish": you will have to dig into Sphinx's
    #            internals to see that.
    revision_changeset = update_local_repo()
    log_file.info('PUBLISH: the document with revision changeset = %s' % \
                   revision_changeset)

    # Copy over the ucomment extension to the local repo: that way the author
    # does not have to include it in his/her repo of the document.
    srcdir = os.path.join(conf.application_path, 'sphinx-extensions') + os.sep
    if os.name == 'posix':
        try:
            os.symlink(srcdir + 'ucomment-extension.py',
                   conf.local_repo_physical_dir+os.sep+'ucomment-extension.py')
        except OSError as err:
            if err.errno == 17: # File already exists
                pass
            else:
                UcommentError(err, ('When creating symlink for ucomment '
                                    'extension'))

    else:
        try:
            shutil.copy(srcdir + 'ucomment-extension.py',
                    conf.local_repo_physical_dir)
        except shutil.Error:
            UcommentError(err, ('When copying the ucomment extension - not '
                                'found'))

    # When Sphinx is called to compile the document, it is expected that the
    # document's ``conf.py`` has the correct path to the extensions.
    status = StringIO()
    warning = StringIO()

    # TODO(KGD): can we send this to the logfile instead of status and warning?
    #            will allow us to track compiling of large documents via logfile

    # TODO(KGD): investigate using http://ajaxpatterns.org/Periodic_Refresh
    # also see: http://www.ajaxprojects.com/ajax/tutorialdetails.php?itemid=9

    # The code below simulates the command-line call
    # $ sphinx-build -a -b pickle -d _build/doctrees . _build/pickle

    # The ``conf.local_repo_physical_dir``  must not have a trailing slash,
    # and must be "clean", otherwise lookups later with ``file_linkname_map``
    # will fail.
    conf.local_repo_physical_dir = os.path.abspath(conf.local_repo_physical_dir)
    build_dir = os.path.abspath(conf.local_repo_physical_dir+os.sep + '_build')
    ensuredir(build_dir)

    # TODO(KGD): make this setting a choice in the web before publishing
    # Note: FRESHENV: if True: we must delete all previous comment references,
    # to avoid an accumulation of references in the database.
    conf.use_freshenv = False
    try:
        app = Sphinx(srcdir=conf.local_repo_physical_dir,
                     confdir=conf.local_repo_physical_dir,
                     outdir = build_dir + os.sep + 'pickle',
                     doctreedir = build_dir + os.sep + 'doctrees',
                     buildername = 'pickle',
                     status = status,
                     warning = warning,
                     freshenv = conf.use_freshenv,
                     warningiserror = False,
                     tags = [])

        if app.builder.name != 'pickle':
            emsg = ('Please use the Sphinx "pickle" builder to compile the '
                    'RST files.')
            log_file.error(emsg)
            # TODO(KGD): return HttpResponse object still
            return

        # We also want to compile the documents using the text builder (search).
        # But rather than calling Sphinx from the start, just create a text
        # builder and run it right after the pickle builder.  Any drawbacks?
        text_builder_cls = getattr(__import__('sphinx.builders.text', None,
                                    None, ['TextBuilder']), 'TextBuilder')
        text_builder = text_builder_cls(app)
        pickle_builder = app.builder

        if 'ucomment' not in app.env.config:
            emsg = ('The document was not published: please ensure the '
                    "``ucomment`` dictionary appears in your document's"
                    '`conf.py`` file.')
            UcommentError(emsg)
            return emsg

        # Call the ``pickle`` builder
        app.env.config.ucomment['revision_changeset'] = revision_changeset
        app.env.config.ucomment['skip-cleanup'] = True
        app.build()

        # Log any warnings to the logfile.
        log_file.info('PUBLISH: Sphinx compiling HTML (pickle) successfully.')
        if warning.tell():
            warning.seek(0)
            for line in warning.readlines():
                log_file.warn('PUBLISH: ' + line)

        # Now switch to the text builder (to create the search index)
        app.env.config.ucomment['skip-cleanup'] = False
        app.builder = text_builder

        try:
            app.build()
        except SphinxError as e:
            log_file.warn(('PUBLISH: could not successfully publish the text-'
                           'based version of the document (used for searching).'
                           'Error reported = %s') % str(e))
            # TODO(KGD): defer clean-up to after RST files are used as search

        log_file.debug('PUBLISH: Sphinx compiling TEXT version successfully.')
        if warning.tell():
            warning.seek(0)
            for line in warning.readlines():
                log_file.warn('PUBLISH WARNING: ' + line.strip())

        # Switch back to the pickle builder (we need this when doing the
        # database commits)
        app.builder = pickle_builder

    except SphinxError as e:
        msg = 'A Sphinx error occurred (error type = %s): %s'  % \
            (e.category, str(e))
        log_file.error(msg)
        alert_system_admin(msg)
        return msg

    if app.statuscode == 0:
        commit_updated_document_to_database(app)
    else:
        log_file.error(('The Sphinx status code was non-zero.  Please check '
                        'lines in the log file above this one for more info.'))
    return ''


def commit_updated_document_to_database(app):
    """
    Two types of objects must be commited to the database to complete the
    publishing of the document:
        1. Each (web)pages in the document
        2. All the comment references

    We need to take some extra care with the comment references: remove unused
    comment references, find and take care of references that were orphaned
    (see below), and add new comment references.
    """
    sphinx_settings = app.env.config.ucomment

    # Used to convert ``class="reference internal"`` to
    # ``class="ucomment-internal-reference"``
    local_toc_re = re.compile(r'class="reference internal"')
    replace_toc = 'class="ucomment-internal-reference"'

    # First, generate a dictionary of page_name -> next_page_name
    # The first page = TOC = app.env.config.master_doc
    # The last page  has no next page link.
    # E.g" {'toc': 'page1', 'page2': 'page3', 'page1': 'page2', 'page3': None}
    all_files = app.env.found_docs
    document_order = {}
    for fname in list(all_files):
        is_toc = False
        if fname == app.env.config.master_doc:
            is_toc = True

        name = app.builder.outdir + os.sep + fname + app.builder.out_suffix
        try:
            f = file(name, 'r')
            page_info = pickle.load(f)
        except IOError:
            raise IOError('An IOError occurred when processing %s' % name)
        finally:
            f.close()

        # What is the page's HTML title?
        link_name = page_info['current_page_name']
        has_next = False
        for item in page_info['rellinks']:
            if item[3] == 'next':
                has_next = True
                next_section = item[0]
                break
        if not has_next:
            next_section = None

        document_order[link_name] = next_section

    # Next, order the pages.  The ``ordered_names`` list will grow in size
    # TODO(KGD): The problem comes from how the document is split in
    # "LVM" section index.rst
    page_names = document_order.keys()
    ordered_names = [app.env.config.master_doc]
    for idx in xrange(len(page_names)):
        if ordered_names[idx] is not None:
            ordered_names.append(document_order[ordered_names[idx]])
        else:
            break

    # The last ``None`` element designates the end of the document
    ordered_names.pop()
    # Check if there were docs not included in the toctree: add them at the end
    ordered_names.extend(set(page_names) - set(ordered_names))


    # Now commit each (web)page to the DB in order
    # ---------------------------------------------
    prior_pages = models.Page.objects.all()
    file_linkname_map = {}
    for fname in reversed(ordered_names):
        is_toc = is_chapter_index = False
        if fname in app.env.config.ucomment['toc_docs']:
            is_chapter_index = True
        if fname == app.env.config.master_doc:
            is_toc = True
            is_chapter_index = False

        name = app.builder.outdir + os.sep + fname + app.builder.out_suffix
        try:
            f = file(name, 'r')
            page_info = pickle.load(f)
        except IOError:
            raise IOError('An IOError occurred when processing %s' % name)
        finally:
            f.close()

        # Aim: get a text version of each page to generate a search index
        # Get the RST source code, clean it, and store that in the database.
        # TOC and chapter indicies are not to be indexed for the search engine.

        src = app.builder.srcdir + os.sep + fname + app.config.source_suffix
        try:
            unsplit_source_name = sphinx_settings['split_sources'][src]
        except KeyError:
            unsplit_source_name = src

        if is_toc or is_chapter_index:
            # Good side-effect: TOC pages will never show up in search results
            search_text = ''
        else:
            try:
                f = file(src, 'r')
                search_text = ''.join(f.readlines())
                search_text = sanitize_search_text(search_text)
            except IOError:
                raise IOError(('An IOError occurred when processing RST '
                               'source file: %s' % src))
            finally:
                f.close()

        # By what name will the HTML page be accessed?
        link_name = page_info['current_page_name']

        # Now get some link information to add to the page.  Not every
        # page has a parent; for those pages, set the parent to be the TOC.
        # The parent link for the TOC is the TOC
        try:
            parent_link, _ = models.Link.objects.get_or_create(
                link = page_info['parents'][0]['link'],
                title = page_info['parents'][0]['title'])
        except IndexError:
            # The highest level TOC does not have a parent: this is used
            # to correctly output the navigation bar.
            if is_toc:
                parent_link = None
            else:
                parent_link, _ = models.Link.objects.get_or_create(
                    link = u'../',
                    title = u'')

        try:
            next_link, _ = models.Link.objects.get_or_create(
                link = page_info['next']['link'],
                title = page_info['next']['title'])
        except TypeError:
            # Only the last section in the document won't have a next link
            next_link = None

        try:
            prev_link, _ = models.Link.objects.get_or_create(
                link = page_info['prev']['link'],
                title = page_info['prev']['title'])
        except TypeError:
            # Only the TOC won't have a previous link.  We rely on this fact to
            # filter the pages to locate the root TOC.
            if is_toc:
                prev_link = None
            else:
                # This is for pages that happened to be compiled, but don't
                # fall in the document structure.  For example, we have RST
                # files, but they were not included in any toctree, yet they
                # were compiled by Sphinx.
                prev_link, _ = models.Link.objects.get_or_create(
                    link = u'../',
                    title = u'')

            # While we are here, create a "root TOC" link with the appropriate
            # title: use the ``project`` setting from the Sphinx conf.py file.
            # The actual link will be determined on page request.
            models.Link.objects.get_or_create(link = '___TOC___',
                                              title = app.env.config.project)

        # Generate a "local" table of contents: useful for long pages; will not
        # be generated if there is only one subsection on the page, nor will
        # it be generated for pages that are primarily an index page.
        if is_toc or is_chapter_index:
            # Good side-effect: TOC pages will never show up in search results
            local_toc = ''
        else:
            local_toc = page_info['toc']
            local_toc, number = local_toc_re.subn(replace_toc, local_toc)
            if number == 1:
                local_toc = ''

            # TODO(KGD): take a look at the ``app.env.resolve_toctree`` function
            #            in Sphinx.

        # Use the Project's name for the master_doc (i.e. the main TOC page)
        # for the document.
        if is_toc and not(is_chapter_index):
            page_info['title'] = app.env.config.project

        # If a page with the same link (an unique field) is found, then update
        # the page.  Do not delete the page, because that will remove any
        # associated comments.  See the ``models.py`` file for ``Comment``
        # definition -- the ``Page`` objects are a ForeignKey.

        existing_page = prior_pages.filter(link_name=link_name)
        if existing_page:
            page = existing_page[0]
            if page.search_text.encode('utf-8') != search_text:
                # If the content has changed, only then change ``updated_on``
                page.updated_on = datetime.datetime.now()
            if is_toc or is_chapter_index:
                if page.body != '\n' + page_info['body'] + '\n':
                    page.updated_on = datetime.datetime.now()

            page.revision_changeset = sphinx_settings['revision_changeset']
            page.html_title = page_info['title']
            page.is_toc = is_toc or is_chapter_index
            page.source_name = unsplit_source_name
            page.PDF_file_name = 'STILL_TO_COME.pdf'
            page.body = '\n' + page_info['body'] + '\n'
            page.search_text = search_text
            page.parent_link = parent_link
            page.next_link = next_link
            page.prev_link = prev_link
            page.local_toc = local_toc
            page.save()
        else:
            defaults = {'revision_changeset': \
                                        sphinx_settings['revision_changeset'],
                        'link_name': link_name,
                        'html_title': page_info['title'],
                        'is_toc': is_toc or is_chapter_index,
                        'source_name': unsplit_source_name,
                        'PDF_file_name': 'STILL_TO_COME.pdf',
                        'number_of_HTML_visits': 0,
                        'body': '\n' + page_info['body'] + '\n',
                        'search_text': search_text,
                        'parent_link': parent_link,
                        'next_link': next_link,
                        'prev_link': prev_link,
                        'local_toc': local_toc,}
            created = models.Page.objects.create(**defaults)

        file_linkname_map[app.srcdir + os.sep + fname + \
                                     app.env.config.source_suffix] = link_name

    log_file.info('PUBLISH: pages saved to the database.')

    # Next, deal with the comment references
    # ---------------------------------------------
    to_update = []
    to_remove = []
    orphans = []
    prior_references = models.CommentReference.objects.all()

    # Only if we used a fresh environment.  Because then all the comment
    # references are regenerated.
    if conf.use_freshenv:
        for item in prior_references:
            orphans.append(item.comment_root)

    for item in sphinx_settings['comment_refs']:
        # First check whether this comment reference exists in the database;
        # If not, add it.  If it does exist, add it to the list of references
        # to update next.
        defaults={'revision_changeset': sphinx_settings['revision_changeset'],
                  'file_name': item.source,
                  'page_link_name': file_linkname_map[item.link_name],
                  'node_type': item.node,
                  'line_number': item.line,
                  'comment_root': item.root,
                  'comment_root_is_used': False}
        ref, created = models.CommentReference.objects.get_or_create(
            comment_root=item.root,  # comment_root is a unique field
            defaults=defaults)
        if not created:
            try:
                orphans.remove(item.root)
            except ValueError:
                pass
            to_update.append(item)

    # Update the references that already exist in the DB.  In most cases these
    # references are used as ForeignKeys in ``Comment`` objects.
    # The (very unusual) case when they don't exist in the DB is when the RST
    # repo is processed the first time and there happen to be ucomment
    # directives in the RST source.  In this case we would have created a
    # comment reference in the code above (using ``get_or_create()``)

    for item in to_update:
        ref = prior_references.filter(comment_root=item.root)[0]
        ref.revision_changeset = sphinx_settings['revision_changeset']
        ref.file_name = item.source
        ref.node_type = item.node
        ref.line_number = item.line
        ref.date_added = datetime.datetime.now()
        ref.save()
        # The above code is quite useful: if the author ever happens to move the
        # ucomment directives around, even to a different file, the comments
        # associated with that reference will still appear at the new location.

    # Orphans occur if the user removed the ucomment directive from the RST
    # source.
    # They are problematic only if they happen to have an associated ``Comment``
    # object in the database (which is expected, since a CommentReference is
    # created the same time ).
    for item in orphans[:]:
        # Remove comment references from the list that don't have comments.
        ref = prior_references.filter(comment_root=item)[0]
        if ref.comment_set.exists() == False:
            orphans.remove(item)
            to_remove.append(item)

    # It is safe to remove these references, because they do not have any
    # comments associated with them (CommentReference objects only appear as
    # ForeignKeys in ``Comment`` objects.
    for remove_root in to_remove:
        to_remove = prior_references.filter(comment_root=remove_root)
        if to_remove:
            to_remove[0].delete()

    for orphan_id in orphans:
        # These arise when comment references are removed from the text by the
        # author.  But, these references still have comments associated with
        # them in the database, but are not made available on any page,
        # nor do they have a valid comment reference.

        orphan = prior_references.filter(comment_root = orphan_id)[0]

        # Create an unreachable page (starts with '_')
        defaults = {'revision_changeset': sphinx_settings['revision_changeset'],
                    'link_name': '_orphans_',
                    'html_title': '_orphans_',
                    'is_toc': False,
                    'source_name': '_orphans_',
                    'PDF_file_name': '_orphans_',
                    'number_of_HTML_visits': 0,
                    'body': '_orphans_',
                    'search_text': '',
                    'parent_link': None,
                    'next_link': None,
                    'prev_link': None,
                    'local_toc': '',}
        orphan_page, created = models.Page.objects.get_or_create(
                                                        link_name='_orphans_',
                                                        defaults=defaults)
        # Create an comment reference that would not normally be created
        defaults = {'revision_changeset': '-1',
                    'file_name': '_orphans_',
                    'node_type': '_orphan_',
                    'line_number': 0,
                    'comment_root': '_ORFN_',
                    'comment_root_is_used': True}
        orphan_ref, created = models.CommentReference.objects.get_or_create(
                                                        comment_root='_ORFN_',
                                                        defaults=defaults)

        # Try to de-orphan any comments on subsequent republishing (author may
        # have realized the mistake and brought the node back).
        if orphan_id == '_ORFN_':

            # It's a little hard to go back from the orphaned comment to find
            # its original reference.  But we will use the fact the re-created
            # reference's comment_root will be the same as the orphaned
            # comment's parent (or at least the first ``conf.root_node_length``
            # characters of the parent).
            for comment in orphan.comment_set.all():
                former_parent = comment.parent[0:conf.root_node_length]
                new_parent = prior_references.filter(comment_root=former_parent)
                if len(new_parent):
                    comment.reference = new_parent[0]

                    # Now find the page on which that comment reference is used
                    all_pages = models.Page.objects.filter(
                                        link_name=new_parent[0].page_link_name)
                    if len(all_pages):
                        comment.page = all_pages[0]

                    comment.save()

                    log_file.warn(('PUBLISH: re-parented the orphan comment '
                                   'reference %s; now appears on page "%s".')%\
                                    (comment.reference.comment_root,
                                     new_parent[0].page_link_name))

        n_orphans = 0
        for comment in orphan.comment_set.all():
            comment.reference = orphan_ref
            comment.page = orphan_page
            comment.save()
            n_orphans += 1

            log_file.warn(('PUBLISH: dealt with comment reference orphan: %s; '
                           'was orphaned between revision %s (last known use) '
                           'and revision %s (current).  Has %d associated '
                           'comments.') % (orphan_id, orphan.revision_changeset,
                           sphinx_settings['revision_changeset'], n_orphans))

# Dumping and loading fixtures
# ----------------------------
def dump_relevent_fixtures(request):
    """
    Dumps certain model types to fixture files.
    """
    if not request.user.is_authenticated():
        return HttpResponseRedirect(django_reverse('ucomment-admin-signin'))

    style = 'xml'

    fixtures = (    (models.Comment, 'Comment.'+style),
                    (models.CommentReference, 'CommentReference.'+style),
                    (models.CommentPoster, 'CommentPoster.'+style),
                    (models.Hit, 'Hit.'+style),
                )
    log_file.debug('FIXTURES: About to dump fixtures to file.')
    for model, filename in fixtures:
        data = serializers.serialize(style, model.objects.all(), indent=2,
                                     use_natural_keys = True)

        try:
            ensuredir(conf.django_fixtures_dir)
            full_filename = os.path.join(conf.django_fixtures_dir, filename)
            f = file(full_filename, 'w')
            try:
                f.write(data)
            finally:
                f.close()
            log_file.info('FIXTURES: Dumped %s objects to %s' %\
                           (model, full_filename))
        except IOError:
            return HttpResponse(('An IOError occurred while writing %s '
                                 'objects to the fixture file: %s' % \
                                 (model, filename)), status=200)

    return HttpResponse(('All fixtures successfully saved to %s' % \
                         conf.django_fixtures_dir), status=200)

def load_from_fixtures(request):
    pass
    #TODO(KGD): still to come

# Searching the document text
# ---------------------------
def sanitize_search_text(text):
    """
    Cleans the RST source code to remove:

    * .. ucomment:: directives
    * Underlines for headings: e.g. "-----"
    * inline math roles :math:`....` (leaves the part inside the role beind
    * cross-references: e.g.  .. _my-cross-reference:
    * .. rubric:: XYZ is left just as XYZ

    TODO(KGD):
    * table_row = re.compile(r'(={2,})|(\+-{2,})')
    * figure and image directives; and the options that go with them

    """
    text_list = text.split('\n')
    ucomment_lines = re.compile(r'^\s*\.\. ucomment::\s*(.*?):')
    VALID_TITLES = ['!', '"', '#', '$', '%', "'", '(', ')', '*', '+',
                    ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
                    '@', '[', '\\', ']', '^', '_', '`', r'{', '|',
                    '}', '~']

    div_re_str = r'^'
    for entry in VALID_TITLES:
        if entry in ['^', '*', '+', '$', '(', ')', '-', '.', '?', '[', ']',
                      '\\', '{', '}', '|']:
            entry = '\\' + entry

        div_re_str += entry + r'{3,}|'
    div_re_str = div_re_str[0:-1] + '$'
    div_re = re.compile(div_re_str)

    crossref_re = re.compile(r'^\s*\.\. _(.*?):')
    rubric_re = re.compile(r'^\s*(\.\. rubric:: )(.*?)')

    # Remove the :math:`...` part, leaving only the ... portion behind.
    math_role = re.compile(r'(:math:`)(.*?)(`)')

    out = []
    for line in text_list:
        line = math_role.sub('\g<2>', line)
        line = rubric_re.sub('\g<2>', line)
        out.append(line)
        if ucomment_lines.match(line):
            out.pop()
            continue
        if div_re.match(line):
            out.pop()
            continue
        if crossref_re.match(line):
            out.pop()
            continue

    return '\n'.join(out)


def format_search_pages_for_web(pages, context, with_case):
    """
    Receives a dictionary.  The keys are ``Page`` objects, and the corresponding
    values are the list of words that appear on that page.

    Will format these into appropriate HTML string output that is sent to the
    user.

    Uses ``context`` number of characters around the search word to display
    in the results.

    The ``with_case`` input will either ``True`` (indicating case-sensitive
    search was requested), or ``False``.
    """
    def common_span(spanA, spanB):
        """
        Takes two tuples (containing 2 integers) and returns the largest region
        spanned, provided they overlap.  If no overlap, then return two spans
        exactly as they were received.

        Can be improved later on to avoid really large spans by taking
        some sort of compromised span.
        """
        # First sort them
        swapped = False
        if spanA[0] > spanB[0]:
            spanA, spanB = spanB, spanA
            swapped = True
        # If they overlap, return the intersection of the spans
        if spanA[1] > spanB[0]:
            return ((spanA[0], max(spanA[1], spanB[1])), )
        elif swapped:
            return (spanB, spanA)
        else:
            return (spanA, spanB)

    def clean_search_output_for_web(text):
        """
        Returns a cleaned version of ``text`` suitable for display in a browser.
        * Removes newline markers
        * Converts the string to web-safe output
        """
        text = re.sub('\n', ' ', text)
        #text = html.strip_tags(text)
        text = django_html.escape(text)
        # replace('&', '&amp;')
        #.replace('<', '&lt;')
        #.replace('>', '&gt;')
        #.replace('"', '&quot;')
        #.replace("'", '&#39;'))
        return text


    results = defaultdict(list)
    page_counts = {}
    n_pages = 0
    for page, search_words in pages.iteritems():
        # For each word, find the first N (use N=3) appearances in the text.
        # Get the context to the left and right of the word, store the ranges
        # in a list.
        page_text = page.search_text
        maxlen = len(page_text)
        N_instances = 3
        all_spans = []
        page_counts[page] = 0
        for word in search_words:
            # Find these words, ensuring they are whole words only, where the
            # definition is locale (re.L) and unicode (re.U) dependent.
            if with_case:
                word_iter = re.finditer(r'\b' + word + r'\b', page_text,
                                        re.L + re.U)
            else:
                word_iter = re.finditer(r'\b' + word + r'\b' , page_text,
                                        re.I + re.L + re.U)
            for idx, reobj in enumerate(word_iter):
                if idx >= N_instances:
                    # How many entries were there ?
                    page_counts[page] += len(list(word_iter)) + 1
                    break
                span = reobj.span()
                span = (max(0, span[0]-context), min(maxlen, span[1]+context))
                all_spans.append(span)
                page_counts[page] += 1

        # We don't always find the text (i.e. a false result) when using sqlite
        # databases and requesting a case-sensitive search. Just skip over these
        # pages.
        if len(all_spans) == 0:
            continue

        # Now combine all the ranges together, consolidating overlapping regions
        final_spans = {all_spans[0]: None}  # Use a dict to ensure unique spans
        for span in all_spans[1:]:
            keys = final_spans.keys()
            for key in keys:
                overlap = common_span(key, span)
                if len(overlap) == 1:
                    del final_spans[key]
                    final_spans[overlap[0]] = None
                else:
                    final_spans[overlap[1]] = None

        # Extract the text within the range
        display = ''
        startend = '...'
        for span in sorted(final_spans.keys()):
            display += startend + page_text[span[0]:span[1]]
        display += startend

        # Clean-up the text for web rendering
        display = clean_search_output_for_web(display)

        # Finally, highlight the search terms inside ``<span>`` brackets
        for word in search_words:
            if with_case:
                word_re = re.compile(r'(%s)'%word)
            else:
                word_re = re.compile(r'(%s)'%word, re.I)
            display = word_re.sub(\
                          r'<span id="ucomment-search-term">\1</span>', display)


        results[page].append('<li><a href="%s">%s</a>' % (\
                            django_reverse('ucomment-root') + page.link_name +\
                            '/?highlight=' + ' '.join(search_words) + \
                            '&with_case=' + str(with_case), page.html_title))
        if page_counts[page] > 1:
            results[page].append(('<span id="ucomment-search-count">'
                                '[%d times]</span>') % page_counts[page])
        else:
            results[page].append(('<span id="ucomment-search-count">'
                                '[%d time]</span>') % page_counts[page])
        results[page].append('<div id="ucomment-search-result-context">')
        results[page].append('%s</div></li>' % display)
        n_pages += 1

    resp = ['<div id=ucomment-search-results>\n<h2>Search results</h2>']
    if n_pages == 0:
        resp.append('<p>There were no pages matching your query.</p></div>')
        return ''.join(resp)
    elif n_pages == 1:
        resp.append(('Found 1 page matching your search query.'))
    else:
        resp.append(('Found %d pages matching your search query.')%len(pages))

    # How to sort by relevance: crude metric is to sort the pages in order
    # of number of counts from high to low.
    out = sorted(zip(page_counts.values(), page_counts.keys()), reverse=True)
    entries = []
    for item in out:
        # access the dictionary by ``page`` and get the contextual output string
        entries.append('\n'.join(results[item[1]]))

    resp.append('\n\t<ul>\n' + '\t\t\n'.join(entries) + '\t</ul>\n</div>')
    return ''.join(resp)

def search_document(request, search_terms='', search_type='AND',
                       with_case=False):
    """ Will search the document for words within the string ``search_terms``.

    The results will be returned as hyperlinks containing ``CONTEXT`` number of
    characters around the search terms.

    By default and "OR"-based search is performed, so that pages containing
    one or more of the search terms will be returned.  The other alternative is
    to use ``search_type`` as "AND", requiring that all search terms must
    appear on the page.

    By default the search is case-insensitive (``with_case`` is False).
    """
    CONTEXT = 90   # characters around the search term
    if request.method == 'GET':
        search = str(search_terms)
        search_type = str(search_type).strip('/').upper()
        if search_type == '':
            search_type = 'AND'
        with_case = str(with_case).strip('/').lower()=='true'
        if with_case == '':
            with_case = False

    elif request.method == 'POST':
        # This seemingly messy code redirects back to a GET request so that
        # the user can see how the search URL is formed.  Search can be done
        # from the URL: e.g. ``_search/python guido/AND/case=False`` at the end
        # of the URL will search for "python" AND "guido" ignoring case.
        search = request.POST['search_terms']
        search_type = str(request.POST.get('search_type', search_type)).upper()
        with_case = str(request.POST.get('with_case',
                                                   with_case)).lower()=='true'
        return HttpResponseRedirect(\
                    django_reverse('ucomment-search-document') + \
                    search +'/'+ search_type +'/'+ 'case=' + str(with_case))

    start_time = time.time()
    search_for = search.split()

    results = defaultdict(list)
    n_search_words = 0
    for word in search_for:
        # Filter out certain stop words
        if word not in STOP_WORDS:
            if with_case:
                pages = models.Page.objects.filter(search_text__icontains=word)
            else:
                pages = models.Page.objects.filter(
                                           search_text__icontains=word.lower())
            n_search_words += 1
            for page in pages:
                results[page].append(word)

    # If it's an "or" search then we simply display all pages that appear as
    # keys in ``results``.  For an "AND" search, we only display pages that
    # have ``n_search_words`` entries in the corresponding dictionary value.
    if search_type == 'AND':
        out = {}
        for page, found_words in results.iteritems():
            if len(found_words) == n_search_words:
                out[page] = found_words
        results = out

    web_output = format_search_pages_for_web(results, CONTEXT, with_case)

    # Create a psuedo-"Page" object containing the search results and return
    # that to the user.  It is infact a named tuple, which has the same
    # behaviour as a ``Page`` object
    page = namedtuple('Page', ('revision_changeset next_link prev_link sidebar '
                      'parent_link html_title body local_toc link_name is_toc '
                      'number_of_HTML_visits updated_on'))
    search_output = page(revision_changeset='',
                         next_link = None,
                         prev_link = None,
                         parent_link = models.Link(),
                         html_title = 'Search results',
                         body = web_output,
                         local_toc = '',
                         is_toc = True, # prevents sidebar
                         sidebar = '',  # but still set it to empty
                         number_of_HTML_visits = 0,
                         updated_on = datetime.datetime.now(),
                         link_name = request.path.lstrip(\
                                       django_reverse('ucomment-root')[0:-1]))

    log_file.info('SEARCH: "%s" :: took %f secs' % (search,
                                                     time.time() - start_time))
    return render_page_for_web(search_output, request, search)

def admin_signin(request):
    """
    Perform administrator/author features for the application.

    * (Re)publish the document to the web
    * Dump all fixtures to disk - for backup purposes.
    """
    if request.user.is_authenticated():
        msg = ('<ul>'
               '<li><a href="%s">Table of Contents for your document</a>'
               '<li><a href="%s">The Django admin page for your site</a>'
               '<li><a href="%s">Publish or update the document</a>'
               '<li>Backup your application by <a href="%s">dumping objects '
               'to fixtures</a>') % \
               (django_reverse('ucomment-root'),
                django_reverse('admin:index'),
                django_reverse('ucomment-publish-update-document'),
                django_reverse('ucomment-dump-fixtures'))
        return HttpResponse(msg, status=200)
    elif request.method == 'GET':
        log_file.info('Entering the admin section; IP = %s' % \
                                                (get_IP_address(request)))
        context = {}
        context.update(csrf(request))
        msg = ( '<p>Please sign-in first with your Django (admin) credentials:'
                r'<form action="%s" method="POST">{%% csrf_token %%}'
                '<label for="username">Username:</label>'
                '    <input type="text" name="username" /><br />'
                '<label for="password">Password:</label>'
                '    <input type="password" name="password" />'
                '<input type="submit" value="Log in"></form>') % (request.path)
        resp = template.Template(msg)
        html_body = resp.render(template.Context(context))
        return HttpResponse(html_body, status=200)

    elif request.method == 'POST':
        username = request.POST.get('username', '')
        password = request.POST.get('password', '')
        user = django_auth.authenticate(username=username, password=password)
        if user is not None and user.is_active:
            django_auth.login(request, user)
            return HttpResponseRedirect(request.path)
        else:
            # TODO(KGD): respond that user does not exist, or that password
            #            is incorrect;  redirect to log in page again.
            return HttpResponseRedirect(request.path)