asciidoc.py - Program constants. Allowed substitution optio…

/asciidoc-8.4.5/asciidoc.py

# · Python · 5315 lines · 4939 code · 65 blank · 311 comment · 538 complexity · 3d8c49106162df3e11f65d3c3dd23451 MD5 · raw file
Large files are truncated click here to view the full file

#!/usr/bin/env python
"""
asciidoc - converts an AsciiDoc text file to DocBook, HTML or LinuxDoc

Copyright (C) 2002-2009 Stuart Rackham. Free use of this software is granted
under the terms of the GNU General Public License (GPL).
"""

import sys, os, re, time, traceback, tempfile, subprocess, codecs, locale

### Used by asciidocapi.py ###
VERSION = '8.4.5'   # See CHANGLOG file for version history.

MIN_PYTHON_VERSION = 2.4   # Require this version of Python or better.

#---------------------------------------------------------------------------
# Program constants.
#---------------------------------------------------------------------------
DEFAULT_BACKEND = 'xhtml11'
DEFAULT_DOCTYPE = 'article'
# Allowed substitution options for List, Paragraph and DelimitedBlock
# definition subs entry.
SUBS_OPTIONS = ('specialcharacters','quotes','specialwords',
    'replacements', 'attributes','macros','callouts','normal','verbatim',
    'none','replacements2')
# Default value for unspecified subs and presubs configuration file entries.
SUBS_NORMAL = ('specialcharacters','quotes','attributes',
    'specialwords','replacements','macros','replacements2')
SUBS_VERBATIM = ('specialcharacters','callouts')

NAME_RE = r'(?u)[^\W\d][-\w]*'  # Valid section or attrbibute name.


#---------------------------------------------------------------------------
# Utility functions and classes.
#---------------------------------------------------------------------------

class EAsciiDoc(Exception): pass

class OrderedDict(dict):
    """
    Dictionary ordered by insertion order.
    Python Cookbook: Ordered Dictionary, Submitter: David Benjamin.
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
    """
    def __init__(self, d=None, **kwargs):
        self._keys = []
        if d is None: d = kwargs
        dict.__init__(self, d)
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._keys.remove(key)
    def __setitem__(self, key, item):
        dict.__setitem__(self, key, item)
        if key not in self._keys: self._keys.append(key)
    def clear(self):
        dict.clear(self)
        self._keys = []
    def copy(self):
        d = dict.copy(self)
        d._keys = self._keys[:]
        return d
    def items(self):
        return zip(self._keys, self.values())
    def keys(self):
        return self._keys
    def popitem(self):
        try:
            key = self._keys[-1]
        except IndexError:
            raise KeyError('dictionary is empty')
        val = self[key]
        del self[key]
        return (key, val)
    def setdefault(self, key, failobj = None):
        dict.setdefault(self, key, failobj)
        if key not in self._keys: self._keys.append(key)
    def update(self, d=None, **kwargs):
        if d is None:
            d = kwargs
        dict.update(self, d)
        for key in d.keys():
            if key not in self._keys: self._keys.append(key)
    def values(self):
        return map(self.get, self._keys)

class AttrDict(dict):
    """
    Like a dictionary except values can be accessed as attributes i.e. obj.foo
    can be used in addition to obj['foo'].
    If an item is not present None is returned.
    """
    def __getattr__(self, key):
        try: return self[key]
        except KeyError, k: return None
    def __setattr__(self, key, value):
        self[key] = value
    def __delattr__(self, key):
        try: del self[key]
        except KeyError, k: raise AttributeError, k
    def __repr__(self):
        return '<AttrDict ' + dict.__repr__(self) + '>'
    def __getstate__(self):
        return dict(self)
    def __setstate__(self,value):
        for k,v in value.items(): self[k]=v

class Trace(object):
    """
    Used in conjunction with the 'trace' attribute to generate diagnostic
    output. There is a single global instance of this class named trace.
    """
    SUBS_NAMES = ('specialcharacters','quotes','specialwords',
                  'replacements', 'attributes','macros','callouts',
                  'replacements2')
    def __init__(self):
        self.name_re = ''        # Regexp pattern to match trace names.
        self.linenos = True
        self.offset = 0
    def __call__(self, name, before, after=None):
        """
        Print trace message if tracing is on and the trace 'name' matches the
        document 'trace' attribute (treated as a regexp).
        The 'before' and 'after' messages are only printed if they differ.
        """
        name_re = document.attributes.get('trace')
        if name_re == 'subs':    # Alias for all the inline substitutions.
            name_re = '|'.join(self.SUBS_NAMES)
        self.name_re = name_re
        if self.name_re is not None:
            msg = message.format(name, 'TRACE: ', self.linenos, offset=self.offset)
            if before != after and re.match(self.name_re,name):
                if is_array(before):
                    before = '\n'.join(before)
                if after is None:
                    msg += '\n%s\n' % before
                else:
                    if is_array(after):
                        after = '\n'.join(after)
                    msg += '\n<<<\n%s\n>>>\n%s\n' % (before,after)
                message.stderr(msg)

class Message:
    """
    Message functions.
    """
    def __init__(self):
        self.linenos = None     # Used to globally override line numbers.
        self.messages = []

    def stderr(self,line=''):
        self.messages.append(line)
        if __name__ == '__main__':
            sys.stderr.write(line+os.linesep)

    def verbose(self, msg,linenos=True):
        if config.verbose:
            msg = self.format(msg,linenos=linenos)
            self.stderr(msg)

    def warning(self, msg,linenos=True,offset=0):
        msg = self.format(msg,'WARNING: ',linenos,offset=offset)
        document.has_warnings = True
        self.stderr(msg)

    def deprecated(self, msg, linenos=True):
        msg = self.format(msg, 'DEPRECATED: ', linenos)
        self.stderr(msg)

    def format(self, msg, prefix='', linenos=True, cursor=None, offset=0):
        """Return formatted message string."""
        if self.linenos is not False and ((linenos or self.linenos) and reader.cursor):
            if cursor is None:
                cursor = reader.cursor
            prefix += '%s: line %d: ' % (os.path.basename(cursor[0]),cursor[1]+offset)
        return prefix + msg

    def error(self, msg, cursor=None, halt=False):
        """
        Report fatal error.
        If halt=True raise EAsciiDoc exception.
        If halt=False don't exit application, continue in the hope of reporting
        all fatal errors finishing with a non-zero exit code.
        """
        if halt:
            raise EAsciiDoc, self.format(msg,linenos=False,cursor=cursor)
        else:
            msg = self.format(msg,'ERROR: ',cursor=cursor)
            self.stderr(msg)
            document.has_errors = True

    def unsafe(self, msg):
        self.error('unsafe: '+msg)


def file_in(fname, directory):
    """Return True if file fname resides inside directory."""
    assert os.path.isfile(fname)
    # Empty directory (not to be confused with None) is the current directory.
    if directory == '':
        directory = os.getcwd()
    else:
        assert os.path.isdir(directory)
        directory = os.path.realpath(directory)
    fname = os.path.realpath(fname)
    return os.path.commonprefix((directory, fname)) == directory

def safe():
    return document.safe

def is_safe_file(fname, directory=None):
    # A safe file must reside in directory directory (defaults to the source
    # file directory).
    if directory is None:
        if document.infile == '<stdin>':
           return not safe()
        directory = os.path.dirname(document.infile)
    elif directory == '':
        directory = '.'
    return not safe() or file_in(fname, directory)

def safe_filename(fname, parentdir):
    """
    Return file name which must reside in the parent file directory.
    Return None if file is not found or not safe.
    """
    if not os.path.isabs(fname):
        # Include files are relative to parent document
        # directory.
        fname = os.path.join(parentdir,fname)
    if not os.path.isfile(fname):
        message.warning('include file not found: %s' % fname)
        return None
    if not is_safe_file(fname, parentdir):
        message.unsafe('include file: %s' % fname)
        return None
    return fname

def assign(dst,src):
    """Assign all attributes from 'src' object to 'dst' object."""
    for a,v in src.__dict__.items():
        setattr(dst,a,v)

def strip_quotes(s):
    """Trim white space and, if necessary, quote characters from s."""
    s = s.strip()
    # Strip quotation mark characters from quoted strings.
    if len(s) >= 3 and s[0] == '"' and s[-1] == '"':
        s = s[1:-1]
    return s

def is_re(s):
    """Return True if s is a valid regular expression else return False."""
    try: re.compile(s)
    except: return False
    else: return True

def re_join(relist):
    """Join list of regular expressions re1,re2,... to single regular
    expression (re1)|(re2)|..."""
    if len(relist) == 0:
        return None
    result = []
    # Delete named groups to avoid ambiguity.
    for s in relist:
        result.append(re.sub(r'\?P<\S+?>','',s))
    result = ')|('.join(result)
    result = '('+result+')'
    return result

def validate(value,rule,errmsg):
    """Validate value against rule expression. Throw EAsciiDoc exception with
    errmsg if validation fails."""
    try:
        if not eval(rule.replace('$',str(value))):
            raise EAsciiDoc,errmsg
    except Exception:
        raise EAsciiDoc,errmsg
    return value

def lstrip_list(s):
    """
    Return list with empty items from start of list removed.
    """
    for i in range(len(s)):
        if s[i]: break
    else:
        return []
    return s[i:]

def rstrip_list(s):
    """
    Return list with empty items from end of list removed.
    """
    for i in range(len(s)-1,-1,-1):
        if s[i]: break
    else:
        return []
    return s[:i+1]

def strip_list(s):
    """
    Return list with empty items from start and end of list removed.
    """
    s = lstrip_list(s)
    s = rstrip_list(s)
    return s

def is_array(obj):
    """
    Return True if object is list or tuple type.
    """
    return isinstance(obj,list) or isinstance(obj,tuple)

def dovetail(lines1, lines2):
    """
    Append list or tuple of strings 'lines2' to list 'lines1'.  Join the last
    non-blank item in 'lines1' with the first non-blank item in 'lines2' into a
    single string.
    """
    assert is_array(lines1)
    assert is_array(lines2)
    lines1 = strip_list(lines1)
    lines2 = strip_list(lines2)
    if not lines1 or not lines2:
        return list(lines1) + list(lines2)
    result = list(lines1[:-1])
    result.append(lines1[-1] + lines2[0])
    result += list(lines2[1:])
    return result

def dovetail_tags(stag,content,etag):
    """Merge the end tag with the first content line and the last
    content line with the end tag. This ensures verbatim elements don't
    include extraneous opening and closing line breaks."""
    return dovetail(dovetail(stag,content), etag)

def parse_attributes(attrs,dict):
    """Update a dictionary with name/value attributes from the attrs string.
    The attrs string is a comma separated list of values and keyword name=value
    pairs. Values must preceed keywords and are named '1','2'... The entire
    attributes list is named '0'. If keywords are specified string values must
    be quoted. Examples:

    attrs: ''
    dict: {}

    attrs: 'hello,world'
    dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}

    attrs: '"hello", planet="earth"'
    dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'}
    """
    def f(*args,**keywords):
        # Name and add aguments '1','2'... to keywords.
        for i in range(len(args)):
            if not str(i+1) in keywords:
                keywords[str(i+1)] = args[i]
        return keywords

    if not attrs:
        return
    dict['0'] = attrs
    # Replace line separators with spaces so line spanning works.
    s = re.sub(r'\s', ' ', attrs)
    try:
        d = eval('f('+s+')')
        # Attributes must evaluate to strings, numbers or None.
        for v in d.values():
            if not (isinstance(v,str) or isinstance(v,int) or isinstance(v,float) or v is None):
                raise
    except Exception:
        s = s.replace('"','\\"')
        s = s.split(',')
        s = map(lambda x: '"' + x.strip() + '"', s)
        s = ','.join(s)
        try:
            d = eval('f('+s+')')
        except Exception:
            return  # If there's a syntax error leave with {0}=attrs.
        for k in d.keys():  # Drop any empty positional arguments.
            if d[k] == '': del d[k]
    dict.update(d)
    assert len(d) > 0

def parse_named_attributes(s,attrs):
    """Update a attrs dictionary with name="value" attributes from the s string.
    Returns False if invalid syntax.
    Example:
    attrs: 'star="sun",planet="earth"'
    dict: {'planet':'earth', 'star':'sun'}
    """
    def f(**keywords): return keywords

    try:
        d = eval('f('+s+')')
        attrs.update(d)
        return True
    except Exception:
        return False

def parse_list(s):
    """Parse comma separated string of Python literals. Return a tuple of of
    parsed values."""
    try:
        result = eval('tuple(['+s+'])')
    except Exception:
        raise EAsciiDoc,'malformed list: '+s
    return result

def parse_options(options,allowed,errmsg):
    """Parse comma separated string of unquoted option names and return as a
    tuple of valid options. 'allowed' is a list of allowed option values.
    If allowed=() then all legitimate names are allowed.
    'errmsg' is an error message prefix if an illegal option error is thrown."""
    result = []
    if options:
        for s in re.split(r'\s*,\s*',options):
            if (allowed and s not in allowed) or not is_name(s):
                raise EAsciiDoc,'%s: %s' % (errmsg,s)
            result.append(s)
    return tuple(result)

def symbolize(s):
    """Drop non-symbol characters and convert to lowercase."""
    return re.sub(r'(?u)[^\w\-_]', '', s).lower()

def is_name(s):
    """Return True if s is valid attribute, macro or tag name
    (starts with alpha containing alphanumeric and dashes only)."""
    return re.match(r'^'+NAME_RE+r'$',s) is not None

def subs_quotes(text):
    """Quoted text is marked up and the resulting text is
    returned."""
    keys = config.quotes.keys()
    for q in keys:
        i = q.find('|')
        if i != -1 and q != '|' and q != '||':
            lq = q[:i]      # Left quote.
            rq = q[i+1:]    # Right quote.
        else:
            lq = rq = q
        tag = config.quotes[q]
        # Unconstrained quotes prefix the tag name with a hash.
        if tag[0] == '#':
            tag = tag[1:]
            # Unconstrained quotes can appear anywhere.
            reo = re.compile(r'(?msu)(^|.)(\[(?P<attrlist>[^[\]]+?)\])?' \
                    + r'(?:' + re.escape(lq) + r')' \
                    + r'(?P<content>.+?)(?:'+re.escape(rq)+r')')
        else:
            # The text within constrained quotes must be bounded by white space.
            # Non-word (\W) characters are allowed at boundaries to accomodate
            # enveloping quotes.
            reo = re.compile(r'(?msu)(^|\W)(\[(?P<attrlist>[^[\]]+?)\])?' \
                + r'(?:' + re.escape(lq) + r')' \
                + r'(?P<content>\S|\S.*?\S)(?:'+re.escape(rq)+r')(?=\W|$)')
        pos = 0
        while True:
            mo = reo.search(text,pos)
            if not mo: break
            if text[mo.start()] == '\\':
                # Delete leading backslash.
                text = text[:mo.start()] + text[mo.start()+1:]
                # Skip past start of match.
                pos = mo.start() + 1
            else:
                attrlist = {}
                parse_attributes(mo.group('attrlist'), attrlist)
                stag,etag = config.tag(tag, attrlist)
                s = mo.group(1) + stag + mo.group('content') + etag
                text = text[:mo.start()] + s + text[mo.end():]
                pos = mo.start() + len(s)
    return text

def subs_tag(tag,dict={}):
    """Perform attribute substitution and split tag string returning start, end
    tag tuple (c.f. Config.tag())."""
    if not tag:
        return [None,None]
    s = subs_attrs(tag,dict)
    if not s:
        message.warning('tag \'%s\' dropped: contains undefined attribute' % tag)
        return [None,None]
    result = s.split('|')
    if len(result) == 1:
        return result+[None]
    elif len(result) == 2:
        return result
    else:
        raise EAsciiDoc,'malformed tag: %s' % tag

def parse_entry(entry, dict=None, unquote=False, unique_values=False,
        allow_name_only=False, escape_delimiter=True):
    """Parse name=value entry to dictionary 'dict'. Return tuple (name,value)
    or None if illegal entry.
    If name= then value is set to ''.
    If name and allow_name_only=True then value is set to ''.
    If name! and allow_name_only=True then value is set to None.
    Leading and trailing white space is striped from 'name' and 'value'.
    'name' can contain any printable characters.
    If the '=' delimiter character is allowed in  the 'name' then
    it must be escaped with a backslash and escape_delimiter must be True.
    If 'unquote' is True leading and trailing double-quotes are stripped from
    'name' and 'value'.
    If unique_values' is True then dictionary entries with the same value are
    removed before the parsed entry is added."""
    if escape_delimiter:
        mo = re.search(r'(?:[^\\](=))',entry)
    else:
        mo = re.search(r'(=)',entry)
    if mo:  # name=value entry.
        if mo.group(1):
            name = entry[:mo.start(1)]
            if escape_delimiter:
                name = name.replace(r'\=','=')  # Unescape \= in name.
            value = entry[mo.end(1):]
    elif allow_name_only and entry:         # name or name! entry.
        name = entry
        if name[-1] == '!':
            name = name[:-1]
            value = None
        else:
            value = ''
    else:
        return None
    if unquote:
        name = strip_quotes(name)
        if value is not None:
            value = strip_quotes(value)
    else:
        name = name.strip()
        if value is not None:
            value = value.strip()
    if not name:
        return None
    if dict is not None:
        if unique_values:
            for k,v in dict.items():
                if v == value: del dict[k]
        dict[name] = value
    return name,value

def parse_entries(entries, dict, unquote=False, unique_values=False,
        allow_name_only=False,escape_delimiter=True):
    """Parse name=value entries from  from lines of text in 'entries' into
    dictionary 'dict'. Blank lines are skipped."""
    entries = config.expand_templates(entries)
    for entry in entries:
        if entry and not parse_entry(entry, dict, unquote, unique_values,
                allow_name_only, escape_delimiter):
            raise EAsciiDoc,'malformed section entry: %s' % entry

def load_conf_file(sections, fname, dir, namepat=NAME_RE):
    """Loads sections dictionary with sections from file fname.
    Existing sections are overlaid. Silently skips missing configuration
    files."""
    if dir:
        fname = os.path.join(dir, fname)
    # Sliently skip missing configuration file.
    if not os.path.isfile(fname):
        return
    reo = re.compile(r'^\[(?P<section>'+namepat+')\]\s*$')
    section,contents = '',[]
    for line in open(fname):
        if line and line[0] == '#': # Skip comment lines.
            continue
        line = line.rstrip()
        found = reo.findall(line)
        if found:
            if section:             # Store previous section.
                sections[section] = contents
            section = found[0].lower()
            contents = []
        else:
            contents.append(line)
    if section and contents:        # Store last section.
        sections[section] = contents

def dump_section(name,dict,f=sys.stdout):
    """Write parameters in 'dict' as in configuration file section format with
    section 'name'."""
    f.write('[%s]%s' % (name,writer.newline))
    for k,v in dict.items():
        k = str(k)
        k = k.replace('=',r'\=')    # Escape = in name.
        # Quote if necessary.
        if len(k) != len(k.strip()):
            k = '"'+k+'"'
        if v and len(v) != len(v.strip()):
            v = '"'+v+'"'
        if v is None:
            # Don't dump undefined attributes.
            continue
        else:
            s = k+'='+v
        if s[0] == '#':
            s = '\\' + s    # Escape so not treated as comment lines.
        f.write('%s%s' % (s,writer.newline))
    f.write(writer.newline)

def update_attrs(attrs,dict):
    """Update 'attrs' dictionary with parsed attributes in dictionary 'dict'."""
    for k,v in dict.items():
        if not is_name(k):
            raise EAsciiDoc,'illegal attribute name: %s' % k
        attrs[k] = v

def filter_lines(filter_cmd, lines, attrs={}):
    """
    Run 'lines' through the 'filter_cmd' shell command and return the result.
    The 'attrs' dictionary contains additional filter attributes.
    """
    def findfilter(name,dir,filter):
        """Find filter file 'fname' with style name 'name' in directory
        'dir'. Return found file path or None if not found."""
        if name:
            result = os.path.join(dir,'filters',name,filter)
            if os.path.isfile(result):
                return result
        result = os.path.join(dir,'filters',filter)
        if os.path.isfile(result):
            return result
        return None

    # Return input lines if there's not filter.
    if not filter_cmd or not filter_cmd.strip():
        return lines
    # Perform attributes substitution on the filter command.
    s = subs_attrs(filter_cmd, attrs)
    if not s:
        raise EAsciiDoc,'undefined filter attribute in command: %s' % filter_cmd
    filter_cmd = s.strip()
    # Parse for quoted and unquoted command and command tail.
    # Double quoted.
    mo = re.match(r'^"(?P<cmd>[^"]+)"(?P<tail>.*)$', filter_cmd)
    if not mo:
        # Single quoted.
        mo = re.match(r"^'(?P<cmd>[^']+)'(?P<tail>.*)$", filter_cmd)
        if not mo:
            # Unquoted catch all.
            mo = re.match(r'^(?P<cmd>\S+)(?P<tail>.*)$', filter_cmd)
    cmd = mo.group('cmd').strip()
    found = None
    if not os.path.dirname(cmd):
        # Filter command has no directory path so search filter directories.
        filtername = attrs.get('style')
        if USER_DIR:
            found = findfilter(filtername, USER_DIR, cmd)
        if not found:
            found = findfilter(filtername, CONF_DIR, cmd)
        if not found:
            found = findfilter(filtername, DATA_DIR, cmd)
        if not found:
            found = findfilter(filtername, APP_DIR, cmd)
    else:
        if os.path.isfile(cmd):
            found = cmd
        else:
            message.warning('filter not found: %s' % cmd)
    if found:
        filter_cmd = '"' + found + '"' + mo.group('tail')
    if sys.platform == 'win32':
        # Windows doesn't like running scripts directly so explicitly
        # specify interpreter.
        if found:
            if cmd.endswith('.py'):
                filter_cmd = 'python ' + filter_cmd
            elif cmd.endswith('.rb'):
                filter_cmd = 'ruby ' + filter_cmd
    message.verbose('filtering: ' + filter_cmd)
    try:
        p = subprocess.Popen(filter_cmd, shell=True,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        output = p.communicate(os.linesep.join(lines))[0]
    except Exception:
        raise EAsciiDoc,'filter error: %s: %s' % (filter_cmd, sys.exc_info()[1])
    if output:
        result = [s.rstrip() for s in output.split(os.linesep)]
    else:
        result = []
    filter_status = p.wait()
    if filter_status:
        message.warning('filter non-zero exit code: %s: returned %d' %
               (filter_cmd, filter_status))
    if lines and not result:
        message.warning('no output from filter: %s' % filter_cmd)
    return result

def system(name, args, is_macro=False):
    """
    Evaluate a system attribute ({name:args}) or system block macro
    (name::[args]). If is_macro is True then we are processing a system
    block macro otherwise it's a system attribute.
    NOTE: The include1 attribute is used internally by the include1::[] macro
    and is not for public use.
    """
    if is_macro:
        syntax = '%s::[%s]'
        separator = '\n'
    else:
        syntax = '{%s:%s}'
        separator = writer.newline
    if name not in ('eval','sys','sys2','include','include1'):
        if is_macro:
            msg = 'illegal system macro name: %s' % name
        else:
            msg = 'illegal system attribute name: %s' % name
        message.warning(msg)
        return None
    if is_macro:
        s = subs_attrs(args)
        if s is None:
            message.warning('skipped %s: undefined attribute in: %s' % (name,args))
            return None
        args = s
    if name != 'include1':
        message.verbose(('evaluating: '+syntax) % (name,args))
    if safe() and name not in ('include','include1'):
        message.unsafe(syntax % (name,args))
        return None
    result = None
    if name == 'eval':
        try:
            result = eval(args)
            if result is True:
                result = ''
            elif result is False:
                result = None
            elif result is not None:
                result = str(result)
        except Exception:
            message.warning((syntax+': expression evaluation error') % (name,args))
    elif name in ('sys','sys2'):
        result = ''
        fd,tmp = tempfile.mkstemp()
        os.close(fd)
        try:
            cmd = args
            cmd = cmd + (' > %s' % tmp)
            if name == 'sys2':
                cmd = cmd + ' 2>&1'
            if os.system(cmd):
                message.warning((syntax+': non-zero exit status') % (name,args))
            try:
                if os.path.isfile(tmp):
                    lines = [s.rstrip() for s in open(tmp)]
                else:
                    lines = []
            except Exception:
                raise EAsciiDoc,(syntax+': temp file read error') % (name,args)
            result = separator.join(lines)
        finally:
            if os.path.isfile(tmp):
                os.remove(tmp)
    elif name == 'include':
        if not os.path.exists(args):
            message.warning((syntax+': file does not exist') % (name,args))
        elif not is_safe_file(args):
            message.unsafe(syntax % (name,args))
        else:
            result = [s.rstrip() for s in open(args)]
            if result:
                result = subs_attrs(result)
                result = separator.join(result)
                result = result.expandtabs(reader.tabsize)
            else:
                result = ''
    elif name == 'include1':
        result = separator.join(config.include1[args])
    else:
        assert False
    return result

def subs_attrs(lines, dictionary=None):
    """Substitute 'lines' of text with attributes from the global
    document.attributes dictionary and from 'dictionary' ('dictionary'
    entries take precedence). Return a tuple of the substituted lines.  'lines'
    containing undefined attributes are deleted. If 'lines' is a string then
    return a string.

    - Attribute references are substituted in the following order: simple,
      conditional, system.
    - Attribute references inside 'dictionary' entry values are substituted.
    """

    def end_brace(text,start):
        """Return index following end brace that matches brace at start in
        text."""
        assert text[start] == '{'
        n = 0
        result = start
        for c in text[start:]:
            # Skip braces that are followed by a backslash.
            if result == len(text)-1 or text[result+1] != '\\':
                if c == '{': n = n + 1
                elif c == '}': n = n - 1
            result = result + 1
            if n == 0: break
        return result

    if type(lines) == str:
        string_result = True
        lines = [lines]
    else:
        string_result = False
        lines = list(lines)
    if dictionary is None:
        attrs = document.attributes
    else:
        # Remove numbered document attributes so they don't clash with
        # attribute list positional attributes.
        attrs = {}
        for k,v in document.attributes.items():
            if not re.match(r'^\d+$', k):
                attrs[k] = v
        # Substitute attribute references inside dictionary values.
        dictionary = dictionary.copy()
        for k,v in dictionary.items():
            if v is None:
                del dictionary[k]
            else:
                v = subs_attrs(str(v))
                if v is None:
                    del dictionary[k]
                else:
                    dictionary[k] = v
        attrs.update(dictionary)
    # Substitute all attributes in all lines.
    for i in range(len(lines)-1,-1,-1): # Reverse iterate lines.
        text = lines[i]
        # Make it easier for regular expressions.
        text = text.replace('\\{','{\\')
        text = text.replace('\\}','}\\')
        # Expand simple attributes ({name}).
        # Nested attributes not allowed.
        reo = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)\}(?!\\)')
        pos = 0
        while True:
            mo = reo.search(text,pos)
            if not mo: break
            s =  attrs.get(mo.group('name'))
            if s is None:
                pos = mo.end()
            else:
                s = str(s)
                text = text[:mo.start()] + s + text[mo.end():]
                pos = mo.start() + len(s)
        # Expand conditional attributes.
        reo = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)' \
                         r'(?P<op>\=|\?|!|#|%|@|\$)'        \
                         r'(?P<value>.*?)\}(?!\\)')
        pos = 0
        while True:
            mo = reo.search(text,pos)
            if not mo: break
            attr = mo.group()
            name =  mo.group('name')
            lval =  attrs.get(name)
            op = mo.group('op')
            # mo.end() is not good enough because '{x={y}}' matches '{x={y}'.
            end = end_brace(text,mo.start())
            rval = text[mo.start('value'):end-1]
            if lval is None:
                if op == '=': s = rval
                elif op == '?': s = ''
                elif op == '!': s = rval
                elif op == '#': s = '{'+name+'}'    # So the line is dropped.
                elif op == '%': s = rval
                elif op in ('@','$'):
                    s = '{'+name+'}'                # So the line is dropped.
                else:
                    assert False, 'illegal attribute: %s' % attr
            else:
                if op == '=': s = lval
                elif op == '?': s = rval
                elif op == '!': s = ''
                elif op == '#': s = rval
                elif op == '%': s = '{zzzzz}'       # So the line is dropped.
                elif op in ('@','$'):
                    v = re.split(r'(?<!\\):',rval)
                    if len(v) not in (2,3):
                        message.error('illegal attribute syntax: %s' % attr)
                        s = ''
                    elif not is_re('^'+v[0]+'$'):
                        message.error('illegal attribute regexp: %s' % attr)
                        s = ''
                    else:
                        v = [s.replace('\\:',':') for s in v]
                        re_mo = re.match('^'+v[0]+'$',lval)
                        if op == '@':
                            if re_mo:
                                s = v[1]            # {<name>@<re>:<v1>[:<v2>]}
                            else:
                                if len(v) == 3:     # {<name>@<re>:<v1>:<v2>}
                                    s = v[2]
                                else:               # {<name>@<re>:<v1>}
                                    s = ''
                        else:
                            if re_mo:
                                if len(v) == 2:     # {<name>$<re>:<v1>}
                                    s = v[1]
                                elif v[1] == '':    # {<name>$<re>::<v2>}
                                    s = '{zzzzz}'   # So the line is dropped.
                                else:               # {<name>$<re>:<v1>:<v2>}
                                    s = v[1]
                            else:
                                if len(v) == 2:     # {<name>$<re>:<v1>}
                                    s = '{zzzzz}'   # So the line is dropped.
                                else:               # {<name>$<re>:<v1>:<v2>}
                                    s = v[2]
                else:
                    assert False, 'illegal attribute: %s' % attr
            s = str(s)
            text = text[:mo.start()] + s + text[end:]
            pos = mo.start() + len(s)
        # Drop line if it contains  unsubstituted {name} references.
        skipped = re.search(r'(?su)\{[^\\\W][-\w]*?\}(?!\\)', text)
        if skipped:
            del lines[i]
            continue;
        # Expand system attributes.
        reo = re.compile(r'(?su)\{(?P<action>[^\\\W][-\w]*?):(?P<expr>.*?)\}(?!\\)')
        skipped = False
        pos = 0
        while True:
            mo = reo.search(text,pos)
            if not mo: break
            expr = mo.group('expr')
            expr = expr.replace('{\\','{')
            expr = expr.replace('}\\','}')
            s = system(mo.group('action'),expr)
            if s is None:
                skipped = True
                break
            text = text[:mo.start()] + s + text[mo.end():]
            pos = mo.start() + len(s)
        # Drop line if the action returns None.
        if skipped:
            del lines[i]
            continue;
        # Remove backslash from escaped entries.
        text = text.replace('{\\','{')
        text = text.replace('}\\','}')
        lines[i] = text
    if string_result:
        if lines:
            return '\n'.join(lines)
        else:
            return None
    else:
        return tuple(lines)

def char_encoding():
    encoding = document.attributes.get('encoding')
    if encoding:
        try:
            codecs.lookup(encoding)
        except LookupError,e:
            raise EAsciiDoc,str(e)
    return encoding

def char_len(s):
    return len(char_decode(s))

def char_decode(s):
    if char_encoding():
        try:
            return s.decode(char_encoding())
        except Exception:
            raise EAsciiDoc, \
                "'%s' codec can't decode \"%s\"" % (char_encoding(), s)
    else:
        return s

def char_encode(s):
    if char_encoding():
        return s.encode(char_encoding())
    else:
        return s

def time_str(t):
    """Convert seconds since the Epoch to formatted local time string."""
    t = time.localtime(t)
    s = time.strftime('%H:%M:%S',t)
    if time.daylight:
        result = s + ' ' + time.tzname[1]
    else:
        result = s + ' ' + time.tzname[0]
    # Attempt to convert the localtime to the output encoding.
    try:
        result = char_encode(result.decode(locale.getdefaultlocale()[1]))
    except Exception:
        pass
    return result

def date_str(t):
    """Convert seconds since the Epoch to formatted local date string."""
    t = time.localtime(t)
    return time.strftime('%Y-%m-%d',t)


class Lex:
    """Lexical analysis routines. Static methods and attributes only."""
    prev_element = None
    prev_cursor = None
    def __init__(self):
        raise AssertionError,'no class instances allowed'
    @staticmethod
    def next():
        """Returns class of next element on the input (None if EOF).  The
        reader is assumed to be at the first line following a previous element,
        end of file or line one.  Exits with the reader pointing to the first
        line of the next element or EOF (leading blank lines are skipped)."""
        reader.skip_blank_lines()
        if reader.eof(): return None
        # Optimization: If we've already checked for an element at this
        # position return the element.
        if Lex.prev_element and Lex.prev_cursor == reader.cursor:
            return Lex.prev_element
        if AttributeEntry.isnext():
            result = AttributeEntry
        elif AttributeList.isnext():
            result = AttributeList
        elif Title.isnext():
            result = Title
        elif macros.isnext():
            result = macros.current
        elif lists.isnext():
            result = lists.current
        elif blocks.isnext():
            result = blocks.current
        elif tables_OLD.isnext():
            result = tables_OLD.current
        elif tables.isnext():
            result = tables.current
        elif BlockTitle.isnext():
            result = BlockTitle
        else:
            if not paragraphs.isnext():
                raise EAsciiDoc,'paragraph expected'
            result = paragraphs.current
        # Optimization: Cache answer.
        Lex.prev_cursor = reader.cursor
        Lex.prev_element = result
        return result

    @staticmethod
    def canonical_subs(options):
        """Translate composite subs values."""
        if len(options) == 1:
            if options[0] == 'none':
                options = ()
            elif options[0] == 'normal':
                options = config.subsnormal
            elif options[0] == 'verbatim':
                options = config.subsverbatim
        return options

    @staticmethod
    def subs_1(s,options):
        """Perform substitution specified in 'options' (in 'options' order) on
        Does not process 'attributes' substitutions."""
        if not s:
            return s
        result = s
        options = Lex.canonical_subs(options)
        for o in options:
            if o == 'specialcharacters':
                result = config.subs_specialchars(result)
            elif o == 'attributes':
                result = subs_attrs(result)
            elif o == 'quotes':
                result = subs_quotes(result)
            elif o == 'specialwords':
                result = config.subs_specialwords(result)
            elif o in ('replacements','replacements2'):
                result = config.subs_replacements(result,o)
            elif o == 'macros':
                result = macros.subs(result)
            elif o == 'callouts':
                result = macros.subs(result,callouts=True)
            else:
                raise EAsciiDoc,'illegal substitution option: %s' % o
            trace(o, s, result)
            if not result:
                break
        return result

    @staticmethod
    def subs(lines,options):
        """Perform inline processing specified by 'options' (in 'options'
        order) on sequence of 'lines'."""
        if not lines or not options:
            return lines
        options = Lex.canonical_subs(options)
        # Join lines so quoting can span multiple lines.
        para = '\n'.join(lines)
        if 'macros' in options:
            para = macros.extract_passthroughs(para)
        for o in options:
            if o == 'attributes':
                # If we don't substitute attributes line-by-line then a single
                # undefined attribute will drop the entire paragraph.
                lines = subs_attrs(para.split('\n'))
                para = '\n'.join(lines)
            else:
                para = Lex.subs_1(para,(o,))
        if 'macros' in options:
            para = macros.restore_passthroughs(para)
        return para.splitlines()

    @staticmethod
    def set_margin(lines, margin=0):
        """Utility routine that sets the left margin to 'margin' space in a
        block of non-blank lines."""
        # Calculate width of block margin.
        lines = list(lines)
        width = len(lines[0])
        for s in lines:
            i = re.search(r'\S',s).start()
            if i < width: width = i
        # Strip margin width from all lines.
        for i in range(len(lines)):
            lines[i] = ' '*margin + lines[i][width:]
        return lines

#---------------------------------------------------------------------------
# Document element classes parse AsciiDoc reader input and write DocBook writer
# output.
#---------------------------------------------------------------------------
class Document:
    def __init__(self):
        self.doctype = None     # 'article','manpage' or 'book'.
        self.backend = None     # -b option argument.
        self.infile = None      # Source file name.
        self.outfile = None     # Output file name.
        self.attributes = {}
        self.level = 0          # 0 => front matter. 1,2,3 => sect1,2,3.
        self.has_errors = False # Set true if processing errors were flagged.
        self.has_warnings = False # Set true if warnings were flagged.
        self.safe = False       # Default safe mode.
    def update_attributes(self):
        # Set implicit attributes.
        if self.infile and os.path.exists(self.infile):
            t = os.path.getmtime(self.infile)
        elif self.infile == '<stdin>':
            t = time.time()
        else:
            t = None
        if t:
            self.attributes['doctime'] = time_str(t)
            self.attributes['docdate'] = date_str(t)
        t = time.time()
        self.attributes['localtime'] = time_str(t)
        self.attributes['localdate'] = date_str(t)
        self.attributes['asciidoc-version'] = VERSION
        self.attributes['backend'] = document.backend
        self.attributes['doctype'] = document.doctype
        self.attributes['backend-'+document.backend] = ''
        self.attributes['doctype-'+document.doctype] = ''
        self.attributes[document.backend+'-'+document.doctype] = ''
        self.attributes['asciidoc-file'] = APP_FILE
        self.attributes['asciidoc-dir'] = APP_DIR
        self.attributes['user-dir'] = USER_DIR
        if self.infile != '<stdin>':
            self.attributes['infile'] = self.infile
            self.attributes['indir'] = os.path.dirname(self.infile)
            self.attributes['docfile'] = self.infile
            self.attributes['docdir'] = os.path.dirname(self.infile)
            self.attributes['docname'] = os.path.splitext(
                    os.path.basename(self.infile))[0]
        if config.verbose:
            self.attributes['verbose'] = ''
        # Update with configuration file attributes.
        self.attributes.update(config.conf_attrs)
        # Update with command-line attributes.
        self.attributes.update(config.cmd_attrs)
        # Extract miscellaneous configuration section entries from attributes.
        config.load_miscellaneous(config.conf_attrs)
        config.load_miscellaneous(config.cmd_attrs)
        self.attributes['newline'] = config.newline
        if self.outfile:
            if self.outfile != '<stdout>':
                self.attributes['outfile'] = self.outfile
                self.attributes['outdir'] = os.path.dirname(self.outfile)
                self.attributes['docname'] = os.path.splitext(
                        os.path.basename(self.outfile))[0]
                ext = os.path.splitext(self.outfile)[1][1:]
            elif config.outfilesuffix:
                ext = config.outfilesuffix[1:]
            else:
                ext = ''
            if ext:
                self.attributes['filetype'] = ext
                self.attributes['filetype-'+ext] = ''
    def load_lang(self,linenos=False):
        """
        Load language configuration file.
        """
        lang = self.attributes.get('lang')
        message.linenos = linenos
        if lang:
            if not config.load_lang(lang):
                message.error('missing language conf file: lang-%s.conf' % lang)
            self.attributes['lang'] = lang  # Reinstate new lang attribute.
        else:
            message.error('language attribute (lang) is not defined')
        message.linenos = None  # Restore default line number behavior.
    def set_deprecated_attribute(self,old,new):
        """
        Ensures the 'old' name of an attribute that was renamed to 'new' is
        still honored.
        """
        if self.attributes.get(new) is None:
            if self.attributes.get(old) is not None:
                self.attributes[new] = self.attributes[old]
        else:
            self.attributes[old] = self.attributes[new]
    def translate(self):
        assert self.doctype in ('article','manpage','book'), \
            'illegal document type'
        assert self.level == 0
        config.expand_all_templates()
        self.load_lang()
        # Skip leading comments and attribute entries.
        finished = False
        attr_count = 0
        while not finished:
            finished = True
            if blocks.isnext() and 'skip' in blocks.current.options:
                finished = False
                blocks.current.translate()
            if macros.isnext() and macros.current.name == 'comment':
                finished = False
                macros.current.translate()
            if AttributeEntry.isnext():
                finished = False
                AttributeEntry.translate()
                if AttributeEntry.name == 'lang':
                    self.load_lang(linenos=True)
                    if attr_count > 0:
                        message.error('lang attribute should be first entry')
                attr_count += 1
        message.verbose('writing: '+writer.fname,False)
        # Process document header.
        has_header =  Lex.next() is Title and Title.level == 0
        if self.doctype == 'manpage' and not has_header:
            message.error('manpage document title is mandatory')
        if has_header:
            Header.translate()
            # Command-line entries override header derived entries.
            self.attributes.update(config.cmd_attrs)
            # DEPRECATED: revision renamed to revnumber.
            self.set_deprecated_attribute('revision','revnumber')
            # DEPRECATED: date renamed to revdate.
            self.set_deprecated_attribute('date','revdate')
            if config.header_footer:
                hdr = config.subs_section('header',{})
                writer.write(hdr,trace='header')
            if self.doctype in ('article','book'):
                # Translate 'preamble' (untitled elements between header
                # and first section title).
                if Lex.next() is not Title:
                    stag,etag = config.section2tags('preamble')
                    writer.write(stag,trace='preamble open')
                    Section.translate_body()
                    writer.write(etag,trace='preamble close')
        else:
            document.process_author_names()
            if config.header_footer:
                hdr = config.subs_section('header',{})
                writer.write(hdr,trace='header')
            if Lex.next() is not Title:
                Section.translate_body()
        # Process remaining sections.
        while not reader.eof():
            if Lex.next() is not Title:
                raise EAsciiDoc,'section title expected'
            Section.translate()
        Section.setlevel(0) # Write remaining unwritten section close tags.
        # Substitute document parameters and write document footer.
        if config.header_footer:
            ftr = config.subs_section('footer',{})
            writer.write(ftr,trace='footer')
    def parse_author(self,s):
        """ Return False if the author is malformed."""
        attrs = self.attributes # Alias for readability.
        s = s.strip()
        mo = re.match(r'^(?P<name1>[^<>\s]+)'
                '(\s+(?P<name2>[^<>\s]+))?'
                '(\s+(?P<name3>[^<>\s]+))?'
                '(\s+<(?P<email>\S+)>)?$',s)
        if not mo:
            message.error('malformed author: %s' % s)
            return False
        firstname = mo.group('name1')
        if mo.group('name3'):
            middlename = mo.group('name2')
            lastname = mo.group('name3')
        else:
            middlename = None
            lastname = mo.group('name2')
        firstname = firstname.replace('_',' ')
        if middlename:
            middlename = middlename.replace('_',' ')
        if lastname:
            lastname = lastname.replace('_',' ')
        email = mo.group('email')
        if firstname:
            attrs['firstname'] = firstname
        if middlename:
            attrs['middlename'] = middlename
        if lastname:
            attrs['lastname'] = lastname
        if email:
            attrs['email'] = email
        return True
    def process_author_names(self):
        """ Calculate any missing author related attributes."""
        attrs = self.attributes # Alias for readability.
        firstname = attrs.get('firstname','')
        middlename = attrs.get('middlename','')
        lastname = attrs.get('lastname','')
        author = attrs.get('author')
        initials = attrs.get('authorinitials')
        if author and not (firstname or middlename or lastname):
            if not self.parse_author(author):
                return
            attrs['author'] = author.replace('_',' ')
            self.process_author_names()
            return
        if not author:
            author = '%s %s %s' % (firstname, middlename, lastname)
            author = author.strip()
            author…
Tech Fingerprint

Standard Library: OS Interaction
Alerts (64)

'def' Ensure functions have docstrings for documentation
56 59 63 65 67 75 78 84 151 156 161 166 192 208 211 494 545 955 964 967 977 1144 1221
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
73 822 826 921 941
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
255
'eval(' Avoid due to security risks; use ast.literal_eval for safer evaluation of literals
275 367 378 396 406
'except Exception:' Catch specific exceptions instead of Exception to avoid masking bugs
277 372 379 399 407 971 994
'isinstance(' Overuse may indicate design issues; consider polymorphism
313 370
'list(' Avoid unnecessary list conversions; use generators where possible
326 327 329 808 1119
'open(' Use 'with open()' to ensure Files are properly closed
566
'type(' Use isinstance() for type checking instead of type()
803
Complexity hotspot; lines 865 to 871 (total complexity: 7)
865 866 867 868 869 870 871
Complexity hotspot; lines 875 to 881 (total complexity: 7)
875 876 877 878 879 880 881