asciidoc.py - Program constants. Allowed substitution optio…

/dev/asciidoc/asciidoc.py

https://code.google.com/ · Python · 5902 lines · 5608 code · 54 blank · 240 comment · 468 complexity · d61d36499cb6cece2a540d36a7d4ea0e MD5 · raw file
Large files are truncated click here to view the full file

#!/usr/bin/env python
"""
asciidoc - converts an AsciiDoc text file to HTML or DocBook

Copyright (C) 2002-2010 Stuart Rackham. Free use of this software is granted
under the terms of the GNU General Public License (GPL).
"""

import sys, os, re, time, traceback, tempfile, subprocess, codecs, locale, unicodedata

### Used by asciidocapi.py ###
VERSION = '8.6.5'           # See CHANGLOG file for version history.

MIN_PYTHON_VERSION = 2.4    # Require this version of Python or better.

#---------------------------------------------------------------------------
# Program constants.
#---------------------------------------------------------------------------
DEFAULT_BACKEND = 'html'
DEFAULT_DOCTYPE = 'article'
# Allowed substitution options for List, Paragraph and DelimitedBlock
# definition subs entry.
SUBS_OPTIONS = ('specialcharacters','quotes','specialwords',
    'replacements', 'attributes','macros','callouts','normal','verbatim',
    'none','replacements2')
# Default value for unspecified subs and presubs configuration file entries.
SUBS_NORMAL = ('specialcharacters','quotes','attributes',
    'specialwords','replacements','macros','replacements2')
SUBS_VERBATIM = ('specialcharacters','callouts')

NAME_RE = r'(?u)[^\W\d][-\w]*'  # Valid section or attribute name.
OR, AND = ',', '+'              # Attribute list separators.


#---------------------------------------------------------------------------
# Utility functions and classes.
#---------------------------------------------------------------------------

class EAsciiDoc(Exception): pass

class OrderedDict(dict):
    """
    Dictionary ordered by insertion order.
    Python Cookbook: Ordered Dictionary, Submitter: David Benjamin.
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
    """
    def __init__(self, d=None, **kwargs):
        self._keys = []
        if d is None: d = kwargs
        dict.__init__(self, d)
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._keys.remove(key)
    def __setitem__(self, key, item):
        dict.__setitem__(self, key, item)
        if key not in self._keys: self._keys.append(key)
    def clear(self):
        dict.clear(self)
        self._keys = []
    def copy(self):
        d = dict.copy(self)
        d._keys = self._keys[:]
        return d
    def items(self):
        return zip(self._keys, self.values())
    def keys(self):
        return self._keys
    def popitem(self):
        try:
            key = self._keys[-1]
        except IndexError:
            raise KeyError('dictionary is empty')
        val = self[key]
        del self[key]
        return (key, val)
    def setdefault(self, key, failobj = None):
        dict.setdefault(self, key, failobj)
        if key not in self._keys: self._keys.append(key)
    def update(self, d=None, **kwargs):
        if d is None:
            d = kwargs
        dict.update(self, d)
        for key in d.keys():
            if key not in self._keys: self._keys.append(key)
    def values(self):
        return map(self.get, self._keys)

class AttrDict(dict):
    """
    Like a dictionary except values can be accessed as attributes i.e. obj.foo
    can be used in addition to obj['foo'].
    If an item is not present None is returned.
    """
    def __getattr__(self, key):
        try: return self[key]
        except KeyError: return None
    def __setattr__(self, key, value):
        self[key] = value
    def __delattr__(self, key):
        try: del self[key]
        except KeyError, k: raise AttributeError, k
    def __repr__(self):
        return '<AttrDict ' + dict.__repr__(self) + '>'
    def __getstate__(self):
        return dict(self)
    def __setstate__(self,value):
        for k,v in value.items(): self[k]=v

class InsensitiveDict(dict):
    """
    Like a dictionary except key access is case insensitive.
    Keys are stored in lower case.
    """
    def __getitem__(self, key):
        return dict.__getitem__(self, key.lower())
    def __setitem__(self, key, value):
        dict.__setitem__(self, key.lower(), value)
    def has_key(self, key):
        return dict.has_key(self,key.lower())
    def get(self, key, default=None):
        return dict.get(self, key.lower(), default)
    def update(self, dict):
        for k,v in dict.items():
            self[k] = v
    def setdefault(self, key, default = None):
        return dict.setdefault(self, key.lower(), default)


class Trace(object):
    """
    Used in conjunction with the 'trace' attribute to generate diagnostic
    output. There is a single global instance of this class named trace.
    """
    SUBS_NAMES = ('specialcharacters','quotes','specialwords',
                  'replacements', 'attributes','macros','callouts',
                  'replacements2')
    def __init__(self):
        self.name_re = ''        # Regexp pattern to match trace names.
        self.linenos = True
        self.offset = 0
    def __call__(self, name, before, after=None):
        """
        Print trace message if tracing is on and the trace 'name' matches the
        document 'trace' attribute (treated as a regexp).
        'before' is the source text before substitution; 'after' text is the
        source text after substitutuion.
        The 'before' and 'after' messages are only printed if they differ.
        """
        name_re = document.attributes.get('trace')
        if name_re == 'subs':    # Alias for all the inline substitutions.
            name_re = '|'.join(self.SUBS_NAMES)
        self.name_re = name_re
        if self.name_re is not None:
            msg = message.format(name, 'TRACE: ', self.linenos, offset=self.offset)
            if before != after and re.match(self.name_re,name):
                if is_array(before):
                    before = '\n'.join(before)
                if after is None:
                    msg += '\n%s\n' % before
                else:
                    if is_array(after):
                        after = '\n'.join(after)
                    msg += '\n<<<\n%s\n>>>\n%s\n' % (before,after)
                message.stderr(msg)

class Message:
    """
    Message functions.
    """
    PROG = os.path.basename(os.path.splitext(__file__)[0])

    def __init__(self):
        # Set to True or False to globally override line numbers method
        # argument. Has no effect when set to None.
        self.linenos = None
        self.messages = []

    def stdout(self,msg):
        print msg

    def stderr(self,msg=''):
        self.messages.append(msg)
        if __name__ == '__main__':
            sys.stderr.write('%s: %s%s' % (self.PROG, msg, os.linesep))

    def verbose(self, msg,linenos=True):
        if config.verbose:
            msg = self.format(msg,linenos=linenos)
            self.stderr(msg)

    def warning(self, msg,linenos=True,offset=0):
        msg = self.format(msg,'WARNING: ',linenos,offset=offset)
        document.has_warnings = True
        self.stderr(msg)

    def deprecated(self, msg, linenos=True):
        msg = self.format(msg, 'DEPRECATED: ', linenos)
        self.stderr(msg)

    def format(self, msg, prefix='', linenos=True, cursor=None, offset=0):
        """Return formatted message string."""
        if self.linenos is not False and ((linenos or self.linenos) and reader.cursor):
            if cursor is None:
                cursor = reader.cursor
            prefix += '%s: line %d: ' % (os.path.basename(cursor[0]),cursor[1]+offset)
        return prefix + msg

    def error(self, msg, cursor=None, halt=False):
        """
        Report fatal error.
        If halt=True raise EAsciiDoc exception.
        If halt=False don't exit application, continue in the hope of reporting
        all fatal errors finishing with a non-zero exit code.
        """
        if halt:
            raise EAsciiDoc, self.format(msg,linenos=False,cursor=cursor)
        else:
            msg = self.format(msg,'ERROR: ',cursor=cursor)
            self.stderr(msg)
            document.has_errors = True

    def unsafe(self, msg):
        self.error('unsafe: '+msg)


def userdir():
    """
    Return user's home directory or None if it is not defined.
    """
    result = os.path.expanduser('~')
    if result == '~':
        result = None
    return result

def localapp():
    """
    Return True if we are not executing the system wide version
    i.e. the configuration is in the executable's directory.
    """
    return os.path.isfile(os.path.join(APP_DIR, 'asciidoc.conf'))

def file_in(fname, directory):
    """Return True if file fname resides inside directory."""
    assert os.path.isfile(fname)
    # Empty directory (not to be confused with None) is the current directory.
    if directory == '':
        directory = os.getcwd()
    else:
        assert os.path.isdir(directory)
        directory = os.path.realpath(directory)
    fname = os.path.realpath(fname)
    return os.path.commonprefix((directory, fname)) == directory

def safe():
    return document.safe

def is_safe_file(fname, directory=None):
    # A safe file must reside in directory directory (defaults to the source
    # file directory).
    if directory is None:
        if document.infile == '<stdin>':
           return not safe()
        directory = os.path.dirname(document.infile)
    elif directory == '':
        directory = '.'
    return (
        not safe()
        or file_in(fname, directory)
        or file_in(fname, APP_DIR)
        or file_in(fname, CONF_DIR)
    )

def safe_filename(fname, parentdir):
    """
    Return file name which must reside in the parent file directory.
    Return None if file is not found or not safe.
    """
    if not os.path.isabs(fname):
        # Include files are relative to parent document
        # directory.
        fname = os.path.normpath(os.path.join(parentdir,fname))
    if not os.path.isfile(fname):
        message.warning('include file not found: %s' % fname)
        return None
    if not is_safe_file(fname, parentdir):
        message.unsafe('include file: %s' % fname)
        return None
    return fname

def assign(dst,src):
    """Assign all attributes from 'src' object to 'dst' object."""
    for a,v in src.__dict__.items():
        setattr(dst,a,v)

def strip_quotes(s):
    """Trim white space and, if necessary, quote characters from s."""
    s = s.strip()
    # Strip quotation mark characters from quoted strings.
    if len(s) >= 3 and s[0] == '"' and s[-1] == '"':
        s = s[1:-1]
    return s

def is_re(s):
    """Return True if s is a valid regular expression else return False."""
    try: re.compile(s)
    except: return False
    else: return True

def re_join(relist):
    """Join list of regular expressions re1,re2,... to single regular
    expression (re1)|(re2)|..."""
    if len(relist) == 0:
        return None
    result = []
    # Delete named groups to avoid ambiguity.
    for s in relist:
        result.append(re.sub(r'\?P<\S+?>','',s))
    result = ')|('.join(result)
    result = '('+result+')'
    return result

def validate(value,rule,errmsg):
    """Validate value against rule expression. Throw EAsciiDoc exception with
    errmsg if validation fails."""
    try:
        if not eval(rule.replace('$',str(value))):
            raise EAsciiDoc,errmsg
    except Exception:
        raise EAsciiDoc,errmsg
    return value

def lstrip_list(s):
    """
    Return list with empty items from start of list removed.
    """
    for i in range(len(s)):
        if s[i]: break
    else:
        return []
    return s[i:]

def rstrip_list(s):
    """
    Return list with empty items from end of list removed.
    """
    for i in range(len(s)-1,-1,-1):
        if s[i]: break
    else:
        return []
    return s[:i+1]

def strip_list(s):
    """
    Return list with empty items from start and end of list removed.
    """
    s = lstrip_list(s)
    s = rstrip_list(s)
    return s

def is_array(obj):
    """
    Return True if object is list or tuple type.
    """
    return isinstance(obj,list) or isinstance(obj,tuple)

def dovetail(lines1, lines2):
    """
    Append list or tuple of strings 'lines2' to list 'lines1'.  Join the last
    non-blank item in 'lines1' with the first non-blank item in 'lines2' into a
    single string.
    """
    assert is_array(lines1)
    assert is_array(lines2)
    lines1 = strip_list(lines1)
    lines2 = strip_list(lines2)
    if not lines1 or not lines2:
        return list(lines1) + list(lines2)
    result = list(lines1[:-1])
    result.append(lines1[-1] + lines2[0])
    result += list(lines2[1:])
    return result

def dovetail_tags(stag,content,etag):
    """Merge the end tag with the first content line and the last
    content line with the end tag. This ensures verbatim elements don't
    include extraneous opening and closing line breaks."""
    return dovetail(dovetail(stag,content), etag)

def parse_attributes(attrs,dict):
    """Update a dictionary with name/value attributes from the attrs string.
    The attrs string is a comma separated list of values and keyword name=value
    pairs. Values must preceed keywords and are named '1','2'... The entire
    attributes list is named '0'. If keywords are specified string values must
    be quoted. Examples:

    attrs: ''
    dict: {}

    attrs: 'hello,world'
    dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}

    attrs: '"hello", planet="earth"'
    dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'}
    """
    def f(*args,**keywords):
        # Name and add aguments '1','2'... to keywords.
        for i in range(len(args)):
            if not str(i+1) in keywords:
                keywords[str(i+1)] = args[i]
        return keywords

    if not attrs:
        return
    dict['0'] = attrs
    # Replace line separators with spaces so line spanning works.
    s = re.sub(r'\s', ' ', attrs)
    try:
        d = eval('f('+s+')')
        # Attributes must evaluate to strings, numbers or None.
        for v in d.values():
            if not (isinstance(v,str) or isinstance(v,int) or isinstance(v,float) or v is None):
                raise Exception
    except Exception:
        s = s.replace('"','\\"')
        s = s.split(',')
        s = map(lambda x: '"' + x.strip() + '"', s)
        s = ','.join(s)
        try:
            d = eval('f('+s+')')
        except Exception:
            return  # If there's a syntax error leave with {0}=attrs.
        for k in d.keys():  # Drop any empty positional arguments.
            if d[k] == '': del d[k]
    dict.update(d)
    assert len(d) > 0

def parse_named_attributes(s,attrs):
    """Update a attrs dictionary with name="value" attributes from the s string.
    Returns False if invalid syntax.
    Example:
    attrs: 'star="sun",planet="earth"'
    dict: {'planet':'earth', 'star':'sun'}
    """
    def f(**keywords): return keywords

    try:
        d = eval('f('+s+')')
        attrs.update(d)
        return True
    except Exception:
        return False

def parse_list(s):
    """Parse comma separated string of Python literals. Return a tuple of of
    parsed values."""
    try:
        result = eval('tuple(['+s+'])')
    except Exception:
        raise EAsciiDoc,'malformed list: '+s
    return result

def parse_options(options,allowed,errmsg):
    """Parse comma separated string of unquoted option names and return as a
    tuple of valid options. 'allowed' is a list of allowed option values.
    If allowed=() then all legitimate names are allowed.
    'errmsg' is an error message prefix if an illegal option error is thrown."""
    result = []
    if options:
        for s in re.split(r'\s*,\s*',options):
            if (allowed and s not in allowed) or not is_name(s):
                raise EAsciiDoc,'%s: %s' % (errmsg,s)
            result.append(s)
    return tuple(result)

def symbolize(s):
    """Drop non-symbol characters and convert to lowercase."""
    return re.sub(r'(?u)[^\w\-_]', '', s).lower()

def is_name(s):
    """Return True if s is valid attribute, macro or tag name
    (starts with alpha containing alphanumeric and dashes only)."""
    return re.match(r'^'+NAME_RE+r'$',s) is not None

def subs_quotes(text):
    """Quoted text is marked up and the resulting text is
    returned."""
    keys = config.quotes.keys()
    for q in keys:
        i = q.find('|')
        if i != -1 and q != '|' and q != '||':
            lq = q[:i]      # Left quote.
            rq = q[i+1:]    # Right quote.
        else:
            lq = rq = q
        tag = config.quotes[q]
        if not tag: continue
        # Unconstrained quotes prefix the tag name with a hash.
        if tag[0] == '#':
            tag = tag[1:]
            # Unconstrained quotes can appear anywhere.
            reo = re.compile(r'(?msu)(^|.)(\[(?P<attrlist>[^[\]]+?)\])?' \
                    + r'(?:' + re.escape(lq) + r')' \
                    + r'(?P<content>.+?)(?:'+re.escape(rq)+r')')
        else:
            # The text within constrained quotes must be bounded by white space.
            # Non-word (\W) characters are allowed at boundaries to accomodate
            # enveloping quotes and punctuation e.g. a='x', ('x'), 'x', ['x'].
            reo = re.compile(r'(?msu)(^|[^\w;:}])(\[(?P<attrlist>[^[\]]+?)\])?' \
                + r'(?:' + re.escape(lq) + r')' \
                + r'(?P<content>\S|\S.*?\S)(?:'+re.escape(rq)+r')(?=\W|$)')
        pos = 0
        while True:
            mo = reo.search(text,pos)
            if not mo: break
            if text[mo.start()] == '\\':
                # Delete leading backslash.
                text = text[:mo.start()] + text[mo.start()+1:]
                # Skip past start of match.
                pos = mo.start() + 1
            else:
                attrlist = {}
                parse_attributes(mo.group('attrlist'), attrlist)
                stag,etag = config.tag(tag, attrlist)
                s = mo.group(1) + stag + mo.group('content') + etag
                text = text[:mo.start()] + s + text[mo.end():]
                pos = mo.start() + len(s)
    return text

def subs_tag(tag,dict={}):
    """Perform attribute substitution and split tag string returning start, end
    tag tuple (c.f. Config.tag())."""
    if not tag:
        return [None,None]
    s = subs_attrs(tag,dict)
    if not s:
        message.warning('tag \'%s\' dropped: contains undefined attribute' % tag)
        return [None,None]
    result = s.split('|')
    if len(result) == 1:
        return result+[None]
    elif len(result) == 2:
        return result
    else:
        raise EAsciiDoc,'malformed tag: %s' % tag

def parse_entry(entry, dict=None, unquote=False, unique_values=False,
        allow_name_only=False, escape_delimiter=True):
    """Parse name=value entry to dictionary 'dict'. Return tuple (name,value)
    or None if illegal entry.
    If name= then value is set to ''.
    If name and allow_name_only=True then value is set to ''.
    If name! and allow_name_only=True then value is set to None.
    Leading and trailing white space is striped from 'name' and 'value'.
    'name' can contain any printable characters.
    If the '=' delimiter character is allowed in  the 'name' then
    it must be escaped with a backslash and escape_delimiter must be True.
    If 'unquote' is True leading and trailing double-quotes are stripped from
    'name' and 'value'.
    If unique_values' is True then dictionary entries with the same value are
    removed before the parsed entry is added."""
    if escape_delimiter:
        mo = re.search(r'(?:[^\\](=))',entry)
    else:
        mo = re.search(r'(=)',entry)
    if mo:  # name=value entry.
        if mo.group(1):
            name = entry[:mo.start(1)]
            if escape_delimiter:
                name = name.replace(r'\=','=')  # Unescape \= in name.
            value = entry[mo.end(1):]
    elif allow_name_only and entry:         # name or name! entry.
        name = entry
        if name[-1] == '!':
            name = name[:-1]
            value = None
        else:
            value = ''
    else:
        return None
    if unquote:
        name = strip_quotes(name)
        if value is not None:
            value = strip_quotes(value)
    else:
        name = name.strip()
        if value is not None:
            value = value.strip()
    if not name:
        return None
    if dict is not None:
        if unique_values:
            for k,v in dict.items():
                if v == value: del dict[k]
        dict[name] = value
    return name,value

def parse_entries(entries, dict, unquote=False, unique_values=False,
        allow_name_only=False,escape_delimiter=True):
    """Parse name=value entries from  from lines of text in 'entries' into
    dictionary 'dict'. Blank lines are skipped."""
    entries = config.expand_templates(entries)
    for entry in entries:
        if entry and not parse_entry(entry, dict, unquote, unique_values,
                allow_name_only, escape_delimiter):
            raise EAsciiDoc,'malformed section entry: %s' % entry

def dump_section(name,dict,f=sys.stdout):
    """Write parameters in 'dict' as in configuration file section format with
    section 'name'."""
    f.write('[%s]%s' % (name,writer.newline))
    for k,v in dict.items():
        k = str(k)
        k = k.replace('=',r'\=')    # Escape = in name.
        # Quote if necessary.
        if len(k) != len(k.strip()):
            k = '"'+k+'"'
        if v and len(v) != len(v.strip()):
            v = '"'+v+'"'
        if v is None:
            # Don't dump undefined attributes.
            continue
        else:
            s = k+'='+v
        if s[0] == '#':
            s = '\\' + s    # Escape so not treated as comment lines.
        f.write('%s%s' % (s,writer.newline))
    f.write(writer.newline)

def update_attrs(attrs,dict):
    """Update 'attrs' dictionary with parsed attributes in dictionary 'dict'."""
    for k,v in dict.items():
        if not is_name(k):
            raise EAsciiDoc,'illegal attribute name: %s' % k
        attrs[k] = v

def is_attr_defined(attrs,dic):
    """
    Check if the sequence of attributes is defined in dictionary 'dic'.
    Valid 'attrs' sequence syntax:
    <attr> Return True if single attrbiute is defined.
    <attr1>,<attr2>,... Return True if one or more attributes are defined.
    <attr1>+<attr2>+... Return True if all the attributes are defined.
    """
    if OR in attrs:
        for a in attrs.split(OR):
            if dic.get(a.strip()) is not None:
                return True
        else: return False
    elif AND in attrs:
        for a in attrs.split(AND):
            if dic.get(a.strip()) is None:
                return False
        else: return True
    else:
        return dic.get(attrs.strip()) is not None

def filter_lines(filter_cmd, lines, attrs={}):
    """
    Run 'lines' through the 'filter_cmd' shell command and return the result.
    The 'attrs' dictionary contains additional filter attributes.
    """
    def findfilter(name,dir,filter):
        """Find filter file 'fname' with style name 'name' in directory
        'dir'. Return found file path or None if not found."""
        if name:
            result = os.path.join(dir,'filters',name,filter)
            if os.path.isfile(result):
                return result
        result = os.path.join(dir,'filters',filter)
        if os.path.isfile(result):
            return result
        return None

    # Return input lines if there's not filter.
    if not filter_cmd or not filter_cmd.strip():
        return lines
    # Perform attributes substitution on the filter command.
    s = subs_attrs(filter_cmd, attrs)
    if not s:
        message.error('undefined filter attribute in command: %s' % filter_cmd)
        return []
    filter_cmd = s.strip()
    # Parse for quoted and unquoted command and command tail.
    # Double quoted.
    mo = re.match(r'^"(?P<cmd>[^"]+)"(?P<tail>.*)$', filter_cmd)
    if not mo:
        # Single quoted.
        mo = re.match(r"^'(?P<cmd>[^']+)'(?P<tail>.*)$", filter_cmd)
        if not mo:
            # Unquoted catch all.
            mo = re.match(r'^(?P<cmd>\S+)(?P<tail>.*)$', filter_cmd)
    cmd = mo.group('cmd').strip()
    found = None
    if not os.path.dirname(cmd):
        # Filter command has no directory path so search filter directories.
        filtername = attrs.get('style')
        d = document.attributes.get('docdir')
        if d:
            found = findfilter(filtername, d, cmd)
        if not found:
            if USER_DIR:
                found = findfilter(filtername, USER_DIR, cmd)
            if not found:
                if localapp():
                    found = findfilter(filtername, APP_DIR, cmd)
                else:
                    found = findfilter(filtername, CONF_DIR, cmd)
    else:
        if os.path.isfile(cmd):
            found = cmd
        else:
            message.warning('filter not found: %s' % cmd)
    if found:
        filter_cmd = '"' + found + '"' + mo.group('tail')
    if sys.platform == 'win32':
        # Windows doesn't like running scripts directly so explicitly
        # specify interpreter.
        if found:
            if cmd.endswith('.py'):
                filter_cmd = 'python ' + filter_cmd
            elif cmd.endswith('.rb'):
                filter_cmd = 'ruby ' + filter_cmd
    message.verbose('filtering: ' + filter_cmd)
    try:
        p = subprocess.Popen(filter_cmd, shell=True,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        output = p.communicate(os.linesep.join(lines))[0]
    except Exception:
        raise EAsciiDoc,'filter error: %s: %s' % (filter_cmd, sys.exc_info()[1])
    if output:
        result = [s.rstrip() for s in output.split(os.linesep)]
    else:
        result = []
    filter_status = p.wait()
    if filter_status:
        message.warning('filter non-zero exit code: %s: returned %d' %
               (filter_cmd, filter_status))
    if lines and not result:
        message.warning('no output from filter: %s' % filter_cmd)
    return result

def system(name, args, is_macro=False, attrs=None):
    """
    Evaluate a system attribute ({name:args}) or system block macro
    (name::[args]).
    If is_macro is True then we are processing a system block macro otherwise
    it's a system attribute.
    The attrs dictionary is updated by the counter and set system attributes.
    NOTE: The include1 attribute is used internally by the include1::[] macro
    and is not for public use.
    """
    if is_macro:
        syntax = '%s::[%s]' % (name,args)
        separator = '\n'
    else:
        syntax = '{%s:%s}' % (name,args)
        separator = writer.newline
    if name not in ('eval','eval3','sys','sys2','sys3','include','include1','counter','counter2','set','set2','template'):
        if is_macro:
            msg = 'illegal system macro name: %s' % name
        else:
            msg = 'illegal system attribute name: %s' % name
        message.warning(msg)
        return None
    if is_macro:
        s = subs_attrs(args)
        if s is None:
            message.warning('skipped %s: undefined attribute in: %s' % (name,args))
            return None
        args = s
    if name != 'include1':
        message.verbose('evaluating: %s' % syntax)
    if safe() and name not in ('include','include1'):
        message.unsafe(syntax)
        return None
    result = None
    if name in ('eval','eval3'):
        try:
            result = eval(args)
            if result is True:
                result = ''
            elif result is False:
                result = None
            elif result is not None:
                result = str(result)
        except Exception:
            message.warning('%s: evaluation error' % syntax)
    elif name in ('sys','sys2','sys3'):
        result = ''
        fd,tmp = tempfile.mkstemp()
        os.close(fd)
        try:
            cmd = args
            cmd = cmd + (' > %s' % tmp)
            if name == 'sys2':
                cmd = cmd + ' 2>&1'
            if os.system(cmd):
                message.warning('%s: non-zero exit status' % syntax)
            try:
                if os.path.isfile(tmp):
                    lines = [s.rstrip() for s in open(tmp)]
                else:
                    lines = []
            except Exception:
                raise EAsciiDoc,'%s: temp file read error' % syntax
            result = separator.join(lines)
        finally:
            if os.path.isfile(tmp):
                os.remove(tmp)
    elif name in ('counter','counter2'):
        mo = re.match(r'^(?P<attr>[^:]*?)(:(?P<seed>.*))?$', args)
        attr = mo.group('attr')
        seed = mo.group('seed')
        if seed and (not re.match(r'^\d+$', seed) and len(seed) > 1):
            message.warning('%s: illegal counter seed: %s' % (syntax,seed))
            return None
        if not is_name(attr):
            message.warning('%s: illegal attribute name' % syntax)
            return None
        value = document.attributes.get(attr)
        if value:
            if not re.match(r'^\d+$', value) and len(value) > 1:
                message.warning('%s: illegal counter value: %s'
                                % (syntax,value))
                return None
            if re.match(r'^\d+$', value):
                expr = value + '+1'
            else:
                expr = 'chr(ord("%s")+1)' % value
            try:
                result = str(eval(expr))
            except Exception:
                message.warning('%s: evaluation error: %s' % (syntax, expr))
        else:
            if seed:
                result = seed
            else:
                result = '1'
        document.attributes[attr] = result
        if attrs is not None:
            attrs[attr] = result
        if name == 'counter2':
            result = ''
    elif name in ('set','set2'):
        mo = re.match(r'^(?P<attr>[^:]*?)(:(?P<value>.*))?$', args)
        attr = mo.group('attr')
        value = mo.group('value')
        if value is None:
            value = ''
        if attr.endswith('!'):
            attr = attr[:-1]
            value = None
        if not is_name(attr):
            message.warning('%s: illegal attribute name' % syntax)
        else:
            if attrs is not None:
                attrs[attr] = value
            if name != 'set2':  # set2 only updates local attributes.
                document.attributes[attr] = value
        if value is None:
            result = None
        else:
            result = ''
    elif name == 'include':
        if not os.path.exists(args):
            message.warning('%s: file does not exist' % syntax)
        elif not is_safe_file(args):
            message.unsafe(syntax)
        else:
            result = [s.rstrip() for s in open(args)]
            if result:
                result = subs_attrs(result)
                result = separator.join(result)
                result = result.expandtabs(reader.tabsize)
            else:
                result = ''
    elif name == 'include1':
        result = separator.join(config.include1[args])
    elif name == 'template':
        if not args in config.sections:
            message.warning('%s: template does not exist' % syntax)
        else:
            result = []
            for line in  config.sections[args]:
                line = subs_attrs(line)
                if line is not None:
                    result.append(line)
            result = '\n'.join(result)
    else:
        assert False
    if result and name in ('eval3','sys3'):
        macros.passthroughs.append(result)
        result = '\x07' + str(len(macros.passthroughs)-1) + '\x07'
    return result

def subs_attrs(lines, dictionary=None):
    """Substitute 'lines' of text with attributes from the global
    document.attributes dictionary and from 'dictionary' ('dictionary'
    entries take precedence). Return a tuple of the substituted lines.  'lines'
    containing undefined attributes are deleted. If 'lines' is a string then
    return a string.

    - Attribute references are substituted in the following order: simple,
      conditional, system.
    - Attribute references inside 'dictionary' entry values are substituted.
    """

    def end_brace(text,start):
        """Return index following end brace that matches brace at start in
        text."""
        assert text[start] == '{'
        n = 0
        result = start
        for c in text[start:]:
            # Skip braces that are followed by a backslash.
            if result == len(text)-1 or text[result+1] != '\\':
                if c == '{': n = n + 1
                elif c == '}': n = n - 1
            result = result + 1
            if n == 0: break
        return result

    if type(lines) == str:
        string_result = True
        lines = [lines]
    else:
        string_result = False
    if dictionary is None:
        attrs = document.attributes
    else:
        # Remove numbered document attributes so they don't clash with
        # attribute list positional attributes.
        attrs = {}
        for k,v in document.attributes.items():
            if not re.match(r'^\d+$', k):
                attrs[k] = v
        # Substitute attribute references inside dictionary values.
        for k,v in dictionary.items():
            if v is None:
                del dictionary[k]
            else:
                v = subs_attrs(str(v))
                if v is None:
                    del dictionary[k]
                else:
                    dictionary[k] = v
        attrs.update(dictionary)
    # Substitute all attributes in all lines.
    result = []
    for line in lines:
        # Make it easier for regular expressions.
        line = line.replace('\\{','{\\')
        line = line.replace('\\}','}\\')
        # Expand simple attributes ({name}).
        # Nested attributes not allowed.
        reo = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)\}(?!\\)')
        pos = 0
        while True:
            mo = reo.search(line,pos)
            if not mo: break
            s =  attrs.get(mo.group('name'))
            if s is None:
                pos = mo.end()
            else:
                s = str(s)
                line = line[:mo.start()] + s + line[mo.end():]
                pos = mo.start() + len(s)
        # Expand conditional attributes.
        # Single name -- higher precedence.
        reo1 = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)' \
                          r'(?P<op>\=|\?|!|#|%|@|\$)' \
                          r'(?P<value>.*?)\}(?!\\)')
        # Multiple names (n1,n2,... or n1+n2+...) -- lower precedence.
        reo2 = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w'+OR+AND+r']*?)' \
                          r'(?P<op>\=|\?|!|#|%|@|\$)' \
                          r'(?P<value>.*?)\}(?!\\)')
        for reo in [reo1,reo2]:
            pos = 0
            while True:
                mo = reo.search(line,pos)
                if not mo: break
                attr = mo.group()
                name =  mo.group('name')
                if reo == reo2:
                    if OR in name:
                        sep = OR
                    else:
                        sep = AND
                    names = [s.strip() for s in name.split(sep) if s.strip() ]
                    for n in names:
                        if not re.match(r'^[^\\\W][-\w]*$',n):
                            message.error('illegal attribute syntax: %s' % attr)
                    if sep == OR:
                        # Process OR name expression: n1,n2,...
                        for n in names:
                            if attrs.get(n) is not None:
                                lval = ''
                                break
                        else:
                            lval = None
                    else:
                        # Process AND name expression: n1+n2+...
                        for n in names:
                            if attrs.get(n) is None:
                                lval = None
                                break
                        else:
                            lval = ''
                else:
                    lval =  attrs.get(name)
                op = mo.group('op')
                # mo.end() not good enough because '{x={y}}' matches '{x={y}'.
                end = end_brace(line,mo.start())
                rval = line[mo.start('value'):end-1]
                UNDEFINED = '{zzzzz}'
                if lval is None:
                    if op == '=': s = rval
                    elif op == '?': s = ''
                    elif op == '!': s = rval
                    elif op == '#': s = UNDEFINED   # So the line is dropped.
                    elif op == '%': s = rval
                    elif op in ('@','$'):
                        s = UNDEFINED               # So the line is dropped.
                    else:
                        assert False, 'illegal attribute: %s' % attr
                else:
                    if op == '=': s = lval
                    elif op == '?': s = rval
                    elif op == '!': s = ''
                    elif op == '#': s = rval
                    elif op == '%': s = UNDEFINED   # So the line is dropped.
                    elif op in ('@','$'):
                        v = re.split(r'(?<!\\):',rval)
                        if len(v) not in (2,3):
                            message.error('illegal attribute syntax: %s' % attr)
                            s = ''
                        elif not is_re('^'+v[0]+'$'):
                            message.error('illegal attribute regexp: %s' % attr)
                            s = ''
                        else:
                            v = [s.replace('\\:',':') for s in v]
                            re_mo = re.match('^'+v[0]+'$',lval)
                            if op == '@':
                                if re_mo:
                                    s = v[1]         # {<name>@<re>:<v1>[:<v2>]}
                                else:
                                    if len(v) == 3:   # {<name>@<re>:<v1>:<v2>}
                                        s = v[2]
                                    else:             # {<name>@<re>:<v1>}
                                        s = ''
                            else:
                                if re_mo:
                                    if len(v) == 2:   # {<name>$<re>:<v1>}
                                        s = v[1]
                                    elif v[1] == '':  # {<name>$<re>::<v2>}
                                        s = UNDEFINED # So the line is dropped.
                                    else:             # {<name>$<re>:<v1>:<v2>}
                                        s = v[1]
                                else:
                                    if len(v) == 2:   # {<name>$<re>:<v1>}
                                        s = UNDEFINED # So the line is dropped.
                                    else:             # {<name>$<re>:<v1>:<v2>}
                                        s = v[2]
                    else:
                        assert False, 'illegal attribute: %s' % attr
                s = str(s)
                line = line[:mo.start()] + s + line[end:]
                pos = mo.start() + len(s)
        # Drop line if it contains  unsubstituted {name} references.
        skipped = re.search(r'(?su)\{[^\\\W][-\w]*?\}(?!\\)', line)
        if skipped:
            trace('dropped line', line)
            continue;
        # Expand system attributes (eval has precedence).
        reos = [
            re.compile(r'(?su)\{(?P<action>eval):(?P<expr>.*?)\}(?!\\)'),
            re.compile(r'(?su)\{(?P<action>[^\\\W][-\w]*?):(?P<expr>.*?)\}(?!\\)'),
        ]
        skipped = False
        for reo in reos:
            pos = 0
            while True:
                mo = reo.search(line,pos)
                if not mo: break
                expr = mo.group('expr')
                action = mo.group('action')
                expr = expr.replace('{\\','{')
                expr = expr.replace('}\\','}')
                s = system(action, expr, attrs=dictionary)
                if dictionary is not None and action in ('counter','counter2','set','set2'):
                    # These actions create and update attributes.
                    attrs.update(dictionary)
                if s is None:
                    # Drop line if the action returns None.
                    skipped = True
                    break
                line = line[:mo.start()] + s + line[mo.end():]
                pos = mo.start() + len(s)
            if skipped:
                break
        if not skipped:
            # Remove backslash from escaped entries.
            line = line.replace('{\\','{')
            line = line.replace('}\\','}')
            result.append(line)
    if string_result:
        if result:
            return '\n'.join(result)
        else:
            return None
    else:
        return tuple(result)

def char_encoding():
    encoding = document.attributes.get('encoding')
    if encoding:
        try:
            codecs.lookup(encoding)
        except LookupError,e:
            raise EAsciiDoc,str(e)
    return encoding

def char_len(s):
    return len(char_decode(s))

east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
                               # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""

def column_width(s):
    text = char_decode(s)
    if isinstance(text, unicode):
        width = 0
        for c in text:
            width += east_asian_widths[unicodedata.east_asian_width(c)]
        return width
    else:
        return len(text)

def char_decode(s):
    if char_encoding():
        try:
            return s.decode(char_encoding())
        except Exception:
            raise EAsciiDoc, \
                "'%s' codec can't decode \"%s\"" % (char_encoding(), s)
    else:
        return s

def char_encode(s):
    if char_encoding():
        return s.encode(char_encoding())
    else:
        return s

def time_str(t):
    """Convert seconds since the Epoch to formatted local time string."""
    t = time.localtime(t)
    s = time.strftime('%H:%M:%S',t)
    if time.daylight and t.tm_isdst == 1:
        result = s + ' ' + time.tzname[1]
    else:
        result = s + ' ' + time.tzname[0]
    # Attempt to convert the localtime to the output encoding.
    try:
        result = char_encode(result.decode(locale.getdefaultlocale()[1]))
    except Exception:
        pass
    return result

def date_str(t):
    """Convert seconds since the Epoch to formatted local date string."""
    t = time.localtime(t)
    return time.strftime('%Y-%m-%d',t)


class Lex:
    """Lexical analysis routines. Static methods and attributes only."""
    prev_element = None
    prev_cursor = None
    def __init__(self):
        raise AssertionError,'no class instances allowed'
    @staticmethod
    def next():
        """Returns class of next element on the input (None if EOF).  The
        reader is assumed to be at the first line following a previous element,
        end of file or line one.  Exits with the reader pointing to the first
        line of the next element or EOF (leading blank lines are skipped)."""
        reader.skip_blank_lines()
        if reader.eof(): return None
        # Optimization: If we've already checked for an element at this
        # position return the element.
        if Lex.prev_element and Lex.prev_cursor == reader.cursor:
            return Lex.prev_element
        if AttributeEntry.isnext():
            result = AttributeEntry
        elif AttributeList.isnext():
            result = AttributeList
        elif BlockTitle.isnext() and not tables_OLD.isnext():
            result = BlockTitle
        elif Title.isnext():
            if AttributeList.style() == 'float':
                result = FloatingTitle
            else:
                result = Title
        elif macros.isnext():
            result = macros.current
        elif lists.isnext():
            result = lists.current
        elif blocks.isnext():
            result = blocks.current
        elif tables_OLD.isnext():
            result = tables_OLD.current
        elif tables.isnext():
            result = tables.current
        else:
            if not paragraphs.isnext():
                raise EAsciiDoc,'paragraph expected'
            result = paragraphs.current
        # Optimization: Cache answer.
        Lex.prev_cursor = reader.cursor
        Lex.prev_element = result
        return result

    @staticmethod
    def canonical_subs(options):
        """Translate composite subs values."""
        if len(options) == 1:
            if options[0] == 'none':
                options = ()
            elif options[0] == 'normal':
                options = config.subsnormal
            elif options[0] == 'verbatim':
                options = config.subsverbatim
        return options

    @staticmethod
    def subs_1(s,options):
        """Perform substitution specified in 'options' (in 'options' order)."""
        if not s:
            return s
        if document.attributes.get('plaintext') is not None:
            options = ('specialcharacters',)
        result = s
        options = Lex.canonical_subs(options)
        for o in options:
            if o == 'specialcharacters':
                result = config.subs_specialchars(result)
            elif o == 'attributes':
                result = subs_attrs(result)
            elif o == 'quotes':
                result = subs_quotes(result)
            elif o == 'specialwords':
                result = config.subs_specialwords(result)
            elif o in ('replacements','replacements2'):
                result = config.subs_replacements(result,o)
            elif o == 'macros':
                result = macros.subs(result)
            elif o == 'callouts':
                result = macros.subs(result,callouts=True)
            else:
                raise EAsciiDoc,'illegal substitution option: %s' % o
            trace(o, s, result)
            if not result:
                break
        return result

    @staticmethod
    def subs(lines,options):
        """Perform inline processing specified by 'options' (in 'options'
        order) on sequence of 'lines'."""
        if not lines or not options:
            return lines
        options = Lex.canonical_subs(options)
        # Join lines so quoting can span multiple lines.
        para = '\n'.join(lines)
        if 'macros' in options:
            para = macros.extract_passthroughs(para)
        for o in options:
            if o == 'attributes':
                # If we don't substitute attributes line-by-line then a single
                # undefined attribute will drop the entire paragraph.
                lines = subs_attrs(para.split('\n'))
                para = '\n'.join(lines)
            else:
                para = Lex.subs_1(para,(o,))
        if 'macros' in options:
            para = macros.restore_passthroughs(para)
        return para.splitlines()

    @staticmethod
    def set_margin(lines, margin=0):
        """Utility routine that sets the left margin to 'margin' space in a
        block of non-blank lines."""
        # Calculate width of block margin.
        lines = list(lines)
        width = len(lines[0])
        for s in lines:
            i = re.search(r'\S',s).start()
            if i < width: width = i
        # Strip margin width from all lines.
        for i in range(len(lines)):
            lines[i] = ' '*margin + lines[i][width:]
        return lines

#---------------------------------------------------------------------------
# Document element classes parse AsciiDoc reader input and write DocBook writer
# output.
#---------------------------------------------------------------------------
class Document(object):

    # doctype property.
    def getdoctype(self):
        return self.attributes.get('doctype')
    def setdoctype(self,doctype):
        self.attributes['doctype'] = doctype
    doctype = property(getdoctype,setdoctype)

    # backend property.
    def getbackend(self):
        return self.attributes.get('backend')
    def setbackend(self,backend):
        if backend:
            backend = self.attributes.get('backend-alias-' + backend, backend)
        self.attributes['backend'] = backend
    backend = property(getbackend,setbackend)

    def __init__(self):
        self.infile = None      # Source file name.
        self.outfile = None     # Output file name.
        self.attributes = InsensitiveDict()
        self.level = 0          # 0 => front matter. 1,2,3 => sect1,2,3.
        self.has_errors = False # Set true if processing errors were flagged.
        self.has_warnings = False # Set true if warnings were flagged.
        self.safe = False       # Default safe mode.
    def update_attributes(self,attrs=None):
        """
        Set implicit attributes and attributes in 'attrs'.
        """
        t = time.time()
        self.attributes['localtime'] = time_str(t)
        self.attributes['localdate'] = date_str(t)
        self.attributes['asciidoc-version'] = VERSION
        self.attributes['asciidoc-file'] = APP_FILE
        self.attributes['asciidoc-dir'] = APP_DIR
        self.attributes['asciidoc-confdir'] = CONF_DIR
        self.attributes['user-dir'] = USER_DIR
        if config.verbose:
            self.attributes['verbose'] = ''
        # Update with configuration file attributes.
        if attrs:
            self.attributes.update(attrs)
        # Update with command-line attributes.
        self.…
Tech Fingerprint

Standard Library: OS Interaction
Alerts (70)

'def' Ensure functions have docstrings for documentation
57 60 64 66 68 76 79 85 118 120 122 125 178 181 186 191 196 222 254 257 546 597 1114 1123 1136 1146 1156 1319 1321 1326 1328
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
74 940 944
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
306
'eval(' Avoid due to security risks; use ast.literal_eval for safer evaluation of literals
326 418 429 447 457
'except Exception:' Catch specific exceptions instead of Exception to avoid masking bugs
328 423 430 450 458 1150 1173
'isinstance(' Overuse may indicate design issues; consider polymorphism
364 421 1138
'list(' Avoid unnecessary list conversions; use generators where possible
377 378 380 1302
'raise Exception' Raise specific exception types for better error handling
422
'type(' Use isinstance() for type checking instead of type()
923
Complexity hotspot; lines 1016 to 1022 (total complexity: 7)
1016 1017 1018 1019 1020 1021 1022
Complexity hotspot; lines 1026 to 1032 (total complexity: 7)
1026 1027 1028 1029 1030 1031 1032