/asciidoc-8.4.5/asciidoc.py
Python | 5315 lines | 5123 code | 23 blank | 169 comment | 216 complexity | 3d8c49106162df3e11f65d3c3dd23451 MD5 | raw file
Possible License(s): GPL-2.0
Large files are truncated, but you can click here to view the full file
- #!/usr/bin/env python
- """
- asciidoc - converts an AsciiDoc text file to DocBook, HTML or LinuxDoc
- Copyright (C) 2002-2009 Stuart Rackham. Free use of this software is granted
- under the terms of the GNU General Public License (GPL).
- """
- import sys, os, re, time, traceback, tempfile, subprocess, codecs, locale
### Used by asciidocapi.py ###
VERSION = '8.4.5'           # See CHANGELOG file for version history.
MIN_PYTHON_VERSION = 2.4    # Require this version of Python or better.

#---------------------------------------------------------------------------
# Program constants.
#---------------------------------------------------------------------------
DEFAULT_BACKEND = 'xhtml11'
DEFAULT_DOCTYPE = 'article'

# Substitution option names allowed in the 'subs' entry of List, Paragraph
# and DelimitedBlock definitions.
SUBS_OPTIONS = (
    'specialcharacters',
    'quotes',
    'specialwords',
    'replacements',
    'attributes',
    'macros',
    'callouts',
    'normal',
    'verbatim',
    'none',
    'replacements2',
)

# Default values for unspecified subs and presubs configuration file entries.
SUBS_NORMAL = (
    'specialcharacters',
    'quotes',
    'attributes',
    'specialwords',
    'replacements',
    'macros',
    'replacements2',
)
SUBS_VERBATIM = (
    'specialcharacters',
    'callouts',
)

# Valid section or attribute name.
NAME_RE = r'(?u)[^\W\d][-\w]*'
- #---------------------------------------------------------------------------
- # Utility functions and classes.
- #---------------------------------------------------------------------------
class EAsciiDoc(Exception):
    """Exception type raised by this module to report processing errors."""
    pass
class OrderedDict(dict):
    """
    Dictionary ordered by insertion order.
    Python Cookbook: Ordered Dictionary, Submitter: David Benjamin.
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747

    The insertion order is held in the self._keys list. Every method that
    adds or removes items keeps that list in step with the underlying dict.
    """
    def __init__(self, d=None, **kwargs):
        """Create the dictionary, optionally loading it from mapping (or
        sequence of (key, value) pairs) 'd' and/or keyword arguments."""
        dict.__init__(self)
        self._keys = []
        # Load through update() so that initial keys are recorded in _keys
        # (dict.__init__() would store the items without recording them).
        self.update(d, **kwargs)
    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._keys.remove(key)
    def __setitem__(self, key, item):
        dict.__setitem__(self, key, item)
        if key not in self._keys: self._keys.append(key)
    def __iter__(self):
        """Iterate over a snapshot of the keys in insertion order."""
        return iter(self._keys[:])
    def clear(self):
        dict.clear(self)
        self._keys = []
    def copy(self):
        """Return a shallow copy that is itself an OrderedDict."""
        # dict.copy() returns a plain dict which cannot carry the key order.
        result = OrderedDict()
        for key in self._keys:
            result[key] = self[key]
        return result
    def items(self):
        """Return list of (key, value) tuples in insertion order."""
        return [(key, self[key]) for key in self._keys]
    def keys(self):
        """Return a new list of keys in insertion order."""
        # A copy is returned so callers cannot corrupt the internal order
        # and can safely delete items while looping over the result.
        return self._keys[:]
    def pop(self, key, *args):
        """Remove 'key' and return its value. If 'key' is missing return the
        optional default else raise KeyError (same contract as dict.pop())."""
        found = key in self
        result = dict.pop(self, key, *args)
        if found:
            self._keys.remove(key)
        return result
    def popitem(self):
        """Remove and return the most recently inserted (key, value) pair.
        Raises KeyError if the dictionary is empty."""
        try:
            key = self._keys[-1]
        except IndexError:
            raise KeyError('dictionary is empty')
        val = self[key]
        del self[key]
        return (key, val)
    def setdefault(self, key, failobj=None):
        """Return the value for 'key', first setting it to 'failobj' if the
        key is missing."""
        result = dict.setdefault(self, key, failobj)
        if key not in self._keys: self._keys.append(key)
        return result
    def update(self, d=None, **kwargs):
        """Add items from mapping (or sequence of (key, value) pairs) 'd' and
        then from the keyword arguments."""
        if d is not None:
            if hasattr(d, 'keys'):
                for key in d.keys():
                    self[key] = d[key]
            else:
                for key, val in d:
                    self[key] = val
        for key in kwargs:
            self[key] = kwargs[key]
    def values(self):
        """Return list of values in insertion order."""
        return [self[key] for key in self._keys]
class AttrDict(dict):
    """
    Dictionary whose items can also be read, written and deleted as
    attributes, i.e. obj.foo is interchangeable with obj['foo'].
    Reading an item that is not present yields None.
    """
    def __getattr__(self, key):
        # dict.get() supplies the None result for missing items.
        return self.get(key)
    def __setattr__(self, key, value):
        self[key] = value
    def __delattr__(self, key):
        try:
            del self[key]
        except KeyError:
            # Report the missing item as a missing attribute; the original
            # KeyError becomes the AttributeError argument.
            raise AttributeError(sys.exc_info()[1])
    def __repr__(self):
        return '<AttrDict %s>' % dict.__repr__(self)
    def __getstate__(self):
        # Pickle as a plain dictionary.
        return dict(self)
    def __setstate__(self,value):
        for name,item in value.items():
            self[name] = item
class Trace(object):
    """
    Generates diagnostic output under control of the document 'trace'
    attribute. There is a single global instance of this class named trace.
    """
    # Names of the inline substitutions selected by the 'subs' alias.
    SUBS_NAMES = ('specialcharacters','quotes','specialwords',
                  'replacements', 'attributes','macros','callouts',
                  'replacements2')

    def __init__(self):
        self.name_re = ''   # Regexp pattern to match trace names.
        self.linenos = True
        self.offset = 0

    def __call__(self, name, before, after=None):
        """
        Emit a trace message if tracing is on and 'name' matches the document
        'trace' attribute (treated as a regexp).
        Nothing is emitted when 'before' and 'after' are equal. If 'after' is
        omitted only 'before' is printed.
        """
        pattern = document.attributes.get('trace')
        if pattern == 'subs':   # Alias for all the inline substitutions.
            pattern = '|'.join(self.SUBS_NAMES)
        self.name_re = pattern
        if pattern is None:
            return
        msg = message.format(name, 'TRACE: ', self.linenos, offset=self.offset)
        if before == after or not re.match(pattern, name):
            return
        if is_array(before):
            before = '\n'.join(before)
        if after is None:
            msg += '\n%s\n' % before
        else:
            if is_array(after):
                after = '\n'.join(after)
            msg += '\n<<<\n%s\n>>>\n%s\n' % (before,after)
        message.stderr(msg)
class Message:
    """
    Message functions.

    Every emitted message is recorded in self.messages; it is also written
    to sys.stderr when this module is run as a script.
    """
    def __init__(self):
        self.messages = []
        self.linenos = None     # Used to globally override line numbers.

    def stderr(self,line=''):
        """Record 'line' and, when run as a script, write it to stderr."""
        self.messages.append(line)
        if __name__ == '__main__':
            sys.stderr.write(line+os.linesep)

    def verbose(self, msg,linenos=True):
        """Emit 'msg' only if the verbose configuration option is set."""
        if not config.verbose:
            return
        self.stderr(self.format(msg,linenos=linenos))

    def warning(self, msg,linenos=True,offset=0):
        """Emit a warning and flag the document as having warnings."""
        text = self.format(msg,'WARNING: ',linenos,offset=offset)
        document.has_warnings = True
        self.stderr(text)

    def deprecated(self, msg, linenos=True):
        """Emit a deprecation message."""
        self.stderr(self.format(msg, 'DEPRECATED: ', linenos))

    def format(self, msg, prefix='', linenos=True, cursor=None, offset=0):
        """Return formatted message string.
        The source file name and line number (taken from 'cursor', or from
        the reader cursor if 'cursor' is None) are inserted after 'prefix'
        unless line numbers are suppressed."""
        location = ''
        if self.linenos is not False and (linenos or self.linenos) \
                and reader.cursor:
            if cursor is None:
                cursor = reader.cursor
            location = '%s: line %d: ' % (os.path.basename(cursor[0]),cursor[1]+offset)
        return prefix + location + msg

    def error(self, msg, cursor=None, halt=False):
        """
        Report fatal error.
        If halt=True raise EAsciiDoc exception.
        If halt=False don't exit application, continue in the hope of reporting
        all fatal errors finishing with a non-zero exit code.
        """
        if halt:
            raise EAsciiDoc(self.format(msg,linenos=False,cursor=cursor))
        self.stderr(self.format(msg,'ERROR: ',cursor=cursor))
        document.has_errors = True

    def unsafe(self, msg):
        """Report an unsafe operation as an error."""
        self.error('unsafe: '+msg)
def file_in(fname, directory):
    """Return True if file fname resides inside directory.

    'fname' must be an existing file. 'directory' must be an existing
    directory or the empty string (which means the current directory).
    Both paths are resolved with os.path.realpath() before comparison so
    symbolic links cannot be used to step outside 'directory'.
    """
    assert os.path.isfile(fname)
    # Empty directory (not to be confused with None) is the current directory.
    if directory == '':
        directory = os.getcwd()
    else:
        assert os.path.isdir(directory)
    directory = os.path.realpath(directory)
    fname = os.path.realpath(fname)
    # Compare whole path components: os.path.commonprefix() works character
    # by character and would accept '/a/abc/f' as being inside '/a/ab'.
    if not directory.endswith(os.sep):
        directory = directory + os.sep
    return fname.startswith(directory)
def safe():
    """Return the 'safe' setting of the global document object."""
    return document.safe
def is_safe_file(fname, directory=None):
    """Return True if file 'fname' may be read.
    When safe mode is on the file must reside in 'directory', which defaults
    to the source file directory (nothing is safe when the source is
    '<stdin>'). When safe mode is off every file is safe."""
    if directory is None:
        if document.infile == '<stdin>':
            return not safe()
        directory = os.path.dirname(document.infile)
    elif directory == '':
        directory = '.'
    if safe():
        return file_in(fname, directory)
    return True
def safe_filename(fname, parentdir):
    """
    Return the include file name, which must reside in the parent file
    directory. A relative 'fname' is taken relative to 'parentdir'.
    Return None (after reporting the problem) if the file is not found or is
    not safe.
    """
    if os.path.isabs(fname):
        path = fname
    else:
        # Include files are relative to parent document directory.
        path = os.path.join(parentdir,fname)
    if not os.path.isfile(path):
        message.warning('include file not found: %s' % path)
    elif not is_safe_file(path, parentdir):
        message.unsafe('include file: %s' % path)
    else:
        return path
    return None
def assign(dst,src):
    """Copy every instance attribute of the 'src' object to the 'dst'
    object."""
    for name,value in vars(src).items():
        setattr(dst,name,value)
def strip_quotes(s):
    """Return s with surrounding white space removed and, if what remains is
    enclosed in double-quotes, with those quotes removed too."""
    s = s.strip()
    # Only strings of at least three characters are unquoted, so an empty
    # quoted string ("") is returned unchanged.
    quoted = len(s) >= 3 and s[0] == '"' and s[-1] == '"'
    if quoted:
        return s[1:-1]
    return s
def is_re(s):
    """Return True if s is a valid regular expression else return False."""
    try:
        re.compile(s)
    except Exception:
        # Any compilation failure means 's' is not usable as a pattern.
        # Exception (rather than a bare except) is caught so KeyboardInterrupt
        # and SystemExit are not swallowed on Pythons where they do not derive
        # from Exception.
        return False
    return True
def re_join(relist):
    """Combine the regular expressions re1,re2,... in 'relist' into the single
    alternation (re1)|(re2)|... Return None if 'relist' is empty."""
    if len(relist) == 0:
        return None
    # Delete named groups to avoid ambiguity.
    unnamed = [re.sub(r'\?P<\S+?>','',s) for s in relist]
    return '(' + ')|('.join(unnamed) + ')'
def validate(value,rule,errmsg):
    """Check 'value' against the 'rule' expression and return 'value'.
    Every '$' in 'rule' is replaced by str(value) and the result is evaluated
    as a Python expression. An EAsciiDoc exception carrying 'errmsg' is
    raised if the expression is false or cannot be evaluated."""
    try:
        # NOTE: eval() executes arbitrary code; 'rule' and 'value' must come
        # from trusted configuration only.
        passed = eval(rule.replace('$',str(value)))
    except Exception:
        raise EAsciiDoc(errmsg)
    if not passed:
        raise EAsciiDoc(errmsg)
    return value
def lstrip_list(s):
    """
    Return 's' with the empty items removed from its start.
    An empty list is returned if 's' contains no non-empty item.
    """
    size = len(s)
    first = 0
    while first < size and not s[first]:
        first += 1
    if first == size:
        return []
    return s[first:]
def rstrip_list(s):
    """
    Return 's' with the empty items removed from its end.
    An empty list is returned if 's' contains no non-empty item.
    """
    end = len(s)
    while end > 0 and not s[end-1]:
        end -= 1
    if end == 0:
        return []
    return s[:end]
def strip_list(s):
    """
    Return 's' with the empty items removed from both its start and its end.
    An empty list is returned if 's' contains no non-empty item.
    """
    start = 0
    stop = len(s)
    # Skip leading empty items.
    while start < stop and not s[start]:
        start += 1
    if start == stop:
        return []
    # At least one non-empty item exists so this loop terminates.
    while not s[stop-1]:
        stop -= 1
    return s[start:stop]
def is_array(obj):
    """
    Return True if 'obj' is a list or a tuple (or an instance of a subclass
    of either).
    """
    return isinstance(obj,(list,tuple))
def dovetail(lines1, lines2):
    """
    Return a list made of the items of 'lines1' followed by the items of
    'lines2' (each a list or tuple of strings) with the last item of 'lines1'
    and the first item of 'lines2' joined into a single string. Empty items
    at the start and end of both arguments are discarded first.
    """
    assert is_array(lines1)
    assert is_array(lines2)
    head = strip_list(lines1)
    tail = strip_list(lines2)
    if not head or not tail:
        # Nothing to join.
        return list(head) + list(tail)
    return list(head[:-1]) + [head[-1] + tail[0]] + list(tail[1:])
def dovetail_tags(stag,content,etag):
    """Merge the start tag with the first content line and the last
    content line with the end tag. This ensures verbatim elements don't
    include extraneous opening and closing line breaks."""
    opened = dovetail(stag,content)
    return dovetail(opened, etag)
def parse_attributes(attrs,dict):
    """Update a dictionary with name/value attributes from the attrs string.
    The attrs string is a comma separated list of values and keyword name=value
    pairs. Values must precede keywords and are named '1','2'... The entire
    attributes list is named '0'. If keywords are specified string values must
    be quoted. Empty positional values are dropped. If the attribute list
    cannot be parsed only the '0' entry is set. Examples:
    attrs: ''
    dict: {}
    attrs: 'hello,world'
    dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
    attrs: '"hello",planet="earth"'
    dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'}
    """
    def f(*args,**keywords):
        # Name and add arguments '1','2'... to keywords.
        for i in range(len(args)):
            if not str(i+1) in keywords:
                keywords[str(i+1)] = args[i]
        return keywords

    if not attrs:
        return
    dict['0'] = attrs
    # Replace line separators with spaces so line spanning works.
    s = re.sub(r'\s', ' ', attrs)
    try:
        # NOTE: the attribute list is evaluated as Python call arguments;
        # eval() executes arbitrary expressions so attribute lists must not
        # come from untrusted documents.
        d = eval('f('+s+')')
        # Attributes must evaluate to strings, numbers or None.
        for v in d.values():
            if not (v is None or isinstance(v,(str,int,float))):
                raise ValueError('illegal attribute value type')
    except Exception:
        # Retry treating every comma separated item as a quoted string.
        s = s.replace('"','\\"')
        s = ','.join(['"' + x.strip() + '"' for x in s.split(',')])
        try:
            d = eval('f('+s+')')
        except Exception:
            return  # If there's a syntax error leave with {0}=attrs.
    # Drop any empty positional arguments. Iterate over a copy of the keys
    # because items are deleted inside the loop.
    for k in list(d.keys()):
        if d[k] == '': del d[k]
    # 'd' may legitimately be empty here (e.g. attrs=','), in which case only
    # the '0' entry is set.
    dict.update(d)
def parse_named_attributes(s,attrs):
    """Update the attrs dictionary with the name="value" attributes in the
    string s. Return True on success, False if the syntax is invalid (in
    which case attrs is left unchanged).
    Example:
    s: 'star="sun",planet="earth"'
    attrs: {'planet':'earth', 'star':'sun'}
    """
    def f(**keywords):
        # Accepts keyword arguments only, so positional values are rejected.
        return keywords
    try:
        # NOTE: eval() executes arbitrary expressions; 's' must be trusted.
        attrs.update(eval('f('+s+')'))
    except Exception:
        return False
    else:
        return True
def parse_list(s):
    """Parse a comma separated string of Python literals and return the
    parsed values as a tuple. Raise EAsciiDoc if the string is malformed."""
    try:
        # NOTE: eval() executes arbitrary expressions; 's' must be trusted.
        return eval('tuple(['+s+'])')
    except Exception:
        raise EAsciiDoc('malformed list: '+s)
def parse_options(options,allowed,errmsg):
    """Split the comma separated string of unquoted option names 'options'
    and return the names as a tuple. 'allowed' is a list of allowed option
    values; if allowed=() then all legitimate names are allowed.
    An EAsciiDoc exception whose message is prefixed with 'errmsg' is raised
    for an illegal option."""
    if not options:
        return ()
    result = []
    for s in re.split(r'\s*,\s*',options):
        if (allowed and s not in allowed) or not is_name(s):
            raise EAsciiDoc('%s: %s' % (errmsg,s))
        result.append(s)
    return tuple(result)
def symbolize(s):
    """Return s in lowercase with everything except word characters, dashes
    and underscores removed."""
    symbol = re.sub(r'(?u)[^\w\-_]', '', s)
    return symbol.lower()
def is_name(s):
    """Return True if s is a valid attribute, macro or tag name, i.e. the
    whole of s matches the NAME_RE pattern."""
    if re.match(r'^'+NAME_RE+r'$',s):
        return True
    return False
def subs_quotes(text):
    """Mark up the quoted text in 'text' with the tags configured in
    config.quotes and return the result.
    A quote preceded by a backslash is left alone (the backslash is
    removed). A quote may be preceded by an [attribute list] which is passed
    on to config.tag()."""
    for q in config.quotes.keys():
        # A quotes key is either a single quote string or left|right quotes.
        sep = q.find('|')
        if sep == -1 or q == '|' or q == '||':
            lq = rq = q
        else:
            lq = q[:sep]      # Left quote.
            rq = q[sep+1:]    # Right quote.
        opening = r'(?:' + re.escape(lq) + r')'
        closing = r'(?:' + re.escape(rq) + r')'
        tag = config.quotes[q]
        if tag[0] == '#':
            # Unconstrained quotes prefix the tag name with a hash and can
            # appear anywhere.
            tag = tag[1:]
            reo = re.compile(r'(?msu)(^|.)(\[(?P<attrlist>[^[\]]+?)\])?'
                             + opening
                             + r'(?P<content>.+?)' + closing)
        else:
            # The text within constrained quotes must be bounded by white
            # space. Non-word (\W) characters are allowed at boundaries to
            # accommodate enveloping quotes.
            reo = re.compile(r'(?msu)(^|\W)(\[(?P<attrlist>[^[\]]+?)\])?'
                             + opening
                             + r'(?P<content>\S|\S.*?\S)' + closing
                             + r'(?=\W|$)')
        pos = 0
        mo = reo.search(text,pos)
        while mo:
            start = mo.start()
            if text[start] == '\\':
                # Escaped: delete the leading backslash and resume the search
                # just past the start of the match.
                text = text[:start] + text[start+1:]
                pos = start + 1
            else:
                attrlist = {}
                parse_attributes(mo.group('attrlist'), attrlist)
                stag,etag = config.tag(tag, attrlist)
                s = mo.group(1) + stag + mo.group('content') + etag
                text = text[:start] + s + text[mo.end():]
                pos = start + len(s)
            mo = reo.search(text,pos)
    return text
def subs_tag(tag,dict={}):
    """Perform attribute substitution on the 'tag' string and split it at '|'
    returning a two item [start tag, end tag] list (c.f. Config.tag()).
    [None,None] is returned if 'tag' is empty or is dropped because it
    contains an undefined attribute. The end tag is None if 'tag' has no
    '|'. Raise EAsciiDoc if 'tag' has more than one '|'."""
    if not tag:
        return [None,None]
    expanded = subs_attrs(tag,dict)
    if not expanded:
        message.warning('tag \'%s\' dropped: contains undefined attribute' % tag)
        return [None,None]
    parts = expanded.split('|')
    if len(parts) > 2:
        raise EAsciiDoc('malformed tag: %s' % tag)
    if len(parts) == 1:
        parts.append(None)
    return parts
def parse_entry(entry, dict=None, unquote=False, unique_values=False,
        allow_name_only=False, escape_delimiter=True):
    """Parse name=value entry to dictionary 'dict'. Return tuple (name,value)
    or None if illegal entry.
    If name= then value is set to ''.
    If name and allow_name_only=True then value is set to ''.
    If name! and allow_name_only=True then value is set to None.
    Leading and trailing white space is stripped from 'name' and 'value'.
    'name' can contain any printable characters.
    If the '=' delimiter character is allowed in the 'name' then
    it must be escaped with a backslash and escape_delimiter must be True.
    If 'unquote' is True leading and trailing double-quotes are stripped from
    'name' and 'value'.
    If 'unique_values' is True then dictionary entries with the same value are
    removed before the parsed entry is added."""
    if escape_delimiter:
        # The delimiter is the first '=' not preceded by a backslash.
        delimiter_re = r'(?:[^\\](=))'
    else:
        delimiter_re = r'(=)'
    mo = re.search(delimiter_re,entry)
    if mo:
        # name=value entry.
        name = entry[:mo.start(1)]
        if escape_delimiter:
            name = name.replace(r'\=','=')  # Unescape \= in name.
        value = entry[mo.end(1):]
    elif allow_name_only and entry:
        # name or name! entry.
        if entry[-1] == '!':
            name,value = entry[:-1],None
        else:
            name,value = entry,''
    else:
        return None
    if unquote:
        name = strip_quotes(name)
        if value is not None:
            value = strip_quotes(value)
    else:
        name = name.strip()
        if value is not None:
            value = value.strip()
    if not name:
        return None
    if dict is not None:
        if unique_values:
            # Iterate over a copy because entries are deleted in the loop.
            for k,v in list(dict.items()):
                if v == value:
                    del dict[k]
        dict[name] = value
    return name,value
def parse_entries(entries, dict, unquote=False, unique_values=False,
        allow_name_only=False,escape_delimiter=True):
    """Parse the name=value entries in the lines of text 'entries' into
    dictionary 'dict' after expanding templates in 'entries'. Blank lines
    are skipped. The remaining arguments are passed on to parse_entry().
    Raise EAsciiDoc if an entry is malformed."""
    for entry in config.expand_templates(entries):
        if not entry:
            continue
        parsed = parse_entry(entry, dict, unquote, unique_values,
                             allow_name_only, escape_delimiter)
        if not parsed:
            raise EAsciiDoc('malformed section entry: %s' % entry)
def load_conf_file(sections, fname, dir, namepat=NAME_RE):
    """Loads sections dictionary with sections from file fname.
    Existing sections are overlaid. Silently skips missing configuration
    files.
    'dir', if not empty, is the directory containing 'fname'.
    'namepat' is the regular expression that section names must match.
    Each section is stored under its lowercased name as a list of lines with
    trailing white space removed. Lines that start with '#' are comments and
    are skipped, as are lines that precede the first section header."""
    if dir:
        fname = os.path.join(dir, fname)
    # Silently skip missing configuration file.
    if not os.path.isfile(fname):
        return
    reo = re.compile(r'^\[(?P<section>'+namepat+r')\]\s*$')
    section,contents = '',[]
    f = open(fname)
    try:    # Ensure the file is closed even if reading fails.
        for line in f:
            if line and line[0] == '#':     # Skip comment lines.
                continue
            line = line.rstrip()
            # Look the section name up by group name so that groups inside
            # 'namepat' cannot disturb the result.
            mo = reo.match(line)
            if mo:
                if section:     # Store previous section.
                    sections[section] = contents
                section = mo.group('section').lower()
                contents = []
            else:
                contents.append(line)
    finally:
        f.close()
    if section and contents:    # Store last section.
        sections[section] = contents
def dump_section(name,dict,f=sys.stdout):
    """Write the parameters in 'dict' to file 'f' in configuration file
    section format under the section header 'name'. Entries whose value is
    None (undefined attributes) are not written."""
    f.write('[%s]%s' % (name,writer.newline))
    for k,v in dict.items():
        k = str(k).replace('=',r'\=')   # Escape = in name.
        # Quote names and values that have leading or trailing white space.
        if k != k.strip():
            k = '"'+k+'"'
        if v and v != v.strip():
            v = '"'+v+'"'
        if v is None:
            # Don't dump undefined attributes.
            continue
        s = k+'='+v
        if s[0] == '#':
            s = '\\' + s    # Escape so not treated as comment lines.
        f.write('%s%s' % (s,writer.newline))
    f.write(writer.newline)
def update_attrs(attrs,dict):
    """Copy the parsed attributes in dictionary 'dict' to the 'attrs'
    dictionary. Raise EAsciiDoc on the first illegal attribute name
    (attributes copied before it remain in 'attrs')."""
    for name,value in dict.items():
        if is_name(name):
            attrs[name] = value
        else:
            raise EAsciiDoc('illegal attribute name: %s' % name)
def filter_lines(filter_cmd, lines, attrs={}):
    """
    Run 'lines' through the 'filter_cmd' shell command and return the
    resulting list of lines (with trailing white space removed).
    'lines' is returned unchanged if 'filter_cmd' is empty or blank.
    The 'attrs' dictionary contains additional filter attributes which are
    substituted in the filter command.
    A command given without a directory path is searched for in the filters
    directories of USER_DIR, CONF_DIR, DATA_DIR and APP_DIR (in that order).
    Raise EAsciiDoc if the command contains an undefined attribute or cannot
    be executed.
    """
    def findfilter(name,dir,filter):
        """Find filter file 'filter' with style name 'name' in directory
        'dir'. Return found file path or None if not found."""
        candidates = []
        if name:
            candidates.append(os.path.join(dir,'filters',name,filter))
        candidates.append(os.path.join(dir,'filters',filter))
        for path in candidates:
            if os.path.isfile(path):
                return path
        return None

    # Return input lines if there's no filter.
    if not filter_cmd or not filter_cmd.strip():
        return lines
    # Perform attributes substitution on the filter command.
    s = subs_attrs(filter_cmd, attrs)
    if not s:
        raise EAsciiDoc('undefined filter attribute in command: %s' % filter_cmd)
    filter_cmd = s.strip()
    # Parse for the command and command tail: double quoted, then single
    # quoted, then the unquoted catch all.
    for pattern in (r'^"(?P<cmd>[^"]+)"(?P<tail>.*)$',
                    r"^'(?P<cmd>[^']+)'(?P<tail>.*)$",
                    r'^(?P<cmd>\S+)(?P<tail>.*)$'):
        mo = re.match(pattern, filter_cmd)
        if mo:
            break
    cmd = mo.group('cmd').strip()
    found = None
    if os.path.dirname(cmd):
        # The filter command includes a directory path.
        if os.path.isfile(cmd):
            found = cmd
        else:
            message.warning('filter not found: %s' % cmd)
    else:
        # Filter command has no directory path so search filter directories.
        filtername = attrs.get('style')
        if USER_DIR:
            found = findfilter(filtername, USER_DIR, cmd)
        found = found or findfilter(filtername, CONF_DIR, cmd)
        found = found or findfilter(filtername, DATA_DIR, cmd)
        found = found or findfilter(filtername, APP_DIR, cmd)
    if found:
        filter_cmd = '"' + found + '"' + mo.group('tail')
    if sys.platform == 'win32' and found:
        # Windows doesn't like running scripts directly so explicitly
        # specify interpreter.
        if cmd.endswith('.py'):
            filter_cmd = 'python ' + filter_cmd
        elif cmd.endswith('.rb'):
            filter_cmd = 'ruby ' + filter_cmd
    message.verbose('filtering: ' + filter_cmd)
    try:
        p = subprocess.Popen(filter_cmd, shell=True,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        output = p.communicate(os.linesep.join(lines))[0]
    except Exception:
        raise EAsciiDoc('filter error: %s: %s' % (filter_cmd, sys.exc_info()[1]))
    result = []
    if output:
        result = [line.rstrip() for line in output.split(os.linesep)]
    filter_status = p.wait()
    if filter_status:
        message.warning('filter non-zero exit code: %s: returned %d' %
               (filter_cmd, filter_status))
    if lines and not result:
        message.warning('no output from filter: %s' % filter_cmd)
    return result
def system(name, args, is_macro=False):
    """
    Evaluate a system attribute ({name:args}) or system block macro
    (name::[args]). If is_macro is True then we are processing a system
    block macro otherwise it's a system attribute.
    'name' is one of eval, sys, sys2, include or include1.
    Return the resulting string, or None if the action is illegal, unsafe,
    skipped or produced no result.
    NOTE: The include1 attribute is used internally by the include1::[] macro
    and is not for public use.
    """
    # NOTE: local variable names are deliberately kept stable because the
    # 'eval' action evaluates its expression inside this function's scope.
    if is_macro:
        syntax,separator = '%s::[%s]','\n'
    else:
        syntax,separator = '{%s:%s}',writer.newline
    if name not in ('eval','sys','sys2','include','include1'):
        if is_macro:
            message.warning('illegal system macro name: %s' % name)
        else:
            message.warning('illegal system attribute name: %s' % name)
        return None
    if is_macro:
        # Macro arguments have not had their attributes substituted yet.
        s = subs_attrs(args)
        if s is None:
            message.warning('skipped %s: undefined attribute in: %s' % (name,args))
            return None
        args = s
    if name != 'include1':
        message.verbose(('evaluating: '+syntax) % (name,args))
    if safe() and name not in ('include','include1'):
        # Only the include actions are permitted in safe mode.
        message.unsafe(syntax % (name,args))
        return None
    result = None
    if name == 'eval':
        try:
            # NOTE: eval() executes arbitrary Python (blocked in safe mode
            # above).
            result = eval(args)
            if result is True:
                result = ''
            elif result is False:
                result = None
            elif result is not None:
                result = str(result)
        except Exception:
            message.warning((syntax+': expression evaluation error') % (name,args))
    elif name in ('sys','sys2'):
        result = ''
        # The command output is captured by redirecting it to a temporary
        # file; sys2 captures stderr as well.
        fd,tmp = tempfile.mkstemp()
        os.close(fd)
        try:
            cmd = args + (' > %s' % tmp)
            if name == 'sys2':
                cmd = cmd + ' 2>&1'
            if os.system(cmd):
                message.warning((syntax+': non-zero exit status') % (name,args))
            try:
                lines = []
                if os.path.isfile(tmp):
                    lines = [s.rstrip() for s in open(tmp)]
            except Exception:
                raise EAsciiDoc((syntax+': temp file read error') % (name,args))
            result = separator.join(lines)
        finally:
            if os.path.isfile(tmp):
                os.remove(tmp)
    elif name == 'include':
        if not os.path.exists(args):
            message.warning((syntax+': file does not exist') % (name,args))
        elif not is_safe_file(args):
            message.unsafe(syntax % (name,args))
        else:
            result = [s.rstrip() for s in open(args)]
            if result:
                result = subs_attrs(result)
                result = separator.join(result)
                result = result.expandtabs(reader.tabsize)
            else:
                result = ''
    elif name == 'include1':
        result = separator.join(config.include1[args])
    else:
        assert False
    return result
def subs_attrs(lines, dictionary=None):
    """Substitute 'lines' of text with attributes from the global
    document.attributes dictionary and from 'dictionary' ('dictionary'
    entries take precedence). Return a tuple of the substituted lines. 'lines'
    containing undefined attributes are deleted. If 'lines' is a string then
    return a string (or None if the string was deleted).
    - Attribute references are substituted in the following order: simple,
      conditional, system.
    - Attribute references inside 'dictionary' entry values are substituted.
    """
    def end_brace(text,start):
        """Return index following end brace that matches brace at start in
        text."""
        assert text[start] == '{'
        depth = 0
        index = start
        last = len(text) - 1
        for c in text[start:]:
            # Skip braces that are followed by a backslash.
            if index == last or text[index+1] != '\\':
                if c == '{':
                    depth += 1
                elif c == '}':
                    depth -= 1
            index += 1
            if depth == 0:
                break
        return index

    # Simple attributes ({name}); nested attributes not allowed.
    simple_re = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)\}(?!\\)')
    # Conditional attributes ({name<op>value}).
    conditional_re = re.compile(r'(?su)\{(?P<name>[^\\\W][-\w]*?)'
                                r'(?P<op>\=|\?|!|#|%|@|\$)'
                                r'(?P<value>.*?)\}(?!\\)')
    # System attributes ({action:expr}).
    system_re = re.compile(r'(?su)\{(?P<action>[^\\\W][-\w]*?):(?P<expr>.*?)\}(?!\\)')

    if type(lines) == str:
        string_result = True
        lines = [lines]
    else:
        string_result = False
        lines = list(lines)
    if dictionary is None:
        attrs = document.attributes
    else:
        # Remove numbered document attributes so they don't clash with
        # attribute list positional attributes.
        attrs = {}
        for k,v in document.attributes.items():
            if not re.match(r'^\d+$', k):
                attrs[k] = v
        # Substitute attribute references inside dictionary values (on a
        # copy so the caller's dictionary is not modified). Entries that are
        # None, or that contain undefined attributes, are dropped.
        dictionary = dictionary.copy()
        for k,v in dictionary.items():
            if v is not None:
                v = subs_attrs(str(v))
            if v is None:
                del dictionary[k]
            else:
                dictionary[k] = v
        attrs.update(dictionary)
    # Substitute all attributes in all lines. The lines are processed in
    # reverse so dropped lines can be deleted in place.
    for i in range(len(lines)-1,-1,-1):
        text = lines[i]
        # Move escaping backslashes behind the brace to make it easier for
        # the regular expressions.
        text = text.replace('\\{','{\\')
        text = text.replace('\\}','}\\')
        # Expand simple attributes ({name}).
        pos = 0
        mo = simple_re.search(text,pos)
        while mo:
            s = attrs.get(mo.group('name'))
            if s is None:
                pos = mo.end()
            else:
                s = str(s)
                text = text[:mo.start()] + s + text[mo.end():]
                pos = mo.start() + len(s)
            mo = simple_re.search(text,pos)
        # Expand conditional attributes.
        pos = 0
        mo = conditional_re.search(text,pos)
        while mo:
            attr = mo.group()
            name = mo.group('name')
            op = mo.group('op')
            lval = attrs.get(name)
            # mo.end() is not good enough because '{x={y}}' matches '{x={y}'.
            end = end_brace(text,mo.start())
            rval = text[mo.start('value'):end-1]
            if lval is None:
                # The attribute is undefined.
                if op in ('=','!','%'):
                    s = rval
                elif op == '?':
                    s = ''
                elif op in ('#','@','$'):
                    s = '{'+name+'}'    # So the line is dropped.
                else:
                    assert False, 'illegal attribute: %s' % attr
            else:
                # The attribute is defined.
                if op == '=':
                    s = lval
                elif op in ('?','#'):
                    s = rval
                elif op == '!':
                    s = ''
                elif op == '%':
                    s = '{zzzzz}'       # So the line is dropped.
                elif op in ('@','$'):
                    v = re.split(r'(?<!\\):',rval)
                    if len(v) not in (2,3):
                        message.error('illegal attribute syntax: %s' % attr)
                        s = ''
                    elif not is_re('^'+v[0]+'$'):
                        message.error('illegal attribute regexp: %s' % attr)
                        s = ''
                    else:
                        v = [x.replace('\\:',':') for x in v]
                        re_mo = re.match('^'+v[0]+'$',lval)
                        if op == '@':
                            if re_mo:
                                s = v[1]    # {<name>@<re>:<v1>[:<v2>]}
                            elif len(v) == 3:
                                s = v[2]    # {<name>@<re>:<v1>:<v2>}
                            else:
                                s = ''      # {<name>@<re>:<v1>}
                        elif re_mo:
                            if len(v) == 3 and v[1] == '':
                                # {<name>$<re>::<v2>}
                                s = '{zzzzz}'   # So the line is dropped.
                            else:
                                # {<name>$<re>:<v1>} or {<name>$<re>:<v1>:<v2>}
                                s = v[1]
                        elif len(v) == 2:
                            # {<name>$<re>:<v1>}
                            s = '{zzzzz}'   # So the line is dropped.
                        else:
                            # {<name>$<re>:<v1>:<v2>}
                            s = v[2]
                else:
                    assert False, 'illegal attribute: %s' % attr
            s = str(s)
            text = text[:mo.start()] + s + text[end:]
            pos = mo.start() + len(s)
            mo = conditional_re.search(text,pos)
        # Drop line if it contains unsubstituted {name} references.
        if re.search(r'(?su)\{[^\\\W][-\w]*?\}(?!\\)', text):
            del lines[i]
            continue
        # Expand system attributes.
        skipped = False
        pos = 0
        mo = system_re.search(text,pos)
        while mo:
            # Restore escaped braces before the expression is evaluated.
            expr = mo.group('expr')
            expr = expr.replace('{\\','{')
            expr = expr.replace('}\\','}')
            s = system(mo.group('action'),expr)
            if s is None:
                skipped = True
                break
            text = text[:mo.start()] + s + text[mo.end():]
            pos = mo.start() + len(s)
            mo = system_re.search(text,pos)
        # Drop line if the action returns None.
        if skipped:
            del lines[i]
            continue
        # Remove backslash from escaped entries.
        text = text.replace('{\\','{')
        text = text.replace('}\\','}')
        lines[i] = text
    if not string_result:
        return tuple(lines)
    if lines:
        return '\n'.join(lines)
    return None
def char_encoding():
    """Return the value of the document 'encoding' attribute.
    Raise EAsciiDoc if the attribute names an unknown codec."""
    encoding = document.attributes.get('encoding')
    if not encoding:
        return encoding
    try:
        codecs.lookup(encoding)
    except LookupError:
        raise EAsciiDoc(str(sys.exc_info()[1]))
    return encoding
def char_len(s):
    """Return the length of 's' after decoding it with char_decode()."""
    decoded = char_decode(s)
    return len(decoded)
def char_decode(s):
    """Return 's' decoded using the document encoding, or 's' unchanged if
    the document has no encoding. Raise EAsciiDoc if 's' cannot be
    decoded."""
    encoding = char_encoding()
    if not encoding:
        return s
    try:
        return s.decode(encoding)
    except Exception:
        raise EAsciiDoc("'%s' codec can't decode \"%s\"" % (encoding, s))
def char_encode(s):
    """Return 's' encoded using the document encoding, or 's' unchanged if
    the document has no encoding."""
    encoding = char_encoding()
    if not encoding:
        return s
    return s.encode(encoding)
def time_str(t):
    """Convert seconds since the Epoch to formatted local time string.
    The result is the HH:MM:SS local time followed by the name of the time
    zone in effect at time 't'."""
    t = time.localtime(t)
    s = time.strftime('%H:%M:%S',t)
    # time.daylight only says that the local zone defines a DST zone; the
    # tm_isdst flag says whether DST is actually in effect at time 't'.
    if time.daylight and t.tm_isdst == 1:
        result = s + ' ' + time.tzname[1]
    else:
        result = s + ' ' + time.tzname[0]
    # Attempt to convert the localtime to the output encoding.
    try:
        result = char_encode(result.decode(locale.getdefaultlocale()[1]))
    except Exception:
        # Deliberate best effort: fall back to the unconverted string.
        pass
    return result
def date_str(t):
    """Convert seconds since the Epoch to a local date string formatted as
    YYYY-MM-DD."""
    return time.strftime('%Y-%m-%d',time.localtime(t))
class Lex:
    """Lexical analysis routines. Static methods and attributes only."""
    # Result and reader position of the most recent next() call.
    prev_element = None
    prev_cursor = None

    def __init__(self):
        raise AssertionError('no class instances allowed')

    @staticmethod
    def next():
        """Returns class of next element on the input (None if EOF). The
        reader is assumed to be at the first line following a previous element,
        end of file or line one. Exits with the reader pointing to the first
        line of the next element or EOF (leading blank lines are skipped).
        Raises EAsciiDoc if the input is not recognized as any element."""
        reader.skip_blank_lines()
        if reader.eof():
            return None
        # Optimization: If we've already checked for an element at this
        # position return the element.
        if Lex.prev_element and Lex.prev_cursor == reader.cursor:
            return Lex.prev_element
        # The order of the following tests is significant.
        if AttributeEntry.isnext():
            result = AttributeEntry
        elif AttributeList.isnext():
            result = AttributeList
        elif Title.isnext():
            result = Title
        elif macros.isnext():
            result = macros.current
        elif lists.isnext():
            result = lists.current
        elif blocks.isnext():
            result = blocks.current
        elif tables_OLD.isnext():
            result = tables_OLD.current
        elif tables.isnext():
            result = tables.current
        elif BlockTitle.isnext():
            result = BlockTitle
        elif paragraphs.isnext():
            result = paragraphs.current
        else:
            raise EAsciiDoc('paragraph expected')
        # Optimization: Cache answer.
        Lex.prev_cursor = reader.cursor
        Lex.prev_element = result
        return result

    @staticmethod
    def canonical_subs(options):
        """Translate composite subs values: a lone 'none', 'normal' or
        'verbatim' option is replaced by the options it stands for. Other
        'options' are returned unchanged."""
        if len(options) == 1:
            only = options[0]
            if only == 'none':
                return ()
            if only == 'normal':
                return config.subsnormal
            if only == 'verbatim':
                return config.subsverbatim
        return options

    @staticmethod
    def subs_1(s,options):
        """Perform the substitutions specified in 'options' (in 'options'
        order) on the string 's' and return the result. Processing stops as
        soon as a substitution yields an empty result.
        Raises EAsciiDoc if an option is not a known substitution name."""
        if not s:
            return s
        result = s
        for o in Lex.canonical_subs(options):
            if o == 'specialcharacters':
                result = config.subs_specialchars(result)
            elif o == 'attributes':
                result = subs_attrs(result)
            elif o == 'quotes':
                result = subs_quotes(result)
            elif o == 'specialwords':
                result = config.subs_specialwords(result)
            elif o in ('replacements','replacements2'):
                result = config.subs_replacements(result,o)
            elif o == 'macros':
                result = macros.subs(result)
            elif o == 'callouts':
                result = macros.subs(result,callouts=True)
            else:
                raise EAsciiDoc('illegal substitution option: %s' % o)
            trace(o, s, result)
            if not result:
                break
        return result

    @staticmethod
    def subs(lines,options):
        """Perform inline processing specified by 'options' (in 'options'
        order) on sequence of 'lines' and return the resulting list of lines.
        'lines' is returned unchanged if 'lines' or 'options' is empty."""
        if not lines or not options:
            return lines
        options = Lex.canonical_subs(options)
        has_macros = 'macros' in options
        # Join lines so quoting can span multiple lines.
        para = '\n'.join(lines)
        if has_macros:
            para = macros.extract_passthroughs(para)
        for o in options:
            if o == 'attributes':
                # If we don't substitute attributes line-by-line then a single
                # undefined attribute will drop the entire paragraph.
                lines = subs_attrs(para.split('\n'))
                para = '\n'.join(lines)
            else:
                para = Lex.subs_1(para,(o,))
        if has_macros:
            para = macros.restore_passthroughs(para)
        return para.splitlines()

    @staticmethod
    def set_margin(lines, margin=0):
        """Utility routine that sets the left margin to 'margin' space in a
        block of non-blank lines. Returns a new list of lines."""
        block = list(lines)
        # Calculate width of block margin (the smallest indent in the block).
        width = len(block[0])
        for s in block:
            indent = re.search(r'\S',s).start()
            if indent < width:
                width = indent
        # Strip margin width from all lines.
        pad = ' '*margin
        return [pad + s[width:] for s in block]
- #---------------------------------------------------------------------------
- # Document element classes parse AsciiDoc reader input and write DocBook writer
- # output.
- #---------------------------------------------------------------------------
- class Document:
- def __init__(self):
- self.doctype = None # 'article','manpage' or 'book'.
- self.backend = None # -b option argument.
- self.infile = None # Source file name.
- self.outfile = None # Output file name.
- self.attributes = {}
- self.level = 0 # 0 => front matter. 1,2,3 => sect1,2,3.
- self.has_errors = False # Set true if processing errors were flagged.
- self.has_warnings = False # Set true if warnings were flagged.
- self.safe = False # Default safe mode.
def update_attributes(self):
    """Populate self.attributes with the implicit document attributes.

    Sets the date/time, version, backend/doctype, application path and
    input file attributes, overlays the configuration file attributes and
    then the command-line attributes, and finally derives the output file
    and file type attributes.
    """
    # Set implicit attributes.
    if self.infile and os.path.exists(self.infile):
        t = os.path.getmtime(self.infile)
    elif self.infile == '<stdin>':
        t = time.time()
    else:
        t = None
    # Compare with None rather than testing truthiness: a modification time
    # of 0 (the Epoch itself) is a legitimate timestamp.
    if t is not None:
        self.attributes['doctime'] = time_str(t)
        self.attributes['docdate'] = date_str(t)
    t = time.time()
    self.attributes['localtime'] = time_str(t)
    self.attributes['localdate'] = date_str(t)
    self.attributes['asciidoc-version'] = VERSION
    # NOTE(review): 'document' is a name defined outside this method;
    # presumably it refers to this same instance -- confirm.
    self.attributes['backend'] = document.backend
    self.attributes['doctype'] = document.doctype
    self.attributes['backend-'+document.backend] = ''
    self.attributes['doctype-'+document.doctype] = ''
    self.attributes[document.backend+'-'+document.doctype] = ''
    self.attributes['asciidoc-file'] = APP_FILE
    self.attributes['asciidoc-dir'] = APP_DIR
    self.attributes['user-dir'] = USER_DIR
    # The first test above already allows for an unset infile; skip the file
    # derived attributes in that case instead of handing None to os.path.
    if self.infile is not None and self.infile != '<stdin>':
        self.attributes['infile'] = self.infile
        self.attributes['indir'] = os.path.dirname(self.infile)
        self.attributes['docfile'] = self.infile
        self.attributes['docdir'] = os.path.dirname(self.infile)
        self.attributes['docname'] = os.path.splitext(
                os.path.basename(self.infile))[0]
    if config.verbose:
        self.attributes['verbose'] = ''
    # Update with configuration file attributes.
    self.attributes.update(config.conf_attrs)
    # Update with command-line attributes.
    self.attributes.update(config.cmd_attrs)
    # Extract miscellaneous configuration section entries from attributes.
    config.load_miscellaneous(config.conf_attrs)
    config.load_miscellaneous(config.cmd_attrs)
    self.attributes['newline'] = config.newline
    if self.outfile:
        if self.outfile != '<stdout>':
            self.attributes['outfile'] = self.outfile
            self.attributes['outdir'] = os.path.dirname(self.outfile)
            # The output file name takes over 'docname' from the input file.
            self.attributes['docname'] = os.path.splitext(
                    os.path.basename(self.outfile))[0]
            ext = os.path.splitext(self.outfile)[1][1:]
        elif config.outfilesuffix:
            # Writing to stdout: fall back to the configured suffix.
            ext = config.outfilesuffix[1:]
        else:
            ext = ''
        if ext:
            self.attributes['filetype'] = ext
            self.attributes['filetype-'+ext] = ''
def load_lang(self,linenos=False):
    """
    Load the configuration file of the language named by the 'lang'
    attribute. An error is reported if the attribute is not set or if
    config.load_lang() does not succeed. 'linenos' is assigned to
    message.linenos while the messages are issued.
    """
    lang = self.attributes.get('lang')
    message.linenos = linenos
    if not lang:
        message.error('language attribute (lang) is not defined')
    else:
        loaded = config.load_lang(lang)
        if not loaded:
            message.error('missing language conf file: lang-%s.conf' % lang)
        # Reinstate new lang attribute.
        self.attributes['lang'] = lang
    # Restore default line number behavior.
    message.linenos = None
def set_deprecated_attribute(self,old,new):
    """
    Keep a renamed attribute and its deprecated name in step: when 'new'
    has a value it is copied to 'old', otherwise a value found under 'old'
    is copied to 'new'. Nothing changes if neither has a value.
    """
    attrs = self.attributes
    new_value = attrs.get(new)
    if new_value is not None:
        # The current name wins; mirror it under the deprecated name.
        attrs[old] = new_value
        return
    old_value = attrs.get(old)
    if old_value is not None:
        # Only the deprecated name was supplied; honor it.
        attrs[new] = old_value
- def translate(self):
- """
- Translate the document: skip leading comments and attribute entries,
- process the optional document header, then translate the sections until
- the reader reaches end of file. The 'header' and 'footer' configuration
- sections are written when config.header_footer is set.
- Raises EAsciiDoc if an element other than a section title is found where
- a section is expected.
- """
- # The document type must be valid and no section may be open yet.
- assert self.doctype in ('article','manpage','book'), \
- 'illegal document type'
- assert self.level == 0
- config.expand_all_templates()
- self.load_lang()
- # Skip leading comments and attribute entries.
- finished = False
- # Counts attribute entries; used to report a 'lang' entry that is not first.
- attr_count = 0
- # 'finished' is set at the top of each pass and cleared again whenever an
- # element is translated, so the loop ends once a pass translates nothing.
- while not finished:
- finished = True
- if blocks.isnext() and 'skip' in blocks.current.options:
- finished = False
- blocks.current.translate()
- if macros.isnext() and macros.current.name == 'comment':
- finished = False
- macros.current.translate()
- if AttributeEntry.isnext():
- finished = False
- AttributeEntry.translate()
- if AttributeEntry.name == 'lang':
- self.load_lang(linenos=True)
- if attr_count > 0:
- message.error('lang attribute should be first entry')
- attr_count += 1
- message.verbose('writing: '+writer.fname,False)
- # Process document header.
- # A header is present when the next element is a level 0 title.
- has_header = Lex.next() is Title and Title.level == 0
- if self.doctype == 'manpage' and not has_header:
- message.error('manpage document title is mandatory')
- if has_header:
- Header.translate()
- # Command-line entries override header derived entries.
- self.attributes.update(config.cmd_attrs)
- # DEPRECATED: revision renamed to revnumber.
- self.set_deprecated_attribute('revision','revnumber')
- # DEPRECATED: date renamed to revdate.
- self.set_deprecated_attribute('date','revdate')
- if config.header_footer:
- hdr = config.subs_section('header',{})
- writer.write(hdr,trace='header')
- if self.doctype in ('article','book'):
- # Translate 'preamble' (untitled elements between header
- # and first section title).
- if Lex.next() is not Title:
- stag,etag = config.section2tags('preamble')
- writer.write(stag,trace='preamble open')
- Section.translate_body()
- writer.write(etag,trace='preamble close')
- else:
- document.process_author_names()
- if config.header_footer:
- hdr = config.subs_section('header',{})
- writer.write(hdr,trace='header')
- if Lex.next() is not Title:
- Section.translate_body()
- # Process remaining sections.
- while not reader.eof():
- if Lex.next() is not Title:
- raise EAsciiDoc,'section title expected'
- Section.translate()
- Section.setlevel(0) # Write remaining unwritten section close tags.
- # Substitute document parameters and write document footer.
- if config.header_footer:
- ftr = config.subs_section('footer',{})
- writer.write(ftr,trace='footer')
def parse_author(self,s):
    """Parse the author string 's' into author attributes.

    's' consists of one to three whitespace separated name parts optionally
    followed by an email address in angle brackets. With two parts they are
    the first and last names; with three the second is the middle name.
    Underscores inside a name part are replaced by spaces. The parts found
    are stored in the 'firstname', 'middlename', 'lastname' and 'email'
    entries of self.attributes; entries for absent parts are left alone.

    Returns True on success. If the author is malformed an error is
    reported with message.error() and False is returned."""
    attrs = self.attributes # Alias for readability.
    s = s.strip()
    # Every fragment is a raw string so that '\s' and '\S' reach the regular
    # expression engine verbatim rather than being read as (invalid) string
    # escape sequences.
    mo = re.match(r'^(?P<name1>[^<>\s]+)'
                  r'(\s+(?P<name2>[^<>\s]+))?'
                  r'(\s+(?P<name3>[^<>\s]+))?'
                  r'(\s+<(?P<email>\S+)>)?$',s)
    if not mo:
        message.error('malformed author: %s' % s)
        return False
    firstname = mo.group('name1')
    if mo.group('name3'):
        middlename = mo.group('name2')
        lastname = mo.group('name3')
    else:
        # Fewer than three parts: the second part, if any, is the last name.
        middlename = None
        lastname = mo.group('name2')
    email = mo.group('email')
    # An underscore stands for a space inside a single name part.
    for key,value in (('firstname',firstname),
                      ('middlename',middlename),
                      ('lastname',lastname)):
        if value:
            attrs[key] = value.replace('_',' ')
    if email:
        attrs['email'] = email
    return True
- def process_author_names(self):
- """ Calculate any missing author related attributes."""
- attrs = self.attributes # Alias for readability.
- firstname = attrs.get('firstname','')
- middlename = attrs.get('middlename','')
- lastname = attrs.get('lastname','')
- author = attrs.get('author')
- initials = attrs.get('authorinitials')
- if author and not (firstname or middlename or lastname):
- if not self.parse_author(author):
- return
- attrs['author'] = author.replace('_',' ')
- self.process_author_names()
- return
- if not author:
- author = '%s %s %s' % (firstname, middlename, lastname)
- author = author.strip()
- author …
Large files files are truncated, but you can click here to view the full file