html.py | searchcode

/gluon/html.py

Large files files are truncated, but you can click here to view the full file

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This file is part of the web2py Web Framework
Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
"""

import cgi
import os
import re
import copy
import types
import urllib
import base64
import sanitizer
import itertools
import decoder
import copy_reg
import cPickle
import marshal

from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint

from storage import Storage
from utils import web2py_uuid, simple_hash, compare
from highlight import highlight

regex_crlf = re.compile('\r|\n')

join = ''.join

# name2codepoint is incomplete respect to xhtml (and xml): 'apos' is missing.
entitydefs = dict(map(lambda (
    k, v): (k, unichr(v).encode('utf-8')), name2codepoint.iteritems()))
entitydefs.setdefault('apos', u"'".encode('utf-8'))


__all__ = [
    'A',
    'B',
    'BEAUTIFY',
    'BODY',
    'BR',
    'BUTTON',
    'CENTER',
    'CAT',
    'CODE',
    'COL',
    'COLGROUP',
    'DIV',
    'EM',
    'EMBED',
    'FIELDSET',
    'FORM',
    'H1',
    'H2',
    'H3',
    'H4',
    'H5',
    'H6',
    'HEAD',
    'HR',
    'HTML',
    'I',
    'IFRAME',
    'IMG',
    'INPUT',
    'LABEL',
    'LEGEND',
    'LI',
    'LINK',
    'OL',
    'UL',
    'MARKMIN',
    'MENU',
    'META',
    'OBJECT',
    'ON',
    'OPTION',
    'P',
    'PRE',
    'SCRIPT',
    'OPTGROUP',
    'SELECT',
    'SPAN',
    'STRONG',
    'STYLE',
    'TABLE',
    'TAG',
    'TD',
    'TEXTAREA',
    'TH',
    'THEAD',
    'TBODY',
    'TFOOT',
    'TITLE',
    'TR',
    'TT',
    'URL',
    'XHTML',
    'XML',
    'xmlescape',
    'embed64',
]


def xmlescape(data, quote=True):
    """
    returns an escaped string of the provided data

    :param data: the data to be escaped
    :param quote: optional (default False)
    """

    # first try the xml function
    if hasattr(data, 'xml') and callable(data.xml):
        return data.xml()

    # otherwise, make it a string
    if not isinstance(data, (str, unicode)):
        data = str(data)
    elif isinstance(data, unicode):
        data = data.encode('utf8', 'xmlcharrefreplace')

    # ... and do the escaping
    data = cgi.escape(data, quote).replace("'", "&#x27;")
    return data

def call_as_list(f,*a,**b):
    if not isinstance(f, (list,tuple)):
        f = [f]
    for item in f:
        item(*a,**b)

def truncate_string(text, length, dots='...'):
    text = text.decode('utf-8')
    if len(text) > length:
        text = text[:length - len(dots)].encode('utf-8') + dots
    return text


def URL(
    a=None,
    c=None,
    f=None,
    r=None,
    args=None,
    vars=None,
    anchor='',
    extension=None,
    env=None,
    hmac_key=None,
    hash_vars=True,
    salt=None,
    user_signature=None,
    scheme=None,
    host=None,
    port=None,
    encode_embedded_slash=False,
    url_encode=True
):
    """
    generate a URL

    example::

        >>> str(URL(a='a', c='c', f='f', args=['x', 'y', 'z'],
        ...     vars={'p':1, 'q':2}, anchor='1'))
        '/a/c/f/x/y/z?p=1&q=2#1'

        >>> str(URL(a='a', c='c', f='f', args=['x', 'y', 'z'],
        ...     vars={'p':(1,3), 'q':2}, anchor='1'))
        '/a/c/f/x/y/z?p=1&p=3&q=2#1'

        >>> str(URL(a='a', c='c', f='f', args=['x', 'y', 'z'],
        ...     vars={'p':(3,1), 'q':2}, anchor='1'))
        '/a/c/f/x/y/z?p=3&p=1&q=2#1'

        >>> str(URL(a='a', c='c', f='f', anchor='1+2'))
        '/a/c/f#1%2B2'

        >>> str(URL(a='a', c='c', f='f', args=['x', 'y', 'z'],
        ...     vars={'p':(1,3), 'q':2}, anchor='1', hmac_key='key'))
        '/a/c/f/x/y/z?p=1&p=3&q=2&_signature=a32530f0d0caa80964bb92aad2bedf8a4486a31f#1'

        >>> str(URL(a='a', c='c', f='f', args=['w/x', 'y/z']))
        '/a/c/f/w/x/y/z'

        >>> str(URL(a='a', c='c', f='f', args=['w/x', 'y/z'], encode_embedded_slash=True))
        '/a/c/f/w%2Fx/y%2Fz'

        >>> str(URL(a='a', c='c', f='f', args=['%(id)d'], url_encode=False))
        '/a/c/f/%(id)d'

        >>> str(URL(a='a', c='c', f='f', args=['%(id)d'], url_encode=True))
        '/a/c/f/%25%28id%29d'

        >>> str(URL(a='a', c='c', f='f', vars={'id' : '%(id)d' }, url_encode=False))
        '/a/c/f?id=%(id)d'

        >>> str(URL(a='a', c='c', f='f', vars={'id' : '%(id)d' }, url_encode=True))
        '/a/c/f?id=%25%28id%29d'

        >>> str(URL(a='a', c='c', f='f', anchor='%(id)d', url_encode=False))
        '/a/c/f#%(id)d'

        >>> str(URL(a='a', c='c', f='f', anchor='%(id)d', url_encode=True))
        '/a/c/f#%25%28id%29d'

    generates a url '/a/c/f' corresponding to application a, controller c
    and function f. If r=request is passed, a, c, f are set, respectively,
    to r.application, r.controller, r.function.

    The more typical usage is:

    URL(r=request, f='index') that generates a url for the index function
    within the present application and controller.

    :param a: application (default to current if r is given)
    :param c: controller (default to current if r is given)
    :param f: function (default to current if r is given)
    :param r: request (optional)
    :param args: any arguments (optional)
    :param vars: any variables (optional)
    :param anchor: anchorname, without # (optional)
    :param hmac_key: key to use when generating hmac signature (optional)
    :param hash_vars: which of the vars to include in our hmac signature
        True (default) - hash all vars, False - hash none of the vars,
        iterable - hash only the included vars ['key1','key2']
    :param scheme: URI scheme (True, 'http' or 'https', etc); forces absolute URL (optional)
    :param host: string to force absolute URL with host (True means http_host)
    :param port: optional port number (forces absolute URL)

    :raises SyntaxError: when no application, controller or function is
        available
    :raises SyntaxError: when a CRLF is found in the generated url
    """

    from rewrite import url_out  # done here in case used not-in web2py

    if args in (None, []):
        args = []
    vars = vars or {}
    application = None
    controller = None
    function = None

    if not isinstance(args, (list, tuple)):
        args = [args]

    if not r:
        if a and not c and not f:
            (f, a, c) = (a, c, f)
        elif a and c and not f:
            (c, f, a) = (a, c, f)
        from globals import current
        if hasattr(current, 'request'):
            r = current.request

    if r:
        application = r.application
        controller = r.controller
        function = r.function
        env = r.env
        if extension is None and r.extension != 'html':
            extension = r.extension
    if a:
        application = a
    if c:
        controller = c
    if f:
        if not isinstance(f, str):
            if hasattr(f, '__name__'):
                function = f.__name__
            else:
                raise SyntaxError(
                    'when calling URL, function or function name required')
        elif '/' in f:
            if f.startswith("/"):
                f = f[1:]
            items = f.split('/')
            function = f = items[0]
            args = items[1:] + args
        else:
            function = f

        # if the url gets a static resource, don't force extention
        if controller == 'static':
            extension = None

        if '.' in function:
            function, extension = function.rsplit('.', 1)

    function2 = '%s.%s' % (function, extension or 'html')

    if not (application and controller and function):
        raise SyntaxError('not enough information to build the url (%s %s %s)' % (application, controller, function))

    if args:
        if url_encode:
            if encode_embedded_slash:
                other = '/' + '/'.join([urllib.quote(str(
                    x), '') for x in args])
            else:
                other = args and urllib.quote(
                    '/' + '/'.join([str(x) for x in args]))
        else:
            other = args and ('/' + '/'.join([str(x) for x in args]))
    else:
        other = ''

    if other.endswith('/'):
        other += '/'    # add trailing slash to make last trailing empty arg explicit

    list_vars = []
    for (key, vals) in sorted(vars.items()):
        if key == '_signature':
            continue
        if not isinstance(vals, (list, tuple)):
            vals = [vals]
        for val in vals:
            list_vars.append((key, val))

    if user_signature:
        from globals import current
        if current.session.auth:
            hmac_key = current.session.auth.hmac_key

    if hmac_key:
        # generate an hmac signature of the vars & args so can later
        # verify the user hasn't messed with anything

        h_args = '/%s/%s/%s%s' % (application, controller, function2, other)

        # how many of the vars should we include in our hash?
        if hash_vars is True:       # include them all
            h_vars = list_vars
        elif hash_vars is False:    # include none of them
            h_vars = ''
        else:                       # include just those specified
            if hash_vars and not isinstance(hash_vars, (list, tuple)):
                hash_vars = [hash_vars]
            h_vars = [(k, v) for (k, v) in list_vars if k in hash_vars]

        # re-assembling the same way during hash authentication
        message = h_args + '?' + urllib.urlencode(sorted(h_vars))
        sig = simple_hash(
            message, hmac_key or '', salt or '', digest_alg='sha1')
        # add the signature into vars
        list_vars.append(('_signature', sig))

    if list_vars:
        if url_encode:
            other += '?%s' % urllib.urlencode(list_vars)
        else:
            other += '?%s' % '&'.join(['%s=%s' % var[:2] for var in list_vars])
    if anchor:
        if url_encode:
            other += '#' + urllib.quote(str(anchor))
        else:
            other += '#' + (str(anchor))
    if extension:
        function += '.' + extension

    if regex_crlf.search(join([application, controller, function, other])):
        raise SyntaxError('CRLF Injection Detected')

    url = url_out(r, env, application, controller, function,
                  args, other, scheme, host, port)
    return url


def verifyURL(request, hmac_key=None, hash_vars=True, salt=None, user_signature=None):
    """
    Verifies that a request's args & vars have not been tampered with by the user

    :param request: web2py's request object
    :param hmac_key: the key to authenticate with, must be the same one previously
                    used when calling URL()
    :param hash_vars: which vars to include in our hashing. (Optional)
                    Only uses the 1st value currently
                    True (or undefined) means all, False none,
                    an iterable just the specified keys

    do not call directly. Use instead:

    URL.verify(hmac_key='...')

    the key has to match the one used to generate the URL.

        >>> r = Storage()
        >>> gv = Storage(p=(1,3),q=2,_signature='a32530f0d0caa80964bb92aad2bedf8a4486a31f')
        >>> r.update(dict(application='a', controller='c', function='f', extension='html'))
        >>> r['args'] = ['x', 'y', 'z']
        >>> r['get_vars'] = gv
        >>> verifyURL(r, 'key')
        True
        >>> verifyURL(r, 'kay')
        False
        >>> r.get_vars.p = (3, 1)
        >>> verifyURL(r, 'key')
        True
        >>> r.get_vars.p = (3, 2)
        >>> verifyURL(r, 'key')
        False

    """

    if not '_signature' in request.get_vars:
        return False  # no signature in the request URL

    # check if user_signature requires
    if user_signature:
        from globals import current
        if not current.session or not current.session.auth:
            return False
        hmac_key = current.session.auth.hmac_key
    if not hmac_key:
        return False

    # get our sig from request.get_vars for later comparison
    original_sig = request.get_vars._signature

    # now generate a new hmac for the remaining args & vars
    vars, args = request.get_vars, request.args

    # remove the signature var since it was not part of our signed message
    request.get_vars.pop('_signature')

    # join all the args & vars into one long string

    # always include all of the args
    other = args and urllib.quote('/' + '/'.join([str(x) for x in args])) or ''
    h_args = '/%s/%s/%s.%s%s' % (request.application,
                                 request.controller,
                                 request.function,
                                 request.extension,
                                 other)

    # but only include those vars specified (allows more flexibility for use with
    # forms or ajax)

    list_vars = []
    for (key, vals) in sorted(vars.items()):
        if not isinstance(vals, (list, tuple)):
            vals = [vals]
        for val in vals:
            list_vars.append((key, val))

    # which of the vars are to be included?
    if hash_vars is True:       # include them all
        h_vars = list_vars
    elif hash_vars is False:    # include none of them
        h_vars = ''
    else:                       # include just those specified
        # wrap in a try - if the desired vars have been removed it'll fail
        try:
            if hash_vars and not isinstance(hash_vars, (list, tuple)):
                hash_vars = [hash_vars]
            h_vars = [(k, v) for (k, v) in list_vars if k in hash_vars]
        except:
            # user has removed one of our vars! Immediate fail
            return False
    # build the full message string with both args & vars
    message = h_args + '?' + urllib.urlencode(sorted(h_vars))

    # hash with the hmac_key provided
    sig = simple_hash(message, str(hmac_key), salt or '', digest_alg='sha1')

    # put _signature back in get_vars just in case a second call to URL.verify is performed
    # (otherwise it'll immediately return false)
    request.get_vars['_signature'] = original_sig

    # return whether or not the signature in the request matched the one we just generated
    # (I.E. was the message the same as the one we originally signed)

    return compare(original_sig, sig)

URL.verify = verifyURL

ON = True


class XmlComponent(object):
    """
    Abstract root for all Html components
    """

    # TODO: move some DIV methods to here

    def xml(self):
        raise NotImplementedError

    def __mul__(self, n):
        return CAT(*[self for i in range(n)])

    def __add__(self, other):
        if isinstance(self, CAT):
            components = self.components
        else:
            components = [self]
        if isinstance(other, CAT):
            components += other.components
        else:
            components += [other]
        return CAT(*components)

    def add_class(self, name):
        """ add a class to _class attribute """
        c = self['_class']
        classes = (set(c.split()) if c else set()) | set(name.split())
        self['_class'] = ' '.join(classes) if classes else None
        return self

    def remove_class(self, name):
        """ remove a class from _class attribute """
        c = self['_class']
        classes = (set(c.split()) if c else set()) - set(name.split())
        self['_class'] = ' '.join(classes) if classes else None
        return self

class XML(XmlComponent):
    """
    use it to wrap a string that contains XML/HTML so that it will not be
    escaped by the template

    example:

    >>> XML('<h1>Hello</h1>').xml()
    '<h1>Hello</h1>'
    """

    def __init__(
        self,
        text,
        sanitize=False,
        permitted_tags=[
            'a',
            'b',
            'blockquote',
            'br/',
            'i',
            'li',
            'ol',
            'ul',
            'p',
            'cite',
            'code',
            'pre',
            'img/',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'table', 'tr', 'td', 'div',
            'strong','span',
        ],
        allowed_attributes={
            'a': ['href', 'title', 'target'],
            'img': ['src', 'alt'],
            'blockquote': ['type'],
            'td': ['colspan'],
        },
    ):
        """
        :param text: the XML text
        :param sanitize: sanitize text using the permitted tags and allowed
            attributes (default False)
        :param permitted_tags: list of permitted tags (default: simple list of
            tags)
        :param allowed_attributes: dictionary of allowed attributed (default
            for A, IMG and BlockQuote).
            The key is the tag; the value is a list of allowed attributes.
        """

        if sanitize:
            text = sanitizer.sanitize(text, permitted_tags,
                                      allowed_attributes)
        if isinstance(text, unicode):
            text = text.encode('utf8', 'xmlcharrefreplace')
        elif not isinstance(text, str):
            text = str(text)
        self.text = text

    def xml(self):
        return self.text

    def __str__(self):
        return self.text

    def __add__(self, other):
        return '%s%s' % (self, other)

    def __radd__(self, other):
        return '%s%s' % (other, self)

    def __cmp__(self, other):
        return cmp(str(self), str(other))

    def __hash__(self):
        return hash(str(self))

#    why was this here? Break unpickling in sessions
#    def __getattr__(self, name):
#        return getattr(str(self), name)

    def __getitem__(self, i):
        return str(self)[i]

    def __getslice__(self, i, j):
        return str(self)[i:j]

    def __iter__(self):
        for c in str(self):
            yield c

    def __len__(self):
        return len(str(self))

    def flatten(self, render=None):
        """
        return the text stored by the XML object rendered by the render function
        """
        if render:
            return render(self.text, None, {})
        return self.text

    def elements(self, *args, **kargs):
        """
        to be considered experimental since the behavior of this method is questionable
        another options could be TAG(self.text).elements(*args,**kargs)
        """
        return []

### important to allow safe session.flash=T(....)


def XML_unpickle(data):
    return marshal.loads(data)


def XML_pickle(data):
    return XML_unpickle, (marshal.dumps(str(data)),)
copy_reg.pickle(XML, XML_pickle, XML_unpickle)


class DIV(XmlComponent):
    """
    HTML helper, for easy generating and manipulating a DOM structure.
    Little or no validation is done.

    Behaves like a dictionary regarding updating of attributes.
    Behaves like a list regarding inserting/appending components.

    example::

        >>> DIV('hello', 'world', _style='color:red;').xml()
        '<div style=\"color:red;\">helloworld</div>'

    all other HTML helpers are derived from DIV.

    _something=\"value\" attributes are transparently translated into
    something=\"value\" HTML attributes
    """

    # name of the tag, subclasses should update this
    # tags ending with a '/' denote classes that cannot
    # contain components
    tag = 'div'

    def __init__(self, *components, **attributes):
        """
        :param *components: any components that should be nested in this element
        :param **attributes: any attributes you want to give to this element

        :raises SyntaxError: when a stand alone tag receives components
        """

        if self.tag[-1:] == '/' and components:
            raise SyntaxError('<%s> tags cannot have components'
                              % self.tag)
        if len(components) == 1 and isinstance(components[0], (list, tuple)):
            self.components = list(components[0])
        else:
            self.components = list(components)
        self.attributes = attributes
        self._fixup()
        # converts special attributes in components attributes
        self.parent = None
        for c in self.components:
            self._setnode(c)
        self._postprocessing()

    def update(self, **kargs):
        """
        dictionary like updating of the tag attributes
        """

        for (key, value) in kargs.iteritems():
            self[key] = value
        return self

    def append(self, value):
        """
        list style appending of components

        >>> a=DIV()
        >>> a.append(SPAN('x'))
        >>> print a
        <div><span>x</span></div>
        """
        self._setnode(value)
        ret = self.components.append(value)
        self._fixup()
        return ret

    def insert(self, i, value):
        """
        list style inserting of components

        >>> a=DIV()
        >>> a.insert(0,SPAN('x'))
        >>> print a
        <div><span>x</span></div>
        """
        self._setnode(value)
        ret = self.components.insert(i, value)
        self._fixup()
        return ret

    def __getitem__(self, i):
        """
        gets attribute with name 'i' or component #i.
        If attribute 'i' is not found returns None

        :param i: index
           if i is a string: the name of the attribute
           otherwise references to number of the component
        """

        if isinstance(i, str):
            try:
                return self.attributes[i]
            except KeyError:
                return None
        else:
            return self.components[i]

    def __setitem__(self, i, value):
        """
        sets attribute with name 'i' or component #i.

        :param i: index
           if i is a string: the name of the attribute
           otherwise references to number of the component
        :param value: the new value
        """
        self._setnode(value)
        if isinstance(i, (str, unicode)):
            self.attributes[i] = value
        else:
            self.components[i] = value

    def __delitem__(self, i):
        """
        deletes attribute with name 'i' or component #i.

        :param i: index
           if i is a string: the name of the attribute
           otherwise references to number of the component
        """

        if isinstance(i, str):
            del self.attributes[i]
        else:
            del self.components[i]

    def __len__(self):
        """
        returns the number of included components
        """
        return len(self.components)

    def __nonzero__(self):
        """
        always return True
        """
        return True

    def _fixup(self):
        """
        Handling of provided components.

        Nothing to fixup yet. May be overridden by subclasses,
        eg for wrapping some components in another component or blocking them.
        """
        return

    def _wrap_components(self, allowed_parents,
                         wrap_parent=None,
                         wrap_lambda=None):
        """
        helper for _fixup. Checks if a component is in allowed_parents,
        otherwise wraps it in wrap_parent

        :param allowed_parents: (tuple) classes that the component should be an
            instance of
        :param wrap_parent: the class to wrap the component in, if needed
        :param wrap_lambda: lambda to use for wrapping, if needed

        """
        components = []
        for c in self.components:
            if isinstance(c, allowed_parents):
                pass
            elif wrap_lambda:
                c = wrap_lambda(c)
            else:
                c = wrap_parent(c)
            if isinstance(c, DIV):
                c.parent = self
            components.append(c)
        self.components = components

    def _postprocessing(self):
        """
        Handling of attributes (normally the ones not prefixed with '_').

        Nothing to postprocess yet. May be overridden by subclasses
        """
        return

    def _traverse(self, status, hideerror=False):
        # TODO: docstring
        newstatus = status
        for c in self.components:
            if hasattr(c, '_traverse') and callable(c._traverse):
                c.vars = self.vars
                c.request_vars = self.request_vars
                c.errors = self.errors
                c.latest = self.latest
                c.session = self.session
                c.formname = self.formname
                c['hideerror'] = hideerror or \
                        self.attributes.get('hideerror', False)
                newstatus = c._traverse(status, hideerror) and newstatus

        # for input, textarea, select, option
        # deal with 'value' and 'validation'

        name = self['_name']
        if newstatus:
            newstatus = self._validate()
            self._postprocessing()
        elif 'old_value' in self.attributes:
            self['value'] = self['old_value']
            self._postprocessing()
        elif name and name in self.vars:
            self['value'] = self.vars[name]
            self._postprocessing()
        if name:
            self.latest[name] = self['value']
        return newstatus

    def _validate(self):
        """
        nothing to validate yet. May be overridden by subclasses
        """
        return True

    def _setnode(self, value):
        if isinstance(value, DIV):
            value.parent = self

    def _xml(self):
        """
        helper for xml generation. Returns separately:
        - the component attributes
        - the generated xml of the inner components

        Component attributes start with an underscore ('_') and
        do not have a False or None value. The underscore is removed.
        A value of True is replaced with the attribute name.

        :returns: tuple: (attributes, components)
        """

        # get the attributes for this component
        # (they start with '_', others may have special meanings)
        attr = []
        for key, value in self.attributes.iteritems():
            if key[:1] != '_':
                continue
            name = key[1:]
            if value is True:
                value = name
            elif value is False or value is None:
                continue
            attr.append((name, value))
        data = self.attributes.get('data',{})
        for key, value in data.iteritems():
            name = 'data-' + key
            value = data[key]
            attr.append((name,value))
        attr.sort()
        fa = ''
        for name,value in attr:
            fa += ' %s="%s"' % (name, xmlescape(value, True))
        # get the xml for the inner components
        co = join([xmlescape(component) for component in
                   self.components])

        return (fa, co)

    def xml(self):
        """
        generates the xml for this component.
        """

        (fa, co) = self._xml()

        if not self.tag:
            return co

        if self.tag[-1:] == '/':
            # <tag [attributes] />
            return '<%s%s />' % (self.tag[:-1], fa)

        # else: <tag [attributes]>  inner components xml </tag>
        return '<%s%s>%s</%s>' % (self.tag, fa, co, self.tag)

    def __str__(self):
        """
        str(COMPONENT) returns equals COMPONENT.xml()
        """

        return self.xml()

    def flatten(self, render=None):
        """
        return the text stored by the DIV object rendered by the render function
        the render function must take text, tagname, and attributes
        render=None is equivalent to render=lambda text, tag, attr: text

        >>> markdown = lambda text,tag=None,attributes={}: \
                        {None: re.sub('\s+',' ',text), \
                         'h1':'#'+text+'\\n\\n', \
                         'p':text+'\\n'}.get(tag,text)
        >>> a=TAG('<h1>Header</h1><p>this is a     test</p>')
        >>> a.flatten(markdown)
        '#Header\\n\\nthis is a test\\n'
        """

        text = ''
        for c in self.components:
            if isinstance(c, XmlComponent):
                s = c.flatten(render)
            elif render:
                s = render(str(c))
            else:
                s = str(c)
            text += s
        if render:
            text = render(text, self.tag, self.attributes)
        return text

    regex_tag = re.compile('^[\w\-\:]+')
    regex_id = re.compile('#([\w\-]+)')
    regex_class = re.compile('\.([\w\-]+)')
    regex_attr = re.compile('\[([\w\-\:]+)=(.*?)\]')

    def elements(self, *args, **kargs):
        """
        find all component that match the supplied attribute dictionary,
        or None if nothing could be found

        All components of the components are searched.

        >>> a = DIV(DIV(SPAN('x'),3,DIV(SPAN('y'))))
        >>> for c in a.elements('span',first_only=True): c[0]='z'
        >>> print a
        <div><div><span>z</span>3<div><span>y</span></div></div></div>
        >>> for c in a.elements('span'): c[0]='z'
        >>> print a
        <div><div><span>z</span>3<div><span>z</span></div></div></div>

        It also supports a syntax compatible with jQuery

        >>> a=TAG('<div><span><a id="1-1" u:v=$>hello</a></span><p class="this is a test">world</p></div>')
        >>> for e in a.elements('div a#1-1, p.is'): print e.flatten()
        hello
        world
        >>> for e in a.elements('#1-1'): print e.flatten()
        hello
        >>> a.elements('a[u:v=$]')[0].xml()
        '<a id="1-1" u:v="$">hello</a>'

        >>> a=FORM( INPUT(_type='text'), SELECT(range(1)), TEXTAREA() )
        >>> for c in a.elements('input, select, textarea'): c['_disabled'] = 'disabled'
        >>> a.xml()
        '<form action="#" enctype="multipart/form-data" method="post"><input disabled="disabled" type="text" /><select disabled="disabled"><option value="0">0</option></select><textarea cols="40" disabled="disabled" rows="10"></textarea></form>'

        Elements that are matched can also be replaced or removed by specifying
        a "replace" argument (note, a list of the original matching elements
        is still returned as usual).

        >>> a = DIV(DIV(SPAN('x', _class='abc'), DIV(SPAN('y', _class='abc'), SPAN('z', _class='abc'))))
        >>> b = a.elements('span.abc', replace=P('x', _class='xyz'))
        >>> print a
        <div><div><p class="xyz">x</p><div><p class="xyz">x</p><p class="xyz">x</p></div></div></div>

        "replace" can be a callable, which will be passed the original element and
        should return a new element to replace it.

        >>> a = DIV(DIV(SPAN('x', _class='abc'), DIV(SPAN('y', _class='abc'), SPAN('z', _class='abc'))))
        >>> b = a.elements('span.abc', replace=lambda el: P(el[0], _class='xyz'))
        >>> print a
        <div><div><p class="xyz">x</p><div><p class="xyz">y</p><p class="xyz">z</p></div></div></div>

        If replace=None, matching elements will be removed completely.

        >>> a = DIV(DIV(SPAN('x', _class='abc'), DIV(SPAN('y', _class='abc'), SPAN('z', _class='abc'))))
        >>> b = a.elements('span', find='y', replace=None)
        >>> print a
        <div><div><span class="abc">x</span><div><span class="abc">z</span></div></div></div>

        If a "find_text" argument is specified, elements will be searched for text
        components that match find_text, and any matching text components will be
        replaced (find_text is ignored if "replace" is not also specified).
        Like the "find" argument, "find_text" can be a string or a compiled regex.

        >>> a = DIV(DIV(SPAN('x', _class='abc'), DIV(SPAN('y', _class='abc'), SPAN('z', _class='abc'))))
        >>> b = a.elements(find_text=re.compile('x|y|z'), replace='hello')
        >>> print a
        <div><div><span class="abc">hello</span><div><span class="abc">hello</span><span class="abc">hello</span></div></div></div>

        If other attributes are specified along with find_text, then only components
        that match the specified attributes will be searched for find_text.

        >>> a = DIV(DIV(SPAN('x', _class='abc'), DIV(SPAN('y', _class='efg'), SPAN('z', _class='abc'))))
        >>> b = a.elements('span.efg', find_text=re.compile('x|y|z'), replace='hello')
        >>> print a
        <div><div><span class="abc">x</span><div><span class="efg">hello</span><span class="abc">z</span></div></div></div>
        """
        if len(args) == 1:
            args = [a.strip() for a in args[0].split(',')]
        if len(args) > 1:
            subset = [self.elements(a, **kargs) for a in args]
            return reduce(lambda a, b: a + b, subset, [])
        elif len(args) == 1:
            items = args[0].split()
            if len(items) > 1:
                subset = [a.elements(' '.join(
                    items[1:]), **kargs) for a in self.elements(items[0])]
                return reduce(lambda a, b: a + b, subset, [])
            else:
                item = items[0]
                if '#' in item or '.' in item or '[' in item:
                    match_tag = self.regex_tag.search(item)
                    match_id = self.regex_id.search(item)
                    match_class = self.regex_class.search(item)
                    match_attr = self.regex_attr.finditer(item)
                    args = []
                    if match_tag:
                        args = [match_tag.group()]
                    if match_id:
                        kargs['_id'] = match_id.group(1)
                    if match_class:
                        kargs['_class'] = re.compile('(?<!\w)%s(?!\w)' %
                                                     match_class.group(1).replace('-', '\\-').replace(':', '\\:'))
                    for item in match_attr:
                        kargs['_' + item.group(1)] = item.group(2)
                    return self.elements(*args, **kargs)
        # make a copy of the components
        matches = []
        # check if the component has an attribute with the same
        # value as provided
        check = True
        tag = getattr(self, 'tag').replace('/', '')
        if args and tag not in args:
            check = False
        for (key, value) in kargs.iteritems():
            if key not in ['first_only', 'replace', 'find_text']:
                if isinstance(value, (str, int)):
                    if self[key] != str(value):
                        check = False
                elif key in self.attributes:
                    if not value.search(str(self[key])):
                        check = False
                else:
                    check = False
        if 'find' in kargs:
            find = kargs['find']
            is_regex = not isinstance(find, (str, int))
            for c in self.components:
                if (isinstance(c, str) and ((is_regex and find.search(c)) or
                   (str(find) in c))):
                    check = True
        # if found, return the component
        if check:
            matches.append(self)

        first_only = kargs.get('first_only', False)
        replace = kargs.get('replace', False)
        find_text = replace is not False and kargs.get('find_text', False)
        is_regex = not isinstance(find_text, (str, int, bool))
        find_components = not (check and first_only)

        def replace_component(i):
            if replace is None:
                del self[i]
            elif callable(replace):
                self[i] = replace(self[i])
            else:
                self[i] = replace
        # loop the components
        if find_text or find_components:
            for i, c in enumerate(self.components):
                if check and find_text and isinstance(c, str) and \
                        ((is_regex and find_text.search(c)) or (str(find_text) in c)):
                    replace_component(i)
                if find_components and isinstance(c, XmlComponent):
                    child_matches = c.elements(*args, **kargs)
                    if len(child_matches):
                        if not find_text and replace is not False and child_matches[0] is c:
                            replace_component(i)
                        if first_only:
                            return child_matches
                        matches.extend(child_matches)
        return matches

    def element(self, *args, **kargs):
        """
        find the first component that matches the supplied attribute dictionary,
        or None if nothing could be found

        Also the components of the components are searched.
        """
        kargs['first_only'] = True
        elements = self.elements(*args, **kargs)
        if not elements:
            # we found nothing
            return None
        return elements[0]

    def siblings(self, *args, **kargs):
        """
        find all sibling components that match the supplied argument list
        and attribute dictionary, or None if nothing could be found
        """
        sibs = [s for s in self.parent.components if not s == self]
        matches = []
        first_only = False
        if 'first_only' in kargs:
            first_only = kargs.pop('first_only')
        for c in sibs:
            try:
                check = True
                tag = getattr(c, 'tag').replace("/", "")
                if args and tag not in args:
                        check = False
                for (key, value) in kargs.iteritems():
                    if c[key] != value:
                            check = False
                if check:
                    matches.append(c)
                    if first_only:
                        break
            except:
                pass
        return matches

    def sibling(self, *args, **kargs):
        """
        find the first sibling component that match the supplied argument list
        and attribute dictionary, or None if nothing could be found
        """
        kargs['first_only'] = True
        sibs = self.siblings(*args, **kargs)
        if not sibs:
            return None
        return sibs[0]


class CAT(DIV):

    tag = ''


def TAG_unpickler(data):
    return cPickle.loads(data)


def TAG_pickler(data):
    d = DIV()
    d.__dict__ = data.__dict__
    marshal_dump = cPickle.dumps(d)
    return (TAG_unpickler, (marshal_dump,))


class __tag_div__(DIV):
    def __init__(self,name,*a,**b):
        DIV.__init__(self,*a,**b)
        self.tag = name

copy_reg.pickle(__tag_div__, TAG_pickler, TAG_unpickler)

class __TAG__(XmlComponent):

    """
    TAG factory example::

        >>> print TAG.first(TAG.second('test'), _key = 3)
        <first key=\"3\"><second>test</second></first>

    """

    def __getitem__(self, name):
        return self.__getattr__(name)

    def __getattr__(self, name):
        if name[-1:] == '_':
            name = name[:-1] + '/'
        if isinstance(name, unicode):
            name = name.encode('utf-8')
        return lambda *a,**b: __tag_div__(name,*a,**b)

    def __call__(self, html):
        return web2pyHTMLParser(decoder.decoder(html)).tree

TAG = __TAG__()


class HTML(DIV):
    """
    There are four predefined document type definitions.
    They can be specified in the 'doctype' parameter:

    -'strict' enables strict doctype
    -'transitional' enables transitional doctype (default)
    -'frameset' enables frameset doctype
    -'html5' enables HTML 5 doctype
    -any other string will be treated as user's own doctype

    'lang' parameter specifies the language of the document.
    Defaults to 'en'.

    See also :class:`DIV`
    """

    tag = 'html'

    strict = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n'
    transitional = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n'
    frameset = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">\n'
    html5 = '<!DOCTYPE HTML>\n'

    def xml(self):
        lang = self['lang']
        if not lang:
            lang = 'en'
        self.attributes['_lang'] = lang
        doctype = self['doctype']
        if doctype is None:
            doctype = self.transitional
        elif doctype == 'strict':
            doctype = self.strict
        elif doctype == 'transitional':
            doctype = self.transitional
        elif doctype == 'frameset':
            doctype = self.frameset
        elif doctype == 'html5':
            doctype = self.html5
        elif doctype == '':
            doctype = ''
        else:
            doctype = '%s\n' % doctype
        (fa, co) = self._xml()
        return '%s<%s%s>%s</%s>' % (doctype, self.tag, fa, co, self.tag)


class XHTML(DIV):
    """
    This is XHTML version of the HTML helper.

    There are three predefined document type definitions.
    They can be specified in the 'doctype' parameter:

    -'strict' enables strict doctype
    -'transitional' enables transitional doctype (default)
    -'frameset' enables frameset doctype
    -any other string will be treated as user's own doctype

    'lang' parameter specifies the language of the document and the xml document.
    Defaults to 'en'.

    'xmlns' parameter specifies the xml namespace.
    Defaults to 'http://www.w3.org/1999/xhtml'.

    See also :class:`DIV`
    """

    tag = 'html'

    strict = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
    transitional = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
    frameset = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">\n'
    xmlns = 'http://www.w3.org/1999/xhtml'

    def xml(self):
        xmlns = self['xmlns']
        if xmlns:
            self.attributes['_xmlns'] = xmlns
        else:
            self.attributes['_xmlns'] = self.xmlns
        lang = self['lang']
        if not lang:
            lang = 'en'
        self.attributes['_lang'] = lang
        self.attributes['_xml:lang'] = lang
        doctype = self['doctype']
        if doctype:
            if doctype == 'strict':
                doctype = self.strict
            elif doctype == 'transitional':
                doctype = self.transitional
            elif doctype == 'frameset':
                doctype = self.frameset
            else:
                doctype = '%s\n' % doctype
        else:
            doctype = self.transitional
        (fa, co) = self._xml()
        return '%s<%s%s>%s</%s>' % (doctype, self.tag, fa, co, self.tag)


class HEAD(DIV):

    tag = 'head'


class TITLE(DIV):

    tag = 'title'


class META(DIV):

    tag = 'meta/'


class LINK(DIV):

    tag = 'link/'


class SCRIPT(DIV):

    tag = 'script'

    def xml(self):
        (fa, co) = self._xml()
        # no escaping of subcomponents
        co = '\n'.join([str(component) for component in
                       self.components])
        if co:
            # <script [attributes]><!--//--><![CDATA[//><!--
            # script body
            # //--><!]]></script>
            # return '<%s%s><!--//--><![CDATA[//><!--\n%s\n//--><!]]></%s>' % (self.tag, fa, co, self.tag)
            return '<%s%s><!--\n%s\n//--></%s>' % (self.tag, fa, co, self.tag)
        else:
            return DIV.xml(self)


class STYLE(DIV):

    tag = 'style'

    def xml(self):
        (fa, co) = self._xml()
        # no escaping of subcomponents
        co = '\n'.join([str(component) for component in
                       self.components])
        if co:
            # <style [attributes]><!--/*--><![CDATA[/*><!--*/
            # style body
            # /*]]>*/--></style>
            return '<%s%s><!--/*--><![CDATA[/*><!--*/\n%s\n/*]]>*/--></%s>' % (self.tag, fa, co, self.tag)
        else:
            return DIV.xml(self)


class IMG(DIV):

    tag = 'img/'


class SPAN(DIV):

    tag = 'span'


class BODY(DIV):

    tag = 'body'


class H1(DIV):

    tag = 'h1'


class H2(DIV):

    tag = 'h2'


class H3(DIV):

    tag = 'h3'


class H4(DIV):

    tag = 'h4'


class H5(DIV):

    tag = 'h5'


class H6(DIV):

    tag = 'h6'


class P(DIV):
    """
    Will replace ``\\n`` by ``<br />`` if the `cr2br` attribute is provided.

    see also :class:`DIV`
    """

    tag = 'p'

    def xml(self):
        text = DIV.xml(self)
        if self['cr2br']:
            text = text.replace('\n', '<br />')
        return text


class STRONG(DIV):

    tag = 'strong'


class B(DIV):

    tag = 'b'


class BR(DIV):

    tag = 'br/'


class HR(DIV):

    tag = 'hr/'


class A(DIV):

    tag = 'a'

    def xml(self):
        if not self.components and self['_href']:
            self.append(self['_href'])
        if not self['_disable_with']:
            self['_data-w2p_disable_with'] = 'default'
        if self['callback'] and not self['_id']:
            self['_id'] = web2py_uuid()        
        if self['delete']:
            self['_data-w2p_remove'] = self['delete']
        if self['target']:
            if self['target'] == '<self>':
                self['target'] = self['_id']
            self['_data-w2p_target'] = self['target']
        if self['component']:
            self['_data-w2p_method'] = 'GET'
            self['_href'] = self['component']
        elif self['callback']:
            self['_data-w2p_method'] = 'POST'
            self['_href'] = self['callback']
            if self['delete'] and not self['noconfirm']:
                if not self['confirm']:
                    self['_data-w2p_confirm'] = 'default'
                else:
                    self['_data-w2p_confirm'] = self['confirm']
        elif self['cid']:
            self['_data-w2p_method'] = 'GET'
            self['_data-w2p_target'] = self['cid']
            if self['pre_call']:
                self['_data-w2p_pre_call'] = self['pre_call']
        return DIV.xml(self)

class BUTTON(DIV):

    tag = 'button'


class EM(DIV):

    tag = 'em'


class EMBED(DIV):

    tag = 'embed/'


class TT(DIV):

    tag = 'tt'


class PRE(DIV):

    tag = 'pre'


class CENTER(DIV):

    tag = 'center'


class CODE(DIV):

    """
    displays code in HTML with syntax highlighting.

    :param attributes: optional attributes:

        - language: indicates the language, otherwise PYTHON is assumed
        - link: can provide a link
        - styles: for styles

    Example::

        {{=CODE(\"print 'hello world'\", language='python', link=None,
            counter=1, styles={}, highlight_line=None)}}


    supported languages are \"python\", \"html_plain\", \"c\", \"cpp\",
    \"web2py\", \"html\".
    The \"html\" language interprets {{ and }} tags as \"web2py\" code,
    \"html_plain\" doesn't.

    if a link='/examples/global/vars/' is provided web2py keywords are linked to
    the online docs.

    the counter is used for line numbering, counter can be None or a prompt
    string.
    """

    def xml(self):
        language = self['language'] or 'PYTHON'
        link = self['link']
        counter = self.attributes.get('counter', 1)
        highlight_line = self.attributes.get('highlight_line', None)
        context_lines = self.attributes.get('context_lines', None)
        styles = self['styles'] or {}
        return highlight(
            join(self.components),
            language=language,
            link=link,
            counter=counter,
            styles=styles,
            attributes=self.attributes,
            highlight_line=highlight_line,
            context_lines=context_lines,
        )


class LABEL(DIV):

    tag = 'label'


class LI(DIV):

    tag = 'li'


class UL(DIV):
    """
    UL Component.

    If subcomponents are not LI-components they will be wrapped in a LI

    see also :class:`DIV`
    """

    tag = 'ul'

    def _fixup(self):
        self._wrap_components(LI, LI)


class OL(UL):

    tag = 'ol'


class TD(DIV):

    tag = 'td'


class TH(DIV):

    tag = 'th'


class TR(DIV):
    """
    TR Component.

    If subcomponents are not TD/TH-components they will be wrapped in a TD

    see also :class:`DIV`
    """

    tag = 'tr'

    def _fixup(self):
        self._wrap_components((TD, TH), TD)


class __TRHEAD__(DIV):
    """
    __TRHEAD__ Component, internal only

    If subcomponents are not TD/TH-components they will be wrapped in a TH

    see also :class:`DIV`
    """

    tag = 'tr'

    def _fixup(self):
        self._wrap_components((TD, TH), TH)


class THEAD(DIV):

    tag = 'thead'

    def _fixup(self):
        self._wrap_components((__TRHEAD__, TR), __TRHEAD__)


class TBODY(DIV):

    tag = 'tbody'

    def _fixup(self):
        self._wrap_components(TR, TR)


class TFOOT(DIV):

    tag = 'tfoot'

    def _fixup(self):
        self._wrap_components(TR, TR)


class COL(DIV):

    tag = 'col'


class COLGROUP(DIV):

    tag = 'colgroup'


class TABLE(DIV):
    """
    TABLE Component.

    If subcomponents are not TR/TBODY/THEAD/TFOOT-components
    they will be wrapped in a TR

    see also :class:`DIV`
    """

    tag = 'table'

    def _fixup(self):
        self._wrap_components((TR, TBODY, THEAD, TFOOT, COL, COLGROUP), TR)


class I(DIV):

    tag = 'i'


class IFRAME(DIV):

    tag = 'iframe'


class INPUT(DIV):

    """
        INPUT Component

        examples::

            >>> INPUT(_type='text', _name='name', value='Max').xml()
            '<input name=\"name\" type=\"text\" value=\"Max\" />'

            >>> INPUT(_type='checkbox', _name='checkbox', value='on').xml()
            '<input checked=\"checked\" name=\"checkbox\" type=\"checkbox\" value=\"on\" />'

            >>> INPUT(_type='radio', _name='radio', _value='yes', value='yes').xml()
            '<input checked=\"checked\" name=\"radio\" type=\"radio\" value=\"yes\" />'

            >>> INPUT(_type='radio', _name='radio', _value='no', value='yes').xml()
            '<input name=\"radio\" type=\"radio\" value=\"no\" />'

        the input helper takes two special attributes value= and requires=.

        :param …
Large files files are truncated, but you can click here to view the full file