validators.py | searchcode

/gluon/validators.py

Large files are truncated, but you can click here to view the full file

#!/bin/env python
# -*- coding: utf-8 -*-

"""
| This file is part of the web2py Web Framework
| Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
| Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE

Validators
-----------
"""

import os
import re
import datetime
import time
import cgi
import urllib
import struct
import decimal
import unicodedata
from cStringIO import StringIO
from gluon.utils import simple_hash, web2py_uuid, DIGEST_ALG_BY_SIZE
from pydal.objects import FieldVirtual, FieldMethod

# Matches an optionally signed run of decimal digits; IS_INT_IN_RANGE uses it
# to decide whether a value can be converted with int().
regex_isint = re.compile('^[+-]?\d+$')

# Exception types that IS_JSON treats as "the input is not valid JSON".
JSONErrors = (NameError, TypeError, ValueError, AttributeError,
              KeyError)
try:
    import json as simplejson
except ImportError:
    # No stdlib json available: fall back to the copy under gluon.contrib and
    # also treat its dedicated decode error as a JSON failure.
    from gluon.contrib import simplejson
    from gluon.contrib.simplejson.decoder import JSONDecodeError
    JSONErrors += (JSONDecodeError,)

# Names exported by ``from <this module> import *``.
__all__ = [
    'ANY_OF',
    'CLEANUP',
    'CRYPT',
    'IS_ALPHANUMERIC',
    'IS_DATE_IN_RANGE',
    'IS_DATE',
    'IS_DATETIME_IN_RANGE',
    'IS_DATETIME',
    'IS_DECIMAL_IN_RANGE',
    'IS_EMAIL',
    'IS_LIST_OF_EMAILS',
    'IS_EMPTY_OR',
    'IS_EXPR',
    'IS_FLOAT_IN_RANGE',
    'IS_IMAGE',
    'IS_IN_DB',
    'IS_IN_SET',
    'IS_INT_IN_RANGE',
    'IS_IPV4',
    'IS_IPV6',
    'IS_IPADDRESS',
    'IS_LENGTH',
    'IS_LIST_OF',
    'IS_LOWER',
    'IS_MATCH',
    'IS_EQUAL_TO',
    'IS_NOT_EMPTY',
    'IS_NOT_IN_DB',
    'IS_NULL_OR',
    'IS_SLUG',
    'IS_STRONG',
    'IS_TIME',
    'IS_UPLOAD_FILENAME',
    'IS_UPPER',
    'IS_URL',
    'IS_JSON',
]

# ``current`` is optional here: remember whether it could be imported so that
# translate() only looks for ``current.T`` when the import succeeded.
try:
    from globals import current
    have_current = True
except ImportError:
    have_current = False


def translate(text):
    """
    Return ``text`` as a ``str``, passed through ``current.T`` when possible.

    ``None`` is returned unchanged. The translator is only used for string
    input, and only when ``current`` was importable and exposes a ``T``
    attribute; in every other case the result is simply ``str(text)``.
    """
    if text is None:
        return None
    translatable = isinstance(text, (str, unicode)) and have_current
    if translatable and hasattr(current, 'T'):
        return str(current.T(text))
    return str(text)


def options_sorter(x, y):
    """
    cmp-style comparator that orders option pairs by label, ignoring case.

    Args:
        x: sequence whose item at index 1 is the label to compare
        y: sequence whose item at index 1 is the label to compare

    Returns:
        1 if the label of ``x`` sorts after the label of ``y``, -1 if it
        sorts before, and 0 when both labels are equal after ``str()``
        conversion and upper-casing.

    Returning 0 for equal labels (instead of -1) keeps the comparator
    consistent, so sorting with it preserves the relative order of options
    that share a label.
    """
    left = str(x[1]).upper()
    right = str(y[1]).upper()
    return (left > right) - (left < right)


class Validator(object):
    """
    Root for all validators, mainly for documentation purposes.

    Validators are classes used to validate input fields (including forms
    generated from database tables).

    Here is an example of using a validator with a FORM::

        INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10))

    Here is an example of how to require a validator for a table field::

        db.define_table('person', SQLField('name'))
        db.person.name.requires=IS_NOT_EMPTY()

    Validators are always assigned using the requires attribute of a field. A
    field can have a single validator or multiple validators. Multiple
    validators are made part of a list::

        db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')]

    Validators are called by the function accepts on a FORM or other HTML
    helper object that contains a form. They are always called in the order in
    which they are listed.

    Built-in validators have constructors that take the optional argument error
    message which allows you to change the default error message.
    Here is an example of a validator on a database table::

        db.person.name.requires=IS_NOT_EMPTY(error_message=T('Fill this'))

    where we have used the translation operator T to allow for
    internationalization.

    Notice that default error messages are not translated.
    """

    def formatter(self, value):
        """
        For some validators returns a formatted version (matching the validator)
        of value. Otherwise just returns the value.
        """
        return value

    def __call__(self, value):
        """
        Validate ``value``; subclasses must override this method.

        The validators in this module return a ``(value, error)`` tuple,
        where ``error`` is None when validation succeeds.

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError


class IS_MATCH(Validator):
    """
    Checks a value against a regular expression.

    Args:
        expression: regular expression text
        error_message: message returned when the value does not match
        strict: when true, anchor the expression at both ends (``^`` / ``$``)
        search: when true (and not strict), do not anchor at the start, so
            the expression may match anywhere in the value
        extract: when true, return the matched text instead of the value
            (the value itself is returned if the matched text is empty)
        is_unicode: when true, compile with ``re.UNICODE`` and match against
            the utf8-decoded value

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_MATCH('.+'))

    Results below are shown without an active translator::

        >>> IS_MATCH('.+')('hello')
        ('hello', None)

        >>> IS_MATCH('hell')('hello')
        ('hello', None)

        >>> IS_MATCH('hell.*', strict=False)('hello')
        ('hello', None)

        >>> IS_MATCH('hello')('shello')
        ('shello', 'Invalid expression')

        >>> IS_MATCH('hello', search=True)('shello')
        ('shello', None)

        >>> IS_MATCH('hello', search=True, strict=False)('shellox')
        ('shellox', None)

        >>> IS_MATCH('.*hello.*', search=True, strict=False)('shellox')
        ('shellox', None)

        >>> IS_MATCH('.+')('')
        ('', 'Invalid expression')

    """

    def __init__(self, expression, error_message='Invalid expression',
                 strict=False, search=False, extract=False,
                 is_unicode=False):

        # Anchor at the start unless a free search was explicitly requested.
        anchor_start = strict or not search
        if anchor_start and not expression.startswith('^'):
            expression = '^(%s)' % expression
        if strict and not expression.endswith('$'):
            expression = '(%s)$' % expression

        flags = 0
        if is_unicode:
            if not isinstance(expression, unicode):
                expression = expression.decode('utf8')
            flags = re.UNICODE
        self.regex = re.compile(expression, flags)

        self.error_message = error_message
        self.extract = extract
        self.is_unicode = is_unicode

    def __call__(self, value):
        text = str(value)
        if self.is_unicode and not isinstance(value, unicode):
            text = text.decode('utf8')
        found = self.regex.search(text)
        if found is None:
            return (value, translate(self.error_message))
        if self.extract:
            # An empty match falls back to the original value.
            return (found.group() or value, None)
        return (value, None)


class IS_EQUAL_TO(Validator):
    """
    Checks that the value equals (``==``) a reference value.

    Example:
        Used as::

            INPUT(_type='text', _name='password')
            INPUT(_type='text', _name='password2',
                  requires=IS_EQUAL_TO(request.vars.password))

    The argument of IS_EQUAL_TO is the value to compare against (results
    shown without an active translator)::

        >>> IS_EQUAL_TO('aaa')('aaa')
        ('aaa', None)

        >>> IS_EQUAL_TO('aaa')('aab')
        ('aab', 'No match')

    """

    def __init__(self, expression, error_message='No match'):
        self.error_message = error_message
        self.expression = expression

    def __call__(self, value):
        matches = value == self.expression
        if not matches:
            return (value, translate(self.error_message))
        return (value, None)


class IS_EXPR(Validator):
    """
    Validates a value with a Python expression or a callable.

    Args:
        expression: either a callable, or the text of a Python condition in
            which the name ``value`` refers to the value being validated
        error_message: message returned when a text expression is false
        environment: optional dict of extra names made available to a text
            expression

    When ``expression`` is callable it is called with the value and whatever
    it returns is used as the error (so it should return None on success).

    Example:
        Used as::

            INPUT(_type='text', _name='name',
                requires=IS_EXPR('5 < int(value) < 10'))

    The argument of IS_EXPR must be python condition (results shown without
    an active translator)::

        >>> IS_EXPR('int(value) < 2')('1')
        ('1', None)

        >>> IS_EXPR('int(value) < 2')('2')
        ('2', 'Invalid expression')

    """

    def __init__(self, expression, error_message='Invalid expression', environment=None):
        self.expression = expression
        self.error_message = error_message
        self.environment = environment or {}

    def __call__(self, value):
        if callable(self.expression):
            return (value, self.expression(value))
        # for backward compatibility: text expressions.
        # Evaluate in a per-call copy so the caller-supplied environment is
        # not polluted with ``value``, ``__ret__`` and ``__builtins__``.
        scope = dict(self.environment)
        scope['value'] = value
        # SECURITY: the expression text is executed as Python code; it must
        # come from the application developer, never from user input.
        # The tuple form of exec is valid on both Python 2 and Python 3.
        exec('__ret__=' + self.expression, scope)
        if scope['__ret__']:
            return (value, None)
        return (value, translate(self.error_message))


class IS_LENGTH(Validator):
    """
    Checks if length of field's value fits between given boundaries. Works
    for both text and file inputs.

    Args:
        maxsize: maximum allowed length / size
        minsize: minimum allowed length / size
        error_message: message template; ``%(min)g`` and ``%(max)g`` are
            filled with the two boundaries

    Examples:
        Check if text string is shorter than 33 characters::

            INPUT(_type='text', _name='name', requires=IS_LENGTH(32))

        Check if password string is longer than 5 characters::

            INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6))

        Check if uploaded file has size between 1KB and 1MB::

            INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024))

        Other examples (shown without an active translator)::

            >>> IS_LENGTH()('')
            ('', None)
            >>> IS_LENGTH()('1234567890')
            ('1234567890', None)
            >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890')  # too long
            ('1234567890', 'Enter from 0 to 5 characters')
            >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890')  # too short
            ('1234567890', 'Enter from 20 to 50 characters')
    """

    def __init__(self, maxsize=255, minsize=0,
                 error_message='Enter from %(min)g to %(max)g characters'):
        self.maxsize = maxsize
        self.minsize = minsize
        self.error_message = error_message

    def __call__(self, value):
        """
        Measure ``value`` and compare the length with the boundaries.

        Returns:
            ``(accepted, None)`` on success, where ``accepted`` is the value
            itself, its utf8 encoding for ``unicode`` input, or ``str(value)``
            for types not handled explicitly; ``(value, message)`` otherwise.
        """
        accepted = value
        if value is None:
            length = 0
        elif isinstance(value, cgi.FieldStorage):
            # Default to 0 so an upload with neither a file object nor a
            # ``value`` attribute is measured as empty instead of leaving
            # ``length`` unbound.
            length = 0
            if value.file:
                # Measure the file by seeking to its end, then rewind.
                value.file.seek(0, os.SEEK_END)
                length = value.file.tell()
                value.file.seek(0, os.SEEK_SET)
            elif hasattr(value, 'value'):
                val = value.value
                if val:
                    length = len(val)
        elif isinstance(value, str):
            try:
                # Count characters rather than bytes when the text is utf8.
                length = len(value.decode('utf8'))
            except (UnicodeDecodeError, AttributeError):
                # Not valid utf8 (or no decode method): use the raw length.
                length = len(value)
        elif isinstance(value, unicode):
            length = len(value)
            accepted = value.encode('utf8')
        elif isinstance(value, (tuple, list)):
            length = len(value)
        else:
            accepted = str(value)
            length = len(accepted)
        if self.minsize <= length <= self.maxsize:
            return (accepted, None)
        return (value, translate(self.error_message)
                % dict(min=self.minsize, max=self.maxsize))

class IS_JSON(Validator):
    """
    Checks that the value is parseable JSON text.

    Args:
        error_message: message returned when parsing fails
        native_json: when true the original text is returned on success;
            otherwise the parsed object is returned

    Example:
        Used as::

            INPUT(_type='text', _name='name',
                requires=IS_JSON(error_message="This is not a valid json input"))

        Results shown without an active translator::

            >>> IS_JSON()('{"a": 100}')
            ({u'a': 100}, None)

            >>> IS_JSON()('spam1234')
            ('spam1234', 'Invalid json')
    """

    def __init__(self, error_message='Invalid json', native_json=False):
        self.error_message = error_message
        self.native_json = native_json

    def __call__(self, value):
        try:
            # Parsing doubles as the validity check.
            parsed = simplejson.loads(value)
        except JSONErrors:
            return (value, translate(self.error_message))
        if self.native_json:
            # Keep the serialized text; the parsed object is discarded.
            return (value, None)
        return (parsed, None)

    def formatter(self, value):
        """Serialize ``value`` back to JSON text unless it is None or native."""
        if value is None or self.native_json:
            return value
        return simplejson.dumps(value)


class IS_IN_SET(Validator):
    """
    Checks that the value (or every value, with ``multiple``) belongs to a
    given collection. Members are compared by their ``str()`` form.

    Args:
        theset: a dict (keys are the values, dict values the labels), a
            tuple/list of ``(value, label)`` pairs, or any iterable of values
        labels: labels for the plain-iterable form, indexed like ``theset``
        error_message: message returned for a value outside the set
        multiple: true to accept several values; a ``(min, max)`` tuple/list
            additionally requires ``min <= number of values < max``
        zero: label of the empty option that ``options()`` puts first
            (None disables it)
        sort: true to sort the options by label

    Example:
        Used as::

            INPUT(_type='text', _name='name',
                  requires=IS_IN_SET(['max', 'john'],zero=''))

    Results are shown without an active translator::

        >>> IS_IN_SET(['max', 'john'])('max')
        ('max', None)
        >>> IS_IN_SET(['max', 'john'])('massimo')
        ('massimo', 'Value not allowed')
        >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john'))
        (('max', 'john'), None)
        >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john'))
        (('bill', 'john'), 'Value not allowed')
        >>> IS_IN_SET(('id1','id2'), ['first label','second label'])('id1') # Traditional way
        ('id1', None)
        >>> IS_IN_SET({'id1':'first label', 'id2':'second label'})('id1')
        ('id1', None)
        >>> import itertools
        >>> IS_IN_SET(itertools.chain(['1','3','5'],['2','4','6']))('1')
        ('1', None)
        >>> IS_IN_SET([('id1','first label'), ('id2','second label')])('id1') # Redundant way
        ('id1', None)

    """

    def __init__(
        self,
        theset,
        labels=None,
        error_message='Value not allowed',
        multiple=False,
        zero='',
        sort=False,
    ):
        self.multiple = multiple
        if isinstance(theset, dict):
            members = [str(key) for key in theset]
            captions = theset.values()
        elif (theset and isinstance(theset, (tuple, list))
              and isinstance(theset[0], (tuple, list))
              and len(theset[0]) == 2):
            # Sequence of (value, label) pairs.
            members = [str(key) for key, _caption in theset]
            captions = [str(caption) for _key, caption in theset]
        else:
            members = [str(key) for key in theset]
            captions = labels
        self.theset = members
        self.labels = captions
        self.error_message = error_message
        self.zero = zero
        self.sort = sort

    def options(self, zero=True):
        """Return the ``(value, label)`` pairs, e.g. for a select widget."""
        if self.labels:
            items = [(key, self.labels[position])
                     for (position, key) in enumerate(self.theset)]
        else:
            items = [(key, key) for key in self.theset]
        if self.sort:
            items.sort(options_sorter)
        wants_zero = zero and self.zero is not None and not self.multiple
        if wants_zero:
            items.insert(0, ('', self.zero))
        return items

    def __call__(self, value):
        # Normalize the input into a list of candidate values.
        if not self.multiple:
            values = [value]
        elif not value:
            values = []
        elif isinstance(value, (tuple, list)):
            values = value
        else:
            values = [value]

        known = [str(member) for member in self.theset]
        has_unknown = any(str(item) not in known for item in values)
        if has_unknown and self.theset:
            if self.multiple and (value is None or value == ''):
                return ([], None)
            return (value, translate(self.error_message))

        if not self.multiple:
            return (value, None)
        if isinstance(self.multiple, (tuple, list)):
            lower, upper = self.multiple[0], self.multiple[1]
            if not lower <= len(values) < upper:
                return (values, translate(self.error_message))
        return (values, None)


# Matches a ``table.field``-style name; IS_IN_DB uses it to detect a label
# given as a field name rather than as a format string.
regex1 = re.compile('\w+\.\w+')
# Captures the key names of ``%(name)s``-style placeholders (with optional
# width/precision) in a format string.
regex2 = re.compile('%\(([^\)]+)\)\d*(?:\.\d+)?[a-zA-Z]')


class IS_IN_DB(Validator):
    """
    Checks that the value exists in a database field.

    Example:
        Used as::

            INPUT(_type='text', _name='name',
                  requires=IS_IN_DB(db, db.mytable.myfield, zero=''))

    used for reference fields, rendered as a dropbox

    Args:
        dbset: object with a ``define_table`` attribute (it is then called
            with no arguments to obtain the set) or a set used as-is
        field: a ``table.field`` reference (anything whose ``str()`` has that
            form) or a Table, in which case its ``_id`` field is used
        label: format string with ``%(fieldname)s`` placeholders, a
            ``table.field`` name, or a non-string object that is called with
            each record; defaults to the referenced field itself
        error_message: message returned when validation fails
        orderby, groupby, distinct, cache: passed to the select issued by
            ``build_set``
        multiple: true to accept several values; a ``(min, max)`` tuple/list
            additionally requires ``min <= number of values < max``
        zero: label of the empty option that ``options()`` puts first
            (None disables it)
        sort: true to sort the options by label
        _and: optional validator applied to the value after a successful
            single-value check
    """

    def __init__(
        self,
        dbset,
        field,
        label=None,
        error_message='Value not in database',
        orderby=None,
        groupby=None,
        distinct=None,
        cache=None,
        multiple=False,
        zero='',
        sort=False,
        _and=None,
    ):
        from pydal.objects import Table
        if isinstance(field, Table):
            field = field._id

        # Something exposing define_table is presumably a database object
        # (TODO confirm); calling it yields the set to query.
        if hasattr(dbset, 'define_table'):
            self.dbset = dbset()
        else:
            self.dbset = dbset
        (ktable, kfield) = str(field).split('.')
        if not label:
            label = '%%(%s)s' % kfield
        if isinstance(label, str):
            # A 'table.field' label is turned into a '%(field)s' format.
            if regex1.match(str(label)):
                label = '%%(%s)s' % str(label).split('.')[-1]
            # Fields needed to render the label, plus the key field itself.
            ks = regex2.findall(label)
            if not kfield in ks:
                ks += [kfield]
            fields = ks
        else:
            # Non-string label: build_set selects every field of the table.
            ks = [kfield]
            fields = 'all'
        self.fields = fields
        self.label = label
        self.ktable = ktable
        self.kfield = kfield
        self.ks = ks
        self.error_message = error_message
        self.theset = None
        self.orderby = orderby
        self.groupby = groupby
        self.distinct = distinct
        self.cache = cache
        self.multiple = multiple
        self.zero = zero
        self.sort = sort
        self._and = _and

    def set_self_id(self, id):
        """Forward the record id to the chained ``_and`` validator, if any."""
        if self._and:
            self._and.record_id = id

    def build_set(self):
        """
        Query the table and populate ``self.theset`` (key values as strings)
        and ``self.labels`` (one rendered label per record).
        """
        table = self.dbset.db[self.ktable]
        if self.fields == 'all':
            fields = [f for f in table]
        else:
            fields = [table[k] for k in self.fields]
        # Virtual and method fields are excluded from the select.
        ignore = (FieldVirtual,FieldMethod)
        fields = filter(lambda f:not isinstance(f,ignore), fields)
        if self.dbset.db._dbname != 'gae':
            # Default ordering: all selected fields combined with |.
            orderby = self.orderby or reduce(lambda a, b: a | b, fields)
            groupby = self.groupby
            distinct = self.distinct
            dd = dict(orderby=orderby, groupby=groupby,
                      distinct=distinct, cache=self.cache,
                      cacheable=True)
            records = self.dbset(table).select(*fields, **dd)
        else:
            # On 'gae' the default ordering skips the field named 'id' and
            # whole records are selected; groupby/distinct are not passed.
            orderby = self.orderby or \
                reduce(lambda a, b: a | b, (
                    f for f in fields if not f.name == 'id'))
            dd = dict(orderby=orderby, cache=self.cache, cacheable=True)
            records = self.dbset(table).select(table.ALL, **dd)
        self.theset = [str(r[self.kfield]) for r in records]
        if isinstance(self.label, str):
            self.labels = [self.label % r for r in records]
        else:
            self.labels = [self.label(r) for r in records]

    def options(self, zero=True):
        """
        Rebuild the set from the database and return ``(value, label)``
        pairs, optionally sorted and preceded by the empty ``zero`` option.
        """
        self.build_set()
        items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
        if self.sort:
            items.sort(options_sorter)
        if zero and not self.zero is None and not self.multiple:
            items.insert(0, ('', self.zero))
        return items

    def __call__(self, value):
        """
        Validate ``value`` against the cached set when one has been built,
        otherwise against the database.

        Raises:
            NotImplementedError: when ``multiple`` and ``_and`` are combined
        """
        table = self.dbset.db[self.ktable]
        field = table[self.kfield]
        if self.multiple:
            if self._and:
                raise NotImplementedError
            if isinstance(value, list):
                values = value
            elif value:
                values = [value]
            else:
                values = []
            # Lower bound inclusive, upper bound exclusive.
            if isinstance(self.multiple, (tuple, list)) and \
                    not self.multiple[0] <= len(values) < self.multiple[1]:
                return (values, translate(self.error_message))
            if self.theset:
                # NOTE(review): theset holds strings (see build_set) but the
                # values are compared without str() here, unlike the
                # single-value branch below - confirm this is intended.
                if not [v for v in values if not v in self.theset]:
                    return (values, None)
            else:
                from pydal.adapters import GoogleDatastoreAdapter

                def count(values, s=self.dbset, f=field):
                    return s(f.belongs(map(int, values))).count()
                if isinstance(self.dbset.db._adapter, GoogleDatastoreAdapter):
                    # Count in chunks of 30 values and sum the results.
                    range_ids = range(0, len(values), 30)
                    total = sum(count(values[i:i + 30]) for i in range_ids)
                    if total == len(values):
                        return (values, None)
                elif count(values) == len(values):
                    return (values, None)
        elif self.theset:
            if str(value) in self.theset:
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        else:
            if self.dbset(field == value).count():
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        return (value, translate(self.error_message))


class IS_NOT_IN_DB(Validator):
    """
    Checks that the value is not already stored in a database field.

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table))

    makes the field unique

    Args:
        dbset: object with a ``define_table`` attribute (it is then called
            with no arguments to obtain the set) or a set used as-is
        field: a ``table.field`` reference (anything whose ``str()`` has that
            form) or a Table, in which case its ``_id`` field is used
        error_message: message returned for an empty or duplicate value
        allowed_override: values accepted without querying the database;
            defaults to an empty list created per instance
        ignore_common_filters: passed through when building the query set
    """

    def __init__(
        self,
        dbset,
        field,
        error_message='Value already in database or empty',
        allowed_override=None,
        ignore_common_filters=False,
    ):

        from pydal.objects import Table
        if isinstance(field, Table):
            field = field._id

        if hasattr(dbset, 'define_table'):
            self.dbset = dbset()
        else:
            self.dbset = dbset
        self.field = field
        self.error_message = error_message
        self.record_id = 0
        # A None sentinel replaces the former shared ``[]`` default so that
        # instances never share (and cannot accidentally mutate) one list.
        if allowed_override is None:
            allowed_override = []
        self.allowed_override = allowed_override
        self.ignore_common_filters = ignore_common_filters

    def set_self_id(self, id):
        """Record the id (or dict of key values) of the row being edited."""
        self.record_id = id

    def __call__(self, value):
        """
        Return ``(value, None)`` when no other record holds ``value``.

        The value is converted to a byte string first. A blank value is
        rejected; a value listed in ``allowed_override`` is accepted without
        a query. A matching row is tolerated only when it is the record
        identified by ``record_id``.
        """
        if isinstance(value,unicode):
            value = value.encode('utf8')
        else:
            value = str(value)
        if not value.strip():
            return (value, translate(self.error_message))
        if value in self.allowed_override:
            return (value, None)
        (tablename, fieldname) = str(self.field).split('.')
        table = self.dbset.db[tablename]
        field = table[fieldname]
        subset = self.dbset(field == value,
                            ignore_common_filters=self.ignore_common_filters)
        record_id = self.record_id
        if isinstance(record_id, dict):
            # Record identified by several key fields: a clash is a row that
            # differs from the current record in any of them.
            fields = [table[f] for f in record_id]
            row = subset.select(*fields, **dict(limitby=(0, 1), orderby_on_limitby=False)).first()
            if row and any(str(row[f]) != str(record_id[f]) for f in record_id):
                return (value, translate(self.error_message))
        else:
            row = subset.select(table._id, field, limitby=(0, 1), orderby_on_limitby=False).first()
            if row and str(row.id) != str(record_id):
                return (value, translate(self.error_message))
        return (value, None)


def range_error_message(error_message, what_to_enter, minimum, maximum):
    """
    Build the error message for the number range validators.

    When ``error_message`` is None a default is assembled from
    ``what_to_enter`` and whichever limits are set. The message (default or
    custom) is translated and then formatted with ``min`` and ``max``; an
    ``int``/``long`` maximum is shown reduced by one.
    """
    if error_message is None:
        has_min = minimum is not None
        has_max = maximum is not None
        if has_min and has_max:
            suffix = ' between %(min)g and %(max)g'
        elif has_min:
            suffix = ' greater than or equal to %(min)g'
        elif has_max:
            suffix = ' less than or equal to %(max)g'
        else:
            suffix = ''
        error_message = 'Enter ' + what_to_enter + suffix
    if type(maximum) in [int, long]:
        # Integer upper limits are exclusive; report the largest valid value.
        maximum -= 1
    substitutions = dict(min=minimum, max=maximum)
    return translate(error_message) % substitutions


class IS_INT_IN_RANGE(Validator):
    """
    Determines that the argument is (or can be represented as) an int,
    and that it falls within the specified range. The range is interpreted
    in the Pythonic way, so the test is: min <= value < max.

    The minimum and maximum limits can be None, meaning no lower or upper limit,
    respectively.

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10))

        Results shown without an active translator::

            >>> IS_INT_IN_RANGE(1,5)('4')
            (4, None)
            >>> IS_INT_IN_RANGE(1,5)(4)
            (4, None)
            >>> IS_INT_IN_RANGE(1,5)(1)
            (1, None)
            >>> IS_INT_IN_RANGE(1,5)(5)
            (5, 'Enter an integer between 1 and 4')
            >>> IS_INT_IN_RANGE(1,5)(3.5)
            (3.5, 'Enter an integer between 1 and 4')
            >>> IS_INT_IN_RANGE(None,5)('4')
            (4, None)
            >>> IS_INT_IN_RANGE(None,5)('6')
            ('6', 'Enter an integer less than or equal to 4')
            >>> IS_INT_IN_RANGE(1,None)('4')
            (4, None)
            >>> IS_INT_IN_RANGE(1,None)('0')
            ('0', 'Enter an integer greater than or equal to 1')
            >>> IS_INT_IN_RANGE()(6)
            (6, None)
            >>> IS_INT_IN_RANGE()('abc')
            ('abc', 'Enter an integer')
    """

    def __init__(
        self,
        minimum=None,
        maximum=None,
        error_message=None,
    ):
        self.minimum = None if minimum is None else int(minimum)
        self.maximum = None if maximum is None else int(maximum)
        # The message is built once, at construction time.
        self.error_message = range_error_message(
            error_message, 'an integer', self.minimum, self.maximum)

    def __call__(self, value):
        failure = (value, self.error_message)
        if not regex_isint.match(str(value)):
            return failure
        number = int(value)
        if self.minimum is not None and number < self.minimum:
            return failure
        if self.maximum is not None and number >= self.maximum:
            return failure
        return (number, None)


def str2dec(number):
    """
    Return ``str(number)`` padded to show at least two decimal places.

    A value without a decimal point gets ``.00`` appended; otherwise zeros
    are appended until two digits follow the first decimal point. Longer
    fractions are left untouched.
    """
    text = str(number)
    if '.' not in text:
        return text + '.00'
    fraction = text.split('.')[1]
    missing = 2 - len(fraction)
    # A negative count yields an empty string, so nothing is appended.
    return text + '0' * missing


class IS_FLOAT_IN_RANGE(Validator):
    """
    Determines that the argument is (or can be represented as) a float,
    and that it falls within the specified inclusive range.
    The comparison is made with native arithmetic.

    The minimum and maximum limits can be None, meaning no lower or upper limit,
    respectively. ``dot`` is the decimal separator expected in the input and
    produced by ``formatter``.

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10))

        Results shown without an active translator::

            >>> IS_FLOAT_IN_RANGE(1,5)('4')
            (4.0, None)
            >>> IS_FLOAT_IN_RANGE(1,5)(4)
            (4.0, None)
            >>> IS_FLOAT_IN_RANGE(1,5)(1)
            (1.0, None)
            >>> IS_FLOAT_IN_RANGE(1,5)(5.25)
            (5.25, 'Enter a number between 1 and 5')
            >>> IS_FLOAT_IN_RANGE(1,5)(6.0)
            (6.0, 'Enter a number between 1 and 5')
            >>> IS_FLOAT_IN_RANGE(1,5)(3.5)
            (3.5, None)
            >>> IS_FLOAT_IN_RANGE(1,None)(3.5)
            (3.5, None)
            >>> IS_FLOAT_IN_RANGE(None,5)(3.5)
            (3.5, None)
            >>> IS_FLOAT_IN_RANGE(1,None)(0.5)
            (0.5, 'Enter a number greater than or equal to 1')
            >>> IS_FLOAT_IN_RANGE(None,5)(6.5)
            (6.5, 'Enter a number less than or equal to 5')
            >>> IS_FLOAT_IN_RANGE()(6.5)
            (6.5, None)
            >>> IS_FLOAT_IN_RANGE()('abc')
            ('abc', 'Enter a number')
    """

    def __init__(
        self,
        minimum=None,
        maximum=None,
        error_message=None,
        dot='.'
    ):
        self.minimum = None if minimum is None else float(minimum)
        self.maximum = None if maximum is None else float(maximum)
        self.dot = str(dot)
        # The message is built once, at construction time.
        self.error_message = range_error_message(
            error_message, 'a number', self.minimum, self.maximum)

    def __call__(self, value):
        try:
            if self.dot == '.':
                number = float(value)
            else:
                # Normalize the custom decimal separator before converting.
                number = float(str(value).replace(self.dot, '.'))
            lower_ok = self.minimum is None or number >= self.minimum
            if lower_ok and (self.maximum is None or number <= self.maximum):
                return (number, None)
        except (ValueError, TypeError):
            pass
        return (value, self.error_message)

    def formatter(self, value):
        """Render with at least two decimals, using the configured separator."""
        if value is None:
            return None
        padded = str2dec(value)
        return padded.replace('.', self.dot)


class IS_DECIMAL_IN_RANGE(Validator):
    """
    Determines that the argument is (or can be represented as) a Python Decimal,
    and that it falls within the specified inclusive range.
    The comparison is made with Python Decimal arithmetic.

    The minimum and maximum limits can be None, meaning no lower or upper limit,
    respectively. ``dot`` is the decimal separator expected in the input and
    produced by ``formatter``.

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10))

        Results shown without an active translator::

            >>> IS_DECIMAL_IN_RANGE(1,5)('4')
            (Decimal('4'), None)
            >>> IS_DECIMAL_IN_RANGE(1,5)(4)
            (Decimal('4'), None)
            >>> IS_DECIMAL_IN_RANGE(1,5)(1)
            (Decimal('1'), None)
            >>> IS_DECIMAL_IN_RANGE(1,5)(5.25)
            (5.25, 'Enter a number between 1 and 5')
            >>> IS_DECIMAL_IN_RANGE(5.25,6)(5.25)
            (Decimal('5.25'), None)
            >>> IS_DECIMAL_IN_RANGE(5.25,6)('5.25')
            (Decimal('5.25'), None)
            >>> IS_DECIMAL_IN_RANGE(1,5)(6.0)
            (6.0, 'Enter a number between 1 and 5')
            >>> IS_DECIMAL_IN_RANGE(1,5)(3.5)
            (Decimal('3.5'), None)
            >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5)
            (Decimal('3.5'), None)
            >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5)
            (6.5, 'Enter a number between 1.5 and 5.5')
            >>> IS_DECIMAL_IN_RANGE(1.5,None)(6.5)
            (Decimal('6.5'), None)
            >>> IS_DECIMAL_IN_RANGE(1.5,None)(0.5)
            (0.5, 'Enter a number greater than or equal to 1.5')
            >>> IS_DECIMAL_IN_RANGE(None,5.5)(4.5)
            (Decimal('4.5'), None)
            >>> IS_DECIMAL_IN_RANGE(None,5.5)(6.5)
            (6.5, 'Enter a number less than or equal to 5.5')
            >>> IS_DECIMAL_IN_RANGE()(6.5)
            (Decimal('6.5'), None)
            >>> IS_DECIMAL_IN_RANGE(0,99)(123.123)
            (123.123, 'Enter a number between 0 and 99')
            >>> IS_DECIMAL_IN_RANGE(0,99)('123.123')
            ('123.123', 'Enter a number between 0 and 99')
            >>> IS_DECIMAL_IN_RANGE(0,99)('12.34')
            (Decimal('12.34'), None)
            >>> IS_DECIMAL_IN_RANGE()('abc')
            ('abc', 'Enter a number')
    """

    def __init__(
        self,
        minimum=None,
        maximum=None,
        error_message=None,
        dot='.'
    ):
        # Limits go through str() before becoming Decimals.
        self.minimum = None if minimum is None else decimal.Decimal(str(minimum))
        self.maximum = None if maximum is None else decimal.Decimal(str(maximum))
        self.dot = str(dot)
        self.error_message = range_error_message(
            error_message, 'a number', self.minimum, self.maximum)

    def __call__(self, value):
        try:
            if isinstance(value, decimal.Decimal):
                number = value
            else:
                normalized = str(value).replace(self.dot, '.')
                number = decimal.Decimal(normalized)
            # The comparisons stay inside the try block: they belong to the
            # operations whose listed exceptions mean "invalid value".
            lower_ok = self.minimum is None or number >= self.minimum
            if lower_ok and (self.maximum is None or number <= self.maximum):
                return (number, None)
        except (ValueError, TypeError, decimal.InvalidOperation):
            pass
        return (value, self.error_message)

    def formatter(self, value):
        """Render with at least two decimals, using the configured separator."""
        if value is None:
            return None
        padded = str2dec(value)
        return padded.replace('.', self.dot)


def is_empty(value, empty_regex=None):
    """
    Test whether a field value counts as empty.

    String values are stripped first; a stripped string matched by the
    compiled ``empty_regex`` (when given) is replaced by ``''``.

    Returns:
        ``(value, flag)`` where ``value`` is the possibly normalized input
        and ``flag`` is True when it is None, ``''`` or ``[]``.
    """
    if isinstance(value, (str, unicode)):
        stripped = value.strip()
        treat_as_blank = empty_regex is not None and empty_regex.match(stripped)
        value = '' if treat_as_blank else stripped
    blank = bool(value is None or value == '' or value == [])
    return (value, blank)


class IS_NOT_EMPTY(Validator):
    """
    Rejects empty values: None, ``''``, ``[]``, whitespace-only strings and
    strings matched by the optional ``empty_regex``. Accepted strings are
    returned stripped.

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY())

        Results shown without an active translator::

            >>> IS_NOT_EMPTY()(1)
            (1, None)
            >>> IS_NOT_EMPTY()(0)
            (0, None)
            >>> IS_NOT_EMPTY()('x')
            ('x', None)
            >>> IS_NOT_EMPTY()(' x ')
            ('x', None)
            >>> IS_NOT_EMPTY()(None)
            (None, 'Enter a value')
            >>> IS_NOT_EMPTY()('')
            ('', 'Enter a value')
            >>> IS_NOT_EMPTY()('  ')
            ('', 'Enter a value')
            >>> IS_NOT_EMPTY()(' \\n\\t')
            ('', 'Enter a value')
            >>> IS_NOT_EMPTY()([])
            ([], 'Enter a value')
            >>> IS_NOT_EMPTY(empty_regex='def')('def')
            ('', 'Enter a value')
            >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg')
            ('', 'Enter a value')
            >>> IS_NOT_EMPTY(empty_regex='def')('abc')
            ('abc', None)
    """

    def __init__(self, error_message='Enter a value', empty_regex=None):
        self.error_message = error_message
        # Compile once here so each call can reuse the pattern.
        self.empty_regex = (
            None if empty_regex is None else re.compile(empty_regex))

    def __call__(self, value):
        cleaned, empty = is_empty(value, empty_regex=self.empty_regex)
        if not empty:
            return (cleaned, None)
        return (cleaned, translate(self.error_message))


class IS_ALPHANUMERIC(IS_MATCH):
    """
    IS_MATCH preconfigured with a pattern that accepts only regex "word"
    characters (letters, digits and underscore). The pattern uses ``*``, so
    the empty string is accepted as well.

    Args:
        error_message: message handed to IS_MATCH for values that do not
            match

    Example:
        Used as::

            INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC())

            >>> IS_ALPHANUMERIC()('1')
            ('1', None)
            >>> IS_ALPHANUMERIC()('')
            ('', None)
            >>> IS_ALPHANUMERIC()('A_a')
            ('A_a', None)
            >>> IS_ALPHANUMERIC()('!')
            ('!', 'enter only letters, numbers, and underscore')

    NOTE(review): the default error_message starts with a capital 'E', while
    the last example shows a lowercase message. The example looks stale;
    confirm against IS_MATCH.__call__.
    """

    def __init__(self, error_message='Enter only letters, numbers, and underscore'):
        # All validation logic lives in IS_MATCH; only the pattern and the
        # default message are fixed here.
        IS_MATCH.__init__(self, '^[\w]*$', error_message)


class IS_EMAIL(Validator):
    """
    Checks if field's value is a valid email address. Can be set to disallow
    or force addresses from certain domain(s).

    Email regex adapted from
    http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx,
    generally following the RFCs, except that we disallow quoted strings
    and permit underscores and leading numerics in subdomain labels

    Args:
        banned: regex text for disallowed address domains
        forced: regex text for required address domains
        error_message: message returned (through ``translate``) when the
            value is rejected

    Both ``banned`` and ``forced`` can also be custom objects with a
    match(value) method. They are applied to the part of the address after
    the '@'.

    Values that are not strings (e.g. None) are rejected with the error
    message rather than raising.

    Example:
        Check for valid email address::

            INPUT(_type='text', _name='name',
                requires=IS_EMAIL())

        Check for valid email address that can't be from a .com domain::

            INPUT(_type='text', _name='name',
                requires=IS_EMAIL(banned='^.*\.com(|\..*)$'))

        Check for valid email address that must be from a .edu domain::

            INPUT(_type='text', _name='name',
                requires=IS_EMAIL(forced='^.*\.edu(|\..*)$'))

        The error messages below are the untranslated defaults (this assumes
        no translator changes the text)::

            >>> IS_EMAIL()('a@b.com')
            ('a@b.com', None)
            >>> IS_EMAIL()('abc@def.com')
            ('abc@def.com', None)
            >>> IS_EMAIL()('abc@3def.com')
            ('abc@3def.com', None)
            >>> IS_EMAIL()('abc@def.us')
            ('abc@def.us', None)
            >>> IS_EMAIL()('abc@d_-f.us')
            ('abc@d_-f.us', None)
            >>> IS_EMAIL()('@def.com')           # missing name
            ('@def.com', 'Enter a valid email address')
            >>> IS_EMAIL()('"abc@def".com')      # quoted name
            ('"abc@def".com', 'Enter a valid email address')
            >>> IS_EMAIL()('abc+def.com')        # no @
            ('abc+def.com', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@def.x')          # one-char TLD
            ('abc@def.x', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@def.12')         # numeric TLD
            ('abc@def.12', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@def..com')       # double-dot in domain
            ('abc@def..com', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@.def.com')       # dot starts domain
            ('abc@.def.com', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@def.c_m')        # underscore in TLD
            ('abc@def.c_m', 'Enter a valid email address')
            >>> IS_EMAIL()('NotAnEmail')         # missing @
            ('NotAnEmail', 'Enter a valid email address')
            >>> IS_EMAIL()('abc@NotAnEmail')     # missing TLD
            ('abc@NotAnEmail', 'Enter a valid email address')
            >>> IS_EMAIL()('customer/department@example.com')
            ('customer/department@example.com', None)
            >>> IS_EMAIL()('$A12345@example.com')
            ('$A12345@example.com', None)
            >>> IS_EMAIL()('!def!xyz%abc@example.com')
            ('!def!xyz%abc@example.com', None)
            >>> IS_EMAIL()('_Yosemite.Sam@example.com')
            ('_Yosemite.Sam@example.com', None)
            >>> IS_EMAIL()('~@example.com')
            ('~@example.com', None)
            >>> IS_EMAIL()('.wooly@example.com')       # dot starts name
            ('.wooly@example.com', 'Enter a valid email address')
            >>> IS_EMAIL()('wo..oly@example.com')      # adjacent dots in name
            ('wo..oly@example.com', 'Enter a valid email address')
            >>> IS_EMAIL()('pootietang.@example.com')  # dot ends name
            ('pootietang.@example.com', 'Enter a valid email address')
            >>> IS_EMAIL()('.@example.com')            # name is bare dot
            ('.@example.com', 'Enter a valid email address')
            >>> IS_EMAIL()('Ima.Fool@example.com')
            ('Ima.Fool@example.com', None)
            >>> IS_EMAIL()('Ima Fool@example.com')     # space in name
            ('Ima Fool@example.com', 'Enter a valid email address')
            >>> IS_EMAIL()('localguy@localhost')       # localhost as domain
            ('localguy@localhost', None)
            >>> IS_EMAIL()(None)                       # not a string
            (None, 'Enter a valid email address')

    """

    regex = re.compile('''
        ^(?!\.)                            # name may not begin with a dot
        (
          [-a-z0-9!\#$%&'*+/=?^_`{|}~]     # all legal characters except dot
          |
          (?<!\.)\.                        # single dots only
        )+
        (?<!\.)                            # name may not end with a dot
        @
        (
          localhost
          |
          (
            [a-z0-9]
                # [sub]domain begins with alphanumeric
            (
              [-\w]*                         # alphanumeric, underscore, hyphen
              [a-z0-9]                       # ending alphanumeric
            )?
          \.                               # ending dot
          )+
          [a-z]{2,}                        # TLD alpha-only
       )$
    ''', re.VERBOSE | re.IGNORECASE)

    # Alternative pattern kept for reference; the methods of this class do
    # not use it.
    regex_proposed_but_failed = re.compile('^([\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)$', re.VERBOSE | re.IGNORECASE)

    def __init__(self,
                 banned=None,
                 forced=None,
                 error_message='Enter a valid email address'):
        # Regex text may arrive as str or unicode; compile both. Anything
        # else is assumed to be an object that already has a match() method.
        if isinstance(banned, (str, unicode)):
            banned = re.compile(banned)
        if isinstance(forced, (str, unicode)):
            forced = re.compile(forced)
        self.banned = banned
        self.forced = forced
        self.error_message = error_message

    def __call__(self, value):
        """
        Args:
            value: the email address to validate

        Returns:
            a tuple ``(value, None)`` when the address is well formed and
            its domain passes the banned/forced checks, otherwise
            ``(value, error message)``
        """
        try:
            match = self.regex.match(value)
        except TypeError:
            # None, numbers and other non-string input cannot be an address;
            # report a validation error instead of crashing the caller.
            match = None
        if match:
            # The name part of the pattern cannot contain '@', so a matching
            # value has exactly one '@' and index 1 is the whole domain.
            domain = value.split('@')[1]
            if (not self.banned or not self.banned.match(domain)) \
                    and (not self.forced or self.forced.match(domain)):
                return (value, None)
        return (value, translate(self.error_message))

class IS_LIST_OF_EMAILS(object):
    """
    Validates a string holding several email addresses. The addresses are
    the runs of characters between commas, semicolons and whitespace; each
    one is checked with a default ``IS_EMAIL()``.

    Args:
        error_message: template for the error; its ``%s`` receives the
            rejected addresses joined with ', ' (each listed once)

    Example:
        Used as::

            Field('emails','list:string',
                  widget=SQLFORM.widgets.text.widget,
                  requires=IS_LIST_OF_EMAILS(),
                  represent=lambda v,r: \
                     SPAN(*[A(x,_href='mailto:'+x) for x in (v or [])])
                  )
    """
    split_emails = re.compile('[^,;\s]+')

    def __init__(self, error_message = 'Invalid emails: %s'):
        self.error_message = error_message

    def __call__(self, value):
        """Return ``(value, None)`` or ``(value, error)`` naming bad items."""
        check = IS_EMAIL()
        rejected = []
        for candidate in self.split_emails.findall(value):
            failure = check(candidate)[1]
            # Report every bad address once, in order of first appearance.
            if failure and candidate not in rejected:
                rejected.append(candidate)
        if rejected:
            listing = ', '.join(rejected)
            return (value, translate(self.error_message) % listing)
        return (value, None)

    def formatter(self,value,row=None):
        """Join the stored list of addresses into one ', '-separated string."""
        addresses = value or []
        return ', '.join(addresses)


# URL scheme source:
# <http://en.wikipedia.org/wiki/URI_scheme> obtained on 2008-Nov-10

# Officially registered URI schemes. Every scheme is listed once; the BEEP
# bindings come in plain/secure pairs ('soap.beep'/'soap.beeps',
# 'xmlrpc.beep'/'xmlrpc.beeps').
official_url_schemes = [
    'aaa',
    'aaas',
    'acap',
    'cap',
    'cid',
    'crid',
    'data',
    'dav',
    'dict',
    'dns',
    'fax',
    'file',
    'ftp',
    'go',
    'gopher',
    'h323',
    'http',
    'https',
    'icap',
    'im',
    'imap',
    'info',
    'ipp',
    'iris',
    'iris.beep',
    'iris.xpc',
    'iris.xpcs',
    'iris.lws',
    'ldap',
    'mailto',
    'mid',
    'modem',
    'msrp',
    'msrps',
    'mtqp',
    'mupdate',
    'news',
    'nfs',
    'nntp',
    'opaquelocktoken',
    'pop',
    'pres',
    'prospero',
    'rtsp',
    'service',
    'shttp',
    'sip',
    'sips',
    'snmp',
    'soap.beep',
    'soap.beeps',
    'tag',
    'tel',
    'telnet',
    'tftp',
    'thismessage',
    'tip',
    'tv',
    'urn',
    'vemmi',
    'wais',
    'xmlrpc.beep',
    'xmlrpc.beeps',
    'xmpp',
    'z39.50r',
    'z39.50s',
]
# URI schemes that are accepted in addition to official_url_schemes.
unofficial_url_schemes = [
    'about',
    'adiumxtra',
    'aim',
    'afp',
    'aw',
    'callto',
    'chrome',
    'cvs',
    'ed2k',
    'feed',
    'fish',
    'gg',
    'gizmoproject',
    'iax2',
    'irc',
    'ircs',
    'itms',
    'jar',
    'javascript',
    'keyparc',
    'lastfm',
    'ldaps',
    'magnet',
    'mms',
    'msnim',
    'mvn',
    'notes',
    'nsfw',
    'psyc',
    'paparazzi:http',
    'rmi',
    'rsync',
    'secondlife',
    'sgn',
    'skype',
    'ssh',
    'sftp',
    'smb',
    'sms',
    'soldat',
    'steam',
    'svn',
    'teamspeak',
    'unreal',
    'ut2004',
    'ventrilo',
    'view-source',
    'webcal',
    'wyciwyg',
    'xfire',
    'xri',
    'ymsgr',
]
# Default value of IS_GENERIC_URL's allowed_schemes. The leading None stands
# for "no scheme given": IS_GENERIC_URL documents that leaving None out of
# the allowed list makes abbreviated URLs (lacking a scheme) invalid.
all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes
# No scheme, or one of the two HTTP schemes.
# NOTE(review): presumably the allowed set for the HTTP-specific URL
# validator; its use is not visible in this part of the file.
http_schemes = [None, 'http', 'https']


# This regex comes from RFC 2396, Appendix B. It's used to split a URL into
# its component parts
# Here are the regex groups that it extracts:
#    scheme = group(2)
#    authority = group(4)
#    path = group(5)
#    query = group(7)
#    fragment = group(9)
# Every component is optional, so the pattern matches any string; a component
# that is absent comes back as None (the path as '').
# Mind the indexing: match.groups() is a 0-based tuple, so group(n) is
# groups()[n - 1]. Groups 6 and 8 are the query and the fragment together
# with their leading '?' and '#'.

url_split_regex = \
    re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?')

# Defined in RFC 3490, Section 3.1, Requirement #1
# Use this regex to split the authority component of a unicode URL into
# its component labels
# The four separators are U+002E (full stop), U+3002 (ideographic full stop),
# U+FF0E (fullwidth full stop) and U+FF61 (halfwidth ideographic full stop).
label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]')


def escape_unicode(string):
    '''
    Converts a unicode string into US-ASCII, using a simple conversion scheme.
    Each character above U+007F is replaced by URL escapes built from the
    hexadecimal digits of its code point, two digits per escape. The digits
    are left-padded with a zero when their count is odd, so every escape is
    well formed ('%' followed by exactly two hex digits).

    Examples:
        U+4E86 becomes '%4e%86'
        U+00E9 becomes '%e9'
        U+03B1 becomes '%03%b1'

    Characters up to U+007F are copied unchanged.

    Args:
        string: unicode string, the unicode string to convert into an
            escaped US-ASCII form

    Returns:
        string: the US-ASCII escaped form of the inputted string

    @author: Jonathan Benn
    '''
    pieces = []

    for character in string:
        code = ord(character)
        if code > 0x7F:
            digits = '%x' % code
            # A fixed "two pairs" slicing would emit a dangling '%' for
            # 2-digit code points and drop digits beyond the fourth, so pad
            # to an even length and emit as many pairs as are needed.
            if len(digits) % 2:
                digits = '0' + digits
            for start in range(0, len(digits), 2):
                pieces.append('%' + digits[start:start + 2])
        else:
            pieces.append(character)

    # Everything collected is ASCII; str() keeps the byte-string return type
    # even when the input was a unicode object.
    return str(''.join(pieces))


def unicode_to_ascii_authority(authority):
    '''
    Follows the steps in RFC 3490, Section 4 to convert a unicode authority
    string into its ASCII equivalent.
    For example, u'www.Alliancefran\xe7aise.nu' will be converted into
    'www.xn--alliancefranaise-npb.nu'

    Args:
        authority: unicode string, the URL authority component to convert,
            e.g. u'www.Alliancefran\xe7aise.nu'

    Returns:
        string: the US-ASCII character equivalent to the inputed authority,
             e.g. 'www.xn--alliancefranaise-npb.nu'

    Raises:
        Exception: if the function is not able to convert the inputed
            authority (the IDNA conversion failed and the labels are not
            plain ASCII either)

    @author: Jonathan Benn
    '''
    #RFC 3490, Section 4, Step 1
    #The encodings.idna Python module assumes that AllowUnassigned == True

    #RFC 3490, Section 4, Step 2
    labels = label_split_regex.split(authority)

    #RFC 3490, Section 4, Step 3
    #The encodings.idna Python module assumes that UseSTD3ASCIIRules == False

    #RFC 3490, Section 4, Step 4
    #We use the ToASCII operation because we are about to put the authority
    #into an IDN-unaware slot
    asciiLabels = []
    try:
        import encodings.idna
        for label in labels:
            if label:
                asciiLabels.append(encodings.idna.ToASCII(label))
            else:
                #encodings.idna.ToASCII does not accept an empty string, but
                #it is necessary for us to allow for empty labels so that we
                #don't modify the URL
                asciiLabels.append('')
    except Exception:
        #Best-effort fallback when the idna codec is unavailable or rejects
        #a label: plain conversion, which itself raises for non-ASCII labels.
        #Only Exception is caught so that KeyboardInterrupt and SystemExit
        #are not swallowed.
        asciiLabels = [str(label) for label in labels]
    #RFC 3490, Section 4, Step 5
    #Labels are re-joined with U+002E (full stop)
    return str('.'.join(asciiLabels))


def unicode_to_ascii_url(url, prepend_scheme):
    '''
    Converts the inputed unicode url into a US-ASCII equivalent. This function
    goes a little beyond RFC 3490, which is limited in scope to the domain name
    (authority) only. Here, the functionality is expanded to what was observed
    on Wikipedia on 2009-Jan-22:

       Component    Can Use Unicode?
       ---------    ----------------
       scheme       No
       authority    Yes
       path         Yes
       query        Yes
       fragment     No

    The authority component gets converted to punycode, but occurrences of
    unicode in the path and query get converted into URI escapes by
    escape_unicode. E.g. the unicode character U+4E2D will be converted into
    '%4e%2d'. Testing with Firefox v3.0.5 has shown that it can understand
    this kind of URI encoding.

    Args:
        url: unicode string, the URL to convert from unicode into US-ASCII
        prepend_scheme: string, a protocol scheme to prepend to the URL if
            we're having trouble parsing it.
            e.g. "http". Input None to disable this functionality

    Returns:
        string: a US-ASCII equivalent of the inputed url. The result starts
        with '<scheme>://' only when prepend_scheme is set; when it is None
        the result starts at the authority.

    Raises:
        Exception: if no authority component can be found, even after
            prepending a scheme

    @author: Jonathan Benn
    '''
    #convert the authority component of the URL into an ASCII punycode string,
    #but encode the rest using the regular URI character encoding

    #groups() is 0-based: [1] scheme, [3] authority, [4] path,
    #[5] '?' + query, [7] '#' + fragment
    groups = url_split_regex.match(url).groups()
    #If no authority was found
    if not groups[3]:
        #Try appending a scheme to see if that fixes the problem
        scheme_to_prepend = prepend_scheme or 'http'
        groups = url_split_regex.match(
            unicode(scheme_to_prepend) + u'://' + url).groups()
    #if we still can't find the authority
    if not groups[3]:
        raise Exception('No authority component found, ' +
                        'could not decode unicode to US-ASCII')

    #We're here if we found an authority, let's rebuild the URL
    scheme = groups[1]
    authority = groups[3]
    path = groups[4] or ''
    #these two keep their leading '?' / '#' so they can be re-appended as is
    query = groups[5] or ''
    fragment = groups[7] or ''

    if prepend_scheme:
        #A protocol-relative URL ('//host/path') has an authority but no
        #scheme; fall back to prepend_scheme instead of emitting 'None://'
        scheme = str(scheme or prepend_scheme) + '://'
    else:
        scheme = ''
    return scheme + unicode_to_ascii_authority(authority) +\
        escape_unicode(path) + escape_unicode(query) + str(fragment)


class IS_GENERIC_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The URL scheme specified (if one is specified) is not valid

    Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL if and only if that's necessary to successfully parse the URL.
    Please note that a scheme will be prepended only for rare cases
    (e.g. 'google.ca:80')

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    @author: Jonathan Benn

        >>> IS_GENERIC_URL()('http://user@abc.com')
        ('http://user@abc.com', None)

    Args:
        error_message: a string, the error message to give the end user
            if the URL does not validate
        allowed_schemes: a list containing strings or None. Each element
            is a scheme the inputed URL is allowed to use
        prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid

    """

    def __init__(
        self,
        error_message='Enter a valid URL',
        allowed_schemes=None,
        prepend_scheme=None,
    ):

        self.error_message = error_message
        if allowed_schemes is None:
            self.allowed_schemes = all_url_schemes
        else:
            self.allowed_schemes = allowed_schemes
        self.prepend_scheme = prepend_scheme
        if self.prepend_scheme not in self.allowed_schemes:
            raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s"
                              % (self.prepend_scheme, self.allowed_schemes))

    GENERIC_URL = re.compile(r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$")
    GENERIC_URL_VALID = re.compile(r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%#]+$")

    def __call__(self, value):
        """
        Args:
            value: a string, the URL to validate

        Returns:
            a tuple, where tuple[0] is the inputed value (possible
            prepended with prepend_scheme), and tuple[1] is either
            None (success!) or the string erro…
Large files files are truncated, but you can click here to view the full file