PageRenderTime 64ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/gluon/validators.py

https://github.com/clach04/web2py
Python | 3345 lines | 3161 code | 73 blank | 111 comment | 141 complexity | 027af7eaa8b23e00fec75b040c3c1645 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, BSD-2-Clause
  1. #!/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. This file is part of the web2py Web Framework
  5. Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
  6. License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
  7. Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE
  8. """
  9. import os
  10. import re
  11. import datetime
  12. import time
  13. import cgi
  14. import urllib
  15. import struct
  16. import decimal
  17. import unicodedata
  18. from cStringIO import StringIO
  19. from utils import simple_hash, web2py_uuid, DIGEST_ALG_BY_SIZE
  20. from dal import FieldVirtual, FieldMethod
  21. JSONErrors = (NameError, TypeError, ValueError, AttributeError,
  22. KeyError)
  23. try:
  24. import json as simplejson
  25. except ImportError:
  26. from gluon.contrib import simplejson
  27. from gluon.contrib.simplejson.decoder import JSONDecodeError
  28. JSONErrors += (JSONDecodeError,)
  29. __all__ = [
  30. 'CLEANUP',
  31. 'CRYPT',
  32. 'IS_ALPHANUMERIC',
  33. 'IS_DATE_IN_RANGE',
  34. 'IS_DATE',
  35. 'IS_DATETIME_IN_RANGE',
  36. 'IS_DATETIME',
  37. 'IS_DECIMAL_IN_RANGE',
  38. 'IS_EMAIL',
  39. 'IS_EMPTY_OR',
  40. 'IS_EXPR',
  41. 'IS_FLOAT_IN_RANGE',
  42. 'IS_IMAGE',
  43. 'IS_IN_DB',
  44. 'IS_IN_SET',
  45. 'IS_INT_IN_RANGE',
  46. 'IS_IPV4',
  47. 'IS_LENGTH',
  48. 'IS_LIST_OF',
  49. 'IS_LOWER',
  50. 'IS_MATCH',
  51. 'IS_EQUAL_TO',
  52. 'IS_NOT_EMPTY',
  53. 'IS_NOT_IN_DB',
  54. 'IS_NULL_OR',
  55. 'IS_SLUG',
  56. 'IS_STRONG',
  57. 'IS_TIME',
  58. 'IS_UPLOAD_FILENAME',
  59. 'IS_UPPER',
  60. 'IS_URL',
  61. 'IS_JSON',
  62. ]
  63. try:
  64. from globals import current
  65. have_current = True
  66. except ImportError:
  67. have_current = False
  68. def translate(text):
  69. if text is None:
  70. return None
  71. elif isinstance(text, (str, unicode)) and have_current:
  72. if hasattr(current, 'T'):
  73. return str(current.T(text))
  74. return str(text)
  75. def options_sorter(x, y):
  76. return (str(x[1]).upper() > str(y[1]).upper() and 1) or -1
  77. class Validator(object):
  78. """
  79. Root for all validators, mainly for documentation purposes.
  80. Validators are classes used to validate input fields (including forms
  81. generated from database tables).
  82. Here is an example of using a validator with a FORM::
  83. INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10))
  84. Here is an example of how to require a validator for a table field::
  85. db.define_table('person', SQLField('name'))
  86. db.person.name.requires=IS_NOT_EMPTY()
  87. Validators are always assigned using the requires attribute of a field. A
  88. field can have a single validator or multiple validators. Multiple
  89. validators are made part of a list::
  90. db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')]
  91. Validators are called by the function accepts on a FORM or other HTML
  92. helper object that contains a form. They are always called in the order in
  93. which they are listed.
  94. Built-in validators have constructors that take the optional argument error
  95. message which allows you to change the default error message.
  96. Here is an example of a validator on a database table::
  97. db.person.name.requires=IS_NOT_EMPTY(error_message=T('fill this'))
  98. where we have used the translation operator T to allow for
  99. internationalization.
  100. Notice that default error messages are not translated.
  101. """
  102. def formatter(self, value):
  103. """
  104. For some validators returns a formatted version (matching the validator)
  105. of value. Otherwise just returns the value.
  106. """
  107. return value
  108. def __call__(self, value):
  109. raise NotImplementedError
  110. return (value, None)
  111. class IS_MATCH(Validator):
  112. """
  113. example::
  114. INPUT(_type='text', _name='name', requires=IS_MATCH('.+'))
  115. the argument of IS_MATCH is a regular expression::
  116. >>> IS_MATCH('.+')('hello')
  117. ('hello', None)
  118. >>> IS_MATCH('hell')('hello')
  119. ('hello', None)
  120. >>> IS_MATCH('hell.*', strict=False)('hello')
  121. ('hello', None)
  122. >>> IS_MATCH('hello')('shello')
  123. ('shello', 'invalid expression')
  124. >>> IS_MATCH('hello', search=True)('shello')
  125. ('shello', None)
  126. >>> IS_MATCH('hello', search=True, strict=False)('shellox')
  127. ('shellox', None)
  128. >>> IS_MATCH('.*hello.*', search=True, strict=False)('shellox')
  129. ('shellox', None)
  130. >>> IS_MATCH('.+')('')
  131. ('', 'invalid expression')
  132. """
  133. def __init__(self, expression, error_message='invalid expression',
  134. strict=False, search=False, extract=False):
  135. if strict or not search:
  136. if not expression.startswith('^'):
  137. expression = '^(%s)' % expression
  138. if strict:
  139. if not expression.endswith('$'):
  140. expression = '(%s)$' % expression
  141. self.regex = re.compile(expression)
  142. self.error_message = error_message
  143. self.extract = extract
  144. def __call__(self, value):
  145. match = self.regex.search(value)
  146. if match is not None:
  147. return (self.extract and match.group() or value, None)
  148. return (value, translate(self.error_message))
  149. class IS_EQUAL_TO(Validator):
  150. """
  151. example::
  152. INPUT(_type='text', _name='password')
  153. INPUT(_type='text', _name='password2',
  154. requires=IS_EQUAL_TO(request.vars.password))
  155. the argument of IS_EQUAL_TO is a string
  156. >>> IS_EQUAL_TO('aaa')('aaa')
  157. ('aaa', None)
  158. >>> IS_EQUAL_TO('aaa')('aab')
  159. ('aab', 'no match')
  160. """
  161. def __init__(self, expression, error_message='no match'):
  162. self.expression = expression
  163. self.error_message = error_message
  164. def __call__(self, value):
  165. if value == self.expression:
  166. return (value, None)
  167. return (value, translate(self.error_message))
  168. class IS_EXPR(Validator):
  169. """
  170. example::
  171. INPUT(_type='text', _name='name',
  172. requires=IS_EXPR('5 < int(value) < 10'))
  173. the argument of IS_EXPR must be python condition::
  174. >>> IS_EXPR('int(value) < 2')('1')
  175. ('1', None)
  176. >>> IS_EXPR('int(value) < 2')('2')
  177. ('2', 'invalid expression')
  178. """
  179. def __init__(self, expression, error_message='invalid expression', environment=None):
  180. self.expression = expression
  181. self.error_message = error_message
  182. self.environment = environment or {}
  183. def __call__(self, value):
  184. if callable(self.expression):
  185. return (value, self.expression(value))
  186. # for backward compatibility
  187. self.environment.update(value=value)
  188. exec '__ret__=' + self.expression in self.environment
  189. if self.environment['__ret__']:
  190. return (value, None)
  191. return (value, translate(self.error_message))
  192. class IS_LENGTH(Validator):
  193. """
  194. Checks if length of field's value fits between given boundaries. Works
  195. for both text and file inputs.
  196. Arguments:
  197. maxsize: maximum allowed length / size
  198. minsize: minimum allowed length / size
  199. Examples::
  200. #Check if text string is shorter than 33 characters:
  201. INPUT(_type='text', _name='name', requires=IS_LENGTH(32))
  202. #Check if password string is longer than 5 characters:
  203. INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6))
  204. #Check if uploaded file has size between 1KB and 1MB:
  205. INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024))
  206. >>> IS_LENGTH()('')
  207. ('', None)
  208. >>> IS_LENGTH()('1234567890')
  209. ('1234567890', None)
  210. >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890') # too long
  211. ('1234567890', 'enter from 0 to 5 characters')
  212. >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890') # too short
  213. ('1234567890', 'enter from 20 to 50 characters')
  214. """
  215. def __init__(self, maxsize=255, minsize=0,
  216. error_message='enter from %(min)g to %(max)g characters'):
  217. self.maxsize = maxsize
  218. self.minsize = minsize
  219. self.error_message = error_message
  220. def __call__(self, value):
  221. if value is None:
  222. length = 0
  223. if self.minsize <= length <= self.maxsize:
  224. return (value, None)
  225. elif isinstance(value, cgi.FieldStorage):
  226. if value.file:
  227. value.file.seek(0, os.SEEK_END)
  228. length = value.file.tell()
  229. value.file.seek(0, os.SEEK_SET)
  230. elif hasattr(value, 'value'):
  231. val = value.value
  232. if val:
  233. length = len(val)
  234. else:
  235. length = 0
  236. if self.minsize <= length <= self.maxsize:
  237. return (value, None)
  238. elif isinstance(value, (str, unicode, list)):
  239. if self.minsize <= len(value) <= self.maxsize:
  240. return (value, None)
  241. elif self.minsize <= len(str(value)) <= self.maxsize:
  242. try:
  243. value.decode('utf8')
  244. return (value, None)
  245. except:
  246. pass
  247. return (value, translate(self.error_message)
  248. % dict(min=self.minsize, max=self.maxsize))
  249. class IS_JSON(Validator):
  250. """
  251. example::
  252. INPUT(_type='text', _name='name',
  253. requires=IS_JSON(error_message="This is not a valid json input")
  254. >>> IS_JSON()('{"a": 100}')
  255. ('{"a": 100}', None)
  256. >>> IS_JSON()('spam1234')
  257. ('spam1234', 'invalid json')
  258. """
  259. def __init__(self, error_message='invalid json'):
  260. self.error_message = error_message
  261. def __call__(self, value):
  262. if value is None:
  263. return None
  264. try:
  265. return (simplejson.loads(value), None)
  266. except JSONErrors:
  267. return (value, translate(self.error_message))
  268. def formatter(self,value):
  269. if value is None:
  270. return None
  271. return simplejson.dumps(value)
  272. class IS_IN_SET(Validator):
  273. """
  274. example::
  275. INPUT(_type='text', _name='name',
  276. requires=IS_IN_SET(['max', 'john'],zero=''))
  277. the argument of IS_IN_SET must be a list or set
  278. >>> IS_IN_SET(['max', 'john'])('max')
  279. ('max', None)
  280. >>> IS_IN_SET(['max', 'john'])('massimo')
  281. ('massimo', 'value not allowed')
  282. >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john'))
  283. (('max', 'john'), None)
  284. >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john'))
  285. (('bill', 'john'), 'value not allowed')
  286. >>> IS_IN_SET(('id1','id2'), ['first label','second label'])('id1') # Traditional way
  287. ('id1', None)
  288. >>> IS_IN_SET({'id1':'first label', 'id2':'second label'})('id1')
  289. ('id1', None)
  290. >>> import itertools
  291. >>> IS_IN_SET(itertools.chain(['1','3','5'],['2','4','6']))('1')
  292. ('1', None)
  293. >>> IS_IN_SET([('id1','first label'), ('id2','second label')])('id1') # Redundant way
  294. ('id1', None)
  295. """
  296. def __init__(
  297. self,
  298. theset,
  299. labels=None,
  300. error_message='value not allowed',
  301. multiple=False,
  302. zero='',
  303. sort=False,
  304. ):
  305. self.multiple = multiple
  306. if isinstance(theset, dict):
  307. self.theset = [str(item) for item in theset]
  308. self.labels = theset.values()
  309. elif theset and isinstance(theset, (tuple, list)) \
  310. and isinstance(theset[0], (tuple, list)) and len(theset[0]) == 2:
  311. self.theset = [str(item) for item, label in theset]
  312. self.labels = [str(label) for item, label in theset]
  313. else:
  314. self.theset = [str(item) for item in theset]
  315. self.labels = labels
  316. self.error_message = error_message
  317. self.zero = zero
  318. self.sort = sort
  319. def options(self, zero=True):
  320. if not self.labels:
  321. items = [(k, k) for (i, k) in enumerate(self.theset)]
  322. else:
  323. items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
  324. if self.sort:
  325. items.sort(options_sorter)
  326. if zero and not self.zero is None and not self.multiple:
  327. items.insert(0, ('', self.zero))
  328. return items
  329. def __call__(self, value):
  330. if self.multiple:
  331. ### if below was values = re.compile("[\w\-:]+").findall(str(value))
  332. if not value:
  333. values = []
  334. elif isinstance(value, (tuple, list)):
  335. values = value
  336. else:
  337. values = [value]
  338. else:
  339. values = [value]
  340. thestrset = [str(x) for x in self.theset]
  341. failures = [x for x in values if not str(x) in thestrset]
  342. if failures and self.theset:
  343. if self.multiple and (value is None or value == ''):
  344. return ([], None)
  345. return (value, translate(self.error_message))
  346. if self.multiple:
  347. if isinstance(self.multiple, (tuple, list)) and \
  348. not self.multiple[0] <= len(values) < self.multiple[1]:
  349. return (values, translate(self.error_message))
  350. return (values, None)
  351. return (value, None)
  352. regex1 = re.compile('\w+\.\w+')
  353. regex2 = re.compile('%\((?P<name>[^\)]+)\)s')
  354. class IS_IN_DB(Validator):
  355. """
  356. example::
  357. INPUT(_type='text', _name='name',
  358. requires=IS_IN_DB(db, db.mytable.myfield, zero=''))
  359. used for reference fields, rendered as a dropbox
  360. """
  361. def __init__(
  362. self,
  363. dbset,
  364. field,
  365. label=None,
  366. error_message='value not in database',
  367. orderby=None,
  368. groupby=None,
  369. distinct=None,
  370. cache=None,
  371. multiple=False,
  372. zero='',
  373. sort=False,
  374. _and=None,
  375. ):
  376. from dal import Table
  377. if isinstance(field, Table):
  378. field = field._id
  379. if hasattr(dbset, 'define_table'):
  380. self.dbset = dbset()
  381. else:
  382. self.dbset = dbset
  383. (ktable, kfield) = str(field).split('.')
  384. if not label:
  385. label = '%%(%s)s' % kfield
  386. if isinstance(label, str):
  387. if regex1.match(str(label)):
  388. label = '%%(%s)s' % str(label).split('.')[-1]
  389. ks = regex2.findall(label)
  390. if not kfield in ks:
  391. ks += [kfield]
  392. fields = ks
  393. else:
  394. ks = [kfield]
  395. fields = 'all'
  396. self.fields = fields
  397. self.label = label
  398. self.ktable = ktable
  399. self.kfield = kfield
  400. self.ks = ks
  401. self.error_message = error_message
  402. self.theset = None
  403. self.orderby = orderby
  404. self.groupby = groupby
  405. self.distinct = distinct
  406. self.cache = cache
  407. self.multiple = multiple
  408. self.zero = zero
  409. self.sort = sort
  410. self._and = _and
  411. def set_self_id(self, id):
  412. if self._and:
  413. self._and.record_id = id
  414. def build_set(self):
  415. table = self.dbset.db[self.ktable]
  416. if self.fields == 'all':
  417. fields = [f for f in table]
  418. else:
  419. fields = [table[k] for k in self.fields]
  420. ignore = (FieldVirtual,FieldMethod)
  421. fields = filter(lambda f:not isinstance(f,ignore), fields)
  422. if self.dbset.db._dbname != 'gae':
  423. orderby = self.orderby or reduce(lambda a, b: a | b, fields)
  424. groupby = self.groupby
  425. distinct = self.distinct
  426. dd = dict(orderby=orderby, groupby=groupby,
  427. distinct=distinct, cache=self.cache,
  428. cacheable=True)
  429. records = self.dbset(table).select(*fields, **dd)
  430. else:
  431. orderby = self.orderby or \
  432. reduce(lambda a, b: a | b, (
  433. f for f in fields if not f.name == 'id'))
  434. dd = dict(orderby=orderby, cache=self.cache, cacheable=True)
  435. records = self.dbset(table).select(table.ALL, **dd)
  436. self.theset = [str(r[self.kfield]) for r in records]
  437. if isinstance(self.label, str):
  438. self.labels = [self.label % dict(r) for r in records]
  439. else:
  440. self.labels = [self.label(r) for r in records]
  441. def options(self, zero=True):
  442. self.build_set()
  443. items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
  444. if self.sort:
  445. items.sort(options_sorter)
  446. if zero and not self.zero is None and not self.multiple:
  447. items.insert(0, ('', self.zero))
  448. return items
  449. def __call__(self, value):
  450. table = self.dbset.db[self.ktable]
  451. field = table[self.kfield]
  452. if self.multiple:
  453. if self._and:
  454. raise NotImplementedError
  455. if isinstance(value, list):
  456. values = value
  457. elif value:
  458. values = [value]
  459. else:
  460. values = []
  461. if isinstance(self.multiple, (tuple, list)) and \
  462. not self.multiple[0] <= len(values) < self.multiple[1]:
  463. return (values, translate(self.error_message))
  464. if self.theset:
  465. if not [v for v in values if not v in self.theset]:
  466. return (values, None)
  467. else:
  468. from dal import GoogleDatastoreAdapter
  469. def count(values, s=self.dbset, f=field):
  470. return s(f.belongs(map(int, values))).count()
  471. if isinstance(self.dbset.db._adapter, GoogleDatastoreAdapter):
  472. range_ids = range(0, len(values), 30)
  473. total = sum(count(values[i:i + 30]) for i in range_ids)
  474. if total == len(values):
  475. return (values, None)
  476. elif count(values) == len(values):
  477. return (values, None)
  478. elif self.theset:
  479. if str(value) in self.theset:
  480. if self._and:
  481. return self._and(value)
  482. else:
  483. return (value, None)
  484. else:
  485. if self.dbset(field == value).count():
  486. if self._and:
  487. return self._and(value)
  488. else:
  489. return (value, None)
  490. return (value, translate(self.error_message))
  491. class IS_NOT_IN_DB(Validator):
  492. """
  493. example::
  494. INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table))
  495. makes the field unique
  496. """
  497. def __init__(
  498. self,
  499. dbset,
  500. field,
  501. error_message='value already in database or empty',
  502. allowed_override=[],
  503. ignore_common_filters=False,
  504. ):
  505. from dal import Table
  506. if isinstance(field, Table):
  507. field = field._id
  508. if hasattr(dbset, 'define_table'):
  509. self.dbset = dbset()
  510. else:
  511. self.dbset = dbset
  512. self.field = field
  513. self.error_message = error_message
  514. self.record_id = 0
  515. self.allowed_override = allowed_override
  516. self.ignore_common_filters = ignore_common_filters
  517. def set_self_id(self, id):
  518. self.record_id = id
  519. def __call__(self, value):
  520. if isinstance(value,unicode):
  521. value = value.encode('utf8')
  522. else:
  523. value = str(value)
  524. if not value.strip():
  525. return (value, translate(self.error_message))
  526. if value in self.allowed_override:
  527. return (value, None)
  528. (tablename, fieldname) = str(self.field).split('.')
  529. table = self.dbset.db[tablename]
  530. field = table[fieldname]
  531. subset = self.dbset(field == value,
  532. ignore_common_filters=self.ignore_common_filters)
  533. id = self.record_id
  534. if isinstance(id, dict):
  535. fields = [table[f] for f in id]
  536. row = subset.select(*fields, **dict(limitby=(0, 1))).first()
  537. if row and any(str(row[f]) != str(id[f]) for f in id):
  538. return (value, translate(self.error_message))
  539. else:
  540. row = subset.select(table._id, limitby=(0, 1)).first()
  541. if row and str(row.id) != str(id):
  542. return (value, translate(self.error_message))
  543. return (value, None)
  544. class IS_INT_IN_RANGE(Validator):
  545. """
  546. Determine that the argument is (or can be represented as) an int,
  547. and that it falls within the specified range. The range is interpreted
  548. in the Pythonic way, so the test is: min <= value < max.
  549. The minimum and maximum limits can be None, meaning no lower or upper limit,
  550. respectively.
  551. example::
  552. INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10))
  553. >>> IS_INT_IN_RANGE(1,5)('4')
  554. (4, None)
  555. >>> IS_INT_IN_RANGE(1,5)(4)
  556. (4, None)
  557. >>> IS_INT_IN_RANGE(1,5)(1)
  558. (1, None)
  559. >>> IS_INT_IN_RANGE(1,5)(5)
  560. (5, 'enter an integer between 1 and 4')
  561. >>> IS_INT_IN_RANGE(1,5)(5)
  562. (5, 'enter an integer between 1 and 4')
  563. >>> IS_INT_IN_RANGE(1,5)(3.5)
  564. (3, 'enter an integer between 1 and 4')
  565. >>> IS_INT_IN_RANGE(None,5)('4')
  566. (4, None)
  567. >>> IS_INT_IN_RANGE(None,5)('6')
  568. (6, 'enter an integer less than or equal to 4')
  569. >>> IS_INT_IN_RANGE(1,None)('4')
  570. (4, None)
  571. >>> IS_INT_IN_RANGE(1,None)('0')
  572. (0, 'enter an integer greater than or equal to 1')
  573. >>> IS_INT_IN_RANGE()(6)
  574. (6, None)
  575. >>> IS_INT_IN_RANGE()('abc')
  576. ('abc', 'enter an integer')
  577. """
  578. def __init__(
  579. self,
  580. minimum=None,
  581. maximum=None,
  582. error_message=None,
  583. ):
  584. self.minimum = self.maximum = None
  585. if minimum is None:
  586. if maximum is None:
  587. self.error_message = error_message or 'enter an integer'
  588. else:
  589. self.maximum = int(maximum)
  590. if error_message is None:
  591. error_message = 'enter an integer less than or equal to %(max)g'
  592. self.error_message = translate(
  593. error_message) % dict(max=self.maximum - 1)
  594. elif maximum is None:
  595. self.minimum = int(minimum)
  596. if error_message is None:
  597. error_message = 'enter an integer greater than or equal to %(min)g'
  598. self.error_message = translate(
  599. error_message) % dict(min=self.minimum)
  600. else:
  601. self.minimum = int(minimum)
  602. self.maximum = int(maximum)
  603. if error_message is None:
  604. error_message = 'enter an integer between %(min)g and %(max)g'
  605. self.error_message = translate(error_message) \
  606. % dict(min=self.minimum, max=self.maximum - 1)
  607. def __call__(self, value):
  608. try:
  609. fvalue = float(value)
  610. value = int(value)
  611. if value != fvalue:
  612. return (value, self.error_message)
  613. if self.minimum is None:
  614. if self.maximum is None or value < self.maximum:
  615. return (value, None)
  616. elif self.maximum is None:
  617. if value >= self.minimum:
  618. return (value, None)
  619. elif self.minimum <= value < self.maximum:
  620. return (value, None)
  621. except ValueError:
  622. pass
  623. return (value, self.error_message)
  624. def str2dec(number):
  625. s = str(number)
  626. if not '.' in s:
  627. s += '.00'
  628. else:
  629. s += '0' * (2 - len(s.split('.')[1]))
  630. return s
  631. class IS_FLOAT_IN_RANGE(Validator):
  632. """
  633. Determine that the argument is (or can be represented as) a float,
  634. and that it falls within the specified inclusive range.
  635. The comparison is made with native arithmetic.
  636. The minimum and maximum limits can be None, meaning no lower or upper limit,
  637. respectively.
  638. example::
  639. INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10))
  640. >>> IS_FLOAT_IN_RANGE(1,5)('4')
  641. (4.0, None)
  642. >>> IS_FLOAT_IN_RANGE(1,5)(4)
  643. (4.0, None)
  644. >>> IS_FLOAT_IN_RANGE(1,5)(1)
  645. (1.0, None)
  646. >>> IS_FLOAT_IN_RANGE(1,5)(5.25)
  647. (5.25, 'enter a number between 1 and 5')
  648. >>> IS_FLOAT_IN_RANGE(1,5)(6.0)
  649. (6.0, 'enter a number between 1 and 5')
  650. >>> IS_FLOAT_IN_RANGE(1,5)(3.5)
  651. (3.5, None)
  652. >>> IS_FLOAT_IN_RANGE(1,None)(3.5)
  653. (3.5, None)
  654. >>> IS_FLOAT_IN_RANGE(None,5)(3.5)
  655. (3.5, None)
  656. >>> IS_FLOAT_IN_RANGE(1,None)(0.5)
  657. (0.5, 'enter a number greater than or equal to 1')
  658. >>> IS_FLOAT_IN_RANGE(None,5)(6.5)
  659. (6.5, 'enter a number less than or equal to 5')
  660. >>> IS_FLOAT_IN_RANGE()(6.5)
  661. (6.5, None)
  662. >>> IS_FLOAT_IN_RANGE()('abc')
  663. ('abc', 'enter a number')
  664. """
  665. def __init__(
  666. self,
  667. minimum=None,
  668. maximum=None,
  669. error_message=None,
  670. dot='.'
  671. ):
  672. self.minimum = self.maximum = None
  673. self.dot = dot
  674. if minimum is None:
  675. if maximum is None:
  676. if error_message is None:
  677. error_message = 'enter a number'
  678. else:
  679. self.maximum = float(maximum)
  680. if error_message is None:
  681. error_message = 'enter a number less than or equal to %(max)g'
  682. elif maximum is None:
  683. self.minimum = float(minimum)
  684. if error_message is None:
  685. error_message = 'enter a number greater than or equal to %(min)g'
  686. else:
  687. self.minimum = float(minimum)
  688. self.maximum = float(maximum)
  689. if error_message is None:
  690. error_message = 'enter a number between %(min)g and %(max)g'
  691. self.error_message = translate(error_message) \
  692. % dict(min=self.minimum, max=self.maximum)
  693. def __call__(self, value):
  694. try:
  695. if self.dot == '.':
  696. fvalue = float(value)
  697. else:
  698. fvalue = float(str(value).replace(self.dot, '.'))
  699. if self.minimum is None:
  700. if self.maximum is None or fvalue <= self.maximum:
  701. return (fvalue, None)
  702. elif self.maximum is None:
  703. if fvalue >= self.minimum:
  704. return (fvalue, None)
  705. elif self.minimum <= fvalue <= self.maximum:
  706. return (fvalue, None)
  707. except (ValueError, TypeError):
  708. pass
  709. return (value, self.error_message)
  710. def formatter(self, value):
  711. if value is None:
  712. return None
  713. return str2dec(value).replace('.', self.dot)
  714. class IS_DECIMAL_IN_RANGE(Validator):
  715. """
  716. Determine that the argument is (or can be represented as) a Python Decimal,
  717. and that it falls within the specified inclusive range.
  718. The comparison is made with Python Decimal arithmetic.
  719. The minimum and maximum limits can be None, meaning no lower or upper limit,
  720. respectively.
  721. example::
  722. INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10))
  723. >>> IS_DECIMAL_IN_RANGE(1,5)('4')
  724. (Decimal('4'), None)
  725. >>> IS_DECIMAL_IN_RANGE(1,5)(4)
  726. (Decimal('4'), None)
  727. >>> IS_DECIMAL_IN_RANGE(1,5)(1)
  728. (Decimal('1'), None)
  729. >>> IS_DECIMAL_IN_RANGE(1,5)(5.25)
  730. (5.25, 'enter a number between 1 and 5')
  731. >>> IS_DECIMAL_IN_RANGE(5.25,6)(5.25)
  732. (Decimal('5.25'), None)
  733. >>> IS_DECIMAL_IN_RANGE(5.25,6)('5.25')
  734. (Decimal('5.25'), None)
  735. >>> IS_DECIMAL_IN_RANGE(1,5)(6.0)
  736. (6.0, 'enter a number between 1 and 5')
  737. >>> IS_DECIMAL_IN_RANGE(1,5)(3.5)
  738. (Decimal('3.5'), None)
  739. >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5)
  740. (Decimal('3.5'), None)
  741. >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5)
  742. (6.5, 'enter a number between 1.5 and 5.5')
  743. >>> IS_DECIMAL_IN_RANGE(1.5,None)(6.5)
  744. (Decimal('6.5'), None)
  745. >>> IS_DECIMAL_IN_RANGE(1.5,None)(0.5)
  746. (0.5, 'enter a number greater than or equal to 1.5')
  747. >>> IS_DECIMAL_IN_RANGE(None,5.5)(4.5)
  748. (Decimal('4.5'), None)
  749. >>> IS_DECIMAL_IN_RANGE(None,5.5)(6.5)
  750. (6.5, 'enter a number less than or equal to 5.5')
  751. >>> IS_DECIMAL_IN_RANGE()(6.5)
  752. (Decimal('6.5'), None)
  753. >>> IS_DECIMAL_IN_RANGE(0,99)(123.123)
  754. (123.123, 'enter a number between 0 and 99')
  755. >>> IS_DECIMAL_IN_RANGE(0,99)('123.123')
  756. ('123.123', 'enter a number between 0 and 99')
  757. >>> IS_DECIMAL_IN_RANGE(0,99)('12.34')
  758. (Decimal('12.34'), None)
  759. >>> IS_DECIMAL_IN_RANGE()('abc')
  760. ('abc', 'enter a decimal number')
  761. """
  762. def __init__(
  763. self,
  764. minimum=None,
  765. maximum=None,
  766. error_message=None,
  767. dot='.'
  768. ):
  769. self.minimum = self.maximum = None
  770. self.dot = dot
  771. if minimum is None:
  772. if maximum is None:
  773. if error_message is None:
  774. error_message = 'enter a decimal number'
  775. else:
  776. self.maximum = decimal.Decimal(str(maximum))
  777. if error_message is None:
  778. error_message = 'enter a number less than or equal to %(max)g'
  779. elif maximum is None:
  780. self.minimum = decimal.Decimal(str(minimum))
  781. if error_message is None:
  782. error_message = 'enter a number greater than or equal to %(min)g'
  783. else:
  784. self.minimum = decimal.Decimal(str(minimum))
  785. self.maximum = decimal.Decimal(str(maximum))
  786. if error_message is None:
  787. error_message = 'enter a number between %(min)g and %(max)g'
  788. self.error_message = translate(error_message) \
  789. % dict(min=self.minimum, max=self.maximum)
  790. def __call__(self, value):
  791. try:
  792. if isinstance(value, decimal.Decimal):
  793. v = value
  794. else:
  795. v = decimal.Decimal(str(value).replace(self.dot, '.'))
  796. if self.minimum is None:
  797. if self.maximum is None or v <= self.maximum:
  798. return (v, None)
  799. elif self.maximum is None:
  800. if v >= self.minimum:
  801. return (v, None)
  802. elif self.minimum <= v <= self.maximum:
  803. return (v, None)
  804. except (ValueError, TypeError, decimal.InvalidOperation):
  805. pass
  806. return (value, self.error_message)
  807. def formatter(self, value):
  808. if value is None:
  809. return None
  810. return str2dec(value).replace('.', self.dot)
  811. def is_empty(value, empty_regex=None):
  812. "test empty field"
  813. if isinstance(value, (str, unicode)):
  814. value = value.strip()
  815. if empty_regex is not None and empty_regex.match(value):
  816. value = ''
  817. if value is None or value == '' or value == []:
  818. return (value, True)
  819. return (value, False)
  820. class IS_NOT_EMPTY(Validator):
  821. """
  822. example::
  823. INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY())
  824. >>> IS_NOT_EMPTY()(1)
  825. (1, None)
  826. >>> IS_NOT_EMPTY()(0)
  827. (0, None)
  828. >>> IS_NOT_EMPTY()('x')
  829. ('x', None)
  830. >>> IS_NOT_EMPTY()(' x ')
  831. ('x', None)
  832. >>> IS_NOT_EMPTY()(None)
  833. (None, 'enter a value')
  834. >>> IS_NOT_EMPTY()('')
  835. ('', 'enter a value')
  836. >>> IS_NOT_EMPTY()(' ')
  837. ('', 'enter a value')
  838. >>> IS_NOT_EMPTY()(' \\n\\t')
  839. ('', 'enter a value')
  840. >>> IS_NOT_EMPTY()([])
  841. ([], 'enter a value')
  842. >>> IS_NOT_EMPTY(empty_regex='def')('def')
  843. ('', 'enter a value')
  844. >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg')
  845. ('', 'enter a value')
  846. >>> IS_NOT_EMPTY(empty_regex='def')('abc')
  847. ('abc', None)
  848. """
  849. def __init__(self, error_message='enter a value', empty_regex=None):
  850. self.error_message = error_message
  851. if empty_regex is not None:
  852. self.empty_regex = re.compile(empty_regex)
  853. else:
  854. self.empty_regex = None
  855. def __call__(self, value):
  856. value, empty = is_empty(value, empty_regex=self.empty_regex)
  857. if empty:
  858. return (value, translate(self.error_message))
  859. return (value, None)
  860. class IS_ALPHANUMERIC(IS_MATCH):
  861. """
  862. example::
  863. INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC())
  864. >>> IS_ALPHANUMERIC()('1')
  865. ('1', None)
  866. >>> IS_ALPHANUMERIC()('')
  867. ('', None)
  868. >>> IS_ALPHANUMERIC()('A_a')
  869. ('A_a', None)
  870. >>> IS_ALPHANUMERIC()('!')
  871. ('!', 'enter only letters, numbers, and underscore')
  872. """
  873. def __init__(self, error_message='enter only letters, numbers, and underscore'):
  874. IS_MATCH.__init__(self, '^[\w]*$', error_message)
  875. class IS_EMAIL(Validator):
  876. """
  877. Checks if field's value is a valid email address. Can be set to disallow
  878. or force addresses from certain domain(s).
  879. Email regex adapted from
  880. http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx,
  881. generally following the RFCs, except that we disallow quoted strings
  882. and permit underscores and leading numerics in subdomain labels
  883. Arguments:
  884. - banned: regex text for disallowed address domains
  885. - forced: regex text for required address domains
  886. Both arguments can also be custom objects with a match(value) method.
  887. Examples::
  888. #Check for valid email address:
  889. INPUT(_type='text', _name='name',
  890. requires=IS_EMAIL())
  891. #Check for valid email address that can't be from a .com domain:
  892. INPUT(_type='text', _name='name',
  893. requires=IS_EMAIL(banned='^.*\.com(|\..*)$'))
  894. #Check for valid email address that must be from a .edu domain:
  895. INPUT(_type='text', _name='name',
  896. requires=IS_EMAIL(forced='^.*\.edu(|\..*)$'))
  897. >>> IS_EMAIL()('a@b.com')
  898. ('a@b.com', None)
  899. >>> IS_EMAIL()('abc@def.com')
  900. ('abc@def.com', None)
  901. >>> IS_EMAIL()('abc@3def.com')
  902. ('abc@3def.com', None)
  903. >>> IS_EMAIL()('abc@def.us')
  904. ('abc@def.us', None)
  905. >>> IS_EMAIL()('abc@d_-f.us')
  906. ('abc@d_-f.us', None)
  907. >>> IS_EMAIL()('@def.com') # missing name
  908. ('@def.com', 'enter a valid email address')
  909. >>> IS_EMAIL()('"abc@def".com') # quoted name
  910. ('"abc@def".com', 'enter a valid email address')
  911. >>> IS_EMAIL()('abc+def.com') # no @
  912. ('abc+def.com', 'enter a valid email address')
  913. >>> IS_EMAIL()('abc@def.x') # one-char TLD
  914. ('abc@def.x', 'enter a valid email address')
  915. >>> IS_EMAIL()('abc@def.12') # numeric TLD
  916. ('abc@def.12', 'enter a valid email address')
  917. >>> IS_EMAIL()('abc@def..com') # double-dot in domain
  918. ('abc@def..com', 'enter a valid email address')
  919. >>> IS_EMAIL()('abc@.def.com') # dot starts domain
  920. ('abc@.def.com', 'enter a valid email address')
  921. >>> IS_EMAIL()('abc@def.c_m') # underscore in TLD
  922. ('abc@def.c_m', 'enter a valid email address')
  923. >>> IS_EMAIL()('NotAnEmail') # missing @
  924. ('NotAnEmail', 'enter a valid email address')
  925. >>> IS_EMAIL()('abc@NotAnEmail') # missing TLD
  926. ('abc@NotAnEmail', 'enter a valid email address')
  927. >>> IS_EMAIL()('customer/department@example.com')
  928. ('customer/department@example.com', None)
  929. >>> IS_EMAIL()('$A12345@example.com')
  930. ('$A12345@example.com', None)
  931. >>> IS_EMAIL()('!def!xyz%abc@example.com')
  932. ('!def!xyz%abc@example.com', None)
  933. >>> IS_EMAIL()('_Yosemite.Sam@example.com')
  934. ('_Yosemite.Sam@example.com', None)
  935. >>> IS_EMAIL()('~@example.com')
  936. ('~@example.com', None)
  937. >>> IS_EMAIL()('.wooly@example.com') # dot starts name
  938. ('.wooly@example.com', 'enter a valid email address')
  939. >>> IS_EMAIL()('wo..oly@example.com') # adjacent dots in name
  940. ('wo..oly@example.com', 'enter a valid email address')
  941. >>> IS_EMAIL()('pootietang.@example.com') # dot ends name
  942. ('pootietang.@example.com', 'enter a valid email address')
  943. >>> IS_EMAIL()('.@example.com') # name is bare dot
  944. ('.@example.com', 'enter a valid email address')
  945. >>> IS_EMAIL()('Ima.Fool@example.com')
  946. ('Ima.Fool@example.com', None)
  947. >>> IS_EMAIL()('Ima Fool@example.com') # space in name
  948. ('Ima Fool@example.com', 'enter a valid email address')
  949. >>> IS_EMAIL()('localguy@localhost') # localhost as domain
  950. ('localguy@localhost', None)
  951. """
  952. regex = re.compile('''
  953. ^(?!\.) # name may not begin with a dot
  954. (
  955. [-a-z0-9!\#$%&'*+/=?^_`{|}~] # all legal characters except dot
  956. |
  957. (?<!\.)\. # single dots only
  958. )+
  959. (?<!\.) # name may not end with a dot
  960. @
  961. (
  962. localhost
  963. |
  964. (
  965. [a-z0-9]
  966. # [sub]domain begins with alphanumeric
  967. (
  968. [-\w]* # alphanumeric, underscore, dot, hyphen
  969. [a-z0-9] # ending alphanumeric
  970. )?
  971. \. # ending dot
  972. )+
  973. [a-z]{2,} # TLD alpha-only
  974. )$
  975. ''', re.VERBOSE | re.IGNORECASE)
  976. regex_proposed_but_failed = re.compile('^([\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)$', re.VERBOSE | re.IGNORECASE)
  977. def __init__(self,
  978. banned=None,
  979. forced=None,
  980. error_message='enter a valid email address'):
  981. if isinstance(banned, str):
  982. banned = re.compile(banned)
  983. if isinstance(forced, str):
  984. forced = re.compile(forced)
  985. self.banned = banned
  986. self.forced = forced
  987. self.error_message = error_message
  988. def __call__(self, value):
  989. match = self.regex.match(value)
  990. if match:
  991. domain = value.split('@')[1]
  992. if (not self.banned or not self.banned.match(domain)) \
  993. and (not self.forced or self.forced.match(domain)):
  994. return (value, None)
  995. return (value, translate(self.error_message))
  996. # URL scheme source:
  997. # <http://en.wikipedia.org/wiki/URI_scheme> obtained on 2008-Nov-10
  998. official_url_schemes = [
  999. 'aaa',
  1000. 'aaas',
  1001. 'acap',
  1002. 'cap',
  1003. 'cid',
  1004. 'crid',
  1005. 'data',
  1006. 'dav',
  1007. 'dict',
  1008. 'dns',
  1009. 'fax',
  1010. 'file',
  1011. 'ftp',
  1012. 'go',
  1013. 'gopher',
  1014. 'h323',
  1015. 'http',
  1016. 'https',
  1017. 'icap',
  1018. 'im',
  1019. 'imap',
  1020. 'info',
  1021. 'ipp',
  1022. 'iris',
  1023. 'iris.beep',
  1024. 'iris.xpc',
  1025. 'iris.xpcs',
  1026. 'iris.lws',
  1027. 'ldap',
  1028. 'mailto',
  1029. 'mid',
  1030. 'modem',
  1031. 'msrp',
  1032. 'msrps',
  1033. 'mtqp',
  1034. 'mupdate',
  1035. 'news',
  1036. 'nfs',
  1037. 'nntp',
  1038. 'opaquelocktoken',
  1039. 'pop',
  1040. 'pres',
  1041. 'prospero',
  1042. 'rtsp',
  1043. 'service',
  1044. 'shttp',
  1045. 'sip',
  1046. 'sips',
  1047. 'snmp',
  1048. 'soap.beep',
  1049. 'soap.beeps',
  1050. 'tag',
  1051. 'tel',
  1052. 'telnet',
  1053. 'tftp',
  1054. 'thismessage',
  1055. 'tip',
  1056. 'tv',
  1057. 'urn',
  1058. 'vemmi',
  1059. 'wais',
  1060. 'xmlrpc.beep',
  1061. 'xmlrpc.beep',
  1062. 'xmpp',
  1063. 'z39.50r',
  1064. 'z39.50s',
  1065. ]
  1066. unofficial_url_schemes = [
  1067. 'about',
  1068. 'adiumxtra',
  1069. 'aim',
  1070. 'afp',
  1071. 'aw',
  1072. 'callto',
  1073. 'chrome',
  1074. 'cvs',
  1075. 'ed2k',
  1076. 'feed',
  1077. 'fish',
  1078. 'gg',
  1079. 'gizmoproject',
  1080. 'iax2',
  1081. 'irc',
  1082. 'ircs',
  1083. 'itms',
  1084. 'jar',
  1085. 'javascript',
  1086. 'keyparc',
  1087. 'lastfm',
  1088. 'ldaps',
  1089. 'magnet',
  1090. 'mms',
  1091. 'msnim',
  1092. 'mvn',
  1093. 'notes',
  1094. 'nsfw',
  1095. 'psyc',
  1096. 'paparazzi:http',
  1097. 'rmi',
  1098. 'rsync',
  1099. 'secondlife',
  1100. 'sgn',
  1101. 'skype',
  1102. 'ssh',
  1103. 'sftp',
  1104. 'smb',
  1105. 'sms',
  1106. 'soldat',
  1107. 'steam',
  1108. 'svn',
  1109. 'teamspeak',
  1110. 'unreal',
  1111. 'ut2004',
  1112. 'ventrilo',
  1113. 'view-source',
  1114. 'webcal',
  1115. 'wyciwyg',
  1116. 'xfire',
  1117. 'xri',
  1118. 'ymsgr',
  1119. ]
# Default allowed_schemes of IS_GENERIC_URL. The leading None stands for
# "no scheme at all", i.e. an abbreviated URL such as 'abc.com'.
all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes
# Default allowed_schemes of IS_HTTP_URL, and the only values it accepts in
# a custom allowed_schemes list; None again means "no scheme given".
http_schemes = [None, 'http', 'https']

# This regex comes from RFC 2396, Appendix B. It's used to split a URL into
# its component parts
# Here are the regex groups that it extracts:
#    scheme = group(2)
#    authority = group(4)
#    path = group(5)
#    query = group(7)
#    fragment = group(9)
# Groups 1, 3, 6 and 8 are the same components together with their
# delimiter ('scheme:', '//authority', '?query', '#fragment'). Every part is
# optional, so the pattern matches any string.
url_split_regex = \
    re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?')

# Defined in RFC 3490, Section 3.1, Requirement #1
# Use this regex to split the authority component of a unicode URL into
# its component labels
# The character class holds the four code points treated as a label
# separator: U+002E, U+3002, U+FF0E and U+FF61.
label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]')
  1136. def escape_unicode(string):
  1137. '''
  1138. Converts a unicode string into US-ASCII, using a simple conversion scheme.
  1139. Each unicode character that does not have a US-ASCII equivalent is
  1140. converted into a URL escaped form based on its hexadecimal value.
  1141. For example, the unicode character '\u4e86' will become the string '%4e%86'
  1142. :param string: unicode string, the unicode string to convert into an
  1143. escaped US-ASCII form
  1144. :returns: the US-ASCII escaped form of the inputted string
  1145. :rtype: string
  1146. @author: Jonathan Benn
  1147. '''
  1148. returnValue = StringIO()
  1149. for character in string:
  1150. code = ord(character)
  1151. if code > 0x7F:
  1152. hexCode = hex(code)
  1153. returnValue.write('%' + hexCode[2:4] + '%' + hexCode[4:6])
  1154. else:
  1155. returnValue.write(character)
  1156. return returnValue.getvalue()
  1157. def unicode_to_ascii_authority(authority):
  1158. '''
  1159. Follows the steps in RFC 3490, Section 4 to convert a unicode authority
  1160. string into its ASCII equivalent.
  1161. For example, u'www.Alliancefran\xe7aise.nu' will be converted into
  1162. 'www.xn--alliancefranaise-npb.nu'
  1163. :param authority: unicode string, the URL authority component to convert,
  1164. e.g. u'www.Alliancefran\xe7aise.nu'
  1165. :returns: the US-ASCII character equivalent to the inputed authority,
  1166. e.g. 'www.xn--alliancefranaise-npb.nu'
  1167. :rtype: string
  1168. :raises Exception: if the function is not able to convert the inputed
  1169. authority
  1170. @author: Jonathan Benn
  1171. '''
  1172. #RFC 3490, Section 4, Step 1
  1173. #The encodings.idna Python module assumes that AllowUnassigned == True
  1174. #RFC 3490, Section 4, Step 2
  1175. labels = label_split_regex.split(authority)
  1176. #RFC 3490, Section 4, Step 3
  1177. #The encodings.idna Python module assumes that UseSTD3ASCIIRules == False
  1178. #RFC 3490, Section 4, Step 4
  1179. #We use the ToASCII operation because we are about to put the authority
  1180. #into an IDN-unaware slot
  1181. asciiLabels = []
  1182. try:
  1183. import encodings.idna
  1184. for label in labels:
  1185. if label:
  1186. asciiLabels.append(encodings.idna.ToASCII(label))
  1187. else:
  1188. #encodings.idna.ToASCII does not accept an empty string, but
  1189. #it is necessary for us to allow for empty labels so that we
  1190. #don't modify the URL
  1191. asciiLabels.append('')
  1192. except:
  1193. asciiLabels = [str(label) for label in labels]
  1194. #RFC 3490, Section 4, Step 5
  1195. return str(reduce(lambda x, y: x + unichr(0x002E) + y, asciiLabels))
  1196. def unicode_to_ascii_url(url, prepend_scheme):
  1197. '''
  1198. Converts the inputed unicode url into a US-ASCII equivalent. This function
  1199. goes a little beyond RFC 3490, which is limited in scope to the domain name
  1200. (authority) only. Here, the functionality is expanded to what was observed
  1201. on Wikipedia on 2009-Jan-22:
  1202. Component Can Use Unicode?
  1203. --------- ----------------
  1204. scheme No
  1205. authority Yes
  1206. path Yes
  1207. query Yes
  1208. fragment No
  1209. The authority component gets converted to punycode, but occurrences of
  1210. unicode in other components get converted into a pair of URI escapes (we
  1211. assume 4-byte unicode). E.g. the unicode character U+4E2D will be
  1212. converted into '%4E%2D'. Testing with Firefox v3.0.5 has shown that it can
  1213. understand this kind of URI encoding.
  1214. :param url: unicode string, the URL to convert from unicode into US-ASCII
  1215. :param prepend_scheme: string, a protocol scheme to prepend to the URL if
  1216. we're having trouble parsing it.
  1217. e.g. "http". Input None to disable this functionality
  1218. :returns: a US-ASCII equivalent of the inputed url
  1219. :rtype: string
  1220. @author: Jonathan Benn
  1221. '''
  1222. #convert the authority component of the URL into an ASCII punycode string,
  1223. #but encode the rest using the regular URI character encoding
  1224. groups = url_split_regex.match(url).groups()
  1225. #If no authority was found
  1226. if not groups[3]:
  1227. #Try appending a scheme to see if that fixes the problem
  1228. scheme_to_prepend = prepend_scheme or 'http'
  1229. groups = url_split_regex.match(
  1230. unicode(scheme_to_prepend) + u'://' + url).groups()
  1231. #if we still can't find the authority
  1232. if not groups[3]:
  1233. raise Exception('No authority component found, ' +
  1234. 'could not decode unicode to US-ASCII')
  1235. #We're here if we found an authority, let's rebuild the URL
  1236. scheme = groups[1]
  1237. authority = groups[3]
  1238. path = groups[4] or ''
  1239. query = groups[5] or ''
  1240. fragment = groups[7] or ''
  1241. if prepend_scheme:
  1242. scheme = str(scheme) + '://'
  1243. else:
  1244. scheme = ''
  1245. return scheme + unicode_to_ascii_authority(authority) +\
  1246. escape_unicode(path) + escape_unicode(query) + str(fragment)
  1247. class IS_GENERIC_URL(Validator):
  1248. """
  1249. Rejects a URL string if any of the following is true:
  1250. * The string is empty or None
  1251. * The string uses characters that are not allowed in a URL
  1252. * The URL scheme specified (if one is specified) is not valid
  1253. Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html
  1254. This function only checks the URL's syntax. It does not check that the URL
  1255. points to a real document, for example, or that it otherwise makes sense
  1256. semantically. This function does automatically prepend 'http://' in front
  1257. of a URL if and only if that's necessary to successfully parse the URL.
  1258. Please note that a scheme will be prepended only for rare cases
  1259. (e.g. 'google.ca:80')
  1260. The list of allowed schemes is customizable with the allowed_schemes
  1261. parameter. If you exclude None from the list, then abbreviated URLs
  1262. (lacking a scheme such as 'http') will be rejected.
  1263. The default prepended scheme is customizable with the prepend_scheme
  1264. parameter. If you set prepend_scheme to None then prepending will be
  1265. disabled. URLs that require prepending to parse will still be accepted,
  1266. but the return value will not be modified.
  1267. @author: Jonathan Benn
  1268. >>> IS_GENERIC_URL()('http://user@abc.com')
  1269. ('http://user@abc.com', None)
  1270. """
  1271. def __init__(
  1272. self,
  1273. error_message='enter a valid URL',
  1274. allowed_schemes=None,
  1275. prepend_scheme=None,
  1276. ):
  1277. """
  1278. :param error_message: a string, the error message to give the end user
  1279. if the URL does not validate
  1280. :param allowed_schemes: a list containing strings or None. Each element
  1281. is a scheme the inputed URL is allowed to use
  1282. :param prepend_scheme: a string, this scheme is prepended if it's
  1283. necessary to make the URL valid
  1284. """
  1285. self.error_message = error_message
  1286. if allowed_schemes is None:
  1287. self.allowed_schemes = all_url_schemes
  1288. else:
  1289. self.allowed_schemes = allowed_schemes
  1290. self.prepend_scheme = prepend_scheme
  1291. if self.prepend_scheme not in self.allowed_schemes:
  1292. raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s"
  1293. % (self.prepend_scheme, self.allowed_schemes))
  1294. GENERIC_URL = re.compile(r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$")
  1295. GENERIC_URL_VALID = re.compile(r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%#]+$")
  1296. def __call__(self, value):
  1297. """
  1298. :param value: a string, the URL to validate
  1299. :returns: a tuple, where tuple[0] is the inputed value (possible
  1300. prepended with prepend_scheme), and tuple[1] is either
  1301. None (success!) or the string error_message
  1302. """
  1303. try:
  1304. # if the URL does not misuse the '%' character
  1305. if not self.GENERIC_URL.search(value):
  1306. # if the URL is only composed of valid characters
  1307. if self.GENERIC_URL_VALID.match(value):
  1308. # Then split up the URL into its components and check on
  1309. # the scheme
  1310. scheme = url_split_regex.match(value).group(2)
  1311. # Clean up the scheme before we check it
  1312. if not scheme is None:
  1313. scheme = urllib.unquote(scheme).lower()
  1314. # If the scheme really exists
  1315. if scheme in self.allowed_schemes:
  1316. # Then the URL is valid
  1317. return (value, None)
  1318. else:
  1319. # else, for the possible case of abbreviated URLs with
  1320. # ports, check to see if adding a valid scheme fixes
  1321. # the problem (but only do this if it doesn't have
  1322. # one already!)
  1323. if value.find('://') < 0 and None in self.allowed_schemes:
  1324. schemeToUse = self.prepend_scheme or 'http'
  1325. prependTest = self.__call__(
  1326. schemeToUse + '://' + value)
  1327. # if the prepend test succeeded
  1328. if prependTest[1] is None:
  1329. # if prepending in the output is enabled
  1330. if self.prepend_scheme:
  1331. return prependTest
  1332. else:
  1333. # else return the original,
  1334. # non-prepended value
  1335. return (value, None)
  1336. except:
  1337. pass
  1338. # else the URL is not valid
  1339. return (value, translate(self.error_message))
  1340. # Sources (obtained 2008-Nov-11):
  1341. # http://en.wikipedia.org/wiki/Top-level_domain
  1342. # http://www.iana.org/domains/root/db/
  1343. official_top_level_domains = [
  1344. 'ac',
  1345. 'ad',
  1346. 'ae',
  1347. 'aero',
  1348. 'af',
  1349. 'ag',
  1350. 'ai',
  1351. 'al',
  1352. 'am',
  1353. 'an',
  1354. 'ao',
  1355. 'aq',
  1356. 'ar',
  1357. 'arpa',
  1358. 'as',
  1359. 'asia',
  1360. 'at',
  1361. 'au',
  1362. 'aw',
  1363. 'ax',
  1364. 'az',
  1365. 'ba',
  1366. 'bb',
  1367. 'bd',
  1368. 'be',
  1369. 'bf',
  1370. 'bg',
  1371. 'bh',
  1372. 'bi',
  1373. 'biz',
  1374. 'bj',
  1375. 'bl',
  1376. 'bm',
  1377. 'bn',
  1378. 'bo',
  1379. 'br',
  1380. 'bs',
  1381. 'bt',
  1382. 'bv',
  1383. 'bw',
  1384. 'by',
  1385. 'bz',
  1386. 'ca',
  1387. 'cat',
  1388. 'cc',
  1389. 'cd',
  1390. 'cf',
  1391. 'cg',
  1392. 'ch',
  1393. 'ci',
  1394. 'ck',
  1395. 'cl',
  1396. 'cm',
  1397. 'cn',
  1398. 'co',
  1399. 'com',
  1400. 'coop',
  1401. 'cr',
  1402. 'cu',
  1403. 'cv',
  1404. 'cx',
  1405. 'cy',
  1406. 'cz',
  1407. 'de',
  1408. 'dj',
  1409. 'dk',
  1410. 'dm',
  1411. 'do',
  1412. 'dz',
  1413. 'ec',
  1414. 'edu',
  1415. 'ee',
  1416. 'eg',
  1417. 'eh',
  1418. 'er',
  1419. 'es',
  1420. 'et',
  1421. 'eu',
  1422. 'example',
  1423. 'fi',
  1424. 'fj',
  1425. 'fk',
  1426. 'fm',
  1427. 'fo',
  1428. 'fr',
  1429. 'ga',
  1430. 'gb',
  1431. 'gd',
  1432. 'ge',
  1433. 'gf',
  1434. 'gg',
  1435. 'gh',
  1436. 'gi',
  1437. 'gl',
  1438. 'gm',
  1439. 'gn',
  1440. 'gov',
  1441. 'gp',
  1442. 'gq',
  1443. 'gr',
  1444. 'gs',
  1445. 'gt',
  1446. 'gu',
  1447. 'gw',
  1448. 'gy',
  1449. 'hk',
  1450. 'hm',
  1451. 'hn',
  1452. 'hr',
  1453. 'ht',
  1454. 'hu',
  1455. 'id',
  1456. 'ie',
  1457. 'il',
  1458. 'im',
  1459. 'in',
  1460. 'info',
  1461. 'int',
  1462. 'invalid',
  1463. 'io',
  1464. 'iq',
  1465. 'ir',
  1466. 'is',
  1467. 'it',
  1468. 'je',
  1469. 'jm',
  1470. 'jo',
  1471. 'jobs',
  1472. 'jp',
  1473. 'ke',
  1474. 'kg',
  1475. 'kh',
  1476. 'ki',
  1477. 'km',
  1478. 'kn',
  1479. 'kp',
  1480. 'kr',
  1481. 'kw',
  1482. 'ky',
  1483. 'kz',
  1484. 'la',
  1485. 'lb',
  1486. 'lc',
  1487. 'li',
  1488. 'lk',
  1489. 'localhost',
  1490. 'lr',
  1491. 'ls',
  1492. 'lt',
  1493. 'lu',
  1494. 'lv',
  1495. 'ly',
  1496. 'ma',
  1497. 'mc',
  1498. 'md',
  1499. 'me',
  1500. 'mf',
  1501. 'mg',
  1502. 'mh',
  1503. 'mil',
  1504. 'mk',
  1505. 'ml',
  1506. 'mm',
  1507. 'mn',
  1508. 'mo',
  1509. 'mobi',
  1510. 'mp',
  1511. 'mq',
  1512. 'mr',
  1513. 'ms',
  1514. 'mt',
  1515. 'mu',
  1516. 'museum',
  1517. 'mv',
  1518. 'mw',
  1519. 'mx',
  1520. 'my',
  1521. 'mz',
  1522. 'na',
  1523. 'name',
  1524. 'nc',
  1525. 'ne',
  1526. 'net',
  1527. 'nf',
  1528. 'ng',
  1529. 'ni',
  1530. 'nl',
  1531. 'no',
  1532. 'np',
  1533. 'nr',
  1534. 'nu',
  1535. 'nz',
  1536. 'om',
  1537. 'org',
  1538. 'pa',
  1539. 'pe',
  1540. 'pf',
  1541. 'pg',
  1542. 'ph',
  1543. 'pk',
  1544. 'pl',
  1545. 'pm',
  1546. 'pn',
  1547. 'pr',
  1548. 'pro',
  1549. 'ps',
  1550. 'pt',
  1551. 'pw',
  1552. 'py',
  1553. 'qa',
  1554. 're',
  1555. 'ro',
  1556. 'rs',
  1557. 'ru',
  1558. 'rw',
  1559. 'sa',
  1560. 'sb',
  1561. 'sc',
  1562. 'sd',
  1563. 'se',
  1564. 'sg',
  1565. 'sh',
  1566. 'si',
  1567. 'sj',
  1568. 'sk',
  1569. 'sl',
  1570. 'sm',
  1571. 'sn',
  1572. 'so',
  1573. 'sr',
  1574. 'st',
  1575. 'su',
  1576. 'sv',
  1577. 'sy',
  1578. 'sz',
  1579. 'tc',
  1580. 'td',
  1581. 'tel',
  1582. 'test',
  1583. 'tf',
  1584. 'tg',
  1585. 'th',
  1586. 'tj',
  1587. 'tk',
  1588. 'tl',
  1589. 'tm',
  1590. 'tn',
  1591. 'to',
  1592. 'tp',
  1593. 'tr',
  1594. 'travel',
  1595. 'tt',
  1596. 'tv',
  1597. 'tw',
  1598. 'tz',
  1599. 'ua',
  1600. 'ug',
  1601. 'uk',
  1602. 'um',
  1603. 'us',
  1604. 'uy',
  1605. 'uz',
  1606. 'va',
  1607. 'vc',
  1608. 've',
  1609. 'vg',
  1610. 'vi',
  1611. 'vn',
  1612. 'vu',
  1613. 'wf',
  1614. 'ws',
  1615. 'xn--0zwm56d',
  1616. 'xn--11b5bs3a9aj6g',
  1617. 'xn--80akhbyknj4f',
  1618. 'xn--9t4b11yi5a',
  1619. 'xn--deba0ad',
  1620. 'xn--g6w251d',
  1621. 'xn--hgbk6aj7f53bba',
  1622. 'xn--hlcj6aya9esc7a',
  1623. 'xn--jxalpdlp',
  1624. 'xn--kgbechtv',
  1625. 'xn--p1ai',
  1626. 'xn--zckzah',
  1627. 'ye',
  1628. 'yt',
  1629. 'yu',
  1630. 'za',
  1631. 'zm',
  1632. 'zw',
  1633. ]
  1634. class IS_HTTP_URL(Validator):
  1635. """
  1636. Rejects a URL string if any of the following is true:
  1637. * The string is empty or None
  1638. * The string uses characters that are not allowed in a URL
  1639. * The string breaks any of the HTTP syntactic rules
  1640. * The URL scheme specified (if one is specified) is not 'http' or 'https'
  1641. * The top-level domain (if a host name is specified) does not exist
  1642. Based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html
  1643. This function only checks the URL's syntax. It does not check that the URL
  1644. points to a real document, for example, or that it otherwise makes sense
  1645. semantically. This function does automatically prepend 'http://' in front
  1646. of a URL in the case of an abbreviated URL (e.g. 'google.ca').
  1647. The list of allowed schemes is customizable with the allowed_schemes
  1648. parameter. If you exclude None from the list, then abbreviated URLs
  1649. (lacking a scheme such as 'http') will be rejected.
  1650. The default prepended scheme is customizable with the prepend_scheme
  1651. parameter. If you set prepend_scheme to None then prepending will be
  1652. disabled. URLs that require prepending to parse will still be accepted,
  1653. but the return value will not be modified.
  1654. @author: Jonathan Benn
  1655. >>> IS_HTTP_URL()('http://1.2.3.4')
  1656. ('http://1.2.3.4', None)
  1657. >>> IS_HTTP_URL()('http://abc.com')
  1658. ('http://abc.com', None)
  1659. >>> IS_HTTP_URL()('https://abc.com')
  1660. ('https://abc.com', None)
  1661. >>> IS_HTTP_URL()('httpx://abc.com')
  1662. ('httpx://abc.com', 'enter a valid URL')
  1663. >>> IS_HTTP_URL()('http://abc.com:80')
  1664. ('http://abc.com:80', None)
  1665. >>> IS_HTTP_URL()('http://user@abc.com')
  1666. ('http://user@abc.com', None)
  1667. >>> IS_HTTP_URL()('http://user@1.2.3.4')
  1668. ('http://user@1.2.3.4', None)
  1669. """
  1670. GENERIC_VALID_IP = re.compile(
  1671. "([\w.!~*'|;:&=+$,-]+@)?\d+\.\d+\.\d+\.\d+(:\d*)*$")
  1672. GENERIC_VALID_DOMAIN = re.compile("([\w.!~*'|;:&=+$,-]+@)?(([A-Za-z0-9]+[A-Za-z0-9\-]*[A-Za-z0-9]+\.)*([A-Za-z0-9]+\.)*)*([A-Za-z]+[A-Za-z0-9\-]*[A-Za-z0-9]+)\.?(:\d*)*$")
  1673. def __init__(
  1674. self,
  1675. error_message='enter a valid URL',
  1676. allowed_schemes=None,
  1677. prepend_scheme='http',
  1678. ):
  1679. """
  1680. :param error_message: a string, the error message to give the end user
  1681. if the URL does not validate
  1682. :param allowed_schemes: a list containing strings or None. Each element
  1683. is a scheme the inputed URL is allowed to use
  1684. :param prepend_scheme: a string, this scheme is prepended if it's
  1685. necessary to make the URL valid
  1686. """
  1687. self.error_message = error_message
  1688. if allowed_schemes is None:
  1689. self.allowed_schemes = http_schemes
  1690. else:
  1691. self.allowed_schemes = allowed_schemes
  1692. self.prepend_scheme = prepend_scheme
  1693. for i in self.allowed_schemes:
  1694. if i not in http_schemes:
  1695. raise SyntaxError("allowed_scheme value '%s' is not in %s" %
  1696. (i, http_schemes))
  1697. if self.prepend_scheme not in self.allowed_schemes:
  1698. raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s" %
  1699. (self.prepend_scheme, self.allowed_schemes))
  1700. def __call__(self, value):
  1701. """
  1702. :param value: a string, the URL to validate
  1703. :returns: a tuple, where tuple[0] is the inputed value
  1704. (possible prepended with prepend_scheme), and tuple[1] is either
  1705. None (success!) or the string error_message
  1706. """
  1707. try:
  1708. # if the URL passes generic validation
  1709. x = IS_GENERIC_URL(error_message=self.error_message,
  1710. allowed_schemes=self.allowed_schemes,
  1711. prepend_scheme=self.prepend_scheme)
  1712. if x(value)[1] is None:
  1713. componentsMatch = url_split_regex.match(value)
  1714. authority = componentsMatch.group(4)
  1715. # if there is an authority component
  1716. if authority:
  1717. # if authority is a valid IP address
  1718. if self.GENERIC_VALID_IP.match(authority):
  1719. # Then this HTTP URL is valid
  1720. return (value, None)
  1721. else:
  1722. # else if authority is a valid domain name
  1723. domainMatch = self.GENERIC_VALID_DOMAIN.match(
  1724. authority)
  1725. if domainMatch:
  1726. # if the top-level domain really exists
  1727. if domainMatch.group(5).lower()\
  1728. in official_top_level_domains:
  1729. # Then this HTTP URL is valid
  1730. return (value, None)
  1731. else:
  1732. # else this is a relative/abbreviated URL, which will parse
  1733. # into the URL's path component
  1734. path = componentsMatch.group(5)
  1735. # relative case: if this is a valid path (if it starts with
  1736. # a slash)
  1737. if path.startswith('/'):
  1738. # Then this HTTP URL is valid
  1739. return (value, None)
  1740. else:
  1741. # abbreviated case: if we haven't already, prepend a
  1742. # scheme and see if it fixes the problem
  1743. if value.find('://') < 0:
  1744. schemeToUse = self.prepend_scheme or 'http'
  1745. prependTest = self.__call__(schemeToUse
  1746. + '://' + value)
  1747. # if the prepend test succeeded
  1748. if prependTest[1] is None:
  1749. # if prepending in the output is enabled
  1750. if self.prepend_scheme:
  1751. return prependTest
  1752. else:
  1753. # else return the original, non-prepended
  1754. # value
  1755. return (value, None)
  1756. except:
  1757. pass
  1758. # else the HTTP URL is not valid
  1759. return (value, translate(self.error_message))
  1760. class IS_URL(Validator):
  1761. """
  1762. Rejects a URL string if any of the following is true:
  1763. * The string is empty or None
  1764. * The string uses characters that are not allowed in a URL
  1765. * The string breaks any of the HTTP syntactic rules
  1766. * The URL scheme specified (if one is specified) is not 'http' or 'https'
  1767. * The top-level domain (if a host name is specified) does not exist
  1768. (These rules are based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html)
  1769. This function only checks the URL's syntax. It does not check that the URL
  1770. points to a real document, for example, or that it otherwise makes sense
  1771. semantically. This function does automatically prepend 'http://' in front
  1772. of a URL in the case of an abbreviated URL (e.g. 'google.ca').
  1773. If the parameter mode='generic' is used, then this function's behavior
  1774. changes. It then rejects a URL string if any of the following is true:
  1775. * The string is empty or None
  1776. * The string uses characters that are not allowed in a URL
  1777. * The URL scheme specified (if one is specified) is not valid
  1778. (These rules are based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html)
  1779. The list of allowed schemes is customizable with the allowed_schemes
  1780. parameter. If you exclude None from the list, then abbreviated URLs
  1781. (lacking a scheme such as 'http') will be rejected.
  1782. The default prepended scheme is customizable with the prepend_scheme
  1783. parameter. If you set prepend_scheme to None then prepending will be
  1784. disabled. URLs that require prepending to parse will still be accepted,
  1785. but the return value will not be modified.
  1786. IS_URL is compatible with the Internationalized Domain Name (IDN) standard
  1787. specified in RFC 3490 (http://tools.ietf.org/html/rfc3490). As a result,
  1788. URLs can be regular strings or unicode strings.
  1789. If the URL's domain component (e.g. google.ca) contains non-US-ASCII
  1790. letters, then the domain will be converted into Punycode (defined in
  1791. RFC 3492, http://tools.ietf.org/html/rfc3492). IS_URL goes a bit beyond
  1792. the standards, and allows non-US-ASCII characters to be present in the path
  1793. and query components of the URL as well. These non-US-ASCII characters will
  1794. be escaped using the standard '%20' type syntax. e.g. the unicode
  1795. character with hex code 0x4e86 will become '%4e%86'
  1796. Code Examples::
  1797. INPUT(_type='text', _name='name', requires=IS_URL())
  1798. >>> IS_URL()('abc.com')
  1799. ('http://abc.com', None)
  1800. INPUT(_type='text', _name='name', requires=IS_URL(mode='generic'))
  1801. >>> IS_URL(mode='generic')('abc.com')
  1802. ('abc.com', None)
  1803. INPUT(_type='text', _name='name',
  1804. requires=IS_URL(allowed_schemes=['https'], prepend_scheme='https'))
  1805. >>> IS_URL(allowed_schemes=['https'], prepend_scheme='https')('https://abc.com')
  1806. ('https://abc.com', None)
  1807. INPUT(_type='text', _name='name',
  1808. requires=IS_URL(prepend_scheme='https'))
  1809. >>> IS_URL(prepend_scheme='https')('abc.com')
  1810. ('https://abc.com', None)
  1811. INPUT(_type='text', _name='name',
  1812. requires=IS_URL(mode='generic', allowed_schemes=['ftps', 'https'],
  1813. prepend_scheme='https'))
  1814. >>> IS_URL(mode='generic', allowed_schemes=['ftps', 'https'], prepend_scheme='https')('https://abc.com')
  1815. ('https://abc.com', None)
  1816. >>> IS_URL(mode='generic', allowed_schemes=['ftps', 'https', None], prepend_scheme='https')('abc.com')
  1817. ('abc.com', None)
  1818. @author: Jonathan Benn
  1819. """
  1820. def __init__(
  1821. self,
  1822. error_message='enter a valid URL',
  1823. mode='http',
  1824. allowed_schemes=None,
  1825. prepend_scheme='http',
  1826. ):
  1827. """
  1828. :param error_message: a string, the error message to give the end user
  1829. if the URL does not validate
  1830. :param allowed_schemes: a list containing strings or None. Each element
  1831. is a scheme the inputed URL is allowed to use
  1832. :param prepend_scheme: a string, this scheme is prepended if it's
  1833. necessary to make the URL valid
  1834. """
  1835. self.error_message = error_message
  1836. self.mode = mode.lower()
  1837. if not self.mode in ['generic', 'http']:
  1838. raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)
  1839. self.allowed_schemes = allowed_schemes
  1840. if self.allowed_schemes:
  1841. if prepend_scheme not in self.allowed_schemes:
  1842. raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s"
  1843. % (prepend_scheme, self.allowed_schemes))
  1844. # if allowed_schemes is None, then we will defer testing
  1845. # prepend_scheme's validity to a sub-method
  1846. self.prepend_scheme = prepend_scheme
  1847. def __call__(self, value):
  1848. """
  1849. :param value: a unicode or regular string, the URL to validate
  1850. :returns: a (string, string) tuple, where tuple[0] is the modified
  1851. input value and tuple[1] is either None (success!) or the
  1852. string error_message. The input value will never be modified in the
  1853. case of an error. However, if there is success then the input URL
  1854. may be modified to (1) prepend a scheme, and/or (2) convert a
  1855. non-compliant unicode URL into a compliant US-ASCII version.
  1856. """
  1857. if self.mode == 'generic':
  1858. subMethod = IS_GENERIC_URL(error_message=self.error_message,
  1859. allowed_schemes=self.allowed_schemes,
  1860. prepend_scheme=self.prepend_scheme)
  1861. elif self.mode == 'http':
  1862. subMethod = IS_HTTP_URL(error_message=self.error_message,
  1863. allowed_schemes=self.allowed_schemes,
  1864. prepend_scheme=self.prepend_scheme)
  1865. else:
  1866. raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)
  1867. if type(value) != unicode:
  1868. return subMethod(value)
  1869. else:
  1870. try:
  1871. asciiValue = unicode_to_ascii_url(value, self.prepend_scheme)
  1872. except Exception:
  1873. #If we are not able to convert the unicode url into a
  1874. # US-ASCII URL, then the URL is not valid
  1875. return (value, translate(self.error_message))
  1876. methodResult = subMethod(asciiValue)
  1877. #if the validation of the US-ASCII version of the value failed
  1878. if not methodResult[1] is None:
  1879. # then return the original input value, not the US-ASCII version
  1880. return (value, methodResult[1])
  1881. else:
  1882. return methodResult
# Pattern used by IS_TIME. Named groups:
#   h - hours: one or more digits (required)
#   m - minutes: digits and/or spaces, after one or more separator
#       characters that are neither digits nor spaces (optional)
#   s - seconds: zero or more digits, after separator characters that are
#       not digits, spaces, 'a' or 'p' (optional)
#   d - 'am' or 'pm' (optional)
# The pattern is not anchored at the end of the input.
regex_time = re.compile(
    '((?P<h>[0-9]+))([^0-9 ]+(?P<m>[0-9 ]+))?([^0-9ap ]+(?P<s>[0-9]*))?((?P<d>[ap]m))?')
  1885. class IS_TIME(Validator):
  1886. """
  1887. example::
  1888. INPUT(_type='text', _name='name', requires=IS_TIME())
  1889. understands the following formats
  1890. hh:mm:ss [am/pm]
  1891. hh:mm [am/pm]
  1892. hh [am/pm]
  1893. [am/pm] is optional, ':' can be replaced by any other non-space non-digit
  1894. >>> IS_TIME()('21:30')
  1895. (datetime.time(21, 30), None)
  1896. >>> IS_TIME()('21-30')
  1897. (datetime.time(21, 30), None)
  1898. >>> IS_TIME()('21.30')
  1899. (datetime.time(21, 30), None)
  1900. >>> IS_TIME()('21:30:59')
  1901. (datetime.time(21, 30, 59), None)
  1902. >>> IS_TIME()('5:30')
  1903. (datetime.time(5, 30), None)
  1904. >>> IS_TIME()('5:30 am')
  1905. (datetime.time(5, 30), None)
  1906. >>> IS_TIME()('5:30 pm')
  1907. (datetime.time(17, 30), None)
  1908. >>> IS_TIME()('5:30 whatever')
  1909. ('5:30 whatever', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1910. >>> IS_TIME()('5:30 20')
  1911. ('5:30 20', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1912. >>> IS_TIME()('24:30')
  1913. ('24:30', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1914. >>> IS_TIME()('21:60')
  1915. ('21:60', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1916. >>> IS_TIME()('21:30::')
  1917. ('21:30::', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1918. >>> IS_TIME()('')
  1919. ('', 'enter time as hh:mm:ss (seconds, am, pm optional)')
  1920. """
  1921. def __init__(self, error_message='enter time as hh:mm:ss (seconds, am, pm optional)'):
  1922. self.error_message = error_message
  1923. def __call__(self, value):
  1924. try:
  1925. ivalue = value
  1926. value = regex_time.match(value.lower())
  1927. (h, m, s) = (int(value.group('h')), 0, 0)
  1928. if not value.group('m') is None:
  1929. m = int(value.group('m'))
  1930. if not value.group('s') is None:
  1931. s = int(value.group('s'))
  1932. if value.group('d') == 'pm' and 0 < h < 12:
  1933. h = h + 12
  1934. if not (h in range(24) and m in range(60) and s
  1935. in range(60)):
  1936. raise ValueError('Hours or minutes or seconds are outside of allowed range')
  1937. value = datetime.time(h, m, s)
  1938. return (value, None)
  1939. except AttributeError:
  1940. pass
  1941. except ValueError:
  1942. pass
  1943. return (ivalue, translate(self.error_message))
  1944. class IS_DATE(Validator):
  1945. """
  1946. example::
  1947. INPUT(_type='text', _name='name', requires=IS_DATE())
  1948. date has to be in the ISO8960 format YYYY-MM-DD
  1949. """
  1950. def __init__(self, format='%Y-%m-%d',
  1951. error_message='enter date as %(format)s'):
  1952. self.format = translate(format)
  1953. self.error_message = str(error_message)
  1954. self.extremes = {}
  1955. def __call__(self, value):
  1956. if isinstance(value, datetime.date):
  1957. return (value, None)
  1958. try:
  1959. (y, m, d, hh, mm, ss, t0, t1, t2) = \
  1960. time.strptime(value, str(self.format))
  1961. value = datetime.date(y, m, d)
  1962. return (value, None)
  1963. except:
  1964. self.extremes.update(IS_DATETIME.nice(self.format))
  1965. return (value, translate(self.error_message) % self.extremes)
  1966. def formatter(self, value):
  1967. if value is None:
  1968. return None
  1969. format = self.format
  1970. year = value.year
  1971. y = '%.4i' % year
  1972. format = format.replace('%y', y[-2:])
  1973. format = format.replace('%Y', y)
  1974. if year < 1900:
  1975. year = 2000
  1976. d = datetime.date(year, value.month, value.day)
  1977. return d.strftime(format)
  1978. class IS_DATETIME(Validator):
  1979. """
  1980. example::
  1981. INPUT(_type='text', _name='name', requires=IS_DATETIME())
  1982. datetime has to be in the ISO8960 format YYYY-MM-DD hh:mm:ss
  1983. """
  1984. isodatetime = '%Y-%m-%d %H:%M:%S'
  1985. @staticmethod
  1986. def nice(format):
  1987. code = (('%Y', '1963'),
  1988. ('%y', '63'),
  1989. ('%d', '28'),
  1990. ('%m', '08'),
  1991. ('%b', 'Aug'),
  1992. ('%B', 'August'),
  1993. ('%H', '14'),
  1994. ('%I', '02'),
  1995. ('%p', 'PM'),
  1996. ('%M', '30'),
  1997. ('%S', '59'))
  1998. for (a, b) in code:
  1999. format = format.replace(a, b)
  2000. return dict(format=format)
  2001. def __init__(self, format='%Y-%m-%d %H:%M:%S',
  2002. error_message='enter date and time as %(format)s'):
  2003. self.format = translate(format)
  2004. self.error_message = str(error_message)
  2005. self.extremes = {}
  2006. def __call__(self, value):
  2007. if isinstance(value, datetime.datetime):
  2008. return (value, None)
  2009. try:
  2010. (y, m, d, hh, mm, ss, t0, t1, t2) = \
  2011. time.strptime(value, str(self.format))
  2012. value = datetime.datetime(y, m, d, hh, mm, ss)
  2013. return (value, None)
  2014. except:
  2015. self.extremes.update(IS_DATETIME.nice(self.format))
  2016. return (value, translate(self.error_message) % self.extremes)
  2017. def formatter(self, value):
  2018. if value is None:
  2019. return None
  2020. format = self.format
  2021. year = value.year
  2022. y = '%.4i' % year
  2023. format = format.replace('%y', y[-2:])
  2024. format = format.replace('%Y', y)
  2025. if year < 1900:
  2026. year = 2000
  2027. d = datetime.datetime(year, value.month, value.day,
  2028. value.hour, value.minute, value.second)
  2029. return d.strftime(format)
  2030. class IS_DATE_IN_RANGE(IS_DATE):
  2031. """
  2032. example::
  2033. >>> v = IS_DATE_IN_RANGE(minimum=datetime.date(2008,1,1), \
  2034. maximum=datetime.date(2009,12,31), \
  2035. format="%m/%d/%Y",error_message="oops")
  2036. >>> v('03/03/2008')
  2037. (datetime.date(2008, 3, 3), None)
  2038. >>> v('03/03/2010')
  2039. (datetime.date(2010, 3, 3), 'oops')
  2040. >>> v(datetime.date(2008,3,3))
  2041. (datetime.date(2008, 3, 3), None)
  2042. >>> v(datetime.date(2010,3,3))
  2043. (datetime.date(2010, 3, 3), 'oops')
  2044. """
  2045. def __init__(self,
  2046. minimum=None,
  2047. maximum=None,
  2048. format='%Y-%m-%d',
  2049. error_message=None):
  2050. self.minimum = minimum
  2051. self.maximum = maximum
  2052. if error_message is None:
  2053. if minimum is None:
  2054. error_message = "enter date on or before %(max)s"
  2055. elif maximum is None:
  2056. error_message = "enter date on or after %(min)s"
  2057. else:
  2058. error_message = "enter date in range %(min)s %(max)s"
  2059. IS_DATE.__init__(self,
  2060. format=format,
  2061. error_message=error_message)
  2062. self.extremes = dict(min=minimum, max=maximum)
  2063. def __call__(self, value):
  2064. (value, msg) = IS_DATE.__call__(self, value)
  2065. if msg is not None:
  2066. return (value, msg)
  2067. if self.minimum and self.minimum > value:
  2068. return (value, translate(self.error_message) % self.extremes)
  2069. if self.maximum and value > self.maximum:
  2070. return (value, translate(self.error_message) % self.extremes)
  2071. return (value, None)
  2072. class IS_DATETIME_IN_RANGE(IS_DATETIME):
  2073. """
  2074. example::
  2075. >>> v = IS_DATETIME_IN_RANGE(\
  2076. minimum=datetime.datetime(2008,1,1,12,20), \
  2077. maximum=datetime.datetime(2009,12,31,12,20), \
  2078. format="%m/%d/%Y %H:%M",error_message="oops")
  2079. >>> v('03/03/2008 12:40')
  2080. (datetime.datetime(2008, 3, 3, 12, 40), None)
  2081. >>> v('03/03/2010 10:34')
  2082. (datetime.datetime(2010, 3, 3, 10, 34), 'oops')
  2083. >>> v(datetime.datetime(2008,3,3,0,0))
  2084. (datetime.datetime(2008, 3, 3, 0, 0), None)
  2085. >>> v(datetime.datetime(2010,3,3,0,0))
  2086. (datetime.datetime(2010, 3, 3, 0, 0), 'oops')
  2087. """
  2088. def __init__(self,
  2089. minimum=None,
  2090. maximum=None,
  2091. format='%Y-%m-%d %H:%M:%S',
  2092. error_message=None):
  2093. self.minimum = minimum
  2094. self.maximum = maximum
  2095. if error_message is None:
  2096. if minimum is None:
  2097. error_message = "enter date and time on or before %(max)s"
  2098. elif maximum is None:
  2099. error_message = "enter date and time on or after %(min)s"
  2100. else:
  2101. error_message = "enter date and time in range %(min)s %(max)s"
  2102. IS_DATETIME.__init__(self,
  2103. format=format,
  2104. error_message=error_message)
  2105. self.extremes = dict(min=minimum, max=maximum)
  2106. def __call__(self, value):
  2107. (value, msg) = IS_DATETIME.__call__(self, value)
  2108. if msg is not None:
  2109. return (value, msg)
  2110. if self.minimum and self.minimum > value:
  2111. return (value, translate(self.error_message) % self.extremes)
  2112. if self.maximum and value > self.maximum:
  2113. return (value, translate(self.error_message) % self.extremes)
  2114. return (value, None)
  2115. class IS_LIST_OF(Validator):
  2116. def __init__(self, other=None, minimum=0, maximum=100,
  2117. error_message=None):
  2118. self.other = other
  2119. self.minimum = minimum
  2120. self.maximum = maximum
  2121. self.error_message = error_message or "enter between %(min)g and %(max)g values"
  2122. def __call__(self, value):
  2123. ivalue = value
  2124. if not isinstance(value, list):
  2125. ivalue = [ivalue]
  2126. if not self.minimum is None and len(ivalue) < self.minimum:
  2127. return (ivalue, translate(self.error_message) % dict(min=self.minimum, max=self.maximum))
  2128. if not self.maximum is None and len(ivalue) > self.maximum:
  2129. return (ivalue, translate(self.error_message) % dict(min=self.minimum, max=self.maximum))
  2130. new_value = []
  2131. if self.other:
  2132. for item in ivalue:
  2133. if item.strip():
  2134. (v, e) = self.other(item)
  2135. if e:
  2136. return (ivalue, e)
  2137. else:
  2138. new_value.append(v)
  2139. ivalue = new_value
  2140. return (ivalue, None)
  2141. class IS_LOWER(Validator):
  2142. """
  2143. convert to lower case
  2144. >>> IS_LOWER()('ABC')
  2145. ('abc', None)
  2146. >>> IS_LOWER()('Ñ')
  2147. ('\\xc3\\xb1', None)
  2148. """
  2149. def __call__(self, value):
  2150. return (value.decode('utf8').lower().encode('utf8'), None)
  2151. class IS_UPPER(Validator):
  2152. """
  2153. convert to upper case
  2154. >>> IS_UPPER()('abc')
  2155. ('ABC', None)
  2156. >>> IS_UPPER()('ñ')
  2157. ('\\xc3\\x91', None)
  2158. """
  2159. def __call__(self, value):
  2160. return (value.decode('utf8').upper().encode('utf8'), None)
  2161. def urlify(value, maxlen=80, keep_underscores=False):
  2162. """
  2163. Convert incoming string to a simplified ASCII subset.
  2164. if (keep_underscores): underscores are retained in the string
  2165. else: underscores are translated to hyphens (default)
  2166. """
  2167. s = value.lower() # to lowercase
  2168. s = s.decode('utf-8') # to utf-8
  2169. s = unicodedata.normalize('NFKD', s) # normalize eg è => e, ñ => n
  2170. s = s.encode('ASCII', 'ignore') # encode as ASCII
  2171. s = re.sub('&\w+;', '', s) # strip html entities
  2172. if keep_underscores:
  2173. s = re.sub('\s+', '-', s) # whitespace to hyphens
  2174. s = re.sub('[^\w\-]', '', s)
  2175. # strip all but alphanumeric/underscore/hyphen
  2176. else:
  2177. s = re.sub('[\s_]+', '-', s) # whitespace & underscores to hyphens
  2178. s = re.sub('[^a-z0-9\-]', '', s) # strip all but alphanumeric/hyphen
  2179. s = re.sub('[-_][-_]+', '-', s) # collapse strings of hyphens
  2180. s = s.strip('-') # remove leading and trailing hyphens
  2181. return s[:maxlen] # enforce maximum length
  2182. class IS_SLUG(Validator):
  2183. """
  2184. convert arbitrary text string to a slug
  2185. >>> IS_SLUG()('abc123')
  2186. ('abc123', None)
  2187. >>> IS_SLUG()('ABC123')
  2188. ('abc123', None)
  2189. >>> IS_SLUG()('abc-123')
  2190. ('abc-123', None)
  2191. >>> IS_SLUG()('abc--123')
  2192. ('abc-123', None)
  2193. >>> IS_SLUG()('abc 123')
  2194. ('abc-123', None)
  2195. >>> IS_SLUG()('abc\t_123')
  2196. ('abc-123', None)
  2197. >>> IS_SLUG()('-abc-')
  2198. ('abc', None)
  2199. >>> IS_SLUG()('--a--b--_ -c--')
  2200. ('a-b-c', None)
  2201. >>> IS_SLUG()('abc&amp;123')
  2202. ('abc123', None)
  2203. >>> IS_SLUG()('abc&amp;123&amp;def')
  2204. ('abc123def', None)
  2205. >>> IS_SLUG()('ñ')
  2206. ('n', None)
  2207. >>> IS_SLUG(maxlen=4)('abc123')
  2208. ('abc1', None)
  2209. >>> IS_SLUG()('abc_123')
  2210. ('abc-123', None)
  2211. >>> IS_SLUG(keep_underscores=False)('abc_123')
  2212. ('abc-123', None)
  2213. >>> IS_SLUG(keep_underscores=True)('abc_123')
  2214. ('abc_123', None)
  2215. >>> IS_SLUG(check=False)('abc')
  2216. ('abc', None)
  2217. >>> IS_SLUG(check=True)('abc')
  2218. ('abc', None)
  2219. >>> IS_SLUG(check=False)('a bc')
  2220. ('a-bc', None)
  2221. >>> IS_SLUG(check=True)('a bc')
  2222. ('a bc', 'must be slug')
  2223. """
  2224. @staticmethod
  2225. def urlify(value, maxlen=80, keep_underscores=False):
  2226. return urlify(value, maxlen, keep_underscores)
  2227. def __init__(self, maxlen=80, check=False, error_message='must be slug', keep_underscores=False):
  2228. self.maxlen = maxlen
  2229. self.check = check
  2230. self.error_message = error_message
  2231. self.keep_underscores = keep_underscores
  2232. def __call__(self, value):
  2233. if self.check and value != urlify(value, self.maxlen, self.keep_underscores):
  2234. return (value, translate(self.error_message))
  2235. return (urlify(value, self.maxlen, self.keep_underscores), None)
  2236. class IS_EMPTY_OR(Validator):
  2237. """
  2238. dummy class for testing IS_EMPTY_OR
  2239. >>> IS_EMPTY_OR(IS_EMAIL())('abc@def.com')
  2240. ('abc@def.com', None)
  2241. >>> IS_EMPTY_OR(IS_EMAIL())(' ')
  2242. (None, None)
  2243. >>> IS_EMPTY_OR(IS_EMAIL(), null='abc')(' ')
  2244. ('abc', None)
  2245. >>> IS_EMPTY_OR(IS_EMAIL(), null='abc', empty_regex='def')('def')
  2246. ('abc', None)
  2247. >>> IS_EMPTY_OR(IS_EMAIL())('abc')
  2248. ('abc', 'enter a valid email address')
  2249. >>> IS_EMPTY_OR(IS_EMAIL())(' abc ')
  2250. ('abc', 'enter a valid email address')
  2251. """
  2252. def __init__(self, other, null=None, empty_regex=None):
  2253. (self.other, self.null) = (other, null)
  2254. if empty_regex is not None:
  2255. self.empty_regex = re.compile(empty_regex)
  2256. else:
  2257. self.empty_regex = None
  2258. if hasattr(other, 'multiple'):
  2259. self.multiple = other.multiple
  2260. if hasattr(other, 'options'):
  2261. self.options = self._options
  2262. def _options(self):
  2263. options = self.other.options()
  2264. if (not options or options[0][0] != '') and not self.multiple:
  2265. options.insert(0, ('', ''))
  2266. return options
  2267. def set_self_id(self, id):
  2268. if isinstance(self.other, (list, tuple)):
  2269. for item in self.other:
  2270. if hasattr(item, 'set_self_id'):
  2271. item.set_self_id(id)
  2272. else:
  2273. if hasattr(self.other, 'set_self_id'):
  2274. self.other.set_self_id(id)
  2275. def __call__(self, value):
  2276. value, empty = is_empty(value, empty_regex=self.empty_regex)
  2277. if empty:
  2278. return (self.null, None)
  2279. if isinstance(self.other, (list, tuple)):
  2280. error = None
  2281. for item in self.other:
  2282. value, error = item(value)
  2283. if error:
  2284. break
  2285. return value, error
  2286. else:
  2287. return self.other(value)
  2288. def formatter(self, value):
  2289. if hasattr(self.other, 'formatter'):
  2290. return self.other.formatter(value)
  2291. return value
  2292. IS_NULL_OR = IS_EMPTY_OR # for backward compatibility
  2293. class CLEANUP(Validator):
  2294. """
  2295. example::
  2296. INPUT(_type='text', _name='name', requires=CLEANUP())
  2297. removes special characters on validation
  2298. """
  2299. REGEX_CLEANUP = re.compile('[^\x09\x0a\x0d\x20-\x7e]')
  2300. def __init__(self, regex=None):
  2301. self.regex = self.REGEX_CLEANUP if regex is None \
  2302. else re.compile(regex)
  2303. def __call__(self, value):
  2304. v = self.regex.sub('', str(value).strip())
  2305. return (v, None)
  2306. class LazyCrypt(object):
  2307. """
  2308. Stores a lazy password hash
  2309. """
  2310. def __init__(self, crypt, password):
  2311. """
  2312. crypt is an instance of the CRYPT validator,
  2313. password is the password as inserted by the user
  2314. """
  2315. self.crypt = crypt
  2316. self.password = password
  2317. self.crypted = None
  2318. def __str__(self):
  2319. """
  2320. Encrypted self.password and caches it in self.crypted.
  2321. If self.crypt.salt the output is in the format <algorithm>$<salt>$<hash>
  2322. Try get the digest_alg from the key (if it exists)
  2323. else assume the default digest_alg. If not key at all, set key=''
  2324. If a salt is specified use it, if salt is True, set salt to uuid
  2325. (this should all be backward compatible)
  2326. Options:
  2327. key = 'uuid'
  2328. key = 'md5:uuid'
  2329. key = 'sha512:uuid'
  2330. ...
  2331. key = 'pbkdf2(1000,64,sha512):uuid' 1000 iterations and 64 chars length
  2332. """
  2333. if self.crypted:
  2334. return self.crypted
  2335. if self.crypt.key:
  2336. if ':' in self.crypt.key:
  2337. digest_alg, key = self.crypt.key.split(':', 1)
  2338. else:
  2339. digest_alg, key = self.crypt.digest_alg, self.crypt.key
  2340. else:
  2341. digest_alg, key = self.crypt.digest_alg, ''
  2342. if self.crypt.salt:
  2343. if self.crypt.salt == True:
  2344. salt = str(web2py_uuid()).replace('-', '')[-16:]
  2345. else:
  2346. salt = self.crypt.salt
  2347. else:
  2348. salt = ''
  2349. hashed = simple_hash(self.password, key, salt, digest_alg)
  2350. self.crypted = '%s$%s$%s' % (digest_alg, salt, hashed)
  2351. return self.crypted
  2352. def __eq__(self, stored_password):
  2353. """
  2354. compares the current lazy crypted password with a stored password
  2355. """
  2356. # LazyCrypt objects comparison
  2357. if isinstance(stored_password, self.__class__):
  2358. return ((self is stored_password) or
  2359. ((self.crypt.key == stored_password.crypt.key) and
  2360. (self.password == stored_password.password)))
  2361. if self.crypt.key:
  2362. if ':' in self.crypt.key:
  2363. key = self.crypt.key.split(':')[1]
  2364. else:
  2365. key = self.crypt.key
  2366. else:
  2367. key = ''
  2368. if stored_password is None:
  2369. return False
  2370. elif stored_password.count('$') == 2:
  2371. (digest_alg, salt, hash) = stored_password.split('$')
  2372. h = simple_hash(self.password, key, salt, digest_alg)
  2373. temp_pass = '%s$%s$%s' % (digest_alg, salt, h)
  2374. else: # no salting
  2375. # guess digest_alg
  2376. digest_alg = DIGEST_ALG_BY_SIZE.get(len(stored_password), None)
  2377. if not digest_alg:
  2378. return False
  2379. else:
  2380. temp_pass = simple_hash(self.password, key, '', digest_alg)
  2381. return temp_pass == stored_password
  2382. class CRYPT(object):
  2383. """
  2384. example::
  2385. INPUT(_type='text', _name='name', requires=CRYPT())
  2386. encodes the value on validation with a digest.
  2387. If no arguments are provided CRYPT uses the MD5 algorithm.
  2388. If the key argument is provided the HMAC+MD5 algorithm is used.
  2389. If the digest_alg is specified this is used to replace the
  2390. MD5 with, for example, SHA512. The digest_alg can be
  2391. the name of a hashlib algorithm as a string or the algorithm itself.
  2392. min_length is the minimal password length (default 4) - IS_STRONG for serious security
  2393. error_message is the message if password is too short
  2394. Notice that an empty password is accepted but invalid. It will not allow login back.
  2395. Stores junk as hashed password.
  2396. Specify an algorithm or by default we will use sha512.
  2397. Typical available algorithms:
  2398. md5, sha1, sha224, sha256, sha384, sha512
  2399. If salt, it hashes a password with a salt.
  2400. If salt is True, this method will automatically generate one.
  2401. Either case it returns an encrypted password string in the following format:
  2402. <algorithm>$<salt>$<hash>
  2403. Important: hashed password is returned as a LazyCrypt object and computed only if needed.
  2404. The LasyCrypt object also knows how to compare itself with an existing salted password
  2405. Supports standard algorithms
  2406. >>> for alg in ('md5','sha1','sha256','sha384','sha512'):
  2407. ... print str(CRYPT(digest_alg=alg,salt=True)('test')[0])
  2408. md5$...$...
  2409. sha1$...$...
  2410. sha256$...$...
  2411. sha384$...$...
  2412. sha512$...$...
  2413. The syntax is always alg$salt$hash
  2414. Supports for pbkdf2
  2415. >>> alg = 'pbkdf2(1000,20,sha512)'
  2416. >>> print str(CRYPT(digest_alg=alg,salt=True)('test')[0])
  2417. pbkdf2(1000,20,sha512)$...$...
  2418. An optional hmac_key can be specified and it is used as salt prefix
  2419. >>> a = str(CRYPT(digest_alg='md5',key='mykey',salt=True)('test')[0])
  2420. >>> print a
  2421. md5$...$...
  2422. Even if the algorithm changes the hash can still be validated
  2423. >>> CRYPT(digest_alg='sha1',key='mykey',salt=True)('test')[0] == a
  2424. True
  2425. If no salt is specified CRYPT can guess the algorithms from length:
  2426. >>> a = str(CRYPT(digest_alg='sha1',salt=False)('test')[0])
  2427. >>> a
  2428. 'sha1$$a94a8fe5ccb19ba61c4c0873d391e987982fbbd3'
  2429. >>> CRYPT(digest_alg='sha1',salt=False)('test')[0] == a
  2430. True
  2431. >>> CRYPT(digest_alg='sha1',salt=False)('test')[0] == a[6:]
  2432. True
  2433. >>> CRYPT(digest_alg='md5',salt=False)('test')[0] == a
  2434. True
  2435. >>> CRYPT(digest_alg='md5',salt=False)('test')[0] == a[6:]
  2436. True
  2437. """
  2438. def __init__(self,
  2439. key=None,
  2440. digest_alg='pbkdf2(1000,20,sha512)',
  2441. min_length=0,
  2442. error_message='too short', salt=True):
  2443. """
  2444. important, digest_alg='md5' is not the default hashing algorithm for
  2445. web2py. This is only an example of usage of this function.
  2446. The actual hash algorithm is determined from the key which is
  2447. generated by web2py in tools.py. This defaults to hmac+sha512.
  2448. """
  2449. self.key = key
  2450. self.digest_alg = digest_alg
  2451. self.min_length = min_length
  2452. self.error_message = error_message
  2453. self.salt = salt
  2454. def __call__(self, value):
  2455. if len(value) < self.min_length:
  2456. return ('', translate(self.error_message))
  2457. return (LazyCrypt(self, value), None)
# entropy calculator for IS_STRONG
#
# Character classes used by calc_entropy. Each character is assigned to the
# first of lowerset, upperset, numberset, sym1set, sym2set that contains it;
# a character found in none of them is counted against otherset.
lowerset = frozenset(unicode('abcdefghijklmnopqrstuvwxyz'))
upperset = frozenset(unicode('ABCDEFGHIJKLMNOPQRSTUVWXYZ'))
numberset = frozenset(unicode('0123456789'))
sym1set = frozenset(unicode('!@#$%^&*()'))
sym2set = frozenset(unicode('~`-_=+[]{}\\|;:\'",.<>?/'))
# calc_entropy never tests membership in otherset; only its size (36) is
# used, as the alphabet credit for the first unclassified character.
otherset = frozenset(
    unicode('0123456789abcdefghijklmnopqrstuvwxyz'))  # anything else
  2467. def calc_entropy(string):
  2468. " calculate a simple entropy for a given string "
  2469. import math
  2470. alphabet = 0 # alphabet size
  2471. other = set()
  2472. seen = set()
  2473. lastset = None
  2474. if isinstance(string, str):
  2475. string = unicode(string, encoding='utf8')
  2476. for c in string:
  2477. # classify this character
  2478. inset = otherset
  2479. for cset in (lowerset, upperset, numberset, sym1set, sym2set):
  2480. if c in cset:
  2481. inset = cset
  2482. break
  2483. # calculate effect of character on alphabet size
  2484. if inset not in seen:
  2485. seen.add(inset)
  2486. alphabet += len(inset) # credit for a new character set
  2487. elif c not in other:
  2488. alphabet += 1 # credit for unique characters
  2489. other.add(c)
  2490. if inset is not lastset:
  2491. alphabet += 1 # credit for set transitions
  2492. lastset = cset
  2493. entropy = len(
  2494. string) * math.log(alphabet) / 0.6931471805599453 # math.log(2)
  2495. return round(entropy, 2)
  2496. class IS_STRONG(object):
  2497. """
  2498. example::
  2499. INPUT(_type='password', _name='passwd',
  2500. requires=IS_STRONG(min=10, special=2, upper=2))
  2501. enforces complexity requirements on a field
  2502. >>> IS_STRONG(es=True)('Abcd1234')
  2503. ('Abcd1234',
  2504. 'Must include at least 1 of the following: ~!@#$%^&*()_+-=?<>,.:;{}[]|')
  2505. >>> IS_STRONG(es=True)('Abcd1234!')
  2506. ('Abcd1234!', None)
  2507. >>> IS_STRONG(es=True, entropy=1)('a')
  2508. ('a', None)
  2509. >>> IS_STRONG(es=True, entropy=1, min=2)('a')
  2510. ('a', 'Minimum length is 2')
  2511. >>> IS_STRONG(es=True, entropy=100)('abc123')
  2512. ('abc123', 'Entropy (32.35) less than required (100)')
  2513. >>> IS_STRONG(es=True, entropy=100)('and')
  2514. ('and', 'Entropy (14.57) less than required (100)')
  2515. >>> IS_STRONG(es=True, entropy=100)('aaa')
  2516. ('aaa', 'Entropy (14.42) less than required (100)')
  2517. >>> IS_STRONG(es=True, entropy=100)('a1d')
  2518. ('a1d', 'Entropy (15.97) less than required (100)')
  2519. >>> IS_STRONG(es=True, entropy=100)('añd')
  2520. ('a\\xc3\\xb1d', 'Entropy (18.13) less than required (100)')
  2521. """
  2522. def __init__(self, min=None, max=None, upper=None, lower=None, number=None,
  2523. entropy=None,
  2524. special=None, specials=r'~!@#$%^&*()_+-=?<>,.:;{}[]|',
  2525. invalid=' "', error_message=None, es=False):
  2526. self.entropy = entropy
  2527. if entropy is None:
  2528. # enforce default requirements
  2529. self.min = 8 if min is None else min
  2530. self.max = max # was 20, but that doesn't make sense
  2531. self.upper = 1 if upper is None else upper
  2532. self.lower = 1 if lower is None else lower
  2533. self.number = 1 if number is None else number
  2534. self.special = 1 if special is None else special
  2535. else:
  2536. # by default, an entropy spec is exclusive
  2537. self.min = min
  2538. self.max = max
  2539. self.upper = upper
  2540. self.lower = lower
  2541. self.number = number
  2542. self.special = special
  2543. self.specials = specials
  2544. self.invalid = invalid
  2545. self.error_message = error_message
  2546. self.estring = es # return error message as string (for doctest)
  2547. def __call__(self, value):
  2548. failures = []
  2549. if value and len(value) == value.count('*') > 4:
  2550. return (value, None)
  2551. if self.entropy is not None:
  2552. entropy = calc_entropy(value)
  2553. if entropy < self.entropy:
  2554. failures.append(translate("Entropy (%(have)s) less than required (%(need)s)")
  2555. % dict(have=entropy, need=self.entropy))
  2556. if type(self.min) == int and self.min > 0:
  2557. if not len(value) >= self.min:
  2558. failures.append(translate("Minimum length is %s") % self.min)
  2559. if type(self.max) == int and self.max > 0:
  2560. if not len(value) <= self.max:
  2561. failures.append(translate("Maximum length is %s") % self.max)
  2562. if type(self.special) == int:
  2563. all_special = [ch in value for ch in self.specials]
  2564. if self.special > 0:
  2565. if not all_special.count(True) >= self.special:
  2566. failures.append(translate("Must include at least %s of the following: %s")
  2567. % (self.special, self.specials))
  2568. if self.invalid:
  2569. all_invalid = [ch in value for ch in self.invalid]
  2570. if all_invalid.count(True) > 0:
  2571. failures.append(translate("May not contain any of the following: %s")
  2572. % self.invalid)
  2573. if type(self.upper) == int:
  2574. all_upper = re.findall("[A-Z]", value)
  2575. if self.upper > 0:
  2576. if not len(all_upper) >= self.upper:
  2577. failures.append(translate("Must include at least %s upper case")
  2578. % str(self.upper))
  2579. else:
  2580. if len(all_upper) > 0:
  2581. failures.append(
  2582. translate("May not include any upper case letters"))
  2583. if type(self.lower) == int:
  2584. all_lower = re.findall("[a-z]", value)
  2585. if self.lower > 0:
  2586. if not len(all_lower) >= self.lower:
  2587. failures.append(translate("Must include at least %s lower case")
  2588. % str(self.lower))
  2589. else:
  2590. if len(all_lower) > 0:
  2591. failures.append(
  2592. translate("May not include any lower case letters"))
  2593. if type(self.number) == int:
  2594. all_number = re.findall("[0-9]", value)
  2595. if self.number > 0:
  2596. numbers = "number"
  2597. if self.number > 1:
  2598. numbers = "numbers"
  2599. if not len(all_number) >= self.number:
  2600. failures.append(translate("Must include at least %s %s")
  2601. % (str(self.number), numbers))
  2602. else:
  2603. if len(all_number) > 0:
  2604. failures.append(translate("May not include any numbers"))
  2605. if len(failures) == 0:
  2606. return (value, None)
  2607. if not self.error_message:
  2608. if self.estring:
  2609. return (value, '|'.join(failures))
  2610. from html import XML
  2611. return (value, XML('<br />'.join(failures)))
  2612. else:
  2613. return (value, translate(self.error_message))
  2614. class IS_IN_SUBSET(IS_IN_SET):
  2615. REGEX_W = re.compile('\w+')
  2616. def __init__(self, *a, **b):
  2617. IS_IN_SET.__init__(self, *a, **b)
  2618. def __call__(self, value):
  2619. values = self.REGEX_W.findall(str(value))
  2620. failures = [x for x in values if IS_IN_SET.__call__(self, x)[1]]
  2621. if failures:
  2622. return (value, translate(self.error_message))
  2623. return (value, None)
  2624. class IS_IMAGE(Validator):
  2625. """
  2626. Checks if file uploaded through file input was saved in one of selected
  2627. image formats and has dimensions (width and height) within given boundaries.
  2628. Does *not* check for maximum file size (use IS_LENGTH for that). Returns
  2629. validation failure if no data was uploaded.
  2630. Supported file formats: BMP, GIF, JPEG, PNG.
  2631. Code parts taken from
  2632. http://mail.python.org/pipermail/python-list/2007-June/617126.html
  2633. Arguments:
  2634. extensions: iterable containing allowed *lowercase* image file extensions
  2635. ('jpg' extension of uploaded file counts as 'jpeg')
  2636. maxsize: iterable containing maximum width and height of the image
  2637. minsize: iterable containing minimum width and height of the image
  2638. Use (-1, -1) as minsize to pass image size check.
  2639. Examples::
  2640. #Check if uploaded file is in any of supported image formats:
  2641. INPUT(_type='file', _name='name', requires=IS_IMAGE())
  2642. #Check if uploaded file is either JPEG or PNG:
  2643. INPUT(_type='file', _name='name',
  2644. requires=IS_IMAGE(extensions=('jpeg', 'png')))
  2645. #Check if uploaded file is PNG with maximum size of 200x200 pixels:
  2646. INPUT(_type='file', _name='name',
  2647. requires=IS_IMAGE(extensions=('png'), maxsize=(200, 200)))
  2648. """
  2649. def __init__(self,
  2650. extensions=('bmp', 'gif', 'jpeg', 'png'),
  2651. maxsize=(10000, 10000),
  2652. minsize=(0, 0),
  2653. error_message='invalid image'):
  2654. self.extensions = extensions
  2655. self.maxsize = maxsize
  2656. self.minsize = minsize
  2657. self.error_message = error_message
  2658. def __call__(self, value):
  2659. try:
  2660. extension = value.filename.rfind('.')
  2661. assert extension >= 0
  2662. extension = value.filename[extension + 1:].lower()
  2663. if extension == 'jpg':
  2664. extension = 'jpeg'
  2665. assert extension in self.extensions
  2666. if extension == 'bmp':
  2667. width, height = self.__bmp(value.file)
  2668. elif extension == 'gif':
  2669. width, height = self.__gif(value.file)
  2670. elif extension == 'jpeg':
  2671. width, height = self.__jpeg(value.file)
  2672. elif extension == 'png':
  2673. width, height = self.__png(value.file)
  2674. else:
  2675. width = -1
  2676. height = -1
  2677. assert self.minsize[0] <= width <= self.maxsize[0] \
  2678. and self.minsize[1] <= height <= self.maxsize[1]
  2679. value.file.seek(0)
  2680. return (value, None)
  2681. except:
  2682. return (value, translate(self.error_message))
  2683. def __bmp(self, stream):
  2684. if stream.read(2) == 'BM':
  2685. stream.read(16)
  2686. return struct.unpack("<LL", stream.read(8))
  2687. return (-1, -1)
  2688. def __gif(self, stream):
  2689. if stream.read(6) in ('GIF87a', 'GIF89a'):
  2690. stream = stream.read(5)
  2691. if len(stream) == 5:
  2692. return tuple(struct.unpack("<HHB", stream)[:-1])
  2693. return (-1, -1)
  2694. def __jpeg(self, stream):
  2695. if stream.read(2) == '\xFF\xD8':
  2696. while True:
  2697. (marker, code, length) = struct.unpack("!BBH", stream.read(4))
  2698. if marker != 0xFF:
  2699. break
  2700. elif code >= 0xC0 and code <= 0xC3:
  2701. return tuple(reversed(
  2702. struct.unpack("!xHH", stream.read(5))))
  2703. else:
  2704. stream.read(length - 2)
  2705. return (-1, -1)
  2706. def __png(self, stream):
  2707. if stream.read(8) == '\211PNG\r\n\032\n':
  2708. stream.read(4)
  2709. if stream.read(4) == "IHDR":
  2710. return struct.unpack("!LL", stream.read(8))
  2711. return (-1, -1)
  2712. class IS_UPLOAD_FILENAME(Validator):
  2713. """
  2714. Checks if name and extension of file uploaded through file input matches
  2715. given criteria.
  2716. Does *not* ensure the file type in any way. Returns validation failure
  2717. if no data was uploaded.
  2718. Arguments::
  2719. filename: filename (before dot) regex
  2720. extension: extension (after dot) regex
  2721. lastdot: which dot should be used as a filename / extension separator:
  2722. True means last dot, eg. file.png -> file / png
  2723. False means first dot, eg. file.tar.gz -> file / tar.gz
  2724. case: 0 - keep the case, 1 - transform the string into lowercase (default),
  2725. 2 - transform the string into uppercase
  2726. If there is no dot present, extension checks will be done against empty
  2727. string and filename checks against whole value.
  2728. Examples::
  2729. #Check if file has a pdf extension (case insensitive):
  2730. INPUT(_type='file', _name='name',
  2731. requires=IS_UPLOAD_FILENAME(extension='pdf'))
  2732. #Check if file has a tar.gz extension and name starting with backup:
  2733. INPUT(_type='file', _name='name',
  2734. requires=IS_UPLOAD_FILENAME(filename='backup.*',
  2735. extension='tar.gz', lastdot=False))
  2736. #Check if file has no extension and name matching README
  2737. #(case sensitive):
  2738. INPUT(_type='file', _name='name',
  2739. requires=IS_UPLOAD_FILENAME(filename='^README$',
  2740. extension='^$', case=0))
  2741. """
  2742. def __init__(self, filename=None, extension=None, lastdot=True, case=1,
  2743. error_message='enter valid filename'):
  2744. if isinstance(filename, str):
  2745. filename = re.compile(filename)
  2746. if isinstance(extension, str):
  2747. extension = re.compile(extension)
  2748. self.filename = filename
  2749. self.extension = extension
  2750. self.lastdot = lastdot
  2751. self.case = case
  2752. self.error_message = error_message
  2753. def __call__(self, value):
  2754. try:
  2755. string = value.filename
  2756. except:
  2757. return (value, translate(self.error_message))
  2758. if self.case == 1:
  2759. string = string.lower()
  2760. elif self.case == 2:
  2761. string = string.upper()
  2762. if self.lastdot:
  2763. dot = string.rfind('.')
  2764. else:
  2765. dot = string.find('.')
  2766. if dot == -1:
  2767. dot = len(string)
  2768. if self.filename and not self.filename.match(string[:dot]):
  2769. return (value, translate(self.error_message))
  2770. elif self.extension and not self.extension.match(string[dot + 1:]):
  2771. return (value, translate(self.error_message))
  2772. else:
  2773. return (value, None)
  2774. class IS_IPV4(Validator):
  2775. """
  2776. Checks if field's value is an IP version 4 address in decimal form. Can
  2777. be set to force addresses from certain range.
  2778. IPv4 regex taken from: http://regexlib.com/REDetails.aspx?regexp_id=1411
  2779. Arguments:
  2780. minip: lowest allowed address; accepts:
  2781. str, eg. 192.168.0.1
  2782. list or tuple of octets, eg. [192, 168, 0, 1]
  2783. maxip: highest allowed address; same as above
  2784. invert: True to allow addresses only from outside of given range; note
  2785. that range boundaries are not matched this way
  2786. is_localhost: localhost address treatment:
  2787. None (default): indifferent
  2788. True (enforce): query address must match localhost address
  2789. (127.0.0.1)
  2790. False (forbid): query address must not match localhost
  2791. address
  2792. is_private: same as above, except that query address is checked against
  2793. two address ranges: 172.16.0.0 - 172.31.255.255 and
  2794. 192.168.0.0 - 192.168.255.255
  2795. is_automatic: same as above, except that query address is checked against
  2796. one address range: 169.254.0.0 - 169.254.255.255
  2797. Minip and maxip may also be lists or tuples of addresses in all above
  2798. forms (str, int, list / tuple), allowing setup of multiple address ranges:
  2799. minip = (minip1, minip2, ... minipN)
  2800. | | |
  2801. | | |
  2802. maxip = (maxip1, maxip2, ... maxipN)
  2803. Longer iterable will be truncated to match length of shorter one.
  2804. Examples::
  2805. #Check for valid IPv4 address:
  2806. INPUT(_type='text', _name='name', requires=IS_IPV4())
  2807. #Check for valid IPv4 address belonging to specific range:
  2808. INPUT(_type='text', _name='name',
  2809. requires=IS_IPV4(minip='100.200.0.0', maxip='100.200.255.255'))
  2810. #Check for valid IPv4 address belonging to either 100.110.0.0 -
  2811. #100.110.255.255 or 200.50.0.0 - 200.50.0.255 address range:
  2812. INPUT(_type='text', _name='name',
  2813. requires=IS_IPV4(minip=('100.110.0.0', '200.50.0.0'),
  2814. maxip=('100.110.255.255', '200.50.0.255')))
  2815. #Check for valid IPv4 address belonging to private address space:
  2816. INPUT(_type='text', _name='name', requires=IS_IPV4(is_private=True))
  2817. #Check for valid IPv4 address that is not a localhost address:
  2818. INPUT(_type='text', _name='name', requires=IS_IPV4(is_localhost=False))
  2819. >>> IS_IPV4()('1.2.3.4')
  2820. ('1.2.3.4', None)
  2821. >>> IS_IPV4()('255.255.255.255')
  2822. ('255.255.255.255', None)
  2823. >>> IS_IPV4()('1.2.3.4 ')
  2824. ('1.2.3.4 ', 'enter valid IPv4 address')
  2825. >>> IS_IPV4()('1.2.3.4.5')
  2826. ('1.2.3.4.5', 'enter valid IPv4 address')
  2827. >>> IS_IPV4()('123.123')
  2828. ('123.123', 'enter valid IPv4 address')
  2829. >>> IS_IPV4()('1111.2.3.4')
  2830. ('1111.2.3.4', 'enter valid IPv4 address')
  2831. >>> IS_IPV4()('0111.2.3.4')
  2832. ('0111.2.3.4', 'enter valid IPv4 address')
  2833. >>> IS_IPV4()('256.2.3.4')
  2834. ('256.2.3.4', 'enter valid IPv4 address')
  2835. >>> IS_IPV4()('300.2.3.4')
  2836. ('300.2.3.4', 'enter valid IPv4 address')
  2837. >>> IS_IPV4(minip='1.2.3.4', maxip='1.2.3.4')('1.2.3.4')
  2838. ('1.2.3.4', None)
  2839. >>> IS_IPV4(minip='1.2.3.5', maxip='1.2.3.9', error_message='bad ip')('1.2.3.4')
  2840. ('1.2.3.4', 'bad ip')
  2841. >>> IS_IPV4(maxip='1.2.3.4', invert=True)('127.0.0.1')
  2842. ('127.0.0.1', None)
  2843. >>> IS_IPV4(maxip='1.2.3.4', invert=True)('1.2.3.4')
  2844. ('1.2.3.4', 'enter valid IPv4 address')
  2845. >>> IS_IPV4(is_localhost=True)('127.0.0.1')
  2846. ('127.0.0.1', None)
  2847. >>> IS_IPV4(is_localhost=True)('1.2.3.4')
  2848. ('1.2.3.4', 'enter valid IPv4 address')
  2849. >>> IS_IPV4(is_localhost=False)('127.0.0.1')
  2850. ('127.0.0.1', 'enter valid IPv4 address')
  2851. >>> IS_IPV4(maxip='100.0.0.0', is_localhost=True)('127.0.0.1')
  2852. ('127.0.0.1', 'enter valid IPv4 address')
  2853. """
  2854. regex = re.compile(
  2855. '^(([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])$')
  2856. numbers = (16777216, 65536, 256, 1)
  2857. localhost = 2130706433
  2858. private = ((2886729728L, 2886795263L), (3232235520L, 3232301055L))
  2859. automatic = (2851995648L, 2852061183L)
  2860. def __init__(
  2861. self,
  2862. minip='0.0.0.0',
  2863. maxip='255.255.255.255',
  2864. invert=False,
  2865. is_localhost=None,
  2866. is_private=None,
  2867. is_automatic=None,
  2868. error_message='enter valid IPv4 address'):
  2869. for n, value in enumerate((minip, maxip)):
  2870. temp = []
  2871. if isinstance(value, str):
  2872. temp.append(value.split('.'))
  2873. elif isinstance(value, (list, tuple)):
  2874. if len(value) == len(filter(lambda item: isinstance(item, int), value)) == 4:
  2875. temp.append(value)
  2876. else:
  2877. for item in value:
  2878. if isinstance(item, str):
  2879. temp.append(item.split('.'))
  2880. elif isinstance(item, (list, tuple)):
  2881. temp.append(item)
  2882. numbers = []
  2883. for item in temp:
  2884. number = 0
  2885. for i, j in zip(self.numbers, item):
  2886. number += i * int(j)
  2887. numbers.append(number)
  2888. if n == 0:
  2889. self.minip = numbers
  2890. else:
  2891. self.maxip = numbers
  2892. self.invert = invert
  2893. self.is_localhost = is_localhost
  2894. self.is_private = is_private
  2895. self.is_automatic = is_automatic
  2896. self.error_message = error_message
  2897. def __call__(self, value):
  2898. if self.regex.match(value):
  2899. number = 0
  2900. for i, j in zip(self.numbers, value.split('.')):
  2901. number += i * int(j)
  2902. ok = False
  2903. for bottom, top in zip(self.minip, self.maxip):
  2904. if self.invert != (bottom <= number <= top):
  2905. ok = True
  2906. if not (self.is_localhost is None or self.is_localhost ==
  2907. (number == self.localhost)):
  2908. ok = False
  2909. if not (self.is_private is None or self.is_private ==
  2910. (sum([number[0] <= number <= number[1] for number in self.private]) > 0)):
  2911. ok = False
  2912. if not (self.is_automatic is None or self.is_automatic ==
  2913. (self.automatic[0] <= number <= self.automatic[1])):
  2914. ok = False
  2915. if ok:
  2916. return (value, None)
  2917. return (value, translate(self.error_message))
  2918. if __name__ == '__main__':
  2919. import doctest
  2920. doctest.testmod(
  2921. optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)