PageRenderTime 51ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 1ms

/gluon/validators.py

https://code.google.com/p/web2py/
Python | 3844 lines | 3559 code | 119 blank | 166 comment | 202 complexity | 4109f78325f1d2a93c9c476e6427b806 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, MIT, BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. #!/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. | This file is part of the web2py Web Framework
  5. | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
  6. | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
  7. | Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE
  8. Validators
  9. -----------
  10. """
  11. import os
  12. import re
  13. import datetime
  14. import time
  15. import cgi
  16. import urllib
  17. import struct
  18. import decimal
  19. import unicodedata
  20. from cStringIO import StringIO
  21. from gluon.utils import simple_hash, web2py_uuid, DIGEST_ALG_BY_SIZE
  22. from pydal.objects import FieldVirtual, FieldMethod
# Matches an optionally signed run of decimal digits; IS_INT_IN_RANGE uses
# it to decide whether a value can be converted with int().
regex_isint = re.compile('^[+-]?\d+$')

# Exception types that IS_JSON treats as "the input is not valid JSON".
JSONErrors = (NameError, TypeError, ValueError, AttributeError,
              KeyError)

# Prefer the standard-library json module (bound to the name simplejson);
# when it is missing, fall back to the bundled simplejson and also treat
# its JSONDecodeError as a JSON failure.
try:
    import json as simplejson
except ImportError:
    from gluon.contrib import simplejson
    from gluon.contrib.simplejson.decoder import JSONDecodeError
    JSONErrors += (JSONDecodeError,)

# Public names of this module (the ones a star-import exposes).
__all__ = [
    'ANY_OF',
    'CLEANUP',
    'CRYPT',
    'IS_ALPHANUMERIC',
    'IS_DATE_IN_RANGE',
    'IS_DATE',
    'IS_DATETIME_IN_RANGE',
    'IS_DATETIME',
    'IS_DECIMAL_IN_RANGE',
    'IS_EMAIL',
    'IS_LIST_OF_EMAILS',
    'IS_EMPTY_OR',
    'IS_EXPR',
    'IS_FLOAT_IN_RANGE',
    'IS_IMAGE',
    'IS_IN_DB',
    'IS_IN_SET',
    'IS_INT_IN_RANGE',
    'IS_IPV4',
    'IS_IPV6',
    'IS_IPADDRESS',
    'IS_LENGTH',
    'IS_LIST_OF',
    'IS_LOWER',
    'IS_MATCH',
    'IS_EQUAL_TO',
    'IS_NOT_EMPTY',
    'IS_NOT_IN_DB',
    'IS_NULL_OR',
    'IS_SLUG',
    'IS_STRONG',
    'IS_TIME',
    'IS_UPLOAD_FILENAME',
    'IS_UPPER',
    'IS_URL',
    'IS_JSON',
]

# `current` is optional: have_current records whether it could be imported,
# and translate() only consults current.T when it is True.
try:
    from globals import current
    have_current = True
except ImportError:
    have_current = False
  75. def translate(text):
  76. if text is None:
  77. return None
  78. elif isinstance(text, (str, unicode)) and have_current:
  79. if hasattr(current, 'T'):
  80. return str(current.T(text))
  81. return str(text)
  82. def options_sorter(x, y):
  83. return (str(x[1]).upper() > str(y[1]).upper() and 1) or -1
  84. class Validator(object):
  85. """
  86. Root for all validators, mainly for documentation purposes.
  87. Validators are classes used to validate input fields (including forms
  88. generated from database tables).
  89. Here is an example of using a validator with a FORM::
  90. INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10))
  91. Here is an example of how to require a validator for a table field::
  92. db.define_table('person', SQLField('name'))
  93. db.person.name.requires=IS_NOT_EMPTY()
  94. Validators are always assigned using the requires attribute of a field. A
  95. field can have a single validator or multiple validators. Multiple
  96. validators are made part of a list::
  97. db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')]
  98. Validators are called by the function accepts on a FORM or other HTML
  99. helper object that contains a form. They are always called in the order in
  100. which they are listed.
  101. Built-in validators have constructors that take the optional argument error
  102. message which allows you to change the default error message.
  103. Here is an example of a validator on a database table::
  104. db.person.name.requires=IS_NOT_EMPTY(error_message=T('Fill this'))
  105. where we have used the translation operator T to allow for
  106. internationalization.
  107. Notice that default error messages are not translated.
  108. """
  109. def formatter(self, value):
  110. """
  111. For some validators returns a formatted version (matching the validator)
  112. of value. Otherwise just returns the value.
  113. """
  114. return value
  115. def __call__(self, value):
  116. raise NotImplementedError
  117. return (value, None)
  118. class IS_MATCH(Validator):
  119. """
  120. Example:
  121. Used as::
  122. INPUT(_type='text', _name='name', requires=IS_MATCH('.+'))
  123. The argument of IS_MATCH is a regular expression::
  124. >>> IS_MATCH('.+')('hello')
  125. ('hello', None)
  126. >>> IS_MATCH('hell')('hello')
  127. ('hello', None)
  128. >>> IS_MATCH('hell.*', strict=False)('hello')
  129. ('hello', None)
  130. >>> IS_MATCH('hello')('shello')
  131. ('shello', 'invalid expression')
  132. >>> IS_MATCH('hello', search=True)('shello')
  133. ('shello', None)
  134. >>> IS_MATCH('hello', search=True, strict=False)('shellox')
  135. ('shellox', None)
  136. >>> IS_MATCH('.*hello.*', search=True, strict=False)('shellox')
  137. ('shellox', None)
  138. >>> IS_MATCH('.+')('')
  139. ('', 'invalid expression')
  140. """
  141. def __init__(self, expression, error_message='Invalid expression',
  142. strict=False, search=False, extract=False,
  143. is_unicode=False):
  144. if strict or not search:
  145. if not expression.startswith('^'):
  146. expression = '^(%s)' % expression
  147. if strict:
  148. if not expression.endswith('$'):
  149. expression = '(%s)$' % expression
  150. if is_unicode:
  151. if not isinstance(expression,unicode):
  152. expression = expression.decode('utf8')
  153. self.regex = re.compile(expression,re.UNICODE)
  154. else:
  155. self.regex = re.compile(expression)
  156. self.error_message = error_message
  157. self.extract = extract
  158. self.is_unicode = is_unicode
  159. def __call__(self, value):
  160. if self.is_unicode and not isinstance(value,unicode):
  161. match = self.regex.search(str(value).decode('utf8'))
  162. else:
  163. match = self.regex.search(str(value))
  164. if match is not None:
  165. return (self.extract and match.group() or value, None)
  166. return (value, translate(self.error_message))
  167. class IS_EQUAL_TO(Validator):
  168. """
  169. Example:
  170. Used as::
  171. INPUT(_type='text', _name='password')
  172. INPUT(_type='text', _name='password2',
  173. requires=IS_EQUAL_TO(request.vars.password))
  174. The argument of IS_EQUAL_TO is a string::
  175. >>> IS_EQUAL_TO('aaa')('aaa')
  176. ('aaa', None)
  177. >>> IS_EQUAL_TO('aaa')('aab')
  178. ('aab', 'no match')
  179. """
  180. def __init__(self, expression, error_message='No match'):
  181. self.expression = expression
  182. self.error_message = error_message
  183. def __call__(self, value):
  184. if value == self.expression:
  185. return (value, None)
  186. return (value, translate(self.error_message))
  187. class IS_EXPR(Validator):
  188. """
  189. Example:
  190. Used as::
  191. INPUT(_type='text', _name='name',
  192. requires=IS_EXPR('5 < int(value) < 10'))
  193. The argument of IS_EXPR must be python condition::
  194. >>> IS_EXPR('int(value) < 2')('1')
  195. ('1', None)
  196. >>> IS_EXPR('int(value) < 2')('2')
  197. ('2', 'invalid expression')
  198. """
  199. def __init__(self, expression, error_message='Invalid expression', environment=None):
  200. self.expression = expression
  201. self.error_message = error_message
  202. self.environment = environment or {}
  203. def __call__(self, value):
  204. if callable(self.expression):
  205. return (value, self.expression(value))
  206. # for backward compatibility
  207. self.environment.update(value=value)
  208. exec '__ret__=' + self.expression in self.environment
  209. if self.environment['__ret__']:
  210. return (value, None)
  211. return (value, translate(self.error_message))
  212. class IS_LENGTH(Validator):
  213. """
  214. Checks if length of field's value fits between given boundaries. Works
  215. for both text and file inputs.
  216. Args:
  217. maxsize: maximum allowed length / size
  218. minsize: minimum allowed length / size
  219. Examples:
  220. Check if text string is shorter than 33 characters::
  221. INPUT(_type='text', _name='name', requires=IS_LENGTH(32))
  222. Check if password string is longer than 5 characters::
  223. INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6))
  224. Check if uploaded file has size between 1KB and 1MB::
  225. INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024))
  226. Other examples::
  227. >>> IS_LENGTH()('')
  228. ('', None)
  229. >>> IS_LENGTH()('1234567890')
  230. ('1234567890', None)
  231. >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890') # too long
  232. ('1234567890', 'enter from 0 to 5 characters')
  233. >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890') # too short
  234. ('1234567890', 'enter from 20 to 50 characters')
  235. """
  236. def __init__(self, maxsize=255, minsize=0,
  237. error_message='Enter from %(min)g to %(max)g characters'):
  238. self.maxsize = maxsize
  239. self.minsize = minsize
  240. self.error_message = error_message
  241. def __call__(self, value):
  242. if value is None:
  243. length = 0
  244. if self.minsize <= length <= self.maxsize:
  245. return (value, None)
  246. elif isinstance(value, cgi.FieldStorage):
  247. if value.file:
  248. value.file.seek(0, os.SEEK_END)
  249. length = value.file.tell()
  250. value.file.seek(0, os.SEEK_SET)
  251. elif hasattr(value, 'value'):
  252. val = value.value
  253. if val:
  254. length = len(val)
  255. else:
  256. length = 0
  257. if self.minsize <= length <= self.maxsize:
  258. return (value, None)
  259. elif isinstance(value, str):
  260. try:
  261. lvalue = len(value.decode('utf8'))
  262. except:
  263. lvalue = len(value)
  264. if self.minsize <= lvalue <= self.maxsize:
  265. return (value, None)
  266. elif isinstance(value, unicode):
  267. if self.minsize <= len(value) <= self.maxsize:
  268. return (value.encode('utf8'), None)
  269. elif isinstance(value, (tuple, list)):
  270. if self.minsize <= len(value) <= self.maxsize:
  271. return (value, None)
  272. elif self.minsize <= len(str(value)) <= self.maxsize:
  273. return (str(value), None)
  274. return (value, translate(self.error_message)
  275. % dict(min=self.minsize, max=self.maxsize))
  276. class IS_JSON(Validator):
  277. """
  278. Example:
  279. Used as::
  280. INPUT(_type='text', _name='name',
  281. requires=IS_JSON(error_message="This is not a valid json input")
  282. >>> IS_JSON()('{"a": 100}')
  283. ({u'a': 100}, None)
  284. >>> IS_JSON()('spam1234')
  285. ('spam1234', 'invalid json')
  286. """
  287. def __init__(self, error_message='Invalid json', native_json=False):
  288. self.native_json = native_json
  289. self.error_message = error_message
  290. def __call__(self, value):
  291. try:
  292. if self.native_json:
  293. simplejson.loads(value) # raises error in case of malformed json
  294. return (value, None) # the serialized value is not passed
  295. else:
  296. return (simplejson.loads(value), None)
  297. except JSONErrors:
  298. return (value, translate(self.error_message))
  299. def formatter(self,value):
  300. if value is None:
  301. return None
  302. if self.native_json:
  303. return value
  304. else:
  305. return simplejson.dumps(value)
  306. class IS_IN_SET(Validator):
  307. """
  308. Example:
  309. Used as::
  310. INPUT(_type='text', _name='name',
  311. requires=IS_IN_SET(['max', 'john'],zero=''))
  312. The argument of IS_IN_SET must be a list or set::
  313. >>> IS_IN_SET(['max', 'john'])('max')
  314. ('max', None)
  315. >>> IS_IN_SET(['max', 'john'])('massimo')
  316. ('massimo', 'value not allowed')
  317. >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john'))
  318. (('max', 'john'), None)
  319. >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john'))
  320. (('bill', 'john'), 'value not allowed')
  321. >>> IS_IN_SET(('id1','id2'), ['first label','second label'])('id1') # Traditional way
  322. ('id1', None)
  323. >>> IS_IN_SET({'id1':'first label', 'id2':'second label'})('id1')
  324. ('id1', None)
  325. >>> import itertools
  326. >>> IS_IN_SET(itertools.chain(['1','3','5'],['2','4','6']))('1')
  327. ('1', None)
  328. >>> IS_IN_SET([('id1','first label'), ('id2','second label')])('id1') # Redundant way
  329. ('id1', None)
  330. """
  331. def __init__(
  332. self,
  333. theset,
  334. labels=None,
  335. error_message='Value not allowed',
  336. multiple=False,
  337. zero='',
  338. sort=False,
  339. ):
  340. self.multiple = multiple
  341. if isinstance(theset, dict):
  342. self.theset = [str(item) for item in theset]
  343. self.labels = theset.values()
  344. elif theset and isinstance(theset, (tuple, list)) \
  345. and isinstance(theset[0], (tuple, list)) and len(theset[0]) == 2:
  346. self.theset = [str(item) for item, label in theset]
  347. self.labels = [str(label) for item, label in theset]
  348. else:
  349. self.theset = [str(item) for item in theset]
  350. self.labels = labels
  351. self.error_message = error_message
  352. self.zero = zero
  353. self.sort = sort
  354. def options(self, zero=True):
  355. if not self.labels:
  356. items = [(k, k) for (i, k) in enumerate(self.theset)]
  357. else:
  358. items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
  359. if self.sort:
  360. items.sort(options_sorter)
  361. if zero and not self.zero is None and not self.multiple:
  362. items.insert(0, ('', self.zero))
  363. return items
  364. def __call__(self, value):
  365. if self.multiple:
  366. ### if below was values = re.compile("[\w\-:]+").findall(str(value))
  367. if not value:
  368. values = []
  369. elif isinstance(value, (tuple, list)):
  370. values = value
  371. else:
  372. values = [value]
  373. else:
  374. values = [value]
  375. thestrset = [str(x) for x in self.theset]
  376. failures = [x for x in values if not str(x) in thestrset]
  377. if failures and self.theset:
  378. if self.multiple and (value is None or value == ''):
  379. return ([], None)
  380. return (value, translate(self.error_message))
  381. if self.multiple:
  382. if isinstance(self.multiple, (tuple, list)) and \
  383. not self.multiple[0] <= len(values) < self.multiple[1]:
  384. return (values, translate(self.error_message))
  385. return (values, None)
  386. return (value, None)
# "word.word" at the start of a string; IS_IN_DB uses it to recognise a
# label given as a 'table.field' name.
regex1 = re.compile('\w+\.\w+')
# Captures the key name of each '%(name)<spec>' placeholder in a format
# string; IS_IN_DB uses it to find the fields a label refers to.
regex2 = re.compile('%\(([^\)]+)\)\d*(?:\.\d+)?[a-zA-Z]')
class IS_IN_DB(Validator):
    """
    Validate that a value (or every value, with ``multiple``) exists in a
    database field.

    Example:
        Used as::

            INPUT(_type='text', _name='name',
                  requires=IS_IN_DB(db, db.mytable.myfield, zero=''))

    used for reference fields, rendered as a dropbox

    Args:
        dbset: object the queries are run against; if it has a
            ``define_table`` attribute it is called with no arguments and
            the result is used instead.
        field: a 'table.field' reference (anything whose str() has that
            form) or a Table, in which case its ``_id`` field is used.
        label: a '%(field)s'-style format string, a 'table.field' name, or
            a callable taking a record; used to build the option labels.
        error_message: message returned (after translation) on failure.
        orderby, groupby, distinct, cache: forwarded to the select that
            builds the option set.
        multiple: accept a list of values; a (min, max) pair also requires
            ``min <= number of values < max``.
        zero: label of the empty option prepended by options(); None
            disables it.
        sort: sort the options by label in options().
        _and: optional validator applied after a successful lookup
            (not supported together with ``multiple``).
    """

    def __init__(
        self,
        dbset,
        field,
        label=None,
        error_message='Value not in database',
        orderby=None,
        groupby=None,
        distinct=None,
        cache=None,
        multiple=False,
        zero='',
        sort=False,
        _and=None,
    ):
        from pydal.objects import Table
        if isinstance(field, Table):
            field = field._id
        if hasattr(dbset, 'define_table'):
            self.dbset = dbset()
        else:
            self.dbset = dbset
        (ktable, kfield) = str(field).split('.')
        # default label: the key field itself
        if not label:
            label = '%%(%s)s' % kfield
        if isinstance(label, str):
            # a bare 'table.field' label becomes the format '%(field)s'
            if regex1.match(str(label)):
                label = '%%(%s)s' % str(label).split('.')[-1]
            # fields to select: those named in the format plus the key field
            ks = regex2.findall(label)
            if not kfield in ks:
                ks += [kfield]
            fields = ks
        else:
            # non-string (callable) label: select every field of the table
            ks = [kfield]
            fields = 'all'
        self.fields = fields
        self.label = label
        self.ktable = ktable
        self.kfield = kfield
        self.ks = ks
        self.error_message = error_message
        # filled by build_set(); while empty, __call__ queries the database
        self.theset = None
        self.orderby = orderby
        self.groupby = groupby
        self.distinct = distinct
        self.cache = cache
        self.multiple = multiple
        self.zero = zero
        self.sort = sort
        self._and = _and

    def set_self_id(self, id):
        # forward the record id to the chained validator, if any
        if self._and:
            self._and.record_id = id

    def build_set(self):
        """Query the table and fill self.theset / self.labels."""
        table = self.dbset.db[self.ktable]
        if self.fields == 'all':
            fields = [f for f in table]
        else:
            fields = [table[k] for k in self.fields]
        # virtual and method fields are not selected
        ignore = (FieldVirtual,FieldMethod)
        fields = filter(lambda f:not isinstance(f,ignore), fields)
        if self.dbset.db._dbname != 'gae':
            # default ordering: all selected fields combined with |
            orderby = self.orderby or reduce(lambda a, b: a | b, fields)
            groupby = self.groupby
            distinct = self.distinct
            dd = dict(orderby=orderby, groupby=groupby,
                      distinct=distinct, cache=self.cache,
                      cacheable=True)
            records = self.dbset(table).select(*fields, **dd)
        else:
            # 'gae': select all columns, no groupby/distinct, and leave the
            # 'id' field out of the default ordering
            orderby = self.orderby or \
                reduce(lambda a, b: a | b, (
                    f for f in fields if not f.name == 'id'))
            dd = dict(orderby=orderby, cache=self.cache, cacheable=True)
            records = self.dbset(table).select(table.ALL, **dd)
        self.theset = [str(r[self.kfield]) for r in records]
        if isinstance(self.label, str):
            self.labels = [self.label % r for r in records]
        else:
            self.labels = [self.label(r) for r in records]

    def options(self, zero=True):
        """Return the (value, label) pairs; re-queries the table each call."""
        self.build_set()
        items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
        if self.sort:
            items.sort(options_sorter)
        if zero and not self.zero is None and not self.multiple:
            items.insert(0, ('', self.zero))
        return items

    def __call__(self, value):
        """Return (value(s), None) when found, else (value, error)."""
        table = self.dbset.db[self.ktable]
        field = table[self.kfield]
        if self.multiple:
            if self._and:
                raise NotImplementedError
            if isinstance(value, list):
                values = value
            elif value:
                values = [value]
            else:
                values = []
            # optional bound on how many values may be selected
            if isinstance(self.multiple, (tuple, list)) and \
                    not self.multiple[0] <= len(values) < self.multiple[1]:
                return (values, translate(self.error_message))
            if self.theset:
                # option set already built: every value must be in it
                if not [v for v in values if not v in self.theset]:
                    return (values, None)
            else:
                from pydal.adapters import GoogleDatastoreAdapter

                # number of rows whose field is one of the (int-cast) values
                def count(values, s=self.dbset, f=field):
                    return s(f.belongs(map(int, values))).count()
                if isinstance(self.dbset.db._adapter, GoogleDatastoreAdapter):
                    # on this adapter the lookup is done in chunks of 30
                    range_ids = range(0, len(values), 30)
                    total = sum(count(values[i:i + 30]) for i in range_ids)
                    if total == len(values):
                        return (values, None)
                elif count(values) == len(values):
                    return (values, None)
        elif self.theset:
            # single value checked against the already built option set
            if str(value) in self.theset:
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        else:
            # single value checked directly against the database
            if self.dbset(field == value).count():
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        return (value, translate(self.error_message))
  527. class IS_NOT_IN_DB(Validator):
  528. """
  529. Example:
  530. Used as::
  531. INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table))
  532. makes the field unique
  533. """
  534. def __init__(
  535. self,
  536. dbset,
  537. field,
  538. error_message='Value already in database or empty',
  539. allowed_override=[],
  540. ignore_common_filters=False,
  541. ):
  542. from pydal.objects import Table
  543. if isinstance(field, Table):
  544. field = field._id
  545. if hasattr(dbset, 'define_table'):
  546. self.dbset = dbset()
  547. else:
  548. self.dbset = dbset
  549. self.field = field
  550. self.error_message = error_message
  551. self.record_id = 0
  552. self.allowed_override = allowed_override
  553. self.ignore_common_filters = ignore_common_filters
  554. def set_self_id(self, id):
  555. self.record_id = id
  556. def __call__(self, value):
  557. if isinstance(value,unicode):
  558. value = value.encode('utf8')
  559. else:
  560. value = str(value)
  561. if not value.strip():
  562. return (value, translate(self.error_message))
  563. if value in self.allowed_override:
  564. return (value, None)
  565. (tablename, fieldname) = str(self.field).split('.')
  566. table = self.dbset.db[tablename]
  567. field = table[fieldname]
  568. subset = self.dbset(field == value,
  569. ignore_common_filters=self.ignore_common_filters)
  570. id = self.record_id
  571. if isinstance(id, dict):
  572. fields = [table[f] for f in id]
  573. row = subset.select(*fields, **dict(limitby=(0, 1), orderby_on_limitby=False)).first()
  574. if row and any(str(row[f]) != str(id[f]) for f in id):
  575. return (value, translate(self.error_message))
  576. else:
  577. row = subset.select(table._id, field, limitby=(0, 1), orderby_on_limitby=False).first()
  578. if row and str(row.id) != str(id):
  579. return (value, translate(self.error_message))
  580. return (value, None)
  581. def range_error_message(error_message, what_to_enter, minimum, maximum):
  582. "build the error message for the number range validators"
  583. if error_message is None:
  584. error_message = 'Enter ' + what_to_enter
  585. if minimum is not None and maximum is not None:
  586. error_message += ' between %(min)g and %(max)g'
  587. elif minimum is not None:
  588. error_message += ' greater than or equal to %(min)g'
  589. elif maximum is not None:
  590. error_message += ' less than or equal to %(max)g'
  591. if type(maximum) in [int, long]:
  592. maximum -= 1
  593. return translate(error_message) % dict(min=minimum, max=maximum)
  594. class IS_INT_IN_RANGE(Validator):
  595. """
  596. Determines that the argument is (or can be represented as) an int,
  597. and that it falls within the specified range. The range is interpreted
  598. in the Pythonic way, so the test is: min <= value < max.
  599. The minimum and maximum limits can be None, meaning no lower or upper limit,
  600. respectively.
  601. Example:
  602. Used as::
  603. INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10))
  604. >>> IS_INT_IN_RANGE(1,5)('4')
  605. (4, None)
  606. >>> IS_INT_IN_RANGE(1,5)(4)
  607. (4, None)
  608. >>> IS_INT_IN_RANGE(1,5)(1)
  609. (1, None)
  610. >>> IS_INT_IN_RANGE(1,5)(5)
  611. (5, 'enter an integer between 1 and 4')
  612. >>> IS_INT_IN_RANGE(1,5)(5)
  613. (5, 'enter an integer between 1 and 4')
  614. >>> IS_INT_IN_RANGE(1,5)(3.5)
  615. (3.5, 'enter an integer between 1 and 4')
  616. >>> IS_INT_IN_RANGE(None,5)('4')
  617. (4, None)
  618. >>> IS_INT_IN_RANGE(None,5)('6')
  619. ('6', 'enter an integer less than or equal to 4')
  620. >>> IS_INT_IN_RANGE(1,None)('4')
  621. (4, None)
  622. >>> IS_INT_IN_RANGE(1,None)('0')
  623. ('0', 'enter an integer greater than or equal to 1')
  624. >>> IS_INT_IN_RANGE()(6)
  625. (6, None)
  626. >>> IS_INT_IN_RANGE()('abc')
  627. ('abc', 'enter an integer')
  628. """
  629. def __init__(
  630. self,
  631. minimum=None,
  632. maximum=None,
  633. error_message=None,
  634. ):
  635. self.minimum = int(minimum) if minimum is not None else None
  636. self.maximum = int(maximum) if maximum is not None else None
  637. self.error_message = range_error_message(
  638. error_message, 'an integer', self.minimum, self.maximum)
  639. def __call__(self, value):
  640. if regex_isint.match(str(value)):
  641. v = int(value)
  642. if ((self.minimum is None or v >= self.minimum) and
  643. (self.maximum is None or v < self.maximum)):
  644. return (v, None)
  645. return (value, self.error_message)
  646. def str2dec(number):
  647. s = str(number)
  648. if not '.' in s:
  649. s += '.00'
  650. else:
  651. s += '0' * (2 - len(s.split('.')[1]))
  652. return s
  653. class IS_FLOAT_IN_RANGE(Validator):
  654. """
  655. Determines that the argument is (or can be represented as) a float,
  656. and that it falls within the specified inclusive range.
  657. The comparison is made with native arithmetic.
  658. The minimum and maximum limits can be None, meaning no lower or upper limit,
  659. respectively.
  660. Example:
  661. Used as::
  662. INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10))
  663. >>> IS_FLOAT_IN_RANGE(1,5)('4')
  664. (4.0, None)
  665. >>> IS_FLOAT_IN_RANGE(1,5)(4)
  666. (4.0, None)
  667. >>> IS_FLOAT_IN_RANGE(1,5)(1)
  668. (1.0, None)
  669. >>> IS_FLOAT_IN_RANGE(1,5)(5.25)
  670. (5.25, 'enter a number between 1 and 5')
  671. >>> IS_FLOAT_IN_RANGE(1,5)(6.0)
  672. (6.0, 'enter a number between 1 and 5')
  673. >>> IS_FLOAT_IN_RANGE(1,5)(3.5)
  674. (3.5, None)
  675. >>> IS_FLOAT_IN_RANGE(1,None)(3.5)
  676. (3.5, None)
  677. >>> IS_FLOAT_IN_RANGE(None,5)(3.5)
  678. (3.5, None)
  679. >>> IS_FLOAT_IN_RANGE(1,None)(0.5)
  680. (0.5, 'enter a number greater than or equal to 1')
  681. >>> IS_FLOAT_IN_RANGE(None,5)(6.5)
  682. (6.5, 'enter a number less than or equal to 5')
  683. >>> IS_FLOAT_IN_RANGE()(6.5)
  684. (6.5, None)
  685. >>> IS_FLOAT_IN_RANGE()('abc')
  686. ('abc', 'enter a number')
  687. """
  688. def __init__(
  689. self,
  690. minimum=None,
  691. maximum=None,
  692. error_message=None,
  693. dot='.'
  694. ):
  695. self.minimum = float(minimum) if minimum is not None else None
  696. self.maximum = float(maximum) if maximum is not None else None
  697. self.dot = str(dot)
  698. self.error_message = range_error_message(
  699. error_message, 'a number', self.minimum, self.maximum)
  700. def __call__(self, value):
  701. try:
  702. if self.dot == '.':
  703. v = float(value)
  704. else:
  705. v = float(str(value).replace(self.dot, '.'))
  706. if ((self.minimum is None or v >= self.minimum) and
  707. (self.maximum is None or v <= self.maximum)):
  708. return (v, None)
  709. except (ValueError, TypeError):
  710. pass
  711. return (value, self.error_message)
  712. def formatter(self, value):
  713. if value is None:
  714. return None
  715. return str2dec(value).replace('.', self.dot)
  716. class IS_DECIMAL_IN_RANGE(Validator):
  717. """
  718. Determines that the argument is (or can be represented as) a Python Decimal,
  719. and that it falls within the specified inclusive range.
  720. The comparison is made with Python Decimal arithmetic.
  721. The minimum and maximum limits can be None, meaning no lower or upper limit,
  722. respectively.
  723. Example:
  724. Used as::
  725. INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10))
  726. >>> IS_DECIMAL_IN_RANGE(1,5)('4')
  727. (Decimal('4'), None)
  728. >>> IS_DECIMAL_IN_RANGE(1,5)(4)
  729. (Decimal('4'), None)
  730. >>> IS_DECIMAL_IN_RANGE(1,5)(1)
  731. (Decimal('1'), None)
  732. >>> IS_DECIMAL_IN_RANGE(1,5)(5.25)
  733. (5.25, 'enter a number between 1 and 5')
  734. >>> IS_DECIMAL_IN_RANGE(5.25,6)(5.25)
  735. (Decimal('5.25'), None)
  736. >>> IS_DECIMAL_IN_RANGE(5.25,6)('5.25')
  737. (Decimal('5.25'), None)
  738. >>> IS_DECIMAL_IN_RANGE(1,5)(6.0)
  739. (6.0, 'enter a number between 1 and 5')
  740. >>> IS_DECIMAL_IN_RANGE(1,5)(3.5)
  741. (Decimal('3.5'), None)
  742. >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5)
  743. (Decimal('3.5'), None)
  744. >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5)
  745. (6.5, 'enter a number between 1.5 and 5.5')
  746. >>> IS_DECIMAL_IN_RANGE(1.5,None)(6.5)
  747. (Decimal('6.5'), None)
  748. >>> IS_DECIMAL_IN_RANGE(1.5,None)(0.5)
  749. (0.5, 'enter a number greater than or equal to 1.5')
  750. >>> IS_DECIMAL_IN_RANGE(None,5.5)(4.5)
  751. (Decimal('4.5'), None)
  752. >>> IS_DECIMAL_IN_RANGE(None,5.5)(6.5)
  753. (6.5, 'enter a number less than or equal to 5.5')
  754. >>> IS_DECIMAL_IN_RANGE()(6.5)
  755. (Decimal('6.5'), None)
  756. >>> IS_DECIMAL_IN_RANGE(0,99)(123.123)
  757. (123.123, 'enter a number between 0 and 99')
  758. >>> IS_DECIMAL_IN_RANGE(0,99)('123.123')
  759. ('123.123', 'enter a number between 0 and 99')
  760. >>> IS_DECIMAL_IN_RANGE(0,99)('12.34')
  761. (Decimal('12.34'), None)
  762. >>> IS_DECIMAL_IN_RANGE()('abc')
  763. ('abc', 'enter a number')
  764. """
  765. def __init__(
  766. self,
  767. minimum=None,
  768. maximum=None,
  769. error_message=None,
  770. dot='.'
  771. ):
  772. self.minimum = decimal.Decimal(str(minimum)) if minimum is not None else None
  773. self.maximum = decimal.Decimal(str(maximum)) if maximum is not None else None
  774. self.dot = str(dot)
  775. self.error_message = range_error_message(
  776. error_message, 'a number', self.minimum, self.maximum)
  777. def __call__(self, value):
  778. try:
  779. if isinstance(value, decimal.Decimal):
  780. v = value
  781. else:
  782. v = decimal.Decimal(str(value).replace(self.dot, '.'))
  783. if ((self.minimum is None or v >= self.minimum) and
  784. (self.maximum is None or v <= self.maximum)):
  785. return (v, None)
  786. except (ValueError, TypeError, decimal.InvalidOperation):
  787. pass
  788. return (value, self.error_message)
  789. def formatter(self, value):
  790. if value is None:
  791. return None
  792. return str2dec(value).replace('.', self.dot)
  793. def is_empty(value, empty_regex=None):
  794. "test empty field"
  795. if isinstance(value, (str, unicode)):
  796. value = value.strip()
  797. if empty_regex is not None and empty_regex.match(value):
  798. value = ''
  799. if value is None or value == '' or value == []:
  800. return (value, True)
  801. return (value, False)
  802. class IS_NOT_EMPTY(Validator):
  803. """
  804. Example:
  805. Used as::
  806. INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY())
  807. >>> IS_NOT_EMPTY()(1)
  808. (1, None)
  809. >>> IS_NOT_EMPTY()(0)
  810. (0, None)
  811. >>> IS_NOT_EMPTY()('x')
  812. ('x', None)
  813. >>> IS_NOT_EMPTY()(' x ')
  814. ('x', None)
  815. >>> IS_NOT_EMPTY()(None)
  816. (None, 'enter a value')
  817. >>> IS_NOT_EMPTY()('')
  818. ('', 'enter a value')
  819. >>> IS_NOT_EMPTY()(' ')
  820. ('', 'enter a value')
  821. >>> IS_NOT_EMPTY()(' \\n\\t')
  822. ('', 'enter a value')
  823. >>> IS_NOT_EMPTY()([])
  824. ([], 'enter a value')
  825. >>> IS_NOT_EMPTY(empty_regex='def')('def')
  826. ('', 'enter a value')
  827. >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg')
  828. ('', 'enter a value')
  829. >>> IS_NOT_EMPTY(empty_regex='def')('abc')
  830. ('abc', None)
  831. """
  832. def __init__(self, error_message='Enter a value', empty_regex=None):
  833. self.error_message = error_message
  834. if empty_regex is not None:
  835. self.empty_regex = re.compile(empty_regex)
  836. else:
  837. self.empty_regex = None
  838. def __call__(self, value):
  839. value, empty = is_empty(value, empty_regex=self.empty_regex)
  840. if empty:
  841. return (value, translate(self.error_message))
  842. return (value, None)
  843. class IS_ALPHANUMERIC(IS_MATCH):
  844. """
  845. Example:
  846. Used as::
  847. INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC())
  848. >>> IS_ALPHANUMERIC()('1')
  849. ('1', None)
  850. >>> IS_ALPHANUMERIC()('')
  851. ('', None)
  852. >>> IS_ALPHANUMERIC()('A_a')
  853. ('A_a', None)
  854. >>> IS_ALPHANUMERIC()('!')
  855. ('!', 'enter only letters, numbers, and underscore')
  856. """
  857. def __init__(self, error_message='Enter only letters, numbers, and underscore'):
  858. IS_MATCH.__init__(self, '^[\w]*$', error_message)
  859. class IS_EMAIL(Validator):
  860. """
  861. Checks if field's value is a valid email address. Can be set to disallow
  862. or force addresses from certain domain(s).
  863. Email regex adapted from
  864. http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx,
  865. generally following the RFCs, except that we disallow quoted strings
  866. and permit underscores and leading numerics in subdomain labels
  867. Args:
  868. banned: regex text for disallowed address domains
  869. forced: regex text for required address domains
  870. Both arguments can also be custom objects with a match(value) method.
  871. Example:
  872. Check for valid email address::
  873. INPUT(_type='text', _name='name',
  874. requires=IS_EMAIL())
  875. Check for valid email address that can't be from a .com domain::
  876. INPUT(_type='text', _name='name',
  877. requires=IS_EMAIL(banned='^.*\.com(|\..*)$'))
  878. Check for valid email address that must be from a .edu domain::
  879. INPUT(_type='text', _name='name',
  880. requires=IS_EMAIL(forced='^.*\.edu(|\..*)$'))
  881. >>> IS_EMAIL()('a@b.com')
  882. ('a@b.com', None)
  883. >>> IS_EMAIL()('abc@def.com')
  884. ('abc@def.com', None)
  885. >>> IS_EMAIL()('abc@3def.com')
  886. ('abc@3def.com', None)
  887. >>> IS_EMAIL()('abc@def.us')
  888. ('abc@def.us', None)
  889. >>> IS_EMAIL()('abc@d_-f.us')
  890. ('abc@d_-f.us', None)
  891. >>> IS_EMAIL()('@def.com') # missing name
  892. ('@def.com', 'enter a valid email address')
  893. >>> IS_EMAIL()('"abc@def".com') # quoted name
  894. ('"abc@def".com', 'enter a valid email address')
  895. >>> IS_EMAIL()('abc+def.com') # no @
  896. ('abc+def.com', 'enter a valid email address')
  897. >>> IS_EMAIL()('abc@def.x') # one-char TLD
  898. ('abc@def.x', 'enter a valid email address')
  899. >>> IS_EMAIL()('abc@def.12') # numeric TLD
  900. ('abc@def.12', 'enter a valid email address')
  901. >>> IS_EMAIL()('abc@def..com') # double-dot in domain
  902. ('abc@def..com', 'enter a valid email address')
  903. >>> IS_EMAIL()('abc@.def.com') # dot starts domain
  904. ('abc@.def.com', 'enter a valid email address')
  905. >>> IS_EMAIL()('abc@def.c_m') # underscore in TLD
  906. ('abc@def.c_m', 'enter a valid email address')
  907. >>> IS_EMAIL()('NotAnEmail') # missing @
  908. ('NotAnEmail', 'enter a valid email address')
  909. >>> IS_EMAIL()('abc@NotAnEmail') # missing TLD
  910. ('abc@NotAnEmail', 'enter a valid email address')
  911. >>> IS_EMAIL()('customer/department@example.com')
  912. ('customer/department@example.com', None)
  913. >>> IS_EMAIL()('$A12345@example.com')
  914. ('$A12345@example.com', None)
  915. >>> IS_EMAIL()('!def!xyz%abc@example.com')
  916. ('!def!xyz%abc@example.com', None)
  917. >>> IS_EMAIL()('_Yosemite.Sam@example.com')
  918. ('_Yosemite.Sam@example.com', None)
  919. >>> IS_EMAIL()('~@example.com')
  920. ('~@example.com', None)
  921. >>> IS_EMAIL()('.wooly@example.com') # dot starts name
  922. ('.wooly@example.com', 'enter a valid email address')
  923. >>> IS_EMAIL()('wo..oly@example.com') # adjacent dots in name
  924. ('wo..oly@example.com', 'enter a valid email address')
  925. >>> IS_EMAIL()('pootietang.@example.com') # dot ends name
  926. ('pootietang.@example.com', 'enter a valid email address')
  927. >>> IS_EMAIL()('.@example.com') # name is bare dot
  928. ('.@example.com', 'enter a valid email address')
  929. >>> IS_EMAIL()('Ima.Fool@example.com')
  930. ('Ima.Fool@example.com', None)
  931. >>> IS_EMAIL()('Ima Fool@example.com') # space in name
  932. ('Ima Fool@example.com', 'enter a valid email address')
  933. >>> IS_EMAIL()('localguy@localhost') # localhost as domain
  934. ('localguy@localhost', None)
  935. """
  936. regex = re.compile('''
  937. ^(?!\.) # name may not begin with a dot
  938. (
  939. [-a-z0-9!\#$%&'*+/=?^_`{|}~] # all legal characters except dot
  940. |
  941. (?<!\.)\. # single dots only
  942. )+
  943. (?<!\.) # name may not end with a dot
  944. @
  945. (
  946. localhost
  947. |
  948. (
  949. [a-z0-9]
  950. # [sub]domain begins with alphanumeric
  951. (
  952. [-\w]* # alphanumeric, underscore, dot, hyphen
  953. [a-z0-9] # ending alphanumeric
  954. )?
  955. \. # ending dot
  956. )+
  957. [a-z]{2,} # TLD alpha-only
  958. )$
  959. ''', re.VERBOSE | re.IGNORECASE)
  960. regex_proposed_but_failed = re.compile('^([\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*[\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+@((((([a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(\d{1,3}\.){3}\d{1,3}(\:\d{1,5})?)$', re.VERBOSE | re.IGNORECASE)
  961. def __init__(self,
  962. banned=None,
  963. forced=None,
  964. error_message='Enter a valid email address'):
  965. if isinstance(banned, str):
  966. banned = re.compile(banned)
  967. if isinstance(forced, str):
  968. forced = re.compile(forced)
  969. self.banned = banned
  970. self.forced = forced
  971. self.error_message = error_message
  972. def __call__(self, value):
  973. match = self.regex.match(value)
  974. if match:
  975. domain = value.split('@')[1]
  976. if (not self.banned or not self.banned.match(domain)) \
  977. and (not self.forced or self.forced.match(domain)):
  978. return (value, None)
  979. return (value, translate(self.error_message))
  980. class IS_LIST_OF_EMAILS(object):
  981. """
  982. Example:
  983. Used as::
  984. Field('emails','list:string',
  985. widget=SQLFORM.widgets.text.widget,
  986. requires=IS_LIST_OF_EMAILS(),
  987. represent=lambda v,r: \
  988. SPAN(*[A(x,_href='mailto:'+x) for x in (v or [])])
  989. )
  990. """
  991. split_emails = re.compile('[^,;\s]+')
  992. def __init__(self, error_message = 'Invalid emails: %s'):
  993. self.error_message = error_message
  994. def __call__(self, value):
  995. bad_emails = []
  996. f = IS_EMAIL()
  997. for email in self.split_emails.findall(value):
  998. error = f(email)[1]
  999. if error and not email in bad_emails:
  1000. bad_emails.append(email)
  1001. if not bad_emails:
  1002. return (value, None)
  1003. else:
  1004. return (value,
  1005. translate(self.error_message) % ', '.join(bad_emails))
  1006. def formatter(self,value,row=None):
  1007. return ', '.join(value or [])
  1008. # URL scheme source:
  1009. # <http://en.wikipedia.org/wiki/URI_scheme> obtained on 2008-Nov-10
  1010. official_url_schemes = [
  1011. 'aaa',
  1012. 'aaas',
  1013. 'acap',
  1014. 'cap',
  1015. 'cid',
  1016. 'crid',
  1017. 'data',
  1018. 'dav',
  1019. 'dict',
  1020. 'dns',
  1021. 'fax',
  1022. 'file',
  1023. 'ftp',
  1024. 'go',
  1025. 'gopher',
  1026. 'h323',
  1027. 'http',
  1028. 'https',
  1029. 'icap',
  1030. 'im',
  1031. 'imap',
  1032. 'info',
  1033. 'ipp',
  1034. 'iris',
  1035. 'iris.beep',
  1036. 'iris.xpc',
  1037. 'iris.xpcs',
  1038. 'iris.lws',
  1039. 'ldap',
  1040. 'mailto',
  1041. 'mid',
  1042. 'modem',
  1043. 'msrp',
  1044. 'msrps',
  1045. 'mtqp',
  1046. 'mupdate',
  1047. 'news',
  1048. 'nfs',
  1049. 'nntp',
  1050. 'opaquelocktoken',
  1051. 'pop',
  1052. 'pres',
  1053. 'prospero',
  1054. 'rtsp',
  1055. 'service',
  1056. 'shttp',
  1057. 'sip',
  1058. 'sips',
  1059. 'snmp',
  1060. 'soap.beep',
  1061. 'soap.beeps',
  1062. 'tag',
  1063. 'tel',
  1064. 'telnet',
  1065. 'tftp',
  1066. 'thismessage',
  1067. 'tip',
  1068. 'tv',
  1069. 'urn',
  1070. 'vemmi',
  1071. 'wais',
  1072. 'xmlrpc.beep',
  1073. 'xmlrpc.beep',
  1074. 'xmpp',
  1075. 'z39.50r',
  1076. 'z39.50s',
  1077. ]
  1078. unofficial_url_schemes = [
  1079. 'about',
  1080. 'adiumxtra',
  1081. 'aim',
  1082. 'afp',
  1083. 'aw',
  1084. 'callto',
  1085. 'chrome',
  1086. 'cvs',
  1087. 'ed2k',
  1088. 'feed',
  1089. 'fish',
  1090. 'gg',
  1091. 'gizmoproject',
  1092. 'iax2',
  1093. 'irc',
  1094. 'ircs',
  1095. 'itms',
  1096. 'jar',
  1097. 'javascript',
  1098. 'keyparc',
  1099. 'lastfm',
  1100. 'ldaps',
  1101. 'magnet',
  1102. 'mms',
  1103. 'msnim',
  1104. 'mvn',
  1105. 'notes',
  1106. 'nsfw',
  1107. 'psyc',
  1108. 'paparazzi:http',
  1109. 'rmi',
  1110. 'rsync',
  1111. 'secondlife',
  1112. 'sgn',
  1113. 'skype',
  1114. 'ssh',
  1115. 'sftp',
  1116. 'smb',
  1117. 'sms',
  1118. 'soldat',
  1119. 'steam',
  1120. 'svn',
  1121. 'teamspeak',
  1122. 'unreal',
  1123. 'ut2004',
  1124. 'ventrilo',
  1125. 'view-source',
  1126. 'webcal',
  1127. 'wyciwyg',
  1128. 'xfire',
  1129. 'xri',
  1130. 'ymsgr',
  1131. ]
  1132. all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes
  1133. http_schemes = [None, 'http', 'https']
  1134. # This regex comes from RFC 2396, Appendix B. It's used to split a URL into
  1135. # its component parts
  1136. # Here are the regex groups that it extracts:
  1137. # scheme = group(2)
  1138. # authority = group(4)
  1139. # path = group(5)
  1140. # query = group(7)
  1141. # fragment = group(9)
  1142. url_split_regex = \
  1143. re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?')
  1144. # Defined in RFC 3490, Section 3.1, Requirement #1
  1145. # Use this regex to split the authority component of a unicode URL into
  1146. # its component labels
  1147. label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]')
  1148. def escape_unicode(string):
  1149. '''
  1150. Converts a unicode string into US-ASCII, using a simple conversion scheme.
  1151. Each unicode character that does not have a US-ASCII equivalent is
  1152. converted into a URL escaped form based on its hexadecimal value.
  1153. For example, the unicode character '\u4e86' will become the string '%4e%86'
  1154. Args:
  1155. string: unicode string, the unicode string to convert into an
  1156. escaped US-ASCII form
  1157. Returns:
  1158. string: the US-ASCII escaped form of the inputted string
  1159. @author: Jonathan Benn
  1160. '''
  1161. returnValue = StringIO()
  1162. for character in string:
  1163. code = ord(character)
  1164. if code > 0x7F:
  1165. hexCode = hex(code)
  1166. returnValue.write('%' + hexCode[2:4] + '%' + hexCode[4:6])
  1167. else:
  1168. returnValue.write(character)
  1169. return returnValue.getvalue()
  1170. def unicode_to_ascii_authority(authority):
  1171. '''
  1172. Follows the steps in RFC 3490, Section 4 to convert a unicode authority
  1173. string into its ASCII equivalent.
  1174. For example, u'www.Alliancefran\xe7aise.nu' will be converted into
  1175. 'www.xn--alliancefranaise-npb.nu'
  1176. Args:
  1177. authority: unicode string, the URL authority component to convert,
  1178. e.g. u'www.Alliancefran\xe7aise.nu'
  1179. Returns:
  1180. string: the US-ASCII character equivalent to the inputed authority,
  1181. e.g. 'www.xn--alliancefranaise-npb.nu'
  1182. Raises:
  1183. Exception: if the function is not able to convert the inputed
  1184. authority
  1185. @author: Jonathan Benn
  1186. '''
  1187. #RFC 3490, Section 4, Step 1
  1188. #The encodings.idna Python module assumes that AllowUnassigned == True
  1189. #RFC 3490, Section 4, Step 2
  1190. labels = label_split_regex.split(authority)
  1191. #RFC 3490, Section 4, Step 3
  1192. #The encodings.idna Python module assumes that UseSTD3ASCIIRules == False
  1193. #RFC 3490, Section 4, Step 4
  1194. #We use the ToASCII operation because we are about to put the authority
  1195. #into an IDN-unaware slot
  1196. asciiLabels = []
  1197. try:
  1198. import encodings.idna
  1199. for label in labels:
  1200. if label:
  1201. asciiLabels.append(encodings.idna.ToASCII(label))
  1202. else:
  1203. #encodings.idna.ToASCII does not accept an empty string, but
  1204. #it is necessary for us to allow for empty labels so that we
  1205. #don't modify the URL
  1206. asciiLabels.append('')
  1207. except:
  1208. asciiLabels = [str(label) for label in labels]
  1209. #RFC 3490, Section 4, Step 5
  1210. return str(reduce(lambda x, y: x + unichr(0x002E) + y, asciiLabels))
  1211. def unicode_to_ascii_url(url, prepend_scheme):
  1212. '''
  1213. Converts the inputed unicode url into a US-ASCII equivalent. This function
  1214. goes a little beyond RFC 3490, which is limited in scope to the domain name
  1215. (authority) only. Here, the functionality is expanded to what was observed
  1216. on Wikipedia on 2009-Jan-22:
  1217. Component Can Use Unicode?
  1218. --------- ----------------
  1219. scheme No
  1220. authority Yes
  1221. path Yes
  1222. query Yes
  1223. fragment No
  1224. The authority component gets converted to punycode, but occurrences of
  1225. unicode in other components get converted into a pair of URI escapes (we
  1226. assume 4-byte unicode). E.g. the unicode character U+4E2D will be
  1227. converted into '%4E%2D'. Testing with Firefox v3.0.5 has shown that it can
  1228. understand this kind of URI encoding.
  1229. Args:
  1230. url: unicode string, the URL to convert from unicode into US-ASCII
  1231. prepend_scheme: string, a protocol scheme to prepend to the URL if
  1232. we're having trouble parsing it.
  1233. e.g. "http". Input None to disable this functionality
  1234. Returns:
  1235. string: a US-ASCII equivalent of the inputed url
  1236. @author: Jonathan Benn
  1237. '''
  1238. #convert the authority component of the URL into an ASCII punycode string,
  1239. #but encode the rest using the regular URI character encoding
  1240. groups = url_split_regex.match(url).groups()
  1241. #If no authority was found
  1242. if not groups[3]:
  1243. #Try appending a scheme to see if that fixes the problem
  1244. scheme_to_prepend = prepend_scheme or 'http'
  1245. groups = url_split_regex.match(
  1246. unicode(scheme_to_prepend) + u'://' + url).groups()
  1247. #if we still can't find the authority
  1248. if not groups[3]:
  1249. raise Exception('No authority component found, ' +
  1250. 'could not decode unicode to US-ASCII')
  1251. #We're here if we found an authority, let's rebuild the URL
  1252. scheme = groups[1]
  1253. authority = groups[3]
  1254. path = groups[4] or ''
  1255. query = groups[5] or ''
  1256. fragment = groups[7] or ''
  1257. if prepend_scheme:
  1258. scheme = str(scheme) + '://'
  1259. else:
  1260. scheme = ''
  1261. return scheme + unicode_to_ascii_authority(authority) +\
  1262. escape_unicode(path) + escape_unicode(query) + str(fragment)
  1263. class IS_GENERIC_URL(Validator):
  1264. """
  1265. Rejects a URL string if any of the following is true:
  1266. * The string is empty or None
  1267. * The string uses characters that are not allowed in a URL
  1268. * The URL scheme specified (if one is specified) is not valid
  1269. Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html
  1270. This function only checks the URL's syntax. It does not check that the URL
  1271. points to a real document, for example, or that it otherwise makes sense
  1272. semantically. This function does automatically prepend 'http://' in front
  1273. of a URL if and only if that's necessary to successfully parse the URL.
  1274. Please note that a scheme will be prepended only for rare cases
  1275. (e.g. 'google.ca:80')
  1276. The list of allowed schemes is customizable with the allowed_schemes
  1277. parameter. If you exclude None from the list, then abbreviated URLs
  1278. (lacking a scheme such as 'http') will be rejected.
  1279. The default prepended scheme is customizable with the prepend_scheme
  1280. parameter. If you set prepend_scheme to None then prepending will be
  1281. disabled. URLs that require prepending to parse will still be accepted,
  1282. but the return value will not be modified.
  1283. @author: Jonathan Benn
  1284. >>> IS_GENERIC_URL()('http://user@abc.com')
  1285. ('http://user@abc.com', None)
  1286. Args:
  1287. error_message: a string, the error message to give the end user
  1288. if the URL does not validate
  1289. allowed_schemes: a list containing strings or None. Each element
  1290. is a scheme the inputed URL is allowed to use
  1291. prepend_scheme: a string, this scheme is prepended if it's
  1292. necessary to make the URL valid
  1293. """
  1294. def __init__(
  1295. self,
  1296. error_message='Enter a valid URL',
  1297. allowed_schemes=None,
  1298. prepend_scheme=None,
  1299. ):
  1300. self.error_message = error_message
  1301. if allowed_schemes is None:
  1302. self.allowed_schemes = all_url_schemes
  1303. else:
  1304. self.allowed_schemes = allowed_schemes
  1305. self.prepend_scheme = prepend_scheme
  1306. if self.prepend_scheme not in self.allowed_schemes:
  1307. raise SyntaxError("prepend_scheme='%s' is not in allowed_schemes=%s"
  1308. % (self.prepend_scheme, self.allowed_schemes))
  1309. GENERIC_URL = re.compile(r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$")
  1310. GENERIC_URL_VALID = re.compile(r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%#]+$")
  1311. def __call__(self, value):
  1312. """
  1313. Args:
  1314. value: a string, the URL to validate
  1315. Returns:
  1316. a tuple, where tuple[0] is the inputed value (possible
  1317. prepended with prepend_scheme), and tuple[1] is either
  1318. None (success!) or the string erro

Large files files are truncated, but you can click here to view the full file