PageRenderTime 48ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/r2/r2/lib/cssfilter.py

https://github.com/stevewilber/reddit
Python | 425 lines | 323 code | 53 blank | 49 comment | 52 complexity | 5516e18b8b6ba4d70b48db83b9ec256b MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0
  1. # The contents of this file are subject to the Common Public Attribution
  2. # License Version 1.0. (the "License"); you may not use this file except in
  3. # compliance with the License. You may obtain a copy of the License at
  4. # http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
  5. # License Version 1.1, but Sections 14 and 15 have been added to cover use of
  6. # software over a computer network and provide for limited attribution for the
  7. # Original Developer. In addition, Exhibit A has been modified to be consistent
  8. # with Exhibit B.
  9. #
  10. # Software distributed under the License is distributed on an "AS IS" basis,
  11. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
  12. # the specific language governing rights and limitations under the License.
  13. #
  14. # The Original Code is reddit.
  15. #
  16. # The Original Developer is the Initial Developer. The Initial Developer of
  17. # the Original Code is reddit Inc.
  18. #
  19. # All portions of the code written by reddit are Copyright (c) 2006-2012 reddit
  20. # Inc. All Rights Reserved.
  21. ###############################################################################
  22. from __future__ import with_statement
  23. from r2.models import *
  24. from r2.lib.utils import sanitize_url, strip_www, randstr
  25. from r2.lib.strings import string_dict
  26. from r2.lib.pages.things import wrap_links
  27. from pylons import g, c
  28. from pylons.i18n import _
  29. from mako import filters
  30. import os
  31. import tempfile
  32. from r2.lib import s3cp
  33. from r2.lib.media import upload_media
  34. from r2.lib.template_helpers import s3_https_if_secure
  35. import re
  36. from urlparse import urlparse
  37. import cssutils
  38. from cssutils import CSSParser
  39. from cssutils.css import CSSStyleRule
  40. from cssutils.css import CSSValue, CSSValueList
  41. from cssutils.css import CSSPrimitiveValue
  42. from cssutils.css import cssproperties
  43. from xml.dom import DOMException
  44. msgs = string_dict['css_validator_messages']
  45. browser_prefixes = ['o','moz','webkit','ms','khtml','apple','xv']
  46. custom_macros = {
  47. 'num': r'[-]?\d+|[-]?\d*\.\d+',
  48. 'percentage': r'{num}%',
  49. 'length': r'0|{num}(em|ex|px|in|cm|mm|pt|pc)',
  50. 'int': r'[-]?\d+',
  51. 'w': r'\s*',
  52. # From: http://www.w3.org/TR/2008/WD-css3-color-20080721/#svg-color
  53. 'x11color': r'aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|maroon|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen',
  54. 'csscolor': r'(maroon|red|orange|yellow|olive|purple|fuchsia|white|lime|green|navy|blue|aqua|teal|black|silver|gray|ActiveBorder|ActiveCaption|AppWorkspace|Background|ButtonFace|ButtonHighlight|ButtonShadow|ButtonText|CaptionText|GrayText|Highlight|HighlightText|InactiveBorder|InactiveCaption|InactiveCaptionText|InfoBackground|InfoText|Menu|MenuText|Scrollbar|ThreeDDarkShadow|ThreeDFace|ThreeDHighlight|ThreeDLightShadow|ThreeDShadow|Window|WindowFrame|WindowText)|#[0-9a-f]{3}|#[0-9a-f]{6}|rgb\({w}{int}{w},{w}{int}{w},{w}{int}{w}\)|rgb\({w}{num}%{w},{w}{num}%{w},{w}{num}%{w}\)',
  55. 'color': '{x11color}|{csscolor}',
  56. 'bg-gradient': r'none|{color}|[a-z-]*-gradient\([^;]*\)',
  57. 'bg-gradients': r'{bg-gradient}(?:,\s*{bg-gradient})*',
  58. 'border-radius': r'(({length}|{percentage}){w}){1,2}',
  59. 'single-text-shadow': r'({color}\s+)?{length}\s+{length}(\s+{length})?|{length}\s+{length}(\s+{length})?(\s+{color})?',
  60. 'box-shadow-pos': r'{length}\s+{length}(\s+{length})?(\s+{length})?',
  61. }
  62. custom_values = {
  63. '_height': r'{length}|{percentage}|auto|inherit',
  64. '_width': r'{length}|{percentage}|auto|inherit',
  65. '_overflow': r'visible|hidden|scroll|auto|inherit',
  66. 'color': r'{color}',
  67. 'border-color': r'{color}',
  68. 'opacity': r'^0?\.?[0-9]*|1\.0*|1|0',
  69. 'filter': r'alpha\(opacity={num}\)',
  70. 'background': r'{bg-gradients}',
  71. 'background-image': r'{bg-gradients}',
  72. 'background-color': r'{color}',
  73. 'background-position': r'(({percentage}|{length}){0,3})?\s*(top|center|left)?\s*(left|center|right)?',
  74. # http://www.w3.org/TR/css3-background/#border-top-right-radius
  75. 'border-radius': r'{border-radius}',
  76. 'border-top-right-radius': r'{border-radius}',
  77. 'border-bottom-right-radius': r'{border-radius}',
  78. 'border-bottom-left-radius': r'{border-radius}',
  79. 'border-top-left-radius': r'{border-radius}',
  80. # old mozilla style (for compatibility with existing stylesheets)
  81. 'border-radius-topright': r'{border-radius}',
  82. 'border-radius-bottomright': r'{border-radius}',
  83. 'border-radius-bottomleft': r'{border-radius}',
  84. 'border-radius-topleft': r'{border-radius}',
  85. # http://www.w3.org/TR/css3-text/#text-shadow
  86. 'text-shadow': r'none|({single-text-shadow}{w},{w})*{single-text-shadow}',
  87. # http://www.w3.org/TR/css3-background/#the-box-shadow
  88. # (This description doesn't support multiple shadows)
  89. 'box-shadow': 'none|(?:({box-shadow-pos}\s+)?{color}|({color}\s+?){box-shadow-pos})',
  90. }
  91. def _build_regex_prefix(prefixes):
  92. return re.compile("|".join("^-"+p+"-" for p in prefixes))
  93. prefix_regex = _build_regex_prefix(browser_prefixes)
  94. def _expand_macros(tokdict,macrodict):
  95. """ Expand macros in token dictionary """
  96. def macro_value(m):
  97. return '(?:%s)' % macrodict[m.groupdict()['macro']]
  98. for key, value in tokdict.items():
  99. while re.search(r'{[a-z][a-z0-9-]*}', value):
  100. value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}',
  101. macro_value, value)
  102. tokdict[key] = value
  103. return tokdict
  104. def _compile_regexes(tokdict):
  105. """ Compile all regular expressions into callable objects """
  106. for key, value in tokdict.items():
  107. tokdict[key] = re.compile('\A(?:%s)\Z' % value, re.I).match
  108. return tokdict
  109. _compile_regexes(_expand_macros(custom_values,custom_macros))
  110. class ValidationReport(object):
  111. def __init__(self, original_text=''):
  112. self.errors = []
  113. self.original_text = original_text.split('\n') if original_text else ''
  114. def __str__(self):
  115. "only for debugging"
  116. return "Report:\n" + '\n'.join(['\t' + str(x) for x in self.errors])
  117. def append(self,error):
  118. if hasattr(error,'line'):
  119. error.offending_line = self.original_text[error.line-1]
  120. self.errors.append(error)
  121. class ValidationError(Exception):
  122. def __init__(self, message, obj = None, line = None):
  123. self.message = message
  124. if obj is not None:
  125. self.obj = obj
  126. # self.offending_line is the text of the actual line that
  127. # caused the problem; it's set by the ValidationReport that
  128. # owns this ValidationError
  129. if obj is not None and line is None and hasattr(self.obj,'_linetoken'):
  130. (_type1,_type2,self.line,_char) = obj._linetoken
  131. elif line is not None:
  132. self.line = line
  133. def __cmp__(self, other):
  134. if hasattr(self,'line') and not hasattr(other,'line'):
  135. return -1
  136. elif hasattr(other,'line') and not hasattr(self,'line'):
  137. return 1
  138. else:
  139. return cmp(self.line,other.line)
  140. def __str__(self):
  141. "only for debugging"
  142. line = (("(%d)" % self.line)
  143. if hasattr(self,'line') else '')
  144. obj = str(self.obj) if hasattr(self,'obj') else ''
  145. return "ValidationError%s: %s (%s)" % (line, self.message, obj)
  146. def legacy_s3_url(url, site):
  147. if isinstance(url, int): # legacy url, needs to be generated
  148. bucket = g.s3_old_thumb_bucket
  149. baseurl = "http://%s" % (bucket)
  150. if g.s3_media_direct:
  151. baseurl = "http://%s/%s" % (s3_direct_url, bucket)
  152. url = "%s/%s_%d.png"\
  153. % (baseurl, site._fullname, url)
  154. url = s3_https_if_secure(url)
  155. return url
  156. # local urls should be in the static directory
  157. local_urls = re.compile(r'\A/static/[a-z./-]+\Z')
  158. # substitutable urls will be css-valid labels surrounded by "%%"
  159. custom_img_urls = re.compile(r'%%([a-zA-Z0-9\-]+)%%')
  160. valid_url_schemes = ('http', 'https')
  161. def valid_url(prop,value,report):
  162. """
  163. checks url(...) arguments in CSS, ensuring that the contents are
  164. officially sanctioned. Sanctioned urls include:
  165. * anything in /static/
  166. * image labels %%..%% for images uploaded on /about/stylesheet
  167. * urls with domains in g.allowed_css_linked_domains
  168. """
  169. try:
  170. url = value.getStringValue()
  171. except IndexError:
  172. g.log.error("Problem validating [%r]" % value)
  173. raise
  174. # local urls are allowed
  175. if local_urls.match(url):
  176. t_url = None
  177. while url != t_url:
  178. t_url, url = url, filters.url_unescape(url)
  179. # disallow path trickery
  180. if "../" in url:
  181. report.append(ValidationError(msgs['broken_url']
  182. % dict(brokenurl = value.cssText),
  183. value))
  184. # custom urls are allowed, but need to be transformed into a real path
  185. elif custom_img_urls.match(url):
  186. name = custom_img_urls.match(url).group(1)
  187. # the label -> image number lookup is stored on the subreddit
  188. if c.site.images.has_key(name):
  189. url = c.site.images[name]
  190. url = legacy_s3_url(url, c.site)
  191. value._setCssText("url(%s)"%url)
  192. else:
  193. # unknown image label -> error
  194. report.append(ValidationError(msgs['broken_url']
  195. % dict(brokenurl = value.cssText),
  196. value))
  197. else:
  198. try:
  199. u = urlparse(url)
  200. valid_scheme = u.scheme and u.scheme in valid_url_schemes
  201. valid_domain = strip_www(u.netloc) in g.allowed_css_linked_domains
  202. except ValueError:
  203. u = False
  204. # allowed domains are ok
  205. if not (u and valid_scheme and valid_domain):
  206. report.append(ValidationError(msgs['broken_url']
  207. % dict(brokenurl = value.cssText),
  208. value))
  209. #elif sanitize_url(url) != url:
  210. # report.append(ValidationError(msgs['broken_url']
  211. # % dict(brokenurl = value.cssText),
  212. # value))
  213. def strip_browser_prefix(prop):
  214. t = prefix_regex.split(prop, maxsplit=1)
  215. return t[len(t) - 1]
  216. def valid_value(prop,value,report):
  217. prop_name = strip_browser_prefix(prop.name) # Remove browser-specific prefixes eg: -moz-border-radius becomes border-radius
  218. if not (value.valid and value.wellformed):
  219. if (value.wellformed
  220. and prop_name in cssproperties.cssvalues
  221. and cssproperties.cssvalues[prop_name](prop.value)):
  222. # it's actually valid. cssutils bug.
  223. pass
  224. elif (not value.valid
  225. and value.wellformed
  226. and prop_name in custom_values
  227. and custom_values[prop_name](prop.value)):
  228. # we're allowing it via our own custom validator
  229. value.valid = True
  230. # see if this suddenly validates the entire property
  231. prop.valid = True
  232. prop.cssValue.valid = True
  233. if prop.cssValue.cssValueType == CSSValue.CSS_VALUE_LIST:
  234. for i in range(prop.cssValue.length):
  235. if not prop.cssValue.item(i).valid:
  236. prop.cssValue.valid = False
  237. prop.valid = False
  238. break
  239. elif not (prop_name in cssproperties.cssvalues or prop_name in custom_values):
  240. error = (msgs['invalid_property']
  241. % dict(cssprop = prop.name))
  242. report.append(ValidationError(error,value))
  243. else:
  244. error = (msgs['invalid_val_for_prop']
  245. % dict(cssvalue = value.cssText,
  246. cssprop = prop.name))
  247. report.append(ValidationError(error,value))
  248. if value.primitiveType == CSSPrimitiveValue.CSS_URI:
  249. valid_url(prop,value,report)
  250. error_message_extract_re = re.compile('.*\\[([0-9]+):[0-9]*:.*\\]\Z')
  251. only_whitespace = re.compile('\A\s*\Z')
  252. def validate_css(string):
  253. p = CSSParser(raiseExceptions = True)
  254. if not string or only_whitespace.match(string):
  255. return ('',ValidationReport())
  256. report = ValidationReport(string)
  257. # avoid a very expensive parse
  258. max_size_kb = 100;
  259. if len(string) > max_size_kb * 1024:
  260. report.append(ValidationError((msgs['too_big']
  261. % dict (max_size = max_size_kb))))
  262. return ('', report)
  263. if '\\' in string:
  264. report.append(ValidationError(_("if you need backslashes, you're doing it wrong")))
  265. try:
  266. parsed = p.parseString(string)
  267. except DOMException,e:
  268. # yuck; xml.dom.DOMException can't give us line-information
  269. # directly, so we have to parse its error message string to
  270. # get it
  271. line = None
  272. line_match = error_message_extract_re.match(e.message)
  273. if line_match:
  274. line = line_match.group(1)
  275. if line:
  276. line = int(line)
  277. error_message= (msgs['syntax_error']
  278. % dict(syntaxerror = e.message))
  279. report.append(ValidationError(error_message,e,line))
  280. return (None,report)
  281. for rule in parsed.cssRules:
  282. if rule.type == CSSStyleRule.IMPORT_RULE:
  283. report.append(ValidationError(msgs['no_imports'],rule))
  284. elif rule.type == CSSStyleRule.COMMENT:
  285. pass
  286. elif rule.type == CSSStyleRule.STYLE_RULE:
  287. style = rule.style
  288. for prop in style.getProperties():
  289. if prop.cssValue.cssValueType == CSSValue.CSS_VALUE_LIST:
  290. for i in range(prop.cssValue.length):
  291. valid_value(prop,prop.cssValue.item(i),report)
  292. if not (prop.cssValue.valid and prop.cssValue.wellformed):
  293. report.append(ValidationError(msgs['invalid_property_list']
  294. % dict(proplist = prop.cssText),
  295. prop.cssValue))
  296. elif prop.cssValue.cssValueType == CSSValue.CSS_PRIMITIVE_VALUE:
  297. valid_value(prop,prop.cssValue,report)
  298. # cssutils bug: because valid values might be marked
  299. # as invalid, we can't trust cssutils to properly
  300. # label valid properties, so we're going to rely on
  301. # the value validation (which will fail if the
  302. # property is invalid anyway). If this bug is fixed,
  303. # we should uncomment this 'if'
  304. # a property is not valid if any of its values are
  305. # invalid, or if it is itself invalid. To get the
  306. # best-quality error messages, we only report on
  307. # whether the property is valid after we've checked
  308. # the values
  309. #if not (prop.valid and prop.wellformed):
  310. # report.append(ValidationError(_('invalid property'),prop))
  311. else:
  312. report.append(ValidationError(msgs['unknown_rule_type']
  313. % dict(ruletype = rule.cssText),
  314. rule))
  315. return parsed,report
  316. def find_preview_comments(sr):
  317. if g.use_query_cache:
  318. from r2.lib.db.queries import get_sr_comments, get_all_comments
  319. comments = get_sr_comments(c.site)
  320. comments = list(comments)
  321. if not comments:
  322. comments = get_all_comments()
  323. comments = list(comments)
  324. return Thing._by_fullname(comments[:25], data=True, return_dict=False)
  325. else:
  326. comments = Comment._query(Comment.c.sr_id == c.site._id,
  327. limit=25, data=True)
  328. comments = list(comments)
  329. if not comments:
  330. comments = Comment._query(limit=25, data=True)
  331. comments = list(comments)
  332. return comments
  333. def find_preview_links(sr):
  334. from r2.lib.normalized_hot import get_hot
  335. # try to find a link to use, otherwise give up and return
  336. links = get_hot([c.site])
  337. if not links:
  338. links = get_hot(Subreddit.default_subreddits(ids=False))
  339. if links:
  340. links = links[:25]
  341. links = Link._by_fullname(links, data=True, return_dict=False)
  342. return links
  343. def rendered_link(links, media, compress):
  344. with c.user.safe_set_attr:
  345. c.user.pref_compress = compress
  346. c.user.pref_media = media
  347. links = wrap_links(links, show_nums = True, num = 1)
  348. delattr(c.user, 'pref_compress')
  349. delattr(c.user, 'pref_media')
  350. return links.render(style = "html")
  351. def rendered_comment(comments):
  352. return wrap_links(comments, num = 1).render(style = "html")
  353. class BadImage(Exception):
  354. def __init__(self, error = None):
  355. self.error = error
  356. def save_sr_image(sr, data, suffix = '.png'):
  357. try:
  358. return upload_media(data, file_type = suffix)
  359. except Exception as e:
  360. raise BadImage(e)