PageRenderTime 52ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/pyconau/lib/xhtml2pdf/w3c/cssParser.py

https://bitbucket.org/hexdump42/pypy-benchmarks
Python | 1077 lines | 994 code | 20 blank | 63 comment | 14 complexity | ba893dbfd4458173de7d1a1966eb1a5d MD5 | raw file
Possible License(s): Apache-2.0, GPL-2.0, BSD-3-Clause
  1. #!/usr/bin/env python
  2. ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  3. ##~ Copyright (C) 2002-2004 TechGame Networks, LLC.
  4. ##~
  5. ##~ This library is free software; you can redistribute it and/or
  6. ##~ modify it under the terms of the BSD style License as found in the
  7. ##~ LICENSE file included with this distribution.
  8. ##
  9. ## Modified by Dirk Holtwick <holtwick@web.de>, 2007-2008
  10. ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  11. """CSS-2.1 parser.
  12. The CSS 2.1 Specification this parser was derived from can be found at http://www.w3.org/TR/CSS21/
  13. Primary Classes:
  14. * CSSParser
  15. Parses CSS source forms into results using a Builder Pattern. Must
  16. provide concrete implementation of CSSBuilderAbstract.
  17. * CSSBuilderAbstract
  18. Outlines the interface between CSSParser and its rule-builder.
  19. Compose CSSParser with a concrete implementation of the builder to get
  20. usable results from the CSS parser.
  21. Dependencies:
  22. python 2.3 (or greater)
  23. re
  24. """
  25. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  26. #~ Imports
  27. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  28. import re
  29. import cssSpecial
  30. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  31. #~ Definitions
  32. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  33. def isAtRuleIdent(src, ident):
  34. return re.match(r'^@' + ident + r'\s*', src)
  35. def stripAtRuleIdent(src):
  36. return re.sub(r'^@[a-z\-]+\s*', '', src)
  37. class CSSSelectorAbstract(object):
  38. """Outlines the interface between CSSParser and it's rule-builder for selectors.
  39. CSSBuilderAbstract.selector and CSSBuilderAbstract.combineSelectors must
  40. return concrete implementations of this abstract.
  41. See css.CSSMutableSelector for an example implementation.
  42. """
  43. def addHashId(self, hashId):
  44. raise NotImplementedError('Subclass responsibility')
  45. def addClass(self, class_):
  46. raise NotImplementedError('Subclass responsibility')
  47. def addAttribute(self, attrName):
  48. raise NotImplementedError('Subclass responsibility')
  49. def addAttributeOperation(self, attrName, op, attrValue):
  50. raise NotImplementedError('Subclass responsibility')
  51. def addPseudo(self, name):
  52. raise NotImplementedError('Subclass responsibility')
  53. def addPseudoFunction(self, name, value):
  54. raise NotImplementedError('Subclass responsibility')
  55. class CSSBuilderAbstract(object):
  56. """Outlines the interface between CSSParser and it's rule-builder. Compose
  57. CSSParser with a concrete implementation of the builder to get usable
  58. results from the CSS parser.
  59. See css.CSSBuilder for an example implementation
  60. """
  61. def setCharset(self, charset):
  62. raise NotImplementedError('Subclass responsibility')
  63. #~ css results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  64. def beginStylesheet(self):
  65. raise NotImplementedError('Subclass responsibility')
  66. def stylesheet(self, elements):
  67. raise NotImplementedError('Subclass responsibility')
  68. def endStylesheet(self):
  69. raise NotImplementedError('Subclass responsibility')
  70. def beginInline(self):
  71. raise NotImplementedError('Subclass responsibility')
  72. def inline(self, declarations):
  73. raise NotImplementedError('Subclass responsibility')
  74. def endInline(self):
  75. raise NotImplementedError('Subclass responsibility')
  76. def ruleset(self, selectors, declarations):
  77. raise NotImplementedError('Subclass responsibility')
  78. #~ css namespaces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  79. def resolveNamespacePrefix(self, nsPrefix, name):
  80. raise NotImplementedError('Subclass responsibility')
  81. #~ css @ directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  82. def atCharset(self, charset):
  83. raise NotImplementedError('Subclass responsibility')
  84. def atImport(self, import_, mediums, cssParser):
  85. raise NotImplementedError('Subclass responsibility')
  86. def atNamespace(self, nsPrefix, uri):
  87. raise NotImplementedError('Subclass responsibility')
  88. def atMedia(self, mediums, ruleset):
  89. raise NotImplementedError('Subclass responsibility')
  90. def atPage(self, page, pseudopage, declarations):
  91. raise NotImplementedError('Subclass responsibility')
  92. def atFontFace(self, declarations):
  93. raise NotImplementedError('Subclass responsibility')
  94. def atIdent(self, atIdent, cssParser, src):
  95. return src, NotImplemented
  96. #~ css selectors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  97. def combineSelectors(self, selectorA, combiner, selectorB):
  98. """Return value must implement CSSSelectorAbstract"""
  99. raise NotImplementedError('Subclass responsibility')
  100. def selector(self, name):
  101. """Return value must implement CSSSelectorAbstract"""
  102. raise NotImplementedError('Subclass responsibility')
  103. #~ css declarations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  104. def property(self, name, value, important=False):
  105. raise NotImplementedError('Subclass responsibility')
  106. def combineTerms(self, termA, combiner, termB):
  107. raise NotImplementedError('Subclass responsibility')
  108. def termIdent(self, value):
  109. raise NotImplementedError('Subclass responsibility')
  110. def termNumber(self, value, units=None):
  111. raise NotImplementedError('Subclass responsibility')
  112. def termRGB(self, value):
  113. raise NotImplementedError('Subclass responsibility')
  114. def termURI(self, value):
  115. raise NotImplementedError('Subclass responsibility')
  116. def termString(self, value):
  117. raise NotImplementedError('Subclass responsibility')
  118. def termUnicodeRange(self, value):
  119. raise NotImplementedError('Subclass responsibility')
  120. def termFunction(self, name, value):
  121. raise NotImplementedError('Subclass responsibility')
  122. def termUnknown(self, src):
  123. raise NotImplementedError('Subclass responsibility')
  124. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  125. #~ CSS Parser
  126. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  127. class CSSParseError(Exception):
  128. src = None
  129. ctxsrc = None
  130. fullsrc = None
  131. inline = False
  132. srcCtxIdx = None
  133. srcFullIdx = None
  134. ctxsrcFullIdx = None
  135. def __init__(self, msg, src, ctxsrc=None):
  136. Exception.__init__(self, msg)
  137. self.src = src
  138. self.ctxsrc = ctxsrc or src
  139. if self.ctxsrc:
  140. self.srcCtxIdx = self.ctxsrc.find(self.src)
  141. if self.srcCtxIdx < 0:
  142. del self.srcCtxIdx
  143. def __str__(self):
  144. if self.ctxsrc:
  145. return Exception.__str__(self) + ':: (' + repr(self.ctxsrc[:self.srcCtxIdx]) + ', ' + repr(self.ctxsrc[self.srcCtxIdx:self.srcCtxIdx+20]) + ')'
  146. else:
  147. return Exception.__str__(self) + ':: ' + repr(self.src[:40])
  148. def setFullCSSSource(self, fullsrc, inline=False):
  149. self.fullsrc = fullsrc
  150. if inline:
  151. self.inline = inline
  152. if self.fullsrc:
  153. self.srcFullIdx = self.fullsrc.find(self.src)
  154. if self.srcFullIdx < 0:
  155. del self.srcFullIdx
  156. self.ctxsrcFullIdx = self.fullsrc.find(self.ctxsrc)
  157. if self.ctxsrcFullIdx < 0:
  158. del self.ctxsrcFullIdx
  159. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class CSSParser(object):
    """CSS-2.1 parser dependent only upon the re module.

    Implemented directly from http://www.w3.org/TR/CSS21/grammar.html
    Tested with some existing CSS stylesheets for portability.

    CSS Parsing API:
        * setCSSBuilder()
            To set your concrete implementation of CSSBuilderAbstract

        * parseFile()
            Use to parse external stylesheets using a file-like object

            >>> cssFile = open('test.css', 'r')
            >>> stylesheets = myCSSParser.parseFile(cssFile)

        * parse()
            Use to parse embedded stylesheets using source string

            >>> cssSrc = '''
                body,body.body {
                    font: 110%, "Times New Roman", Arial, Verdana, Helvetica, serif;
                    background: White;
                    color: Black;
                }
                a {text-decoration: underline;}
            '''
            >>> stylesheets = myCSSParser.parse(cssSrc)

        * parseInline()
            Use to parse inline stylesheets using attribute source string

            >>> style = 'font: 110%, "Times New Roman", Arial, Verdana, Helvetica, serif; background: White; color: Black'
            >>> stylesheets = myCSSParser.parseInline(style)

        * parseAttributes()
            Use to parse attribute string values into inline stylesheets

            >>> stylesheets = myCSSParser.parseAttributes(
                    font='110%, "Times New Roman", Arial, Verdana, Helvetica, serif',
                    background='White',
                    color='Black')

        * parseSingleAttr()
            Use to parse a single string value into a CSS expression

            >>> fontValue = myCSSParser.parseSingleAttr('110%, "Times New Roman", Arial, Verdana, Helvetica, serif')
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Exception class raised by the _parse* methods (as self.ParseError).
    ParseError = CSSParseError

    # Operators accepted inside an attribute selector; _parseSelectorAttribute
    # tests them in this order with startswith().
    AttributeOperators = ['=', '~=', '|=', '&=', '^=', '!=', '<>']
    # Characters that introduce a qualifier on a simple selector.
    SelectorQualifiers = ('#', '.', '[', ':')
    # Explicit selector combinators; _parseSelector uses ' ' when none matches.
    SelectorCombiners = ['+', '>']
    # Operators between expression terms; _parseExpression uses ' ' otherwise.
    ExpressionOperators = ('/', '+', ',')

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Regular expressions
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Naming convention below: i_* are pattern source strings that get
    # composed into larger patterns, re_* are the compiled forms.
    if True: # makes the following code foldable
        # Joins alternatives with '|'; deleted again at the end of this block
        # so it does not remain as a class attribute.
        _orRule = lambda *args: '|'.join(args)
        _reflags = re.I | re.M | re.U
        i_hex = '[0-9a-fA-F]'
        i_nonascii = u'[\200-\377]'
        i_unicode = '\\\\(?:%s){1,6}\s?' % i_hex
        i_escape = _orRule(i_unicode, u'\\\\[ -~\200-\377]')
        # i_nmstart = _orRule('[A-Za-z_]', i_nonascii, i_escape)
        i_nmstart = _orRule('\-[^0-9]|[A-Za-z_]', i_nonascii, i_escape) # XXX Added hyphen, http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
        i_nmchar = _orRule('[-0-9A-Za-z_]', i_nonascii, i_escape)
        i_ident = '((?:%s)(?:%s)*)' % (i_nmstart,i_nmchar)
        re_ident = re.compile(i_ident, _reflags)
        # i_ident[1:-1] drops the outer capturing parentheses so the pattern
        # can be embedded inside another group.
        i_element_name = '((?:%s)|\*)' % (i_ident[1:-1],)
        re_element_name = re.compile(i_element_name, _reflags)
        # The (?!=) lookahead keeps the '|=' attribute operator from being
        # read as a namespace separator.
        i_namespace_selector = '((?:%s)|\*|)\|(?!=)' % (i_ident[1:-1],)
        re_namespace_selector = re.compile(i_namespace_selector, _reflags)
        i_class = '\\.' + i_ident
        re_class = re.compile(i_class, _reflags)
        i_hash = '#((?:%s)+)' % i_nmchar
        re_hash = re.compile(i_hash, _reflags)
        i_rgbcolor = '(#%s{6}|#%s{3})' % (i_hex, i_hex)
        re_rgbcolor = re.compile(i_rgbcolor, _reflags)
        i_nl = u'\n|\r\n|\r|\f'
        i_escape_nl = u'\\\\(?:%s)' % i_nl
        i_string_content = _orRule(u'[\t !#$%&(-~]', i_escape_nl, i_nonascii, i_escape)
        # Double-quoted strings may contain a bare ', single-quoted a bare ".
        i_string1 = u'\"((?:%s|\')*)\"' % i_string_content
        i_string2 = u'\'((?:%s|\")*)\'' % i_string_content
        i_string = _orRule(i_string1, i_string2)
        re_string = re.compile(i_string, _reflags)
        i_uri = (u'url\\(\s*(?:(?:%s)|((?:%s)+))\s*\\)'
                 % (i_string, _orRule('[!#$%&*-~]', i_nonascii, i_escape)))
        # XXX For now
        # i_uri = u'(url\\(.*?\\))'
        re_uri = re.compile(i_uri, _reflags)
        i_num = u'(([-+]?[0-9]+(?:\\.[0-9]+)?)|([-+]?\\.[0-9]+))' # XXX Added out paranthesis, because e.g. .5em was not parsed correctly
        re_num = re.compile(i_num, _reflags)
        i_unit = '(%%|%s)?' % i_ident
        re_unit = re.compile(i_unit, _reflags)
        i_function = i_ident + '\\('
        re_function = re.compile(i_function, _reflags)
        i_functionterm = u'[-+]?' + i_function
        re_functionterm = re.compile(i_functionterm, _reflags)
        i_unicoderange1 = "(?:U\\+%s{1,6}-%s{1,6})" % (i_hex, i_hex)
        # NOTE: i_unicoderange2 is defined but not used; only the explicit
        # "U+xxxx-yyyy" range form (i_unicoderange1) is compiled below.
        i_unicoderange2 = "(?:U\\+\?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h}))))))"
        i_unicoderange = i_unicoderange1 # u'(%s|%s)' % (i_unicoderange1, i_unicoderange2)
        re_unicoderange = re.compile(i_unicoderange, _reflags)

        # i_comment = u'(?:\/\*[^*]*\*+([^/*][^*]*\*+)*\/)|(?://.*)'
        # gabriel: only C convention for comments is allowed in CSS
        i_comment = u'(?:\/\*[^*]*\*+([^/*][^*]*\*+)*\/)'
        re_comment = re.compile(i_comment, _reflags)
        i_important = u'!\s*(important)'
        re_important = re.compile(i_important, _reflags)
        del _orRule
  260. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  261. #~ Public
  262. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    def __init__(self, cssBuilder=None):
        """Create a parser that delegates to ``cssBuilder``.

        ``cssBuilder`` may be None here and supplied later through
        setCSSBuilder() or the ``cssBuilder`` property.
        """
        self.setCSSBuilder(cssBuilder)
  265. #~ CSS Builder to delegate to ~~~~~~~~~~~~~~~~~~~~~~~~
    def getCSSBuilder(self):
        """A concrete instance implementing CSSBuilderAbstract"""
        return self._cssBuilder

    def setCSSBuilder(self, cssBuilder):
        """A concrete instance implementing CSSBuilderAbstract"""
        self._cssBuilder = cssBuilder

    # Attribute-style access to the builder, backed by the two methods above.
    cssBuilder = property(getCSSBuilder, setCSSBuilder)
  273. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  274. #~ Public CSS Parsing API
  275. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  276. def parseFile(self, srcFile, closeFile=False):
  277. """Parses CSS file-like objects using the current cssBuilder.
  278. Use for external stylesheets."""
  279. try:
  280. result = self.parse(srcFile.read())
  281. finally:
  282. if closeFile:
  283. srcFile.close()
  284. return result
  285. def parse(self, src):
  286. """Parses CSS string source using the current cssBuilder.
  287. Use for embedded stylesheets."""
  288. self.cssBuilder.beginStylesheet()
  289. try:
  290. # XXX Some simple preprocessing
  291. src = cssSpecial.cleanupCSS(src)
  292. try:
  293. src, stylesheet = self._parseStylesheet(src)
  294. except self.ParseError, err:
  295. err.setFullCSSSource(src)
  296. raise
  297. finally:
  298. self.cssBuilder.endStylesheet()
  299. return stylesheet
  300. def parseInline(self, src):
  301. """Parses CSS inline source string using the current cssBuilder.
  302. Use to parse a tag's 'sytle'-like attribute."""
  303. self.cssBuilder.beginInline()
  304. try:
  305. try:
  306. src, properties = self._parseDeclarationGroup(src.strip(), braces=False)
  307. except self.ParseError, err:
  308. err.setFullCSSSource(src, inline=True)
  309. raise
  310. result = self.cssBuilder.inline(properties)
  311. finally:
  312. self.cssBuilder.endInline()
  313. return result
  314. def parseAttributes(self, attributes={}, **kwAttributes):
  315. """Parses CSS attribute source strings, and return as an inline stylesheet.
  316. Use to parse a tag's highly CSS-based attributes like 'font'.
  317. See also: parseSingleAttr
  318. """
  319. if attributes:
  320. kwAttributes.update(attributes)
  321. self.cssBuilder.beginInline()
  322. try:
  323. properties = []
  324. try:
  325. for propertyName, src in kwAttributes.iteritems():
  326. src, property = self._parseDeclarationProperty(src.strip(), propertyName)
  327. properties.append(property)
  328. except self.ParseError, err:
  329. err.setFullCSSSource(src, inline=True)
  330. raise
  331. result = self.cssBuilder.inline(properties)
  332. finally:
  333. self.cssBuilder.endInline()
  334. return result
  335. def parseSingleAttr(self, attrValue):
  336. """Parse a single CSS attribute source string, and returns the built CSS expression.
  337. Use to parse a tag's highly CSS-based attributes like 'font'.
  338. See also: parseAttributes
  339. """
  340. results = self.parseAttributes(temp=attrValue)
  341. if 'temp' in results[1]:
  342. return results[1]['temp']
  343. else:
  344. return results[0]['temp']
  345. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  346. #~ Internal _parse methods
  347. #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  348. def _parseStylesheet(self, src):
  349. """stylesheet
  350. : [ CHARSET_SYM S* STRING S* ';' ]?
  351. [S|CDO|CDC]* [ import [S|CDO|CDC]* ]*
  352. [ [ ruleset | media | page | font_face ] [S|CDO|CDC]* ]*
  353. ;
  354. """
  355. # Get rid of the comments
  356. src = self.re_comment.sub(u'', src)
  357. # [ CHARSET_SYM S* STRING S* ';' ]?
  358. src = self._parseAtCharset(src)
  359. # [S|CDO|CDC]*
  360. src = self._parseSCDOCDC(src)
  361. # [ import [S|CDO|CDC]* ]*
  362. src, stylesheetImports = self._parseAtImports(src)
  363. # [ namespace [S|CDO|CDC]* ]*
  364. src = self._parseAtNamespace(src)
  365. stylesheetElements = []
  366. # [ [ ruleset | atkeywords ] [S|CDO|CDC]* ]*
  367. while src: # due to ending with ]*
  368. if src.startswith('@'):
  369. # @media, @page, @font-face
  370. src, atResults = self._parseAtKeyword(src)
  371. if atResults is not None:
  372. stylesheetElements.extend(atResults)
  373. else:
  374. # ruleset
  375. src, ruleset = self._parseRuleset(src)
  376. stylesheetElements.append(ruleset)
  377. # [S|CDO|CDC]*
  378. src = self._parseSCDOCDC(src)
  379. stylesheet = self.cssBuilder.stylesheet(stylesheetElements, stylesheetImports)
  380. return src, stylesheet
  381. def _parseSCDOCDC(self, src):
  382. """[S|CDO|CDC]*"""
  383. while 1:
  384. src = src.lstrip()
  385. if src.startswith('<!--'):
  386. src = src[4:]
  387. elif src.startswith('-->'):
  388. src = src[3:]
  389. else:
  390. break
  391. return src
  392. #~ CSS @ directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  393. def _parseAtCharset(self, src):
  394. """[ CHARSET_SYM S* STRING S* ';' ]?"""
  395. if isAtRuleIdent(src, 'charset'):
  396. src = stripAtRuleIdent(src)
  397. charset, src = self._getString(src)
  398. src = src.lstrip()
  399. if src[:1] != ';':
  400. raise self.ParseError('@charset expected a terminating \';\'', src, ctxsrc)
  401. src = src[1:].lstrip()
  402. self.cssBuilder.atCharset(charset)
  403. return src
  404. def _parseAtImports(self, src):
  405. """[ import [S|CDO|CDC]* ]*"""
  406. result = []
  407. while isAtRuleIdent(src, 'import'):
  408. ctxsrc = src
  409. src = stripAtRuleIdent(src)
  410. import_, src = self._getStringOrURI(src)
  411. if import_ is None:
  412. raise self.ParseError('Import expecting string or url', src, ctxsrc)
  413. mediums = []
  414. medium, src = self._getIdent(src.lstrip())
  415. while medium is not None:
  416. mediums.append(medium)
  417. if src[:1] == ',':
  418. src = src[1:].lstrip()
  419. medium, src = self._getIdent(src)
  420. else:
  421. break
  422. # XXX No medium inherits and then "all" is appropriate
  423. if not mediums:
  424. mediums = ["all"]
  425. if src[:1] != ';':
  426. raise self.ParseError('@import expected a terminating \';\'', src, ctxsrc)
  427. src = src[1:].lstrip()
  428. stylesheet = self.cssBuilder.atImport(import_, mediums, self)
  429. if stylesheet is not None:
  430. result.append(stylesheet)
  431. src = self._parseSCDOCDC(src)
  432. return src, result
  433. def _parseAtNamespace(self, src):
  434. """namespace :
  435. @namespace S* [IDENT S*]? [STRING|URI] S* ';' S*
  436. """
  437. src = self._parseSCDOCDC(src)
  438. while isAtRuleIdent(src, 'namespace'):
  439. ctxsrc = src
  440. src = stripAtRuleIdent(src)
  441. namespace, src = self._getStringOrURI(src)
  442. if namespace is None:
  443. nsPrefix, src = self._getIdent(src)
  444. if nsPrefix is None:
  445. raise self.ParseError('@namespace expected an identifier or a URI', src, ctxsrc)
  446. namespace, src = self._getStringOrURI(src.lstrip())
  447. if namespace is None:
  448. raise self.ParseError('@namespace expected a URI', src, ctxsrc)
  449. else:
  450. nsPrefix = None
  451. src = src.lstrip()
  452. if src[:1] != ';':
  453. raise self.ParseError('@namespace expected a terminating \';\'', src, ctxsrc)
  454. src = src[1:].lstrip()
  455. self.cssBuilder.atNamespace(nsPrefix, namespace)
  456. src = self._parseSCDOCDC(src)
  457. return src
  458. def _parseAtKeyword(self, src):
  459. """[media | page | font_face | unknown_keyword]"""
  460. ctxsrc = src
  461. if isAtRuleIdent(src, 'media'):
  462. src, result = self._parseAtMedia(src)
  463. elif isAtRuleIdent(src, 'page'):
  464. src, result = self._parseAtPage(src)
  465. elif isAtRuleIdent(src, 'font-face'):
  466. src, result = self._parseAtFontFace(src)
  467. # XXX added @import, was missing!
  468. elif isAtRuleIdent(src, 'import'):
  469. src, result = self._parseAtImports(src)
  470. elif isAtRuleIdent(src, 'frame'):
  471. src, result = self._parseAtFrame(src)
  472. elif src.startswith('@'):
  473. src, result = self._parseAtIdent(src)
  474. else:
  475. raise self.ParseError('Unknown state in atKeyword', src, ctxsrc)
  476. return src, result
  477. def _parseAtMedia(self, src):
  478. """media
  479. : MEDIA_SYM S* medium [ ',' S* medium ]* '{' S* ruleset* '}' S*
  480. ;
  481. """
  482. ctxsrc = src
  483. src = src[len('@media '):].lstrip()
  484. mediums = []
  485. while src and src[0] != '{':
  486. medium, src = self._getIdent(src)
  487. if medium is None:
  488. raise self.ParseError('@media rule expected media identifier', src, ctxsrc)
  489. mediums.append(medium)
  490. if src[0] == ',':
  491. src = src[1:].lstrip()
  492. else:
  493. src = src.lstrip()
  494. if not src.startswith('{'):
  495. raise self.ParseError('Ruleset opening \'{\' not found', src, ctxsrc)
  496. src = src[1:].lstrip()
  497. stylesheetElements = []
  498. #while src and not src.startswith('}'):
  499. # src, ruleset = self._parseRuleset(src)
  500. # stylesheetElements.append(ruleset)
  501. # src = src.lstrip()
  502. # Containing @ where not found and parsed
  503. while src and not src.startswith('}'):
  504. if src.startswith('@'):
  505. # @media, @page, @font-face
  506. src, atResults = self._parseAtKeyword(src)
  507. if atResults is not None:
  508. stylesheetElements.extend(atResults)
  509. else:
  510. # ruleset
  511. src, ruleset = self._parseRuleset(src)
  512. stylesheetElements.append(ruleset)
  513. src = src.lstrip()
  514. if not src.startswith('}'):
  515. raise self.ParseError('Ruleset closing \'}\' not found', src, ctxsrc)
  516. else:
  517. src = src[1:].lstrip()
  518. result = self.cssBuilder.atMedia(mediums, stylesheetElements)
  519. return src, result
  520. def _parseAtPage(self, src):
  521. """page
  522. : PAGE_SYM S* IDENT? pseudo_page? S*
  523. '{' S* declaration [ ';' S* declaration ]* '}' S*
  524. ;
  525. """
  526. ctxsrc = src
  527. src = src[len('@page '):].lstrip()
  528. page, src = self._getIdent(src)
  529. if src[:1] == ':':
  530. pseudopage, src = self._getIdent(src[1:])
  531. page = page + '_' + pseudopage
  532. else:
  533. pseudopage = None
  534. #src, properties = self._parseDeclarationGroup(src.lstrip())
  535. # Containing @ where not found and parsed
  536. stylesheetElements = []
  537. src = src.lstrip()
  538. properties = []
  539. # XXX Extended for PDF use
  540. if not src.startswith('{'):
  541. raise self.ParseError('Ruleset opening \'{\' not found', src, ctxsrc)
  542. else:
  543. src = src[1:].lstrip()
  544. while src and not src.startswith('}'):
  545. if src.startswith('@'):
  546. # @media, @page, @font-face
  547. src, atResults = self._parseAtKeyword(src)
  548. if atResults is not None:
  549. stylesheetElements.extend(atResults)
  550. else:
  551. src, nproperties = self._parseDeclarationGroup(src.lstrip(), braces=False)
  552. properties += nproperties
  553. src = src.lstrip()
  554. result = [self.cssBuilder.atPage(page, pseudopage, properties)]
  555. return src[1:].lstrip(), result
  556. def _parseAtFrame(self, src):
  557. """
  558. XXX Proprietary for PDF
  559. """
  560. ctxsrc = src
  561. src = src[len('@frame '):].lstrip()
  562. box, src = self._getIdent(src)
  563. src, properties = self._parseDeclarationGroup(src.lstrip())
  564. result = [self.cssBuilder.atFrame(box, properties)]
  565. return src.lstrip(), result
  566. def _parseAtFontFace(self, src):
  567. ctxsrc = src
  568. src = src[len('@font-face '):].lstrip()
  569. src, properties = self._parseDeclarationGroup(src)
  570. result = [self.cssBuilder.atFontFace(properties)]
  571. return src, result
  572. def _parseAtIdent(self, src):
  573. ctxsrc = src
  574. atIdent, src = self._getIdent(src[1:])
  575. if atIdent is None:
  576. raise self.ParseError('At-rule expected an identifier for the rule', src, ctxsrc)
  577. src, result = self.cssBuilder.atIdent(atIdent, self, src)
  578. if result is NotImplemented:
  579. # An at-rule consists of everything up to and including the next semicolon (;) or the next block, whichever comes first
  580. semiIdx = src.find(';')
  581. if semiIdx < 0:
  582. semiIdx = None
  583. blockIdx = src[:semiIdx].find('{')
  584. if blockIdx < 0:
  585. blockIdx = None
  586. if semiIdx is not None and semiIdx < blockIdx:
  587. src = src[semiIdx+1:].lstrip()
  588. elif blockIdx is None:
  589. # consume the rest of the content since we didn't find a block or a semicolon
  590. src = src[-1:-1]
  591. elif blockIdx is not None:
  592. # expecing a block...
  593. src = src[blockIdx:]
  594. try:
  595. # try to parse it as a declarations block
  596. src, declarations = self._parseDeclarationGroup(src)
  597. except self.ParseError:
  598. # try to parse it as a stylesheet block
  599. src, stylesheet = self._parseStylesheet(src)
  600. else:
  601. raise self.ParserError('Unable to ignore @-rule block', src, ctxsrc)
  602. return src.lstrip(), result
  603. #~ ruleset - see selector and declaration groups ~~~~
  604. def _parseRuleset(self, src):
  605. """ruleset
  606. : selector [ ',' S* selector ]*
  607. '{' S* declaration [ ';' S* declaration ]* '}' S*
  608. ;
  609. """
  610. src, selectors = self._parseSelectorGroup(src)
  611. src, properties = self._parseDeclarationGroup(src.lstrip())
  612. result = self.cssBuilder.ruleset(selectors, properties)
  613. return src, result
  614. #~ selector parsing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  615. def _parseSelectorGroup(self, src):
  616. selectors = []
  617. while src[:1] not in ('{','}', ']','(',')', ';', ''):
  618. src, selector = self._parseSelector(src)
  619. if selector is None:
  620. break
  621. selectors.append(selector)
  622. if src.startswith(','):
  623. src = src[1:].lstrip()
  624. return src, selectors
    def _parseSelector(self, src):
        """selector
        : simple_selector [ combinator simple_selector ]*
        ;

        Returns ``(remaining_src, selector)``.  ``selector`` is None when
        the recovery path below had to discard an unparsable selector.
        """
        src, selector = self._parseSimpleSelector(src)
        # Length after the first simple selector; used below to detect that
        # a later step made no progress.
        srcLen = len(src) # XXX
        while src[:1] not in ('', ',', ';', '{','}', '[',']','(',')'):
            for combiner in self.SelectorCombiners:
                if src.startswith(combiner):
                    src = src[len(combiner):].lstrip()
                    break
            else:
                # no explicit combinator: descendant (whitespace) combinator
                combiner = ' '
            src, selectorB = self._parseSimpleSelector(src)

            # XXX Fix a bug that occured here e.g. : .1 {...}
            if len(src) >= srcLen:
                # Nothing was consumed: drop characters up to the next
                # delimiter and give up on this selector.
                src = src[1:]
                while src and (src[:1] not in ('', ',', ';', '{','}', '[',']','(',')')):
                    src = src[1:]
                return src.lstrip(), None

            selector = self.cssBuilder.combineSelectors(selector, combiner, selectorB)

        return src.lstrip(), selector
  648. def _parseSimpleSelector(self, src):
  649. """simple_selector
  650. : [ namespace_selector ]? element_name? [ HASH | class | attrib | pseudo ]* S*
  651. ;
  652. """
  653. ctxsrc = src.lstrip()
  654. nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
  655. name, src = self._getMatchResult(self.re_element_name, src)
  656. if name:
  657. pass # already *successfully* assigned
  658. elif src[:1] in self.SelectorQualifiers:
  659. name = '*'
  660. else:
  661. raise self.ParseError('Selector name or qualifier expected', src, ctxsrc)
  662. name = self.cssBuilder.resolveNamespacePrefix(nsPrefix, name)
  663. selector = self.cssBuilder.selector(name)
  664. while src and src[:1] in self.SelectorQualifiers:
  665. hash_, src = self._getMatchResult(self.re_hash, src)
  666. if hash_ is not None:
  667. selector.addHashId(hash_)
  668. continue
  669. class_, src = self._getMatchResult(self.re_class, src)
  670. if class_ is not None:
  671. selector.addClass(class_)
  672. continue
  673. if src.startswith('['):
  674. src, selector = self._parseSelectorAttribute(src, selector)
  675. elif src.startswith(':'):
  676. src, selector = self._parseSelectorPseudo(src, selector)
  677. else:
  678. break
  679. return src.lstrip(), selector
  680. def _parseSelectorAttribute(self, src, selector):
  681. """attrib
  682. : '[' S* [ namespace_selector ]? IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
  683. [ IDENT | STRING ] S* ]? ']'
  684. ;
  685. """
  686. ctxsrc = src
  687. if not src.startswith('['):
  688. raise self.ParseError('Selector Attribute opening \'[\' not found', src, ctxsrc)
  689. src = src[1:].lstrip()
  690. nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
  691. attrName, src = self._getIdent(src)
  692. src=src.lstrip()
  693. if attrName is None:
  694. raise self.ParseError('Expected a selector attribute name', src, ctxsrc)
  695. if nsPrefix is not None:
  696. attrName = self.cssBuilder.resolveNamespacePrefix(nsPrefix, attrName)
  697. for op in self.AttributeOperators:
  698. if src.startswith(op):
  699. break
  700. else:
  701. op = ''
  702. src = src[len(op):].lstrip()
  703. if op:
  704. attrValue, src = self._getIdent(src)
  705. if attrValue is None:
  706. attrValue, src = self._getString(src)
  707. if attrValue is None:
  708. raise self.ParseError('Expected a selector attribute value', src, ctxsrc)
  709. else:
  710. attrValue = None
  711. if not src.startswith(']'):
  712. raise self.ParseError('Selector Attribute closing \']\' not found', src, ctxsrc)
  713. else:
  714. src = src[1:]
  715. if op:
  716. selector.addAttributeOperation(attrName, op, attrValue)
  717. else:
  718. selector.addAttribute(attrName)
  719. return src, selector
  720. def _parseSelectorPseudo(self, src, selector):
  721. """pseudo
  722. : ':' [ IDENT | function ]
  723. ;
  724. """
  725. ctxsrc = src
  726. if not src.startswith(':'):
  727. raise self.ParseError('Selector Pseudo \':\' not found', src, ctxsrc)
  728. src = re.search('^:{1,2}(.*)', src, re.M | re.S).group(1)
  729. name, src = self._getIdent(src)
  730. if not name:
  731. raise self.ParseError('Selector Pseudo identifier not found', src, ctxsrc)
  732. if src.startswith('('):
  733. # function
  734. src = src[1:].lstrip()
  735. src, term = self._parseExpression(src, True)
  736. if not src.startswith(')'):
  737. raise self.ParseError('Selector Pseudo Function closing \')\' not found', src, ctxsrc)
  738. src = src[1:]
  739. selector.addPseudoFunction(name, term)
  740. else:
  741. selector.addPseudo(name)
  742. return src, selector
  743. #~ declaration and expression parsing ~~~~~~~~~~~~~~~
    def _parseDeclarationGroup(self, src, braces=True):
        """Parse a ';'-separated list of declarations.

        With ``braces`` true the group must be wrapped in '{' ... '}'.
        With ``braces`` false the braces are optional, but once an opening
        '{' is seen the closing '}' is required.  Returns
        ``(remaining_src, properties)``; raises ParseError on a missing
        brace.
        """
        ctxsrc = src
        if src.startswith('{'):
            src, braces = src[1:], True
        elif braces:
            raise self.ParseError('Declaration group opening \'{\' not found', src, ctxsrc)

        properties = []
        src = src.lstrip()
        while src[:1] not in ('', ',', '{','}', '[',']','(',')','@'): # XXX @?
            src, property = self._parseDeclaration(src)

            # XXX Workaround for styles like "*font: smaller"
            # The '*' prefix is rewritten to a throw-away identifier and the
            # declaration is parsed again on the next pass.
            if src.startswith("*"):
                src = "-nothing-" + src[1:]
                continue

            if property is None:
                break
            properties.append(property)
            if src.startswith(';'):
                src = src[1:].lstrip()
            else:
                break

        if braces:
            if not src.startswith('}'):
                raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
            src = src[1:]

        return src.lstrip(), properties
  770. def _parseDeclaration(self, src):
  771. """declaration
  772. : ident S* ':' S* expr prio?
  773. | /* empty */
  774. ;
  775. """
  776. # property
  777. propertyName, src = self._getIdent(src)
  778. if propertyName is not None:
  779. src = src.lstrip()
  780. # S* : S*
  781. if src[:1] in (':', '='):
  782. # Note: we are being fairly flexable here... technically, the
  783. # ":" is *required*, but in the name of flexibility we
  784. # suppor a null transition, as well as an "=" transition
  785. src = src[1:].lstrip()
  786. src, property = self._parseDeclarationProperty(src, propertyName)
  787. else:
  788. property = None
  789. return src, property
  790. def _parseDeclarationProperty(self, src, propertyName):
  791. # expr
  792. src, expr = self._parseExpression(src)
  793. # prio?
  794. important, src = self._getMatchResult(self.re_important, src)
  795. src = src.lstrip()
  796. property = self.cssBuilder.property(propertyName, expr, important)
  797. return src, property
  798. def _parseExpression(self, src, returnList=False):
  799. """
  800. expr
  801. : term [ operator term ]*
  802. ;
  803. """
  804. src, term = self._parseExpressionTerm(src)
  805. operator = None
  806. while src[:1] not in ('', ';', '{','}', '[',']', ')'):
  807. for operator in self.ExpressionOperators:
  808. if src.startswith(operator):
  809. src = src[len(operator):]
  810. break
  811. else:
  812. operator = ' '
  813. src, term2 = self._parseExpressionTerm(src.lstrip())
  814. if term2 is NotImplemented:
  815. break
  816. else:
  817. term = self.cssBuilder.combineTerms(term, operator, term2)
  818. if operator is None and returnList:
  819. term = self.cssBuilder.combineTerms(term, None, None)
  820. return src, term
  821. else:
  822. return src, term
  823. def _parseExpressionTerm(self, src):
  824. """term
  825. : unary_operator?
  826. [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
  827. TIME S* | FREQ S* | function ]
  828. | STRING S* | IDENT S* | URI S* | RGB S* | UNICODERANGE S* | hexcolor
  829. ;
  830. """
  831. ctxsrc = src
  832. result, src = self._getMatchResult(self.re_num, src)
  833. if result is not None:
  834. units, src = self._getMatchResult(self.re_unit, src)
  835. term = self.cssBuilder.termNumber(result, units)
  836. return src.lstrip(), term
  837. result, src = self._getString(src, self.re_uri)
  838. if result is not None:
  839. # XXX URL!!!!
  840. term = self.cssBuilder.termURI(result)
  841. return src.lstrip(), term
  842. result, src = self._getString(src)
  843. if result is not None:
  844. term = self.cssBuilder.termString(result)
  845. return src.lstrip(), term
  846. result, src = self._getMatchResult(self.re_functionterm, src)
  847. if result is not None:
  848. src, params = self._parseExpression(src, True)
  849. if src[0] != ')':
  850. raise self.ParseError('Terminal function expression expected closing \')\'', src, ctxsrc)
  851. src = src[1:].lstrip()
  852. term = self.cssBuilder.termFunction(result, params)
  853. return src, term
  854. result, src = self._getMatchResult(self.re_rgbcolor, src)
  855. if result is not None:
  856. term = self.cssBuilder.termRGB(result)
  857. return src.lstrip(), term
  858. result, src = self._getMatchResult(self.re_unicoderange, src)
  859. if result is not None:
  860. term = self.cssBuilder.termUnicodeRange(result)
  861. return src.lstrip(), term
  862. nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
  863. result, src = self._getIdent(src)
  864. if result is not None:
  865. if nsPrefix is not None:
  866. result = self.cssBuilder.resolveNamespacePrefix(nsPrefix, result)
  867. term = self.cssBuilder.termIdent(result)
  868. return src.lstrip(), term
  869. return self.cssBuilder.termUnknown(src)
  870. #~ utility methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  871. def _getIdent(self, src, default=None):
  872. return self._getMatchResult(self.re_ident, src, default)
  873. def _getString(self, src, rexpression=None, default=None):
  874. if rexpression is None:
  875. rexpression = self.re_string
  876. result = rexpression.match(src)
  877. if result:
  878. strres = filter(None, result.groups())
  879. if strres:
  880. strres = strres[0]
  881. else:
  882. strres = ''
  883. return strres, src[result.end():]
  884. else:
  885. return default, src
  886. def _getStringOrURI(self, src):
  887. result, src = self._getString(src, self.re_uri)
  888. if result is None:
  889. result, src = self._getString(src)
  890. return result, src
  891. def _getMatchResult(self, rexpression, src, default=None, group=1):
  892. result = rexpression.match(src)
  893. if result:
  894. return result.group(group), src[result.end():]
  895. else:
  896. return default, src