
/SmartObjectFramework/src/rdflib/plugins/parsers/notation3.py

https://github.com/mjkoster/SmartObject
Python | 2427 lines | 2111 code | 176 blank | 140 comment | 224 complexity | MD5: 825ecb377cb8c00574f56dbdcc5a06e0
Possible License(s): Apache-2.0, LGPL-3.0

Large files are truncated; the full file can be viewed in the repository linked above.

#!/usr/bin/env python
u"""
notation3.py - Standalone Notation3 Parser
Derived from CWM, the Closed World Machine

Authors of the original suite:

* Dan Connolly <@@>
* Tim Berners-Lee <@@>
* Yosi Scharf <@@>
* Joseph M. Reagle Jr. <reagle@w3.org>
* Rich Salz <rsalz@zolera.com>

http://www.w3.org/2000/10/swap/notation3.py

Copyright 2000-2007, World Wide Web Consortium.
Copyright 2001, MIT.
Copyright 2001, Zolera Systems Inc.

License: W3C Software License
http://www.w3.org/Consortium/Legal/copyright-software

Modified by Sean B. Palmer
Copyright 2007, Sean B. Palmer. \u32E1

Modified to work with rdflib by Gunnar Aastrand Grimnes
Copyright 2010, Gunnar A. Grimnes
"""
# Python standard libraries
import types
import sys
import os
import re
import StringIO
import codecs

from binascii import a2b_hex
from decimal import Decimal

from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id
from rdflib.graph import QuotedGraph, ConjunctiveGraph
from rdflib import py3compat
b = py3compat.b

__all__ = [
    'URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity",
    "progress", "splitFrag", "splitFragP", "join", "refTo", "base",
    "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR",
    "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify"]

from rdflib.parser import Parser
# Incestuous.. would be nice to separate N3 and XML
# from sax2rdf import XMLtoDOM
def XMLtoDOM(*args, **kargs):
    # print >> sys.stderr, args, kargs
    pass


# SWAP http://www.w3.org/2000/10/swap
# from diag import verbosity, setVerbosity, progress
def verbosity(*args, **kargs):
    # print >> sys.stderr, args, kargs
    pass


def setVerbosity(*args, **kargs):
    # print >> sys.stderr, args, kargs
    pass


def progress(*args, **kargs):
    # print >> sys.stderr, args, kargs
    pass
def splitFrag(uriref):
    """split a URI reference between the fragment and the rest.
    Punctuation is thrown away.
    e.g.

    >>> splitFrag("abc#def")
    ('abc', 'def')
    >>> splitFrag("abcdef")
    ('abcdef', None)
    """
    i = uriref.rfind("#")
    if i >= 0:
        return uriref[:i], uriref[i + 1:]
    else:
        return uriref, None


def splitFragP(uriref, punct=0):
    """split a URI reference before the fragment
    Punctuation is kept.
    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')
    >>> splitFragP("abcdef")
    ('abcdef', '')
    """
    i = uriref.rfind("#")
    if i >= 0:
        return uriref[:i], uriref[i:]
    else:
        return uriref, ''
@py3compat.format_doctest_out
def join(here, there):
    """join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    here is assumed to be absolute.
    there is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo')
    Traceback (most recent call last):
        raise ValueError, here
    ValueError: Base <mid:foo@example> has no slash after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(%(u)s'Andr\\xe9')
    5

    >>> join('http://example.org/', %(u)s'#Andr\\xe9')
    %(u)s'http://example.org/#Andr\\xe9'
    """
    assert(here.find("#") < 0), "Base may not contain hash: '%s'" % here  # caller must splitFrag (why?)

    slashl = there.find('/')
    colonl = there.find(':')

    # join(base, 'foo:/') -- absolute
    if colonl >= 0 and (slashl < 0 or colonl < slashl):
        return there

    bcolonl = here.find(':')
    assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here  # else it's not absolute

    path, frag = splitFragP(there)
    if not path:
        return here + frag

    # join('mid:foo@example', '../foo') bzzt
    if here[bcolonl + 1:bcolonl + 2] != '/':
        raise ValueError("Base <%s> has no slash after colon - with relative '%s'." % (here, there))

    if here[bcolonl + 1:bcolonl + 3] == '//':
        bpath = here.find('/', bcolonl + 3)
    else:
        bpath = bcolonl + 1

    # join('http://xyz', 'foo')
    if bpath < 0:
        bpath = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:bcolonl + 1] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:bpath] + there

    slashr = here.rfind('/')

    while 1:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            i = here.rfind('/', bpath, slashr)
            if i >= 0:
                here = here[:i + 1]
                slashr = i
        else:
            break

    return here[:slashr + 1] + path + frag
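# A couple of further join() cases, for illustration (a sketch; these follow
# from the algorithm above and are not part of the doctests):
#
#     join('http://example/x/y/z', 'mailto:joe@example.org')
#         -> 'mailto:joe@example.org'    (an absolute reference wins)
#     join('http://example/x/y/z', 'q/r')
#         -> 'http://example/x/y/q/r'    (resolved against the base directory)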
commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$')


def refTo(base, uri):
    """figure out a relative URI reference from base to uri

    >>> refTo('http://example/x/y/z', 'http://example/x/abc')
    '../abc'

    >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s')
    'q/r#s'

    >>> refTo(None, 'http://ex/x/y')
    'http://ex/x/y'

    >>> refTo('http://ex/x/y', 'http://ex/x/y')
    ''

    Note the relationship between refTo and join:
    join(x, refTo(x, y)) == y
    which points out certain strings which cannot be URIs. e.g.

    >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y
    0

    So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead:

    >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y
    1

    This one checks that it uses a root-relative one where that is
    all they share. Now uses root-relative where no path is shared.
    This is a matter of taste but tends to give more resilience IMHO
    -- and shorter paths

    Note that base may be None, meaning no base. In some situations, there
    just ain't a base. Slife. In these cases, relTo returns the absolute value.
    The axiom abs(,rel(b,x))=x still holds.
    This saves people having to set the base to "bogus:".

    >>> refTo('http://ex/x/y/z', 'http://ex/r')
    '/r'
    """
    # assert base # don't mask bugs -danc # not a bug. -tim
    if not base:
        return uri
    if base == uri:
        return ""

    # Find how many path segments in common
    i = 0
    while i < len(uri) and i < len(base):
        if uri[i] == base[i]:
            i = i + 1
        else:
            break
    # print "# relative", base, uri, " same up to ", i
    # i points to the end of the shortest one or the first difference

    m = commonHost.match(base[:i])
    if m:
        k = uri.find("//")
        if k < 0:
            k = -2  # no host
        l = uri.find("/", k + 2)
        if uri[l + 1:l + 2] != "/" and base[l + 1:l + 2] != "/" and uri[:l] == base[:l]:
            return uri[l:]

    if uri[i:i + 1] == "#" and len(base) == i:
        return uri[i:]  # fragment of base

    while i > 0 and uri[i - 1] != '/':
        i = i - 1  # scan for slash

    if i < 3:
        return uri  # No way.
    if base.find("//", i - 2) > 0 or uri.find("//", i - 2) > 0:
        return uri  # An unshared "//"
    if base.find(":", i) > 0:
        return uri  # An unshared ":"

    n = base.count("/", i)
    if n == 0 and i < len(uri) and uri[i] == '#':
        return "./" + uri[i:]
    elif n == 0 and i == len(uri):
        return "./"
    else:
        return ("../" * n) + uri[i:]
def base():
    """The base URI for this process - the Web equiv of cwd

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity
    """
    # return "file://" + hostname + os.getcwd() + "/"
    return "file://" + _fixslash(os.getcwd()) + "/"


def _fixslash(argstr):
    """ Fix Windows-like filename to Unix-like - (#ifdef WINDOWS)"""
    s = argstr
    for i in range(len(s)):
        if s[i] == "\\":
            s = s[:i] + "/" + s[i + 1:]
    if s[0] != "/" and s[1] == ":":
        s = s[2:]  # @@@ Hack when drive letter present
    return s
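# Illustrative sketch (values assumed, not doctested): on a POSIX system whose
# current working directory is /home/user,
#
#     base()                         # -> 'file:///home/user/'
#     _fixslash('C:\\data\\doc.n3')  # -> '/data/doc.n3'  (drive letter dropped,
#                                    #    per the hack noted above)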
URI_unreserved = b("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
@py3compat.format_doctest_out
def canonical(str_in):
    """Convert equivalent URIs (or parts) to the same string

    There are many different levels of URI canonicalization
    which are possible. See http://www.ietf.org/rfc/rfc3986.txt
    Done:
    - Converting unicode IRI to utf-8
    - Escaping all non-ASCII
    - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39),
      hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4)
    - Making all escapes uppercase hexadecimal

    Not done:
    - Making URI scheme lowercase
    - changing /./ or /foo/../ to / with care not to change host part

    >>> canonical("foo bar")
    %(b)s'foo%%20bar'

    >>> canonical(%(u)s'http:')
    %(b)s'http:'

    >>> canonical('fran%%c3%%83%%c2%%a7ois')
    %(b)s'fran%%C3%%83%%C2%%A7ois'

    >>> canonical('a')
    %(b)s'a'

    >>> canonical('%%4e')
    %(b)s'N'

    >>> canonical('%%9d')
    %(b)s'%%9D'

    >>> canonical('%%2f')
    %(b)s'%%2F'

    >>> canonical('%%2F')
    %(b)s'%%2F'
    """
    if type(str_in) == type(u''):
        s8 = str_in.encode('utf-8')
    else:
        s8 = str_in
    s = b('')
    i = 0
    while i < len(s8):
        if py3compat.PY3:
            n = s8[i]
            ch = bytes([n])
        else:
            ch = s8[i]
            n = ord(ch)
        if (n > 126) or (n < 33):   # %-encode controls, SP, DEL, and utf-8
            s += b("%%%02X" % ord(ch))
        elif ch == b('%') and i + 2 < len(s8):
            ch2 = a2b_hex(s8[i + 1:i + 3])
            if ch2 in URI_unreserved:
                s += ch2
            else:
                s += b("%%%02X" % ord(ch2))
            i = i + 3
            continue
        else:
            s += ch
        i = i + 1
    return s
CONTEXT = 0
PRED = 1
SUBJ = 2
OBJ = 3

PARTS = PRED, SUBJ, OBJ
ALL4 = CONTEXT, PRED, SUBJ, OBJ

SYMBOL = 0
FORMULA = 1
LITERAL = 2
LITERAL_DT = 21
LITERAL_LANG = 22
ANONYMOUS = 3
XMLLITERAL = 25

Logic_NS = "http://www.w3.org/2000/10/swap/log#"
NODE_MERGE_URI = Logic_NS + "is"  # Pseudo-property indicating node merging
forSomeSym = Logic_NS + "forSome"
forAllSym = Logic_NS + "forAll"

RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
OWL_NS = "http://www.w3.org/2002/07/owl#"
DAML_sameAs_URI = OWL_NS + "sameAs"
parsesTo_URI = Logic_NS + "parsesTo"
RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"

List_NS = RDF_NS_URI  # From 20030808
_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"

N3_first = (SYMBOL, List_NS + "first")
N3_rest = (SYMBOL, List_NS + "rest")
N3_li = (SYMBOL, List_NS + "li")
N3_nil = (SYMBOL, List_NS + "nil")
N3_List = (SYMBOL, List_NS + "List")
N3_Empty = (SYMBOL, List_NS + "Empty")

runNamespaceValue = None


def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue == None:
        runNamespaceValue = join(base(), _unique_id()) + '#'
    return runNamespaceValue


nextu = 0


def uniqueURI():
    "A unique URI"
    global nextu
    nextu += 1
    # return runNamespace() + "u_" + `nextu`
    return runNamespace() + "u_" + str(nextu)


class URISyntaxError(ValueError):
    """A parameter is passed to a routine that requires a URI reference"""
    pass


tracking = False
chatty_flag = 50
from xml.dom import Node

try:
    from xml.ns import XMLNS
except:
    class XMLNS:
        BASE = "http://www.w3.org/2000/xmlns/"
        XML = "http://www.w3.org/XML/1998/namespace"

_attrs = lambda E: (E.attributes and E.attributes.values()) or []
_children = lambda E: E.childNodes or []
_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML
_inclusive = lambda n: n.unsuppressedPrefixes == None
# Does a document/PI have lesser/greater document order than the
# first element?
_LesserElement, _Element, _GreaterElement = range(3)


def _sorter(n1, n2):
    '''_sorter(n1, n2) -> int
    Sorting predicate for non-NS attributes.'''
    i = cmp(n1.namespaceURI, n2.namespaceURI)
    if i:
        return i
    return cmp(n1.localName, n2.localName)


def _sorter_ns(n1, n2):
    '''_sorter_ns((n,v),(n,v)) -> int
    "(an empty namespace URI is lexicographically least)."'''
    if n1[0] == 'xmlns':
        return -1
    if n2[0] == 'xmlns':
        return 1
    return cmp(n1[0], n2[0])


def _utilized(n, node, other_attrs, unsuppressedPrefixes):
    '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
    Return true if that nodespace is utilized within the node'''
    if n.startswith('xmlns:'):
        n = n[6:]
    elif n.startswith('xmlns'):
        n = n[5:]
    if (n == "" and node.prefix in ["#default", None]) or \
            n == node.prefix or n in unsuppressedPrefixes:
        return 1
    for attr in other_attrs:
        if n == attr.prefix:
            return 1
    return 0


# _in_subset = lambda subset, node: not subset or node in subset
_in_subset = lambda subset, node: subset is None or node in subset  # rich's tweak
class _implementation:
    '''Implementation class for C14N. This accompanies a node during its
    processing and includes the parameters and processing state.'''

    # Handler for each node type; populated during module instantiation.
    handlers = {}

    def __init__(self, node, write, **kw):
        '''Create and run the implementation.'''
        self.write = write
        self.subset = kw.get('subset')
        self.comments = kw.get('comments', 0)
        self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
        nsdict = kw.get('nsdict', {'xml': XMLNS.XML, 'xmlns': XMLNS.BASE})

        # Processing state.
        self.state = (nsdict, {'xml': ''}, {})  # 0422

        if node.nodeType == Node.DOCUMENT_NODE:
            self._do_document(node)
        elif node.nodeType == Node.ELEMENT_NODE:
            self.documentOrder = _Element  # At document element
            if not _inclusive(self):
                self._do_element(node)
            else:
                inherited = self._inherit_context(node)
                self._do_element(node, inherited)
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            pass
        elif node.nodeType == Node.TEXT_NODE:
            self._do_text(node)
        else:
            raise TypeError(str(node))
    def _inherit_context(self, node):
        '''_inherit_context(self, node) -> list
        Scan ancestors of attribute and namespace context. Used only
        for single element node canonicalization, not for subset
        canonicalization.'''

        # Collect the initial list of xml:foo attributes.
        xmlattrs = filter(_IN_XML_NS, _attrs(node))

        # Walk up and get all xml:XXX attributes we inherit.
        inherited, parent = [], node.parentNode
        while parent and parent.nodeType == Node.ELEMENT_NODE:
            for a in filter(_IN_XML_NS, _attrs(parent)):
                n = a.localName
                if n not in xmlattrs:
                    xmlattrs.append(n)
                    inherited.append(a)
            parent = parent.parentNode
        return inherited
    def _do_document(self, node):
        '''_do_document(self, node) -> None
        Process a document node. documentOrder holds whether the document
        element has been encountered such that PIs/comments can be written
        as specified.'''
        self.documentOrder = _LesserElement
        for child in node.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                self.documentOrder = _Element  # At document element
                self._do_element(child)
                self.documentOrder = _GreaterElement  # After document element
            elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
                self._do_pi(child)
            elif child.nodeType == Node.COMMENT_NODE:
                self._do_comment(child)
            elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
                pass
            else:
                raise TypeError(str(child))
    handlers[Node.DOCUMENT_NODE] = _do_document
    def _do_text(self, node):
        '''_do_text(self, node) -> None
        Process a text or CDATA node. Render various special characters
        as their C14N entity representations.'''
        if not _in_subset(self.subset, node):
            return
        s = node.data.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace(">", "&gt;")
        s = s.replace("\015", "&#xD;")
        if s:
            self.write(s)
    handlers[Node.TEXT_NODE] = _do_text
    handlers[Node.CDATA_SECTION_NODE] = _do_text
    def _do_pi(self, node):
        '''_do_pi(self, node) -> None
        Process a PI node. Render a leading or trailing #xA if the
        document order of the PI is greater or lesser (respectively)
        than the document element.
        '''
        if not _in_subset(self.subset, node):
            return
        W = self.write
        if self.documentOrder == _GreaterElement:
            W('\n')
        W('<?')
        W(node.nodeName)
        s = node.data
        if s:
            W(' ')
            W(s)
        W('?>')
        if self.documentOrder == _LesserElement:
            W('\n')
    handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi
    def _do_comment(self, node):
        '''_do_comment(self, node) -> None
        Process a comment node. Render a leading or trailing #xA if the
        document order of the comment is greater or lesser (respectively)
        than the document element.
        '''
        if not _in_subset(self.subset, node):
            return
        if self.comments:
            W = self.write
            if self.documentOrder == _GreaterElement:
                W('\n')
            W('<!--')
            W(node.data)
            W('-->')
            if self.documentOrder == _LesserElement:
                W('\n')
    handlers[Node.COMMENT_NODE] = _do_comment
    def _do_attr(self, n, value):
        '''_do_attr(self, node) -> None
        Process an attribute.'''
        W = self.write
        W(' ')
        W(n)
        W('="')
        s = value.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace('"', '&quot;')
        s = s.replace('\011', '&#x9;')
        s = s.replace('\012', '&#xA;')
        s = s.replace('\015', '&#xD;')
        W(s)
        W('"')
    def _do_element(self, node, initial_other_attrs=[]):
        '''_do_element(self, node, initial_other_attrs = []) -> None
        Process an element (and its children).'''

        # Get state (from the stack) make local copies.
        #   ns_parent -- NS declarations in parent
        #   ns_rendered -- NS nodes rendered by ancestors
        #   ns_local -- NS declarations relevant to this element
        #   xml_attrs -- Attributes in XML namespace from parent
        #   xml_attrs_local -- Local attributes in XML namespace.
        ns_parent, ns_rendered, xml_attrs = \
            self.state[0], self.state[1].copy(), self.state[2].copy()  # 0422
        ns_local = ns_parent.copy()
        xml_attrs_local = {}

        # progress("_do_element node.nodeName=", node.nodeName)
        # progress("_do_element node.namespaceURI", node.namespaceURI)
        # progress("_do_element node.tocml()", node.toxml())

        # Divide attributes into NS, XML, and others.
        other_attrs = initial_other_attrs[:]
        in_subset = _in_subset(self.subset, node)
        for a in _attrs(node):
            # progress("\t_do_element a.nodeName=", a.nodeName)
            if a.namespaceURI == XMLNS.BASE:
                n = a.nodeName
                if n == "xmlns:":
                    n = "xmlns"  # DOM bug workaround
                ns_local[n] = a.nodeValue
            elif a.namespaceURI == XMLNS.XML:
                if _inclusive(self) or in_subset:
                    xml_attrs_local[a.nodeName] = a  # 0426
            else:
                other_attrs.append(a)
            # add local xml:foo attributes to ancestor's xml:foo attributes
            xml_attrs.update(xml_attrs_local)

        # Render the node
        W, name = self.write, None
        if in_subset:
            name = node.nodeName
            W('<')
            W(name)

            # Create list of NS attributes to render.
            ns_to_render = []
            for n, v in ns_local.items():

                # If default namespace is XMLNS.BASE or empty,
                # and if an ancestor was the same
                if n == "xmlns" and v in [XMLNS.BASE, ''] \
                        and ns_rendered.get('xmlns') in [XMLNS.BASE, '', None]:
                    continue

                # "omit namespace node with local name xml, which defines
                # the xml prefix, if its string value is
                # http://www.w3.org/XML/1998/namespace."
                if n in ["xmlns:xml", "xml"] \
                        and v in ['http://www.w3.org/XML/1998/namespace']:
                    continue

                # If not previously rendered
                # and it's inclusive or utilized
                if (n, v) not in ns_rendered.items() \
                        and (_inclusive(self) or
                             _utilized(n, node, other_attrs, self.unsuppressedPrefixes)):
                    ns_to_render.append((n, v))

            # Sort and render the ns, marking what was rendered.
            ns_to_render.sort(_sorter_ns)
            for n, v in ns_to_render:
                self._do_attr(n, v)
                ns_rendered[n] = v  # 0417

            # If exclusive or the parent is in the subset, add the local xml attributes
            # Else, add all local and ancestor xml attributes
            # Sort and render the attributes.
            if not _inclusive(self) or _in_subset(self.subset, node.parentNode):  # 0426
                other_attrs.extend(xml_attrs_local.values())
            else:
                other_attrs.extend(xml_attrs.values())
            other_attrs.sort(_sorter)
            for a in other_attrs:
                self._do_attr(a.nodeName, a.value)
            W('>')

        # Push state, recurse, pop state.
        state, self.state = self.state, (ns_local, ns_rendered, xml_attrs)
        for c in _children(node):
            _implementation.handlers[c.nodeType](self, c)
        self.state = state

        if name:
            W('</%s>' % name)
    handlers[Node.ELEMENT_NODE] = _do_element
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> UTF-8

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict -- a dictionary of prefix:uri namespace entries
                  assumed to exist in the surrounding context
        comments -- keep comments if non-zero (default is 0)
        subset -- Canonical XML subsetting resulting from XPath (default is [])
        unsuppressedPrefixes -- do exclusive C14N, and this specifies the
                                prefixes that should be inherited.
    '''
    if output:
        apply(_implementation, (node, output.write), kw)
    else:
        s = StringIO.StringIO()
        apply(_implementation, (node, s.write), kw)
        return s.getvalue()
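# Illustrative usage sketch (Python 2; the document and expected output are
# assumptions for illustration, not taken from this file): canonicalize a
# small parsed DOM either to a returned string or to a stream.
#
#     from xml.dom import minidom
#     doc = minidom.parseString('<a  b="1"/>')
#     Canonicalize(doc)               # expected -> '<a b="1"></a>'
#     Canonicalize(doc, sys.stdout)   # writes the text, returns None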
# end of xmlC14n.py


# from why import BecauseOfData, becauseSubexpression
def BecauseOfData(*args, **kargs):
    # print args, kargs
    pass


def becauseSubexpression(*args, **kargs):
    # print args, kargs
    pass
N3_forSome_URI = forSomeSym
N3_forAll_URI = forAllSym

# Magic resources we know about
ADDED_HASH = "#"  # Stop where we use this in case we want to remove it!
# This is the hash on namespace URIs

RDF_type = (SYMBOL, RDF_type_URI)
DAML_sameAs = (SYMBOL, DAML_sameAs_URI)

LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"

BOOLEAN_DATATYPE = _XSD_PFX + "boolean"
DECIMAL_DATATYPE = _XSD_PFX + "decimal"
DOUBLE_DATATYPE = _XSD_PFX + "double"
FLOAT_DATATYPE = _XSD_PFX + "float"
INTEGER_DATATYPE = _XSD_PFX + "integer"

option_noregen = 0  # If set, do not regenerate genids on output

# @@ I18n - the notname chars need extending for well known unicode non-text
# characters. The XML spec switched to assuming unknown things were name
# characters.
# _namechars = string.lowercase + string.uppercase + string.digits + '_-'

_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~"  # else valid qname :-/
_notNameChars = _notQNameChars + ":"  # Assume anything else valid name :-/
_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

N3CommentCharacter = "#"  # For unix script #! compatibility
########################################## Parse string to sink
#
# Regular expressions:
eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n')   # end of line, poss. w/comment
eof = re.compile(r'[ \t]*(#[^\n]*)?$')       # end of file, poss. w/comment
ws = re.compile(r'[ \t]*')                   # Whitespace not including NL
signed_integer = re.compile(r'[-+]?[0-9]+')  # integer
number_syntax = re.compile(r'(?P<integer>[-+]?[0-9]+)(?P<decimal>\.[0-9]+)?(?P<exponent>(?:e|E)[-+]?[0-9]+)?')
digitstring = re.compile(r'[0-9]+')          # Unsigned integer
interesting = re.compile(r'[\\\r\n\"]')
langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?')
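# Illustrative sketch of how number_syntax decomposes a numeric token into its
# named groups (the values below are what the regular expression itself
# yields; how the parser maps them onto XSD datatypes is in code not shown in
# this truncated listing):
#
#     m = number_syntax.match('-2.5e10')
#     m.group('integer'), m.group('decimal'), m.group('exponent')
#     # -> ('-2', '.5', 'e10')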
class SinkParser:
    def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
                 genPrefix="", flags="", why=None):
        """ note: namespace names should *not* end in # ;
        the # will get added during qname processing """

        self._bindings = {}
        self._flags = flags
        if thisDoc != "":
            assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
            self._bindings[""] = thisDoc + "#"  # default

        self._store = store
        if genPrefix:
            store.setGenPrefix(genPrefix)  # pass it on

        self._thisDoc = thisDoc
        self.lines = 0  # for error handling
        self.startOfLine = 0  # For calculating character number
        self._genPrefix = genPrefix
        self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false']
        self.keywordsSet = 0  # Then only can others be considered qnames
        self._anonymousNodes = {}  # Dict of anon nodes already declared ln: Term
        self._variables = {}
        self._parentVariables = {}
        self._reason = why  # Why the parser was asked to parse this
        self._reason2 = None  # Why these triples
        # was: diag.tracking
        if tracking:
            self._reason2 = BecauseOfData(
                store.newSymbol(thisDoc), because=self._reason)

        if baseURI:
            self._baseURI = baseURI
        else:
            if thisDoc:
                self._baseURI = thisDoc
            else:
                self._baseURI = None

        assert not self._baseURI or ':' in self._baseURI

        if not self._genPrefix:
            if self._thisDoc:
                self._genPrefix = self._thisDoc + "#_g"
            else:
                self._genPrefix = uniqueURI()

        if openFormula == None:
            if self._thisDoc:
                self._formula = store.newFormula(thisDoc + "#_formula")
            else:
                self._formula = store.newFormula()
        else:
            self._formula = openFormula

        self._context = self._formula
        self._parentContext = None
    def here(self, i):
        """String generated from position in file

        This is for repeatability when referring people to bnodes in a document.
        This has diagnostic uses less formally, as it should point one to which
        bnode the arbitrary identifier actually is. It gives the
        line and character number of the '[' character or path character
        which introduced the blank node. The first blank node is boringly _L1C1.
        It used to be used only for tracking, but for tests in general
        it makes the canonical ordering of bnodes repeatable."""

        return "%s_L%iC%i" % (self._genPrefix, self.lines,
                              i - self.startOfLine + 1)
    def formula(self):
        return self._formula

    def loadStream(self, stream):
        return self.loadBuf(stream.read())  # Not ideal

    def loadBuf(self, buf):
        """Parses a buffer and returns its top level formula"""
        self.startDoc()
        self.feed(buf)
        return self.endDoc()  # self._formula
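    # Illustrative sketch: in normal use this module is not driven directly;
    # the N3Parser class (registered with rdflib's plugin machinery for the
    # "n3" format) builds a SinkParser and feeds it the source. Roughly, via
    # rdflib's public API:
    #
    #     import rdflib
    #     g = rdflib.Graph()
    #     g.parse(data='@prefix : <http://example.org/> . :a :b :c .',
    #             format='n3')
    #     len(g)   # -> 1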
    def feed(self, octets):
        """Feed an octet stream to the parser

        if BadSyntax is raised, the string
        passed in the exception object is the
        remainder after any statements have been parsed.
        So if there is more data to feed to the
        parser, it should be straightforward to recover."""

        if not isinstance(octets, unicode):
            s = octets.decode('utf-8')
            # NB already decoded, so \ufeff
            if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'):
                s = s[1:]
        else:
            s = octets

        i = 0
        while i >= 0:
            j = self.skipSpace(s, i)
            if j < 0:
                return

            i = self.directiveOrStatement(s, j)
            if i < 0:
                print("# next char: %s" % s[j])
                raise BadSyntax(self._thisDoc, self.lines, s, j,
                                "expected directive or statement")
    def directiveOrStatement(self, argstr, h):
        i = self.skipSpace(argstr, h)
        if i < 0:
            return i  # EOF

        j = self.directive(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        j = self.statement(argstr, i)
        if j >= 0:
            return self.checkDot(argstr, j)

        return j

    # @@I18N
    global _notNameChars
    # _namechars = string.lowercase + string.uppercase + string.digits + '_-'

    def tok(self, tok, argstr, i):
        """Check for keyword. Space must have been stripped on entry and
        we must not be at end of file."""
        assert tok[0] not in _notNameChars  # not for punctuation
        if argstr[i:i + 1] == "@":
            i = i + 1
        else:
            if tok not in self.keywords:
                return -1  # No, this has neither keywords declaration nor "@"

        if (argstr[i:i + len(tok)] == tok
                and (argstr[i + len(tok)] in _notQNameChars)):
            i = i + len(tok)
            return i
        else:
            return -1
    def directive(self, argstr, i):
        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        res = []

        j = self.tok('bind', argstr, i)  # implied "#". Obsolete.
        if j > 0:
            raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                            "keyword bind is obsolete: use @prefix")

        j = self.tok('keywords', argstr, i)
        if j > 0:
            i = self.commaSeparatedList(argstr, j, res, self.bareWord)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "'@keywords' needs comma separated list of words")
            self.setKeywords(res[:])
            # was: diag.chatty_flag
            if chatty_flag > 80:
                progress("Keywords ", self.keywords)
            return i

        j = self.tok('forAll', argstr, i)
        if j > 0:
            i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "Bad variable list after @forAll")
            for x in res:
                # self._context.declareUniversal(x)
                if x not in self._variables or x in self._parentVariables:
                    self._variables[x] = self._context.newUniversal(x)
            return i

        j = self.tok('forSome', argstr, i)
        if j > 0:
            i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "Bad variable list after @forSome")
            for x in res:
                self._context.declareExistential(x)
            return i

        j = self.tok('prefix', argstr, i)  # no implied "#"
        if j >= 0:
            t = []
            i = self.qname(argstr, j, t)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "expected qname after @prefix")
            j = self.uri_ref2(argstr, i, t)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "expected <uriref> after @prefix _qname_")
            ns = self.uriOf(t[1])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            elif ":" not in ns:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "With no base URI, cannot use relative URI in @prefix <" + ns + ">")
            assert ':' in ns  # must be absolute
            self._bindings[t[0][0]] = ns
            self.bind(t[0][0], hexify(ns))
            return j

        j = self.tok('base', argstr, i)  # Added 2007/7/7
        if j >= 0:
            t = []
            i = self.uri_ref2(argstr, j, t)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "expected <uri> after @base ")
            ns = self.uriOf(t[0])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            else:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "With no previous base URI, cannot use relative URI in @base <" + ns + ">")
            assert ':' in ns  # must be absolute
            self._baseURI = ns
            return i

        return -1  # Not a directive, could be something else.
    def bind(self, qn, uri):
        assert isinstance(uri,
                          types.StringType), "Any unicode must be %x-encoded already"
        if qn == "":
            self._store.setDefaultNamespace(uri)
        else:
            self._store.bind(qn, uri)

    def setKeywords(self, k):
        "Takes a list of strings"
        if k == None:
            self.keywordsSet = 0
        else:
            self.keywords = k
            self.keywordsSet = 1

    def startDoc(self):
        # was: self._store.startDoc()
        self._store.startDoc(self._formula)

    def endDoc(self):
        """Signal end of document and stop parsing. returns formula"""
        self._store.endDoc(self._formula)  # don't canonicalize yet
        return self._formula

    def makeStatement(self, quadruple):
        # $$$$$$$$$$$$$$$$$$$$$
        # print "# Parser output: ", `quadruple`
        self._store.makeStatement(quadruple, why=self._reason2)
    def statement(self, argstr, i):
        r = []
        i = self.object(argstr, i, r)  # Allow literal for subject - extends RDF
        if i < 0:
            return i

        j = self.property_list(argstr, i, r[0])
        if j < 0:
            raise BadSyntax(self._thisDoc, self.lines,
                            argstr, i, "expected propertylist")
        return j

    def subject(self, argstr, i, res):
        return self.item(argstr, i, res)
    def verb(self, argstr, i, res):
        """ has _prop_
        is _prop_ of
        a
        =
        _prop_
        >- prop ->
        <- prop -<
        _operator_"""

        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof

        r = []

        j = self.tok('has', argstr, i)
        if j >= 0:
            i = self.prop(argstr, j, r)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines,
                                argstr, j, "expected property after 'has'")
            res.append(('->', r[0]))
            return i

        j = self.tok('is', argstr, i)
        if j >= 0:
            i = self.prop(argstr, j, r)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "expected <property> after 'is'")
            j = self.skipSpace(argstr, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "End of file found, expected property after 'is'")
                return j  # eof
            i = j
            j = self.tok('of', argstr, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "expected 'of' after 'is' <prop>")
            res.append(('<-', r[0]))
            return j

        j = self.tok('a', argstr, i)
        if j >= 0:
            res.append(('->', RDF_type))
            return j

        if argstr[i:i + 2] == "<=":
            res.append(('<-', self._store.newSymbol(Logic_NS + "implies")))
            return i + 2

        if argstr[i:i + 1] == "=":
            if argstr[i + 1:i + 2] == ">":
                res.append(('->', self._store.newSymbol(Logic_NS + "implies")))
                return i + 2
            res.append(('->', DAML_sameAs))
            return i + 1

        if argstr[i:i + 2] == ":=":
            # patch file relates two formulae, uses this @@ really?
            res.append(('->', Logic_NS + "becomes"))
            return i + 2

        j = self.prop(argstr, i, r)
        if j >= 0:
            res.append(('->', r[0]))
            return j

        if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-":
            raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                            ">- ... -> syntax is obsolete.")

        return -1
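    # For illustration (summarising the code above, not adding behaviour):
    # verb() appends a (direction, predicate) pair to res, which
    # property_list() later uses to orient the triple:
    #
    #     'has :p'    ->  ('->', :p)          subject  :p  object
    #     'is :p of'  ->  ('<-', :p)          object   :p  subject
    #     'a'         ->  ('->', rdf:type)
    #     '='         ->  ('->', owl:sameAs)
    #     '=>'        ->  ('->', log:implies)
    #     '<='        ->  ('<-', log:implies)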
    def prop(self, argstr, i, res):
        return self.item(argstr, i, res)

    def item(self, argstr, i, res):
        return self.path(argstr, i, res)

    def blankNode(self, uri=None):
        if "B" not in self._flags:
            return self._context.newBlankNode(uri, why=self._reason2)
        x = self._context.newSymbol(uri)
        self._context.declareExistential(x)
        return x

    def path(self, argstr, i, res):
        """Parse the path production.
        """
        j = self.nodeOrLiteral(argstr, i, res)
        if j < 0:
            return j  # nope

        while argstr[j:j + 1] in "!^.":  # no spaces, must follow exactly (?)
            ch = argstr[j:j + 1]  # @@ Allow "." followed IMMEDIATELY by a node.
            if ch == ".":
                ahead = argstr[j + 1:j + 2]
                if not ahead or (ahead in _notNameChars
                                 and ahead not in ":?<[{("):
                    break
            subj = res.pop()
            obj = self.blankNode(uri=self.here(j))
            j = self.node(argstr, j + 1, res)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "EOF found in middle of path syntax")
            pred = res.pop()
            if ch == "^":  # Reverse traverse
                self.makeStatement((self._context, pred, obj, subj))
            else:
                self.makeStatement((self._context, pred, subj, obj))
            res.append(obj)
        return j
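    # Illustration of the path production handled above (a sketch; the terms
    # are hypothetical): for ':joe!fam:mother' the parser mints a fresh blank
    # node _:b, emits the statement  :joe fam:mother _:b  and leaves _:b on
    # the stack as the value of the expression; ':joe^fam:son' does the same
    # with subject and object swapped ( _:b fam:son :joe ).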
    def anonymousNode(self, ln):
        """Remember or generate a term for one of these _: anonymous nodes"""
        term = self._anonymousNodes.get(ln, None)
        if term != None:
            return term
        term = self._store.newBlankNode(self._context, why=self._reason2)
        self._anonymousNodes[ln] = term
        return term
    def node(self, argstr, i, res, subjectAlready=None):
        """Parse the <node> production.
        Space is now skipped once at the beginning
        instead of in multiple calls to self.skipSpace().
        """
        subj = subjectAlready

        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        i = j
        ch = argstr[i:i + 1]  # Quick 1-character checks first:

        if ch == "[":
            bnodeID = self.here(i)
            j = self.skipSpace(argstr, i + 1)
            if j < 0:
                raise BadSyntax(self._thisDoc,
                                self.lines, argstr, i, "EOF after '['")
            if argstr[j:j + 1] == "=":  # Hack for "is" binding name to anon node
                i = j + 1
                objs = []
                j = self.objectList(argstr, i, objs)
                if j >= 0:
                    subj = objs[0]
                    if len(objs) > 1:
                        for obj in objs:
                            self.makeStatement((self._context,
                                                DAML_sameAs, subj, obj))
                    j = self.skipSpace(argstr, j)
                    if j < 0:
                        raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                        "EOF when objectList expected after [ = ")
                    if argstr[j:j + 1] == ";":
                        j = j + 1
                else:
                    raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                    "objectList expected after [= ")

            if subj is None:
                subj = self.blankNode(uri=bnodeID)

            i = self.property_list(argstr, j, subj)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "property_list expected")

            j = self.skipSpace(argstr, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "EOF when ']' expected after [ <propertyList>")
            if argstr[j:j + 1] != "]":
                raise BadSyntax(self._thisDoc,
                                self.lines, argstr, j, "']' expected")
            res.append(subj)
            return j + 1

        if ch == "{":
            ch2 = argstr[i + 1:i + 2]
            if ch2 == '$':
                i += 1
                j = i + 1
                List = []
                first_run = True
                while 1:
                    i = self.skipSpace(argstr, j)
                    if i < 0:
                        raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                        "needed '$}', found end.")
                    if argstr[i:i + 2] == '$}':
                        j = i + 2
                        break

                    if not first_run:
                        if argstr[i:i + 1] == ',':
                            i += 1
                        else:
                            raise BadSyntax(self._thisDoc, self.lines,
                                            argstr, i, "expected: ','")
                    else:
                        first_run = False

                    item = []
                    j = self.item(argstr, i, item)  # @@@@@ should be path, was object
                    if j < 0:
                        raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                        "expected item in set or '$}'")
                    List.append(self._store.intern(item[0]))
                res.append(self._store.newSet(List, self._context))
                return j
            else:
                j = i + 1
                oldParentContext = self._parentContext
                self._parentContext = self._context
                parentAnonymousNodes = self._anonymousNodes
                grandParentVariables = self._parentVariables
                self._parentVariables = self._variables
                self._anonymousNodes = {}
                self._variables = self._variables.copy()
                reason2 = self._reason2
                self._reason2 = becauseSubexpression
                if subj is None:
                    subj = self._store.newFormula()
                self._context = subj

                while 1:
                    i = self.skipSpace(argstr, j)
                    if i < 0:
                        raise BadSyntax(self._thisDoc, self.lines,
                                        argstr, i, "needed '}', found end.")

                    if argstr[i:i + 1] == "}":
                        j = i + 1
                        break

                    j = self.directiveOrStatement(argstr, i)
                    if j < 0:
                        raise BadSyntax(self._thisDoc, self.lines,
                                        argstr, i, "expected statement or '}'")

                self._anonymousNodes = parentAnonymousNodes
                self._variables = self._parentVariables
                self._parentVariables = grandParentVariables
                self._context = self._parentContext
                self._reason2 = reason2
                self._parentContext = oldParentContext
                res.append(subj.close())  # No use until closed
                return j

        if ch == "(":
            thing_type = self._store.newList
            ch2 = argstr[i + 1:i + 2]
            if ch2 == '$':
                thing_type = self._store.newSet
                i += 1
            j = i + 1

            List = []
            while 1:
                i = self.skipSpace(argstr, j)
                if i < 0:
                    raise BadSyntax(self._thisDoc, self.lines,
                                    argstr, i, "needed ')', found end.")
                if argstr[i:i + 1] == ')':
                    j = i + 1
                    break

                item = []
                j = self.item(argstr, i, item)  # @@@@@ should be path, was object
                if j < 0:
                    raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                    "expected item in list or ')'")
                List.append(self._store.intern(item[0]))
            res.append(thing_type(List, self._context))
            return j

        j = self.tok('this', argstr, i)  # This context
        if j >= 0:
            raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                            "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.")
            res.append(self._context)
            return j

        # booleans
        j = self.tok('true', argstr, i)
        if j >= 0:
            res.append(True)
            return j
        j = self.tok('false', argstr, i)
        if j >= 0:
            res.append(False)
            return j

        if subj is None:  # If this can be a named node, then check for a name.
            j = self.uri_ref2(argstr, i, res)
            if j >= 0:
                return j

        return -1
    def property_list(self, argstr, i, subj):
        """Parse property list
        Leaves the terminating punctuation in the buffer
        """
        while 1:
            j = self.skipSpace(argstr, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                "EOF found when expected verb in property list")
                return j  # eof

            if argstr[j:j + 2] == ":-":
                i = j + 2
                res = []
                j = self.node(argstr, i, res, subj)
                if j < 0:
                    raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                    "bad {} or () or [] node after :- ")
                i = j
                continue
            i = j
            v = []
            j = self.verb(argstr, i, v)
            if j <= 0:
                return i  # void but valid

            objs = []
            i = self.objectList(argstr, j, objs)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "objectList expected")
            for obj in objs:
                dira, sym = v[0]
                if dira == '->':
                    self.makeStatement((self._context, sym, subj, obj))
                else:
                    self.makeStatement((self._context, sym, obj, subj))

            j = self.skipSpace(argstr, i)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, argstr, j,
                                "EOF found in list of objects")
                return j  # eof
            if argstr[i:i + 1] != ";":
                return i
            i = i + 1  # skip semicolon and continue
    def commaSeparatedList(self, argstr, j, res, what):
        """return value: -1 bad syntax; >1 new position in argstr
        res has things found appended
        """
        i = self.skipSpace(argstr, j)
        if i < 0:
            raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                            "EOF found expecting comma sep list")
            return i
        if argstr[i] == ".":
            return j  # empty list is OK
        i = what(argstr, i, res)
        if i < 0:
            return -1

        while 1:
            j = self.skipSpace(argstr, i)
            if j < 0:
                return j  # eof
            ch = argstr[j:j + 1]
            if ch != ",":

Listing truncated here; the remainder of notation3.py is not shown.