PageRenderTime 28ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/rdfextras/store/SPARQL.py

https://code.google.com/p/rdfextras/
Python | 457 lines | 293 code | 66 blank | 98 comment | 48 complexity | fd99c624975f65c978ccce904f1f7f3d MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. #
  3. """
  4. This is an RDFLib store around Ivan Herman et al.'s SPARQL service wrapper.
  5. This was first done in layer-cake, and then ported to rdflib 3 and rdfextras
  6. This version works with vanilla SPARQLWrapper installed by easy_install or similar
  7. Changes:
  8. - Layer-cake added support for namespace binding; it was removed again so that the store works with vanilla SPARQLWrapper
  9. - JSON object mapping support suppressed
  10. - Replaced 4Suite-XML Domlette with ElementTree
  11. - Incorporated as an rdflib store
  12. """
  13. __version__ = "1.02"
  14. __authors__ = u"Ivan Herman, Sergio Fern??ndez, Carlos Tejo Alonso, Gunnar Aastrand Grimnes"
  15. __license__ = u'W3CŽ SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
  16. __contact__ = 'Ivan Herman, ivan_herman@users.sourceforge.net'
  17. __date__ = "2011-01-30"
  18. import re
  19. import warnings
  20. try:
  21. from SPARQLWrapper import SPARQLWrapper, XML
  22. from SPARQLWrapper.Wrapper import QueryResult
  23. except ImportError:
  24. raise Exception("SPARQLWrapper not found! SPARQL Store will not work. Install with 'easy_install SPARQLWrapper'")
  25. try:
  26. from xml import etree
  27. except ImportError:
  28. import elementtree as etree
  29. from rdfextras.store.REGEXMatching import NATIVE_REGEX
  30. from rdflib.store import Store
  31. from rdflib import Variable, Namespace, BNode, URIRef, Literal
  32. import httplib
  33. import urlparse
  34. BNODE_IDENT_PATTERN = re.compile('(?P<label>_\:[^\s]+)')
  35. SPARQL_NS = Namespace('http://www.w3.org/2005/sparql-results#')
  36. sparqlNsBindings = {u'sparql':SPARQL_NS}
  37. etree.ElementTree._namespace_map["sparql"]=SPARQL_NS
  38. def TraverseSPARQLResultDOM(doc,asDictionary=False):
  39. """
  40. Returns a generator over tuples of results
  41. by (4Suite) XPath evaluation over the result XML
  42. """
  43. # namespace handling in elementtree xpath sub-set is not pretty :(
  44. # and broken in < 1.3, according to two FutureWarnings:
  45. # 1.
  46. # FutureWarning: This search is broken in 1.3 and earlier, and will
  47. # be fixed in a future version. If you rely on the current behaviour,
  48. # change it to
  49. # './{http://www.w3.org/2005/sparql-results#}head/{http://www.w3.org/2005/sparql-results#}variable'
  50. # 2.
  51. # FutureWarning: This search is broken in 1.3 and earlier, and will be
  52. # fixed in a future version. If you rely on the current behaviour,
  53. # change it to
  54. # './{http://www.w3.org/2005/sparql-results#}results/{http://www.w3.org/2005/sparql-results#}result'
  55. # Handle ElementTree warning
  56. variablematch = '/{http://www.w3.org/2005/sparql-results#}head/{http://www.w3.org/2005/sparql-results#}variable'
  57. resultmatch = '/{http://www.w3.org/2005/sparql-results#}results/{http://www.w3.org/2005/sparql-results#}result'
  58. # with warnings.catch_warnings(record=True) as w:
  59. # warnings.simplefilter("always")
  60. # matched_variables = doc.findall(variablematch)
  61. # if len(w) == 1:
  62. # variablematch = '.' + variablematch
  63. # resultmatch = '.' + resultmatch
  64. # # Could be wrong result, re-do from start
  65. # matched_variables = doc.findall(variablematch)
  66. for w in (warnings.catch_warnings(record=True)):
  67. warnings.simplefilter("always")
  68. matched_variables = doc.findall(variablematch)
  69. if len(w) == 1:
  70. variablematch = '.' + variablematch
  71. resultmatch = '.' + resultmatch
  72. # Could be wrong result, re-do from start
  73. matched_variables = doc.findall(variablematch)
  74. vars = [Variable(v.attrib["name"]) for v in matched_variables]
  75. for result in doc.findall(resultmatch):
  76. currBind = {}
  77. values = []
  78. for binding in result.findall('{http://www.w3.org/2005/sparql-results#}binding'):
  79. varVal = binding.attrib["name"]
  80. var = Variable(varVal)
  81. term = CastToTerm(binding.findall('*')[0])
  82. values.append(term)
  83. currBind[var]=term
  84. if asDictionary:
  85. yield currBind,vars
  86. else:
  87. def stab(values):
  88. if len(values)==1:
  89. return values[0]
  90. else:
  91. return tuple(values)
  92. yield stab(values), vars
  93. def localName(qname):
  94. # wtf - elementtree cant do this for me
  95. return qname[qname.index("}")+1:]
  96. def CastToTerm(node):
  97. """
  98. Helper function that casts XML node in SPARQL results
  99. to appropriate rdflib term
  100. """
  101. if node.tag == '{%s}bnode'%SPARQL_NS:
  102. return BNode(node.text)
  103. elif node.tag == '{%s}uri'%SPARQL_NS:
  104. return URIRef(node.text)
  105. elif node.tag == '{%s}literal'%SPARQL_NS:
  106. if 'datatype' in node.attrib:
  107. dT = URIRef(node.attrib['datatype'])
  108. if False:#not node.xpath('*'):
  109. return Literal('',datatype=dT)
  110. else:
  111. return Literal(node.text,
  112. datatype=dT)
  113. elif '{http://www.w3.org/XML/1998/namespace}lang' in node.attrib:
  114. return Literal(node.text, lang=node.attrib["{http://www.w3.org/XML/1998/namespace}lang"])
  115. else:
  116. return Literal(node.text)
  117. else:
  118. raise Exception('Unknown answer type')
  119. class SPARQLResult(QueryResult):
  120. """
  121. Query result class for SPARQL
  122. xml : as an XML string conforming to the SPARQL XML result format: http://www.w3.org/TR/rdf-sparql-XMLres/
  123. python: as Python objects
  124. json : as JSON
  125. graph : as an RDFLib Graph - for CONSTRUCT and DESCRIBE queries
  126. """
  127. def __init__(self,result):
  128. self.result = etree.ElementTree.parse(result)
  129. self.noAnswers = 0
  130. self.askAnswer = None
  131. def _parseResults(self):
  132. # Handle ElementTree warning, see LOC#51 (above)
  133. booleanmatch = '/{http://www.w3.org/2005/sparql-results#}boolean'
  134. # with warnings.catch_warnings(record=True) as w:
  135. # warnings.simplefilter("always")
  136. # matched_results = self.result.findall(booleanmatch)
  137. # if len(w) == 1:
  138. # # Could be wrong result, re-do from start
  139. # booleanmatch = '.' + booleanmatch
  140. # matched_results = self.askAnswer=self.result.findall(booleanmatch)
  141. # return matched_results
  142. for w in (warnings.catch_warnings(record=True)):
  143. warnings.simplefilter("always")
  144. matched_results = self.result.findall(booleanmatch)
  145. if len(w) == 1:
  146. # Could be wrong result, re-do from start
  147. booleanmatch = '.' + booleanmatch
  148. matched_results = self.askAnswer=self.result.findall(booleanmatch)
  149. return matched_results
  150. def __len__(self):
  151. raise NotImplementedError("Results are an iterable!")
  152. def __iter__(self):
  153. """Iterates over the result entries"""
  154. self._parseResults()
  155. if not self.askAnswer:
  156. for rt,vars in TraverseSPARQLResultDOM(self.result):
  157. self.noAnswers += 1
  158. yield rt
  159. def serialize(self,format='xml'):
  160. if format == 'python':
  161. self._parseResults()
  162. if self.askAnswer:
  163. return bool(self.askAnswer=='true')
  164. else:
  165. return self
  166. elif format == 'xml':
  167. return self.result
  168. else:
  169. raise Exception("Result format not implemented: %s"%format)
  170. class SPARQLStore(SPARQLWrapper,Store):
  171. """
  172. An RDFLib store around a SPARQL endpoint
  173. """
  174. context_aware = True
  175. formula_aware = False
  176. transaction_aware = False
  177. regex_matching = NATIVE_REGEX
  178. batch_unification = False
  179. def __init__(self,identifier=None,bNodeAsURI = False, sparql11=True):
  180. """
  181. """
  182. super(SPARQLStore, self).__init__(identifier,returnFormat=XML)
  183. self.bNodeAsURI = bNodeAsURI
  184. self.nsBindings = {}
  185. self.sparql11 = sparql11
  186. #Database Management Methods
  187. def create(self, configuration):
  188. raise TypeError('The SPARQL store is read only')
  189. def open(self, configuration, create=False):
  190. """
  191. Opens the store specified by the configuration string. If
  192. create is True a store will be created if it does not already
  193. exist. If create is False and a store does not already exist
  194. an exception is raised. An exception is also raised if a store
  195. exists, but there is insufficient permissions to open the
  196. store.
  197. """
  198. if create: raise Exception("Cannot create a SPARQL Endpoint")
  199. def destroy(self, configuration):
  200. """
  201. FIXME: Add documentation
  202. """
  203. raise TypeError('The SPARQL store is read only')
  204. #Transactional interfaces
  205. def commit(self):
  206. """ """
  207. raise TypeError('The SPARQL store is read only')
  208. def rollback(self):
  209. """ """
  210. raise TypeError('The SPARQL store is read only')
  211. def add(self, (subject, predicate, obj), context=None, quoted=False):
  212. """ Add a triple to the store of triples. """
  213. raise TypeError('The SPARQL store is read only')
  214. def addN(self, quads):
  215. """
  216. Adds each item in the list of statements to a specific context. The quoted argument
  217. is interpreted by formula-aware stores to indicate this statement is quoted/hypothetical.
  218. Note that the default implementation is a redirect to add
  219. """
  220. raise TypeError('The SPARQL store is read only')
  221. def remove(self, (subject, predicate, obj), context):
  222. """ Remove a triple from the store """
  223. raise TypeError('The SPARQL store is read only')
  224. def query(self, graph,
  225. queryStringOrObj,
  226. initNs={},
  227. initBindings={},
  228. DEBUG=False):
  229. self.debug = DEBUG
  230. assert isinstance(queryStringOrObj,basestring)
  231. #self.setNamespaceBindings(initNs)
  232. if len(initNs)>0:
  233. raise Exception("initNs not supported.")
  234. if len(initBindings)>0:
  235. raise Exception("initBindings not supported.")
  236. self.setQuery(queryStringOrObj)
  237. return SPARQLResult(SPARQLWrapper.query(self).response)
  238. def triples(self, (subject, predicate, obj), context=None):
  239. """
  240. SELECT ?subj ?pred ?obj WHERE { ?subj ?pred ?obj }
  241. """
  242. subjVar = Variable('subj')
  243. predVar = Variable('pred')
  244. objVar = Variable('obj')
  245. termsSlots = {}
  246. selectVars = []
  247. if subject is not None:
  248. termsSlots[subjVar] = subject
  249. else:
  250. selectVars.append(subjVar)
  251. if predicate is not None:
  252. termsSlots[predVar] = predicate
  253. else:
  254. selectVars.append(predVar)
  255. if obj is not None:
  256. termsSlots[objVar] = obj
  257. else:
  258. selectVars.append(objVar)
  259. query ="SELECT %s WHERE { %s %s %s }"%(
  260. ' '.join([term.n3() for term in selectVars]),
  261. termsSlots.get(subjVar, subjVar).n3(),
  262. termsSlots.get(predVar, predVar).n3(),
  263. termsSlots.get(objVar , objVar ).n3()
  264. )
  265. self.setQuery(query)
  266. doc = etree.ElementTree.parse(SPARQLWrapper.query(self).response)
  267. #xml.etree.ElementTree.dump(doc)
  268. for rt,vars in TraverseSPARQLResultDOM(doc,asDictionary=True):
  269. yield (rt.get(subjVar,subject),
  270. rt.get(predVar,predicate),
  271. rt.get(objVar,obj)),None
  272. def triples_choices(self, (subject, predicate, object_),context=None):
  273. """
  274. A variant of triples that can take a list of terms instead of a single
  275. term in any slot. Stores can implement this to optimize the response time
  276. from the import default 'fallback' implementation, which will iterate
  277. over each term in the list and dispatch to tripless
  278. """
  279. raise NotImplementedError('Triples choices currently not supported')
  280. def __len__(self, context=None):
  281. if not self.sparql11:
  282. raise NotImplementedError("For performance reasons, this is not supported for sparql1.0 endpoints")
  283. else:
  284. if context is not None:
  285. q="SELECT (count(*) as ?c) FROM <%s> WHERE { ?s ?p ?o . }"%context
  286. else:
  287. q="SELECT (count(*) as ?c) WHERE { ?s ?p ?o . }"
  288. self.setQuery(q)
  289. doc = etree.ElementTree.parse(SPARQLWrapper.query(self).response)
  290. rt,vars=iter(TraverseSPARQLResultDOM(doc,asDictionary=True)).next()
  291. return int(rt.get(Variable("c")))
  292. def contexts(self, triple=None):
  293. """
  294. iterates over results to SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }
  295. returning instances of this store with the SPARQL wrapper
  296. object updated via addNamedGraph(?NAME)
  297. This causes a named-graph-uri key / value pair to be sent over the protocol
  298. """
  299. raise NotImplementedError(".contexts(..) not supported")
  300. # self.setQuery("SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }")
  301. # doc = self.query().convert()
  302. # for result in doc.xpath('/{http://www.w3.org/2005/sparql-results#}sparql/{http://www.w3.org/2005/sparql-results#}results/{http://www.w3.org/2005/sparql-results#}result',
  303. # explicitNss=sparqlNsBindings):
  304. # statmentTerms = {}
  305. # for binding in result.xpath('{http://www.w3.org/2005/sparql-results#}binding',
  306. # explicitNss=sparqlNsBindings):
  307. # term = CastToTerm(binding.xpath('*')[0])
  308. # newStore = SPARQLStore(self.baseURI)
  309. # newStore.addNamedGraph(term)
  310. # yield Graph(self,term)
  311. #Namespace persistence interface implementation
  312. def bind(self, prefix, namespace):
  313. self.nsBindings[prefix]=namespace
  314. def prefix(self, namespace):
  315. """ """
  316. return dict([(v,k) for k,v in self.nsBindings.items()]).get(namespace)
  317. def namespace(self, prefix):
  318. return self.nsBindings.get(prefix)
  319. def namespaces(self):
  320. for prefix,ns in self.nsBindings.items():
  321. yield prefix,ns
  322. class SPARQLUpdateStore(SPARQLStore):
  323. """
  324. A store using SPARQL queries for read-access
  325. and SPARQL Update for changes
  326. """
  327. def __init__(self, queryEndpoint=None,updateEndpoint=None, bNodeAsURI = False):
  328. SPARQLStore.__init__(self, queryEndpoint, bNodeAsURI)
  329. self.updateEndpoint=updateEndpoint
  330. p=urlparse.urlparse(self.updateEndpoint)
  331. assert not p.username, "SPARQL Update store does not support HTTP authentication"
  332. assert not p.password, "SPARQL Update store does not support HTTP authentication"
  333. assert p.scheme=="http", "SPARQL Update is an http protocol!"
  334. self.host=p.hostname
  335. self.port=p.port
  336. self.path=p.path
  337. self.connection = httplib.HTTPConnection(self.host, self.port)
  338. self.headers={'Content-type': "application/sparql-update" }
  339. #Transactional interfaces
  340. def commit(self):
  341. """ """
  342. raise TypeError('The SPARQL Update store is not transaction aware!')
  343. def rollback(self):
  344. """ """
  345. raise TypeError('The SPARQL Update store is not transaction aware')
  346. def add(self, (subject, predicate, obj), context=None, quoted=False):
  347. """ Add a triple to the store of triples. """
  348. assert not quoted
  349. triple="%s %s %s ."%(subject.n3(), predicate.n3(), obj.n3())
  350. if context is not None:
  351. q="INSERT DATA { %s }"%triple
  352. else:
  353. q="INSERT DATA { GRAPH <%s> { %s } }"%(context, triple)
  354. r=self._do_update(q)
  355. r.read() # we expect no content
  356. if r.status not in (200, 204):
  357. raise Exception("Could not update: %d %s"%(r.status, r.reason))
  358. def addN(self, quads):
  359. Store.addN(self,quads)
  360. def remove(self, (subject, predicate, obj), context):
  361. """ Remove a triple from the store """
  362. triple="%s %s %s ."%(subject.n3(), predicate.n3(), obj.n3())
  363. if context is not None:
  364. q="DELETE DATA { %s }"%triple
  365. else:
  366. q="DELETE DATA { GRAPH <%s> { %s } }"%(context, triple)
  367. r=self._do_update(q)
  368. r.read() # we expect no content
  369. if r.status not in (200, 204):
  370. raise Exception("Could not update: %d %s"%(r.status, r.reason))
  371. def _do_update(self, update):
  372. self.connection.request('POST', self.path, update, self.headers)
  373. return self.connection.getresponse()