PageRenderTime 58ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/surf/query/__init__.py

https://github.com/ceberhardt/surf
Python | 403 lines | 344 code | 17 blank | 42 comment | 12 complexity | f21e41175222e9b5a4f48ed332652ab9 MD5 | raw file
  1. # Copyright (c) 2009, Digital Enterprise Research Institute (DERI),
  2. # NUI Galway
  3. # All rights reserved.
  4. # author: Cosmin Basca
  5. # email: cosmin.basca@gmail.com
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions
  8. # are met:
  9. # * Redistributions of source code must retain the above copyright
  10. # notice, this list of conditions and the following disclaimer.
  11. # * Redistributions in binary form must reproduce the above copyright
  12. # notice, this list of conditions and the following disclaimer
  13. # in the documentation and/or other materials provided with
  14. # the distribution.
  15. # * Neither the name of DERI nor the
  16. # names of its contributors may be used to endorse or promote
  17. # products derived from this software without specific prior
  18. # written permission.
  19. # THIS SOFTWARE IS PROVIDED BY DERI ''AS IS'' AND ANY
  20. # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21. # THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  22. # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DERI BE
  23. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  24. # OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25. # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  28. # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  30. # OF THE POSSIBILITY OF SUCH DAMAGE.
  31. # -*- coding: utf-8 -*-
  32. __author__ = 'Cosmin Basca'
  33. import logging
  34. import re
  35. from surf.rdf import BNode, Graph, ConjunctiveGraph, Literal, Namespace
  36. from surf.rdf import RDF, URIRef
# Shorthand for the ``rdf:type`` predicate; usable as the predicate of a
# triple pattern, e.g. ``("?s", a, some_class)``.
a = RDF['type']

# Query type identifiers; these are the values collected in ``Query.TYPES``.
SELECT = 'select'
ASK = 'ask'
CONSTRUCT = 'construct'
DESCRIBE = 'describe'

# Query modifier identifiers, stored by ``Query.distinct()`` and
# ``Query.reduced()``.
DISTINCT = 'distinct'
REDUCED = 'reduced'

# Keyword constant for unions; not referenced elsewhere in this module.
UNION = 'union'
  45. #the classes
  46. class Group(list):
  47. '''A **SPARQL** triple pattern group
  48. '''
  49. pass
  50. class NamedGroup(Group):
  51. '''A **SPARQL** triple pattern named group
  52. '''
  53. def __init__(self, name = None):
  54. Group.__init__(self)
  55. if isinstance(name, URIRef) or (type(name) in [str, unicode] and name.startswith('?')):
  56. self.name = name
  57. else:
  58. raise ValueError('The names')
  59. class OptionalGroup(Group):
  60. '''A **SPARQL** triple pattern optional group
  61. '''
  62. pass
  63. class Union(Group):
  64. '''A **SPARQL** union
  65. '''
  66. pass
  67. class Filter(unicode):
  68. '''A **SPARQL** triple pattern filter
  69. '''
  70. @classmethod
  71. def regex(cls, var, pattern, flag = None):
  72. if type(var) in [str, unicode] and var.startswith('?'): pass
  73. else: raise ValueError('not a filter variable')
  74. if type(pattern) in [str, unicode]: pass
  75. elif type(pattern) is Literal: pattern = '"%s"@%s' % (pattern, pattern.language)
  76. elif type(pattern) in [list, tuple]: pattern = '"%s"@%s' % (pattern[0], pattern[1])
  77. else: raise ValueError('regular expression')
  78. if flag is None:
  79. flag = ""
  80. else:
  81. if not type(flag) in [str, unicode]:
  82. raise ValueError('not a filter flag')
  83. return Filter('regex(%s,"%s"%s)' % (var, pattern, ',"%s"' % flag))
  84. class Query(object):
  85. """
  86. The `Query` object is used by SuRF to construct queries in a programatic
  87. manner. The class supports the major SPARQL query types: *select*, *ask*,
  88. *describe*, *construct*. Although it follows the SPARQL format the query
  89. can be translated to other Query formats such as PROLOG, for now
  90. though only SPARQL is supported.
  91. Query objects should not be instatiated directly, instead use module-level
  92. :func:`ask`, :func:`construct`, :func:`describe`, :func:`select` functions.
  93. Query methods can be chained.
  94. """
  95. STATEMENT_TYPES = [list, tuple, Group, NamedGroup, OptionalGroup,
  96. Union, Filter] # + Query, but cannot reference it here.
  97. AGGREGATE_FUCTIONS = ["count", "min", "max", "avg"]
  98. TYPES = [SELECT, ASK, CONSTRUCT, DESCRIBE]
  99. def __init__(self, type, *vars):
  100. if type not in self.TYPES:
  101. raise ValueError('''The query is not of a supported type [%s], supported
  102. types are %s''' % (type, str(Query.TYPES)))
  103. self._type = type
  104. self._modifier = None
  105. self._vars = [var for var in vars if self._validate_variable(var)]
  106. self._from = []
  107. self._data = []
  108. self._limit = None
  109. self._offset = None
  110. self._order_by = []
  111. query_type = property(fget = lambda self: self._type)
  112. '''the query `type` can be: *SELECT*, *ASK*, *DESCRIBE*or *CONSTRUCT*'''
  113. query_modifier = property(fget = lambda self: self._modifier)
  114. '''the query `modifier` can be: *DISTINCT*, *REDUCED*, or `None`'''
  115. query_vars = property(fget = lambda self: self._vars)
  116. '''the query `variables` to return as the resultset'''
  117. query_from = property(fget = lambda self: self._from)
  118. '''list of URIs that will go into query FROM clauses'''
  119. query_data = property(fget = lambda self: self._data)
  120. '''the query `data`, internal structure representing the contents of the *WHERE* clause'''
  121. query_limit = property(fget = lambda self: self._limit)
  122. '''the query `limit`, can be a number or None'''
  123. query_offset = property(fget = lambda self: self._offset)
  124. '''the query `offset`, can be a number or None'''
  125. query_order_by = property(fget = lambda self: self._order_by)
  126. '''the query `order by` variables'''
  127. def _validate_variable(self, var):
  128. if type(var) in [str, unicode]:
  129. if not var.startswith('?'):
  130. for aggregate in Query.AGGREGATE_FUCTIONS:
  131. if var.lower().startswith(aggregate):
  132. return True
  133. raise ValueError('''Not a variable : <%s>, check correct syntax ("?" or
  134. supported aggregate %s)''' % (var, str(Query.AGGREGATE_FUCTIONS)))
  135. return True
  136. else:
  137. raise ValueError('''Unknown variable type, all variables must either
  138. start with a "?" or be among the recognized aggregates :
  139. %s''' % Query.AGGREGATE_FUCTIONS)
  140. def distinct(self):
  141. """ Add *DISTINCT* modifier. """
  142. self._modifier = DISTINCT
  143. return self
  144. def reduced(self):
  145. """ Add *REDUCED* modifier. """
  146. self._modifier = REDUCED
  147. return self
  148. def from_(self, *uris):
  149. """ Add graph URI(s) that will go in separate *FROM* clause.
  150. Each argument can be either `string` or :class:`surf.rdf.URIRef`.
  151. """
  152. if len(uris) == 1 and type(uris[0]) is list:
  153. uris = uris[0]
  154. for uri in uris:
  155. if uri is None:
  156. raise ValueError("Invalid graph URI")
  157. self._from += uris
  158. return self
  159. def where(self, *statements):
  160. """ Add graph pattern(s) to *WHERE* clause.
  161. `where()` accepts multiple arguments. Each argument represents a
  162. a graph pattern and will be added to default group graph pattern.
  163. Each argument can be `tuple`, `list`, :class:`surf.query.Query`,
  164. :class:`surf.query.NamedGroup`, :class:`surf.query.OptionalGroup`.
  165. Example:
  166. >>> query = select("?s").where(("?s", a, surf.ns.FOAF["person"]))
  167. """
  168. self._data.extend([stmt for stmt in statements if validate_statement(stmt)])
  169. return self
  170. def optional_group(self, *statements):
  171. """ Add optional group graph pattern to *WHERE* clause.
  172. `optional_group()` accepts multiple arguments, similarly
  173. to :meth:`where()`.
  174. """
  175. g = OptionalGroup()
  176. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  177. self._data.append(g)
  178. return self
  179. def group(self, *statements):
  180. g = Group()
  181. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  182. self._data.append(g)
  183. return self
  184. def union(self, *statements):
  185. g = Union()
  186. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  187. self._data.append(g)
  188. return self
  189. def named_group(self, name, *statements):
  190. """ Add ``GROUP ?name { ... }`` construct to *WHERE* clause.
  191. ``name`` is the variable name that will be bound to graph IRI.
  192. ``*statements`` is one or more graph patterns.
  193. Example:
  194. >>> import surf
  195. >>> from surf.query import a, select
  196. >>> query = select("?s", "?src").named_group("?src", ("?s", a, surf.ns.FOAF['Person']))
  197. >>> print unicode(query)
  198. SELECT ?s ?src WHERE { GRAPH ?src { ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> } }
  199. """
  200. g = NamedGroup(name)
  201. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  202. self._data.append(g)
  203. return self
  204. def filter(self, filter):
  205. """ Add *FILTER* construct to query *WHERE* clause.
  206. ``filter`` must be either `string`/`unicode` or
  207. :class:`surf.query.Filter` object, if it is `None` then no filter
  208. is appended.
  209. """
  210. if not filter:
  211. return self
  212. elif type(filter) in [str, unicode]:
  213. filter = Filter(filter)
  214. elif type(filter) is not Filter:
  215. raise ValueError('the filter must be of type Filter, str or unicode following the syntax of the query language')
  216. self._data.append(filter)
  217. return self
  218. def limit(self, limit):
  219. """ Add *LIMIT* modifier to query. """
  220. if limit:
  221. self._limit = limit
  222. return self
  223. def offset(self, offset):
  224. """ Add *OFFSET* modifier to query. """
  225. if offset:
  226. self._offset = offset
  227. return self
  228. def order_by(self, *vars):
  229. """ Add *ORDER_BY* modifier to query. """
  230. pattern = re.compile("(asc|desc)\(\?\w+\)|\?\w+", re.I)
  231. for var in vars:
  232. if re.match(pattern, var):
  233. self._order_by.append(var)
  234. return self
  235. def __unicode__(self):
  236. # Importing here to avoid circular imports.
  237. from surf.query.translator.sparql import SparqlTranslator
  238. return SparqlTranslator(self).translate()
  239. def __str__(self):
  240. return unicode(self).encode("utf-8")
  241. def validate_statement(statement):
  242. if type(statement) in Query.STATEMENT_TYPES or isinstance(statement, Query):
  243. if type(statement) in [list, tuple]:
  244. try:
  245. s, p, o = statement
  246. except:
  247. raise ValueError('''Statement of type [list, tuple] does not
  248. have all the (s,p,o) members (the length of the
  249. supplied arguemnt must be at least 3)''')
  250. if type(s) in [URIRef, BNode] or \
  251. (type(s) in [str, unicode] and s.startswith('?')): pass
  252. else: raise ValueError('The subject is not a valid variable type')
  253. if type(p) in [URIRef] or \
  254. (type(p) in [str, unicode] and p.startswith('?')): pass
  255. else: raise ValueError('The predicate is not a valid variable type')
  256. if type(o) in [URIRef, BNode, Literal] or \
  257. (type(o) in [str, unicode] and o.startswith('?')): pass
  258. else:
  259. raise ValueError('The object is not a valid variable type: %s' % o)
  260. return True
  261. else:
  262. raise ValueError('Statement type not in %s' % str(Query.STATEMENT_TYPES))
  263. def optional_group(*statements):
  264. """ Return optional group graph pattern.
  265. Returned object can be used as argument in :meth:`Query.where` method.
  266. `optional_group()` accepts multiple arguments, similarly
  267. to :meth:`Query.where()`.
  268. """
  269. g = OptionalGroup()
  270. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  271. return g
  272. def group(*statements):
  273. g = Group()
  274. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  275. return g
  276. def named_group(name, *statements):
  277. """ Return named group graph pattern.
  278. Returned object can be used as argument in :meth:`Query.where` method.
  279. ``*statements`` is one or more graph patterns.
  280. Example:
  281. >>> import surf
  282. >>> from surf.query import a, select, named_group
  283. >>> query = select("?s", "?src").where(named_group("?src", ("?s", a, surf.ns.FOAF['Person'])))
  284. >>> print unicode(query)
  285. SELECT ?s ?src WHERE { GRAPH ?src { ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> } }
  286. """
  287. g = NamedGroup(name)
  288. g.extend([stmt for stmt in statements if validate_statement(stmt)])
  289. return g
  290. # the query creators
  291. def select(*vars):
  292. """ Construct and return :class:`surf.query.Query` object of type **SELECT**
  293. ``*vars`` are variables to be selected.
  294. Example:
  295. >>> query = select("?s", "?p", "?o")
  296. """
  297. return Query(SELECT, *vars)
  298. def ask():
  299. """ Construct and return :class:`surf.query.Query` object of type **ASK** """
  300. return Query(ASK)
  301. def construct(*vars):
  302. """ Construct and return :class:`surf.query.Query` object of type **CONSTRUCT** """
  303. return Query(CONSTRUCT, *vars)
  304. def describe(*vars):
  305. """ Construct and return :class:`surf.query.Query` object of type **DESCRIBE** """
  306. return Query(DESCRIBE, *vars)