PageRenderTime 26ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/rdfextras/store/SQLite.py

https://code.google.com/p/rdfextras/
Python | 572 lines | 488 code | 22 blank | 62 comment | 73 complexity | affd69e3a8a19a517494ad47ef3e521b MD5 | raw file
  1. from __future__ import generators
  2. try:
  3. from sqlite3 import dbapi2
  4. except ImportError:
  5. try:
  6. from pysqlite2 import dbapi2
  7. except ImportError:
  8. import warnings
  9. warnings.warn("pysqlite2 is not installed")
  10. __test__=False
  11. import re, os
  12. from rdflib.graph import QuotedGraph
  13. from rdflib.graph import RDF
  14. from rdflib.store import CORRUPTED_STORE
  15. from rdflib.store import NO_STORE
  16. from rdflib.store import VALID_STORE
  17. from rdflib.term import Literal
  18. from rdfextras.utils.termutils import escape_quotes
  19. from rdfextras.store.REGEXMatching import REGEXTerm
  20. from rdfextras.store.REGEXMatching import PYTHON_REGEX
  21. from rdfextras.store.AbstractSQLStore import AbstractSQLStore, Graph
  22. from rdfextras.store.AbstractSQLStore import extractTriple, unionSELECT
  23. from rdfextras.store.AbstractSQLStore import ASSERTED_NON_TYPE_PARTITION
  24. from rdfextras.store.AbstractSQLStore import ASSERTED_TYPE_PARTITION
  25. from rdfextras.store.AbstractSQLStore import ASSERTED_LITERAL_PARTITION
  26. from rdfextras.store.AbstractSQLStore import QUOTED_PARTITION
  27. from rdfextras.store.AbstractSQLStore import table_name_prefixes
  28. from rdfextras.store.AbstractSQLStore import TRIPLE_SELECT_NO_ORDER
  29. Any = None
  30. def regexp(expr, item):
  31. """
  32. User-defined REGEXP operator
  33. """
  34. r = re.compile(expr)
  35. return r.match(item) is not None
  36. class SQLite(AbstractSQLStore):
  37. """
  38. SQLite store formula-aware implementation. It stores its triples in the
  39. following partitions:
  40. - Asserted non rdf:type statements
  41. - Asserted rdf:type statements (in a table which models Class membership)
  42. The motivation for this partition is primarily query speed and
  43. scalability as most graphs will always have more rdf:type statements
  44. than others
  45. - All Quoted statements
  46. In addition it persists namespace mappings in a seperate table
  47. """
  48. context_aware = True
  49. formula_aware = True
  50. transaction_aware = True
  51. regex_matching = PYTHON_REGEX
  52. autocommit_default = False
  53. _Store__node_pickler = None
  54. def open(self, db_path, create=True):
  55. """
  56. Opens the store specified by the configuration string. If
  57. create is True a store will be created if it does not already
  58. exist. If create is False and a store does not already exist
  59. an exception is raised. An exception is also raised if a store
  60. exists, but there is insufficient permissions to open the
  61. store.
  62. """
  63. if create:
  64. db = dbapi2.connect(db_path)
  65. c = db.cursor()
  66. # Only create tables if they don't already exist. If the first
  67. # exists, assume they all do.
  68. try:
  69. c.execute(CREATE_ASSERTED_STATEMENTS_TABLE % self._internedId)
  70. except dbapi2.OperationalError, e:
  71. # Raise any error aside from existing table.
  72. if (str(e) != 'table %s_asserted_statements already exists'
  73. % self._internedId):
  74. raise dbapi2.OperationalError, e
  75. else:
  76. c.execute(CREATE_ASSERTED_TYPE_STATEMENTS_TABLE %
  77. self._internedId)
  78. c.execute(CREATE_QUOTED_STATEMENTS_TABLE % self._internedId)
  79. c.execute(CREATE_NS_BINDS_TABLE % self._internedId)
  80. c.execute(CREATE_LITERAL_STATEMENTS_TABLE % self._internedId)
  81. for tblName, indices in [
  82. (
  83. "%s_asserted_statements",
  84. [
  85. ("%s_A_termComb_index",('termComb',)),
  86. ("%s_A_s_index",('subject',)),
  87. ("%s_A_p_index",('predicate',)),
  88. ("%s_A_o_index",('object',)),
  89. ("%s_A_c_index",('context',)),
  90. ],
  91. ),
  92. (
  93. "%s_type_statements",
  94. [
  95. ("%s_T_termComb_index",('termComb',)),
  96. ("%s_member_index",('member',)),
  97. ("%s_klass_index",('klass',)),
  98. ("%s_c_index",('context',)),
  99. ],
  100. ),
  101. (
  102. "%s_literal_statements",
  103. [
  104. ("%s_L_termComb_index",('termComb',)),
  105. ("%s_L_s_index",('subject',)),
  106. ("%s_L_p_index",('predicate',)),
  107. ("%s_L_c_index",('context',)),
  108. ],
  109. ),
  110. (
  111. "%s_quoted_statements",
  112. [
  113. ("%s_Q_termComb_index",('termComb',)),
  114. ("%s_Q_s_index",('subject',)),
  115. ("%s_Q_p_index",('predicate',)),
  116. ("%s_Q_o_index",('object',)),
  117. ("%s_Q_c_index",('context',)),
  118. ],
  119. ),
  120. (
  121. "%s_namespace_binds",
  122. [
  123. ("%s_uri_index",('uri',)),
  124. ],
  125. )]:
  126. for indexName, columns in indices:
  127. c.execute("CREATE INDEX %s on %s (%s)" %
  128. (indexName % self._internedId,
  129. tblName % self._internedId,
  130. ','.join(columns)))
  131. c.close()
  132. db.commit()
  133. db.close()
  134. self._db = dbapi2.connect(db_path)
  135. self._db.create_function("regexp", 2, regexp)
  136. #if os.path.exists(db_path):
  137. # c = self._db.cursor()
  138. # c.execute("SELECT * FROM sqlite_master WHERE type='table'")
  139. # tbls = [rt[1] for rt in c.fetchall()]
  140. # c.close()
  141. # for tn in [tbl%(self._internedId) for tbl in table_name_prefixes]:
  142. # if tn not in tbls:
  143. # # The database exists, but one of the partitions doesn't
  144. # # exist
  145. # return 0
  146. # # Everything is there (the database and the partitions)
  147. # return 1
  148. ## The database doesn't exist - nothing is there
  149. #return -1
  150. # Alcides fix
  151. if os.path.exists(db_path):
  152. c = self._db.cursor()
  153. c.execute("SELECT * FROM sqlite_master WHERE type='table'")
  154. tbls = [rt[1] for rt in c.fetchall()]
  155. c.close()
  156. missing = 0
  157. for tn in [tbl%(self._internedId) for tbl in table_name_prefixes]:
  158. if tn not in tbls:
  159. missing +=1
  160. if missing == len(table_name_prefixes):
  161. return NO_STORE
  162. elif missing > 0:
  163. return CORRUPTED_STORE
  164. else:
  165. return VALID_STORE
  166. # The database doesn't exist - nothing is there
  167. return NO_STORE
  168. def destroy(self, db_path):
  169. """
  170. FIXME: Add documentation
  171. """
  172. db = dbapi2.connect(db_path)
  173. c=db.cursor()
  174. for tblsuffix in table_name_prefixes:
  175. try:
  176. c.execute('DROP table %s'
  177. % tblsuffix%(self._internedId))
  178. except:
  179. print("unable to drop table: %s"
  180. % (tblsuffix%(self._internedId)))
  181. # Note, this only removes the associated tables for the closed world
  182. # universe given by the identifier.
  183. print("Destroyed Close World Universe %s ( in SQLite database %s)"
  184. % (self.identifier,db_path))
  185. db.commit()
  186. c.close()
  187. db.close()
  188. os.remove(db_path)
  189. def EscapeQuotes(self, qstr):
  190. return escape_quotes(qstr)
  191. # This is overridden in order to leave unicode terms as is instead of
  192. # converting them to ascii (which is the default behavior)
  193. def normalizeTerm(self, term):
  194. if isinstance(term,(QuotedGraph,Graph)):
  195. return term.identifier
  196. elif isinstance(term,Literal):
  197. return self.EscapeQuotes(term)
  198. elif term is None or isinstance(term, (list,REGEXTerm)):
  199. return term
  200. else:
  201. return term
  202. # "Where Clause" utility Functions
  203. # The predicate and object clause builders are modified in order to
  204. # optimize subjects and objects utility functions which can take lists
  205. # as their last argument (object,predicate - respectively)
  206. def buildSubjClause(self,subject,tableName):
  207. if isinstance(subject,REGEXTerm):
  208. return " REGEXP (%s,"+" %s)" % \
  209. (tableName and '%s.subject'%tableName or 'subject'),[subject]
  210. elif isinstance(subject,list):
  211. clauseStrings=[]
  212. paramStrings = []
  213. for s in subject:
  214. if isinstance(s,REGEXTerm):
  215. clauseStrings.append(
  216. " REGEXP (%s,"+" %s)"
  217. % (tableName and '%s.subject'%tableName or 'subject')
  218. + " %s")
  219. paramStrings.append(self.normalizeTerm(s))
  220. elif isinstance(s,(QuotedGraph,Graph)):
  221. clauseStrings.append("%s=" % \
  222. (tableName and '%s.subject'%tableName or 'subject')
  223. + "%s")
  224. paramStrings.append(self.normalizeTerm(s.identifier))
  225. else:
  226. clauseStrings.append(
  227. "%s="
  228. % (tableName and '%s.subject'
  229. % tableName or 'subject')
  230. + "%s")
  231. paramStrings.append(self.normalizeTerm(s))
  232. return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  233. elif isinstance(subject,(QuotedGraph,Graph)):
  234. return "%s=" % \
  235. (tableName and '%s.subject'%tableName or 'subject') + \
  236. "%s", [self.normalizeTerm(subject.identifier)]
  237. else:
  238. return subject is not None and "%s=" % \
  239. (tableName and '%s.subject'%tableName or 'subject') + \
  240. "%s",[subject] or None
  241. # Capable of taking a list of predicates as well (in which case sub
  242. # clauses are joined with 'OR')
  243. def buildPredClause(self,predicate,tableName):
  244. if isinstance(predicate,REGEXTerm):
  245. return " REGEXP (%s,"+" %s)" % \
  246. (tableName and '%s.predicate' % \
  247. tableName or 'predicate'),[predicate]
  248. elif isinstance(predicate,list):
  249. clauseStrings=[]
  250. paramStrings = []
  251. for p in predicate:
  252. if isinstance(p,REGEXTerm):
  253. clauseStrings.append(" REGEXP (%s,"+" %s)"% \
  254. (tableName and '%s.predicate' % \
  255. tableName or 'predicate'))
  256. else:
  257. clauseStrings.append("%s=" % \
  258. (tableName and '%s.predicate' % \
  259. tableName or 'predicate')+"%s")
  260. paramStrings.append(self.normalizeTerm(p))
  261. return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  262. else:
  263. return predicate is not None and "%s=" % \
  264. (tableName and '%s.predicate'%tableName or 'predicate') + \
  265. "%s",[predicate] or None
  266. def buildObjClause(self,obj,tableName):
  267. """
  268. Capable of taking a list of objects as well (in which case sub-clauses
  269. are joined with 'OR')
  270. """
  271. if isinstance(obj,REGEXTerm):
  272. return " REGEXP (%s,"+" %s)" % \
  273. (tableName and '%s.object'%tableName or 'object'),[obj]
  274. elif isinstance(obj,list):
  275. clauseStrings=[]
  276. paramStrings = []
  277. for o in obj:
  278. if isinstance(o,REGEXTerm):
  279. clauseStrings.append(" REGEXP (%s,"+" %s)" % \
  280. (tableName and '%s.object'%tableName or 'object'))
  281. paramStrings.append(self.normalizeTerm(o))
  282. elif isinstance(o,(QuotedGraph,Graph)):
  283. clauseStrings.append("%s="%(tableName and '%s.object' % \
  284. tableName or 'object')+"%s")
  285. paramStrings.append(self.normalizeTerm(o.identifier))
  286. else:
  287. clauseStrings.append("%s="%(tableName and '%s.object' % \
  288. tableName or 'object')+"%s")
  289. paramStrings.append(self.normalizeTerm(o))
  290. return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  291. elif isinstance(obj,(QuotedGraph,Graph)):
  292. return "%s=" % \
  293. (tableName and '%s.object'%tableName or 'object') + \
  294. "%s",[self.normalizeTerm(obj.identifier)]
  295. else:
  296. return obj is not None and "%s=" % \
  297. (tableName and '%s.object' % \
  298. tableName or 'object')+"%s",[obj] or None
  299. def buildContextClause(self,context,tableName):
  300. context = context is not None \
  301. and self.normalizeTerm(context.identifier) \
  302. or context
  303. if isinstance(context,REGEXTerm):
  304. return " REGEXP (%s,"+" %s)" % \
  305. (tableName and '%s.context' % \
  306. tableName or 'context'),[context]
  307. else:
  308. return context is not None and "%s=" % \
  309. (tableName and '%s.context' % tableName or 'context') + \
  310. "%s", [context] or None
  311. def buildTypeMemberClause(self,subject,tableName):
  312. if isinstance(subject,REGEXTerm):
  313. return " REGEXP (%s,"+" %s)" % \
  314. (tableName and '%s.member' % \
  315. tableName or 'member'), [subject]
  316. elif isinstance(subject,list):
  317. clauseStrings=[]
  318. paramStrings = []
  319. for s in subject:
  320. clauseStrings.append("%s.member="%tableName+"%s")
  321. if isinstance(s,(QuotedGraph,Graph)):
  322. paramStrings.append(self.normalizeTerm(s.identifier))
  323. else:
  324. paramStrings.append(self.normalizeTerm(s))
  325. return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  326. else:
  327. return subject and u"%s.member = "%(tableName)+"%s",[subject]
  328. def buildTypeClassClause(self,obj,tableName):
  329. if isinstance(obj,REGEXTerm):
  330. return " REGEXP (%s,"+" %s)" % \
  331. (tableName and '%s.klass' % \
  332. tableName or 'klass'), [obj]
  333. elif isinstance(obj,list):
  334. clauseStrings=[]
  335. paramStrings = []
  336. for o in obj:
  337. clauseStrings.append("%s.klass=" % tableName + "%s")
  338. if isinstance(o,(QuotedGraph,Graph)):
  339. paramStrings.append(self.normalizeTerm(o.identifier))
  340. else:
  341. paramStrings.append(self.normalizeTerm(o))
  342. return '('+ ' or '.join(clauseStrings) + ')', paramStrings
  343. else:
  344. return obj is not None and "%s.klass = " % \
  345. tableName + "%s", [obj] or None
  346. def triples(self, (subject, predicate, obj), context=None):
  347. """
  348. A generator over all the triples matching pattern. Pattern can
  349. be any objects for comparing against nodes in the store, for
  350. example, RegExLiteral, Date? DateRange?
  351. quoted table: <id>_quoted_statements
  352. asserted rdf:type table: <id>_type_statements
  353. asserted non rdf:type table: <id>_asserted_statements
  354. triple columns: subject,predicate,object,context,
  355. termComb,objLanguage,objDatatype
  356. class membership columns: member,klass,context termComb
  357. FIXME: These union all selects *may* be further optimized by joins
  358. """
  359. quoted_table = "%s_quoted_statements" % self._internedId
  360. asserted_table = "%s_asserted_statements" % self._internedId
  361. asserted_type_table = "%s_type_statements" % self._internedId
  362. literal_table = "%s_literal_statements" % self._internedId
  363. c = self._db.cursor()
  364. parameters = []
  365. if predicate == RDF.type:
  366. # select from asserted rdf:type partition and
  367. # quoted table (if a context is specified)
  368. clauseString,params = self.buildClause(
  369. 'typeTable',subject,RDF.type, obj,context,True)
  370. parameters.extend(params)
  371. selects = [
  372. (
  373. asserted_type_table,
  374. 'typeTable',
  375. clauseString,
  376. ASSERTED_TYPE_PARTITION
  377. ),
  378. ]
  379. elif isinstance(predicate,REGEXTerm) \
  380. and predicate.compiledExpr.match(RDF.type) \
  381. or not predicate:
  382. # Select from quoted partition (if context is specified),
  383. # literal partition if (obj is Literal or None) and
  384. # asserted non rdf:type partition (if obj is URIRef or None)
  385. selects = []
  386. if not self.STRONGLY_TYPED_TERMS or \
  387. isinstance(obj,Literal) \
  388. or not obj \
  389. or (self.STRONGLY_TYPED_TERMS \
  390. and isinstance(obj,REGEXTerm)):
  391. clauseString,params = self.buildClause(
  392. 'literal',subject,predicate,obj,context)
  393. parameters.extend(params)
  394. selects.append((
  395. literal_table,
  396. 'literal',
  397. clauseString,
  398. ASSERTED_LITERAL_PARTITION
  399. ))
  400. if not isinstance(obj,Literal) \
  401. and not (isinstance(obj,REGEXTerm) \
  402. and self.STRONGLY_TYPED_TERMS) \
  403. or not obj:
  404. clauseString,params = self.buildClause(
  405. 'asserted',subject,predicate,obj,context)
  406. parameters.extend(params)
  407. selects.append((
  408. asserted_table,
  409. 'asserted',
  410. clauseString,
  411. ASSERTED_NON_TYPE_PARTITION
  412. ))
  413. clauseString,params = self.buildClause(
  414. 'typeTable',subject,RDF.type,obj,context,True)
  415. parameters.extend(params)
  416. selects.append(
  417. (
  418. asserted_type_table,
  419. 'typeTable',
  420. clauseString,
  421. ASSERTED_TYPE_PARTITION
  422. )
  423. )
  424. elif predicate:
  425. # select from asserted non rdf:type partition (optionally),
  426. # quoted partition (if context is speciied), and literal
  427. # partition (optionally)
  428. selects = []
  429. if not self.STRONGLY_TYPED_TERMS \
  430. or isinstance(obj,Literal) \
  431. or not obj \
  432. or (self.STRONGLY_TYPED_TERMS \
  433. and isinstance(obj,REGEXTerm)):
  434. clauseString,params = self.buildClause(
  435. 'literal',subject,predicate,obj,context)
  436. parameters.extend(params)
  437. selects.append((
  438. literal_table,
  439. 'literal',
  440. clauseString,
  441. ASSERTED_LITERAL_PARTITION
  442. ))
  443. if not isinstance(obj,Literal) \
  444. and not (isinstance(obj,REGEXTerm) \
  445. and self.STRONGLY_TYPED_TERMS) \
  446. or not obj:
  447. clauseString,params = self.buildClause(
  448. 'asserted',subject,predicate,obj,context)
  449. parameters.extend(params)
  450. selects.append((
  451. asserted_table,
  452. 'asserted',
  453. clauseString,
  454. ASSERTED_NON_TYPE_PARTITION
  455. ))
  456. if context is not None:
  457. clauseString,params = self.buildClause(
  458. 'quoted',subject,predicate, obj,context)
  459. parameters.extend(params)
  460. selects.append(
  461. (
  462. quoted_table,
  463. 'quoted',
  464. clauseString,
  465. QUOTED_PARTITION
  466. )
  467. )
  468. q = self._normalizeSQLCmd(unionSELECT(
  469. selects,selectType=TRIPLE_SELECT_NO_ORDER))
  470. self.executeSQL(c,q,parameters)
  471. # NOTE: SQLite does not support ORDER BY terms that aren't
  472. # integers, so the entire result set must be iterated
  473. # in order to be able to return a generator of contexts
  474. tripleCoverage = {}
  475. result = c.fetchall()
  476. c.close()
  477. for rt in result:
  478. # Fix by Alcides Fonseca
  479. # https://github.com/slok/rdflib/commit/e05827b080772e785290b270da63dce64addfc7c#diff-0
  480. tmp = []
  481. for i,r in enumerate(rt):
  482. if r == u"NULL":
  483. tmp.append(None)
  484. else:
  485. tmp.append(r)
  486. rt = tuple(tmp)
  487. s,p,o,(graphKlass,idKlass,graphId) = extractTriple(rt,self,context)
  488. contexts = tripleCoverage.get((s,p,o),[])
  489. contexts.append(graphKlass(self,idKlass(graphId)))
  490. tripleCoverage[(s,p,o)] = contexts
  491. for (s,p,o),contexts in tripleCoverage.items():
  492. yield (s,p,o),(c for c in contexts)
# DDL templates for the tables backing one store.  The single %s in each
# template is filled with the store's interned identifier
# (self._internedId) by SQLite.open when create is true.

# Asserted statements table: subject/predicate/object/context plus termComb.
CREATE_ASSERTED_STATEMENTS_TABLE = """
CREATE TABLE %s_asserted_statements (
subject text not NULL,
predicate text not NULL,
object text not NULL,
context text not NULL,
termComb tinyint unsigned not NULL)"""
# rdf:type statements table: one member/klass pair per row.
CREATE_ASSERTED_TYPE_STATEMENTS_TABLE = """
CREATE TABLE %s_type_statements (
member text not NULL,
klass text not NULL,
context text not NULL,
termComb tinyint unsigned not NULL)"""
# Literal statements table: object may be NULL and carries optional
# language and datatype columns.
CREATE_LITERAL_STATEMENTS_TABLE = """
CREATE TABLE %s_literal_statements (
subject text not NULL,
predicate text not NULL,
object text,
context text not NULL,
termComb tinyint unsigned not NULL,
objLanguage varchar(3),
objDatatype text)"""
# Quoted statements table: same columns as the literal statements table.
CREATE_QUOTED_STATEMENTS_TABLE = """
CREATE TABLE %s_quoted_statements (
subject text not NULL,
predicate text not NULL,
object text,
context text not NULL,
termComb tinyint unsigned not NULL,
objLanguage varchar(3),
objDatatype text)"""
# Namespace bindings table: prefix (primary key) to uri.
CREATE_NS_BINDS_TABLE = """
CREATE TABLE %s_namespace_binds (
prefix varchar(20) UNIQUE not NULL,
uri text,
PRIMARY KEY (prefix))"""