
/rdfextras/store/FOPLRelationalModel/MySQLMassLoader.py

https://code.google.com/p/rdfextras/
import sys, re, os
import datetime
import rdflib
from rdflib import BNode
from rdflib import RDF
from rdflib.store import Store
from rdfextras.utils.termutils import OBJECT
from rdflib.graph import ConjunctiveGraph
from rdfextras.store.MySQL import SQL, MySQL, PostgreSQL
from rdfextras.store.FOPLRelationalModel.QuadSlot import genQuadSlots
from rdfextras.store.FOPLRelationalModel.QuadSlot import normalizeNode
from Ft.Lib import Uri
import logging

log = logging.getLogger(__name__)
Any = None

VALUES_EXPR = re.compile(r'.*VALUES (\(.*\))')
TABLE_NAME_EXPR = re.compile(r'INSERT INTO (\S*)\s+VALUES')

ROW_DELIMITER = '\n'
COL_DELIMITER = '|'

DENORMALIZED_INDEX_TARGETS = [
    'subject', 'subject_term', 'predicate', 'predicate_term',
    'object', 'object_term', 'context', 'context_term']

def make_delimited(_list):
    return COL_DELIMITER.join(
        [i is None and 'NULL' or '"%s"' % i for i in _list])
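# Illustrative note (added for this edit, not original source text):
# make_delimited double-quotes every non-None value and emits a bare NULL
# for None, joined on COL_DELIMITER, e.g.:
#
#     make_delimited([42, 'U', None])   # -> '"42"|"U"|NULL'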
class LoadError(Exception):
    pass

class DllError(Exception):
    pass

class DllNode(object):
    def __init__(self, stuff):
        self.prev = None
        self.next = None
        self.stuff = stuff

    def insert_before(self, following):
        prev = following.prev
        if prev is not None:
            prev.next = self
        self.prev = prev
        self.next = following
        following.prev = self

    def insert_after(self, preceding):
        next = preceding.next
        if next is not None:
            next.prev = self
        self.prev = preceding
        self.next = next
        preceding.next = self

    def remove(self):
        if self.prev is not None:
            self.prev.next = self.next
        if self.next is not None:
            self.next.prev = self.prev
        self.prev = None
        self.next = None
class RecentSet(object):
    def __init__(self, size):
        self.size = size
        self.store = {}
        '''map from objects to their node in the doubly-linked list'''
        self.head = None
        '''first item in the doubly-linked list'''
        self.tail = None
        '''last item in the doubly-linked list'''

    def check(self, item):
        if item in self.store:
            node = self.store[item]
            if self.head is not node:
                if self.tail is node:
                    self.tail = self.tail.prev
                node.remove()
                node.insert_before(self.head)
                self.head = node
            return True
        else:
            node = DllNode(item)
            if self.head is None:
                assert self.tail is None
                self.head = node
                self.tail = node
            else:
                node.insert_before(self.head)
                self.head = node
            self.store[item] = node
            if len(self.store) > self.size:
                drop = self.tail
                #print >> sys.stderr, "Dropping", drop.stuff
                self.tail = drop.prev
                drop.remove()
                del self.store[drop.stuff]
            return False
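# Usage sketch (illustrative, not part of the original module): RecentSet is
# a fixed-capacity, LRU-evicting membership test. check() returns True on a
# hit (and moves the item to the front of the list); on a miss it records the
# item, evicting the least recently seen entry once `size` is exceeded.
#
#     recent = RecentSet(2)
#     recent.check('a')   # False: first sighting, 'a' recorded
#     recent.check('a')   # True: hit, 'a' refreshed
#     recent.check('b')   # False
#     recent.check('c')   # False: 'a' is evicted (capacity 2)
#     recent.check('a')   # False again after eviction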
class Loader(SQL):
    # Thought: just have one table corresponding to each ending table (for
    # both tables below)
    TRIPLES_SQL_TEMPLATE = '''
    create table triples (
        subject bigint unsigned,
        subject_term enum('U','B','F','V'),
        predicate bigint unsigned,
        predicate_term enum('U','V'),
        object bigint unsigned,
        object_term enum('U','B','F','V','L'),
        data_type bigint unsigned default NULL,
        language varchar(3) default NULL,
        context bigint unsigned,
        context_term enum('U','B','F')
    )'''

    LEXICAL_SQL_TEMPLATE = '''
    create table lexical (
        id bigint unsigned,
        term_type enum('U','B','F','V','L'),
        lexical text
    )'''

    loadStatement = """LOAD DATA LOCAL INFILE '%s' IGNORE INTO TABLE
    %s FIELDS TERMINATED BY '|' ENCLOSED BY '"'
    ESCAPED BY '\\\\'"""
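    # For reference (illustrative rendering, assuming the default dump
    # directory and the 'triples' table): makeLoadStatement() below
    # interpolates a file name and table name into loadStatement, producing
    # something like:
    #
    #     LOAD DATA LOCAL INFILE 'delimited_dumps/triples.csv' IGNORE INTO TABLE
    #     triples FIELDS TERMINATED BY '|' ENCLOSED BY '"'
    #     ESCAPED BY '\\'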
    def __init__(
            self, #triplesFileName=None, lexicalFileName=None,
            delimited_directory='delimited_dumps',
            reuseExistingFiles=False):
        self.init_storage(delimited_directory, reuseExistingFiles)
        for table in self.tables:
            table.delimited_file = open(
                self.delimited_filename(table.get_name()), self.mode)
        self.recent = RecentSet(100000)
        self.recent_hits = 0
        self.recent_misses = 0

    def init_storage(self, delimited_directory, reuseExistingFiles):
        self.log_statement('init_storage')
        self.mode = 'a'
        if not reuseExistingFiles:
            self.mode = 'w'
            try:
                os.mkdir(delimited_directory)
            except OSError:
                raise LoadError('Can\'t create `' + delimited_directory +
                                '\': it already exists.')
        self.delimited_directory = delimited_directory

    def delimited_filename(self, name, extension='.csv'):
        return os.path.join(self.delimited_directory, name + extension)

    def open(self, configuration=None, create=False):
        super(Loader, self).open(configuration, create)

    def add(self, (subject, predicate, obj), context=None, quoted=False):
        self.addN([(subject, predicate, obj, context)])

    def addN(self, quads):
        for s, p, o, c in quads:
            assert c is not None, \
                "Context associated with %s %s %s is None!" % (s, p, o)
            qSlots = genQuadSlots([s, p, o, c.identifier],
                                  self.useSignedInts)
            table = self.get_table((s, p, o))
            table.delimited_file.write(make_delimited(
                table.makeRowComponents(qSlots)) + ROW_DELIMITER)
            for row in table.listIdentifiers(qSlots):
                if not self.recent.check((row[2], row[1])):
                    self.idHash.delimited_file.write(make_delimited(
                        row) + ROW_DELIMITER)
                    self.recent_misses += 1
                else:
                    self.recent_hits += 1
            for row in table.listLiterals(qSlots):
                self.valueHash.delimited_file.write(
                    make_delimited(row) + ROW_DELIMITER)
    def dumpRDF(self, suffix):
        for table in self.tables:
            table.delimited_file.close()
        print 'Recent hits: %s' % self.recent_hits
        print 'Recent misses: %s' % self.recent_misses

    def makeLoadStatement(self, fileName, tableName):
        return self.loadStatement % (fileName, tableName)

    def init_workspace(self):
        pass

    def close_workspace(self):
        pass

    def load_temporary_tables(self, cursor):
        # NOTE: triplesFileName/triplesFile (and the lexical counterparts)
        # are only set by PostgreSQLLoader, which overrides this method; this
        # base version appears to be retained from an earlier design.
        self.log_statement('load_temporary_tables')
        if self.triplesFileName is None or self.lexicalFileName is None:
            return
        if self.triplesFile and not self.triplesFile.closed:
            self.triplesFile.close()
        cursor.execute(self.TRIPLES_SQL_TEMPLATE)
        cursor.execute(self.makeLoadStatement(self.triplesFileName, 'triples'))
        if self.lexicalFile and not self.lexicalFile.closed:
            self.lexicalFile.close()
        cursor.execute(self.LEXICAL_SQL_TEMPLATE)
        cursor.execute(self.makeLoadStatement(self.lexicalFileName, 'lexical'))

    def indexTriplesTable(self, cursor, columns=[]):
        self.log_statement('indexTriplesTable')
        for column in columns:
            cursor.execute('CREATE INDEX triples_%s ON triples (%s)' %
                           ((column,) * 2))
        cursor.execute('CREATE INDEX triples_all ON triples (%s)' %
                       (', '.join(columns),))

    def indexLexicalTable(self, cursor):
        self.log_statement('indexLexicalTable')
        cursor.execute('CREATE INDEX lexical_id ON lexical (id)')
        cursor.execute('CREATE INDEX lexical_term_type ON lexical (term_type)')
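    # End-to-end load order (illustrative sketch mirroring main() at the
    # bottom of this module; the method names are the real ones defined in
    # this class):
    #
    #     store.create(populate=False)
    #     store.init_workspace()        # stage dumps / prepare indices
    #     store.loadLiterals()
    #     store.loadIdentifiers()
    #     store.loadAssociativeBox()
    #     store.loadLiteralProperties()
    #     store.loadRelations()
    #     store.close_workspace()       # drop temp tables / apply indices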
    def loadAssociativeBox(self, indexFirst=False):
        self.log_statement('loadAssociativeBox')
        cursor = self._db.cursor()
        cursor.execute("""
            insert into %s
            select distinct subject as member, subject_term as member_term,
                object as class, object_term as class_term,
                context, context_term
            from triples where predicate = %s and predicate_term = 'U'""" %
            (self.aboxAssertions,
             str(normalizeNode(RDF.type, self.useSignedInts))))

    def loadLiteralProperties(self, indexFirst=False):
        self.log_statement('loadLiteralProperties')
        cursor = self._db.cursor()
        cursor.execute("""
            insert into %s
            select distinct subject, subject_term, predicate, predicate_term,
                object, context, context_term, data_type, language
            from triples where object_term = 'L'
            """ %
            (self.literalProperties,))

    def loadRelations(self, indexFirst=False):
        self.log_statement('loadRelations')
        cursor = self._db.cursor()
        cursor.execute("""
            insert into %s
            select distinct subject, subject_term, predicate, predicate_term,
                object, object_term, context, context_term
            from triples where predicate != %s and object_term != 'L'""" %
            (self.binaryRelations,
             str(normalizeNode(RDF.type, self.useSignedInts))))

    def loadLiterals(self, indexFirst=False):
        self.log_statement('loadLiterals')
        cursor = self._db.cursor()
        cursor.execute("""
            insert into %s select distinct id, lexical from lexical where
            term_type = 'L'""" % (self.valueHash,))

    def loadIdentifiers(self, indexFirst=False):
        self.log_statement('loadIdentifiers')
        cursor = self._db.cursor()
        cursor.execute("""
            insert into %s select distinct id, term_type,
            lexical from lexical where
            term_type != 'L'""" % (self.idHash,))

    def remove(self, triple_pattern, context):
        pass

    #Transactional interfaces
    def commit(self):
        """ """
        pass

    def rollback(self):
        """ """
        pass

    def bind(self, prefix, namespace):
        """ """
        pass

    def prefix(self, namespace):
        """ """
        pass

    def namespace(self, prefix):
        """ """
        pass

    def namespaces(self):
        pass
class MySQLLoader(Loader, MySQL):
    def __init__(
            self, identifier=None, configuration=None,
            delimited_directory='delimited_dumps',
            reuseExistingFiles=False):
        MySQL.__init__(self, identifier, configuration, debug=True,
                       engine="ENGINE=MyISAM")
        Loader.__init__(self, delimited_directory, reuseExistingFiles)

    def delimited_filename(self, name):
        return Loader.delimited_filename(self, name, '.csv.mysql')

    def init_workspace(self):
        self.applyIndices()

    def _loadTable(self, table, indexFirst=False):
        cursor = self._db.cursor()
        sql = 'ALTER TABLE %s DISABLE KEYS' % (table.get_name(),)
        self.log_statement(sql)
        cursor.execute(sql)
        sql = self.makeLoadStatement(
            self.delimited_filename(table.get_name()), table.get_name())
        self.log_statement(sql)
        cursor.execute(sql)
        sql = 'ALTER TABLE %s ENABLE KEYS' % (table.get_name(),)
        self.log_statement(sql)
        cursor.execute(sql)

    def loadAssociativeBox(self, indexFirst=False):
        self._loadTable(self.aboxAssertions)
        return
        # Unreachable below: a retained INSERT ... SELECT path from an
        # earlier strategy that staged rows through the temporary `triples`
        # table.
        cursor = self._db.cursor()
        sql = """
        insert into %s
        select subject as member, subject_term as member_term,
            object as class, object_term as class_term,
            context, context_term
        from triples
        where predicate = %s and predicate_term = 'U'
        group by subject, object, context
        """ % (self.aboxAssertions,
               str(normalizeNode(RDF.type, self.useSignedInts)))
        log.debug(sql)
        cursor.execute(sql)

    def loadLiteralProperties(self, indexFirst=False):
        self._loadTable(self.literalProperties)
        return
        # Unreachable alternative path (see loadAssociativeBox).
        cursor = self._db.cursor()
        sql = """
        insert into %s
        select subject, subject_term, predicate, predicate_term,
            object, context, context_term, data_type, language
        from triples
        where object_term = 'L'
        group by subject, predicate, object, context
        """ % (self.literalProperties,)
        log.debug(sql)
        cursor.execute(sql)

    def loadRelations(self, indexFirst=False):
        self._loadTable(self.binaryRelations)
        return
        # Unreachable alternative path (see loadAssociativeBox).
        cursor = self._db.cursor()
        sql = """
        insert into %s
        select subject, subject_term, predicate, predicate_term,
            object, object_term, context, context_term
        from triples
        where predicate != %s and object_term != 'L'
        group by subject, predicate, object, context
        """ % (self.binaryRelations,
               str(normalizeNode(RDF.type, self.useSignedInts)))
        log.debug(sql)
        cursor.execute(sql)

    def loadLiterals(self, indexFirst=False):
        self._loadTable(self.valueHash)
        return
        # Unreachable alternative path (see loadAssociativeBox).
        cursor = self._db.cursor()
        sql = """
        insert into %s select id, lexical from lexical
        where term_type = 'L' group by id
        """ % (self.valueHash,)
        log.debug(sql)
        cursor.execute(sql)

    def loadIdentifiers(self, indexFirst=False):
        self._loadTable(self.idHash)
        return
        # Unreachable alternative path (see loadAssociativeBox).
        cursor = self._db.cursor()
        sql = """
        insert into %s select id, term_type, lexical from lexical
        where term_type != 'L' group by id
        """ % (self.idHash,)
        log.debug(sql)
        cursor.execute(sql)
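# Minimal usage sketch for MySQLLoader (illustrative; the connection string,
# identifier, and file name below are assumptions, and a reachable MySQL
# server with LOCAL INFILE enabled is assumed):
#
#     store = MySQLLoader(identifier='mystore',
#         configuration='user=rdf,password=secret,host=localhost,db=rdf')
#     store.open()
#     graph = ConjunctiveGraph(store, identifier=BNode())
#     graph.parse('dump.nt', format='nt')   # rows stream to delimited dumps
#     store.dumpRDF('solo')                 # flush and close the dump files
#     store.init_workspace()
#     store.loadIdentifiers()               # LOAD DATA into the real tables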
class PostgreSQLLoader(Loader, PostgreSQL):
    TRIPLES_SQL_TEMPLATE = '''
    create table triples (
        subject bigint,
        subject_term char,
        predicate bigint,
        predicate_term char,
        object bigint,
        object_term char,
        data_type bigint default NULL,
        language varchar(3) default NULL,
        context bigint,
        context_term char
    )'''

    LEXICAL_SQL_TEMPLATE = '''
    create table lexical (
        id bigint,
        term_type char,
        lexical text
    )'''

    loadStatement = """COPY %s FROM STDIN WITH DELIMITER '|'
    NULL AS 'NULL' CSV ESCAPE E'\\\\'"""

    def __init__(
            self, identifier=None, configuration=None,
            delimited_directory='delimited_dumps',
            reuseExistingFiles=False):
        PostgreSQL.__init__(self, identifier, configuration, debug=True)
        self.init_storage(delimited_directory, reuseExistingFiles)
        self.triplesFileName = self.delimited_filename('triples')
        self.triplesFile = open(self.triplesFileName, self.mode)
        self.lexicalFileName = self.delimited_filename('lexical')
        self.lexicalFile = open(self.lexicalFileName, self.mode)
        self.recent = RecentSet(100000)
        self.recent_hits = 0
        self.recent_misses = 0

    def delimited_filename(self, name):
        return Loader.delimited_filename(self, name, '.csv.postgresql')

    def makeLoadStatement(self, fileName, tableName):
        return self.loadStatement % (tableName,)
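    # Illustrative rendering (added note, not original source text): unlike
    # the MySQL variant, the PostgreSQL loadStatement only interpolates the
    # table name; the rows themselves are streamed over the connection via
    # putline() in _copy_from_file() below.
    #
    #     COPY triples FROM STDIN WITH DELIMITER '|'
    #     NULL AS 'NULL' CSV ESCAPE E'\\'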
    try:
        # `import pg` in the class body binds PyGreSQL as a class attribute,
        # which is why the method below can reach it as `self.pg`.
        import pg

        def _copy_from_file(self, table, filename):
            f = open(filename, 'r')
            conn = self.pg.connect(
                dbname=self.config['db'], host=self.config['host'],
                port=int(self.config['port']), user=self.config['user'],
                passwd=self.config['password'])
            conn.query(self.makeLoadStatement(filename, table))
            for _line in f:
                conn.putline(_line)
            conn.putline('\\.\n')
            conn.endcopy()
            f.close()
    except ImportError:
        def _copy_from_file(self, table, filename):
            raise NotImplementedError(
                'We need the PyGreSQL module to bulk load into PostgreSQL '
                'databases.')
    def load_temporary_tables(self, cursor):
        self.log_statement('load_temporary_tables')
        if self.triplesFileName is None or self.lexicalFileName is None:
            return
        if self.triplesFile and not self.triplesFile.closed:
            self.triplesFile.close()
        cursor.execute(self.TRIPLES_SQL_TEMPLATE)
        cursor.execute('COMMIT')
        self._copy_from_file('triples', self.triplesFileName)
        if self.lexicalFile and not self.lexicalFile.closed:
            self.lexicalFile.close()
        cursor.execute(self.LEXICAL_SQL_TEMPLATE)
        cursor.execute('COMMIT')
        self._copy_from_file('lexical', self.lexicalFileName)

    def init_workspace(self):
        cursor = self._db.cursor()
        self.load_temporary_tables(cursor)
        self.indexTriplesTable(cursor, ['subject', 'predicate', 'object',
                                        'context'])
        self.indexLexicalTable(cursor)

    def close_workspace(self):
        cursor = self._db.cursor()
        cursor.execute('DROP TABLE triples')
        cursor.execute('DROP TABLE lexical')
        self.applyIndices()

    def addN(self, quads):
        for s, p, o, c in quads:
            assert c is not None, \
                "Context associated with %s %s %s is None!" % (s, p, o)
            qSlots = genQuadSlots([s, p, o, c.identifier],
                                  self.useSignedInts)
            # print 'Writing data...', qSlots
            # Add to the denormalized delimited file:
            beginning = True
            for item in qSlots:
                parts = [item.md5Int, item.termType]
                if not self.recent.check((item.termType, item.term)):
                    self.lexicalFile.write(make_delimited(
                        parts + [item.normalizeTerm()]) + ROW_DELIMITER)
                    self.recent_misses += 1
                else:
                    self.recent_hits += 1
                if item.position == OBJECT:
                    dqs = item.getDatatypeQuadSlot()
                    if dqs is not None:
                        parts.append(dqs.md5Int)
                        if not self.recent.check((dqs.termType, dqs.term)):
                            self.lexicalFile.write(make_delimited(
                                [dqs.md5Int, 'U', dqs.normalizeTerm()]) +
                                ROW_DELIMITER)
                            self.recent_misses += 1
                        else:
                            self.recent_hits += 1
                    else:
                        parts.append(None)
                    if item.termType == 'L':
                        parts.append(item.term.language)
                    else:
                        parts.append(None)
                if not beginning:
                    self.triplesFile.write(COL_DELIMITER)
                beginning = False
                self.triplesFile.write(make_delimited(parts))
            self.triplesFile.write(ROW_DELIMITER)

    def dumpRDF(self, suffix):
        self.triplesFile.close()
        self.lexicalFile.close()
        print 'Recent hits: %s' % self.recent_hits
        print 'Recent misses: %s' % self.recent_misses
def timing(config, tableid, dumpfile):
    plugins = ['MySQL', 'PostgreSQL']
    index = 0
    runs = {}
    for plugin in plugins:
        # @@FIXME: incorrect code
        # if options.delete:
        #     store = rdflib.plugin.get(plugin, Store)(tableid)
        #     store.open(config)
        #     cursor = store._db.cursor()
        #     #cursor.execute('DROP TABLE denormalized')
        #     store.destroy(config)
        #     store.close()
        start = datetime.datetime.now()
        print start
        store = rdflib.plugin.get(plugin, Store)(tableid)
        runid = 'run' + str(index)
        runs[runid] = [plugin, 'no denormalized indices',
                       'index store before load']
        store.create(config, False)
        store.applyIndices()
        store.applyForeignKeys()
        cursor = store._db.cursor()
        # @@FIXME: unused (incorrect) code ...
        # hashFieldType = store.hashFieldType
        # load_temporary_tables(cursor)
        # loadLiterals(store)
        # loadIdentifiers(store)
        # loadAssociativeBox(store)
        # loadLiteralProperties(store)
        # loadRelations(store)
        stop = datetime.datetime.now()
        print stop
        runs[runid].append(str(stop - start))
        print '\t'.join(runs[runid])
        cursor.execute('DROP TABLE triples')
        cursor.execute('DROP TABLE lexical')
        store.destroy(config)
        index = index + 1

        #start = time.clock()
        start = datetime.datetime.now()
        store = rdflib.plugin.get(plugin, Store)(tableid)
        runid = 'run' + str(index)
        runs[runid] = [plugin, 'no denormalized indices',
                       'index store after load']
        store.create(config, False)
        cursor = store._db.cursor()
        # @@FIXME: unused (incorrect) code...
        # hashFieldType = store.hashFieldType
        # load_temporary_tables(cursor)
        # loadLiterals(store)
        # loadIdentifiers(store)
        # loadAssociativeBox(store)
        # loadLiteralProperties(store)
        # loadRelations(store)
        store.applyIndices()
        store.applyForeignKeys()
        stop = datetime.datetime.now()
        runs[runid].append(str(stop - start))
        print '\t'.join(runs[runid])
        cursor.execute('DROP TABLE triples')
        cursor.execute('DROP TABLE lexical')
        store.destroy(config)
        index = index + 1
PLUGIN_MAP = {
    'MySQL': MySQLLoader,
    'PostgreSQL': PostgreSQLLoader,
}
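# Example invocation (illustrative; the connection string, identifier, and
# input file are assumptions):
#
#     python MySQLMassLoader.py \
#         -c user=rdf,password=secret,host=localhost,db=rdf \
#         -i mystore --nt dump.nt MySQL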
def main():
    from optparse import OptionParser
    usage = '''usage: %prog [options] <DB Type>'''
    op = OptionParser(usage=usage)
    op.add_option('-c', '--connection', help='Database connection string')
    op.add_option('-i', '--id', help='Database table set identifier')
    op.add_option('--delimited',
        help='Directory in which to store delimited files')
    op.add_option('-r', '--reuse', action='store_true',
        help='Reuse existing delimited files instead of creating new ones')
    op.add_option('-d', '--delete', action='store_true',
        help='Delete old repository before starting')
    op.add_option('--name', dest='graphName',
        help='The name of the graph to parse the RDF serialization(s) into')
    op.add_option('-x', '--xml', action='append',
        help='Append to the list of RDF/XML documents to parse')
    op.add_option('-t', '--trix', action='append',
        help='Append to the list of TriX documents to parse')
    op.add_option('-n', '--n3', action='append',
        help='Append to the list of N3 documents to parse')
    op.add_option('--nt', action='append',
        help='Append to the list of NT documents to parse')
    op.add_option('-a', '--rdfa', action='append',
        help='Append to the list of RDFa documents to parse')
    op.add_option('--directory',
        help='Load all children of this directory into the repository')
    op.add_option('--format', type='choice',
        choices=['xml', 'n3', 'nt', 'rdfa', 'trix'],
        help='Format of files found when using `--directory`')
    op.add_option('--catalog',
        help='Catalog to use to resolve local URIs to identify graphs')
    op.add_option('--timing', action='store_true',
        help='Run timing tests')
    op.set_defaults(connection=None, delimited='delimited_dumps',
                    reuse=False, id=None,
                    xml=[], trix=[], n3=[], nt=[], rdfa=[],
                    graphName=BNode())
    (options, args) = op.parse_args()

    if options.delimited is not None:
        options.delimited = os.path.abspath(options.delimited)
    if not options.id:
        op.error('You need to provide a table set identifier')
    try:
        store = PLUGIN_MAP[args[0]](identifier=options.id,
                                    configuration=options.connection,
                                    delimited_directory=options.delimited,
                                    reuseExistingFiles=options.reuse)
    except (IndexError, KeyError):
        # Missing or unrecognized <DB Type> positional argument.
        op.error('You need to provide a database type (MySQL or PostgreSQL).')
    store.open()
    factGraph = ConjunctiveGraph(store, identifier=options.graphName)
    if not options.reuse:
        if options.directory:
            if not options.format:
                op.error('You need to provide the format with `--format`\n' +
                         'when loading from a directory')
            dirparts = os.walk(options.directory).next()
            for entry in dirparts[2]:
                graphRef = os.path.join(dirparts[0], entry)
                factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                                format=options.format)
        for graphRef in options.xml:
            factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                            format='xml')
        for graphRef in options.trix:
            factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                            format='trix')
        for graphRef in options.n3:
            factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                            format='n3')
        for graphRef in options.nt:
            factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                            format='nt')
        for graphRef in options.rdfa:
            factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                            format='rdfa')
        store.dumpRDF('solo')
    store.close()

    if options.connection:
        if options.timing:
            # @@FIXME: incorrect code
            # timing(config, tableid, dumpfile)
            pass
        else:
            if options.delete:
                log.debug(store.open())
                # @@FIXME: incorrect code
                # cursor = store._db.cursor()
                store.destroy()
                store.close()
            store.create(populate=False)
            store.init_workspace()
            store.loadLiterals()
            store.loadIdentifiers()
            store.loadAssociativeBox()
            store.loadLiteralProperties()
            store.loadRelations()
            store.close_workspace()
            #store.applyForeignKeys()

if __name__ == '__main__':
    main()