PageRenderTime 57ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/s3db3.5.10/pearlib/arc/store/ARC2_StoreLoadQueryHandler.php

https://github.com/drobbins/s3db
PHP | 386 lines | 321 code | 29 blank | 36 comment | 67 complexity | 959b79b4895d93e044e15ad022eb17de MD5 | raw file
  1. <?php
  2. /*
  3. homepage: http://arc.semsol.org/
  4. license: http://arc.semsol.org/license
  5. class: ARC2 RDF Store LOAD Query Handler
  6. author: Benjamin Nowack
  7. version: 2008-07-15 (Addition: SG API JSON support)
  8. */
  9. ARC2::inc('StoreQueryHandler');
  10. class ARC2_StoreLoadQueryHandler extends ARC2_StoreQueryHandler {
  /**
   * PHP 5 constructor: hands both arguments straight to the parent constructor.
   *
   * @param mixed $a      Value forwarded unchanged to the parent (defaults to '').
   * @param mixed $caller Calling object, taken by reference; it has to be a store.
   */
  function __construct($a = '', &$caller) {
    parent::__construct($a, $caller);
  }
  /**
   * PHP 4 style constructor (method named after the class); it only delegates
   * to __construct() with the same arguments.
   *
   * @param mixed $a      Value forwarded unchanged to __construct().
   * @param mixed $caller Calling object, taken by reference.
   */
  function ARC2_StoreLoadQueryHandler($a = '', &$caller) {
    $this->__construct($a, $caller);
  }
  /**
   * Runs the parent initialisation, then aliases the caller as the store and
   * reads the write-buffer size from the 'store_write_buffer' setting
   * (falling back to 5000 when it is not configured).
   */
  function __init() {
    parent::__init();
    /* the caller is the store this handler writes to */
    $this->store =& $this->caller;
    $buffer_size = $this->v('store_write_buffer', 5000, $this->a);
    $this->write_buffer_size = $buffer_size;
  }
  22. /* */
  /**
   * Executes a LOAD: reads the document at the query URL (or the supplied
   * data), picks a format-specific loader, and lets it feed triples into the
   * store through addT().
   *
   * @param array  $infos          Parsed query; reads $infos['query']['url'] and
   *                               $infos['query']['target_graph'].
   * @param string $data           Optional inline data handed to the reader/loader.
   * @param int    $keep_bnode_ids When truthy, addT() leaves bnode labels untouched.
   * @return mixed Array with 't_count' and 'load_time' (plus 'inserts' and
   *               'insert_times' when insert logging is on), or the result of
   *               addError() when no loader matches or the lock cannot be taken.
   */
  function runQuery($infos, $data = '', $keep_bnode_ids = 0) {
    $url = $infos['query']['url'];
    $graph = $infos['query']['target_graph'];
    $this->target_graph = $graph ? $this->calcURI($graph) : $this->calcURI($url);
    $this->fixed_target_graph = $graph ? $this->target_graph : '';
    $this->keep_bnode_ids = $keep_bnode_ids;
    /* reader */
    ARC2::inc('Reader');
    /* plain "= new": assigning new by reference is deprecated since PHP 5.3 */
    $reader = new ARC2_Reader($this->a, $this);
    $reader->activate($url, $data);
    /* format detection */
    $mappings = array(
      'rdfxml' => 'RDFXML',
      'sparqlxml' => 'SPOG',
      'turtle' => 'Turtle',
      'ntriples' => 'Turtle',
      'rss' => 'RSS',
      'n3' => 'Turtle',
      'html' => 'SemHTML',
      'sgajson' => 'SGAJSON'
    );
    $format = $reader->getFormat();
    if (!$format || !isset($mappings[$format])) {
      return $this->addError('No loader available for "' .$url. '": ' . $format);
    }
    /* format loader */
    $suffix = 'Store' . $mappings[$format] . 'Loader';
    ARC2::inc($suffix);
    $cls = 'ARC2_' . $suffix;
    $loader = new $cls($this->a, $this);
    $loader->setReader($reader);
    /* lock */
    if (!$this->store->getLock()) {
      /* make sure a stale flag from an earlier run cannot enable addT() */
      $this->has_lock = 0;
      return $this->addError('Could not get lock in "runQuery"');
    }
    $this->has_lock = 1;
    /* logging */
    $this->t_count = 0;
    $this->t_start = ARC2::mtime();
    $this->log_inserts = $this->v('store_log_inserts', 0, $this->a);
    if ($this->log_inserts) {
      @unlink("arc_insert_log.txt");
      $this->inserts = array();
      $this->insert_times = array();
      $this->t_prev = $this->t_start;
      $this->t_count_prev = 0;
    }
    /* load and parse */
    $this->max_term_id = $this->getMaxTermID();
    $this->max_triple_id = $this->getMaxTripleID();
    $this->term_ids = array();
    $this->triple_ids = array();
    $this->sql_buffers = array();
    /* the loader reports triples back via addT(); its return value is not used */
    $loader->parse($url, $data);
    /* done: flush whatever is still buffered */
    $this->checkSQLBuffers(1);
    if ($this->log_inserts) {
      $this->logInserts();
    }
    $this->store->releaseLock();
    /* the lock is gone, so addT() must refuse further writes */
    $this->has_lock = 0;
    /* optimize the tables on roughly one load in ten */
    if ((rand(1, 10) == 1)) $this->store->optimizeTables();
    $t2 = ARC2::mtime();
    $dur = round($t2 - $this->t_start, 4);
    $r = array(
      't_count' => $this->t_count,
      'load_time' => $dur,
    );
    if ($this->log_inserts) {
      $r['inserts'] = $this->inserts;
      $r['insert_times'] = $this->insert_times;
    }
    return $r;
  }
  94. /* */
  /**
   * Buffers one triple (plus its graph link and any new term IDs) for insertion
   * and flushes the SQL buffers every write_buffer_size triples.
   *
   * @param string $s       Subject value.
   * @param string $p       Predicate value.
   * @param string $o       Object value.
   * @param string $s_type  'uri', 'bnode' or 'literal'.
   * @param string $o_type  'uri', 'bnode' or 'literal'.
   * @param string $o_dt    Object datatype, if any.
   * @param string $o_lang  Object language, if any.
   * @return mixed 0 when no lock is held; otherwise nothing.
   */
  function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
    if (!$this->has_lock) {
      return 0;
    }
    $type_ids = array('uri' => '0', 'bnode' => '1', 'literal' => '2');
    $g = $this->getTermID($this->target_graph, '0', 'id');
    /* bnode labels are rewritten with a graph-dependent checksum unless the caller keeps them */
    if (($s_type == 'bnode') && !$this->keep_bnode_ids) {
      $s_tail = (strlen($s) > 12) ? substr(substr($s, 2), -10) : substr($s, 2);
      $s = '_:b' . abs(crc32($g . $s)) . '_' . $s_tail;
    }
    if (($o_type == 'bnode') && !$this->keep_bnode_ids) {
      $o_tail = (strlen($o) > 12) ? substr(substr($o, 2), -10) : substr($o, 2);
      $o = '_:b' . abs(crc32($g . $o)) . '_' . $o_tail;
    }
    /* triple: getTermID() has side effects, so the call order is kept */
    $t = array();
    $t['s'] = $this->getTermID($s, $type_ids[$s_type], 's');
    $t['p'] = $this->getTermID($p, '0', 'id');
    $t['o'] = $this->getTermID($o, $type_ids[$o_type], 'o');
    $t['o_lang_dt'] = $this->getTermID($o_dt . $o_lang, $o_dt ? '0' : '2', 'id');
    $t['o_comp'] = $this->getOComp($o);
    $t['s_type'] = $type_ids[$s_type];
    $t['o_type'] = $type_ids[$o_type];
    $triple_id = $this->getTripleID($t);
    if (is_array($triple_id)) {
      /* an array result means the triple exists already: link it, don't insert it */
      $t['t'] = $triple_id[0];
    }
    else {
      $t['t'] = $triple_id;
      $this->bufferTripleSQL($t);
      /* triple_backup */
      $backup_fields = array($t['t'], $t['s'], $t['p'], $t['o'], $t['o_lang_dt'], $t['s_type'], $t['o_type']);
      $this->bufferTripleBackupSQL(array('t' => $t['t'], 'data' => serialize($backup_fields)));
    }
    /* g2t */
    $this->bufferGraphSQL(array('g' => $g, 't' => $t['t']));
    $this->t_count++;
    /* check buffers: nothing to do between buffer-size boundaries */
    if (($this->t_count % $this->write_buffer_size) != 0) {
      return;
    }
    /* every 2nd flush also resets the ID caches, every 10th also refreshes the lock */
    $reset_buffers = (($this->t_count % ($this->write_buffer_size * 2)) == 0) ? 1 : 0;
    $refresh_lock = ($reset_buffers && (($this->t_count % ($this->write_buffer_size * 10)) == 0)) ? 1 : 0;
    $split_tables = $refresh_lock;
    if ($this->log_inserts) {
      $this->logInserts();
    }
    $this->checkSQLBuffers(1, $reset_buffers, $refresh_lock, $split_tables);
  }
  147. /* */
  /**
   * Returns the next free term ID: one more than the largest `id` found across
   * the id2val, s2val and o2val tables (1 when all of them are empty).
   *
   * The query is sent over the store's own connection instead of the implicit
   * "last opened" MySQL link, so another connection opened elsewhere cannot
   * redirect it. NOTE(review): assumes getDBCon() returns a MySQL link
   * resource - confirm against the store class.
   *
   * @return int|float Next unused term ID.
   */
  function getMaxTermID() {
    $con = $this->store->getDBCon();
    $prefix = $this->store->getTablePrefix();
    $selects = array();
    foreach (array('id2val', 's2val', 'o2val') as $tbl) {
      $selects[] = "(SELECT MAX(id) as `id` FROM " . $prefix . $tbl . ')';
    }
    $sql = implode(' UNION ', $selects);
    $r = 0;
    if (($rs = mysql_query($sql, $con)) && mysql_num_rows($rs)) {
      while ($row = mysql_fetch_array($rs)) {
        /* MAX() yields NULL for an empty table, which never beats $r */
        $r = ($r < $row['id']) ? $row['id'] : $r;
      }
    }
    return $r + 1;
  }
  /**
   * Returns the next free triple ID: MAX(t) of the triple table plus one, or 1
   * when the query fails or yields no row.
   *
   * The query is sent over the store's own connection rather than the implicit
   * "last opened" MySQL link. NOTE(review): assumes getDBCon() returns a MySQL
   * link resource - confirm against the store class.
   *
   * @return int|float Next unused triple ID.
   */
  function getMaxTripleID() {
    $con = $this->store->getDBCon();
    $sql = "SELECT MAX(t) AS `id` FROM " . $this->store->getTablePrefix() . "triple";
    $rs = mysql_query($sql, $con);
    if ($rs && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
      return $row['id'] + 1;
    }
    return 1;
  }
  /**
   * Resolves a term value to its numeric ID, allocating a new ID when the value
   * is unknown, and buffers an INSERT for the requested table where needed.
   *
   * Lookup order: the in-memory cache ($this->term_ids), then the three *2val
   * tables (the requested one first), then a fresh ID from $this->max_term_id.
   *
   * Changes from the previous version:
   * - escaping and querying use the store's connection instead of the implicit
   *   "last opened" link (NOTE(review): assumes getDBCon() returns a MySQL link
   *   resource - confirm against the store class);
   * - the "found in another table" test now compares like with like. The query
   *   reports the full table name ('s2val') while $tbl is the short form ('s'),
   *   so the old comparison was always true and buffered a redundant INSERT
   *   even when the value already sat in the requested table.
   *
   * @param string $val     Term value (URI, bnode label, literal, datatype/lang).
   * @param string $type_id Value type ('0' uri, '1' bnode, '2' literal) stored with new id2val rows.
   * @param string $tbl     Short table key: 'id', 's' or 'o'.
   * @return mixed The term ID.
   */
  function getTermID($val, $type_id, $tbl) {
    $con = $this->store->getDBCon();
    /* buffered */
    if (isset($this->term_ids[$val])) {
      if (!isset($this->term_ids[$val][$tbl])) {
        /* known under another table key: reuse that ID and register it for $tbl */
        foreach (array('id', 's', 'o') as $other_tbl) {
          if (isset($this->term_ids[$val][$other_tbl])) {
            $this->term_ids[$val][$tbl] = $this->term_ids[$val][$other_tbl];
            $this->bufferIDSQL($tbl, $this->term_ids[$val][$tbl], $val, $type_id);
            break;
          }
        }
      }
      return $this->term_ids[$val][$tbl];
    }
    /* db */
    $sub_tbls = ($tbl == 'id') ? array('id2val', 's2val', 'o2val') : ($tbl == 's' ? array('s2val', 'id2val', 'o2val') : array('o2val', 'id2val', 's2val'));
    $prefix = $this->store->getTablePrefix();
    $escaped_val = mysql_real_escape_string($val, $con);
    foreach ($sub_tbls as $sub_tbl) {
      /* s2val/o2val carry a separate cid column; for id2val the id doubles as cid */
      $cid_suffix = preg_match('/^(s|o)/', $sub_tbl) ? ', cid AS `cid`' : ', id AS `cid`';
      $sql = "SELECT id AS `id`" . $cid_suffix . ", '" . $sub_tbl . "' AS `tbl` FROM " . $prefix . $sub_tbl . " WHERE val = BINARY '" . $escaped_val . "'";
      if (($rs = mysql_query($sql . ' LIMIT 1', $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
        $this->term_ids[$val] = array($tbl => isset($row['cid']) ? $row['cid'] : $row['id']);
        /* only copy the term over when it was found in a different table */
        if ($row['tbl'] != $tbl . '2val') {
          $this->bufferIDSQL($tbl, $row['id'], $val, $type_id);
        }
        break;
      }
    }
    /* new */
    if (!isset($this->term_ids[$val])) {
      $this->term_ids[$val] = array($tbl => $this->max_term_id);
      $this->bufferIDSQL($tbl, $this->max_term_id, $val, $type_id);
      $this->max_term_id++;
    }
    return $this->term_ids[$val][$tbl];
  }
  /**
   * Looks up or allocates the ID for a triple.
   *
   * The return type doubles as a flag: an array holding the ID means "this
   * triple is already known (cached or in the DB), do not insert it again";
   * a scalar is a freshly allocated ID that the caller has to insert.
   *
   * The lookup runs over the store's connection instead of the implicit
   * "last opened" MySQL link. NOTE(review): assumes getDBCon() returns a MySQL
   * link resource - confirm against the store class.
   *
   * @param array $t Triple with the keys s, p, o, o_lang_dt, s_type and o_type
   *                 (all of them are interpolated unquoted into the SQL).
   * @return mixed array(id) for an existing triple, otherwise the new ID.
   */
  function getTripleID($t) {
    $con = $this->store->getDBCon();
    /* the printed array serves as the cache key */
    $val = print_r($t, 1);
    /* buffered */
    if (isset($this->triple_ids[$val])) {
      return array($this->triple_ids[$val]);/* hack for "don't insert this triple" */
    }
    /* db */
    $sql = "SELECT t FROM " . $this->store->getTablePrefix() . "triple WHERE"
      . " s = " . $t['s']
      . " AND p = " . $t['p']
      . " AND o = " . $t['o']
      . " AND o_lang_dt = " . $t['o_lang_dt']
      . " AND s_type = " . $t['s_type']
      . " AND o_type = " . $t['o_type']
      . " LIMIT 1";
    if (($rs = mysql_query($sql, $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
      $this->triple_ids[$val] = $row['t'];/* hack for "don't insert this triple" */
      return array($row['t']);/* hack for "don't insert this triple" */
    }
    /* new */
    $this->triple_ids[$val] = $this->max_triple_id;
    $this->max_triple_id++;
    return $this->triple_ids[$val];
  }
  /**
   * Derives the comparison value stored in the o_comp column for an object value.
   *
   * - "21 August 2007" style dates become "Y-m-dTH:i:s" strings;
   * - values starting with four digits followed by date/time characters are kept as they are;
   * - numeric values become a signed, zero-padded fixed-width decimal string;
   * - anything else has its tags stripped, every run of non-word characters
   *   replaced by "-", and is cut to 35 characters.
   *
   * @param mixed $val Object value.
   * @return string Comparison value.
   */
  function getOComp($val) {
    /* try date (e.g. 21 August 2007) */
    if (preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val)) {
      $uts = strtotime($val);
      if ($uts && ($uts !== -1)) {
        return date("Y-m-d\TH:i:s", $uts);
      }
    }
    /* already in a sortable date-like form */
    if (preg_match('/^[0-9]{4}[0-9\-\:\T\Z\+]+([a-z]{2,3})?$/i', $val)) {
      return $val;
    }
    if (!is_numeric($val)) {
      /* any other string: remove tags, linebreaks etc. */
      $plain = preg_replace('/[\W\s]+/is', '-', strip_tags($val));
      return substr(trim($plain), 0, 35);
    }
    /* numbers: sign, 18 integer digits padded on the left, 15 fraction digits padded on the right */
    $num = sprintf("%f", $val);
    if (preg_match("/([\-\+])([0-9]*)\.([0-9]*)/", $num, $m)) {
      return $m[1] . str_pad($m[2], 18, '0', STR_PAD_LEFT) . "." . str_pad($m[3], 15, '0', STR_PAD_RIGHT);
    }
    if (preg_match("/([0-9]*)\.([0-9]*)/", $num, $m)) {
      return "+" . str_pad($m[1], 18, '0', STR_PAD_LEFT) . "." . str_pad($m[2], 15, '0', STR_PAD_RIGHT);
    }
    return $num;
  }
  251. /* */
  /**
   * Appends one row to the pending multi-row INSERT for the triple table,
   * starting the statement if no buffer exists yet.
   *
   * @param array $t Triple with the keys t, s, p, o, o_lang_dt, o_comp, s_type, o_type.
   */
  function bufferTripleSQL($t) {
    $tbl = 'triple';
    $is_first_row = !isset($this->sql_buffers[$tbl]);
    if ($is_first_row) {
      $this->sql_buffers[$tbl] = "INSERT INTO " . $this->store->getTablePrefix() . $tbl . " (t, s, p, o, o_lang_dt, o_comp, s_type, o_type) VALUES";
    }
    /* only o_comp is a string column; the remaining fields go in unquoted */
    $row = "(" . $t['t'] . ", " . $t['s'] . ", " . $t['p'] . ", " . $t['o'] . ", " . $t['o_lang_dt'] . ", '" . mysql_real_escape_string($t['o_comp']) . "', " . $t['s_type'] . ", " . $t['o_type'] . ")";
    $this->sql_buffers[$tbl] .= ($is_first_row ? " " : ", ") . $row;
  }
  /**
   * Triple backups are switched off: the method returns 1 without buffering
   * anything. The statements that used to follow the unconditional return
   * (building an INSERT for the triple_backup table) could never run and have
   * been removed.
   *
   * @param array $tb Backup record with the keys 't' and 'data'; currently ignored.
   * @return int Always 1.
   */
  function bufferTripleBackupSQL($tb) {
    return 1;
  }
  /**
   * Appends one graph-to-triple link to the pending INSERT IGNORE for the g2t
   * table, starting the statement if no buffer exists yet.
   *
   * @param array $g2t Link with the keys 'g' (graph term ID) and 't' (triple ID).
   */
  function bufferGraphSQL($g2t) {
    $tbl = 'g2t';
    $is_first_row = !isset($this->sql_buffers[$tbl]);
    if ($is_first_row) {
      $this->sql_buffers[$tbl] = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . " (g, t) VALUES";
    }
    $row = "(" . $g2t['g'] . ", " . $g2t['t'] . ")";
    $this->sql_buffers[$tbl] .= ($is_first_row ? " " : ", ") . $row;
  }
  /**
   * Appends one term row to the pending INSERT IGNORE for a *2val table,
   * starting the statement if no buffer exists yet.
   *
   * id2val rows are (id, val, val_type); s2val and o2val rows are
   * (id, cid, val) with the ID written into both id and cid.
   *
   * @param string $tbl      Short table key ('id', 's' or 'o'); '2val' is appended here.
   * @param mixed  $id       Term ID.
   * @param string $val      Term value.
   * @param string $val_type Value type, only written for id2val.
   */
  function bufferIDSQL($tbl, $id, $val, $val_type) {
    $tbl = $tbl . '2val';
    $is_id_table = ($tbl == 'id2val');
    $separator = ", ";
    if (!isset($this->sql_buffers[$tbl])) {
      $cols = $is_id_table ? "id, val, val_type" : "id, cid, val";
      $this->sql_buffers[$tbl] = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . "(" . $cols . ") VALUES";
      $separator = " ";
    }
    $escaped_val = mysql_real_escape_string($val);
    if ($is_id_table) {
      $row = "(" . $id . ", '" . $escaped_val . "', " . $val_type . ")";
    }
    else {
      $row = "(" . $id . ", " . $id . ", '" . $escaped_val . "')";
    }
    $this->sql_buffers[$tbl] .= $separator . $row;
  }
  296. /* */
  /**
   * Writes the pending INSERT buffers to the database, one table at a time.
   *
   * Nothing is written unless $force_write is truthy. After each table that
   * was written, the ID caches are optionally cleared and the store lock is
   * optionally released and re-acquired.
   *
   * Changes from the previous version:
   * - all MySQL calls use the store's connection instead of the implicit
   *   "last opened" link (NOTE(review): assumes getDBCon() returns a MySQL link
   *   resource - confirm against the store class);
   * - the affected-row count is read right after the INSERT, before a possible
   *   REPAIR TABLE query can replace it.
   *
   * @param int $force_write      Write the buffers when truthy.
   * @param int $reset_id_buffers Clear $this->term_ids / $this->triple_ids after a write.
   * @param int $refresh_lock     Release the lock, sleep one second, re-acquire it after a write.
   * @param int $split_tables     Accepted for interface compatibility; not used here.
   * @return mixed 1 on completion, or the addError() result when the lock cannot be re-obtained.
   */
  function checkSQLBuffers($force_write = 0, $reset_id_buffers = 0, $refresh_lock = 0, $split_tables = 0) {
    $con = $this->store->getDBCon();
    @set_time_limit($this->v('time_limit', 60, $this->a));
    foreach (array('triple', 'g2t', 'id2val', 's2val', 'o2val') as $tbl) {
      if (!$force_write || !isset($this->sql_buffers[$tbl])) {
        continue;
      }
      $t1 = ARC2::mtime();
      mysql_query($this->sql_buffers[$tbl], $con);
      $affected_rows = mysql_affected_rows($con);
      /* table error */
      if ($er = mysql_error($con)) {
        $this->addError('Error detected (possibly auto-fixed): ' . $er . ' (' . $this->sql_buffers[$tbl] . ')');
        if (preg_match('/\/([a-z0-9\_\-]+)\' .+ should be repaired/i', $er, $m)) {
          mysql_query('REPAIR TABLE ' . rawurlencode($m[1]), $con);
        }
      }
      unset($this->sql_buffers[$tbl]);
      if ($this->log_inserts) {
        $t2 = ARC2::mtime();
        /* a failed statement reports -1 affected rows; count it as 0 */
        $this->inserts[$tbl] = $this->v($tbl, 0, $this->inserts) + max(0, $affected_rows);
        $dur = round($t2 - $t1, 4);
        $this->insert_times[$tbl] = isset($this->insert_times[$tbl]) ? $this->insert_times[$tbl] : array('min' => $dur, 'max' => $dur, 'sum' => $dur);
        $this->insert_times[$tbl] = array('min' => min($dur, $this->insert_times[$tbl]['min']), 'max' => max($dur, $this->insert_times[$tbl]['max']), 'sum' => $dur + $this->insert_times[$tbl]['sum']);
      }
      /* reset term id buffers */
      if ($reset_id_buffers) {
        $this->term_ids = array();
        $this->triple_ids = array();
      }
      /* refresh lock */
      if ($refresh_lock) {
        $this->store->releaseLock();
        $this->has_lock = 0;
        sleep(1);
        if (!$this->store->getLock(5)) return $this->addError('Could not re-obtain lock in "checkSQLBuffers"');
        $this->has_lock = 1;
      }
    }
    return 1;
  }
  337. /* speed log */
  /**
   * Appends one throughput line to arc_insert_log.txt: triples handled since
   * the previous call and in total, each with elapsed time and triples/second.
   * Afterwards the "previous" markers are moved to the current time and count.
   *
   * A zero-length interval is reported as 0 t/s instead of dividing by zero,
   * and the log write is skipped when the file cannot be opened.
   */
  function logInserts() {
    $t_now = ARC2::mtime();
    $tc_now = $this->t_count;
    $tc_diff = $tc_now - $this->t_count_prev;
    $dur_full = $t_now - $this->t_start;
    $dur_diff = $t_now - $this->t_prev;
    /* the timer can report identical values for calls in quick succession */
    $speed_full = ($dur_full > 0) ? round($tc_now / $dur_full) : 0;
    $speed_now = ($dur_diff > 0) ? round($tc_diff / $dur_diff) : 0;
    $r = $tc_diff . ' in ' . round($dur_diff, 5) . ' = ' . $speed_now . ' t/s (' .$tc_now. ' in ' . round($dur_full, 5). ' = ' . $speed_full . ' t/s )';
    /* logging is best effort: an unwritable log file must not break the load */
    $fp = @fopen("arc_insert_log.txt", "a");
    if ($fp) {
      @fwrite($fp, $r . "\r\n");
      @fclose($fp);
    }
    $this->t_prev = $t_now;
    $this->t_count_prev = $tc_now;
  }
  356. }