PageRenderTime 55ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/arc/parsers/ARC2_TurtleParser.php

https://github.com/damz/foafssl-drupal
PHP | 874 lines | 747 code | 80 blank | 47 comment | 170 complexity | a5353276d2e984fd2ab7d1fb84d11a8a MD5 | raw file
  1. <?php
  2. /*
  3. homepage: http://arc.semsol.org/
  4. license: http://arc.semsol.org/license
  5. class: ARC2 SPARQL-enhanced Turtle Parser
  6. author: Benjamin Nowack
  7. version: 2009-08-04
  8. */
  9. ARC2::inc('RDFParser');
  10. class ARC2_TurtleParser extends ARC2_RDFParser {
  /**
   * Creates the parser; all set-up is delegated to the parent constructor.
   *
   * @param mixed $a      Configuration, passed through unchanged to the parent.
   * @param mixed $caller Calling component, handed on by reference.
   */
  function __construct($a, &$caller) {
    /* "$a" no longer declares a default: an optional parameter in front of the required "$caller" could never be omitted and is deprecated as of PHP 8 */
    parent::__construct($a, $caller);
  }
  /**
   * PHP4-style constructor alias; simply forwards to __construct().
   *
   * @param mixed $a      Configuration, passed through unchanged.
   * @param mixed $caller Calling component, handed on by reference.
   */
  function ARC2_TurtleParser($a, &$caller) {
    /* "$a" no longer declares a default: an optional parameter in front of the required "$caller" could never be omitted and is deprecated as of PHP 8 */
    $this->__construct($a, $caller);
  }
  /**
   * Runs the parent initialisation, then sets the namespace URIs, the
   * namespace-to-prefix map and the parser bookkeeping fields.
   */
  function __init() {/* reader */
    parent::__init();
    /* well-known namespace URIs */
    $this->xml = 'http://www.w3.org/XML/1998/namespace';
    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $this->xsd = 'http://www.w3.org/2001/XMLSchema#';
    /* namespace URI => prefix */
    $prefix_by_ns = array();
    $prefix_by_ns[$this->xml] = 'xml';
    $prefix_by_ns[$this->rdf] = 'rdf';
    $prefix_by_ns[$this->xsd] = 'xsd';
    $this->nsp = $prefix_by_ns;
    /* bookkeeping */
    $this->state = 0;
    $this->unparsed_code = '';
  }
  26. /* */
  /**
   * Pre-processes $v and hands it to ARC2::x() for the actual match.
   *
   * A leading run of \xA0 / \xC2 bytes is replaced by one space, and leading
   * "#" comments (up to the end of their line) are dropped first.
   *
   * @param string $re      Pattern fragment forwarded to ARC2::x().
   * @param string $v       Input to match against.
   * @param string $options Regex modifiers forwarded to ARC2::x().
   * @return mixed Whatever ARC2::x() returns (callers in this class treat it as a match array or a falsy value).
   */
  function x($re, $v, $options = 'si') {
    $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);
    /* comment removal */
    $comment = array();
    while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $comment)) {
      $v = $comment[2];
    }
    /* keep the shorter of the stored leftover and the current input; NOTE(review): while unparsed_code is '' this can never trigger */
    if (strlen($this->unparsed_code) > strlen($v)) {
      $this->unparsed_code = $v;
    }
    return ARC2::x($re, $v, $options);
  }
  /**
   * Advances the blank node counter and returns a fresh "_:" identifier built
   * from the configured bnode prefix and the new counter value.
   *
   * @return string
   */
  function createBnodeID(){
    ++$this->bnode_id;
    $local_name = $this->bnode_prefix . $this->bnode_id;
    return '_:' . $local_name;
  }
  39. /* */
  /**
   * Appends a triple to the result list.
   *
   * With skip_dupes enabled, a triple whose serialised form was already seen
   * is dropped silently.
   *
   * @param array $t Triple structure to store.
   */
  function addT($t) {
    if ($this->skip_dupes) {
      $fingerprint = md5(serialize($t));
      if (isset($this->added_triples[$fingerprint])) {
        return;/* duplicate */
      }
      $this->added_triples[$fingerprint] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }
  54. /* */
  /**
   * Returns the collected triples, or an empty array when none are stored.
   *
   * @return array
   */
  function getTriples() {
    $triples = $this->v('triples', array());
    return $triples;
  }
  /**
   * Returns the running triple counter maintained by addT().
   *
   * @return int
   */
  function countTriples() {
    $count = $this->t_count;
    return $count;
  }
  61. /* */
  /**
   * Returns the stored unparsed remainder, or an empty string when unset.
   *
   * @return string
   */
  function getUnparsedCode() {
    $leftover = $this->v('unparsed_code', '');
    return $leftover;
  }
  65. /* */
  /**
   * Resets the prefix table to the built-in rdf/rdfs/owl/xsd entries and then
   * adds (or overrides with) the "ns" entries from the configuration.
   */
  function setDefaultPrefixes() {
    $defaults = array(
      'rdf:' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
      'rdfs:' => 'http://www.w3.org/2000/01/rdf-schema#',
      'owl:' => 'http://www.w3.org/2002/07/owl#',
      'xsd:' => 'http://www.w3.org/2001/XMLSchema#',
    );
    $this->prefixes = $defaults;
    $ns = $this->v('ns', array(), $this->a);
    if (!$ns) {
      return;
    }
    /* configured prefixes come without the trailing colon */
    foreach ($ns as $p => $u) {
      $this->prefixes[$p . ':'] = $u;
    }
  }
  /**
   * Reads Turtle input chunk by chunk and stores every triple via addT().
   *
   * The prologue (base/prefix declarations) is consumed first; afterwards each
   * chunk is appended to a buffer and run through xTriplesBlock() until no
   * further progress is made. Triples of a final, unterminated statement are
   * added after the stream is exhausted.
   *
   * @param string $path         Location handed to the reader.
   * @param string $data         Optional inline data handed to the reader.
   * @param bool   $iso_fallback Not used by this method.
   * @return mixed Result of done().
   */
  function parse($path, $data = '', $iso_fallback = false) {
    $this->setDefaultPrefixes();
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      /* plain assignment: "= & new" is a parse error as of PHP 7 */
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/x-turtle; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->base = $this->v1('base', $this->reader->base, $this->a);
    $this->r = array('vars' => array());
    /* parse */
    $buffer = '';
    $more_triples = array();
    $sub_v = '';
    $sub_v2 = '';
    $loops = 0;
    $prologue_done = 0;
    while ($d = $this->reader->readStream(0)) {
      $buffer .= $d;
      $sub_v = $buffer;
      do {
        $proceed = 0;
        if (!$prologue_done) {
          $proceed = 1;
          if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
            $loops = 0;
            $sub_v .= $this->reader->readStream(0, 128);
            /* we might have missed the final DOT in the previous prologue loop */
            if ($sub_r = $this->x('\.', $sub_v)) $sub_v = $sub_r[1];
            if ($this->x("\@?(base|prefix)", $sub_v)) {/* more prologue to come, use outer loop */
              $proceed = 0;
            }
          }
          else {
            $prologue_done = 1;
          }
        }
        if ($prologue_done && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v)) && is_array($sub_r)) {
          $proceed = 1;
          $loops = 0;
          foreach ($sub_r as $t) {
            $this->addT($t);
          }
        }
      } while ($proceed);
      $loops++;
      $buffer = $sub_v;
      if ($loops > 100) {/* most probably a parser or code bug, might also be a huge object value, though */
        $this->addError('too many loops: ' . $loops);
        break;
      }
    }
    /* triples of a last statement that was not closed by a DOT */
    foreach ($more_triples as $t) {
      $this->addT($t);
    }
    $sub_v = count($more_triples) ? $sub_v2 : $sub_v;
    $buffer = $sub_v;
    /* expose the leftover input; previously it was computed and then discarded, so getUnparsedCode() stayed empty */
    $this->unparsed_code = $buffer;
    $this->reader->closeStream();
    unset($this->reader);
    return $this->done();
  }
  /**
   * Consumes an optional base declaration followed by any number of prefix
   * declarations, updating $this->base and $this->prefixes.
   *
   * Nothing is consumed once triples have been counted.
   *
   * @param string $v Input.
   * @return array array(1|0 whether anything was declared, remaining input)
   */
  function xPrologue($v) {
    $found = 0;
    if ($this->t_count) {
      return array($found, $v);
    }
    list($base, $v) = $this->xBaseDecl($v);
    if ($base) {
      $this->base = $base;
      $found = 1;
    }
    while (true) {
      list($decl, $v) = $this->xPrefixDecl($v);
      if (!$decl) {
        break;
      }
      $this->prefixes[$decl['prefix']] = $decl['uri'];
      $found = 1;
    }
    return array($found, $v);
  }
  153. /* 3 */
  /**
   * Matches "@base <iri>" (the "@" and the closing DOT are both optional).
   *
   * @param string $v Input.
   * @return array array(IRI reference as returned by xIRI_REF(), remaining input) or array(0, $v)
   */
  function xBaseDecl($v) {
    $keyword = $this->x("\@?base\s+", $v);
    if (!$keyword) {
      return array(0, $v);
    }
    list($iri, $rest) = $this->xIRI_REF($keyword[1]);
    if (!$iri) {
      return array(0, $v);
    }
    /* swallow the terminating DOT when it is already available */
    $dot = $this->x('\.', $rest);
    if ($dot) {
      $rest = $dot[1];
    }
    return array($iri, $rest);
  }
  165. /* 4 */
  /**
   * Matches "@prefix ns: <iri>" (the "@" and the closing DOT are both optional).
   *
   * @param string $v Input.
   * @return array array(array('prefix', 'uri_ref', 'uri'), remaining input) or array(0, $v);
   *               'uri' is the reference resolved against the current base.
   */
  function xPrefixDecl($v) {
    $keyword = $this->x("\@?prefix\s+", $v);
    if (!$keyword) {
      return array(0, $v);
    }
    list($prefix, $rest) = $this->xPNAME_NS($keyword[1]);
    if (!$prefix) {
      return array(0, $v);
    }
    list($uri_ref, $rest) = $this->xIRI_REF($rest);
    if (!$uri_ref) {
      return array(0, $v);
    }
    $uri = $this->calcURI($uri_ref, $this->base);
    /* swallow the terminating DOT when it is already available */
    $dot = $this->x('\.', $rest);
    if ($dot) {
      $rest = $dot[1];
    }
    return array(array('prefix' => $prefix, 'uri_ref' => $uri_ref, 'uri' => $uri), $rest);
  }
  181. /* 21.., 32.. */
  /*
  Parses as many complete statements as possible from $v with a four-state
  machine (1: subject, 2: predicate, 3: object, 4: separator).

  Triples are first collected in $pre_r and only moved to $r once the
  statement is closed by a DOT or a closing/structural token is seen.

  Returns array($r or 0, $buffer, $pre_r, $sub_v):
  - $r      completed triples (0 when there are none)
  - $buffer input left after the last completed statement
  - $pre_r  triples of a statement that has not been terminated yet
  - $sub_v  input left after those pending triples
  */
  182. function xTriplesBlock($v) {
  183. $pre_r = array();
  184. $r = array();
  185. $state = 1;
  186. $sub_v = $v;
  187. $buffer = $sub_v;
  188. do {
  189. $proceed = 0;
  190. if ($state == 1) {/* expecting subject */
  /* fresh triple template; the subject set here is reused for ";" and "," continuations */
  191. $t = array('type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
  192. if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
  193. $t['s'] = $sub_r['value'];
  194. $t['s_type'] = $sub_r['type'];
  195. $state = 2;
  196. $proceed = 1;
  /* a subject directly followed by "}" or "." is only accepted for placeholders */
  197. if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
  198. if ($t['s_type'] == 'placeholder') {
  199. $state = 4;
  200. }
  201. else {
  202. $this->addError('"' . $sub_r[1]. '" after subject found.');
  203. }
  204. }
  205. }
  206. elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
  207. $t['s'] = $sub_r['id'];
  208. $t['s_type'] = $sub_r['type'];
  209. $pre_r = array_merge($pre_r, $sub_r['triples']);
  210. $state = 2;
  211. $proceed = 1;
  212. if ($sub_r = $this->x('\.', $sub_v)) {
  213. $this->addError('DOT after subject found.');
  214. }
  215. }
  216. elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
  217. $t['s'] = $sub_r['id'];
  218. $t['s_type'] = $sub_r['type'];
  219. $pre_r = array_merge($pre_r, $sub_r['triples']);
  220. $state = 2;
  221. $proceed = 1;
  222. }
  223. elseif ($sub_r = $this->x('\.', $sub_v)) {
  224. $this->addError('Subject expected, DOT found.' . $sub_v);
  225. }
  226. }
  227. if ($state == 2) {/* expecting predicate */
  /* the keyword "a" is shorthand for rdf:type */
  228. if ($sub_r = $this->x('a\s+', $sub_v)) {
  229. $sub_v = $sub_r[1];
  230. $t['p'] = $this->rdf . 'type';
  231. $t['p_type'] = 'uri';
  232. $state = 3;
  233. $proceed = 1;
  234. }
  235. elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
  236. if ($sub_r['type'] == 'bnode') {
  237. $this->addError('Blank node used as triple predicate');
  238. }
  239. $t['p'] = $sub_r['value'];
  240. $t['p_type'] = $sub_r['type'];
  241. $state = 3;
  242. $proceed = 1;
  243. }
  244. elseif ($sub_r = $this->x('\.', $sub_v)) {
  245. $state = 4;
  246. }
  247. elseif ($sub_r = $this->x('\}', $sub_v)) {
  /* closing brace: commit the pending triples and stop without consuming the brace */
  248. $buffer = $sub_v;
  249. $r = array_merge($r, $pre_r);
  250. $pre_r = array();
  251. $proceed = 0;
  252. }
  253. }
  254. if ($state == 3) {/* expecting object */
  255. if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
  256. $t['o'] = $sub_r['value'];
  257. $t['o_type'] = $sub_r['type'];
  258. $t['o_lang'] = $this->v('lang', '', $sub_r);
  259. $t['o_datatype'] = $this->v('datatype', '', $sub_r);
  260. $pre_r[] = $t;
  261. $state = 4;
  262. $proceed = 1;
  263. }
  264. elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
  265. $t['o'] = $sub_r['id'];
  266. $t['o_type'] = $sub_r['type'];
  267. $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
  268. $state = 4;
  269. $proceed = 1;
  270. }
  271. elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
  272. $t['o'] = $sub_r['id'];
  273. $t['o_type'] = $sub_r['type'];
  274. $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
  275. $state = 4;
  276. $proceed = 1;
  277. }
  278. }
  279. if ($state == 4) {/* expecting . or ; or , or } */
  280. if ($sub_r = $this->x('\.', $sub_v)) {
  /* statement complete: commit pending triples and remember the safe restart position */
  281. $sub_v = $sub_r[1];
  282. $buffer = $sub_v;
  283. $r = array_merge($r, $pre_r);
  284. $pre_r = array();
  285. $state = 1;
  286. $proceed = 1;
  287. }
  288. elseif ($sub_r = $this->x('\;', $sub_v)) {
  289. $sub_v = $sub_r[1];
  290. $state = 2;
  291. $proceed = 1;
  292. }
  293. elseif ($sub_r = $this->x('\,', $sub_v)) {
  294. $sub_v = $sub_r[1];
  295. $state = 3;
  296. $proceed = 1;
  297. if ($sub_r = $this->x('\}', $sub_v)) {
  298. $this->addError('Object expected, } found.');
  299. }
  300. }
  /* deliberately a separate "if": it is also evaluated after one of the separators above was consumed */
  301. if ($sub_r = $this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
  302. $buffer = $sub_v;
  303. $r = array_merge($r, $pre_r);
  304. $pre_r = array();
  305. $proceed = 0;
  306. }
  307. }
  308. } while ($proceed);
  309. return count($r) ? array($r, $buffer, $pre_r, $sub_v) : array(0, $buffer, $pre_r, $sub_v);
  310. }
  311. /* 39.. */
  /*
  Parses a bracketed blank node property list "[ predicate object ; ... ]".

  A new blank node id is created as the subject of every contained triple.
  Nested collections and nested property lists are accepted as objects and
  their triples are appended after the triple that references them.

  Returns array(array('id', 'type' => 'bnode', 'triples'), remaining input)
  when the closing "]" was found, array(0, $v) otherwise.
  */
  312. function xBlankNodePropertyList($v) {
  313. if ($sub_r = $this->x('\[', $v)) {
  314. $sub_v = $sub_r[1];
  315. $s = $this->createBnodeID();
  316. $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
  317. $t = array('type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
  318. $state = 2;
  319. $closed = 0;
  320. do {
  321. $proceed = 0;
  322. if ($state == 2) {/* expecting predicate */
  /* the keyword "a" is shorthand for rdf:type */
  323. if ($sub_r = $this->x('a\s+', $sub_v)) {
  324. $sub_v = $sub_r[1];
  325. $t['p'] = $this->rdf . 'type';
  326. $t['p_type'] = 'uri';
  327. $state = 3;
  328. $proceed = 1;
  329. }
  330. elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
  331. $t['p'] = $sub_r['value'];
  332. $t['p_type'] = $sub_r['type'];
  333. $state = 3;
  334. $proceed = 1;
  335. }
  336. }
  337. if ($state == 3) {/* expecting object */
  338. if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
  339. $t['o'] = $sub_r['value'];
  340. $t['o_type'] = $sub_r['type'];
  341. $t['o_lang'] = $this->v('lang', '', $sub_r);
  342. $t['o_datatype'] = $this->v('datatype', '', $sub_r);
  343. $r['triples'][] = $t;
  344. $state = 4;
  345. $proceed = 1;
  346. }
  347. elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
  348. $t['o'] = $sub_r['id'];
  349. $t['o_type'] = $sub_r['type'];
  350. $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
  351. $state = 4;
  352. $proceed = 1;
  353. }
  354. elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
  355. $t['o'] = $sub_r['id'];
  356. $t['o_type'] = $sub_r['type'];
  357. $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
  358. $state = 4;
  359. $proceed = 1;
  360. }
  361. }
  362. if ($state == 4) {/* expecting . or ; or , or ] */
  /* the checks below are independent "if"s, so several separators can be consumed in one pass */
  363. if ($sub_r = $this->x('\.', $sub_v)) {
  364. $sub_v = $sub_r[1];
  /* NOTE(review): state 1 has no handler in this loop */
  365. $state = 1;
  366. $proceed = 1;
  367. }
  368. if ($sub_r = $this->x('\;', $sub_v)) {
  369. $sub_v = $sub_r[1];
  370. $state = 2;
  371. $proceed = 1;
  372. }
  373. if ($sub_r = $this->x('\,', $sub_v)) {
  374. $sub_v = $sub_r[1];
  375. $state = 3;
  376. $proceed = 1;
  377. }
  378. if ($sub_r = $this->x('\]', $sub_v)) {
  379. $sub_v = $sub_r[1];
  380. $proceed = 0;
  381. $closed = 1;
  382. }
  383. }
  384. } while ($proceed);
  385. if ($closed) {
  386. return array($r, $sub_v);
  387. }
  /* opening bracket seen but never closed: report failure with the untouched input */
  388. return array(0, $v);
  389. }
  390. return array(0, $v);
  391. }
  392. /* 40.. */
  /**
   * Parses a parenthesised collection "( item item ... )" into an
   * rdf:first / rdf:rest chain of blank nodes.
   *
   * Items may be plain terms, nested collections or blank node property
   * lists; triples of nested structures follow the rdf:first triple that
   * references them.
   *
   * @param string $v Input.
   * @return array array(array('id', 'type' => 'bnode', 'triples'), remaining input)
   *               when the closing ")" was found, array(0, $v) otherwise.
   */
  function xCollection($v) {
    $open = $this->x('\(', $v);
    if (!$open) {
      return array (0, $v);
    }
    $rest = $open[1];
    $node = $this->createBnodeID();
    $list = array('id' => $node, 'type' => 'bnode', 'triples' => array());
    $closed = 0;
    do {
      $proceed = 0;
      list($item, $rest) = $this->xVarOrTerm($rest);
      if ($item) {
        $list['triples'][] = array('type' => 'triple', 's' => $node, 'p' => $this->rdf . 'first', 'o' => $item['value'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $item['type'], 'o_lang' => $this->v('lang', '', $item), 'o_datatype' => $this->v('datatype', '', $item));
        $proceed = 1;
      }
      else {
        /* structured items: nested collection first, then property list */
        foreach (array('xCollection', 'xBlankNodePropertyList') as $method) {
          list($item, $rest) = $this->$method($rest);
          if (!$item) {
            continue;
          }
          $list['triples'][] = array('type' => 'triple', 's' => $node, 'p' => $this->rdf . 'first', 'o' => $item['id'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $item['type'], 'o_lang' => '', 'o_datatype' => '');
          $list['triples'] = array_merge($list['triples'], $item['triples']);
          $proceed = 1;
          break;
        }
      }
      if (!$proceed) {
        break;
      }
      $close = $this->x('\)', $rest);
      if ($close) {
        /* end of list: terminate the chain with rdf:nil */
        $rest = $close[1];
        $list['triples'][] = array('type' => 'triple', 's' => $node, 'p' => $this->rdf . 'rest', 'o' => $this->rdf . 'nil', 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'uri', 'o_lang' => '', 'o_datatype' => '');
        $closed = 1;
        $proceed = 0;
      }
      else {
        /* more items follow: link to a fresh list node */
        $next_node = $this->createBnodeID();
        $list['triples'][] = array('type' => 'triple', 's' => $node, 'p' => $this->rdf . 'rest', 'o' => $next_node, 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'bnode', 'o_lang' => '', 'o_datatype' => '');
        $node = $next_node;
      }
    } while ($proceed);
    return $closed ? array($list, $rest) : array (0, $v);
  }
  435. /* 42 */
  /**
   * Matches a variable or, failing that, a graph term.
   *
   * @param string $v Input.
   * @return array array(term structure, remaining input) or array(0, $v)
   */
  function xVarOrTerm($v) {
    list($term, $rest) = $this->xVar($v);
    if (!$term) {
      list($term, $rest) = $this->xGraphTerm($v);
    }
    return $term ? array($term, $rest) : array(0, $v);
  }
  445. /* 44, 74.., 75.. */
  /**
   * Matches a "?name" / "$name" variable and records new names in $this->r['vars'].
   *
   * @param string $v Input.
   * @return array array(array('value' => name, 'type' => 'var'), remaining input) or array(0, $v)
   */
  function xVar($v) {
    $m = $this->x('(\?|\$)([^\s]+)', $v);
    if (!$m) {
      return array(0, $v);
    }
    /* only the leading part of the token that forms a valid name is used */
    list($name, $leftover) = $this->xVARNAME($m[2]);
    if (!$name) {
      return array(0, $v);
    }
    if (!in_array($name, $this->r['vars'])) {
      $this->r['vars'][] = $name;
    }
    $term = array('value' => $name, 'type' => 'var');
    return array($term, $leftover . $m[3]);
  }
  457. /* 45 */
  /**
   * Tries the term matchers in a fixed order and returns the first hit.
   *
   * A scalar result is wrapped as array('value' => ...); the 'type' entry is
   * filled via v1() with the matcher's default type as fallback.
   *
   * @param string $v Input.
   * @return array array(term structure, remaining input) or array(0, $v)
   */
  function xGraphTerm($v) {
    /* matcher suffix => default term type */
    $default_types = array(
      'IRIref' => 'uri',
      'RDFLiteral' => 'literal',
      'NumericLiteral' => 'literal',
      'BooleanLiteral' => 'literal',
      'BlankNode' => 'bnode',
      'NIL' => 'uri',
      'Placeholder' => 'placeholder'
    );
    foreach ($default_types as $term => $type) {
      $method = 'x' . $term;
      list($found, $rest) = $this->$method($v);
      if (!$found) {
        continue;
      }
      if (!is_array($found)) {
        $found = array('value' => $found);
      }
      $found['type'] = $this->v1('type', $type, $found);
      return array($found, $rest);
    }
    return array(0, $v);
  }
  479. /* 60 */
  /**
   * Matches a quoted string with an optional language tag or "^^" datatype.
   *
   * The string value is passed through unescapeNtripleUTF(). A language tag
   * or datatype marker is only recognised when it directly follows the
   * closing quote (no whitespace in between).
   *
   * @param string $v Input.
   * @return array array(literal structure with optional 'lang' / 'datatype', remaining input) or array(0, $v)
   */
  function xRDFLiteral($v) {
    if ((list($sub_r, $sub_v) = $this->xString($v)) && $sub_r) {
      $sub_r['value'] = $this->unescapeNtripleUTF($sub_r['value']);
      $r = $sub_r;
      if ((list($sub_r, $sub_v) = $this->xLANGTAG($sub_v)) && $sub_r) {
        $r['lang'] = $sub_r;
      }
      /* test the datatype IRI itself; the former "$sub_r[1]" indexed into the returned URI string (or into the int 0 on failure) */
      elseif (!$this->x('\s', $sub_v) && ($sub_r = $this->x('\^\^', $sub_v)) && (list($sub_r, $sub_v) = $this->xIRIref($sub_r[1])) && $sub_r) {
        $r['datatype'] = $sub_r;
      }
      return array($r, $sub_v);
    }
    return array(0, $v);
  }
  494. /* 61.., 62.., 63.., 64.. */
  /**
   * Matches an optionally signed number, trying double, decimal and integer
   * in that order, and tags it with the matching xsd datatype.
   *
   * @param string $v Input.
   * @return array array(array('value', 'type' => 'literal', 'datatype'), remaining input) or array(0, $v)
   */
  function xNumericLiteral($v) {
    $sign_match = $this->x('(\-|\+)?', $v);
    $prefix = $sign_match[1];
    $sub_v = $sign_match[2];
    /* matcher suffix => xsd local name */
    $xsd_by_matcher = array('DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer');
    foreach ($xsd_by_matcher as $type => $xsd) {
      $method = 'x' . $type;
      list($digits, $sub_v) = $this->$method($sub_v);
      if ($digits === false) {
        continue;
      }
      $literal = array('value' => $prefix . $digits, 'type' => 'literal', 'datatype' => $this->xsd . $xsd);
      return array($literal, $sub_v);
    }
    return array(0, $v);
  }
  508. /* 65.. */
  /**
   * Matches the keywords "true" or "false".
   *
   * @param string $v Input.
   * @return array array(matched keyword, remaining input) or array(0, $v)
   */
  function xBooleanLiteral($v) {
    $m = $this->x('(true|false)', $v);
    return $m ? array($m[1], $m[2]) : array(0, $v);
  }
  515. /* 66.., 87.., 88.., 89.., 90.., 91.. */
  /**
   * Matches a string delimited by ', ", ''' or """.
   *
   * A closing delimiter preceded by an odd number of backslashes counts as
   * escaped and the search continues behind it. The value is returned with
   * its escape sequences untouched, converted via toUTF8().
   *
   * @param string $v Input.
   * @return array array(array('value', 'type' => 'literal', 'sub_type'), remaining input) or array(0, $v)
   */
  function xString($v) {/* largely simplified, may need some tweaks in following revisions */
    $opening = array();
    if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $opening)) {
      return array(0, $v);
    }
    $delim = $opening[1];
    $rest = $opening[2];
    $sub_types = array("'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2');
    $sub_type = $sub_types[$delim];
    $offset = 0;
    while (($hit = strpos($rest, $delim, $offset)) !== false) {
      $after = substr($rest, $hit + strlen($delim));
      $candidate = substr($rest, 0, $hit);
      $slashes = array();
      $escaped = preg_match('/([\x5c]+)$/s', $candidate, $slashes) && (strlen($slashes[1]) % 2);
      if (!$escaped) {
        return array(array('value' => $this->toUTF8($candidate) , 'type' => 'literal', 'sub_type' => $sub_type), $after);
      }
      /* escaped delimiter: look for the next one */
      $offset = $hit + 1;
    }
    return array(0, $v);
  }
  545. /* 67 */
  /**
   * Matches an IRI given either as "<...>" reference (resolved against the
   * current base) or as a prefixed name.
   *
   * @param string $v Input.
   * @return array array(URI, remaining input) or array(0, input)
   */
  function xIRIref($v) {
    list($iri, $rest) = $this->xIRI_REF($v);
    if ($iri) {
      return array($this->calcURI($iri, $this->base), $rest);
    }
    list($iri, $rest) = $this->xPrefixedName($rest);
    if ($iri) {
      return array($iri, $rest);
    }
    return array(0, $rest);
  }
  555. /* 68 */
  /**
   * Matches "prefix:local" or a bare "prefix:" and expands it with the
   * prefix table.
   *
   * @param string $v Input.
   * @return array array(expanded URI, remaining input) or array(0, input)
   */
  function xPrefixedName($v) {
    list($uri, $v) = $this->xPNAME_LN($v);
    if ($uri) {
      return array($uri, $v);
    }
    /* bare namespace prefix, e.g. "ex:" */
    list($ns, $rest) = $this->xPNAME_NS($v);
    if ($ns && isset($this->prefixes[$ns])) {
      return array($this->prefixes[$ns], $rest);
    }
    return array(0, $v);
  }
  565. /* 69.., 73.., 93, 94.. */
  /**
   * Matches a labelled blank node ("_:name") or an empty "[]" pair, for which
   * a new blank node id is created.
   *
   * @param string $v Input.
   * @return array array(array('type' => 'bnode', 'value'), remaining input) or array(0, $v)
   */
  function xBlankNode($v) {
    $label = $this->x('\_\:', $v);
    if ($label) {
      list($name, $rest) = $this->xPN_LOCAL($label[1]);
      if ($name) {
        return array(array('type' => 'bnode', 'value' => '_:' . $name), $rest);
      }
    }
    $anon = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
    if ($anon) {
      return array(array('type' => 'bnode', 'value' => $this->createBnodeID()), $anon[1]);
    }
    return array(0, $v);
  }
  575. /* 70.. */
  /**
   * Matches an IRI reference in angle brackets.
   *
   * A reference of the form "<${...}>" is additionally run through
   * xPlaceholder(). An empty (or otherwise falsy) reference is reported as
   * true so that callers still see a successful match.
   *
   * @param string $v Input.
   * @return array array(reference text or true, remaining input) or array(0, $v)
   */
  function xIRI_REF($v) {
    $ph = $this->x('\<(\$\{[^\>]*\})\>', $v);
    if ($ph && $this->xPlaceholder($ph[1])) {
      return array($ph[1], $ph[2]);
    }
    $ref = $this->x('\<([^\<\>\s]*)\>', $v);
    if ($ref) {
      $value = $ref[1] ? $ref[1] : true;
      return array($value, $ref[2]);
    }
    return array(0, $v);
  }
  586. /* 71 */
  /**
   * Matches a namespace prefix including its colon; the part before the
   * colon may be empty.
   *
   * @param string $v Input.
   * @return array array("prefix:", remaining input) or array(0, $v)
   */
  function xPNAME_NS($v) {
    list($name, $rest) = $this->xPN_PREFIX($v);
    if (!$name) {
      $name = '';
    }
    $colon = $this->x("\:", $rest);
    if (!$colon) {
      return array(0, $v);
    }
    return array($name . ':', $colon[1]);
  }
  592. /* 72 */
  /**
   * Matches "prefix:local" (no whitespace after the colon) and expands it
   * with the prefix table; unknown prefixes do not match.
   *
   * @param string $v Input.
   * @return array array(expanded URI, remaining input) or array(0, $v)
   */
  function xPNAME_LN($v) {
    list($ns, $rest) = $this->xPNAME_NS($v);
    if (!$ns) {
      return array(0, $v);
    }
    /* the local part has to follow the colon directly */
    if ($this->x('\s', $rest)) {
      return array(0, $v);
    }
    list($local, $rest) = $this->xPN_LOCAL($rest);
    if (!$local) {
      return array(0, $v);
    }
    if (!isset($this->prefixes[$ns])) {
      return array(0, $v);
    }
    return array($this->prefixes[$ns] . $local, $rest);
  }
  604. /* 76 */
  /**
   * Matches a language tag ("@en", "@en-gb") that starts right at the
   * beginning of the input, i.e. without leading whitespace.
   *
   * @param string $v Input.
   * @return array array(tag without "@", remaining input) or array(0, $v)
   */
  function xLANGTAG($v) {
    if ($this->x('\s', $v)) {
      return array(0, $v);
    }
    $tag = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
    if ($tag) {
      /* index 2 is the inner sub-tag group, index 3 the remaining input */
      return array($tag[1], $tag[3]);
    }
    return array(0, $v);
  }
  611. /* 77.. */
  /**
   * Matches a run of digits.
   *
   * @param string $v Input.
   * @return array array(digits, remaining input) or array(false, $v)
   */
  function xINTEGER($v) {
    $m = $this->x('([0-9]+)', $v);
    return $m ? array($m[1], $m[2]) : array(false, $v);
  }
  618. /* 78.. */
  /**
   * Matches a decimal number, either "1.", "1.5" or ".5".
   *
   * @param string $v Input.
   * @return array array(number text, remaining input) or array(false, $v)
   */
  function xDECIMAL($v) {
    /* tried in order: digits before the point, then a leading point */
    $patterns = array('([0-9]+\.[0-9]*)', '(\.[0-9]+)');
    foreach ($patterns as $pattern) {
      $m = $this->x($pattern, $v);
      if ($m) {
        return array($m[1], $m[2]);
      }
    }
    return array(false, $v);
  }
  628. /* 79.., 86.. */
  /**
   * Matches a number with exponent, e.g. "1.5E3", ".5E-3" or "1E3".
   *
   * @param string $v Input.
   * @return array array(number text, remaining input) or array(false, $v)
   */
  function xDOUBLE($v) {
    /* tried in order: with fraction, leading point, integer mantissa */
    $patterns = array(
      '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
      '(\.[0-9]+E[\+\-]?[0-9]+)',
      '([0-9]+E[\+\-]?[0-9]+)'
    );
    foreach ($patterns as $pattern) {
      $m = $this->x($pattern, $v);
      if ($m) {
        return array($m[1], $m[2]);
      }
    }
    return array(false, $v);
  }
  641. /* 92 */
  /**
   * Matches an empty pair of parentheses and maps it to rdf:nil.
   *
   * @param string $v Input.
   * @return array array(array('type' => 'uri', 'value' => rdf:nil), remaining input) or array(0, $v)
   */
  function xNIL($v) {
    $m = $this->x('\([\x20\x9\xd\xa]*\)', $v);
    if (!$m) {
      return array(0, $v);
    }
    $nil = array('type' => 'uri', 'value' => $this->rdf . 'nil');
    return array($nil, $m[1]);
  }
  648. /* 95.. */
  /**
   * Matches a run of letters or a single backslash-u escape with one to four
   * hex digits.
   *
   * @param string $v Input.
   * @return array array(matched text, remaining input) or array(0, $v)
   */
  function xPN_CHARS_BASE($v) {
    $m = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);
    if (!$m) {
      return array(0, $v);
    }
    return array($m[1], $m[2]);
  }
  655. /* 96 */
  /**
   * Matches what xPN_CHARS_BASE() accepts or, failing that, one underscore.
   *
   * @param string $v Input.
   * @return array array(matched text, remaining input) or array(0, $v)
   */
  function xPN_CHARS_U($v) {
    list($chars, $rest) = $this->xPN_CHARS_BASE($v);
    if ($chars) {
      return array($chars, $rest);
    }
    $underscore = $this->x("(_)", $v);
    if ($underscore) {
      return array($underscore[1], $underscore[2]);
    }
    return array(0, $v);
  }
  665. /* 97.. */
  /**
   * Collects the longest leading run of variable name characters.
   *
   * Digits and xPN_CHARS_U() matches are accepted anywhere; the extra
   * character class is only tried once the name is non-empty.
   *
   * @param string $v Input.
   * @return array array(name, possibly '', remaining input)
   */
  function xVARNAME($v) {
    $name = '';
    while (true) {
      $digits = $this->x('([0-9]+)', $v);
      if ($digits) {
        $name .= $digits[1];
        $v = $digits[2];
        continue;
      }
      list($chars, $rest) = $this->xPN_CHARS_U($v);
      if ($chars) {
        $name .= $chars;
        $v = $rest;
        continue;
      }
      if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
        $name .= $extra[1];
        $v = $extra[2];
        continue;
      }
      break;
    }
    return array($name, $v);
  }
  688. /* 98.. */
  /**
   * Matches what xPN_CHARS_U() accepts or, failing that, one character from
   * the additional class (hyphen, digits, ...).
   *
   * @param string $v Input.
   * @return array array(matched text, remaining input) or array(false, $v)
   */
  function xPN_CHARS($v) {
    list($chars, $rest) = $this->xPN_CHARS_U($v);
    if ($chars) {
      return array($chars, $rest);
    }
    $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
    if ($single) {
      return array($single[1], $single[2]);
    }
    return array(false, $v);
  }
  698. /* 99 */
  /**
   * Matches the part of a prefixed name in front of the colon.
   *
   * A single-regex shortcut is tried first; the character-by-character
   * path below only runs when the shortcut finds nothing.
   *
   * @param string $v Input.
   * @return array array(prefix text or falsy value, remaining input)
   */
  function xPN_PREFIX($v) {
    $quick = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');/* accelerator */
    if ($quick) {
      return array($quick[1], $quick[2]);/* @@testing */
    }
    list($name, $rest) = $this->xPN_CHARS_BASE($v);
    if ($name) {
      while (true) {
        list($piece, $rest) = $this->xPN_CHARS($rest);
        if ($piece !== false) {
          $name .= $piece;
          continue;
        }
        $dot = $this->x("\.", $rest);
        if (!$dot) {
          break;
        }
        $name .= '.';
        $rest = $dot[1];
      }
      list($piece, $rest) = $this->xPN_CHARS($rest);
      $name .= $piece ? $piece : '';
    }
    return array($name, $rest);
  }
  722. /* 100 */
  /**
   * Matches the local part of a prefixed name or blank node label.
   *
   * A single-regex shortcut is used unless the text behind its match starts
   * with a dot; otherwise the name is assembled piece by piece, where a dot
   * is only taken over when it is not followed by whitespace.
   *
   * @param string $v Input.
   * @return array array(local name, possibly '', remaining input)
   */
  function xPN_LOCAL($v) {
    $quick = $this->x("([^\s\(\)\{\}\;\,\.]+)", $v, 's');/* accelerator */
    if ($quick && !preg_match('/^\./', $quick[2])) {
      return array($quick[1], $quick[2]);/* @@testing */
    }
    $local = '';
    $rest = $v;
    while (true) {
      /* whitespace ends the name */
      if ($this->x('\s', $rest)) {
        return array($local, $rest);
      }
      $digit = $this->x('([0-9])', $rest);
      if ($digit) {
        $local .= $digit[1];
        $rest = $digit[2];
        continue;
      }
      list($chars, $rest) = $this->xPN_CHARS_U($rest);
      if ($chars) {
        $local .= $chars;
        continue;
      }
      if (!$local) {
        break;
      }
      $dot = $this->x('(\.)', $rest);
      if ($dot && !preg_match('/^\s/s', $dot[2])) {
        $local .= $dot[1];
        $rest = $dot[2];
      }
      list($chars, $rest) = $this->xPN_CHARS($rest);
      if (!$chars) {
        break;
      }
      $local .= $chars;
    }
    return array($local, $rest);
  }
  756. /* */
  /**
   * Decodes backslash escapes in a string literal: \t, \n, \r, \", \', \\,
   * \uXXXX and \UXXXXXXXX (hex digits in either case).
   *
   * The input is scanned once from left to right, so an escaped backslash
   * protects the character behind it and decoded output is never decoded a
   * second time. Unknown or truncated escapes are kept verbatim.
   *
   * @param string $v Raw literal text.
   * @return string Text with the escapes replaced; code points are emitted as UTF-8.
   */
  function unescapeNtripleUTF($v) {
    if (strpos($v, '\\') === false) return $v;
    $simple = array('t' => "\t", 'n' => "\n", 'r' => "\r", '"' => '"', "'" => "'", '\\' => '\\');
    $out = '';
    $len = strlen($v);
    $i = 0;
    while ($i < $len) {
      $pos = strpos($v, '\\', $i);
      if ($pos === false) {
        $out .= substr($v, $i);
        break;
      }
      /* copy everything in front of the backslash unchanged */
      $out .= substr($v, $i, $pos - $i);
      $next = ($pos + 1 < $len) ? $v[$pos + 1] : '';
      if (($next !== '') && isset($simple[$next])) {
        $out .= $simple[$next];
        $i = $pos + 2;
        continue;
      }
      if (($next === 'u') || ($next === 'U')) {
        $digits = ($next === 'u') ? 4 : 8;
        $hex = (string) substr($v, $pos + 2, $digits);
        if ((strlen($hex) === $digits) && preg_match('/^[0-9A-Fa-f]+$/', $hex)) {
          $no = hexdec($hex);
          if ($no < 128) $char = chr($no);
          else if ($no < 2048) $char = chr(($no >> 6) + 192) . chr(($no & 63) + 128);
          else if ($no < 65536) $char = chr(($no >> 12) + 224) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
          else if ($no < 2097152) $char = chr(($no >> 18) + 240) . chr((($no >> 12) & 63) + 128) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
          else $char= '';
          $out .= $char;
          $i = $pos + 2 + $digits;
          continue;
        }
      }
      /* unknown or truncated escape: keep the backslash as it is */
      $out .= '\\';
      $i = $pos + 1;
    }
    return $out;
  }
  775. /* */
  /**
   * Matches a "?{...}" / "${...}" placeholder (braces may be nested) and
   * records new placeholder texts in $this->r['placeholders'].
   *
   * @param string $v Input.
   * @return array array(array('value' => text between the outer braces, 'type' => 'placeholder'), remaining input) or array(0, $v)
   */
  function xPlaceholder($v) {
    $sigil = $this->x('(\?|\$)', $v);
    if (!$sigil) {
      return array(0, $v);
    }
    $tail = $sigil[2];
    $braces = array();
    if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $tail, $braces)) {
      return array(0, $v);
    }
    /* the braced group has to start right behind the sigil */
    if (strpos(trim($tail), $braces[1]) !== 0) {
      return array(0, $v);
    }
    $ph = substr($braces[1], 1, -1);
    $rest = substr(trim($tail), strlen($braces[1]));
    if (!isset($this->r['placeholders'])) {
      $this->r['placeholders'] = array();
    }
    if (!in_array($ph, $this->r['placeholders'])) {
      $this->r['placeholders'][] = $ph;
    }
    return array(array('value' => $ph, 'type' => 'placeholder'), $rest);
  }
  789. /* */
  790. }