PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/rdfapi-php/api/syntax/N3Parser.php

https://github.com/koja13/DSi2.0
PHP | 1137 lines | 744 code | 150 blank | 243 comment | 120 complexity | 81b8c341b045fef9f24d9314836ee64b MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. require_once RDFAPI_INCLUDE_DIR . 'util/Object.php';
  3. require_once RDFAPI_INCLUDE_DIR . 'model/Blanknode.php';
  4. require_once RDFAPI_INCLUDE_DIR . 'model/Resource.php';
  5. require_once RDFAPI_INCLUDE_DIR . 'model/Literal.php';
  6. require_once RDFAPI_INCLUDE_DIR . 'model/Statement.php';
  7. require_once RDFAPI_INCLUDE_DIR . 'model/MemModel.php';
  8. require_once RDFAPI_INCLUDE_DIR . 'constants.php';
  9. // ----------------------------------------------------------------------------------
  10. // Class: N3Parser
  11. // ----------------------------------------------------------------------------------
  12. /**
  13. * PHP Notation3 Parser
  14. *
  15. * This parser can parse a subset of n3, reporting triples to a callback function
  16. * or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
  17. *
  18. * Supported N3 features:
  19. * <ul>
  20. * <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference ('<>')</li>
  21. * <li>@prefix mappings</li>
  22. * <li>= maps to owl#sameAs</li>
  23. * <li>a maps to rdf-syntax-ns#type</li>
  24. * <li>Literal datatype and xml:lang language-tag support</li>
  25. * </ul>
  26. * Un-supported N3 Features include:
  27. * <ul>
  28. * <li>Reification using { }</li>
  29. * <li>. and ^ operators for tree traversal</li>
  30. * <li>Any log operators, like log:forAll etc.</li>
  31. * </ul>
  32. *
  33. * This parser is based on n3.py from Epp released 2nd March, 2002.
  34. * by Sean B. Palmer
  35. * ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
  36. *
  37. * This parser is released under the GNU GPL license.
  38. * ( http://www.gnu.org/licenses/gpl.txt )
  39. *
  40. *
  41. *
  42. * @author Sean B. Palmer <sean@mysterylights.com>
  43. * @author Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>
  44. * @author Daniel Westphal <mail@d-westphal.de>
  45. * @version $Id: N3Parser.php 517 2007-08-13 16:14:17Z cweiske $
  46. * @license GPL http://www.gnu.org/licenses/gpl.txt
  47. * @package syntax
  48. * @access public
  49. **/
  50. class N3Parser extends Object {
  51. /* ==================== Variables ==================== */
  52. var $Tokens;
  53. var $bNode;
  54. var $RDF_NS, $DAML_NS, $OWL_NS;
  55. var $debug;
  56. var $parseError;
  57. var $parsedNamespaces = array();
  58. /* ==================== Public Methods ==================== */
  59. /**
  60. * Constructor
  61. * @access public
  62. **/
  /**
   * Constructor: builds the tokenizer pattern and resets parser state.
   *
   * Declared as __construct() so it also runs on PHP 8, where a method named
   * after the class is no longer treated as a constructor. It is defined
   * before the legacy N3Parser() method so PHP 5 raises no
   * "redefining constructor" strict notice.
   */
  function __construct() {
      // Regular-expression fragments; the tokenizer is their alternation.
      $Name = '[A-Za-z0-9_@\.]+[^\.,;\[\]\s\) ]*';
      $URI = '<[^> ]*>';
      $bNode = '_:'.$Name;
      $Univar = '\?'.$Name;
      $QName = '(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.$Name;
      $Literal = '(?:'
          . '"(\\\"|[^"])*"'
          . '|'
          . "'(\\\'|[^'])*'"
          . ')';
      $Number = '[-+]?[0-9]+(\\.[0-9]+)?([eE][-+]?[0-9]+)?';
      $Boolean = '@(?:true|false)';
      $LangTag = '@[A-Za-z\-]*[^ \^\.\;\,]';
      $Datatype = '(\^\^)[^ ,\.;)]+';
      $Datatype_URI = '(\^\^)'.$URI;
      // Long (triple-quoted) literals. The inner character class of the
      // single-quoted variant must exclude "'" (it used to exclude '"', which
      // let one '''...''' literal run on into the next).
      $LLiteral = '(?:'
          . '"""[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""'
          . '|'
          . "'''[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''"
          . ')';
      $Comment = '#.*$';
      $Prefix = '(?:[A-Za-z][A-Za-z0-9_]*)?:';
      $PrefixDecl = '@prefix';
      $WS = '[ \t]';

      $this->RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; # for 'a' keyword
      $this->DAML_NS = 'http://www.daml.org/2001/03/daml+oil#'; # not referenced in this class
      $this->OWL_NS = 'http://www.w3.org/2002/07/owl#'; # for '=' keyword (owl:sameAs)

      // Order matters: earlier alternatives win at a given position.
      $t = array(
          $Datatype_URI, $Datatype, $LLiteral, $URI, $Literal,
          $PrefixDecl, $QName, $Number, $Boolean, $bNode,
          $Prefix, $Univar, 'a','=',
          '{', '}', '\(', '\)', '\[', '\]', ',', ';', '\.',
          $WS, $Comment,$LangTag
      );
      // implode(glue, pieces): the reversed legacy order is rejected by PHP 8.
      $this->Tokens = "/(".implode('|', $t).")/m";
      $this->bNode = 0;
      $this->debug = 0;
      $this->bNodeMap = array();
      $this->FixBnodes = FIX_BLANKNODES;
      $this->parseError =false;
  }

  /**
   * Legacy PHP 4 style constructor, kept so existing explicit calls
   * (e.g. parent::N3Parser()) keep working.
   */
  function N3Parser() {
      $this->__construct();
  }
  112. /**
  113. * Sets, if BlankNode labels should be replaced by the generic label from the constants.php file
  114. * default is "false" -> the used label in n3 is parsed to the model
  115. * @param boolean
  116. * @access public
  117. **/
  function setFixBnodes($set) {
      // Only genuine booleans are accepted; any other value is ignored.
      if (!is_bool($set)) {
          return;
      }
      $this->FixBnodes = $set;
  }
  121. /**
  122. * This parses a N3 string and prints out the triples
  123. * @param string $s
  124. * @access public
  125. **/
  function parse($s) {
      // Tokenize and statementize the input, then print one line per triple.
      foreach ($this->n3tolist($s) as $triple) {
          $object = $triple[2];
          if (count($triple) > 3) {
              // Slots 3 and 4 may hold a language tag ("@..") and/or a
              // datatype ("^^.."); both are appended to the printed object.
              foreach (array(3, 4) as $slot) {
                  $extra = $triple[$slot];
                  if ($extra[0] == '@') {
                      $object .= $extra;
                  }
                  if (substr($extra, 0, 2) == '^^') {
                      $object .= $extra;
                  }
              }
          }
          print '('.$triple[0].', '.$triple[1].', '.$object.")\n";
      }
  }
  142. /**
  143. * This parses a N3 string and calls func($subject, $predicate, $object) with each triple
  144. * @param string $s
  145. * @param string $func
  146. * @access public
  147. **/
  function uparse($s,$func) {
      // Tokenize and statementize the input, then hand every triple to $func.
      foreach ($this->n3tolist($s) as $triple) {
          $object = $triple[2];
          if (count($triple) > 3) {
              // Slots 3 and 4 may hold a language tag ("@..") and/or a
              // datatype ("^^.."); both are appended to the object string.
              foreach (array(3, 4) as $slot) {
                  $extra = $triple[$slot];
                  if ($extra[0] == '@') {
                      $object .= $extra;
                  }
                  if (substr($extra, 0, 2) == '^^') {
                      $object .= $extra;
                  }
              }
          }
          $func($triple[0], $triple[1], $object);
      }
  }
  165. /**
  166. * This parses a N3 string and returns a memmodel
  167. * @param string $s
  168. * @access public
  169. * @return object Model
  170. **/
  function parse2model($s,$model = false) {
      // Reuse the caller's model when one is given, otherwise start a new one.
      if ($model == false) {
          $target = new MemModel();
      } else {
          $target = $model;
      }

      foreach ($this->n3tolist($s) as $triple) {
          // The whole triple is passed along so literal objects can pick up
          // their language tag / datatype from the trailing slots.
          $subject   = $this->toRDFNode($triple[0], $triple);
          $predicate = $this->toRDFNode($triple[1], $triple);
          $object    = $this->toRDFNode($triple[2], $triple);
          $statement = new Statement($subject, $predicate, $object);
          $target->add($statement);
      }

      $target->addParsedNamespaces($this->parsedNamespaces);
      return $target;
  }
  192. /**
  193. * Generate a new MemModel from an URI or file.
  194. *
  195. * @access public
  196. * @param $path
  197. * @throws PhpError
  198. * @return object MemModel
  199. */
  function & generateModel($path,$dummy=false,$model=false) {
      // $dummy is accepted but never read in this method.
      $handle = fopen($path,'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");

      // Slurp the stream in 512-byte chunks until end-of-file is reported.
      $input = "";
      do {
          $input .= fread($handle, 512);
      } while (!feof($handle));
      fclose($handle);

      // Returned through a variable because the method returns by reference.
      $m = $this->parse2model($input, $model);
      return $m;
  }
  213. /* ==================== Private Methods from here ==================== */
  214. // General list processing functions
  215. /**
  216. * Returns FALSE if argument is a whitespace character
  217. * @access private
  218. * @param string $s
  219. **/
  function isWS($s) {
      // Despite the name this is a "keep" predicate: it is FALSE for tokens
      // that are empty, pure whitespace, or a '#' comment, and TRUE otherwise.
      $blankOrComment = preg_match('/^(#.*|\s*)$/', $s);
      return !$blankOrComment;
  }
  223. /**
  224. * Returns true if the string is not a comment
  225. * @access private
  226. * @param string $s
  227. * @returns boolean
  228. **/
  function notComment($s) {
      // An empty line counts as "comment" (nothing worth keeping).
      if ($s == "") return false;
      // PCRE replaces the removed ereg(). In PCRE [ \t] really is
      // "space or tab"; in the old POSIX pattern the bracket expression
      // matched a literal backslash and the letter 't' instead.
      return preg_match('/^[ \t]*#/', $s) !== 1;
  }
  235. /**
  236. * Removes all whitespace tokens from list
  237. * @access private
  238. * @param array $list
  239. **/
  function filterWs($list) {
      // Keep only tokens accepted by isWS(), i.e. drop whitespace and comment
      // tokens. array_filter() preserves the original keys, so the result
      // may have gaps in its numeric indexes.
      $keepToken = array($this, 'isWS');
      return array_filter($list, $keepToken);
  }
  245. /**
  246. * converts a string to its unicode NFC form (e.g. \uHHHH or \UHHHHHHHH).
  247. *
  248. * @param String $str
  249. * @return String
  250. * @access private
  251. *
  252. */
  function str2unicode_nfc($str=""){
      $result = "";
      /* Encoding guess: if decoding as UTF-8 introduces no '?' replacement
         characters, treat the input as UTF-8 and work on the decoded bytes. */
      $tmp = str_replace("?", "", $str);
      if (strpos(utf8_decode($tmp), "?") === false) {
          $str = utf8_decode($str);
      }
      for ($i = 0, $i_max = strlen($str); $i < $i_max; $i++) {
          $char = $str[$i];
          /* Re-encode the single byte and decode its code point by hand. */
          $utf8_char = utf8_encode($char);
          $bytes = strlen($utf8_char);
          $nr = 0; /* unicode dec nr */
          if ($bytes == 1) {
              /* 0####### (0-127) */
              $nr = ord($utf8_char);
          } elseif ($bytes == 2) {
              /* 110##### 10###### */
              $nr = ((ord($utf8_char[0])-192)*64) + (ord($utf8_char[1])-128);
          } elseif ($bytes == 3) {
              /* 1110#### 10###### 10###### */
              $nr = ((ord($utf8_char[0])-224)*4096) + ((ord($utf8_char[1])-128)*64) + (ord($utf8_char[2])-128);
          } elseif ($bytes == 4) {
              /* 1111#### 10###### 10###### 10###### */
              $nr = ((ord($utf8_char[0])-240)*262144) + ((ord($utf8_char[1])-128)*4096) + ((ord($utf8_char[2])-128)*64) + (ord($utf8_char[3])-128);
          }
          /* Escaping rules: http://www.w3.org/TR/rdf-testcases/#ntrip_strings */
          if ($nr == 9) {
              $result .= '\t';
          } elseif ($nr == 10) {
              $result .= '\n';
          } elseif ($nr == 13) {
              /* carriage return: was wrongly emitted as \t */
              $result .= '\r';
          } elseif ($nr == 34) {
              $result .= '\"';
          } elseif ($nr == 92) {
              /* a backslash must be escaped as two backslashes */
              $result .= '\\\\';
          } elseif ($nr >= 32 && $nr < 127) {
              /* printable ASCII is copied through unchanged */
              $result .= $char;
          } elseif ($nr < 65536) {
              /* remaining control characters, #x7F and everything up to #xFFFF */
              $result .= "\\u".sprintf("%04X", $nr);
          } elseif ($nr < 1114112) {
              /* #x10000-#x10FFFF */
              $result .= "\\U".sprintf("%08X", $nr);
          }
          /* anything above #x10FFFF is undefined and dropped */
      }
      return $result;
  }
  329. /**
  330. * Gets a slice of an array.
  331. * Returns the wanted slice, as well as the remainder of the array.
  332. * e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
  333. * @return array
  334. * @access private
  335. * @param array $list
  336. * @param integer $start
  337. * @param integer $end
  338. **/
  function getSpan($list, $start, $end) {
      // The requested slice covers $start (inclusive) to $end (exclusive).
      $span = array_slice($list, $start, $end - $start);
      // Whatever surrounds the slice is glued back together as the remainder.
      $before = array_slice($list, 0, $start);
      $after = array_slice($list, $end);
      $remainder = $this->array_concat($before, $after);
      return array($span, $remainder);
  }
  344. /**
  345. * Concatenates two arrays
  346. * @param array $a
  347. * @param array $b
  348. * @returns array
  349. * @access private
  350. **/
  function array_concat($a, $b) {
      // Append the values of $b to $a. Numeric keys of $a are renumbered and
      // the keys of $b are discarded, exactly as a splice-at-the-end does.
      return array_merge($a, array_values((array) $b));
  }
  355. /**
  356. * Returns an array with all indexes where item appears in list, followed by the list length as an end sentinel
  357. * @param array $list
  358. * @param string $item
  359. * @returns array
  360. * @access private
  361. **/
  function posns($list, $item) {
      // Positions are counted from 0 regardless of the array's own keys;
      // matching is strict (===).
      $positions = array_keys(array_values($list), $item, true);
      // The list length is always appended as an end-of-list sentinel.
      $positions[] = count($list);
      return $positions;
  }
  372. /* More N3 specific functions */
  373. /**
  374. * Returns a list of tokens
  375. * @param string $s
  376. * @returns array
  377. * @access private
  378. **/
  function toke($s) {
      // Notation3 tokenizer: takes a string, returns the raw token list.
      if (strlen($s) == 0) die('Document has no content!');

      // Normalise Windows and old Mac line endings to "\n".
      $s = str_replace(array("\r\n", "\r"), "\n", $s);

      // Every match of the token pattern becomes one trimmed token.
      preg_match_all($this->Tokens, $s, $matches);
      $tokens = array_map('trim', $matches[0]);
      return $this->array_concat(array(), $tokens);
  }
  400. /**
  401. * Returns a list in which the elements from start up to (but not including) end are merged into one nested array element
  402. * e.g. listify(["a","b","c","d"],1,3) => ["a",["b","c"],"d"]
  403. * @param array $list
  404. * @param integer $start
  405. * @param integer $end
  406. * @returns array
  407. * @access private
  408. **/
  function listify($list, $start, $end) {
      // $start and $end are offsets; $end is exclusive.
      $head = array_slice($list, 0, $start);
      // The merged elements become ONE nested array element, not a string.
      $head[] = array_slice($list, $start, $end - $start);
      $tail = array_slice($list, $end);
      return $this->array_concat($head, $tail);
  }
  420. /**
  421. * Returns an array with prefixes=>namespace mappings
  422. * @param array $list
  423. * @access private
  424. * @returns array
  425. **/
  function getPrefixes($list) {
      // Extracts every "@prefix ns: <uri> ." declaration from the token list.
      // Returns array($prefixes, $remainingTokens), where $prefixes maps
      // "ns:" => "uri". Also records uri => "ns" in $this->parsedNamespaces.
      $prefixes = array();
      // filterWs() leaves gaps in the keys; positions must be contiguous.
      $list = array_values($list);

      // Locate each declaration by its real position. The previous code used
      // current($list), which yields a token value rather than an index and
      // only worked when all declarations sat at the very start of the input.
      while (($pos = array_search('@prefix', $list, true)) !== false) {
          // A declaration is four tokens: '@prefix', 'ns:', '<uri>', '.'
          $binding = array_splice($list, $pos, 4);
          if (!isset($binding[2])) {
              // Truncated declaration at the end of the input.
              $this->parseError = true;
              break;
          }
          $uri = substr($binding[2], 1, -1);      // strip '<' and '>'
          $prefixes[$binding[1]] = $uri;
          $this->parsedNamespaces[$uri] = substr($binding[1], 0, -1); // strip ':'
      }
      return array($prefixes, $list);
  }
  445. /**
  446. * Callback function for replacing "a" elements with the right RDF uri.
  447. * @param string $l
  448. * @access private
  449. **/
  function replace_a_type(&$l,$p) {
      // array_walk() callback; $p (the key) is unused.
      if ($l != 'a') {
          return;
      }
      // The bare keyword "a" is shorthand for rdf:type.
      $l = '<' . $this->RDF_NS . 'type>';
  }
  453. /**
  454. * Callback function for replacing "=" elements with the owl:sameAs URI.
  455. * @param string $l
  456. * @access private
  457. **/
  function replace_equal(&$l,$p) {
      // array_walk() callback; $p (the key) is unused.
      if ($l != '=') {
          return;
      }
      // "=" is shorthand for owl:sameAs.
      $l = '<' . $this->OWL_NS . 'sameAs>';
  }
  461. /**
  462. * Callback function for replacing "this" elements with the right RDF uri.
  463. * @param string $l
  464. * @access private
  465. **/
  function replace_this(&$l,$p) {
      // array_walk() callback; $p (the key) is unused.
      // $l must be taken by reference, like the sibling replace_* callbacks;
      // by value the assignment below was silently discarded.
      if ($l === 'this') $l = '<urn:urn-n:this>';
  }
  469. /**
  470. * Applies stuff :)
  471. * Expands namespace prefixes etc.
  472. * @param array $prefixes
  473. * @param array $list
  474. * @returns $list
  475. * @access private
  476. **/
  function applyStuff($prefixes, $list)
  {
      // Normalises a flat token list (contiguous keys from 0 are expected):
      //  - keywords 'a', '=' and 'this' become URIs,
      //  - '<>' becomes a generated self-reference URI,
      //  - numeric tokens become int / float values,
      //  - prefixed names are expanded with $prefixes ("ns:" => uri),
      //  - string literals are unescaped and reduced to single quoting,
      //  - datatype markers are normalised to the form ^^<uri>.
      // Keyword substitution runs first, while every token is still a string.
      array_walk($list, array($this, 'replace_a_type'));
      array_walk($list, array($this, 'replace_equal'));
      array_walk($list, array($this, 'replace_this'));

      // Built lazily and reused, so every '<>' in one document resolves to
      // the same URI even if the clock ticks during parsing.
      $selfUri = null;

      for ($i = 0, $total = count($list); $i < $total; $i++) {
          if ($list[$i] === '<>') {
              if ($selfUri === null) {
                  if (!isset($_SERVER['SERVER_ADDR'])) {
                      $_SERVER['SERVER_ADDR'] = 'localhost';
                  }
                  if (!isset($_SERVER['REQUEST_URI'])) {
                      $_SERVER['REQUEST_URI'] = '/rdfapi-php';
                  }
                  $selfUri = '<http://'.$_SERVER['SERVER_ADDR'].$_SERVER['REQUEST_URI'].'#generate_timestamp_'.time().'>';
              }
              $list[$i] = $selfUri;
          }

          $token = $list[$i];
          if (preg_match('/^[-+]?[0-9]+$/', $token)) {
              // integer
              $list[$i] = intval($token);
          } else if (is_numeric($token)) {
              // float or decimal - indistinguishable after conversion
              $list[$i] = floatval($token);
          } else if ((!strstr('<_"\'?.;,{}[]()@', $token[0]))
              && (substr($token, 0, 3) != '^^<')
          ) {
              // Prefixed name (possibly a ^^prefix:name datatype) or unknown.
              // Limit 2 keeps local names that themselves contain ':' intact.
              $_r = explode(':', $token, 2);
              $ns = $_r[0] . ':';
              $name = isset($_r[1]) ? $_r[1] : '';
              if (isset($prefixes[$ns])) {
                  $list[$i] = '<'.$prefixes[$ns].$name.'>';
              } else if (isset($prefixes[substr($ns, 2)])) {
                  // "^^ns:name": resolved here, wrapped in <> further below
                  $list[$i] = '^^' . $prefixes[substr($ns, 2)] . $name . '';
              } else {
                  $this->parseError = true;
                  trigger_error('Prefix not declared: '.$ns, E_USER_ERROR);
                  break;
              }
          } else if ($token[0] == '"' || $token[0] == '\'') {
              // String literal. NOTE: stripslashes() only removes the
              // backslashes; sequences such as \n are not turned into
              // control characters.
              $chBase = $token[0];
              $tripleBase = $chBase . $chBase . $chBase;
              if (substr($token, 0, 3) == $tripleBase) {
                  if (substr($token, -3, 3) != $tripleBase) {
                      die ('Incorrect string formatting: '.substr($token,-3,3));
                  }
                  // Long literal: reduce the triple quotes to single quotes.
                  $list[$i] = $chBase . stripslashes(substr($token, 3, -3)) . $chBase;
              } else {
                  if (strstr($token, "\n")) {
                      die('Newline in literal: ' . $token);
                  }
                  $list[$i] = stripslashes($token);
              }
          }

          // Make sure every datatype marker has the form ^^<uri>.
          if (is_string($list[$i])
              && substr($list[$i], 0, 2) == '^^'
              && $list[$i][2] != '<'
          ) {
              $list[$i] = '^^<' . substr($list[$i], 2) . '>';
          }
      }
      return $list;
  }
  561. /**
  562. * Returns an array of triples extracted from the list of n3 tokens
  563. * @param array $list
  564. * @returns array
  565. * @access private
  566. **/
  function getStatements($list) {
      // Splits the token stream into statements at each '.' token (the dot
      // itself is dropped). Tokens after the last '.' are discarded.
      // Comparison is strict: numeric tokens are real ints/floats by now,
      // and on PHP < 8 the loose test 0 == '.' is true.
      $statements = array();
      $current = array();
      foreach ($list as $token) {
          if ($token === '.') {
              $statements[] = $current;
              $current = array();
          } else {
              $current[] = $token;
          }
      }
      return $statements;
  }
  582. /**
  583. * Gets a list of triples with same subject
  584. * e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes" .
  585. * @param array $list
  586. * @returns array
  587. * @access private
  588. **/
  function getPovs($list) {
      // Splits one statement at its ';' tokens.
      // Returns array($head, $povs): $head holds the tokens before the first
      // ';' (subject, predicate, object...), $povs is a list of the
      // predicate/object groups that followed each ';'. Lone semicolons
      // (e.g. "<a> <b> <c> ; .") produce no group.
      // ';' is matched strictly: with loose matching an integer 0 token
      // counted as ';' on PHP < 8 and the old loop never terminated.
      $head = array();
      $povs = array();
      $current = null; // null while still reading the head
      foreach ($list as $token) {
          if ($token === ';') {
              if ($current !== null && count($current) > 0) {
                  $povs[] = $current;
              }
              $current = array();
          } elseif ($current === null) {
              $head[] = $token;
          } else {
              $current[] = $token;
          }
      }
      if ($current !== null && count($current) > 0) {
          $povs[] = $current;
      }
      return array($head, $povs);
  }
  603. /**
  604. * Gets a list of triples with same predicate
  605. * e.g. :Gunnar :likes "Cheese", "Wine".
  606. * @access private
  607. * @param array $list
  608. * @returns array
  609. **/
  function getObjs($list) {
      // Pulls the ", object" groups out of a predicate/object token list.
      // Returns array($rest, $objs). Each entry of $objs is
      // array(',', object, langOrDatatype|' ', datatype|' ').
      // ',' is matched strictly: on PHP < 8 a loose search would stop at an
      // integer 0 token.
      $objs = array();
      $list = array_values($list); // positions below assume contiguous keys

      while (($pos = array_search(',', $list, true)) !== false) {
          // A group is at least the comma and the object that follows it.
          $fields = 2;
          if (isset($list[$pos + 2])) {
              $next = $list[$pos + 2];
              if (is_string($next) && $next !== '') {
                  if ($next[0] == '@') $fields++; // language tag
                  if ($next[0] == '^') $fields++; // datatype
              }
          }
          if (isset($list[$pos + 3])) {
              $afterNext = $list[$pos + 3];
              if (is_string($afterNext) && $afterNext !== '' && $afterNext[0] == '^') {
                  $fields++; // datatype following a language tag
              }
          }

          // Cut the group out of the list; the rest closes up behind it.
          $obj = array_splice($list, $pos, $fields);
          // Pad so callers can always read slots 2 and 3.
          if (!isset($obj[2])) $obj[2] = ' ';
          if (!isset($obj[3])) $obj[3] = ' ';
          $objs[] = $obj;
      }
      return array($list, $objs);
  }
  632. /**
  633. * Does the real work, returns a list of subject, predicate, object triples.
  634. * @param array $list
  635. * @returns array
  636. * @access private
  637. **/
  function statementize($list) {
      // Expands one statement (token list without the trailing '.') into
      // triples. A plain 3-token statement is returned as is; anything longer
      // is split on ';' and ',' and yields 5-slot entries
      // (subject, predicate, object, extra, extra) padded with ' '.
      $size = count($list);
      if ($size == 1 && preg_match("/_".BNODE_PREFIX."[0-9]+_/",$list[0])) {
          if ($this->debug) print "Ignored bNode exists statement. $list\n";
          return array();
      }
      if ($size == 3) return array($list);
      if ($size < 3) {
          throw new Exception(
              'N3 statement too short,'
              . ' only ' . count($list) . ' elements instead of 3:' . "\n"
              . implode("\n", $list)
          );
      }

      // First split off the "; predicate object" groups.
      list($spo, $povs) = $this->getPovs($list);
      $triples = array();
      $subject = $spo[0];
      foreach ($povs as $pov) {
          // Within a group, split off the ", object" repetitions.
          list($po, $moreObjects) = $this->getObjs($pov);
          if (!isset($po[2])) $po[2] = ' ';
          if (!isset($po[3])) $po[3] = ' ';
          $predicate = $po[0];
          $triples[] = array($subject, $predicate, $po[1], $po[2], $po[3]);
          foreach ($moreObjects as $o) {
              $triples[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
          }
      }

      // Finally the leading "subject predicate object, object..." part.
      list($spo, $moreObjects) = $this->getObjs($spo);
      $subject = $spo[0];
      $predicate = $spo[1];
      if (!isset($spo[3])) $spo[3] = ' ';
      if (!isset($spo[4])) $spo[4] = ' ';
      $triples[] = array($subject, $predicate, $spo[2], $spo[3], $spo[4]);
      foreach ($moreObjects as $o) {
          $triples[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
      }
      return $triples;
  }
  684. /**
  685. * Makes each bracketed run of elements in list into a separate nested array element (the bracket tokens are kept inside it).
  686. * e.g. doLists(["a","b","[","c","]","d"], "[","]")=> ["a","b", ["[","c","]"], "d"]
  687. * @param array $list
  688. * @param string $schar
  689. * @param string $echar
  690. * @returns array
  691. * @access private
  692. **/
  function doLists($list, $schar, $echar) {
      // Groups every $schar ... $echar run into one nested array element.
      // The bracket tokens stay inside the group as its first and last
      // element, and groups nest. Only top-level tokens are examined; array
      // elements produced by an earlier pass are passed through untouched.
      // A closing token without an opener is left in place as a plain token.
      //
      // Single pass with an explicit stack. Brackets are matched strictly,
      // because on PHP < 8 the loose test 0 == '[' is true and an integer 0
      // token was mistaken for a bracket.
      $stack = array();   // partially built enclosing levels
      $current = array(); // level currently being filled
      foreach ($list as $token) {
          if ($token === $schar) {
              $stack[] = $current;
              $current = array($token);
          } elseif ($token === $echar && count($stack) > 0) {
              $current[] = $token;
              $group = $current;
              $current = array_pop($stack);
              $current[] = $group;
          } else {
              $current[] = $token;
          }
      }
      if (count($stack) > 0) {
          // Previously this read an undefined offset and mangled the list.
          throw new Exception(
              'N3 list not closed: missing "' . $echar . '" for "' . $schar . '"'
          );
      }
      return $current;
  }
  727. /**
  728. * Apply doLists for all different types of list.
  729. * @param array
  730. * @returns array
  731. * @access private
  732. **/
  function listStuff($list) {
      // Group each bracket type in turn: [ ] first, then { }, then ( ).
      $bracketPairs = array(
          array('[', ']'),
          array('{', '}'),
          array('(', ')'),
      );
      foreach ($bracketPairs as $pair) {
          $list = $this->doLists($list, $pair[0], $pair[1]);
      }
      return $list;
  }
  740. /**
  741. * Generates a new node id.
  742. * @access private
  743. * @returns string
  744. **/
  function bnodeID() {
      // Advance the per-parser counter and wrap it as "_<prefix><n>_".
      $this->bNode += 1;
      $id = "_" . BNODE_PREFIX . $this->bNode . "_";
      return $id;
  }
  749. /**
  750. * This makes bNodes out of variables like _:a etc.
  751. * @access private
  752. * @param array $list
  753. * @returns array
  754. **/
  function fixAnon($list) {
      // Replace every "_:label" token with a generated blank-node id. The
      // label => id mapping is kept in $this->bNodeMap, so the same label
      // always maps to the same id.
      $total = count($list);
      for ($idx = 0; $idx < $total; $idx++) {
          $label = $list[$idx];
          if (substr($label, 0, 2) != "_:") {
              continue;
          }
          if (isset($this->bNodeMap[$label])) {
              $id = $this->bNodeMap[$label];
          } else {
              $id = $this->bnodeID();
              $this->bNodeMap[$label] = $id;
          }
          $list[$idx] = $id;
      }
      return $list;
  }
  769. /**
  770. * This makes [ ] lists into bnodes.
  771. * @access private
  772. * @param array $list
  773. * @return array
  774. **/
  function expandLists($list) {
      // Replaces nested bracket groups (as built by doLists) with blank nodes
      // and appends the statements describing them to the end of the list:
      //  - [ p o ; ... ]  becomes a blank node with those properties,
      //  - ( a b ... )    becomes an rdf:first / rdf:rest linked list,
      //  - ( )            becomes rdf:nil.
      // Any other group type aborts via die().
      // count() is re-evaluated on every pass on purpose: the loop appends to
      // $list, and appended nested groups must be visited too.
      for ($i = 0; $i < count($list); $i++) {
          if (!is_array($list[$i])) {
              continue;
          }
          $group = $list[$i];
          $opener = isset($group[0]) ? $group[0] : null;

          if ($opener === '[') {
              // Anonymous node: put a bnode in place of the group and append
              // "bnode <properties...> ." as a new statement.
              $bnode = $this->bnodeID();
              $list[$i] = $bnode;
              $list[] = $bnode;
              $list = $this->array_concat($list, array_slice($group, 1, -1));
              $list[] = '.';
          } elseif ($opener === '(') {
              $rdfNil = '<'. RDF_NAMESPACE_URI . RDF_NIL .'>';
              $rdfFirst = '<'. RDF_NAMESPACE_URI . RDF_FIRST .'>';
              $rdfRest = '<'. RDF_NAMESPACE_URI . RDF_REST .'>';
              // members without the surrounding "(" and ")"
              $items = array_slice($group, 1, -1);
              $count = count($items);

              if ($count == 0) {
                  // The empty collection is rdf:nil itself; previously a
                  // dangling blank node with no statements was emitted.
                  $list[$i] = $rdfNil;
                  continue;
              }

              $fromBnode = $this->bnodeID();
              $toBnode = $this->bnodeID();
              // link the head of the collection into the graph
              $list[$i] = $fromBnode;
              for ($idx = 0; $idx < $count; $idx++) {
                  // rdf:first -> the member itself
                  $list[] = $fromBnode;
                  $list[] = $rdfFirst;
                  $list[] = $items[$idx];
                  $list[] = '.';
                  // rdf:rest -> next cell, or rdf:nil after the last member
                  $list[] = $fromBnode;
                  $list[] = $rdfRest;
                  if ($idx == $count - 1) {
                      $list[] = $rdfNil;
                      $list[] = '.';
                  } else {
                      $list[] = $toBnode;
                      $list[] = '.';
                      $fromBnode = $toBnode;
                      $toBnode = $this->bnodeID();
                  }
              }
          } else {
              die('Only [ ] and () lists are supported!');
          }
      }
      return $list;
  }
  828. /**
  829. * Main work-horse function. This converts a N3 string to a list of statements
  830. * @param string $s
  831. * @returns array
  832. * @access private
  833. **/
  function n3tolist($s) {
      // Converts an N3 string into a list of triples, each triple being a
      // list of token strings/values. Intermediate results are dumped after
      // each stage when $this->debug is set.
      $verbose = $this->debug;

      // 1. tokenize and drop whitespace / comment tokens
      $tokens = $this->filterWs($this->toke($s));
      if ($verbose) {
          print "Filter WS:\n";
          var_dump($tokens);
      }

      // 2. pull out the @prefix directives
      $split = $this->getPrefixes($tokens);
      $prefixes = $split[0];
      $tokens = $split[1];
      if ($verbose) {
          print "Prefixes:\n";
          var_dump($prefixes);
          print "***\n";
          var_dump($tokens);
      }

      // 3. apply prefixes, keywords and string formatting
      $tokens = $this->applyStuff($prefixes, $tokens);
      if ($verbose) {
          print "Stuff applied:\n";
          var_dump($tokens);
      }

      // 4. map _:label blank nodes to generated ids
      $tokens = $this->fixAnon($tokens);
      if ($verbose) {
          print "Fix anon:\n";
          var_dump($tokens);
      }

      // 5. group bracketed runs into nested arrays
      $tokens = $this->listStuff($tokens);
      if ($verbose) {
          print "Lists done:\n";
          var_dump($tokens);
      }

      // 6. expand the groups into blank nodes plus extra statements
      $tokens = $this->expandLists($tokens);
      if ($verbose) {
          print "Lists applied:\n";
          var_dump($tokens);
      }

      // 7. split at '.' and expand every statement into its triples
      $result = array();
      foreach ($this->getStatements($tokens) as $statement) {
          foreach ($this->statementize($statement) as $triple) {
              $result[] = $triple;
          }
      }
      return $result;
  }
  882. /**
  883. * Constructs a RAP RDFNode from URI/Literal/Bnode
  884. * @access private
  885. * @param string $s
  886. * @returns object RDFNode
  887. **/
  function toRDFNode($s, $state)
  {
      // Builds a Literal, BlankNode or Resource from one token.
      // $s     is the token (string, or int/float for numeric literals);
      // $state is the whole triple; slots 3+ may carry a language tag
      //        ("@..") and/or a datatype ("^^..") that apply to a string
      //        literal.

      // Numeric tokens were converted by applyStuff(); handle them before any
      // string offset is taken on $s.
      if (is_int($s)) {
          $value = new Literal($s);
          $value->setDatatype(XML_SCHEMA . 'integer');
          return $value;
      }
      if (is_float($s)) {
          $value = new Literal($s);
          $value->setDatatype(XML_SCHEMA . 'double');
          return $value;
      }

      // token without its delimiters (quotes, angle brackets, underscores)
      $ins = substr($s, 1, -1);

      if ($s[0] == '"' || $s[0] == '\'') {
          $lang = NULL;
          $dtype = NULL;
          for ($i = 3, $n = count($state); $i < $n; $i++) {
              $extra = $state[$i];
              if ($extra[0] == '@') {
                  // read the tag from the slot being inspected
                  // (this used to be hard-wired to slot 3)
                  $lang = substr($extra, 1);
              }
              if (substr($extra, 0, 2) == '^^') {
                  $dtype = substr($extra, 2);
                  if ($dtype[0] == '<') {
                      $dtype = substr($dtype, 1, -1);
                  }
              }
          }
          if (UNIC_RDF) {
              $ins = $this->str2unicode_nfc($ins);
          }
          $new_Literal = new Literal($ins, $lang);
          if ($dtype !== NULL) {
              $new_Literal->setDatatype($dtype);
          }
          return $new_Literal;
      }

      if ($s === '@true' || $s === '@false') {
          $value = new Literal($s === '@true');
          $value->setDatatype(XML_SCHEMA . 'boolean');
          return $value;
      }

      // Generated blank-node ids always START with "_<prefix>"; a substring
      // test would also catch ordinary URIs that merely contain that text.
      if (strpos($s, '_' . BNODE_PREFIX) === 0) {
          $label = array_search($s, $this->bNodeMap, true);
          if ($this->FixBnodes || $label === false) {
              return new BlankNode($ins);
          }
          // restore the label used in the document, minus its "_:" marker
          return new BlankNode(trim(substr($label, 2)));
      }

      return new Resource($ins);
  }
  947. } //end: N3Parser
  948. ?>