PageRenderTime 68ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/rdfapi-php/api/syntax/N3Parser.php

https://github.com/komagata/plnet
PHP | 1052 lines | 844 code | 78 blank | 130 comment | 42 complexity | d8a23eace8180640385702a1ff86ad33 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. // ----------------------------------------------------------------------------------
  3. // Class: N3Parser
  4. // ----------------------------------------------------------------------------------
  5. /**
  6. * PHP Notation3 Parser
  7. *
  8. * This parser can parse a subset of n3, reporting triples to a callback function
  9. * or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
  10. *
  11. * Supported N3 features:
  12. * <ul>
  13. * <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference ('<>')</li>
  14. * <li>@prefix mappings</li>
  15. * <li>= maps to owl#sameAs</li>
  16. * <li>a maps to rdf-syntax-ns#type</li>
  17. * <li>Literal datatype and xml:lang language tag support</li>
  18. * </ul>
  19. * Un-supported N3 Features include:
  20. * <ul>
  21. * <li>Reification using { }</li>
  22. * <li>. and ^ operators for tree traversal</li>
  23. * <li>Any log operators, like log:forAll etc.</li>
  24. * </ul>
  25. *
  26. * This parser is based on n3.py from Eep released 2nd March, 2002.
  27. * by Sean B. Palmer
  28. * ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
  29. *
  30. * This parser is released under the GNU GPL license.
  31. * ( http://www.gnu.org/licenses/gpl.txt )
  32. *
  33. *
  34. *
  35. * @author Sean B. Palmer <sean@mysterylights.com>, Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>, Daniel Westphal <mail@d-westphal.de>
  36. * @version $Id: N3Parser.php,v 1.25 2006/05/15 05:24:36 tgauss Exp $
  37. * @package syntax
  38. * @access public
  39. **/
  40. class N3Parser extends Object {
  41. /* ==================== Variables ==================== */
  42. var $Tokens;
  43. var $bNode;
  44. var $RDF_NS, $DAML_NS, $OWL_NS;
  45. var $debug;
  46. var $parseError;
  47. var $parsedNamespaces = array();
  48. /* ==================== Public Methods ==================== */
  /**
   * Constructor (PHP 5+ entry point).
   *
   * Delegates to the PHP4-style N3Parser() initialiser, because newer PHP
   * versions no longer treat a method named after the class as a constructor.
   *
   * @access public
   **/
  function __construct() {
      $this->N3Parser();
  }

  /**
   * Builds the tokenizer regular expression and resets the parser state.
   *
   * Kept under its historical name so that existing code relying on the
   * PHP4-style constructor keeps working.
   *
   * @access public
   **/
  function N3Parser() {
      // Regular expression fragments, one per token type.
      $Name         = '[A-Za-z0-9_@\.]+[^\.,;\[\] ]*';
      $URI          = '<[^> ]*>';
      $bNode        = '_:'.$Name;
      $Univar       = '\?'.$Name;
      $QName        = '(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.$Name;
      $Literal      = '"(\\\"|[^"])*"';
      $LangTag      = '@[A-Za-z\-]*[^ \^\.\;\,]';
      $Datatype     = '(\^\^)[^ ,\.;)]+';
      $Datatype_URI = '(\^\^)'.$URI;
      $LLiteral     = '"""[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""';
      $Comment      = '# .*$';
      $Prefix       = '(?:[A-Za-z][A-Za-z0-9_]*)?:';
      $PrefixDecl   = '@prefix';
      $WS           = '[ \t]';

      $this->RDF_NS  = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // used for the 'a' keyword
      $this->DAML_NS = 'http://www.daml.org/2001/03/daml+oil#';       // kept for compatibility; '=' maps to OWL_NS
      $this->OWL_NS  = 'http://www.w3.org/2002/07/owl#';              // used for the '=' keyword

      // Order matters: alternatives are tried left to right.
      $t = array(
          $Datatype_URI, $Datatype, $LLiteral, $URI, $Literal, $PrefixDecl,
          $QName, $bNode, $Prefix, $Univar, 'a', '=', '{', '}', '\(', '\)',
          '\[', '\]', ',', ';', '\.', $WS, $Comment, $LangTag
      );
      // implode() with the glue first: the legacy join($pieces, $glue)
      // argument order is rejected by PHP 8.
      $this->Tokens = "/(".implode("|", $t).")/m";

      $this->bNode      = 0;
      $this->debug      = 0;
      $this->bNodeMap   = array();
      $this->FixBnodes  = FIX_BLANKNODES;
      $this->parseError = false;
  }
  /**
   * Chooses whether blank node labels found in the N3 input are replaced by
   * generated labels. Anything that is not a real boolean is ignored and
   * leaves the current setting untouched.
   *
   * @param boolean $set
   * @access public
   **/
  function setFixBnodes($set) {
      if (!is_bool($set)) {
          return;
      }
      $this->FixBnodes = $set;
  }
  /**
   * Parses an N3 string and prints every triple as "(s, p, o)" on its own line.
   * A language tag and/or datatype that follows the object is appended to it.
   *
   * @param string $s
   * @access public
   **/
  function parse($s) {
      foreach ($this->n3tolist($s) as $triple) {
          $object = $triple[2];
          if (count($triple) > 3) {
              // slots 3 and 4 carry an optional "@lang" and/or "^^datatype"
              foreach (array(3, 4) as $slot) {
                  if ($triple[$slot][0] == '@') {
                      $object .= $triple[$slot];
                  }
                  if (substr($triple[$slot], 0, 2) == '^^') {
                      $object .= $triple[$slot];
                  }
              }
          }
          print '('.$triple[0].', '.$triple[1].', '.$object.")\n";
      }
  }
  /**
   * Parses an N3 string and calls $func($subject, $predicate, $object) once
   * for each triple. A language tag and/or datatype that follows the object
   * is appended to the object string.
   *
   * @param string $s
   * @param string $func name of the callback function
   * @access public
   **/
  function uparse($s,$func) {
      $triples = $this->n3tolist($s);
      foreach ($triples as $triple) {
          $object = $triple[2];
          if (count($triple) > 3) {
              // slots 3 and 4 carry an optional "@lang" and/or "^^datatype"
              $slot = 3;
              while ($slot < 5) {
                  $extra = $triple[$slot];
                  if ($extra[0] == '@') {
                      $object .= $extra;
                  }
                  if (substr($extra, 0, 2) == '^^') {
                      $object .= $extra;
                  }
                  $slot++;
              }
          }
          $func($triple[0], $triple[1], $object);
      }
  }
  /**
   * Parses an N3 string into a model.
   *
   * @param string $s      N3 source text
   * @param mixed  $model  model to add the statements to; when omitted
   *                       (or false) a new MemModel is created
   * @access public
   * @return object Model
   **/
  function parse2model($s,$model = false) {
      $target = ($model == false) ? new MemModel() : $model;

      $rows = $this->n3tolist($s);
      foreach ($rows as $row) {
          // the whole row is handed over so that literals can pick up
          // their language tag / datatype from the trailing slots
          $subject   = $this->toRDFNode($row[0], $row);
          $predicate = $this->toRDFNode($row[1], $row);
          $object    = $this->toRDFNode($row[2], $row);
          $target->add(new Statement($subject, $predicate, $object));
      }

      $target->addParsedNamespaces($this->parsedNamespaces);
      return $target;
  }
  /**
   * Generates a model from an URI or file.
   *
   * The resource is read completely and then handed to parse2model().
   * Execution stops with a message if the resource cannot be opened.
   *
   * @access public
   * @param  string $path   file name or URI to read
   * @param  mixed  $dummy  unused
   * @param  mixed  $model  optional model to add the statements to
   * @return object MemModel
   */
  function & generateModel($path,$dummy=false,$model=false) {
      $handle = fopen($path,'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");
      $input = "";
      do {
          $chunk = fread($handle, 512);
          if ($chunk === false) {
              // read error: stop instead of looping forever on a handle
              // that never reports end-of-file
              break;
          }
          $input .= $chunk;
      } while (!feof($handle));
      fclose($handle);

      // A function declared with & must return a variable, not an
      // expression, otherwise PHP raises a notice.
      $result = $this->parse2model($input, $model);
      return $result;
  }
  185. /* ==================== Private Methods from here ==================== */
  186. // General list processing functions
  /**
   * Filter predicate for tokens: returns FALSE when the token is empty,
   * consists only of whitespace or is a "#" comment, TRUE otherwise.
   *
   * @access private
   * @param string $s
   * @return boolean
   **/
  function isWS($s) {
      $blankOrComment = preg_match('/^(#.*|\s*)$/', $s);
      if ($blankOrComment) {
          return false;
      }
      return true;
  }
  /**
   * Returns true if the string is neither empty nor a comment line, i.e. a
   * line whose first non-blank character (after spaces/tabs) is "#".
   *
   * @access private
   * @param string $s
   * @return boolean
   **/
  function notComment($s) {
      if ($s == "") {
          return false;
      }
      // PCRE replaces the removed ereg(); here \t really is a tab, whereas
      // inside a POSIX bracket expression it meant "backslash or letter t".
      return !preg_match('/^[ \t]*#/', $s);
  }
  /**
   * Drops every token rejected by isWS() (blank and comment tokens).
   * The keys of the surviving tokens are kept as they were.
   *
   * @access private
   * @param array $list
   * @return array
   **/
  function filterWs($list) {
      $kept = array();
      foreach ($list as $key => $token) {
          if ($this->isWS($token)) {
              $kept[$key] = $token;
          }
      }
      return $kept;
  }
  /**
   * Escapes a string following the N-Triples string rules
   * (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings): control and
   * non-ASCII characters become \uHHHH / \UHHHHHHHH, tab, line feed,
   * carriage return and double quote get their short escapes.
   *
   * @param String $str
   * @return String
   * @access private
   */
  function str2unicode_nfc($str=""){
      $result = "";

      /* try to detect encoding: if decoding introduces no new "?" the input
         is treated as UTF-8 and decoded before the per-byte walk below */
      $tmp = str_replace("?", "", $str);
      if (strpos(utf8_decode($tmp), "?") === false) {
          $str = utf8_decode($str);
      }

      for ($i = 0, $i_max = strlen($str); $i < $i_max; $i++) {
          $char = $str[$i];
          /* re-encode the single byte to derive its code point */
          $utf8_char = utf8_encode($char);
          $bytes = strlen($utf8_char);

          $nr = 0; /* unicode dec nr */
          if ($bytes == 1) {
              /* 0####### (0-127) */
              $nr = ord($utf8_char);
          }
          elseif ($bytes == 2) {
              /* 110##### 10###### = 192+x 128+x */
              $nr = ((ord($utf8_char[0])-192)*64) + (ord($utf8_char[1])-128);
          }
          elseif ($bytes == 3) {
              /* 1110#### 10###### 10###### = 224+x 128+x 128+x */
              $nr = ((ord($utf8_char[0])-224)*4096) + ((ord($utf8_char[1])-128)*64) + (ord($utf8_char[2])-128);
          }
          elseif ($bytes == 4) {
              /* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */
              $nr = ((ord($utf8_char[0])-240)*262144) + ((ord($utf8_char[1])-128)*4096) + ((ord($utf8_char[2])-128)*64) + (ord($utf8_char[3])-128);
          }

          if ($nr < 9) {            /* #x0-#x8 */
              $result .= "\\u".sprintf("%04X",$nr);
          }
          elseif ($nr == 9) {       /* #x9 tab */
              $result .= '\t';
          }
          elseif ($nr == 10) {      /* #xA line feed */
              $result .= '\n';
          }
          elseif ($nr < 13) {       /* #xB-#xC */
              $result .= "\\u".sprintf("%04X",$nr);
          }
          elseif ($nr == 13) {      /* #xD carriage return (was wrongly emitted as \t) */
              $result .= '\r';
          }
          elseif ($nr < 32) {       /* #xE-#x1F */
              $result .= "\\u".sprintf("%04X",$nr);
          }
          elseif ($nr < 34) {       /* #x20-#x21 */
              $result .= $char;
          }
          elseif ($nr == 34) {      /* #x22 double quote */
              $result .= '\"';
          }
          elseif ($nr < 92) {       /* #x23-#x5B */
              $result .= $char;
          }
          elseif ($nr == 92) {      /* #x5C backslash */
              // NOTE(review): this emits a single backslash, whereas the
              // N-Triples rules ask for two. Left unchanged because the
              // input may already contain escape sequences - confirm.
              $result .= '\\';
          }
          elseif ($nr < 127) {      /* #x5D-#x7E */
              $result .= $char;
          }
          elseif ($nr < 65536) {    /* #x7F-#xFFFF */
              $result .= "\\u".sprintf("%04X",$nr);
          }
          elseif ($nr < 1114112) {  /* #x10000-#x10FFFF */
              $result .= "\\U".sprintf("%08X",$nr);
          }
          /* anything else is not defined and is dropped */
      }
      return $result;
  }
  /**
   * Cuts the elements from offset $start up to (not including) $end out of
   * a list. Returns a pair: the removed slice and the remaining list.
   * e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
   *
   * @return array
   * @access private
   * @param array $list
   * @param integer $start
   * @param integer $end
   **/
  function getSpan($list, $start, $end) {
      $span = array_slice($list, $start, $end - $start);

      // remainder = everything before the span followed by everything after it
      $remainder = array_slice($list, 0, $start);
      $tail = array_slice($list, $end);
      array_splice($remainder, count($remainder), 0, $tail);

      return array($span, $remainder);
  }
  /**
   * Appends the values of $b to $a and returns the result. Numeric keys are
   * renumbered; the keys of $b are not carried over.
   *
   * @param array $a
   * @param array $b
   * @return array
   * @access private
   **/
  function array_concat($a, $b) {
      $appended = array_values((array) $b);
      return array_merge($a, $appended);
  }
  /**
   * Returns the positions (0-based, counted in iteration order) at which
   * $item occurs in $list, compared strictly. The number of elements in
   * $list is always appended as a final entry, so the result can be used
   * directly as a list of span boundaries.
   *
   * @param array $list
   * @param string $item
   * @return array
   * @access private
   **/
  function posns($list, $item) {
      $hits = array();
      foreach (array_values($list) as $offset => $value) {
          if ($value === $item) {
              $hits[] = $offset;
          }
      }
      $hits[] = count($list);
      return $hits;
  }
  344. /* More N3 specific functions */
  /**
   * Notation3 tokenizer: splits the input into a raw list of trimmed tokens
   * (whitespace and comment tokens included).
   *
   * Stops execution if the document is empty. If the regular expression
   * engine itself fails, parseError is set, a warning is raised and an
   * empty list is returned.
   *
   * @param string $s
   * @return array
   * @access private
   **/
  function toke($s) {
      if (strlen($s) == 0) die('Document has no content!');

      // normalise line endings to "\n" ("\r\n" first, then lone "\r")
      $s = str_replace(array("\r\n", "\r"), "\n", $s);

      $matched = preg_match_all($this->Tokens, $s, $found);
      if ($matched === false) {
          // e.g. backtrack/recursion limit hit on a very long literal;
          // previously this was silently treated as "no tokens"
          $this->parseError = true;
          trigger_error('N3 tokenizer failed (PCRE error code '.preg_last_error().')', E_USER_WARNING);
          return array();
      }

      return array_values(array_map('trim', $found[0]));
  }
  /**
   * Returns the list with the elements from offset $start up to (not
   * including) $end folded into a single nested array element.
   * e.g. listify(["a","b","c","d"],1,3) => ["a",["b","c"],"d"]
   *
   * @param array $list
   * @param integer $start
   * @param integer $end
   * @return array
   * @access private
   **/
  function listify($list, $start, $end) {
      $merged = array_slice($list, 0, $start);
      // the grouped elements become one nested entry
      $merged[] = array_slice($list, $start, $end - $start);

      $tail = array_slice($list, $end);
      array_splice($merged, count($merged), 0, $tail);
      return $merged;
  }
  /**
   * Extracts all @prefix declarations from the token list.
   *
   * Each declaration is expected to occupy four tokens
   * ("@prefix", "ns:", "<uri>", "."). The declarations are removed from the
   * list wherever they occur, and every mapping is also recorded in
   * $this->parsedNamespaces (uri => prefix without the trailing colon).
   *
   * @param array $list
   * @access private
   * @return array pair of (prefix => namespace map, re-indexed token list
   *               without the declarations)
   **/
  function getPrefixes($list) {
      $prefixes = array();
      // the whitespace filter leaves gaps in the keys; offsets below
      // require a dense list
      $list = array_values($list);

      // Locate each declaration by its real position. The previous code took
      // the offset from current($list), i.e. from a token value, which only
      // worked by accident when the declaration was at the very start.
      while (($pos = array_search('@prefix', $list, true)) !== false) {
          $binding = array_splice($list, $pos, 4);
          if (!isset($binding[2])) {
              $this->parseError = true;
              trigger_error('Incomplete @prefix declaration', E_USER_ERROR);
              break;
          }
          $ns  = $binding[1];
          $uri = substr($binding[2], 1, -1); // strip the enclosing < >
          $prefixes[$ns] = $uri;
          $this->parsedNamespaces[$uri] = substr($ns, 0, -1);
      }

      return array($prefixes, $list);
  }
  /**
   * array_walk() callback: rewrites the keyword "a" in place to the
   * rdf:type URI (built from $this->RDF_NS).
   *
   * @param string $l token, modified in place
   * @param mixed  $p key supplied by array_walk (unused)
   * @access private
   **/
  function replace_a_type(&$l,$p) {
      if ($l != 'a') {
          return;
      }
      $l = '<'.$this->RDF_NS.'type>';
  }
  /**
   * array_walk() callback: rewrites the keyword "=" in place to the
   * sameAs URI built from $this->OWL_NS.
   *
   * @param string $l token, modified in place
   * @param mixed  $p key supplied by array_walk (unused)
   * @access private
   **/
  function replace_equal(&$l,$p) {
      $isEquals = ($l == '=');
      if ($isEquals) {
          $l = '<'.$this->OWL_NS.'sameAs>';
      }
  }
  /**
   * array_walk() callback: rewrites the keyword "this" in place to
   * <urn:urn-n:this>.
   *
   * The token is taken by reference; with a by-value parameter the
   * replacement was silently lost.
   *
   * @param string $l token, modified in place
   * @param mixed  $p key supplied by array_walk (unused)
   * @access private
   **/
  function replace_this(&$l,$p) {
      if ($l == 'this') $l = '<urn:urn-n:this>';
  }
  /**
   * Normalises the token list:
   *  - replaces the keywords "a", "=" and "this",
   *  - replaces the self reference "<>" by a generated URI,
   *  - expands prefixed names (and prefixed datatypes) to full URIs,
   *  - turns """long""" literals into ordinary "..." literals,
   *  - wraps datatype URIs as ^^<uri>.
   *
   * On an undeclared prefix parseError is set, an E_USER_ERROR is raised and
   * processing stops; the list is returned as far as it was processed.
   *
   * @param array $prefixes map "prefix:" => namespace URI
   * @param array $list     token list
   * @return array the processed token list
   * @access private
   **/
  function applyStuff($prefixes, $list) {
      array_walk($list, array($this, 'replace_a_type'));
      array_walk($list, array($this, 'replace_equal'));
      array_walk($list, array($this, 'replace_this'));

      for ($i = 0; $i < count($list); $i++) {
          // "<>" refers to the document itself; as the document URI is not
          // known here, a local URI with a timestamp fragment is generated.
          if ($list[$i] == '<>') {
              if (!isset($_SERVER['SERVER_ADDR'])) $_SERVER['SERVER_ADDR'] = 'localhost';
              if (!isset($_SERVER['REQUEST_URI'])) $_SERVER['REQUEST_URI'] = '/rdfapi-php';
              $list[$i] = '<http://'.$_SERVER['SERVER_ADDR'].$_SERVER['REQUEST_URI'].'#generate_timestamp_'.time().'>';
          }

          $first = substr($list[$i], 0, 1);

          if ($first !== '' && strpos('<_"?.;,{}[]()@', $first) === false && substr($list[$i], 0, 3) != '^^<') {
              // prefixed name, or datatype written as ^^prefix:name
              $parts = explode(':', $list[$i], 2);
              $ns    = $parts[0].':';
              $name  = isset($parts[1]) ? $parts[1] : '';

              if (isset($prefixes[$ns])) {
                  $list[$i] = '<'.$prefixes[$ns].$name.'>';
              } elseif (substr($ns, 0, 2) == '^^' && isset($prefixes[substr($ns, 2)])) {
                  $list[$i] = '^^'.$prefixes[substr($ns, 2)].$name;
              } else {
                  $this->parseError = true;
                  trigger_error('Prefix not declared: '.$ns, E_USER_ERROR);
                  break;
              }
          } elseif ($first == '"') {
              if (substr($list[$i], 0, 3) == '"""') {
                  if (substr($list[$i], -3, 3) == '"""') {
                      $lit = substr($list[$i], 3, -3);
                      // NOTE(review): search and replacement are the same
                      // two characters (backslash, n), so this call changes
                      // nothing. Kept as is to preserve literal values;
                      // possibly real newlines were meant - confirm.
                      $lit = str_replace('\n', '\\n', $lit);
                      // Escape quotes that are not already escaped. The old
                      // POSIX pattern also replaced the character in front
                      // of each quote.
                      $lit = preg_replace('/(?<!\\\\)"/', '\\\\"', $lit);
                      $list[$i] = '"'.$lit.'"';
                  } else {
                      die('Incorrect string formatting: '.substr($list[$i], -3, 3));
                  }
              } elseif (strstr($list[$i], "\n")) {
                  die('Newline in literal: '.$list[$i]);
              }
          }

          // make sure datatypes always have the form ^^<uri>
          if (substr($list[$i], 0, 2) == '^^' && substr($list[$i], 2, 1) != '<') {
              $list[$i] = '^^<'.substr($list[$i], 2).'>';
          }
      }
      return $list;
  }
  /**
   * Splits the token list at every "." into statements. The terminating "."
   * is not part of a statement; tokens after the last "." are discarded.
   *
   * @param array $list
   * @return array list of token lists, one per statement
   * @access private
   **/
  function getStatements($list) {
      $statements = array();
      while (($dot = array_search('.', $list)) !== false) {
          $statement = array_slice($list, 0, $dot + 1);
          array_pop($statement); // drop the "." itself
          $statements[] = $statement;
          $list = array_values(array_slice($list, $dot + 1));
      }
      return $statements;
  }
  /**
   * Separates the ";"-introduced predicate/object groups from a statement.
   * e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes" .
   *
   * @param array $list tokens of one statement
   * @return array pair of (remaining tokens, list of groups without the ";")
   * @access private
   **/
  function getPovs($list) {
      $povs = array();
      while (in_array(';', $list)) {
          // first ";" and the next boundary (following ";" or end of list)
          $marks = $this->posns($list, ';');
          $parts = $this->getSpan($list, $marks[0], $marks[1]);
          $group = $parts[0];
          $list  = $parts[1];

          // skip lone semicolons, e.g. "<a> <b> <c> ; ."
          if (count($group) == 1) {
              continue;
          }
          $povs[] = array_slice($group, 1);
      }
      return array($list, $povs);
  }
  /**
   * Separates the ","-introduced additional objects from a token list.
   * e.g. :Gunnar :likes "Cheese", "Wine".
   *
   * Each extracted entry is (",", object, x, y) where x and y hold a
   * following language tag / datatype token, or ' ' when absent.
   *
   * @access private
   * @param array $list
   * @return array pair of (remaining tokens, list of extracted entries)
   **/
  function getObjs($list) {
      $objs = array();
      while (in_array(",", $list)) {
          $comma = array_search(",", $list);

          // the comma and the object, plus optional "@lang" / "^^datatype"
          $width = 2;
          if (isset($list[$comma + 2])) {
              $lead = @$list[$comma + 2][0];
              if ($lead == '@') $width++;
              if ($lead == '^') $width++;
          }
          if (isset($list[$comma + 3]) && @$list[$comma + 3][0] == '^') {
              $width++;
          }

          $parts = $this->getSpan($list, $comma, $comma + $width);
          $entry = $parts[0];
          if (!isset($entry[2])) $entry[2] = ' ';
          if (!isset($entry[3])) $entry[3] = ' ';
          $list = $parts[1];
          $objs[] = $entry;
      }
      return array($list, $objs);
  }
  /**
   * Turns the tokens of one statement into a list of triples.
   *
   * A statement of exactly three tokens is returned unchanged. Otherwise the
   * ";" and "," abbreviations are expanded and every resulting row has five
   * slots: subject, predicate, object and two slots for an optional language
   * tag / datatype (' ' when absent). A statement consisting only of a
   * generated blank node is ignored; a shorter statement stops execution.
   *
   * @param array $list
   * @return array
   * @access private
   **/
  function statementize($list) {
      if (count($list) == 1 && preg_match("/_".BNODE_PREFIX."[0-9]+_/",$list[0])) {
          // print the tokens rather than the array itself, which would only
          // yield "Array" plus a conversion notice
          if ($this->debug) print "Ignored bNode exists statement. ".implode(' ', $list)."\n";
          return array();
      }
      if (count($list) == 3) return array($list);
      if (count($list) < 3) die("Error: statement too short!");

      // split off the ";" groups first
      $r   = $this->getPovs($list);
      $spo = $r[0];
      $po  = $r[1];
      $all = array();

      $subject = $spo[0];
      foreach ($po as $pop) {
          $r     = $this->getObjs($pop);
          $myPo  = $r[0];
          $extra = $r[1];
          if (!isset($myPo[2])) $myPo[2] = ' ';
          if (!isset($myPo[3])) $myPo[3] = ' ';
          $predicate = $myPo[0];
          $all[] = array($subject, $predicate, $myPo[1], $myPo[2], $myPo[3]);
          foreach ($extra as $o) {
              $all[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
          }
      }

      // what is left is the leading subject/predicate/object part
      $r    = $this->getObjs($spo);
      $spo  = $r[0];
      $objs = $r[1];
      $subject   = $spo[0];
      $predicate = $spo[1];
      if (!isset($spo[3])) $spo[3] = ' ';
      if (!isset($spo[4])) $spo[4] = ' ';
      $all[] = array($subject, $predicate, $spo[2], $spo[3], $spo[4]);
      foreach ($objs as $o) {
          $all[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
      }
      return $all;
  }
  /**
   * Folds every bracketed group into a nested array element; the brackets
   * themselves stay inside the group. Innermost groups are folded first.
   * e.g. doLists(["a","[","c","]","d"], "[", "]") => ["a", ["[","c","]"], "d"]
   *
   * If an opening bracket has no closing counterpart, parseError is set, an
   * E_USER_ERROR is raised and processing stops.
   *
   * @param array $list
   * @param string $schar opening bracket
   * @param string $echar closing bracket
   * @return array
   * @access private
   **/
  function doLists($list, $schar, $echar) {
      while (in_array($schar, $list)) {
          // nesting level => list of (open offset[, close offset]) pairs
          $ndict = array();
          $nestingLevel = 0;
          for ($i = 0; $i < count($list); $i++) {
              if ($list[$i] == $schar) {
                  $nestingLevel += 1;
                  $ndict[$nestingLevel][] = array($i);
              }
              if ($list[$i] == $echar) {
                  if (!isset($ndict[$nestingLevel])) {
                      $ndict[$nestingLevel] = array(array($i));
                  } else {
                      $ndict[$nestingLevel][count($ndict[$nestingLevel])-1][] = $i;
                      $nestingLevel -= 1;
                  }
              }
          }

          // fold the first group found on the deepest level
          $biggest = max(array_keys($ndict));
          $tol = $ndict[$biggest][0];
          if (!isset($tol[1])) {
              // without this check the list would grow on every pass and
              // the loop would never end
              $this->parseError = true;
              trigger_error("Unbalanced '".$schar."': missing closing '".$echar."'", E_USER_ERROR);
              break;
          }
          $list = $this->listify($list, $tol[0], ($tol[1]+1));
      }
      return $list;
  }
  /**
   * Runs doLists() for each supported bracket pair, in the order
   * [ ], { }, ( ).
   *
   * @param array $list
   * @return array
   * @access private
   **/
  function listStuff($list) {
      $brackets = array(
          array('[', ']'),
          array('{', '}'),
          array('(', ')')
      );
      foreach ($brackets as $pair) {
          $list = $this->doLists($list, $pair[0], $pair[1]);
      }
      return $list;
  }
  /**
   * Advances the blank node counter and returns a new node id of the form
   * "_" . BNODE_PREFIX . counter . "_".
   *
   * @access private
   * @return string
   **/
  function bnodeID() {
      $this->bNode += 1;
      $id = "_".BNODE_PREFIX.$this->bNode."_";
      return $id;
  }
  /**
   * Replaces labelled blank nodes such as _:a by generated node ids. The
   * same label always maps to the same id; the mapping is remembered in
   * $this->bNodeMap.
   *
   * @access private
   * @param array $list
   * @return array
   **/
  function fixAnon($list) {
      $total = count($list);
      for ($idx = 0; $idx < $total; $idx++) {
          $token = $list[$idx];
          if (substr($token, 0, 2) != "_:") {
              continue;
          }
          if (!isset($this->bNodeMap[$token])) {
              $this->bNodeMap[$token] = $this->bnodeID();
          }
          $list[$idx] = $this->bNodeMap[$token];
      }
      return $list;
  }
  /**
   * Replaces the nested groups produced by doLists() with blank nodes and
   * appends the statements describing them to the end of the list.
   *
   *  - [ ... ] becomes a blank node whose properties follow as a statement.
   *  - ( ... ) becomes an RDF linked list built from first/rest statements;
   *    an empty ( ) is the nil resource itself.
   *  - any other group stops execution.
   *
   * @access private
   * @param array $list
   * @return array
   **/
  function expandLists($list) {
      // count() is re-evaluated on purpose: appended statements may contain
      // further groups that still have to be expanded
      for ($i = 0; $i < count($list); $i++) {
          if (!is_array($list[$i])) {
              continue;
          }
          $group  = $list[$i];
          $opener = isset($group[0]) ? $group[0] : null;

          if ($opener === '[') {
              $bnode = $this->bnodeID();
              $list[$i] = $bnode;
              $list[] = $bnode;
              $list = $this->array_concat($list, array_slice($group, 1, -1));
              $list[] = '.';
          } elseif ($opener === '(') {
              $rdfNil   = '<'. RDF_NAMESPACE_URI . RDF_NIL .'>';
              $rdfFirst = '<'. RDF_NAMESPACE_URI . RDF_FIRST .'>';
              $rdfRest  = '<'. RDF_NAMESPACE_URI . RDF_REST .'>';

              // items without the enclosing "(" and ")"
              $items = array_slice($group, 1, -1);
              $count = count($items);
              if ($count == 0) {
                  // the empty collection is nil, not a blank node without
                  // any statements
                  $list[$i] = $rdfNil;
                  continue;
              }

              $fromBnode = $this->bnodeID();
              $toBnode   = $this->bnodeID();
              // link the head of the collection into the graph
              $list[$i] = $fromBnode;

              for ($idx = 0; $idx < $count; $idx++) {
                  // first
                  $list[] = $fromBnode;
                  $list[] = $rdfFirst;
                  $list[] = $items[$idx];
                  $list[] = '.';
                  // rest (nil for the last item, next blank node otherwise)
                  $list[] = $fromBnode;
                  $list[] = $rdfRest;
                  if ($idx == $count - 1) {
                      $list[] = $rdfNil;
                      $list[] = '.';
                  } else {
                      $list[] = $toBnode;
                      $list[] = '.';
                      $fromBnode = $toBnode;
                      $toBnode   = $this->bnodeID();
                  }
              }
          } else {
              die('Only [ ] and () lists are supported!');
          }
      }
      return $list;
  }
  /**
   * Main work-horse: converts an N3 string into a list of triples (arrays
   * of strings). With $this->debug set, the intermediate token lists are
   * dumped after each step.
   *
   * @param string $s
   * @return array
   * @access private
   **/
  function n3tolist($s) {
      $verbose = $this->debug;

      // tokenize and drop whitespace/comment tokens
      $tokens = $this->filterWs($this->toke($s));
      if ($verbose) {
          print "Filter WS:\n";
          var_dump($tokens);
      }

      // collect and remove the prefix declarations
      $split    = $this->getPrefixes($tokens);
      $prefixes = $split[0];
      $tokens   = $split[1];
      if ($verbose) {
          print "Prefixes:\n";
          var_dump($prefixes);
          print "***\n";
          var_dump($tokens);
      }

      // prefixes, keywords and string formatting
      $tokens = $this->applyStuff($prefixes, $tokens);
      if ($verbose) {
          print "Stuff applied:\n";
          var_dump($tokens);
      }

      // labelled blank nodes (_:a)
      $tokens = $this->fixAnon($tokens);
      if ($verbose) {
          print "Fix anon:\n";
          var_dump($tokens);
      }

      // group bracketed token runs
      $tokens = $this->listStuff($tokens);
      if ($verbose) {
          print "Lists done:\n";
          var_dump($tokens);
      }

      // turn the groups into blank nodes plus extra statements
      $tokens = $this->expandLists($tokens);
      if ($verbose) {
          print "Lists applied:\n";
          var_dump($tokens);
      }

      $triples = array();
      foreach ($this->getStatements($tokens) as $statement) {
          foreach ($this->statementize($statement) as $triple) {
              $triples[] = $triple;
          }
      }
      return $triples;
  }
  /**
   * Constructs a node object from a token.
   *
   *  - a token starting with a double quote becomes a Literal; language tag
   *    and datatype are taken from slots 3.. of $state,
   *  - a token starting with "_" . BNODE_PREFIX becomes a BlankNode,
   *  - everything else becomes a Resource.
   *
   * @access private
   * @param string $s     the token
   * @param array  $state the complete triple row the token belongs to
   * @return object RDFNode
   **/
  function toRDFNode($s,$state) {
      // token without its first and last character (quotes, < > or _ _)
      $ins = substr($s, 1, -1);

      if ($s[0] == "\"") {
          $lang = NULL;
          for ($i = 3; $i < count($state); $i++) {
              // read the tag from the slot that matched, not from a fixed one
              if ($state[$i][0] == '@') {
                  $lang = substr($state[$i], 1);
              }
              if (substr($state[$i], 0, 2) == '^^') {
                  $dtype = substr($state[$i], 2);
                  if (substr($dtype, 0, 1) == '<') {
                      $dtype = substr($dtype, 1, -1);
                  }
              }
          }
          if (UNIC_RDF) {
              $ins = $this->str2unicode_nfc($ins);
          }
          $new_Literal = new Literal($ins, $lang);
          if (isset($dtype)) {
              $new_Literal->setDatatype($dtype);
          }
          return $new_Literal;
      }

      // Generated ids always start with the prefix; an unanchored search
      // would also turn URIs that merely contain it into blank nodes.
      if (strpos($s, '_'.BNODE_PREFIX) === 0) {
          $label = array_search($s, $this->bNodeMap);
          if ($this->FixBnodes || !$label) {
              return new BlankNode($ins);
          }
          // original label from the document, without the leading "_:"
          return new BlankNode(trim(substr($label, 2)));
      }

      return new Resource($ins);
  }
  845. } //end: N3Parser
  846. ?>