PageRenderTime 62ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/rdfapi-php/api/rdql/RdqlParser.php

https://github.com/komagata/plnet
PHP | 1063 lines | 884 code | 52 blank | 127 comment | 82 complexity | 7bdf33010728df87eda48e5acbdd6df1 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. // ----------------------------------------------------------------------------------
  3. // Class: RdqlParser
  4. // ----------------------------------------------------------------------------------
  5. /**
  6. * This class contains methods for parsing an Rdql query string into PHP variables.
  7. * The output of the RdqlParser is an array with variables and constraints
  8. * of each query clause (Select, From, Where, And, Using).
  9. * To perform an RDQL query this array has to be passed to the RdqlEngine.
  10. *
  11. * @version $Id: RdqlParser.php,v 1.12 2006/06/08 06:25:14 tgauss Exp $
  12. * @author Radoslaw Oldakowski <radol@gmx.de>
  13. *
  14. * @package rdql
  15. * @access public
  16. */
  17. Class RdqlParser extends Object{
  18. /**
  19. * Parsed query variables and constraints.
  20. * { } are only used within the parser class and are not returned as parsed query.
  21. * ( [] stands for an integer index - 0..N )
  22. *
  23. * @var array ['selectVars'][] = ?VARNAME
  24. * ['sources'][]{['value']} = URI | QName
  25. * {['is_qname'] = boolean}
  26. * ['patterns'][]['subject']['value'] = VARorURI
  27. * {['is_qname'] = boolean}
  28. * ['predicate']['value'] = VARorURI
  29. * {['is_qname'] = boolean}
  30. * ['object']['value'] = VARorURIorLiteral
  31. * {['is_qname'] = boolean}
  32. * ['is_literal'] = boolean
  33. * ['l_lang'] = string
  34. * ['l_dtype'] = string
  35. * {['l_dtype_is_qname'] = boolean}
  36. * ['filters'][]['string'] = string
  37. * ['evalFilterStr'] = string
  38. * ['regexEqExprs'][]['var'] = ?VARNAME
  39. * ['operator'] = (eq | ne)
  40. * ['regex'] = string
  41. * ['strEqExprs'][]['var'] = ?VARNAME
  42. * ['operator'] = (eq | ne)
  43. * ['value'] = string
  44. * ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
  45. * ['value_lang'] = string
  46. * ['value_dtype'] = string
  47. * {['value_dtype_is_qname'] = boolean}
  48. * ['numExprVars'][] = ?VARNAME
  49. * {['ns'][PREFIX] = NAMESPACE}
  50. * @access private
  51. */
  52. var $parsedQuery;
  53. /**
  54. * Query string divided into a sequence of tokens.
  55. * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
  56. * or a string containing any characters except from the above.
  57. *
  58. * @var array
  59. * @access private
  60. */
  61. var $tokens;
  62. /**
  63. * Parse the given RDQL query string and return an array with query variables and constraints.
  64. *
  65. * @param string $queryString
  66. * @return array $this->parsedQuery
  67. * @access public
  68. */
  function & parseQuery($queryString) {
      // Pipeline: strip comments, split into tokens, then parse the clauses
      // into $this->parsedQuery.
      $this->tokenize($this->removeComments($queryString));
      $this->startParsing();

      // "SELECT *" is expanded to the variables found in the WHERE patterns;
      // an explicit variable list is handed to _checkSelectVars() instead.
      $selectsEverything = ($this->parsedQuery['selectVars'][0] == '*');
      if ($selectsEverything) {
          $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
      } else {
          $this->_checkSelectVars();
      }

      $this->replaceNamespacePrefixes();

      // The method returns by reference, so a plain property must be returned here.
      return $this->parsedQuery;
  }
  80. /**
  81. * Remove comments from the passed query string.
  82. *
  83. * @param string $query
  84. * @return string
  85. * @throws PHPError
  86. * @access private
  87. */
  function removeComments($query) {
      // Strips "// line" and "/* block */" comments from the query text.
      // Quoted literals (including a trailing @lang / ^^datatype suffix) and
      // <URI> references are copied verbatim, so comment markers inside them
      // are left alone. A line comment is replaced by one blank; a block
      // comment is removed without a replacement character.
      //
      // String offsets use [] because the {} offset syntax is deprecated since
      // PHP 7.4 and no longer parses on PHP 8.
      $last = strlen($query) - 1;
      // Padding for the "$i + 1" / "$i + 2" look-aheads below. Three blanks are
      // needed because $i can reach $last + 1 when the query ends right after
      // an opening quote or '<'.
      $query .= '   ';
      $clean = '';
      for ($i = 0; $i <= $last; $i++) {
          $char = $query[$i];
          if ($char == "'" || $char == '"') {
              // Copy the literal up to and including its closing quotation mark.
              $quotMark = $char;
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != $quotMark);
              $clean .= $query[$i];
              // Language tag: copy until white space or until a "^^" follows.
              if ($query[$i + 1] == '@') {
                  do {
                      if ($query[$i + 1] == '^' && $query[$i + 2] == '^')
                          break;
                      $clean .= $query[++$i];
                  } while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                           && $query[$i] != "\n" && $query[$i] != "\r");
              }
              // Datatype: copy until white space.
              if ($query[$i + 1] == '^' && $query[$i + 2] == '^') {
                  do {
                      $clean .= $query[++$i];
                  } while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                           && $query[$i] != "\n" && $query[$i] != "\r");
              }
          } elseif ($char == '<') {
              // Copy an <URI> verbatim; "//" inside it is not a comment.
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != '>');
              $clean .= $query[$i];
          } elseif ($char == '/') {
              if ($i < $last && $query[$i + 1] == '/') {
                  // "// comment": skip to the end of the line, leave one blank behind.
                  while ($i < $last && $query[$i] != "\n" && $query[$i] != "\r")
                      ++$i;
                  $clean .= ' ';
              } elseif ($i < $last - 2 && $query[$i + 1] == '*') {
                  // "/* comment */": skip to the closing marker.
                  $i += 2;
                  while ($i < $last && ($query[$i] != '*' || $query[$i + 1] != '/'))
                      ++$i;
                  if ($i >= $last && ($query[$last - 1] != '*' || $query[$last] != '/'))
                      trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
                  // Step onto the '/'; the for-loop increment then moves past it.
                  ++$i;
              } else {
                  $clean .= $char;
              }
          } else {
              $clean .= $char;
          }
      }
      return $clean;
  }
  143. /**
  144. * Divide the query string into tokens.
  145. * A token is either: ' ' or "\n" or "\r" or '\t' or ',' or '(' or ')'
  146. * or a string containing any character except from the above.
  147. *
  148. * @param string $queryString
  149. * @access private
  150. */
  function tokenize($queryString) {
      // Splits the query into $this->tokens. Each of ' ', "\t", "\r", "\n",
      // ',', '(' and ')' becomes a token of its own; every run of other
      // characters becomes one token. The list may end with an empty token.
      $queryString = trim($queryString, " \r\n\t");
      $specialChars = array(" ", "\t", "\r", "\n", ",", "(", ")");
      $len = strlen($queryString);
      // Start from a fresh list so tokens left over from an earlier parse
      // cannot end up in front of the new ones.
      $this->tokens = array('');
      $n = 0;
      for ($i = 0; $i < $len; ++$i) {
          // [] offset: the {} offset syntax no longer parses on PHP 8.
          $char = $queryString[$i];
          if (!in_array($char, $specialChars, true)) {
              $this->tokens[$n] .= $char;
              continue;
          }
          // Close the token being collected (if any), store the special
          // character as its own token and open a new empty one.
          if ($this->tokens[$n] != '')
              ++$n;
          $this->tokens[$n] = $char;
          $this->tokens[++$n] = '';
      }
  }
  168. /**
  169. * Start parsing of the tokenized query string.
  170. *
  171. * @access private
  172. */
  function startParsing() {
      // SELECT is the first clause; parseSelect() hands over to the parser
      // of whichever clause follows it.
      $this->parseSelect();
  }
  176. /**
  177. * Parse the SELECT clause of an Rdql query.
  178. * When the parsing of the SELECT clause is finished, this method will call
  179. * a suitable method to parse the subsequent clause.
  180. *
  181. * @throws PhpError
  182. * @access private
  183. */
  function parseSelect() {
      // Parses "SELECT * | ?var (, ?var)*" and then dispatches to parseFrom()
      // or parseWhere(). Errors are raised with trigger_error(E_USER_ERROR).
      $this->_clearWhiteSpaces();

      // The query has to open with the SELECT keyword (case-insensitive).
      if (strcasecmp('SELECT', current($this->tokens)))
          trigger_error(RDQL_SEL_ERR ."'" .current($this->tokens)
                        ."' - SELECT keyword expected", E_USER_ERROR);
      unset($this->tokens[key($this->tokens)]);
      $this->_clearWhiteSpaces();

      // SELECT *: store the wildcard (parseQuery() expands it afterwards) and
      // require that the next clause keyword follows directly.
      if (current($this->tokens) == '*') {
          unset($this->tokens[key($this->tokens)]);
          $this->parsedQuery['selectVars'][0] = '*';
          $this->_clearWhiteSpaces();
          if (strcasecmp('FROM', current($this->tokens))
              && strcasecmp('SOURCE', current($this->tokens))
              && strcasecmp('WHERE', current($this->tokens)))
              trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
                            ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
      }

      // SELECT ?Var (, ?Var)*
      // $commaExpected: a variable was just read, so a comma may follow.
      // $comma:         a comma was just read, so a clause keyword must not follow.
      $commaExpected = FALSE;
      $comma = FALSE;
      while (current($this->tokens) != NULL) {
          $k = key($this->tokens);
          $token = $this->tokens[$k];

          if ($token == ',') {
              if (!$commaExpected)
                  trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
              $comma = TRUE;
              $commaExpected = FALSE;
          } elseif ($token == '(' || $token == ')') {
              trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
          } elseif (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
              if ($comma)
                  trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[$k]);
              return $this->parseFrom();
          } elseif (!strcasecmp('WHERE', $token)) {
              // The earlier "&& !$comma" in this condition made the check
              // below unreachable, so "SELECT ?x, WHERE" was reported as
              // "'?' missing" instead of as a stray comma.
              if ($comma)
                  trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[$k]);
              return $this->parseWhere();
          } elseif ($token[0] == '?') {
              // [] offset: the {} offset syntax no longer parses on PHP 8.
              $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
              $commaExpected = TRUE;
              $comma = FALSE;
          } else {
              trigger_error(RDQL_SEL_ERR ." '$token' - '?' missing", E_USER_ERROR);
          }

          unset($this->tokens[$k]);
          $this->_clearWhiteSpaces();
      }
      // Tokens ran out before a FROM/SOURCE/WHERE keyword was seen.
      trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
  }
  242. /**
  243. * Parse the FROM/SOURCES clause of an Rdql query
  244. * When the parsing of this clause is finished, parseWhere() will be called.
  245. *
  246. * @throws PhpError
  247. * @access private
  248. */
  function parseFrom() {
      // Parses the comma separated source list of the FROM/SOURCE clause into
      // $this->parsedQuery['sources'] and hands over to parseWhere() once the
      // WHERE keyword is reached. Errors are raised with
      // trigger_error(E_USER_ERROR).
      $comma = FALSE;          // a comma was just read
      $commaExpected = FALSE;  // a source was just read, a comma may follow
      $i = -1;
      while (current($this->tokens) != NULL) {
          $this->_clearWhiteSpaces();
          $token = current($this->tokens);
          // Only white space was left: fall through to the
          // "WHERE clause missing" error instead of indexing an empty token.
          if ($token == NULL)
              break;

          // WHERE only ends the clause after at least one source was read.
          // !empty() replaces count(), which throws on PHP 8 when no source
          // has been stored yet.
          if (!strcasecmp('WHERE', $token) && !empty($this->parsedQuery['sources'])) {
              // Reported under the source-clause label like the other errors
              // of this method (it used the SELECT label before).
              if ($comma)
                  trigger_error(RDQL_SRC_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseWhere();
          }

          if ($token == ',') {
              if ($commaExpected) {
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  unset($this->tokens[key($this->tokens)]);
              } else {
                  trigger_error(RDQL_SRC_ERR ."',' - unexpected comma", E_USER_ERROR);
              }
          } else {
              // _validateURI() consumes the token(s) of the source itself.
              $this->parsedQuery['sources'][++$i]['value'] = $this->_validateURI($token, RDQL_SRC_ERR);
              // Anything not written as <URI> is flagged as a QName.
              // [] offset: the {} offset syntax no longer parses on PHP 8.
              if ($token[0] != '<')
                  $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      }
      trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
  }
  279. /**
  280. * Parse the WHERE clause of an Rdql query.
  281. * When the parsing of the WHERE clause is finished, this method will call
  282. * a suitable method to parse the subsequent clause if provided.
  283. *
  284. * @throws PhpError
  285. * @access private
  286. */
  function parseWhere() {
      // Parses the triple patterns "(subject, predicate, object)" of the WHERE
      // clause into $this->parsedQuery['patterns'] and hands over to
      // parseAnd() / parseUsing() when one of those keywords follows.
      // Errors are raised with trigger_error(E_USER_ERROR).
      $comma = FALSE;          // a comma was just read between two patterns
      $commaExpected = FALSE;  // a pattern was just read, a comma may follow
      $i = 0;
      do {
          $this->_clearWhiteSpaces();
          $token = current($this->tokens);

          // AND / USING only end the clause after at least one pattern.
          // !empty() replaces count(), which throws on PHP 8 when no pattern
          // has been stored yet (e.g. "WHERE AND ...").
          $hasPatterns = !empty($this->parsedQuery['patterns']);
          if ($hasPatterns && !strcasecmp('AND', $token)) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseAnd();
          }
          if ($hasPatterns && !strcasecmp('USING', $token)) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseUsing();
          }

          if ($token == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_WHR_ERR);
          } else {
              if ($token != '(')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
                                ."' - '(' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // The _validate*() helpers consume the tokens they accept;
              // _checkComma(TRUE, ...) then removes the separating comma.
              $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (current($this->tokens) != ')')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // The clause must not end with a dangling comma.
      if ($comma)
          trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  332. /**
  333. * Parse the AND clause of an Rdql query
  334. *
  335. * @throws PhpError
  336. * @access private
  337. * @todo clear comments
  338. */
  function parseAnd() {
      // Collects the tokens of the AND clause into filter strings. A ','
      // token closes the current filter and starts the next one; the USING
      // keyword closes the last filter and hands over to parseUsing().
      $this->_clearWhiteSpaces();
      $filterIndex = 0;
      $pending = '';

      while (current($this->tokens) != NULL) {
          $key = key($this->tokens);
          $piece = $this->tokens[$key];

          if (strcasecmp('USING', $piece) == 0) {
              $this->parseFilter($filterIndex, $pending);
              unset($this->tokens[$key]);
              return $this->parseUsing();
          }

          if ($piece == ',') {
              // The comma itself is not part of any filter string.
              $this->parseFilter($filterIndex, $pending);
              $pending = '';
              ++$filterIndex;
          } else {
              // White space tokens are kept, so the filter text stays intact.
              $pending .= $piece;
          }
          unset($this->tokens[$key]);
      }

      // End of query: whatever was collected is the last filter.
      $this->parseFilter($filterIndex, $pending);
  }
  361. /**
  362. * Parse the USING clause of an Rdql query
  363. *
  364. * @throws PhpError
  365. * @access private
  366. */
  function parseUsing() {
      // Parses "prefix FOR <namespace> (, prefix FOR <namespace>)*" into
      // $this->parsedQuery['ns'][prefix]. Errors are raised with
      // trigger_error(E_USER_ERROR).
      $commaExpected = FALSE;  // a declaration was just read, a comma may follow
      $comma = FALSE;          // a comma was just read
      do {
          $this->_clearWhiteSpaces();
          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_USG_ERR);
          } else {
              $prefix = $this->_validatePrefix(current($this->tokens));
              $this->_clearWhiteSpaces();
              if (strcasecmp('FOR', current($this->tokens)))
                  trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();
              $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // A dangling comma at the end of the USING clause is reported under the
      // USING label (it was reported under the WHERE label before).
      if ($comma)
          trigger_error(RDQL_USG_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  391. /**
  392. * Check if a filter from the AND clause contains an equal number of '(' and ')'
  393. * and parse filter expressions.
  394. *
  395. * @param integer $n
  396. * @param string $filter
  397. * @throws PHPError
  398. * @access private
  399. */
  function parseFilter($n, $filter) {
      // An empty filter means two commas followed each other (or the clause
      // was empty).
      if ($filter == NULL)
          trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);

      // The filter must contain as many '(' as ')'.
      $opened = substr_count($filter, '(');
      $closed = substr_count($filter, ')');
      if ($opened > $closed) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - ')' missing ", E_USER_ERROR);
      } elseif ($opened < $closed) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - too many ')' ", E_USER_ERROR);
      }

      // Store the parsed expressions of this filter under its index.
      $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
  }
  413. /**
  414. * Parse expressions inside the passed filter:
  415. * 1) regex equality expressions: ?var [~~ | =~ | !~ ] REG_EX
  416. * 2a) string equality expressions: ?var [eq | ne] "literal"@lang^^dtype.
  417. * 2b) string equality expressions: ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
  418. * 3) numerical expressions: e.q. (?var1 - ?var2)*4 >= 20
  419. *
  420. * In cases 1-2 parse each expression of the given filter into an array of variables.
  421. * For each parsed expression put a place holder (e.g. ##RegEx_1##) into the filterStr.
  422. * The RDQLengine will then replace each place holder with the resulting boolean value
  423. * of the corresponding expression.
  424. * The remaining filterStr contains only numerical expressions and place holders.
  425. *
  426. * @param string $filterStr
  427. * @return array ['string'] = string
  428. * ['evalFilterStr'] = string
  429. * ['regexEqExprs'][]['var'] = ?VARNAME
  430. * ['operator'] = (eq | ne)
  431. * ['regex'] = string
  432. * ['strEqExprs'][]['var'] = ?VARNAME
  433. * ['operator'] = (eq | ne)
  434. * ['value'] = string
  435. * ['value_type'] = ('variable' | 'URI' | 'QName'| 'Literal')
  436. * ['value_lang'] = string
  437. * ['value_dtype'] = string
  438. * ['value_dtype_is_qname'] = boolean
  439. * ['numExprVars'][] = ?VARNAME
  440. * @access private
  441. */
  function parseExpressions($filterStr) {
      // Splits one filter into regex-equality expressions, string-equality
      // expressions and the variables of whatever remains. Every expression
      // found is replaced in the filter text by a place holder
      // (" ##RegEx_N## " / " ##strEqExpr_N## "); the rewritten text is
      // returned as 'evalFilterStr', the untouched text as 'string'.
      $parsedFilter['string'] = $filterStr;
      $parsedFilter['regexEqExprs'] = array();
      $parsedFilter['strEqExprs'] = array();
      $parsedFilter['numExprVars'] = array();

      // parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
          $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
          $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];
          $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
      }

      // parse ?var [eq | ne] "literal"@lang^^dtype
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
      $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
          // Group 3 always starts and ends with the same quotation mark, so
          // exactly one character is cut from each side. trim() also removed
          // quotes belonging to the value itself, e.g. "say 'hi'".
          $parsedFilter['strEqExprs'][$i]['value'] = (string) substr($eqExprs[3][$i], 1, -1);
          $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
          $parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4][$i], 1);
          $dtype = substr($eqExprs[5][$i], 2);
          if ($dtype) {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
              // [] offset: the {} offset syntax no longer parses on PHP 8.
              if ($dtype[0] != '<')
                  $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;
          } else {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';
          }
          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$i## ", $filterStr);
      }

      // parse ?var [eq | ne] ?var
      // $ii continues the numbering of the string-equality expressions.
      $ii = count($parsedFilter['strEqExprs']);
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
          $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';
          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      // parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          if ($eqExprs[4][$i]) {
              $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
          } else if ($eqExprs[5][$i]) {
              $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
              $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
          }
          $filterStr = str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      $parsedFilter['evalFilterStr'] = $filterStr;

      // all that is left are numerical expressions and place holders for the above expressions
      preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
      foreach ($vars[0] as $var) {
          $parsedFilter['numExprVars'][] = $this->_isDefined($var);
      }
      return $parsedFilter;
  }
  513. /**
  514. * Find all query variables used in the WHERE clause.
  515. *
  516. * @return array [] = ?VARNAME
  517. * @access private
  518. */
  function findAllQueryVariables() {
      // Collects every distinct ?variable used as subject, predicate or
      // object of the WHERE patterns, in order of first appearance.
      // A pattern without any variable raises an E_USER_ERROR.
      $vars = array();
      foreach ($this->parsedQuery['patterns'] as $pattern) {
          $count = 0;
          foreach ($pattern as $v) {
              // A literal whose text happens to start with '?' is still a
              // literal and must not be reported as a query variable.
              if (isset($v['is_literal']))
                  continue;
              // [] offset: the {} offset syntax no longer parses on PHP 8.
              if ($v['value'] && $v['value'][0] == '?') {
                  ++$count;
                  if (!in_array($v['value'], $vars, true))
                      $vars[] = $v['value'];
              }
          }
          if (!$count)
              trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
      }
      return $vars;
  }
  535. /**
  536. * Replace all namespace prefixes in the pattern and constraint clause of an rdql query
  537. * with the namespaces declared in the USING clause and default namespaces.
  538. *
  539. * @access private
  540. */
  function replaceNamespacePrefixes() {
      // Expands namespace prefixes in the sources, the WHERE patterns and the
      // string-equality filters, using the USING declarations merged with the
      // global $default_prefixes. The helper flags ('is_qname',
      // 'l_dtype_is_qname', 'value_dtype_is_qname') and the 'ns' table are
      // removed from the parsed query once they have been applied.
      global $default_prefixes;
      if (!isset($this->parsedQuery['ns']))
          $this->parsedQuery['ns'] = array();

      // add default namespaces
      // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
      // it will be overridden by the default namespace defined in constants.php
      $defaults = is_array($default_prefixes) ? $default_prefixes : array();
      $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $defaults);

      // Search/replace lists for values that are not flagged as QNames:
      // every "prefix:" (case-insensitive) is replaced by its namespace,
      // prefix after prefix. str_ireplace() stands in for eregi_replace(),
      // which was removed in PHP 7, and treats the prefix as plain text.
      $needles = array();
      $namespaces = array();
      foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
          $needles[] = "$prefix:";
          $namespaces[] = $uri;
      }

      // replace namespace prefixes in the FROM clause
      if (isset($this->parsedQuery['sources'])) {
          foreach ($this->parsedQuery['sources'] as $n => $source) {
              if (isset($source['is_qname'])) {
                  $this->parsedQuery['sources'][$n]
                      = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
              } else {
                  $this->parsedQuery['sources'][$n]
                      = str_ireplace($needles, $namespaces, $source['value']);
              }
          }
      }

      // replace namespace prefixes in the where clause
      foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
          foreach ($pattern as $key => $v) {
              if (isset($v['is_literal'])) {
                  // Literal: only the datatype can carry a prefix. This is
                  // checked before looking at the value, so a literal that is
                  // empty, "0" or starts with '?' still gets its datatype
                  // resolved.
                  if (isset($v['l_dtype_is_qname'])) {
                      $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                          = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);
                      unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
                  } else {
                      $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                          = str_ireplace($needles, $namespaces, $v['l_dtype']);
                  }
              } elseif ($v['value'] && $v['value'][0] != '?') {
                  // [] offset: the {} offset syntax no longer parses on PHP 8.
                  if (isset($v['is_qname'])) {
                      $this->parsedQuery['patterns'][$n][$key]['value']
                          = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
                      unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);
                  } else {
                      // quoted URI (== <URI>)
                      $this->parsedQuery['patterns'][$n][$key]['value']
                          = str_ireplace($needles, $namespaces, $v['value']);
                  }
              }
          }
      }

      // replace prefixes in the constraint clause
      if (isset($this->parsedQuery['filters'])) {
          foreach ($this->parsedQuery['filters'] as $n => $filter) {
              foreach ($filter['strEqExprs'] as $i => $expr) {
                  // $expr is a copy: its 'value_type' keeps the parsed type,
                  // so a QName converted here is not run through the URI
                  // branch as well.
                  if ($expr['value_type'] == 'QName') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';
                  }
                  if ($expr['value_type'] == 'URI') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = str_ireplace($needles, $namespaces,
                                         $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
                  } elseif ($expr['value_type'] == 'Literal') {
                      if (isset($expr['value_dtype_is_qname'])) {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);
                          unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
                      } else {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = str_ireplace($needles, $namespaces,
                                             $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
                      }
                  }
              }
          }
      }

      unset($this->parsedQuery['ns']);
  }
  616. // =============================================================================
  617. // *************************** helper functions ********************************
  618. // =============================================================================
  619. /**
  620. * Remove whitespace-tokens from the array $this->tokens
  621. *
  622. * @access private
  623. */
  function _clearWhiteSpaces() {
      // Drop tokens from the current position for as long as they are one of
      // the four white space tokens.
      $blanks = array(' ', "\n", "\t", "\r");
      while (in_array(current($this->tokens), $blanks, true)) {
          unset($this->tokens[key($this->tokens)]);
      }
  }
  631. /**
  632. * Check if the query string of the given clause contains an undesired ','.
  633. * If a comma was correctly placed then remove it and clear all whitespaces.
  634. *
  635. * @param string $commaExpected
  636. * @param string $clause_error
  637. * @throws PHPError
  638. * @access private
  639. */
  function _checkComma($commaExpected, $clause_error) {
      $this->_clearWhiteSpaces();

      // Nothing to do unless the next token is a comma.
      if (current($this->tokens) != ',')
          return;

      if (!$commaExpected) {
          trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
          return;
      }

      // Consume the comma, then make sure it is not followed by another one.
      unset($this->tokens[key($this->tokens)]);
      $this->_checkComma(FALSE, $clause_error);
  }
  651. /**
  652. * Check if the given token is either a variable (?var) or the first token of an URI (<URI>).
  653. * In case of an URI this function returns the whole URI string.
  654. *
  655. * @param string $token
  656. * @return array ['value'] = string
  657. * @throws PHPError
  658. * @access private
  659. */
  function _validateVarUri($token) {
      // Returns array('value' => ...) for a ?variable, or for an <URI> / QName
      // (then with 'is_qname' => TRUE when it was not written as <URI>).
      // The token(s) are consumed by the _validate*() helpers.
      $token_res = array();
      // substr() instead of an offset: safe for an empty/FALSE token at the
      // end of the query, and the {} offset syntax no longer parses on PHP 8.
      $first = (string) substr((string) $token, 0, 1);
      if ($first == '?') {
          $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } else {
          $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          if ($first != '<')
              $token_res['is_qname'] = TRUE;
      }
      return $token_res;
  }
  670. /**
  671. * Check if the given token is either a variable (?var) or the first token
  672. * of either an URI (<URI>) or a literal ("Literal").
  673. * In case of a literal return an array with literal properties (value, language, datatype).
  674. * In case of a variable or an URI return only ['value'] = string.
  675. *
  676. * @param string $token
  677. * @return array ['value'] = string
  678. * ['is_qname'] = boolean
  679. * ['is_literal'] = boolean
  680. * ['l_lang'] = string
  681. * ['l_dtype'] = string
  682. * @throws PHPError
  683. * @access private
  684. */
  function _validateVarUriLiteral($token) {
      // Classifies the object of a pattern by its first character:
      //   ?      variable
      //   ' or " literal (array built by _validateLiteral())
      //   <      URI
      //   other  QName, provided the token contains a ':'
      // Anything else raises an E_USER_ERROR.
      $statement_object = array();
      // substr() instead of an offset: safe for an empty/FALSE token, and the
      // {} offset syntax no longer parses on PHP 8.
      $first = (string) substr((string) $token, 0, 1);

      if ($first == '?') {
          $statement_object['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } elseif ($first == "'" || $first == '"') {
          $statement_object = $this->_validateLiteral($token);
      } elseif ($first == '<') {
          $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
      } elseif (strpos((string) $token, ':') !== false) {
          // strpos() stands in for ereg(':', ...), which was removed in PHP 7.
          $statement_object['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          $statement_object['is_qname'] = TRUE;
      } else {
          trigger_error(RDQL_WHR_ERR ." '$token' - ?Variable, &lt;URI&gt;, QName, or \"LITERAL\" expected", E_USER_ERROR);
      }
      return $statement_object;
  }
  699. /**
  700. * Check if the given token is a valid variable name (?var).
  701. *
  702. * @param string $token
  703. * @param string $clause_error
  704. * @return string
  705. * @throws PHPError
  706. * @access private
  707. */
  function _validateVar($token, $clause_error) {
      // The token is valid only if the first variable-shaped match covers the
      // whole token.
      preg_match("/\?[a-zA-Z0-9_]+/", $token, $hits);
      $isValid = isset($hits[0]) && $hits[0] == $token;
      if (!$isValid) {
          trigger_error($clause_error ."'" .htmlspecialchars($token)
                        ."' - variable name contains illegal characters", E_USER_ERROR);
      }

      // Consume the current token and hand the variable name back.
      unset($this->tokens[key($this->tokens)]);
      return $token;
  }
  716. /**
  717. * Check if $token is the first token of a valid URI (<URI>) and return the whole URI string
  718. *
  719. * @param string $token
  720. * @param string $clause_error
  721. * @return string
  722. * @throws PHPError
  723. * @access private
  724. */
  function _validateUri($token, $clause_error) {
      // Accepts either a QName (returned with trailing ':' removed) or an
      // <URI> (returned without the angle brackets). An <URI> may have been
      // split over several tokens; they are glued together until the token
      // ending in '>' is reached. Consumes the current token(s).
      // Errors are raised with trigger_error(E_USER_ERROR).
      //
      // substr() replaces the {} string offsets, which no longer parse on
      // PHP 8, and is safe for an empty/FALSE token.
      if (substr((string) $token, 0, 1) != '<') {
          if (strpos((string) $token, ':') && $this->_validateQName($token, $clause_error)) {
              unset($this->tokens[key($this->tokens)]);
              return rtrim($token, ':');
          }
          $errmsg = $clause_error .'\'' .htmlspecialchars($token) .'\' ';
          if ($clause_error == RDQL_WHR_ERR)
              $errmsg .= "- ?Variable or &lt;URI&gt; or QName expected";
          else
              $errmsg .= "- &lt;URI&gt; or QName expected";
          trigger_error($errmsg, E_USER_ERROR);
      } else {
          // Separator tokens that may not occur inside an <URI>. "\t" is one
          // of the tokenizer's separators too and is rejected like the others.
          $forbidden = array('(', ')', ',', ' ', "\n", "\r", "\t");
          $token_res = $token;
          // The end-of-tokens check comes first, so the last character is
          // never read from an empty/FALSE token.
          while ($token != NULL && substr($token, -1) != '>') {
              if (in_array($token, $forbidden, true)) {
                  trigger_error($clause_error .'\'' .htmlspecialchars($token_res)
                                ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $token = current($this->tokens);
              $token_res .= $token;
          }
          if ($token == NULL)
              trigger_error($clause_error .'\'' .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
          unset($this->tokens[key($this->tokens)]);
          return trim($token_res, '<>');
      }
  }
  755. /**
  756. * Check if $token is the first token of a valid literal ("LITERAL") and
  757. * return an array with literal properties (value, language, datatype).
  758. *
  759. * @param string $token
  760. * @return array ['value'] = string
  761. * ['is_literal'] = boolean
  762. * ['l_lang'] = string
  763. * ['l_dtype'] = string
  764. * ['l_dtype_is_qname'] = boolean
  765. * @throws PHPError
  766. * @access private
  767. */
  function _validateLiteral($token) {
      // Reads a quoted literal, which may span several tokens, together with
      // an optional @language and/or ^^datatype suffix. Every token used is
      // removed from $this->tokens. Errors are raised with
      // trigger_error(E_USER_ERROR).
      //
      // Only the string offsets changed here ({} no longer parses on PHP 8);
      // the statement order is kept as it was.
      $quotation_mark = $token[0];
      $statement_object = array ('value' => '',
                                 'is_literal' => TRUE,
                                 'l_lang' => '',
                                 'l_dtype' => '');
      // Strip the opening quotation mark from the first token of the literal.
      $this->tokens[key($this->tokens)] = substr($token, 1);
      $return = FALSE;
      foreach ($this->tokens as $k => $token) {
          if ($token != NULL && substr($token, -1) == $quotation_mark) {
              // Token ends with the closing quotation mark: literal complete.
              $token = rtrim($token, $quotation_mark);
              $return = TRUE;
          // parse @language(^^datatype)?
          } elseif (strpos($token, $quotation_mark .'@') || substr($token, 0, 2) == $quotation_mark .'@') {
              $lang = substr($token, strpos($token, $quotation_mark .'@')+2);
              if (strpos($lang, '^^') || substr($lang, 0,2) == '^^') {
                  $dtype = substr($lang, strpos($lang, '^^')+2);
                  if (!$dtype)
                      trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                    .$token ." - datatype expected" ,E_USER_ERROR);
                  $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
                  if ($dtype[0] != '<')
                      $statement_object['l_dtype_is_qname'] = TRUE;
                  $lang = substr($lang, 0, strpos($lang, '^^'));
              }
              if (!$lang)
                  trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                .$token ." - language expected" ,E_USER_ERROR);
              $statement_object['l_lang'] = $lang;
              $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
              $return = TRUE;
          // parse ^^datatype
          } elseif (strpos($token, $quotation_mark .'^^') || substr($token, 0, 3) == $quotation_mark .'^^') {
              $dtype = substr($token, strpos($token, $quotation_mark .'^^')+3);
              if (!$dtype)
                  trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                .$token ." - datatype expected" ,E_USER_ERROR);
              $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
              if ($dtype[0] != '<')
                  $statement_object['l_dtype_is_qname'] = TRUE;
              $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
              $return = TRUE;
          } elseif (strpos($token, $quotation_mark)) {
              // A quotation mark inside a token that matched none of the
              // forms above.
              trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);
          }
          // Tokens in between (including white space tokens) are part of the
          // literal's value.
          $statement_object['value'] .= $token;
          unset($this->tokens[$k]);
          if ($return)
              return $statement_object;
      }
      trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
  }
  /**
   * Check if the given token is a valid QName.
   *
   * A QName is split at ':' into a prefix and an optional local part.
   * The prefix must be a valid NCName. The local part may be missing
   * (no colon in $token) or empty ("prefix:"); in every other case it must
   * be a valid NCName as well.
   *
   * @param   string   $token
   * @param   string   $clause_error  text prepended to every error message
   * @return  boolean  TRUE (errors are reported through trigger_error)
   * @throws  PHPError
   * @access  private
   */
  function _validateQName($token, $clause_error) {
      $parts = explode(':', $token);
      if (count($parts) > 2)
          trigger_error($clause_error ."illegal QName: '$token'", E_USER_ERROR);
      if (!$this->_validateNCName($parts[0]))
          trigger_error($clause_error ."illegal prefix in QName: '$token'", E_USER_ERROR);

      // isset() avoids an undefined-offset notice when $token has no colon;
      // the explicit '' comparison (instead of a truthiness test) makes sure a
      // local part of "0" is validated rather than silently accepted.
      $has_local_part = isset($parts[1]) && $parts[1] !== '';
      if ($has_local_part && !$this->_validateNCName($parts[1]))
          trigger_error($clause_error ."illegal local part in QName: '$token'", E_USER_ERROR);

      return TRUE;
  }
  /**
   * Tell whether the given token is a valid NCName.
   *
   * The token qualifies when the first run of characters matching
   * "letters/underscore followed by letters, digits, '_', '.', '-'"
   * is equal to the whole token.
   *
   * @param   string   $token
   * @return  boolean
   * @access  private
   */
  function _validateNCName($token) {
      $found = array();
      preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $found);
      // No match leaves $found empty; a partial match differs from $token.
      return (isset($found[0]) && $found[0] == $token);
  }
  /**
   * Validate a namespace prefix and consume it from the token list.
   *
   * An error is raised when $token is not a valid NCName. In any case the
   * current entry of $this->tokens is removed and $token is handed back.
   *
   * @param   string  $token
   * @return  string  the unchanged $token
   * @throws  PHPError
   * @access  private
   */
  function _validatePrefix($token) {
      $isNCName = $this->_validateNCName($token);
      if (!$isNCName) {
          $message = RDQL_USG_ERR ."'" .htmlspecialchars($token)
                   ."' - illegal input, namespace prefix expected";
          trigger_error($message, E_USER_ERROR);
      }

      // Drop the token the internal array pointer currently refers to.
      $currentKey = key($this->tokens);
      unset($this->tokens[$currentKey]);

      return $token;
  }
  /**
   * Replace a prefix in a given QName and return a full URI.
   *
   * The QName is split at ':'; the first part is looked up as a key in
   * $this->parsedQuery['ns'] and the namespace found there is concatenated
   * with the second part (the local name). Only the first two parts of the
   * split are used.
   *
   * @param   string  $qName
   * @param   string  $clause_error  text prepended to the error message
   * @return  string  namespace URI followed by the local part
   * @throws  PHPError
   * @access  private
   */
  function _replaceNamespacePrefix($qName, $clause_error) {
      $qName_parts = explode(':', $qName);
      $prefix = $qName_parts[0];
      // A QName without a colon has no second part; treat it as an empty
      // local name instead of reading an undefined offset.
      $local_part = isset($qName_parts[1]) ? $qName_parts[1] : '';

      // NOTE(review): whether ['ns'] is always initialised before this call is
      // not visible from here; fall back to "no prefixes defined" so that
      // array_key_exists() never receives a non-array.
      $namespaces = (isset($this->parsedQuery['ns']) && is_array($this->parsedQuery['ns']))
                  ? $this->parsedQuery['ns']
                  : array();

      if (!array_key_exists($prefix, $namespaces)) {
          trigger_error($clause_error .'undefined prefix: \'' .$prefix .'\' in: \'' .$qName .'\'', E_USER_ERROR);
          // Only reached when an error handler resumes execution: hand back
          // the bare local part, i.e. the QName with an empty namespace.
          return $local_part;
      }

      return $namespaces[$prefix] .$local_part;
  }
  /**
   * Run every variable listed in the SELECT clause through _isDefined(),
   * which raises an error for variables unknown to the query.
   *
   * @throws  PHPError
   * @access  private
   */
  function _checkSelectVars() {
      $selected = $this->parsedQuery['selectVars'];
      foreach ($selected as $name) {
          $this->_isDefined($name);
      }
  }
  /**
   * Make sure the given variable is one of the variables reported by
   * findAllQueryVariables(); raise an error otherwise.
   *
   * @param   string  $var
   * @return  string  the unchanged $var
   * @throws  PHPError
   * @access  private
   */
  function _isDefined($var) {
      $known = $this->findAllQueryVariables();
      if (in_array($var, $known)) {
          return $var;
      }

      trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
      // Reached only when an error handler lets execution continue.
      return $var;
  }
  /**
   * Verify that a regular expression from the AND clause is quoted.
   *
   * Two independent checks are made: an opening quotation mark must be
   * present, and the opening and closing quotation marks must be equal.
   * Each failed check raises its own error.
   *
   * @param   string  $filterString  filter text quoted in the error message
   * @param   string  $lQuotMark     quotation mark found on the left
   * @param   string  $rQuotMark     quotation mark found on the right
   * @throws  PHPError
   * @access  private
   */
  function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {
      if (!$lQuotMark) {
          $notQuoted = RDQL_AND_ERR ."'$filterString' - regular expressions must be quoted";
          trigger_error($notQuoted, E_USER_ERROR);
      }

      if ($lQuotMark == $rQuotMark) {
          return;
      }

      $unbalanced = RDQL_AND_ERR ."'$filterString' - quotation end mark in the regular expression missing";
      trigger_error($unbalanced, E_USER_ERROR);
  }
  919. } // end: Class RdqlParser
  920. ?>