PageRenderTime 67ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/src/tdt/core/model/semantics/rdfapi-php/api/rdql/RdqlParser.php

https://github.com/oSoc13/tdt-core
PHP | 1065 lines | 786 code | 89 blank | 190 comment | 126 complexity | cefeaf4b85b5404d0e51777ad304940d MD5 | raw file
Possible License(s): LGPL-3.0, LGPL-2.1
  1. <?php
  2. // ----------------------------------------------------------------------------------
  3. // Class: RdqlParser
  4. // ----------------------------------------------------------------------------------
  5. /**
  6. * This class contains methods for parsing an Rdql query string into PHP variables.
  7. * The output of the RdqlParser is an array with variables and constraints
  8. * of each query clause (Select, From, Where, And, Using).
  9. * To perform an RDQL query this array has to be passed to the RdqlEngine.
  10. *
  11. * @version $Id: RdqlParser.php 282 2006-06-08 06:25:14Z tgauss $
  12. * @author Radoslaw Oldakowski <radol@gmx.de>
  13. *
  14. * @package rdql
  15. * @access public
  16. */
  17. Class RdqlParser extends Object{
  18. /**
  19. * Parsed query variables and constraints.
  20. * { } are only used within the parser class and are not returned as parsed query.
  21. * ( [] stands for an integer index - 0..N )
  22. *
  23. * @var array ['selectVars'][] = ?VARNAME
  24. * ['sources'][]{['value']} = URI | QName
  25. * {['is_qname'] = boolean}
  26. * ['patterns'][]['subject']['value'] = VARorURI
  27. * {['is_qname'] = boolean}
  28. * ['predicate']['value'] = VARorURI
  29. * {['is_qname'] = boolean}
  30. * ['object']['value'] = VARorURIorLiteral
  31. * {['is_qname'] = boolean}
  32. * ['is_literal'] = boolean
  33. * ['l_lang'] = string
  34. * ['l_dtype'] = string
  35. * {['l_dtype_is_qname'] = boolean}
  36. * ['filters'][]['string'] = string
  37. * ['evalFilterStr'] = string
  38. * ['regexEqExprs'][]['var'] = ?VARNAME
  39. * ['operator'] = (eq | ne)
  40. * ['regex'] = string
  41. * ['strEqExprs'][]['var'] = ?VARNAME
  42. * ['operator'] = (eq | ne)
  43. * ['value'] = string
  44. * ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
  45. * ['value_lang'] = string
  46. * ['value_dtype'] = string
  47. * {['value_dtype_is_qname'] = boolean}
  48. * ['numExprVars'][] = ?VARNAME
  49. * {['ns'][PREFIX] = NAMESPACE}
  50. * @access private
  51. */
  52. var $parsedQuery;
  53. /**
  54. * Query string divided into a sequence of tokens.
  55. * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
  56. * or a string containing any characters except from the above.
  57. *
  58. * @var array
  59. * @access private
  60. */
  61. var $tokens;
  62. /**
  63. * Parse the given RDQL query string and return an array with query variables and constraints.
  64. *
  65. * @param string $queryString
  66. * @return array $this->parsedQuery
  67. * @access public
  68. */
  function parseQuery($queryString) {
      // Start from an empty result so that a parser instance can be reused:
      // the clause parsers append to $this->parsedQuery, and leftovers of an
      // earlier parse would otherwise end up in the new result.
      $this->parsedQuery = array();

      $cleanQueryString = $this->removeComments($queryString);
      $this->tokenize($cleanQueryString);
      $this->startParsing();

      // "SELECT *" stands for every variable used in the WHERE patterns.
      // isset() guards against a query for which no select variable was
      // recorded at all.
      if (isset($this->parsedQuery['selectVars'][0])
          && $this->parsedQuery['selectVars'][0] === '*') {
          $this->parsedQuery['selectVars'] = $this->findAllQueryVariables();
      } else {
          $this->_checkSelectVars();
      }

      $this->replaceNamespacePrefixes();
      return $this->parsedQuery;
  }
  80. /**
  81. * Remove comments from the passed query string.
  82. *
  83. * @param string $query
  84. * @return string
  85. * @throws PHPError
  86. * @access private
  87. */
  function removeComments($query) {
      // Strips "// ..." and "/* ... */" comments from the query string.
      // Quoted literals (including their @lang / ^^datatype suffix) and
      // <URI> references are copied verbatim, so comment markers inside
      // them are left alone.
      $last = strlen($query) - 1;
      // Pad the string so that the one- and two-character look-aheads below
      // never read past its end, even when a quote is the very last character.
      $query .= '  ';
      $clean = '';
      for ($i = 0; $i <= $last; $i++) {
          $char = $query[$i];

          if ($char == "'" || $char == '"') {
              // copy the 'literal' or "literal" up to its closing quotation mark
              $quotMark = $char;
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != $quotMark);
              $clean .= $query[$i];

              // language tag: copy up to the next whitespace or a following ^^
              if ($query[$i + 1] == '@') {
                  do {
                      if ($query[$i + 1] == '^' && $query[$i + 2] == '^')
                          break;
                      $clean .= $query[++$i];
                  } while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                           && $query[$i] != "\n" && $query[$i] != "\r");
              }

              // datatype: copy up to the next whitespace
              if ($query[$i + 1] == '^' && $query[$i + 2] == '^') {
                  do {
                      $clean .= $query[++$i];
                  } while ($i < $last && $query[$i] != ' ' && $query[$i] != "\t"
                           && $query[$i] != "\n" && $query[$i] != "\r");
              }

          } elseif ($char == '<') {
              // don't search for comments inside an <URI> either
              do {
                  $clean .= $query[$i++];
              } while ($i < $last && $query[$i] != '>');
              $clean .= $query[$i];

          } elseif ($char == '/') {
              if ($i < $last && $query[$i + 1] == '/') {
                  // "// comment": skip to the end of the line; the line break
                  // itself is consumed by the loop increment and replaced
                  // by a single blank
                  while ($i < $last && $query[$i] != "\n" && $query[$i] != "\r")
                      ++$i;
                  $clean .= ' ';
              } elseif ($i < $last - 2 && $query[$i + 1] == '*') {
                  // "/* comment */": skip to the closing "*/"
                  $i += 2;
                  while ($i < $last && ($query[$i] != '*' || $query[$i + 1] != '/'))
                      ++$i;
                  if ($i >= $last && ($query[$last - 1] != '*' || $query[$last] != '/'))
                      trigger_error(RDQL_SYN_ERR .": unterminated comment - '*/' missing", E_USER_ERROR);
                  ++$i;
              } else {
                  // a lone slash is ordinary query text
                  $clean .= $char;
              }

          } else {
              $clean .= $char;
          }
      }
      return $clean;
  }
  145. /**
  146. * Divide the query string into tokens.
  147. * A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
  148. * or a string containing any character except from the above.
  149. *
  150. * @param string $queryString
  151. * @access private
  152. */
  function tokenize($queryString) {
      // Splits the query string into $this->tokens. Every whitespace
      // character, comma and parenthesis becomes a token of its own; any run
      // of other characters between them forms one token.
      $queryString = trim($queryString, " \t");
      $specialChars = array(" ", "\t", "\r", "\n", ",", "(", ")");
      $len = strlen($queryString);

      // Replace the token list instead of only overwriting index 0, so
      // tokens left over from an earlier parse cannot survive.
      $this->tokens = array('');
      $n = 0;
      for ($i = 0; $i < $len; ++$i) {
          $char = $queryString[$i];
          if (!in_array($char, $specialChars, true)) {
              // ordinary character: extend the token being built
              $this->tokens[$n] .= $char;
          } else {
              // special character: close the token being built (if it holds
              // anything), store the character as its own token and open a
              // fresh empty token behind it
              if ($this->tokens[$n] !== '')
                  ++$n;
              $this->tokens[$n] = $char;
              $this->tokens[++$n] = '';
          }
      }
  }
  172. /**
  173. * Start parsing of the tokenized query string.
  174. *
  175. * @access private
  176. */
  function startParsing() {
      // Parsing begins with the SELECT clause; parseSelect() itself calls
      // the parser of the clause that follows.
      $this->parseSelect();
  }
  180. /**
  181. * Parse the SELECT clause of an Rdql query.
  182. * When the parsing of the SELECT clause is finished, this method will call
  183. * a suitable method to parse the subsequent clause.
  184. *
  185. * @throws PhpError
  186. * @access private
  187. */
  function parseSelect() {
      // Parses "SELECT *" or "SELECT ?var (, ?var)*" and then hands over to
      // parseFrom() or parseWhere(), depending on the keyword that follows.
      $this->_clearWhiteSpaces();

      // The query has to open with the SELECT keyword.
      if (strcasecmp('SELECT', current($this->tokens)))
          trigger_error(RDQL_SEL_ERR ."'" .current($this->tokens)
                        ."' - SELECT keyword expected", E_USER_ERROR);
      unset($this->tokens[key($this->tokens)]);
      $this->_clearWhiteSpaces();

      // Parse SELECT *
      if (current($this->tokens) === '*') {
          unset($this->tokens[key($this->tokens)]);
          $this->parsedQuery['selectVars'][0] = '*';
          $this->_clearWhiteSpaces();
          if (strcasecmp('FROM', current($this->tokens))
              && strcasecmp('SOURCE', current($this->tokens))
              && strcasecmp('WHERE', current($this->tokens)))
              trigger_error(RDQL_SYN_ERR .": '" .htmlspecialchars(current($this->tokens))
                            ."' - SOURCE or WHERE clause expected", E_USER_ERROR);
      }

      // Parse SELECT ?Var (, ?Var)*
      $commaExpected = FALSE;   // a variable was just read, a comma may follow
      $comma = FALSE;           // a comma was just read, a variable must follow
      while (current($this->tokens) != NULL) {
          $k = key($this->tokens);
          $token = $this->tokens[$k];

          if ($token === ',') {
              if (!$commaExpected)
                  trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
              $comma = TRUE;
              $commaExpected = FALSE;
          } elseif ($token === '(' || $token === ')') {
              trigger_error(RDQL_SEL_ERR ." '$token' - illegal input", E_USER_ERROR);
          } else {
              if (!strcasecmp('FROM', $token) || !strcasecmp('SOURCE', $token)) {
                  if ($comma)
                      trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                  unset($this->tokens[$k]);
                  return $this->parseFrom();
              } elseif (!strcasecmp('WHERE', $token)) {
                  // A dangling comma right before WHERE is reported as such.
                  // (The former "&& !$comma" in the condition made this check
                  // unreachable and produced a misleading "'?' missing".)
                  if ($comma)
                      trigger_error(RDQL_SEL_ERR ." ',' - unexpected comma", E_USER_ERROR);
                  unset($this->tokens[$k]);
                  return $this->parseWhere();
              }
              if ($token[0] == '?') {
                  $this->parsedQuery['selectVars'][] = $this->_validateVar($token, RDQL_SEL_ERR);
                  $commaExpected = TRUE;
                  $comma = FALSE;
              } else {
                  trigger_error(RDQL_SEL_ERR ." '$token' - '?' missing", E_USER_ERROR);
              }
          }

          unset($this->tokens[$k]);
          $this->_clearWhiteSpaces();
      }
      trigger_error(RDQL_SYN_ERR . ': WHERE clause missing', E_USER_ERROR);
  }
  246. /**
  247. * Parse the FROM/SOURCES clause of an Rdql query
  248. * When the parsing of this clause is finished, parseWhere() will be called.
  249. *
  250. * @throws PhpError
  251. * @access private
  252. */
  function parseFrom() {
      // Parses the comma separated list of sources (<URI> or QName) of the
      // FROM/SOURCE clause and hands over to parseWhere() at the WHERE keyword.
      $comma = FALSE;           // a comma was just read, a source must follow
      $commaExpected = FALSE;   // a source was just read, a comma may follow
      $i = -1;
      while (current($this->tokens) != NULL) {
          $this->_clearWhiteSpaces();
          $token = current($this->tokens);

          // Only whitespace was left: leave the loop and report the
          // missing WHERE clause below.
          if ($token === FALSE || $token === '')
              break;

          // !empty() instead of count(): 'sources' does not exist before the
          // first source was read, and count(NULL) is a TypeError on PHP 8.
          if (!strcasecmp('WHERE', $token) && !empty($this->parsedQuery['sources'])) {
              if ($comma)
                  trigger_error(RDQL_SRC_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseWhere();
          }

          if ($token == ',') {
              if ($commaExpected) {
                  $comma = TRUE;
                  $commaExpected = FALSE;
                  unset($this->tokens[key($this->tokens)]);
              } else {
                  trigger_error(RDQL_SRC_ERR ."',' - unexpected comma", E_USER_ERROR);
              }
          } else {
              // _validateUri() also removes the tokens it consumed
              $this->parsedQuery['sources'][++$i]['value'] = $this->_validateUri($token, RDQL_SRC_ERR);
              if ($token[0] != '<')
                  $this->parsedQuery['sources'][$i]['is_qname'] = TRUE;
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      }
      trigger_error(RDQL_SYN_ERR .': WHERE clause missing', E_USER_ERROR);
  }
  283. /**
  284. * Parse the WHERE clause of an Rdql query.
  285. * When the parsing of the WHERE clause is finished, this method will call
  286. * a suitable method to parse the subsequent clause if provided.
  287. *
  288. * @throws PhpError
  289. * @access private
  290. */
  function parseWhere() {
      // Parses the comma separated triple patterns "(subject, predicate,
      // object)" of the WHERE clause and hands over to parseAnd() or
      // parseUsing() when one of these keywords follows.
      $comma = FALSE;           // a comma was just read, a pattern must follow
      $commaExpected = FALSE;   // a pattern was just read, a comma may follow
      $i = 0;
      do {
          $this->_clearWhiteSpaces();

          // !empty() instead of count(): 'patterns' does not exist before the
          // first pattern was read, and count(NULL) is a TypeError on PHP 8.
          $hasPatterns = !empty($this->parsedQuery['patterns']);

          if (!strcasecmp('AND', current($this->tokens)) && $hasPatterns) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseAnd();
          } elseif (!strcasecmp('USING', current($this->tokens)) && $hasPatterns) {
              if ($comma)
                  trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              return $this->parseUsing();
          }

          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_WHR_ERR);
          } else {
              if (current($this->tokens) != '(')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens)
                                ."' - '(' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // The _validate*() helpers remove the tokens they consume.
              $this->parsedQuery['patterns'][$i]['subject'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i]['predicate'] = $this->_validateVarUri(current($this->tokens));
              $this->_checkComma(TRUE, RDQL_WHR_ERR);
              $this->parsedQuery['patterns'][$i++]['object'] = $this->_validateVarUriLiteral(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (current($this->tokens) != ')')
                  trigger_error(RDQL_WHR_ERR ."'" .current($this->tokens) ."' - ')' expected", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      if ($comma)
          trigger_error(RDQL_WHR_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  336. /**
  337. * Parse the AND clause of an Rdql query
  338. *
  339. * @throws PhpError
  340. * @access private
  341. * @todo clear comments
  342. */
  function parseAnd() {
      // Collects the tokens of the AND clause into filter strings. A comma
      // closes the current filter; the USING keyword closes the last filter
      // and passes control to parseUsing().
      $this->_clearWhiteSpaces();
      $filterIndex = 0;
      $buffer = '';
      while (current($this->tokens) != NULL) {
          $key = key($this->tokens);
          $piece = $this->tokens[$key];

          if (!strcasecmp('USING', $piece)) {
              $this->parseFilter($filterIndex, $buffer);
              unset($this->tokens[$key]);
              return $this->parseUsing();
          }

          if ($piece == ',') {
              // end of one filter: parse it and start collecting the next
              $this->parseFilter($filterIndex, $buffer);
              $buffer = '';
              ++$filterIndex;
          } else {
              $buffer .= $piece;
          }
          unset($this->tokens[$key]);
      }
      $this->parseFilter($filterIndex, $buffer);
  }
  365. /**
  366. * Parse the USING clause of an Rdql query
  367. *
  368. * @throws PhpError
  369. * @access private
  370. */
  function parseUsing() {
      // Parses the comma separated namespace declarations "prefix FOR <URI>"
      // of the USING clause into $this->parsedQuery['ns'][prefix].
      $commaExpected = FALSE;   // a declaration was just read, a comma may follow
      $comma = FALSE;           // a comma was just read, a declaration must follow
      do {
          $this->_clearWhiteSpaces();
          if (current($this->tokens) == ',') {
              $comma = TRUE;
              $this->_checkComma($commaExpected, RDQL_USG_ERR);
          } else {
              $prefix = $this->_validatePrefix(current($this->tokens));
              $this->_clearWhiteSpaces();

              if (strcasecmp('FOR', current($this->tokens)))
                  trigger_error(RDQL_USG_ERR ." keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
              unset($this->tokens[key($this->tokens)]);
              $this->_clearWhiteSpaces();

              // _validateUri() removes the URI tokens it consumed
              $this->parsedQuery['ns'][$prefix] = $this->_validateUri(current($this->tokens), RDQL_USG_ERR);
              $this->_clearWhiteSpaces();
              $commaExpected = TRUE;
              $comma = FALSE;
          }
      } while (current($this->tokens) != NULL);

      // A trailing comma belongs to the USING clause, so report it with the
      // USING error prefix (it used to be reported as a WHERE clause error).
      if ($comma)
          trigger_error(RDQL_USG_ERR ." ',' - unexpected comma", E_USER_ERROR);
  }
  395. /**
  396. * Check if a filter from the AND clause contains an equal number of '(' and ')'
  397. * and parse filter expressions.
  398. *
  399. * @param integer $n
  400. * @param string $filter
  401. * @throws PHPError
  402. * @access private
  403. */
  function parseFilter($n, $filter) {
      // An empty filter means that two commas followed each other.
      if ($filter == NULL)
          trigger_error(RDQL_AND_ERR ." ',' - unexpected comma", E_USER_ERROR);

      // The filter has to contain as many '(' as ')'.
      $opening = substr_count($filter, '(');
      $closing = substr_count($filter, ')');
      if ($opening > $closing) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - ')' missing ", E_USER_ERROR);
      } elseif ($opening < $closing) {
          trigger_error(RDQL_AND_ERR ."'" .htmlspecialchars($filter) ."' - too many ')' ", E_USER_ERROR);
      }

      // Store the parsed filter under its position within the AND clause.
      $this->parsedQuery['filters'][$n] = $this->parseExpressions($filter);
  }
  417. /**
  418. * Parse expressions inside the passed filter:
  419. * 1) regex equality expressions: ?var [~~ | =~ | !~ ] REG_EX
  420. * 2a) string equality expressions: ?var [eq | ne] "literal"@lang^^dtype.
  421. * 2b) string equality expressions: ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
  422. * 3) numerical expressions: e.q. (?var1 - ?var2)*4 >= 20
  423. *
  424. * In cases 1-2 parse each expression of the given filter into an array of variables.
  425. * For each parsed expression put a place holder (e.g. ##RegEx_1##) into the filterStr.
  426. * The RdqlEngine will then replace each place holder with the resulting boolean value
  427. * of the corresponding expression.
  428. * The remaining filterStr contains only numerical expressions and place holders.
  429. *
  430. * @param string $filterStr
  431. * @return array ['string'] = string
  432. * ['evalFilterStr'] = string
  433. * ['regexEqExprs'][]['var'] = ?VARNAME
  434. * ['operator'] = (eq | ne)
  435. * ['regex'] = string
  436. * ['strEqExprs'][]['var'] = ?VARNAME
  437. * ['operator'] = (eq | ne)
  438. * ['value'] = string
  439. * ['value_type'] = ('variable' | 'URI' | 'QName'| 'Literal')
  440. * ['value_lang'] = string
  441. * ['value_dtype'] = string
  442. * ['value_dtype_is_qname'] = boolean
  443. * ['numExprVars'][] = ?VARNAME
  444. * @access private
  445. */
  function parseExpressions($filterStr) {
      // Splits one filter of the AND clause into its expressions. Regex and
      // string equality expressions are extracted into arrays and replaced
      // in the filter string by place holders (##RegEx_N##, ##strEqExpr_N##);
      // whatever remains is kept as 'evalFilterStr', and the variables still
      // contained in it are listed in 'numExprVars'.
      $parsedFilter = array();
      $parsedFilter['string'] = $filterStr;
      $parsedFilter['regexEqExprs'] = array();
      $parsedFilter['strEqExprs'] = array();
      $parsedFilter['numExprVars'] = array();

      // 1) regex equality expressions: ?var [~~ | =~ | !~] REG_EX
      //    groups: 1 = variable, 2 = operator, 3 = opening quotation mark,
      //            4 = regular expression, 5 = closing quotation mark
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $this->_checkRegExQuotation($filterStr, $eqExprs[3][$i], $eqExprs[5][$i]);
          $parsedFilter['regexEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['regexEqExprs'][$i]['operator'] = $eqExprs[2][$i];
          $parsedFilter['regexEqExprs'][$i]['regex'] = $eqExprs[4][$i];
          $filterStr = str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
      }

      // 2a) ?var [eq | ne] "literal"@lang^^dtype
      //     groups: 1 = variable, 2 = operator, 3 = quoted literal,
      //             4 = @lang, 5 = ^^datatype
      $reg_ex  = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
      $reg_ex .= "(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$i]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$i]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$i]['value'] = trim($eqExprs[3][$i], "'\"");
          $parsedFilter['strEqExprs'][$i]['value_type'] = 'Literal';
          // drop the leading '@' of the language tag
          $parsedFilter['strEqExprs'][$i]['value_lang'] = substr($eqExprs[4][$i], 1);
          // drop the leading '^^' of the datatype
          $dtype = substr($eqExprs[5][$i], 2);
          if ($dtype) {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = $this->_validateUri($dtype, RDQL_AND_ERR);
              if ($dtype[0] != '<')
                  $parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] = TRUE;
          } else {
              $parsedFilter['strEqExprs'][$i]['value_dtype'] = '';
          }
          $filterStr = str_replace($eqExpr, " ##strEqExpr_$i## ", $filterStr);
      }

      // 2b) ?var [eq | ne] ?var
      // $ii continues the numbering of the string equality expressions
      $ii = count($parsedFilter['strEqExprs']);
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          $parsedFilter['strEqExprs'][$ii]['value'] = $this->_isDefined($eqExprs[3][$i]);
          $parsedFilter['strEqExprs'][$ii]['value_type'] = 'variable';
          $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      // 2c) ?var [eq | ne] <URI>  or  ?var [eq | ne] prefix:local_name
      //     groups: 4 = <URI>, 5 = QName
      $reg_ex = "/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
      preg_match_all($reg_ex, $filterStr, $eqExprs);
      foreach ($eqExprs[0] as $i => $eqExpr) {
          $parsedFilter['strEqExprs'][$ii]['var'] = $this->_isDefined($eqExprs[1][$i]);
          $parsedFilter['strEqExprs'][$ii]['operator'] = strtolower($eqExprs[2][$i]);
          if ($eqExprs[4][$i]) {
              $parsedFilter['strEqExprs'][$ii]['value'] = trim($eqExprs[4][$i], "<>");
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'URI';
          } elseif ($eqExprs[5][$i]) {
              $this->_validateQName($eqExprs[5][$i], RDQL_AND_ERR);
              $parsedFilter['strEqExprs'][$ii]['value'] = $eqExprs[5][$i];
              $parsedFilter['strEqExprs'][$ii]['value_type'] = 'QName';
          }
          $filterStr = str_replace($eqExpr, " ##strEqExpr_$ii## ", $filterStr);
          $ii++;
      }

      $parsedFilter['evalFilterStr'] = $filterStr;

      // 3) all that is left are numerical expressions and place holders for
      //    the expressions parsed above
      preg_match_all("/\?[a-zA-Z0-9_]+/", $filterStr, $vars);
      foreach ($vars[0] as $var) {
          $parsedFilter['numExprVars'][] = $this->_isDefined($var);
      }
      return $parsedFilter;
  }
  517. /**
  518. * Find all query variables used in the WHERE clause.
  519. *
  520. * @return array [] = ?VARNAME
  521. * @access private
  522. */
  function findAllQueryVariables() {
      // Collects every distinct ?variable used in the WHERE patterns, in
      // order of first appearance. A pattern without any variable is
      // reported as an error.
      $vars = array();
      if (empty($this->parsedQuery['patterns']))
          return $vars;

      foreach ($this->parsedQuery['patterns'] as $pattern) {
          $count = 0;
          // $pattern holds the 'subject', 'predicate' and 'object' entries
          foreach ($pattern as $v) {
              if ($v['value'] && $v['value'][0] == '?') {
                  ++$count;
                  if (!in_array($v['value'], $vars, true))
                      $vars[] = $v['value'];
              }
          }
          if (!$count)
              trigger_error(RDQL_WHR_ERR .'pattern contains no variables', E_USER_ERROR);
      }
      return $vars;
  }
  539. /**
  540. * Replace all namespace prefixes in the pattern and constraint clause of an rdql query
  541. * with the namespaces declared in the USING clause and default namespaces.
  542. *
  543. * @access private
  544. */
  function replaceNamespacePrefixes() {
      // Expands namespace prefixes in the sources, the patterns and the
      // filters of the parsed query, using the namespaces of the USING clause
      // plus the global $default_prefixes. The parser-internal helper
      // entries ('ns', 'is_qname', '*_is_qname') are removed on the way.
      global $default_prefixes;

      if (!isset($this->parsedQuery['ns']))
          $this->parsedQuery['ns'] = array();

      // add default namespaces
      // if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
      // it will be overridden by the default namespace
      if (is_array($default_prefixes))
          $this->parsedQuery['ns'] = array_merge($this->parsedQuery['ns'], $default_prefixes);

      // Search/replace lists for the plain "prefix:" -> namespace
      // substitution inside <URI>s and datatypes. str_ireplace() applies the
      // pairs one after another and ignores case, like the former loop over
      // eregi_replace() (removed in PHP 7) did, but treats both the prefix
      // and the namespace as literal text.
      $search = array();
      $replace = array();
      foreach ($this->parsedQuery['ns'] as $prefix => $uri) {
          $search[] = "$prefix:";
          $replace[] = $uri;
      }

      // replace namespace prefixes in the FROM clause
      if (isset($this->parsedQuery['sources'])) {
          foreach ($this->parsedQuery['sources'] as $n => $source) {
              if (isset($source['is_qname'])) {
                  $this->parsedQuery['sources'][$n]
                      = $this->_replaceNamespacePrefix($source['value'], RDQL_SRC_ERR);
              } else {
                  $this->parsedQuery['sources'][$n]
                      = str_ireplace($search, $replace, $source['value']);
              }
          }
      }

      // replace namespace prefixes in the where clause
      if (isset($this->parsedQuery['patterns'])) {
          foreach ($this->parsedQuery['patterns'] as $n => $pattern) {
              foreach ($pattern as $key => $v) {
                  if (isset($v['is_literal'])) {
                      // Literal: only its datatype can carry a prefix. This is
                      // checked before looking at the value, so that literals
                      // whose value is empty, "0" or starts with '?' get their
                      // datatype expanded as well.
                      if (isset($v['l_dtype_is_qname'])) {
                          $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                              = $this->_replaceNamespacePrefix($v['l_dtype'], RDQL_WHR_ERR);
                          unset($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
                      } else {
                          $this->parsedQuery['patterns'][$n][$key]['l_dtype']
                              = str_ireplace($search, $replace, $v['l_dtype']);
                      }
                  } elseif ($v['value'] && $v['value'][0] != '?') {
                      if (isset($v['is_qname'])) {
                          $this->parsedQuery['patterns'][$n][$key]['value']
                              = $this->_replaceNamespacePrefix($v['value'], RDQL_WHR_ERR);
                          unset($this->parsedQuery['patterns'][$n][$key]['is_qname']);
                      } else {
                          // quoted URI (== <URI>)
                          $this->parsedQuery['patterns'][$n][$key]['value']
                              = str_ireplace($search, $replace, $v['value']);
                      }
                  }
              }
          }
      }

      // replace prefixes in the constraint clause
      if (isset($this->parsedQuery['filters'])) {
          foreach ($this->parsedQuery['filters'] as $n => $filter) {
              foreach ($filter['strEqExprs'] as $i => $expr) {
                  // $expr is a copy: the checks below see the value type as
                  // it was before this loop changed anything.
                  if ($expr['value_type'] == 'QName') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = $this->_replaceNamespacePrefix($expr['value'], RDQL_AND_ERR);
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] = 'URI';
                  }
                  if ($expr['value_type'] == 'URI') {
                      $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
                          = str_ireplace($search, $replace,
                                         $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
                  } elseif ($expr['value_type'] == 'Literal') {
                      if (isset($expr['value_dtype_is_qname'])) {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = $this->_replaceNamespacePrefix($expr['value_dtype'], RDQL_AND_ERR);
                          unset($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
                      } else {
                          $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
                              = str_ireplace($search, $replace,
                                             $this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
                      }
                  }
              }
          }
      }

      unset($this->parsedQuery['ns']);
  }
  620. // =============================================================================
  621. // *************************** helper functions ********************************
  622. // =============================================================================
  623. /**
  624. * Remove whitespace-tokens from the array $this->tokens
  625. *
  626. * @access private
  627. */
  function _clearWhiteSpaces() {
      // Drops tokens from the current position for as long as they are a
      // blank, line feed, tab or carriage return. current() yields FALSE
      // once no token is left, which ends the loop as well.
      $whitespace = array(' ', "\n", "\t", "\r");
      while (in_array(current($this->tokens), $whitespace, true))
          unset($this->tokens[key($this->tokens)]);
  }
  635. /**
  636. * Check if the query string of the given clause contains an undesired ', '.
  637. * If a comma was correctly placed then remove it and clear all whitespaces.
  638. *
  639. * @param string $commaExpected
  640. * @param string $clause_error
  641. * @throws PHPError
  642. * @access private
  643. */
  function _checkComma($commaExpected, $clause_error) {
      // Skips whitespace and, if the current token is a comma, either
      // removes it (when a comma is allowed here) or reports it. After a
      // removed comma the check is repeated with $commaExpected = FALSE, so
      // that a second comma in a row is reported.
      $this->_clearWhiteSpaces();
      // The tokenizer emits a comma as the single character ','; comparing
      // against ', ' (comma plus blank) could never match.
      if (current($this->tokens) == ',') {
          if (!$commaExpected) {
              trigger_error($clause_error ."',' - unexpected comma", E_USER_ERROR);
          } else {
              unset($this->tokens[key($this->tokens)]);
              $this->_checkComma(FALSE, $clause_error);
          }
      }
  }
  655. /**
  656. * Check if the given token is either a variable (?var) or the first token of an URI (<URI>).
  657. * In case of an URI this function returns the whole URI string.
  658. *
  659. * @param string $token
  660. * @return array ['value'] = string
  661. * @throws PHPError
  662. * @access private
  663. */
  function _validateVarUri($token) {
      // Returns array('value' => ...) for a ?variable, an <URI> or a QName;
      // for a QName the entry 'is_qname' is set in addition.
      // isset() keeps an exhausted token list (FALSE) or an empty token from
      // raising an offset warning; _validateUri() reports the actual error.
      $first = isset($token[0]) ? $token[0] : '';
      $token_res = array();
      if ($first == '?') {
          $token_res['value'] = $this->_validateVar($token, RDQL_WHR_ERR);
      } else {
          $token_res['value'] = $this->_validateUri($token, RDQL_WHR_ERR);
          if ($first != '<')
              $token_res['is_qname'] = TRUE;
      }
      return $token_res;
  }
  674. /**
  675. * Check if the given token is either a variable (?var) or the first token
  676. * of either an URI (<URI>) or a literal ("Literal").
  677. * In case of a literal return an array with literal properties (value, language, datatype).
  678. * In case of a variable or an URI return only ['value'] = string.
  679. *
  680. * @param string $token
  681. * @return array ['value'] = string
  682. * ['is_qname'] = boolean
  683. * ['is_literal'] = boolean
  684. * ['l_lang'] = string
  685. * ['l_dtype'] = string
  686. * @throws PHPError
  687. * @access private
  688. */
  function _validateVarUriLiteral($token) {
      // Dispatches on the first character of the token: ?variable, quoted
      // literal, <URI>, or - if the token contains a colon - QName.
      // Anything else is reported as an error and NULL is returned.
      $first = isset($token[0]) ? $token[0] : '';
      if ($first == '?') {
          $statement_object = array('value' => $this->_validateVar($token, RDQL_WHR_ERR));
      } elseif ($first == "'" || $first == '"') {
          $statement_object = $this->_validateLiteral($token);
      } elseif ($first == '<') {
          $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR));
      } elseif (strpos($token, ':') !== FALSE) {
          // strpos() replaces ereg(), which was removed in PHP 7
          $statement_object = array('value' => $this->_validateUri($token, RDQL_WHR_ERR));
          $statement_object['is_qname'] = TRUE;
      } else {
          trigger_error(RDQL_WHR_ERR ." '$token' - ?Variable, &lt;URI&gt;, QName, or \"\" expected", E_USER_ERROR);
          // only reached when an error handler lets execution continue
          $statement_object = NULL;
      }
      return $statement_object;
  }
  703. /**
  704. * Check if the given token is a valid variable name (?var).
  705. *
  706. * @param string $token
  707. * @param string $clause_error
  708. * @return string
  709. * @throws PHPError
  710. * @access private
  711. */
  function _validateVar($token, $clause_error) {
      // A valid variable is a '?' followed by letters, digits or
      // underscores. The match has to cover the whole token; the '?' is
      // escaped because a bare '?' at the start of a pattern is not a valid
      // regular expression.
      preg_match("/\?[a-zA-Z0-9_]+/", $token, $match);
      if (!isset($match[0]) || $match[0] != $token)
          trigger_error($clause_error ."'" .htmlspecialchars($token)
                        ."' - variable name contains illegal characters", E_USER_ERROR);
      // the variable token is consumed
      unset($this->tokens[key($this->tokens)]);
      return $token;
  }
  720. /**
  721. * Check if $token is the first token of a valid URI (<URI>) and return the whole URI string
  722. *
  723. * @param string $token
  724. * @param string $clause_error
  725. * @return string
  726. * @throws PHPError
  727. * @access private
  728. */
  function _validateUri($token, $clause_error) {
      // Validates an <URI> or a QName starting at the current token and
      // removes the consumed tokens. Returns the URI without its angle
      // brackets, or the QName; on error trigger_error() is called and
      // nothing is returned.
      $first = isset($token[0]) ? $token[0] : '';

      if ($first != '<') {
          // QName: the colon must not be the first character (strpos() === 0
          // is deliberately treated like "not found")
          if (strpos($token, ':') && $this->_validateQName($token, $clause_error)) {
              unset($this->tokens[key($this->tokens)]);
              return rtrim($token, ':');
          }
          $errmsg = $clause_error .'\'' .htmlspecialchars($token) .'\' ';
          if ($clause_error == RDQL_WHR_ERR)
              $errmsg .= "- ?Variable or &lt;URI&gt; or QName expected";
          else
              $errmsg .= "- &lt;URI&gt; or QName expected";
          trigger_error($errmsg, E_USER_ERROR);
      } else {
          // <URI>: the tokenizer may have split it; glue the following
          // tokens together until one of them ends with '>'
          $token_res = $token;
          $illegal = array('(', ')', ',', ' ', "\n", "\r");
          // NULL check first, so an exhausted or empty token never reaches
          // the last-character test
          while ($token != NULL && substr($token, -1) != '>') {
              if (in_array($token, $illegal, true)) {
                  trigger_error($clause_error .'\'' .htmlspecialchars($token_res)
                                ."' - illegal input: '$token' - '>' missing", E_USER_ERROR);
              }
              unset($this->tokens[key($this->tokens)]);
              $token = current($this->tokens);
              $token_res .= $token;
          }
          if ($token == NULL)
              trigger_error($clause_error .'\'' .htmlspecialchars($token_res) ."' - '>' missing", E_USER_ERROR);
          unset($this->tokens[key($this->tokens)]);
          return trim($token_res, '<>');
      }
  }
  759. /**
  760. * Check if $token is the first token of a valid literal ("LITERAL") and
  761. * return an array with literal properties (value, language, datatype).
  762. *
  763. * @param string $token
  764. * @return array ['value'] = string
  765. * ['is_literal'] = boolean
  766. * ['l_lang'] = string
  767. * ['l_dtype'] = string
  768. * ['l_dtype_is_qname'] = boolean
  769. * @throws PHPError
  770. * @access private
  771. */
  function _validateLiteral($token) {
      // Reads a quoted literal that starts at the current token and may span
      // several tokens, together with an optional @language and ^^datatype
      // suffix. The consumed tokens are removed. Returns the literal's
      // properties; a missing closing quotation mark is reported as an error.
      $quotation_mark = $token[0];
      $statement_object = array ('value' => '',
                                 'is_literal' => TRUE,
                                 'l_lang' => '',
                                 'l_dtype' => '');
      // strip the opening quotation mark from the first token
      $this->tokens[key($this->tokens)] = substr($token, 1);

      $return = FALSE;
      foreach ($this->tokens as $k => $token) {

          if ($token != NULL && substr($token, -1) == $quotation_mark) {
              // token ends the literal: "...literal"
              $token = rtrim($token, $quotation_mark);
              $return = TRUE;

          // parse @language(^^datatype)?
          } elseif (strpos($token, $quotation_mark .'@') || substr($token, 0, 2) == $quotation_mark .'@') {
              $lang = substr($token, strpos($token, $quotation_mark .'@') + 2);
              if (strpos($lang, '^^') || substr($lang, 0, 2) == '^^') {
                  $dtype = substr($lang, strpos($lang, '^^') + 2);
                  if (!$dtype)
                      trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                    .$token ." - datatype expected" ,E_USER_ERROR);
                  $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
                  if (substr($dtype, 0, 1) != '<')
                      $statement_object['l_dtype_is_qname'] = TRUE;
                  $lang = substr($lang, 0, strpos($lang, '^^'));
              }
              if (!$lang)
                  trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                .$token ." - language expected" ,E_USER_ERROR);
              $statement_object['l_lang'] = $lang;
              // keep only the part in front of the closing quotation mark
              $token = substr($token, 0, strpos($token, $quotation_mark .'@'));
              $return = TRUE;

          // parse ^^datatype
          } elseif (strpos($token, $quotation_mark .'^^') || substr($token, 0, 3) == $quotation_mark .'^^') {
              $dtype = substr($token, strpos($token, $quotation_mark .'^^') + 3);
              if (!$dtype)
                  trigger_error(RDQL_WHR_ERR .$quotation_mark .$statement_object['value']
                                .$token ." - datatype expected" ,E_USER_ERROR);
              $statement_object['l_dtype'] = $this->_validateUri($dtype, RDQL_WHR_ERR);
              if (substr($dtype, 0, 1) != '<')
                  $statement_object['l_dtype_is_qname'] = TRUE;
              // keep only the part in front of the closing quotation mark
              $token = substr($token, 0, strpos($token, $quotation_mark .'^^'));
              $return = TRUE;

          } elseif (strpos($token, $quotation_mark)) {
              // a quotation mark in the middle of a token
              trigger_error(RDQL_WHR_ERR ."'$token' - illegal input", E_USER_ERROR);
          }

          $statement_object['value'] .= $token;
          unset($this->tokens[$k]);
          if ($return)
              return $statement_object;
      }
      trigger_error(RDQL_WHR_ERR ."quotation end mark: $quotation_mark missing", E_USER_ERROR);
  }
  /**
   * Check if the given token is a valid QName (prefix or prefix:localPart).
   *
   * The token is split at ':'. More than two parts, a prefix that is not an
   * NCName, or a non-empty local part that is not an NCName each raise an
   * error. A token without ':' or with an empty local part is accepted as long
   * as the prefix is valid.
   *
   * @param   string  $token
   * @param   string  $clause_error  text prepended to every error message
   * @return  boolean TRUE (errors are reported through trigger_error)
   * @throws  PHPError
   * @access  private
   */
  function _validateQName($token, $clause_error) {
    $parts = explode(':', $token);
    if (count($parts) > 2) {
      trigger_error($clause_error ."illegal QName: '$token'", E_USER_ERROR);
    }
    if (!$this->_validateNCName($parts[0])) {
      trigger_error($clause_error ."illegal prefix in QName: '$token'", E_USER_ERROR);
    }

    // A token without ':' has no second part; reading $parts[1] directly would
    // raise an undefined-offset notice (a warning as of PHP 8).
    $localPart = isset($parts[1]) ? $parts[1] : '';

    // Compare against '' explicitly: a plain truthiness test would let the
    // local part "0" slip through without being validated.
    if ($localPart !== '' && !$this->_validateNCName($localPart)) {
      trigger_error($clause_error ."illegal local part in QName: '$token'", E_USER_ERROR);
    }
    return TRUE;
  }
  /**
   * Tell whether the given token is a valid NCName.
   *
   * The token qualifies when the first match of the NCName pattern is the
   * token itself, i.e. it starts with a letter or underscore and continues
   * with letters, digits, underscores, dots or hyphens only.
   *
   * @param   string  $token
   * @return  boolean
   * @access  private
   */
  function _validateNCName($token) {
    $found = array();
    preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.-]*/", $token, $found);

    // valid only if the first match spans the complete token
    return isset($found[0]) && $found[0] == $token;
  }
  /**
   * Validate a namespace prefix and consume the current token.
   *
   * Raises an error when $token is not an NCName. Afterwards the entry of
   * $this->tokens at the array's current position is removed and the prefix
   * is handed back unchanged.
   *
   * @param   string  $token
   * @return  string  the given prefix
   * @throws  PHPError
   * @access  private
   */
  function _validatePrefix($token) {
    $isNCName = $this->_validateNCName($token);
    if (!$isNCName) {
      trigger_error(RDQL_USG_ERR ."'" .htmlspecialchars($token)
                    ."' - illegal input, namespace prefix expected", E_USER_ERROR);
    }

    // remove the token the internal array pointer currently refers to
    $currentKey = key($this->tokens);
    unset($this->tokens[$currentKey]);

    return $token;
  }
  /**
   * Replace a prefix in a given QName and return a full URI.
   *
   * The QName is split at ':'; the first part is looked up as a key in
   * $this->parsedQuery['ns'] and the namespace found there is prepended to the
   * second part. An unknown prefix raises an error.
   *
   * @param   string  $qName
   * @param   string  $clause_error  text prepended to the error message
   * @return  string  namespace URI followed by the local part
   * @throws  PHPError
   * @access  private
   */
  function _replaceNamespacePrefix($qName, $clause_error) {
    $qName_parts = explode(':', $qName);
    $prefix = $qName_parts[0];
    // a QName without ':' has no local part - avoid the undefined-offset notice
    $localPart = isset($qName_parts[1]) ? $qName_parts[1] : '';

    // without any declared namespace every prefix is undefined
    $namespaces = isset($this->parsedQuery['ns']) ? $this->parsedQuery['ns'] : array();

    if (!array_key_exists($prefix, $namespaces)) {
      // Double-quoted literals carry the apostrophes, so no escaping is needed
      // inside the message.
      trigger_error($clause_error ."undefined prefix: '" .$prefix ."' in: '" .$qName ."'", E_USER_ERROR);
      // only reached when an error handler resumes execution
      return $localPart;
    }
    return $namespaces[$prefix] .$localPart;
  }
  /**
   * Run _isDefined() on every variable listed in the SELECT clause
   * ($this->parsedQuery['selectVars']).
   *
   * @access  private
   */
  function _checkSelectVars() {
    $selected = $this->parsedQuery['selectVars'];
    foreach ($selected as $name) {
      $this->_isDefined($name);
    }
  }
  /**
   * Make sure the given variable is among the variables reported by
   * findAllQueryVariables(); raise an error otherwise.
   *
   * @param   string  $var
   * @return  string  the given variable, unchanged
   * @throws  PHPError
   * @access  private
   */
  function _isDefined($var) {
    $known = in_array($var, $this->findAllQueryVariables());
    if ($known) {
      return $var;
    }

    trigger_error(RDQL_SYN_ERR .": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
    return $var;
  }
  /**
   * Verify that a regular expression taken from the AND clause is quoted.
   *
   * An error is raised when the opening quotation mark is empty, and another
   * one when opening and closing quotation marks differ.
   *
   * @param   string  $filterString  filter expression, used in the error message
   * @param   string  $lQuotMark     quotation mark found in front of the expression
   * @param   string  $rQuotMark     quotation mark found behind the expression
   * @throws  PHPError
   * @access  private
   */
  function _checkRegExQuotation($filterString, $lQuotMark, $rQuotMark) {
    $isQuoted = (bool) $lQuotMark;
    if (!$isQuoted) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - regular expressions must be quoted", E_USER_ERROR);
    }

    $marksMatch = ($lQuotMark == $rQuotMark);
    if (!$marksMatch) {
      trigger_error(RDQL_AND_ERR ."'$filterString' - quotation end mark in the regular expression missing", E_USER_ERROR);
    }
  }
  923. } // end: Class RdqlParser
  924. ?>