PageRenderTime 56ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/php/core/vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Ntriples.php

https://github.com/smerrill/drupal-quickstart
PHP | 211 lines | 161 code | 2 blank | 48 comment | 1 complexity | a0fae9aa19f9d91081c6dd79bf50dad7 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, LGPL-2.1
  1. <?php
  2. /**
  3. * EasyRdf
  4. *
  5. * LICENSE
  6. *
  7. * Copyright (c) 2009-2013 Nicholas J Humfrey. All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright notice,
  14. * this list of conditions and the following disclaimer in the documentation
  15. * and/or other materials provided with the distribution.
  16. * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
  17. * promote products derived from this software without specific prior
  18. * written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * @package EasyRdf
  33. * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
  34. * @license http://www.opensource.org/licenses/bsd-license.php
  35. */
  36. /**
  37. * A pure-PHP class to parse N-Triples with no dependencies.
  38. *
  39. * @package EasyRdf
  40. * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
  41. * @license http://www.opensource.org/licenses/bsd-license.php
  42. */
  class EasyRdf_Parser_Ntriples extends EasyRdf_Parser
  {
      /**
       * Pattern for a blank node token: "_:" followed by an optional label.
       *
       * The label may contain ASCII letters, digits, '_' and '-'. A '.' is
       * accepted only when more label characters follow it, so a statement
       * terminator written directly after the label is never swallowed.
       * An empty label is allowed and denotes an anonymous node.
       */
      const BNODE_PATTERN = '/_:([A-Za-z0-9_\-]*(?:\.+[A-Za-z0-9_\-]+)*)/';

      /**
       * Decodes an encoded N-Triples string.
       *
       * The recognised escapes are \t \b \n \r \f \" \' \\ as well as
       * \uXXXX and \UXXXXXXXX (hexadecimal digits in either case). Every
       * escape is decoded in one left-to-right pass, so the output of one
       * escape can never be mistaken for the start of another: "\\n" decodes
       * to a backslash followed by "n", not to a newline. Backslash
       * sequences that are not recognised are left untouched.
       *
       * @param  string $str An encoded N-Triples string.
       * @return string      The unencoded string.
       **/
      protected function unescapeString($str)
      {
          // Fast path: without a backslash there is nothing to decode.
          if (strpos($str, '\\') === false) {
              return $str;
          }

          $decoded = preg_replace_callback(
              '/\\\\(U[0-9A-Fa-f]{8}|u[0-9A-Fa-f]{4}|[tbnrf"\'\\\\])/',
              array($this, 'decodeEscapeSequence'),
              $str
          );

          // preg_replace_callback() returns null on a PCRE error; fall back
          // to the raw input rather than handing null to the caller.
          return $decoded === null ? $str : $decoded;
      }

      /**
       * Callback for unescapeString(): decodes one matched escape sequence.
       *
       * @ignore
       * @param  array $matches Match data; index 1 holds the sequence
       *                        without its leading backslash.
       * @return string         The decoded character(s).
       */
      protected function decodeEscapeSequence($matches)
      {
          static $simpleEscapes = array(
              't'  => "\x09",
              'b'  => "\x08",
              'n'  => "\x0A",
              'r'  => "\x0D",
              'f'  => "\x0C",
              '"'  => "\x22",
              '\'' => "\x27",
              '\\' => "\x5C"
          );

          $sequence = $matches[1];
          if (isset($simpleEscapes[$sequence])) {
              return $simpleEscapes[$sequence];
          }

          // Anything else matched by the pattern is 'u' + 4 hex digits or
          // 'U' + 8 hex digits: drop the marker and decode the number.
          return $this->encodeCodePoint(hexdec(substr($sequence, 1)));
      }

      /**
       * Encodes a Unicode code point as a UTF-8 byte sequence.
       *
       * @ignore
       * @param  int|float $codePoint The code point number.
       * @return string    The UTF-8 bytes, or an empty string when the
       *                   number lies beyond U+10FFFF.
       */
      protected function encodeCodePoint($codePoint)
      {
          if ($codePoint < 0x80) {
              return chr($codePoint);
          }
          if ($codePoint < 0x800) {
              return chr(($codePoint >> 6) + 192) .
                  chr(($codePoint & 63) + 128);
          }
          if ($codePoint < 0x10000) {
              return chr(($codePoint >> 12) + 224) .
                  chr((($codePoint >> 6) & 63) + 128) .
                  chr(($codePoint & 63) + 128);
          }
          if ($codePoint <= 0x10FFFF) {
              return chr(($codePoint >> 18) + 240) .
                  chr((($codePoint >> 12) & 63) + 128) .
                  chr((($codePoint >> 6) & 63) + 128) .
                  chr(($codePoint & 63) + 128);
          }

          // Not a Unicode code point: emitting bytes for it would produce
          // invalid UTF-8, so the escape is dropped.
          # FIXME: throw an exception instead?
          return '';
      }

      /**
       * Turns a blank node label into a node identifier.
       *
       * An empty label yields a fresh identifier from the graph; any other
       * label is unescaped and passed through remapBnode().
       *
       * @ignore
       * @param  string $label The label that followed "_:" (may be empty).
       * @return string        Value returned by newBNodeId() or remapBnode().
       */
      protected function resolveBnode($label)
      {
          // Compare against '' explicitly: empty('0') is true, which would
          // turn every occurrence of "_:0" into a different anonymous node.
          if ($label === '') {
              return $this->graph->newBNodeId();
          }

          return $this->remapBnode($this->unescapeString($label));
      }

      /**
       * Parses the subject term of a statement.
       *
       * @ignore
       * @param  string $sub     The raw subject token.
       * @param  int    $lineNum Line number reported if parsing fails.
       * @return string          The unescaped URI or a blank node identifier.
       * @throws EasyRdf_Parser_Exception if the token is neither a URI
       *                                  reference nor a blank node.
       */
      protected function parseNtriplesSubject($sub, $lineNum)
      {
          if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
              return $this->unescapeString($matches[1]);
          }

          if (preg_match(self::BNODE_PATTERN, $sub, $matches)) {
              return $this->resolveBnode($matches[1]);
          }

          throw new EasyRdf_Parser_Exception(
              "Failed to parse subject: $sub",
              $lineNum
          );
      }

      /**
       * Parses the object term of a statement.
       *
       * The forms are tried in this order: typed literal, language-tagged
       * literal, plain literal, URI reference, blank node.
       *
       * @ignore
       * @param  string $obj     The raw object token.
       * @param  int    $lineNum Line number reported if parsing fails.
       * @return array           Array with 'type' and 'value' keys, plus
       *                         'datatype' or 'lang' for literals that
       *                         carry one.
       * @throws EasyRdf_Parser_Exception if the token matches none of the
       *                                  supported forms.
       */
      protected function parseNtriplesObject($obj, $lineNum)
      {
          // The literal patterns use (.*) so that an empty value keeps its
          // datatype or language tag, e.g. ""^^<...> or ""@en.
          if (preg_match('/"(.*)"\^\^<([^<>]+)>/', $obj, $matches)) {
              return array(
                  'type' => 'literal',
                  'value' => $this->unescapeString($matches[1]),
                  'datatype' => $this->unescapeString($matches[2])
              );
          }

          if (preg_match('/"(.*)"@([\w\-]+)/', $obj, $matches)) {
              return array(
                  'type' => 'literal',
                  'value' => $this->unescapeString($matches[1]),
                  'lang' => $this->unescapeString($matches[2])
              );
          }

          if (preg_match('/"(.*)"/', $obj, $matches)) {
              return array(
                  'type' => 'literal',
                  'value' => $this->unescapeString($matches[1])
              );
          }

          if (preg_match('/<([^<>]+)>/', $obj, $matches)) {
              // Unescaped in the same way as subject and predicate URIs.
              return array(
                  'type' => 'uri',
                  'value' => $this->unescapeString($matches[1])
              );
          }

          if (preg_match(self::BNODE_PATTERN, $obj, $matches)) {
              return array(
                  'type' => 'bnode',
                  'value' => $this->resolveBnode($matches[1])
              );
          }

          throw new EasyRdf_Parser_Exception(
              "Failed to parse object: $obj",
              $lineNum
          );
      }

      /**
       * Parse an N-Triples document into an EasyRdf_Graph
       *
       * The data is split into lines on LF or CRLF. Blank lines and lines
       * whose first non-whitespace character is '#' are skipped; every
       * other line must be a complete "subject <predicate> object ."
       * statement.
       *
       * @param  object EasyRdf_Graph $graph   the graph to load the data into
       * @param  string               $data    the RDF document data
       * @param  string               $format  the format of the input data
       * @param  string               $baseUri the base URI of the data being parsed
       * @return integer              The number of triples added to the graph
       * @throws EasyRdf_Exception        if $format is not 'ntriples'
       * @throws EasyRdf_Parser_Exception if a line cannot be parsed
       */
      public function parse($graph, $data, $format, $baseUri)
      {
          parent::checkParseParams($graph, $data, $format, $baseUri);

          // NOTE(review): the loose comparison is kept on purpose; $format
          // may not always be a plain string here - confirm before
          // tightening to !==.
          if ($format != 'ntriples') {
              throw new EasyRdf_Exception(
                  "EasyRdf_Parser_Ntriples does not support: $format"
              );
          }

          $lines = preg_split("/\x0D?\x0A/", strval($data));
          foreach ($lines as $index => $line) {
              $lineNum = $index + 1;

              // Skip blank lines and comments. This has to run before the
              // statement pattern, which a comment line could also match.
              if (preg_match("/^\s*(?:#|$)/", $line)) {
                  continue;
              }

              if (!preg_match("/^\s*(.+?)\s+<([^<>]+?)>\s+(.+?)\s*\.\s*$/", $line, $matches)) {
                  throw new EasyRdf_Parser_Exception(
                      "Failed to parse statement",
                      $lineNum
                  );
              }

              $this->addTriple(
                  $this->parseNtriplesSubject($matches[1], $lineNum),
                  $this->unescapeString($matches[2]),
                  $this->parseNtriplesObject($matches[3], $lineNum)
              );
          }

          return $this->tripleCount;
      }
  }