PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/EasyRdf/Parser/Ntriples.php

https://github.com/andywer/easyrdf
PHP | 198 lines | 148 code | 2 blank | 48 comment | 1 complexity | ef06a4ba04911303bad3f7527f077c5b MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * EasyRdf
  4. *
  5. * LICENSE
  6. *
  7. * Copyright (c) 2009-2013 Nicholas J Humfrey. All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright notice,
  14. * this list of conditions and the following disclaimer in the documentation
  15. * and/or other materials provided with the distribution.
  16. * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
  17. * promote products derived from this software without specific prior
  18. * written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  21. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  24. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. * @package EasyRdf
  33. * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
  34. * @license http://www.opensource.org/licenses/bsd-license.php
  35. */
  36. /**
  37. * A pure-php class to parse N-Triples with no dependancies.
  38. *
  39. * @package EasyRdf
  40. * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
  41. * @license http://www.opensource.org/licenses/bsd-license.php
  42. */
  43. class EasyRdf_Parser_Ntriples extends EasyRdf_Parser
  44. {
  45. /**
  46. * Decodes an encoded N-Triples string. Any \-escape sequences are substituted
  47. * with their decoded value.
  48. *
  49. * @param string $str An encoded N-Triples string.
  50. * @return The unencoded string.
  51. **/
  52. protected function unescapeString($str)
  53. {
  54. if (strpos($str, '\\') === false) {
  55. return $str;
  56. }
  57. $mappings = array(
  58. 't' => chr(0x09),
  59. 'b' => chr(0x08),
  60. 'n' => chr(0x0A),
  61. 'r' => chr(0x0D),
  62. 'f' => chr(0x0C),
  63. '\"' => chr(0x22),
  64. '\'' => chr(0x27)
  65. );
  66. foreach ($mappings as $in => $out) {
  67. $str = preg_replace('/\x5c([' . $in . '])/', $out, $str);
  68. }
  69. if (stripos($str, '\u') === false) {
  70. return $str;
  71. }
  72. while (preg_match('/\\\(U)([0-9A-F]{8})/', $str, $matches) ||
  73. preg_match('/\\\(u)([0-9A-F]{4})/', $str, $matches)) {
  74. $no = hexdec($matches[2]);
  75. if ($no < 128) {
  76. $char = chr($no);
  77. } elseif ($no < 2048) {
  78. $char = chr(($no >> 6) + 192) .
  79. chr(($no & 63) + 128);
  80. } elseif ($no < 65536) {
  81. $char = chr(($no >> 12) + 224) .
  82. chr((($no >> 6) & 63) + 128) .
  83. chr(($no & 63) + 128);
  84. } elseif ($no < 2097152) {
  85. $char = chr(($no >> 18) + 240) .
  86. chr((($no >> 12) & 63) + 128) .
  87. chr((($no >> 6) & 63) + 128) .
  88. chr(($no & 63) + 128);
  89. } else {
  90. $char = '';
  91. }
  92. $str = str_replace('\\' . $matches[1] . $matches[2], $char, $str);
  93. }
  94. return $str;
  95. }
  96. /**
  97. * @ignore
  98. */
  99. protected function parseNtriplesSubject($sub)
  100. {
  101. if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
  102. return $this->unescapeString($matches[1]);
  103. } elseif (preg_match('/_:([A-Za-z0-9]*)/', $sub, $matches)) {
  104. if (empty($matches[1])) {
  105. return $this->graph->newBNodeId();
  106. } else {
  107. $nodeid = $this->unescapeString($matches[1]);
  108. return $this->remapBnode($nodeid);
  109. }
  110. } else {
  111. throw new EasyRdf_Exception(
  112. "Failed to parse subject: $sub"
  113. );
  114. }
  115. }
  116. /**
  117. * @ignore
  118. */
  119. protected function parseNtriplesObject($obj)
  120. {
  121. if (preg_match('/"(.+)"\^\^<([^<>]+)>/', $obj, $matches)) {
  122. return array(
  123. 'type' => 'literal',
  124. 'value' => $this->unescapeString($matches[1]),
  125. 'datatype' => $this->unescapeString($matches[2])
  126. );
  127. } elseif (preg_match('/"(.+)"@([\w\-]+)/', $obj, $matches)) {
  128. return array(
  129. 'type' => 'literal',
  130. 'value' => $this->unescapeString($matches[1]),
  131. 'lang' => $this->unescapeString($matches[2])
  132. );
  133. } elseif (preg_match('/"(.*)"/', $obj, $matches)) {
  134. return array('type' => 'literal', 'value' => $this->unescapeString($matches[1]));
  135. } elseif (preg_match('/<([^<>]+)>/', $obj, $matches)) {
  136. return array('type' => 'uri', 'value' => $matches[1]);
  137. } elseif (preg_match('/_:([A-Za-z0-9]*)/', $obj, $matches)) {
  138. if (empty($matches[1])) {
  139. return array(
  140. 'type' => 'bnode',
  141. 'value' => $this->graph->newBNodeId()
  142. );
  143. } else {
  144. $nodeid = $this->unescapeString($matches[1]);
  145. return array(
  146. 'type' => 'bnode',
  147. 'value' => $this->remapBnode($nodeid)
  148. );
  149. }
  150. } else {
  151. throw new EasyRdf_Exception(
  152. "Failed to parse object: $obj"
  153. );
  154. }
  155. }
  156. /**
  157. * Parse an N-Triples document into an EasyRdf_Graph
  158. *
  159. * @param object EasyRdf_Graph $graph the graph to load the data into
  160. * @param string $data the RDF document data
  161. * @param string $format the format of the input data
  162. * @param string $baseUri the base URI of the data being parsed
  163. * @return integer The number of triples added to the graph
  164. */
  165. public function parse($graph, $data, $format, $baseUri)
  166. {
  167. parent::checkParseParams($graph, $data, $format, $baseUri);
  168. if ($format != 'ntriples') {
  169. throw new EasyRdf_Exception(
  170. "EasyRdf_Parser_Ntriples does not support: $format"
  171. );
  172. }
  173. $lines = preg_split("/[\r\n]+/", strval($data));
  174. foreach ($lines as $line) {
  175. if (preg_match("/^\s*#/", $line)) {
  176. continue;
  177. } elseif (preg_match("/(.+)\s+<([^<>]+)>\s+(.+)\s*\./", $line, $matches)) {
  178. $this->addTriple(
  179. $this->parseNtriplesSubject($matches[1]),
  180. $this->unescapeString($matches[2]),
  181. $this->parseNtriplesObject($matches[3])
  182. );
  183. }
  184. }
  185. return $this->tripleCount;
  186. }
  187. }