PageRenderTime 126ms CodeModel.GetById 102ms app.highlight 5ms RepoModel.GetById 17ms app.codeStats 0ms

/lib/EasyRdf/Parser/Ntriples.php

https://github.com/andywer/easyrdf
PHP | 198 lines | 148 code | 2 blank | 48 comment | 1 complexity | ef06a4ba04911303bad3f7527f077c5b MD5 | raw file
  1<?php
  2
  3/**
  4 * EasyRdf
  5 *
  6 * LICENSE
  7 *
  8 * Copyright (c) 2009-2013 Nicholas J Humfrey.  All rights reserved.
  9 *
 10 * Redistribution and use in source and binary forms, with or without
 11 * modification, are permitted provided that the following conditions are met:
 12 * 1. Redistributions of source code must retain the above copyright
 13 *    notice, this list of conditions and the following disclaimer.
 14 * 2. Redistributions in binary form must reproduce the above copyright notice,
 15 *    this list of conditions and the following disclaimer in the documentation
 16 *    and/or other materials provided with the distribution.
 17 * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
 18 *    promote products derived from this software without specific prior
 19 *    written permission.
 20 *
 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 31 * POSSIBILITY OF SUCH DAMAGE.
 32 *
 33 * @package    EasyRdf
 34 * @copyright  Copyright (c) 2009-2013 Nicholas J Humfrey
 35 * @license    http://www.opensource.org/licenses/bsd-license.php
 36 */
 37
 38/**
  39 * A pure-PHP class to parse N-Triples with no dependencies.
 40 *
 41 * @package    EasyRdf
 42 * @copyright  Copyright (c) 2009-2013 Nicholas J Humfrey
 43 * @license    http://www.opensource.org/licenses/bsd-license.php
 44 */
 45class EasyRdf_Parser_Ntriples extends EasyRdf_Parser
 46{
 47    /**
 48     * Decodes an encoded N-Triples string. Any \-escape sequences are substituted
 49     * with their decoded value.
 50     *
 51     * @param  string $str An encoded N-Triples string.
 52     * @return The unencoded string.
 53     **/
 54    protected function unescapeString($str)
 55    {
 56        if (strpos($str, '\\') === false) {
 57            return $str;
 58        }
 59
 60        $mappings = array(
 61            't' => chr(0x09),
 62            'b' => chr(0x08),
 63            'n' => chr(0x0A),
 64            'r' => chr(0x0D),
 65            'f' => chr(0x0C),
 66            '\"' => chr(0x22),
 67            '\'' => chr(0x27)
 68        );
 69        foreach ($mappings as $in => $out) {
 70            $str = preg_replace('/\x5c([' . $in . '])/', $out, $str);
 71        }
 72
 73        if (stripos($str, '\u') === false) {
 74            return $str;
 75        }
 76
 77        while (preg_match('/\\\(U)([0-9A-F]{8})/', $str, $matches) ||
 78               preg_match('/\\\(u)([0-9A-F]{4})/', $str, $matches)) {
 79            $no = hexdec($matches[2]);
 80            if ($no < 128) {
 81                $char = chr($no);
 82            } elseif ($no < 2048) {
 83                $char = chr(($no >> 6) + 192) .
 84                        chr(($no & 63) + 128);
 85            } elseif ($no < 65536) {
 86                $char = chr(($no >> 12) + 224) .
 87                        chr((($no >> 6) & 63) + 128) .
 88                        chr(($no & 63) + 128);
 89            } elseif ($no < 2097152) {
 90                $char = chr(($no >> 18) + 240) .
 91                        chr((($no >> 12) & 63) + 128) .
 92                        chr((($no >> 6) & 63) + 128) .
 93                        chr(($no & 63) + 128);
 94            } else {
 95                $char = '';
 96            }
 97            $str = str_replace('\\' . $matches[1] . $matches[2], $char, $str);
 98        }
 99        return $str;
100    }
101
102    /**
103     * @ignore
104     */
105    protected function parseNtriplesSubject($sub)
106    {
107        if (preg_match('/<([^<>]+)>/', $sub, $matches)) {
108            return $this->unescapeString($matches[1]);
109        } elseif (preg_match('/_:([A-Za-z0-9]*)/', $sub, $matches)) {
110            if (empty($matches[1])) {
111                return $this->graph->newBNodeId();
112            } else {
113                $nodeid = $this->unescapeString($matches[1]);
114                return $this->remapBnode($nodeid);
115            }
116        } else {
117            throw new EasyRdf_Exception(
118                "Failed to parse subject: $sub"
119            );
120        }
121    }
122
123    /**
124     * @ignore
125     */
126    protected function parseNtriplesObject($obj)
127    {
128        if (preg_match('/"(.+)"\^\^<([^<>]+)>/', $obj, $matches)) {
129            return array(
130                'type' => 'literal',
131                'value' => $this->unescapeString($matches[1]),
132                'datatype' => $this->unescapeString($matches[2])
133            );
134        } elseif (preg_match('/"(.+)"@([\w\-]+)/', $obj, $matches)) {
135            return array(
136                'type' => 'literal',
137                'value' => $this->unescapeString($matches[1]),
138                'lang' => $this->unescapeString($matches[2])
139            );
140        } elseif (preg_match('/"(.*)"/', $obj, $matches)) {
141            return array('type' => 'literal', 'value' => $this->unescapeString($matches[1]));
142        } elseif (preg_match('/<([^<>]+)>/', $obj, $matches)) {
143            return array('type' => 'uri', 'value' => $matches[1]);
144        } elseif (preg_match('/_:([A-Za-z0-9]*)/', $obj, $matches)) {
145            if (empty($matches[1])) {
146                return array(
147                    'type' => 'bnode',
148                    'value' => $this->graph->newBNodeId()
149                );
150            } else {
151                $nodeid = $this->unescapeString($matches[1]);
152                return array(
153                    'type' => 'bnode',
154                    'value' => $this->remapBnode($nodeid)
155                );
156            }
157        } else {
158            throw new EasyRdf_Exception(
159                "Failed to parse object: $obj"
160            );
161        }
162    }
163
164    /**
165      * Parse an N-Triples document into an EasyRdf_Graph
166      *
167      * @param object EasyRdf_Graph $graph   the graph to load the data into
168      * @param string               $data    the RDF document data
169      * @param string               $format  the format of the input data
170      * @param string               $baseUri the base URI of the data being parsed
171      * @return integer             The number of triples added to the graph
172      */
173    public function parse($graph, $data, $format, $baseUri)
174    {
175        parent::checkParseParams($graph, $data, $format, $baseUri);
176
177        if ($format != 'ntriples') {
178            throw new EasyRdf_Exception(
179                "EasyRdf_Parser_Ntriples does not support: $format"
180            );
181        }
182
183        $lines = preg_split("/[\r\n]+/", strval($data));
184        foreach ($lines as $line) {
185            if (preg_match("/^\s*#/", $line)) {
186                continue;
187            } elseif (preg_match("/(.+)\s+<([^<>]+)>\s+(.+)\s*\./", $line, $matches)) {
188                $this->addTriple(
189                    $this->parseNtriplesSubject($matches[1]),
190                    $this->unescapeString($matches[2]),
191                    $this->parseNtriplesObject($matches[3])
192                );
193            }
194        }
195
196        return $this->tripleCount;
197    }
198}