/src/php/document/get-document.php
PHP | 199 lines | 167 code | 11 blank | 21 comment | 22 complexity | 7f113620f5912de1211cbdf9980c8169 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
- <?php
- /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
- /****************************************************************************
- get-document.php
-
- Functions for fetching a single document's sub-structure, along with
- the metadata associated with all sub-structures.
- ****************************************************************************/
include_once '../util.php';
include_once '../subsets/read.php';

// The instance name comes straight from the query string and is used to build
// an include path, so it must be a single plain directory name. Anything that
// could escape the instances/ directory (path separators, NUL bytes, "." or
// "..") is rejected before the path is assembled.
$wordseer_instance = getGetParam('instance');
$instance_is_safe = is_string($wordseer_instance)
    && $wordseer_instance !== ''
    && $wordseer_instance !== '.'
    && $wordseer_instance !== '..'
    && strpbrk($wordseer_instance, "/\\\0") === false;
if (!$instance_is_safe) {
    header('HTTP/1.1 400 Bad Request');
    die('Invalid or missing instance parameter.');
}
$path = '../../../instances/'.$wordseer_instance.'/config.php';
include_once $path;

// Query parameters. Most of these are not read in this file; they are kept as
// globals in case included code relies on them.
$gov = getGetParam('gov');
$govtype = getGetParam('govtype');
$dep = getGetParam('dep');
$deptype = getGetParam('deptype');
$relation = getGetParam('relation');
$collection = getGetParam('collection');
$statistics = getGetParam('statistics');
$phrases = decodeGetJson('phrases');
// Historical misspelling of $phrases; kept as an alias so that any included
// code that reads the old global name continues to work.
$phrasess = $phrases;
$metadata = decodeGetJson('metadata');
$timing = getGetParam('timing');
$documentID = getGetParam('id');
$include_text = getGetParam('include_text') == 'true';

// Only answer when a document ID was supplied; the response is the JSON
// encoding of whatever getDocument() returns.
if ($documentID) {
    $documentUnit = getDocument($documentID, $include_text);
    echo json_encode($documentUnit);
}
/**
 * Retrieves information about the document with the given ID.
 *
 * When $include_text is false only the document-level metadata is fetched.
 * When it is true, the document's units (from document_structure), the
 * metadata attached to those units, and the words of each sentence are
 * fetched as well, optionally narrowed by the 'collection', 'phrases' and
 * 'metadata' request parameters.
 *
 * The fields returned are those expected by
 * {@link WordSeer.model.DocumentModel}.
 *
 * @param mixed $document_id  ID of the document; coerced to an integer
 *                            before being placed into SQL.
 * @param bool  $include_text Whether to include units and sentence words.
 * @return array Associative array with the keys "has_text", "units",
 *               "children", "id", "title" and "metadata", plus one extra
 *               key per document-level metadata property
 *               (property_name => value).
 */
function getDocument($document_id, $include_text) {
    // The ID is interpolated into several SQL statements below. Forcing it
    // to an integer keeps a crafted 'id' parameter from injecting SQL. The
    // caller-supplied value is still what is echoed back in the result.
    $document_sql_id = intval($document_id);

    $units = array();
    $children = array();
    $metadata = array();

    if (!$include_text) {
        // Metadata-only mode: fetch just the document-level metadata rows.
        $sql = "SELECT * from metadata, metadata_structure
            WHERE metadata.document_id = $document_sql_id
            AND metadata.property_id = metadata_structure.property_id
            AND metadata.unit_id = $document_sql_id
            AND metadata.unit_name = 'document';";
        $result = getDocumentQueryOrDie($sql,
            " Getting metadata\n<br> on query\n<br> $sql\n<at> documentviewer/get-document.php l. 75.");
        while ($row = mysql_fetch_assoc($result)) {
            $metadata[] = $row;
        }
    } else {
        // Full-text mode: assemble the units that match the given filters.
        // The filter parameters are re-read here because the function does
        // not share the script's global scope.
        $collection = getGetParam('collection');
        $phrases = decodeGetJson('phrases');
        $metadata_filters = decodeGetJson('metadata');
        $timing = getGetParam('timing');
        include_once "../document/get-metadata.php";

        // Default: every unit of the document, in document order.
        $sql = "SELECT * from document_structure
            WHERE document_id = $document_sql_id
            ORDER BY unit_number ASC;";

        $sentence_ids_for_filters = getSentenceIDsForFilters($metadata_filters,
            $collection, $phrases);
        if ($sentence_ids_for_filters != "all") {
            // Filters are active: restrict to the matching sentences and the
            // units derived from them. Empty lists are left out of the WHERE
            // clause because "IN ()" is a MySQL syntax error.
            // NOTE(review): this query is not restricted to the requested
            // document_id -- confirm that the filter helpers already scope
            // their IDs to a single document.
            $filtered_unit_ids = getUnitsFromSentenceIDs($sentence_ids_for_filters);
            $id_clauses = array();
            foreach (array($filtered_unit_ids, $sentence_ids_for_filters) as $id_list) {
                if (count($id_list) > 0) {
                    $id_clauses[] = "unit_id in (" . join(", ", $id_list) . ")";
                }
            }
            if (count($id_clauses) > 0) {
                $sql = " SELECT *
                    FROM document_structure
                    WHERE " . join("\n OR ", $id_clauses) . "
                    ORDER BY unit_id, unit_number ASC;";
            } else {
                // Nothing matched the filters, so there is nothing to query.
                $sql = null;
            }
        }

        // The document itself is always present as the root unit.
        $units = array("document" => array(
            $document_id => array(
                "metadata" => array(),
                "unit_id" => $document_id,
                "unit_name" => "document",
            )));
        $parent_ids = array();
        $unit_ids = array();
        $word_set_memberships = getWordSetMemberships();

        if ($sql !== null) {
            $result = getDocumentQueryOrDie($sql,
                " Getting document units\n<br> on query\n<br> $sql\n<at> documentviewer/get-document.php l. 75.");
            while ($row = mysql_fetch_assoc($result)) {
                $unit_name = $row["unit_name"];
                $unit_id = $row["unit_id"];
                $unit_ids[] = $unit_id;
                $parent_id = $row["parent_id"];
                $parent_name = $row["parent_name"];

                if (!array_key_exists($unit_name, $units)) {
                    $units[$unit_name] = array();
                    $parent_ids[$unit_name] = array();
                }
                if (!array_key_exists($unit_id, $units[$unit_name])) {
                    // First time this unit is seen: register it.
                    $units[$unit_name][$unit_id] = $row;
                    $units[$unit_name][$unit_id]["metadata"] = array();
                    $parent_ids[$unit_name][$unit_id] =
                        array($parent_id, $parent_name);
                    if ($unit_name == "sentence") {
                        // Sentences carry words instead of child units.
                        $units[$unit_name][$unit_id]["words"] = array();
                        $units[$unit_name][$unit_id]["sentence_id"] = $unit_id;
                    } else {
                        // Every non-sentence unit gets a (possibly empty)
                        // child list.
                        if (!array_key_exists($unit_name, $children)) {
                            $children[$unit_name] = array();
                        }
                        if (!array_key_exists($unit_id, $children[$unit_name])) {
                            $children[$unit_name][$unit_id] = array();
                        }
                    }
                }

                // Record this unit as a child of its parent.
                if (!array_key_exists($parent_name, $children)) {
                    $children[$parent_name] = array();
                }
                if (!array_key_exists($parent_id, $children[$parent_name])) {
                    $children[$parent_name][$parent_id] = array();
                }
                $children[$parent_name][$parent_id][] =
                    array("id" => $unit_id, "name" => $unit_name);
            }
        }

        // The remaining queries use "IN (<unit ids>)", which is only valid
        // SQL when at least one unit was found.
        if (count($unit_ids) > 0) {
            $unit_id_string = join(", ", $unit_ids);

            // Assemble the metadata for each unit under its type and ID.
            $sql = "SELECT *, metadata.unit_name as unit_name from metadata, metadata_structure
                WHERE metadata.document_id = $document_sql_id
                AND metadata.property_id = metadata_structure.property_id
                AND unit_id in ($unit_id_string);";
            if ($timing != 0) {
                // Debug aid: echoes the SQL ahead of the JSON response.
                echo "<br> $sql <br>";
            }
            $result = getDocumentQueryOrDie($sql,
                " Getting metadata\n<br> on query\n<br> $sql\n<at> documentviewer/get-document.php l. 75.");
            while ($row = mysql_fetch_assoc($result)) {
                $row_unit_name = $row["unit_name"];
                $row_unit_id = $row["unit_id"];
                // The IN clause matches on ID only, so a row may belong to a
                // unit type/ID pair that was never registered; skip those
                // instead of creating broken entries.
                if (isset($units[$row_unit_name][$row_unit_id])) {
                    $units[$row_unit_name][$row_unit_id]["metadata"][] = $row;
                }
            }

            // Get the words in each sentence in the document.
            $sql = "SELECT surface, space_after, word_id, sentence_id
                from sentence_xref_word WHERE document_id = $document_sql_id
                AND sentence_id in ($unit_id_string)
                ORDER BY position ASC;";
            $result = getDocumentQueryOrDie($sql,
                " Getting words\n<br> on query\n<br> $sql\n<at> documentviewer/get-document.php l. 75.");
            while ($row = mysql_fetch_assoc($result)) {
                $sentence_id = $row["sentence_id"];
                // Same ID-only matching caveat as for the metadata rows.
                if (!isset($units["sentence"][$sentence_id])) {
                    continue;
                }
                $word = array(
                    'word' => replaceWeirdCharacters($row['surface']),
                    'word_id' => $row['word_id'],
                    'space_after' => str_replace("\n", "<br>", $row['space_after'])
                );
                if (array_key_exists($row['word_id'], $word_set_memberships)) {
                    $word['word_set'] = join(" ",
                        $word_set_memberships[$row['word_id']]);
                }
                $units["sentence"][$sentence_id]["words"][] = $word;
            }
        }

        // The top-level metadata for the document is under the "document"
        // unit, so pull it out.
        $metadata = $units["document"][$document_id]["metadata"];
    }

    $sql = "SELECT title from document WHERE id = $document_sql_id;";
    $result = getDocumentQueryOrDie($sql, " getting document title:\n<br> $sql");
    $row = mysql_fetch_assoc($result);
    // mysql_fetch_assoc() returns false when no such document exists.
    $title = $row ? $row["title"] : null;

    $results = array(
        "has_text" => $include_text,
        "units" => $units,
        "children" => $children,
        "id" => $document_id,
        "title" => $title,
        "metadata" => $metadata);
    // Flatten the document-level metadata into top-level keys. A property
    // whose name equals one of the keys above replaces that entry.
    foreach ($metadata as $metadata_information) {
        $results[$metadata_information["property_name"]] =
            $metadata_information["value"];
    }
    return $results;
}

/**
 * Runs a query for getDocument() and aborts the request with the MySQL error
 * followed by $failure_context if the query fails.
 *
 * @param string $sql             The SQL statement to execute.
 * @param string $failure_context Text appended to mysql_error() on failure.
 * @return resource The result returned by mysql_query().
 */
function getDocumentQueryOrDie($sql, $failure_context) {
    $result = mysql_query($sql);
    if (!$result) {
        die(mysql_error() . $failure_context);
    }
    return $result;
}
- ?>