PageRenderTime 50ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/php/document/get-document.php

https://bitbucket.org/silverasm/wordseer
PHP | 199 lines | 167 code | 11 blank | 21 comment | 22 complexity | 7f113620f5912de1211cbdf9980c8169 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
  1. <?php
  2. /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
  3. /****************************************************************************
  4. get-document.php
  5. Functions for fetching a single document's sub-structure, along with
  6. the metadata associated with all sub-structures.
  7. ****************************************************************************/
  8. include_once '../util.php';
  9. include_once '../subsets/read.php';
  // ---------------------------------------------------------------------------
  // Request handling: load the per-instance configuration, read the query
  // parameters, and, when a document id is supplied, emit that document as JSON.
  // ---------------------------------------------------------------------------

  // The instance name comes straight from the query string and is spliced into
  // an include path, so it must never be allowed to climb out of the instances
  // directory. Only plain names (letters, digits, "_", "-", ".") are accepted,
  // and ".." is rejected outright. An empty value is still let through so that
  // requests without an instance behave as they did before.
  $wordseer_instance = getGetParam('instance');
  if (is_array($wordseer_instance)
      || preg_match('/\A[A-Za-z0-9_.-]*\z/', (string) $wordseer_instance) !== 1
      || strpos((string) $wordseer_instance, '..') !== false) {
      header('HTTP/1.1 400 Bad Request');
      die('Invalid instance name.');
  }
  $path = '../../../instances/'.$wordseer_instance.'/config.php';
  include_once $path;

  // Query parameters. Most of these are not read in this file; they are kept
  // as script-level variables in case included code relies on them
  // (NOTE(review): confirm and prune the ones nothing uses).
  $gov = getGetParam('gov');
  $govtype = getGetParam('govtype');
  $dep = getGetParam('dep');
  $deptype = getGetParam('deptype');
  $relation = getGetParam('relation');
  $collection = getGetParam('collection');
  $statistics = getGetParam('statistics');
  $phrases = decodeGetJson('phrases');
  $metadata = decodeGetJson('metadata');
  $timing = getGetParam('timing');
  $documentID = getGetParam('id');
  // The flag arrives as text; only the literal string 'true' enables it.
  $include_text = getGetParam('include_text') == 'true';

  // Only answer when an id was supplied; otherwise this file just defines
  // getDocument() for whoever included it.
  if ($documentID) {
      $documentUnit = getDocument($documentID, $include_text);
      echo json_encode($documentUnit);
  }
  /**
   * Retrieves information about the document with the given ID. If
   * $include_text is true, also includes the text of the document, otherwise
   * only retrieves the metadata. The fields returned are those expected by
   * {@link WordSeer.model.DocumentModel}.
   *
   * When $include_text is true, the 'collection', 'phrases', 'metadata' and
   * 'timing' GET parameters are read as well: the first three restrict which
   * units are returned, and a non-zero 'timing' echoes the metadata query for
   * debugging.
   *
   * Uses the legacy mysql_* API on whatever connection the including script
   * has already opened; any query failure terminates the request via die().
   *
   * @param int|string $document_id  Numeric id of the document. Non-numeric
   *                                 values are rejected because the id is
   *                                 interpolated into SQL.
   * @param bool       $include_text Whether to load the document's units and
   *                                 the words of each sentence.
   * @return array Associative array with the keys "has_text", "units",
   *               "children", "id", "title" and "metadata", plus one extra
   *               key per document-level metadata property (property_name =>
   *               value). A property whose name equals one of the fixed keys
   *               overwrites it.
   */
  function getDocument($document_id, $include_text) {
      // The id is spliced into several SQL statements below, so insist on a
      // plain integer before going any further.
      $id_is_numeric = is_int($document_id)
          || (is_string($document_id)
              && preg_match('/\A[0-9]+\z/', $document_id) === 1);
      if (!$id_is_numeric) {
          die("Invalid document id <at> documentviewer/get-document.php.");
      }

      $units = array();
      $children = array();
      $metadata = array();

      if (!$include_text) {
          // Metadata only: fetch the properties attached to the document
          // unit itself.
          $sql = "SELECT * from metadata, metadata_structure
              WHERE metadata.document_id = $document_id
              AND metadata.property_id = metadata_structure.property_id
              AND metadata.unit_id = $document_id
              AND metadata.unit_name = 'document';";
          $result = getDocumentRunQuery($sql, "Getting metadata");
          while ($row = mysql_fetch_assoc($result)) {
              $metadata[] = $row;
          }
      } else {
          // Assemble the full text for each unit that matches the given
          // filters and send it back.
          $collection = getGetParam('collection');
          $phrases = decodeGetJson('phrases');
          $metadata_filters = decodeGetJson('metadata');
          $timing = getGetParam('timing');
          include_once "../document/get-metadata.php";

          // Default: every unit of the document, in reading order.
          $sql = "SELECT * from document_structure
              WHERE document_id = $document_id
              ORDER BY unit_number ASC;";
          $sentence_ids_for_filters = getSentenceIDsForFilters(
              $metadata_filters, $collection, $phrases);
          if ($sentence_ids_for_filters != "all") {
              // Filters are active: restrict to the matching sentences and
              // the units derived from them.
              $filtered_unit_ids = getUnitsFromSentenceIDs(
                  $sentence_ids_for_filters);
              $wanted_ids = array_merge(
                  $filtered_unit_ids, $sentence_ids_for_filters);
              if (count($wanted_ids) > 0) {
                  $wanted_id_string = join(", ", $wanted_ids);
                  $sql = " SELECT *
                      FROM document_structure
                      WHERE unit_id in ($wanted_id_string)
                      ORDER BY unit_id, unit_number ASC;";
              } else {
                  // Nothing matched; "IN ()" is a syntax error in MySQL, so
                  // skip the query and return the document without units.
                  $sql = null;
              }
          }
          $result = ($sql !== null)
              ? getDocumentRunQuery($sql, "Getting document units")
              : false;

          // The document itself is always present as the root unit.
          $units = array("document" => array(
              $document_id => array(
                  "metadata" => array(),
                  "unit_id" => $document_id,
                  "unit_name" => "document",
              )));
          $unit_ids = array();
          $word_set_memberships = getWordSetMemberships();

          while ($result && ($row = mysql_fetch_assoc($result))) {
              $unit_name = $row["unit_name"];
              $unit_id = $row["unit_id"];
              $parent_id = $row["parent_id"];
              $parent_name = $row["parent_name"];
              $unit_ids[] = $unit_id;

              if (!array_key_exists($unit_name, $units)) {
                  $units[$unit_name] = array();
              }
              if (!array_key_exists($unit_id, $units[$unit_name])) {
                  // First time this unit is seen: register it.
                  $units[$unit_name][$unit_id] = $row;
                  $units[$unit_name][$unit_id]["metadata"] = array();
                  if ($unit_name == "sentence") {
                      // Sentences are leaves and carry words, not children.
                      $units[$unit_name][$unit_id]["words"] = array();
                      $units[$unit_name][$unit_id]["sentence_id"] = $unit_id;
                  } else {
                      // Every non-sentence unit gets a (possibly empty)
                      // child list.
                      if (!array_key_exists($unit_name, $children)) {
                          $children[$unit_name] = array();
                      }
                      if (!array_key_exists($unit_id, $children[$unit_name])) {
                          $children[$unit_name][$unit_id] = array();
                      }
                  }
              }

              // Record this unit under its parent, for every row.
              if (!array_key_exists($parent_name, $children)) {
                  $children[$parent_name] = array();
              }
              if (!array_key_exists($parent_id, $children[$parent_name])) {
                  $children[$parent_name][$parent_id] = array();
              }
              $children[$parent_name][$parent_id][] =
                  array("id" => $unit_id, "name" => $unit_name);
          }

          // The two queries below use "IN (<unit ids>)", which is only valid
          // SQL when at least one unit was loaded.
          if (count($unit_ids) > 0) {
              $unit_id_string = join(", ", $unit_ids);

              // Assemble the metadata for each unit under each unit type
              // and id.
              $sql = "SELECT *, metadata.unit_name as unit_name from metadata, metadata_structure
                  WHERE metadata.document_id = $document_id
                  AND metadata.property_id = metadata_structure.property_id
                  AND unit_id in ($unit_id_string);";
              if ($timing != 0) {
                  echo "<br> $sql <br>";
              }
              $result = getDocumentRunQuery($sql, "Getting metadata");
              while ($row = mysql_fetch_assoc($result)) {
                  $name = $row["unit_name"];
                  $id = $row["unit_id"];
                  // Ignore metadata for a unit that was not loaded above
                  // rather than pushing onto a non-existent array.
                  if (isset($units[$name][$id])) {
                      $units[$name][$id]["metadata"][] = $row;
                  }
              }

              // Get the words in each sentence in the document.
              $sql = "SELECT surface, space_after, word_id, sentence_id
                  from sentence_xref_word WHERE document_id = $document_id
                  AND sentence_id in ($unit_id_string)
                  ORDER BY position ASC;";
              $result = getDocumentRunQuery($sql, "Getting words");
              while ($row = mysql_fetch_assoc($result)) {
                  $sentence_id = $row["sentence_id"];
                  if (!isset($units["sentence"][$sentence_id])) {
                      continue;
                  }
                  $word = array(
                      'word' => replaceWeirdCharacters($row['surface']),
                      'word_id' => $row['word_id'],
                      // Newlines in the trailing whitespace become HTML
                      // line breaks.
                      'space_after' => str_replace("\n", "<br>", $row['space_after'])
                  );
                  if (array_key_exists($row['word_id'], $word_set_memberships)) {
                      $word['word_set'] = join(" ",
                          $word_set_memberships[$row['word_id']]);
                  }
                  $units["sentence"][$sentence_id]["words"][] = $word;
              }
          }

          // The top-level metadata for the document is under the "document"
          // unit, so pull it out.
          $metadata = $units["document"][$document_id]["metadata"];
      }

      $sql = "SELECT title from document WHERE id = $document_id;";
      $result = getDocumentRunQuery($sql, "getting document title");
      $row = mysql_fetch_assoc($result);
      // No matching row means an unknown document; report a null title.
      $title = $row ? $row["title"] : null;

      $results = array(
          "has_text" => $include_text,
          "units" => $units,
          "children" => $children,
          "id" => $document_id,
          "title" => $title,
          "metadata" => $metadata);
      // Expose each document-level property as a top-level field as well.
      // (The old trailing array_merge() call discarded its return value and
      // so never had any effect; it has been dropped.)
      foreach ($metadata as $metadata_information) {
          $results[$metadata_information["property_name"]] =
              $metadata_information["value"];
      }
      return $results;
  }

  /**
   * Runs a query for getDocument() and aborts the request with a diagnostic
   * message if MySQL reports an error.
   *
   * @param string $sql         The statement to execute.
   * @param string $description Short label for the failing step, included in
   *                            the error output.
   * @return resource The mysql result resource.
   */
  function getDocumentRunQuery($sql, $description) {
      $result = mysql_query($sql);
      if (!$result) {
          die(mysql_error() . " $description
              <br> on query
              <br> $sql
              <at> documentviewer/get-document.php.");
      }
      return $result;
  }
  187. ?>