/src/php/document/get-metadata.php
PHP | 1031 lines | 958 code | 33 blank | 40 comment | 101 complexity | 721b5ef5db6926fbc847e6516dcc6cd8 MD5 | raw file
Possible License(s): Apache-2.0, LGPL-3.0, BSD-3-Clause
- <?php
- /* Copyright 2012 Aditi Muralidharan. See the file "LICENSE" for the full license governing this code. */
- /** get-metadata.php
- **/
- include_once '../util.php';
- $wordseer_instance = getGetParam('instance');
- $path = '../../../instances/'.$wordseer_instance.'/config.php';
- include_once $path;
/** Script-level setup, run whenever this file is loaded: declares the globals
    shared by the filter functions below, raises the session limits MySQL
    applies to in-memory temporary tables, and creates the temporary table
    that holds the ID's of the sentences matching the active filters.
*/
global $num_filter_conditions, $num_search_conditions, $created;
global $main_metadata_counts_table, $main_metadata_counts_table_index;
// The index is the numeric suffix of the cached counts table name.
$main_metadata_counts_table_index = 0;
$main_metadata_counts_table = "main_metadata_counts_{$main_metadata_counts_table_index}";
// 1073741824 bytes = 1 GiB for both session limits.
$sql = "SET session tmp_table_size = 1073741824;";
mysql_query($sql);
$sql = "SET session max_heap_table_size = 1073741824;";
mysql_query($sql);
resetSentenceFilters();
// Only answer the request when the request URI names this file; the check
// keeps the dispatch from running when the file is merely included.
if (strstr($_SERVER['REQUEST_URI'], 'get-metadata.php') !== false) {
    metadata_dispatch(); // Defined at the very bottom of this file.
}
/** (Re)creates the empty `filtered_sent_ids` temporary table and zeroes the
    filter / search condition counters. The work is only done while the
    global $created flag is still unset; afterwards the flag is set so later
    calls leave the table and the counters untouched.
*/
function resetSentenceFilters() {
    global $num_filter_conditions, $num_search_conditions, $created;
    if (!$created) {
        $sql = "DROP TEMPORARY TABLE IF EXISTS `filtered_sent_ids`;";
        mysql_query($sql) or die (mysql_error(). " On <br> $sql");
        // One row per matching sentence; num_matched / num_searches_matched
        // count how many filter and search conditions the sentence satisfied.
        $sql = "CREATE TEMPORARY TABLE `filtered_sent_ids` (
            `id` int(11) NOT NULL DEFAULT '0',
            `document_id` int(11) NOT NULL DEFAULT '0',
            `num_matched` int(11) NOT NULL DEFAULT '1',
            `num_searches_matched` int(11) NOT NULL DEFAULT '0',
            PRIMARY KEY (`id`),
            KEY `num_matched` (`num_matched`, `id`),
            KEY `num_searches_matched` (`num_searches_matched`, `id`)
            ) ENGINE=MEMORY DEFAULT CHARSET=utf8";
        mysql_query($sql) or die (mysql_error()." on:
            <br> $sql get-metadata.php l 507");
        $num_search_conditions = 0;
        $num_filter_conditions = 0;
    }
    $created = true;
}
/** Drops and recreates the empty `filtered_unit_ids` temporary table, which
    maps unit ID's to the sentence and document they belong to.
    Dies with the MySQL error if either statement fails.
*/
function resetUnitsFilter() {
    $drop_sql = "DROP TEMPORARY TABLE IF EXISTS `filtered_unit_ids`;";
    mysql_query($drop_sql) or die (mysql_error(). " On <br> $drop_sql");
    $create_sql = "CREATE TEMPORARY TABLE `filtered_unit_ids` (
        `unit_id` int(11) NOT NULL DEFAULT '0',
        `sentence_id` int(11) NOT NULL DEFAULT '0',
        `document_id` int(11) NOT NULL DEFAULT '0',
        PRIMARY KEY (`unit_id`, `sentence_id`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8";
    mysql_query($create_sql) or die (mysql_error()." on:
        <br> $create_sql <br> at get-metadata.php l 55");
}
/** Returns the document ID's that satisfy all the given metadata filters.

    @param $metadata  the metadata filters, in the format accepted by
                      getUnitsForMetadata().
    @return the string "all" when no filter is active, otherwise an array of
            document ID's (one entry per matching row of document_structure,
            so an ID may repeat).
*/
function getDocumentIDsForMetadata($metadata){
    $timing = getGetParam('timing');
    $unitSets = getUnitsForMetadata($metadata);
    if ($unitSets === "all") {
        return "all";
    }
    $t1 = time();
    if (count($unitSets) > 0) {
        $unit_set_ids_strings = array();
        foreach ($unitSets as $unitSet) {
            // makeUnitsWhereClause() appends a -1 sentinel, so a filter that
            // matched no units yields "unit_id IN (-1)" (no rows) instead of
            // the invalid SQL "unit_id IN ()".
            array_push($unit_set_ids_strings,
                "(".makeUnitsWhereClause($unitSet).")");
        }
        // A document unit must appear in every unit set to match.
        $units_intersection_sql = "AND (".join( "
            AND ",
            $unit_set_ids_strings).") ";
    } else {
        $units_intersection_sql = " AND FALSE ";
    }
    $sql = "SELECT document_id from document_structure
        WHERE TRUE $units_intersection_sql;";

    $result = mysql_query($sql) or die("Error getting sentence IDs for
        metadata <br>
        ".mysql_error()."
        <br> on query
        <br>".$sql);
    $document_ids = array();
    while ($row = mysql_fetch_assoc($result)) {
        array_push($document_ids, $row["document_id"]);
    }
    $t2 = time();
    if ($timing) {
        echo "<br> SQL to get matching document ID's:<br> $sql <br>";
        echo "Time to get documentIDs for metadata: ".($t2 - $t1)."s<br>";
    }
    return $document_ids;
}
/** Returns the document ID's that satisfy both the metadata filters and the
    collection filter.

    Also stores the 'timing' GET parameter in the global $timing, which other
    functions in this file read.

    @param $metadata    the metadata filters (see getUnitsForMetadata()).
    @param $collection  a collection identifier; false or 'all' disables the
                        collection filter.
    @return the string "all" when neither filter is active, otherwise a
            sequentially indexed array of document ID's.
*/
function getDocumentIDsForMetadataAndCollection($metadata, $collection) {
    global $timing;
    $timing = getGetParam('timing');
    $t1 = time();
    $document_ids = array();
    // Apply the metadata filter and get the resulting document IDs.
    $filtered_by_metadata = getDocumentIDsForMetadata($metadata);
    $metadata_filter_active = ($filtered_by_metadata != 'all');
    if ($timing) {
        echo "
            <br>Filtered document IDs: ".json_encode($filtered_by_metadata)."
            <br>";
    }
    // Apply the collections filter and get the resulting document IDs.
    $collection_filter_active =($collection != false && $collection != 'all');
    $filtered_by_collection = array();
    if($collection_filter_active){
        // subsets/read.php contains collection manipulations.
        $filtered_by_collection = getDocumentIDsInCollection($collection);
    }
    if ($metadata_filter_active && $collection_filter_active) {
        // array_intersect() keeps the keys of its first argument; reindex so
        // the result is a plain list (and JSON-encodes as an array).
        $document_ids = array_values(array_intersect($filtered_by_metadata,
            $filtered_by_collection));
    } else if ($metadata_filter_active) {
        $document_ids = $filtered_by_metadata;
    } else if ($collection_filter_active) {
        $document_ids = $filtered_by_collection;
    } else {
        $document_ids = "all";
    }
    $t2 = time();
    if ($timing) {
        echo "All matching document IDs:<br>
            ".json_encode($document_ids)."
            <br>";
        echo "Time to get all matching document IDs: ".($t2 - $t1)."s<br><br>";
    }
    return $document_ids;
}
/** Returns "all" if there are no active filters, but a list of sentence IDs
    otherwise.

    @param $metadata    the metadata filters (see getUnitsForMetadata()).
    @param $collection  a collection identifier; false or 'all' disables the
                        collection filter.
    @return the string "all", or a sequentially indexed array of sentence ID's
            that satisfy every active filter.
**/
function getSentenceIDsForMetadataAndCollection($metadata, $collection) {
    $timing = getGetParam('timing');
    $t1 = time();
    $sentence_ids = array();
    // Apply the metadata filter and get the resulting sentence IDs.
    $filtered_by_metadata = getSentenceIDsForMetadata($metadata);
    $metadata_filter_active = ($filtered_by_metadata != 'all');
    // Apply the collections filter and get the resulting sentence IDs.
    $collection_filter_active =($collection != false && $collection != 'all');
    $filtered_by_collection = array();
    if($collection_filter_active){
        // subsets/read.php contains collection manipulations.
        $filtered_by_collection = getSentenceIDsInCollection($collection);
    }
    if ($metadata_filter_active && $collection_filter_active) {
        // array_intersect() keeps the keys of its first argument; reindex so
        // callers always receive a plain list.
        $sentence_ids = array_values(array_intersect($filtered_by_metadata,
            $filtered_by_collection));
    } else if ($metadata_filter_active) {
        $sentence_ids = $filtered_by_metadata;
    } else if ($collection_filter_active) {
        $sentence_ids = $filtered_by_collection;
    } else {
        $sentence_ids = "all";
    }
    $t2 = time();
    if ($timing) {
        echo "<br>Total time to get sentence ids matching collection and metadata
            filters: ".($t2 - $t1)."s<br>";
    }
    return $sentence_ids;
}
/** Records the sentences that satisfy the given metadata filters in the
    sentence filter table and returns their ID's.

    Rows go to `filtered_sent_ids`, or to `cached_filtered_sent_ids` (tagged
    with the global $query_id) when $cache_results or $query_id is set.
    Each unit set contributes one match per sentence (num_matched); the
    global $num_filter_conditions grows by the number of unit sets and rows
    that fall short are pruned by updateSentenceFilterTable().

    @return "all" when no metadata filter is active; an empty array when
            $cache_results is set; otherwise the array of sentence ID's read
            back from the table.
*/
function getSentenceIDsForMetadata($metadata) {
    global $num_filter_conditions;
    global $cache_results;
    global $query_id;
    $timing = getGetParam('timing');
    if ($cache_results || $query_id) {
        $table_identifier = 'cached_filtered_sent_ids';
        $insertion_fields = '(id, document_id, query_id)';
        $field_identifier = "DISTINCT sentence_id, document_id, $query_id";
        $query_id_where = " AND query_id = $query_id ";
    } else {
        $table_identifier = 'filtered_sent_ids';
        $insertion_fields = '(id, document_id)';
        $field_identifier = "DISTINCT sentence_id, document_id";
        $query_id_where = '';
    }
    // Same column list, but for selecting straight from the sentence table.
    $alt_field_identifier = str_replace("sentence_id", "id",
        $field_identifier);
    $t1 = time();
    $unitSets = getUnitsForMetadata($metadata);
    $t2 = time();
    if ($timing) {
        echo "Time to get unit ids for metadata: ".($t2 - $t1)."s<br><br>";
    }
    if ($unitSets == "all") {
        return "all";
    }
    if (!$query_id || $cache_results) {
        foreach ($unitSets as $unitSet) {
            // Sentences that belong to one of the units.
            $units_where_clause = makeUnitsWhereClause($unitSet);
            $sql = "INSERT INTO $table_identifier $insertion_fields
                SELECT $field_identifier
                FROM sentence_xref_unit, sentence
                WHERE $units_where_clause
                AND sentence_xref_unit.sentence_id = sentence.id
                ON DUPLICATE KEY update num_matched = num_matched + 1;";
            mysql_query($sql) or die (mysql_error()."<br> on
                $sql <br> get-metadata.php l. 210");
            // in case the units are sentences;
            $id_where_clause = makeIdWhereClause($unitSet);
            $sql = "INSERT INTO $table_identifier $insertion_fields
                SELECT $alt_field_identifier
                FROM sentence
                WHERE $id_where_clause
                ON DUPLICATE KEY update num_matched = num_matched + 1;";
            mysql_query($sql) or die (mysql_error()."<br> on
                $sql <br> get-metadata.php l. 210");
        }
        $num_filter_conditions += count($unitSets);
        updateSentenceFilterTable();
    }
    if ($cache_results) {
        return array();
    }
    $sql = "SELECT id from $table_identifier WHERE TRUE
        $query_id_where;";
    $result = mysql_query($sql) or die("Error getting sentence IDs for
        metadata <br>
        ".mysql_error()."
        <br> on query
        <br>".$sql);
    $sentenceIDs = array();
    while ($row = mysql_fetch_assoc($result)) {
        $sentenceIDs[] = $row["id"];
    }
    $t3 = time();
    if ($timing) {
        echo "Time to get ".count($sentenceIDs)." sentence ids for
            metadata: ".($t3 - $t2)."s<br><br>";
    }
    return $sentenceIDs;
}
/** Prunes the sentence filter table: deletes every row that has matched fewer
    filter conditions than $num_filter_conditions or fewer searches than
    $num_search_conditions. Operates on `cached_filtered_sent_ids` (restricted
    to the current $query_id) when $cache_results is set, and on
    `filtered_sent_ids` otherwise.
*/
function updateSentenceFilterTable() {
    global $num_filter_conditions, $num_search_conditions;
    global $cache_results, $query_id;
    if ($cache_results) {
        $table_identifier = 'cached_filtered_sent_ids';
        $query_id_where = " AND query_id = $query_id ";
    } else {
        $table_identifier = 'filtered_sent_ids';
        $query_id_where = '';
    }
    $sql = "DELETE from $table_identifier
        WHERE (num_matched < $num_filter_conditions
        OR num_searches_matched < $num_search_conditions)
        $query_id_where;";
    mysql_query($sql) or die(mysql_error()." on <br> $sql
        <br> while clearing temporary filtered sentence id table,
        <br>get-metadata.php .l 177");
}
/** Same pruning as updateSentenceFilterTable(), but always applied to the
    temporary `filtered_sent_ids` table, regardless of the caching globals.
*/
function updateTemporarySentenceFilterTable() {
    global $num_filter_conditions, $num_search_conditions;
    $prune_sql = "DELETE from filtered_sent_ids
        WHERE (num_matched < $num_filter_conditions
        OR num_searches_matched < $num_search_conditions);";
    mysql_query($prune_sql) or die(mysql_error()." on <br> $prune_sql
        <br> while clearing temporary filtered sentence id table,
        <br>get-metadata.php .l 680");
}
/** Debug helper: echoes the current filter / search condition counters and
    the number of rows in the sentence filter table. Reads the cached table
    (for the current $query_id) when caching is in effect and
    $dont_cache_search_results is not set.
*/
function displayFilterStatistics() {
    global $num_filter_conditions, $num_search_conditions;
    global $cache_results, $query_id, $dont_cache_search_results;
    $reads_cache = ($cache_results || $query_id) && !$dont_cache_search_results;
    if ($reads_cache) {
        echo "<br>Using cache: true<br>";
        $table_identifier = 'cached_filtered_sent_ids';
        $query_id_where = " AND query_id = $query_id ";
    } else {
        $table_identifier = 'filtered_sent_ids';
        $query_id_where = '';
    }
    $result = mysql_query(
        "SELECT * from $table_identifier WHERE TRUE $query_id_where;");
    $sentence_count = mysql_num_rows($result);
    echo "<br> num_filter_conditions = $num_filter_conditions
        <br> num_search_conditions = $num_search_conditions
        <br>Number of filtered sentences: ".$sentence_count;
}
/** Tells whether a decoded metadata filter contains no filters at all.

    @param $metadata  the decoded 'metadata' request parameter: an array, an
                      object, or a falsy value when absent.
    @return true for any falsy value (null, false, empty array, ...) and for
            an object without public properties; false otherwise.
*/
function metadataIsEmpty($metadata){
    if (!$metadata) {
        return true;
    }
    // Objects are always truthy, so an empty JSON object decoded without the
    // associative flag ends up here. array_keys() does not accept objects,
    // so the properties are counted with get_object_vars() instead.
    if (json_encode($metadata) === "{}") {
        $entries = is_object($metadata)
            ? get_object_vars($metadata)
            : (array) $metadata;
        return count($entries) === 0;
    }
    return false;
}
/** Returns the unit ID's that satisfy all the given metadata filters.
    Consults the document_id GET parameter to see whether the search should be
    restricted to the given document ID.

    @param $metadata  map from "<type>_<property name>" to the filter values.
                      For type "string" the values are strings (for property
                      names containing "_set" the part after "__" is used);
                      for every other type they are [start, end] ranges.
    @return the string "all" when nothing restricts the search; otherwise an
            array of unit-ID arrays, one per filter condition (a unit has to
            appear in every one of them to match). When a $query_id is set
            and results are not being cached, no query is run and an empty
            array is returned instead.
*/
function getUnitsForMetadata($metadata){
    $timing = getGetParam('timing');
    global $query_id;
    global $cache_results;
    $document_id = getGetParam('document_id');
    // The parameter comes straight from the request; document_id columns are
    // integers, so force an integer before it is placed in any SQL.
    $document_id_sql = intval($document_id);
    $document_where_clause = "";
    if ($document_id) {
        $document_where_clause = " AND document_id = $document_id_sql ";
    }
    $t1 = time();
    $all = metadataIsEmpty($metadata);
    if ($query_id && !$cache_results) {
        if ($all && !$document_id) {
            return "all";
        }
        return array();
    }
    $unitSets = array();
    if (!$all) {
        $andConditions = array();
        foreach ($metadata as $identifier => $filter_values) {
            $components = explode("_", $identifier);
            $type = $components[0];
            // Escaped exactly once; it is used inside quotes in both branches.
            $property = mysql_escape_string(
                join("_", array_slice($components, 1)));
            if ($type == "string") {
                // Every value becomes its own condition, so all must hold.
                foreach ($filter_values as $val) {
                    $parts = explode("__", mysql_escape_string($val));
                    $value = $parts[0];
                    if (strstr($property, "_set")) {
                        $value = isset($parts[1]) ? $parts[1] : '';
                    }
                    $q = " (property_name = '$property' AND value = '$value')";
                    array_push($andConditions, $q);
                }
            } else {
                if ($timing != 0) {
                    echo $identifier."<br>";
                    echo json_encode($filter_values)."<br>";
                }
                // The ranges of one property are alternatives (OR).
                $queries = array();
                foreach ($filter_values as $range) {
                    $raw_start = (string) $range[0];
                    $raw_end = (string) $range[1];
                    // Bounds are request data: always escape and quote them.
                    $start = mysql_escape_string($raw_start);
                    $end = mysql_escape_string($raw_end);
                    if (strstr($raw_start, ' ') || strstr($raw_end, ' ')) {
                        // Bounds containing a space are compared as strings.
                        $q = " (property_name = '$property' AND
                            value <= '$end'
                            AND value >= '$start' ) ";
                    } else {
                        // value*1 forces a numeric comparison; MySQL converts
                        // the quoted bound to a number as well.
                        $q = " (property_name = '$property' AND
                            value*1 <= '$end'
                            AND value*1 >= '$start' ) ";
                    }
                    array_push($queries, $q);
                }
                $q = join(" OR ", $queries);
                array_push($andConditions, "($q)");
            }
        }
        $sql = "SELECT unit_id from metadata
            WHERE
            ";
        foreach ($andConditions as $condition) {
            $q = $sql.$condition.$document_where_clause;
            $result = mysql_query($q.";") or die(mysql_error()."
                <br> on query
                <br> $q
                <br> at get-metadata.php line 237.");
            if ($timing) {
                echo "<br>
                    $q
                    <br>------------<br>";
            }
            $unitSet = array();
            while ($row = mysql_fetch_assoc($result)) {
                array_push($unitSet, $row["unit_id"]);
            }
            array_push($unitSets, $unitSet);
        }
    } else if ($document_id) {
        // No metadata filter: every unit of the requested document matches.
        $sql = "SELECT unit_id from document_structure
            WHERE document_id = $document_id_sql;";
        $result = mysql_query($sql) or die(mysql_error()."
            <br> on query:
            <br> $sql
            <br> While getting metadata for document id:'$document_id_sql'
            <br> at get-metadata.php line 259.");
        if ($timing) {
            echo "<br>
                $sql
                <br>------------<br>";
        }
        $unitSet = array();
        while ($row = mysql_fetch_assoc($result)) {
            array_push($unitSet, $row["unit_id"]);
        }
        array_push($unitSets, $unitSet);
    } else {
        return "all";
    }
    $t2 = time();
    if ($timing) {
        echo "Time to get units for metadata: ".($t2 - $t1)."s<br><br>";
    }
    return $unitSets;
}
/** Returns every unit that shares a sentence with the given units: the units
    are first resolved to their sentence ID's, and those sentences are then
    mapped back to all units they are cross-referenced with.
**/
function getAllAssociatedUnits($units){
    return getUnitsFromSentenceIDs(getSentenceIDsForUnits($units));
}
/** Returns the distinct sentence ID's cross-referenced (in
    sentence_xref_unit) with any of the given unit ID's.
    Echoes the elapsed time when the global $timing is non-zero.
**/
function getSentenceIDsForUnits($units){
    global $timing;
    $started_at = time();
    $units_condition = makeUnitsWhereClause($units);
    $sql = "SELECT DISTINCT sentence_id from sentence_xref_unit
        WHERE $units_condition;";
    $result = mysql_query($sql) or die ("Error getting sentences from unitSet
        <br>".mysql_error()."
        <br>$sql<br>");
    $sentenceIDs = array();
    while ($row = mysql_fetch_assoc($result)) {
        $sentenceIDs[] = $row['sentence_id'];
    }
    $finished_at = time();
    if ($timing != 0) {
        echo "<br> Time to get sentence ID's for unit set: ".($finished_at - $started_at)."<br>";
    }
    return $sentenceIDs;
}
/** Builds the SQL condition " unit_id IN (<ids>, -1) " for the given unit
    ID's. The trailing -1 keeps the list non-empty when $units is empty.
    The caller's array is not modified.
*/
function makeUnitsWhereClause($units) {
    $units[] = -1;
    $id_list = implode(", ", $units);
    return " unit_id IN ($id_list) ";
}
/** Builds the SQL condition " id IN (<ids>, -1) " for the given ID's.
    The trailing -1 keeps the list non-empty when $units is empty.
    The caller's array is not modified.
*/
function makeIdWhereClause($units) {
    $units[] = -1;
    $id_list = implode(", ", $units);
    return " id IN ($id_list) ";
}
/** Get a list of unit_id's from a list of sentence IDs.
    Returns the distinct unit ID's that sentence_xref_unit associates with
    any of the given sentences.

    @param $sentence_ids  array of sentence ID's.
    @return array of unit ID's; empty when no sentence ID's are given.
**/
function getUnitsFromSentenceIDs($sentence_ids){
    // Without this guard an empty list would produce "sentence_id in ()",
    // which is invalid SQL and would abort the request.
    if (count($sentence_ids) == 0) {
        return array();
    }
    $timing = getGetParam('timing');
    $t1 = time();
    $sentence_id_string = join(", ", $sentence_ids);
    $sql = "SELECT distinct unit_id from sentence_xref_unit
        WHERE sentence_id in ($sentence_id_string);";
    $result = mysql_query($sql) or die ("Error getting associated units
        <br>".mysql_error()."<br>$sql<br>");
    $units = array();
    while ($row = mysql_fetch_assoc($result)) {
        array_push($units, $row["unit_id"]);
    }
    $t2 = time();
    if ($timing) {
        echo "<br>Time to get units from sentenceIDs: ".($t2 - $t1)."s
            <br>";
    }
    return $units;
}
/** Given a sentence ID, looks up the sentence together with its document and
    returns the first result row as an associative array with the keys
    document_id, title and sentence. The value is whatever
    mysql_fetch_assoc() yields, so it is not an array when no row matches.
**/
function getMetadata($sentenceID){
    $result = mysql_query("SELECT document_id, title, sentence
        FROM document, sentence
        WHERE document.id = sentence.document_id
        AND sentence.id = $sentenceID;");
    return mysql_fetch_assoc($result);
}
/** Builds the metadata tree: for every metadata property a node holding one
    child per value, with sentence and document counts.

    When $all is true the counts cover the whole collection and come from the
    cached table named by the global $main_metadata_counts_table, which is
    built on first use. Otherwise they are computed from the sentences
    currently in the sentence filter table, via
    getMetadataInformationForSentences().

    The document_set / sentence_set / word_set nodes are restricted to the
    working sets of the user named by the 'user' GET parameter (children are
    relabelled with the set names), or removed when no user is given.

    @param $sentenceIDs  not used by this function.
    @param $all          true to count over the whole collection.
    @return array mapping property name to its node: children, count,
            document_count, text, propertyName, displayName, property_id
            and type.
*/
function getMetadataTreeFromSentenceIDs($sentenceIDs, $all){
    global $main_metadata_counts_table;
    // Needed by the clean-up loop below; without the declaration the loop
    // bound is an undefined local and the loop can never run.
    global $main_metadata_counts_table_index;
    $timing = getGetParam('timing');
    $t1 = time();
    $metadata = array();
    $sql = "";
    if ($all) {
        if (!table_exists($main_metadata_counts_table)) {
            // clear older tables from previous code versions.
            for ($i = 0; $i < $main_metadata_counts_table_index; $i++) {
                $sql = "DROP TABLE IF EXISTS `main_metadata_counts_$i`";
                $result = mysql_query($sql) or die(mysql_error()."<br>
                    on query<br>
                    $sql
                    <br> made on get-metadata.php line 512");
            }
            $sql = "CREATE TABLE IF NOT EXISTS `$main_metadata_counts_table` (
                `property_name` varchar(100) NOT NULL DEFAULT '',
                `value` varchar(200) NOT NULL DEFAULT '',
                `count` int NOT NULL DEFAULT '0',
                `document_count` int NOT NULL DEFAULT '0',
                PRIMARY KEY `property_name` (`property_name`, `value`)
                ) ENGINE = MyISAM DEFAULT CHARSET = utf8";
            $result = mysql_query($sql) or die(mysql_error()."<br>
                on query<br>
                $sql
                <br> made on get-metadata.php line 457");
            // Count the metadata of units that are cross-referenced with
            // sentences: one count per sentence_xref_unit row.
            $sql = "INSERT IGNORE INTO $main_metadata_counts_table
                (property_name, value, count, document_count)
                SELECT m.property_name as name,
                m.value as value,
                count(m.value),
                count(distinct m.document_id)
                FROM metadata_structure as ms, metadata as m,
                sentence_xref_unit as s
                WHERE
                ms.property_name = m.property_name
                AND ms.is_category = 1
                AND s.unit_id = m.unit_id
                GROUP BY m.property_id, m.value
                ORDER BY m.property_name ASC, m.value ASC;";
            $result = mysql_query($sql) or die(mysql_error()."<br>
                on query<br>
                $sql
                <br> made on get-metadata.php line 572");
            // Count the metadata attached directly to 'sentence' units.
            // INSERT IGNORE skips (property, value) pairs counted above.
            $sql = "INSERT IGNORE INTO $main_metadata_counts_table
                (property_name, value, count, document_count)
                SELECT m.property_name as name,
                m.value as value,
                count(m.value),
                count(distinct m.document_id)
                FROM metadata_structure as ms, metadata as m
                WHERE
                ms.property_name = m.property_name
                AND ms.is_category = 1
                AND m.unit_name = 'sentence'
                GROUP BY m.property_id, m.value
                ORDER BY m.property_name ASC, m.value ASC;";
            $result = mysql_query($sql) or die(mysql_error()."<br>
                on query<br>
                $sql
                <br> made on get-metadata.php line 544");
        }
        $metadata_structure = loadMetadataStructure();
        $sql = "SELECT * from $main_metadata_counts_table;";
        $result = mysql_query($sql) or die(mysql_error()."<br>
            on query<br>
            $sql
            <br> made on get-metadata.php line 481");
        while ($row = mysql_fetch_assoc($result)) {
            $name = $row['property_name'];
            $value = $row['value'];
            $id = $metadata_structure[$name]['property_id'];
            $count = $row['count'];
            $document_count = $row['document_count'];
            $type = $metadata_structure[$name]['type'];
            if(!array_key_exists($name, $metadata)){
                $metadata[$name] = array("children"=>array(),
                    "count"=>0,
                    "document_count"=>0,
                    "sentence_count"=>0,
                    "text"=>$name,
                    "propertyName"=>$name,
                    "displayName"=> $metadata_structure[$name][
                        'name_to_display'],
                    "property_id"=>$id,
                    "type"=>$type);
            }
            array_push($metadata[$name]["children"],
                array("text"=>$value,
                    "count"=>$count,
                    "document_count"=>$document_count,
                    "propertyName"=>$name,
                    "value"=>$value));

            $metadata[$name]["count"] += $count;
            $metadata[$name]["document_count"] += $document_count;
        }
    } else {
        $metadata_structure = loadMetadataStructure();
        $info = getMetadataInformationForSentences($all);
        // [property][value] => set of sentence / document ID's already
        // counted, so each is counted once per value.
        $sentences = array();
        $documents = array();
        // Row keys that identify the row rather than carry a property.
        $bookkeeping_keys = array("sentence_id", "document_id", "words",
            "unit_id");
        foreach ($info['sentences'] as $row) {
            foreach ($row as $identifier => $value) {
                if (in_array($identifier, $bookkeeping_keys)) {
                    continue;
                }
                // Property keys have the form "<type>__<property name>".
                $components = explode("__", $identifier);
                $type = $components[0];
                $name = $components[1];
                $id = $metadata_structure[$name]['property_id'];
                if(!array_key_exists($name, $metadata)){
                    $sentences[$name] = array();
                    $documents[$name] = array();
                    $metadata[$name] = array("children"=>array(),
                        "count"=>0,
                        "document_count"=>0,
                        "text"=>$name,
                        "propertyName"=>$name,
                        "displayName"=>$metadata_structure[$name][
                            "name_to_display"],
                        "property_id"=>$id,
                        "type"=>$type);
                }
                if(!array_key_exists($value, $metadata[$name]["children"])){
                    $metadata[$name]["children"][$value] = array("count"=>0,
                        "document_count"=>0);
                    $sentences[$name][$value] = array();
                    $documents[$name][$value] = array();
                }
                if (!array_key_exists($row['sentence_id'],
                        $sentences[$name][$value])) {
                    $sentences[$name][$value][$row['sentence_id']] = 1;
                    $metadata[$name]["children"][$value]["count"] += 1;
                    $metadata[$name]["count"] += 1;
                    if (!array_key_exists($row['document_id'],
                            $documents[$name][$value])) {
                        $documents[$name][$value][$row['document_id']] = 1;
                        $metadata[$name]["document_count"] += 1;
                        $metadata[$name]["children"][$value]
                            ['document_count'] += 1;
                    }
                }
            }
        }
        // Turn the value-keyed children into the same list-of-leaves shape
        // the $all branch produces.
        foreach(array_keys($metadata) as $name){
            $leaves = array();
            foreach ($metadata[$name]["children"] as $value => $child) {
                array_push($leaves, array(
                    "count"=>$child['count'],
                    "document_count"=>$child['document_count'],
                    "propertyName"=>$name,
                    "text"=>$value,
                    "value"=>$value));
            }
            $metadata[$name]["children"] = $leaves;
        }
    }
    $t2 = time();
    if ($timing) {
        echo "<br>
            Time to get metadata tree for sentence ids: ".($t2-$t1)."s
            <br>";
    }
    //Add in the names of the working sets belonging to this user and remove
    // all others.
    $user = getGetParam('user');
    if ($user) {
        // The user name comes from the request: escape it before it goes
        // into the query.
        $escaped_user = mysql_escape_string($user);
        $sql = "SELECT * from working_set where username = '$escaped_user';";
        $result = mysql_query($sql) or die(mysql_error()."<br>
            on query<br>
            $sql
            <br> made on get-metadata.php line 663");
        $set_names = array();
        while ($row = mysql_fetch_array($result)) {
            $set_names[$row['id']] = $row['name'];
        }
        $keys = array("document_set", "sentence_set", "word_set", "");
        foreach($keys as $name) {
            if (empty($metadata[$name])) {
                continue;
            }
            $metadata[$name]["count"] = 0;
            $renamed = array();
            foreach ($metadata[$name]["children"] as $child) {
                // Children hold working-set ID's; keep only this user's sets
                // and show the set name instead of the ID.
                $set_id = $child["value"];
                if (!empty($set_names[$set_id])) {
                    $child["text"] = $set_names[$set_id];
                    array_push($renamed, $child);
                    $metadata[$name]["count"] += $child["count"];
                }
            }
            if (count($renamed) > 0 && $metadata[$name]["count"] != 0) {
                $metadata[$name]["children"] = $renamed;
            } else {
                // Nothing of this kind belongs to the user: drop the node.
                unset($metadata[$name]);
            }
        }
    } else {
        unset($metadata['document_set']);
        unset($metadata['sentence_set']);
        unset($metadata['word_set']);
    }
    return $metadata;
}
/**
 * Returns the displayed category metadata of documents.
 *
 * Only properties whose metadata_structure row has is_category and
 * value_is_displayed set and unit_name 'document' are included.
 *
 * @param $documentIDs  array of document ID's to restrict the result to, or
 *                      the string 'all'.
 * @param $all          when true, the query runs even if $documentIDs is an
 *                      empty array.
 * @return array in the format
 *         { docID1: { prop1: value1, prop2: value2, ... }, ... };
 *         empty when $all is false and $documentIDs is an empty array.
 */
function getMetadataInformationForDocuments($documentIDs, $all) {
    $metadata = array();
    // $documentIDs may be the string 'all', which must not be counted or
    // joined like a list.
    $filter_by_ids = is_array($documentIDs) && count($documentIDs) > 0;
    if (!$all && $documentIDs !== 'all' && !$filter_by_ids) {
        return $metadata;
    }
    // The statement is assembled from several pieces, so no piece except
    // the last may end with ';'.
    $sql = "SELECT `metadata`.`document_id`,
        `metadata`.`property_name`,
        `metadata`.`value`,
        `metadata`.`property_id`
        FROM `metadata_structure`, `metadata`
        WHERE `metadata`.`property_id` = `metadata_structure`.`property_id`
        AND `metadata_structure`.`is_category` != 0
        AND `metadata_structure`.`value_is_displayed` != 0
        AND `metadata_structure`.`unit_name`='document' ";
    if ($filter_by_ids) {
        $docIDsStr = join(', ', array_map('intval', $documentIDs));
        $sql .= " AND `metadata`.`document_id` IN ($docIDsStr) ";
    }
    $sql .= " ORDER BY `metadata`.`document_id`, `metadata`.`property_id`;";
    $result = mysql_query($sql) or die(mysql_error()."<br>
        on query<br>
        $sql
        <br> while getting metadata for documents in get-metadata.php");
    while ($row = mysql_fetch_assoc($result)) {
        $metadata[$row['document_id']][$row['property_name']] = $row['value'];
    }
    return $metadata;
}
/** Collects the metadata of the sentences currently in the sentence filter
    table.

    Return format:
    {
      sentences: Array[{sentence_id:id, document_id:id,
                        type__prop1:value1, ... , type__propN:valueN}, ...]
      documents: Array[] (always empty here)
    }
    A sentence with several values for a category property is repeated once
    per combination of values (see makeAllVariants()).

    @param $all  when true, no query is made and both lists are empty.
*/
function getMetadataInformationForSentences($all) {
    global $timing;
    global $cache_results;
    global $query_id;
    global $dont_cache_search_results;
    $sentence_info = array();
    $sentences = array();
    $documents = array();
    $metadata_structure = loadMetadataStructure();
    $table_identifier = 'filtered_sent_ids';
    $query_id_where = '';
    // NOTE(review): && binds tighter than ||, so this is how the condition
    // has always been evaluated. displayFilterStatistics() groups the same
    // three flags as ($cache_results || $query_id) && !$dont_cache... --
    // confirm which grouping is intended.
    if ($cache_results || ($query_id && !$dont_cache_search_results)) {
        $table_identifier = 'cached_filtered_sent_ids';
        $query_id_where = " AND query_id = $query_id ";
    }
    if (!$all) {
        resetUnitsFilter();
        // Units the filtered sentences are cross-referenced with.
        $sql = " INSERT IGNORE INTO filtered_unit_ids
            (sentence_id, unit_id, document_id)
            SELECT sentence_id, unit_id, document_id
            from $table_identifier , sentence_xref_unit
            WHERE sentence_id = id
            $query_id_where;";
        $result = mysql_query($sql) or die ("<br> Error inserting unit ids's
            into filtered_unit_ids
            <br>".mysql_error()."
            <br> on query:
            <br> $sql
            <br> at get-metadata.php l. 538.");

        // The sentences themselves, entered as units with their own ID.
        $sql = " INSERT IGNORE INTO filtered_unit_ids
            (sentence_id, unit_id, document_id)
            SELECT id, id, document_id from $table_identifier
            WHERE TRUE $query_id_where;";
        $result = mysql_query($sql) or die ("<br> Error inserting unit ids's
            into filtered_unit_ids
            <br>".mysql_error()."
            <br> on query:
            <br> $sql
            <br> at get-metadata.php l. 538.");
        $sql = "SELECT m.property_name as name,
            m.value as value,
            m.unit_id as unit_id,
            u.sentence_id as sentence_id,
            u.document_id as document_id
            FROM filtered_unit_ids as u, metadata as m
            WHERE m.unit_id = u.unit_id
            ORDER BY sentence_id;";
        $result = mysql_query($sql) or die(mysql_error()."<br>
            on query<br>
            $sql
            <br> made on get-metadata.php line 545");
        if ($timing) {
            $sql = "SELECT count(*) as count from filtered_unit_ids;";
            $res = mysql_query($sql);
            $row = mysql_fetch_assoc($res);
            echo "number of matched units: ".$row['count']."<br>";
        }
        while ($row = mysql_fetch_array($result)) {
            $sentence_id = $row['sentence_id'];
            $document_id = $row['document_id'];
            if (!array_key_exists($sentence_id, $sentence_info)) {
                $sentence_info[$sentence_id] = array(
                    'sentence_id'=>array($sentence_id),
                    'document_id'=>array($document_id)
                );
            }
            if (!isset($metadata_structure[$row['name']])) {
                // Property without a metadata_structure row: never shown.
                continue;
            }
            $metadata_info = $metadata_structure[$row['name']];
            if ($metadata_info['is_category'] == 1
                    || $metadata_info['value_is_displayed'] == 1) {
                $property_name_identifier = ($metadata_info['type']."__".
                    $metadata_info['property_name']);
                if (!array_key_exists($property_name_identifier,
                        $sentence_info[$sentence_id])) {
                    $sentence_info[$sentence_id][$property_name_identifier] =
                        array();
                }
                array_push(
                    $sentence_info[$sentence_id][$property_name_identifier],
                    $row['value']);
            }
        }
    }
    $sentence_ids = array_keys($sentence_info);
    sort($sentence_ids);
    foreach($sentence_ids as $sentence_id) {
        // Kept in its own variable so that it cannot leak into the returned
        // array.
        $sentence_properties = $sentence_info[$sentence_id];
        $properties = array();
        foreach(array_keys($sentence_properties) as $property) {
            $components = explode("__", $property);
            // Keys without "__" are the sentence_id / document_id entries;
            // they are always kept. Other keys only for category properties.
            $is_bookkeeping_key = count($components) < 2;
            if ($is_bookkeeping_key
                    || (isset($metadata_structure[$components[1]])
                        && $metadata_structure[$components[1]]['is_category']
                            == 1)) {
                array_push($properties, $property);
            }
        }
        $variants = makeAllVariants($sentence_properties, $properties);
        foreach ($variants as $v) {
            array_push($sentences, $v);
        }
    }
    return array(
        'sentences' => $sentences,
        'documents' => $documents
    );
}
/** Expands one sentence's multi-valued properties into single-valued copies.

    $info maps each property to its list of values; the result contains one
    array per combination of values of the listed $properties, each mapping
    property => single value. Properties are applied from the last one to the
    first, so the last property's key comes first in every variant.
    Returns an empty array when $properties is empty.
*/
function makeAllVariants($info, $properties) {
    if (count($properties) == 0) {
        return array();
    }
    // Start from a single empty variant and let every property, taken from
    // last to first, multiply the variants built so far.
    $variants = array(array());
    foreach (array_reverse($properties) as $property) {
        $expanded = array();
        foreach ($variants as $partial) {
            foreach (makeVariants($property, $info[$property], $partial)
                    as $variant) {
                $expanded[] = $variant;
            }
        }
        $variants = $expanded;
    }
    return $variants;
}
/** Returns one copy of $input per entry of $values, each with
    $property_name set to that entry.
*/
function makeVariants($property_name, $values, $input) {
    $copies = array();
    foreach ($values as $single_value) {
        // Arrays are copied on assignment, so $input itself stays untouched.
        $copy = $input;
        $copy[$property_name] = $single_value;
        $copies[] = $copy;
    }
    return $copies;
}
/** Loads the whole metadata_structure table.
    Return format
    {
      propertyName: { <every column of that property's row> },
    }
*/
function loadMetadataStructure() {
    $structure_by_name = array();
    $result = mysql_query("SELECT * from metadata_structure;");
    for ($row = mysql_fetch_assoc($result); $row;
            $row = mysql_fetch_assoc($result)) {
        $structure_by_name[$row['property_name']] = $row;
    }
    return $structure_by_name;
}
/** Returns the sentence id's that match the given filters. Also consults the
    GET param "document_id" (through the metadata filter).

    @param $metadata    the metadata filters (see getUnitsForMetadata()).
    @param $collection  a collection identifier; false or 'all' disables the
                        collection filter.
    @param $phrases     the phrase filters, passed to
                        getSentenceIDsForPhrases().
    @return the string "all" when no filter is active, otherwise a
            sequentially indexed array of sentence ID's.
*/
function getSentenceIDsForFilters($metadata, $collection, $phrases) {
    global $timing;
    resetSentenceFilters();
    include_once "../phrases/get-phrases.php";
    if ($timing != 0) {
        echo "<br>----- Getting sentence ID's that match filters -------<br>";
    }
    $sentence_ids = array();
    // Apply the metadata and collection filters.
    $filtered_by_metadata_and_collection =
        getSentenceIDsForMetadataAndCollection($metadata, $collection);
    $metadata_and_collection_filter_active =
        ($filtered_by_metadata_and_collection != 'all');
    // Apply the phrases filter.
    $filtered_by_phrases = getSentenceIDsForPhrases($phrases);
    $phrases_filter_active = ($filtered_by_phrases != 'all');
    if ($metadata_and_collection_filter_active && $phrases_filter_active) {
        // array_intersect() keeps the keys of its first argument; reindex so
        // callers always receive a plain list.
        $sentence_ids = array_values(array_intersect(
            $filtered_by_metadata_and_collection,
            $filtered_by_phrases));
    } else if ($metadata_and_collection_filter_active) {
        $sentence_ids = $filtered_by_metadata_and_collection;
    } else if ($phrases_filter_active) {
        $sentence_ids = $filtered_by_phrases;
    } else {
        $sentence_ids = "all";
    }
    if ($timing) {
        echo "<br> Number of sentence ID's that match all filters: ";
        if ($sentence_ids != "all") {
            echo count($sentence_ids)."<br>";
        } else {
            echo "all<br>";
        }
        echo "<br>Finished getting sentence ID's that match filters <hr>";
    }
    return $sentence_ids;
}
/** Request entry point: runs the search described by the GET parameters
    (gov, govtype, dep, deptype, relation, collection, statistics, phrases,
    metadata) and echoes, as JSON, the flat list of metadata value nodes
    (the children of every property) found in the search results.
*/
function metadata_dispatch() {
    include_once "../util.php";
    include_once "../grammaticalsearch/get-search-results.php";
    //Query parameters
    $gov = getGetParam('gov');
    $govtype = getGetParam('govtype');
    $dep = getGetParam('dep');
    $deptype = getGetParam('deptype');
    $relation = getGetParam('relation');
    $collection = getGetParam('collection');
    $statistics = getGetParam('statistics');
    // Stored under the name the search calls below actually use, so the
    // phrase filter is really passed on.
    $phrases = decodeGetJson('phrases');
    $metadata = decodeGetJson('metadata');
    if($relation == ""){
        if(!$statistics){
            $results = getSearchResults($gov, $govtype, $collection,
                $metadata, $phrases);
        }else{
            $results = array('statistics'=>array(), 'sentences'=>array());
        }
    }else{
        $results = getDependencySearchResults($gov, $govtype, $dep,
            $deptype, $relation, $collection, $statistics, $metadata, $phrases);
    }
    $metadata_results = array();
    // The statistics-only result above has no 'metadata' entry; answer with
    // an empty list in that case.
    if (isset($results['metadata']) && is_array($results['metadata'])) {
        foreach ($results['metadata'] as $property => $node) {
            foreach ($node['children'] as $child) {
                array_push($metadata_results, $child);
            }
        }
    }
    echo json_encode($metadata_results);
}
- ?>