PageRenderTime 54ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/www/includes/easyparliament/searchengine.php

https://github.com/openaustralia/twfy
PHP | 641 lines | 524 code | 38 blank | 79 comment | 130 complexity | a0dca896dadcf1c5b3e0aae3fa984d3e MD5 | raw file
  1. <?php
  2. # vim:sw=4:ts=4:et:nowrap
  3. /*
  4. SEARCHENGINE class 2004-05-26
  5. francis@flourish.org
  6. Example usage:
  7. include_once INCLUDESPATH."easyparliament/searchengine.php";
  8. $searchengine = new SEARCHENGINE($searchstring);
  9. $description = $searchengine->query_description();
  10. $short_description = $searchengine->query_description_short();
  11. $count = $searchengine->run_count();
  12. // $first_result begins at 0
  13. $searchengine->run_search($first_result, $results_per_page);
  14. $gids = $searchengine->get_gids();
  15. $relevances = $searchengine->get_relevances();
  16. $bestpos = $searchengine->position_of_first_word($body);
  17. $extract = $searchengine->highlight($extract);
  18. */
  19. include_once INCLUDESPATH . 'dbtypes.php';
  // Load the Xapian PHP bindings, but only when a Xapian database is configured.
  // The candidate locations are tried in the original order and we stop at the
  // first one that exists: including two different copies of xapian.php would
  // redeclare the binding classes and abort with a fatal error.
  if (defined('XAPIANDB') && XAPIANDB) {
      $xapian_binding_paths = array(
          '/usr/local/share/php5/xapian.php',
          '/usr/local/share/xapian-bindings/php5/xapian.php',
          '/usr/share/php5/xapian.php',
          '/usr/share/php/xapian.php',
      );
      foreach ($xapian_binding_paths as $xapian_binding_path) {
          if (file_exists($xapian_binding_path)) {
              include_once $xapian_binding_path;
              break;
          }
      }
      // Do not leave the helper variables behind in the including scope.
      unset($xapian_binding_paths, $xapian_binding_path);
  }
  30. global $xapiandb;
  31. class SEARCHENGINE {
  /**
   * Parse a raw search string into its component parts.
   *
   * Fills in $this->words, $this->phrases, $this->prefixed and
   * $this->excluded from $query. When XAPIANDB is not defined (or is
   * falsy) the constructor returns immediately and none of those
   * properties are set.
   *
   * @param string $query Search string to parse.
   */
  function __construct($query) {
      if (!defined('XAPIANDB') || !XAPIANDB)
          return;

      $this->query = $query;
      $this->stemmer = new XapianStem('english');
      $this->enquire = null;

      // Any characters other than this are treated as, basically, white space
      // (apart from quotes and minuses, special case below)
      // The colon is in here for prefixes speaker:10043 and so on.
      $this->wordchars = "A-Za-z0-9:";

      // An array of normal words.
      $this->words = array();
      // All quoted phrases, as an (array of (arrays of words in each phrase)).
      $this->phrases = array();
      // Items prefixed with a colon (speaker:10024) as an (array of (name, value))
      $this->prefixed = array();
      // Words you don't want
      $this->excluded = array();

      // section:<name> aliases and the major number each one maps to.
      // "representatives" and "senate" sit alongside the older debates/lords
      // names, which are kept in case they are used anywhere.
      $section_majors = array(
          'debates' => 1, 'debate' => 1, 'representatives' => 1,
          'whall' => 2, 'westminster' => 2, 'westminhall' => 2,
          'wrans' => 3, 'wran' => 3,
          'wms' => 4, 'statements' => 4, 'statement' => 4,
          'lordsdebates' => 101, 'lords' => 101, 'senate' => 101,
          'ni' => 5,
          'pbc' => 6, 'standing' => 6,
      );
      // groupby:<name> aliases and their canonical spelling.
      $groupby_aliases = array(
          'date' => 'day', 'day' => 'day',
          'debates' => 'debate', 'debate' => 'debate', 'department' => 'debate',
          'departments' => 'debate', 'dept' => 'debate',
          'speech' => 'speech', 'speeches' => 'speech',
      );

      // Split words up into individual words, and quoted phrases
      preg_match_all('/(' .
          '"|' . # match either a quote, or...
          '(?:(?<![' .$this->wordchars. '])-)?' . # optionally a - (exclude)
              # if at start of word (i.e. not preceded by a word character, in
              # which case it is probably a hyphenated-word)
          '['.$this->wordchars.']+' . # followed by a string of word-characters
          ')/', $query, $all_words);
      if ($all_words) {
          $all_words = $all_words[0];
      } else {
          $all_words = array();
      }

      $in_quote = false;
      foreach ($all_words as $word) {
          if ($word == '"') {
              // A quote toggles phrase mode; opening one starts a new phrase.
              $in_quote = !$in_quote;
              if ($in_quote) {
                  array_push($this->phrases, array());
              }
              continue;
          }
          if ($word == '') {
              continue;
          }

          if (strpos($word, ':') !== false) {
              // explode() replaces split(), which no longer exists in PHP 7+.
              // Limiting to two parts keeps any further colons inside the
              // value (e.g. bias:5:100 gives the value "5:100").
              list($type, $value) = explode(':', strtolower($word), 2);
              if ($type == "section") {
                  if (isset($section_majors[$value])) {
                      $value = $section_majors[$value];
                  }
                  // Unrecognised section names pass through unchanged.
                  $type = "major";
              }
              if ($type == "groupby" && isset($groupby_aliases[$value])) {
                  $value = $groupby_aliases[$value];
              }
              array_push($this->prefixed, array($type, $value));
          } elseif (strpos($word, '-') !== false) {
              array_push($this->excluded, str_replace("-", "", strtolower($word)));
          } elseif ($in_quote) {
              array_push($this->phrases[count($this->phrases) - 1], strtolower($word));
          } else {
              array_push($this->words, strtolower($word));
          }
      }

      twfy_debug("SEARCH", "words: " . var_export($this->words, true));
      twfy_debug("SEARCH", "phrases: " . var_export($this->phrases, true));
      twfy_debug("SEARCH", "prefixed: " . var_export($this->prefixed, true));
      twfy_debug("SEARCH", "excluded: " . var_export($this->excluded, true));
  }

  /**
   * PHP 4 style constructor name, kept so any code that calls it explicitly
   * still works. PHP 8 no longer treats a method named after its class as
   * the constructor, so the real work lives in __construct().
   *
   * @param string $query Search string to parse.
   */
  function SEARCHENGINE ($query) {
      $this->__construct($query);
  }
  /**
   * Turn one parsed phrase back into its quoted textual form.
   *
   * @param array $phrasearray Words of the phrase, in order.
   * @return string The words separated by single spaces, wrapped in double quotes.
   */
  function make_phrase($phrasearray) {
      $spaced = implode(' ', $phrasearray);
      return '"' . $spaced . '"';
  }
  /**
   * Build a human-readable description of the parsed query.
   *
   * Unknown prefixes, major sections and groupings are reported through
   * $PAGE->error_message() and otherwise ignored.
   *
   * @param bool $long When true, the description starts with "containing"
   *                   if the first thing described is a word or phrase.
   * @return string The trimmed description, or '' when XAPIANDB is not
   *                defined or is falsy.
   */
  function query_description_internal($long) {
      global $PAGE, $hansardmajors;

      if (!(defined('XAPIANDB') && XAPIANDB)) {
          return '';
      }

      $text = "";
      $word_total = count($this->words);
      $phrase_total = count($this->phrases);
      $excluded_total = count($this->excluded);

      // Plain words: 'a' / 'a' and 'b' / 'a', 'b', and 'c'
      if ($word_total > 0) {
          // Nothing has been written yet, so only $long decides the lead-in.
          if ($long) {
              $text .= " containing";
          }
          $text .= " the " . make_plural("word", $word_total) . " '";
          if ($word_total > 2) {
              $all_but_last = array_slice($this->words, 0, -1);
              $text .= join("', '", $all_but_last) . "', and '" . $this->words[$word_total - 1];
          } elseif ($word_total == 2) {
              $text .= $this->words[0] . "' and '" . $this->words[1];
          } else {
              $text .= $this->words[0];
          }
          $text .= "'";
      }

      // Quoted phrases
      if ($phrase_total > 0) {
          if ($text != "") {
              $text .= " and";
          } elseif ($long) {
              $text .= " containing";
          }
          $quoted = array_map(array($this, "make_phrase"), $this->phrases);
          $text .= " the " . make_plural("phrase", $phrase_total) . " " . join(', ', $quoted);
      }

      // Excluded words
      if ($excluded_total > 0) {
          $text .= ($word_total > 0 || $phrase_total > 0) ? " but not" : " excluding";
          $text .= " the " . make_plural("word", $excluded_total);
          $text .= " '" . join(' ', $this->excluded) . "'";
      }

      // Prefixed items. Speakers and major sections are gathered and appended
      // at the very end; the other prefixes add to the text as they are met.
      $speaker = array();
      $major = array();
      foreach ($this->prefixed as $items) {
          switch ($items[0]) {
              case 'speaker':
                  $member = new MEMBER(array('person_id' => $items[1]));
                  $speaker[] = $member->full_name();
                  break;

              case 'major':
                  if (isset($hansardmajors[$items[1]]['title'])) {
                      $major[] = $hansardmajors[$items[1]]['title'];
                  } else {
                      $PAGE->error_message("Unknown major section '$items[1]' ignored");
                  }
                  break;

              case 'groupby':
                  if ($items[1] == 'day') {
                      $text .= ' grouped by day';
                  } elseif ($items[1] == 'debate') {
                      $text .= ' grouped by debate/department';
                  } elseif ($items[1] == 'speech') {
                      $text .= ' showing all speeches';
                  } else {
                      $PAGE->error_message("Unknown group by '$items[1]' ignored");
                  }
                  break;

              case "bias":
                  list($weight, $halflife) = explode(":", $items[1]);
                  $text .= " bias by $weight halflife $halflife seconds";
                  break;

              case 'date':
                  $text .= ' spoken on ' . $items[1];
                  break;

              case 'batch':
                  # silently ignore, as description goes in email alerts
                  break;

              default:
                  $PAGE->error_message("Unknown search prefix '$items[0]' ignored");
                  break;
          }
      }

      if (count($speaker)) {
          $text .= ' by ' . join(' or ', $speaker);
      }
      if (count($major)) {
          $text .= ' in ' . join(' or ', $major);
      }

      return trim($text);
  }
  206. // Return the short textual description of the search (no leading "containing")
  /**
   * Short form of the query description: query_description_internal()
   * called with $long = false.
   *
   * @return string
   */
  function query_description_short() {
      $short_text = $this->query_description_internal(false);
      return $short_text;
  }
  210. // Return the long textual description of the search (starts with "containing" where applicable)
  /**
   * Long form of the query description: query_description_internal()
   * called with $long = true.
   *
   * @return string
   */
  function query_description_long() {
      $long_text = $this->query_description_internal(true);
      return $long_text;
  }
  214. // Return stem of a word
  /**
   * Lower-case a word and pass it through the stemmer created in the
   * constructor.
   *
   * @param string $word
   * @return mixed Whatever the stemmer's stem_word() returns.
   */
  function stem($word) {
      $lowered = strtolower($word);
      return $this->stemmer->stem_word($lowered);
  }
  218. // Internal use mainly - you probably want query_description. Converts
  219. // parsed form of query that PHP knows into a full textual form again (for
  220. // feeding to Xapian's queryparser).
  /**
   * Rebuild a textual query from the parsed pieces.
   *
   * Phrases come first, then plain words, then one "(a OR b)" group per
   * prefix type, all joined with AND; excluded words are appended as a
   * trailing NOT (...) clause. groupby and bias prefixes are left out.
   *
   * @return string
   */
  function query_remade() {
      // Phrases, re-quoted.
      $parts = array();
      foreach ($this->phrases as $phrase_words) {
          $parts[] = '"' . implode(' ', $phrase_words) . '"';
      }

      // Plain words.
      if ($this->words) {
          $parts = array_merge($parts, $this->words);
      }

      // Bucket the prefixed terms by prefix name, in first-seen order.
      $by_prefix = array();
      foreach ($this->prefixed as $items) {
          $name = $items[0];
          if ($name == 'groupby' || $name == 'bias') {
              continue;
          }
          if (!isset($by_prefix[$name])) {
              $by_prefix[$name] = array();
          }
          $by_prefix[$name][] = $name . ':' . $items[1];
      }
      foreach ($by_prefix as $terms) {
          $parts[] = '(' . implode(' OR ', $terms) . ')';
      }

      $remade_query = trim(implode(' AND ', $parts));

      // NOTE(review): with several excluded words this only rules out
      // documents containing all of them together -- confirm that is intended.
      if ($this->excluded) {
          $remade_query .= ' NOT (' . implode(' AND ', $this->excluded) . ')';
      }

      return $remade_query;
  }
  247. // Perform partial query to get a count of number of matches
  /**
   * Run the query far enough to count its matches.
   *
   * Opens the shared $xapiandb handle and creates $this->enquire on first
   * use, sets the query, applies collapsing/bias from the prefixed terms,
   * then fetches up to 500 matches to size the result.
   *
   * @return mixed null when XAPIANDB is not defined or is falsy; otherwise
   *               the mset size when below 500, or Xapian's estimate.
   */
  function run_count () {
      if (!(defined('XAPIANDB') && XAPIANDB)) {
          return null;
      }

      $start = getmicrotime();

      global $xapiandb, $PAGE;
      if (!$xapiandb) {
          $xapiandb = new XapianDatabase(XAPIANDB);
      }
      if (!$this->enquire) {
          $this->enquire = new XapianEnquire($xapiandb);
      }

      $queryparser = new XapianQueryParser();
      $queryparser->set_stemming_strategy(QueryParser_STEM_NONE);
      $queryparser->set_default_op(Query_OP_AND);
      foreach (array('speaker', 'major', 'date', 'batch') as $prefix_name) {
          $queryparser->add_prefix($prefix_name, $prefix_name . ':');
      }

      // We rebuild (with query_remade) our query and feed that text string to
      // the query parser. This is because the error handling in the query parser
      // is a bit knackered, and we want to be sure our highlighting etc. exactly
      // matches. XXX don't need to do this for more recent Xapians
      $remade = $this->query_remade();
      twfy_debug("SEARCH", "query remade -- ". $remade);
      $query = $queryparser->parse_query($remade);
      twfy_debug("SEARCH", "queryparser description -- " . $query->get_description());
      $this->enquire->set_query($query);

      // Set collapsing and sorting
      $collapsed = false;
      foreach ($this->prefixed as $items) {
          if ($items[0] == 'groupby') {
              $collapsed = true;
              switch ($items[1]) {
                  case 'day':
                      $this->enquire->set_collapse_key(2);
                      break;
                  case 'debate':
                      $this->enquire->set_collapse_key(3);
                      break;
                  case 'speech':
                      // no collapse key
                      break;
                  default:
                      $PAGE->error_message("Unknown group by '$items[1]' ignored");
                      break;
              }
          } elseif ($items[0] == 'bias') {
              list($weight, $halflife) = explode(":", $items[1]);
              $this->enquire->set_bias($weight, intval($halflife));
          } elseif ($items[0] == 'speaker') {
              # Don't do any collapsing if we're searching for a person's speeches
              $collapsed = true;
          }
      }
      // default to grouping by subdebate, i.e. by page
      if (!$collapsed) {
          $this->enquire->set_collapse_key(7);
      }

      // Take either: 1) the estimate which is sometimes too large or 2) the
      // size which is sometimes too low (it is limited to the 500 requested
      // here). We get the exact mset we need later, according to which page
      // we are on.
      $matches = $this->enquire->get_mset(0, 500);
      $fetched = $matches->size();
      $count = ($fetched < 500) ? $fetched : $matches->get_matches_estimated();

      $duration = getmicrotime() - $start;
      twfy_debug ("SEARCH", "Search count took $duration seconds.");

      return $count;
  }
  313. // Perform the full search...
  /**
   * Fetch one page of results and store them on the object.
   *
   * Uses $this->enquire as it was left by run_count(). Afterwards the
   * results are available through get_gids(), get_relevances() and
   * get_createds().
   *
   * @param int    $first_result     Offset of the first result (0-based).
   * @param int    $results_per_page Maximum number of results to fetch.
   * @param string $sort_order       'date', 'created', or anything else for
   *                                 the default ordering by relevance.
   */
  function run_search ($first_result, $results_per_page, $sort_order='relevance') {
      $start = getmicrotime();

      // NOTE: this is to do sort by date
      if ($sort_order == 'date') {
          $this->enquire->set_sort_by_value_then_relevance(0, true);
      } elseif ($sort_order == 'created') {
          $this->enquire->set_sort_by_value_then_relevance(6, true);
      }
      // otherwise do nothing, default ordering is by relevance

      $matches = $this->enquire->get_mset($first_result, $results_per_page);

      $this->gids = array();
      $this->created = array();
      $this->relevances = array();

      $want_created = ($sort_order == 'created');
      for ($iter = $matches->begin(), $end = $matches->end(); !$iter->equals($end); $iter->next()) {
          $relevancy = $iter->get_percent();
          $weight = $iter->get_weight();
          $doc = $iter->get_document();
          $gid = $doc->get_data();
          if ($want_created) {
              $this->created[] = $doc->get_value(6);
          }
          twfy_debug("SEARCH", "gid: $gid relevancy: $relevancy% weight: $weight");
          $this->gids[] = "uk.org.publicwhip/".$gid;
          $this->relevances[] = $relevancy;
      }

      $duration = getmicrotime() - $start;
      twfy_debug ("SEARCH", "Run search took $duration seconds.");
  }
  350. // ... use these to get the results
  /**
   * GIDs collected by the last run_search() call.
   *
   * @return array Empty when run_search() has not been called yet, so
   *               callers never read an undefined property.
   */
  function get_gids() {
      return isset($this->gids) ? $this->gids : array();
  }
  /**
   * Relevance percentages collected by the last run_search() call, in the
   * same order as get_gids().
   *
   * @return array Empty when run_search() has not been called yet, so
   *               callers never read an undefined property.
   */
  function get_relevances() {
      return isset($this->relevances) ? $this->relevances : array();
  }
  /**
   * Document value 6 for each result of the last run_search() call; only
   * filled in when that search was sorted by 'created'.
   *
   * @return array Empty when run_search() has not been called yet, so
   *               callers never read an undefined property.
   */
  function get_createds() {
      return isset($this->created) ? $this->created : array();
  }
  360. // Puts HTML highlighting round all the matching words in the text
  /**
   * Wrap every occurrence of the search words and phrases in $body with
   * <span class="hi">...</span>.
   *
   * All-digit words also match their number_format()ed form and are
   * matched case-sensitively; other words and phrases are matched
   * case-insensitively.
   *
   * @param string $body Text (may contain HTML) to highlight.
   * @return string The highlighted text.
   */
  function highlight($body) {
      // Contents will be used in preg_replace() to highlight the search terms.
      $findwords = array();
      $replacewords = array();

      foreach ($this->words as $word) {
          if (ctype_digit($word)) {
              array_push($findwords, "/\b($word|" . number_format($word) . ")\b/");
          } else {
              array_push($findwords, "/\b($word)\b/i");
          }
          array_push($replacewords, "<span class=\"hi\">\\1</span>");
      }

      foreach ($this->phrases as $phrase) {
          // Separator first, then the array: the reversed (legacy) argument
          // order is rejected by PHP 8.
          $phrasematch = join('[^'.$this->wordchars.']+', $phrase);
          array_push($findwords, "/\b($phrasematch)\b/i");
          $replacewords[] = "<span class=\"hi\">\\1</span>";
      }

      // Highlight search words.
      $hlbody = preg_replace($findwords, $replacewords, $body);

      // Remove any highlighting within HTML.
      $hlbody = preg_replace('#<(a|phrase)\s([^>]*?)<span class="hi">(.*?)</span>([^>]*?)">#', "<\\1 \\2\\3\\4\">", $hlbody);
      $hlbody = preg_replace('#<(/?)<span class="hi">a</span>([^>]*?)>#', "<\\1a\\2>", $hlbody); # XXX Horrible hack

      // Collapse duplicates
      $hlbody = preg_replace("#</span>(\W+)<span class=\"hi\">#", "\\1", $hlbody);

      return $hlbody;
  }
  389. // Find the position of the first of the search words/phrases in $body.
  /**
   * Find where the first search phrase (or, failing that, the first search
   * word) occurs in $body.
   *
   * Matching is done on a lower-cased copy of $body padded with one space
   * at each end, and each match includes the non-word character in front
   * of the term, so the offset found in the padded copy is the offset of
   * the term itself in $body.
   *
   * @param string $body Text to search.
   * @return int Offset of the earliest match, or 0 when nothing matches.
   */
  function position_of_first_word($body) {
      $lcbody = ' ' . strtolower($body) . ' '; // spaces to make regexp mapping easier
      $boundary = '[^' . $this->wordchars . ']';
      $pos = -1;

      // look for phrases
      foreach ($this->phrases as $phrase) {
          // Separator first, then the array: the reversed (legacy) argument
          // order is rejected by PHP 8.
          $phrasematch = join($boundary . '+', $phrase);
          if (preg_match('/(' . $boundary . $phrasematch . $boundary . ')/', $lcbody, $matches, PREG_OFFSET_CAPTURE)) {
              // Offset 0 is a real hit (term at the very start of $body), so
              // compare against the -1 sentinel rather than testing truthiness.
              $wordpos = $matches[0][1];
              if ($pos == -1 || $wordpos < $pos) {
                  $pos = $wordpos;
              }
          }
      }

      // only look for earlier words if phrases weren't found
      if ($pos == -1) {
          foreach ($this->words as $word) {
              if (ctype_digit($word)) {
                  $word = '(?:' . $word . '|' . number_format($word) . ')';
              }
              if (preg_match('/(' . $boundary . $word . $boundary . ')/', $lcbody, $matches, PREG_OFFSET_CAPTURE)) {
                  $wordpos = $matches[0][1];
                  if ($pos == -1 || $wordpos < $pos) {
                      $pos = $wordpos;
                  }
              }
          }
      }

      if ($pos == -1) {
          $pos = 0;
      }
      return $pos;
  }
  427. /*
  428. old stemming code (does syntax highlighting with stemming, but does it badly)
  429. $splitextract = preg_split("/([a-zA-Z]+)/", $extract, -1, PREG_SPLIT_DELIM_CAPTURE);
  430. $hlextract = "";
  431. foreach( $splitextract as $extractword) {
  432. $hl = false;
  433. foreach( $searchstring_stemwords as $word ) {
  434. if ($word == '') {
  435. continue;
  436. }
  437. $matchword = $searchengine->stem($extractword);
  438. #print "$extractword : $matchword : $word<br>";
  439. if ($matchword == $word) {
  440. $hl = true;
  441. break;
  442. }
  443. }
  444. if ($hl)
  445. $hlextract .= "<span class=\"hi\">$extractword</span>";
  446. else
  447. $hlextract .= $extractword;
  448. }
  449. $hlextract = preg_replace("#</span>\s+<span class=\"hi\">#", " ", $hlextract);
  450. */
  451. /* This doesn't work yet as PHP bindings are knackered - the idea is
  452. to do all parsing here and replace queryparser, so we can do stuff
  453. how we want more. e.g. sync highlighting with the queries better */
  454. // Instead we are now parsing in PHP, and rebuilding something to feed to
  455. // query parser. Yucky but works.
  456. /* $querydummy = new XapianQuery("dummy");
  457. $query1 = new XapianQuery("ethiopia");
  458. $query2 = new XapianQuery("economic");
  459. #$query = $querydummy->querycombine(Query_OP_AND, $query1, $query2);
  460. $query = new_QueryCombine(Query_OP_AND, $query1, $query2);
  461. #new_QueryCombine
  462. # $query = $query1->querycombine(Query_OP_OR, $query1, $query2);
  463. # foreach ($this->words as $word) {
  464. # $query = new XapianQuery(Query_OP_OR, $query, new XapianQuery($word));
  465. # }
  466. print "description:" . $query->get_description() . "<br>"; */
  467. }
  468. global $SEARCHENGINE;
  469. $SEARCHENGINE = null;
  /**
   * Summarise who has used a search term: a count per speaker (with the
   * earliest and latest date of use) and a count per party.
   *
   * @param string $search Search string.
   * @param int    $house  Optional house number to restrict speakers to;
   *                       0 (the default) means no restriction.
   * @return array Always has 'pagetitle'. On success also 'speakers'
   *               (keyed by person id, sorted by count descending) and
   *               'party_count'; 'limit_reached' is set when the
   *               10000-result cap was hit. On failure has 'error'.
   */
  function search_by_usage($search, $house = 0) {
      $data = array();

      $SEARCHENGINE = new SEARCHENGINE($search);
      $data['pagetitle'] = $SEARCHENGINE->query_description_short();

      $SEARCHENGINE = new SEARCHENGINE($search . ' groupby:speech');
      $count = $SEARCHENGINE->run_count();
      if ($count <= 0) {
          $data['error'] = 'No results';
          return $data;
      }

      $SEARCHENGINE->run_search(0, 10000, 'date');
      $gids = $SEARCHENGINE->get_gids();
      if (count($gids) <= 0) {
          $data['error'] = 'No results';
          return $data;
      }
      if (count($gids) == 10000)
          $data['limit_reached'] = true;

      # Fetch all the speakers of the results, count them up and get min/max date usage
      $speaker_count = array();
      $mindate = array();
      $maxdate = array();
      // NOTE(review): gids come from the search index and are placed in the
      // SQL unescaped -- confirm they can never contain a double quote.
      $gids = join('","', $gids);
      $db = new ParlDB;
      $q = $db->query('SELECT gid,speaker_id,hdate FROM hansard WHERE gid IN ("' . $gids . '")');
      for ($n=0; $n<$q->rows(); $n++) {
          $speaker_id = $q->field($n, 'speaker_id'); # This is member ID
          $hdate = $q->field($n, 'hdate');
          if (!isset($speaker_count[$speaker_id])) {
              $speaker_count[$speaker_id] = 0;
              $maxdate[$speaker_id] = '1001-01-01';
              $mindate[$speaker_id] = '9999-12-31';
          }
          $speaker_count[$speaker_id]++;
          if ($hdate < $mindate[$speaker_id]) $mindate[$speaker_id] = $hdate;
          if ($hdate > $maxdate[$speaker_id]) $maxdate[$speaker_id] = $hdate;
      }

      # Fetch details of all the speakers
      $pids = array();
      $speakers = array();
      if (count($speaker_count)) {
          $speaker_ids = join(',', array_keys($speaker_count));
          // Force the house filter to an integer before it goes into the SQL.
          $house_filter = intval($house);
          $q = $db->query('SELECT member_id, person_id, title,first_name,last_name,constituency,house,party,
              moffice_id, dept, position, from_date, to_date, left_house
              FROM member LEFT JOIN moffice ON member.person_id = moffice.person
              WHERE member_id IN (' . $speaker_ids . ')
              ' . ($house_filter ? " AND house=$house_filter" : '') . '
              ORDER BY left_house DESC');
          for ($n=0; $n<$q->rows(); $n++) {
              $mid = $q->field($n, 'member_id');
              if (!isset($pids[$mid])) {
                  // First row for this membership: record the person and,
                  // unless an earlier membership already did, their name/party.
                  $title = $q->field($n, 'title');
                  $first = $q->field($n, 'first_name');
                  $last = $q->field($n, 'last_name');
                  $cons = $q->field($n, 'constituency');
                  $row_house = $q->field($n, 'house');
                  $party = $q->field($n, 'party');
                  $full_name = ucfirst(member_full_name($row_house, $title, $first, $last, $cons));
                  $pid = $q->field($n, 'person_id');
                  $pids[$mid] = $pid;
                  $speakers[$pid]['house'] = $row_house;
                  $speakers[$pid]['left'] = $q->field($n, 'left_house');
                  if (!isset($speakers[$pid]['name'])) {
                      $speakers[$pid]['name'] = ($row_house==2?'Senator ':'') . $full_name . ($row_house==1?' MP':'');
                      $speakers[$pid]['party'] = $party;
                  }
              }
              // Always look the person up for this row. A membership has one
              // row per office and rows of different members can interleave,
              // so a value left over from the previous row may belong to
              // someone else.
              $pid = $pids[$mid];

              $dept = $q->field($n, 'dept');
              $posn = $q->field($n, 'position');
              $moffice_id = $q->field($n, 'moffice_id');
              if ($dept && $q->field($n, 'to_date') == '9999-12-31')
                  $speakers[$pid]['office'][$moffice_id] = prettify_office($posn, $dept);
          }
      }

      // Member id 0 collects everything not attributed to a person.
      $pids[0] = 0;
      $speakers[0] = array('party'=>'', 'name'=>'Headings, procedural text, etc.', 'house'=>0, 'count'=>0);

      $party_count = array();
      $ok = 0;
      foreach ($speaker_count as $speaker_id => $count) {
          if (!isset($pids[$speaker_id])) continue;
          $pid = $pids[$speaker_id];
          if (!isset($speakers[$pid]['pmindate'])) {
              $speakers[$pid]['count'] = 0;
              $speakers[$pid]['pmaxdate'] = '1001-01-01';
              $speakers[$pid]['pmindate'] = '9999-12-31';
              $ok = 1;
          }
          if (!isset($party_count[$speakers[$pid]['party']]))
              $party_count[$speakers[$pid]['party']] = 0;
          $speakers[$pid]['count'] += $count;
          $party_count[$speakers[$pid]['party']] += $count;
          if ($mindate[$speaker_id] < $speakers[$pid]['pmindate']) $speakers[$pid]['pmindate'] = $mindate[$speaker_id];
          if ($maxdate[$speaker_id] > $speakers[$pid]['pmaxdate']) $speakers[$pid]['pmaxdate'] = $maxdate[$speaker_id];
      }

      // Declared only once: an unguarded nested declaration is a fatal
      // "cannot redeclare" error the second time this function is called.
      if (!function_exists('sort_by_count')) {
          function sort_by_count($a, $b) {
              if ($a['count'] > $b['count']) return -1;
              if ($a['count'] < $b['count']) return 1;
              return 0;
          }
      }

      if ($speakers[0]['count']==0) unset($speakers[0]);
      uasort($speakers, 'sort_by_count');
      arsort($party_count);

      if (!$ok) {
          $data['error'] = 'No results';
          return $data;
      }

      $data['party_count'] = $party_count;
      $data['speakers'] = $speakers;
      return $data;
  }