PageRenderTime 64ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/www/docs/regmem/index.php

https://github.com/mysociety/theyworkforyou
PHP | 406 lines | 383 code | 21 blank | 2 comment | 49 complexity | a99aaf0992b9aa3dccdb6637d8dbdc6a MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. include_once '../../includes/easyparliament/init.php';
  3. $dir = RAWDATA . 'scrapedxml/regmem';
  4. $dh = opendir($dir);
  5. $files = array();
  6. while ($file = readdir($dh)) {
  7. if (preg_match('#^regmem#', $file)) {
  8. $files[] = "$dir/$file";
  9. }
  10. }
  11. rsort($files);
  12. if (!DEVSITE) {
  13. header('Cache-Control: max-age=3600');
  14. }
  15. $PAGE->page_start();
  16. ?>
  17. <style type="text/css">
  18. blockquote { background-color: #f5fdea; border: solid 1px #4d6c25; padding: 3px; }
  19. td { vertical-align: top; }
  20. .a { background-color: #ccffcc; margin-bottom: 0.5em; }
  21. .r { background-color: #ffcccc; margin-bottom: 0.5em; }
  22. th { text-align: left; }
  23. table#regmem h2 { margin: 0; margin-top: 0.5em; padding-top: 0.5em; border-top: dotted 1px #333333; }
  24. #regmem h3 { margin: 0; border-bottom: dotted 1px #cccccc; }
  25. #mps li {
  26. float: left;
  27. width: 23%;
  28. }
  29. </style>
  30. <?php
  31. $f = get_http_var('f');
  32. if (!preg_match('#^\d\d\d\d-\d\d-\d\d$#', $f)) {
  33. $f = '';
  34. }
  35. $p = (integer) get_http_var('p');
  36. $d = get_http_var('d');
  37. if (!preg_match('#^\d\d\d\d-\d\d-\d\d$#', $d)) {
  38. $d = '';
  39. }
  40. $link = '<p align="center"><a href="./"><strong>List all MPs and Register editions</strong></a></p>';
  41. if ($f) {
  42. register_history($f);
  43. } elseif ($p) {
  44. person_history($p);
  45. } elseif ($d) {
  46. show_register($d);
  47. } else {
  48. $this_page = 'regmem';
  49. $PAGE->stripe_start();
  50. front_page();
  51. }
  52. $PAGE->stripe_end();
  53. $PAGE->page_end();
  54. function person_history($p) {
  55. global $files, $dir, $DATA, $PAGE, $this_page, $link, $cats;
  56. $this_page = 'regmem_mp';
  57. $name = '';
  58. $nil = array();
  59. $earliest = $files[0];
  60. foreach ($files as $_) {
  61. $file = _load_file($_);
  62. $date = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $_);
  63. $data[$_] = array();
  64. if (preg_match('#<regmem personid="uk.org.publicwhip/person/'.$p.'" (?:memberid="(.*?)" )?membername="(.*?)" date="(.*?)">(.*?)</regmem>#s', $file, $m)) {
  65. $earliest = $_;
  66. if (!$name) {
  67. $name = $m[2];
  68. $DATA->set_page_metadata($this_page, 'heading', $name);
  69. $PAGE->stripe_start();
  70. print $link;
  71. ?>
  72. <p>This page shows how <a href="/mp/?p=<?=$p ?>"><?=$name ?></a>'s entry in the Register of Members' Interests has changed over time, starting at the most recent and working back to the earliest we have managed to parse.
  73. Please be aware that changes in typography/styling at the source might mean something is marked as changed (ie. removed and added) when it hasn't; sorry about that, but we do our best with the source material.
  74. </p>
  75. <table id="regmem">
  76. <tr><th width="50%">Removed</th><th width="50%">Added</th></tr>
  77. <?php
  78. }
  79. $name = $m[2]; $ddata = $m[4];
  80. if (preg_match('/Nil\./', $ddata)) {
  81. $nil[$_] = true;
  82. }
  83. preg_match_all('#<category type="(.*?)" name="(.*?)">(.*?)</category>#s', $ddata, $mm, PREG_SET_ORDER);
  84. foreach ($mm as $k => $m) {
  85. $cat_type = $m[1];
  86. $cat_name = $m[2];
  87. $cats[$date][$cat_type] = $cat_name;
  88. $cat_data = canonicalise_data($m[3]);
  89. $data[$_][$cat_type] = $cat_data;
  90. }
  91. }
  92. }
  93. $out = '';
  94. foreach ($files as $i => $_) {
  95. if ($_ <= $earliest) {
  96. break;
  97. }
  98. $date_pre = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $_);
  99. $date_post = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $files[$i+1]);
  100. $pretty = format_date($date_pre, LONGDATEFORMAT);
  101. $oout = '';
  102. foreach ($data[$_] as $cat_type => $cat_data) {
  103. $old = array_key_exists($cat_type, $data[$files[$i+1]]) ? $data[$files[$i+1]][$cat_type] : '';
  104. $new = $data[$_][$cat_type];
  105. if ($diff = clean_diff($old, $new)) {
  106. $oout .= cat_heading($cat_type, $date_pre, $date_post) . $diff;
  107. }
  108. }
  109. foreach ($data[$files[$i+1]] as $cat_type => $cat_data) {
  110. if (array_key_exists($cat_type, $data[$_])) {
  111. continue;
  112. }
  113. if ($diff = clean_diff($data[$files[$i+1]][$cat_type], '')) {
  114. $oout .= cat_heading($cat_type, $date_pre, $date_post) . $diff;
  115. }
  116. }
  117. if ($oout) {
  118. $out .= span_row("<h2>$pretty - <a href=\"./?d=$date_pre#$p\">View full entry</a></h2>", true) . $oout;
  119. }
  120. }
  121. $_ = $earliest;
  122. $date = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $_);
  123. $pretty = format_date($date, LONGDATEFORMAT);
  124. $out .= span_row("<h2>$pretty (first entry we have)</h2>", true);
  125. if (array_key_exists($_, $nil)) {
  126. $out .= span_row('Nothing');
  127. }
  128. foreach ($data[$_] as $cat_type => $d) {
  129. $out .= cat_heading($cat_type, '', $date);
  130. $out .= span_row(prettify($d));
  131. }
  132. print $out;
  133. if ($name) {
  134. print '</table>';
  135. }
  136. }
  137. function register_history($f) {
  138. global $dir, $files, $names, $DATA, $PAGE, $link, $this_page;
  139. $this_page = 'regmem_diff';
  140. $new = 0;
  141. if ($f) {
  142. $f = "$dir/regmem$f.xml";
  143. $count = count($files);
  144. for ($i=0; $i<$count; ++$i) {
  145. if ($files[$i] == $f) {
  146. $new = $i;
  147. break;
  148. }
  149. }
  150. }
  151. $old = $new+1;
  152. $old = $files[$old];
  153. $old_iso = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $old);
  154. $old_pretty = format_date($old_iso, LONGDATEFORMAT);
  155. $new = $files[$new];
  156. $new_iso = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $new);
  157. $new_pretty = format_date($new_iso, LONGDATEFORMAT);
  158. $old = _load_file($old);
  159. $new = _load_file($new);
  160. $DATA->set_page_metadata($this_page, 'heading', 'Changes from '.$old_pretty.' to '.$new_pretty);
  161. $PAGE->stripe_start();
  162. print $link;
  163. $data = array();
  164. parse_file($old, $old_iso, 'old', $data);
  165. parse_file($new, $new_iso, 'new', $data);
  166. ?>
  167. <p>This page shows all the changes in the Register of Members' Interests between the editions of <a href="./?d=<?=$old_iso ?>"><?=$old_pretty ?></a> and <a href="./?d=<?=$new_iso ?>"><?=$new_pretty ?></a>, in alphabetical order by MP.</p>
  168. <table cellpadding="3" cellspacing="0" border="0" id="regmem">
  169. <tr><th width="50%">Removed</th><th width="50%">Added</th></tr>
  170. <?php
  171. uksort($data, 'by_name_ref');
  172. foreach ($data as $person_id => $v) {
  173. $out = '';
  174. foreach ($v as $cat_type => $vv) {
  175. $out .= cat_heading($cat_type, $old_iso, $new_iso);
  176. $old = (array_key_exists('old', $data[$person_id][$cat_type]) ? $data[$person_id][$cat_type]['old'] : '');
  177. $new = (array_key_exists('new', $data[$person_id][$cat_type]) ? $data[$person_id][$cat_type]['new'] : '');
  178. $out .= clean_diff($old, $new);
  179. }
  180. if ($out) {
  181. print span_row('<h2>'.$names[$person_id].' - <a href="?p='.$person_id.'">Register history</a> | <a href="/mp/?pid='.$person_id.'">MP&rsquo;s page</a></h2>', true) . $out;
  182. }
  183. }
  184. print '</table>';
  185. }
  186. function by_name_ref($a, $b) {
  187. global $names;
  188. $a = preg_replace('/^.* /', '', $names[$a]);
  189. $b = preg_replace('/^.* /', '', $names[$b]);
  190. if ($a > $b) {
  191. return 1;
  192. } elseif ($a < $b) {
  193. return -1;
  194. }
  195. return 0;
  196. }
  197. function parse_file($file, $date, $type, &$out) {
  198. global $cats, $names;
  199. preg_match_all('#<regmem personid="uk.org.publicwhip/person/(.*?)" (?:memberid="(.*?)" )?membername="(.*?)" date="(.*?)">(.*?)</regmem>#s', $file, $mm, PREG_SET_ORDER);
  200. foreach ($mm as $k => $m) {
  201. $person_id = $m[1]; $name = $m[3]; $data = $m[5];
  202. $names[$person_id] = $name;
  203. preg_match_all('#<category type="(.*?)" name="(.*?)">(.*?)</category>#s', $data, $mmm, PREG_SET_ORDER);
  204. foreach ($mmm as $k => $m) {
  205. $cat_type = $m[1];
  206. $cat_name = $m[2];
  207. $cats[$date][$cat_type] = $cat_name;
  208. $cat_data = canonicalise_data($m[3]);
  209. $out[$person_id][$cat_type][$type] = $cat_data;
  210. if ($type == 'new' && array_key_exists('old', $out[$person_id][$cat_type]) && $cat_data == $out[$person_id][$cat_type]['old']) {
  211. unset($out[$person_id][$cat_type]);
  212. }
  213. }
  214. }
  215. }
  216. function _load_file($f) {
  217. $file = file_get_contents($f);
  218. $file = utf8_encode($file);
  219. return $file;
  220. }
  221. function front_page() {
  222. global $files;
  223. foreach ($files as $_) {
  224. $file = _load_file($_);
  225. preg_match_all('#<regmem personid="uk.org.publicwhip/person/(.*?)" (?:memberid="(.*?)" )?membername="(.*?)" date="(.*?)">(.*?)</regmem>#s', $file, $m, PREG_SET_ORDER);
  226. foreach ($m as $k => $v) {
  227. $person_id = $v[1]; $name = $v[3];
  228. $names[$person_id] = $name;
  229. }
  230. }
  231. $c = 0; $year = 0;
  232. $view = ''; $compare = '';
  233. $count = count($files);
  234. for ($i=0; $i<$count; ++$i) {
  235. preg_match('/(\d\d\d\d)-(\d\d-\d\d)/', $files[$i], $m);
  236. $y = $m[1]; $md = $m[2];
  237. if ($c++) {
  238. $view .= ' | ';
  239. if ($i < $count-1) {
  240. $compare .= ' | ';
  241. }
  242. }
  243. if ($year != $y) {
  244. $year = $y;
  245. $view .= "<em>$year</em> ";
  246. if ($i < $count-1) {
  247. $compare .= "<em>$year</em> ";
  248. }
  249. }
  250. $months = array('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
  251. preg_match('/(\d\d)-(\d\d)/', $md, $m);
  252. $date = ($m[2]+0) . ' '. $months[$m[1]-1];
  253. $view .= '<a href="./?d='.$y.'-'.$md.'">'.$date.'</a>';
  254. if ($i < $count-1) {
  255. $compare .= '<a href="?f='.$y.'-'.$md.'">'.$date.'</a>';
  256. }
  257. }
  258. ?>
  259. <p>This section of the site lets you see how MPs' entries in the Register of Members' Interests have changed over time, either by MP, or for a particular issue of the Register.</p>
  260. <p>The rules concerning what must be registered can be found in the House of Commons&rsquo; Code of Conduct at <a href="http://www.publications.parliament.uk/pa/cm200809/cmcode/735/73504.htm">http://www.publications.parliament.uk/pa/cm200809/cmcode/735/73504.htm</a>.</p>
  261. <p>So, either <strong>pick an issue to compare against the one previous:</strong></p>
  262. <p align="center"><?=$compare ?></p>
  263. <p><strong>View a particular edition of the Register of Members' Interests:</strong></p>
  264. <p align="center"><?=$view ?></p>
  265. <p>Or <strong>view the history of an MP's entry in the Register:</strong></p> <ul id="mps">
  266. <?php
  267. uasort($names, 'by_name');
  268. foreach ($names as $_ => $value) {
  269. print '<li><a href="?p='.$_.'">'.$value.'</a>';
  270. }
  271. print '</ul>';
  272. }
  273. function show_register($d) {
  274. global $dir, $files, $names, $PAGE, $DATA, $this_page, $link;
  275. $d = "$dir/regmem$d.xml";
  276. if (!in_array($d, $files)) {
  277. $d = $files[0];
  278. }
  279. $d_iso = preg_replace("#$dir/regmem(.*?)\.xml#", '$1', $d);
  280. $d_pretty = format_date($d_iso, LONGDATEFORMAT);
  281. $d = _load_file($d);
  282. $data = array();
  283. parse_file($d, $d_iso, 'only', $data);
  284. $this_page = 'regmem_date';
  285. $DATA->set_page_metadata($this_page, 'heading', "The Register of Members' Interests, $d_pretty");;
  286. $PAGE->stripe_start();
  287. print $link;
  288. ?>
  289. <p>This page shows the Register of Members' Interests as released on <?=$d_pretty ?>, in alphabetical order by MP.
  290. <?php if ($d_iso > '2002-05-14') { ?><a href="./?f=<?=$d_iso ?>">Compare this edition with the one before it</a></p><?php } ?>
  291. <div id="regmem">
  292. <?php
  293. uksort($data, 'by_name_ref');
  294. foreach ($data as $person_id => $v) {
  295. $out = '';
  296. foreach ($v as $cat_type => $vv) {
  297. $out .= cat_heading($cat_type, $d_iso, $d_iso, false);
  298. $d = (array_key_exists('only', $data[$person_id][$cat_type]) ? $data[$person_id][$cat_type]['only'] : '');
  299. $out .= prettify($d)."\n";
  300. }
  301. if ($out) {
  302. print '<div class="block">';
  303. print '<h2><a name="' . $person_id . '"></a>' . $names[$person_id] . ' - ';
  304. print '<a href="?p=' . $person_id . '">Register history</a> | ';
  305. print '<a href="/mp/?pid=' . $person_id . '">MP&rsquo;s page</a>';
  306. print '</h2> <div class="blockbody">';
  307. print "\n$out";
  308. print '</div></div>';
  309. }
  310. }
  311. print '</div>';
  312. }
  313. function by_name($a, $b) {
  314. $a = preg_replace('/^.* /', '', $a);
  315. $b = preg_replace('/^.* /', '', $b);
  316. if ($a > $b) {
  317. return 1;
  318. } elseif ($a < $b) {
  319. return -1;
  320. }
  321. return 0;
  322. }
  323. function canonicalise_data($cat_data) {
  324. $cat_data = preg_replace('#^.*?<item#s', '<item', $cat_data);
  325. $cat_data = str_replace(array('<i>', '</i>'), '', $cat_data);
  326. $cat_data = preg_replace('/<item subcategory="(.*?)">\s*/', '<item>($1) ', $cat_data);
  327. $cat_data = preg_replace('/<item([^>]*?)>\s*/', '<item>', $cat_data);
  328. $cat_data = preg_replace('/ +/', ' ', $cat_data);
  329. $cat_data = preg_replace('# (\d{1,2})th #', ' $1<sup>th</sup> ', $cat_data);
  330. return $cat_data;
  331. }
  332. function _clean($s) {
  333. $s = preg_replace("/&(pound|#163);/", "£", $s);
  334. $s = preg_replace("#</?(span|i|em)( [^>]*)?" . ">#i", '', $s);
  335. $s = preg_split("/\s*\n\s*/", $s);
  336. return $s;
  337. }
  338. function clean_diff($old, $new) {
  339. $old = _clean($old);
  340. $new = _clean($new);
  341. $r = array_diff($old, $new);
  342. $a = array_diff($new, $old);
  343. if (!count($r) && !count($a)) {
  344. return '';
  345. }
  346. $r = join("\n", $r); $r = $r ? '<td class="r"><ul>'.$r.'</ul></td>' : '<td>&nbsp;</td>';
  347. $a = join("\n", $a); $a = $a ? '<td class="a"><ul>'.$a.'</ul></td>' : '<td>&nbsp;</td>';
  348. $diff = '<tr>' . $r . $a . '</tr>';
  349. $diff = preg_replace('#<item.*?>(.*?)</item>#', '<li>$1</li>', $diff);
  350. return $diff;
  351. }
  352. function prettify($s) {
  353. $s = preg_replace('#<item>(.*?)</item>#', '<li>$1</li>', $s);
  354. return "<ul>$s</ul>";
  355. }
  356. function cat_heading($cat_type, $date_pre, $date_post, $table = true) {
  357. global $cats;
  358. $cat_pre = isset($cats[$date_pre][$cat_type]) ? $cats[$date_pre][$cat_type] : '';
  359. $cat_post = isset($cats[$date_post][$cat_type]) ? $cats[$date_post][$cat_type] : '';
  360. if ($cat_pre == $cat_post || !$cat_post || !$cat_pre) {
  361. if (!$cat_pre) {
  362. $cat_pre = $cat_post;
  363. }
  364. $row = "<h3>$cat_type. $cat_pre</h3>";
  365. if ($table) {
  366. return "<tr><th colspan=\"2\">$row</th></tr>\n";
  367. }
  368. return $row;
  369. } else {
  370. if ($table) {
  371. return "<tr><th><h3>$cat_type. $cat_post</h3></th><th><h3>$cat_type. $cat_pre</h3></th></tr>";
  372. } else {
  373. return "<h3>$cat_type. $cat_post / $cat_pre</h3>";
  374. }
  375. }
  376. }
  377. function span_row($s, $heading = false) {
  378. if ($heading) {
  379. return "<tr><th colspan=\"2\">$s</th></tr>\n";
  380. }
  381. return "<tr><td colspan=\"2\">$s</td></tr>\n";
  382. }