PageRenderTime 49ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/fala.php

https://gitlab.com/bjwebb-codedump/plingconica
PHP | 406 lines | 334 code | 28 blank | 44 comment | 42 complexity | 8757422ba5c90a36327dad74eb8d6e6a MD5 | raw file
Possible License(s): AGPL-3.0
  1. <?php
  2. /**
  3. * Plingconica - creating a Laconica instance full of plings data (http://plings.net)
  4. * Copyright (C) 2009 Ben Webb <bjwebb@freedomdreams.co.uk>
  5. * Copyright (C) 2009 Tim Davies <tim@practicalparticipation.co.uk> - http://www.timdavies.org.uk/
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. include_once "config.php";
  /**
   * Thin wrapper around a connection made with the legacy mysql_* extension.
   *
   * Connection settings are read from the globals $db_host, $db_name and
   * $db_pass (presumably defined in config.php - confirm there).
   * Every failure terminates the script through die() with the MySQL error text.
   */
  class BenDatabase {
      /** Link identifier returned by mysql_connect(). */
      public $con;

      /**
       * Connects to the server and selects the database named $db.
       *
       * @param string $db name of the database to select
       */
      function __construct($db) {
          global $db_host, $db_name, $db_pass;
          // NOTE(review): $db_name is passed in the username slot of mysql_connect() - confirm this is intended.
          $this->con = mysql_connect($db_host, $db_name, $db_pass);
          if (!$this->con) {
              die('Could not connect: ' . mysql_error());
          }
          if (!mysql_select_db($db, $this->con)) {
              die(mysql_error($this->con));
          }
      }

      /**
       * PHP 4 style constructor, kept so explicit calls to BenDatabase() keep working.
       * Declared after __construct() so PHP 5 does not report a redefined constructor.
       */
      function BenDatabase($db) {
          $this->__construct($db);
      }

      /**
       * Runs a statement whose result set is not needed; dies with the MySQL error on failure.
       *
       * @param string $q SQL statement
       */
      function queryExec($q) {
          if (!mysql_query($q, $this->con)) {
              // mysql_error must be *called*; the bare name only printed the text "mysql_error".
              die(mysql_error($this->con));
          }
      }

      /**
       * Runs a query and reports whether it produced at least one row.
       *
       * @param string $q SQL statement
       * @return bool true when a first row could be fetched, false otherwise
       */
      function query($q) {
          $result = mysql_query($q, $this->con);
          if (!$result) {
              die(mysql_error($this->con));
          }
          // Statements without a result set return TRUE rather than a resource; nothing to fetch then.
          if (!is_resource($result)) {
              return false;
          }
          return mysql_fetch_assoc($result) ? true : false;
      }

      /**
       * Runs a query and returns every row as an associative array.
       *
       * @param string $q SQL statement
       * @return array list of rows (empty when the query yields none)
       */
      function arrayQuery($q) {
          $result = mysql_query($q, $this->con);
          if (!$result) {
              die(mysql_error($this->con));
          }
          $arr = array();
          if (is_resource($result)) {
              while ($row = mysql_fetch_assoc($result)) {
                  $arr[] = $row;
              }
          }
          return $arr;
      }
  }
  /**
   * clean($input)
   *
   * Returns $input with leading and trailing whitespace removed.
   *
   * Nothing else happens here: no slashes are stripped and no SQL escaping
   * is applied, so the value must still be escaped wherever it is placed
   * into a query or into markup.
   */
  function clean($input) {
      $trimmed = trim($input);
      return $trimmed;
  }
  /**
   * generateLink($town,$district)
   *
   * Builds a link back to the current script carrying the given town and
   * district as query-string parameters.
   *
   * Both values are URL-encoded so names containing spaces, '&', '#' or '='
   * produce a valid query string. $_SERVER['PHP_SELF'] is used as-is; the
   * caller is responsible for HTML-escaping the result before printing it
   * inside markup.
   *
   * @param string $town
   * @param string $district
   * @return string relative URL of the form "<script>?town=...&district=..."
   */
  function generateLink($town,$district) {
      return $_SERVER['PHP_SELF'] . "?town=" . urlencode($town) . "&district=" . urlencode($district);
  }
  /**
   * initDB() - open the 'places' database and make sure the places table exists.
   *
   * The table is created with CREATE TABLE IF NOT EXISTS, so the call is safe
   * whether the table is missing, empty or already populated. Connection and
   * query failures terminate the script inside BenDatabase (it calls die()).
   *
   * @return BenDatabase connected wrapper with the 'places' database selected
   */
  function initDB() {
      $db = new BenDatabase('places');
      $db->queryExec('CREATE TABLE IF NOT EXISTS places (id int, town char(255), district char(255), la char(255), postcode char(10), lacode char (10), PRIMARY KEY (id))');
      return $db;
  }
  /**
   * checkDB()
   *
   * Looks the town up in the local `places` table of the database opened by initDB().
   *
   * - With a district: selects rows whose town equals $town and returns the
   *   first one whose district matches $district (compared case-insensitively,
   *   because getLA() lower-cases its input before calling the checkers).
   *   Returns an empty array when no row matches.
   * - Without a district: returns every row whose town is LIKE ucfirst($town).
   *
   * $lacode and $postcode are accepted for signature compatibility with the
   * other checkers but are not used here.
   *
   * @return array list of matching rows from `places` (possibly empty)
   */
  $checkOptions[0] = "checkDB";
  function checkDB($town,$district,$lacode,$postcode) {
      global $log;
      $log[] = "Checking DB";
      $db = initDB();
      if ($district) {
          // Escape before embedding in SQL: $town originates from caller-supplied input.
          $safeTown = mysql_real_escape_string($town, $db->con);
          $results = $db->arrayQuery("SELECT * FROM places WHERE town = '" . $safeTown . "'");
          // Inspect every candidate row, not just the first one.
          foreach ($results as $result) {
              if (strcasecmp($result['district'], $district) == 0) {
                  return array($result);
              }
          }
          return array();
      }
      $safeTown = mysql_real_escape_string(ucfirst($town), $db->con);
      $results = $db->arrayQuery("SELECT * FROM places WHERE town LIKE '" . $safeTown . "'");
      $log[] = $results;
      return $results;
  }
  /**
   * checkAuthorityWards()
   *
   * Treats $town as the name of a local authority first, then as a ward name.
   *
   * - If `authorities` has a row whose official_name or common_name is LIKE
   *   $town, returns one record with 'la' (common_name) and 'district' (county)
   *   taken from the first match.
   * - Otherwise, if `wards` has a row whose ward_name is LIKE $town, returns one
   *   record with 'la' taken from the first match's la_name.
   * - Otherwise returns the (empty) ward result list.
   *
   * $district, $lacode and $postcode are accepted for signature compatibility
   * with the other checkers but are not used here.
   *
   * @return array list holding at most one record
   */
  $checkOptions[1] = "checkAuthorityWards";
  function checkAuthorityWards($town,$district,$lacode,$postcode) {
      global $log;
      $log[] = "Checking Authorities and Wards: Authority";
      $db = initDB();
      // Escape once; the same value is embedded in both queries below.
      $safeTown = mysql_real_escape_string($town, $db->con);
      $results = $db->arrayQuery("SELECT * FROM authorities WHERE official_name LIKE '" . $safeTown . "' OR common_name LIKE '" . $safeTown . "'");
      if (count($results)) {
          $log[] = "Found an authority";
          return array(array('la' => $results[0]['common_name'], 'district' => $results[0]['county']));
      }
      $log[] = "Not an authority - check for wards...";
      $results = $db->arrayQuery("SELECT * FROM wards WHERE ward_name LIKE '" . $safeTown . "'");
      if (count($results)) {
          $log[] = "Found a ward";
          $log[] = $results;
          return array(array('la' => $results[0]['la_name']));
      }
      return $results;
  }
  /**
   * checkWikiScrape()
   *
   * Scrapes the English Wikipedia article for $town and reads local authority,
   * county, postcode, region and OS grid reference out of the first infobox
   * that mentions "England" or "United Kingdom".
   *
   * Flow:
   *  - When $_REQUEST['disam'] is set (and this is not a search call) the
   *    district is appended to the article title.
   *  - If the article does not exist, Special:Search is tried; the first search
   *    hit whose text mentions town/borough/village/city is then fetched.
   *  - If an infobox is not UK specific but the page mentions "disambiguation"
   *    and a district is known, "<town>, <district>" is tried once.
   *  - Pages without an infobox only produce log entries.
   *
   * $lacode and $postcode are only passed through to recursive calls.
   *
   * @param bool $search true to query Special:Search instead of the article URL
   * @return array|null array holding one record (keys 'la', 'district',
   *                    'postcode', 'region', 'osref' as far as found), or null
   *                    when nothing usable was found
   */
  $checkOptions[2] = "checkWikiScrape";
  function checkWikiScrape($town,$district,$lacode,$postcode,$search = false) {
      global $log;
      // Infobox labels (lower-cased) that may hold the local authority / county.
      $candidateLA = array("borough","unitary authority","london borough","governing body","district","metropolitan borough","type");
      $candidateCounty = array("county","metropolitan county","shire county","admin. county","admin county","administrative county","ceremonial county", "ceremonial and nonmetropolitan county");
      $extradata = array();
      $extradata['postcode'] = array("postcode district","postcode");
      $extradata['region'] = array("region");
      $extradata['osref'] = array("os grid reference");

      // Logical AND is intended here; the bitwise form only worked for odd numeric flag values.
      if (!empty($_REQUEST['disam']) && !$search) { $town = str_replace(" ","_",$town . ", ". $district); }

      // Percent-encode the title so quotes, '#', '?' etc. cannot break the URL or the logged link.
      $title = rawurlencode(str_replace(" ","_",$town));
      if ($search) {
          $url = "http://en.wikipedia.org/wiki/Special:Search/".$title;
          $log[] = "Using Search <a href='$url' target='_blank'>$url</a>";
      } else {
          $url = "http://en.wikipedia.org/wiki/".$title;
      }

      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL,$url);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
      $resource = curl_exec($ch);
      curl_close($ch);
      if (!is_string($resource) || $resource === '') {
          $log[] = "Wikipedia request failed <a href='$url' target='_blank'>$url</a>";
          return null;
      }

      include_once('simplehtmldom/simple_html_dom.php');
      $html = str_get_html($resource);
      if (!$html) {
          $log[] = "Could not parse Wikipedia response <a href='$url' target='_blank'>$url</a>";
          return null;
      }
      $titleNode = $html->find("title",0);
      $titleText = $titleNode ? $titleNode->plaintext : '';

      // --- Article missing, or we landed on a search results page -------------
      if ($html->find("div[class=noarticletext]") || strpos($titleText,"Search results") !== false) {
          $log[] = "No such wikipedia article. Searching... <a href='$url' target='_blank'>$url</a>";
          if (!$search) {
              // Hand the search outcome back to the caller instead of dropping it.
              return checkWikiScrape($town,$district,$lacode,$postcode,true);
          }
          $resultLists = $html->find("ul[class=mw-search-results]");
          if ($resultLists) {
              $log[] = "Search results page";
              foreach ($resultLists as $list) {
                  foreach ($list->find("li") as $item) {
                      $itemText = strtolower($item->plaintext);
                      $looksLikePlace = strpos($itemText,"town") !== false
                          || strpos($itemText,"borough") !== false
                          || strpos($itemText,"village") !== false
                          || strpos($itemText,"city") !== false;
                      if (!$looksLikePlace) {
                          continue;
                      }
                      $link = $item->find("a",0);
                      if (!$link) {
                          continue;
                      }
                      $town = $link->title;
                      $log[] = "Decided to fetch page for " . $town;
                      return checkWikiScrape($town,$district,$lacode,$postcode);
                  }
              }
              $log[] = "Looks like there were no good search results";
          }
          return null;
      }

      $log[] = "Wiki page returned";
      $infoboxes = $html->find('table[class=infobox]');

      // --- Page with one or more infoboxes -----------------------------------
      if ($infoboxes) {
          $log[] = "Found Infobox";
          foreach ($infoboxes as $infobox) {
              $boxText = $infobox->plaintext;
              if (strpos($boxText,"England") !== false || strpos($boxText,"United Kingdom") !== false) {
                  $log[] = "England or UK specific";
                  $infoarray = array();
                  foreach ($infobox->find("tr") as $row) {
                      $labelCell = $row->childNodes(0);
                      $valueCell = $row->childNodes(1);
                      // Heading / image rows have no label-value pair.
                      if (!$labelCell || !$valueCell) {
                          continue;
                      }
                      // Replace HTML entities by a space, then drop '-' and ':' from the label.
                      $field_name = preg_replace("/&#?[a-z0-9]+;/i"," ",$labelCell->plaintext);
                      $field_name = str_replace(array("-",":"),"",$field_name);
                      $field_name = trim(strtolower($field_name));
                      $infoarray[$field_name] = $valueCell->plaintext;
                  }
                  $log[] = $infoarray;

                  $results = array();
                  foreach ($candidateLA as $checkThis) {
                      if (!empty($infoarray[$checkThis])) {
                          $results['la'] = $infoarray[$checkThis];
                          $log[] = "We've got an LA:" . $results['la'];
                          break;
                      }
                  }
                  // The county lookup always ran before (it was guarded by a variable that was never set).
                  foreach ($candidateCounty as $checkThis) {
                      if (!empty($infoarray[$checkThis])) {
                          $results['district'] = $infoarray[$checkThis];
                          $log[] = "We've got a District too: " . $results['district'];
                          break;
                      }
                  }
                  foreach ($extradata as $key => $labels) {
                      foreach ($labels as $checkThis) {
                          if (!empty($infoarray[$checkThis])) {
                              $results[$key] = $infoarray[$checkThis];
                              $log[] = "We've got a $key too: " . $results[$key];
                              break;
                          }
                      }
                  }
                  return array($results);
              }

              if (stripos($resource,"disambiguation") !== false) {
                  $log[] = "Not UK specific - but may disambiguate... <a href='$url' target='_blank'>$url</a>";
                  // Only qualify the title once, otherwise the retry could recurse without end.
                  $qualifier = strtolower(", " . $district);
                  $plainTown = strtolower(str_replace("_"," ",$town));
                  if ($district && strpos($plainTown,$qualifier) === false) {
                      $log[] = "Trying to district added...";
                      return checkWikiScrape($town .", ".$district,$district,$lacode,$postcode,false);
                  }
              }
          }
          return null;
      }

      // --- Page without an infobox -------------------------------------------
      $log[] = "No infobox - <a href='$url' target='_blank'>$url</a>";
      if (strpos($titleText,"Error") !== false) {
          $log[] = "Wikipedia Error - try again later . $resource";
          return null;
      }
      if (stripos($resource,"disambiguation") !== false) {
          $log[] = "Not UK specific - but may disambiguate... <a href='$url' target='_blank'>$url</a>";
          // Try looking for 'dt' based lists; the entries are only logged, not followed.
          foreach ($html->find("dt") as $dt) {
              // Compare against false: a heading that *starts* with the country name matches at offset 0.
              if (strpos($dt->plaintext,"United Kingdom") !== false || strpos($dt->plaintext,"England") !== false) {
                  $log[] = "Found something England related on disambiguation page...";
                  $parent = $dt->parent();
                  $sibling = $parent ? $parent->nextSibling() : null;
                  if ($sibling) {
                      foreach ($sibling->find("li") as $li) {
                          $log[] = $li->plaintext;
                      }
                  }
              }
          }
          return null;
      }
      $log[] = "Not UK specific - abort <a href='$url' target='_blank'>$url</a>";
      return null;
  }
  /**
   * checkGaze()
   *
   * Queries the mySociety Gaze "find_places" service for $town (country GB)
   * and appends the raw response to the log. The response is not parsed and
   * nothing is returned, which is why the checker is not registered in
   * $checkOptions (the registration below is commented out).
   *
   * $district, $lacode, $postcode and $search are accepted for signature
   * compatibility with the other checkers but are not used.
   *
   * @return null
   */
  //$checkOptions[3] = "checkGaze";
  function checkGaze($town,$district,$lacode,$postcode,$search = false) {
      global $log;
      $log[] = "Trying Gaze";
      // URL-encode the query so spaces, '&' and '#' in a place name reach the service intact.
      $url = "http://gaze.mysociety.org/gaze-rest?f=find_places&country=GB&query=".urlencode($town);
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL,$url);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
      $resource = curl_exec($ch);
      curl_close($ch);
      $log[] = $resource;
      return null;
  }
  /**
   * checkGeoNames()
   *
   * Searches GeoNames for $town within GB (at most 5 hits) and, for each hit,
   * fetches its hierarchy. The last hierarchy entry is reported as 'town' and
   * the one before it as 'district' (empty string when the hierarchy has only
   * one entry).
   *
   * $district, $lacode, $postcode and $search are accepted for signature
   * compatibility with the other checkers but are not used.
   *
   * @return array list of array('town' => string, 'district' => string),
   *               indexed from 0; empty when nothing was found or a response
   *               could not be parsed
   */
  $checkOptions[3] = "checkGeoNames";
  function checkGeoNames($town,$district,$lacode,$postcode,$search = false) {
      global $log;
      $log[] = "Trying GeoNames";
      $results = array();

      $url = "http://ws.geonames.org/search?maxRows=5&country=GB&q=".urlencode($town);
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL,$url);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
      $resource = curl_exec($ch);

      $xml = (is_string($resource) && $resource !== '') ? simplexml_load_string($resource) : false;
      if ($xml !== false) {
          foreach ($xml->geoname as $location) {
              $url = "http://ws.geonames.org/hierarchy?geonameId=" . urlencode((string) $location->geonameId);
              curl_setopt($ch, CURLOPT_URL,$url);
              $location_detail = curl_exec($ch);
              $local_xml = (is_string($location_detail) && $location_detail !== '') ? simplexml_load_string($location_detail) : false;
              if ($local_xml === false) {
                  continue;
              }
              $count = count($local_xml->geoname);
              if ($count < 1) {
                  continue;
              }
              // Cast to string so callers receive plain values instead of SimpleXMLElement objects.
              $results[] = array(
                  'town' => (string) $local_xml->geoname[$count - 1]->name,
                  'district' => ($count >= 2) ? (string) $local_xml->geoname[$count - 2]->name : '',
              );
          }
      }
      curl_close($ch);

      $n = count($results);
      $log[] = "Found $n possible location variations to choose from";
      return $results;
  }
  /**
   * getLA() - resolve a place description to a local authority record.
   *
   * Runs every checker registered in the global $checkOptions, in order, with
   * the trimmed and lower-cased arguments:
   *  - exactly one record with an 'la' value: that name is looked up in the
   *    `authorities` table (official_name or common_name); a hit is returned
   *    immediately.
   *  - exactly one record without 'la': any town/district/lacode/postcode it
   *    carries fills in arguments that were empty, for use by later checkers.
   *  - several records or none: only logged.
   * Progress is appended to the global $log. The script dies when a
   * registered checker function does not exist.
   *
   * @return array|null the matching `authorities` rows (at most one, the query
   *                    uses LIMIT 1), or null when no checker led to a match
   */
  function getLA($town,$district = null,$lacode = null,$postcode=null) {
      global $log;
      global $checkOptions;

      // clean() only trims, so values are escaped below where they enter SQL.
      $town = strtolower(clean($town));
      $district = strtolower(clean($district));
      $lacode = strtolower(clean($lacode));
      $postcode = strtolower(clean($postcode));

      foreach ($checkOptions as $checkFunction) {
          if (!function_exists($checkFunction)) {
              die($checkFunction . " does not exist");
          }
          $result = call_user_func($checkFunction,$town,$district,$lacode,$postcode);

          // Checkers may return null; count only real arrays.
          $found = is_array($result) ? count($result) : 0;
          $log[] = "Result Count: " . $found;

          if ($found == 1) {
              // We have a result!
              $log[] = "Found unique record";
              // reset() takes the single record whatever its key is.
              $result = reset($result);
              if (!is_array($result)) {
                  $result = array();
              }
              if (!empty($result['la'])) {
                  $log[] = "Found LA: ". $result['la'];
                  $la = trim($result['la']);
                  // Lets fill in the gaps in our information
                  $db = initDB();
                  // The name may come from a scraped web page - escape it before building SQL.
                  $safeLa = mysql_real_escape_string($la, $db->con);
                  $results = $db->arrayQuery("SELECT * FROM authorities WHERE official_name = '" . $safeLa . "' OR common_name = '" . $safeLa . "' LIMIT 1");
                  if ($results) {
                      $log[] = $results;
                      return $results;
                  }
                  $log[] = "But it looks like we were wrong. Not in our authoritative list!";
              } else {
                  $log[] = "No LA - but checking if we can gather intelligence";
                  if (!$town && !empty($result['town'])) { $town = $result['town']; $log[] = "Updated town"; }
                  if (!$district && !empty($result['district'])) { $district = $result['district']; $log[] = "Updated district"; }
                  if (!$lacode && !empty($result['lacode'])) { $lacode = $result['lacode']; $log[] = "Updated lacode"; }
                  if (!$postcode && !empty($result['postcode'])) { $postcode = $result['postcode']; $log[] = "Updated postcode"; }
              }
          } elseif ($found > 1) {
              // Or we need to disambiguate?
              $log[] = "Disambiguating";
          } else {
              // Nothing found - let's try something else
              $log[] = "No success with ".$checkFunction;
          }
      }
      return null;
  }