PageRenderTime 53ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/rs-data.php

https://bitbucket.org/wragge/rswrapper
PHP | 449 lines | 420 code | 2 blank | 27 comment | 94 complexity | 60eb50328e2045e74107e6586bff2ddc MD5 | raw file
  1. <?php
  2. include 'XmlDomConstruct.php';
  3. function getPage($url) {
  4. $ch = curl_init();
  5. curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  6. curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
  7. curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookies.txt');
  8. curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookies.txt');
  9. curl_setopt($ch, CURLOPT_URL, $url);
  10. $html= curl_exec($ch);
  11. if (!$html) {
  12. echo "cURL error number:" .curl_errno($ch);
  13. echo "cURL error:" . curl_error($ch);
  14. exit;
  15. }
  16. curl_close($ch);
  17. return $html;
  18. }
  19. //Deal with the query parameters
  20. $id = $_GET['id'];
  21. $format = $_GET['f'];
  22. //Check the id to see what type of item this is
  23. if ($id){
  24. if (preg_match('/^[0-9]+$/', $id) == 1) {
  25. $type="item";
  26. } else if (preg_match('/^CP[\s\+]{1}\d+$/i', $id) == 1) {
  27. $type = "person";
  28. } else if (preg_match('/^CA[\s\+]{1}\d+$/i', $id) == 1) {
  29. $type = "agency";
  30. } else if (preg_match('/^[A-Za-z0-9\/]+$/', $id) == 1) {
  31. $type = "series";
  32. } else {
  33. unset($id);
  34. }
  35. }
  36. if ($id) {
  37. //Get item details
  38. if ($type == 'item') {
  39. $url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=$id";
  40. $result = getPage($url);
  41. if ($result) {
  42. preg_match('/<B>Title<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  43. $title = htmlspecialchars($matches[1]);
  44. preg_match('/<B>Control symbol<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  45. $control = $matches[1];
  46. preg_match('/HREF="SeriesDetail.asp\?M=0\&B=(.+?)"\>/', $result, $matches);
  47. $series = $matches[1];
  48. preg_match('/<B>Contents date range<\/B><BR>\r\n\t(.+?)<\/TD>/', $result, $matches);
  49. $cdates = preg_split('/ - /', $matches[1]);
  50. $contentsDates[] = array("start"=>$cdates[0], "end"=>$cdates[1]);
  51. preg_match('/<B>Location<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  52. $location = $matches[1];
  53. preg_match('/<B>Access status<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  54. $access = strtolower($matches[1]);
  55. preg_match('/<B>Extent<\/B><BR>(.*?)<\/TD>/', $result, $matches);
  56. $extent = $matches[1];
  57. //If it's been digitised find out how many pages are in the file
  58. if (strpos($result, "View digital copy")) {
  59. $digitised = "yes";
  60. $imgUrl = "http://naa12.naa.gov.au/scripts/imagine.asp?B=$id&I=1&SE=1";
  61. $imgResult = getPage($imgUrl, $ch);
  62. preg_match('/NAME="C" VALUE="(\d+)" ID="Hidden3"/', $imgResult, $matches);
  63. $pages = $matches[1];
  64. } else {
  65. $digitised = "no";
  66. $pages = "unknown";
  67. }
  68. $itemDetails = array("item"=>array(
  69. "title"=>$title,
  70. "series"=>$series,
  71. "controlSymbol"=>$control,
  72. "contentsDates"=>$contentsDates,
  73. "location"=>$location,
  74. "accessStatus"=>$access,
  75. "extent"=>$extent,
  76. "isDigitised"=>$digitised,
  77. "numberOfPages"=>$pages));
  78. } else {
  79. $error = 'Could not contact server';
  80. }
  81. } else if ($type == 'series') {
  82. $url = "http://www.naa.gov.au/cgi-bin/Search?Number=$id";
  83. $result = getPage($url);
  84. if ($result) {
  85. preg_match('/<B>Title<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  86. $title = htmlspecialchars($matches[1]);
  87. preg_match('/<B>Accumulation dates<\/B><BR>\r\n\t(.+?)<\/TD>/', $result, $matches);
  88. $adates = preg_split('/ - /', $matches[1]);
  89. $accumulationDates[] = array("start"=>$adates[0], "end"=>$adates[1]);
  90. preg_match('/<B>Contents dates<\/B><BR>\r\n\t(.+?)<\/TD>/', $result, $matches);
  91. $cdates = preg_split('/ - /', $matches[1]);
  92. $contentsDates[] = array("start"=>$cdates[0], "end"=>$cdates[1]);
  93. preg_match('/<B>Predominant form<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  94. $form = strtolower($matches[1]);
  95. preg_match('/<B>Quantity and location<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  96. $quantities = preg_split('/<br>/', $matches[1]);
  97. foreach ($quantities as $q) {
  98. preg_match('/(.+?) held in (.+)/', $q, $matches);
  99. $quantity = $matches[1];
  100. $location = $matches[2];
  101. $holdings[] = array("location"=>$location, "quantity"=>$quantity);
  102. }
  103. //Get details of recording agencies
  104. preg_match('/<B>Agency \/ person recording<\/B><BR><TABLE width=100\% >(.*?)<\/TABLE>/', $result, $matches);
  105. preg_match_all('/<td width=30\% VALIGN=TOP >(.+?)<\/td><td width=70\% ><A HREF="AgencyDetail.asp\?M=1\&B=CA\+(\d+)">CA \d+<\/a>, (.+?)<\/td>/', $matches[1], $agencies, PREG_SET_ORDER);
  106. foreach ($agencies as $agency) {
  107. preg_match('/(.+?) - (.+)/', $agency[1], $matches);
  108. $start = $matches[1];
  109. $end = $matches [2];
  110. $agId = "CA $agency[2]";
  111. $recAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>"CA $agency[2]", "agencyTitle"=>$agency[3]);
  112. }
  113. //Get details of controlling agencies
  114. preg_match('/<B>Agency \/ person controlling<\/B><BR><TABLE width=100\% >(.*?)<\/TABLE>/', $result, $matches);
  115. preg_match_all('/<td width=30\% VALIGN=TOP >(.+?)<\/td><td width=70\% ><A HREF="AgencyDetail.asp\?M=1\&B=CA\+(\d+)">CA \d+<\/a>, (.+?)<\/td>/', $matches[1], $agencies, PREG_SET_ORDER);
  116. foreach ($agencies as $agency) {
  117. preg_match('/(.+?) - (.+)/', $agency[1], $matches);
  118. $start = $matches[1];
  119. $end = $matches [2];
  120. $agId = "CA $agency[2]";
  121. $conAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>"CA $agency[2]", "agencyTitle"=>$agency[3]);
  122. }
  123. //Follow the 'Find items in this series' link
  124. $itemsUrl = "http://recordsearch.naa.gov.au/scripts/SearchOF.asp?DP=2&Q=SER_SERIES_NO=QT" . $id . "QT";
  125. $items = getPage($itemsUrl);
  126. if ($items) {
  127. //Get the number of items in this series described on RS
  128. preg_match('/<TD>Items\r\n\t<\/td><td>(.*?)<\/td>/', $items, $matches);
  129. $numItems = $matches[1];
  130. //Trim the asterix of the 'More than 20.000' message
  131. $numItems = str_replace('*', '', $numItems);
  132. }
  133. //open the series links page and harvest the details
  134. $linksUrl = "http://recordsearch.naa.gov.au/scripts/SeriesDetail.asp?M=2&B=" . $id;
  135. $links = getPage($linksUrl);
  136. if ($links) {
  137. //Get rid of line breaks and tabs
  138. $links = preg_replace('/\r/', '', $links);
  139. $links = preg_replace('/\n/', '', $links);
  140. $links = preg_replace('/\t/', '', $links);
  141. //Previous series
  142. preg_match('/<B>Previous series<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/', $links, $matches);
  143. preg_match_all('/<TR><td width=15% VALIGN=TOP >(.*?)<\/td><td width=85% ><A HREF="SeriesDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/', $matches[1], $pseries, PREG_SET_ORDER);
  144. foreach ($pseries as $ps) {
  145. if (strpos($ps[1], "-") !== false) {
  146. preg_match('/(.+?) - (.*)/', $s[1], $matches);
  147. $start = $matches[1];
  148. $end = $matches [2];
  149. } else {
  150. $start = $ps[1];
  151. $end = "";
  152. }
  153. $prevSeries[] = array("start"=>$start, "end"=>$end, "seriesId"=>$ps[2], "seriesTitle"=>$ps[3]);
  154. }
  155. //Subsequent series
  156. preg_match('/<B>Subsequent series<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  157. preg_match_all('/<TR><td width=15% VALIGN=TOP >(.*?)<\/td><td width=85% ><A HREF="SeriesDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $sseries, PREG_SET_ORDER);
  158. foreach ($sseries as $ss) {
  159. if (strpos($ss[1], "-") !== false) {
  160. preg_match('/(.+?) - (.*)/', $s[1], $matches);
  161. $start = $matches[1];
  162. $end = $matches [2];
  163. } else {
  164. $start = $ss[1];
  165. $end = "";
  166. }
  167. $subSeries[] = array("start"=>$start, "end"=>$end, "seriesId"=>$ss[2], "seriesTitle"=>$ss[3]);
  168. }
  169. //Controlling series
  170. preg_match('/<B>Controlling series<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  171. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="SeriesDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $cseries, PREG_SET_ORDER);
  172. foreach ($cseries as $cs) {
  173. if (strpos($cs[1], "-") !== false) {
  174. preg_match('/(.+?) - (.*)/', $cs[1], $matches);
  175. $start = $matches[1];
  176. $end = $matches [2];
  177. } else {
  178. $start = $cs[1];
  179. $end = "";
  180. }
  181. $conSeries[] = array("start"=>$start, "end"=>$end, "seriesId"=>$cs[2], "seriesTitle"=>$cs[3]);
  182. }
  183. //Controlled series
  184. preg_match('/<B>Controlled series<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  185. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="SeriesDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $conseries, PREG_SET_ORDER);
  186. foreach ($conseries as $cons) {
  187. if (strpos($cons[1], "-") !== false) {
  188. preg_match('/(.+?) - (.*)/', $cons[1], $matches);
  189. $start = $matches[1];
  190. $end = $matches [2];
  191. } else {
  192. $start = $cons[1];
  193. $end = "";
  194. }
  195. $contSeries[] = array("start"=>$start, "end"=>$end, "seriesId"=>$cons[2], "seriesTitle"=>$cons[3]);
  196. }
  197. //Related series
  198. preg_match('/<B>Related series<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  199. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="SeriesDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $rseries, PREG_SET_ORDER);
  200. foreach ($rseries as $rs) {
  201. if (strpos($rs[1], "-") !== false) {
  202. preg_match('/(.+?) - (.*)/', $rs[1], $matches);
  203. $start = $matches[1];
  204. $end = $matches [2];
  205. } else {
  206. $start = $rs[1];
  207. $end = "";
  208. }
  209. $relSeries[] = array("start"=>$start, "end"=>$end, "seriesId"=>$rs[2], "seriesTitle"=>$rs[3]);
  210. }
  211. }
  212. $itemDetails = array("series"=>array(
  213. "title"=>$title,
  214. "accumulationDates"=>$accumulationDates,
  215. "contentsDates"=>$contentsDates,
  216. "form"=>$form,
  217. "holdings"=>array("holding"=>$holdings),
  218. "numberOfItems"=>$numItems,
  219. "recordingAgencies"=>array("agency"=>$recAgencies),
  220. "controllingAgencies"=>array("agency"=>$conAgencies)));
  221. if (count($prevSeries) != 0) {
  222. $itemDetails["series"]["previousSeries"] = array("series"=>$prevSeries);
  223. }
  224. if (count($subSeries) != 0) {
  225. $itemDetails["series"]["subsequentSeries"] = array("series"=>$subSeries);
  226. }
  227. if (count($conSeries) != 0) {
  228. $itemDetails["series"]["controllingSeries"] = array("series"=>$conSeries);
  229. }
  230. if (count($contSeries) != 0) {
  231. $itemDetails["series"]["controlledSeries"] = array("series"=>$contSeries);
  232. }
  233. if (count($relSeries) != 0) {
  234. $itemDetails["series"]["relatedSeries"] = array("series"=>$relSeries);
  235. }
  236. } else {
  237. $error = 'Could not contact server';
  238. }
  239. } else if ($type == "agency") {
  240. $url = "http://www.naa.gov.au/cgi-bin/Search?Number=" . urlencode($id);
  241. $result = getPage($url);
  242. if ($result) {
  243. preg_match('/<B>Title<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  244. $title = $matches[1];
  245. preg_match('/<B>Date range<\/B><BR>\r\n\t(.+?)<\/TD>/', $result, $matches);
  246. $dateRange = preg_split('/ - /',$matches[1]);
  247. $startDate = $dateRange[0];
  248. $endDate = $dateRange[1];
  249. preg_match('/<B>Status code<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  250. $statusCodes = preg_split('/; /', $matches[1]);
  251. preg_match('/<B>Location<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  252. $location = $matches[1];
  253. preg_match('/<B>Function<\/B><BR>(.+?)<\/TD>/', $result, $matches);
  254. $funcs = preg_split('/<br>/',$matches[1]);
  255. foreach ($funcs as $func) {
  256. $funcParts = preg_split('/: /', $func);
  257. $funcDates = preg_split('/ - /', $funcParts[0]);
  258. $funcName = strtolower(strip_tags($funcParts[1]));
  259. $functions[] = array("functionStart"=>$funcDates[0], "functionEnd"=>$funcDates[1], "functionName"=>$funcName);
  260. }
  261. //Follow the 'Find series recorded by this agency' link
  262. $itemsUrl = "http://recordsearch.naa.gov.au/scripts/searchOF.asp?DP=3&Q=SERIES%5FNO+IN+%28SELECT+SER%5FSERIES%5FNO+FROM+NAAS%2EDBO%2ELINKAGES+WHERE+LINK%5FTYPE%3DQTPRSQT+AND+PRV%5FREG%5FNO%3DQT" . urlencode($id) . "QT%29";
  263. $items = getPage($itemsUrl);
  264. if ($items) {
  265. //Get the number of items in this series described on RS
  266. preg_match('/<TD>Series\r\n\t<\/td><td>(.*?)<\/td>/', $items, $matches);
  267. $numItems = $matches[1];
  268. //Trim the asterix of the 'More than 20.000' message
  269. $numItems = str_replace('*', '', $numItems);
  270. }
  271. $linksUrl = "http://recordsearch.naa.gov.au/scripts/AgencyDetail.asp?M=2&B=" . urlencode($id);
  272. $links = getPage($linksUrl);
  273. if ($links) {
  274. //Get rid of line breaks and tabs
  275. $links = preg_replace('/\r/', '', $links);
  276. $links = preg_replace('/\n/', '', $links);
  277. $links = preg_replace('/\t/', '', $links);
  278. //Organisation controlling
  279. preg_match('/<B>Organisation controlling<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  280. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="OrganisationDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $orgs, PREG_SET_ORDER);
  281. foreach ($orgs as $org) {
  282. if (strpos($org[1], "-") !== false) {
  283. preg_match('/(.+?) - (.*)/', $org[1], $matches);
  284. $start = $matches[1];
  285. $end = $matches [2];
  286. } else {
  287. $start = $org[1];
  288. $end = "";
  289. }
  290. $conOrgs[] = array("start"=>$start, "end"=>$end, "orgId"=>$org[2], "orgTitle"=>$org[3]);
  291. }
  292. //Superior agency
  293. preg_match('/<B>Superior agency<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  294. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="AgencyDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $sagencies, PREG_SET_ORDER);
  295. foreach ($sagencies as $sa) {
  296. if (strpos($sa[1], "-") !== false) {
  297. preg_match('/(.+?) - (.*)/', $sa[1], $matches);
  298. $start = $matches[1];
  299. $end = $matches [2];
  300. } else {
  301. $start = $sa[1];
  302. $end = "";
  303. }
  304. $supAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>$sa[2], "agencyTitle"=>$sa[3]);
  305. }
  306. //Controlled agency
  307. preg_match('/<B>Controlled agency<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  308. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="AgencyDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $cagencies, PREG_SET_ORDER);
  309. foreach ($cagencies as $ca) {
  310. if (strpos($ca[1], "-") !== false) {
  311. preg_match('/(.+?) - (.*)/', $ca[1], $matches);
  312. $start = $matches[1];
  313. $end = $matches [2];
  314. } else {
  315. $start = $ca[1];
  316. $end = "";
  317. }
  318. $conAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>$ca[2], "agencyTitle"=>$ca[3]);
  319. }
  320. //Previous agency
  321. preg_match('/<B>Previous agency<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  322. preg_match_all('/<TR><td width=15% VALIGN=TOP >(.*?)<\/td><td width=85% ><A HREF="AgencyDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $pagencies, PREG_SET_ORDER);
  323. foreach ($pagencies as $pa) {
  324. if (strpos($pa[1], "-") !== false) {
  325. preg_match('/(.+?) - (.*)/', $pa[1], $matches);
  326. $start = $matches[1];
  327. $end = $matches [2];
  328. } else {
  329. $start = $pa[1];
  330. $end = "";
  331. }
  332. $prevAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>$pa[2], "agencyTitle"=>$pa[3]);
  333. }
  334. //Subsequent agency
  335. preg_match('/<B>Subsequent agency<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  336. preg_match_all('/<TR><td width=15% VALIGN=TOP >(.*?)<\/td><td width=85% ><A HREF="AgencyDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $nagencies, PREG_SET_ORDER);
  337. foreach ($nagencies as $na) {
  338. if (strpos($na[1], "-") !== false) {
  339. preg_match('/(.+?) - (.*)/', $na[1], $matches);
  340. $start = $matches[1];
  341. $end = $matches [2];
  342. } else {
  343. $start = $na[1];
  344. $end = "";
  345. }
  346. $nextAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>$na[2], "agencyTitle"=>$na[3]);
  347. }
  348. //Person associated
  349. preg_match('/<B>Persons associated with agency<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  350. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="PersonDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $persons, PREG_SET_ORDER);
  351. foreach ($persons as $person) {
  352. if (strpos($person[1], "-") !== false) {
  353. preg_match('/(.+?) - (.*)/', $person[1], $matches);
  354. $start = $matches[1];
  355. $end = $matches [2];
  356. } else {
  357. $start = $person[1];
  358. $end = "";
  359. }
  360. $assocPersons[] = array("start"=>$start, "end"=>$end, "personId"=>$person[2], "personName"=>$person[3]);
  361. }
  362. }
  363. $itemDetails = array("agency"=>array(
  364. "title"=>$title,
  365. "startDate"=>$startDate,
  366. "endDate"=>$endDate,
  367. "statusCodes"=>array("statusCode"=>$statusCodes),
  368. "location"=>$location,
  369. "functions"=>array("function"=>$functions),
  370. "numberOfSeries"=>$numItems));
  371. if (count($conOrgs) != 0) {
  372. $itemDetails["agency"]["controllingOrganisations"] = array("organisation"=>$conOrgs);
  373. }
  374. if (count($supAgencies) != 0) {
  375. $itemDetails["agency"]["superiorAgencies"] = array("agency"=>$supAgencies);
  376. }
  377. if (count($conAgencies) != 0) {
  378. $itemDetails["agency"]["controlledAgencies"] = array("agency"=>$conAgencies);
  379. }
  380. if (count($prevAgencies) != 0) {
  381. $itemDetails["agency"]["previousAgencies"] = array("agency"=>$prevAgencies);
  382. }
  383. if (count($nextAgencies) != 0) {
  384. $itemDetails["agency"]["subsequentAgencies"] = array("agency"=>$nextAgencies);
  385. }
  386. if (count($assocPersons) != 0) {
  387. $itemDetails["agency"]["associatedPersons"] = array("person"=>$assocPersons);
  388. }
  389. } else {
  390. $error = 'Could not contact server';
  391. }
  392. } else if ($type == "person") {
  393. $url = "http://www.naa.gov.au/cgi-bin/Search?Number=" . urlencode($id);
  394. $result = getPage($url);
  395. if ($result) {
  396. preg_match('/<B>Name<\/B><BR>(.+?)<\/TD>/i', $result, $matches);
  397. $name = $matches[1];
  398. preg_match('/<B>Date range<\/B><BR>\r\n\t(.+?)<\/TD>/', $result, $matches);
  399. $dateRange = preg_split('/ - /',$matches[1]);
  400. $birth = $dateRange[0];
  401. $death = $dateRange[1];
  402. $linksUrl = "http://recordsearch.naa.gov.au/scripts/PersonDetail.asp?M=2&B=" . urlencode($id);
  403. $links = getPage($linksUrl);
  404. if ($links) {
  405. //Get rid of line breaks and tabs
  406. $links = preg_replace('/\r/', '', $links);
  407. $links = preg_replace('/\n/', '', $links);
  408. $links = preg_replace('/\t/', '', $links);
  409. preg_match('/<B>Agencies associated with person<\/B><BR><TABLE width=100% >(.+?)<\/TABLE>/i', $links, $matches);
  410. preg_match_all('/<TR><td width=30% VALIGN=TOP >(.*?)<\/td><td width=70% ><A HREF="AgencyDetail.asp\?M=1\&B=.+?">(.+?)<\/a>, (.+?)<\/td><\/TR>/i', $matches[1], $agencies, PREG_SET_ORDER);
  411. foreach ($agencies as $a) {
  412. if (strpos($a[1], "-") !== false) {
  413. preg_match('/(.+?) - (.*)/', $a[1], $matches);
  414. $start = $matches[1];
  415. $end = $matches [2];
  416. } else {
  417. $start = $a[1];
  418. $end = "";
  419. }
  420. $assocAgencies[] = array("start"=>$start, "end"=>$end, "agencyId"=>$a[2], "agencyTitle"=>$a[3]);
  421. }
  422. }
  423. $itemDetails = array("person"=>array(
  424. "name"=>$name,
  425. "birthDate"=>$birth,
  426. "deathDate"=>$death));
  427. if (count($assocAgencies) != 0) {
  428. $itemDetails["person"]["associatedAgencies"] = array("agency"=>$assocAgencies);
  429. }
  430. }
  431. }
  432. } else {
  433. $error = 'Invalid id';
  434. }
  435. if (!$error) {
  436. if ($format == "json") {
  437. echo json_encode($itemDetails);
  438. } else {
  439. header ("content-type: text/xml");
  440. $dom = new XmlDomConstruct('1.0', 'utf-8');
  441. $dom->fromMixed($itemDetails);
  442. echo $dom->saveXML();
  443. }
  444. } else {
  445. echo $error;
  446. }
  447. ?>