PageRenderTime 56ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/api_taxon_name.php

https://github.com/BioScripts/bionames-api
PHP | 853 lines | 596 code | 178 blank | 79 comment | 84 complexity | 7e59a3cb88048ea6abefb4cda13e4190 MD5 | raw file
  1. <?php
  2. // Taxon name
  3. require_once (dirname(__FILE__) . '/couchsimple.php');
  4. require_once (dirname(__FILE__) . '/lib.php');
  5. require_once (dirname(__FILE__) . '/api_utils.php');
  6. //--------------------------------------------------------------------------------------------------
  /**
   * Fallback response used when the request carries no query parameters.
   *
   * Writes the literal string "hi" to the output and returns nothing.
   */
  function default_display()
  {
      print "hi";
  }
  11. //--------------------------------------------------------------------------------------------------
  /**
   * Output the documents of the taxonName/nameString view rows keyed by $name.
   *
   * Sends one GET request (with include_docs=true) through the global $couch
   * client and hands a result object to api_output(). The result object always
   * carries `status` (200 when rows were found, otherwise 404) and `url` (the
   * view URL that was queried), plus either `clusters` (the `doc` of every
   * returned row) or `error`.
   *
   * @param string $name     Name string, JSON-encoded and used as the view key.
   * @param string $callback Passed through unchanged to api_output().
   */
  function clusters_with_name($name, $callback = '')
  {
      global $config;
      global $couch;

      $url = '_design/taxonName/_view/nameString?key=' . urlencode(json_encode($name))
          . '&include_docs=true';
      // !empty() avoids an undefined-index notice when the option is not configured.
      if (!empty($config['stale']))
      {
          $url .= '&stale=ok';
      }

      $resp = $couch->send("GET", "/" . $config['couchdb_options']['database'] . "/" . $url);
      $response_obj = json_decode($resp);

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $url;

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // json_decode() yields null for a non-JSON reply; calling count() on the
          // missing rows would throw on PHP 8, so report the problem instead.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          $obj->clusters = array();
          foreach ($response_obj->rows as $row)
          {
              $obj->clusters[] = $row->doc;
          }
      }

      api_output($obj, $callback);
  }
  62. //--------------------------------------------------------------------------------------------------
  63. // Unsorted array of publications containing a name
  64. // Question is how scalable this is if we return documents as well as ids?
  /**
   * Output an unsorted, de-duplicated list of publications associated with $name.
   *
   * Two views are queried with the key range [$name] .. [$name, {}]:
   * publication/tags (names listed as "tags" of articles) and
   * publication/names (names published). The `value` of every row from both
   * views is collected into `publications`. The result object handed to
   * api_output() has `status` 200 when at least one view returned rows and
   * 404 otherwise; `error` holds a database error or 'Not found'.
   *
   * @param string $name         Name string forming the first element of the view key.
   * @param array  $fields       Passed to api_get_document_simplified(); only used
   *                             when $include_docs is true.
   * @param string $callback     Passed through unchanged to api_output().
   * @param bool   $include_docs When true each collected value is replaced by the
   *                             result of api_get_document_simplified().
   */
  function publications_with_name_simple($name, $fields=array('all'), $callback = '', $include_docs = false)
  {
      global $config;
      global $couch;

      $startkey = array($name);
      $endkey = array($name, new stdclass);
      $query = '?startkey=' . urlencode(json_encode($startkey))
          . '&endkey=' . urlencode(json_encode($endkey))
          . '&reduce=false';
      if (!empty($config['stale']))
      {
          $query .= '&stale=ok';
      }

      // The result object must exist before any property is assigned to it;
      // assigning to an undefined variable is a fatal error on PHP 8.
      $obj = new stdclass;
      $obj->status = 404;
      $obj->publications = array();

      foreach (array('tags', 'names') as $view)
      {
          $url = '_design/publication/_view/' . $view . $query;
          $resp = $couch->send("GET", "/" . $config['couchdb_options']['database'] . "/" . $url);
          $response_obj = json_decode($resp);

          if (isset($response_obj->error))
          {
              $obj->error = $response_obj->error;
          }
          elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
          {
              // Undecodable reply: report it rather than calling count() on null.
              $obj->error = 'Invalid response from database';
          }
          elseif (count($response_obj->rows) > 0)
          {
              // A successful view clears any error left by the previous one.
              unset($obj->error);
              $obj->status = 200;
              foreach ($response_obj->rows as $row)
              {
                  $obj->publications[] = $row->value;
              }
          }
      }

      if ($obj->status != 200 && !isset($obj->error))
      {
          $obj->error = 'Not found';
      }

      // array_unique() keeps the original keys; re-index so the list has no gaps
      // (a gapped array would be JSON-encoded as an object).
      $obj->publications = array_values(array_unique($obj->publications));

      // Fill out if wanted
      if ($include_docs)
      {
          $documents = array();
          foreach ($obj->publications as $id)
          {
              $documents[] = api_get_document_simplified($id, $fields);
          }
          $obj->publications = $documents;
      }

      api_output($obj, $callback);
  }
  155. //--------------------------------------------------------------------------------------------------
  156. // Question is how scalable this is if we return documents as well as ids?
  /**
   * Output publications associated with $name, grouped by year.
   *
   * Queries the publication/tags and publication/names views. Without $year
   * the key range is [$name] .. [$name, {}]; with $year both range ends are
   * [$name, $year]. The second key element of every row is used as the year
   * and the row `value` as a publication id. Ids are de-duplicated per year,
   * years are sorted ascending, and each id is expanded with
   * api_get_document_simplified(); ids for which that call returns a falsy
   * value are dropped (a year left with no documents is omitted).
   *
   * The result object passed to api_output() has `status` (200 if either view
   * returned rows, otherwise 404), `url` (the tags view URL), and either
   * `years` (year => id => document) or `error`.
   *
   * @param string $name     Name string forming the first element of the view key.
   * @param mixed  $year     Optional year restricting the key range; '' means all years.
   * @param array  $fields   Passed to api_get_document_simplified().
   * @param string $callback Passed through unchanged to api_output().
   */
  function publications_with_name($name, $year = '', $fields=array('all'), $callback = '')
  {
      global $config;
      global $couch;

      if ($year == '')
      {
          $startkey = array($name);
          $endkey = array($name, new stdclass);
      }
      else
      {
          $startkey = array($name, $year);
          $endkey = array($name, $year);
      }

      $query = '?startkey=' . urlencode(json_encode($startkey))
          . '&endkey=' . urlencode(json_encode($endkey))
          . '&reduce=false';
      if (!empty($config['stale']))
      {
          $query .= '&stale=ok';
      }

      $database = "/" . $config['couchdb_options']['database'] . "/";
      $tags_url = '_design/publication/_view/tags' . $query;
      $names_url = '_design/publication/_view/names' . $query;

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $tags_url;

      // year => list of publication ids (may contain duplicates until later)
      $ids_by_year = array();

      // Names listed as "tags" of articles
      $response_obj = json_decode($couch->send("GET", $database . $tags_url));
      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // Undecodable reply: report it rather than calling count() on null.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          foreach ($response_obj->rows as $row)
          {
              $ids_by_year[$row->key[1]][] = $row->value;
          }
      }

      // Names published by a publication
      $response_obj = json_decode($couch->send("GET", $database . $names_url));
      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) > 0)
      {
          // Rows from this view override a failure of the tags view.
          unset($obj->error);
          $obj->status = 200;
          foreach ($response_obj->rows as $row)
          {
              $ids_by_year[$row->key[1]][] = $row->value;
          }
      }

      // `years` is only reported when at least one view produced rows.
      if ($obj->status == 200)
      {
          $sorted_years = array_keys($ids_by_year);
          sort($sorted_years);

          $obj->years = array();
          foreach ($sorted_years as $y)
          {
              foreach (array_unique($ids_by_year[$y]) as $id)
              {
                  $document = api_get_document_simplified($id, $fields);
                  if ($document)
                  {
                      $obj->years[$y][$id] = $document;
                  }
              }
          }
      }

      api_output($obj, $callback);
  }
  315. //--------------------------------------------------------------------------------------------------
  316. // may return less than limit as we filter duplicate names
  /**
   * Output name strings from the taxonName/nameString view that start with $name.
   *
   * The view is queried for keys between "$name" and "$name\u9999". Row keys
   * are collected and de-duplicated, so fewer than $limit suggestions may be
   * returned because $limit is applied by the view before duplicates are
   * removed. The result object handed to api_output() carries `status`
   * (200 or 404), `url`, and either `suggestions` or `error`.
   *
   * @param string $name     Prefix to complete.
   * @param int    $limit    Maximum number of view rows to request.
   * @param string $callback Passed through unchanged to api_output().
   */
  function name_suggest($name, $limit = 5, $callback = '')
  {
      global $config;
      global $couch;

      // Build the keys with json_encode() so quotes and backslashes in the name
      // are escaped; wrapping the raw string in quotes by hand yields invalid JSON.
      $start_key = json_encode((string) $name);
      // Drop the closing quote and append the high sentinel used as range end.
      $end_key = substr($start_key, 0, -1) . '\u9999"';

      // (int) keeps arbitrary text out of the query string.
      $url = "/_design/taxonName/_view/nameString?startkey=" . urlencode($start_key)
          . "&endkey=" . urlencode($end_key)
          . "&limit=" . (int) $limit;
      if (!empty($config['stale']))
      {
          $url .= '&stale=ok';
      }

      $resp = $couch->send("GET", "/" . $config['couchdb_options']['database'] . "/" . $url);
      $response_obj = json_decode($resp);

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $url;

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // Undecodable reply: report it rather than calling count() on null.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          $obj->suggestions = array();
          foreach ($response_obj->rows as $row)
          {
              $obj->suggestions[] = $row->key;
          }
          $obj->suggestions = array_values(array_unique($obj->suggestions));
      }

      api_output($obj, $callback);
  }
  357. //--------------------------------------------------------------------------------------------------
  358. // Names that share same epithet and author (handy for searching for possible synonyms)
  /**
   * Output the values of taxonName/epithet_author view rows keyed by $epithet.
   *
   * Row values are collected into `names` and de-duplicated. The result object
   * handed to api_output() carries `status` (200 or 404), `url`, and either
   * `names` or `error`.
   *
   * @param string $epithet  View key (per the file comment, an epithet plus author;
   *                         the exact key format is defined by the view — confirm there).
   * @param string $callback Passed through unchanged to api_output().
   */
  function name_same_epithet_author($epithet, $callback = '')
  {
      global $config;
      global $couch;

      // json_encode() escapes quotes and backslashes that hand-built quoting
      // would leave raw, which produced an invalid JSON key.
      $url = "/_design/taxonName/_view/epithet_author?key=" . urlencode(json_encode((string) $epithet));
      if (!empty($config['stale']))
      {
          $url .= '&stale=ok';
      }

      $resp = $couch->send("GET", "/" . $config['couchdb_options']['database'] . "/" . $url);
      $response_obj = json_decode($resp);

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $url;

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // Undecodable reply: report it rather than calling count() on null.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          $obj->names = array();
          foreach ($response_obj->rows as $row)
          {
              $obj->names[] = $row->value;
          }
          $obj->names = array_values(array_unique($obj->names));
      }

      api_output($obj, $callback);
  }
  398. //--------------------------------------------------------------------------------------------------
  399. // Names that are "related" e.g. possible synonyms
  /**
   * Output names "related" to $name (e.g. possible synonyms).
   *
   * Two views are queried with $name as key: bhl/name_synonym, whose row
   * values contribute their `name` property, and classification/synonyms,
   * whose row values are used directly. The merged list is de-duplicated and
   * $name itself is removed. The result object handed to api_output() carries
   * `status` (200 if either view returned rows, otherwise 404), `url` (the
   * first view URL), and `related` and/or `error`.
   *
   * @param string $name     Name string used as key for both views.
   * @param string $callback Passed through unchanged to api_output().
   */
  function name_related($name, $callback = '')
  {
      global $config;
      global $couch;

      $database = "/" . $config['couchdb_options']['database'] . "/";
      $key = urlencode(json_encode($name));
      $stale = !empty($config['stale']) ? '&stale=ok' : '';

      // BHL page co-occurrence
      $url = "/_design/bhl/_view/name_synonym?key=" . $key . $stale;
      $response_obj = json_decode($couch->send("GET", $database . $url));

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $url;

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // Undecodable reply: report it rather than calling count() on null.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          $obj->related = array();
          foreach ($response_obj->rows as $row)
          {
              $obj->related[] = $row->value->name;
          }
          $obj->related = array_values(array_unique($obj->related));
      }

      // Names that one or more classifications say are synonyms
      $url = "/_design/classification/_view/synonyms?key=" . $key . $stale;
      $response_obj = json_decode($couch->send("GET", $database . $url));

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) > 0)
      {
          // Rows from this view override a failure of the first one.
          $obj->status = 200;
          unset($obj->error);
          if (!isset($obj->related))
          {
              $obj->related = array();
          }
          foreach ($response_obj->rows as $row)
          {
              $obj->related[] = $row->value;
          }
          $obj->related = array_values(array_unique($obj->related));
      }

      // Make sure this name isn't in list of "related" names. array_diff()
      // compares by string value, and array_values() closes the gap so the
      // result is still encoded as a JSON array rather than an object.
      if (isset($obj->related))
      {
          $obj->related = array_values(array_diff($obj->related, array($name)));
      }

      api_output($obj, $callback);
  }
  481. //--------------------------------------------------------------------------------------------------
  482. // Return taxon concepts that include this name
  /**
   * Output the values of classification/name_to_concept view rows keyed by $id.
   *
   * Row values are collected into `concepts`. The result object handed to
   * api_output() carries `status` (200 or 404), `url`, and either `concepts`
   * or `error`.
   *
   * @param string $id       Identifier used as the view key (always sent as a JSON string).
   * @param string $callback Passed through unchanged to api_output().
   */
  function name_to_concept($id, $callback = '')
  {
      global $config;
      global $couch;

      // json_encode() escapes quotes and backslashes that hand-built quoting
      // would leave raw; the string cast keeps numeric ids quoted as before.
      $url = "/_design/classification/_view/name_to_concept?key=" . urlencode(json_encode((string) $id));
      if (!empty($config['stale']))
      {
          $url .= '&stale=ok';
      }

      $resp = $couch->send("GET", "/" . $config['couchdb_options']['database'] . "/" . $url);
      $response_obj = json_decode($resp);

      $obj = new stdclass;
      $obj->status = 404;
      $obj->url = $url;

      if (isset($response_obj->error))
      {
          $obj->error = $response_obj->error;
      }
      elseif (!isset($response_obj->rows) || !is_array($response_obj->rows))
      {
          // Undecodable reply: report it rather than calling count() on null.
          $obj->error = 'Invalid response from database';
      }
      elseif (count($response_obj->rows) == 0)
      {
          $obj->error = 'Not found';
      }
      else
      {
          $obj->status = 200;
          $obj->concepts = array();
          foreach ($response_obj->rows as $row)
          {
              $obj->concepts[] = $row->value;
          }
      }

      api_output($obj, $callback);
  }
  521. //--------------------------------------------------------------------------------------------------
  522. // Return names that resemble query string
  /**
   * Output names that resemble $name, closest first.
   *
   * Runs the external command configured in $config['simstring'] against
   * $config['simstring_db'], writes $name (newline-terminated) to its stdin
   * and reads its stdout. Output lines that begin with whitespace are treated
   * as hits. $name itself is excluded, and the remaining candidates are
   * ordered by ascending Levenshtein distance to $name.
   *
   * The result object handed to api_output() has `status` 200 and `names`
   * when the command exits with code 0; otherwise `status` stays 404 and
   * `names` is absent.
   *
   * @param string $name     Query string.
   * @param string $callback Passed through unchanged to api_output().
   */
  function name_did_you_mean($name, $callback = '')
  {
      global $config;

      $obj = new stdclass;
      $obj->status = 404;

      // NOTE(review): "-t 0.75 cosine" presumably selects a 0.75 cosine-similarity
      // threshold — confirm against the SimString command-line documentation.
      $cmd = $config['simstring'] . ' -d ' . $config['simstring_db'] . ' -t 0.75 cosine';
      $descriptorspec = array(
          0 => array("pipe", "r"),
          1 => array("pipe", "w")
      );

      $process = proc_open($cmd, $descriptorspec, $pipes);
      if (is_resource($process))
      {
          fwrite($pipes[0], $name . "\n"); // NOTE: add \n to end of string!
          fclose($pipes[0]);
          $output = stream_get_contents($pipes[1]);
          fclose($pipes[1]);
          $return_value = proc_close($process);

          if ($return_value == 0)
          {
              $obj->status = 200;

              // Keep only the lines that start with whitespace, trimmed.
              $hits = array();
              foreach (explode("\n", $output) as $line)
              {
                  if (preg_match('/^\s+/', $line))
                  {
                      $hit = trim($line);
                      // A whitespace-only line would otherwise become an empty suggestion.
                      if ($hit !== '')
                      {
                          $hits[] = $hit;
                      }
                  }
              }

              $candidates = array_values(array_diff(array_unique($hits), array($name)));

              $distances = array();
              foreach ($candidates as $candidate)
              {
                  $distances[$candidate] = levenshtein($name, $candidate);
              }

              // asort() keeps every key. array_multisort() renumbers integer-like
              // keys, which replaced all-digit candidates with 0, 1, 2, ...
              asort($distances, SORT_NUMERIC);
              // PHP stores digit-only string keys as integers; turn them back into strings.
              $obj->names = array_map('strval', array_keys($distances));
          }
      }

      api_output($obj, $callback);
  }
  569. //--------------------------------------------------------------------------------------------------
  /**
   * Dispatch the current request to one of the handlers, based on $_GET.
   *
   * - No query parameters: default_display(), then exit.
   * - `id` together with `concepts`: name_to_concept().
   * - `name` together with, in order of precedence:
   *     `publications` (+ `year`)  -> publications_with_name()
   *     `publications`             -> publications_with_name_simple()
   *     `suggestions`              -> name_suggest() with a limit of 5
   *     `related`                  -> name_related()
   *     `epithet`                  -> name_same_epithet_author()
   *     `didyoumean`               -> name_did_you_mean()
   *     none of the above          -> clusters_with_name()
   * - Anything else: nothing is called.
   *
   * Optional parameters: `callback` (forwarded to the handlers),
   * `include_docs` (presence alone enables it) and `fields` (comma-separated
   * list, default array('all')).
   */
  function main()
  {
      // If no query parameters
      if (count($_GET) == 0)
      {
          default_display();
          exit(0);
      }

      // NOTE(review): the callback value is forwarded as received; confirm that
      // api_output() validates it before emitting it.
      $callback = isset($_GET['callback']) ? $_GET['callback'] : '';
      $include_docs = isset($_GET['include_docs']);

      // Optional fields to include
      $fields = isset($_GET['fields']) ? explode(",", $_GET['fields']) : array('all');

      // Queries based on identifier
      if (isset($_GET['id']) && isset($_GET['concepts']))
      {
          name_to_concept($_GET['id'], $callback);
          return;
      }

      // Queries based on name string
      if (!isset($_GET['name']))
      {
          return;
      }
      $name = $_GET['name'];

      if (isset($_GET['publications']))
      {
          if (isset($_GET['year']))
          {
              publications_with_name($name, $_GET['year'], $fields, $callback);
          }
          else
          {
              publications_with_name_simple($name, $fields, $callback, $include_docs);
          }
      }
      elseif (isset($_GET['suggestions']))
      {
          name_suggest($name, 5, $callback);
      }
      elseif (isset($_GET['related']))
      {
          name_related($name, $callback);
      }
      elseif (isset($_GET['epithet']))
      {
          name_same_epithet_author($name, $callback);
      }
      elseif (isset($_GET['didyoumean']))
      {
          name_did_you_mean($name, $callback);
      }
      else
      {
          // show clusters for this name
          clusters_with_name($name, $callback);
      }
  }
  674. main();
  675. ?>