PageRenderTime 59ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/applications/hierarchy_import/col/import.php

https://github.com/BibAlex/eol_php_code
PHP | 349 lines | 246 code | 72 blank | 31 comment | 38 complexity | 255035a9d1a44cfeee9277e7e56d24c0 MD5 | raw file
  1. <?php
  2. exit;
  3. include_once(dirname(__FILE__)."/../../../config/environment.php");
  4. $mysqli =& $GLOBALS['mysqli_connection'];
  5. $mysqli_col = load_mysql_environment("col2010");
  6. //$mysqli->truncate_tables("test_db");
  7. $mysqli->begin_transaction();
  8. $GLOBALS['agents_file_path'] = temp_filepath();
  9. $GLOBALS['synonyms_file_path'] = temp_filepath();
  10. $GLOBALS['agents_file'] = fopen($GLOBALS['agents_file_path'], "w+");
  11. $GLOBALS['synonyms_file'] = fopen($GLOBALS['synonyms_file_path'], "w+");
  12. get_agents();
  13. get_parents();
  14. add_hierarchy();
  15. start_process();
  16. Tasks::rebuild_nested_set($GLOBALS['hierarchy']->id);
  17. fclose($GLOBALS['agents_file']);
  18. fclose($GLOBALS['synonyms_file']);
  19. $mysqli->load_data_infile($GLOBALS['agents_file_path'], 'agents_hierarchy_entries');
  20. $mysqli->load_data_infile($GLOBALS['synonyms_file_path'], 'synonyms');
  21. //
  22. // unlink($GLOBALS['agents_file_path']);
  23. // unlink($GLOBALS['synonyms_file_path']);
  24. $mysqli->end_transaction();
  25. exit;
  26. function get_parents()
  27. {
  28. global $mysqli_col;
  29. $result = $mysqli_col->query("SELECT t.record_id id, t.parent_id FROM taxa t WHERE is_accepted_name=1");
  30. while($result && $row=$result->fetch_assoc())
  31. {
  32. $GLOBALS['children'][$row['parent_id']][] = $row['id'];
  33. }
  34. if($result && $result->num_rows) $result->free();
  35. }
  36. function get_agents()
  37. {
  38. global $mysqli_col;
  39. $GLOBALS['agent_ids'] = array();
  40. $result = $mysqli_col->query("SELECT * FROM `databases` ORDER BY record_id");
  41. while($result && $row=$result->fetch_assoc())
  42. {
  43. $database_id = $row["record_id"];
  44. $database_url = $row["web_site"];
  45. $database_url = preg_replace("/^#/", "", $database_url);
  46. $database_url = preg_replace("/#.*$/", "", $database_url);
  47. $database_name = html_entity_decode(htmlspecialchars_decode(trim($row["database_full_name"])), ENT_COMPAT, "UTF-8");
  48. $contact_name = html_entity_decode(htmlspecialchars_decode(trim($row["contact_person"])), ENT_COMPAT, "UTF-8");
  49. $database_mock = Functions::mock_object("Agent", array( "full_name" => $database_name,
  50. "display_name" => $database_name,
  51. "homepage" => $database_url));
  52. $database_agent_id = Agent::insert($database_mock);
  53. $contact_mock = Functions::mock_object("Agent", array( "full_name" => $contact_name,
  54. "display_name" => $contact_name));
  55. $contact_agent_id = Agent::insert($contact_mock);
  56. $GLOBALS['agent_ids'][$database_id][AgentRole::insert("Source Database")] = $database_agent_id;
  57. $GLOBALS['agent_ids'][$database_id][AgentRole::insert("Source")] = $contact_agent_id;
  58. }
  59. }
  60. function add_hierarchy()
  61. {
  62. global $mysqli;
  63. $agent_params = array( "full_name" => "Catalogue of Life",
  64. "acronym" => "CoLP",
  65. "homepage" => "http://www.catalogueoflife.org/");
  66. $agent_id = Agent::insert(Functions::mock_object("Agent", $agent_params));
  67. $hierarchy_id = Hierarchy::find_by_agent_id($agent_id);
  68. if($hierarchy_id)
  69. {
  70. $hierarchy = new Hierarchy($hierarchy_id);
  71. $hierarchy_group_id = $hierarchy->hierarchy_group_id;
  72. $hierarchy_group_version = $hierarchy->latest_group_version()+1;
  73. }else
  74. {
  75. $hierarchy_group_id = Hierarchy::next_group_id();
  76. $hierarchy_group_version = 1;
  77. }
  78. $hierarchy_params = array( "label" => "Species 2000 & ITIS Catalogue of Life: Annual Checklist 2010",
  79. "description" => "2010 edition",
  80. "agent_id" => $agent_id,
  81. "hierarchy_group_id" => $hierarchy_group_id,
  82. "hierarchy_group_version" => $hierarchy_group_version);
  83. $GLOBALS['hierarchy'] = new Hierarchy(Hierarchy::insert($hierarchy_params));
  84. }
  85. function start_process()
  86. {
  87. global $mysqli_col;
  88. $result = $mysqli_col->query("SELECT t.record_id id, t.lsid, t.name taxon_name, t.taxon rank, t.parent_id, t.name_code, t.is_accepted_name, sn.genus, sn.species, sn.infraspecies, sn.infraspecies_marker, sn.author, sn.database_id FROM taxa t LEFT JOIN scientific_names sn ON (t.name_code=sn.name_code) WHERE t.record_id IN (". implode(",", @$GLOBALS['children'][0]) .")");
  89. $i=0;
  90. while($result && $row=$result->fetch_assoc())
  91. {
  92. $i++;
  93. //if($i<4) continue;
  94. //if($row['id']!=2242856) continue;
  95. add_col_taxon($row, 0, '', 0);
  96. }
  97. }
  98. function add_col_taxon($row, $parent_hierarchy_entry_id, $ancestry, $depth)
  99. {
  100. global $mysqli_col;
  101. global $mysqli;
  102. static $counter = 0;
  103. if($counter % 1000 == 0) echo "counter: $counter; memory: ".memory_get_usage()."; time: ".Functions::time_elapsed()."\n";
  104. $counter++;
  105. //if($depth==5) return;
  106. if($counter % 200 == 0)
  107. {
  108. echo "COMMITING\n";
  109. $mysqli->commit();
  110. }
  111. //if($counter>5000) return false;
  112. $id = $row["id"];
  113. $lsid = $row["lsid"];
  114. $name_code = $row["name_code"];
  115. $database_id = $row["database_id"];
  116. $is_accepted_name = $row["is_accepted_name"];
  117. $taxon_name = html_entity_decode(htmlspecialchars_decode(trim($row["taxon_name"])), ENT_COMPAT, "UTF-8");
  118. $rank_id = Rank::insert($row["rank"]);
  119. $scientific = true;
  120. if($name_code)
  121. {
  122. $genus = html_entity_decode(htmlspecialchars_decode(trim($row["genus"])), ENT_COMPAT, "UTF-8");
  123. $species = html_entity_decode(htmlspecialchars_decode(trim($row["species"])), ENT_COMPAT, "UTF-8");
  124. $infraspecies = html_entity_decode(htmlspecialchars_decode(trim($row["infraspecies"])), ENT_COMPAT, "UTF-8");
  125. $infraspecies_marker = trim($row["infraspecies_marker"]);
  126. $author = html_entity_decode(htmlspecialchars_decode(trim($row["author"])), ENT_COMPAT, "UTF-8");
  127. $database_id = trim($row["database_id"]);
  128. list($name_string, $canonical_form) = create_col_name($genus, $species, $infraspecies, $infraspecies_marker, $author, $database_id);
  129. if($database_id == 14) $scientific = false;
  130. }else
  131. {
  132. $name_string = $taxon_name;
  133. $canonical_form = $name_string;
  134. if(preg_match("/ /", $name_string))
  135. {
  136. $canonical_form = "";
  137. $scientific = false;
  138. }
  139. }
  140. if(!$name_string)
  141. {
  142. $name_string = 'Not assigned';
  143. echo "$id: no name_string\n";
  144. //return;
  145. }
  146. $name_id = Name::insert($name_string, $canonical_form);
  147. // if($scientific)
  148. // {
  149. // //Name::make_scientific_by_name_id($name_id);
  150. //
  151. // if($canonical_form && $canonical_form != $name_string)
  152. // {
  153. // $canonical_form_name_id = Name::insert($canonical_form, $canonical_form);
  154. // //Name::make_scientific_by_name_id($canonical_form_name_id);
  155. // }
  156. // }
  157. if(!$name_id)
  158. {
  159. echo "$id: no name_id\n";
  160. return;
  161. }
  162. $params = array("identifier" => $id,
  163. "name_id" => $name_id,
  164. "parent_id" => $parent_hierarchy_entry_id,
  165. "hierarchy_id" => $GLOBALS['hierarchy']->id,
  166. "rank_id" => $rank_id,
  167. "ancestry" => $ancestry);
  168. $hierarchy_entry_id = HierarchyEntry::insert($params, true);
  169. //$hierarchy_entry = new HierarchyEntry($hierarchy_entry_id);
  170. unset($params);
  171. if($name_code)
  172. {
  173. add_col_synonyms($hierarchy_entry_id, $name_code);
  174. add_col_common_names($hierarchy_entry_id, $name_code);
  175. add_col_agents($hierarchy_entry_id, $database_id);
  176. }
  177. if($ancestry) $ancestry .= "|".$name_id;
  178. else $ancestry = $name_id;
  179. if(@$GLOBALS['children'][$id])
  180. {
  181. $result = $mysqli_col->query("SELECT t.record_id id, t.lsid, t.name taxon_name, t.taxon rank, t.parent_id, t.name_code, t.is_accepted_name, sn.genus, sn.species, sn.infraspecies, sn.infraspecies_marker, sn.author, sn.database_id FROM taxa t LEFT JOIN scientific_names sn ON (t.name_code=sn.name_code) WHERE t.record_id IN (". implode(",", $GLOBALS['children'][$id]) .")");
  182. while($result && $row=$result->fetch_assoc())
  183. {
  184. add_col_taxon($row, $hierarchy_entry_id, $ancestry, $depth+1);
  185. }
  186. }
  187. }
  188. function create_col_name($genus, $species, $infraspecies, $infraspecies_marker, $author, $database_id)
  189. {
  190. $name_string = "";
  191. $canonical_form = "";
  192. // its a virus
  193. if($database_id == 14)
  194. {
  195. $name_string = $species;
  196. $canonical_form = $species;
  197. }else
  198. {
  199. $name_string = $genus;
  200. $canonical_form = $genus;
  201. if($species)
  202. {
  203. $name_string .= " $species";
  204. $canonical_form .= " $species";
  205. }
  206. if($infraspecies_marker) $name_string .= " $infraspecies_marker";
  207. if($infraspecies)
  208. {
  209. $name_string .= " $infraspecies";
  210. $canonical_form .= " $infraspecies";
  211. }
  212. if($author) $name_string .= " $author";
  213. }
  214. return array($name_string, $canonical_form);
  215. }
  216. function add_col_synonyms($hierarchy_entry_id, $name_code)
  217. {
  218. global $mysqli;
  219. global $mysqli_col;
  220. $result = $mysqli_col->query("SELECT * FROM scientific_names sn LEFT JOIN sp2000_statuses st ON (sn.sp2000_status_id=st.record_id) WHERE accepted_name_code='$name_code' AND name_code != accepted_name_code");
  221. $hierarchy_id = $GLOBALS['hierarchy']->id;
  222. while($result && $row=$result->fetch_assoc())
  223. {
  224. $genus = html_entity_decode(htmlspecialchars_decode(trim($row["genus"])), ENT_COMPAT, "UTF-8");
  225. $species = html_entity_decode(htmlspecialchars_decode(trim($row["species"])), ENT_COMPAT, "UTF-8");
  226. $infraspecies = html_entity_decode(htmlspecialchars_decode(trim($row["infraspecies"])), ENT_COMPAT, "UTF-8");
  227. $infraspecies_marker = trim($row["infraspecies_marker"]);
  228. $author = html_entity_decode(htmlspecialchars_decode(trim($row["author"])), ENT_COMPAT, "UTF-8");
  229. $database_id = trim($row["database_id"]);
  230. $relationship_id = SynonymRelation::insert(trim($row["sp2000_status"]));
  231. $scientific = true;
  232. list($name_string, $canonical_form) = create_col_name($genus, $species, $infraspecies, $infraspecies_marker, $author, $database_id);
  233. if($database_id == 14) $scientific = false;
  234. if(!$name_string) continue;
  235. $name_id = Name::insert($name_string, $canonical_form);
  236. // if($scientific)
  237. // {
  238. // //Name::make_scientific_by_name_id($name_id);
  239. //
  240. // if($canonical_form && $canonical_form != $name_string)
  241. // {
  242. // $canonical_form_name_id = Name::insert($canonical_form, $canonical_form);
  243. // //Name::make_scientific_by_name_id($canonical_form_name_id);
  244. // }
  245. // }
  246. fwrite($GLOBALS['synonyms_file'], "NULL\t$name_id\t$relationship_id\t0\t$hierarchy_entry_id\t0\t$hierarchy_id\t0\t0\n");
  247. }
  248. if($result && $result->num_rows) $result->free();
  249. }
  250. function add_col_common_names($hierarchy_entry_id, $name_code)
  251. {
  252. global $mysqli;
  253. global $mysqli_col;
  254. $result = $mysqli_col->query("SELECT common_name, language FROM common_names WHERE name_code='$name_code'");
  255. $common_name_id = SynonymRelation::insert("Common name");
  256. $hierarchy_id = $GLOBALS['hierarchy']->id;
  257. while($result && $row=$result->fetch_assoc())
  258. {
  259. $name_string = html_entity_decode(htmlspecialchars_decode(trim($row["common_name"])), ENT_COMPAT, "UTF-8");
  260. $language_id = Language::insert(trim($row["language"]));
  261. $name_id = Name::insert($name_string);
  262. fwrite($GLOBALS['synonyms_file'], "NULL\t$name_id\t$common_name_id\t$language_id\t$hierarchy_entry_id\t0\t$hierarchy_id\t0\t0\n");
  263. }
  264. if($result && $result->num_rows) $result->free();
  265. }
  266. function add_col_agents($hierarchy_entry_id, $database_id)
  267. {
  268. global $mysqli;
  269. if($database_id)
  270. {
  271. foreach(@$GLOBALS['agent_ids'][$database_id] as $role_id => $agent_id)
  272. {
  273. fwrite($GLOBALS['agents_file'], "$hierarchy_entry_id\t$agent_id\t$role_id\t0\n");
  274. }
  275. }
  276. }
  277. ?>