/analyse_data_scripts/geography_tests.php

https://github.com/caprenter/IATI-Data-Spotter · PHP · 199 lines · 193 code · 2 blank · 4 comment · 1 complexity · 1eba22843b55fcc772d4acc01c715166 MD5 · raw file

  1. <?php
  2. include ('functions/xml_child_exists.php');
  3. include ('settings.php'); //sets $corpus, $dir and $output_dir
  /**
   * Read one per-hierarchy counter from a geography() result table.
   *
   * geography() may leave a hierarchy out of a table when nothing was recorded
   * for it, so a missing key is reported as 0 rather than read as undefined.
   *
   * @param array $table     one of the counter tables returned by geography()
   * @param mixed $hierarchy the hierarchy value used as the table key
   * @return int the stored counter, or 0 when the hierarchy has no entry
   */
  function geography_report_count($table, $hierarchy) {
    return isset($table[$hierarchy]) ? $table[$hierarchy] : 0;
  }

  /**
   * Read one per-hierarchy list of array(file, identifier) pairs.
   *
   * @param array $table     one of the list tables returned by geography()
   * @param mixed $hierarchy the hierarchy value used as the table key
   * @return array the stored list, or an empty array when the hierarchy has no entry
   */
  function geography_report_rows($table, $hierarchy) {
    return isset($table[$hierarchy]) ? $table[$hierarchy] : array();
  }

  /**
   * Write one "File,ID,Hierarchy" detail section to the CSV.
   *
   * Nothing is written (not even the title) when no hierarchy has any row.
   *
   * @param resource $fh          open handle of the output file
   * @param string   $title       section title line
   * @param array    $table       list table returned by geography()
   * @param array    $hierarchies hierarchies to report, in output order
   * @return void
   */
  function geography_report_section($fh, $title, $table, $hierarchies) {
    $lines = "";
    foreach ($hierarchies as $hierarchy) {
      foreach (geography_report_rows($table, $hierarchy) as $entry) {
        $lines .= $entry[0] . "," . $entry[1] . "," . $hierarchy . "\n";
      }
    }
    if ($lines === "") {
      return;
    }
    fwrite($fh, "\n" . $title . "\n");
    fwrite($fh, "File,ID,Hierarchy\n");
    fwrite($fh, $lines);
  }

  // $corpus, $dir and $output_dir come from settings.php.
  $output_file = $output_dir . $corpus . '_geography_new.csv';
  // Not read anywhere in the visible part of this script; kept in case an included file relies on it.
  $tests = array("recipient-country","recipient-region");
  $geography = geography($dir);
  $hierarchies = $geography["hierarchies"];

  // Console diagnostics: raw counter tables, and the number of hierarchies
  // that have at least one entry in each list table.
  print_r($geography["no_activities"]);
  print_r($geography["activities_with_recipient_region_only"]);
  print_r($geography["activities_with_recipient_country_only"]);
  echo count($geography["activities_with_both"]) . PHP_EOL;
  print_r($geography["activities_with_more_than_one_recipient_region"]);
  print_r($geography["activities_with_more_than_one_recipient_country"]);
  echo count($geography["multiple_region_fail"]) . PHP_EOL;
  echo count($geography["multiple_country_fail"]) . PHP_EOL;

  //Open the file to write
  $fh = fopen($output_file, 'w') or die("can't open file");

  // Summary table: one column per hierarchy.
  foreach ($hierarchies as $hierarchy) {
    fwrite($fh, ",Hierarchy " . $hierarchy);
  }
  fwrite($fh, "\n");

  fwrite($fh, "No. Activities");
  foreach ($hierarchies as $hierarchy) {
    fwrite($fh, "," . geography_report_count($geography["no_activities"], $hierarchy));
  }
  fwrite($fh, "\n");

  fwrite($fh, "No. Activities with both region and country");
  foreach ($hierarchies as $hierarchy) {
    fwrite($fh, "," . count(geography_report_rows($geography["activities_with_both"], $hierarchy)));
  }
  fwrite($fh, "\n");

  fwrite($fh, "No. Activities with at least one region or country");
  foreach ($hierarchies as $hierarchy) {
    $with_any = geography_report_count($geography["activities_with_recipient_country_only"], $hierarchy)
              + geography_report_count($geography["activities_with_recipient_region_only"], $hierarchy)
              + count(geography_report_rows($geography["activities_with_both"], $hierarchy));
    fwrite($fh, "," . $with_any);
  }
  fwrite($fh, "\n");
  fwrite($fh, "\n");

  // Breakdown table: six columns per hierarchy, laid out side by side.
  foreach ($hierarchies as $hierarchy) {
    fwrite($fh, "Hierarchy " . $hierarchy . ",,,,,,");
  }
  fwrite($fh, "\n");
  foreach ($hierarchies as $hierarchy) {
    fwrite($fh, "Region Only,>1 region,Fail 100%,Country Only,>1country,Fail 100%,");
  }
  fwrite($fh, "\n");
  foreach ($hierarchies as $hierarchy) {
    $cells = array(
      geography_report_count($geography["activities_with_recipient_region_only"], $hierarchy),
      geography_report_count($geography["activities_with_more_than_one_recipient_region"], $hierarchy),
      count(geography_report_rows($geography["multiple_region_fail"], $hierarchy)),
      geography_report_count($geography["activities_with_recipient_country_only"], $hierarchy),
      geography_report_count($geography["activities_with_more_than_one_recipient_country"], $hierarchy),
      count(geography_report_rows($geography["multiple_country_fail"], $hierarchy)),
    );
    // Every cell, including the last one, is followed by a comma.
    fwrite($fh, implode(",", $cells) . ",");
  }
  fwrite($fh, "\n");

  // Detail sections, each listing the offending file and activity identifier.
  geography_report_section($fh, "Activities with both", $geography["activities_with_both"], $hierarchies);
  geography_report_section($fh, "Activities with multiple countries that don't add up to 100%", $geography["multiple_country_fail"], $hierarchies);
  geography_report_section($fh, "Activities with multiple regions that don't add up to 100%", $geography["multiple_region_fail"], $hierarchies);

  fclose($fh);
  /**
   * Add up the "percentage" attributes of a set of recipient elements.
   *
   * Each attribute is read as text and cast to float, so decimal percentages
   * are kept; an element without the attribute contributes 0.
   *
   * @param SimpleXMLElement $elements recipient-region or recipient-country elements of one activity
   * @return float sum of the percentage attributes
   */
  function geography_percentage_total($elements) {
    $total = 0.0;
    foreach ($elements as $element) {
      $total += (float) (string) $element['percentage'];
    }
    return $total;
  }

  /**
   * Collect recipient-region / recipient-country statistics for every
   * iati-activity found in the XML files of a directory.
   *
   * Files for which xml_child_exists($xml, "//iati-organisation") is true are
   * skipped, as are directory entries that are not regular files and files that
   * simplexml_load_file() cannot load. Activities are grouped by their
   * "hierarchy" attribute; a missing, empty or "0" attribute is grouped under 0.
   *
   * Returned keys:
   *  - "no_activities", "activities_with_recipient_region_only",
   *    "activities_with_recipient_country_only",
   *    "activities_with_more_than_one_recipient_region",
   *    "activities_with_more_than_one_recipient_country":
   *    counters keyed by hierarchy; every hierarchy that was seen has an
   *    entry in each of them (0 when nothing matched).
   *  - "activities_with_both", "multiple_region_fail", "multiple_country_fail":
   *    lists of array(file name, iati-identifier) keyed by hierarchy; a
   *    hierarchy only has a key here when it has at least one entry.
   *  - "hierarchies": the distinct hierarchy values seen, sorted.
   *
   * @param string $dir directory holding the XML files, with or without a trailing slash
   * @return array the result tables described above
   */
  function geography($dir) {
    $no_activities = array();
    $activities_with_recipient_region_only = array();
    $activities_with_recipient_country_only = array();
    $activities_with_both = array();
    $activities_with_more_than_one_recipient_region = array();
    $activities_with_more_than_one_recipient_country = array();
    $multiple_region_fail = array();
    $multiple_country_fail = array();
    // Initialised up front so an empty or unreadable directory still yields a list.
    $found_hierarchies = array();
    // Percentages are summed as floats, so "adds up to 100" needs a tolerance.
    $tolerance = 0.0001;

    if ($handle = opendir($dir)) {
      $base = rtrim($dir, '/\\') . DIRECTORY_SEPARATOR;
      while (false !== ($file = readdir($handle))) {
        $path = $base . $file;
        if (!is_file($path)) {
          continue; // ".", ".." and sub-directories
        }
        $xml = simplexml_load_file($path);
        if ($xml === false) {
          continue; // not loadable as XML
        }
        if (xml_child_exists($xml, "//iati-organisation")) {
          continue; // exclude organisation files
        }

        foreach ($xml->{"iati-activity"} as $activity) {
          $hierarchy = (string) $activity['hierarchy'];
          if (!$hierarchy) {
            $hierarchy = 0;
          }

          // First activity of this hierarchy: give every counter a starting value.
          if (!isset($no_activities[$hierarchy])) {
            $found_hierarchies[] = $hierarchy;
            $no_activities[$hierarchy] = 0;
            $activities_with_recipient_region_only[$hierarchy] = 0;
            $activities_with_recipient_country_only[$hierarchy] = 0;
            $activities_with_more_than_one_recipient_region[$hierarchy] = 0;
            $activities_with_more_than_one_recipient_country[$hierarchy] = 0;
          }
          $no_activities[$hierarchy]++;

          // Presence and multiplicity are both decided by element count.
          $regions = $activity->{'recipient-region'};
          $countries = $activity->{'recipient-country'};
          $region_count = count($regions);
          $country_count = count($countries);
          $reference = array($file, (string) $activity->{'iati-identifier'});

          if ($region_count > 0 && $country_count > 0) {
            $activities_with_both[$hierarchy][] = $reference;
          } elseif ($region_count > 0) {
            //Only a recipient region
            $activities_with_recipient_region_only[$hierarchy]++;
          } elseif ($country_count > 0) {
            //Only a recipient country
            $activities_with_recipient_country_only[$hierarchy]++;
          }

          if ($region_count > 1) {
            //More than one region specified: the percentages should add up to 100.
            $activities_with_more_than_one_recipient_region[$hierarchy]++;
            if (abs(geography_percentage_total($regions) - 100) > $tolerance) {
              $multiple_region_fail[$hierarchy][] = $reference;
            }
          }

          if ($country_count > 1) {
            //More than one country specified: the percentages should add up to 100.
            $activities_with_more_than_one_recipient_country[$hierarchy]++;
            if (abs(geography_percentage_total($countries) - 100) > $tolerance) {
              $multiple_country_fail[$hierarchy][] = $reference;
            }
          }
        } //end foreach activity
      } //end while
      closedir($handle);
    }

    // Each hierarchy was appended once, on first sight, so only ordering is left to do.
    sort($found_hierarchies);

    return array(
      "no_activities" => $no_activities,
      "activities_with_recipient_region_only" => $activities_with_recipient_region_only,
      "activities_with_recipient_country_only" => $activities_with_recipient_country_only,
      "activities_with_both" => $activities_with_both,
      "activities_with_more_than_one_recipient_region" => $activities_with_more_than_one_recipient_region,
      "activities_with_more_than_one_recipient_country" => $activities_with_more_than_one_recipient_country,
      "multiple_region_fail" => $multiple_region_fail,
      "multiple_country_fail" => $multiple_country_fail,
      "hierarchies" => $found_hierarchies
    );
  }
  179. ?>