/analyse_data_scripts_v1/percentage.php

https://github.com/caprenter/IATI-Data-Spotter · PHP · 141 lines · 122 code · 5 blank · 14 comment · 0 complexity · 8efb29a0476c142c5fcd2a2e47242610 MD5 · raw file

  1. <?php
  2. /* Uses xpath to get directly to the elements in the XML
  3. * Then loops through the elements to add the percentage attributes together
  4. *
  5. * We then run an array_count_values on that array to tell us how many of each we have found.
  6. * Then we output it to a file
  7. */
  8. //libxml_use_internal_errors ( true );
  9. //Helps us check that we only test activity files
  10. include ('functions/xml_child_exists.php');
  11. //$dir = '../data/dfid/'; //needs trailing slash
  // ---------------------------------------------------------------------
  // Command-line entry point.
  //
  // Usage: php percentage.php <data-directory>
  //
  // For each element name in $tests this reports, both on stdout and in
  // "<name>/<name>_percentages.txt" (relative to the working directory):
  //   - how many activities contain more than one such element,
  //   - how many percentage sums are not exactly 100,
  //   - a "Percentage,Count" table of every sum that was found.
  // ---------------------------------------------------------------------

  // The data directory comes from the first command-line argument. Refuse to
  // run without it: an empty argument used to turn into "/" and the script
  // would then try to scan the filesystem root.
  $data_argument = isset($_SERVER['argv'][1]) ? (string) $_SERVER['argv'][1] : '';
  if ($data_argument === '') {
      file_put_contents('php://stderr', "Usage: php percentage.php <data-directory>" . PHP_EOL);
      exit(1);
  }

  // count_attributes() appends file names directly to $dir, so make sure there
  // is exactly one trailing slash whether or not the caller supplied one.
  $dir = rtrim($data_argument, "/") . "/";
  if (!is_dir($dir)) {
      file_put_contents('php://stderr', "Not a directory: " . $dir . PHP_EOL);
      exit(1);
  }

  // Element names whose percentage attributes are checked.
  $tests = array("sector", "recipient-region");

  // Create a separate results file based on the data directory name.
  // Paths of the form "../data/<name>/" keep the historical behaviour (the
  // "../data/" prefix and the trailing slash are stripped); any other path
  // falls back to its last component instead of a blind 8-character cut.
  $legacy_prefix = "../data/";
  $save_directory = '';
  if (strncmp($dir, $legacy_prefix, strlen($legacy_prefix)) === 0) {
      $save_directory = (string) substr($dir, strlen($legacy_prefix), -1);
  }
  if ($save_directory === '') {
      $save_directory = basename($dir);
  }
  if ($save_directory === '') {
      file_put_contents('php://stderr', "Cannot derive a results name from: " . $dir . PHP_EOL);
      exit(1);
  }
  $data_file = $save_directory . "/" . $save_directory . "_percentages.txt";

  // Open the results file for writing; fail loudly with a non-zero exit code.
  $fh = fopen($data_file, 'w');
  if ($fh === false) {
      file_put_contents('php://stderr', "can't open file " . $data_file . PHP_EOL);
      exit(1);
  }

  // Every report line goes to both the results file ("\n" terminated) and
  // stdout (PHP_EOL terminated).
  $emit = function ($line) use ($fh) {
      fwrite($fh, $line . "\n");
      echo $line . PHP_EOL;
  };

  // Run through each element name, counting the percentage sums.
  foreach ($tests as $test) {
      $result = count_attributes($test, $dir);

      if (!$result) {
          $emit($test);
          $emit("None found");
          continue;
      }

      // $result is array(list of percentage sums, number of activities).
      $number_activities = $result[1];
      $emit($number_activities . " activities have more than one " . $test . " element");

      // Map each distinct sum to the number of times it occurred.
      $percentages = array_count_values($result[0]);

      // Everything except the "100" bucket is a sum that does not add up.
      $copy_percentages = $percentages;
      unset($copy_percentages[100]);
      $not_100 = array_sum($copy_percentages);
      $emit($not_100 . " " . $test . " sums don't make 100%");

      ksort($percentages);
      $emit("Percentage,Count");
      foreach ($percentages as $key => $value) {
          $emit($key . "," . $value);
      }
  }

  fclose($fh);
  /**
   * Sums the "percentage" attributes of repeated child elements in every
   * activity found in the XML files of a directory.
   *
   * Each entry of $dir is loaded with SimpleXML. Files for which
   * xml_child_exists($xml, "//iati-organisation") is true are skipped. For
   * every child of the document root that has MORE THAN ONE $element child:
   *   - if any of those elements carries a "vocabulary" attribute, one total
   *     per vocabulary is recorded;
   *   - otherwise a single total over all of them is recorded.
   *
   * NOTE(review): when an activity mixes elements with and without a
   * "vocabulary" attribute, the share of the elements without one is not
   * recorded at all. This matches the historical behaviour; confirm it is
   * intended.
   *
   * Totals are rounded to $precision decimal places so that floating point
   * noise (e.g. 33.3 + 33.3 + 33.4) cannot produce a "nearly 100" value.
   * Whole totals are returned as int and fractional totals as numeric strings,
   * so the list can be passed straight to array_count_values(), which only
   * counts int and string values.
   *
   * @param string $element   Name of the child element to inspect, e.g. "sector".
   * @param string $dir       Directory to scan; must end with a slash because
   *                          file names are appended to it directly.
   * @param int    $precision Number of decimal places kept in each total.
   *
   * @return array|false array(list of totals, number of activities with more
   *                     than one $element), or FALSE when the directory cannot
   *                     be opened or no such activity was found.
   */
  function count_attributes($element, $dir, $precision = 2) {
      $handle = opendir($dir);
      if ($handle === false) {
          return FALSE;
      }

      $percentages = array();
      $number_of_elements = 0;

      // Comparing against false is required: a file literally named "0"
      // would otherwise end the loop early.
      while (false !== ($file = readdir($handle))) {
          if ($file == "." || $file == "..") {
              continue; // ignore the directory self/parent entries
          }

          $path = $dir . $file;
          if (!is_file($path)) {
              continue; // sub-directories can never be parsed as XML
          }

          // Deliberately best-effort: anything that is not well-formed XML is
          // skipped silently, so parser warnings are suppressed here.
          $xml = @simplexml_load_file($path);
          if ($xml === false) {
              continue;
          }

          if (xml_child_exists($xml, "//iati-organisation")) {
              continue; // exclude organisation files
          }

          foreach ($xml as $activity) {
              $elements = $activity->xpath("./" . $element);
              // xpath() does not return an array on failure; such activities
              // are treated like activities without repeated elements.
              if (!is_array($elements) || count($elements) < 2) {
                  continue;
              }
              $number_of_elements++;

              $unlabelled_total = 0.0;
              $totals_by_vocabulary = array();
              foreach ($elements as $item) {
                  // A missing or non-numeric percentage counts as 0.
                  $share = (float) (string) $item['percentage'];

                  if (isset($item['vocabulary'])) {
                      $vocabulary = (string) $item['vocabulary'];
                      if (!isset($totals_by_vocabulary[$vocabulary])) {
                          $totals_by_vocabulary[$vocabulary] = 0.0;
                      }
                      $totals_by_vocabulary[$vocabulary] += $share;
                  } else {
                      $unlabelled_total += $share;
                  }
              }

              if (count($totals_by_vocabulary) > 0) {
                  foreach ($totals_by_vocabulary as $total) {
                      $percentages[] = normalise_percentage_total($total, $precision);
                  }
              } else {
                  $percentages[] = normalise_percentage_total($unlabelled_total, $precision);
              }
          }
      }
      closedir($handle);

      if (count($percentages) === 0) {
          return FALSE;
      }
      return array($percentages, $number_of_elements);
  }

  /**
   * Rounds a percentage total and converts it to a value that
   * array_count_values() can count.
   *
   * @param float|int $total     Raw sum of percentage attributes.
   * @param int       $precision Number of decimal places to keep.
   *
   * @return int|string int for whole totals (so 100 stays the integer 100),
   *                    otherwise a decimal string without trailing zeros.
   */
  function normalise_percentage_total($total, $precision = 2) {
      $rounded = round((float) $total, (int) $precision);
      if ($rounded == floor($rounded)) {
          return (int) $rounded;
      }
      // A fractional value always keeps at least one non-zero decimal digit,
      // so trimming zeros can never leave a dangling decimal point.
      return rtrim(number_format($rounded, max(0, (int) $precision), '.', ''), '0');
  }
  121. ?>