PageRenderTime 58ms CodeModel.GetById 34ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/executive/white_house_confirmations/scraper.php

https://github.com/chaunceyt/votersdaily
PHP | 134 lines | 96 code | 26 blank | 12 comment | 13 complexity | 633c6931bc08d4a727806e745a961039 MD5 | raw file
Possible License(s): GPL-3.0
  1. #!/usr/bin/php -q
  2. <?php
  3. $PATH_TO_INCLUDES = dirname(dirname(dirname(__FILE__)));
  4. require $PATH_TO_INCLUDES.'/phputils/EventScraper.php';
  5. require $PATH_TO_INCLUDES.'/phputils/couchdb.php';
  6. /*
  7. * Voters Daily: PHP - White House Nominations Scraper
  8. * http://wiki.github.com/bouvard/votersdaily
  9. *
  10. * @author Chauncey Thorn <chaunceyt@gmail.com>
  11. * Link: http://www.cthorn.com/
  12. *
  13. */
  /**
   * Scraper that converts the White House nominations XML feed into event records.
   *
   * run() fetches the feed through scrape() and passes the resulting events to the
   * inherited add_events(). Fetching (urlopen), escaping (_escape_str) and date
   * formatting (_vd_date_format) are inherited from EventScraper_Abstract.
   */
  class WhiteHouseNominations extends EventScraper_Abstract
  {
      /** XML endpoint that scrape() downloads and parses. */
      protected $url = 'http://www.socrata.com/views/n5m4-mism/rows.xml?accessType=API';

      public $parser_name = 'White House Confirmations Scraper';
      public $parser_version = '1.0';
      public $parser_frequency = '6.0';

      /**
       * Runs the parent constructor and records the current calendar year.
       */
      public function __construct()
      {
          parent::__construct();
          $this->year = date("Y");
      }

      /**
       * Scrapes the feed and hands the resulting events to add_events().
       */
      public function run()
      {
          $events = $this->scrape();
          $this->add_events($events);
      }

      /**
       * Downloads the nominations feed and builds one event per row that has a
       * complete formal nomination date (month/day/year).
       *
       * Events are keyed by the row's zero-based position in the feed, so skipped
       * rows leave gaps in the keys. The returned array also carries an empty
       * 'couchdb' entry, exactly as before.
       *
       * Side effects: sets source_url, access_time, source_text and parser_runtime
       * on the instance.
       *
       * @return array Event records plus the 'couchdb' entry.
       * @throws Exception When the downloaded response cannot be parsed as XML.
       */
      protected function scrape()
      {
          $events = array('couchdb' => array());
          $scrape_start = microtime_float();

          $this->source_url = $this->url;
          $response = $this->urlopen($this->url);
          $this->access_time = time();
          $this->source_text = $response;

          $xml = new SimpleXMLElement($response);

          // A feed without <rows>/<row> yields no events instead of failing on count().
          $rows = isset($xml->rows->row) ? $xml->rows->row : array();

          // Iterating directly avoids re-walking the node list for every row[$i] lookup.
          $i = -1;
          foreach ($rows as $row) {
              $i++;

              // Pad so that rows with a missing or short date do not raise
              // undefined-offset notices when destructured.
              $date_parts = array_pad(explode('/', (string) $row->formal_nomination_date), 3, '');
              list($month, $day, $year) = $date_parts;

              // Rows without a complete date are disregarded.
              if (empty($month) || empty($day) || empty($year)) {
                  continue;
              }

              $is_confirmed = ((string) $row->confirmed === 'true');
              $is_holdover = ((string) $row->holdover === 'true');
              $confirmed_str = $is_confirmed ? ' was confirmed' : ' was not confirmed';

              // Attribute array access yields an empty string when the agency
              // element or its attribute is absent, instead of dereferencing null.
              $agency_description = (string) $row->agency['description'];
              $description_str = $row->name . ' ' . $confirmed_str . ' ' . $row->position . ' ' . $agency_description;

              // gmmktime()/gmdate() replace the deprecated strftime() and produce the
              // same midnight value regardless of the server's timezone or DST rules.
              $final_date_str = gmdate('Y-m-d\TH:i:s\Z', gmmktime(0, 0, 0, (int) $month, (int) $day, (int) $year));

              // NOTE(review): confirmation_vote is treated as a Unix timestamp, as the
              // original code did — confirm against the feed. The calendar day is still
              // taken in the server's local timezone to keep the stored values unchanged.
              $end_date_value = date('Y-m-d', (int) $row->confirmation_vote) . 'T00:00:00Z';

              $events[$i]['couchdb_id'] = (string) $final_date_str . ' - ' . $this->parser_name . ' - ' . $row->name . ' - ' . $this->_escape_str($row->position, 'title');
              $events[$i]['datetime'] = (string) $end_date_value;
              $events[$i]['end_datetime'] = null;
              $events[$i]['title'] = 'Confirmation Vote for ' . $this->_escape_str($row->position);
              $events[$i]['description'] = (string) $this->_escape_str($description_str);
              $events[$i]['branch'] = (string) BranchName::$executive;
              $events[$i]['entity'] = (string) EntityName::$whitehouse;
              $events[$i]['nominee'] = (string) $row->name;
              $events[$i]['position'] = (string) $row->position;
              $events[$i]['is_confirmed'] = $is_confirmed;
              $events[$i]['is_holdover'] = $is_holdover;
              $events[$i]['source_url'] = (string) $this->url;
              $events[$i]['source_text'] = $this->_nomination_row_source_text($row);

              $_access_time = date('D, d M Y H:i:s T', $this->access_time);
              $events[$i]['access_datetime'] = (string) $this->_vd_date_format($_access_time);
              $events[$i]['parser_name'] = (string) $this->parser_name;
              $events[$i]['parser_version'] = (string) $this->parser_version;
          }

          $scrape_end = microtime_float();
          $this->parser_runtime = round(($scrape_end - $scrape_start), 4);

          return $events;
      }

      /**
       * Rebuilds the XML fragment stored as an event's source_text from a feed row.
       *
       * The agency attributes come from the row itself (previously a fixed sample
       * agency was written for every row) and all values are XML-escaped so the
       * fragment stays well-formed when a value contains &, <, > or quotes.
       *
       * @param SimpleXMLElement $row One <row> element of the feed.
       * @return string The reconstructed <row> fragment, one element per line.
       */
      private function _nomination_row_source_text($row)
      {
          $lines = array(
              '<row _id="' . $this->_nomination_xml_escape($row['_id'])
                  . '" _uuid="' . $this->_nomination_xml_escape($row['_uuid'])
                  . '" _position="' . $this->_nomination_xml_escape($row['_position']) . '">',
              '<name>' . $this->_nomination_xml_escape($row->name) . '</name>',
              '<agency url="' . $this->_nomination_xml_escape($row->agency['url'])
                  . '" description="' . $this->_nomination_xml_escape($row->agency['description']) . '"/>',
              '<position>' . $this->_nomination_xml_escape($row->position) . '</position>',
              '<formal_nomination_date>' . $this->_nomination_xml_escape($row->formal_nomination_date) . '</formal_nomination_date>',
              '<confirmed>' . $this->_nomination_xml_escape($row->confirmed) . '</confirmed>',
              '<holdover>' . $this->_nomination_xml_escape($row->holdover) . '</holdover>',
              '<_tags/>',
              '</row>',
          );

          return implode("\n", $lines);
      }

      /**
       * Casts a value to string and escapes it for use in XML text or attributes.
       *
       * @param mixed $value Value to escape; null becomes an empty string.
       * @return string
       */
      private function _nomination_xml_escape($value)
      {
          return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
      }
  }//end of class
  // Script entry point: build the scraper, run a single scrape-and-store pass,
  // then terminate with a success exit status.
  $scraper = new WhiteHouseNominations();
  $scraper->run();
  exit(0);