/congress.py

https://github.com/michaelmyers/python-congress · Python · 138 lines · 79 code · 40 blank · 19 comment · 21 complexity · f0f42fec716d060ceb9b5efdea1e7900 MD5 · raw file

  1. """
  2. House Roll Call Votes to Excel
  3. """
  4. __author__ = "Michael Myers <michael.morris.myers@gmail.com>"
  5. __version__ = 1.0
  6. __license__ = "MIT"
  7. import urllib2
  8. from datetime import datetime
  9. from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
  10. from openpyxl.workbook import Workbook
  11. from openpyxl.writer.excel import ExcelWriter
  12. from openpyxl.cell import get_column_letter
  def roll_call_to_xlsx(year, minRC = 1, maxRC = 1):
      """
      Download House roll call votes for a year and save them as an .xlsx file.

      Every roll call from minRC through maxRC (inclusive) is fetched as XML
      from clerk.house.gov. The resulting worksheet has one row per member
      name (sorted), one column per roll call number, and each cell holds the
      member's normalized vote for that roll call. The workbook is saved as
      '<congress number>_Congress_Roll_Call_Data.xlsx'.

      Arguments:
      year  -- year of the roll calls; must be between 1990 and the current year
      minRC -- first roll call number to fetch (default 1)
      maxRC -- last roll call number to fetch. The default value 1 means
               "not specified" and is replaced by get_max_roll_call(year);
               passing 1 explicitly therefore has the same effect.

      Returns None. If the year is out of range, or maxRC is smaller than
      minRC, a message is printed and nothing is downloaded or written.
      """
      # Single-argument print(...) produces the same output whether it is
      # parsed as a Python 2 print statement or a Python 3 function call.
      if year not in range(1990, datetime.now().year + 1):
          print("Date out of range, must be between 1990 and current year")
          return

      # No value was entered, default to max
      if maxRC == 1:
          maxRC = get_max_roll_call(year)

      if minRC > maxRC:
          print("Max is less than min")
          return

      congressData = {}   # roll call number -> {member name: vote}
      names = set()       # every member seen in any fetched roll call
      for rollcall in range(minRC, maxRC + 1):
          voteData = _fetch_roll_call_votes(year, rollcall)
          names.update(voteData)
          congressData[rollcall] = voteData
          # Two spaces before the number reproduce the output of the original
          # two-argument print statement (trailing space + separator).
          print('Added voteData for rollcall  %s' % rollcall)

      nameList = sorted(names)

      # Data received, now time for excel
      congress = str(get_congress(year))
      wb = Workbook()
      ew = ExcelWriter(workbook = wb)
      dest_filename = congress + '_Congress_Roll_Call_Data.xlsx'
      ws = wb.worksheets[0]
      ws.title = congress + "Congress Roll Call Data"

      # NOTE(review): row 0 / column 0 are used as the header row and the name
      # column, which assumes the 0-based cell coordinates of the openpyxl
      # release this file was written against -- confirm before upgrading.

      # Write names in first column
      for rw, name in enumerate(nameList, 1):
          ws.cell(row = rw, column = 0).value = name

      # Write each roll call number as a header, then that roll call's votes
      for col, rollcall in enumerate(range(minRC, maxRC + 1), 1):
          ws.cell(row = 0, column = col).value = rollcall
          print(rollcall)
          vD = congressData[rollcall]
          for rw, name in enumerate(nameList, 1):
              # Members without a recorded vote in this roll call stay blank.
              if name in vD:
                  ws.cell(row = rw, column = col).value = vD[name]

      ew.save(filename = dest_filename)
      return


  def _fetch_roll_call_votes(year, rollcall):
      """Fetch one roll call's XML and return a dict of member name -> vote."""
      url = ('http://clerk.house.gov/evs/' + str(year) + '/roll'
             + str(rollcall).zfill(3) + '.xml')
      page = urllib2.urlopen(url)
      try:
          soup = BeautifulStoneSoup(page)
      finally:
          # Release the HTTP connection even if parsing fails.
          page.close()

      voteData = {}
      for record in soup.findAll('recorded-vote'):
          name = record.contents[0].contents[0]
          vote = _normalize_vote(record.contents[1].contents[0])
          # Keep the first vote seen if a name appears more than once.
          voteData.setdefault(name, vote)
      return voteData


  def _normalize_vote(rawVote):
      """Map the raw vote text onto 'Yes', 'Not Voting', 'Present' or 'No'.

      Text matching none of the known words is returned unchanged.
      """
      if 'Yes' in rawVote or 'Aye' in rawVote or 'Yea' in rawVote:
          return 'Yes'
      # 'Not Voting' has to be tested before 'No', which it contains.
      if 'Not Voting' in rawVote:
          return 'Not Voting'
      if 'Present' in rawVote:
          return 'Present'
      if 'No' in rawVote or 'Nay' in rawVote:
          return 'No'
      return rawVote
  def get_max_roll_call(year):
      """Get the total number of Roll Calls for a given year of Congress

      Downloads the clerk.house.gov roll call index page for the given year
      and converts the text of the first link on that page to an integer.
      NOTE(review): this presumably relies on the index listing the most
      recent roll call first -- confirm against the live page.

      Returns Integer
      Raises ValueError if the page has no link or the link text is not a
      number.
      """
      url = 'http://clerk.house.gov/evs/' + str(year) + '/index.asp'
      page = urllib2.urlopen(url)
      try:
          soup = BeautifulSoup(page)
      finally:
          # Release the HTTP connection even if parsing fails.
          page.close()

      first_link = soup.find('a')
      if first_link is None:
          # Fail with a message that names the page instead of an
          # AttributeError on None.
          raise ValueError('No roll call links found at ' + url)
      return int(first_link.contents[0])
  def get_congress(year):
      """Returns the Congress number for a given year

      Congresses are numbered in two-year steps starting from 1 in 1789, so
      for example both 2013 and 2014 give 113.

      Borrowed from python-nytcongress / nytcongress.py by Chris Amico
      https://github.com/eyeseast/python-nytcongress
      """
      # Explicit floor division: plain "/" would return a fractional result
      # (e.g. 113.5) under Python 3 or "from __future__ import division".
      return (year - 1789) // 2 + 1