PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/statsmodels/datasets/statecrime/data.py

https://github.com/josef-pkt/statsmodels
Python | 87 lines | 82 code | 3 blank | 2 comment | 5 complexity | 98848ddaf87e8e83832e75c16c1ef2b6 MD5 | raw file
#! /usr/bin/env python
"""Statewide Crime Data"""

__docformat__ = 'restructuredtext'

# Module-level metadata strings describing the dataset (licence, title,
# provenance and descriptions).
# NOTE(review): presumably read by the statsmodels datasets tooling/docs;
# nothing in this file consumes them -- confirm before renaming.
COPYRIGHT = """Public domain."""
TITLE = """Statewide Crime Data 2009"""
SOURCE = """
All data is for 2009 and was obtained from the American Statistical Abstracts except as indicated below.
"""
DESCRSHORT = """State crime data 2009"""
# No separate long description is provided; it reuses the short one.
DESCRLONG = DESCRSHORT
  11. #suggested notes
  12. NOTE = """::
  13. Number of observations: 51
  14. Number of variables: 8
  15. Variable name definitions:
  16. state
  17. All 50 states plus DC.
  18. violent
  19. Rate of violent crimes / 100,000 population. Includes murder, forcible
  20. rape, robbery, and aggravated assault. Numbers for Illinois and
  21. Minnesota do not include forcible rapes. Footnote included with the
  22. American Statistical Abstract table reads:
  23. "The data collection methodology for the offense of forcible
  24. rape used by the Illinois and the Minnesota state Uniform Crime
  25. Reporting (UCR) Programs (with the exception of Rockford, Illinois,
  26. and Minneapolis and St. Paul, Minnesota) does not comply with
  27. national UCR guidelines. Consequently, their state figures for
  28. forcible rape and violent crime (of which forcible rape is a part)
  29. are not published in this table."
  30. murder
  31. Rate of murders / 100,000 population.
  32. hs_grad
  33. Percent of population having graduated from high school or higher.
  34. poverty
  35. % of individuals below the poverty line
  36. white
  37. Percent of population that is one race - white only. From 2009 American
  38. Community Survey
  39. single
  40. Calculated from 2009 1-year American Community Survey obtained
  41. from Census. Variable is Male householder, no wife present, family
  42. household combined with Female householder, no husband present, family
  43. household, divided by the total number of Family households.
  44. urban
  45. % of population in Urbanized Areas as of 2010 Census. Urbanized
  46. Areas are areas of 50,000 or more people."""
  47. import numpy as np
  48. from statsmodels.datasets import utils as du
  49. from os.path import dirname, abspath
  50. def load():
  51. """
  52. Load the statecrime data and return a Dataset class instance.
  53. Returns
  54. -------
  55. Dataset instance:
  56. See DATASET_PROPOSAL.txt for more information.
  57. """
  58. data = _get_data()
  59. ##### SET THE INDICES #####
  60. #NOTE: None for exog_idx is the complement of endog_idx
  61. return du.process_recarray(data, endog_idx=2, exog_idx=[7, 4, 3, 5],
  62. dtype=float)
  63. def load_pandas():
  64. data = _get_data()
  65. ##### SET THE INDICES #####
  66. #NOTE: None for exog_idx is the complement of endog_idx
  67. return du.process_recarray_pandas(data, endog_idx=2, exog_idx=[7,4,3,5],
  68. dtype=float, index_idx=0)
  69. def _get_data():
  70. filepath = dirname(abspath(__file__))
  71. ##### EDIT THE FOLLOWING TO POINT TO DatasetName.csv #####
  72. with open(filepath + '/statecrime.csv', 'rb') as f:
  73. data = np.recfromtxt(f, delimiter=",", names=True, dtype=None)
  74. return data