PageRenderTime 46ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/Statistics/Sample/Histogram.hs

http://github.com/bos/statistics
Haskell | 108 lines | 52 code | 6 blank | 50 comment | 2 complexity | 36ad34aef8aa668880734e9b4e300899 MD5 | raw file
Possible License(s): BSD-2-Clause
  1. {-# LANGUAGE FlexibleContexts, BangPatterns #-}
  2. -- |
  3. -- Module : Statistics.Sample.Histogram
  4. -- Copyright : (c) 2011 Bryan O'Sullivan
  5. -- License : BSD3
  6. --
  7. -- Maintainer : bos@serpentine.com
  8. -- Stability : experimental
  9. -- Portability : portable
  10. --
  11. -- Functions for computing histograms of sample data.
  12. module Statistics.Sample.Histogram
  13. (
  14. histogram
  15. -- * Building blocks
  16. , histogram_
  17. , range
  18. ) where
  19. import Numeric.MathFunctions.Constants (m_epsilon,m_tiny)
  20. import Statistics.Function (minMax)
  21. import qualified Data.Vector.Generic as G
  22. import qualified Data.Vector.Generic.Mutable as GM
  23. -- | /O(n)/ Compute a histogram over a data set.
  24. --
  25. -- The result consists of a pair of vectors:
  26. --
  27. -- * The lower bound of each interval.
  28. --
  29. -- * The number of samples within the interval.
  30. --
  31. -- Interval (bin) sizes are uniform, and the upper and lower bounds
  32. -- are chosen automatically using the 'range' function. To specify
  33. -- these parameters directly, use the 'histogram_' function.
  34. histogram :: (G.Vector v0 Double, G.Vector v1 Double, Num b, G.Vector v1 b) =>
  35. Int -- ^ Number of bins (must be positive).
  36. -> v0 Double -- ^ Sample data (cannot be empty).
  37. -> (v1 Double, v1 b)
  38. histogram numBins xs = (G.generate numBins step, histogram_ numBins lo hi xs)
  39. where (lo,hi) = range numBins xs
  40. step i = lo + d * fromIntegral i
  41. d = (hi - lo) / fromIntegral numBins
  42. {-# INLINE histogram #-}
  43. -- | /O(n)/ Compute a histogram over a data set.
  44. --
  45. -- Interval (bin) sizes are uniform, based on the supplied upper
  46. -- and lower bounds.
  47. histogram_ :: (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) =>
  48. Int
  49. -- ^ Number of bins. This value must be positive. A zero
  50. -- or negative value will cause an error.
  51. -> a
  52. -- ^ Lower bound on interval range. Sample data less than
  53. -- this will cause an error.
  54. -> a
  55. -- ^ Upper bound on interval range. This value must not be
  56. -- less than the lower bound. Sample data that falls above
  57. -- the upper bound will cause an error.
  58. -> v0 a
  59. -- ^ Sample data.
  60. -> v1 b
  61. histogram_ numBins lo hi xs0 = G.create (GM.replicate numBins 0 >>= bin xs0)
  62. where
  63. bin xs bins = go 0
  64. where
  65. go i | i >= len = return bins
  66. | otherwise = do
  67. let x = xs `G.unsafeIndex` i
  68. b = truncate $ (x - lo) / d
  69. write' bins b . (+1) =<< GM.read bins b
  70. go (i+1)
  71. write' bins' b !e = GM.write bins' b e
  72. len = G.length xs
  73. d = ((hi - lo) * (1 + realToFrac m_epsilon)) / fromIntegral numBins
  74. {-# INLINE histogram_ #-}
  75. -- | /O(n)/ Compute decent defaults for the lower and upper bounds of
  76. -- a histogram, based on the desired number of bins and the range of
  77. -- the sample data.
  78. --
  79. -- The upper and lower bounds used are @(lo-d, hi+d)@, where
  80. --
  81. -- @d = (maximum sample - minimum sample) / ((bins - 1) * 2)@
  82. --
  83. -- If all elements in the sample are the same and equal to @x@ range
  84. -- is set to @(x - |x|/10, x + |x|/10)@. And if @x@ is equal to 0 range
  85. -- is set to @(-1,1)@. This is needed to avoid creating histogram with
  86. -- zero bin size.
  87. range :: (G.Vector v Double) =>
  88. Int -- ^ Number of bins (must be positive).
  89. -> v Double -- ^ Sample data (cannot be empty).
  90. -> (Double, Double)
  91. range numBins xs
  92. | numBins < 1 = error "Statistics.Histogram.range: invalid bin count"
  93. | G.null xs = error "Statistics.Histogram.range: empty sample"
  94. | lo == hi = case abs lo / 10 of
  95. a | a < m_tiny -> (-1,1)
  96. | otherwise -> (lo - a, lo + a)
  97. | otherwise = (lo-d, hi+d)
  98. where
  99. d | numBins == 1 = 0
  100. | otherwise = (hi - lo) / ((fromIntegral numBins - 1) * 2)
  101. (lo,hi) = minMax xs
  102. {-# INLINE range #-}