PageRenderTime 52ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/percentiles

https://bitbucket.org/flowblok/dotfiles
Python | 206 lines | 202 code | 3 blank | 1 comment | 0 complexity | 4e6043e6f5860a80cd72f1e299ca6004 MD5 | raw file
Possible License(s): WTFPL
  1. #!/usr/bin/env python
  2. import argparse
  3. from collections import deque, namedtuple
  4. import re
  5. import sys
  6. import textwrap
  7. import time
  8. DataPoint = namedtuple('DataPoint', 'when value')
  9. parser = argparse.ArgumentParser(
  10. epilog=textwrap.dedent('''
  11. timespec:
  12. * If a number is prefixed with a time unit (s, m or h),
  13. it is treated as a duration.
  14. * If no suffix is given, it is treated as a number of samples.
  15. interpolation suffixes:
  16. l: linearly interpolate the two closest points
  17. n: takes the nearest point
  18. '''),
  19. formatter_class=argparse.RawDescriptionHelpFormatter,
  20. )
  21. parser.add_argument(
  22. '-p', '--percentiles',
  23. help='''
  24. Comma separated list of percentiles to print.
  25. Each percentile is comprised of a floating point number, and optionally a
  26. letter suffix indicating the interpolation mode.
  27. ''',
  28. )
  29. parser.add_argument(
  30. '-t', '--time-provided',
  31. help='''
  32. Instead of adding the time when the line is read from stdin, this flag
  33. indicates that each line of input has two numbers, the first being a
  34. timestamp (in fractional seconds), and the second being (as usual) the
  35. value.
  36. ''',
  37. )
  38. parser.add_argument(
  39. '-w', '--window',
  40. help='''
  41. The distribution will be calculated as a moving window over the data,
  42. using the given timespec. If empty (the default), the distribution will be
  43. taken over the entire dataset.
  44. ''',
  45. )
  46. parser.add_argument(
  47. '-u', '--update',
  48. help='''
  49. A timespec indicating how often to print out the percentiles.
  50. If empty (the default), the percentiles will be printed once, at the end of
  51. the data.
  52. ''',
  53. )
  54. args = parser.parse_args()
  55. _TIMESPEC = re.compile(r'^([0-9.]+)([a-z]+)$')
  56. MULTIPLIERS = {
  57. 'ms': 1 / 1000.,
  58. 's': 1,
  59. 'm': 60,
  60. 'h': 60 * 60,
  61. }
  62. def parse_timespec(value):
  63. if not value:
  64. return ('all', 0)
  65. if value.isdigit():
  66. return ('samples', int(value))
  67. match = _TIMESPEC.search(value)
  68. if match:
  69. value, unit = match.groups()
  70. multiplier = MULTIPLIERS[unit]
  71. try:
  72. return ('seconds', float(value) * multiplier)
  73. except ValueError:
  74. pass
  75. _PERCENTILE = re.compile(r'^([0-9.]+)([ln]?)$')
  76. INTERPOLATIONS = {
  77. '': 'linear',
  78. 'l': 'linear',
  79. 'n': 'nearest',
  80. }
  81. def parse_percentile(value):
  82. match = _PERCENTILE.search(value)
  83. if match:
  84. value, interpolation = match.groups()
  85. interpolation = INTERPOLATIONS[interpolation]
  86. try:
  87. return float(value), interpolation
  88. except ValueError:
  89. pass
  90. def read_input():
  91. for line in sys.stdin:
  92. if args.time_provided:
  93. now, line = line.split()
  94. now = float(now)
  95. else:
  96. now = time.time()
  97. value = float(line)
  98. yield DataPoint(now, value)
  99. def moving_window_n_samples(data, n):
  100. queue = deque([], n)
  101. # almost fill the queue
  102. for i in xrange(n - 1):
  103. queue.append(next(data))
  104. for item in data:
  105. queue.append(item)
  106. yield tuple(queue)
  107. def moving_window_n_seconds(data, n):
  108. queue = deque()
  109. for item in data:
  110. now = data.when
  111. while now > n - queue[0].when:
  112. queue.popleft()
  113. queue.append(data)
  114. yield tuple(queue)
  115. def moving_window(data):
  116. kind, value = parse_timespec(args.window)
  117. if kind == 'all':
  118. return [data]
  119. elif kind == 'samples':
  120. return moving_window_n_samples(data, value)
  121. elif kind == 'seconds':
  122. return moving_window_n_seconds(data, value)
  123. raise NotImplementedError('oops.')
  124. def update_n_samples(windows, n):
  125. while True:
  126. yield next(windows)
  127. for i in xrange(n - 1):
  128. next(windows)
  129. def update_n_seconds(windows, n):
  130. last = 0
  131. for window in windows:
  132. now = window[-1].when
  133. if now - last > n:
  134. yield window
  135. last = now
  136. def update(windows):
  137. kind, value = parse_timespec(args.update)
  138. if kind == 'all':
  139. return windows
  140. elif kind == 'samples':
  141. return update_n_samples(windows, value)
  142. elif kind == 'seconds':
  143. return update_n_seconds(windows, value)
  144. raise NotImplementedError('oops.')
  145. def get_percentile(data, percentile, interpolation):
  146. assert 0.0 <= percentile <= 100.0
  147. n = len(data) - 1
  148. i = percentile * n / 100.0
  149. assert 0.0 <= i <= n
  150. if interpolation == 'nearest':
  151. i = int(round(i))
  152. return data[i]
  153. elif interpolation == 'linear':
  154. i_down = int(i)
  155. alpha = i - i_down
  156. lower = data[i_down]
  157. if alpha == 0.0:
  158. return lower
  159. upper = data[i_down + 1]
  160. return lower * (1.0 - alpha) + upper * alpha
  161. raise NotImplementedError('Unknown interpolation %r' % (interpolation,))
  162. def get_percentiles(window, percentiles):
  163. window = sorted(data.value for data in window)
  164. for percentile, interpolation in percentiles:
  165. yield get_percentile(window, percentile, interpolation)
  166. def main():
  167. percentiles = [
  168. parse_percentile(arg)
  169. for arg in args.percentiles.split(',')
  170. ]
  171. data = read_input()
  172. windows = moving_window(data)
  173. update_windows = update(windows)
  174. for window in update_windows:
  175. values = get_percentiles(window, percentiles)
  176. print '\t'.join('%g' % value for value in values)
  177. if __name__ == '__main__':
  178. main()