PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/apiary/tools/span.py

https://bitbucket.org/lindenlab/apiary/
Python | 180 lines | 97 code | 29 blank | 54 comment | 21 complexity | 2f33c1118955e18f67b9d3dfb33e3693 MD5 | raw file
  1. #
  2. # $LicenseInfo:firstyear=2010&license=mit$
  3. #
  4. # Copyright (c) 2010, Linden Research, Inc.
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining a copy
  7. # of this software and associated documentation files (the "Software"), to deal
  8. # in the Software without restriction, including without limitation the rights
  9. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. # copies of the Software, and to permit persons to whom the Software is
  11. # furnished to do so, subject to the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included in
  14. # all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. # THE SOFTWARE.
  23. # $/LicenseInfo$
  24. #
  25. '''
  26. A span represents an ordered pair. This module contains algorithms for selecting windows of spans by start, end, or overlap.
  27. '''
  28. import bisect
  29. class Span (tuple):
  30. '''
  31. A Span is a specialized pair representing the partially open range [start, end) along with application associated data.
  32. It includes the start, but not the end. Note: This implies a Span cannot represent a point.
  33. '''
  34. def __new__(cls, start, end, data=None):
  35. assert start <= end, `start, end` # *FIX: Can magnitude be 0? Before we disallowed, but now we do for demo convenience.
  36. return tuple.__new__(cls, (start, end))
  37. def __init__(self, start, end, data=None):
  38. self.data = data
  39. def __repr__(self):
  40. return '<%s (%r,%r) %r>' % (self.__class__.__name__,
  41. self.start,
  42. self.end,
  43. self.data)
  44. @property
  45. def start(self):
  46. return self[0]
  47. @property
  48. def end(self):
  49. return self[1]
  50. @property
  51. def magnitude(self):
  52. lo, hi = self
  53. return hi - lo
  54. def contains(self, point):
  55. return self.start <= point < self.end
  56. def overlaps(self, other):
  57. assert isinstance(other, Span), `self, other`
  58. return self.contains(other.start) \
  59. or (other.end > self.start and self.contains(other.end)) \
  60. or other.contains(self.start) \
  61. or (self.end > other.start and other.contains(self.end))
  62. class SpanSequence (object):
  63. '''
  64. A SpanSequence is a sequence of spans ordered by start and end points.
  65. '''
  66. def __init__(self, spans=[]):
  67. self._spans = []
  68. for s in spans:
  69. self.insert(s)
  70. def __iter__(self):
  71. return iter(self._spans)
  72. def __cmp__(self, other):
  73. return cmp(self._spans, other._spans)
  74. def __len__(self):
  75. return len(self._spans)
  76. def insert(self, span):
  77. '''
  78. Slow unsorted insert. O(log N)
  79. '''
  80. assert isinstance(span, Span), `self, span`
  81. i = bisect.bisect(self._spans, span)
  82. if i > 0 and self._spans[i-1] == span:
  83. # *HACK: skip duplicate insert. Need to nail down a bug here. See *FIX comments in base.py and above.
  84. assert self._spans[i-1] <= span, `i, self._spans[i-1], span`
  85. return
  86. elif i < len(self._spans) and self._spans[i] == span:
  87. # *HACK: skip duplicate insert. Need to nail down a bug here. See *FIX comments in base.py and above.
  88. assert self._spans[i] >= span, `i, self._spans[i], span`
  89. return
  90. self._spans.insert(i, span)
  91. def append(self, span):
  92. '''
  93. Fast sorted append. The span argument must come at the end of this sequence.
  94. '''
  95. if self._spans:
  96. if self._spans[-1] == span:
  97. # *HACK: skip duplicate insert. Need to nail down a bug here. See *FIX comments in base.py and above.
  98. return
  99. assert span > self._spans[-1], `self._spans[-1], span`
  100. self._spans.append(span)
  101. def as_bins(self, binwidth=1.0):
  102. if self._spans:
  103. s = self._spans[0].start
  104. window = Span(s, s+binwidth)
  105. subseq = SpanSequence()
  106. for span in self:
  107. while not window.contains(span.start):
  108. yield window, subseq
  109. subseq = SpanSequence()
  110. window = Span(window.end, window.end + binwidth)
  111. subseq.append(span)
  112. yield window, subseq
  113. def concurrency_vector(self):
  114. '''
  115. Yield an ordered sequence of (t, span, spans) where spans consists
  116. of all spans which overlap t. The span is either newly added
  117. to spans if just starting, or just removed if ending. The t
  118. value is either span.start or span.end.
  119. '''
  120. q = [] # Contains a sorted list of (span.end, span)
  121. for span in self:
  122. while q and q[0][0] <= span.start:
  123. # Decrease concurrency:
  124. end, other = q.pop(0)
  125. yield (end, other, list(q))
  126. bisect.insort(q, (span.end, span))
  127. yield (span.start, span, list(q))
  128. while q:
  129. end, span = q.pop(0)
  130. yield (end, span, list(q))
  131. class SlidingWindowSequence (SpanSequence):
  132. '''
  133. A SlidingWindowSequence is a SpanSequence and ensures that it extends no more than some cutoff into the past.
  134. '''
  135. def __init__(self, width):
  136. SpanSequence.__init__(self)
  137. self.width = width
  138. def insert(self, span):
  139. SpanSequence.insert(self, span)
  140. self._enforce_cutoff()
  141. def append(self, span):
  142. SpanSequence.append(self, span)
  143. self._enforce_cutoff()
  144. def _enforce_cutoff(self):
  145. end = self._spans[-1].start
  146. cutoff = end - self.width
  147. for i, s in enumerate(self):
  148. if s.start > cutoff:
  149. break
  150. self._spans = self._spans[i:]