PageRenderTime 83ms CodeModel.GetById 40ms app.highlight 13ms RepoModel.GetById 27ms app.codeStats 0ms

/apiary/tools/span.py

https://bitbucket.org/lindenlab/apiary/
Python | 180 lines | 97 code | 29 blank | 54 comment | 25 complexity | 2f33c1118955e18f67b9d3dfb33e3693 MD5 | raw file
  1#
  2# $LicenseInfo:firstyear=2010&license=mit$
  3# 
  4# Copyright (c) 2010, Linden Research, Inc.
  5# 
  6# Permission is hereby granted, free of charge, to any person obtaining a copy
  7# of this software and associated documentation files (the "Software"), to deal
  8# in the Software without restriction, including without limitation the rights
  9# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10# copies of the Software, and to permit persons to whom the Software is
 11# furnished to do so, subject to the following conditions:
 12# 
 13# The above copyright notice and this permission notice shall be included in
 14# all copies or substantial portions of the Software.
 15# 
 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22# THE SOFTWARE.
 23# $/LicenseInfo$
 24#
 25
 26
 27'''
 28A span represents an ordered pair.  This module contains algorithms for selecting windows of span by start, end, or overlap.
 29'''
 30import bisect
 31
 32
 33class Span (tuple):
 34    '''
 35    A Span is a specialized pair representing the partially open range [start, end) along with application associated data.
 36
 37    It includes the start, but not the end. Note: This implies a Span cannot represent a point.
 38    '''
 39    def __new__(cls, start, end, data=None):
 40        assert start <= end, `start, end` # *FIX: Can magnitude be 0?  Before we disallowed, but now we do for demo convenience.
 41        return tuple.__new__(cls, (start, end))
 42
 43    def __init__(self, start, end, data=None):
 44        self.data = data
 45        
 46    def __repr__(self):
 47        return '<%s (%r,%r) %r>' % (self.__class__.__name__,
 48                                    self.start,
 49                                    self.end,
 50                                    self.data)
 51    
 52    @property
 53    def start(self):
 54        return self[0]
 55
 56    @property
 57    def end(self):
 58        return self[1]
 59
 60    @property
 61    def magnitude(self):
 62        lo, hi = self
 63        return hi - lo
 64    
 65    def contains(self, point):
 66        return self.start <= point < self.end
 67    
 68    def overlaps(self, other):
 69        assert isinstance(other, Span), `self, other`
 70        return self.contains(other.start) \
 71               or (other.end > self.start and self.contains(other.end)) \
 72               or other.contains(self.start) \
 73               or (self.end > other.start and other.contains(self.end))
 74    
 75
 76class SpanSequence (object):
 77    '''
 78    A SpanSequence is a sequence of spans ordered by start and end points.
 79    '''
 80    def __init__(self, spans=[]):
 81        self._spans = []
 82        for s in spans:
 83            self.insert(s)
 84
 85    def __iter__(self):
 86        return iter(self._spans)
 87    
 88    def __cmp__(self, other):
 89        return cmp(self._spans, other._spans)
 90    
 91    def __len__(self):
 92        return len(self._spans)
 93    
 94    def insert(self, span):
 95        '''
 96        Slow unsorted insert.  O(log N)
 97        '''
 98        assert isinstance(span, Span), `self, span`
 99        i = bisect.bisect(self._spans, span)
100        if i > 0 and self._spans[i-1] == span:
101            # *HACK: skip duplicate insert.  Need to nail down a bug here.  See *FIX comments in base.py and above.
102            assert self._spans[i-1] <= span, `i, self._spans[i-1], span`
103            return
104        elif i < len(self._spans) and self._spans[i] == span:
105            # *HACK: skip duplicate insert.  Need to nail down a bug here.  See *FIX comments in base.py and above.
106            assert self._spans[i] >= span, `i, self._spans[i], span`
107            return
108        self._spans.insert(i, span)
109
110    def append(self, span):
111        '''
112        Fast sorted append.  The span argument must come at the end of this sequence.
113        '''
114        if self._spans:
115            if self._spans[-1] == span:
116                # *HACK: skip duplicate insert.  Need to nail down a bug here.  See *FIX comments in base.py and above.
117                return
118            assert span > self._spans[-1], `self._spans[-1], span`
119        self._spans.append(span)
120
121    def as_bins(self, binwidth=1.0):
122        if self._spans:
123            s = self._spans[0].start
124            window = Span(s, s+binwidth)
125            subseq = SpanSequence()
126            for span in self:
127                while not window.contains(span.start):
128                    yield window, subseq
129                    subseq = SpanSequence()
130                    window = Span(window.end, window.end + binwidth)
131                subseq.append(span)
132            yield window, subseq
133                    
134    def concurrency_vector(self):
135        '''
136        Yield an ordered sequence of (t, span, spans) where spans consists
137        of all spans which overlap t.  The span is either newly added
138        to spans if just starting, or just removed if ending.  The t
139        value is either span.start or span.end.
140        '''
141        q = [] # Contains a sorted list of (span.end, span)
142
143        for span in self:
144            while q and q[0][0] <= span.start:
145                # Decrease concurrency:
146                end, other = q.pop(0)
147                yield (end, other, list(q))
148            bisect.insort(q, (span.end, span))
149            yield (span.start, span, list(q))
150
151        while q:
152            end, span = q.pop(0)
153            yield (end, span, list(q))
154
155
class SlidingWindowSequence(SpanSequence):
    '''
    A SpanSequence that forgets old spans: after every insert or append,
    leading spans that start too far before the newest span's start are
    discarded (see _enforce_cutoff).
    '''
    def __init__(self, width):
        # width is how far back from the newest span's start older spans
        # are still retained.
        self.width = width
        SpanSequence.__init__(self)

    def insert(self, span):
        '''Sorted insert followed by trimming of expired spans.'''
        SpanSequence.insert(self, span)
        self._enforce_cutoff()

    def append(self, span):
        '''Sorted append followed by trimming of expired spans.'''
        SpanSequence.append(self, span)
        self._enforce_cutoff()

    def _enforce_cutoff(self):
        '''
        Drop the leading spans whose start is not greater than
        (start of the last span - width).  The last span is always kept.
        '''
        spans = self._spans
        cutoff = spans[-1].start - self.width

        # If no span starts after the cutoff, fall back to keeping only
        # the last one.
        keep_from = len(spans) - 1
        for index, candidate in enumerate(self):
            if candidate.start > cutoff:
                keep_from = index
                break

        self._spans = spans[keep_from:]