PageRenderTime 23ms CodeModel.GetById 5ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/util/topsort.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 219 lines | 198 code | 0 blank | 21 comment | 1 complexity | 690670a005f023ecedecb5e6842f279e MD5 | raw file
  1"""
  2Topological sort.
  3
  4From Tim Peters, see:
  5   http://mail.python.org/pipermail/python-list/1999-July/006660.html
  6
  7topsort takes a list of pairs, where each pair (x, y) is taken to
  8mean that x <= y wrt some abstract partial ordering.  The return
  9value is a list, representing a total ordering that respects all
 10the input constraints.
 11E.g.,
 12
 13   topsort( [(1,2), (3,3)] )
 14
 15may return any of (but nothing other than)
 16
 17   [3, 1, 2]
 18   [1, 3, 2]
 19   [1, 2, 3]
 20
 21because those are the permutations of the input elements that
 22respect the "1 precedes 2" and "3 precedes 3" input constraints.
 23Note that a constraint of the form (x, x) is really just a trick
 24to make sure x appears *somewhere* in the output list.
 25
 26If there's a cycle in the constraints, say
 27
 28   topsort( [(1,2), (2,1)] )
 29
 30then CycleError is raised, and the exception object supports
 31many methods to help analyze and break the cycles.  This requires
 32a good deal more code than topsort itself!
 33"""
 34
 35from exceptions import Exception
 36
 37class CycleError(Exception):
 38    def __init__(self, sofar, numpreds, succs):
 39        Exception.__init__(self, "cycle in constraints",
 40                           sofar, numpreds, succs)
 41        self.preds = None
 42
 43    # return as much of the total ordering as topsort was able to
 44    # find before it hit a cycle
 45    def get_partial(self):
 46        return self[1]
 47
 48    # return remaining elt -> count of predecessors map
 49    def get_pred_counts(self):
 50        return self[2]
 51
 52    # return remaining elt -> list of successors map
 53    def get_succs(self):
 54        return self[3]
 55
 56    # return remaining elements (== those that don't appear in
 57    # get_partial())
 58    def get_elements(self):
 59        return self.get_pred_counts().keys()
 60
 61    # Return a list of pairs representing the full state of what's
 62    # remaining (if you pass this list back to topsort, it will raise
 63    # CycleError again, and if you invoke get_pairlist on *that*
 64    # exception object, the result will be isomorphic to *this*
 65    # invocation of get_pairlist).
 66    # The idea is that you can use pick_a_cycle to find a cycle,
 67    # through some means or another pick an (x,y) pair in the cycle
 68    # you no longer want to respect, then remove that pair from the
 69    # output of get_pairlist and try topsort again.
 70    def get_pairlist(self):
 71        succs = self.get_succs()
 72        answer = []
 73        for x in self.get_elements():
 74            if succs.has_key(x):
 75                for y in succs[x]:
 76                    answer.append( (x, y) )
 77            else:
 78                # make sure x appears in topsort's output!
 79                answer.append( (x, x) )
 80        return answer
 81
 82    # return remaining elt -> list of predecessors map
 83    def get_preds(self):
 84        if self.preds is not None:
 85            return self.preds
 86        self.preds = preds = {}
 87        remaining_elts = self.get_elements()
 88        for x in remaining_elts:
 89            preds[x] = []
 90        succs = self.get_succs()
 91
 92        for x in remaining_elts:
 93            if succs.has_key(x):
 94                for y in succs[x]:
 95                    preds[y].append(x)
 96
 97        if __debug__:
 98            for x in remaining_elts:
 99                assert len(preds[x]) > 0
100        return preds
101
102    # return a cycle [x, ..., x] at random
103    def pick_a_cycle(self):
104        remaining_elts = self.get_elements()
105
106        # We know that everything in remaining_elts has a predecessor,
107        # but don't know that everything in it has a successor.  So
108        # crawling forward over succs may hit a dead end.  Instead we
109        # crawl backward over the preds until we hit a duplicate, then
110        # reverse the path.
111        preds = self.get_preds()
112        from random import choice
113        x = choice(remaining_elts)
114        answer = []
115        index = {}
116        in_answer = index.has_key
117        while not in_answer(x):
118            index[x] = len(answer) # index of x in answer
119            answer.append(x)
120            x = choice(preds[x])
121        answer.append(x)
122        answer = answer[index[x]:]
123        answer.reverse()
124        return answer
125
def topsort(pairlist):
    """Return a list of elements ordered to respect every pair constraint.

    pairlist is an iterable of (x, y) pairs, each meaning that x must not
    come after y.  A pair (x, x) adds no constraint; it only makes sure x
    appears in the result.  Every element mentioned in any pair appears
    exactly once in the returned list.

    Raises CycleError if the constraints contain a cycle; the exception
    carries the partial ordering found so far plus the remaining
    predecessor counts and successor lists.
    """
    numpreds = {}    # elt -> # of predecessors
    successors = {}  # elt -> list of successors
    for first, second in pairlist:
        # make sure every elt is a key in numpreds
        numpreds.setdefault(first, 0)
        numpreds.setdefault(second, 0)

        # if they're the same, there's no real dependence
        if first == second:
            continue

        # since first < second, second gains a pred ...
        numpreds[second] += 1

        # ... and first gains a succ
        successors.setdefault(first, []).append(second)

    # suck up everything without a predecessor; this must be a real list
    # because it is appended to while being iterated below
    answer = [x for x in numpreds if numpreds[x] == 0]

    # for everything in answer, knock down the pred count on
    # its successors; note that answer grows *in* the loop
    for x in answer:
        assert numpreds[x] == 0
        del numpreds[x]
        if x in successors:
            for y in successors[x]:
                numpreds[y] -= 1
                if numpreds[y] == 0:
                    answer.append(y)
            # following "del" isn't needed; just makes
            # CycleError details easier to grasp
            del successors[x]

    if numpreds:
        # everything in numpreds has at least one predecessor ->
        # there's a cycle
        if __debug__:
            for x in numpreds:
                assert numpreds[x] > 0
        raise CycleError(answer, numpreds, successors)
    return answer
175
def topsort_levels(pairlist):
    """Return the elements grouped into successive dependency levels.

    pairlist is an iterable of (x, y) pairs, each meaning that x must not
    come after y.  A pair (x, x) adds no constraint; it only makes sure x
    appears in the result.

    The result is a list of lists.  The first inner list holds every
    element with no predecessors; each later list holds the elements whose
    predecessors all appear in earlier lists.

    Raises CycleError if the constraints contain a cycle; the exception
    carries the levels found so far plus the remaining predecessor counts
    and successor lists.
    """
    numpreds = {}    # elt -> # of predecessors
    successors = {}  # elt -> list of successors
    for first, second in pairlist:
        # make sure every elt is a key in numpreds
        numpreds.setdefault(first, 0)
        numpreds.setdefault(second, 0)

        # if they're the same, there's no real dependence
        if first == second:
            continue

        # since first < second, second gains a pred ...
        numpreds[second] += 1

        # ... and first gains a succ
        successors.setdefault(first, []).append(second)

    answer = []

    while True:
        # Suck up everything without a predecessor.  The level is built
        # as a separate list so numpreds can be modified while walking it.
        levparents = [x for x in numpreds if numpreds[x] == 0]
        if not levparents:
            break
        answer.append(levparents)
        for levparent in levparents:
            del numpreds[levparent]
            if levparent in successors:
                for levparentsucc in successors[levparent]:
                    numpreds[levparentsucc] -= 1
                del successors[levparent]

    if numpreds:
        # Everything left in numpreds still has at least one
        # predecessor -> there's a cycle.
        raise CycleError(answer, numpreds, successors)

    return answer