/lib/galaxy/util/topsort.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 219 lines · 130 code · 27 blank · 62 comment · 39 complexity · 690670a005f023ecedecb5e6842f279e MD5 · raw file
- """
- Topological sort.
- From Tim Peters, see:
- http://mail.python.org/pipermail/python-list/1999-July/006660.html
- topsort takes a list of pairs, where each pair (x, y) is taken to
- mean that x <= y wrt some abstract partial ordering. The return
- value is a list, representing a total ordering that respects all
- the input constraints.
- E.g.,
- topsort( [(1,2), (3,3)] )
- may return any of (but nothing other than)
- [3, 1, 2]
- [1, 3, 2]
- [1, 2, 3]
- because those are the permutations of the input elements that
- respect the "1 precedes 2" and "3 precedes 3" input constraints.
- Note that a constraint of the form (x, x) is really just a trick
- to make sure x appears *somewhere* in the output list.
- If there's a cycle in the constraints, say
- topsort( [(1,2), (2,1)] )
- then CycleError is raised, and the exception object supports
- many methods to help analyze and break the cycles. This requires
- a good deal more code than topsort itself!
- """
- from exceptions import Exception
- class CycleError(Exception):
- def __init__(self, sofar, numpreds, succs):
- Exception.__init__(self, "cycle in constraints",
- sofar, numpreds, succs)
- self.preds = None
- # return as much of the total ordering as topsort was able to
- # find before it hit a cycle
- def get_partial(self):
- return self[1]
- # return remaining elt -> count of predecessors map
- def get_pred_counts(self):
- return self[2]
- # return remaining elt -> list of successors map
- def get_succs(self):
- return self[3]
- # return remaining elements (== those that don't appear in
- # get_partial())
- def get_elements(self):
- return self.get_pred_counts().keys()
- # Return a list of pairs representing the full state of what's
- # remaining (if you pass this list back to topsort, it will raise
- # CycleError again, and if you invoke get_pairlist on *that*
- # exception object, the result will be isomorphic to *this*
- # invocation of get_pairlist).
- # The idea is that you can use pick_a_cycle to find a cycle,
- # through some means or another pick an (x,y) pair in the cycle
- # you no longer want to respect, then remove that pair from the
- # output of get_pairlist and try topsort again.
- def get_pairlist(self):
- succs = self.get_succs()
- answer = []
- for x in self.get_elements():
- if succs.has_key(x):
- for y in succs[x]:
- answer.append( (x, y) )
- else:
- # make sure x appears in topsort's output!
- answer.append( (x, x) )
- return answer
- # return remaining elt -> list of predecessors map
- def get_preds(self):
- if self.preds is not None:
- return self.preds
- self.preds = preds = {}
- remaining_elts = self.get_elements()
- for x in remaining_elts:
- preds[x] = []
- succs = self.get_succs()
- for x in remaining_elts:
- if succs.has_key(x):
- for y in succs[x]:
- preds[y].append(x)
- if __debug__:
- for x in remaining_elts:
- assert len(preds[x]) > 0
- return preds
- # return a cycle [x, ..., x] at random
- def pick_a_cycle(self):
- remaining_elts = self.get_elements()
- # We know that everything in remaining_elts has a predecessor,
- # but don't know that everything in it has a successor. So
- # crawling forward over succs may hit a dead end. Instead we
- # crawl backward over the preds until we hit a duplicate, then
- # reverse the path.
- preds = self.get_preds()
- from random import choice
- x = choice(remaining_elts)
- answer = []
- index = {}
- in_answer = index.has_key
- while not in_answer(x):
- index[x] = len(answer) # index of x in answer
- answer.append(x)
- x = choice(preds[x])
- answer.append(x)
- answer = answer[index[x]:]
- answer.reverse()
- return answer
- def topsort(pairlist):
- numpreds = {} # elt -> # of predecessors
- successors = {} # elt -> list of successors
- for first, second in pairlist:
- # make sure every elt is a key in numpreds
- if not numpreds.has_key(first):
- numpreds[first] = 0
- if not numpreds.has_key(second):
- numpreds[second] = 0
- # if they're the same, there's no real dependence
- if first == second:
- continue
- # since first < second, second gains a pred ...
- numpreds[second] = numpreds[second] + 1
- # ... and first gains a succ
- if successors.has_key(first):
- successors[first].append(second)
- else:
- successors[first] = [second]
- # suck up everything without a predecessor
- answer = filter(lambda x, numpreds=numpreds: numpreds[x] == 0,
- numpreds.keys())
- # for everything in answer, knock down the pred count on
- # its successors; note that answer grows *in* the loop
- for x in answer:
- assert numpreds[x] == 0
- del numpreds[x]
- if successors.has_key(x):
- for y in successors[x]:
- numpreds[y] = numpreds[y] - 1
- if numpreds[y] == 0:
- answer.append(y)
- # following "del" isn't needed; just makes
- # CycleError details easier to grasp
- del successors[x]
- if numpreds:
- # everything in numpreds has at least one predecessor ->
- # there's a cycle
- if __debug__:
- for x in numpreds.keys():
- assert numpreds[x] > 0
- raise CycleError(answer, numpreds, successors)
- return answer
- def topsort_levels(pairlist):
- numpreds = {} # elt -> # of predecessors
- successors = {} # elt -> list of successors
- for first, second in pairlist:
- # make sure every elt is a key in numpreds
- if not numpreds.has_key(first):
- numpreds[first] = 0
- if not numpreds.has_key(second):
- numpreds[second] = 0
- # if they're the same, there's no real dependence
- if first == second:
- continue
- # since first < second, second gains a pred ...
- numpreds[second] = numpreds[second] + 1
- # ... and first gains a succ
- if successors.has_key(first):
- successors[first].append(second)
- else:
- successors[first] = [second]
- answer = []
- while 1:
- # Suck up everything without a predecessor.
- levparents = [x for x in numpreds.keys() if numpreds[x] == 0]
- if not levparents:
- break
- answer.append( levparents )
- for levparent in levparents:
- del numpreds[levparent]
- if successors.has_key(levparent):
- for levparentsucc in successors[levparent]:
- numpreds[levparentsucc] -= 1
- del successors[levparent]
- if numpreds:
- # Everything in num_parents has at least one child ->
- # there's a cycle.
- raise CycleError( answer, numpreds, successors )
- return answer