/lib/galaxy/util/topsort.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 219 lines · 130 code · 27 blank · 62 comment · 39 complexity · 690670a005f023ecedecb5e6842f279e MD5 · raw file

  1. """
  2. Topological sort.
  3. From Tim Peters, see:
  4. http://mail.python.org/pipermail/python-list/1999-July/006660.html
  5. topsort takes a list of pairs, where each pair (x, y) is taken to
  6. mean that x <= y wrt some abstract partial ordering. The return
  7. value is a list, representing a total ordering that respects all
  8. the input constraints.
  9. E.g.,
  10. topsort( [(1,2), (3,3)] )
  11. may return any of (but nothing other than)
  12. [3, 1, 2]
  13. [1, 3, 2]
  14. [1, 2, 3]
  15. because those are the permutations of the input elements that
  16. respect the "1 precedes 2" and "3 precedes 3" input constraints.
  17. Note that a constraint of the form (x, x) is really just a trick
  18. to make sure x appears *somewhere* in the output list.
  19. If there's a cycle in the constraints, say
  20. topsort( [(1,2), (2,1)] )
  21. then CycleError is raised, and the exception object supports
  22. many methods to help analyze and break the cycles. This requires
  23. a good deal more code than topsort itself!
  24. """
  25. from exceptions import Exception
  26. class CycleError(Exception):
  27. def __init__(self, sofar, numpreds, succs):
  28. Exception.__init__(self, "cycle in constraints",
  29. sofar, numpreds, succs)
  30. self.preds = None
  31. # return as much of the total ordering as topsort was able to
  32. # find before it hit a cycle
  33. def get_partial(self):
  34. return self[1]
  35. # return remaining elt -> count of predecessors map
  36. def get_pred_counts(self):
  37. return self[2]
  38. # return remaining elt -> list of successors map
  39. def get_succs(self):
  40. return self[3]
  41. # return remaining elements (== those that don't appear in
  42. # get_partial())
  43. def get_elements(self):
  44. return self.get_pred_counts().keys()
  45. # Return a list of pairs representing the full state of what's
  46. # remaining (if you pass this list back to topsort, it will raise
  47. # CycleError again, and if you invoke get_pairlist on *that*
  48. # exception object, the result will be isomorphic to *this*
  49. # invocation of get_pairlist).
  50. # The idea is that you can use pick_a_cycle to find a cycle,
  51. # through some means or another pick an (x,y) pair in the cycle
  52. # you no longer want to respect, then remove that pair from the
  53. # output of get_pairlist and try topsort again.
  54. def get_pairlist(self):
  55. succs = self.get_succs()
  56. answer = []
  57. for x in self.get_elements():
  58. if succs.has_key(x):
  59. for y in succs[x]:
  60. answer.append( (x, y) )
  61. else:
  62. # make sure x appears in topsort's output!
  63. answer.append( (x, x) )
  64. return answer
  65. # return remaining elt -> list of predecessors map
  66. def get_preds(self):
  67. if self.preds is not None:
  68. return self.preds
  69. self.preds = preds = {}
  70. remaining_elts = self.get_elements()
  71. for x in remaining_elts:
  72. preds[x] = []
  73. succs = self.get_succs()
  74. for x in remaining_elts:
  75. if succs.has_key(x):
  76. for y in succs[x]:
  77. preds[y].append(x)
  78. if __debug__:
  79. for x in remaining_elts:
  80. assert len(preds[x]) > 0
  81. return preds
  82. # return a cycle [x, ..., x] at random
  83. def pick_a_cycle(self):
  84. remaining_elts = self.get_elements()
  85. # We know that everything in remaining_elts has a predecessor,
  86. # but don't know that everything in it has a successor. So
  87. # crawling forward over succs may hit a dead end. Instead we
  88. # crawl backward over the preds until we hit a duplicate, then
  89. # reverse the path.
  90. preds = self.get_preds()
  91. from random import choice
  92. x = choice(remaining_elts)
  93. answer = []
  94. index = {}
  95. in_answer = index.has_key
  96. while not in_answer(x):
  97. index[x] = len(answer) # index of x in answer
  98. answer.append(x)
  99. x = choice(preds[x])
  100. answer.append(x)
  101. answer = answer[index[x]:]
  102. answer.reverse()
  103. return answer
  104. def topsort(pairlist):
  105. numpreds = {} # elt -> # of predecessors
  106. successors = {} # elt -> list of successors
  107. for first, second in pairlist:
  108. # make sure every elt is a key in numpreds
  109. if not numpreds.has_key(first):
  110. numpreds[first] = 0
  111. if not numpreds.has_key(second):
  112. numpreds[second] = 0
  113. # if they're the same, there's no real dependence
  114. if first == second:
  115. continue
  116. # since first < second, second gains a pred ...
  117. numpreds[second] = numpreds[second] + 1
  118. # ... and first gains a succ
  119. if successors.has_key(first):
  120. successors[first].append(second)
  121. else:
  122. successors[first] = [second]
  123. # suck up everything without a predecessor
  124. answer = filter(lambda x, numpreds=numpreds: numpreds[x] == 0,
  125. numpreds.keys())
  126. # for everything in answer, knock down the pred count on
  127. # its successors; note that answer grows *in* the loop
  128. for x in answer:
  129. assert numpreds[x] == 0
  130. del numpreds[x]
  131. if successors.has_key(x):
  132. for y in successors[x]:
  133. numpreds[y] = numpreds[y] - 1
  134. if numpreds[y] == 0:
  135. answer.append(y)
  136. # following "del" isn't needed; just makes
  137. # CycleError details easier to grasp
  138. del successors[x]
  139. if numpreds:
  140. # everything in numpreds has at least one predecessor ->
  141. # there's a cycle
  142. if __debug__:
  143. for x in numpreds.keys():
  144. assert numpreds[x] > 0
  145. raise CycleError(answer, numpreds, successors)
  146. return answer
  147. def topsort_levels(pairlist):
  148. numpreds = {} # elt -> # of predecessors
  149. successors = {} # elt -> list of successors
  150. for first, second in pairlist:
  151. # make sure every elt is a key in numpreds
  152. if not numpreds.has_key(first):
  153. numpreds[first] = 0
  154. if not numpreds.has_key(second):
  155. numpreds[second] = 0
  156. # if they're the same, there's no real dependence
  157. if first == second:
  158. continue
  159. # since first < second, second gains a pred ...
  160. numpreds[second] = numpreds[second] + 1
  161. # ... and first gains a succ
  162. if successors.has_key(first):
  163. successors[first].append(second)
  164. else:
  165. successors[first] = [second]
  166. answer = []
  167. while 1:
  168. # Suck up everything without a predecessor.
  169. levparents = [x for x in numpreds.keys() if numpreds[x] == 0]
  170. if not levparents:
  171. break
  172. answer.append( levparents )
  173. for levparent in levparents:
  174. del numpreds[levparent]
  175. if successors.has_key(levparent):
  176. for levparentsucc in successors[levparent]:
  177. numpreds[levparentsucc] -= 1
  178. del successors[levparent]
  179. if numpreds:
  180. # Everything in num_parents has at least one child ->
  181. # there's a cycle.
  182. raise CycleError( answer, numpreds, successors )
  183. return answer