/scripts/build_toolbox.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 166 lines · 119 code · 23 blank · 24 comment · 31 complexity · 447e39b2bd05e8076e88f9a3746ddf9b MD5 · raw file

  1. import os
  2. import sys
  3. from xml.etree import ElementTree as ET
  4. def prettify(elem):
  5. from xml.dom import minidom
  6. rough_string = ET.tostring(elem, 'utf-8')
  7. repaired = minidom.parseString(rough_string)
  8. return repaired.toprettyxml(indent=' ')
  9. # Build a list of all toolconf xml files in the tools directory
  10. def getfilenamelist(startdir):
  11. filenamelist = []
  12. for root, dirs, files in os.walk(startdir):
  13. for fn in files:
  14. fullfn = os.path.join(root, fn)
  15. if fn.endswith('toolconf.xml'):
  16. filenamelist.append(fullfn)
  17. elif fn.endswith('.xml'):
  18. try:
  19. doc = ET.parse(fullfn)
  20. except:
  21. print "An OOPS on", fullfn
  22. raise
  23. rootelement = doc.getroot()
  24. # Only interpret those 'tool' XML files that have
  25. # the 'section' element.
  26. if rootelement.tag == 'tool':
  27. if rootelement.findall('toolboxposition'):
  28. filenamelist.append(fullfn)
  29. else:
  30. print "DBG> tool config does not have a <section>:", fullfn
  31. return filenamelist
  32. class ToolBox(object):
  33. def __init__(self):
  34. from collections import defaultdict
  35. self.tools = defaultdict(list)
  36. self.sectionorders = {}
  37. def add(self, toolelement, toolboxpositionelement):
  38. section = toolboxpositionelement.attrib.get('section','')
  39. label = toolboxpositionelement.attrib.get('label','')
  40. order = int(toolboxpositionelement.attrib.get('order', '0'))
  41. sectionorder = int(toolboxpositionelement.attrib.get('sectionorder', '0'))
  42. # If this is the first time we encounter the section, store its order
  43. # number. If we have seen it before, ignore the given order and use
  44. # the stored one instead
  45. if not self.sectionorders.has_key(section):
  46. self.sectionorders[section] = sectionorder
  47. else:
  48. sectionorder = self.sectionorders[section]
  49. # Sortorder: add intelligent mix to the front
  50. self.tools[("%05d-%s"%(sectionorder,section), label, order, section)].append(toolelement)
  51. def addElementsTo(self, rootelement):
  52. toolkeys = self.tools.keys()
  53. toolkeys.sort()
  54. # Initialize the loop: IDs to zero, current section and label to ''
  55. currentsection = ''
  56. sectionnumber = 0
  57. currentlabel = ''
  58. labelnumber = 0
  59. for toolkey in toolkeys:
  60. section = toolkey[3]
  61. # If we change sections, add the new section to the XML tree,
  62. # and start adding stuff to the new section. If the new section
  63. # is '', start adding stuff to the root again.
  64. if currentsection != section:
  65. currentsection = section
  66. # Start the section with empty label
  67. currentlabel = ''
  68. if section:
  69. sectionnumber += 1
  70. attrib = {'name': section,
  71. 'id': "section%d"% sectionnumber}
  72. sectionelement = ET.Element('section', attrib)
  73. rootelement.append(sectionelement)
  74. currentelement = sectionelement
  75. else:
  76. currentelement = rootelement
  77. label = toolkey[1]
  78. # If we change labels, add the new label to the XML tree
  79. if currentlabel != label:
  80. currentlabel = label
  81. if label:
  82. labelnumber += 1
  83. attrib = {'text': label,
  84. 'id': "label%d"% labelnumber}
  85. labelelement = ET.Element('label', attrib)
  86. currentelement.append(labelelement)
  87. # Add the tools that are in this place
  88. for toolelement in self.tools[toolkey]:
  89. currentelement.append(toolelement)
  90. # Analyze all the toolconf xml files given in the filenamelist
  91. # Build a list of all sections
  92. def scanfiles(filenamelist):
  93. # Build an empty tool box
  94. toolbox = ToolBox()
  95. # Read each of the files in the list
  96. for fn in filenamelist:
  97. doc = ET.parse(fn)
  98. root = doc.getroot()
  99. if root.tag == 'tool':
  100. toolelements = [root]
  101. else:
  102. toolelements = doc.findall('tool')
  103. for toolelement in toolelements:
  104. # Figure out where the tool XML file is, absolute path.
  105. if toolelement.attrib.has_key('file'):
  106. # It is mentioned, we need to make it absolute
  107. fileattrib = os.path.join(os.getcwd(),
  108. os.path.dirname(fn),
  109. toolelement.attrib['file'])
  110. else:
  111. # It is the current file
  112. fileattrib = os.path.join(os.getcwd(), fn)
  113. # Store the file in the attibutes of the new tool element
  114. attrib = {'file': fileattrib}
  115. # Add the tags into the attributes
  116. tags = toolelement.find('tags')
  117. if tags:
  118. tagarray = []
  119. for tag in tags.findall('tag'):
  120. tagarray.append(tag.text)
  121. attrib['tags'] = ",".join(tagarray)
  122. else:
  123. print "DBG> No tags in",fn
  124. # Build the tool element
  125. newtoolelement = ET.Element('tool', attrib)
  126. toolboxpositionelements = toolelement.findall('toolboxposition')
  127. if not toolboxpositionelements:
  128. print "DBG> %s has no toolboxposition" % fn
  129. else:
  130. for toolboxpositionelement in toolboxpositionelements:
  131. toolbox.add(newtoolelement, toolboxpositionelement)
  132. return toolbox
  133. def assemble():
  134. filenamelist = []
  135. for directorytree in ['tools']:
  136. filenamelist.extend(getfilenamelist('tools'))
  137. filenamelist.sort()
  138. toolbox = scanfiles(filenamelist)
  139. toolboxelement = ET.Element('toolbox')
  140. toolbox.addElementsTo(toolboxelement)
  141. print prettify(toolboxelement)
  142. if __name__ == "__main__":
  143. assemble()