/Documentation/ReferenceManualScripts/doxygen/utils/linker/linker.py

https://github.com/aivanov-cern/cmssw · Python · 154 lines · 111 code · 36 blank · 7 comment · 35 complexity · e42f7f50979b5ce8c4b878ed19128302 MD5 · raw file

import os
import re
import subprocess
import sys

from BeautifulSoup import BeautifulSoup
  5. BASE = "http://cmssdt.cern.ch/SDT/doxygen/"
  6. INDEX = {}
  7. printOutput = False;
  8. def replace(regex,replacement,content):
  9. p = re.compile(regex,re.IGNORECASE);
  10. c = p.sub(replacement,content)
  11. return c
  12. def findMatchingFiles(w, source_htmls):
  13. ret = ""
  14. for srcFile in source_htmls:
  15. if srcFile.split("/")[-1].__str__().find(w) != -1:
  16. ret += " " + srcFile
  17. return ret
  18. def filter(s,w,k):
  19. o = s.split()
  20. if len(o) > 1:
  21. betterChoice = ""
  22. for i in range(len(o)):
  23. if re.search("[^a-zA-Z]"+w+"[^a-zA-Z]", o[i]):
  24. if re.search(".*"+k+".*",o[i]):
  25. return o[i]
  26. else:
  27. if betterChoice == "" or len(betterChoice) > o[i]:
  28. betterChoice = o[i]
  29. return betterChoice
  30. else:
  31. if re.search("[^a-zA-Z]"+w+"[^a-zA-Z]", s):
  32. return s
  33. else:
  34. return ""
  35. def getLink(word):
  36. if word.isdigit() or (len(word) < 5):
  37. return ""
  38. out = filter(findMatchingFiles(word, py_source_htmls),word,"")
  39. if not out or out == "":
  40. out = filter(findMatchingFiles(word, h_source_htmls),word,"")
  41. if not out or out == "":
  42. return ""
  43. return BASE+out.lstrip()
  44. def process(filename):
  45. if (filename != None) and (len(filename) < 5):
  46. return
  47. fh = open(filename,'r')
  48. html = fh.read()
  49. fh.close()
  50. content = ""
  51. # find only code block
  52. soup = BeautifulSoup(html)
  53. pres = soup.findAll("pre", {"class":"fragment"})
  54. for pre in pres:
  55. if pre.contents != None:
  56. content += pre.renderContents()
  57. # END OF find only code block
  58. # remove links
  59. content = replace(r'<a\b[^>]*>(.*?)</a>','',content)
  60. content = content.replace("&#39;", "'")
  61. content = content.replace("&quot;", '"')
  62. matches = []
  63. tmp = re.findall('[\w,\.]+_cf[i,g,f]',content)
  64. for t in tmp:
  65. matches.extend(t.split("."))
  66. matches.extend(re.findall('"\w+"',content))
  67. matches.extend(re.findall("'\w+'",content))
  68. set = {} #
  69. map(set.__setitem__, matches, []) # removing duplicate keywords
  70. matches = set.keys() #
  71. for match in matches:
  72. match = match.replace("'", "")
  73. match = match.replace('"', "")
  74. if (INDEX.has_key(match)):
  75. href = INDEX[match]
  76. else:
  77. href = getLink(match)
  78. if (href != ""):
  79. INDEX[match] = BASE+href[href.find("CMSSW_"):]
  80. link = "<a class=\"configfileLink\" href=\""+href+"\">"+match+"</a>"
  81. regex = r"\b"+match+r"\b"
  82. html = replace(regex, link, html)
  83. ########################
  84. if printOutput:
  85. print ">>>>>["+match+"]",
  86. ########################
  87. ########################
  88. if printOutput:
  89. print href
  90. ########################
  91. fh = open(filename,'w')
  92. fh.write(html)
  93. fh.close()
  94. if len(sys.argv) > 1:
  95. DIR = sys.argv[1] +"/doc/html/"
  96. global py_source_htmls
  97. global h_source_htmls
  98. h_source_htmls = []
  99. py_source_htmls = []
  100. print "ieskau h_source"
  101. query = "find "+DIR+" -name '*8h_source.html' -print"
  102. output = os.popen(query)
  103. h_source_htmls = output.read().split("\n")
  104. print "ieskau py_source"
  105. query = "find "+DIR+" -name '*8py_source.html' -print"
  106. output = os.popen(query)
  107. py_source_htmls = output.read().split("\n")
  108. query = 'find '+DIR+' \( -name "*cf[i,g,f]*py*html" -or -name "namespace*cf[i,g,f].html" \) -print '
  109. output = os.popen(query)
  110. files = output.read().split("\n")
  111. i = 0
  112. for file in files:
  113. i = i + 1
  114. print i.__str__()+") "+file
  115. process(file)
  116. print "-----------------------------------------------------------"
  117. else:
  118. print "not enough parameters"