PageRenderTime 3178ms CodeModel.GetById 47ms RepoModel.GetById 3ms app.codeStats 1ms

/config/BuildSystem/sourceDatabase.py

https://bitbucket.org/memmett/petsc-memmett-old
Python | 374 lines | 326 code | 11 blank | 37 comment | 40 complexity | 7e6808bf1349c7bbf25b43d8fe235cbc MD5 | raw file
  1. #!/usr/bin/env python
  2. '''A source code database
  3. SourceDB is a database of file information used to determine whether files
  4. should be rebuilt by the build system. All files names are stored relative
  5. to a given root, which is intended as the root of a Project.
  6. Relative or absolute pathnames may be used as keys, but absolute pathnames
  7. must fall under the database root. The value format is a tuple of the following:
  8. Checksum: The md5 checksum of the file
  9. Mod Time: The time the file was last modified
  10. Timestamp: The time the entry was last modified
  11. Dependencies: A tuple of files upon which this entry depends
  12. This script also provides some default actions:
  13. - insert <database file> <filename>
  14. Inserts this file into the database, or updates its entry if it
  15. already exists.
  16. - remove <database file> <filename>
  17. Removes this file from the database. The filename may also be a
  18. regular expression.
  19. '''
  20. import logger
  21. import errno
  22. import os
  23. import re
  24. import time
  25. import cPickle
  26. try:
  27. from hashlib import md5 as new_md5
  28. except ImportError:
  29. from md5 import new as new_md5
  30. class SourceDB (dict, logger.Logger):
  31. '''A SourceDB is a dictionary of file data used during the build process.'''
  32. includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
  33. isLoading = 0
  34. def __init__(self, root, filename = None):
  35. dict.__init__(self)
  36. logger.Logger.__init__(self)
  37. self.root = root
  38. self.filename = filename
  39. if self.filename is None:
  40. self.filename = os.path.join(str(root), 'bsSource.db')
  41. self.isDirty = 0
  42. return
  43. def __str__(self):
  44. output = ''
  45. for source in self:
  46. (checksum, mtime, timestamp, dependencies) = self[source]
  47. output += source+'\n'
  48. output += ' Checksum: '+str(checksum)+'\n'
  49. output += ' Mod Time: '+str(mtime)+'\n'
  50. output += ' Timestamp: '+str(timestamp)+'\n'
  51. output += ' Deps: '+str(dependencies)+'\n'
  52. return output
  53. def __setstate__(self, d):
  54. logger.Logger.__setstate__(self, d)
  55. # We have to prevent recursive calls to this when the pickled database is loaded in load()
  56. # This is to ensure that fresh copies of the database are obtained after unpickling
  57. if not SourceDB.isLoading:
  58. SourceDB.isLoading = 1
  59. self.load()
  60. SourceDB.isLoading = 0
  61. return
  62. def getRelativePath(self, path):
  63. '''Returns a relative source file path using the root'''
  64. if os.path.isabs(path):
  65. root = str(self.root)
  66. if not path.startswith(root+os.sep):
  67. raise ValueError('Absolute path '+path+' conflicts with root '+root)
  68. else:
  69. path = path[len(root)+1:]
  70. return path
  71. def checkValue(self, value):
  72. '''Validate the value, raising ValueError for problems'''
  73. if not isinstance(value, tuple):
  74. raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
  75. if not len(value) == 4:
  76. raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
  77. (checksum, mtime, timestamp, dependencies) = value
  78. if not isinstance(checksum, str):
  79. raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
  80. if not isinstance(mtime, int):
  81. raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
  82. elif mtime < 0:
  83. raise ValueError('Negative modification time for source database, '+str(mtime))
  84. if not isinstance(timestamp, float):
  85. raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
  86. elif timestamp < 0:
  87. raise ValueError('Negative timestamp for source database, '+str(timestamp))
  88. if not isinstance(dependencies, tuple):
  89. raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
  90. return value
  91. def __getitem__(self, key):
  92. '''Converts the key to a relative source file path using the root'''
  93. return dict.__getitem__(self, self.getRelativePath(key))
  94. def __setitem__(self, key, value):
  95. '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
  96. self.isDirty = 1
  97. return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))
  98. def __delitem__(self, key):
  99. '''Converts the key to a relative source file path using the root'''
  100. self.isDirty = 1
  101. return dict.__delitem__(self, self.getRelativePath(key))
  102. def __contains__(self, key):
  103. '''Converts the key to a relative source file path using the root'''
  104. return dict.__contains__(self, self.getRelativePath(key))
  105. def has_key(self, key):
  106. '''This method just calls self.__contains__(key)'''
  107. return self.__contains__(key)
  108. def items(self):
  109. '''Converts each key to a relative source file path using the root'''
  110. return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]
  111. def keys(self):
  112. '''Converts each key to a relative source file path using the root'''
  113. return map(self.getRelativePath, dict.keys(self))
  114. def update(self, d):
  115. '''Update the dictionary with the contents of d'''
  116. self.isDirty = 1
  117. for k in d:
  118. self[k] = d[k]
  119. return
  120. def getChecksum(source, chunkSize = 1024*1024):
  121. '''Return the md5 checksum for a given file, which may also be specified by its filename
  122. - The chunkSize argument specifies the size of blocks read from the file'''
  123. if isinstance(source, file):
  124. f = source
  125. else:
  126. f = file(source)
  127. m = new_md5()
  128. size = chunkSize
  129. buf = f.read(size)
  130. while buf:
  131. m.update(buf)
  132. buf = f.read(size)
  133. f.close()
  134. return m.hexdigest()
  135. getChecksum = staticmethod(getChecksum)
  136. def getModificationTime(source):
  137. t = os.path.getmtime(source)
  138. if isinstance(t, float):
  139. t = int(t)
  140. return t
  141. getModificationTime = staticmethod(getModificationTime)
  142. def updateSource(self, source, noChecksum = 0):
  143. self.isDirty = 1
  144. dependencies = ()
  145. try:
  146. (checksum, mtime, timestamp, dependencies) = self[source]
  147. except KeyError:
  148. pass
  149. self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
  150. if noChecksum:
  151. checksum = ''
  152. else:
  153. checksum = SourceDB.getChecksum(source)
  154. self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
  155. return
  156. def clearSource(self, source):
  157. '''This removes source information, but preserved dependencies'''
  158. if source in self:
  159. self.isDirty = 1
  160. self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
  161. (checksum, mtime, timestamp, dependencies) = self[source]
  162. self[source] = ('', 0, time.time(), dependencies)
  163. return
  164. def getDependencies(self, source):
  165. try:
  166. (checksum, mtime, timestamp, dependencies) = self[source]
  167. except KeyError:
  168. dependencies = ()
  169. return dependencies
  170. def addDependency(self, source, dependency):
  171. self.isDirty = 1
  172. dependencies = ()
  173. try:
  174. (checksum, mtime, timestamp, dependencies) = self[source]
  175. except KeyError:
  176. checksum = ''
  177. mtime = 0
  178. if not dependency in dependencies:
  179. self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
  180. dependencies = dependencies+(dependency,)
  181. self[source] = (checksum, mtime, time.time(), dependencies)
  182. return
  183. def calculateDependencies(self):
  184. self.logPrint('Recalculating dependencies', 1, 'sourceDB')
  185. for source in self:
  186. self.logPrint('Calculating '+source, 3, 'sourceDB')
  187. (checksum, mtime, timestamp, dependencies) = self[source]
  188. newDep = []
  189. try:
  190. file = file(source)
  191. except IOError, e:
  192. if e.errno == errno.ENOENT:
  193. del self[source]
  194. else:
  195. raise e
  196. comps = source.split('/')
  197. for line in file.xreadlines():
  198. m = self.includeRE.match(line)
  199. if m:
  200. filename = m.group('includeFile')
  201. matchNum = 0
  202. matchName = filename
  203. self.logPrint(' Includes '+filename, 3, 'sourceDB')
  204. for s in self:
  205. if s.find(filename) >= 0:
  206. self.logPrint(' Checking '+s, 3, 'sourceDB')
  207. c = s.split('/')
  208. for i in range(len(c)):
  209. if not comps[i] == c[i]: break
  210. if i > matchNum:
  211. self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
  212. matchName = s
  213. matchNum = i
  214. newDep.append(matchName)
  215. # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
  216. self[source] = (checksum, mtime, timestamp, tuple(newDep))
  217. file.close()
  218. def load(self):
  219. '''Load the source database from the saved filename'''
  220. filename = str(self.filename)
  221. if os.path.exists(filename):
  222. self.clear()
  223. self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
  224. dbFile = file(filename)
  225. newDB = cPickle.load(dbFile)
  226. dbFile.close()
  227. self.update(newDB)
  228. else:
  229. self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
  230. return
  231. def save(self, force = 0):
  232. '''Save the source database to a file. The saved database with have path names relative to the root.'''
  233. if not self.isDirty and not force:
  234. self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
  235. return
  236. filename = str(self.filename)
  237. if os.path.exists(os.path.dirname(filename)):
  238. self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
  239. dbFile = file(filename, 'w')
  240. cPickle.dump(self, dbFile)
  241. dbFile.close()
  242. self.isDirty = 0
  243. else:
  244. self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
  245. return
  246. class DependencyAnalyzer (logger.Logger):
  247. def __init__(self, sourceDB):
  248. logger.Logger.__init__(self)
  249. self.sourceDB = sourceDB
  250. self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
  251. return
  252. def resolveDependency(self, source, dep):
  253. if dep in self.sourceDB: return dep
  254. # Choose the entry in sourceDB whose base matches dep,
  255. # and who has the most path components in common with source
  256. # This should be replaced by an appeal to cpp
  257. matchNum = 0
  258. matchName = dep
  259. components = source.split(os.sep)
  260. self.logPrint(' Includes '+filename, 3, 'sourceDB')
  261. for s in self.sourceDB:
  262. if s.find(dep) >= 0:
  263. self.logPrint(' Checking '+s, 3, 'sourceDB')
  264. comp = s.split(os.sep)
  265. for i in range(len(comp)):
  266. if not components[i] == comp[i]: break
  267. if i > matchNum:
  268. self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
  269. matchName = s
  270. matchNum = i
  271. if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
  272. return matchName
  273. def getNeighbors(self, source):
  274. file = file(source)
  275. adj = []
  276. for line in file.xreadlines():
  277. match = self.includeRE.match(line)
  278. if match:
  279. adj.append(self.resolveDependency(source, m.group('includeFile')))
  280. file.close()
  281. return adj
  282. def calculateDependencies(self):
  283. '''Should this be a generator?
  284. First assemble the DAG using #include relations
  285. Then calculate the depdencies with all pairs shortest-path
  286. - I think Floyd-Warshell and N-source Dijkstra are just as good
  287. '''
  288. # Assembling DAG
  289. dag = {}
  290. for source in self.sourceDB:
  291. try:
  292. dag[source] = self.getNeighbors(self, source)
  293. except IOError, e:
  294. if e.errno == errno.ENOENT:
  295. del self[source]
  296. else:
  297. raise e
  298. # Finding all-pairs shortest path
  299. if __name__ == '__main__':
  300. import sys
  301. try:
  302. if len(sys.argv) < 3:
  303. print 'sourceDatabase.py <database filename> [insert | remove] <filename>'
  304. else:
  305. if os.path.exists(sys.argv[1]):
  306. dbFile = file(sys.argv[1])
  307. sourceDB = cPickle.load(dbFile)
  308. dbFile.close()
  309. else:
  310. sys.exit('Could not load source database from '+sys.argv[1])
  311. if sys.argv[2] == 'insert':
  312. if sys.argv[3] in sourceDB:
  313. self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
  314. else:
  315. self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
  316. self.sourceDB.updateSource(sys.argv[3])
  317. elif sys.argv[2] == 'remove':
  318. if sys.argv[3] in sourceDB:
  319. sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
  320. del self.sourceDB[sys.argv[3]]
  321. else:
  322. sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
  323. removeRE = re.compile(sys.argv[3])
  324. removes = filter(removeRE.match, sourceDB.keys())
  325. for source in removes:
  326. self.logPrint('Removing '+source, 3, 'sourceDB')
  327. del self.sourceDB[source]
  328. else:
  329. sys.exit('Unknown source database action: '+sys.argv[2])
  330. sourceDB.save()
  331. except Exception, e:
  332. import traceback
  333. print traceback.print_tb(sys.exc_info()[2])
  334. sys.exit(str(e))
  335. sys.exit(0)