/test/python/WMCore_t/JobSplitting_t/LumiBased_t.py

https://github.com/dmwm/WMCore

#!/usr/bin/env python
"""
_LumiBased_t_

Lumi based splitting tests, using the DataStructs classes.
See WMCore/WMBS/JobSplitting/ for the WMBS (SQL database) version.
"""

from builtins import next, range

import unittest

from WMCore.DataStructs.File import File
from WMCore.DataStructs.Fileset import Fileset
from WMCore.DataStructs.Job import Job
from WMCore.DataStructs.Subscription import Subscription
from WMCore.DataStructs.Workflow import Workflow
from WMCore.DataStructs.Run import Run

from WMCore.JobSplitting.SplitterFactory import SplitterFactory
from WMCore.Services.UUIDLib import makeUUID


class LumiBasedTest(unittest.TestCase):
    """
    _LumiBasedTest_

    Test lumi based job splitting.
    """

    def setUp(self):
        """
        _setUp_

        Create the workflow and performance parameters used by all tests;
        subscriptions are built per test via createSubscription.
        """
        self.testWorkflow = Workflow()

        self.performanceParams = {'timePerEvent' : 12,
                                  'memoryRequirement' : 2300,
                                  'sizePerEvent' : 400}

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to do...
        """
        pass
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False):
        """
        _createSubscription_

        Create a subscription for testing
        """
        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100)
            lumis = []
            for lumi in range(lumisPerFile):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.setLocation('blenheim')
            testFileset.addFile(newFile)
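        # At this point each file i carries a single run numbered i whose lumis
        # run from i * 100 to i * 100 + lumisPerFile - 1, all located at
        # 'blenheim'; e.g. nFiles = 2, lumisPerFile = 2 gives run 0 -> lumis
        # [0, 1] and run 1 -> lumis [100, 101].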
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100)
                lumis = []
                for lumi in range(lumisPerFile):
                    lumis.append(5 + 10 * (i * 100) + lumi)  # lumis should be different
                newFile.addRun(Run(i, *lumis))
                newFile.setLocation('malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "LumiBased",
                                        type = "Processing")

        return testSubscription
    def testA_FileSplitting(self):
        """
        _FileSplitting_

        Test that things work if we split files between jobs
        """
        splitter = SplitterFactory()

        oneSetSubscription = self.createSubscription(nFiles = 10, lumisPerFile = 1)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = oneSetSubscription)

        jobGroups = jobFactory(lumis_per_job = 3,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        twoLumiFiles = self.createSubscription(nFiles = 5, lumisPerFile = 2)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = twoLumiFiles)
        jobGroups = jobFactory(lumis_per_job = 1,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        wholeLumiFiles = self.createSubscription(nFiles = 5, lumisPerFile = 3)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = wholeLumiFiles)
        jobGroups = jobFactory(lumis_per_job = 2,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        # 10 because we split on run boundaries
        self.assertEqual(len(jobGroups[0].jobs), 10)
        jobList = jobGroups[0].jobs
        for job in jobList:
            # Half should have one file, half two
            self.assertTrue(len(job['input_files']) in [1, 2])
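        # With 3 lumis per file and 2 lumis per job, each file is split into a
        # two-lumi job and a one-lumi job: file 0 (run 0, lumis 0-2) yields
        # masks [[0, 1]] and [[2, 2]], file 1 (run 1, lumis 100-102) yields
        # [[100, 101]] and [[102, 102]], and so on.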
        mask0 = jobList[0]['mask'].getRunAndLumis()
        self.assertEqual(mask0, {0: [[0, 1]]})
        mask1 = jobList[1]['mask'].getRunAndLumis()
        self.assertEqual(mask1, {0: [[2, 2]]})
        mask2 = jobList[2]['mask'].getRunAndLumis()
        self.assertEqual(mask2, {1: [[100, 101]]})
        mask3 = jobList[3]['mask'].getRunAndLumis()
        self.assertEqual(mask3, {1: [[102, 102]]})

        self.assertEqual(jobList[0]['mask'].getRunAndLumis(), {0: [[0, 1]]})

        # Do it with multiple sites
        twoSiteSubscription = self.createSubscription(nFiles = 5, lumisPerFile = 2, twoSites = True)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = twoSiteSubscription)
        jobGroups = jobFactory(lumis_per_job = 1,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 2)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

    def testB_NoRunNoFileSplitting(self):
        """
        _NoRunNoFileSplitting_

        Test the splitting algorithm in the odder fringe
        cases that might be required.
        """
        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles = 5, lumisPerFile = 5, twoSites = False)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        jobGroups = jobFactory(lumis_per_job = 3,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 9)
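        # 5 files x 5 lumis = 25 lumis in total; with 3 lumis per job and no
        # run or file boundary enforced, they pack into ceil(25 / 3) = 9 jobs.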
        # The first job should have three lumis from one run.
        # The second should have three lumis from two different runs.
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
        job1runLumi = jobs[1]['mask'].getRunAndLumis()
        self.assertEqual(job1runLumi[0][0][0] + 1, job1runLumi[0][0][1])  # Run 0, startLumi + 1 == endLumi
        self.assertEqual(job1runLumi[1][0][0], job1runLumi[1][0][1])  # Run 1, startLumi == endLumi

        # Assert that this works differently with file splitting on and run splitting on
        testSubscription = self.createSubscription(nFiles = 5, lumisPerFile = 5, twoSites = False)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        jobGroups = jobFactory(lumis_per_job = 3,
                               halt_job_on_file_boundaries = True,
                               splitOnRun = True,
                               performance = self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
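        # With splitOnRun and halt_job_on_file_boundaries enabled, each file
        # (one run of 5 lumis) is split on its own into ceil(5 / 3) = 2 jobs,
        # giving 5 * 2 = 10 jobs.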
        # In this case it should slice things up so that each job only has
        # one run in it.
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[3, 4]]})

        return

    def testC_LumiCorrections(self):
        """
        _LumiCorrections_

        Test that the splitting algorithm can handle lumis which
        cross multiple files.
        """
        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles = 2, lumisPerFile = 2, twoSites = False)
        files = testSubscription.getFileset().getFiles()
        self.assertEqual(len(files), 2)
        for runObj in files[0]['runs']:
            if runObj.run != 0:
                continue
            runObj.appendLumi(42)
        for runObj in files[1]['runs']:
            if runObj.run != 1:
                continue
            runObj.run = 0
            runObj.appendLumi(42)
        files[1]['locations'] = set(['blenheim'])
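        # Both files now contain run 0 with lumi 42, i.e. a lumi crossing two
        # files; with applyLumiCorrection the job covering that lumi should
        # pick up both files as input.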
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        jobGroups = jobFactory(lumis_per_job = 3,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               performance = self.performanceParams,
                               applyLumiCorrection = True)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        self.assertEqual(len(jobs[0]['input_files']), 2)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 1], [42, 42]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[100, 101]]})

        # Test that we are not removing all the lumis from the jobs anymore
        removedLumi = self.createSubscription(nFiles = 4, lumisPerFile = 1)
        # Set the run/lumi of file 0 to run 1 / lumi 100, the same as file 1
        runObj = next(iter(removedLumi.getFileset().getFiles()[0]['runs']))
        runObj.run = 1
        runObj[0] = 100
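        # File 0 now carries run 1 / lumi 100, duplicating file 1, so with
        # applyLumiCorrection both files should feed the job for that lumi and
        # we should get 3 jobs instead of 4.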
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = removedLumi)
        jobGroups = jobFactory(lumis_per_job = 1,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams,
                               applyLumiCorrection = True)

        # We need to end up with 3 jobs and one job with two input files
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.assertEqual(len(jobs[0]['input_files']), 2)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        self.assertEqual(len(jobs[2]['input_files']), 1)
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[100, 100]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {2: [[200, 200]]})
        self.assertEqual(jobs[2]['mask'].getRunAndLumis(), {3: [[300, 300]]})

        # Check that if the last two jobs have the same duplicated lumi you do not get an error
        testSubscription = self.createSubscription(nFiles = 2, lumisPerFile = 2,
                                                   twoSites = False)
        files = testSubscription.getFileset().getFiles()
        # Now modifying and adding the same duplicated lumis in the Nth and Nth-1 jobs
        for runObj in files[0]['runs']:
            if runObj.run != 0:
                continue
            runObj.appendLumi(42)
        for runObj in files[1]['runs']:
            runObj.run = 0
            runObj.lumis = [42]
        files[1]['locations'] = set(['blenheim'])
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        jobGroups = jobFactory(events_per_job = 50,
                               halt_job_on_file_boundaries = True,
                               performance = self.performanceParams,
                               applyLumiCorrection = True)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)


if __name__ == '__main__':
    unittest.main()