PageRenderTime 74ms CodeModel.GetById 38ms RepoModel.GetById 5ms app.codeStats 0ms

/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

https://github.com/dorchard/ghc
Haskell | 272 lines | 107 code | 29 blank | 136 comment | 0 complexity | c2fccdfdcb29c29f207a0bd83fe177f8 MD5 | raw file
  1. {-# LANGUAGE BangPatterns #-}
  2. module RegAlloc.Graph.TrivColorable (
  3. trivColorable,
  4. )
  5. where
  6. #include "HsVersions.h"
  7. import RegClass
  8. import Reg
  9. import GraphBase
  10. import UniqFM
  11. import FastTypes
  12. import Platform
  13. import Panic
  14. -- trivColorable ---------------------------------------------------------------
  15. -- trivColorable function for the graph coloring allocator
  16. --
  17. -- This gets hammered by scanGraph during register allocation,
  18. -- so needs to be fairly efficient.
  19. --
  20. -- NOTE: This only works for architectures with just RcInteger and RcDouble
  21. -- (which are disjoint) ie. x86, x86_64 and ppc
  22. --
  23. -- The number of allocatable regs is hard coded in here so we can do
  24. -- a fast comparison in trivColorable.
  25. --
  26. -- It's ok if these numbers are _less_ than the actual number of free
  27. -- regs, but they can't be more or the register conflict
  28. -- graph won't color.
  29. --
  30. -- If the graph doesn't color then the allocator will panic, but it won't
  31. -- generate bad object code or anything nasty like that.
  32. --
  33. -- There is an allocatableRegsInClass :: RegClass -> Int, but doing
  34. -- the unboxing is too slow for us here.
  35. -- TODO: Is that still true? Could we use allocatableRegsInClass
  36. -- without losing performance now?
  37. --
  38. -- Look at includes/stg/MachRegs.h to get the numbers.
  39. --
  40. -- Disjoint registers ----------------------------------------------------------
  41. --
  42. -- The definition has been unfolded into individual cases for speed.
  43. -- Each architecture has a different register setup, so we use a
  44. -- different regSqueeze function for each.
  45. --
  -- | Add up the squeeze contributed by every element of a 'UniqFM',
  --   starting from an initial running total and giving up as soon as the
  --   total has reached the supplied cut-off.
  --
  --   The result is the running total at the point the walk stopped: either
  --   all elements were consumed, or the total was found to be
  --   @>= maxCount@ before looking at the next element.  Because the cut-off
  --   is tested /before/ each addition, the returned value may exceed
  --   @maxCount@.
  --
  --   See Note [accSqueeze] for why this is a hand-written loop over a list
  --   rather than a fold over the map.
  accSqueeze
          :: FastInt              -- ^ running total to start from
          -> FastInt              -- ^ cut-off at which the walk is abandoned
          -> (reg -> FastInt)     -- ^ squeeze contributed by one element
          -> UniqFM reg           -- ^ elements to walk over
          -> FastInt

  accSqueeze count maxCount squeeze ufm = go count (eltsUFM ufm)
    where
      -- The list is inspected first; only when there is a next element do
      -- we test the cut-off, and only when below it do we add that
      -- element's squeeze and continue.
      go total []             = total
      go total (r : rest)
        | total >=# maxCount  = total
        | otherwise           = go (total +# squeeze r) rest
  56. {- Note [accSqueeze]
  57. ~~~~~~~~~~~~~~~~~~~~
  58. BL 2007/09
  59. Doing a nice fold over the UniqSet makes trivColorable use
  60. 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
  61. Therefore the UniqFM is made non-abstract and we use custom fold.
  62. MS 2010/04
  63. When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
  64. representation any more. But it is imperative that accSqueeze stops
  65. the folding if the count gets greater than or equal to maxCount. We thus convert
  66. UniqFM to a (lazy) list, do the fold and stop if necessary, which was
  67. the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows.
  68. (original = previous implementation, folding = fold of the whole UFM,
  69. lazyFold = the current implementation,
  70. hackFold = using internal representation of Data.IntMap)
  71. original folding hackFold lazyFold
  72. -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
  73. 100.00% 96.44% 97.72% 97.12%
  74. -fregs-graph 67.938s 74.875s 62.673s 64.679s
  75. 100.00% 110.21% 92.25% 95.20%
  76. -fregs-iterative 89.761s 143.913s 81.075s 86.912s
  77. 100.00% 160.33% 90.32% 96.83%
  78. -fnew-codegen 38.225s 37.142s 37.551s 37.119s
  79. 100.00% 97.17% 98.24% 97.11%
  80. -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
  81. 100.00% 99.70% 95.19% 94.65%
  82. -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
  83. 100.00% 166.23% 94.18% 100.95%
  84. -}
  -- | Decide whether a node of the given register class is trivially
  --   colorable.
  --
  --   For the node's class a hard-coded, per-architecture number of
  --   allocatable registers is selected.  The squeeze of every conflicting
  --   virtual register is then accumulated, followed by the squeeze of every
  --   excluded real register (both via 'accSqueeze', which stops early once
  --   the limit is reached).  The node is trivially colorable when the
  --   accumulated total is strictly less than the limit.
  --
  --   Architectures without an entry in the tables below make the function
  --   'panic' as soon as the limit is computed.
  --
  --   The four clauses are intentionally written out one per register class
  --   instead of sharing a helper; the surrounding commentary in this module
  --   explains that the definition is kept unfolded for speed.
  trivColorable
          :: Platform
          -> (RegClass -> VirtualReg -> FastInt)  -- ^ squeeze of a virtual reg on a class
          -> (RegClass -> RealReg    -> FastInt)  -- ^ squeeze of a real reg on a class
          -> Triv VirtualReg RegClass RealReg

  -- Integer registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
    = case iUnbox (case platformArch platform of
                      ArchX86       -> 3
                      ArchX86_64    -> 5
                      ArchPPC       -> 16
                      ArchSPARC     -> 14
                      ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                      ArchARM _ _ _ -> panic "trivColorable ArchARM"
                      ArchAlpha     -> panic "trivColorable ArchAlpha"
                      ArchMipseb    -> panic "trivColorable ArchMipseb"
                      ArchMipsel    -> panic "trivColorable ArchMipsel"
                      ArchUnknown   -> panic "trivColorable ArchUnknown") of
        -- The bang forces the limit (and therefore any panic) up front.
        !maxRegs ->
          case accSqueeze (_ILIT(0)) maxRegs (virtualRegSqueeze RcInteger) conflicts of
            fromConflicts ->
              case accSqueeze fromConflicts maxRegs (realRegSqueeze RcInteger) exclusions of
                total -> total <# maxRegs

  -- Single-precision float registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
    = case iUnbox (case platformArch platform of
                      ArchX86       -> 0
                      ArchX86_64    -> 0
                      ArchPPC       -> 0
                      ArchSPARC     -> 22
                      ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                      ArchARM _ _ _ -> panic "trivColorable ArchARM"
                      ArchAlpha     -> panic "trivColorable ArchAlpha"
                      ArchMipseb    -> panic "trivColorable ArchMipseb"
                      ArchMipsel    -> panic "trivColorable ArchMipsel"
                      ArchUnknown   -> panic "trivColorable ArchUnknown") of
        !maxRegs ->
          case accSqueeze (_ILIT(0)) maxRegs (virtualRegSqueeze RcFloat) conflicts of
            fromConflicts ->
              case accSqueeze fromConflicts maxRegs (realRegSqueeze RcFloat) exclusions of
                total -> total <# maxRegs

  -- Double-precision float registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
    = case iUnbox (case platformArch platform of
                      ArchX86       -> 6
                      ArchX86_64    -> 0
                      ArchPPC       -> 26
                      ArchSPARC     -> 11
                      ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                      ArchARM _ _ _ -> panic "trivColorable ArchARM"
                      ArchAlpha     -> panic "trivColorable ArchAlpha"
                      ArchMipseb    -> panic "trivColorable ArchMipseb"
                      ArchMipsel    -> panic "trivColorable ArchMipsel"
                      ArchUnknown   -> panic "trivColorable ArchUnknown") of
        !maxRegs ->
          case accSqueeze (_ILIT(0)) maxRegs (virtualRegSqueeze RcDouble) conflicts of
            fromConflicts ->
              case accSqueeze fromConflicts maxRegs (realRegSqueeze RcDouble) exclusions of
                total -> total <# maxRegs

  -- SSE double registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
    = case iUnbox (case platformArch platform of
                      ArchX86       -> 8
                      ArchX86_64    -> 10
                      ArchPPC       -> 0
                      ArchSPARC     -> 0
                      ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                      ArchARM _ _ _ -> panic "trivColorable ArchARM"
                      ArchAlpha     -> panic "trivColorable ArchAlpha"
                      ArchMipseb    -> panic "trivColorable ArchMipseb"
                      ArchMipsel    -> panic "trivColorable ArchMipsel"
                      ArchUnknown   -> panic "trivColorable ArchUnknown") of
        !maxRegs ->
          case accSqueeze (_ILIT(0)) maxRegs (virtualRegSqueeze RcDoubleSSE) conflicts of
            fromConflicts ->
              case accSqueeze fromConflicts maxRegs (realRegSqueeze RcDoubleSSE) exclusions of
                total -> total <# maxRegs
  170. -- Specification Code ----------------------------------------------------------
  171. --
  172. -- The trivColorable function for each particular architecture should
  173. -- implement the following function, but faster.
  174. --
  175. {-
  176. trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
  177. trivColorable classN conflicts exclusions
  178. = let
  179. acc :: Reg -> (Int, Int) -> (Int, Int)
  180. acc r (cd, cf)
  181. = case regClass r of
  182. RcInteger -> (cd+1, cf)
  183. RcFloat -> (cd, cf+1)
  184. _ -> panic "Regs.trivColorable: reg class not handled"
  185. tmp = foldUniqSet acc (0, 0) conflicts
  186. (countInt, countFloat) = foldUniqSet acc tmp exclusions
  187. squeese = worst countInt classN RcInteger
  188. + worst countFloat classN RcFloat
  189. in squeese < allocatableRegsInClass classN
  190. -- | Worst case displacement
  191. -- node N of classN has n neighbors of class C.
  192. --
  193. -- We currently only have RcInteger and RcDouble, which don't conflict at all.
  194. -- This is a bit boring compared to what's in RegArchX86.
  195. --
  196. worst :: Int -> RegClass -> RegClass -> Int
  197. worst n classN classC
  198. = case classN of
  199. RcInteger
  200. -> case classC of
  201. RcInteger -> min n (allocatableRegsInClass RcInteger)
  202. RcFloat -> 0
  203. RcDouble
  204. -> case classC of
  205. RcFloat -> min n (allocatableRegsInClass RcFloat)
  206. RcInteger -> 0
  207. -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  208. -- i.e., these are the regs for which we are prepared to allow the
  209. -- register allocator to attempt to map VRegs to.
  210. allocatableRegs :: [RegNo]
  211. allocatableRegs
  212. = let isFree i = isFastTrue (freeReg i)
  213. in filter isFree allMachRegNos
  214. -- | The number of regs in each class.
  215. -- We go via top level CAFs to ensure that we're not recomputing
  216. -- the length of these lists each time the fn is called.
  217. allocatableRegsInClass :: RegClass -> Int
  218. allocatableRegsInClass cls
  219. = case cls of
  220. RcInteger -> allocatableRegsInteger
  221. RcFloat -> allocatableRegsDouble
  222. allocatableRegsInteger :: Int
  223. allocatableRegsInteger
  224. = length $ filter (\r -> regClass r == RcInteger)
  225. $ map RealReg allocatableRegs
  226. allocatableRegsFloat :: Int
  227. allocatableRegsFloat
  228. = length $ filter (\r -> regClass r == RcFloat
  229. $ map RealReg allocatableRegs
  230. -}