PageRenderTime 52ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

http://github.com/ghc/ghc
Haskell | 283 lines | 118 code | 28 blank | 137 comment | 0 complexity | 002615396e360e3480bc7c56a3aebba0 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. {-# LANGUAGE CPP #-}
  2. module RegAlloc.Graph.TrivColorable (
  3. trivColorable,
  4. )
  5. where
  6. #include "HsVersions.h"
  7. import RegClass
  8. import Reg
  9. import GraphBase
  10. import UniqFM
  11. import Platform
  12. import Panic
  13. -- trivColorable ---------------------------------------------------------------
  14. -- trivColorable function for the graph coloring allocator
  15. --
  16. -- This gets hammered by scanGraph during register allocation,
  17. -- so needs to be fairly efficient.
  18. --
  19. -- NOTE: This only works for architectures with just RcInteger and RcDouble
  20. -- (which are disjoint) ie. x86, x86_64 and ppc
  21. --
  22. -- The number of allocatable regs is hard coded in here so we can do
  23. -- a fast comparison in trivColorable.
  24. --
  25. -- It's ok if these numbers are _less_ than the actual number of free
  26. -- regs, but they can't be more or the register conflict
  27. -- graph won't color.
  28. --
  29. -- If the graph doesn't color then the allocator will panic, but it won't
  30. -- generate bad object code or anything nasty like that.
  31. --
  32. -- There is an allocatableRegsInClass :: RegClass -> Int, but doing
  33. -- the unboxing is too slow for us here.
  34. -- TODO: Is that still true? Could we use allocatableRegsInClass
  35. -- without losing performance now?
  36. --
  37. -- Look at includes/stg/MachRegs.h to get the numbers.
  38. --
  39. -- Disjoint registers ----------------------------------------------------------
  40. --
  41. -- The definition has been unfolded into individual cases for speed.
  42. -- Each architecture has a different register setup, so we use a
  43. -- different regSqueeze function for each.
  44. --
  45. accSqueeze
  46. :: Int
  47. -> Int
  48. -> (reg -> Int)
  49. -> UniqFM reg
  50. -> Int
  51. accSqueeze count maxCount squeeze ufm = acc count (nonDetEltsUFM ufm)
  52. -- See Note [Unique Determinism and code generation]
  53. where acc count [] = count
  54. acc count _ | count >= maxCount = count
  55. acc count (r:rs) = acc (count + squeeze r) rs
  56. {- Note [accSqueeze]
  57. ~~~~~~~~~~~~~~~~~~~~
  58. BL 2007/09
  59. Doing a nice fold over the UniqSet makes trivColorable use
  60. 32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
  61. Therefore the UniqFM is made non-abstract and we use custom fold.
  62. MS 2010/04
  63. When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
  64. representation any more. But it is imperative that the accSqueeze stops
  65. the folding if the count gets greater than or equal to maxCount. We thus convert
  66. UniqFM to a (lazy) list, do the fold and stop if necessary, which was
  67. the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
  68. (original = previous implementation, folding = fold of the whole UFM,
  69. lazyFold = the current implementation,
  70. hackFold = using internal representation of Data.IntMap)
  71. original folding hackFold lazyFold
  72. -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
  73. 100.00% 96.44% 97.72% 97.12%
  74. -fregs-graph 67.938s 74.875s 62.673s 64.679s
  75. 100.00% 110.21% 92.25% 95.20%
  76. -fregs-iterative 89.761s 143.913s 81.075s 86.912s
  77. 100.00% 160.33% 90.32% 96.83%
  78. -fnew-codegen 38.225s 37.142s 37.551s 37.119s
  79. 100.00% 97.17% 98.24% 97.11%
  80. -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
  81. 100.00% 99.70% 95.19% 94.65%
  82. -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
  83. 100.00% 166.23% 94.18% 100.95%
  84. -}
  85. trivColorable
  86. :: Platform
  87. -> (RegClass -> VirtualReg -> Int)
  88. -> (RegClass -> RealReg -> Int)
  89. -> Triv VirtualReg RegClass RealReg
  90. trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
  91. | let cALLOCATABLE_REGS_INTEGER
  92. = (case platformArch platform of
  93. ArchX86 -> 3
  94. ArchX86_64 -> 5
  95. ArchPPC -> 16
  96. ArchSPARC -> 14
  97. ArchSPARC64 -> panic "trivColorable ArchSPARC64"
  98. ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
  99. ArchARM _ _ _ -> panic "trivColorable ArchARM"
  100. ArchARM64 -> panic "trivColorable ArchARM64"
  101. ArchAlpha -> panic "trivColorable ArchAlpha"
  102. ArchMipseb -> panic "trivColorable ArchMipseb"
  103. ArchMipsel -> panic "trivColorable ArchMipsel"
  104. ArchJavaScript-> panic "trivColorable ArchJavaScript"
  105. ArchUnknown -> panic "trivColorable ArchUnknown")
  106. , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER
  107. (virtualRegSqueeze RcInteger)
  108. conflicts
  109. , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER
  110. (realRegSqueeze RcInteger)
  111. exclusions
  112. = count3 < cALLOCATABLE_REGS_INTEGER
  113. trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
  114. | let cALLOCATABLE_REGS_FLOAT
  115. = (case platformArch platform of
  116. ArchX86 -> 0
  117. ArchX86_64 -> 0
  118. ArchPPC -> 0
  119. ArchSPARC -> 22
  120. ArchSPARC64 -> panic "trivColorable ArchSPARC64"
  121. ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
  122. ArchARM _ _ _ -> panic "trivColorable ArchARM"
  123. ArchARM64 -> panic "trivColorable ArchARM64"
  124. ArchAlpha -> panic "trivColorable ArchAlpha"
  125. ArchMipseb -> panic "trivColorable ArchMipseb"
  126. ArchMipsel -> panic "trivColorable ArchMipsel"
  127. ArchJavaScript-> panic "trivColorable ArchJavaScript"
  128. ArchUnknown -> panic "trivColorable ArchUnknown")
  129. , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
  130. (virtualRegSqueeze RcFloat)
  131. conflicts
  132. , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT
  133. (realRegSqueeze RcFloat)
  134. exclusions
  135. = count3 < cALLOCATABLE_REGS_FLOAT
  136. trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
  137. | let cALLOCATABLE_REGS_DOUBLE
  138. = (case platformArch platform of
  139. ArchX86 -> 6
  140. ArchX86_64 -> 0
  141. ArchPPC -> 26
  142. ArchSPARC -> 11
  143. ArchSPARC64 -> panic "trivColorable ArchSPARC64"
  144. ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
  145. ArchARM _ _ _ -> panic "trivColorable ArchARM"
  146. ArchARM64 -> panic "trivColorable ArchARM64"
  147. ArchAlpha -> panic "trivColorable ArchAlpha"
  148. ArchMipseb -> panic "trivColorable ArchMipseb"
  149. ArchMipsel -> panic "trivColorable ArchMipsel"
  150. ArchJavaScript-> panic "trivColorable ArchJavaScript"
  151. ArchUnknown -> panic "trivColorable ArchUnknown")
  152. , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
  153. (virtualRegSqueeze RcDouble)
  154. conflicts
  155. , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE
  156. (realRegSqueeze RcDouble)
  157. exclusions
  158. = count3 < cALLOCATABLE_REGS_DOUBLE
  159. trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
  160. | let cALLOCATABLE_REGS_SSE
  161. = (case platformArch platform of
  162. ArchX86 -> 8
  163. ArchX86_64 -> 10
  164. ArchPPC -> 0
  165. ArchSPARC -> 0
  166. ArchSPARC64 -> panic "trivColorable ArchSPARC64"
  167. ArchPPC_64 _ -> panic "trivColorable ArchPPC_64"
  168. ArchARM _ _ _ -> panic "trivColorable ArchARM"
  169. ArchARM64 -> panic "trivColorable ArchARM64"
  170. ArchAlpha -> panic "trivColorable ArchAlpha"
  171. ArchMipseb -> panic "trivColorable ArchMipseb"
  172. ArchMipsel -> panic "trivColorable ArchMipsel"
  173. ArchJavaScript-> panic "trivColorable ArchJavaScript"
  174. ArchUnknown -> panic "trivColorable ArchUnknown")
  175. , count2 <- accSqueeze 0 cALLOCATABLE_REGS_SSE
  176. (virtualRegSqueeze RcDoubleSSE)
  177. conflicts
  178. , count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE
  179. (realRegSqueeze RcDoubleSSE)
  180. exclusions
  181. = count3 < cALLOCATABLE_REGS_SSE
  182. -- Specification Code ----------------------------------------------------------
  183. --
  184. -- The trivColorable function for each particular architecture should
  185. -- implement the following function, but faster.
  186. --
  187. {-
  188. trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
  189. trivColorable classN conflicts exclusions
  190. = let
  191. acc :: Reg -> (Int, Int) -> (Int, Int)
  192. acc r (cd, cf)
  193. = case regClass r of
  194. RcInteger -> (cd+1, cf)
  195. RcFloat -> (cd, cf+1)
  196. _ -> panic "Regs.trivColorable: reg class not handled"
  197. tmp = nonDetFoldUFM acc (0, 0) conflicts
  198. (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions
  199. squeese = worst countInt classN RcInteger
  200. + worst countFloat classN RcFloat
  201. in squeese < allocatableRegsInClass classN
  202. -- | Worst case displacement
  203. -- node N of classN has n neighbors of class C.
  204. --
  205. -- We currently only have RcInteger and RcDouble, which don't conflict at all.
  206. -- This is a bit boring compared to what's in RegArchX86.
  207. --
  208. worst :: Int -> RegClass -> RegClass -> Int
  209. worst n classN classC
  210. = case classN of
  211. RcInteger
  212. -> case classC of
  213. RcInteger -> min n (allocatableRegsInClass RcInteger)
  214. RcFloat -> 0
  215. RcDouble
  216. -> case classC of
  217. RcFloat -> min n (allocatableRegsInClass RcFloat)
  218. RcInteger -> 0
  219. -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  220. -- i.e., these are the regs for which we are prepared to allow the
  221. -- register allocator to attempt to map VRegs to.
  222. allocatableRegs :: [RegNo]
  223. allocatableRegs
  224. = let isFree i = freeReg i
  225. in filter isFree allMachRegNos
  226. -- | The number of regs in each class.
  227. -- We go via top level CAFs to ensure that we're not recomputing
  228. -- the length of these lists each time the fn is called.
  229. allocatableRegsInClass :: RegClass -> Int
  230. allocatableRegsInClass cls
  231. = case cls of
  232. RcInteger -> allocatableRegsInteger
  233. RcFloat -> allocatableRegsFloat
  234. allocatableRegsInteger :: Int
  235. allocatableRegsInteger
  236. = length $ filter (\r -> regClass r == RcInteger)
  237. $ map RealReg allocatableRegs
  238. allocatableRegsFloat :: Int
  239. allocatableRegsFloat
  240. = length $ filter (\r -> regClass r == RcFloat)
  241. $ map RealReg allocatableRegs
  242. -}