PageRenderTime 56ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

http://github.com/ilyasergey/GHC-XAppFix
Haskell | 260 lines | 95 code | 29 blank | 136 comment | 0 complexity | c05fb5ee3f3cde0c96fb68649a1b0375 MD5 | raw file
  1. {-# LANGUAGE BangPatterns #-}
  2. module RegAlloc.Graph.TrivColorable (
  3. trivColorable,
  4. )
  5. where
  6. #include "HsVersions.h"
  7. import RegClass
  8. import Reg
  9. import GraphBase
  10. import UniqFM
  11. import FastTypes
  12. import Platform
  13. import Panic
  14. -- trivColorable ---------------------------------------------------------------
  15. -- trivColorable function for the graph coloring allocator
  16. --
  17. -- This gets hammered by scanGraph during register allocation,
  18. -- so needs to be fairly efficient.
  19. --
  20. -- NOTE: This only works for architectures with just RcInteger and RcDouble
  21. -- (which are disjoint), i.e. x86, x86_64 and ppc
  22. --
  23. -- The number of allocatable regs is hard coded in here so we can do
  24. -- a fast comparison in trivColorable.
  25. --
  26. -- It's ok if these numbers are _less_ than the actual number of free
  27. -- regs, but they can't be more or the register conflict
  28. -- graph won't color.
  29. --
  30. -- If the graph doesn't color then the allocator will panic, but it won't
  31. -- generate bad object code or anything nasty like that.
  32. --
  33. -- There is an allocatableRegsInClass :: RegClass -> Int, but doing
  34. -- the unboxing is too slow for us here.
  35. -- TODO: Is that still true? Could we use allocatableRegsInClass
  36. -- without losing performance now?
  37. --
  38. -- Look at includes/stg/MachRegs.h to get the numbers.
  39. --
  40. -- Disjoint registers ----------------------------------------------------------
  41. --
  42. -- The definition has been unfolded into individual cases for speed.
  43. -- Each architecture has a different register setup, so we use a
  44. -- different regSqueeze function for each.
  45. --
  -- | Accumulate the squeeze of every register in a 'UniqFM' on top of an
  --   initial count, giving up as soon as the running total has reached
  --   the supplied maximum.
  --
  --   The map is turned into a list with 'eltsUFM' and walked from the
  --   front.  The walk ends either when the list is exhausted or when the
  --   running total is already greater than or equal to the maximum; in
  --   both cases the running total at that point is returned.  Because the
  --   cut-off is tested before each addition, the result can overshoot the
  --   maximum by the squeeze of the last register that was added.
  accSqueeze
          :: FastInt              -- ^ count to start from
          -> FastInt              -- ^ maximum; accumulation stops once it is reached
          -> (reg -> FastInt)     -- ^ squeeze contributed by one register
          -> UniqFM reg           -- ^ registers to accumulate over
          -> FastInt              -- ^ running total when the walk stopped
  accSqueeze count maxCount squeeze ufm = go count (eltsUFM ufm)
    where
      -- The list is inspected before the cut-off test, mirroring the
      -- equation order of the classic three-clause formulation.
      go total regs = case regs of
        []                      -> total
        r : rest
          | total >=# maxCount  -> total
          | otherwise           -> go (total +# squeeze r) rest
  56. {- Note [accSqueeze]
  57. ~~~~~~~~~~~~~~~~~~~~
  58. BL 2007/09
  59. Doing a nice fold over the UniqSet makes trivColorable use
  60. 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
  61. Therefore the UniqFM is made non-abstract and we use custom fold.
  62. MS 2010/04
  63. When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
  64. representation any more. But it is imperative that accSqueeze stops
  65. the folding if the count gets greater or equal to maxCount. We thus convert
  66. UniqFM to a (lazy) list, do the fold and stop if necessary, which was
  67. the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows.
  68. (original = previous implementation, folding = fold of the whole UFM,
  69. lazyFold = the current implementation,
  70. hackFold = using internal representation of Data.IntMap)
  71. original folding hackFold lazyFold
  72. -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
  73. 100.00% 96.44% 97.72% 97.12%
  74. -fregs-graph 67.938s 74.875s 62.673s 64.679s
  75. 100.00% 110.21% 92.25% 95.20%
  76. -fregs-iterative 89.761s 143.913s 81.075s 86.912s
  77. 100.00% 160.33% 90.32% 96.83%
  78. -fnew-codegen 38.225s 37.142s 37.551s 37.119s
  79. 100.00% 97.17% 98.24% 97.11%
  80. -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
  81. 100.00% 99.70% 95.19% 94.65%
  82. -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
  83. 100.00% 166.23% 94.18% 100.95%
  84. -}
  -- | Decide whether a node of the given register class is trivially
  --   colorable, given the virtual registers it conflicts with and the
  --   real registers it is excluded from.
  --
  --   For the node's class, the squeeze of the conflicts is accumulated
  --   first and the squeeze of the exclusions is then added on top, both
  --   via 'accSqueeze' (which stops early once the limit is reached).  The
  --   node is trivially colorable when that total is strictly less than
  --   the hard-coded number of allocatable registers for the class on the
  --   target architecture.
  --
  --   There is one equation per register class, each with its own table
  --   of per-architecture limits.  Evaluating the limit panics for
  --   ArchPPC_64, ArchARM and ArchUnknown, for which no numbers are given.
  trivColorable
          :: Platform
          -> (RegClass -> VirtualReg -> FastInt)  -- ^ squeeze of a virtual reg for a class
          -> (RegClass -> RealReg    -> FastInt)  -- ^ squeeze of a real reg for a class
          -> Triv VirtualReg RegClass RealReg

  -- Integer registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
          | let !maxIntRegs
                  = iUnbox (case platformArch platform of
                              ArchX86       -> 3
                              ArchX86_64    -> 5
                              ArchPPC       -> 16
                              ArchSPARC     -> 14
                              ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                              ArchARM _ _   -> panic "trivColorable ArchARM"
                              ArchUnknown   -> panic "trivColorable ArchUnknown")
          , fromConflicts <- accSqueeze (_ILIT(0)) maxIntRegs
                                  (virtualRegSqueeze RcInteger) conflicts
          , total         <- accSqueeze fromConflicts maxIntRegs
                                  (realRegSqueeze RcInteger) exclusions
          = total <# maxIntRegs

  -- Single-precision float registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
          | let !maxFloatRegs
                  = iUnbox (case platformArch platform of
                              ArchX86       -> 0
                              ArchX86_64    -> 0
                              ArchPPC       -> 0
                              ArchSPARC     -> 22
                              ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                              ArchARM _ _   -> panic "trivColorable ArchARM"
                              ArchUnknown   -> panic "trivColorable ArchUnknown")
          , fromConflicts <- accSqueeze (_ILIT(0)) maxFloatRegs
                                  (virtualRegSqueeze RcFloat) conflicts
          , total         <- accSqueeze fromConflicts maxFloatRegs
                                  (realRegSqueeze RcFloat) exclusions
          = total <# maxFloatRegs

  -- Double-precision float registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
          | let !maxDoubleRegs
                  = iUnbox (case platformArch platform of
                              ArchX86       -> 6
                              ArchX86_64    -> 0
                              ArchPPC       -> 26
                              ArchSPARC     -> 11
                              ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                              ArchARM _ _   -> panic "trivColorable ArchARM"
                              ArchUnknown   -> panic "trivColorable ArchUnknown")
          , fromConflicts <- accSqueeze (_ILIT(0)) maxDoubleRegs
                                  (virtualRegSqueeze RcDouble) conflicts
          , total         <- accSqueeze fromConflicts maxDoubleRegs
                                  (realRegSqueeze RcDouble) exclusions
          = total <# maxDoubleRegs

  -- SSE double registers.
  trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
          | let !maxSseRegs
                  = iUnbox (case platformArch platform of
                              ArchX86       -> 8
                              ArchX86_64    -> 10
                              ArchPPC       -> 0
                              ArchSPARC     -> 0
                              ArchPPC_64    -> panic "trivColorable ArchPPC_64"
                              ArchARM _ _   -> panic "trivColorable ArchARM"
                              ArchUnknown   -> panic "trivColorable ArchUnknown")
          , fromConflicts <- accSqueeze (_ILIT(0)) maxSseRegs
                                  (virtualRegSqueeze RcDoubleSSE) conflicts
          , total         <- accSqueeze fromConflicts maxSseRegs
                                  (realRegSqueeze RcDoubleSSE) exclusions
          = total <# maxSseRegs
  158. -- Specification Code ----------------------------------------------------------
  159. --
  160. -- The trivColorable function for each particular architecture should
  161. -- implement the following function, but faster.
  162. --
  163. {-
  164. trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
  165. trivColorable classN conflicts exclusions
  166. = let
  167. acc :: Reg -> (Int, Int) -> (Int, Int)
  168. acc r (cd, cf)
  169. = case regClass r of
  170. RcInteger -> (cd+1, cf)
  171. RcFloat -> (cd, cf+1)
  172. _ -> panic "Regs.trivColorable: reg class not handled"
  173. tmp = foldUniqSet acc (0, 0) conflicts
  174. (countInt, countFloat) = foldUniqSet acc tmp exclusions
  175. squeese = worst countInt classN RcInteger
  176. + worst countFloat classN RcFloat
  177. in squeese < allocatableRegsInClass classN
  178. -- | Worst case displacement
  179. -- node N of classN has n neighbors of class C.
  180. --
  181. -- We currently only have RcInteger and RcDouble, which don't conflict at all.
  182. -- This is a bit boring compared to what's in RegArchX86.
  183. --
  184. worst :: Int -> RegClass -> RegClass -> Int
  185. worst n classN classC
  186. = case classN of
  187. RcInteger
  188. -> case classC of
  189. RcInteger -> min n (allocatableRegsInClass RcInteger)
  190. RcFloat -> 0
  191. RcDouble
  192. -> case classC of
  193. RcFloat -> min n (allocatableRegsInClass RcFloat)
  194. RcInteger -> 0
  195. -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  196. -- i.e., these are the regs for which we are prepared to allow the
  197. -- register allocator to attempt to map VRegs to.
  198. allocatableRegs :: [RegNo]
  199. allocatableRegs
  200. = let isFree i = isFastTrue (freeReg i)
  201. in filter isFree allMachRegNos
  202. -- | The number of regs in each class.
  203. -- We go via top level CAFs to ensure that we're not recomputing
  204. -- the length of these lists each time the fn is called.
  205. allocatableRegsInClass :: RegClass -> Int
  206. allocatableRegsInClass cls
  207. = case cls of
  208. RcInteger -> allocatableRegsInteger
  209. RcFloat -> allocatableRegsFloat
  210. allocatableRegsInteger :: Int
  211. allocatableRegsInteger
  212. = length $ filter (\r -> regClass r == RcInteger)
  213. $ map RealReg allocatableRegs
  214. allocatableRegsFloat :: Int
  215. allocatableRegsFloat
  216. = length $ filter (\r -> regClass r == RcFloat)
  217. $ map RealReg allocatableRegs
  218. -}