PageRenderTime 61ms CodeModel.GetById 33ms RepoModel.GetById 1ms app.codeStats 0ms

/ghc-7.0.4/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs

http://picorec.googlecode.com/
Haskell | 250 lines | 79 code | 38 blank | 133 comment | 1 complexity | 60fac395c3d4a72259a5432935544d97 MD5 | raw file
Possible License(s): BSD-3-Clause, BSD-2-Clause
  1. {-# OPTIONS -fno-warn-unused-binds #-}
  2. module RegAlloc.Graph.TrivColorable (
  3. trivColorable,
  4. )
  5. where
  6. #include "HsVersions.h"
  7. import RegClass
  8. import Reg
  9. import GraphBase
  10. import UniqFM
  11. import FastTypes
  12. -- trivColorable ---------------------------------------------------------------
  13. -- trivColorable function for the graph coloring allocator
  14. --
  15. -- This gets hammered by scanGraph during register allocation,
  16. -- so needs to be fairly efficient.
  17. --
  18. -- NOTE: This only works for architectures with just RcInteger and RcDouble
  19. -- (which are disjoint) ie. x86, x86_64 and ppc
  20. --
  21. -- The number of allocatable regs is hard coded here so we can do a fast
  22. -- comparison in trivColorable.
  23. --
  24. -- It's ok if these numbers are _less_ than the actual number of free regs,
  25. -- but they can't be more or the register conflict graph won't color.
  26. --
  27. -- If the graph doesn't color then the allocator will panic, but it won't
  28. -- generate bad object code or anything nasty like that.
  29. --
  30. -- There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
  31. -- is too slow for us here.
  32. --
  33. -- Look at includes/stg/MachRegs.h to get these numbers.
  34. --
  -- Per-target register budgets, chosen by the preprocessor.
  --
  -- Exactly one branch below is kept, depending on which <arch>_TARGET_ARCH
  -- macro is true.  Each branch defines four macros, one per register class
  -- handled by trivColorable (integer, double, float, SSE).  trivColorable
  -- passes the macro for the class being coloured to accSqueeze as the limit
  -- and then tests whether the accumulated count is below that same limit.
  --
  -- A budget written as (_ILIT(0)) therefore makes trivColorable answer False
  -- for that class on that target: accSqueeze returns its starting count as
  -- soon as the count has reached the limit, and the final test is a strict
  -- less-than.  (This reading assumes _ILIT builds a FastInt literal and that
  -- <# is less-than on FastInt; both come from outside this file.)
  --
  -- A target with no branch here stops the build at the #error line.
  35. #if i386_TARGET_ARCH
  36. #define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
  37. #define ALLOCATABLE_REGS_DOUBLE (_ILIT(6))
  38. #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
  39. #define ALLOCATABLE_REGS_SSE (_ILIT(8))
  40. #elif x86_64_TARGET_ARCH
  41. #define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
  42. #define ALLOCATABLE_REGS_DOUBLE (_ILIT(0))
  43. #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
  44. #define ALLOCATABLE_REGS_SSE (_ILIT(10))
  45. #elif powerpc_TARGET_ARCH
  46. #define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
  47. #define ALLOCATABLE_REGS_DOUBLE (_ILIT(26))
  48. #define ALLOCATABLE_REGS_FLOAT (_ILIT(0))
  49. #define ALLOCATABLE_REGS_SSE (_ILIT(0))
  50. #elif sparc_TARGET_ARCH
  51. #define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
  52. #define ALLOCATABLE_REGS_DOUBLE (_ILIT(11))
  53. #define ALLOCATABLE_REGS_FLOAT (_ILIT(22))
  54. #define ALLOCATABLE_REGS_SSE (_ILIT(0))
  55. #else
  56. #error ToDo: choose which trivColorable function to use for this architecture.
  57. #endif
  58. -- Disjoint registers ----------------------------------------------------------
  59. --
  60. -- The definition has been unfolded into individual cases for speed.
  61. -- Each architecture has a different register setup, so we use a
  62. -- different regSqueeze function for each.
  63. --
  -- | Accumulate the squeeze contributed by every register held in a 'UniqFM',
  -- stopping early once the running total has reached the limit.
  --
  -- The walk starts from @count@ and adds @squeeze r@ for each element of
  -- @eltsUFM ufm@, in list order.  Before each addition the running total is
  -- compared with @maxCount@; once it is greater than or equal to the limit
  -- the remaining elements are skipped and the total reached so far is
  -- returned unchanged.  An empty map returns the starting count.
  accSqueeze
      :: FastInt            -- ^ starting count
      -> FastInt            -- ^ limit at which accumulation stops
      -> (reg -> FastInt)   -- ^ squeeze contributed by a single register
      -> UniqFM reg         -- ^ registers to accumulate over
      -> FastInt
  accSqueeze count maxCount squeeze ufm = go count (eltsUFM ufm)
    where
      -- Hand-written loop rather than a library fold so that it can stop
      -- as soon as the limit has been reached.
      go total regs = case regs of
        [] -> total
        (r : rest)
          | total >=# maxCount -> total
          | otherwise          -> go (total +# squeeze r) rest
  74. {- Note [accSqueeze]
  75. ~~~~~~~~~~~~~~~~~~~~
  76. BL 2007/09
  77. Doing a nice fold over the UniqSet makes trivColorable use
  78. 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
  79. Therefore the UniqFM is made non-abstract and we use a custom fold.
  80. MS 2010/04
  81. When converting UniqFM to use Data.IntMap, the fold cannot use the UniqFM internal
  82. representation any more. But it is imperative that accSqueeze stops
  83. the folding if the count gets greater than or equal to maxCount. We thus convert
  84. UniqFM to a (lazy) list, do the fold and stop if necessary, which was
  85. the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows.
  86. (original = previous implementation, folding = fold of the whole UFM,
  87. lazyFold = the current implementation,
  88. hackFold = using internal representation of Data.IntMap)
  89. original folding hackFold lazyFold
  90. -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
  91. 100.00% 96.44% 97.72% 97.12%
  92. -fregs-graph 67.938s 74.875s 62.673s 64.679s
  93. 100.00% 110.21% 92.25% 95.20%
  94. -fregs-iterative 89.761s 143.913s 81.075s 86.912s
  95. 100.00% 160.33% 90.32% 96.83%
  96. -fnew-codegen 38.225s 37.142s 37.551s 37.119s
  97. 100.00% 97.17% 98.24% 97.11%
  98. -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
  99. 100.00% 99.70% 95.19% 94.65%
  100. -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
  101. 100.00% 166.23% 94.18% 100.95%
  102. -}
  -- | Decide whether a node of the given register class is trivially
  -- colourable.
  --
  -- The first two arguments supply the squeeze that one virtual register,
  -- respectively one real register, contributes to a node of a given class.
  -- The remaining three arguments (register class, conflicts, exclusions)
  -- come from the 'Triv' type.
  --
  -- For the class being asked about, the squeeze of every conflicting virtual
  -- register is accumulated first, and the squeeze of every excluded real
  -- register is then added on top of that total.  Both passes use the
  -- per-target ALLOCATABLE_REGS_<CLASS> limit for the class, so accumulation
  -- stops as soon as that limit is reached.  The answer is whether the final
  -- total is strictly below the limit.
  --
  -- There is one equation per register class, deliberately left unfolded so
  -- that each limit is a literal at its use site.
  trivColorable
      :: (RegClass -> VirtualReg -> FastInt)
      -> (RegClass -> RealReg    -> FastInt)
      -> Triv VirtualReg RegClass RealReg

  trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
    = let fromVirtuals = accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER
                                    (virtualRegSqueeze RcInteger) conflicts
          withReals    = accSqueeze fromVirtuals ALLOCATABLE_REGS_INTEGER
                                    (realRegSqueeze RcInteger) exclusions
      in  withReals <# ALLOCATABLE_REGS_INTEGER

  trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
    = let fromVirtuals = accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT
                                    (virtualRegSqueeze RcFloat) conflicts
          withReals    = accSqueeze fromVirtuals ALLOCATABLE_REGS_FLOAT
                                    (realRegSqueeze RcFloat) exclusions
      in  withReals <# ALLOCATABLE_REGS_FLOAT

  trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
    = let fromVirtuals = accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE
                                    (virtualRegSqueeze RcDouble) conflicts
          withReals    = accSqueeze fromVirtuals ALLOCATABLE_REGS_DOUBLE
                                    (realRegSqueeze RcDouble) exclusions
      in  withReals <# ALLOCATABLE_REGS_DOUBLE

  trivColorable virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
    = let fromVirtuals = accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_SSE
                                    (virtualRegSqueeze RcDoubleSSE) conflicts
          withReals    = accSqueeze fromVirtuals ALLOCATABLE_REGS_SSE
                                    (realRegSqueeze RcDoubleSSE) exclusions
      in  withReals <# ALLOCATABLE_REGS_SSE
  139. -- Specification Code ----------------------------------------------------------
  140. --
  141. -- The trivColorable function for each particular architecture should
  142. -- implement the following function, but faster.
  143. --
  144. {-
  145. trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
  146. trivColorable classN conflicts exclusions
  147. = let
  148. acc :: Reg -> (Int, Int) -> (Int, Int)
  149. acc r (cd, cf)
  150. = case regClass r of
  151. RcInteger -> (cd+1, cf)
  152. RcFloat -> (cd, cf+1)
  153. _ -> panic "Regs.trivColorable: reg class not handled"
  154. tmp = foldUniqSet acc (0, 0) conflicts
  155. (countInt, countFloat) = foldUniqSet acc tmp exclusions
  156. squeese = worst countInt classN RcInteger
  157. + worst countFloat classN RcFloat
  158. in squeese < allocatableRegsInClass classN
  159. -- | Worst case displacement
  160. -- node N of classN has n neighbors of class C.
  161. --
  162. -- We currently only have RcInteger and RcDouble, which don't conflict at all.
  163. -- This is a bit boring compared to what's in RegArchX86.
  164. --
  165. worst :: Int -> RegClass -> RegClass -> Int
  166. worst n classN classC
  167. = case classN of
  168. RcInteger
  169. -> case classC of
  170. RcInteger -> min n (allocatableRegsInClass RcInteger)
  171. RcFloat -> 0
  172. RcDouble
  173. -> case classC of
  174. RcFloat -> min n (allocatableRegsInClass RcFloat)
  175. RcInteger -> 0
  176. -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
  177. -- i.e., these are the regs for which we are prepared to allow the
  178. -- register allocator to attempt to map VRegs to.
  179. allocatableRegs :: [RegNo]
  180. allocatableRegs
  181. = let isFree i = isFastTrue (freeReg i)
  182. in filter isFree allMachRegNos
  183. -- | The number of regs in each class.
  184. -- We go via top level CAFs to ensure that we're not recomputing
  185. -- the length of these lists each time the fn is called.
  186. allocatableRegsInClass :: RegClass -> Int
  187. allocatableRegsInClass cls
  188. = case cls of
  189. RcInteger -> allocatableRegsInteger
  190. RcFloat -> allocatableRegsFloat
  191. allocatableRegsInteger :: Int
  192. allocatableRegsInteger
  193. = length $ filter (\r -> regClass r == RcInteger)
  194. $ map RealReg allocatableRegs
  195. allocatableRegsFloat :: Int
  196. allocatableRegsFloat
  197. = length $ filter (\r -> regClass r == RcFloat)
  198. $ map RealReg allocatableRegs
  199. -}