PageRenderTime 313ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 1ms

/compiler/cmm/CmmSink.hs

https://bitbucket.org/carter/ghc
Haskell | 568 lines | 241 code | 83 blank | 244 comment | 14 complexity | 16826e668054a731a9afff2ca06f6ecb MD5 | raw file
  1. {-# LANGUAGE GADTs #-}
  2. module CmmSink (
  3. cmmSink
  4. ) where
  5. import CodeGen.Platform (callerSaves)
  6. import Cmm
  7. import CmmOpt
  8. import BlockId
  9. import CmmLive
  10. import CmmUtils
  11. import Hoopl
  12. import DynFlags
  13. import UniqFM
  14. import PprCmm ()
  15. import Data.List (partition)
  16. import qualified Data.Set as Set
  17. -- -----------------------------------------------------------------------------
  18. -- Sinking and inlining
  19. -- This is an optimisation pass that
  20. -- (a) moves assignments closer to their uses, to reduce register pressure
  21. -- (b) pushes assignments into a single branch of a conditional if possible
  22. -- (c) inlines assignments to registers that are mentioned only once
  23. -- (d) discards dead assignments
  24. --
  25. -- This tightens up lots of register-heavy code. It is particularly
  26. -- helpful in the Cmm generated by the Stg->Cmm code generator, in
  27. -- which every function starts with a copyIn sequence like:
  28. --
  29. -- x1 = R1
  30. -- x2 = Sp[8]
  31. -- x3 = Sp[16]
  32. -- if (Sp - 32 < SpLim) then L1 else L2
  33. --
  34. -- we really want to push the x1..x3 assignments into the L2 branch.
  35. --
  36. -- Algorithm:
  37. --
  38. -- * Start by doing liveness analysis.
  39. --
  40. -- * Keep a list of assignments A; earlier ones may refer to later ones
  41. --
  42. -- * Walk forwards through the graph, look at each node N:
  43. -- * If any assignments in A (1) occur only once in N, and (2) are
  44. -- not live after N, inline the assignment and remove it
  45. -- from A.
  46. -- * If N is an assignment:
  47. -- * If the register is not live after N, discard it
  48. -- * otherwise pick up the assignment and add it to A
  49. -- * If N is a non-assignment node:
  50. -- * remove any assignments from A that conflict with N, and
  51. -- place them before N in the current block. (we call this
  52. -- "dropping" the assignments).
  53. -- * An assignment conflicts with N if it:
  54. -- - assigns to a register mentioned in N
  55. -- - mentions a register assigned by N
  56. -- - reads from memory written by N
  57. -- * do this recursively, dropping dependent assignments
  58. -- * At a multi-way branch:
  59. -- * drop any assignments that are live on more than one branch
  60. -- * if any successor has more than one predecessor (a
  61. -- join-point), drop everything live in that successor
  62. --
  63. -- As a side-effect we'll delete some dead assignments (transitively,
  64. -- even). This isn't as good as removeDeadAssignments, but it's much
  65. -- cheaper.
  66. -- If we do this *before* stack layout, we might be able to avoid
  67. -- saving some things across calls/procpoints.
  68. --
  69. -- *but*, that will invalidate the liveness analysis, and we'll have
  70. -- to re-do it.
  71. -- -----------------------------------------------------------------------------
  72. -- things that we aren't optimising very well yet.
  73. --
  74. -- -----------
  75. -- (1) From GHC's FastString.hashStr:
  76. --
  77. -- s2ay:
  78. -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
  79. -- c2gn:
  80. -- R1 = _s2au::I64;
  81. -- call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
  82. -- c2gp:
  83. -- _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
  84. -- 4091);
  85. -- _s2an::I64 = _s2an::I64 + 1;
  86. -- _s2au::I64 = _s2cO::I64;
  87. -- goto s2ay;
  88. --
  89. -- a nice loop, but we didn't eliminate the silly assignment at the end.
  90. -- See Note [dependent assignments], which would probably fix this.
  91. --
  92. -- -----------
  93. -- (2) From stg_atomically_frame in PrimOps.cmm
  94. --
  95. -- We have a diamond control flow:
  96. --
  97. -- x = ...
  98. -- |
  99. -- / \
  100. -- A B
  101. -- \ /
  102. -- |
  103. -- use of x
  104. --
  105. -- Now x won't be sunk down to its use, because we won't push it into
  106. -- both branches of the conditional. We certainly do have to check
  107. -- that we can sink it past all the code in both A and B, but having
  108. -- discovered that, we could sink it to its use.
  109. --
  110. -- -----------------------------------------------------------------------------
  111. type Assignment = (LocalReg, CmmExpr, AbsMem)
  112. -- Assignment caches AbsMem, an abstraction of the memory read by
  113. -- the RHS of the assignment.
  114. cmmSink :: DynFlags -> CmmGraph -> CmmGraph
  115. cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
  116. where
  117. liveness = cmmLiveness graph
  118. getLive l = mapFindWithDefault Set.empty l liveness
  119. blocks = postorderDfs graph
  120. join_pts = findJoinPoints blocks
  121. sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
  122. sink _ [] = []
  123. sink sunk (b:bs) =
  124. -- pprTrace "sink" (ppr lbl) $
  125. blockJoin first final_middle final_last : sink sunk' bs
  126. where
  127. lbl = entryLabel b
  128. (first, middle, last) = blockSplit b
  129. succs = successors last
  130. -- Annotate the middle nodes with the registers live *after*
  131. -- the node. This will help us decide whether we can inline
  132. -- an assignment in the current node or not.
  133. live = Set.unions (map getLive succs)
  134. live_middle = gen_kill last live
  135. ann_middles = annotate live_middle (blockToList middle)
  136. -- Now sink and inline in this block
  137. (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
  138. fold_last = constantFold dflags last
  139. (final_last, assigs') = tryToInline dflags live fold_last assigs
  140. -- We cannot sink into join points (successors with more than
  141. -- one predecessor), so identify the join points and the set
  142. -- of registers live in them.
  143. (joins, nonjoins) = partition (`mapMember` join_pts) succs
  144. live_in_joins = Set.unions (map getLive joins)
  145. -- We do not want to sink an assignment into multiple branches,
  146. -- so identify the set of registers live in multiple successors.
  147. -- This is made more complicated because when we sink an assignment
  148. -- into one branch, this might change the set of registers that are
  149. -- now live in multiple branches.
  150. init_live_sets = map getLive nonjoins
  151. live_in_multi live_sets r =
  152. case filter (Set.member r) live_sets of
  153. (_one:_two:_) -> True
  154. _ -> False
  155. -- Now, drop any assignments that we will not sink any further.
  156. (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs'
  157. drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
  158. where
  159. should_drop = conflicts dflags a final_last
  160. || {- not (isSmall rhs) && -} live_in_multi live_sets r
  161. || r `Set.member` live_in_joins
  162. live_sets' | should_drop = live_sets
  163. | otherwise = map upd live_sets
  164. upd set | r `Set.member` set = set `Set.union` live_rhs
  165. | otherwise = set
  166. live_rhs = foldRegsUsed extendRegSet emptyRegSet rhs
  167. final_middle = foldl blockSnoc middle' dropped_last
  168. sunk' = mapUnion sunk $
  169. mapFromList [ (l, filterAssignments dflags (getLive l) assigs'')
  170. | l <- succs ]
  171. {- TODO: enable this later, when we have some good tests in place to
  172. measure the effect and tune it.
  173. -- small: an expression we don't mind duplicating
  174. isSmall :: CmmExpr -> Bool
  175. isSmall (CmmReg (CmmLocal _)) = True -- not globals, we want to coalesce them instead
  176. isSmall (CmmLit _) = True
  177. isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y
  178. isSmall (CmmRegOff (CmmLocal _) _) = True
  179. isSmall _ = False
  180. isTrivial :: CmmExpr -> Bool
  181. isTrivial (CmmReg (CmmLocal _)) = True
  182. isTrivial (CmmLit _) = True
  183. isTrivial _ = False
  184. -}
  185. --
  186. -- annotate each node with the set of registers live *after* the node
  187. --
  188. annotate :: RegSet -> [CmmNode O O] -> [(RegSet, CmmNode O O)]
  189. annotate live nodes = snd $ foldr ann (live,[]) nodes
  190. where ann n (live,nodes) = (gen_kill n live, (live,n) : nodes)
  191. --
  192. -- Find the blocks that have multiple predecessors (join points)
  193. --
  194. findJoinPoints :: [CmmBlock] -> BlockEnv Int
  195. findJoinPoints blocks = mapFilter (>1) succ_counts
  196. where
  197. all_succs = concatMap successors blocks
  198. succ_counts :: BlockEnv Int
  199. succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs
  200. --
  201. -- filter the list of assignments to remove any assignments that
  202. -- are not live in a continuation.
  203. --
  204. filterAssignments :: DynFlags -> RegSet -> [Assignment] -> [Assignment]
  205. filterAssignments dflags live assigs = reverse (go assigs [])
  206. where go [] kept = kept
  207. go (a@(r,_,_):as) kept | needed = go as (a:kept)
  208. | otherwise = go as kept
  209. where
  210. needed = r `Set.member` live
  211. || any (conflicts dflags a) (map toNode kept)
  212. -- Note that we must keep assignments that are
  213. -- referred to by other assignments we have
  214. -- already kept.
  215. -- -----------------------------------------------------------------------------
  216. -- Walk through the nodes of a block, sinking and inlining assignments
  217. -- as we go.
  218. walk :: DynFlags
  219. -> [(RegSet, CmmNode O O)] -- nodes of the block, annotated with
  220. -- the set of registers live *after*
  221. -- this node.
  222. -> [Assignment] -- The current list of
  223. -- assignments we are sinking.
  224. -- Later assignments may refer
  225. -- to earlier ones.
  226. -> ( Block CmmNode O O -- The new block
  227. , [Assignment] -- Assignments to sink further
  228. )
  229. walk dflags nodes assigs = go nodes emptyBlock assigs
  230. where
  231. go [] block as = (block, as)
  232. go ((live,node):ns) block as
  233. | shouldDiscard node live = go ns block as
  234. | Just a <- shouldSink dflags node2 = go ns block (a : as1)
  235. | otherwise = go ns block' as'
  236. where
  237. node1 = constantFold dflags node
  238. (node2, as1) = tryToInline dflags live node1 as
  239. (dropped, as') = dropAssignmentsSimple dflags
  240. (\a -> conflicts dflags a node2) as1
  241. block' = foldl blockSnoc block dropped `blockSnoc` node2
  242. constantFold :: DynFlags -> CmmNode e x -> CmmNode e x
  243. constantFold dflags node = mapExpDeep f node
  244. where f (CmmMachOp op args) = cmmMachOpFold dflags op args
  245. f (CmmRegOff r 0) = CmmReg r
  246. f e = e
  247. --
  248. -- Heuristic to decide whether to pick up and sink an assignment
  249. -- Currently we pick up all assignments to local registers. It might
  250. -- be profitable to sink assignments to global regs too, but the
  251. -- liveness analysis doesn't track those (yet) so we can't.
  252. --
  253. shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment
  254. shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e)
  255. where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e
  256. shouldSink _ _other = Nothing
  257. --
  258. -- discard dead assignments. This doesn't do as good a job as
  259. -- removeDeadAssignments, because it would need multiple passes
  260. -- to get all the dead code, but it catches the common case of
  261. -- superfluous reloads from the stack that the stack allocator
  262. -- leaves behind.
  263. --
  264. -- Also we catch "r = r" here. You might think it would fall
  265. -- out of inlining, but the inliner will see that r is live
  266. -- after the instruction and choose not to inline r in the rhs.
  267. --
  268. shouldDiscard :: CmmNode e x -> RegSet -> Bool
  269. shouldDiscard node live
  270. = case node of
  271. CmmAssign r (CmmReg r') | r == r' -> True
  272. CmmAssign (CmmLocal r) _ -> not (r `Set.member` live)
  273. _otherwise -> False
  274. toNode :: Assignment -> CmmNode O O
  275. toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs
  276. dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> [Assignment]
  277. -> ([CmmNode O O], [Assignment])
  278. dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()
  279. dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> [Assignment]
  280. -> ([CmmNode O O], [Assignment])
  281. dropAssignments dflags should_drop state assigs
  282. = (dropped, reverse kept)
  283. where
  284. (dropped,kept) = go state assigs [] []
  285. go _ [] dropped kept = (dropped, kept)
  286. go state (assig : rest) dropped kept
  287. | conflict = go state' rest (toNode assig : dropped) kept
  288. | otherwise = go state' rest dropped (assig:kept)
  289. where
  290. (dropit, state') = should_drop assig state
  291. conflict = dropit || any (conflicts dflags assig) dropped
  292. -- -----------------------------------------------------------------------------
  293. -- Try to inline assignments into a node.
  294. tryToInline
  295. :: DynFlags
  296. -> RegSet -- set of registers live after this
  297. -- node. We cannot inline anything
  298. -- that is live after the node, unless
  299. -- it is small enough to duplicate.
  300. -> CmmNode O x -- The node to inline into
  301. -> [Assignment] -- Assignments to inline
  302. -> (
  303. CmmNode O x -- New node
  304. , [Assignment] -- Remaining assignments
  305. )
  306. tryToInline dflags live node assigs = go usages node [] assigs
  307. where
  308. usages :: UniqFM Int
  309. usages = foldRegsUsed addUsage emptyUFM node
  310. go _usages node _skipped [] = (node, [])
  311. go usages node skipped (a@(l,rhs,_) : rest)
  312. | can_inline = inline_and_discard
  313. | False {- isTiny rhs -} = inline_and_keep
  314. -- ^^ seems to make things slightly worse
  315. where
  316. inline_and_discard = go usages' node' skipped rest
  317. inline_and_keep = (node'', a : rest')
  318. where (node'',rest') = go usages' node' (l:skipped) rest
  319. can_inline =
  320. not (l `elemRegSet` live)
  321. && not (skipped `regsUsedIn` rhs) -- Note [dependent assignments]
  322. && okToInline dflags rhs node
  323. && lookupUFM usages l == Just 1
  324. usages' = foldRegsUsed addUsage usages rhs
  325. node' = mapExpDeep inline node
  326. where inline (CmmReg (CmmLocal l')) | l == l' = rhs
  327. inline (CmmRegOff (CmmLocal l') off) | l == l'
  328. = cmmOffset dflags rhs off
  329. -- re-constant fold after inlining
  330. inline (CmmMachOp op args) = cmmMachOpFold dflags op args
  331. inline other = other
  332. go usages node skipped (assig@(l,rhs,_) : rest)
  333. = (node', assig : rest')
  334. where (node', rest') = go usages' node (l:skipped) rest
  335. usages' = foldRegsUsed (\m r -> addToUFM m r 2) usages rhs
  336. -- we must not inline anything that is mentioned in the RHS
  337. -- of a binding that we have already skipped, so we set the
  338. -- usages of the regs on the RHS to 2.
  339. -- Note [dependent assignments]
  340. --
  341. -- If our assignment list looks like
  342. --
  343. -- [ y = e, x = ... y ... ]
  344. --
  345. -- We cannot inline x. Remember this list is really in reverse order,
  346. -- so it means x = ... y ...; y = e
  347. --
  348. -- Hence if we inline x, the outer assignment to y will capture the
  349. -- reference in x's right hand side.
  350. --
  351. -- In this case we should rename the y in x's right-hand side,
  352. -- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ]
  353. -- Now we can go ahead and inline x.
  354. --
  355. -- For now we do nothing, because this would require putting
  356. -- everything inside UniqSM.
  357. addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
  358. addUsage m r = addToUFM_C (+) m r 1
  359. regsUsedIn :: [LocalReg] -> CmmExpr -> Bool
  360. regsUsedIn [] _ = False
  361. regsUsedIn ls e = wrapRecExpf f e False
  362. where f (CmmReg (CmmLocal l)) _ | l `elem` ls = True
  363. f (CmmRegOff (CmmLocal l) _) _ | l `elem` ls = True
  364. f _ z = z
  365. -- we don't inline into CmmUnsafeForeignCall if the expression refers
  366. -- to global registers. This is a HACK to avoid global registers
  367. -- clashing with C argument-passing registers, really the back-end
  368. -- ought to be able to handle it properly, but currently neither PprC
  369. -- nor the NCG can do it. See Note [Register parameter passing]
  370. -- See also StgCmmForeign:load_args_into_temps.
  371. okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
  372. okToInline dflags expr CmmUnsafeForeignCall{} = not (anyCallerSavesRegs dflags expr)
  373. okToInline _ _ _ = True
  374. -- -----------------------------------------------------------------------------
  375. -- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
  376. -- @r = e@ can be safely commuted past @stmt@.
  377. --
  378. -- We only sink "r = G" assignments right now, so conflicts is very simple:
  379. --
  380. conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool
  381. conflicts dflags (r, rhs, addr) node
  382. -- (1) an assignment to a register conflicts with a use of the register
  383. | CmmAssign reg _ <- node, reg `regUsedIn` rhs = True
  384. | foldRegsUsed (\b r' -> r == r' || b) False node = True
  385. -- (2) a store to an address conflicts with a read of the same memory
  386. | CmmStore addr' e <- node
  387. , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True
  388. -- (3) an assignment to Hp/Sp conflicts with a heap/stack read respectively
  389. | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True
  390. | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
  391. | SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
  392. -- (4) assignments that read caller-saves GlobalRegs conflict with a
  393. -- foreign call. See Note [foreign calls clobber GlobalRegs].
  394. | CmmUnsafeForeignCall{} <- node, anyCallerSavesRegs dflags rhs = True
  395. -- (5) foreign calls clobber memory, but not heap/stack memory
  396. | CmmUnsafeForeignCall{} <- node, AnyMem <- addr = True
  397. -- (6) native calls clobber any memory
  398. | CmmCall{} <- node, memConflicts addr AnyMem = True
  399. -- (7) otherwise, no conflict
  400. | otherwise = False
  401. anyCallerSavesRegs :: DynFlags -> CmmExpr -> Bool
  402. anyCallerSavesRegs dflags e = wrapRecExpf f e False
  403. where f (CmmReg (CmmGlobal r)) _
  404. | callerSaves (targetPlatform dflags) r = True
  405. f _ z = z
  406. -- An abstraction of memory read or written.
  407. data AbsMem
  408. = NoMem -- no memory accessed
  409. | AnyMem -- arbitrary memory
  410. | HeapMem -- definitely heap memory
  411. | StackMem -- definitely stack memory
  412. | SpMem -- <size>[Sp+n]
  413. {-# UNPACK #-} !Int
  414. {-# UNPACK #-} !Int
  415. -- Having SpMem is important because it lets us float loads from Sp
  416. -- past stores to Sp as long as they don't overlap, and this helps to
  417. -- unravel some long sequences of
  418. -- x1 = [Sp + 8]
  419. -- x2 = [Sp + 16]
  420. -- ...
  421. -- [Sp + 8] = xi
  422. -- [Sp + 16] = xj
  423. --
  424. -- Note that SpMem is invalidated if Sp is changed, but the definition
  425. -- of 'conflicts' above handles that.
  426. -- ToDo: this won't currently fix the following commonly occurring code:
  427. -- x1 = [R1 + 8]
  428. -- x2 = [R1 + 16]
  429. -- ..
  430. -- [Hp - 8] = x1
  431. -- [Hp - 16] = x2
  432. -- ..
  433. -- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that
  434. -- assignments to [Hp + n] do not conflict with any other heap memory,
  435. -- but this is tricky to nail down. What if we had
  436. --
  437. -- x = Hp + n
  438. -- [x] = ...
  439. --
  440. -- the store to [x] should be "new heap", not "old heap".
  441. -- Furthermore, you could imagine that if we started inlining
  442. -- functions in Cmm then there might well be reads of heap memory
  443. -- that was written in the same basic block. To take advantage of
  444. -- non-aliasing of heap memory we will have to be more clever.
  445. bothMems :: AbsMem -> AbsMem -> AbsMem
  446. bothMems NoMem x = x
  447. bothMems x NoMem = x
  448. bothMems HeapMem HeapMem = HeapMem
  449. bothMems StackMem StackMem = StackMem
  450. bothMems (SpMem o1 w1) (SpMem o2 w2)
  451. | o1 == o2 = SpMem o1 (max w1 w2)
  452. | otherwise = StackMem
  453. bothMems SpMem{} StackMem = StackMem
  454. bothMems StackMem SpMem{} = StackMem
  455. bothMems _ _ = AnyMem
  456. memConflicts :: AbsMem -> AbsMem -> Bool
  457. memConflicts NoMem _ = False
  458. memConflicts _ NoMem = False
  459. memConflicts HeapMem StackMem = False
  460. memConflicts StackMem HeapMem = False
  461. memConflicts SpMem{} HeapMem = False
  462. memConflicts HeapMem SpMem{} = False
  463. memConflicts (SpMem o1 w1) (SpMem o2 w2)
  464. | o1 < o2 = o1 + w1 > o2
  465. | otherwise = o2 + w2 > o1
  466. memConflicts _ _ = True
  467. exprMem :: DynFlags -> CmmExpr -> AbsMem
  468. exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr)
  469. exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es)
  470. exprMem _ _ = NoMem
  471. loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem
  472. loadAddr dflags e w =
  473. case e of
  474. CmmReg r -> regAddr dflags r 0 w
  475. CmmRegOff r i -> regAddr dflags r i w
  476. _other | CmmGlobal Sp `regUsedIn` e -> StackMem
  477. | otherwise -> AnyMem
  478. regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem
  479. regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w)
  480. regAddr _ (CmmGlobal Hp) _ _ = HeapMem
  481. regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps
  482. regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself
  483. regAddr _ _ _ _ = AnyMem