
/compiler/cmm/CmmRewriteAssignments.hs

https://bitbucket.org/khibino/ghc-hack

{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE FlexibleContexts #-}

{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-- TODO: Get rid of this flag:
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}

-- This module implements generalized code motion for assignments to
-- local registers, inlining and sinking when possible.  It also does
-- some amount of rewriting for stores to register slots, which are
-- effectively equivalent to local registers.
module CmmRewriteAssignments
  ( rewriteAssignments
  ) where

import Cmm
import CmmUtils
import CmmOpt
import OptimizationFuel
import StgCmmUtils

import Control.Monad
import Platform
import UniqFM
import Unique
import BlockId

import Compiler.Hoopl hiding (Unique)
import Data.Maybe
import Prelude hiding (succ, zip)

----------------------------------------------------------------
--- Main function

rewriteAssignments :: Platform -> CmmGraph -> FuelUniqSM CmmGraph
rewriteAssignments platform g = do
  -- Because we need to act on forwards and backwards information, we
  -- first perform usage analysis and bake this information into the
  -- graph (backwards transform), and then do a forwards transform
  -- to actually perform inlining and sinking.
  g'  <- annotateUsage g
  g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
                         analRewFwd assignmentLattice
                                    assignmentTransfer
                                    (assignmentRewrite `thenFwdRw` machOpFoldRewrite platform)
  return (modifyGraph eraseRegUsage g'')

----------------------------------------------------------------
--- Usage information

-- We decorate all register assignments with approximate usage
-- information, that is, the maximum number of times the register is
-- referenced while it is live along all outgoing control paths.
-- This analysis provides a precise upper bound for usage, so if a
-- register is never referenced, we can remove it, as that assignment is
-- dead.
--
-- This analysis is very similar to liveness analysis; we just keep a
-- little extra info.  (Maybe we should move it to CmmLive, and subsume
-- the old liveness analysis.)
--
-- There are a few subtleties here:
--
--  - If a register goes dead, and then becomes live again, the usages
--    of the disjoint live range don't count towards the original range.
--
--          a = 1; // used once
--          b = a;
--          a = 2; // used once
--          c = a;
--
--  - A register may be used multiple times, but these all reside in
--    different control paths, such that any given execution only uses
--    it once.  In that case, the usage count may still be 1.
--
--          a = 1; // used once
--          if (b) {
--              c = a + 3;
--          } else {
--              c = a + 1;
--          }
--
--    This policy corresponds to an inlining strategy that does not
--    duplicate computation but may increase binary size.
--
--  - If we naively implement a usage count, we have a counting to
--    infinity problem across joins.  Furthermore, knowing that
--    something is used 2 or more times in one runtime execution isn't
--    particularly useful for optimizations (inlining may be beneficial,
--    but there's no way of knowing that without register pressure
--    information.)
--
--          while (...) {
--              // first iteration, b used once
--              // second iteration, b used twice
--              // third iteration ...
--              a = b;
--          }
--          // b used zero times
--
--    There is an orthogonal question, which is that for every runtime
--    execution, the register may be used only once, but if we inline it
--    in every conditional path, the binary size might increase a lot.
--    But tracking this information would be tricky, because it violates
--    the finite lattice restriction Hoopl requires for termination;
--    we'd thus need to supply an alternate proof, which is probably
--    something we should defer until we actually have an optimization
--    that would take advantage of this.  (This might also interact
--    strangely with liveness information.)
--
--          a = ...;
--          // a is used one time, but in X different paths
--          case (b) of
--              1 -> ... a ...
--              2 -> ... a ...
--              3 -> ... a ...
--              ...
--
--  - Memory stores to local register slots (CmmStore (CmmStackSlot
--    (LocalReg _) 0) _) have similar behavior to local registers,
--    in that these locations are all disjoint from each other.  Thus,
--    we attempt to inline them too.  Note that because these are only
--    generated as part of the spilling process, most of the time this
--    will refer to a local register and the assignment will immediately
--    die on the subsequent call.  However, if we manage to replace that
--    local register with a memory location, it means that we've managed
--    to preserve a value on the stack without having to move it to
--    another memory location again!  We collect usage information just
--    to be safe in case extra computation is involved.

data RegUsage = SingleUse | ManyUse
    deriving (Ord, Eq, Show)
-- Absence in map = ZeroUse
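--
-- To illustrate (hypothetical source): two references along one control
-- path push a register to ManyUse, while references on disjoint paths
-- are joined with max (see 'usageLattice' below) and can stay SingleUse:
--
--      x = ...; a = x; b = x;                        // x: ManyUse
--      y = ...; if (c) { a = y; } else { b = y; }    // y: SingleUse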

{-
-- minBound is bottom, maxBound is top, least-upper-bound is max
-- ToDo: Put this in Hoopl.  Note that this isn't as useful as I
-- originally hoped, because you usually want to leave out the bottom
-- element when you have things like this put in maps.  Maybe f is
-- useful on its own as a combining function.
boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
boundedOrdLattice n = DataflowLattice n minBound f
    where f _ (OldFact x) (NewFact y)
            | x >= y    = (NoChange,   x)
            | otherwise = (SomeChange, y)
-}

-- Custom node type we'll rewrite to.  CmmAssign nodes to local
-- registers are replaced with AssignLocal nodes.
data WithRegUsage n e x where
    -- Plain will not contain CmmAssign nodes immediately after
    -- transformation, but as we rewrite assignments, we may have
    -- assignments here: these are assignments that should not be
    -- rewritten!
    Plain       :: n e x -> WithRegUsage n e x
    AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O

instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
    foldRegsUsed f z (Plain n) = foldRegsUsed f z n
    foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e

instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
    foldRegsDefd f z (Plain n) = foldRegsDefd f z n
    foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r

instance NonLocal n => NonLocal (WithRegUsage n) where
    entryLabel (Plain n) = entryLabel n
    successors (Plain n) = successors n

liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
liftRegUsage = mapGraph Plain

eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
eraseRegUsage = mapGraph f
    where f :: WithRegUsage CmmNode e x -> CmmNode e x
          f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
          f (Plain n) = n

type UsageMap = UniqFM RegUsage

usageLattice :: DataflowLattice UsageMap
usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
    where f _ (OldFact x) (NewFact y)
            | x >= y    = (NoChange,   x)
            | otherwise = (SomeChange, y)

-- We reuse the names 'gen' and 'kill', although we're doing something
-- slightly different from the Dragon Book
usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
usageTransfer = mkBTransfer3 first middle last
    where first _ f = f
          middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
          middle n f = gen_kill n f
          last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
          -- Checking for CmmCall/CmmForeignCall is unnecessary, because
          -- spills/reloads have already occurred by the time we do this
          -- analysis.
          -- XXX Deprecated warning is puzzling: what label are we
          -- supposed to use?
          -- ToDo: With a bit more cleverness here, we can avoid
          -- disappointment and heartbreak associated with the inability
          -- to inline into CmmCall and CmmForeignCall by
          -- over-estimating the usage to be ManyUse.
          last n f = gen_kill n (joinOutFacts usageLattice n f)
          gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          gen_kill a = gen a . kill a
          gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          gen  a f = foldRegsUsed increaseUsage f a
          kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          kill a f = foldRegsDefd delFromUFM f a
          increaseUsage f r = addToUFM_C combine f r SingleUse
            where combine _ _ = ManyUse
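
-- To illustrate the backwards transfer above on a single (hypothetical)
-- annotated node
--
--      AssignLocal a (CmmReg (CmmLocal b)) u
--
-- 'kill' first deletes 'a' from the incoming fact (its live range ends
-- at this definition), then 'gen' bumps 'b': absent -> SingleUse,
-- already present -> ManyUse (via 'combine' above).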

usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
usageRewrite = mkBRewrite3 first middle last
    where first  _ _ = return Nothing
          middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
          middle (Plain (CmmAssign (CmmLocal l) e)) f
                     = return . Just
                     $ case lookupUFM f l of
                            Nothing    -> emptyGraph
                            Just usage -> mkMiddle (AssignLocal l e usage)
          middle _ _ = return Nothing
          last   _ _ = return Nothing

type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)

annotateUsage :: CmmGraph -> FuelUniqSM CmmGraphWithRegUsage
annotateUsage vanilla_g =
    let g = modifyGraph liftRegUsage vanilla_g
    in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
                       analRewBwd usageLattice usageTransfer usageRewrite

----------------------------------------------------------------
--- Assignment tracking

-- The idea is to maintain a map from local registers to expressions,
-- such that the value of that register is the same as the value of that
-- expression at any given time.  We can then do several things,
-- as described by Assignment.

-- Assignment describes the various optimizations that are valid
-- at a given point in the program.
data Assignment =
-- This assignment can always be inlined.  It is cheap or single-use.
                  AlwaysInline CmmExpr
-- This assignment should be sunk down to its first use.  (This will
-- increase code size if the register is used in multiple control flow
-- paths, but won't increase execution time, and the reduction of
-- register pressure is worth it, I think.)
                | AlwaysSink CmmExpr
-- We cannot safely optimize occurrences of this local register. (This
-- corresponds to top in the lattice structure.)
                | NeverOptimize

-- Extracts the expression that is being assigned
xassign :: Assignment -> Maybe CmmExpr
xassign (AlwaysInline e) = Just e
xassign (AlwaysSink e)   = Just e
xassign NeverOptimize    = Nothing

-- Extracts the expressions, but only if both assignments use the same
-- constructor
xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
xassign2 (AlwaysSink e, AlwaysSink e')     = Just (e, e')
xassign2 _ = Nothing

-- Note: We'd like to make decisions about "not optimizing" as soon as
-- possible, because this will make running the transfer function more
-- efficient.
type AssignmentMap = UniqFM Assignment

assignmentLattice :: DataflowLattice AssignmentMap
assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
    where add _ (OldFact old) (NewFact new)
            = case (old, new) of
                (NeverOptimize, _) -> (NoChange,   NeverOptimize)
                (_, NeverOptimize) -> (SomeChange, NeverOptimize)
                (xassign2 -> Just (e, e'))
                    | e == e'   -> (NoChange,   old)
                    | otherwise -> (SomeChange, NeverOptimize)
                _ -> (SomeChange, NeverOptimize)
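
-- To illustrate the join above on hypothetical facts for a register r:
--
--      r |-> AlwaysInline (x+1)  `join`  r |-> AlwaysInline (x+1)
--          ==> NoChange, keep the old fact
--      r |-> AlwaysInline (x+1)  `join`  r |-> AlwaysInline (y+1)
--          ==> SomeChange, r |-> NeverOptimize
--      r |-> AlwaysInline e      `join`  r |-> AlwaysSink e
--          ==> SomeChange, r |-> NeverOptimize (xassign2 fails on
--              mismatched constructors, so we hit the catch-all)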

-- Deletes sinks from the assignment map, because /this/ is the place
-- where they will be sunk to.
deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
deleteSinks n m = foldRegsUsed (adjustUFM f) m n
  where f (AlwaysSink _) = NeverOptimize
        f old = old

-- Invalidates any expressions that use a register.
invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
    where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
{- This requires the entire spine of the map to be continually rebuilt,
 - which causes crazy memory usage!
invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
  where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
        invalidateUsers' _ old = old
-}

-- Note [foldUFM performance]
-- These calls to fold UFM no longer leak memory, but they do cause
-- pretty killer amounts of allocation.  So they'll be something to
-- optimize; we need an algorithmic change to prevent us from having to
-- traverse the /entire/ map continually.

middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap

-- Algorithm for annotated assignments:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Add the assignment to our list of valid local assignments with
--     the correct optimization policy.
--  3. Look for all assignments that reference that register and
--     invalidate them.
middleAssignment n@(AssignLocal r e usage) assign
    = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
      where add m = addToUFM m r
                  $ case usage of
                        SingleUse -> AlwaysInline e
                        ManyUse   -> decide e
            decide CmmLit{}       = AlwaysInline e
            decide CmmReg{}       = AlwaysInline e
            decide CmmLoad{}      = AlwaysSink e
            decide CmmStackSlot{} = AlwaysSink e
            decide CmmMachOp{}    = AlwaysSink e
            -- We'll always inline simple operations on the global
            -- registers, to reduce register pressure: Sp - 4 or Hp - 8
            -- EZY: Justify this optimization more carefully.
            decide CmmRegOff{}    = AlwaysInline e
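
-- To illustrate the annotated case above on a hypothetical reload:
-- after
--
--      AssignLocal r (CmmLoad slot ty) ManyUse
--
-- the map gains r |-> AlwaysSink (CmmLoad slot ty) (a many-use load is
-- sunk, not inlined), any AlwaysSink entries for registers read by the
-- load were just demoted by deleteSinks, and every mapped expression
-- mentioning r is invalidated.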

-- Algorithm for unannotated assignments of global registers:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Look for all assignments that reference this register and
--     invalidate them.
middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
    = invalidateUsersOf reg . deleteSinks n $ assign

-- Algorithm for unannotated assignments of *local* registers: do
-- nothing (it's a reload, so no state should have changed)
middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign

-- Algorithm for stores:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Look for all assignments that load from memory locations that
--     were clobbered by this store and invalidate them.
middleAssignment (Plain n@(CmmStore lhs rhs)) assign
    = let m = deleteSinks n assign
      in foldUFM_Directly f m m -- [foldUFM performance]
    where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
{- Also leaky
    = mapUFM_Directly p . deleteSinks n $ assign
      -- ToDo: There's a missed opportunity here: even if a memory
      -- access we're attempting to sink gets clobbered at some
      -- location, it's still /better/ to sink it to right before the
      -- point where it gets clobbered.  How might we do this?
      -- Unfortunately, it's too late to change the assignment...
      where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
            p _ old = old
-}

-- Assumption: Unsafe foreign calls don't clobber memory
-- Since foreign calls clobber caller-saved registers, we need to
-- invalidate any assignments that reference those global registers.
-- This is kind of expensive. (One way to optimize this might be to
-- store extra information about expressions that allow this and other
-- checks to be done cheaply.)
middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
    = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
    where deleteCallerSaves m = foldUFM_Directly f m m
          f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
          g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
          g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
          g _ b = b

middleAssignment (Plain (CmmComment {})) assign
    = assign

-- Assumptions:
--  * Writes using Hp do not overlap with any other memory locations
--    (An important invariant being relied on here is that we only ever
--    use Hp to allocate values on the heap, which appears to be the
--    case given hpReg usage, and that our heap writing code doesn't
--    do anything stupid like overlapping writes.)
--  * Stack slots do not overlap with any other memory locations
--  * Stack slots for different areas do not overlap
--  * Stack slots within the same area and different offsets may
--    overlap; we need to do a size check (see 'overlaps').
--  * Register slots only overlap with themselves.  (But this shouldn't
--    happen in practice, because we'll fail to inline a reload across
--    the next spill.)
--  * Non stack-slot stores always conflict with each other.  (This is
--    not always the case; we could probably do something special for Hp)
clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
         -> (Unique,  CmmExpr) -- (register, expression) that may be clobbered
         -> Bool
clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
-- ToDo: Also catch MachOp case
clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
    | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
    where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
            = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
          f (CmmLoad e _)    = containsStackSlot e
          f (CmmMachOp _ es) = or (map f es)
          f _                = False
          -- Maybe there's an invariant broken if this actually ever
          -- returns True
          containsStackSlot (CmmLoad{})      = True -- load of a load, all bets off
          containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
          containsStackSlot (CmmStackSlot{}) = True
          containsStackSlot _                = False
clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
    where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
          f _ = False
clobbers _ (_, e) = f e
    where f (CmmLoad (CmmStackSlot _ _) _) = False
          f (CmmLoad{})                    = True -- conservative
          f (CmmMachOp _ es)               = or (map f es)
          f _                              = False

-- Check for memory overlapping.
-- Diagram:
--      4        8        12
--      s  -w-   o
--      [  I32   ]
--      [       F64        ]
--      s'      -w'-       o'
type CallSubArea = (AreaId, Int, Int) -- area, offset, width
overlaps :: CallSubArea -> CallSubArea -> Bool
overlaps (a, _, _) (a', _, _) | a /= a' = False
overlaps (_, o, w) (_, o', w') =
    let s  = o  - w
        s' = o' - w'
    in (s' < o) && (s < o') -- Not LTE, because [ I32 ][ I32 ] is OK
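
-- A worked instance of 'overlaps' above, matching the diagram (same
-- area a; a slot at offset o with width w spans [o - w, o]):
--
--      overlaps (a, 8, 4) (a, 12, 8) == True   -- [4,8] and [4,12] collide
--      overlaps (a, 8, 4) (a, 12, 4) == False  -- [4,8] and [8,12] only touch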

lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)]
lastAssignment (Plain (CmmForeignCall {succ=k}))  assign = [(k, invalidateVolatile k assign)]
lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l

-- Invalidates any expressions that have volatile contents: essentially,
-- all terminals are volatile except for literals and loads of stack slots
-- that do not correspond to the call area for 'k' (the current call
-- area is volatile because overflow return parameters may be written
-- there.)
-- Note: mapUFM could be expensive, but hopefully block boundaries
-- aren't too common.  If it is a problem, replace with something more
-- clever.
invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
  where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
            where exp CmmLit{} = True
                  exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
                    | k' == k = False
                  exp (CmmLoad (CmmStackSlot _ _) _) = True
                  exp (CmmMachOp _ es) = and (map exp es)
                  exp _ = False
        p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
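
-- To illustrate invalidateVolatile above with a call returning to a
-- hypothetical block k: a fact
--      r |-> AlwaysInline (CmmLit ...)
-- survives the boundary, whereas
--      r |-> AlwaysInline (CmmLoad (CmmStackSlot (CallArea (Young k)) _) _)
-- is demoted to NeverOptimize, since the callee may write overflow
-- return parameters into that very area.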

assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)

-- Note [Soundness of inlining]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- In the Hoopl paper, the soundness condition on rewrite functions is
-- described as follows:
--
--      "If it replaces a node n by a replacement graph g, then g must
--      be observationally equivalent to n under the assumptions
--      expressed by the incoming dataflow fact f.  Moreover, analysis of
--      g must produce output fact(s) that are at least as informative
--      as the fact(s) produced by applying the transfer function to n."
--
-- We consider the second condition in more detail here.  It says that
-- given the rewrite R(n, f) = g, for any incoming fact f' consistent
-- with f (f' >= f), running the transfer function gives T(f', n) <= T(f', g).
-- For inlining this is not necessarily the case:
--
--      n  = "x = a + 2"
--      f  = f' = {a = y}
--      g  = "x = y + 2"
--      T(f', n) = {x = a + 2, a = y}
--      T(f', g) = {x = y + 2, a = y}
--
-- y + 2 and a + 2 are not obviously comparable, and a naive
-- implementation of the lattice would say they are incomparable.
-- At best, this means we may be over-conservative; at worst, it means
-- we may not terminate.
--
-- However, in the original Lerner-Grove-Chambers paper, soundness and
-- termination are separated, and only equivalence of facts is required
-- for soundness.  Monotonicity of the transfer function is not required
-- for termination (as the calculation of least-upper-bound prevents
-- this from being a problem), but it means we won't necessarily find
-- the least-fixed point.

-- Note [Coherency of annotations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Is it possible for our usage annotations to become invalid after we
-- start performing transformations?  As the usage info only provides
-- an upper bound, we only need to consider cases where the usages of
-- a register may increase due to transformations--e.g. any reference
-- to a local register in an AlwaysInline or AlwaysSink instruction, whose
-- originating assignment was single use (we don't care about the
-- many use case, because it is the top of the lattice).  But such a
-- case is not possible, because we always inline any single use
-- register.  QED.
--
-- TODO: A useful lint option would be to check this invariant that
-- there is never a local register in the assignment map that is
-- single-use.

-- Note [Soundness of store rewriting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Its soundness depends on the invariant that no assignment is made to
-- the local register before its store is accessed.  This is clearly
-- true with unoptimized spill-reload code, and as the store will always
-- be rewritten first (if possible), there is no chance of it being
-- propagated down before getting written (possibly with incorrect
-- values from the assignment map, due to reassignment of the local
-- register.)  This is probably not locally sound.

assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
assignmentRewrite = mkFRewrite3 first middle last
    where
        first _ _ = return Nothing
        middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
        middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
        middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
        last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
        -- Tuple is (inline?, reloads for sinks)
        precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
        precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
            where f (i, l) r = case lookupUFM assign r of
                                Just (AlwaysSink e)   -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
                                Just (AlwaysInline _) -> (True, l)
                                Just NeverOptimize    -> (i, l)
                                -- This case can show up when we have
                                -- limited optimization fuel.
                                Nothing -> (i, l)
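        -- To illustrate precompute on a hypothetical node reading r1
        -- and r2, where the map has r1 |-> AlwaysSink e1 and
        -- r2 |-> AlwaysInline e2: the result is
        --      (True, [Plain (CmmAssign (CmmLocal r1) e1)])
        -- i.e. materialize the sunk assignment right before this node,
        -- and have 'inline' below substitute e2 for r2 within it.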
        rewrite :: AssignmentMap
                -> (Bool, [WithRegUsage CmmNode O O])
                -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
                -> CmmNode O x
                -> Maybe (Graph (WithRegUsage CmmNode) O x)
        rewrite _ (False, []) _ _ = Nothing
        -- Note [CmmCall Inline Hack]
        -- Conservative hack: don't do any inlining on what will
        -- be translated into OldCmm CmmCalls, since the code
        -- produced here tends to be unproblematic and I need to write
        -- lint passes to ensure that we don't put anything in the
        -- arguments that could be construed as a global register by
        -- some later translation pass.  (For example, slots will turn
        -- into dereferences of Sp).  See [Register parameter passing].
        -- ToDo: Fix this up to only bug out if all inlines were for
        -- CmmExprs with global registers (we can't use the
        -- straightforward mapExpDeep call, in this case.)  ToDo: We miss
        -- an opportunity here, where all possible inlinings should
        -- instead be sunk.
        rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
        rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))

        rewriteLocal :: AssignmentMap
                     -> (Bool, [WithRegUsage CmmNode O O])
                     -> LocalReg -> CmmExpr -> RegUsage
                     -> Maybe (Graph (WithRegUsage CmmNode) O O)
        rewriteLocal _ (False, []) _ _ _ = Nothing
        rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
            where n' = AssignLocal l e' u
                  e' = if i then wrapRecExp (inlineExp assign) e else e
                  -- inlinable check omitted, since we can always inline into
                  -- assignments.

        inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
        inline False _ n = n
        inline True  _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
        inline True assign n = mapExpDeep (inlineExp assign) n

        inlineExp assign old@(CmmReg (CmmLocal r))
          = case lookupUFM assign r of
              Just (AlwaysInline x) -> x
              _ -> old
        inlineExp assign old@(CmmRegOff (CmmLocal r) i)
          = case lookupUFM assign r of
              Just (AlwaysInline x) ->
                case x of
                  (CmmRegOff r' i') -> CmmRegOff r' (i + i')
                  _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
                          where rep = typeWidth (localRegType r)
              _ -> old
        -- See Note [Soundness of store rewriting]
        inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
          = case lookupUFM assign r of
              Just (AlwaysInline x) -> x
              _ -> old
        inlineExp _ old = old
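
        -- To illustrate the CmmRegOff case of inlineExp above on
        -- hypothetical values: with
        --      r |-> AlwaysInline (CmmRegOff (CmmGlobal Sp) (-4))
        -- in the map, the occurrence CmmRegOff (CmmLocal r) 8 folds to
        -- CmmRegOff (CmmGlobal Sp) 4; any non-CmmRegOff replacement
        -- instead becomes an explicit MO_Add against the literal offset.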

        inlinable :: CmmNode e x -> Bool
        inlinable (CmmCall{}) = False
        inlinable (CmmForeignCall{}) = False
        inlinable (CmmUnsafeForeignCall{}) = False
        inlinable _ = True

-- Need to interleave this with inlining, because machop folding results
-- in literals, which we can inline more aggressively, and inlining
-- gives us opportunities for more folding.  However, we don't need any
-- facts to do MachOp folding.
machOpFoldRewrite :: Platform -> FwdRewrite FuelUniqSM (WithRegUsage CmmNode) a
machOpFoldRewrite platform = mkFRewrite3 first middle last
  where first _ _ = return Nothing
        middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
        middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m))
        middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e))
            where f e' = mkMiddle (AssignLocal l e' r)
        last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C
        last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l))
        foldNode :: CmmNode e x -> Maybe (CmmNode e x)
        foldNode n = mapExpDeepM foldExp n
        foldExp (CmmMachOp op args) = cmmMachOpFoldM platform op args
        foldExp _ = Nothing

-- ToDo: Outputable instance for UsageMap and AssignmentMap