PageRenderTime 68ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/compiler/cmm/CmmRewriteAssignments.hs

http://github.com/ghc/ghc
Haskell | 628 lines | 264 code | 55 blank | 309 comment | 11 complexity | ce3de1e187d8bc513cd1fb5615752efa MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. {-# LANGUAGE ViewPatterns #-}
  2. {-# LANGUAGE GADTs #-}
  3. {-# LANGUAGE FlexibleContexts #-}
  4. {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
  5. -- This module implements generalized code motion for assignments to
  6. -- local registers, inlining and sinking when possible. It also does
  7. -- some amount of rewriting for stores to register slots, which are
  8. -- effectively equivalent to local registers.
  9. module CmmRewriteAssignments
  10. ( rewriteAssignments
  11. ) where
  12. import StgCmmUtils -- XXX layering violation
  13. import Cmm
  14. import CmmUtils
  15. import CmmOpt
  16. import DynFlags
  17. import UniqSupply
  18. import UniqFM
  19. import Unique
  20. import BlockId
  21. import Hoopl
  22. import Compiler.Hoopl ((<*>), mkMiddle, mkLast)
  23. import Data.Maybe
  24. import Control.Monad
  25. import Prelude hiding (succ, zip)
----------------------------------------------------------------
--- Main function

-- | Perform generalized code motion on assignments to local registers:
-- annotate every local-register assignment with its usage count
-- (a backwards pass via 'annotateUsage'), then inline/sink assignments
-- and fold MachOps in a single forwards pass, and finally strip the
-- usage annotations off again with 'eraseRegUsage'.
rewriteAssignments :: DynFlags -> CmmGraph -> UniqSM CmmGraph
rewriteAssignments dflags g = do
  -- Because we need to act on forwards and backwards information, we
  -- first perform usage analysis and bake this information into the
  -- graph (backwards transform), and then do a forwards transform
  -- to actually perform inlining and sinking.
  g'  <- annotateUsage dflags g
  g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
                     analRewFwd assignmentLattice
                                (assignmentTransfer dflags)
                                (assignmentRewrite dflags `thenFwdRw` machOpFoldRewrite dflags)
  return (modifyGraph eraseRegUsage g'')
  40. ----------------------------------------------------------------
  41. --- Usage information
  42. -- We decorate all register assignments with approximate usage
  43. -- information, that is, the maximum number of times the register is
  44. -- referenced while it is live along all outgoing control paths.
  45. -- This analysis provides a precise upper bound for usage, so if a
  46. -- register is never referenced, we can remove it, as that assignment is
  47. -- dead.
  48. --
  49. -- This analysis is very similar to liveness analysis; we just keep a
  50. -- little extra info. (Maybe we should move it to CmmLive, and subsume
  51. -- the old liveness analysis.)
  52. --
  53. -- There are a few subtleties here:
  54. --
  55. -- - If a register goes dead, and then becomes live again, the usages
  56. -- of the disjoint live range don't count towards the original range.
  57. --
  58. -- a = 1; // used once
  59. -- b = a;
  60. -- a = 2; // used once
  61. -- c = a;
  62. --
  63. -- - A register may be used multiple times, but these all reside in
  64. -- different control paths, such that any given execution only uses
  65. -- it once. In that case, the usage count may still be 1.
  66. --
  67. -- a = 1; // used once
  68. -- if (b) {
  69. -- c = a + 3;
  70. -- } else {
  71. -- c = a + 1;
  72. -- }
  73. --
  74. -- This policy corresponds to an inlining strategy that does not
  75. -- duplicate computation but may increase binary size.
  76. --
  77. -- - If we naively implement a usage count, we have a counting to
  78. -- infinity problem across joins. Furthermore, knowing that
  79. -- something is used 2 or more times in one runtime execution isn't
  80. -- particularly useful for optimizations (inlining may be beneficial,
  81. -- but there's no way of knowing that without register pressure
  82. -- information.)
  83. --
  84. -- while (...) {
  85. -- // first iteration, b used once
  86. -- // second iteration, b used twice
  87. -- // third iteration ...
  88. -- a = b;
  89. -- }
  90. -- // b used zero times
  91. --
  92. -- There is an orthogonal question, which is that for every runtime
  93. -- execution, the register may be used only once, but if we inline it
  94. -- in every conditional path, the binary size might increase a lot.
  95. -- But tracking this information would be tricky, because it violates
  96. -- the finite lattice restriction Hoopl requires for termination;
  97. -- we'd thus need to supply an alternate proof, which is probably
  98. -- something we should defer until we actually have an optimization
  99. -- that would take advantage of this. (This might also interact
  100. -- strangely with liveness information.)
  101. --
  102. -- a = ...;
  103. -- // a is used one time, but in X different paths
  104. -- case (b) of
  105. -- 1 -> ... a ...
  106. -- 2 -> ... a ...
  107. -- 3 -> ... a ...
  108. -- ...
  109. --
  110. -- - Memory stores to local register slots (CmmStore (CmmStackSlot
  111. -- (LocalReg _) 0) _) have similar behavior to local registers,
  112. -- in that these locations are all disjoint from each other. Thus,
  113. -- we attempt to inline them too. Note that because these are only
  114. -- generated as part of the spilling process, most of the time this
  115. -- will refer to a local register and the assignment will immediately
  116. -- die on the subsequent call. However, if we manage to replace that
  117. -- local register with a memory location, it means that we've managed
  118. -- to preserve a value on the stack without having to move it to
  119. -- another memory location again! We collect usage information just
  120. -- to be safe in case extra computation is involved.
-- | Approximate usage count for a register while it is live: exactly
-- one reference ('SingleUse') or possibly more ('ManyUse').  The Ord
-- instance makes ManyUse the top of the (two-point) usage ordering.
data RegUsage = SingleUse | ManyUse
  deriving (Ord, Eq, Show)
-- Absence in map = ZeroUse
  124. {-
  125. -- minBound is bottom, maxBound is top, least-upper-bound is max
  126. -- ToDo: Put this in Hoopl. Note that this isn't as useful as I
  127. -- originally hoped, because you usually want to leave out the bottom
  128. -- element when you have things like this put in maps. Maybe f is
  129. -- useful on its own as a combining function.
  130. boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
  131. boundedOrdLattice n = DataflowLattice n minBound f
  132. where f _ (OldFact x) (NewFact y)
  133. | x >= y = (NoChange, x)
  134. | otherwise = (SomeChange, y)
  135. -}
-- Custom node type we'll rewrite to.  CmmAssign nodes to local
-- registers are replaced with AssignLocal nodes.
data WithRegUsage n e x where
    -- Plain will not contain CmmAssign nodes immediately after
    -- transformation, but as we rewrite assignments, we may have
    -- assignments here: these are assignments that should not be
    -- rewritten!
    Plain       :: n e x -> WithRegUsage n e x
    -- A local-register assignment annotated with the usage count of
    -- the assigned register.
    AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
-- The register uses of an annotated assignment are the uses of its RHS
-- expression; Plain nodes delegate to the wrapped node.
instance UserOfRegs LocalReg (n e x) => UserOfRegs LocalReg (WithRegUsage n e x) where
    foldRegsUsed dflags f z (Plain n) = foldRegsUsed dflags f z n
    foldRegsUsed dflags f z (AssignLocal _ e _) = foldRegsUsed dflags f z e

-- An annotated assignment defines exactly its target register.
instance DefinerOfRegs LocalReg (n e x) => DefinerOfRegs LocalReg (WithRegUsage n e x) where
    foldRegsDefd dflags f z (Plain n) = foldRegsDefd dflags f z n
    foldRegsDefd dflags f z (AssignLocal r _ _) = foldRegsDefd dflags f z r

-- AssignLocal is open/open, so only Plain nodes can carry entry labels
-- or successors.
instance NonLocal n => NonLocal (WithRegUsage n) where
    entryLabel (Plain n) = entryLabel n
    successors (Plain n) = successors n
  154. liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
  155. liftRegUsage = mapGraph Plain
-- | Drop the usage annotations again, turning every 'AssignLocal' node
-- back into an ordinary 'CmmAssign' to the local register.
eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
eraseRegUsage = mapGraph f
    where f :: WithRegUsage CmmNode e x -> CmmNode e x
          f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
          f (Plain n) = n
-- | Maps a local register (keyed by its Unique) to its usage count.
-- Registers absent from the map are used zero times.
type UsageMap = UniqFM RegUsage

-- Lattice for the usage analysis: bottom is the empty map, and the
-- per-register join is max (SingleUse < ManyUse via the derived Ord).
usageLattice :: DataflowLattice UsageMap
usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
    where f _ (OldFact x) (NewFact y)
            | x >= y    = (NoChange,   x)
            | otherwise = (SomeChange, y)
-- We reuse the names 'gen' and 'kill', although we're doing something
-- slightly different from the Dragon Book
usageTransfer :: DynFlags -> BwdTransfer (WithRegUsage CmmNode) UsageMap
usageTransfer dflags = mkBTransfer3 first middle last
    where first _ f = f
          middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
          middle n f = gen_kill n f
          last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
          -- Checking for CmmCall/CmmForeignCall is unnecessary, because
          -- spills/reloads have already occurred by the time we do this
          -- analysis.
          -- XXX Deprecated warning is puzzling: what label are we
          -- supposed to use?
          -- ToDo: With a bit more cleverness here, we can avoid
          -- disappointment and heartbreak associated with the inability
          -- to inline into CmmCall and CmmForeignCall by
          -- over-estimating the usage to be ManyUse.
          last n f = gen_kill n (joinOutFacts usageLattice n f)
          -- kill first (a definition ends the live range), then gen
          -- (uses in this node count towards the new range).
          gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          gen_kill a = gen a . kill a
          gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          gen a f = foldLocalRegsUsed dflags increaseUsage f a
          kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
          kill a f = foldLocalRegsDefd dflags delFromUFM f a
          -- A second use within a live range saturates the count at
          -- ManyUse.
          increaseUsage f r = addToUFM_C combine f r SingleUse
            where combine _ _ = ManyUse
-- | Replace each @CmmAssign@ to a local register with an 'AssignLocal'
-- node carrying the register's usage count.  An assignment whose
-- register is never used (absent from the UsageMap) is deleted
-- outright, since the usage analysis is a precise upper bound.
usageRewrite :: BwdRewrite UniqSM (WithRegUsage CmmNode) UsageMap
usageRewrite = mkBRewrite3 first middle last
    where first _ _ = return Nothing
          middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
          middle (Plain (CmmAssign (CmmLocal l) e)) f
              = return . Just
              $ case lookupUFM f l of
                    Nothing    -> emptyGraph  -- dead assignment: drop it
                    Just usage -> mkMiddle (AssignLocal l e usage)
          middle _ _ = return Nothing
          last _ _ = return Nothing
type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)

-- | Run the backwards usage analysis and rewrite over a vanilla graph,
-- producing a graph whose local-register assignments carry usage
-- annotations ('AssignLocal' nodes).
annotateUsage :: DynFlags -> CmmGraph -> UniqSM (CmmGraphWithRegUsage)
annotateUsage dflags vanilla_g =
    let g = modifyGraph liftRegUsage vanilla_g
    in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
                   analRewBwd usageLattice (usageTransfer dflags) usageRewrite
  210. ----------------------------------------------------------------
  211. --- Assignment tracking
-- The idea is to maintain a map of local registers to expressions,
  213. -- such that the value of that register is the same as the value of that
  214. -- expression at any given time. We can then do several things,
  215. -- as described by Assignment.
-- Assignment describes the various optimizations that are valid
-- at a given point in the program.
data Assignment =
      -- This assignment can always be inlined.  It is cheap or single-use.
      AlwaysInline CmmExpr
      -- This assignment should be sunk down to its first use.  (This will
      -- increase code size if the register is used in multiple control flow
      -- paths, but won't increase execution time, and the reduction of
      -- register pressure is worth it, I think.)
    | AlwaysSink CmmExpr
      -- We cannot safely optimize occurrences of this local register.  (This
      -- corresponds to top in the lattice structure.)
    | NeverOptimize
  229. -- Extract the expression that is being assigned to
  230. xassign :: Assignment -> Maybe CmmExpr
  231. xassign (AlwaysInline e) = Just e
  232. xassign (AlwaysSink e) = Just e
  233. xassign NeverOptimize = Nothing
  234. -- Extracts the expression, but only if they're the same constructor
  235. xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
  236. xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
  237. xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
  238. xassign2 _ = Nothing
-- Note: We'd like to make decisions about "not optimizing" as soon as
-- possible, because this will make running the transfer function more
-- efficient.
type AssignmentMap = UniqFM Assignment

-- Lattice join for assignments: NeverOptimize is top; two assignments
-- of the same kind join to the old fact only when their expressions
-- agree, otherwise everything collapses to NeverOptimize.
assignmentLattice :: DataflowLattice AssignmentMap
assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
    where add _ (OldFact old) (NewFact new)
            = case (old, new) of
                (NeverOptimize, _) -> (NoChange,   NeverOptimize)
                (_, NeverOptimize) -> (SomeChange, NeverOptimize)
                -- view pattern: both sides same constructor
                (xassign2 -> Just (e, e'))
                    | e == e'   -> (NoChange,   old)
                    | otherwise -> (SomeChange, NeverOptimize)
                _ -> (SomeChange, NeverOptimize)
-- Deletes sinks from assignment map, because /this/ is the place
-- where it will be sunk to.  Only AlwaysSink entries used by the node
-- are demoted to NeverOptimize; other entries are left untouched.
deleteSinks :: UserOfRegs LocalReg n => DynFlags -> n -> AssignmentMap -> AssignmentMap
deleteSinks dflags n m = foldLocalRegsUsed dflags (adjustUFM f) m n
    where f (AlwaysSink _) = NeverOptimize
          f old = old
-- Invalidates any expressions that use a register: every tracked
-- assignment whose expression mentions 'reg' becomes NeverOptimize.
invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
    where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
{- This requires the entire spine of the map to be continually rebuilt,
 - which causes crazy memory usage!
invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
  where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
        invalidateUsers' _ old = old
-}
-- Note [foldUFM performance]
-- These calls to fold UFM no longer leak memory, but they do cause
-- pretty killer amounts of allocation.  So they'll be something to
-- optimize; we need an algorithmic change to prevent us from having to
-- traverse the /entire/ map continually.

-- | Transfer function for middle (open/open) nodes: updates the
-- assignment map to reflect the node's effect on registers and memory.
middleAssignment :: DynFlags -> WithRegUsage CmmNode O O -> AssignmentMap
                 -> AssignmentMap

-- Algorithm for annotated assignments:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Add the assignment to our list of valid local assignments with
--     the correct optimization policy.
--  3. Look for all assignments that reference that register and
--     invalidate them.
middleAssignment dflags n@(AssignLocal r e usage) assign
    = invalidateUsersOf (CmmLocal r) . add . deleteSinks dflags n $ assign
      where add m = addToUFM m r
                  $ case usage of
                        SingleUse -> AlwaysInline e
                        ManyUse   -> decide e
            -- For many-use registers, only cheap expressions (literals,
            -- registers, register offsets) are inlined; anything that
            -- touches memory is sunk instead.
            decide CmmLit{}       = AlwaysInline e
            decide CmmReg{}       = AlwaysInline e
            decide CmmLoad{}      = AlwaysSink e
            decide CmmStackSlot{} = AlwaysSink e
            decide CmmMachOp{}    = AlwaysSink e
            -- We'll always inline simple operations on the global
            -- registers, to reduce register pressure: Sp - 4 or Hp - 8
            -- EZY: Justify this optimization more carefully.
            decide CmmRegOff{}    = AlwaysInline e

-- Algorithm for unannotated assignments of global registers:
-- 1. Delete any sinking assignments that were used by this instruction
-- 2. Look for all assignments that reference this register and
--    invalidate them.
middleAssignment dflags (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
    = invalidateUsersOf reg . deleteSinks dflags n $ assign

-- Algorithm for unannotated assignments of *local* registers: do
-- nothing (it's a reload, so no state should have changed)
middleAssignment _ (Plain (CmmAssign (CmmLocal _) _)) assign = assign

-- Algorithm for stores:
-- 1. Delete any sinking assignments that were used by this instruction
-- 2. Look for all assignments that load from memory locations that
--    were clobbered by this store and invalidate them.
middleAssignment dflags (Plain n@(CmmStore lhs rhs)) assign
    = let m = deleteSinks dflags n assign
      in foldUFM_Directly f m m -- [foldUFM performance]
    where f u (xassign -> Just x) m | clobbers dflags (lhs, rhs) (u, x) = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
{- Also leaky
    = mapUFM_Directly p . deleteSinks n $ assign
      -- ToDo: There's a missed opportunity here: even if a memory
      -- access we're attempting to sink gets clobbered at some
      -- location, it's still /better/ to sink it to right before the
      -- point where it gets clobbered.  How might we do this?
      -- Unfortunately, it's too late to change the assignment...
      where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
            p _ old = old
-}

-- Assumption: Unsafe foreign calls don't clobber memory
-- Since foreign calls clobber caller saved registers, we need to
-- invalidate any assignments that reference those global registers.
-- This is kind of expensive.  (One way to optimize this might be to
-- store extra information about expressions that allow this and other
-- checks to be done cheaply.)
middleAssignment dflags (Plain n@(CmmUnsafeForeignCall{})) assign
    = deleteCallerSaves (foldLocalRegsDefd dflags (\m r -> addToUFM m r NeverOptimize) (deleteSinks dflags n assign) n)
    where deleteCallerSaves m = foldUFM_Directly f m m
          f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
          f _ _ m = m
          -- g returns True as soon as the expression mentions a
          -- caller-saved global register (plain or with offset).
          g (CmmReg (CmmGlobal r)) _ | callerSaves platform r = True
          g (CmmRegOff (CmmGlobal r) _) _ | callerSaves platform r = True
          g _ b = b
          platform = targetPlatform dflags

-- Comments have no effect on the assignment map.
middleAssignment _ (Plain (CmmComment {})) assign
    = assign
-- Assumptions:
--  * Writes using Hp do not overlap with any other memory locations
--    (An important invariant being relied on here is that we only ever
--    use Hp to allocate values on the heap, which appears to be the
--    case given hpReg usage, and that our heap writing code doesn't
--    do anything stupid like overlapping writes.)
--  * Stack slots do not overlap with any other memory locations
--  * Stack slots for different areas do not overlap
--  * Stack slots within the same area and different offsets may
--    overlap; we need to do a size check (see 'overlaps').
--  * Register slots only overlap with themselves.  (But this shouldn't
--    happen in practice, because we'll fail to inline a reload across
--    the next spill.)
--  * Non stack-slot stores always conflict with each other.  (This is
--    not always the case; we could probably do something special for Hp)
clobbers :: DynFlags
         -> (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
         -> (Unique, CmmExpr)  -- (register, expression) that may be clobbered
         -> Bool
-- Per the first assumption above, Hp-relative writes never clobber.
clobbers _ (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
clobbers _ (CmmReg (CmmGlobal Hp), _) (_, _) = False
-- ToDo: Also catch MachOp case
-- Spilling a register back into its own slot clobbers nothing.
clobbers _ (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
    | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-- Stack-slot stores clobber loads of overlapping slots in the same
-- area, and (conservatively) any expression containing a further load
-- or a raw stack-slot reference.
clobbers dflags (CmmStackSlot a o, rhs) (_, expr) = f expr
    where f (CmmLoad (CmmStackSlot a' o') t)
            = (a, o, widthInBytes (cmmExprWidth dflags rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
          f (CmmLoad e _)    = containsStackSlot e
          f (CmmMachOp _ es) = or (map f es)
          f _                = False
          -- Maybe there's an invariant broken if this actually ever
          -- returns True
          containsStackSlot (CmmLoad{})      = True -- load of a load, all bets off
          containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
          containsStackSlot (CmmStackSlot{}) = True
          containsStackSlot _                = False
-- Any other store conflicts with every non-stack-slot load.
clobbers _ _ (_, e) = f e
    where f (CmmLoad (CmmStackSlot _ _) _) = False
          f (CmmLoad{})                    = True -- conservative
          f (CmmMachOp _ es)               = or (map f es)
          f _                              = False
  385. -- Check for memory overlapping.
  386. -- Diagram:
  387. -- 4 8 12
  388. -- s -w- o
  389. -- [ I32 ]
  390. -- [ F64 ]
  391. -- s' -w'- o'
  392. type CallSubArea = (Area, Int, Int) -- area, offset, width
  393. overlaps :: CallSubArea -> CallSubArea -> Bool
  394. overlaps (a, _, _) (a', _, _) | a /= a' = False
  395. overlaps (_, o, w) (_, o', w') =
  396. let s = o - w
  397. s' = o' - w'
  398. in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK
-- | Transfer function for block-ending nodes.  Calls with a
-- continuation invalidate volatile assignments on that continuation
-- (see 'invalidateVolatile'); every other terminator propagates the
-- fact, minus any sinks it uses, to each of its successors.
lastAssignment :: DynFlags -> WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
lastAssignment _ (Plain (CmmCall _ (Just k) _ _ _ _)) assign = [(k, invalidateVolatile k assign)]
lastAssignment _ (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)]
lastAssignment dflags l assign = map (\id -> (id, deleteSinks dflags l assign)) $ successors l
-- Invalidates any expressions that have volatile contents: essentially,
-- all terminals volatile except for literals and loads of stack slots
-- that do not correspond to the call area for 'k' (the current call
-- area is volatile because overflow return parameters may be written
-- there.)
-- Note: mapUFM could be expensive, but hopefully block boundaries
-- aren't too common.  If it is a problem, replace with something more
-- clever.
invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
    where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
            -- 'exp' answers: is this expression non-volatile across
            -- the call returning to 'k'?
            where exp CmmLit{} = True
                  exp (CmmLoad (CmmStackSlot (Young k') _) _)
                    | k' == k = False
                  exp (CmmLoad (CmmStackSlot _ _) _) = True
                  exp (CmmMachOp _ es) = and (map exp es)
                  exp _ = False
          p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
-- | Assemble the forward transfer function: entry labels leave the
-- fact unchanged, middles go through 'middleAssignment', and lasts go
-- through 'lastAssignment' (packaged into a FactBase).
assignmentTransfer :: DynFlags
                   -> FwdTransfer (WithRegUsage CmmNode) AssignmentMap
assignmentTransfer dflags
    = mkFTransfer3 (flip const)
                   (middleAssignment dflags)
                   ((mkFactBase assignmentLattice .) . lastAssignment dflags)
  427. -- Note [Soundness of inlining]
  428. -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  429. -- In the Hoopl paper, the soundness condition on rewrite functions is
  430. -- described as follows:
  431. --
  432. -- "If it replaces a node n by a replacement graph g, then g must
  433. -- be observationally equivalent to n under the assumptions
  434. -- expressed by the incoming dataflow fact f. Moreover, analysis of
  435. -- g must produce output fact(s) that are at least as informative
  436. -- as the fact(s) produced by applying the transfer function to n."
  437. --
  438. -- We consider the second condition in more detail here. It says given
  439. -- the rewrite R(n, f) = g, then for any incoming fact f' consistent
  440. -- with f (f' >= f), then running the transfer function T(f', n) <= T(f', g).
  441. -- For inlining this is not necessarily the case:
  442. --
  443. -- n = "x = a + 2"
  444. -- f = f' = {a = y}
  445. -- g = "x = y + 2"
  446. -- T(f', n) = {x = a + 2, a = y}
  447. -- T(f', g) = {x = y + 2, a = y}
  448. --
  449. -- y + 2 and a + 2 are not obviously comparable, and a naive
  450. -- implementation of the lattice would say they are incomparable.
  451. -- At best, this means we may be over-conservative, at worst, it means
  452. -- we may not terminate.
  453. --
  454. -- However, in the original Lerner-Grove-Chambers paper, soundness and
  455. -- termination are separated, and only equivalence of facts is required
  456. -- for soundness. Monotonicity of the transfer function is not required
  457. -- for termination (as the calculation of least-upper-bound prevents
  458. -- this from being a problem), but it means we won't necessarily find
  459. -- the least-fixed point.
  460. -- Note [Coherency of annotations]
  461. -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  462. -- Is it possible for our usage annotations to become invalid after we
  463. -- start performing transformations? As the usage info only provides
  464. -- an upper bound, we only need to consider cases where the usages of
  465. -- a register may increase due to transformations--e.g. any reference
  466. -- to a local register in an AlwaysInline or AlwaysSink instruction, whose
  467. -- originating assignment was single use (we don't care about the
  468. -- many use case, because it is the top of the lattice). But such a
  469. -- case is not possible, because we always inline any single use
  470. -- register. QED.
  471. --
  472. -- TODO: A useful lint option would be to check this invariant that
  473. -- there is never a local register in the assignment map that is
  474. -- single-use.
  475. -- Note [Soundness of store rewriting]
  476. -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  477. -- Its soundness depends on the invariant that no assignment is made to
  478. -- the local register before its store is accessed. This is clearly
  479. -- true with unoptimized spill-reload code, and as the store will always
  480. -- be rewritten first (if possible), there is no chance of it being
  481. -- propagated down before getting written (possibly with incorrect
  482. -- values from the assignment map, due to reassignment of the local
  483. -- register.) This is probably not locally sound.
-- | The forwards rewrite: inlines AlwaysInline assignments into uses,
-- and materializes AlwaysSink assignments as reloads immediately
-- before their first use.
assignmentRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) AssignmentMap
assignmentRewrite dflags = mkFRewrite3 first middle last
    where
        first _ _ = return Nothing
        middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
        middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
        middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
        last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
        -- Tuple is (inline?, reloads for sinks)
        precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
        precompute assign n = foldLocalRegsUsed dflags f (False, []) n -- duplicates are harmless
            where f (i, l) r = case lookupUFM assign r of
                                Just (AlwaysSink e)   -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
                                Just (AlwaysInline _) -> (True, l)
                                Just NeverOptimize    -> (i, l)
                                -- This case can show up when we have
                                -- limited optimization fuel.
                                Nothing -> (i, l)
        rewrite :: AssignmentMap
                -> (Bool, [WithRegUsage CmmNode O O])
                -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
                -> CmmNode O x
                -> Maybe (Graph (WithRegUsage CmmNode) O x)
        -- Nothing to do: report "no rewrite" so Hoopl keeps the node.
        rewrite _ (False, []) _ _ = Nothing
        -- Note [CmmCall Inline Hack]
        -- Conservative hack: don't do any inlining on what will
        -- be translated into an OldCmm CmmCalls, since the code
        -- produced here tends to be unproblematic and I need to write
        -- lint passes to ensure that we don't put anything in the
        -- arguments that could be construed as a global register by
        -- some later translation pass.  (For example, slots will turn
        -- into dereferences of Sp).  See [Register parameter passing].
        -- ToDo: Fix this up to only bug out if all inlines were for
        -- CmmExprs with global registers (we can't use the
        -- straightforward mapExpDeep call, in this case.)  ToDo: We miss
        -- an opportunity here, where all possible inlinings should
        -- instead be sunk.
        rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
        -- Emit the sink reloads first, then the (possibly inlined) node.
        rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))

        rewriteLocal :: AssignmentMap
                     -> (Bool, [WithRegUsage CmmNode O O])
                     -> LocalReg -> CmmExpr -> RegUsage
                     -> Maybe (Graph (WithRegUsage CmmNode) O O)
        rewriteLocal _ (False, []) _ _ _ = Nothing
        rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
            where n' = AssignLocal l e' u
                  e' = if i then wrapRecExp (inlineExp assign) e else e
            -- inlinable check omitted, since we can always inline into
            -- assignments.

        inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
        inline False _ n = n
        inline True  _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
        inline True  assign n = mapExpDeep (inlineExp assign) n

        inlineExp assign old@(CmmReg (CmmLocal r))
          = case lookupUFM assign r of
              Just (AlwaysInline x) -> x
              _ -> old
        inlineExp assign old@(CmmRegOff (CmmLocal r) i)
          = case lookupUFM assign r of
              Just (AlwaysInline x) ->
                case x of
                  -- Fold nested offsets; otherwise rebuild as an
                  -- explicit addition at the register's width.
                  (CmmRegOff r' i') -> CmmRegOff r' (i + i')
                  _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
                        where rep = typeWidth (localRegType r)
              _ -> old
        -- See Note [Soundness of store rewriting]
        inlineExp _ old = old
  551. inlinable :: CmmNode e x -> Bool
  552. inlinable (CmmCall{}) = False
  553. inlinable (CmmForeignCall{}) = False
  554. inlinable (CmmUnsafeForeignCall{}) = False
  555. inlinable _ = True
-- Need to interleave this with inlining, because machop folding results
-- in literals, which we can inline more aggressively, and inlining
-- gives us opportunities for more folding.  However, we don't need any
-- facts to do MachOp folding.
machOpFoldRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) a
machOpFoldRewrite dflags = mkFRewrite3 first middle last
    where first _ _ = return Nothing
          middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
          middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m))
          middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e))
              where f e' = mkMiddle (AssignLocal l e' r)
          last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C
          last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l))
          -- Nothing means "no fold applied anywhere in the node".
          foldNode :: CmmNode e x -> Maybe (CmmNode e x)
          foldNode n = mapExpDeepM foldExp n
          foldExp (CmmMachOp op args) = cmmMachOpFoldM dflags op args
          foldExp _ = Nothing
  573. -- ToDo: Outputable instance for UsageMap and AssignmentMap