
/compiler/cmm/CmmRewriteAssignments.hs

https://github.com/dorchard/ghc

{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE FlexibleContexts #-}

{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-- TODO: Get rid of this flag:
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}

-- This module implements generalized code motion for assignments to
-- local registers, inlining and sinking when possible. It also does
-- some amount of rewriting for stores to register slots, which are
-- effectively equivalent to local registers.
module CmmRewriteAssignments
  ( rewriteAssignments
  ) where

import StgCmmUtils -- XXX layering violation

import Cmm
import CmmUtils
import CmmOpt
import DynFlags

import UniqSupply
import UniqFM
import Unique
import BlockId

import Hoopl
import Compiler.Hoopl ((<*>), mkMiddle, mkLast)

import Data.Maybe
import Control.Monad

import Prelude hiding (succ, zip)

----------------------------------------------------------------
--- Main function

rewriteAssignments :: DynFlags -> CmmGraph -> UniqSM CmmGraph
rewriteAssignments dflags g = do
  -- Because we need to act on forwards and backwards information, we
  -- first perform usage analysis and bake this information into the
  -- graph (backwards transform), and then do a forwards transform
  -- to actually perform inlining and sinking.
  g'  <- annotateUsage dflags g
  g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
                     analRewFwd assignmentLattice
                                (assignmentTransfer dflags)
                                (assignmentRewrite dflags `thenFwdRw` machOpFoldRewrite dflags)
  return (modifyGraph eraseRegUsage g'')

----------------------------------------------------------------
--- Usage information

-- We decorate all register assignments with approximate usage
-- information, that is, the maximum number of times the register is
-- referenced while it is live along all outgoing control paths.
-- This analysis provides a precise upper bound for usage, so if a
-- register is never referenced, we can remove it, as that assignment is
-- dead.
--
-- This analysis is very similar to liveness analysis; we just keep a
-- little extra info. (Maybe we should move it to CmmLive, and subsume
-- the old liveness analysis.)
--
-- There are a few subtleties here:
--
--  - If a register goes dead, and then becomes live again, the usages
--    of the disjoint live range don't count towards the original range.
--
--        a = 1; // used once
--        b = a;
--        a = 2; // used once
--        c = a;
--
--  - A register may be used multiple times, but these all reside in
--    different control paths, such that any given execution only uses
--    it once. In that case, the usage count may still be 1.
--
--        a = 1; // used once
--        if (b) {
--            c = a + 3;
--        } else {
--            c = a + 1;
--        }
--
--    This policy corresponds to an inlining strategy that does not
--    duplicate computation but may increase binary size.
--
--  - If we naively implement a usage count, we have a counting to
--    infinity problem across joins. Furthermore, knowing that
--    something is used 2 or more times in one runtime execution isn't
--    particularly useful for optimizations (inlining may be beneficial,
--    but there's no way of knowing that without register pressure
--    information.)
--
--        while (...) {
--            // first iteration, b used once
--            // second iteration, b used twice
--            // third iteration ...
--            a = b;
--        }
--        // b used zero times
--
--    There is an orthogonal question, which is that for every runtime
--    execution, the register may be used only once, but if we inline it
--    in every conditional path, the binary size might increase a lot.
--    But tracking this information would be tricky, because it violates
--    the finite lattice restriction Hoopl requires for termination;
--    we'd thus need to supply an alternate proof, which is probably
--    something we should defer until we actually have an optimization
--    that would take advantage of this. (This might also interact
--    strangely with liveness information.)
--
--        a = ...;
--        // a is used one time, but in X different paths
--        case (b) of
--            1 -> ... a ...
--            2 -> ... a ...
--            3 -> ... a ...
--            ...
--
--  - Memory stores to local register slots (CmmStore (CmmStackSlot
--    (LocalReg _) 0) _) have similar behavior to local registers,
--    in that these locations are all disjoint from each other. Thus,
--    we attempt to inline them too. Note that because these are only
--    generated as part of the spilling process, most of the time this
--    will refer to a local register and the assignment will immediately
--    die on the subsequent call. However, if we manage to replace that
--    local register with a memory location, it means that we've managed
--    to preserve a value on the stack without having to move it to
--    another memory location again! We collect usage information just
--    to be safe in case extra computation is involved.

data RegUsage = SingleUse | ManyUse
    deriving (Ord, Eq, Show)
-- Absence in map = ZeroUse

{-
-- minBound is bottom, maxBound is top, least-upper-bound is max
-- ToDo: Put this in Hoopl. Note that this isn't as useful as I
-- originally hoped, because you usually want to leave out the bottom
-- element when you have things like this put in maps. Maybe f is
-- useful on its own as a combining function.
boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
boundedOrdLattice n = DataflowLattice n minBound f
  where f _ (OldFact x) (NewFact y)
          | x >= y    = (NoChange,   x)
          | otherwise = (SomeChange, y)
-}

-- Custom node type we'll rewrite to. CmmAssign nodes to local
-- registers are replaced with AssignLocal nodes.
data WithRegUsage n e x where
    -- Plain will not contain CmmAssign nodes immediately after
    -- transformation, but as we rewrite assignments, we may have
    -- assignments here: these are assignments that should not be
    -- rewritten!
    Plain       :: n e x -> WithRegUsage n e x
    AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O

instance UserOfRegs LocalReg (n e x) => UserOfRegs LocalReg (WithRegUsage n e x) where
    foldRegsUsed dflags f z (Plain n)           = foldRegsUsed dflags f z n
    foldRegsUsed dflags f z (AssignLocal _ e _) = foldRegsUsed dflags f z e

instance DefinerOfRegs LocalReg (n e x) => DefinerOfRegs LocalReg (WithRegUsage n e x) where
    foldRegsDefd dflags f z (Plain n)           = foldRegsDefd dflags f z n
    foldRegsDefd dflags f z (AssignLocal r _ _) = foldRegsDefd dflags f z r

instance NonLocal n => NonLocal (WithRegUsage n) where
    entryLabel (Plain n) = entryLabel n
    successors (Plain n) = successors n

liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
liftRegUsage = mapGraph Plain

eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
eraseRegUsage = mapGraph f
  where f :: WithRegUsage CmmNode e x -> CmmNode e x
        f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
        f (Plain n)           = n

type UsageMap = UniqFM RegUsage

usageLattice :: DataflowLattice UsageMap
usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
  where f _ (OldFact x) (NewFact y)
          | x >= y    = (NoChange,   x)
          | otherwise = (SomeChange, y)
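
-- A worked sketch of the join (illustrative; not from the original
-- source). Absence in the map plays the role of ZeroUse, and the
-- least-upper-bound is just max on the usage ordering:
--
--     f _ (OldFact SingleUse) (NewFact ManyUse)   ==> (SomeChange, ManyUse)
--     f _ (OldFact ManyUse)   (NewFact SingleUse) ==> (NoChange,   ManyUse)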

-- We reuse the names 'gen' and 'kill', although we're doing something
-- slightly different from the Dragon Book
usageTransfer :: DynFlags -> BwdTransfer (WithRegUsage CmmNode) UsageMap
usageTransfer dflags = mkBTransfer3 first middle last
  where first _ f = f
        middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
        middle n f = gen_kill n f
        last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
        -- Checking for CmmCall/CmmForeignCall is unnecessary, because
        -- spills/reloads have already occurred by the time we do this
        -- analysis.
        -- XXX Deprecated warning is puzzling: what label are we
        -- supposed to use?
        -- ToDo: With a bit more cleverness here, we can avoid
        -- disappointment and heartbreak associated with the inability
        -- to inline into CmmCall and CmmForeignCall by
        -- over-estimating the usage to be ManyUse.
        last n f = gen_kill n (joinOutFacts usageLattice n f)
        gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
        gen_kill a = gen a . kill a
        gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
        gen a f = foldLocalRegsUsed dflags increaseUsage f a
        kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
        kill a f = foldLocalRegsDefd dflags delFromUFM f a
        increaseUsage f r = addToUFM_C combine f r SingleUse
          where combine _ _ = ManyUse
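
-- A worked sketch of the backwards transfer (illustrative only; the
-- registers are hypothetical). Walking this block backwards from the
-- empty fact:
--
--     x = y + z;   -- kill x, then gen y and z
--     w = x;       -- kill w, then gen x
--
-- below 'w = x' the fact is {}, above it {x: SingleUse}; the kill at
-- 'x = y + z' then deletes the x entry, leaving {y: SingleUse,
-- z: SingleUse}. Each assignment thus sees exactly the usage of the
-- live range it begins.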

usageRewrite :: BwdRewrite UniqSM (WithRegUsage CmmNode) UsageMap
usageRewrite = mkBRewrite3 first middle last
  where first  _ _ = return Nothing
        middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
        middle (Plain (CmmAssign (CmmLocal l) e)) f
            = return . Just
            $ case lookupUFM f l of
                Nothing    -> emptyGraph
                Just usage -> mkMiddle (AssignLocal l e usage)
        middle _ _ = return Nothing
        last   _ _ = return Nothing

type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)

annotateUsage :: DynFlags -> CmmGraph -> UniqSM CmmGraphWithRegUsage
annotateUsage dflags vanilla_g =
    let g = modifyGraph liftRegUsage vanilla_g
    in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
                   analRewBwd usageLattice (usageTransfer dflags) usageRewrite
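
-- What annotation does, sketched on a hypothetical local register x:
--
--     CmmAssign (CmmLocal x) e   ===>   AssignLocal x e SingleUse
--
-- when the backwards analysis records a single use of x along every
-- outgoing path; an assignment whose register is never used at all is
-- rewritten to the empty graph and so disappears here.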

----------------------------------------------------------------
--- Assignment tracking

-- The idea is to maintain a map from local registers to expressions,
-- such that the value of that register is the same as the value of that
-- expression at any given time. We can then do several things,
-- as described by Assignment.

-- Assignment describes the various optimizations that are valid
-- at a given point in the program.
data Assignment =
-- This assignment can always be inlined. It is cheap or single-use.
                  AlwaysInline CmmExpr
-- This assignment should be sunk down to its first use. (This will
-- increase code size if the register is used in multiple control flow
-- paths, but won't increase execution time, and the reduction of
-- register pressure is worth it, I think.)
                | AlwaysSink CmmExpr
-- We cannot safely optimize occurrences of this local register. (This
-- corresponds to top in the lattice structure.)
                | NeverOptimize

-- Extract the expression that is being assigned
xassign :: Assignment -> Maybe CmmExpr
xassign (AlwaysInline e) = Just e
xassign (AlwaysSink e)   = Just e
xassign NeverOptimize    = Nothing

-- Extract the expressions, but only if they're built from the same
-- constructor
xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
xassign2 (AlwaysSink e, AlwaysSink e')     = Just (e, e')
xassign2 _ = Nothing

-- Note: We'd like to make decisions about "not optimizing" as soon as
-- possible, because this will make running the transfer function more
-- efficient.
type AssignmentMap = UniqFM Assignment

assignmentLattice :: DataflowLattice AssignmentMap
assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
  where add _ (OldFact old) (NewFact new)
            = case (old, new) of
                (NeverOptimize, _) -> (NoChange,   NeverOptimize)
                (_, NeverOptimize) -> (SomeChange, NeverOptimize)
                (xassign2 -> Just (e, e'))
                    | e == e'   -> (NoChange, old)
                    | otherwise -> (SomeChange, NeverOptimize)
                _ -> (SomeChange, NeverOptimize)
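
-- Worked examples of the join (illustrative, not from the original
-- source); 'e1' and 'e2' stand for arbitrary distinct expressions:
--
--     add _ (OldFact (AlwaysInline e1)) (NewFact (AlwaysInline e1))
--         ==> (NoChange,   AlwaysInline e1)  -- same value on both paths
--     add _ (OldFact (AlwaysInline e1)) (NewFact (AlwaysInline e2))
--         ==> (SomeChange, NeverOptimize)    -- conflicting values: go to top
--     add _ (OldFact (AlwaysSink e1))   (NewFact (AlwaysInline e1))
--         ==> (SomeChange, NeverOptimize)    -- mixed constructors: go to top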

-- Deletes sinks from assignment map, because /this/ is the place
-- where it will be sunk to.
deleteSinks :: UserOfRegs LocalReg n => DynFlags -> n -> AssignmentMap -> AssignmentMap
deleteSinks dflags n m = foldLocalRegsUsed dflags (adjustUFM f) m n
  where f (AlwaysSink _) = NeverOptimize
        f old = old

-- Invalidates any expressions that use a register.
invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
  where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
        f _ _ m = m

{- This requires the entire spine of the map to be continually rebuilt,
 - which causes crazy memory usage!
invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
  where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
        invalidateUsers' _ old = old
-}

-- Note [foldUFM performance]
-- These calls to fold UFM no longer leak memory, but they do cause
-- pretty killer amounts of allocation. So they'll be something to
-- optimize; we need an algorithmic change to prevent us from having to
-- traverse the /entire/ map continually.

middleAssignment :: DynFlags -> WithRegUsage CmmNode O O -> AssignmentMap
                 -> AssignmentMap

-- Algorithm for annotated assignments:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Add the assignment to our list of valid local assignments with
--     the correct optimization policy.
--  3. Look for all assignments that reference that register and
--     invalidate them.
middleAssignment dflags n@(AssignLocal r e usage) assign
    = invalidateUsersOf (CmmLocal r) . add . deleteSinks dflags n $ assign
  where add m = addToUFM m r
              $ case usage of
                    SingleUse -> AlwaysInline e
                    ManyUse   -> decide e
        decide CmmLit{}       = AlwaysInline e
        decide CmmReg{}       = AlwaysInline e
        decide CmmLoad{}      = AlwaysSink e
        decide CmmStackSlot{} = AlwaysSink e
        decide CmmMachOp{}    = AlwaysSink e
        -- We'll always inline simple operations on the global
        -- registers, to reduce register pressure: Sp - 4 or Hp - 8
        -- EZY: Justify this optimization more carefully.
        decide CmmRegOff{}    = AlwaysInline e
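
-- E.g. (an illustrative sketch, with a hypothetical register): a
-- single-use 'x = I32[Sp + 4]' is marked AlwaysInline, while the same
-- assignment used along several control paths is marked AlwaysSink,
-- since duplicating the load on each path could duplicate work.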

-- Algorithm for unannotated assignments of global registers:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Look for all assignments that reference this register and
--     invalidate them.
middleAssignment dflags (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
    = invalidateUsersOf reg . deleteSinks dflags n $ assign

-- Algorithm for unannotated assignments of *local* registers: do
-- nothing (it's a reload, so no state should have changed)
middleAssignment _ (Plain (CmmAssign (CmmLocal _) _)) assign = assign

-- Algorithm for stores:
--  1. Delete any sinking assignments that were used by this instruction
--  2. Look for all assignments that load from memory locations that
--     were clobbered by this store and invalidate them.
middleAssignment dflags (Plain n@(CmmStore lhs rhs)) assign
    = let m = deleteSinks dflags n assign
      in foldUFM_Directly f m m -- [foldUFM performance]
  where f u (xassign -> Just x) m | clobbers dflags (lhs, rhs) (u, x) = addToUFM_Directly m u NeverOptimize
        f _ _ m = m
{- Also leaky
    = mapUFM_Directly p . deleteSinks n $ assign
      -- ToDo: There's a missed opportunity here: even if a memory
      -- access we're attempting to sink gets clobbered at some
      -- location, it's still /better/ to sink it to right before the
      -- point where it gets clobbered. How might we do this?
      -- Unfortunately, it's too late to change the assignment...
  where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
        p _ old = old
-}

-- Assumption: Unsafe foreign calls don't clobber memory
-- Since foreign calls clobber caller-saved registers, we need to
-- invalidate any assignments that reference those global registers.
-- This is kind of expensive. (One way to optimize this might be to
-- store extra information about expressions that allow this and other
-- checks to be done cheaply.)
middleAssignment dflags (Plain n@(CmmUnsafeForeignCall{})) assign
    = deleteCallerSaves (foldLocalRegsDefd dflags (\m r -> addToUFM m r NeverOptimize) (deleteSinks dflags n assign) n)
  where deleteCallerSaves m = foldUFM_Directly f m m
        f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
        f _ _ m = m
        g (CmmReg (CmmGlobal r)) _      | callerSaves platform r = True
        g (CmmRegOff (CmmGlobal r) _) _ | callerSaves platform r = True
        g _ b = b
        platform = targetPlatform dflags

middleAssignment _ (Plain (CmmComment {})) assign
    = assign

-- Assumptions:
--  * Writes using Hp do not overlap with any other memory locations
--    (An important invariant being relied on here is that we only ever
--    use Hp to allocate values on the heap, which appears to be the
--    case given hpReg usage, and that our heap writing code doesn't
--    do anything stupid like overlapping writes.)
--  * Stack slots do not overlap with any other memory locations
--  * Stack slots for different areas do not overlap
--  * Stack slots within the same area and different offsets may
--    overlap; we need to do a size check (see 'overlaps').
--  * Register slots only overlap with themselves. (But this shouldn't
--    happen in practice, because we'll fail to inline a reload across
--    the next spill.)
--  * Non stack-slot stores always conflict with each other. (This is
--    not always the case; we could probably do something special for Hp)
clobbers :: DynFlags
         -> (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
         -> (Unique,  CmmExpr) -- (register, expression) that may be clobbered
         -> Bool
clobbers _ (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
clobbers _ (CmmReg (CmmGlobal Hp), _)      (_, _) = False
-- ToDo: Also catch MachOp case
clobbers _ (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
    | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
clobbers dflags (CmmStackSlot a o, rhs) (_, expr) = f expr
  where f (CmmLoad (CmmStackSlot a' o') t)
            = (a, o, widthInBytes (cmmExprWidth dflags rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
        f (CmmLoad e _)    = containsStackSlot e
        f (CmmMachOp _ es) = or (map f es)
        f _                = False
        -- Maybe there's an invariant broken if this actually ever
        -- returns True
        containsStackSlot (CmmLoad{})      = True -- load of a load, all bets off
        containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
        containsStackSlot (CmmStackSlot{}) = True
        containsStackSlot _                = False
clobbers _ _ (_, e) = f e
  where f (CmmLoad (CmmStackSlot _ _) _) = False
        f (CmmLoad{})                    = True -- conservative
        f (CmmMachOp _ es)               = or (map f es)
        f _                              = False

-- Check for memory overlapping.
-- Diagram:
--      4            8            12
--      s ---- w --- o
--      [    I32    ]
--      [          F64           ]
--      s' --------- w' -------- o'
type CallSubArea = (Area, Int, Int) -- area, offset, width
overlaps :: CallSubArea -> CallSubArea -> Bool
overlaps (a, _, _) (a', _, _) | a /= a' = False
overlaps (_, o, w) (_, o', w') =
    let s  = o  - w
        s' = o' - w'
    in (s' < o) && (s < o') -- Not LTE, because [ I32 ][ I32 ] is OK
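
-- A worked check (illustrative): two adjacent I32 slots in the same
-- area, (a, 8, 4) and (a, 4, 4), give s = 4 and s' = 0, so s' < o
-- (0 < 8) holds but s < o' (4 < 4) fails: adjacent slots do not
-- overlap, as the comment above requires. By contrast, the I32 and
-- F64 slots pictured, (a, 8, 4) and (a, 12, 8), give s = 4 and
-- s' = 4, both comparisons hold, and the slots overlap.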

lastAssignment :: DynFlags -> WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
lastAssignment _ (Plain (CmmCall _ (Just k) _ _ _ _)) assign = [(k, invalidateVolatile k assign)]
lastAssignment _ (Plain (CmmForeignCall {succ=k}))    assign = [(k, invalidateVolatile k assign)]
lastAssignment dflags l assign = map (\id -> (id, deleteSinks dflags l assign)) $ successors l

-- Invalidates any expressions that have volatile contents: essentially,
-- all terminals are volatile except for literals and loads of stack slots
-- that do not correspond to the call area for 'k' (the current call
-- area is volatile because overflow return parameters may be written
-- there.)
-- Note: mapUFM could be expensive, but hopefully block boundaries
-- aren't too common. If it is a problem, replace with something more
-- clever.
invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
  where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
          where exp CmmLit{} = True
                exp (CmmLoad (CmmStackSlot (Young k') _) _)
                    | k' == k = False
                exp (CmmLoad (CmmStackSlot _ _) _) = True
                exp (CmmMachOp _ es) = and (map exp es)
                exp _ = False
        p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
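
-- E.g. (an illustrative sketch): across a call returning to 'k', an
-- AlwaysInline of a literal survives, and so does a load from a stack
-- slot of some *other* call area; but a load from the Young k area, a
-- plain register reference, or anything else volatile is demoted to
-- NeverOptimize.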

assignmentTransfer :: DynFlags
                   -> FwdTransfer (WithRegUsage CmmNode) AssignmentMap
assignmentTransfer dflags
    = mkFTransfer3 (flip const)
                   (middleAssignment dflags)
                   ((mkFactBase assignmentLattice .) . lastAssignment dflags)

-- Note [Soundness of inlining]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- In the Hoopl paper, the soundness condition on rewrite functions is
-- described as follows:
--
--      "If it replaces a node n by a replacement graph g, then g must
--      be observationally equivalent to n under the assumptions
--      expressed by the incoming dataflow fact f. Moreover, analysis of
--      g must produce output fact(s) that are at least as informative
--      as the fact(s) produced by applying the transfer function to n."
--
-- We consider the second condition in more detail here. It says that,
-- given the rewrite R(n, f) = g, for any incoming fact f' consistent
-- with f (f' >= f), running the transfer function gives
-- T(f', n) <= T(f', g). For inlining this is not necessarily the case:
--
--      n = "x = a + 2"
--      f = f' = {a = y}
--      g = "x = y + 2"
--      T(f', n) = {x = a + 2, a = y}
--      T(f', g) = {x = y + 2, a = y}
--
-- y + 2 and a + 2 are not obviously comparable, and a naive
-- implementation of the lattice would say they are incomparable.
-- At best, this means we may be over-conservative, at worst, it means
-- we may not terminate.
--
-- However, in the original Lerner-Grove-Chambers paper, soundness and
-- termination are separated, and only equivalence of facts is required
-- for soundness. Monotonicity of the transfer function is not required
-- for termination (as the calculation of least-upper-bound prevents
-- this from being a problem), but it means we won't necessarily find
-- the least-fixed point.

-- Note [Coherency of annotations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Is it possible for our usage annotations to become invalid after we
-- start performing transformations? As the usage info only provides
-- an upper bound, we only need to consider cases where the usages of
-- a register may increase due to transformations--e.g. any reference
-- to a local register in an AlwaysInline or AlwaysSink instruction, whose
-- originating assignment was single use (we don't care about the
-- many use case, because it is the top of the lattice). But such a
-- case is not possible, because we always inline any single use
-- register. QED.
--
-- TODO: A useful lint option would be to check this invariant that
-- there is never a local register in the assignment map that is
-- single-use.

-- Note [Soundness of store rewriting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- The soundness of store rewriting depends on the invariant that no
-- assignment is made to the local register before its store is
-- accessed. This is clearly true with unoptimized spill-reload code,
-- and as the store will always be rewritten first (if possible), there
-- is no chance of it being propagated down before getting written
-- (possibly with incorrect values from the assignment map, due to
-- reassignment of the local register.) This is probably not locally
-- sound.

assignmentRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) AssignmentMap
assignmentRewrite dflags = mkFRewrite3 first middle last
  where
    first _ _ = return Nothing
    middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
    middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
    middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
    last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l

    -- Tuple is (inline?, reloads for sinks)
    precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
    precompute assign n = foldLocalRegsUsed dflags f (False, []) n -- duplicates are harmless
      where f (i, l) r = case lookupUFM assign r of
                            Just (AlwaysSink e)   -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
                            Just (AlwaysInline _) -> (True, l)
                            Just NeverOptimize    -> (i, l)
                            -- This case can show up when we have
                            -- limited optimization fuel.
                            Nothing -> (i, l)

    rewrite :: AssignmentMap
            -> (Bool, [WithRegUsage CmmNode O O])
            -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
            -> CmmNode O x
            -> Maybe (Graph (WithRegUsage CmmNode) O x)
    rewrite _ (False, []) _ _ = Nothing
    -- Note [CmmCall Inline Hack]
    -- Conservative hack: don't do any inlining on what will
    -- be translated into OldCmm CmmCalls, since the code
    -- produced here tends to be unproblematic and I need to write
    -- lint passes to ensure that we don't put anything in the
    -- arguments that could be construed as a global register by
    -- some later translation pass. (For example, slots will turn
    -- into dereferences of Sp.) See [Register parameter passing].
    -- ToDo: Fix this up to only bug out if all inlines were for
    -- CmmExprs with global registers (we can't use the
    -- straightforward mapExpDeep call, in this case.) ToDo: We miss
    -- an opportunity here, where all possible inlinings should
    -- instead be sunk.
    rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
    rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))

    rewriteLocal :: AssignmentMap
                 -> (Bool, [WithRegUsage CmmNode O O])
                 -> LocalReg -> CmmExpr -> RegUsage
                 -> Maybe (Graph (WithRegUsage CmmNode) O O)
    rewriteLocal _ (False, []) _ _ _ = Nothing
    rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
      where n' = AssignLocal l e' u
            e' = if i then wrapRecExp (inlineExp assign) e else e
            -- inlinable check omitted, since we can always inline into
            -- assignments.

    inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
    inline False _ n = n
    inline True  _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
    inline True assign n = mapExpDeep (inlineExp assign) n

    inlineExp assign old@(CmmReg (CmmLocal r))
      = case lookupUFM assign r of
          Just (AlwaysInline x) -> x
          _ -> old
    inlineExp assign old@(CmmRegOff (CmmLocal r) i)
      = case lookupUFM assign r of
          Just (AlwaysInline x) ->
            case x of
              CmmRegOff r' i' -> CmmRegOff r' (i + i')
              _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
                where rep = typeWidth (localRegType r)
          _ -> old
    -- See Note [Soundness of store rewriting]
    inlineExp _ old = old

    inlinable :: CmmNode e x -> Bool
    inlinable (CmmCall{})              = False
    inlinable (CmmForeignCall{})       = False
    inlinable (CmmUnsafeForeignCall{}) = False
    inlinable _ = True
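
-- A worked sketch of the rewrite (illustrative; the registers and
-- expressions are hypothetical). With the incoming fact
--
--     { x |-> AlwaysInline (a + 2), y |-> AlwaysSink (load from slot) }
--
-- the node 'z = x + y' precomputes to (True, [y = load from slot]):
-- the sink for y is materialized as a reload immediately before the
-- node, and the AlwaysInline binding is substituted into it, yielding
--
--     y = load from slot
--     z = (a + 2) + y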

-- Need to interleave this with inlining, because machop folding results
-- in literals, which we can inline more aggressively, and inlining
-- gives us opportunities for more folding. However, we don't need any
-- facts to do MachOp folding.
machOpFoldRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) a
machOpFoldRewrite dflags = mkFRewrite3 first middle last
  where first _ _ = return Nothing
        middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
        middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m))
        middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e))
          where f e' = mkMiddle (AssignLocal l e' r)
        last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C
        last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l))
        foldNode :: CmmNode e x -> Maybe (CmmNode e x)
        foldNode n = mapExpDeepM foldExp n
        foldExp (CmmMachOp op args) = cmmMachOpFoldM dflags op args
        foldExp _ = Nothing

-- ToDo: Outputable instance for UsageMap and AssignmentMap