PageRenderTime 62ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/compiler/cmm/CmmLayoutStack.hs

https://bitbucket.org/carter/ghc
Haskell | 1021 lines | 519 code | 156 blank | 346 comment | 12 complexity | 0d9e5a733546c97072c2def8837b1d83 MD5 | raw file
  1. {-# LANGUAGE RecordWildCards, GADTs #-}
  2. module CmmLayoutStack (
  3. cmmLayoutStack, setInfoTableStackMap
  4. ) where
  5. import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX layering violation
  6. import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX layering violation
  7. import StgCmmLayout ( entryCode ) -- XXX layering violation
  8. import Cmm
  9. import BlockId
  10. import CLabel
  11. import CmmUtils
  12. import MkGraph
  13. import Module
  14. import ForeignCall
  15. import CmmLive
  16. import CmmProcPoint
  17. import SMRep
  18. import Hoopl
  19. import UniqSupply
  20. import Maybes
  21. import UniqFM
  22. import Util
  23. import DynFlags
  24. import FastString
  25. import Outputable
  26. import qualified Data.Set as Set
  27. import Control.Monad.Fix
  28. import Data.Array as Array
  29. import Data.Bits
  30. import Data.List (nub)
  31. import Control.Monad (liftM)
  32. #include "HsVersions.h"
-- | The contents of one word of stack, as seen by the slot-allocation
-- pass (see 'allocate' below) when it builds a picture of the frame.
data StackSlot = Occupied | Empty
     -- Occupied: a return address or part of an update frame
     -- Empty: nothing here needs preserving, so a live variable may be
     -- saved into this word.
  35. instance Outputable StackSlot where
  36. ppr Occupied = ptext (sLit "XXX")
  37. ppr Empty = ptext (sLit "---")
-- All stack locations are expressed as positive byte offsets from the
-- "base", which is defined to be the address above the return address
-- on the stack on entry to this CmmProc.
--
-- Lower addresses have higher StackLocs.
--
-- | A position on the stack: a byte offset below the frame base, per
-- the convention described above.
type StackLoc = ByteOff
{-
A StackMap describes the stack at any given point.  At a continuation
it has a particular layout, like this:

         |             | <- base
         |-------------|
         |     ret0    | <- base + 8
         |-------------|
         .  upd frame  . <- base + sm_ret_off
         |-------------|
         |             |
         .    vars     .
         . (live/dead) .
         |             | <- base + sm_sp - sm_args
         |-------------|
         |    ret1     |
         .  ret vals   . <- base + sm_sp (<--- Sp points here)
         |-------------|

Why do we include the final return address (ret0) in our stack map?  I
have absolutely no idea, but it seems to be done that way consistently
in the rest of the code generator, so I played along here. --SDM

Note that we will be constructing an info table for the continuation
(ret1), which needs to describe the stack down to, but not including,
the update frame (or ret0, if there is no update frame).
-}
-- | A description of the stack at a particular program point: where Sp
-- sits, how large the young (argument) area is, how much space is kept
-- clear for an update frame, and which local registers currently also
-- live in stack slots.
data StackMap = StackMap
 {  sm_sp   :: StackLoc
       -- ^ the offset of Sp relative to the base on entry
       -- to this block.
 ,  sm_args :: ByteOff
       -- ^ the number of bytes of arguments in the area for this block
       -- Defn: the offset of young(L) relative to the base is given by
       -- (sm_sp - sm_args) of the StackMap for block L.
 ,  sm_ret_off :: ByteOff
       -- ^ Number of words of stack that we do not describe with an info
       -- table, because it contains an update frame.
 ,  sm_regs :: UniqFM (LocalReg,StackLoc)
       -- ^ regs on the stack
 }
  83. instance Outputable StackMap where
  84. ppr StackMap{..} =
  85. text "Sp = " <> int sm_sp $$
  86. text "sm_args = " <> int sm_args $$
  87. text "sm_ret_off = " <> int sm_ret_off $$
  88. text "sm_regs = " <> ppr (eltsUFM sm_regs)
-- | Entry point of the pass: lay out the stack for one 'CmmGraph'.
-- Produces the rewritten graph (stack slots manifested as concrete Sp
-- offsets, safe foreign calls lowered) plus the StackMap for each
-- block, which is later used to fill in stack info tables (see
-- 'setInfoTableStackMap').
cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph
               -> UniqSM (CmmGraph, BlockEnv StackMap)
cmmLayoutStack dflags procpoints entry_args
               graph0@(CmmGraph { g_entry = entry })
  = do
    -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()

    -- We need liveness info.  We could do removeDeadAssignments at
    -- the same time, but it buys nothing over doing cmmSink later,
    -- and costs a lot more than just cmmLiveness.
    -- (graph, liveness) <- removeDeadAssignments graph0
    let (graph, liveness) = (graph0, cmmLiveness graph0)

    -- pprTrace "liveness" (ppr liveness) $ return ()
    let blocks = postorderDfs graph

    -- Knot-tying: 'layout' takes the *final* stack maps and the final
    -- Sp high-water mark as inputs while it is the very computation
    -- producing them, hence mfix and the lazy (~) pattern.  'layout'
    -- must consume these lazily or the knot diverges.
    (final_stackmaps, _final_high_sp, new_blocks) <-
          mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
            layout dflags procpoints liveness entry entry_args
                   rec_stackmaps rec_high_sp blocks

    new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks

    -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
    return (ofBlockList entry new_blocks', final_stackmaps)
-- | The main worker: walk the blocks (in DFS postorder from the
-- entry), threading an accumulated StackMap environment and Sp
-- high-water mark.  Note that 'final_stackmaps' and 'final_hwm' are
-- this function's own outputs, fed back in lazily via the mfix in
-- 'cmmLayoutStack'; they must not be forced during layout.
layout :: DynFlags
       -> BlockSet                      -- proc points
       -> BlockEnv CmmLive              -- liveness
       -> BlockId                       -- entry
       -> ByteOff                       -- stack args on entry
       -> BlockEnv StackMap             -- [final] stack maps
       -> ByteOff                       -- [final] Sp high water mark
       -> [CmmBlock]                    -- [in] blocks
       -> UniqSM
          ( BlockEnv StackMap           -- [out] stack maps
          , ByteOff                     -- [out] Sp high water mark
          , [CmmBlock]                  -- [out] new blocks
          )

layout dflags procpoints liveness entry entry_args final_stackmaps final_hwm blocks
  = go blocks init_stackmap entry_args []
  where
    (updfr, cont_info) = collectContInfo blocks

    -- The entry block starts with just the incoming argument area.
    init_stackmap = mapSingleton entry StackMap{ sm_sp   = entry_args
                                               , sm_args = entry_args
                                               , sm_ret_off = updfr
                                               , sm_regs = emptyUFM
                                               }

    go [] acc_stackmaps acc_hwm acc_blocks
      = return (acc_stackmaps, acc_hwm, acc_blocks)

    go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
      = do
       let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0

       -- Every block we reach must already have a StackMap, placed
       -- there when its predecessor was processed (or init_stackmap).
       let stack0@StackMap { sm_sp = sp0 }
               = mapFindWithDefault
                     (pprPanic "no stack map for" (ppr entry_lbl))
                     entry_lbl acc_stackmaps

       -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()

       -- (a) Update the stack map to include the effects of
       --     assignments in this block
       let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0

       -- (b) Insert assignments to reload all the live variables if this
       --     block is a proc point
       let middle1 = if entry_lbl `setMember` procpoints
                        then foldr blockCons middle0 (insertReloads stack0)
                        else middle0

       -- (c) Look at the last node and if we are making a call or
       --     jumping to a proc point, we must save the live
       --     variables, adjust Sp, and construct the StackMaps for
       --     each of the successor blocks.  See handleLastNode for
       --     details.
       (middle2, sp_off, last1, fixup_blocks, out)
           <- handleLastNode dflags procpoints liveness cont_info
                             acc_stackmaps stack1 middle0 last0

       -- pprTrace "layout(out)" (ppr out) $ return ()

       -- (d) Manifest Sp: run over the nodes in the block and replace
       --     CmmStackSlot with CmmLoad from Sp with a concrete offset.
       --
       -- our block:
       --    middle1          -- the original middle nodes
       --    middle2          -- live variable saves from handleLastNode
       --    Sp = Sp + sp_off -- Sp adjustment goes here
       --    last1            -- the last node
       --
       let middle_pre = blockToList $ foldl blockSnoc middle1 middle2

           sp_high = final_hwm - entry_args
              -- The stack check value is adjusted by the Sp offset on
              -- entry to the proc, which is entry_args.  We are
              -- assuming that we only do a stack check at the
              -- beginning of a proc, and we don't modify Sp before the
              -- check.

           final_blocks = manifestSp dflags final_stackmaps stack0 sp0 sp_high entry0
                              middle_pre sp_off last1 fixup_blocks

           acc_stackmaps' = mapUnion acc_stackmaps out

           -- If this block jumps to the GC, then we do not take its
           -- stack usage into account for the high-water mark.
           -- Otherwise, if the only stack usage is in the stack-check
           -- failure block itself, we will do a redundant stack
           -- check.  The stack has a buffer designed to accommodate
           -- the largest amount of stack needed for calling the GC.
           --
           this_sp_hwm | isGcJump last0 = 0
                       | otherwise      = sp0 - sp_off

           hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out))

       go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
  188. -- -----------------------------------------------------------------------------
  189. -- Not foolproof, but GCFun is the culprit we most want to catch
  190. isGcJump :: CmmNode O C -> Bool
  191. isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) })
  192. = l == GCFun || l == GCEnter1
  193. isGcJump _something_else = False
  194. -- -----------------------------------------------------------------------------
  195. -- This doesn't seem right somehow. We need to find out whether this
  196. -- proc will push some update frame material at some point, so that we
  197. -- can avoid using that area of the stack for spilling. The
  198. -- updfr_space field of the CmmProc *should* tell us, but it doesn't
  199. -- (I think maybe it gets filled in later when we do proc-point
  200. -- splitting).
  201. --
  202. -- So we'll just take the max of all the cml_ret_offs. This could be
  203. -- unnecessarily pessimistic, but probably not in the code we
  204. -- generate.
-- | Scan every block once, returning (a) the maximum cml_ret_off over
-- all calls -- used as the amount of stack to keep clear for update
-- frame material (see the comment above) -- and (b) a map from each
-- continuation label to the bytes of return values it expects on the
-- stack.
collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
collectContInfo blocks
  = (maximum ret_offs, mapFromList (catMaybes mb_argss))
  where
    (mb_argss, ret_offs) = mapAndUnzip get_cont blocks

    -- For one block: (maybe (cont label, ret-arg bytes), ret offset).
    get_cont b =
      case lastNode b of
        CmmCall { cml_cont = Just l, .. }
           -> (Just (l, cml_ret_args), cml_ret_off)
        CmmForeignCall { .. }
           -> (Just (succ, 0), updfr) -- ??
        _other -> (Nothing, 0)
  217. -- -----------------------------------------------------------------------------
  218. -- Updating the StackMap from middle nodes
  219. -- Look for loads from stack slots, and update the StackMap. This is
  220. -- purely for optimisation reasons, so that we can avoid saving a
  221. -- variable back to a different stack slot if it is already on the
  222. -- stack.
  223. --
  224. -- This happens a lot: for example when function arguments are passed
  225. -- on the stack and need to be immediately saved across a call, we
  226. -- want to just leave them where they are on the stack.
  227. --
-- | Update a StackMap with the effect of one node.  A load from a
-- stack slot into a local register records that the register now also
-- lives in that slot; any other assignment to a local register drops
-- whatever slot that register was previously known to occupy (the
-- register no longer matches the saved value).  Other nodes leave the
-- map unchanged.
procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
procMiddle stackmaps node sm
  = case node of
     CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
       -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
       where loc = getStackLoc area off stackmaps
     CmmAssign (CmmLocal r) _other
       -> sm { sm_regs = delFromUFM (sm_regs sm) r }
     _other
       -> sm
  238. getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
  239. getStackLoc Old n _ = n
  240. getStackLoc (Young l) n stackmaps =
  241. case mapLookup l stackmaps of
  242. Nothing -> pprPanic "getStackLoc" (ppr l)
  243. Just sm -> sm_sp sm - sm_args sm + n
  244. -- -----------------------------------------------------------------------------
  245. -- Handling stack allocation for a last node
  246. -- We take a single last node and turn it into:
  247. --
  248. -- C1 (some statements)
  249. -- Sp = Sp + N
  250. -- C2 (some more statements)
  251. -- call f() -- the actual last node
  252. --
  253. -- plus possibly some more blocks (we may have to add some fixup code
  254. -- between the last node and the continuation).
  255. --
  256. -- C1: is the code for saving the variables across this last node onto
  257. -- the stack, if the continuation is a call or jumps to a proc point.
  258. --
  259. -- C2: if the last node is a safe foreign call, we have to inject some
  260. -- extra code that goes *after* the Sp adjustment.
-- | Work out the stack effect of a block's last node: the save/fixup
-- assignments to run *before* the Sp adjustment, the Sp adjustment
-- itself, the (possibly retargeted) last node, any freshly created
-- fixup blocks, and the StackMaps the successors must start from.
handleLastNode
   :: DynFlags -> ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
   -> BlockEnv StackMap -> StackMap
   -> Block CmmNode O O
   -> CmmNode O C
   -> UniqSM
      ( [CmmNode O O]      -- nodes to go *before* the Sp adjustment
      , ByteOff            -- amount to adjust Sp
      , CmmNode O C        -- new last node
      , [CmmBlock]         -- new blocks
      , BlockEnv StackMap  -- stackmaps for the continuations
      )

handleLastNode dflags procpoints liveness cont_info stackmaps
               stack0@StackMap { sm_sp = sp0 } middle last
 = case last of
    --  At each return / tail call,
    --  adjust Sp to point to the last argument pushed, which
    --  is cml_args, after popping any other junk from the stack.
    CmmCall{ cml_cont = Nothing, .. } -> do
        let sp_off = sp0 - cml_args
        return ([], sp_off, last, [], mapEmpty)

    --  At each CmmCall with a continuation:
    CmmCall{ cml_cont = Just cont_lbl, .. } ->
       return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off

    CmmForeignCall{ succ = cont_lbl, .. } -> do
       return $ lastCall cont_lbl (wORD_SIZE dflags) (wORD_SIZE dflags) (sm_ret_off stack0)
            -- one word each for args and results: the return address

    CmmBranch{..}     -> handleBranches
    CmmCondBranch{..} -> handleBranches
    CmmSwitch{..}     -> handleBranches

  where
     -- Calls and ForeignCalls are handled the same way:
     lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
              -> ( [CmmNode O O]
                 , ByteOff
                 , CmmNode O C
                 , [CmmBlock]
                 , BlockEnv StackMap
                 )
     lastCall lbl cml_args cml_ret_args cml_ret_off
      =  ( assignments
         , spOffsetForCall sp0 cont_stack cml_args
         , last
         , [] -- no new blocks
         , mapSingleton lbl cont_stack )
      where
         (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off

     -- Compute the continuation's StackMap and the stores needed to
     -- establish it from the current stack.
     prepareStack lbl cml_ret_args cml_ret_off
       | Just cont_stack <- mapLookup lbl stackmaps
             -- If we have already seen this continuation before, then
             -- we just have to make the stack look the same:
       = (fixupStack stack0 cont_stack, cont_stack)
             -- Otherwise, we have to allocate the stack frame
       | otherwise
       = (save_assignments, new_cont_stack)
       where
        (new_cont_stack, save_assignments)
           = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0

     -- For other last nodes (branches), if any of the targets is a
     -- proc point, we have to set up the stack to match what the proc
     -- point is expecting.
     --
     handleBranches :: UniqSM ( [CmmNode O O]
                              , ByteOff
                              , CmmNode O C
                              , [CmmBlock]
                              , BlockEnv StackMap )

     handleBranches
         -- Note [diamond proc point]
       | Just l <- futureContinuation middle
       , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
       = do
         let cont_args = mapFindWithDefault 0 l cont_info
             (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
             out = mapFromList [ (l', cont_stack)
                               | l' <- successors last ]
         return ( assigs
                , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags)
                , last
                , []
                , out)

       | otherwise = do
         pps <- mapM handleBranch (successors last)
         let lbl_map :: LabelMap Label
             lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
             fix_lbl l = mapFindWithDefault l l lbl_map
         return ( []
                , 0
                , mapSuccessors fix_lbl last
                , concat [ blk | (_,_,_,blk) <- pps ]
                , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )

     -- For each successor of this block
     handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
     handleBranch l
        --   (a) if the successor already has a stackmap, we need to
        --       shuffle the current stack to make it look the same.
        --       We have to insert a new block to make this happen.
        | Just stack2 <- mapLookup l stackmaps
        = do
             let assigs = fixupStack stack0 stack2
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (b) if the successor is a proc point, save everything
        --       on the stack.
        | l `setMember` procpoints
        = do
             let cont_args = mapFindWithDefault 0 l cont_info
                 (stack2, assigs) =
                      --pprTrace "first visit to proc point"
                      --             (ppr l <+> ppr stack1) $
                      setupStackFrame dflags l liveness (sm_ret_off stack0)
                                                        cont_args stack0
             --
             (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs
             return (l, tmp_lbl, stack2, block)

        --   (c) otherwise, the current StackMap is the StackMap for
        --       the continuation.  But we must remember to remove any
        --       variables from the StackMap that are *not* live at
        --       the destination, because this StackMap might be used
        --       by fixupStack if this is a join point.
        | otherwise = return (l, l, stack1, [])
        where live = mapFindWithDefault (panic "handleBranch") l liveness
              stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) }
              is_live (r,_) = r `elemRegSet` live
-- | Build a fixup block: a fresh block that runs the given
-- assignments, adjusts Sp to match the target StackMap, and jumps to
-- the target label.  When there is nothing to fix up (no assignments
-- and Sp already agrees), the original label is returned and no block
-- is created.
makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap -> [CmmNode O O]
               -> UniqSM (Label, [CmmBlock])
makeFixupBlock dflags sp0 l stack assigs
  | null assigs && sp0 == sm_sp stack = return (l, [])
  | otherwise = do
     tmp_lbl <- liftM mkBlockId $ getUniqueM
     let sp_off = sp0 - sm_sp stack
         block = blockJoin (CmmEntry tmp_lbl)
                           (maybeAddSpAdj dflags sp_off (blockFromList assigs))
                           (CmmBranch l)
     return (tmp_lbl, [block])
  396. -- Sp is currently pointing to current_sp,
  397. -- we want it to point to
  398. -- (sm_sp cont_stack - sm_args cont_stack + args)
  399. -- so the difference is
  400. -- sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
  401. spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
  402. spOffsetForCall current_sp cont_stack args
  403. = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
  404. -- | create a sequence of assignments to establish the new StackMap,
  405. -- given the old StackMap.
  406. fixupStack :: StackMap -> StackMap -> [CmmNode O O]
  407. fixupStack old_stack new_stack = concatMap move new_locs
  408. where
  409. old_map = sm_regs old_stack
  410. new_locs = stackSlotRegs new_stack
  411. move (r,n)
  412. | Just (_,m) <- lookupUFM old_map r, n == m = []
  413. | otherwise = [CmmStore (CmmStackSlot Old n)
  414. (CmmReg (CmmLocal r))]
-- | Build the stack frame for entry to a continuation: allocate slots
-- for everything live there (via 'allocate'), then account for the
-- return values that will be on the stack at the return point.
setupStackFrame
     :: DynFlags
     -> BlockId                 -- label of continuation
     -> BlockEnv CmmLive        -- liveness
     -> ByteOff                 -- updfr
     -> ByteOff                 -- bytes of return values on stack
     -> StackMap                -- current StackMap
     -> (StackMap, [CmmNode O O])

setupStackFrame dflags lbl liveness updfr_off ret_args stack0
  = (cont_stack, assignments)
  where
      -- get the set of LocalRegs live in the continuation
      live = mapFindWithDefault Set.empty lbl liveness

      -- the stack from the base to updfr_off is off-limits.
      -- our new stack frame contains:
      --   * saved live variables
      --   * the return address [young(C) + 8]
      --   * the args for the call,
      --     which are replaced by the return values at the return
      --     point.

      -- everything up to updfr_off is off-limits
      -- stack1 contains updfr_off, plus everything we need to save
      (stack1, assignments) = allocate dflags updfr_off live stack0

      -- And the Sp at the continuation is:
      --   sm_sp stack1 + ret_args
      cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
                         , sm_args = ret_args
                         , sm_ret_off = updfr_off
                         }
  444. -- -----------------------------------------------------------------------------
  445. -- Note [diamond proc point]
  446. --
  447. -- This special case looks for the pattern we get from a typical
  448. -- tagged case expression:
  449. --
  450. -- Sp[young(L1)] = L1
  451. -- if (R1 & 7) != 0 goto L1 else goto L2
  452. -- L2:
  453. -- call [R1] returns to L1
  454. -- L1: live: {y}
  455. -- x = R1
  456. --
  457. -- If we let the generic case handle this, we get
  458. --
  459. -- Sp[-16] = L1
  460. -- if (R1 & 7) != 0 goto L1a else goto L2
  461. -- L2:
  462. -- Sp[-8] = y
  463. -- Sp = Sp - 16
  464. -- call [R1] returns to L1
  465. -- L1a:
  466. -- Sp[-8] = y
  467. -- Sp = Sp - 16
  468. -- goto L1
  469. -- L1:
  470. -- x = R1
  471. --
  472. -- The code for saving the live vars is duplicated in each branch, and
  473. -- furthermore there is an extra jump in the fast path (assuming L1 is
  474. -- a proc point, which it probably is if there is a heap check).
  475. --
  476. -- So to fix this we want to set up the stack frame before the
  477. -- conditional jump. How do we know when to do this, and when it is
  478. -- safe? The basic idea is, when we see the assignment
  479. --
  480. -- Sp[young(L)] = L
  481. --
  482. -- we know that
  483. -- * we are definitely heading for L
  484. -- * there can be no more reads from another stack area, because young(L)
  485. -- overlaps with it.
  486. --
  487. -- We don't necessarily know that everything live at L is live now
  488. -- (some might be assigned between here and the jump to L). So we
  489. -- simplify and only do the optimisation when we see
  490. --
  491. -- (1) a block containing an assignment of a return address L
  492. -- (2) ending in a branch where one (and only) continuation goes to L,
  493. -- and no other continuations go to proc points.
  494. --
  495. -- then we allocate the stack frame for L at the end of the block,
  496. -- before the branch.
  497. --
  498. -- We could generalise (2), but that would make it a bit more
  499. -- complicated to handle, and this currently catches the common case.
  500. futureContinuation :: Block CmmNode O O -> Maybe BlockId
  501. futureContinuation middle = foldBlockNodesB f middle Nothing
  502. where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
  503. f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
  504. = Just l
  505. f _ r = r
  506. -- -----------------------------------------------------------------------------
  507. -- Saving live registers
  508. -- | Given a set of live registers and a StackMap, save all the registers
  509. -- on the stack and return the new StackMap and the assignments to do
  510. -- the saving.
  511. --
-- | Save every live register that is not already in a slot: first fill
-- empty holes in the existing frame (oldest slots first), then push
-- whatever remains.  Returns the updated StackMap and the store
-- assignments that do the saving.
allocate :: DynFlags -> ByteOff -> RegSet -> StackMap
         -> (StackMap, [CmmNode O O])
allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
                                              , sm_regs = regs0 }
 =
  -- pprTrace "allocate" (ppr live $$ ppr stackmap) $

  -- we only have to save regs that are not already in a slot
  let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
      regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
  in

  -- make a map of the stack: a word is Occupied if it is part of the
  -- return/update area (up to ret_off) or already holds a live saved
  -- register; everything else is Empty and reusable.
  let stack = reverse $ Array.elems $
              accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
                ret_words ++ live_words
            where ret_words =
                   [ (x, Occupied)
                   | x <- [ 1 .. toWords dflags ret_off] ]
                  live_words =
                   [ (toWords dflags x, Occupied)
                   | (r,off) <- eltsUFM regs1,
                     let w = localRegBytes dflags r,
                     x <- [ off, off - wORD_SIZE dflags .. off - w + 1] ]
  in

  -- Pass over the stack: find slots to save all the new live variables,
  -- choosing the oldest slots first (hence a foldr).
  let
      save slot ([], stack, n, assigs, regs) -- no more regs to save
         = ([], slot:stack, plusW dflags n 1, assigs, regs)

      save slot (to_save, stack, n, assigs, regs)
        = case slot of
             Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs)
             Empty
               | Just (stack', r, to_save') <-
                     select_save to_save (slot:stack)
               -> let assig = CmmStore (CmmStackSlot Old n')
                                       (CmmReg (CmmLocal r))
                      n' = plusW dflags n 1
                  in
                      (to_save', stack', n', assig : assigs, (r,(r,n')):regs)

               | otherwise
               -> (to_save, slot:stack, plusW dflags n 1, assigs, regs)

      -- we should do better here: right now we'll fit the smallest first,
      -- but it would make more sense to fit the biggest first.
      select_save :: [LocalReg] -> [StackSlot]
                  -> Maybe ([StackSlot], LocalReg, [LocalReg])
      select_save regs stack = go regs []
        where go []     _no_fit = Nothing
              go (r:rs) no_fit
                | Just rest <- dropEmpty words stack
                = Just (replicate words Occupied ++ rest, r, rs++no_fit)
                | otherwise
                = go rs (r:no_fit)
                where words = localRegWords dflags r

      -- fill in empty slots as much as possible
      (still_to_save, save_stack, n, save_assigs, save_regs)
         = foldr save (to_save, [], 0, [], []) stack

      -- push any remaining live vars on the stack
      (push_sp, push_assigs, push_regs)
         = foldr push (n, [], []) still_to_save
         where
             push r (n, assigs, regs)
               = (n', assig : assigs, (r,(r,n')) : regs)
               where
                 n' = n + localRegBytes dflags r
                 assig = CmmStore (CmmStackSlot Old n')
                                  (CmmReg (CmmLocal r))

      -- If nothing was pushed, Sp can retreat past any trailing run of
      -- empty slots left at the young end of the frame.
      trim_sp
         | not (null push_regs) = push_sp
         | otherwise
         = plusW dflags n (- length (takeWhile isEmpty save_stack))

      final_regs = regs1 `addListToUFM` push_regs
                         `addListToUFM` save_regs

  in
  -- XXX should be an assert
  if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else

  if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else

  ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
  , push_assigs ++ save_assigs )
  590. -- -----------------------------------------------------------------------------
  591. -- Manifesting Sp
  592. -- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
  593. -- block looks like this:
  594. --
  595. -- middle_pre -- the middle nodes
  596. -- Sp = Sp + sp_off -- Sp adjustment goes here
  597. -- last -- the last node
  598. --
  599. -- And we have some extra blocks too (that don't contain Sp adjustments)
  600. --
  601. -- The adjustment for middle_pre will be different from that for
  602. -- middle_post, because the Sp adjustment intervenes.
  603. --
-- | Rewrite one block (plus its fixup blocks) so that every
-- CmmStackSlot reference becomes a concrete offset from Sp, and insert
-- the Sp adjustment.  Nodes before the adjustment are rewritten
-- relative to sp0; the last node and the fixup blocks are rewritten
-- relative to (sp0 - sp_off), because by then Sp has already moved.
manifestSp
   :: DynFlags
   -> BlockEnv StackMap  -- StackMaps for other blocks
   -> StackMap           -- StackMap for this block
   -> ByteOff            -- Sp on entry to the block
   -> ByteOff            -- SpHigh
   -> CmmNode C O        -- first node
   -> [CmmNode O O]      -- middle
   -> ByteOff            -- sp_off
   -> CmmNode O C        -- last node
   -> [CmmBlock]         -- new blocks
   -> [CmmBlock]         -- final blocks with Sp manifest

manifestSp dflags stackmaps stack0 sp0 sp_high
           first middle_pre sp_off last fixup_blocks
  = final_block : fixup_blocks'
  where
    area_off = getAreaOff stackmaps

    adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
    adj_pre_sp  = mapExpDeep (areaToSp dflags sp0            sp_high area_off)
    adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off)

    -- Eliminate redundant saves first, then manifest stack slots, then
    -- tack on the Sp adjustment (if non-zero).
    final_middle = maybeAddSpAdj dflags sp_off $
                   blockFromList $
                   map adj_pre_sp $
                   elimStackStores stack0 stackmaps area_off $
                   middle_pre

    final_last    = optStackCheck (adj_post_sp last)

    final_block   = blockJoin first final_middle final_last

    fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
  632. getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
  633. getAreaOff _ Old = 0
  634. getAreaOff stackmaps (Young l) =
  635. case mapLookup l stackmaps of
  636. Just sm -> sm_sp sm - sm_args sm
  637. Nothing -> pprPanic "getAreaOff" (ppr l)
  638. maybeAddSpAdj :: DynFlags -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
  639. maybeAddSpAdj _ 0 block = block
  640. maybeAddSpAdj dflags sp_off block
  641. = block `blockSnoc` CmmAssign spReg (cmmOffset dflags (CmmReg spReg) sp_off)
  642. {-
  643. Sp(L) is the Sp offset on entry to block L relative to the base of the
  644. OLD area.
  645. SpArgs(L) is the size of the young area for L, i.e. the number of
  646. arguments.
  647. - in block L, each reference to [old + N] turns into
  648. [Sp + Sp(L) - N]
  649. - in block L, each reference to [young(L') + N] turns into
  650. [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
  651. - be careful with the last node of each block: Sp has already been adjusted
  652. to be Sp + Sp(L) - Sp(L')
  653. -}
-- | Rewrite one expression during Sp manifestation, following the
-- translation scheme in the comment above:
--
--   * a CmmStackSlot becomes a concrete offset from Sp;
--   * CmmHighStackMark becomes the Sp high-water mark;
--   * a stack check of the shape (Sp - 0) < SpLim is folded to zero
--     (i.e. false) -- see Note [null stack check].
--
-- Equation order matters: the final equation passes every other
-- expression through unchanged.
areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n) =
  cmmOffset dflags (CmmReg spReg) (sp_old - area_off area - n)
areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark) = mkIntExpr dflags sp_hwm
areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _)  -- Note [null stack check]
                          [CmmMachOp (MO_Sub _)
                              [ CmmReg (CmmGlobal Sp)
                              , CmmLit (CmmInt 0 _)],
                           CmmReg (CmmGlobal SpLim)]) = zeroExpr dflags
areaToSp _ _ _ _ other = other
  664. -- -----------------------------------------------------------------------------
  665. -- Note [null stack check]
  666. --
  667. -- If the high-water Sp is zero, then we end up with
  668. --
  669. -- if (Sp - 0 < SpLim) then .. else ..
  670. --
  671. -- and possibly some dead code for the failure case. Optimising this
  672. -- away depends on knowing that SpLim <= Sp, so it is really the job
  673. -- of the stack layout algorithm, hence we do it now. This is also
  674. -- convenient because control-flow optimisation later will drop the
  675. -- dead code.
  676. optStackCheck :: CmmNode O C -> CmmNode O C
  677. optStackCheck n = -- Note [null stack check]
  678. case n of
  679. CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
  680. other -> other
  681. -- -----------------------------------------------------------------------------
  682. -- | Eliminate stores of the form
  683. --
  684. -- Sp[area+n] = r
  685. --
  686. -- when we know that r is already in the same slot as Sp[area+n]. We
  687. -- could do this in a later optimisation pass, but that would involve
  688. -- a separate analysis and we already have the information to hand
  689. -- here. It helps clean up some extra stack stores in common cases.
  690. --
  691. -- Note that we may have to modify the StackMap as we walk through the
  692. -- code using procMiddle, since an assignment to a variable in the
  693. -- StackMap will invalidate its mapping there.
  694. --
-- | Drop stores of the form Sp[area+n] = r when the StackMap records
-- that r already lives in exactly that slot.  The StackMap is
-- re-threaded with 'procMiddle' after every kept node, because
-- assignments along the way can invalidate register-to-slot mappings.
elimStackStores :: StackMap
                -> BlockEnv StackMap
                -> (Area -> ByteOff)
                -> [CmmNode O O]
                -> [CmmNode O O]
elimStackStores stackmap stackmaps area_off nodes
  = go stackmap nodes
  where
    go _stackmap [] = []
    go stackmap (n:ns)
     = case n of
         CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
            | Just (_,off) <- lookupUFM (sm_regs stackmap) r
            , area_off area + m == off
            -- the value is already in this very slot: drop the store
            -> -- pprTrace "eliminated a node!" (ppr r) $
               go stackmap ns
         _otherwise
            -> n : go (procMiddle stackmaps n stackmap) ns
  713. -- -----------------------------------------------------------------------------
  714. -- Update info tables to include stack liveness
-- | Rewrite every StackRep info table in a CmmProc with the stack
-- liveness derived from the StackMaps computed by 'cmmLayoutStack'.
-- Panics if an info-table label has no StackMap.  Info tables with a
-- non-stack representation, and non-proc declarations, pass through
-- unchanged.
setInfoTableStackMap :: DynFlags -> BlockEnv StackMap -> CmmDecl -> CmmDecl
setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l g)
  = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l g
  where
    fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
       info_tbl { cit_rep = StackRep (get_liveness lbl) }
    fix_info _ other = other

    get_liveness :: BlockId -> Liveness
    get_liveness lbl
      = case mapLookup lbl stackmaps of
          Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls)
          Just sm -> stackMapToLiveness dflags sm

setInfoTableStackMap _ _ d = d
  728. stackMapToLiveness :: DynFlags -> StackMap -> Liveness
  729. stackMapToLiveness dflags StackMap{..} =
  730. reverse $ Array.elems $
  731. accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1,
  732. toWords dflags (sm_sp - sm_args)) live_words
  733. where
  734. live_words = [ (toWords dflags off, False)
  735. | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
  736. -- -----------------------------------------------------------------------------
  737. -- Lowering safe foreign calls
  738. {-
  739. Note [lower safe foreign calls]
  740. We start with
  741. Sp[young(L1)] = L1
  742. ,-----------------------
  743. | r1 = foo(x,y,z) returns to L1
  744. '-----------------------
  745. L1:
  746. R1 = r1 -- copyIn, inserted by mkSafeCall
  747. ...
  748. the stack layout algorithm will arrange to save and reload everything
  749. live across the call. Our job now is to expand the call so we get
  750. Sp[young(L1)] = L1
  751. ,-----------------------
  752. | SAVE_THREAD_STATE()
  753. | token = suspendThread(BaseReg, interruptible)
  754. | r = foo(x,y,z)
  755. | BaseReg = resumeThread(token)
  756. | LOAD_THREAD_STATE()
  757. | R1 = r -- copyOut
  758. | jump Sp[0]
  759. '-----------------------
  760. L1:
  761. r = R1 -- copyIn, inserted by mkSafeCall
  762. ...
  763. Note the copyOut, which saves the results in the places that L1 is
expecting them (see Note [safe foreign call convention]).
  765. -}
-- | Lower a block ending in a safe foreign call ('CmmForeignCall') into
-- the suspend\/call\/resume sequence described in
-- Note [lower safe foreign calls] above.  Blocks ending in any other
-- last node are returned unchanged.
lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
lowerSafeForeignCall dflags block
  | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
  = do
    -- Both 'id' and 'new_base' are KindNonPtr because they're
    -- RTS-only objects and are not subject to garbage collection
    id <- newTemp (bWord dflags)
    new_base <- newTemp (cmmRegType dflags (CmmGlobal BaseReg))
    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
    load_tso <- newTemp (gcWord dflags)
    load_stack <- newTemp (gcWord dflags)
    -- suspend: save thread state, save volatile regs, call suspendThread.
    -- resume: call resumeThread, restore volatile regs and thread state.
    let suspend = saveThreadState dflags <*>
                  caller_save <*>
                  mkMiddle (callSuspendThread dflags id intrbl)
        midCall = mkUnsafeCall tgt res args
        resume  = mkMiddle (callResumeThread new_base id) <*>
                  -- Assign the result to BaseReg: we
                  -- might now have a different Capability!
                  mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
                  caller_load <*>
                  loadThreadState dflags load_tso load_stack

        -- copyOut places the call results where the continuation 'succ'
        -- expects to find them (the copyOut step of the Note above).
        (ret_args, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ)
                                          (map (CmmReg . CmmLocal) res)
                                          updfr []

        -- NB. after resumeThread returns, the top-of-stack probably contains
        -- the stack frame for succ, but it might not: if the current thread
        -- received an exception during the call, then the stack might be
        -- different.  Hence we continue by jumping to the top stack frame,
        -- not by jumping to succ.
        jump = CmmCall { cml_target    = entryCode dflags $
                                         CmmLoad (CmmReg spReg) (bWord dflags)
                       , cml_cont      = Just succ
                       , cml_args_regs = regs
                       , cml_args      = widthInBytes (wordWidth dflags)
                       , cml_ret_args  = ret_args
                       , cml_ret_off   = updfr }

    graph' <- lgraphOfAGraph $ suspend <*>
                               midCall <*>
                               resume  <*>
                               copyout <*>
                               mkLast jump

    -- The AGraph above contains no branches, so it must render as a
    -- single block; splice its middle and last back onto the original
    -- entry and middle of the input block.
    case toBlockList graph' of
      [one] -> let (_, middle', last) = blockSplit one
               in return (blockJoin entry (middle `blockAppend` middle') last)
      _ -> panic "lowerSafeForeignCall0"

  -- Block doesn't end in a safe foreign call:
  | otherwise = return block
  813. foreignLbl :: FastString -> CmmExpr
  814. foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
  815. newTemp :: CmmType -> UniqSM LocalReg
  816. newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
  817. callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O
  818. callSuspendThread dflags id intrbl =
  819. CmmUnsafeForeignCall
  820. (ForeignTarget (foreignLbl (fsLit "suspendThread"))
  821. (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn))
  822. [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)]
  823. callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
  824. callResumeThread new_base id =
  825. CmmUnsafeForeignCall
  826. (ForeignTarget (foreignLbl (fsLit "resumeThread"))
  827. (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn))
  828. [new_base] [CmmReg (CmmLocal id)]
  829. -- -----------------------------------------------------------------------------
  830. plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff
  831. plusW dflags b w = b + w * wORD_SIZE dflags
  832. dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
  833. dropEmpty 0 ss = Just ss
  834. dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
  835. dropEmpty _ _ = Nothing
  836. isEmpty :: StackSlot -> Bool
  837. isEmpty Empty = True
  838. isEmpty _ = False
  839. localRegBytes :: DynFlags -> LocalReg -> ByteOff
  840. localRegBytes dflags r
  841. = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r)))
  842. localRegWords :: DynFlags -> LocalReg -> WordOff
  843. localRegWords dflags = toWords dflags . localRegBytes dflags
  844. toWords :: DynFlags -> ByteOff -> WordOff
  845. toWords dflags x = x `quot` wORD_SIZE dflags
  846. insertReloads :: StackMap -> [CmmNode O O]
  847. insertReloads stackmap =
  848. [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
  849. (localRegType r))
  850. | (r,sp) <- stackSlotRegs stackmap
  851. ]
  852. stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
  853. stackSlotRegs sm = eltsUFM (sm_regs sm)