PageRenderTime 62ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/compiler/nativeGen/AsmCodeGen.lhs

https://bitbucket.org/khibino/ghc-hack
Haskell | 976 lines | 600 code | 145 blank | 231 comment | 31 complexity | ea4720bbc8e799a1300f3df24b2207d4 MD5 | raw file
Possible License(s): BSD-3-Clause, BSD-2-Clause, LGPL-3.0
  1. -- -----------------------------------------------------------------------------
  2. --
  3. -- (c) The University of Glasgow 1993-2004
  4. --
  5. -- This is the top-level module in the native code generator.
  6. --
  7. -- -----------------------------------------------------------------------------
  8. \begin{code}
  9. {-# OPTIONS -fno-warn-tabs #-}
  10. -- The above warning supression flag is a temporary kludge.
  11. -- While working on this module you are encouraged to remove it and
  12. -- detab the module (please do the detabbing in a separate patch). See
  13. -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
  14. -- for details
  15. module AsmCodeGen ( nativeCodeGen ) where
  16. #include "HsVersions.h"
  17. #include "nativeGen/NCG.h"
  18. import qualified X86.CodeGen
  19. import qualified X86.Regs
  20. import qualified X86.Instr
  21. import qualified X86.Ppr
  22. import qualified SPARC.CodeGen
  23. import qualified SPARC.Regs
  24. import qualified SPARC.Instr
  25. import qualified SPARC.Ppr
  26. import qualified SPARC.ShortcutJump
  27. import qualified SPARC.CodeGen.Expand
  28. import qualified PPC.CodeGen
  29. import qualified PPC.Cond
  30. import qualified PPC.Regs
  31. import qualified PPC.RegInfo
  32. import qualified PPC.Instr
  33. import qualified PPC.Ppr
  34. import RegAlloc.Liveness
  35. import qualified RegAlloc.Linear.Main as Linear
  36. import qualified GraphColor as Color
  37. import qualified RegAlloc.Graph.Main as Color
  38. import qualified RegAlloc.Graph.Stats as Color
  39. import qualified RegAlloc.Graph.TrivColorable as Color
  40. import TargetReg
  41. import Platform
  42. import Config
  43. import Instruction
  44. import PIC
  45. import Reg
  46. import NCGMonad
  47. import BlockId
  48. import CgUtils ( fixStgRegisters )
  49. import OldCmm
  50. import CmmOpt ( cmmEliminateDeadBlocks, cmmMiniInline, cmmMachOpFold )
  51. import OldPprCmm
  52. import CLabel
  53. import UniqFM
  54. import Unique ( Unique, getUnique )
  55. import UniqSupply
  56. import DynFlags
  57. import StaticFlags
  58. import Util
  59. import BasicTypes ( Alignment )
  60. import Digraph
  61. import Pretty (Doc)
  62. import qualified Pretty
  63. import BufWrite
  64. import Outputable
  65. import FastString
  66. import UniqSet
  67. import ErrUtils
  68. import Module
  69. -- DEBUGGING ONLY
  70. --import OrdList
  71. import Data.List
  72. import Data.Maybe
  73. import Control.Monad
  74. import System.IO
  75. {-
  76. The native-code generator has machine-independent and
  77. machine-dependent modules.
  78. This module ("AsmCodeGen") is the top-level machine-independent
  79. module. Before entering machine-dependent land, we do some
  80. machine-independent optimisations (defined below) on the
  81. 'CmmStmts's.
  82. We convert to the machine-specific 'Instr' datatype with
  83. 'cmmCodeGen', assuming an infinite supply of registers. We then use
  84. a machine-independent register allocator ('regAlloc') to rejoin
  85. reality. Obviously, 'regAlloc' has machine-specific helper
  86. functions (see about "RegAllocInfo" below).
  87. Finally, we order the basic blocks of the function so as to minimise
  88. the number of jumps between blocks, by utilising fallthrough wherever
  89. possible.
  90. The machine-dependent bits break down as follows:
  91. * ["MachRegs"] Everything about the target platform's machine
  92. registers (and immediate operands, and addresses, which tend to
  93. intermingle/interact with registers).
  94. * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
  95. have a module of its own), plus a miscellany of other things
  96. (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
  97. * ["MachCodeGen"] is where 'Cmm' stuff turns into
  98. machine instructions.
  99. * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
  100. a 'Doc').
  101. * ["RegAllocInfo"] In the register allocator, we manipulate
  102. 'MRegsState's, which are 'BitSet's, one bit per machine register.
  103. When we want to say something about a specific machine register
  104. (e.g., ``it gets clobbered by this instruction''), we set/unset
  105. its bit. Obviously, we do this 'BitSet' thing for efficiency
  106. reasons.
  107. The 'RegAllocInfo' module collects together the machine-specific
  108. info needed to do register allocation.
  109. * ["RegisterAlloc"] The (machine-independent) register allocator.
  110. -}
  111. -- -----------------------------------------------------------------------------
  112. -- Top-level of the native codegen
  113. data NcgImpl statics instr jumpDest = NcgImpl {
  114. cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
  115. generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
  116. getJumpDestBlockId :: jumpDest -> Maybe BlockId,
  117. canShortcut :: instr -> Maybe jumpDest,
  118. shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
  119. shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
  120. pprNatCmmDecl :: Platform -> NatCmmDecl statics instr -> Doc,
  121. maxSpillSlots :: Int,
  122. allocatableRegs :: [RealReg],
  123. ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  124. ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  125. ncgMakeFarBranches :: [NatBasicBlock instr] -> [NatBasicBlock instr]
  126. }
  127. --------------------
  128. nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> [RawCmmGroup] -> IO ()
  129. nativeCodeGen dflags h us cmms
  130. = let platform = targetPlatform dflags
  131. nCG' :: (PlatformOutputable statics, PlatformOutputable instr, Instruction instr) => NcgImpl statics instr jumpDest -> IO ()
  132. nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
  133. x86NcgImpl = NcgImpl {
  134. cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
  135. ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr
  136. ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
  137. ,canShortcut = X86.Instr.canShortcut
  138. ,shortcutStatics = X86.Instr.shortcutStatics
  139. ,shortcutJump = X86.Instr.shortcutJump
  140. ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
  141. ,maxSpillSlots = X86.Instr.maxSpillSlots (target32Bit platform)
  142. ,allocatableRegs = X86.Regs.allocatableRegs
  143. ,ncg_x86fp_kludge = id
  144. ,ncgExpandTop = id
  145. ,ncgMakeFarBranches = id
  146. }
  147. in case platformArch platform of
  148. ArchX86 -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge })
  149. ArchX86_64 -> nCG' x86NcgImpl
  150. ArchPPC ->
  151. nCG' $ NcgImpl {
  152. cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
  153. ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr
  154. ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
  155. ,canShortcut = PPC.RegInfo.canShortcut
  156. ,shortcutStatics = PPC.RegInfo.shortcutStatics
  157. ,shortcutJump = PPC.RegInfo.shortcutJump
  158. ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
  159. ,maxSpillSlots = PPC.Instr.maxSpillSlots
  160. ,allocatableRegs = PPC.Regs.allocatableRegs
  161. ,ncg_x86fp_kludge = id
  162. ,ncgExpandTop = id
  163. ,ncgMakeFarBranches = makeFarBranches
  164. }
  165. ArchSPARC ->
  166. nCG' $ NcgImpl {
  167. cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
  168. ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr
  169. ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
  170. ,canShortcut = SPARC.ShortcutJump.canShortcut
  171. ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
  172. ,shortcutJump = SPARC.ShortcutJump.shortcutJump
  173. ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
  174. ,maxSpillSlots = SPARC.Instr.maxSpillSlots
  175. ,allocatableRegs = SPARC.Regs.allocatableRegs
  176. ,ncg_x86fp_kludge = id
  177. ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
  178. ,ncgMakeFarBranches = id
  179. }
  180. ArchARM _ _ ->
  181. panic "nativeCodeGen: No NCG for ARM"
  182. ArchPPC_64 ->
  183. panic "nativeCodeGen: No NCG for PPC 64"
  184. ArchUnknown ->
  185. panic "nativeCodeGen: No NCG for unknown arch"
  186. nativeCodeGen' :: (PlatformOutputable statics, PlatformOutputable instr, Instruction instr)
  187. => DynFlags
  188. -> NcgImpl statics instr jumpDest
  189. -> Handle -> UniqSupply -> [RawCmmGroup] -> IO ()
  190. nativeCodeGen' dflags ncgImpl h us cmms
  191. = do
  192. let platform = targetPlatform dflags
  193. split_cmms = concat $ map add_split cmms
  194. -- BufHandle is a performance hack. We could hide it inside
  195. -- Pretty if it weren't for the fact that we do lots of little
  196. -- printDocs here (in order to do codegen in constant space).
  197. bufh <- newBufHandle h
  198. (imports, prof) <- cmmNativeGens dflags ncgImpl bufh us split_cmms [] [] 0
  199. bFlush bufh
  200. let (native, colorStats, linearStats)
  201. = unzip3 prof
  202. -- dump native code
  203. dumpIfSet_dyn dflags
  204. Opt_D_dump_asm "Asm code"
  205. (vcat $ map (docToSDoc . pprNatCmmDecl ncgImpl platform) $ concat native)
  206. -- dump global NCG stats for graph coloring allocator
  207. (case concat $ catMaybes colorStats of
  208. [] -> return ()
  209. stats -> do
  210. -- build the global register conflict graph
  211. let graphGlobal
  212. = foldl Color.union Color.initGraph
  213. $ [ Color.raGraph stat
  214. | stat@Color.RegAllocStatsStart{} <- stats]
  215. dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
  216. $ Color.pprStats stats graphGlobal
  217. dumpIfSet_dyn dflags
  218. Opt_D_dump_asm_conflicts "Register conflict graph"
  219. $ Color.dotGraph
  220. (targetRegDotColor platform)
  221. (Color.trivColorable platform
  222. (targetVirtualRegSqueeze platform)
  223. (targetRealRegSqueeze platform))
  224. $ graphGlobal)
  225. -- dump global NCG stats for linear allocator
  226. (case concat $ catMaybes linearStats of
  227. [] -> return ()
  228. stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
  229. $ Linear.pprStats (concat native) stats)
  230. -- write out the imports
  231. Pretty.printDoc Pretty.LeftMode h
  232. $ makeImportsDoc dflags (concat imports)
  233. return ()
  234. where add_split tops
  235. | dopt Opt_SplitObjs dflags = split_marker : tops
  236. | otherwise = tops
  237. split_marker = CmmProc Nothing mkSplitMarkerLabel (ListGraph [])
  238. -- | Do native code generation on all these cmms.
  239. --
  240. cmmNativeGens :: (PlatformOutputable statics, PlatformOutputable instr, Instruction instr)
  241. => DynFlags
  242. -> NcgImpl statics instr jumpDest
  243. -> BufHandle
  244. -> UniqSupply
  245. -> [RawCmmDecl]
  246. -> [[CLabel]]
  247. -> [ ([NatCmmDecl statics instr],
  248. Maybe [Color.RegAllocStats statics instr],
  249. Maybe [Linear.RegAllocStats]) ]
  250. -> Int
  251. -> IO ( [[CLabel]],
  252. [([NatCmmDecl statics instr],
  253. Maybe [Color.RegAllocStats statics instr],
  254. Maybe [Linear.RegAllocStats])] )
  255. cmmNativeGens _ _ _ _ [] impAcc profAcc _
  256. = return (reverse impAcc, reverse profAcc)
  257. cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count
  258. = do
  259. let platform = targetPlatform dflags
  260. (us', native, imports, colorStats, linearStats)
  261. <- cmmNativeGen dflags ncgImpl us cmm count
  262. Pretty.bufLeftRender h
  263. $ {-# SCC "pprNativeCode" #-} Pretty.vcat $ map (pprNatCmmDecl ncgImpl platform) native
  264. -- carefully evaluate this strictly. Binding it with 'let'
  265. -- and then using 'seq' doesn't work, because the let
  266. -- apparently gets inlined first.
  267. lsPprNative <- return $!
  268. if dopt Opt_D_dump_asm dflags
  269. || dopt Opt_D_dump_asm_stats dflags
  270. then native
  271. else []
  272. count' <- return $! count + 1;
  273. -- force evaulation all this stuff to avoid space leaks
  274. seqString (showSDoc $ vcat $ map (pprPlatform platform) imports) `seq` return ()
  275. cmmNativeGens dflags ncgImpl
  276. h us' cmms
  277. (imports : impAcc)
  278. ((lsPprNative, colorStats, linearStats) : profAcc)
  279. count'
  280. where seqString [] = ()
  281. seqString (x:xs) = x `seq` seqString xs `seq` ()
  282. -- | Complete native code generation phase for a single top-level chunk of Cmm.
  283. -- Dumping the output of each stage along the way.
  284. -- Global conflict graph and NGC stats
  285. cmmNativeGen
  286. :: (PlatformOutputable statics, PlatformOutputable instr, Instruction instr)
  287. => DynFlags
  288. -> NcgImpl statics instr jumpDest
  289. -> UniqSupply
  290. -> RawCmmDecl -- ^ the cmm to generate code for
  291. -> Int -- ^ sequence number of this top thing
  292. -> IO ( UniqSupply
  293. , [NatCmmDecl statics instr] -- native code
  294. , [CLabel] -- things imported by this cmm
  295. , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
  296. , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
  297. cmmNativeGen dflags ncgImpl us cmm count
  298. = do
  299. let platform = targetPlatform dflags
  300. -- rewrite assignments to global regs
  301. let fixed_cmm =
  302. {-# SCC "fixStgRegisters" #-}
  303. fixStgRegisters cmm
  304. -- cmm to cmm optimisations
  305. let (opt_cmm, imports) =
  306. {-# SCC "cmmToCmm" #-}
  307. cmmToCmm dflags fixed_cmm
  308. dumpIfSet_dyn dflags
  309. Opt_D_dump_opt_cmm "Optimised Cmm"
  310. (pprCmmGroup platform [opt_cmm])
  311. -- generate native code from cmm
  312. let ((native, lastMinuteImports), usGen) =
  313. {-# SCC "genMachCode" #-}
  314. initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm
  315. dumpIfSet_dyn dflags
  316. Opt_D_dump_asm_native "Native code"
  317. (vcat $ map (docToSDoc . pprNatCmmDecl ncgImpl platform) native)
  318. -- tag instructions with register liveness information
  319. let (withLiveness, usLive) =
  320. {-# SCC "regLiveness" #-}
  321. initUs usGen
  322. $ mapUs (regLiveness platform)
  323. $ map natCmmTopToLive native
  324. dumpIfSet_dyn dflags
  325. Opt_D_dump_asm_liveness "Liveness annotations added"
  326. (vcat $ map (pprPlatform platform) withLiveness)
  327. -- allocate registers
  328. (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
  329. if ( dopt Opt_RegsGraph dflags
  330. || dopt Opt_RegsIterative dflags)
  331. then do
  332. -- the regs usable for allocation
  333. let (alloc_regs :: UniqFM (UniqSet RealReg))
  334. = foldr (\r -> plusUFM_C unionUniqSets
  335. $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
  336. emptyUFM
  337. $ allocatableRegs ncgImpl
  338. -- do the graph coloring register allocation
  339. let ((alloced, regAllocStats), usAlloc)
  340. = {-# SCC "RegAlloc" #-}
  341. initUs usLive
  342. $ Color.regAlloc
  343. dflags
  344. alloc_regs
  345. (mkUniqSet [0 .. maxSpillSlots ncgImpl])
  346. withLiveness
  347. -- dump out what happened during register allocation
  348. dumpIfSet_dyn dflags
  349. Opt_D_dump_asm_regalloc "Registers allocated"
  350. (vcat $ map (docToSDoc . pprNatCmmDecl ncgImpl platform) alloced)
  351. dumpIfSet_dyn dflags
  352. Opt_D_dump_asm_regalloc_stages "Build/spill stages"
  353. (vcat $ map (\(stage, stats)
  354. -> text "# --------------------------"
  355. $$ text "# cmm " <> int count <> text " Stage " <> int stage
  356. $$ pprPlatform platform stats)
  357. $ zip [0..] regAllocStats)
  358. let mPprStats =
  359. if dopt Opt_D_dump_asm_stats dflags
  360. then Just regAllocStats else Nothing
  361. -- force evaluation of the Maybe to avoid space leak
  362. mPprStats `seq` return ()
  363. return ( alloced, usAlloc
  364. , mPprStats
  365. , Nothing)
  366. else do
  367. -- do linear register allocation
  368. let ((alloced, regAllocStats), usAlloc)
  369. = {-# SCC "RegAlloc" #-}
  370. initUs usLive
  371. $ liftM unzip
  372. $ mapUs (Linear.regAlloc dflags) withLiveness
  373. dumpIfSet_dyn dflags
  374. Opt_D_dump_asm_regalloc "Registers allocated"
  375. (vcat $ map (docToSDoc . pprNatCmmDecl ncgImpl platform) alloced)
  376. let mPprStats =
  377. if dopt Opt_D_dump_asm_stats dflags
  378. then Just (catMaybes regAllocStats) else Nothing
  379. -- force evaluation of the Maybe to avoid space leak
  380. mPprStats `seq` return ()
  381. return ( alloced, usAlloc
  382. , Nothing
  383. , mPprStats)
  384. ---- x86fp_kludge. This pass inserts ffree instructions to clear
  385. ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
  386. ---- is clear, and library functions can return odd results if it
  387. ---- isn't.
  388. ----
  389. ---- NB. must happen before shortcutBranches, because that
  390. ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
  391. let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
  392. ---- generate jump tables
  393. let tabled =
  394. {-# SCC "generateJumpTables" #-}
  395. generateJumpTables ncgImpl kludged
  396. ---- shortcut branches
  397. let shorted =
  398. {-# SCC "shortcutBranches" #-}
  399. shortcutBranches dflags ncgImpl tabled
  400. ---- sequence blocks
  401. let sequenced =
  402. {-# SCC "sequenceBlocks" #-}
  403. map (sequenceTop ncgImpl) shorted
  404. ---- expansion of SPARC synthetic instrs
  405. let expanded =
  406. {-# SCC "sparc_expand" #-}
  407. ncgExpandTop ncgImpl sequenced
  408. dumpIfSet_dyn dflags
  409. Opt_D_dump_asm_expanded "Synthetic instructions expanded"
  410. (vcat $ map (docToSDoc . pprNatCmmDecl ncgImpl platform) expanded)
  411. return ( usAlloc
  412. , expanded
  413. , lastMinuteImports ++ imports
  414. , ppr_raStatsColor
  415. , ppr_raStatsLinear)
  416. x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
  417. x86fp_kludge top@(CmmData _ _) = top
  418. x86fp_kludge (CmmProc info lbl (ListGraph code)) =
  419. CmmProc info lbl (ListGraph $ X86.Instr.i386_insert_ffrees code)
  420. -- | Build a doc for all the imports.
  421. --
  422. makeImportsDoc :: DynFlags -> [CLabel] -> Pretty.Doc
  423. makeImportsDoc dflags imports
  424. = dyld_stubs imports
  425. Pretty.$$
  426. -- On recent versions of Darwin, the linker supports
  427. -- dead-stripping of code and data on a per-symbol basis.
  428. -- There's a hack to make this work in PprMach.pprNatCmmDecl.
  429. (if platformHasSubsectionsViaSymbols (targetPlatform dflags)
  430. then Pretty.text ".subsections_via_symbols"
  431. else Pretty.empty)
  432. Pretty.$$
  433. -- On recent GNU ELF systems one can mark an object file
  434. -- as not requiring an executable stack. If all objects
  435. -- linked into a program have this note then the program
  436. -- will not use an executable stack, which is good for
  437. -- security. GHC generated code does not need an executable
  438. -- stack so add the note in:
  439. (if platformHasGnuNonexecStack (targetPlatform dflags)
  440. then Pretty.text ".section .note.GNU-stack,\"\",@progbits"
  441. else Pretty.empty)
  442. -- And just because every other compiler does, lets stick in
  443. -- an identifier directive: .ident "GHC x.y.z"
  444. Pretty.$$ let compilerIdent = Pretty.text "GHC" Pretty.<+>
  445. Pretty.text cProjectVersion
  446. in Pretty.text ".ident" Pretty.<+>
  447. Pretty.doubleQuotes compilerIdent
  448. where
  449. -- Generate "symbol stubs" for all external symbols that might
  450. -- come from a dynamic library.
  451. dyld_stubs :: [CLabel] -> Pretty.Doc
  452. {- dyld_stubs imps = Pretty.vcat $ map pprDyldSymbolStub $
  453. map head $ group $ sort imps-}
  454. platform = targetPlatform dflags
  455. arch = platformArch platform
  456. os = platformOS platform
  457. -- (Hack) sometimes two Labels pretty-print the same, but have
  458. -- different uniques; so we compare their text versions...
  459. dyld_stubs imps
  460. | needImportedSymbols arch os
  461. = Pretty.vcat $
  462. (pprGotDeclaration arch os :) $
  463. map ( pprImportedSymbol platform . fst . head) $
  464. groupBy (\(_,a) (_,b) -> a == b) $
  465. sortBy (\(_,a) (_,b) -> compare a b) $
  466. map doPpr $
  467. imps
  468. | otherwise
  469. = Pretty.empty
  470. doPpr lbl = (lbl, renderWithStyle (pprCLabel platform lbl) astyle)
  471. astyle = mkCodeStyle AsmStyle
  472. -- -----------------------------------------------------------------------------
  473. -- Sequencing the basic blocks
  474. -- Cmm BasicBlocks are self-contained entities: they always end in a
  475. -- jump, either non-local or to another basic block in the same proc.
  476. -- In this phase, we attempt to place the basic blocks in a sequence
  477. -- such that as many of the local jumps as possible turn into
  478. -- fallthroughs.
  479. sequenceTop
  480. :: Instruction instr
  481. => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
  482. sequenceTop _ top@(CmmData _ _) = top
  483. sequenceTop ncgImpl (CmmProc info lbl (ListGraph blocks)) =
  484. CmmProc info lbl (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks blocks)
  485. -- The algorithm is very simple (and stupid): we make a graph out of
  486. -- the blocks where there is an edge from one block to another iff the
  487. -- first block ends by jumping to the second. Then we topologically
  488. -- sort this graph. Then traverse the list: for each block, we first
  489. -- output the block, then if it has an out edge, we move the
  490. -- destination of the out edge to the front of the list, and continue.
  491. -- FYI, the classic layout for basic blocks uses postorder DFS; this
  492. -- algorithm is implemented in Hoopl.
  493. sequenceBlocks
  494. :: Instruction instr
  495. => [NatBasicBlock instr]
  496. -> [NatBasicBlock instr]
  497. sequenceBlocks [] = []
  498. sequenceBlocks (entry:blocks) =
  499. seqBlocks (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  500. -- the first block is the entry point ==> it must remain at the start.
  501. sccBlocks
  502. :: Instruction instr
  503. => [NatBasicBlock instr]
  504. -> [SCC ( NatBasicBlock instr
  505. , Unique
  506. , [Unique])]
  507. sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
  508. -- we're only interested in the last instruction of
  509. -- the block, and only if it has a single destination.
  510. getOutEdges
  511. :: Instruction instr
  512. => [instr] -> [Unique]
  513. getOutEdges instrs
  514. = case jumpDestsOfInstr (last instrs) of
  515. [one] -> [getUnique one]
  516. _many -> []
  517. mkNode :: (Instruction t)
  518. => GenBasicBlock t
  519. -> (GenBasicBlock t, Unique, [Unique])
  520. mkNode block@(BasicBlock id instrs) = (block, getUnique id, getOutEdges instrs)
  521. seqBlocks :: (Eq t) => [(GenBasicBlock t1, t, [t])] -> [GenBasicBlock t1]
  522. seqBlocks [] = []
  523. seqBlocks ((block,_,[]) : rest)
  524. = block : seqBlocks rest
  525. seqBlocks ((block@(BasicBlock id instrs),_,[next]) : rest)
  526. | can_fallthrough = BasicBlock id (init instrs) : seqBlocks rest'
  527. | otherwise = block : seqBlocks rest'
  528. where
  529. (can_fallthrough, rest') = reorder next [] rest
  530. -- TODO: we should do a better job for cycles; try to maximise the
  531. -- fallthroughs within a loop.
  532. seqBlocks _ = panic "AsmCodegen:seqBlocks"
  533. reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
  534. reorder _ accum [] = (False, reverse accum)
  535. reorder id accum (b@(block,id',out) : rest)
  536. | id == id' = (True, (block,id,out) : reverse accum ++ rest)
  537. | otherwise = reorder id (b:accum) rest
  538. -- -----------------------------------------------------------------------------
  539. -- Making far branches
  540. -- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
  541. -- big, we have to work around this limitation.
  542. makeFarBranches
  543. :: [NatBasicBlock PPC.Instr.Instr]
  544. -> [NatBasicBlock PPC.Instr.Instr]
  545. makeFarBranches blocks
  546. | last blockAddresses < nearLimit = blocks
  547. | otherwise = zipWith handleBlock blockAddresses blocks
  548. where
  549. blockAddresses = scanl (+) 0 $ map blockLen blocks
  550. blockLen (BasicBlock _ instrs) = length instrs
  551. handleBlock addr (BasicBlock id instrs)
  552. = BasicBlock id (zipWith makeFar [addr..] instrs)
  553. makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt
  554. makeFar addr (PPC.Instr.BCC cond tgt)
  555. | abs (addr - targetAddr) >= nearLimit
  556. = PPC.Instr.BCCFAR cond tgt
  557. | otherwise
  558. = PPC.Instr.BCC cond tgt
  559. where Just targetAddr = lookupUFM blockAddressMap tgt
  560. makeFar _ other = other
  561. nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
  562. -- distance, as we have a few pseudo-insns that are
  563. -- pretty-printed as multiple instructions,
  564. -- and it's just not worth the effort to calculate
  565. -- things exactly
  566. blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
  567. -- -----------------------------------------------------------------------------
  568. -- Generate jump tables
  569. -- Analyzes all native code and generates data sections for all jump
  570. -- table instructions.
  571. generateJumpTables
  572. :: NcgImpl statics instr jumpDest
  573. -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
  574. generateJumpTables ncgImpl xs = concatMap f xs
  575. where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
  576. f p = [p]
  577. g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
  578. -- -----------------------------------------------------------------------------
  579. -- Shortcut branches
  580. shortcutBranches
  581. :: DynFlags
  582. -> NcgImpl statics instr jumpDest
  583. -> [NatCmmDecl statics instr]
  584. -> [NatCmmDecl statics instr]
  585. shortcutBranches dflags ncgImpl tops
  586. | optLevel dflags < 1 = tops -- only with -O or higher
  587. | otherwise = map (apply_mapping ncgImpl mapping) tops'
  588. where
  589. (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
  590. mapping = foldr plusUFM emptyUFM mappings
  591. build_mapping :: NcgImpl statics instr jumpDest
  592. -> GenCmmDecl d t (ListGraph instr)
  593. -> (GenCmmDecl d t (ListGraph instr), UniqFM jumpDest)
  594. build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
  595. build_mapping _ (CmmProc info lbl (ListGraph []))
  596. = (CmmProc info lbl (ListGraph []), emptyUFM)
  597. build_mapping ncgImpl (CmmProc info lbl (ListGraph (head:blocks)))
  598. = (CmmProc info lbl (ListGraph (head:others)), mapping)
  599. -- drop the shorted blocks, but don't ever drop the first one,
  600. -- because it is pointed to by a global label.
  601. where
  602. -- find all the blocks that just consist of a jump that can be
  603. -- shorted.
  604. -- Don't completely eliminate loops here -- that can leave a dangling jump!
  605. (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
  606. split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
  607. | Just jd <- canShortcut ncgImpl insn,
  608. Just dest <- getJumpDestBlockId ncgImpl jd,
  609. (setMember dest s) || dest == id -- loop checks
  610. = (s, shortcut_blocks, b : others)
  611. split (s, shortcut_blocks, others) (BasicBlock id [insn])
  612. | Just dest <- canShortcut ncgImpl insn
  613. = (setInsert id s, (id,dest) : shortcut_blocks, others)
  614. split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
  615. -- build a mapping from BlockId to JumpDest for shorting branches
  616. mapping = foldl add emptyUFM shortcut_blocks
  617. add ufm (id,dest) = addToUFM ufm id dest
  618. apply_mapping :: NcgImpl statics instr jumpDest
  619. -> UniqFM jumpDest
  620. -> GenCmmDecl statics h (ListGraph instr)
  621. -> GenCmmDecl statics h (ListGraph instr)
  622. apply_mapping ncgImpl ufm (CmmData sec statics)
  623. = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
  624. apply_mapping ncgImpl ufm (CmmProc info lbl (ListGraph blocks))
  625. = CmmProc info lbl (ListGraph $ map short_bb blocks)
  626. where
  627. short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
  628. short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
  629. -- shortcutJump should apply the mapping repeatedly,
  630. -- just in case we can short multiple branches.
  631. -- -----------------------------------------------------------------------------
  632. -- Instruction selection
  633. -- Native code instruction selection for a chunk of stix code. For
  634. -- this part of the computation, we switch from the UniqSM monad to
  635. -- the NatM monad. The latter carries not only a Unique, but also an
  636. -- Int denoting the current C stack pointer offset in the generated
  637. -- code; this is needed for creating correct spill offsets on
  638. -- architectures which don't offer, or for which it would be
  639. -- prohibitively expensive to employ, a frame pointer register. Viz,
  640. -- x86.
  641. -- The offset is measured in bytes, and indicates the difference
  642. -- between the current (simulated) C stack-ptr and the value it was at
  643. -- the beginning of the block. For stacks which grow down, this value
  644. -- should be either zero or negative.
  645. -- Switching between the two monads whilst carrying along the same
  646. -- Unique supply breaks abstraction. Is that bad?
  647. genMachCode
  648. :: DynFlags
  649. -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
  650. -> RawCmmDecl
  651. -> UniqSM
  652. ( [NatCmmDecl statics instr]
  653. , [CLabel])
  654. genMachCode dflags cmmTopCodeGen cmm_top
  655. = do { initial_us <- getUs
  656. ; let initial_st = mkNatM_State initial_us 0 dflags
  657. (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
  658. final_delta = natm_delta final_st
  659. final_imports = natm_imports final_st
  660. ; if final_delta == 0
  661. then return (new_tops, final_imports)
  662. else pprPanic "genMachCode: nonzero final delta" (int final_delta)
  663. }
  664. -- -----------------------------------------------------------------------------
  665. -- Generic Cmm optimiser
  666. {-
  667. Here we do:
  668. (a) Constant folding
  669. (b) Simple inlining: a temporary which is assigned to and then
  670. used, once, can be shorted.
  671. (c) Position independent code and dynamic linking
  672. (i) introduce the appropriate indirections
  673. and position independent refs
  674. (ii) compile a list of imported symbols
  675. (d) Some arch-specific optimizations
  676. (a) and (b) will be moving to the new Hoopl pipeline, however, (c) and
  677. (d) are only needed by the native backend and will continue to live
  678. here.
  679. Ideas for other things we could do (put these in Hoopl please!):
  680. - shortcut jumps-to-jumps
  681. - simple CSE: if an expr is assigned to a temp, then replace later occs of
  682. that expr with the temp, until the expr is no longer valid (can push through
  683. temp assignments, and certain assigns to mem...)
  684. -}
  685. cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel])
  686. cmmToCmm _ top@(CmmData _ _) = (top, [])
  687. cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
  688. let platform = targetPlatform dflags
  689. blocks' <- mapM cmmBlockConFold (cmmMiniInline platform (cmmEliminateDeadBlocks blocks))
  690. return $ CmmProc info lbl (ListGraph blocks')
  691. newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))
  692. instance Monad CmmOptM where
  693. return x = CmmOptM $ \(imports, _) -> (# x,imports #)
  694. (CmmOptM f) >>= g =
  695. CmmOptM $ \(imports, dflags) ->
  696. case f (imports, dflags) of
  697. (# x, imports' #) ->
  698. case g x of
  699. CmmOptM g' -> g' (imports', dflags)
  700. addImportCmmOpt :: CLabel -> CmmOptM ()
  701. addImportCmmOpt lbl = CmmOptM $ \(imports, _dflags) -> (# (), lbl:imports #)
  702. getDynFlagsCmmOpt :: CmmOptM DynFlags
  703. getDynFlagsCmmOpt = CmmOptM $ \(imports, dflags) -> (# dflags, imports #)
  704. runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel])
  705. runCmmOpt dflags (CmmOptM f) = case f ([], dflags) of
  706. (# result, imports #) -> (result, imports)
  707. cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
  708. cmmBlockConFold (BasicBlock id stmts) = do
  709. stmts' <- mapM cmmStmtConFold stmts
  710. return $ BasicBlock id stmts'
  711. -- This does three optimizations, but they're very quick to check, so we don't
  712. -- bother turning them off even when the Hoopl code is active. Since
  713. -- this is on the old Cmm representation, we can't reuse the code either:
  714. -- * reg = reg --> nop
  715. -- * if 0 then jump --> nop
  716. -- * if 1 then jump --> jump
  717. -- We might be tempted to skip this step entirely of not opt_PIC, but
  718. -- there is some PowerPC code for the non-PIC case, which would also
  719. -- have to be separated.
  720. cmmStmtConFold :: CmmStmt -> CmmOptM CmmStmt
  721. cmmStmtConFold stmt
  722. = case stmt of
  723. CmmAssign reg src
  724. -> do src' <- cmmExprConFold DataReference src
  725. return $ case src' of
  726. CmmReg reg' | reg == reg' -> CmmNop
  727. new_src -> CmmAssign reg new_src
  728. CmmStore addr src
  729. -> do addr' <- cmmExprConFold DataReference addr
  730. src' <- cmmExprConFold DataReference src
  731. return $ CmmStore addr' src'
  732. CmmJump addr regs
  733. -> do addr' <- cmmExprConFold JumpReference addr
  734. return $ CmmJump addr' regs
  735. CmmCall target regs args returns
  736. -> do target' <- case target of
  737. CmmCallee e conv -> do
  738. e' <- cmmExprConFold CallReference e
  739. return $ CmmCallee e' conv
  740. other -> return other
  741. args' <- mapM (\(CmmHinted arg hint) -> do
  742. arg' <- cmmExprConFold DataReference arg
  743. return (CmmHinted arg' hint)) args
  744. return $ CmmCall target' regs args' returns
  745. CmmCondBranch test dest
  746. -> do test' <- cmmExprConFold DataReference test
  747. dflags <- getDynFlagsCmmOpt
  748. let platform = targetPlatform dflags
  749. return $ case test' of
  750. CmmLit (CmmInt 0 _) ->
  751. CmmComment (mkFastString ("deleted: " ++
  752. showSDoc (pprStmt platform stmt)))
  753. CmmLit (CmmInt _ _) -> CmmBranch dest
  754. _other -> CmmCondBranch test' dest
  755. CmmSwitch expr ids
  756. -> do expr' <- cmmExprConFold DataReference expr
  757. return $ CmmSwitch expr' ids
  758. other
  759. -> return other
  760. cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  761. cmmExprConFold referenceKind expr = do
  762. dflags <- getDynFlagsCmmOpt
  763. -- Skip constant folding if new code generator is running
  764. -- (this optimization is done in Hoopl)
  765. let expr' = if dopt Opt_TryNewCodeGen dflags
  766. then expr
  767. else cmmExprCon (targetPlatform dflags) expr
  768. cmmExprNative referenceKind expr'
  769. cmmExprCon :: Platform -> CmmExpr -> CmmExpr
  770. cmmExprCon platform (CmmLoad addr rep) = CmmLoad (cmmExprCon platform addr) rep
  771. cmmExprCon platform (CmmMachOp mop args)
  772. = cmmMachOpFold platform mop (map (cmmExprCon platform) args)
  773. cmmExprCon _ other = other
  774. -- handles both PIC and non-PIC cases... a very strange mixture
  775. -- of things to do.
  776. cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  777. cmmExprNative referenceKind expr = do
  778. dflags <- getDynFlagsCmmOpt
  779. let platform = targetPlatform dflags
  780. arch = platformArch platform
  781. case expr of
  782. CmmLoad addr rep
  783. -> do addr' <- cmmExprNative DataReference addr
  784. return $ CmmLoad addr' rep
  785. CmmMachOp mop args
  786. -> do args' <- mapM (cmmExprNative DataReference) args
  787. return $ CmmMachOp mop args'
  788. CmmLit (CmmLabel lbl)
  789. -> do
  790. cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
  791. CmmLit (CmmLabelOff lbl off)
  792. -> do
  793. dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
  794. -- need to optimize here, since it's late
  795. return $ cmmMachOpFold platform (MO_Add wordWidth) [
  796. dynRef,
  797. (CmmLit $ CmmInt (fromIntegral off) wordWidth)
  798. ]
  799. -- On powerpc (non-PIC), it's easier to jump directly to a label than
  800. -- to use the register table, so we replace these registers
  801. -- with the corresponding labels:
  802. CmmReg (CmmGlobal EagerBlackholeInfo)
  803. | arch == ArchPPC && not opt_PIC
  804. -> cmmExprNative referenceKind $
  805. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))
  806. CmmReg (CmmGlobal GCEnter1)
  807. | arch == ArchPPC && not opt_PIC
  808. -> cmmExprNative referenceKind $
  809. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))
  810. CmmReg (CmmGlobal GCFun)
  811. | arch == ArchPPC && not opt_PIC
  812. -> cmmExprNative referenceKind $
  813. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))
  814. other
  815. -> return other
  816. \end{code}