PageRenderTime 62ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/compiler/nativeGen/AsmCodeGen.lhs

http://github.com/ghc/ghc
Haskell | 1038 lines | 664 code | 151 blank | 223 comment | 38 complexity | e2a7a9fc3d972ca704efe06e9fe0c6a0 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. -- -----------------------------------------------------------------------------
  2. --
  3. -- (c) The University of Glasgow 1993-2004
  4. --
  5. -- This is the top-level module in the native code generator.
  6. --
  7. -- -----------------------------------------------------------------------------
  8. \begin{code}
  9. {-# LANGUAGE GADTs #-}
  10. module AsmCodeGen ( nativeCodeGen ) where
  11. #include "HsVersions.h"
  12. #include "nativeGen/NCG.h"
  13. import qualified X86.CodeGen
  14. import qualified X86.Regs
  15. import qualified X86.Instr
  16. import qualified X86.Ppr
  17. import qualified SPARC.CodeGen
  18. import qualified SPARC.Regs
  19. import qualified SPARC.Instr
  20. import qualified SPARC.Ppr
  21. import qualified SPARC.ShortcutJump
  22. import qualified SPARC.CodeGen.Expand
  23. import qualified PPC.CodeGen
  24. import qualified PPC.Regs
  25. import qualified PPC.RegInfo
  26. import qualified PPC.Instr
  27. import qualified PPC.Ppr
  28. import RegAlloc.Liveness
  29. import qualified RegAlloc.Linear.Main as Linear
  30. import qualified GraphColor as Color
  31. import qualified RegAlloc.Graph.Main as Color
  32. import qualified RegAlloc.Graph.Stats as Color
  33. import qualified RegAlloc.Graph.TrivColorable as Color
  34. import TargetReg
  35. import Platform
  36. import Config
  37. import Instruction
  38. import PIC
  39. import Reg
  40. import NCGMonad
  41. import BlockId
  42. import CgUtils ( fixStgRegisters )
  43. import Cmm
  44. import CmmUtils
  45. import Hoopl
  46. import CmmOpt ( cmmMachOpFold )
  47. import PprCmm
  48. import CLabel
  49. import UniqFM
  50. import UniqSupply
  51. import DynFlags
  52. import Util
  53. import BasicTypes ( Alignment )
  54. import Digraph
  55. import qualified Pretty
  56. import BufWrite
  57. import Outputable
  58. import FastString
  59. import UniqSet
  60. import ErrUtils
  61. import Module
  62. import Stream (Stream)
  63. import qualified Stream
  64. -- DEBUGGING ONLY
  65. --import OrdList
  66. import Data.List
  67. import Data.Maybe
  68. import Control.Exception
  69. import Control.Applicative (Applicative(..))
  70. import Control.Monad
  71. import System.IO
  72. {-
  73. The native-code generator has machine-independent and
  74. machine-dependent modules.
  75. This module ("AsmCodeGen") is the top-level machine-independent
  76. module. Before entering machine-dependent land, we do some
  77. machine-independent optimisations (defined below) on the
  78. 'CmmStmts's.
  79. We convert to the machine-specific 'Instr' datatype with
  80. 'cmmCodeGen', assuming an infinite supply of registers. We then use
  81. a machine-independent register allocator ('regAlloc') to rejoin
  82. reality. Obviously, 'regAlloc' has machine-specific helper
  83. functions (see about "RegAllocInfo" below).
  84. Finally, we order the basic blocks of the function so as to minimise
  85. the number of jumps between blocks, by utilising fallthrough wherever
  86. possible.
  87. The machine-dependent bits break down as follows:
  88. * ["MachRegs"] Everything about the target platform's machine
  89. registers (and immediate operands, and addresses, which tend to
  90. intermingle/interact with registers).
  91. * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
  92. have a module of its own), plus a miscellany of other things
  93. (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
  94. * ["MachCodeGen"] is where 'Cmm' stuff turns into
  95. machine instructions.
  96. * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
  97. a 'SDoc').
  98. * ["RegAllocInfo"] In the register allocator, we manipulate
  99. 'MRegsState's, which are 'BitSet's, one bit per machine register.
  100. When we want to say something about a specific machine register
  101. (e.g., ``it gets clobbered by this instruction''), we set/unset
  102. its bit. Obviously, we do this 'BitSet' thing for efficiency
  103. reasons.
  104. The 'RegAllocInfo' module collects together the machine-specific
  105. info needed to do register allocation.
  106. * ["RegisterAlloc"] The (machine-independent) register allocator.
  107. -}
  108. -- -----------------------------------------------------------------------------
  109. -- Top-level of the native codegen
  110. data NcgImpl statics instr jumpDest = NcgImpl {
  111. cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
  112. generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
  113. getJumpDestBlockId :: jumpDest -> Maybe BlockId,
  114. canShortcut :: instr -> Maybe jumpDest,
  115. shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
  116. shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
  117. pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
  118. maxSpillSlots :: Int,
  119. allocatableRegs :: [RealReg],
  120. ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  121. ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  122. ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
  123. ncgMakeFarBranches :: BlockEnv CmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr]
  124. }
  125. --------------------
  126. nativeCodeGen :: DynFlags -> Module -> Handle -> UniqSupply
  127. -> Stream IO RawCmmGroup ()
  128. -> IO UniqSupply
  129. nativeCodeGen dflags this_mod h us cmms
  130. = let platform = targetPlatform dflags
  131. nCG' :: (Outputable statics, Outputable instr, Instruction instr)
  132. => NcgImpl statics instr jumpDest -> IO UniqSupply
  133. nCG' ncgImpl = nativeCodeGen' dflags this_mod ncgImpl h us cmms
  134. in case platformArch platform of
  135. ArchX86 -> nCG' (x86NcgImpl dflags)
  136. ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
  137. ArchPPC -> nCG' (ppcNcgImpl dflags)
  138. ArchSPARC -> nCG' (sparcNcgImpl dflags)
  139. ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
  140. ArchPPC_64 -> panic "nativeCodeGen: No NCG for PPC 64"
  141. ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
  142. ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
  143. ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
  144. ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
  145. ArchJavaScript -> panic "nativeCodeGen: No NCG for JavaScript"
  146. x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
  147. x86NcgImpl dflags
  148. = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
  149. x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
  150. x86_64NcgImpl dflags
  151. = NcgImpl {
  152. cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
  153. ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
  154. ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
  155. ,canShortcut = X86.Instr.canShortcut
  156. ,shortcutStatics = X86.Instr.shortcutStatics
  157. ,shortcutJump = X86.Instr.shortcutJump
  158. ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
  159. ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
  160. ,allocatableRegs = X86.Regs.allocatableRegs platform
  161. ,ncg_x86fp_kludge = id
  162. ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
  163. ,ncgExpandTop = id
  164. ,ncgMakeFarBranches = const id
  165. }
  166. where platform = targetPlatform dflags
  167. ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
  168. ppcNcgImpl dflags
  169. = NcgImpl {
  170. cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
  171. ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
  172. ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
  173. ,canShortcut = PPC.RegInfo.canShortcut
  174. ,shortcutStatics = PPC.RegInfo.shortcutStatics
  175. ,shortcutJump = PPC.RegInfo.shortcutJump
  176. ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
  177. ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
  178. ,allocatableRegs = PPC.Regs.allocatableRegs platform
  179. ,ncg_x86fp_kludge = id
  180. ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
  181. ,ncgExpandTop = id
  182. ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
  183. }
  184. where platform = targetPlatform dflags
  185. sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
  186. sparcNcgImpl dflags
  187. = NcgImpl {
  188. cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
  189. ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
  190. ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
  191. ,canShortcut = SPARC.ShortcutJump.canShortcut
  192. ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
  193. ,shortcutJump = SPARC.ShortcutJump.shortcutJump
  194. ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
  195. ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
  196. ,allocatableRegs = SPARC.Regs.allocatableRegs
  197. ,ncg_x86fp_kludge = id
  198. ,ncgAllocMoreStack = noAllocMoreStack
  199. ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
  200. ,ncgMakeFarBranches = const id
  201. }
  202. --
  203. -- Allocating more stack space for spilling is currently only
  204. -- supported for the linear register allocator on x86/x86_64, the rest
  205. -- default to the panic below. To support allocating extra stack on
  206. -- more platforms provide a definition of ncgAllocMoreStack.
  207. --
  208. noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
  209. noAllocMoreStack amount _
  210. = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
  211. ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
  212. ++ " is a known limitation in the linear allocator.\n"
  213. ++ "\n"
  214. ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
  215. ++ " You can still file a bug report if you like.\n"
  216. type NativeGenAcc statics instr
  217. = ([[CLabel]],
  218. [([NatCmmDecl statics instr],
  219. Maybe [Color.RegAllocStats statics instr],
  220. Maybe [Linear.RegAllocStats])])
  221. nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
  222. => DynFlags
  223. -> Module
  224. -> NcgImpl statics instr jumpDest
  225. -> Handle
  226. -> UniqSupply
  227. -> Stream IO RawCmmGroup ()
  228. -> IO UniqSupply
  229. nativeCodeGen' dflags this_mod ncgImpl h us cmms
  230. = do
  231. let split_cmms = Stream.map add_split cmms
  232. -- BufHandle is a performance hack. We could hide it inside
  233. -- Pretty if it weren't for the fact that we do lots of little
  234. -- printDocs here (in order to do codegen in constant space).
  235. bufh <- newBufHandle h
  236. (ngs, us') <- cmmNativeGenStream dflags this_mod ncgImpl bufh us split_cmms ([], [])
  237. finishNativeGen dflags ncgImpl bufh ngs
  238. return us'
  239. where add_split tops
  240. | gopt Opt_SplitObjs dflags = split_marker : tops
  241. | otherwise = tops
  242. split_marker = CmmProc mapEmpty mkSplitMarkerLabel []
  243. (ofBlockList (panic "split_marker_entry") [])
  244. finishNativeGen :: Instruction instr
  245. => DynFlags
  246. -> NcgImpl statics instr jumpDest
  247. -> BufHandle
  248. -> NativeGenAcc statics instr
  249. -> IO ()
  250. finishNativeGen dflags ncgImpl bufh@(BufHandle _ _ h) (imports, prof)
  251. = do
  252. bFlush bufh
  253. let platform = targetPlatform dflags
  254. let (native, colorStats, linearStats)
  255. = unzip3 prof
  256. -- dump native code
  257. dumpIfSet_dyn dflags
  258. Opt_D_dump_asm "Asm code"
  259. (vcat $ map (pprNatCmmDecl ncgImpl) $ concat native)
  260. -- dump global NCG stats for graph coloring allocator
  261. (case concat $ catMaybes colorStats of
  262. [] -> return ()
  263. stats -> do
  264. -- build the global register conflict graph
  265. let graphGlobal
  266. = foldl Color.union Color.initGraph
  267. $ [ Color.raGraph stat
  268. | stat@Color.RegAllocStatsStart{} <- stats]
  269. dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
  270. $ Color.pprStats stats graphGlobal
  271. dumpIfSet_dyn dflags
  272. Opt_D_dump_asm_conflicts "Register conflict graph"
  273. $ Color.dotGraph
  274. (targetRegDotColor platform)
  275. (Color.trivColorable platform
  276. (targetVirtualRegSqueeze platform)
  277. (targetRealRegSqueeze platform))
  278. $ graphGlobal)
  279. -- dump global NCG stats for linear allocator
  280. (case concat $ catMaybes linearStats of
  281. [] -> return ()
  282. stats -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
  283. $ Linear.pprStats (concat native) stats)
  284. -- write out the imports
  285. Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
  286. $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
  287. $ makeImportsDoc dflags (concat imports)
  288. cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
  289. => DynFlags
  290. -> Module
  291. -> NcgImpl statics instr jumpDest
  292. -> BufHandle
  293. -> UniqSupply
  294. -> Stream IO RawCmmGroup ()
  295. -> NativeGenAcc statics instr
  296. -> IO (NativeGenAcc statics instr, UniqSupply)
  297. cmmNativeGenStream dflags this_mod ncgImpl h us cmm_stream ngs@(impAcc, profAcc)
  298. = do r <- Stream.runStream cmm_stream
  299. case r of
  300. Left () ->
  301. return ((reverse impAcc, reverse profAcc) , us)
  302. Right (cmms, cmm_stream') -> do
  303. (ngs',us') <- cmmNativeGens dflags this_mod ncgImpl h us cmms ngs 0
  304. cmmNativeGenStream dflags this_mod ncgImpl h us' cmm_stream' ngs'
  305. -- | Do native code generation on all these cmms.
  306. --
  307. cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
  308. => DynFlags
  309. -> Module
  310. -> NcgImpl statics instr jumpDest
  311. -> BufHandle
  312. -> UniqSupply
  313. -> [RawCmmDecl]
  314. -> NativeGenAcc statics instr
  315. -> Int
  316. -> IO (NativeGenAcc statics instr, UniqSupply)
  317. cmmNativeGens _ _ _ _ us [] ngs _
  318. = return (ngs, us)
  319. cmmNativeGens dflags this_mod ncgImpl h us (cmm : cmms) (impAcc, profAcc) count
  320. = do
  321. (us', native, imports, colorStats, linearStats)
  322. <- {-# SCC "cmmNativeGen" #-} cmmNativeGen dflags this_mod ncgImpl us cmm count
  323. {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
  324. $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
  325. $ vcat $ map (pprNatCmmDecl ncgImpl) native
  326. let !lsPprNative =
  327. if dopt Opt_D_dump_asm dflags
  328. || dopt Opt_D_dump_asm_stats dflags
  329. then native
  330. else []
  331. let !count' = count + 1
  332. -- force evaluation all this stuff to avoid space leaks
  333. {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
  334. cmmNativeGens dflags this_mod ncgImpl h
  335. us' cmms ((imports : impAcc),
  336. ((lsPprNative, colorStats, linearStats) : profAcc))
  337. count'
  338. where seqString [] = ()
  339. seqString (x:xs) = x `seq` seqString xs
  340. -- | Complete native code generation phase for a single top-level chunk of Cmm.
  341. -- Dumping the output of each stage along the way.
  342. -- Global conflict graph and NGC stats
  343. cmmNativeGen
  344. :: (Outputable statics, Outputable instr, Instruction instr)
  345. => DynFlags
  346. -> Module
  347. -> NcgImpl statics instr jumpDest
  348. -> UniqSupply
  349. -> RawCmmDecl -- ^ the cmm to generate code for
  350. -> Int -- ^ sequence number of this top thing
  351. -> IO ( UniqSupply
  352. , [NatCmmDecl statics instr] -- native code
  353. , [CLabel] -- things imported by this cmm
  354. , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
  355. , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
  356. cmmNativeGen dflags this_mod ncgImpl us cmm count
  357. = do
  358. let platform = targetPlatform dflags
  359. -- rewrite assignments to global regs
  360. let fixed_cmm =
  361. {-# SCC "fixStgRegisters" #-}
  362. fixStgRegisters dflags cmm
  363. -- cmm to cmm optimisations
  364. let (opt_cmm, imports) =
  365. {-# SCC "cmmToCmm" #-}
  366. cmmToCmm dflags this_mod fixed_cmm
  367. dumpIfSet_dyn dflags
  368. Opt_D_dump_opt_cmm "Optimised Cmm"
  369. (pprCmmGroup [opt_cmm])
  370. -- generate native code from cmm
  371. let ((native, lastMinuteImports), usGen) =
  372. {-# SCC "genMachCode" #-}
  373. initUs us $ genMachCode dflags this_mod (cmmTopCodeGen ncgImpl) opt_cmm
  374. dumpIfSet_dyn dflags
  375. Opt_D_dump_asm_native "Native code"
  376. (vcat $ map (pprNatCmmDecl ncgImpl) native)
  377. -- tag instructions with register liveness information
  378. let (withLiveness, usLive) =
  379. {-# SCC "regLiveness" #-}
  380. initUs usGen
  381. $ mapM (regLiveness platform)
  382. $ map natCmmTopToLive native
  383. dumpIfSet_dyn dflags
  384. Opt_D_dump_asm_liveness "Liveness annotations added"
  385. (vcat $ map ppr withLiveness)
  386. -- allocate registers
  387. (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
  388. if ( gopt Opt_RegsGraph dflags
  389. || gopt Opt_RegsIterative dflags)
  390. then do
  391. -- the regs usable for allocation
  392. let (alloc_regs :: UniqFM (UniqSet RealReg))
  393. = foldr (\r -> plusUFM_C unionUniqSets
  394. $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
  395. emptyUFM
  396. $ allocatableRegs ncgImpl
  397. -- do the graph coloring register allocation
  398. let ((alloced, regAllocStats), usAlloc)
  399. = {-# SCC "RegAlloc" #-}
  400. initUs usLive
  401. $ Color.regAlloc
  402. dflags
  403. alloc_regs
  404. (mkUniqSet [0 .. maxSpillSlots ncgImpl])
  405. withLiveness
  406. -- dump out what happened during register allocation
  407. dumpIfSet_dyn dflags
  408. Opt_D_dump_asm_regalloc "Registers allocated"
  409. (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  410. dumpIfSet_dyn dflags
  411. Opt_D_dump_asm_regalloc_stages "Build/spill stages"
  412. (vcat $ map (\(stage, stats)
  413. -> text "# --------------------------"
  414. $$ text "# cmm " <> int count <> text " Stage " <> int stage
  415. $$ ppr stats)
  416. $ zip [0..] regAllocStats)
  417. let mPprStats =
  418. if dopt Opt_D_dump_asm_stats dflags
  419. then Just regAllocStats else Nothing
  420. -- force evaluation of the Maybe to avoid space leak
  421. mPprStats `seq` return ()
  422. return ( alloced, usAlloc
  423. , mPprStats
  424. , Nothing)
  425. else do
  426. -- do linear register allocation
  427. let reg_alloc proc = do
  428. (alloced, maybe_more_stack, ra_stats) <-
  429. Linear.regAlloc dflags proc
  430. case maybe_more_stack of
  431. Nothing -> return ( alloced, ra_stats )
  432. Just amount -> do
  433. alloced' <- ncgAllocMoreStack ncgImpl amount alloced
  434. return (alloced', ra_stats )
  435. let ((alloced, regAllocStats), usAlloc)
  436. = {-# SCC "RegAlloc" #-}
  437. initUs usLive
  438. $ liftM unzip
  439. $ mapM reg_alloc withLiveness
  440. dumpIfSet_dyn dflags
  441. Opt_D_dump_asm_regalloc "Registers allocated"
  442. (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  443. let mPprStats =
  444. if dopt Opt_D_dump_asm_stats dflags
  445. then Just (catMaybes regAllocStats) else Nothing
  446. -- force evaluation of the Maybe to avoid space leak
  447. mPprStats `seq` return ()
  448. return ( alloced, usAlloc
  449. , Nothing
  450. , mPprStats)
  451. ---- x86fp_kludge. This pass inserts ffree instructions to clear
  452. ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
  453. ---- is clear, and library functions can return odd results if it
  454. ---- isn't.
  455. ----
  456. ---- NB. must happen before shortcutBranches, because that
  457. ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
  458. let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
  459. ---- generate jump tables
  460. let tabled =
  461. {-# SCC "generateJumpTables" #-}
  462. generateJumpTables ncgImpl kludged
  463. ---- shortcut branches
  464. let shorted =
  465. {-# SCC "shortcutBranches" #-}
  466. shortcutBranches dflags ncgImpl tabled
  467. ---- sequence blocks
  468. let sequenced =
  469. {-# SCC "sequenceBlocks" #-}
  470. map (sequenceTop ncgImpl) shorted
  471. ---- expansion of SPARC synthetic instrs
  472. let expanded =
  473. {-# SCC "sparc_expand" #-}
  474. ncgExpandTop ncgImpl sequenced
  475. dumpIfSet_dyn dflags
  476. Opt_D_dump_asm_expanded "Synthetic instructions expanded"
  477. (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
  478. return ( usAlloc
  479. , expanded
  480. , lastMinuteImports ++ imports
  481. , ppr_raStatsColor
  482. , ppr_raStatsLinear)
  483. x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
  484. x86fp_kludge top@(CmmData _ _) = top
  485. x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
  486. CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
  487. -- | Build a doc for all the imports.
  488. --
  489. makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
  490. makeImportsDoc dflags imports
  491. = dyld_stubs imports
  492. $$
  493. -- On recent versions of Darwin, the linker supports
  494. -- dead-stripping of code and data on a per-symbol basis.
  495. -- There's a hack to make this work in PprMach.pprNatCmmDecl.
  496. (if platformHasSubsectionsViaSymbols platform
  497. then text ".subsections_via_symbols"
  498. else empty)
  499. $$
  500. -- On recent GNU ELF systems one can mark an object file
  501. -- as not requiring an executable stack. If all objects
  502. -- linked into a program have this note then the program
  503. -- will not use an executable stack, which is good for
  504. -- security. GHC generated code does not need an executable
  505. -- stack so add the note in:
  506. (if platformHasGnuNonexecStack platform
  507. then text ".section .note.GNU-stack,\"\",@progbits"
  508. else empty)
  509. $$
  510. -- And just because every other compiler does, lets stick in
  511. -- an identifier directive: .ident "GHC x.y.z"
  512. (if platformHasIdentDirective platform
  513. then let compilerIdent = text "GHC" <+> text cProjectVersion
  514. in text ".ident" <+> doubleQuotes compilerIdent
  515. else empty)
  516. where
  517. platform = targetPlatform dflags
  518. arch = platformArch platform
  519. os = platformOS platform
  520. -- Generate "symbol stubs" for all external symbols that might
  521. -- come from a dynamic library.
  522. dyld_stubs :: [CLabel] -> SDoc
  523. {- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
  524. map head $ group $ sort imps-}
  525. -- (Hack) sometimes two Labels pretty-print the same, but have
  526. -- different uniques; so we compare their text versions...
  527. dyld_stubs imps
  528. | needImportedSymbols dflags arch os
  529. = vcat $
  530. (pprGotDeclaration dflags arch os :) $
  531. map ( pprImportedSymbol dflags platform . fst . head) $
  532. groupBy (\(_,a) (_,b) -> a == b) $
  533. sortBy (\(_,a) (_,b) -> compare a b) $
  534. map doPpr $
  535. imps
  536. | otherwise
  537. = empty
  538. doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
  539. astyle = mkCodeStyle AsmStyle
  540. -- -----------------------------------------------------------------------------
  541. -- Sequencing the basic blocks
  542. -- Cmm BasicBlocks are self-contained entities: they always end in a
  543. -- jump, either non-local or to another basic block in the same proc.
  544. -- In this phase, we attempt to place the basic blocks in a sequence
  545. -- such that as many of the local jumps as possible turn into
  546. -- fallthroughs.
  547. sequenceTop
  548. :: Instruction instr
  549. => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
  550. sequenceTop _ top@(CmmData _ _) = top
  551. sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
  552. CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
  553. -- The algorithm is very simple (and stupid): we make a graph out of
  554. -- the blocks where there is an edge from one block to another iff the
  555. -- first block ends by jumping to the second. Then we topologically
  556. -- sort this graph. Then traverse the list: for each block, we first
  557. -- output the block, then if it has an out edge, we move the
  558. -- destination of the out edge to the front of the list, and continue.
  559. -- FYI, the classic layout for basic blocks uses postorder DFS; this
  560. -- algorithm is implemented in Hoopl.
  561. sequenceBlocks
  562. :: Instruction instr
  563. => BlockEnv i
  564. -> [NatBasicBlock instr]
  565. -> [NatBasicBlock instr]
  566. sequenceBlocks _ [] = []
  567. sequenceBlocks infos (entry:blocks) =
  568. seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  569. -- the first block is the entry point ==> it must remain at the start.
  570. sccBlocks
  571. :: Instruction instr
  572. => [NatBasicBlock instr]
  573. -> [SCC ( NatBasicBlock instr
  574. , BlockId
  575. , [BlockId])]
  576. sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)
  577. -- we're only interested in the last instruction of
  578. -- the block, and only if it has a single destination.
  579. getOutEdges
  580. :: Instruction instr
  581. => [instr] -> [BlockId]
  582. getOutEdges instrs
  583. = case jumpDestsOfInstr (last instrs) of
  584. [one] -> [one]
  585. _many -> []
  586. mkNode :: (Instruction t)
  587. => GenBasicBlock t
  588. -> (GenBasicBlock t, BlockId, [BlockId])
  589. mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)
  590. seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
  591. -> [GenBasicBlock t1]
  592. seqBlocks _ [] = []
  593. seqBlocks infos ((block,_,[]) : rest)
  594. = block : seqBlocks infos rest
  595. seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
  596. | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
  597. | otherwise = block : seqBlocks infos rest'
  598. where
  599. can_fallthrough = not (mapMember next infos) && can_reorder
  600. (can_reorder, rest') = reorder next [] rest
  601. -- TODO: we should do a better job for cycles; try to maximise the
  602. -- fallthroughs within a loop.
  603. seqBlocks _ _ = panic "AsmCodegen:seqBlocks"
  604. reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
  605. reorder _ accum [] = (False, reverse accum)
  606. reorder id accum (b@(block,id',out) : rest)
  607. | id == id' = (True, (block,id,out) : reverse accum ++ rest)
  608. | otherwise = reorder id (b:accum) rest
  609. -- -----------------------------------------------------------------------------
  610. -- Generate jump tables
  611. -- Analyzes all native code and generates data sections for all jump
  612. -- table instructions.
  613. generateJumpTables
  614. :: NcgImpl statics instr jumpDest
  615. -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
  616. generateJumpTables ncgImpl xs = concatMap f xs
  617. where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
  618. f p = [p]
  619. g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
  620. -- -----------------------------------------------------------------------------
  621. -- Shortcut branches
  622. shortcutBranches
  623. :: DynFlags
  624. -> NcgImpl statics instr jumpDest
  625. -> [NatCmmDecl statics instr]
  626. -> [NatCmmDecl statics instr]
  627. shortcutBranches dflags ncgImpl tops
  628. | optLevel dflags < 1 = tops -- only with -O or higher
  629. | otherwise = map (apply_mapping ncgImpl mapping) tops'
  630. where
  631. (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
  632. mapping = foldr plusUFM emptyUFM mappings
  633. build_mapping :: NcgImpl statics instr jumpDest
  634. -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
  635. -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
  636. build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
  637. build_mapping _ (CmmProc info lbl live (ListGraph []))
  638. = (CmmProc info lbl live (ListGraph []), emptyUFM)
  639. build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  640. = (CmmProc info lbl live (ListGraph (head:others)), mapping)
  641. -- drop the shorted blocks, but don't ever drop the first one,
  642. -- because it is pointed to by a global label.
  643. where
  644. -- find all the blocks that just consist of a jump that can be
  645. -- shorted.
  646. -- Don't completely eliminate loops here -- that can leave a dangling jump!
  647. (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
  648. split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
  649. | Just jd <- canShortcut ncgImpl insn,
  650. Just dest <- getJumpDestBlockId ncgImpl jd,
  651. not (has_info id),
  652. (setMember dest s) || dest == id -- loop checks
  653. = (s, shortcut_blocks, b : others)
  654. split (s, shortcut_blocks, others) (BasicBlock id [insn])
  655. | Just dest <- canShortcut ncgImpl insn,
  656. not (has_info id)
  657. = (setInsert id s, (id,dest) : shortcut_blocks, others)
  658. split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
  659. -- do not eliminate blocks that have an info table
  660. has_info l = mapMember l info
  661. -- build a mapping from BlockId to JumpDest for shorting branches
  662. mapping = foldl add emptyUFM shortcut_blocks
  663. add ufm (id,dest) = addToUFM ufm id dest
  664. apply_mapping :: NcgImpl statics instr jumpDest
  665. -> UniqFM jumpDest
  666. -> GenCmmDecl statics h (ListGraph instr)
  667. -> GenCmmDecl statics h (ListGraph instr)
  668. apply_mapping ncgImpl ufm (CmmData sec statics)
  669. = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
  670. apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
  671. = CmmProc info lbl live (ListGraph $ map short_bb blocks)
  672. where
  673. short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
  674. short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
  675. -- shortcutJump should apply the mapping repeatedly,
  676. -- just in case we can short multiple branches.
  677. -- -----------------------------------------------------------------------------
  678. -- Instruction selection
  679. -- Native code instruction selection for a chunk of stix code. For
  680. -- this part of the computation, we switch from the UniqSM monad to
  681. -- the NatM monad. The latter carries not only a Unique, but also an
  682. -- Int denoting the current C stack pointer offset in the generated
  683. -- code; this is needed for creating correct spill offsets on
  684. -- architectures which don't offer, or for which it would be
  685. -- prohibitively expensive to employ, a frame pointer register. Viz,
  686. -- x86.
  687. -- The offset is measured in bytes, and indicates the difference
  688. -- between the current (simulated) C stack-ptr and the value it was at
  689. -- the beginning of the block. For stacks which grow down, this value
  690. -- should be either zero or negative.
  691. -- Switching between the two monads whilst carrying along the same
  692. -- Unique supply breaks abstraction. Is that bad?
  693. genMachCode
  694. :: DynFlags
  695. -> Module
  696. -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
  697. -> RawCmmDecl
  698. -> UniqSM
  699. ( [NatCmmDecl statics instr]
  700. , [CLabel])
  701. genMachCode dflags this_mod cmmTopCodeGen cmm_top
  702. = do { initial_us <- getUs
  703. ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
  704. (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
  705. final_delta = natm_delta final_st
  706. final_imports = natm_imports final_st
  707. ; if final_delta == 0
  708. then return (new_tops, final_imports)
  709. else pprPanic "genMachCode: nonzero final delta" (int final_delta)
  710. }
  711. -- -----------------------------------------------------------------------------
  712. -- Generic Cmm optimiser
  713. {-
  714. Here we do:
  715. (a) Constant folding
  716. (c) Position independent code and dynamic linking
  717. (i) introduce the appropriate indirections
  718. and position independent refs
  719. (ii) compile a list of imported symbols
  720. (d) Some arch-specific optimizations
  721. (a) will be moving to the new Hoopl pipeline, however, (c) and
  722. (d) are only needed by the native backend and will continue to live
  723. here.
  724. Ideas for other things we could do (put these in Hoopl please!):
  725. - shortcut jumps-to-jumps
  726. - simple CSE: if an expr is assigned to a temp, then replace later occs of
  727. that expr with the temp, until the expr is no longer valid (can push through
  728. temp assignments, and certain assigns to mem...)
  729. -}
  730. cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
  731. cmmToCmm _ _ top@(CmmData _ _) = (top, [])
  732. cmmToCmm dflags this_mod (CmmProc info lbl live graph)
  733. = runCmmOpt dflags this_mod $
  734. do blocks' <- mapM cmmBlockConFold (toBlockList graph)
  735. return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
  736. newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
  737. instance Functor CmmOptM where
  738. fmap = liftM
  739. instance Applicative CmmOptM where
  740. pure = return
  741. (<*>) = ap
  742. instance Monad CmmOptM where
  743. return x = CmmOptM $ \_ _ imports -> (# x, imports #)
  744. (CmmOptM f) >>= g =
  745. CmmOptM $ \dflags this_mod imports ->
  746. case f dflags this_mod imports of
  747. (# x, imports' #) ->
  748. case g x of
  749. CmmOptM g' -> g' dflags this_mod imports'
  750. instance CmmMakeDynamicReferenceM CmmOptM where
  751. addImport = addImportCmmOpt
  752. getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
  753. addImportCmmOpt :: CLabel -> CmmOptM ()
  754. addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
  755. instance HasDynFlags CmmOptM where
  756. getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
  757. runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
  758. runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
  759. (# result, imports #) -> (result, imports)
  760. cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
  761. cmmBlockConFold block = do
  762. let (entry, middle, last) = blockSplit block
  763. stmts = blockToList middle
  764. stmts' <- mapM cmmStmtConFold stmts
  765. last' <- cmmStmtConFold last
  766. return $ blockJoin entry (blockFromList stmts') last'
  767. -- This does three optimizations, but they're very quick to check, so we don't
  768. -- bother turning them off even when the Hoopl code is active. Since
  769. -- this is on the old Cmm representation, we can't reuse the code either:
  770. -- * reg = reg --> nop
  771. -- * if 0 then jump --> nop
  772. -- * if 1 then jump --> jump
  773. -- We might be tempted to skip this step entirely of not Opt_PIC, but
  774. -- there is some PowerPC code for the non-PIC case, which would also
  775. -- have to be separated.
  776. cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
  777. cmmStmtConFold stmt
  778. = case stmt of
  779. CmmAssign reg src
  780. -> do src' <- cmmExprConFold DataReference src
  781. return $ case src' of
  782. CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
  783. new_src -> CmmAssign reg new_src
  784. CmmStore addr src
  785. -> do addr' <- cmmExprConFold DataReference addr
  786. src' <- cmmExprConFold DataReference src
  787. return $ CmmStore addr' src'
  788. CmmCall { cml_target = addr }
  789. -> do addr' <- cmmExprConFold JumpReference addr
  790. return $ stmt { cml_target = addr' }
  791. CmmUnsafeForeignCall target regs args
  792. -> do target' <- case target of
  793. ForeignTarget e conv -> do
  794. e' <- cmmExprConFold CallReference e
  795. return $ ForeignTarget e' conv
  796. PrimTarget _ ->
  797. return target
  798. args' <- mapM (cmmExprConFold DataReference) args
  799. return $ CmmUnsafeForeignCall target' regs args'
  800. CmmCondBranch test true false
  801. -> do test' <- cmmExprConFold DataReference test
  802. return $ case test' of
  803. CmmLit (CmmInt 0 _) -> CmmBranch false
  804. CmmLit (CmmInt _ _) -> CmmBranch true
  805. _other -> CmmCondBranch test' true false
  806. CmmSwitch expr ids
  807. -> do expr' <- cmmExprConFold DataReference expr
  808. return $ CmmSwitch expr' ids
  809. other
  810. -> return other
  811. cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  812. cmmExprConFold referenceKind expr = do
  813. dflags <- getDynFlags
  814. -- With -O1 and greater, the cmmSink pass does constant-folding, so
  815. -- we don't need to do it again here.
  816. let expr' = if optLevel dflags >= 1
  817. then expr
  818. else cmmExprCon dflags expr
  819. cmmExprNative referenceKind expr'
  820. cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
  821. cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
  822. cmmExprCon dflags (CmmMachOp mop args)
  823. = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
  824. cmmExprCon _ other = other
  825. -- handles both PIC and non-PIC cases... a very strange mixture
  826. -- of things to do.
  827. cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  828. cmmExprNative referenceKind expr = do
  829. dflags <- getDynFlags
  830. let platform = targetPlatform dflags
  831. arch = platformArch platform
  832. case expr of
  833. CmmLoad addr rep
  834. -> do addr' <- cmmExprNative DataReference addr
  835. return $ CmmLoad addr' rep
  836. CmmMachOp mop args
  837. -> do args' <- mapM (cmmExprNative DataReference) args
  838. return $ CmmMachOp mop args'
  839. CmmLit (CmmBlock id)
  840. -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
  841. -- we must convert block Ids to CLabels here, because we
  842. -- might have to do the PIC transformation. Hence we must
  843. -- not modify BlockIds beyond this point.
  844. CmmLit (CmmLabel lbl)
  845. -> do
  846. cmmMakeDynamicReference dflags referenceKind lbl
  847. CmmLit (CmmLabelOff lbl off)
  848. -> do
  849. dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
  850. -- need to optimize here, since it's late
  851. return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
  852. dynRef,
  853. (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
  854. ]
  855. -- On powerpc (non-PIC), it's easier to jump directly to a label than
  856. -- to use the register table, so we replace these registers
  857. -- with the corresponding labels:
  858. CmmReg (CmmGlobal EagerBlackholeInfo)
  859. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  860. -> cmmExprNative referenceKind $
  861. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))
  862. CmmReg (CmmGlobal GCEnter1)
  863. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  864. -> cmmExprNative referenceKind $
  865. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))
  866. CmmReg (CmmGlobal GCFun)
  867. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  868. -> cmmExprNative referenceKind $
  869. CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))
  870. other
  871. -> return other
  872. \end{code}