
/compiler/nativeGen/AsmCodeGen.lhs

https://bitbucket.org/carter/ghc
-- -----------------------------------------------------------------------------
--
-- (c) The University of Glasgow 1993-2004
--
-- This is the top-level module in the native code generator.
--
-- -----------------------------------------------------------------------------

\begin{code}
module AsmCodeGen ( nativeCodeGen ) where

#include "HsVersions.h"
#include "nativeGen/NCG.h"

import qualified X86.CodeGen
import qualified X86.Regs
import qualified X86.Instr
import qualified X86.Ppr

import qualified SPARC.CodeGen
import qualified SPARC.Regs
import qualified SPARC.Instr
import qualified SPARC.Ppr
import qualified SPARC.ShortcutJump
import qualified SPARC.CodeGen.Expand

import qualified PPC.CodeGen
import qualified PPC.Cond
import qualified PPC.Regs
import qualified PPC.RegInfo
import qualified PPC.Instr
import qualified PPC.Ppr

import RegAlloc.Liveness
import qualified RegAlloc.Linear.Main           as Linear

import qualified GraphColor                     as Color
import qualified RegAlloc.Graph.Main            as Color
import qualified RegAlloc.Graph.Stats           as Color
import qualified RegAlloc.Graph.TrivColorable   as Color

import TargetReg
import Platform
import Config
import Instruction
import PIC
import Reg
import NCGMonad

import BlockId
import CgUtils          ( fixStgRegisters )
import OldCmm
import CmmOpt           ( cmmMachOpFold )
import OldPprCmm
import CLabel

import UniqFM
import UniqSupply
import DynFlags
import Util

import BasicTypes       ( Alignment )
import Digraph
import qualified Pretty
import BufWrite
import Outputable
import FastString
import UniqSet
import ErrUtils
import Module
import Stream (Stream)
import qualified Stream

-- DEBUGGING ONLY
--import OrdList

import Data.List
import Data.Maybe
import Control.Monad
import System.IO

{-
The native-code generator has machine-independent and
machine-dependent modules.

This module ("AsmCodeGen") is the top-level machine-independent
module.  Before entering machine-dependent land, we do some
machine-independent optimisations (defined below) on the
'CmmStmts's.

We convert to the machine-specific 'Instr' datatype with
'cmmCodeGen', assuming an infinite supply of registers.  We then use
a machine-independent register allocator ('regAlloc') to rejoin
reality.  Obviously, 'regAlloc' has machine-specific helper
functions (see about "RegAllocInfo" below).

Finally, we order the basic blocks of the function so as to minimise
the number of jumps between blocks, by utilising fallthrough wherever
possible.

The machine-dependent bits break down as follows:

  * ["MachRegs"]  Everything about the target platform's machine
    registers (and immediate operands, and addresses, which tend to
    intermingle/interact with registers).

  * ["MachInstrs"]  Includes the 'Instr' datatype (possibly should
    have a module of its own), plus a miscellany of other things
    (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)

  * ["MachCodeGen"]  is where 'Cmm' stuff turns into
    machine instructions.

  * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
    a 'SDoc').

  * ["RegAllocInfo"] In the register allocator, we manipulate
    'MRegsState's, which are 'BitSet's, one bit per machine register.
    When we want to say something about a specific machine register
    (e.g., ``it gets clobbered by this instruction''), we set/unset
    its bit.  Obviously, we do this 'BitSet' thing for efficiency
    reasons.

    The 'RegAllocInfo' module collects together the machine-specific
    info needed to do register allocation.

  * ["RegisterAlloc"] The (machine-independent) register allocator.
-}
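
{-
As a rough sketch of the per-proc pipeline (the authoritative sequence is
'cmmNativeGen' below), each top-level chunk of Cmm flows through:

        fixStgRegisters          -- rewrite assignments to global regs
          -> cmmToCmm            -- generic Cmm-to-Cmm optimisations (below)
          -> cmmTopCodeGen       -- machine-specific instruction selection
          -> regLiveness         -- annotate with register liveness
          -> regAlloc            -- linear or graph-colouring allocator
          -> ncg_x86fp_kludge    -- x86 only: clear the FPU stack
          -> generateJumpTables
          -> shortcutBranches
          -> sequenceTop         -- block layout / fallthroughs
          -> ncgExpandTop        -- SPARC synthetic-instruction expansion
-}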
-- -----------------------------------------------------------------------------
-- Top-level of the native codegen

data NcgImpl statics instr jumpDest = NcgImpl {
    cmmTopCodeGen             :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
    generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
    getJumpDestBlockId        :: jumpDest -> Maybe BlockId,
    canShortcut               :: instr -> Maybe jumpDest,
    shortcutStatics           :: (BlockId -> Maybe jumpDest) -> statics -> statics,
    shortcutJump              :: (BlockId -> Maybe jumpDest) -> instr -> instr,
    pprNatCmmDecl             :: NatCmmDecl statics instr -> SDoc,
    maxSpillSlots             :: Int,
    allocatableRegs           :: [RealReg],
    ncg_x86fp_kludge          :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgExpandTop              :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
    ncgAllocMoreStack         :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr,
    ncgMakeFarBranches        :: [NatBasicBlock instr] -> [NatBasicBlock instr]
    }
--------------------
nativeCodeGen :: DynFlags -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()
nativeCodeGen dflags h us cmms
 = let platform = targetPlatform dflags
       nCG' :: (Outputable statics, Outputable instr, Instruction instr)
            => NcgImpl statics instr jumpDest -> IO ()
       nCG' ncgImpl = nativeCodeGen' dflags ncgImpl h us cmms
       x86NcgImpl = NcgImpl {
                         cmmTopCodeGen             = X86.CodeGen.cmmTopCodeGen
                        ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
                        ,getJumpDestBlockId        = X86.Instr.getJumpDestBlockId
                        ,canShortcut               = X86.Instr.canShortcut
                        ,shortcutStatics           = X86.Instr.shortcutStatics
                        ,shortcutJump              = X86.Instr.shortcutJump
                        ,pprNatCmmDecl             = X86.Ppr.pprNatCmmDecl
                        ,maxSpillSlots             = X86.Instr.maxSpillSlots dflags
                        ,allocatableRegs           = X86.Regs.allocatableRegs platform
                        ,ncg_x86fp_kludge          = id
                        ,ncgAllocMoreStack         = X86.Instr.allocMoreStack platform
                        ,ncgExpandTop              = id
                        ,ncgMakeFarBranches        = id
                    }
   in case platformArch platform of
        ArchX86    -> nCG' (x86NcgImpl { ncg_x86fp_kludge = map x86fp_kludge })
        ArchX86_64 -> nCG' x86NcgImpl
        ArchPPC ->
            nCG' $ NcgImpl {
                 cmmTopCodeGen             = PPC.CodeGen.cmmTopCodeGen
                ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
                ,getJumpDestBlockId        = PPC.RegInfo.getJumpDestBlockId
                ,canShortcut               = PPC.RegInfo.canShortcut
                ,shortcutStatics           = PPC.RegInfo.shortcutStatics
                ,shortcutJump              = PPC.RegInfo.shortcutJump
                ,pprNatCmmDecl             = PPC.Ppr.pprNatCmmDecl
                ,maxSpillSlots             = PPC.Instr.maxSpillSlots dflags
                ,allocatableRegs           = PPC.Regs.allocatableRegs platform
                ,ncg_x86fp_kludge          = id
                ,ncgAllocMoreStack         = noAllocMoreStack
                ,ncgExpandTop              = id
                ,ncgMakeFarBranches        = makeFarBranches
            }
        ArchSPARC ->
            nCG' $ NcgImpl {
                 cmmTopCodeGen             = SPARC.CodeGen.cmmTopCodeGen
                ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
                ,getJumpDestBlockId        = SPARC.ShortcutJump.getJumpDestBlockId
                ,canShortcut               = SPARC.ShortcutJump.canShortcut
                ,shortcutStatics           = SPARC.ShortcutJump.shortcutStatics
                ,shortcutJump              = SPARC.ShortcutJump.shortcutJump
                ,pprNatCmmDecl             = SPARC.Ppr.pprNatCmmDecl
                ,maxSpillSlots             = SPARC.Instr.maxSpillSlots dflags
                ,allocatableRegs           = SPARC.Regs.allocatableRegs
                ,ncg_x86fp_kludge          = id
                ,ncgAllocMoreStack         = noAllocMoreStack
                ,ncgExpandTop              = map SPARC.CodeGen.Expand.expandTop
                ,ncgMakeFarBranches        = id
            }
        ArchARM _ _ _ ->
            panic "nativeCodeGen: No NCG for ARM"
        ArchPPC_64 ->
            panic "nativeCodeGen: No NCG for PPC 64"
        ArchUnknown ->
            panic "nativeCodeGen: No NCG for unknown arch"

--
-- Allocating more stack space for spilling is currently only
-- supported for the linear register allocator on x86/x86_64; the rest
-- default to the panic below.  To support allocating extra stack on
-- more platforms, provide a definition of ncgAllocMoreStack.
--
noAllocMoreStack :: Int -> NatCmmDecl statics instr -> NatCmmDecl statics instr
noAllocMoreStack amount _
  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
        ++  "   is a known limitation in the linear allocator.\n"
        ++  "\n"
        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
        ++  "   You can still file a bug report if you like.\n"

nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
               => DynFlags
               -> NcgImpl statics instr jumpDest
               -> Handle -> UniqSupply -> Stream IO RawCmmGroup () -> IO ()
nativeCodeGen' dflags ncgImpl h us cmms
 = do
        let platform = targetPlatform dflags
            split_cmms = Stream.map add_split cmms
        -- BufHandle is a performance hack.  We could hide it inside
        -- Pretty if it weren't for the fact that we do lots of little
        -- printDocs here (in order to do codegen in constant space).
        bufh <- newBufHandle h
        (imports, prof) <- cmmNativeGenStream dflags ncgImpl bufh us split_cmms [] [] 0
        bFlush bufh

        let (native, colorStats, linearStats)
                = unzip3 prof

        -- dump native code
        dumpIfSet_dyn dflags
                Opt_D_dump_asm "Asm code"
                (vcat $ map (pprNatCmmDecl ncgImpl) $ concat native)

        -- dump global NCG stats for graph coloring allocator
        (case concat $ catMaybes colorStats of
          []    -> return ()
          stats -> do
                -- build the global register conflict graph
                let graphGlobal
                        = foldl Color.union Color.initGraph
                        $ [ Color.raGraph stat
                                | stat@Color.RegAllocStatsStart{} <- stats]

                dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                        $ Color.pprStats stats graphGlobal

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_conflicts "Register conflict graph"
                        $ Color.dotGraph
                                (targetRegDotColor platform)
                                (Color.trivColorable platform
                                        (targetVirtualRegSqueeze platform)
                                        (targetRealRegSqueeze platform))
                        $ graphGlobal)

        -- dump global NCG stats for linear allocator
        (case concat $ catMaybes linearStats of
                []      -> return ()
                stats   -> dumpSDoc dflags Opt_D_dump_asm_stats "NCG stats"
                                $ Linear.pprStats (concat native) stats)

        -- write out the imports
        Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
                $ makeImportsDoc dflags (concat imports)

        return ()

 where  add_split tops
                | dopt Opt_SplitObjs dflags = split_marker : tops
                | otherwise                 = tops

        split_marker = CmmProc mapEmpty mkSplitMarkerLabel (ListGraph [])

cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
              => DynFlags
              -> NcgImpl statics instr jumpDest
              -> BufHandle
              -> UniqSupply
              -> Stream IO RawCmmGroup ()
              -> [[CLabel]]
              -> [ ([NatCmmDecl statics instr],
                    Maybe [Color.RegAllocStats statics instr],
                    Maybe [Linear.RegAllocStats]) ]
              -> Int
              -> IO ( [[CLabel]],
                      [([NatCmmDecl statics instr],
                        Maybe [Color.RegAllocStats statics instr],
                        Maybe [Linear.RegAllocStats])] )

cmmNativeGenStream dflags ncgImpl h us cmm_stream impAcc profAcc count
 = do
        r <- Stream.runStream cmm_stream
        case r of
          Left () -> return (reverse impAcc, reverse profAcc)
          Right (cmms, cmm_stream') -> do
            (impAcc,profAcc,us') <- cmmNativeGens dflags ncgImpl h us cmms
                                                  impAcc profAcc count
            cmmNativeGenStream dflags ncgImpl h us' cmm_stream'
                                                  impAcc profAcc count

-- | Do native code generation on all these cmms.
--
cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
              => DynFlags
              -> NcgImpl statics instr jumpDest
              -> BufHandle
              -> UniqSupply
              -> [RawCmmDecl]
              -> [[CLabel]]
              -> [ ([NatCmmDecl statics instr],
                    Maybe [Color.RegAllocStats statics instr],
                    Maybe [Linear.RegAllocStats]) ]
              -> Int
              -> IO ( [[CLabel]],
                      [([NatCmmDecl statics instr],
                        Maybe [Color.RegAllocStats statics instr],
                        Maybe [Linear.RegAllocStats])],
                      UniqSupply )

cmmNativeGens _ _ _ us [] impAcc profAcc _
        = return (impAcc,profAcc,us)

cmmNativeGens dflags ncgImpl h us (cmm : cmms) impAcc profAcc count
 = do
        (us', native, imports, colorStats, linearStats)
                <- {-# SCC "cmmNativeGen" #-} cmmNativeGen dflags ncgImpl us cmm count

        {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
                $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
                $ vcat $ map (pprNatCmmDecl ncgImpl) native

        -- carefully evaluate this strictly.  Binding it with 'let'
        -- and then using 'seq' doesn't work, because the let
        -- apparently gets inlined first.
        lsPprNative <- return $!
                if  dopt Opt_D_dump_asm dflags
                 || dopt Opt_D_dump_asm_stats dflags
                        then native
                        else []

        count' <- return $! count + 1;

        -- force evaluation of all this stuff to avoid space leaks
        {-# SCC "seqString" #-} seqString (showSDoc dflags $ vcat $ map ppr imports) `seq` return ()

        cmmNativeGens dflags ncgImpl
                h us' cmms
                (imports : impAcc)
                ((lsPprNative, colorStats, linearStats) : profAcc)
                count'

 where  seqString []     = ()
        seqString (x:xs) = x `seq` seqString xs `seq` ()

-- | Complete native code generation phase for a single top-level chunk of Cmm.
--      Dumps the output of each stage along the way.
--      Also collects the register-allocator stats later used for the global
--      conflict graph and the NCG stats dumps.
cmmNativeGen
        :: (Outputable statics, Outputable instr, Instruction instr)
        => DynFlags
        -> NcgImpl statics instr jumpDest
        -> UniqSupply
        -> RawCmmDecl                                   -- ^ the cmm to generate code for
        -> Int                                          -- ^ sequence number of this top thing
        -> IO   ( UniqSupply
                , [NatCmmDecl statics instr]                -- native code
                , [CLabel]                                  -- things imported by this cmm
                , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
                , Maybe [Linear.RegAllocStats])             -- stats for the linear register allocators
cmmNativeGen dflags ncgImpl us cmm count
 = do
        let platform = targetPlatform dflags

        -- rewrite assignments to global regs
        let fixed_cmm =
                {-# SCC "fixStgRegisters" #-}
                fixStgRegisters dflags cmm

        -- cmm to cmm optimisations
        let (opt_cmm, imports) =
                {-# SCC "cmmToCmm" #-}
                cmmToCmm dflags fixed_cmm

        dumpIfSet_dyn dflags
                Opt_D_dump_opt_cmm "Optimised Cmm"
                (pprCmmGroup [opt_cmm])

        -- generate native code from cmm
        let ((native, lastMinuteImports), usGen) =
                {-# SCC "genMachCode" #-}
                initUs us $ genMachCode dflags (cmmTopCodeGen ncgImpl) opt_cmm

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_native "Native code"
                (vcat $ map (pprNatCmmDecl ncgImpl) native)

        -- tag instructions with register liveness information
        let (withLiveness, usLive) =
                {-# SCC "regLiveness" #-}
                initUs usGen
                        $ mapM (regLiveness platform)
                        $ map natCmmTopToLive native

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_liveness "Liveness annotations added"
                (vcat $ map ppr withLiveness)

        -- allocate registers
        (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
         if ( dopt Opt_RegsGraph dflags
           || dopt Opt_RegsIterative dflags)
          then do
                -- the regs usable for allocation
                let (alloc_regs :: UniqFM (UniqSet RealReg))
                        = foldr (\r -> plusUFM_C unionUniqSets
                                        $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
                                emptyUFM
                        $ allocatableRegs ncgImpl

                -- do the graph coloring register allocation
                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ Color.regAlloc
                                dflags
                                alloc_regs
                                (mkUniqSet [0 .. maxSpillSlots ncgImpl])
                                withLiveness

                -- dump out what happened during register allocation
                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc_stages "Build/spill stages"
                        (vcat   $ map (\(stage, stats)
                                        -> text "# --------------------------"
                                        $$ text "# cmm " <> int count <> text " Stage " <> int stage
                                        $$ ppr stats)
                                $ zip [0..] regAllocStats)

                let mPprStats =
                        if dopt Opt_D_dump_asm_stats dflags
                         then Just regAllocStats else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , mPprStats
                        , Nothing)

          else do
                -- do linear register allocation
                let reg_alloc proc = do
                       (alloced, maybe_more_stack, ra_stats) <-
                               Linear.regAlloc dflags proc
                       case maybe_more_stack of
                         Nothing -> return ( alloced, ra_stats )
                         Just amount ->
                           return ( ncgAllocMoreStack ncgImpl amount alloced
                                  , ra_stats )

                let ((alloced, regAllocStats), usAlloc)
                        = {-# SCC "RegAlloc" #-}
                          initUs usLive
                          $ liftM unzip
                          $ mapM reg_alloc withLiveness

                dumpIfSet_dyn dflags
                        Opt_D_dump_asm_regalloc "Registers allocated"
                        (vcat $ map (pprNatCmmDecl ncgImpl) alloced)

                let mPprStats =
                        if dopt Opt_D_dump_asm_stats dflags
                         then Just (catMaybes regAllocStats) else Nothing

                -- force evaluation of the Maybe to avoid space leak
                mPprStats `seq` return ()

                return  ( alloced, usAlloc
                        , Nothing
                        , mPprStats)

        ---- x86fp_kludge.  This pass inserts ffree instructions to clear
        ---- the FPU stack on x86.  The x86 ABI requires that the FPU stack
        ---- is clear, and library functions can return odd results if it
        ---- isn't.
        ----
        ---- NB. must happen before shortcutBranches, because that
        ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
        let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced

        ---- generate jump tables
        let tabled =
                {-# SCC "generateJumpTables" #-}
                generateJumpTables ncgImpl kludged

        ---- shortcut branches
        let shorted =
                {-# SCC "shortcutBranches" #-}
                shortcutBranches dflags ncgImpl tabled

        ---- sequence blocks
        let sequenced =
                {-# SCC "sequenceBlocks" #-}
                map (sequenceTop ncgImpl) shorted

        ---- expansion of SPARC synthetic instrs
        let expanded =
                {-# SCC "sparc_expand" #-}
                ncgExpandTop ncgImpl sequenced

        dumpIfSet_dyn dflags
                Opt_D_dump_asm_expanded "Synthetic instructions expanded"
                (vcat $ map (pprNatCmmDecl ncgImpl) expanded)

        return  ( usAlloc
                , expanded
                , lastMinuteImports ++ imports
                , ppr_raStatsColor
                , ppr_raStatsLinear)

x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
x86fp_kludge top@(CmmData _ _) = top
x86fp_kludge (CmmProc info lbl (ListGraph code)) =
        CmmProc info lbl (ListGraph $ X86.Instr.i386_insert_ffrees code)

-- | Build a doc for all the imports.
--
makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
makeImportsDoc dflags imports
 = dyld_stubs imports
        $$
        -- On recent versions of Darwin, the linker supports
        -- dead-stripping of code and data on a per-symbol basis.
        -- There's a hack to make this work in PprMach.pprNatCmmDecl.
        (if platformHasSubsectionsViaSymbols (targetPlatform dflags)
         then text ".subsections_via_symbols"
         else empty)
        $$
        -- On recent GNU ELF systems one can mark an object file
        -- as not requiring an executable stack. If all objects
        -- linked into a program have this note then the program
        -- will not use an executable stack, which is good for
        -- security. GHC generated code does not need an executable
        -- stack so add the note in:
        (if platformHasGnuNonexecStack (targetPlatform dflags)
         then text ".section .note.GNU-stack,\"\",@progbits"
         else empty)
        $$
        -- And just because every other compiler does, let's stick in
        -- an identifier directive: .ident "GHC x.y.z"
        (if platformHasIdentDirective (targetPlatform dflags)
         then let compilerIdent = text "GHC" <+> text cProjectVersion
               in text ".ident" <+> doubleQuotes compilerIdent
         else empty)

 where
        -- Generate "symbol stubs" for all external symbols that might
        -- come from a dynamic library.
        dyld_stubs :: [CLabel] -> SDoc
{-      dyld_stubs imps = vcat $ map pprDyldSymbolStub $
                                 map head $ group $ sort imps -}

        platform = targetPlatform dflags
        arch     = platformArch platform
        os       = platformOS   platform

        -- (Hack) sometimes two Labels pretty-print the same, but have
        -- different uniques; so we compare their text versions...
        dyld_stubs imps
                | needImportedSymbols dflags arch os
                = vcat $
                        (pprGotDeclaration dflags arch os :) $
                        map ( pprImportedSymbol dflags platform . fst . head) $
                        groupBy (\(_,a) (_,b) -> a == b) $
                        sortBy (\(_,a) (_,b) -> compare a b) $
                        map doPpr $
                        imps
                | otherwise
                = empty

        doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
        astyle = mkCodeStyle AsmStyle
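
-- As an illustration only (actual output depends on the target platform and
-- on cProjectVersion): on a GNU/Linux ELF target the non-stub part of this
-- doc renders roughly as
--
--      .section .note.GNU-stack,"",@progbits
--      .ident "GHC <cProjectVersion>"
--
-- while on Darwin the ".subsections_via_symbols" directive and the dyld
-- symbol stubs appear instead.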

-- -----------------------------------------------------------------------------
-- Sequencing the basic blocks

-- Cmm BasicBlocks are self-contained entities: they always end in a
-- jump, either non-local or to another basic block in the same proc.
-- In this phase, we attempt to place the basic blocks in a sequence
-- such that as many of the local jumps as possible turn into
-- fallthroughs.

sequenceTop
        :: Instruction instr
        => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr

sequenceTop _       top@(CmmData _ _) = top
sequenceTop ncgImpl (CmmProc info lbl (ListGraph blocks)) =
  CmmProc info lbl (ListGraph $ ncgMakeFarBranches ncgImpl $ sequenceBlocks info blocks)

-- The algorithm is very simple (and stupid): we make a graph out of
-- the blocks where there is an edge from one block to another iff the
-- first block ends by jumping to the second.  Then we topologically
-- sort this graph.  Then traverse the list: for each block, we first
-- output the block, then if it has an out edge, we move the
-- destination of the out edge to the front of the list, and continue.

-- FYI, the classic layout for basic blocks uses postorder DFS; this
-- algorithm is implemented in Hoopl.
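
-- A small worked example (hypothetical blocks, not real output): suppose the
-- proc has blocks A, B and C, where A ends in a jump to C and C ends in a
-- jump to B.  The graph has edges A->C and C->B, so after the topological
-- sort and the "move the destination to the front" step we emit A, C, B;
-- both local jumps become fallthroughs, and 'seqBlocks' below drops the
-- now-redundant jump instructions (the 'init instrs' case).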

sequenceBlocks
        :: Instruction instr
        => BlockEnv i
        -> [NatBasicBlock instr]
        -> [NatBasicBlock instr]

sequenceBlocks _ [] = []
sequenceBlocks infos (entry:blocks) =
  seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  -- the first block is the entry point ==> it must remain at the start.

sccBlocks
        :: Instruction instr
        => [NatBasicBlock instr]
        -> [SCC ( NatBasicBlock instr
                , BlockId
                , [BlockId])]

sccBlocks blocks = stronglyConnCompFromEdgedVerticesR (map mkNode blocks)

-- we're only interested in the last instruction of
-- the block, and only if it has a single destination.
getOutEdges
        :: Instruction instr
        => [instr] -> [BlockId]

getOutEdges instrs
        = case jumpDestsOfInstr (last instrs) of
                [one] -> [one]
                _many -> []

mkNode :: (Instruction t)
       => GenBasicBlock t
       -> (GenBasicBlock t, BlockId, [BlockId])
mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)

seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
                        -> [GenBasicBlock t1]
seqBlocks _ [] = []
seqBlocks infos ((block,_,[]) : rest)
  = block : seqBlocks infos rest
seqBlocks infos ((block@(BasicBlock id instrs),_,[next]) : rest)
  | can_fallthrough = BasicBlock id (init instrs) : seqBlocks infos rest'
  | otherwise       = block : seqBlocks infos rest'
  where
        can_fallthrough = not (mapMember next infos) && can_reorder
        (can_reorder, rest') = reorder next [] rest
        -- TODO: we should do a better job for cycles; try to maximise the
        -- fallthroughs within a loop.
seqBlocks _ _ = panic "AsmCodegen:seqBlocks"

reorder :: (Eq a) => a -> [(t, a, t1)] -> [(t, a, t1)] -> (Bool, [(t, a, t1)])
reorder  _ accum [] = (False, reverse accum)
reorder id accum (b@(block,id',out) : rest)
  | id == id'  = (True, (block,id,out) : reverse accum ++ rest)
  | otherwise  = reorder id (b:accum) rest


-- -----------------------------------------------------------------------------
-- Making far branches

-- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
-- big, we have to work around this limitation.
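
-- The arithmetic behind the limit (a sketch, assuming 4-byte instructions):
-- the conditional-branch displacement is a signed 16-bit byte offset (with
-- the low two bits zero), i.e. +-32768 bytes or +-8192 instructions.  The
-- 'nearLimit' below stays well under that, leaving headroom for
-- pseudo-instructions that expand to several real ones.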

makeFarBranches
        :: [NatBasicBlock PPC.Instr.Instr]
        -> [NatBasicBlock PPC.Instr.Instr]
makeFarBranches blocks
    | last blockAddresses < nearLimit = blocks
    | otherwise = zipWith handleBlock blockAddresses blocks
    where
        blockAddresses = scanl (+) 0 $ map blockLen blocks
        blockLen (BasicBlock _ instrs) = length instrs

        handleBlock addr (BasicBlock id instrs)
                = BasicBlock id (zipWith makeFar [addr..] instrs)

        makeFar _ (PPC.Instr.BCC PPC.Cond.ALWAYS tgt) = PPC.Instr.BCC PPC.Cond.ALWAYS tgt
        makeFar addr (PPC.Instr.BCC cond tgt)
            | abs (addr - targetAddr) >= nearLimit
            = PPC.Instr.BCCFAR cond tgt
            | otherwise
            = PPC.Instr.BCC cond tgt
            where Just targetAddr = lookupUFM blockAddressMap tgt
        makeFar _ other = other

        nearLimit = 7000 -- 8192 instructions are allowed; let's keep some
                         -- distance, as we have a few pseudo-insns that are
                         -- pretty-printed as multiple instructions,
                         -- and it's just not worth the effort to calculate
                         -- things exactly

        blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
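
        -- For instance (hypothetical block sizes): blocks of 3, 5 and 2
        -- instructions give
        --      blockAddresses = scanl (+) 0 [3,5,2] = [0,3,8,10]
        -- so the second block starts at instruction offset 3 and the last
        -- element, 10, is the total length compared against nearLimit above.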

-- -----------------------------------------------------------------------------
-- Generate jump tables

-- Analyzes all native code and generates data sections for all jump
-- table instructions.
generateJumpTables
        :: NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
generateJumpTables ncgImpl xs = concatMap f xs
    where f p@(CmmProc _ _ (ListGraph xs)) = p : concatMap g xs
          f p = [p]
          g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)

-- -----------------------------------------------------------------------------
-- Shortcut branches

shortcutBranches
        :: DynFlags
        -> NcgImpl statics instr jumpDest
        -> [NatCmmDecl statics instr]
        -> [NatCmmDecl statics instr]

shortcutBranches dflags ncgImpl tops
  | optLevel dflags < 1 = tops    -- only with -O or higher
  | otherwise           = map (apply_mapping ncgImpl mapping) tops'
  where
    (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
    mapping = foldr plusUFM emptyUFM mappings

build_mapping :: NcgImpl statics instr jumpDest
              -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
              -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
build_mapping _ (CmmProc info lbl (ListGraph []))
  = (CmmProc info lbl (ListGraph []), emptyUFM)
build_mapping ncgImpl (CmmProc info lbl (ListGraph (head:blocks)))
  = (CmmProc info lbl (ListGraph (head:others)), mapping)
        -- drop the shorted blocks, but don't ever drop the first one,
        -- because it is pointed to by a global label.
  where
    -- find all the blocks that just consist of a jump that can be
    -- shorted.
    -- Don't completely eliminate loops here -- that can leave a dangling jump!
    (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
    split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
        | Just jd <- canShortcut ncgImpl insn,
          Just dest <- getJumpDestBlockId ncgImpl jd,
          not (has_info id),
          (setMember dest s) || dest == id -- loop checks
        = (s, shortcut_blocks, b : others)
    split (s, shortcut_blocks, others) (BasicBlock id [insn])
        | Just dest <- canShortcut ncgImpl insn,
          not (has_info id)
        = (setInsert id s, (id,dest) : shortcut_blocks, others)
    split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)

    -- do not eliminate blocks that have an info table
    has_info l = mapMember l info

    -- build a mapping from BlockId to JumpDest for shorting branches
    mapping = foldl add emptyUFM shortcut_blocks
    add ufm (id,dest) = addToUFM ufm id dest

apply_mapping :: NcgImpl statics instr jumpDest
              -> UniqFM jumpDest
              -> GenCmmDecl statics h (ListGraph instr)
              -> GenCmmDecl statics h (ListGraph instr)
apply_mapping ncgImpl ufm (CmmData sec statics)
  = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
apply_mapping ncgImpl ufm (CmmProc info lbl (ListGraph blocks))
  = CmmProc info lbl (ListGraph $ map short_bb blocks)
  where
    short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
    short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
                 -- shortcutJump should apply the mapping repeatedly,
                 -- just in case we can short multiple branches.
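
-- As an illustration (hypothetical block labels): if block L2 consists solely
-- of a jump to L3 and carries no info table, 'build_mapping' records
-- L2 :-> L3 and drops L2 from the proc, and 'apply_mapping' then rewrites
-- every branch to L2 (and any static data referring to it) so that it
-- targets L3 directly.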

-- -----------------------------------------------------------------------------
-- Instruction selection

-- Native code instruction selection for a chunk of Cmm code.  For
-- this part of the computation, we switch from the UniqSM monad to
-- the NatM monad.  The latter carries not only a Unique, but also an
-- Int denoting the current C stack pointer offset in the generated
-- code; this is needed for creating correct spill offsets on
-- architectures which don't offer, or for which it would be
-- prohibitively expensive to employ, a frame pointer register.  Viz,
-- x86.

-- The offset is measured in bytes, and indicates the difference
-- between the current (simulated) C stack-ptr and the value it was at
-- the beginning of the block.  For stacks which grow down, this value
-- should be either zero or negative.

-- Switching between the two monads whilst carrying along the same
-- Unique supply breaks abstraction.  Is that bad?
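
-- For example (a sketch of the bookkeeping, with made-up numbers): if the
-- selected x86 code pushes an 8-byte argument, the simulated stack pointer
-- moves by -8, so a spill slot that nominally lives at 16(%esp) has to be
-- addressed as 24(%esp) until the stack is popped again; the delta carried
-- by NatM is what makes that adjustment possible.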

genMachCode
        :: DynFlags
        -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
        -> RawCmmDecl
        -> UniqSM
                ( [NatCmmDecl statics instr]
                , [CLabel])

genMachCode dflags cmmTopCodeGen cmm_top
  = do  { initial_us <- getUs
        ; let initial_st           = mkNatM_State initial_us 0 dflags
              (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
              final_delta          = natm_delta final_st
              final_imports        = natm_imports final_st
        ; if   final_delta == 0
          then return (new_tops, final_imports)
          else pprPanic "genMachCode: nonzero final delta" (int final_delta)
        }

-- -----------------------------------------------------------------------------
-- Generic Cmm optimiser

{-
Here we do:

  (a) Constant folding
  (b) Position independent code and dynamic linking
        (i)  introduce the appropriate indirections
             and position independent refs
        (ii) compile a list of imported symbols
  (c) Some arch-specific optimizations

(a) will be moving to the new Hoopl pipeline; however, (b) and (c) are
only needed by the native backend and will continue to live here.

Ideas for other things we could do (put these in Hoopl please!):

  - shortcut jumps-to-jumps
  - simple CSE: if an expr is assigned to a temp, then replace later occs of
    that expr with the temp, until the expr is no longer valid (can push through
    temp assignments, and certain assigns to mem...)
-}
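
-- A minimal example of (a), using the constructors from OldCmm (a sketch,
-- evaluated by 'cmmExprCon'/'cmmMachOpFold' below):
--
--      cmmMachOpFold dflags (MO_Add W32)
--              [CmmLit (CmmInt 3 W32), CmmLit (CmmInt 4 W32)]
--
-- reduces to CmmLit (CmmInt 7 W32); and a conditional branch whose condition
-- folds to a constant becomes either a plain branch or a no-op (see
-- 'cmmStmtConFold').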

cmmToCmm :: DynFlags -> RawCmmDecl -> (RawCmmDecl, [CLabel])
cmmToCmm _ top@(CmmData _ _) = (top, [])
cmmToCmm dflags (CmmProc info lbl (ListGraph blocks)) = runCmmOpt dflags $ do
  blocks' <- mapM cmmBlockConFold blocks
  return $ CmmProc info lbl (ListGraph blocks')

newtype CmmOptM a = CmmOptM (([CLabel], DynFlags) -> (# a, [CLabel] #))

instance Monad CmmOptM where
  return x = CmmOptM $ \(imports, _) -> (# x,imports #)
  (CmmOptM f) >>= g =
    CmmOptM $ \(imports, dflags) ->
                case f (imports, dflags) of
                  (# x, imports' #) ->
                    case g x of
                      CmmOptM g' -> g' (imports', dflags)

addImportCmmOpt :: CLabel -> CmmOptM ()
addImportCmmOpt lbl = CmmOptM $ \(imports, _dflags) -> (# (), lbl:imports #)

instance HasDynFlags CmmOptM where
    getDynFlags = CmmOptM $ \(imports, dflags) -> (# dflags, imports #)

runCmmOpt :: DynFlags -> CmmOptM a -> (a, [CLabel])
runCmmOpt dflags (CmmOptM f) = case f ([], dflags) of
                        (# result, imports #) -> (result, imports)
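
-- The plumbing in one line (a sketch): 'runCmmOpt' starts from an empty
-- import list and each 'addImportCmmOpt' conses onto it, so for a
-- hypothetical label lbl
--
--      runCmmOpt dflags (addImportCmmOpt lbl >> return e)
--
-- evaluates to (e, [lbl]); 'cmmToCmm' uses exactly this to collect the
-- imported symbols referenced while optimising a proc.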

cmmBlockConFold :: CmmBasicBlock -> CmmOptM CmmBasicBlock
cmmBlockConFold (BasicBlock id stmts) = do
  stmts' <- mapM cmmStmtConFold stmts
  return $ BasicBlock id stmts'

-- This does three optimizations, but they're very quick to check, so we don't
-- bother turning them off even when the Hoopl code is active.  Since
-- this is on the old Cmm representation, we can't reuse the code either:
--  * reg = reg      --> nop
--  * if 0 then jump --> nop
--  * if 1 then jump --> jump
-- We might be tempted to skip this step entirely if not Opt_PIC, but
-- there is some PowerPC code for the non-PIC case, which would also
-- have to be separated.
cmmStmtConFold :: CmmStmt -> CmmOptM CmmStmt
cmmStmtConFold stmt
   = case stmt of
        CmmAssign reg src
           -> do src' <- cmmExprConFold DataReference src
                 return $ case src' of
                   CmmReg reg' | reg == reg' -> CmmNop
                   new_src -> CmmAssign reg new_src

        CmmStore addr src
           -> do addr' <- cmmExprConFold DataReference addr
                 src'  <- cmmExprConFold DataReference src
                 return $ CmmStore addr' src'

        CmmJump addr live
           -> do addr' <- cmmExprConFold JumpReference addr
                 return $ CmmJump addr' live

        CmmCall target regs args returns
           -> do target' <- case target of
                              CmmCallee e conv -> do
                                e' <- cmmExprConFold CallReference e
                                return $ CmmCallee e' conv
                              op@(CmmPrim _ Nothing) ->
                                return op
                              CmmPrim op (Just stmts) ->
                                do stmts' <- mapM cmmStmtConFold stmts
                                   return $ CmmPrim op (Just stmts')
                 args' <- mapM (\(CmmHinted arg hint) -> do
                                  arg' <- cmmExprConFold DataReference arg
                                  return (CmmHinted arg' hint)) args
                 return $ CmmCall target' regs args' returns

        CmmCondBranch test dest
           -> do test' <- cmmExprConFold DataReference test
                 dflags <- getDynFlags
                 return $ case test' of
                   CmmLit (CmmInt 0 _) ->
                     CmmComment (mkFastString ("deleted: " ++
                                        showSDoc dflags (pprStmt stmt)))

                   CmmLit (CmmInt _ _) -> CmmBranch dest

                   _other -> CmmCondBranch test' dest

        CmmSwitch expr ids
           -> do expr' <- cmmExprConFold DataReference expr
                 return $ CmmSwitch expr' ids

        other
           -> return other

cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
cmmExprConFold referenceKind expr = do
    dflags <- getDynFlags
    -- With -O1 and greater, the cmmSink pass does constant-folding, so
    -- we don't need to do it again here.
    let expr' = if optLevel dflags >= 1
                    then expr
                    else cmmExprCon dflags expr
    cmmExprNative referenceKind expr'

cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
cmmExprCon dflags (CmmMachOp mop args)
    = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
cmmExprCon _ other = other

-- handles both PIC and non-PIC cases... a very strange mixture
-- of things to do.
cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
cmmExprNative referenceKind expr = do
     dflags <- getDynFlags
     let platform = targetPlatform dflags
         arch = platformArch platform
     case expr of
        CmmLoad addr rep
           -> do addr' <- cmmExprNative DataReference addr
                 return $ CmmLoad addr' rep

        CmmMachOp mop args
           -> do args' <- mapM (cmmExprNative DataReference) args
                 return $ CmmMachOp mop args'

        CmmLit (CmmBlock id)
           -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
           -- we must convert block Ids to CLabels here, because we
           -- might have to do the PIC transformation.  Hence we must
           -- not modify BlockIds beyond this point.

        CmmLit (CmmLabel lbl)
           -> do
                cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl

        CmmLit (CmmLabelOff lbl off)
           -> do
                dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl
                -- need to optimize here, since it's late
                return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
                    dynRef,
                    (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
                  ]

        -- On powerpc (non-PIC), it's easier to jump directly to a label than
        -- to use the register table, so we replace these registers
        -- with the corresponding labels:
        CmmReg (CmmGlobal EagerBlackholeInfo)
          | arch == ArchPPC && not (dopt Opt_PIC dflags)
          -> cmmExprNative referenceKind $
             CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_EAGER_BLACKHOLE_info")))

        CmmReg (CmmGlobal GCEnter1)
          | arch == ArchPPC && not (dopt Opt_PIC dflags)
          -> cmmExprNative referenceKind $
             CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_enter_1")))

        CmmReg (CmmGlobal GCFun)
          | arch == ArchPPC && not (dopt Opt_PIC dflags)
          -> cmmExprNative referenceKind $
             CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "__stg_gc_fun")))

        other
           -> return other
\end{code}