PageRenderTime 30ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/compiler/nativeGen/AsmCodeGen.hs

http://github.com/ghc/ghc
Haskell | 1135 lines | 736 code | 154 blank | 245 comment | 44 complexity | 4d16c84f301378d71c912dd2eeb3a880 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. -- -----------------------------------------------------------------------------
  2. --
  3. -- (c) The University of Glasgow 1993-2004
  4. --
  5. -- This is the top-level module in the native code generator.
  6. --
  7. -- -----------------------------------------------------------------------------
  8. {-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}
  9. module AsmCodeGen ( nativeCodeGen ) where
  10. #include "HsVersions.h"
  11. #include "nativeGen/NCG.h"
  12. import qualified X86.CodeGen
  13. import qualified X86.Regs
  14. import qualified X86.Instr
  15. import qualified X86.Ppr
  16. import qualified SPARC.CodeGen
  17. import qualified SPARC.Regs
  18. import qualified SPARC.Instr
  19. import qualified SPARC.Ppr
  20. import qualified SPARC.ShortcutJump
  21. import qualified SPARC.CodeGen.Expand
  22. import qualified PPC.CodeGen
  23. import qualified PPC.Regs
  24. import qualified PPC.RegInfo
  25. import qualified PPC.Instr
  26. import qualified PPC.Ppr
  27. import RegAlloc.Liveness
  28. import qualified RegAlloc.Linear.Main as Linear
  29. import qualified GraphColor as Color
  30. import qualified RegAlloc.Graph.Main as Color
  31. import qualified RegAlloc.Graph.Stats as Color
  32. import qualified RegAlloc.Graph.TrivColorable as Color
  33. import TargetReg
  34. import Platform
  35. import Config
  36. import Instruction
  37. import PIC
  38. import Reg
  39. import NCGMonad
  40. import Dwarf
  41. import Debug
  42. import BlockId
  43. import CgUtils ( fixStgRegisters )
  44. import Cmm
  45. import CmmUtils
  46. import Hoopl
  47. import CmmOpt ( cmmMachOpFold )
  48. import PprCmm
  49. import CLabel
  50. import UniqFM
  51. import UniqSupply
  52. import DynFlags
  53. import Util
  54. import Unique
  55. import BasicTypes ( Alignment )
  56. import Digraph
  57. import qualified Pretty
  58. import BufWrite
  59. import Outputable
  60. import FastString
  61. import UniqSet
  62. import ErrUtils
  63. import Module
  64. import Stream (Stream)
  65. import qualified Stream
  66. -- DEBUGGING ONLY
  67. --import OrdList
  68. import Data.List
  69. import Data.Maybe
  70. import Data.Ord ( comparing )
  71. import Control.Exception
  72. import Control.Monad
  73. import System.IO
  74. {-
  75. The native-code generator has machine-independent and
  76. machine-dependent modules.
  77. This module ("AsmCodeGen") is the top-level machine-independent
  78. module. Before entering machine-dependent land, we do some
  79. machine-independent optimisations (defined below) on the
  80. 'CmmStmts's.
  81. We convert to the machine-specific 'Instr' datatype with
  82. 'cmmCodeGen', assuming an infinite supply of registers. We then use
  83. a machine-independent register allocator ('regAlloc') to rejoin
  84. reality. Obviously, 'regAlloc' has machine-specific helper
  85. functions (see about "RegAllocInfo" below).
  86. Finally, we order the basic blocks of the function so as to minimise
  87. the number of jumps between blocks, by utilising fallthrough wherever
  88. possible.
  89. The machine-dependent bits break down as follows:
  90. * ["MachRegs"] Everything about the target platform's machine
  91. registers (and immediate operands, and addresses, which tend to
  92. intermingle/interact with registers).
  93. * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
  94. have a module of its own), plus a miscellany of other things
  95. (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
  96. * ["MachCodeGen"] is where 'Cmm' stuff turns into
  97. machine instructions.
  98. * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
  99. a 'SDoc').
  100. * ["RegAllocInfo"] In the register allocator, we manipulate
  101. 'MRegsState's, which are 'BitSet's, one bit per machine register.
  102. When we want to say something about a specific machine register
  103. (e.g., ``it gets clobbered by this instruction''), we set/unset
  104. its bit. Obviously, we do this 'BitSet' thing for efficiency
  105. reasons.
  106. The 'RegAllocInfo' module collects together the machine-specific
  107. info needed to do register allocation.
  108. * ["RegisterAlloc"] The (machine-independent) register allocator.
  109. -}
  110. -- -----------------------------------------------------------------------------
  111. -- Top-level of the native codegen
  112. data NcgImpl statics instr jumpDest = NcgImpl {
  113. cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
  114. generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
  115. getJumpDestBlockId :: jumpDest -> Maybe BlockId,
  116. canShortcut :: instr -> Maybe jumpDest,
  117. shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
  118. shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
  119. pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
  120. maxSpillSlots :: Int,
  121. allocatableRegs :: [RealReg],
  122. ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  123. ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
  124. ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
  125. ncgMakeFarBranches :: BlockEnv CmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr]
  126. }
  127. --------------------
  128. nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
  129. -> Stream IO RawCmmGroup ()
  130. -> IO UniqSupply
  131. nativeCodeGen dflags this_mod modLoc h us cmms
  132. = let platform = targetPlatform dflags
  133. nCG' :: (Outputable statics, Outputable instr, Instruction instr)
  134. => NcgImpl statics instr jumpDest -> IO UniqSupply
  135. nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
  136. in case platformArch platform of
  137. ArchX86 -> nCG' (x86NcgImpl dflags)
  138. ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
  139. ArchPPC -> nCG' (ppcNcgImpl dflags)
  140. ArchSPARC -> nCG' (sparcNcgImpl dflags)
  141. ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
  142. ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
  143. ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
  144. ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
  145. ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
  146. ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
  147. ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
  148. ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
  149. ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
  150. x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
  151. x86NcgImpl dflags
  152. = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
  153. x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
  154. x86_64NcgImpl dflags
  155. = NcgImpl {
  156. cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
  157. ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
  158. ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
  159. ,canShortcut = X86.Instr.canShortcut
  160. ,shortcutStatics = X86.Instr.shortcutStatics
  161. ,shortcutJump = X86.Instr.shortcutJump
  162. ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
  163. ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
  164. ,allocatableRegs = X86.Regs.allocatableRegs platform
  165. ,ncg_x86fp_kludge = id
  166. ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
  167. ,ncgExpandTop = id
  168. ,ncgMakeFarBranches = const id
  169. }
  170. where platform = targetPlatform dflags
  171. ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
  172. ppcNcgImpl dflags
  173. = NcgImpl {
  174. cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
  175. ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
  176. ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
  177. ,canShortcut = PPC.RegInfo.canShortcut
  178. ,shortcutStatics = PPC.RegInfo.shortcutStatics
  179. ,shortcutJump = PPC.RegInfo.shortcutJump
  180. ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
  181. ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
  182. ,allocatableRegs = PPC.Regs.allocatableRegs platform
  183. ,ncg_x86fp_kludge = id
  184. ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
  185. ,ncgExpandTop = id
  186. ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
  187. }
  188. where platform = targetPlatform dflags
  189. sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
  190. sparcNcgImpl dflags
  191. = NcgImpl {
  192. cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
  193. ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
  194. ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
  195. ,canShortcut = SPARC.ShortcutJump.canShortcut
  196. ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
  197. ,shortcutJump = SPARC.ShortcutJump.shortcutJump
  198. ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
  199. ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
  200. ,allocatableRegs = SPARC.Regs.allocatableRegs
  201. ,ncg_x86fp_kludge = id
  202. ,ncgAllocMoreStack = noAllocMoreStack
  203. ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
  204. ,ncgMakeFarBranches = const id
  205. }
  206. --
  207. -- Allocating more stack space for spilling is currently only
  208. -- supported for the linear register allocator on x86/x86_64, the rest
  209. -- default to the panic below. To support allocating extra stack on
  210. -- more platforms provide a definition of ncgAllocMoreStack.
  211. --
  212. noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
  213. noAllocMoreStack amount _
  214. = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
  215. ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
  216. ++ " is a known limitation in the linear allocator.\n"
  217. ++ "\n"
  218. ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
  219. ++ " You can still file a bug report if you like.\n"
  220. -- | Data accumulated during code generation. Mostly about statistics,
  221. -- but also collects debug data for DWARF generation.
  222. data NativeGenAcc statics instr
  223. = NGS { ngs_imports :: ![[CLabel]]
  224. , ngs_natives :: ![[NatCmmDecl statics instr]]
  225. -- ^ Native code generated, for statistics. This might
  226. -- hold a lot of data, so it is important to clear this
  227. -- field as early as possible if it isn't actually
  228. -- required.
  229. , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
  230. , ngs_linearStats :: ![[Linear.RegAllocStats]]
  231. , ngs_labels :: ![Label]
  232. , ngs_debug :: ![DebugBlock]
  233. , ngs_dwarfFiles :: !DwarfFiles
  234. }
  235. nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
  236. => DynFlags
  237. -> Module -> ModLocation
  238. -> NcgImpl statics instr jumpDest
  239. -> Handle
  240. -> UniqSupply
  241. -> Stream IO RawCmmGroup ()
  242. -> IO UniqSupply
  243. nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
  244. = do
  245. -- BufHandle is a performance hack. We could hide it inside
  246. -- Pretty if it weren't for the fact that we do lots of little
  247. -- printDocs here (in order to do codegen in constant space).
  248. bufh <- newBufHandle h
  249. let ngs0 = NGS [] [] [] [] [] [] emptyUFM
  250. (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
  251. cmms ngs0
  252. finishNativeGen dflags modLoc bufh us' ngs
  253. finishNativeGen :: Instruction instr
  254. => DynFlags
  255. -> ModLocation
  256. -> BufHandle
  257. -> UniqSupply
  258. -> NativeGenAcc statics instr
  259. -> IO UniqSupply
  260. finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
  261. = do
  262. -- Write debug data and finish
  263. let emitDw = debugLevel dflags > 0 && not (gopt Opt_SplitObjs dflags)
  264. us' <- if not emitDw then return us else do
  265. (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
  266. emitNativeCode dflags bufh dwarf
  267. return us'
  268. bFlush bufh
  269. -- dump global NCG stats for graph coloring allocator
  270. let stats = concat (ngs_colorStats ngs)
  271. when (not (null stats)) $ do
  272. -- build the global register conflict graph
  273. let graphGlobal
  274. = foldl Color.union Color.initGraph
  275. $ [ Color.raGraph stat
  276. | stat@Color.RegAllocStatsStart{} <- stats]
  277. dump_stats (Color.pprStats stats graphGlobal)
  278. let platform = targetPlatform dflags
  279. dumpIfSet_dyn dflags
  280. Opt_D_dump_asm_conflicts "Register conflict graph"
  281. $ Color.dotGraph
  282. (targetRegDotColor platform)
  283. (Color.trivColorable platform
  284. (targetVirtualRegSqueeze platform)
  285. (targetRealRegSqueeze platform))
  286. $ graphGlobal
  287. -- dump global NCG stats for linear allocator
  288. let linearStats = concat (ngs_linearStats ngs)
  289. when (not (null linearStats)) $
  290. dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
  291. -- write out the imports
  292. Pretty.printDoc Pretty.LeftMode (pprCols dflags) h
  293. $ withPprStyleDoc dflags (mkCodeStyle AsmStyle)
  294. $ makeImportsDoc dflags (concat (ngs_imports ngs))
  295. return us'
  296. where
  297. dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
  298. cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
  299. => DynFlags
  300. -> Module -> ModLocation
  301. -> NcgImpl statics instr jumpDest
  302. -> BufHandle
  303. -> UniqSupply
  304. -> Stream IO RawCmmGroup ()
  305. -> NativeGenAcc statics instr
  306. -> IO (NativeGenAcc statics instr, UniqSupply)
  307. cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
  308. = do r <- Stream.runStream cmm_stream
  309. case r of
  310. Left () ->
  311. return (ngs { ngs_imports = reverse $ ngs_imports ngs
  312. , ngs_natives = reverse $ ngs_natives ngs
  313. , ngs_colorStats = reverse $ ngs_colorStats ngs
  314. , ngs_linearStats = reverse $ ngs_linearStats ngs
  315. },
  316. us)
  317. Right (cmms, cmm_stream') -> do
  318. -- Generate debug information
  319. let debugFlag = debugLevel dflags > 0
  320. !ndbgs | debugFlag = cmmDebugGen modLoc cmms
  321. | otherwise = []
  322. dbgMap = debugToMap ndbgs
  323. -- Insert split marker, generate native code
  324. let splitObjs = gopt Opt_SplitObjs dflags
  325. split_marker = CmmProc mapEmpty mkSplitMarkerLabel [] $
  326. ofBlockList (panic "split_marker_entry") []
  327. cmms' | splitObjs = split_marker : cmms
  328. | otherwise = cmms
  329. (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us
  330. cmms' ngs 0
  331. -- Link native code information into debug blocks
  332. let !ldbgs = cmmDebugLink (ngs_labels ngs') ndbgs
  333. dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
  334. (vcat $ map ppr ldbgs)
  335. -- Emit & clear DWARF information when generating split
  336. -- object files, as we need it to land in the same object file
  337. -- When using split sections, note that we do not split the debug
  338. -- info but emit all the info at once in finishNativeGen.
  339. (ngs'', us'') <-
  340. if debugFlag && splitObjs
  341. then do (dwarf, us'') <- dwarfGen dflags modLoc us ldbgs
  342. emitNativeCode dflags h dwarf
  343. return (ngs' { ngs_debug = []
  344. , ngs_dwarfFiles = emptyUFM
  345. , ngs_labels = [] },
  346. us'')
  347. else return (ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs
  348. , ngs_labels = [] },
  349. us')
  350. cmmNativeGenStream dflags this_mod modLoc ncgImpl h us''
  351. cmm_stream' ngs''
  352. -- | Do native code generation on all these cmms.
  353. --
  354. cmmNativeGens :: (Outputable statics, Outputable instr, Instruction instr)
  355. => DynFlags
  356. -> Module -> ModLocation
  357. -> NcgImpl statics instr jumpDest
  358. -> BufHandle
  359. -> LabelMap DebugBlock
  360. -> UniqSupply
  361. -> [RawCmmDecl]
  362. -> NativeGenAcc statics instr
  363. -> Int
  364. -> IO (NativeGenAcc statics instr, UniqSupply)
  365. cmmNativeGens _ _ _ _ _ _ us [] ngs !_
  366. = return (ngs, us)
  367. cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us
  368. (cmm : cmms) ngs count
  369. = do
  370. let fileIds = ngs_dwarfFiles ngs
  371. (us', fileIds', native, imports, colorStats, linearStats)
  372. <- {-# SCC "cmmNativeGen" #-}
  373. cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
  374. cmm count
  375. -- Generate .file directives for every new file that has been
  376. -- used. Note that it is important that we generate these in
  377. -- ascending order, as Clang's 3.6 assembler complains.
  378. let newFileIds = sortBy (comparing snd) $
  379. nonDetEltsUFM $ fileIds' `minusUFM` fileIds
  380. -- See Note [Unique Determinism and code generation]
  381. pprDecl (f,n) = text "\t.file " <> ppr n <+>
  382. doubleQuotes (ftext f)
  383. emitNativeCode dflags h $ vcat $
  384. map pprDecl newFileIds ++
  385. map (pprNatCmmDecl ncgImpl) native
  386. -- force evaluation all this stuff to avoid space leaks
  387. {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
  388. let !labels' = if debugLevel dflags > 0
  389. then cmmDebugLabels isMetaInstr native else []
  390. !natives' = if dopt Opt_D_dump_asm_stats dflags
  391. then native : ngs_natives ngs else []
  392. mCon = maybe id (:)
  393. ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
  394. , ngs_natives = natives'
  395. , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
  396. , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
  397. , ngs_labels = ngs_labels ngs ++ labels'
  398. , ngs_dwarfFiles = fileIds'
  399. }
  400. cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap us'
  401. cmms ngs' (count + 1)
  402. where seqString [] = ()
  403. seqString (x:xs) = x `seq` seqString xs
  404. emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
  405. emitNativeCode dflags h sdoc = do
  406. {-# SCC "pprNativeCode" #-} Pretty.bufLeftRender h
  407. $ withPprStyleDoc dflags (mkCodeStyle AsmStyle) sdoc
  408. -- dump native code
  409. dumpIfSet_dyn dflags
  410. Opt_D_dump_asm "Asm code"
  411. sdoc
  412. -- | Complete native code generation phase for a single top-level chunk of Cmm.
  413. -- Dumping the output of each stage along the way.
  414. -- Global conflict graph and NGC stats
  415. cmmNativeGen
  416. :: (Outputable statics, Outputable instr, Instruction instr)
  417. => DynFlags
  418. -> Module -> ModLocation
  419. -> NcgImpl statics instr jumpDest
  420. -> UniqSupply
  421. -> DwarfFiles
  422. -> LabelMap DebugBlock
  423. -> RawCmmDecl -- ^ the cmm to generate code for
  424. -> Int -- ^ sequence number of this top thing
  425. -> IO ( UniqSupply
  426. , DwarfFiles
  427. , [NatCmmDecl statics instr] -- native code
  428. , [CLabel] -- things imported by this cmm
  429. , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
  430. , Maybe [Linear.RegAllocStats]) -- stats for the linear register allocators
  431. cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
  432. = do
  433. let platform = targetPlatform dflags
  434. -- rewrite assignments to global regs
  435. let fixed_cmm =
  436. {-# SCC "fixStgRegisters" #-}
  437. fixStgRegisters dflags cmm
  438. -- cmm to cmm optimisations
  439. let (opt_cmm, imports) =
  440. {-# SCC "cmmToCmm" #-}
  441. cmmToCmm dflags this_mod fixed_cmm
  442. dumpIfSet_dyn dflags
  443. Opt_D_dump_opt_cmm "Optimised Cmm"
  444. (pprCmmGroup [opt_cmm])
  445. -- generate native code from cmm
  446. let ((native, lastMinuteImports, fileIds'), usGen) =
  447. {-# SCC "genMachCode" #-}
  448. initUs us $ genMachCode dflags this_mod modLoc
  449. (cmmTopCodeGen ncgImpl)
  450. fileIds dbgMap opt_cmm
  451. dumpIfSet_dyn dflags
  452. Opt_D_dump_asm_native "Native code"
  453. (vcat $ map (pprNatCmmDecl ncgImpl) native)
  454. -- tag instructions with register liveness information
  455. let (withLiveness, usLive) =
  456. {-# SCC "regLiveness" #-}
  457. initUs usGen
  458. $ mapM (regLiveness platform)
  459. $ map natCmmTopToLive native
  460. dumpIfSet_dyn dflags
  461. Opt_D_dump_asm_liveness "Liveness annotations added"
  462. (vcat $ map ppr withLiveness)
  463. -- allocate registers
  464. (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
  465. if ( gopt Opt_RegsGraph dflags
  466. || gopt Opt_RegsIterative dflags )
  467. then do
  468. -- the regs usable for allocation
  469. let (alloc_regs :: UniqFM (UniqSet RealReg))
  470. = foldr (\r -> plusUFM_C unionUniqSets
  471. $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
  472. emptyUFM
  473. $ allocatableRegs ncgImpl
  474. -- do the graph coloring register allocation
  475. let ((alloced, regAllocStats), usAlloc)
  476. = {-# SCC "RegAlloc-color" #-}
  477. initUs usLive
  478. $ Color.regAlloc
  479. dflags
  480. alloc_regs
  481. (mkUniqSet [0 .. maxSpillSlots ncgImpl])
  482. withLiveness
  483. -- dump out what happened during register allocation
  484. dumpIfSet_dyn dflags
  485. Opt_D_dump_asm_regalloc "Registers allocated"
  486. (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  487. dumpIfSet_dyn dflags
  488. Opt_D_dump_asm_regalloc_stages "Build/spill stages"
  489. (vcat $ map (\(stage, stats)
  490. -> text "# --------------------------"
  491. $$ text "# cmm " <> int count <> text " Stage " <> int stage
  492. $$ ppr stats)
  493. $ zip [0..] regAllocStats)
  494. let mPprStats =
  495. if dopt Opt_D_dump_asm_stats dflags
  496. then Just regAllocStats else Nothing
  497. -- force evaluation of the Maybe to avoid space leak
  498. mPprStats `seq` return ()
  499. return ( alloced, usAlloc
  500. , mPprStats
  501. , Nothing)
  502. else do
  503. -- do linear register allocation
  504. let reg_alloc proc = do
  505. (alloced, maybe_more_stack, ra_stats) <-
  506. Linear.regAlloc dflags proc
  507. case maybe_more_stack of
  508. Nothing -> return ( alloced, ra_stats )
  509. Just amount -> do
  510. alloced' <- ncgAllocMoreStack ncgImpl amount alloced
  511. return (alloced', ra_stats )
  512. let ((alloced, regAllocStats), usAlloc)
  513. = {-# SCC "RegAlloc-linear" #-}
  514. initUs usLive
  515. $ liftM unzip
  516. $ mapM reg_alloc withLiveness
  517. dumpIfSet_dyn dflags
  518. Opt_D_dump_asm_regalloc "Registers allocated"
  519. (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
  520. let mPprStats =
  521. if dopt Opt_D_dump_asm_stats dflags
  522. then Just (catMaybes regAllocStats) else Nothing
  523. -- force evaluation of the Maybe to avoid space leak
  524. mPprStats `seq` return ()
  525. return ( alloced, usAlloc
  526. , Nothing
  527. , mPprStats)
  528. ---- x86fp_kludge. This pass inserts ffree instructions to clear
  529. ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
  530. ---- is clear, and library functions can return odd results if it
  531. ---- isn't.
  532. ----
  533. ---- NB. must happen before shortcutBranches, because that
  534. ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
  535. let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
  536. ---- generate jump tables
  537. let tabled =
  538. {-# SCC "generateJumpTables" #-}
  539. generateJumpTables ncgImpl kludged
  540. ---- shortcut branches
  541. let shorted =
  542. {-# SCC "shortcutBranches" #-}
  543. shortcutBranches dflags ncgImpl tabled
  544. ---- sequence blocks
  545. let sequenced =
  546. {-# SCC "sequenceBlocks" #-}
  547. map (sequenceTop ncgImpl) shorted
  548. ---- expansion of SPARC synthetic instrs
  549. let expanded =
  550. {-# SCC "sparc_expand" #-}
  551. ncgExpandTop ncgImpl sequenced
  552. dumpIfSet_dyn dflags
  553. Opt_D_dump_asm_expanded "Synthetic instructions expanded"
  554. (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
  555. return ( usAlloc
  556. , fileIds'
  557. , expanded
  558. , lastMinuteImports ++ imports
  559. , ppr_raStatsColor
  560. , ppr_raStatsLinear)
  561. x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
  562. x86fp_kludge top@(CmmData _ _) = top
  563. x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
  564. CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
  565. -- | Build a doc for all the imports.
  566. --
  567. makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
  568. makeImportsDoc dflags imports
  569. = dyld_stubs imports
  570. $$
  571. -- On recent versions of Darwin, the linker supports
  572. -- dead-stripping of code and data on a per-symbol basis.
  573. -- There's a hack to make this work in PprMach.pprNatCmmDecl.
  574. (if platformHasSubsectionsViaSymbols platform
  575. then text ".subsections_via_symbols"
  576. else Outputable.empty)
  577. $$
  578. -- On recent GNU ELF systems one can mark an object file
  579. -- as not requiring an executable stack. If all objects
  580. -- linked into a program have this note then the program
  581. -- will not use an executable stack, which is good for
  582. -- security. GHC generated code does not need an executable
  583. -- stack so add the note in:
  584. (if platformHasGnuNonexecStack platform
  585. then text ".section .note.GNU-stack,\"\",@progbits"
  586. else Outputable.empty)
  587. $$
  588. -- And just because every other compiler does, let's stick in
  589. -- an identifier directive: .ident "GHC x.y.z"
  590. (if platformHasIdentDirective platform
  591. then let compilerIdent = text "GHC" <+> text cProjectVersion
  592. in text ".ident" <+> doubleQuotes compilerIdent
  593. else Outputable.empty)
  594. where
  595. platform = targetPlatform dflags
  596. arch = platformArch platform
  597. os = platformOS platform
  598. -- Generate "symbol stubs" for all external symbols that might
  599. -- come from a dynamic library.
  600. dyld_stubs :: [CLabel] -> SDoc
  601. {- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
  602. map head $ group $ sort imps-}
  603. -- (Hack) sometimes two Labels pretty-print the same, but have
  604. -- different uniques; so we compare their text versions...
  605. dyld_stubs imps
  606. | needImportedSymbols dflags arch os
  607. = vcat $
  608. (pprGotDeclaration dflags arch os :) $
  609. map ( pprImportedSymbol dflags platform . fst . head) $
  610. groupBy (\(_,a) (_,b) -> a == b) $
  611. sortBy (\(_,a) (_,b) -> compare a b) $
  612. map doPpr $
  613. imps
  614. | otherwise
  615. = Outputable.empty
  616. doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
  617. astyle = mkCodeStyle AsmStyle
  618. -- -----------------------------------------------------------------------------
  619. -- Sequencing the basic blocks
  620. -- Cmm BasicBlocks are self-contained entities: they always end in a
  621. -- jump, either non-local or to another basic block in the same proc.
  622. -- In this phase, we attempt to place the basic blocks in a sequence
  623. -- such that as many of the local jumps as possible turn into
  624. -- fallthroughs.
  625. sequenceTop
  626. :: Instruction instr
  627. => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
  628. sequenceTop _ top@(CmmData _ _) = top
  629. sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
  630. CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
  631. -- The algorithm is very simple (and stupid): we make a graph out of
  632. -- the blocks where there is an edge from one block to another iff the
  633. -- first block ends by jumping to the second. Then we topologically
  634. -- sort this graph. Then traverse the list: for each block, we first
  635. -- output the block, then if it has an out edge, we move the
  636. -- destination of the out edge to the front of the list, and continue.
  637. -- FYI, the classic layout for basic blocks uses postorder DFS; this
  638. -- algorithm is implemented in Hoopl.
  639. sequenceBlocks
  640. :: Instruction instr
  641. => BlockEnv i
  642. -> [NatBasicBlock instr]
  643. -> [NatBasicBlock instr]
  644. sequenceBlocks _ [] = []
  645. sequenceBlocks infos (entry:blocks) =
  646. seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
  647. -- the first block is the entry point ==> it must remain at the start.
  648. sccBlocks
  649. :: Instruction instr
  650. => [NatBasicBlock instr]
  651. -> [SCC ( NatBasicBlock instr
  652. , BlockId
  653. , [BlockId])]
  654. sccBlocks blocks = stronglyConnCompFromEdgedVerticesUniqR (map mkNode blocks)
  655. -- we're only interested in the last instruction of
  656. -- the block, and only if it has a single destination.
  657. getOutEdges
  658. :: Instruction instr
  659. => [instr] -> [BlockId]
  660. getOutEdges instrs
  661. = case jumpDestsOfInstr (last instrs) of
  662. [one] -> [one]
  663. _many -> []
  664. mkNode :: (Instruction t)
  665. => GenBasicBlock t
  666. -> (GenBasicBlock t, BlockId, [BlockId])
  667. mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)
  668. seqBlocks :: BlockEnv i -> [(GenBasicBlock t1, BlockId, [BlockId])]
  669. -> [GenBasicBlock t1]
  670. seqBlocks infos blocks = placeNext pullable0 todo0
  671. where
  672. -- pullable: Blocks that are not yet placed
  673. -- todo: Original order of blocks, to be followed if we have no good
  674. -- reason not to;
  675. -- may include blocks that have already been placed, but then
  676. -- these are not in pullable
  677. pullable0 = listToUFM [ (i,(b,n)) | (b,i,n) <- blocks ]
  678. todo0 = [i | (_,i,_) <- blocks ]
  679. placeNext _ [] = []
  680. placeNext pullable (i:rest)
  681. | Just (block, pullable') <- lookupDeleteUFM pullable i
  682. = place pullable' rest block
  683. | otherwise
  684. -- We already placed this block, so ignore
  685. = placeNext pullable rest
  686. place pullable todo (block,[])
  687. = block : placeNext pullable todo
  688. place pullable todo (block@(BasicBlock id instrs),[next])
  689. | mapMember next infos
  690. = block : placeNext pullable todo
  691. | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
  692. = BasicBlock id (init instrs) : place pullable' todo nextBlock
  693. | otherwise
  694. = block : placeNext pullable todo
  695. place _ _ (_,tooManyNextNodes)
  696. = pprPanic "seqBlocks" (ppr tooManyNextNodes)
  697. lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
  698. lookupDeleteUFM m k = do -- Maybe monad
  699. v <- lookupUFM m k
  700. return (v, delFromUFM m k)
  701. -- -----------------------------------------------------------------------------
  702. -- Generate jump tables
  703. -- Analyzes all native code and generates data sections for all jump
  704. -- table instructions.
  705. generateJumpTables
  706. :: NcgImpl statics instr jumpDest
  707. -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
  708. generateJumpTables ncgImpl xs = concatMap f xs
  709. where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
  710. f p = [p]
  711. g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
  712. -- -----------------------------------------------------------------------------
  713. -- Shortcut branches
  714. shortcutBranches
  715. :: DynFlags
  716. -> NcgImpl statics instr jumpDest
  717. -> [NatCmmDecl statics instr]
  718. -> [NatCmmDecl statics instr]
  719. shortcutBranches dflags ncgImpl tops
  720. | optLevel dflags < 1 = tops -- only with -O or higher
  721. | otherwise = map (apply_mapping ncgImpl mapping) tops'
  722. where
  723. (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
  724. mapping = foldr plusUFM emptyUFM mappings
  725. build_mapping :: NcgImpl statics instr jumpDest
  726. -> GenCmmDecl d (BlockEnv t) (ListGraph instr)
  727. -> (GenCmmDecl d (BlockEnv t) (ListGraph instr), UniqFM jumpDest)
  728. build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
  729. build_mapping _ (CmmProc info lbl live (ListGraph []))
  730. = (CmmProc info lbl live (ListGraph []), emptyUFM)
  731. build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
  732. = (CmmProc info lbl live (ListGraph (head:others)), mapping)
  733. -- drop the shorted blocks, but don't ever drop the first one,
  734. -- because it is pointed to by a global label.
  735. where
  736. -- find all the blocks that just consist of a jump that can be
  737. -- shorted.
  738. -- Don't completely eliminate loops here -- that can leave a dangling jump!
  739. (_, shortcut_blocks, others) = foldl split (emptyBlockSet, [], []) blocks
  740. split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
  741. | Just jd <- canShortcut ncgImpl insn,
  742. Just dest <- getJumpDestBlockId ncgImpl jd,
  743. not (has_info id),
  744. (setMember dest s) || dest == id -- loop checks
  745. = (s, shortcut_blocks, b : others)
  746. split (s, shortcut_blocks, others) (BasicBlock id [insn])
  747. | Just dest <- canShortcut ncgImpl insn,
  748. not (has_info id)
  749. = (setInsert id s, (id,dest) : shortcut_blocks, others)
  750. split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
  751. -- do not eliminate blocks that have an info table
  752. has_info l = mapMember l info
  753. -- build a mapping from BlockId to JumpDest for shorting branches
  754. mapping = foldl add emptyUFM shortcut_blocks
  755. add ufm (id,dest) = addToUFM ufm id dest
  756. apply_mapping :: NcgImpl statics instr jumpDest
  757. -> UniqFM jumpDest
  758. -> GenCmmDecl statics h (ListGraph instr)
  759. -> GenCmmDecl statics h (ListGraph instr)
  760. apply_mapping ncgImpl ufm (CmmData sec statics)
  761. = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
  762. apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
  763. = CmmProc info lbl live (ListGraph $ map short_bb blocks)
  764. where
  765. short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
  766. short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
  767. -- shortcutJump should apply the mapping repeatedly,
  768. -- just in case we can short multiple branches.
  769. -- -----------------------------------------------------------------------------
  770. -- Instruction selection
  771. -- Native code instruction selection for a chunk of stix code. For
  772. -- this part of the computation, we switch from the UniqSM monad to
  773. -- the NatM monad. The latter carries not only a Unique, but also an
  774. -- Int denoting the current C stack pointer offset in the generated
  775. -- code; this is needed for creating correct spill offsets on
  776. -- architectures which don't offer, or for which it would be
  777. -- prohibitively expensive to employ, a frame pointer register. Viz,
  778. -- x86.
  779. -- The offset is measured in bytes, and indicates the difference
  780. -- between the current (simulated) C stack-ptr and the value it was at
  781. -- the beginning of the block. For stacks which grow down, this value
  782. -- should be either zero or negative.
  783. -- Switching between the two monads whilst carrying along the same
  784. -- Unique supply breaks abstraction. Is that bad?
  785. genMachCode
  786. :: DynFlags
  787. -> Module -> ModLocation
  788. -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
  789. -> DwarfFiles
  790. -> LabelMap DebugBlock
  791. -> RawCmmDecl
  792. -> UniqSM
  793. ( [NatCmmDecl statics instr]
  794. , [CLabel]
  795. , DwarfFiles)
  796. genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top
  797. = do { initial_us <- getUniqueSupplyM
  798. ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
  799. modLoc fileIds dbgMap
  800. (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
  801. final_delta = natm_delta final_st
  802. final_imports = natm_imports final_st
  803. ; if final_delta == 0
  804. then return (new_tops, final_imports, natm_fileid final_st)
  805. else pprPanic "genMachCode: nonzero final delta" (int final_delta)
  806. }
  807. -- -----------------------------------------------------------------------------
  808. -- Generic Cmm optimiser
  809. {-
  810. Here we do:
  811. (a) Constant folding
  812. (c) Position independent code and dynamic linking
  813. (i) introduce the appropriate indirections
  814. and position independent refs
  815. (ii) compile a list of imported symbols
  816. (d) Some arch-specific optimizations
  817. (a) will be moving to the new Hoopl pipeline, however, (c) and
  818. (d) are only needed by the native backend and will continue to live
  819. here.
  820. Ideas for other things we could do (put these in Hoopl please!):
  821. - shortcut jumps-to-jumps
  822. - simple CSE: if an expr is assigned to a temp, then replace later occs of
  823. that expr with the temp, until the expr is no longer valid (can push through
  824. temp assignments, and certain assigns to mem...)
  825. -}
  826. cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
  827. cmmToCmm _ _ top@(CmmData _ _) = (top, [])
  828. cmmToCmm dflags this_mod (CmmProc info lbl live graph)
  829. = runCmmOpt dflags this_mod $
  830. do blocks' <- mapM cmmBlockConFold (toBlockList graph)
  831. return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
  832. newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
  833. instance Functor CmmOptM where
  834. fmap = liftM
  835. instance Applicative CmmOptM where
  836. pure x = CmmOptM $ \_ _ imports -> (# x, imports #)
  837. (<*>) = ap
  838. instance Monad CmmOptM where
  839. (CmmOptM f) >>= g =
  840. CmmOptM $ \dflags this_mod imports ->
  841. case f dflags this_mod imports of
  842. (# x, imports' #) ->
  843. case g x of
  844. CmmOptM g' -> g' dflags this_mod imports'
  845. instance CmmMakeDynamicReferenceM CmmOptM where
  846. addImport = addImportCmmOpt
  847. getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
  848. addImportCmmOpt :: CLabel -> CmmOptM ()
  849. addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
  850. instance HasDynFlags CmmOptM where
  851. getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
  852. runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
  853. runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
  854. (# result, imports #) -> (result, imports)
  855. cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
  856. cmmBlockConFold block = do
  857. let (entry, middle, last) = blockSplit block
  858. stmts = blockToList middle
  859. stmts' <- mapM cmmStmtConFold stmts
  860. last' <- cmmStmtConFold last
  861. return $ blockJoin entry (blockFromList stmts') last'
  862. -- This does three optimizations, but they're very quick to check, so we don't
  863. -- bother turning them off even when the Hoopl code is active. Since
  864. -- this is on the old Cmm representation, we can't reuse the code either:
  865. -- * reg = reg --> nop
  866. -- * if 0 then jump --> nop
  867. -- * if 1 then jump --> jump
  868. -- We might be tempted to skip this step entirely of not Opt_PIC, but
  869. -- there is some PowerPC code for the non-PIC case, which would also
  870. -- have to be separated.
  871. cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
  872. cmmStmtConFold stmt
  873. = case stmt of
  874. CmmAssign reg src
  875. -> do src' <- cmmExprConFold DataReference src
  876. return $ case src' of
  877. CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
  878. new_src -> CmmAssign reg new_src
  879. CmmStore addr src
  880. -> do addr' <- cmmExprConFold DataReference addr
  881. src' <- cmmExprConFold DataReference src
  882. return $ CmmStore addr' src'
  883. CmmCall { cml_target = addr }
  884. -> do addr' <- cmmExprConFold JumpReference addr
  885. return $ stmt { cml_target = addr' }
  886. CmmUnsafeForeignCall target regs args
  887. -> do target' <- case target of
  888. ForeignTarget e conv -> do
  889. e' <- cmmExprConFold CallReference e
  890. return $ ForeignTarget e' conv
  891. PrimTarget _ ->
  892. return target
  893. args' <- mapM (cmmExprConFold DataReference) args
  894. return $ CmmUnsafeForeignCall target' regs args'
  895. CmmCondBranch test true false likely
  896. -> do test' <- cmmExprConFold DataReference test
  897. return $ case test' of
  898. CmmLit (CmmInt 0 _) -> CmmBranch false
  899. CmmLit (CmmInt _ _) -> CmmBranch true
  900. _other -> CmmCondBranch test' true false likely
  901. CmmSwitch expr ids
  902. -> do expr' <- cmmExprConFold DataReference expr
  903. return $ CmmSwitch expr' ids
  904. other
  905. -> return other
  906. cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  907. cmmExprConFold referenceKind expr = do
  908. dflags <- getDynFlags
  909. -- With -O1 and greater, the cmmSink pass does constant-folding, so
  910. -- we don't need to do it again here.
  911. let expr' = if optLevel dflags >= 1
  912. then expr
  913. else cmmExprCon dflags expr
  914. cmmExprNative referenceKind expr'
  915. cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
  916. cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
  917. cmmExprCon dflags (CmmMachOp mop args)
  918. = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
  919. cmmExprCon _ other = other
  920. -- handles both PIC and non-PIC cases... a very strange mixture
  921. -- of things to do.
  922. cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
  923. cmmExprNative referenceKind expr = do
  924. dflags <- getDynFlags
  925. let platform = targetPlatform dflags
  926. arch = platformArch platform
  927. case expr of
  928. CmmLoad addr rep
  929. -> do addr' <- cmmExprNative DataReference addr
  930. return $ CmmLoad addr' rep
  931. CmmMachOp mop args
  932. -> do args' <- mapM (cmmExprNative DataReference) args
  933. return $ CmmMachOp mop args'
  934. CmmLit (CmmBlock id)
  935. -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
  936. -- we must convert block Ids to CLabels here, because we
  937. -- might have to do the PIC transformation. Hence we must
  938. -- not modify BlockIds beyond this point.
  939. CmmLit (CmmLabel lbl)
  940. -> do
  941. cmmMakeDynamicReference dflags referenceKind lbl
  942. CmmLit (CmmLabelOff lbl off)
  943. -> do
  944. dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
  945. -- need to optimize here, since it's late
  946. return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
  947. dynRef,
  948. (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
  949. ]
  950. -- On powerpc (non-PIC), it's easier to jump directly to a label than
  951. -- to use the register table, so we replace these registers
  952. -- with the corresponding labels:
  953. CmmReg (CmmGlobal EagerBlackholeInfo)
  954. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  955. -> cmmExprNative referenceKind $
  956. CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
  957. CmmReg (CmmGlobal GCEnter1)
  958. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  959. -> cmmExprNative referenceKind $
  960. CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
  961. CmmReg (CmmGlobal GCFun)
  962. | arch == ArchPPC && not (gopt Opt_PIC dflags)
  963. -> cmmExprNative referenceKind $
  964. CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
  965. other
  966. -> return other