PageRenderTime 62ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/compiler/cmm/CmmOpt.hs

https://bitbucket.org/khibino/ghc-hack
Haskell | 695 lines | 392 code | 88 blank | 215 comment | 44 complexity | c44af4bac5a657a9cdf546f56f4baf9a MD5 | raw file
Possible License(s): BSD-3-Clause, BSD-2-Clause, LGPL-3.0
  1. -----------------------------------------------------------------------------
  2. --
  3. -- Cmm optimisation
  4. --
  5. -- (c) The University of Glasgow 2006
  6. --
  7. -----------------------------------------------------------------------------
  8. module CmmOpt (
  9. cmmEliminateDeadBlocks,
  10. cmmMiniInline,
  11. cmmMachOpFold,
  12. cmmMachOpFoldM,
  13. cmmLoopifyForC,
  14. ) where
  15. #include "HsVersions.h"
  16. import OldCmm
  17. import OldPprCmm
  18. import CmmNode (wrapRecExp)
  19. import CmmUtils
  20. import StaticFlags
  21. import UniqFM
  22. import Unique
  23. import Util
  24. import FastTypes
  25. import Outputable
  26. import Platform
  27. import BlockId
  28. import Data.Bits
  29. import Data.Maybe
  30. import Data.List
  31. -- -----------------------------------------------------------------------------
  32. -- Eliminates dead blocks
  33. {-
  34. We repeatedly expand the set of reachable blocks until we hit a
  35. fixpoint, and then prune any blocks that were not in this set. This is
  36. actually a required optimization, as dead blocks can cause problems
  37. for invariants in the linear register allocator (and possibly other
  38. places.)
  39. -}
  40. -- Deep fold over statements could probably be abstracted out, but it
  41. -- might not be worth the effort since OldCmm is moribund
  42. cmmEliminateDeadBlocks :: [CmmBasicBlock] -> [CmmBasicBlock]
  43. cmmEliminateDeadBlocks [] = []
  44. cmmEliminateDeadBlocks blocks@(BasicBlock base_id _:_) =
  45. let -- Calculate what's reachable from what block
  46. reachableMap = foldl' f emptyUFM blocks -- lazy in values
  47. where f m (BasicBlock block_id stmts) = addToUFM m block_id (reachableFrom stmts)
  48. reachableFrom stmts = foldl stmt [] stmts
  49. where
  50. stmt m CmmNop = m
  51. stmt m (CmmComment _) = m
  52. stmt m (CmmAssign _ e) = expr m e
  53. stmt m (CmmStore e1 e2) = expr (expr m e1) e2
  54. stmt m (CmmCall c _ as _) = f (actuals m as) c
  55. where f m (CmmCallee e _) = expr m e
  56. f m (CmmPrim _) = m
  57. stmt m (CmmBranch b) = b:m
  58. stmt m (CmmCondBranch e b) = b:(expr m e)
  59. stmt m (CmmSwitch e bs) = catMaybes bs ++ expr m e
  60. stmt m (CmmJump e as) = expr (actuals m as) e
  61. stmt m (CmmReturn as) = actuals m as
  62. actuals m as = foldl' (\m h -> expr m (hintlessCmm h)) m as
  63. -- We have to do a deep fold into CmmExpr because
  64. -- there may be a BlockId in the CmmBlock literal.
  65. expr m (CmmLit l) = lit m l
  66. expr m (CmmLoad e _) = expr m e
  67. expr m (CmmReg _) = m
  68. expr m (CmmMachOp _ es) = foldl' expr m es
  69. expr m (CmmStackSlot _ _) = m
  70. expr m (CmmRegOff _ _) = m
  71. lit m (CmmBlock b) = b:m
  72. lit m _ = m
  73. -- go todo done
  74. reachable = go [base_id] (setEmpty :: BlockSet)
  75. where go [] m = m
  76. go (x:xs) m
  77. | setMember x m = go xs m
  78. | otherwise = go (add ++ xs) (setInsert x m)
  79. where add = fromMaybe (panic "cmmEliminateDeadBlocks: unknown block")
  80. (lookupUFM reachableMap x)
  81. in filter (\(BasicBlock block_id _) -> setMember block_id reachable) blocks
  82. -- -----------------------------------------------------------------------------
  83. -- The mini-inliner
  84. {-
  85. This pass inlines assignments to temporaries. Temporaries that are
  86. only used once are unconditionally inlined. Temporaries that are used
  87. two or more times are only inlined if they are assigned a literal. It
  88. works as follows:
  89. - count uses of each temporary
  90. - for each temporary:
  91. - attempt to push it forward to the statement that uses it
  92. - only push forward past assignments to other temporaries
  93. (assumes that temporaries are single-assignment)
  94. - if we reach the statement that uses it, inline the rhs
  95. and delete the original assignment.
  96. [N.B. In the Quick C-- compiler, this optimization is achieved by a
  97. combination of two dataflow passes: forward substitution (peephole
  98. optimization) and dead-assignment elimination. ---NR]
  99. Possible generalisations: here is an example from factorial
  100. Fac_zdwfac_entry:
  101. cmG:
  102. _smi = R2;
  103. if (_smi != 0) goto cmK;
  104. R1 = R3;
  105. jump I64[Sp];
  106. cmK:
  107. _smn = _smi * R3;
  108. R2 = _smi + (-1);
  109. R3 = _smn;
  110. jump Fac_zdwfac_info;
  111. We want to inline _smi and _smn. To inline _smn:
  112. - we must be able to push forward past assignments to global regs.
  113. We can do this if the rhs of the assignment we are pushing
  114. forward doesn't refer to the global reg being assigned to; easy
  115. to test.
  116. To inline _smi:
  117. - It is a trivial replacement, reg for reg, but it occurs more than
  118. once.
  119. - We can inline trivial assignments even if the temporary occurs
  120. more than once, as long as we don't eliminate the original assignment
  121. (this doesn't help much on its own).
  122. - We need to be able to propagate the assignment forward through jumps;
  123. if we did this, we would find that it can be inlined safely in all
  124. its occurrences.
  125. -}
  126. countUses :: UserOfLocalRegs a => a -> UniqFM Int
  127. countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a
  128. where count m r = lookupWithDefaultUFM m (0::Int) r
  129. cmmMiniInline :: Platform -> [CmmBasicBlock] -> [CmmBasicBlock]
  130. cmmMiniInline platform blocks = map do_inline blocks
  131. where do_inline (BasicBlock id stmts)
  132. = BasicBlock id (cmmMiniInlineStmts platform (countUses blocks) stmts)
  133. cmmMiniInlineStmts :: Platform -> UniqFM Int -> [CmmStmt] -> [CmmStmt]
  134. cmmMiniInlineStmts _ _ [] = []
  135. cmmMiniInlineStmts platform uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
  136. -- not used: just discard this assignment
  137. | Nothing <- lookupUFM uses u
  138. = cmmMiniInlineStmts platform uses stmts
  139. -- used (literal): try to inline at all the use sites
  140. | Just n <- lookupUFM uses u, isLit expr
  141. =
  142. ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
  143. case lookForInlineLit u expr stmts of
  144. (m, stmts')
  145. | n == m -> cmmMiniInlineStmts platform (delFromUFM uses u) stmts'
  146. | otherwise ->
  147. stmt : cmmMiniInlineStmts platform (adjustUFM (\x -> x - m) uses u) stmts'
  148. -- used (foldable to literal): try to inline at all the use sites
  149. | Just n <- lookupUFM uses u,
  150. e@(CmmLit _) <- wrapRecExp foldExp expr
  151. =
  152. ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
  153. case lookForInlineLit u e stmts of
  154. (m, stmts')
  155. | n == m -> cmmMiniInlineStmts platform (delFromUFM uses u) stmts'
  156. | otherwise ->
  157. stmt : cmmMiniInlineStmts platform (adjustUFM (\x -> x - m) uses u) stmts'
  158. -- used once (non-literal): try to inline at the use site
  159. | Just 1 <- lookupUFM uses u,
  160. Just stmts' <- lookForInline u expr stmts
  161. =
  162. ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt platform stmt)) $
  163. cmmMiniInlineStmts platform uses stmts'
  164. where
  165. foldExp (CmmMachOp op args) = cmmMachOpFold platform op args
  166. foldExp e = e
  167. ncgDebugTrace str x = if ncgDebugIsOn then trace str x else x
  168. cmmMiniInlineStmts platform uses (stmt:stmts)
  169. = stmt : cmmMiniInlineStmts platform uses stmts
  170. -- | Takes a register, a 'CmmLit' expression assigned to that
  171. -- register, and a list of statements. Inlines the expression at all
  172. -- use sites of the register. Returns the number of substituations
  173. -- made and the, possibly modified, list of statements.
  174. lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
  175. lookForInlineLit _ _ [] = (0, [])
  176. lookForInlineLit u expr stmts@(stmt : rest)
  177. | Just n <- lookupUFM (countUses stmt) u
  178. = case lookForInlineLit u expr rest of
  179. (m, stmts) -> let z = n + m
  180. in z `seq` (z, inlineStmt u expr stmt : stmts)
  181. | ok_to_skip
  182. = case lookForInlineLit u expr rest of
  183. (n, stmts) -> (n, stmt : stmts)
  184. | otherwise
  185. = (0, stmts)
  186. where
  187. -- We skip over assignments to registers, unless the register
  188. -- being assigned to is the one we're inlining.
  189. ok_to_skip = case stmt of
  190. CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False
  191. _other -> True
  192. lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]
  193. lookForInline u expr stmts = lookForInline' u expr regset stmts
  194. where regset = foldRegsUsed extendRegSet emptyRegSet expr
  195. lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt]
  196. lookForInline' _ _ _ [] = panic "lookForInline' []"
  197. lookForInline' u expr regset (stmt : rest)
  198. | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline
  199. = Just (inlineStmt u expr stmt : rest)
  200. | ok_to_skip
  201. = case lookForInline' u expr regset rest of
  202. Nothing -> Nothing
  203. Just stmts -> Just (stmt:stmts)
  204. | otherwise
  205. = Nothing
  206. where
  207. -- we don't inline into CmmCall if the expression refers to global
  208. -- registers. This is a HACK to avoid global registers clashing with
  209. -- C argument-passing registers, really the back-end ought to be able
  210. -- to handle it properly, but currently neither PprC nor the NCG can
  211. -- do it. See also CgForeignCall:load_args_into_temps.
  212. ok_to_inline = case stmt of
  213. CmmCall{} -> hasNoGlobalRegs expr
  214. _ -> True
  215. -- Expressions aren't side-effecting. Temporaries may or may not
  216. -- be single-assignment depending on the source (the old code
  217. -- generator creates single-assignment code, but hand-written Cmm
  218. -- and Cmm from the new code generator is not single-assignment.)
  219. -- So we do an extra check to make sure that the register being
  220. -- changed is not one we were relying on. I don't know how much of a
  221. -- performance hit this is (we have to create a regset for every
  222. -- instruction.) -- EZY
  223. ok_to_skip = case stmt of
  224. CmmNop -> True
  225. CmmComment{} -> True
  226. CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
  227. CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
  228. _other -> False
  229. inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
  230. inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
  231. inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
  232. inlineStmt u a (CmmCall target regs es ret)
  233. = CmmCall (infn target) regs es' ret
  234. where infn (CmmCallee fn cconv) = CmmCallee (inlineExpr u a fn) cconv
  235. infn (CmmPrim p) = CmmPrim p
  236. es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ]
  237. inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
  238. inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
  239. inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
  240. inlineStmt _ _ other_stmt = other_stmt
  241. inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
  242. inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
  243. | u == u' = a
  244. | otherwise = e
  245. inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
  246. | u == u' = CmmMachOp (MO_Add width) [a, CmmLit (CmmInt (fromIntegral off) width)]
  247. | otherwise = e
  248. where
  249. width = typeWidth rep
  250. inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
  251. inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
  252. inlineExpr _ _ other_expr = other_expr
  253. -- -----------------------------------------------------------------------------
  254. -- MachOp constant folder
  255. -- Now, try to constant-fold the MachOps. The arguments have already
  256. -- been optimized and folded.
  257. cmmMachOpFold
  258. :: Platform
  259. -> MachOp -- The operation from an CmmMachOp
  260. -> [CmmExpr] -- The optimized arguments
  261. -> CmmExpr
  262. cmmMachOpFold platform op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM platform op args)
  263. -- Returns Nothing if no changes, useful for Hoopl, also reduces
  264. -- allocation!
  265. cmmMachOpFoldM
  266. :: Platform
  267. -> MachOp
  268. -> [CmmExpr]
  269. -> Maybe CmmExpr
  270. cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
  271. = Just $ case op of
  272. MO_S_Neg _ -> CmmLit (CmmInt (-x) rep)
  273. MO_Not _ -> CmmLit (CmmInt (complement x) rep)
  274. -- these are interesting: we must first narrow to the
  275. -- "from" type, in order to truncate to the correct size.
  276. -- The final narrow/widen to the destination type
  277. -- is implicit in the CmmLit.
  278. MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to)
  279. MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
  280. MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
  281. _ -> panic "cmmMachOpFoldM: unknown unary op"
  282. -- Eliminate conversion NOPs
  283. cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
  284. cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
  285. -- Eliminate nested conversions where possible
  286. cmmMachOpFoldM platform conv_outer [CmmMachOp conv_inner [x]]
  287. | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
  288. Just (_, rep3,signed2) <- isIntConversion conv_outer
  289. = case () of
  290. -- widen then narrow to the same size is a nop
  291. _ | rep1 < rep2 && rep1 == rep3 -> Just x
  292. -- Widen then narrow to different size: collapse to single conversion
  293. -- but remember to use the signedness from the widening, just in case
  294. -- the final conversion is a widen.
  295. | rep1 < rep2 && rep2 > rep3 ->
  296. Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
  297. -- Nested widenings: collapse if the signedness is the same
  298. | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
  299. Just $ cmmMachOpFold platform (intconv signed1 rep1 rep3) [x]
  300. -- Nested narrowings: collapse
  301. | rep1 > rep2 && rep2 > rep3 ->
  302. Just $ cmmMachOpFold platform (MO_UU_Conv rep1 rep3) [x]
  303. | otherwise ->
  304. Nothing
  305. where
  306. isIntConversion (MO_UU_Conv rep1 rep2)
  307. = Just (rep1,rep2,False)
  308. isIntConversion (MO_SS_Conv rep1 rep2)
  309. = Just (rep1,rep2,True)
  310. isIntConversion _ = Nothing
  311. intconv True = MO_SS_Conv
  312. intconv False = MO_UU_Conv
  313. -- ToDo: a narrow of a load can be collapsed into a narrow load, right?
  314. -- but what if the architecture only supports word-sized loads, should
  315. -- we do the transformation anyway?
  316. cmmMachOpFoldM _ mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
  317. = case mop of
  318. -- for comparisons: don't forget to narrow the arguments before
  319. -- comparing, since they might be out of range.
  320. MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth)
  321. MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth)
  322. MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth)
  323. MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth)
  324. MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth)
  325. MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth)
  326. MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth)
  327. MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth)
  328. MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth)
  329. MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth)
  330. MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
  331. MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
  332. MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
  333. MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
  334. MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
  335. MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
  336. MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
  337. MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
  338. MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
  339. MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
  340. MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
  341. MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
  342. MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
  343. _ -> Nothing
  344. where
  345. x_u = narrowU xrep x
  346. y_u = narrowU xrep y
  347. x_s = narrowS xrep x
  348. y_s = narrowS xrep y
  349. -- When possible, shift the constants to the right-hand side, so that we
  350. -- can match for strength reductions. Note that the code generator will
  351. -- also assume that constants have been shifted to the right when
  352. -- possible.
  353. cmmMachOpFoldM platform op [x@(CmmLit _), y]
  354. | not (isLit y) && isCommutableMachOp op
  355. = Just (cmmMachOpFold platform op [y, x])
  356. -- Turn (a+b)+c into a+(b+c) where possible. Because literals are
  357. -- moved to the right, it is more likely that we will find
  358. -- opportunities for constant folding when the expression is
  359. -- right-associated.
  360. --
  361. -- ToDo: this appears to introduce a quadratic behaviour due to the
  362. -- nested cmmMachOpFold. Can we fix this?
  363. --
  364. -- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
  365. -- is also a lit (otherwise arg1 would be on the right). If we
  366. -- put arg1 on the left of the rearranged expression, we'll get into a
  367. -- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
  368. --
  369. -- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
  370. -- PicBaseReg from the corresponding label (or label difference).
  371. --
  372. cmmMachOpFoldM platform mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
  373. | mop2 `associates_with` mop1
  374. && not (isLit arg1) && not (isPicReg arg1)
  375. = Just (cmmMachOpFold platform mop2 [arg1, cmmMachOpFold platform mop1 [arg2,arg3]])
  376. where
  377. MO_Add{} `associates_with` MO_Sub{} = True
  378. mop1 `associates_with` mop2 =
  379. mop1 == mop2 && isAssociativeMachOp mop1
  380. -- special case: (a - b) + c ==> a + (c - b)
  381. cmmMachOpFoldM platform mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
  382. | not (isLit arg1) && not (isPicReg arg1)
  383. = Just (cmmMachOpFold platform mop1 [arg1, cmmMachOpFold platform mop2 [arg3,arg2]])
  384. -- Make a RegOff if we can
  385. cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
  386. = Just $ CmmRegOff reg (fromIntegral (narrowS rep n))
  387. cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  388. = Just $ CmmRegOff reg (off + fromIntegral (narrowS rep n))
  389. cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
  390. = Just $ CmmRegOff reg (- fromIntegral (narrowS rep n))
  391. cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
  392. = Just $ CmmRegOff reg (off - fromIntegral (narrowS rep n))
  393. -- Fold label(+/-)offset into a CmmLit where possible
  394. cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  395. = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
  396. cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
  397. = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
  398. cmmMachOpFoldM _ (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
  399. = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
  400. -- Comparison of literal with widened operand: perform the comparison
  401. -- at the smaller width, as long as the literal is within range.
  402. -- We can't do the reverse trick, when the operand is narrowed:
  403. -- narrowing throws away bits from the operand, there's no way to do
  404. -- the same comparison at the larger size.
  405. cmmMachOpFoldM platform cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
  406. | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
  407. platformArch platform `elem` [ArchX86, ArchX86_64],
  408. -- if the operand is widened:
  409. Just (rep, signed, narrow_fn) <- maybe_conversion conv,
  410. -- and this is a comparison operation:
  411. Just narrow_cmp <- maybe_comparison cmp rep signed,
  412. -- and the literal fits in the smaller size:
  413. i == narrow_fn rep i
  414. -- then we can do the comparison at the smaller size
  415. = Just (cmmMachOpFold platform narrow_cmp [x, CmmLit (CmmInt i rep)])
  416. where
  417. maybe_conversion (MO_UU_Conv from to)
  418. | to > from
  419. = Just (from, False, narrowU)
  420. maybe_conversion (MO_SS_Conv from to)
  421. | to > from
  422. = Just (from, True, narrowS)
  423. -- don't attempt to apply this optimisation when the source
  424. -- is a float; see #1916
  425. maybe_conversion _ = Nothing
  426. -- careful (#2080): if the original comparison was signed, but
  427. -- we were doing an unsigned widen, then we must do an
  428. -- unsigned comparison at the smaller size.
  429. maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep)
  430. maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep)
  431. maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep)
  432. maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep)
  433. maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep)
  434. maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep)
  435. maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep)
  436. maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep)
  437. maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep)
  438. maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep)
  439. maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep)
  440. maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep)
  441. maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
  442. maybe_comparison _ _ _ = Nothing
  443. -- We can often do something with constants of 0 and 1 ...
  444. cmmMachOpFoldM _ mop [x, y@(CmmLit (CmmInt 0 _))]
  445. = case mop of
  446. MO_Add _ -> Just x
  447. MO_Sub _ -> Just x
  448. MO_Mul _ -> Just y
  449. MO_And _ -> Just y
  450. MO_Or _ -> Just x
  451. MO_Xor _ -> Just x
  452. MO_Shl _ -> Just x
  453. MO_S_Shr _ -> Just x
  454. MO_U_Shr _ -> Just x
  455. MO_Ne _ | isComparisonExpr x -> Just x
  456. MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  457. MO_U_Gt _ | isComparisonExpr x -> Just x
  458. MO_S_Gt _ | isComparisonExpr x -> Just x
  459. MO_U_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
  460. MO_S_Lt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
  461. MO_U_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
  462. MO_S_Ge _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
  463. MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  464. MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  465. _ -> Nothing
  466. cmmMachOpFoldM _ mop [x, (CmmLit (CmmInt 1 rep))]
  467. = case mop of
  468. MO_Mul _ -> Just x
  469. MO_S_Quot _ -> Just x
  470. MO_U_Quot _ -> Just x
  471. MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
  472. MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep)
  473. MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  474. MO_Eq _ | isComparisonExpr x -> Just x
  475. MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  476. MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x'
  477. MO_U_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
  478. MO_S_Gt _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
  479. MO_U_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
  480. MO_S_Le _ | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
  481. MO_U_Ge _ | isComparisonExpr x -> Just x
  482. MO_S_Ge _ | isComparisonExpr x -> Just x
  483. _ -> Nothing
  484. -- Now look for multiplication/division by powers of 2 (integers).
  485. cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
  486. = case mop of
  487. MO_Mul rep
  488. | Just p <- exactLog2 n ->
  489. Just (cmmMachOpFold platform (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
  490. MO_U_Quot rep
  491. | Just p <- exactLog2 n ->
  492. Just (cmmMachOpFold platform (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
  493. MO_S_Quot rep
  494. | Just p <- exactLog2 n,
  495. CmmReg _ <- x -> -- We duplicate x below, hence require
  496. -- it is a reg. FIXME: remove this restriction.
  497. -- shift right is not the same as quot, because it rounds
  498. -- to minus infinity, whereasq quot rounds toward zero.
  499. -- To fix this up, we add one less than the divisor to the
  500. -- dividend if it is a negative number.
  501. --
  502. -- to avoid a test/jump, we use the following sequence:
  503. -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
  504. -- x2 = y & (divisor-1)
  505. -- result = (x+x2) >>= log2(divisor)
  506. -- this could be done a bit more simply using conditional moves,
  507. -- but we're processor independent here.
  508. --
  509. -- we optimise the divide by 2 case slightly, generating
  510. -- x1 = x >> word_size-1 (unsigned)
  511. -- return = (x + x1) >>= log2(divisor)
  512. let
  513. bits = fromIntegral (widthInBits rep) - 1
  514. shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
  515. x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
  516. x2 = if p == 1 then x1 else
  517. CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
  518. x3 = CmmMachOp (MO_Add rep) [x, x2]
  519. in
  520. Just (cmmMachOpFold platform (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
  521. _ -> Nothing
  522. -- Anything else is just too hard.
  523. cmmMachOpFoldM _ _ _ = Nothing
  524. -- -----------------------------------------------------------------------------
  525. -- exactLog2
  526. -- This algorithm for determining the $\log_2$ of exact powers of 2 comes
  527. -- from GCC. It requires bit manipulation primitives, and we use GHC
  528. -- extensions. Tough.
  529. --
  530. -- Used to be in MachInstrs --SDM.
  531. -- ToDo: remove use of unboxery --SDM.
  532. -- Unboxery removed in favor of FastInt; but is the function supposed to fail
  533. -- on inputs >= 2147483648, or was that just an implementation artifact?
  534. -- And is this speed-critical, or can we just use Integer operations
  535. -- (including Data.Bits)?
  536. -- --Isaac Dupree
  537. exactLog2 :: Integer -> Maybe Integer
  538. exactLog2 x_
  539. = if (x_ <= 0 || x_ >= 2147483648) then
  540. Nothing
  541. else
  542. case iUnbox (fromInteger x_) of { x ->
  543. if (x `bitAndFastInt` negateFastInt x) /=# x then
  544. Nothing
  545. else
  546. Just (toInteger (iBox (pow2 x)))
  547. }
  548. where
  549. pow2 x | x ==# _ILIT(1) = _ILIT(0)
  550. | otherwise = _ILIT(1) +# pow2 (x `shiftR_FastInt` _ILIT(1))
  551. -- -----------------------------------------------------------------------------
  552. -- Loopify for C
  553. {-
  554. This is a simple pass that replaces tail-recursive functions like this:
  555. fac() {
  556. ...
  557. jump fac();
  558. }
  559. with this:
  560. fac() {
  561. L:
  562. ...
  563. goto L;
  564. }
  565. the latter generates better C code, because the C compiler treats it
  566. like a loop, and brings full loop optimisation to bear.
  567. In my measurements this makes little or no difference to anything
  568. except factorial, but what the hell.
  569. -}
  570. cmmLoopifyForC :: RawCmmDecl -> RawCmmDecl
  571. cmmLoopifyForC p@(CmmProc Nothing _ _) = p -- only if there's an info table, ignore case alts
  572. cmmLoopifyForC (CmmProc (Just info@(Statics info_lbl _)) entry_lbl
  573. (ListGraph blocks@(BasicBlock top_id _ : _))) =
  574. -- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
  575. CmmProc (Just info) entry_lbl (ListGraph blocks')
  576. where blocks' = [ BasicBlock id (map do_stmt stmts)
  577. | BasicBlock id stmts <- blocks ]
  578. do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
  579. = CmmBranch top_id
  580. do_stmt stmt = stmt
  581. jump_lbl | tablesNextToCode = info_lbl
  582. | otherwise = entry_lbl
  583. cmmLoopifyForC top = top
  584. -- -----------------------------------------------------------------------------
  585. -- Utils
  586. isLit :: CmmExpr -> Bool
  587. isLit (CmmLit _) = True
  588. isLit _ = False
  589. isComparisonExpr :: CmmExpr -> Bool
  590. isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
  591. isComparisonExpr _ = False
  592. isPicReg :: CmmExpr -> Bool
  593. isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
  594. isPicReg _ = False