PageRenderTime 58ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/compiler/simplCore/OccurAnal.hs

http://github.com/ghc/ghc
Haskell | 1923 lines | 894 code | 254 blank | 775 comment | 50 complexity | 6737c031cc4e467f62f119fa7b731e63 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. {-
  2. (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
  3. ************************************************************************
  4. * *
  5. \section[OccurAnal]{Occurrence analysis pass}
  6. * *
  7. ************************************************************************
  8. The occurrence analyser re-typechecks a core expression, returning a new
  9. core expression with (hopefully) improved usage information.
  10. -}
  11. {-# LANGUAGE CPP, BangPatterns #-}
  12. module OccurAnal (
  13. occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
  14. ) where
  15. #include "HsVersions.h"
  16. import CoreSyn
  17. import CoreFVs
  18. import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp,
  19. stripTicksTopE, mkTicks )
  20. import Id
  21. import Name( localiseName )
  22. import BasicTypes
  23. import Module( Module )
  24. import Coercion
  25. import VarSet
  26. import VarEnv
  27. import Var
  28. import Demand ( argOneShots, argsOneShots )
  29. import Maybes ( orElse )
  30. import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesUniqR )
  31. import Unique
  32. import UniqFM
  33. import Util
  34. import Outputable
  35. import Data.List
  36. import Control.Arrow ( second )
  37. {-
  38. ************************************************************************
  39. * *
  40. \subsection[OccurAnal-main]{Counting occurrences: main function}
  41. * *
  42. ************************************************************************
  43. Here's the externally-callable interface:
  44. -}
  45. occurAnalysePgm :: Module -- Used only in debug output
  46. -> (Activation -> Bool)
  47. -> [CoreRule] -> [CoreVect] -> VarSet
  48. -> CoreProgram -> CoreProgram
  49. occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
  50. | isEmptyVarEnv final_usage
  51. = occ_anald_binds
  52. | otherwise -- See Note [Glomming]
  53. = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
  54. 2 (ppr final_usage ) )
  55. occ_anald_glommed_binds
  56. where
  57. init_env = initOccEnv active_rule
  58. (final_usage, occ_anald_binds) = go init_env binds
  59. (_, occ_anald_glommed_binds) = occAnalRecBind init_env imp_rule_edges
  60. (flattenBinds occ_anald_binds)
  61. initial_uds
  62. -- It's crucial to re-analyse the glommed-together bindings
  63. -- so that we establish the right loop breakers. Otherwise
  64. -- we can easily create an infinite loop (Trac #9583 is an example)
  65. initial_uds = addIdOccs emptyDetails
  66. (rulesFreeVars imp_rules `unionVarSet`
  67. vectsFreeVars vects `unionVarSet`
  68. vectVars)
  69. -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
  70. -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
  71. -- reflected in 'vectors' — see Note [Vectorisation declarations and occurrences].)
  72. -- Note [Preventing loops due to imported functions rules]
  73. imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
  74. [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
  75. | imp_rule <- imp_rules
  76. , not (isBuiltinRule imp_rule) -- See Note [Plugin rules]
  77. , let maps_to = exprFreeIds (ru_rhs imp_rule)
  78. `delVarSetList` ru_bndrs imp_rule
  79. , arg <- ru_args imp_rule ]
  80. go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
  81. go _ []
  82. = (initial_uds, [])
  83. go env (bind:binds)
  84. = (final_usage, bind' ++ binds')
  85. where
  86. (bs_usage, binds') = go env binds
  87. (final_usage, bind') = occAnalBind env imp_rule_edges bind bs_usage
  88. occurAnalyseExpr :: CoreExpr -> CoreExpr
  89. -- Do occurrence analysis, and discard occurrence info returned
  90. occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
  91. occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
  92. occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
  93. occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
  94. occurAnalyseExpr' enable_binder_swap expr
  95. = snd (occAnal env expr)
  96. where
  97. env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
  98. -- To be conservative, we say that all inlines and rules are active
  99. all_active_rules = \_ -> True
  100. {- Note [Plugin rules]
  101. ~~~~~~~~~~~~~~~~~~~~~~
  102. Conal Elliott (Trac #11651) built a GHC plugin that added some
  103. BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
  104. do some domain-specific transformations that could not be expressed
  105. with an ordinary pattern-matching CoreRule. But then we can't extract
  106. the dependencies (in imp_rule_edges) from ru_rhs etc, because a
  107. BuiltinRule doesn't have any of that stuff.
  108. So we simply assume that BuiltinRules have no dependencies, and filter
  109. them out from the imp_rule_edges comprehension.
  110. -}
  111. {-
  112. ************************************************************************
  113. * *
  114. \subsection[OccurAnal-main]{Counting occurrences: main function}
  115. * *
  116. ************************************************************************
  117. Bindings
  118. ~~~~~~~~
  119. -}
  120. type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs
  121. noImpRuleEdges :: ImpRuleEdges
  122. noImpRuleEdges = emptyVarEnv
  123. occAnalBind :: OccEnv -- The incoming OccEnv
  124. -> ImpRuleEdges
  125. -> CoreBind
  126. -> UsageDetails -- Usage details of scope
  127. -> (UsageDetails, -- Of the whole let(rec)
  128. [CoreBind])
  129. occAnalBind env top_env (NonRec binder rhs) body_usage
  130. = occAnalNonRecBind env top_env binder rhs body_usage
  131. occAnalBind env top_env (Rec pairs) body_usage
  132. = occAnalRecBind env top_env pairs body_usage
  133. -----------------
  134. occAnalNonRecBind :: OccEnv -> ImpRuleEdges -> Var -> CoreExpr
  135. -> UsageDetails -> (UsageDetails, [CoreBind])
  136. occAnalNonRecBind env imp_rule_edges binder rhs body_usage
  137. | isTyVar binder -- A type let; we don't gather usage info
  138. = (body_usage, [NonRec binder rhs])
  139. | not (binder `usedIn` body_usage) -- It's not mentioned
  140. = (body_usage, [])
  141. | otherwise -- It's mentioned in the body
  142. = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
  143. where
  144. (body_usage', tagged_binder) = tagBinder body_usage binder
  145. (rhs_usage1, rhs') = occAnalNonRecRhs env tagged_binder rhs
  146. rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
  147. rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
  148. -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
  149. rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $
  150. lookupVarEnv imp_rule_edges binder
  151. -- See Note [Preventing loops due to imported functions rules]
  152. -----------------
  153. occAnalRecBind :: OccEnv -> ImpRuleEdges -> [(Var,CoreExpr)]
  154. -> UsageDetails -> (UsageDetails, [CoreBind])
  155. occAnalRecBind env imp_rule_edges pairs body_usage
  156. = foldr occAnalRec (body_usage, []) sccs
  157. -- For a recursive group, we
  158. -- * occ-analyse all the RHSs
  159. -- * compute strongly-connected components
  160. -- * feed those components to occAnalRec
  161. where
  162. bndr_set = mkVarSet (map fst pairs)
  163. sccs :: [SCC (Node Details)]
  164. sccs = {-# SCC "occAnalBind.scc" #-}
  165. stronglyConnCompFromEdgedVerticesUniqR nodes
  166. nodes :: [Node Details]
  167. nodes = {-# SCC "occAnalBind.assoc" #-}
  168. map (makeNode env imp_rule_edges bndr_set) pairs
  169. {-
  170. Note [Dead code]
  171. ~~~~~~~~~~~~~~~~
  172. Dropping dead code for a cyclic Strongly Connected Component is done
  173. in a very simple way:
  174. the entire SCC is dropped if none of its binders are mentioned
  175. in the body; otherwise the whole thing is kept.
  176. The key observation is that dead code elimination happens after
  177. dependency analysis: so 'occAnalBind' processes SCCs instead of the
  178. original term's binding groups.
  179. Thus 'occAnalBind' does indeed drop 'f' in an example like
  180. letrec f = ...g...
  181. g = ...(...g...)...
  182. in
  183. ...g...
  184. when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
  185. 'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
  186. 'AcyclicSCC f', where 'body_usage' won't contain 'f'.
  187. ------------------------------------------------------------
  188. Note [Forming Rec groups]
  189. ~~~~~~~~~~~~~~~~~~~~~~~~~
  190. We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
  191. and "g uses f", no matter how indirectly. We do a SCC analysis
  192. with an edge f -> g if "f uses g".
  193. More precisely, "f uses g" iff g should be in scope wherever f is.
  194. That is, g is free in:
  195. a) the rhs 'ef'
  196. b) or the RHS of a rule for f (Note [Rules are extra RHSs])
  197. c) or the LHS or a rule for f (Note [Rule dependency info])
  198. These conditions apply regardless of the activation of the RULE (eg it might be
  199. inactive in this phase but become active later). Once a Rec is broken up
  200. it can never be put back together, so we must be conservative.
  201. The principle is that, regardless of rule firings, every variable is
  202. always in scope.
  203. * Note [Rules are extra RHSs]
  204. ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  205. A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
  206. keeps the specialised "children" alive. If the parent dies
  207. (because it isn't referenced any more), then the children will die
  208. too (unless they are already referenced directly).
  209. To that end, we build a Rec group for each cyclic strongly
  210. connected component,
  211. *treating f's rules as extra RHSs for 'f'*.
  212. More concretely, the SCC analysis runs on a graph with an edge
  213. from f -> g iff g is mentioned in
  214. (a) f's rhs
  215. (b) f's RULES
  216. These are rec_edges.
  217. Under (b) we include variables free in *either* LHS *or* RHS of
  218. the rule. The former might seems silly, but see Note [Rule
  219. dependency info]. So in Example [eftInt], eftInt and eftIntFB
  220. will be put in the same Rec, even though their 'main' RHSs are
  221. both non-recursive.
  222. * Note [Rule dependency info]
  223. ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  224. The VarSet in a RuleInfo is used for dependency analysis in the
  225. occurrence analyser. We must track free vars in *both* lhs and rhs.
  226. Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
  227. Why both? Consider
  228. x = y
  229. RULE f x = v+4
  230. Then if we substitute y for x, we'd better do so in the
  231. rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
  232. as well as 'v'
  233. * Note [Rules are visible in their own rec group]
  234. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  235. We want the rules for 'f' to be visible in f's right-hand side.
  236. And we'd like them to be visible in other functions in f's Rec
  237. group. E.g. in Note [Specialisation rules] we want f' rule
  238. to be visible in both f's RHS, and fs's RHS.
  239. This means that we must simplify the RULEs first, before looking
  240. at any of the definitions. This is done by Simplify.simplRecBind,
  241. when it calls addLetIdInfo.
  242. ------------------------------------------------------------
  243. Note [Choosing loop breakers]
  244. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  245. Loop breaking is surprisingly subtle. First read the section 4 of
  246. "Secrets of the GHC inliner". This describes our basic plan.
  247. We avoid infinite inlinings by choosing loop breakers, and
  248. ensuring that a loop breaker cuts each loop.
  249. Fundamentally, we do SCC analysis on a graph. For each recursive
  250. group we choose a loop breaker, delete all edges to that node,
  251. re-analyse the SCC, and iterate.
  252. But what is the graph? NOT the same graph as was used for Note
  253. [Forming Rec groups]! In particular, a RULE is like an equation for
  254. 'f' that is *always* inlined if it is applicable. We do *not* disable
  255. rules for loop-breakers. It's up to whoever makes the rules to make
  256. sure that the rules themselves always terminate. See Note [Rules for
  257. recursive functions] in Simplify.hs
  258. Hence, if
  259. f's RHS (or its INLINE template if it has one) mentions g, and
  260. g has a RULE that mentions h, and
  261. h has a RULE that mentions f
  262. then we *must* choose f to be a loop breaker. Example: see Note
  263. [Specialisation rules].
  264. In general, take the free variables of f's RHS, and augment it with
  265. all the variables reachable by RULES from those starting points. That
  266. is the whole reason for computing rule_fv_env in occAnalBind. (Of
  267. course we only consider free vars that are also binders in this Rec
  268. group.) See also Note [Finding rule RHS free vars]
  269. Note that when we compute this rule_fv_env, we only consider variables
  270. free in the *RHS* of the rule, in contrast to the way we build the
  271. Rec group in the first place (Note [Rule dependency info])
  272. Note that if 'g' has RHS that mentions 'w', we should add w to
  273. g's loop-breaker edges. More concretely there is an edge from f -> g
  274. iff
  275. (a) g is mentioned in f's RHS `xor` f's INLINE rhs
  276. (see Note [Inline rules])
  277. (b) or h is mentioned in f's RHS, and
  278. g appears in the RHS of an active RULE of h
  279. or a transitive sequence of active rules starting with h
  280. Why "active rules"? See Note [Finding rule RHS free vars]
  281. Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
  282. chosen as a loop breaker, because their RHSs don't mention each other.
  283. And indeed both can be inlined safely.
  284. Note again that the edges of the graph we use for computing loop breakers
  285. are not the same as the edges we use for computing the Rec blocks.
  286. That's why we compute
  287. - rec_edges for the Rec block analysis
  288. - loop_breaker_edges for the loop breaker analysis
  289. * Note [Finding rule RHS free vars]
  290. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  291. Consider this real example from Data Parallel Haskell
  292. tagZero :: Array Int -> Array Tag
  293. {-# INLINE [1] tagZeroes #-}
  294. tagZero xs = pmap (\x -> fromBool (x==0)) xs
  295. {-# RULES "tagZero" [~1] forall xs n.
  296. pmap fromBool <blah blah> = tagZero xs #-}
  297. So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
  298. However, tagZero can only be inlined in phase 1 and later, while
  299. the RULE is only active *before* phase 1. So there's no problem.
  300. To make this work, we look for the RHS free vars only for
  301. *active* rules. That's the reason for the occ_rule_act field
  302. of the OccEnv.
  303. * Note [Weak loop breakers]
  304. ~~~~~~~~~~~~~~~~~~~~~~~~~
  305. There is a last nasty wrinkle. Suppose we have
  306. Rec { f = f_rhs
  307. RULE f [] = g
  308. h = h_rhs
  309. g = h
  310. ...more...
  311. }
  312. Remember that we simplify the RULES before any RHS (see Note
  313. [Rules are visible in their own rec group] above).
  314. So we must *not* postInlineUnconditionally 'g', even though
  315. its RHS turns out to be trivial. (I'm assuming that 'g' is
  316. not choosen as a loop breaker.) Why not? Because then we
  317. drop the binding for 'g', which leaves it out of scope in the
  318. RULE!
  319. Here's a somewhat different example of the same thing
  320. Rec { g = h
  321. ; h = ...f...
  322. ; f = f_rhs
  323. RULE f [] = g }
  324. Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
  325. g, because the RULE for f is active throughout. So the RHS of h
  326. might rewrite to h = ...g...
  327. So g must remain in scope in the output program!
  328. We "solve" this by:
  329. Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
  330. iff g is a "missing free variable" of the Rec group
  331. A "missing free variable" x is one that is mentioned in an RHS or
  332. INLINE or RULE of a binding in the Rec group, but where the
  333. dependency on x may not show up in the loop_breaker_edges (see
  334. note [Choosing loop breakers} above).
  335. A normal "strong" loop breaker has IAmLoopBreaker False. So
  336. Inline postInlineUnconditionally
  337. strong IAmLoopBreaker False no no
  338. weak IAmLoopBreaker True yes no
  339. other yes yes
  340. The **sole** reason for this kind of loop breaker is so that
  341. postInlineUnconditionally does not fire. Ugh. (Typically it'll
  342. inline via the usual callSiteInline stuff, so it'll be dead in the
  343. next pass, so the main Ugh is the tiresome complication.)
  344. Note [Rules for imported functions]
  345. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  346. Consider this
  347. f = /\a. B.g a
  348. RULE B.g Int = 1 + f Int
  349. Note that
  350. * The RULE is for an imported function.
  351. * f is non-recursive
  352. Now we
  353. can get
  354. f Int --> B.g Int Inlining f
  355. --> 1 + f Int Firing RULE
  356. and so the simplifier goes into an infinite loop. This
  357. would not happen if the RULE was for a local function,
  358. because we keep track of dependencies through rules. But
  359. that is pretty much impossible to do for imported Ids. Suppose
  360. f's definition had been
  361. f = /\a. C.h a
  362. where (by some long and devious process), C.h eventually inlines to
  363. B.g. We could only spot such loops by exhaustively following
  364. unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
  365. f.
  366. Note that RULES for imported functions are important in practice; they
  367. occur a lot in the libraries.
  368. We regard this potential infinite loop as a *programmer* error.
  369. It's up the programmer not to write silly rules like
  370. RULE f x = f x
  371. and the example above is just a more complicated version.
  372. Note [Preventing loops due to imported functions rules]
  373. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  374. Consider:
  375. import GHC.Base (foldr)
  376. {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
  377. filter p xs = build (\c n -> foldr (filterFB c p) n xs)
  378. filterFB c p = ...
  379. f = filter p xs
  380. Note that filter is not a loop-breaker, so what happens is:
  381. f = filter p xs
  382. = {inline} build (\c n -> foldr (filterFB c p) n xs)
  383. = {inline} foldr (filterFB (:) p) [] xs
  384. = {RULE} filter p xs
  385. We are in an infinite loop.
  386. A more elaborate example (that I actually saw in practice when I went to
  387. mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
  388. {-# LANGUAGE RankNTypes #-}
  389. module GHCList where
  390. import Prelude hiding (filter)
  391. import GHC.Base (build)
  392. {-# INLINABLE filter #-}
  393. filter :: (a -> Bool) -> [a] -> [a]
  394. filter p [] = []
  395. filter p (x:xs) = if p x then x : filter p xs else filter p xs
  396. {-# NOINLINE [0] filterFB #-}
  397. filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
  398. filterFB c p x r | p x = x `c` r
  399. | otherwise = r
  400. {-# RULES
  401. "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr
  402. (filterFB c p) n xs)
  403. "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p
  404. #-}
  405. Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
  406. are not), the unfolding given to "filter" in the interface file will be:
  407. filter p [] = []
  408. filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
  409. else build (\c n -> foldr (filterFB c p) n xs
  410. Note that because this unfolding does not mention "filter", filter is not
  411. marked as a strong loop breaker. Therefore at a use site in another module:
  412. filter p xs
  413. = {inline}
  414. case xs of [] -> []
  415. (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
  416. else build (\c n -> foldr (filterFB c p) n xs)
  417. build (\c n -> foldr (filterFB c p) n xs)
  418. = {inline} foldr (filterFB (:) p) [] xs
  419. = {RULE} filter p xs
  420. And we are in an infinite loop again, except that this time the loop is producing an
  421. infinitely large *term* (an unrolling of filter) and so the simplifier finally
  422. dies with "ticks exhausted"
  423. Because of this problem, we make a small change in the occurrence analyser
  424. designed to mark functions like "filter" as strong loop breakers on the basis that:
  425. 1. The RHS of filter mentions the local function "filterFB"
  426. 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
  427. So for each RULE for an *imported* function we are going to add
  428. dependency edges between the *local* FVS of the rule LHS and the
  429. *local* FVS of the rule RHS. We don't do anything special for RULES on
  430. local functions because the standard occurrence analysis stuff is
  431. pretty good at getting loop-breakerness correct there.
  432. It is important to note that even with this extra hack we aren't always going to get
  433. things right. For example, it might be that the rule LHS mentions an imported Id,
  434. and another module has a RULE that can rewrite that imported Id to one of our local
  435. Ids.
  436. Note [Specialising imported functions]
  437. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  438. BUT for *automatically-generated* rules, the programmer can't be
  439. responsible for the "programmer error" in Note [Rules for imported
  440. functions]. In paricular, consider specialising a recursive function
  441. defined in another module. If we specialise a recursive function B.g,
  442. we get
  443. g_spec = .....(B.g Int).....
  444. RULE B.g Int = g_spec
  445. Here, g_spec doesn't look recursive, but when the rule fires, it
  446. becomes so. And if B.g was mutually recursive, the loop might
  447. not be as obvious as it is here.
  448. To avoid this,
  449. * When specialising a function that is a loop breaker,
  450. give a NOINLINE pragma to the specialised function
  451. Note [Glomming]
  452. ~~~~~~~~~~~~~~~
  453. RULES for imported Ids can make something at the top refer to something at the bottom:
  454. f = \x -> B.g (q x)
  455. h = \y -> 3
  456. RULE: B.g (q x) = h x
  457. Applying this rule makes f refer to h, although f doesn't appear to
  458. depend on h. (And, as in Note [Rules for imported functions], the
  459. dependency might be more indirect. For example, f might mention C.t
  460. rather than B.g, where C.t eventually inlines to B.g.)
  461. NOTICE that this cannot happen for rules whose head is a
  462. locally-defined function, because we accurately track dependencies
  463. through RULES. It only happens for rules whose head is an imported
  464. function (B.g in the example above).
  465. Solution:
  466. - When simplifying, bring all top level identifiers into
  467. scope at the start, ignoring the Rec/NonRec structure, so
  468. that when 'h' pops up in f's rhs, we find it in the in-scope set
  469. (as the simplifier generally expects). This happens in simplTopBinds.
  470. - In the occurrence analyser, if there are any out-of-scope
  471. occurrences that pop out of the top, which will happen after
  472. firing the rule: f = \x -> h x
  473. h = \y -> 3
  474. then just glom all the bindings into a single Rec, so that
  475. the *next* iteration of the occurrence analyser will sort
  476. them all out. This part happens in occurAnalysePgm.
  477. ------------------------------------------------------------
  478. Note [Inline rules]
  479. ~~~~~~~~~~~~~~~~~~~
  480. None of the above stuff about RULES applies to Inline Rules,
  481. stored in a CoreUnfolding. The unfolding, if any, is simplified
  482. at the same time as the regular RHS of the function (ie *not* like
  483. Note [Rules are visible in their own rec group]), so it should be
  484. treated *exactly* like an extra RHS.
  485. Or, rather, when computing loop-breaker edges,
  486. * If f has an INLINE pragma, and it is active, we treat the
  487. INLINE rhs as f's rhs
  488. * If it's inactive, we treat f as having no rhs
  489. * If it has no INLINE pragma, we look at f's actual rhs
  490. There is a danger that we'll be sub-optimal if we see this
  491. f = ...f...
  492. [INLINE f = ..no f...]
  493. where f is recursive, but the INLINE is not. This can just about
  494. happen with a sufficiently odd set of rules; eg
  495. foo :: Int -> Int
  496. {-# INLINE [1] foo #-}
  497. foo x = x+1
  498. bar :: Int -> Int
  499. {-# INLINE [1] bar #-}
  500. bar x = foo x + 1
  501. {-# RULES "foo" [~1] forall x. foo x = bar x #-}
  502. Here the RULE makes bar recursive; but it's INLINE pragma remains
  503. non-recursive. It's tempting to then say that 'bar' should not be
  504. a loop breaker, but an attempt to do so goes wrong in two ways:
  505. a) We may get
  506. $df = ...$cfoo...
  507. $cfoo = ...$df....
  508. [INLINE $cfoo = ...no-$df...]
  509. But we want $cfoo to depend on $df explicitly so that we
  510. put the bindings in the right order to inline $df in $cfoo
  511. and perhaps break the loop altogether. (Maybe this
  512. b)
  513. Example [eftInt]
  514. ~~~~~~~~~~~~~~~
  515. Example (from GHC.Enum):
  516. eftInt :: Int# -> Int# -> [Int]
  517. eftInt x y = ...(non-recursive)...
  518. {-# INLINE [0] eftIntFB #-}
  519. eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
  520. eftIntFB c n x y = ...(non-recursive)...
  521. {-# RULES
  522. "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
  523. "eftIntList" [1] eftIntFB (:) [] = eftInt
  524. #-}
  525. Note [Specialisation rules]
  526. ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  527. Consider this group, which is typical of what SpecConstr builds:
  528. fs a = ....f (C a)....
  529. f x = ....f (C a)....
  530. {-# RULE f (C a) = fs a #-}
  531. So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
  532. But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
  533. - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
  534. - fs is inlined (say it's small)
  535. - now there's another opportunity to apply the RULE
  536. This showed up when compiling Control.Concurrent.Chan.getChanContents.
  537. -}
  538. type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
  539. -- which is gotten from the Id.
  540. data Details
  541. = ND { nd_bndr :: Id -- Binder
  542. , nd_rhs :: CoreExpr -- RHS, already occ-analysed
  543. , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings
  544. -- ignoring phase (ie assuming all are active)
  545. -- See Note [Forming Rec groups]
  546. , nd_inl :: IdSet -- Free variables of
  547. -- the stable unfolding (if present and active)
  548. -- or the RHS (if not)
  549. -- but excluding any RULES
  550. -- This is the IdSet that may be used if the Id is inlined
  551. , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds
  552. -- but are *not* in nd_inl. These are the ones whose
  553. -- dependencies might not be respected by loop_breaker_edges
  554. -- See Note [Weak loop breakers]
  555. , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES
  556. }
  557. instance Outputable Details where
  558. ppr nd = text "ND" <> braces
  559. (sep [ text "bndr =" <+> ppr (nd_bndr nd)
  560. , text "uds =" <+> ppr (nd_uds nd)
  561. , text "inl =" <+> ppr (nd_inl nd)
  562. , text "weak =" <+> ppr (nd_weak nd)
  563. , text "rule =" <+> ppr (nd_active_rule_fvs nd)
  564. ])
  565. makeNode :: OccEnv -> ImpRuleEdges -> VarSet -> (Var, CoreExpr) -> Node Details
  566. makeNode env imp_rule_edges bndr_set (bndr, rhs)
  567. = (details, varUnique bndr, nonDetKeysUFM node_fvs)
  568. -- It's OK to use nonDetKeysUFM here as stronglyConnCompFromEdgedVerticesR
  569. -- is still deterministic with edges in nondeterministic order as
  570. -- explained in Note [Deterministic SCC] in Digraph.
  571. where
  572. details = ND { nd_bndr = bndr
  573. , nd_rhs = rhs'
  574. , nd_uds = rhs_usage3
  575. , nd_weak = node_fvs `minusVarSet` inl_fvs
  576. , nd_inl = inl_fvs
  577. , nd_active_rule_fvs = active_rule_fvs }
  578. -- Constructing the edges for the main Rec computation
  579. -- See Note [Forming Rec groups]
  580. (rhs_usage1, rhs') = occAnalRecRhs env rhs
  581. rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs -- Note [Rules are extra RHSs]
  582. -- Note [Rule dependency info]
  583. rhs_usage3 = case mb_unf_fvs of
  584. Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
  585. Nothing -> rhs_usage2
  586. node_fvs = udFreeVars bndr_set rhs_usage3
  587. -- Finding the free variables of the rules
  588. is_active = occ_rule_act env :: Activation -> Bool
  589. rules = filterOut isBuiltinRule (idCoreRules bndr)
  590. rules_w_fvs :: [(Activation, VarSet)] -- Find the RHS fvs
  591. rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rule_edges bndr)
  592. -- See Note [Preventing loops due to imported functions rules]
  593. [ (ru_act rule, fvs)
  594. | rule <- rules
  595. , let fvs = exprFreeVars (ru_rhs rule)
  596. `delVarSetList` ru_bndrs rule
  597. , not (isEmptyVarSet fvs) ]
  598. all_rule_fvs = rule_lhs_fvs `unionVarSet` rule_rhs_fvs
  599. rule_rhs_fvs = mapUnionVarSet snd rules_w_fvs
  600. rule_lhs_fvs = mapUnionVarSet (\ru -> exprsFreeVars (ru_args ru)
  601. `delVarSetList` ru_bndrs ru) rules
  602. active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
  603. -- Finding the free variables of the INLINE pragma (if any)
  604. unf = realIdUnfolding bndr -- Ignore any current loop-breaker flag
  605. mb_unf_fvs = stableUnfoldingVars unf
  606. -- Find the "nd_inl" free vars; for the loop-breaker phase
  607. inl_fvs = case mb_unf_fvs of
  608. Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
  609. Just unf_fvs -> unf_fvs
  610. -- We could check for an *active* INLINE (returning
  611. -- emptyVarSet for an inactive one), but is_active
  612. -- isn't the right thing (it tells about
  613. -- RULE activation), so we'd need more plumbing
  614. -----------------------------
  615. occAnalRec :: SCC (Node Details)
  616. -> (UsageDetails, [CoreBind])
  617. -> (UsageDetails, [CoreBind])
  618. -- The NonRec case is just like a Let (NonRec ...) above
  619. occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
  620. (body_uds, binds)
  621. | not (bndr `usedIn` body_uds)
  622. = (body_uds, binds) -- See Note [Dead code]
  623. | otherwise -- It's mentioned in the body
  624. = (body_uds' +++ rhs_uds,
  625. NonRec tagged_bndr rhs : binds)
  626. where
  627. (body_uds', tagged_bndr) = tagBinder body_uds bndr
  628. -- The Rec case is the interesting one
  629. -- See Note [Loop breaking]
  630. occAnalRec (CyclicSCC nodes) (body_uds, binds)
  631. | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
  632. = (body_uds, binds) -- See Note [Dead code]
  633. | otherwise -- At this point we always build a single Rec
  634. = -- pprTrace "occAnalRec" (vcat
  635. -- [ text "tagged nodes" <+> ppr tagged_nodes
  636. -- , text "lb edges" <+> ppr loop_breaker_edges])
  637. (final_uds, Rec pairs : binds)
  638. where
  639. bndrs = [b | (ND { nd_bndr = b }, _, _) <- nodes]
  640. bndr_set = mkVarSet bndrs
  641. ----------------------------
  642. -- Tag the binders with their occurrence info
  643. tagged_nodes = map tag_node nodes
  644. total_uds = foldl add_uds body_uds nodes
  645. final_uds = total_uds `minusVarEnv` bndr_set
  646. add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd
  647. tag_node :: Node Details -> Node Details
  648. tag_node (details@ND { nd_bndr = bndr }, k, ks)
  649. | let bndr1 = setBinderOcc total_uds bndr
  650. = (details { nd_bndr = bndr1 }, k, ks)
  651. ---------------------------
  652. -- Now reconstruct the cycle
  653. pairs :: [(Id,CoreExpr)]
  654. pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs tagged_nodes []
  655. | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
  656. -- If weak_fvs is empty, the loop_breaker_edges will include all
  657. -- the edges in tagged_nodes, so there isn't any point in doing
  658. -- a fresh SCC computation that will yield a single CyclicSCC result.
  659. weak_fvs :: VarSet
  660. weak_fvs = mapUnionVarSet (nd_weak . fstOf3) nodes
  661. -- See Note [Choosing loop breakers] for loop_breaker_edges
  662. loop_breaker_edges = map mk_node tagged_nodes
  663. mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
  664. = (details, k, nonDetKeysUFM (extendFvs_ rule_fv_env inl_fvs))
  665. -- It's OK to use nonDetKeysUFM here as
  666. -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
  667. -- in nondeterministic order as explained in
  668. -- Note [Deterministic SCC] in Digraph.
  669. ------------------------------------
  670. rule_fv_env :: IdEnv IdSet
  671. -- Maps a variable f to the variables from this group
  672. -- mentioned in RHS of active rules for f
  673. -- Domain is *subset* of bound vars (others have no rule fvs)
  674. rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
  675. init_rule_fvs -- See Note [Finding rule RHS free vars]
  676. = [ (b, trimmed_rule_fvs)
  677. | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
  678. , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
  679. , not (isEmptyVarSet trimmed_rule_fvs)]
  680. {-
  681. @loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
  682. strongly connected component (there's guaranteed to be a cycle). It returns the
  683. same pairs, but
  684. a) in a better order,
  685. b) with some of the Ids having a IAmALoopBreaker pragma
  686. The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means
  687. that the simplifier can guarantee not to loop provided it never records an inlining
  688. for these no-inline guys.
  689. Furthermore, the order of the binds is such that if we neglect dependencies
  690. on the no-inline Ids then the binds are topologically sorted. This means
  691. that the simplifier will generally do a good job if it works from top bottom,
  692. recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
  693. -}
  694. type Binding = (Id,CoreExpr)
  695. mk_loop_breaker :: Node Details -> Binding
  696. mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
  697. = (setIdOccInfo bndr strongLoopBreaker, rhs)
  698. mk_non_loop_breaker :: VarSet -> Node Details -> Binding
  699. -- See Note [Weak loop breakers]
  700. mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
  701. | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
  702. | otherwise = (bndr, rhs)
  703. udFreeVars :: VarSet -> UsageDetails -> VarSet
  704. -- Find the subset of bndrs that are mentioned in uds
  705. udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
  706. loopBreakNodes :: Int
  707. -> VarSet -- All binders
  708. -> VarSet -- Binders whose dependencies may be "missing"
  709. -- See Note [Weak loop breakers]
  710. -> [Node Details]
  711. -> [Binding] -- Append these to the end
  712. -> [Binding]
  713. -- Return the bindings sorted into a plausible order, and marked with loop breakers.
  714. loopBreakNodes depth bndr_set weak_fvs nodes binds
  715. = go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
  716. where
  717. go [] binds = binds
  718. go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
  719. loop_break_scc scc binds
  720. = case scc of
  721. AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds
  722. CyclicSCC [node] -> mk_loop_breaker node : binds
  723. CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds
  724. reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
  725. -- Choose a loop breaker, mark it no-inline,
  726. -- do SCC analysis on the rest, and recursively sort them out
  727. reOrderNodes _ _ _ [] _ = panic "reOrderNodes"
  728. reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
  729. = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
  730. -- text "chosen" <+> ppr chosen_nodes) $
  731. loopBreakNodes new_depth bndr_set weak_fvs unchosen $
  732. (map mk_loop_breaker chosen_nodes ++ binds)
  733. where
  734. (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
  735. approximate_loop_breaker = depth >= 2
  736. new_depth | approximate_loop_breaker = 0
  737. | otherwise = depth+1
  738. -- After two iterations (d=0, d=1) give up
  739. -- and approximate, returning to d=0
  740. choose_loop_breaker :: Int -- Best score so far
  741. -> [Node Details] -- Nodes with this score
  742. -> [Node Details] -- Nodes with higher scores
  743. -> [Node Details] -- Unprocessed nodes
  744. -> ([Node Details], [Node Details])
  745. -- This loop looks for the bind with the lowest score
  746. -- to pick as the loop breaker. The rest accumulate in
  747. choose_loop_breaker _ loop_nodes acc []
  748. = (loop_nodes, acc) -- Done
  749. -- If approximate_loop_breaker is True, we pick *all*
  750. -- nodes with lowest score, else just one
  751. -- See Note [Complexity of loop breaking]
  752. choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
  753. | sc < loop_sc -- Lower score so pick this new one
  754. = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
  755. | approximate_loop_breaker && sc == loop_sc
  756. = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
  757. | otherwise -- Higher score so don't pick it
  758. = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
  759. where
  760. sc = score node
  761. score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker
  762. score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
  763. | not (isId bndr) = 100 -- A type or cercion variable is never a loop breaker
  764. | isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
  765. -- Note [DFuns should not be loop breakers]
  766. | Just be_very_keen <- hasStableCoreUnfolding_maybe (idUnfolding bndr)
  767. = if be_very_keen then 6 -- Note [Loop breakers and INLINE/INLINEABLE pragmas]
  768. else 3
  769. -- Data structures are more important than INLINE pragmas
  770. -- so that dictionary/method recursion unravels
  771. -- Note that this case hits all stable unfoldings, so we
  772. -- never look at 'rhs' for stable unfoldings. That's right, because
  773. -- 'rhs' is irrelevant for inlining things with a stable unfolding
  774. | is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
  775. | exprIsTrivial rhs = 10 -- Practically certain to be inlined
  776. -- Used to have also: && not (isExportedId bndr)
  777. -- But I found this sometimes cost an extra iteration when we have
  778. -- rec { d = (a,b); a = ...df...; b = ...df...; df = d }
  779. -- where df is the exported dictionary. Then df makes a really
  780. -- bad choice for loop breaker
  781. -- If an Id is marked "never inline" then it makes a great loop breaker
  782. -- The only reason for not checking that here is that it is rare
  783. -- and I've never seen a situation where it makes a difference,
  784. -- so it probably isn't worth the time to test on every binder
  785. -- | isNeverActive (idInlinePragma bndr) = -10
  786. | isOneOcc (idOccInfo bndr) = 2 -- Likely to be inlined
  787. | canUnfold (realIdUnfolding bndr) = 1
  788. -- The Id has some kind of unfolding
  789. -- Ignore loop-breaker-ness here because that is what we are setting!
  790. | otherwise = 0
  791. -- Checking for a constructor application
  792. -- Cheap and cheerful; the simplifier moves casts out of the way
  793. -- The lambda case is important to spot x = /\a. C (f a)
  794. -- which comes up when C is a dictionary constructor and
  795. -- f is a default method.
  796. -- Example: the instance for Show (ST s a) in GHC.ST
  797. --
  798. -- However we *also* treat (\x. C p q) as a con-app-like thing,
  799. -- Note [Closure conversion]
  800. is_con_app (Var v) = isConLikeId v
  801. is_con_app (App f _) = is_con_app f
  802. is_con_app (Lam _ e) = is_con_app e
  803. is_con_app (Tick _ e) = is_con_app e
  804. is_con_app _ = False
  805. {-
  806. Note [Complexity of loop breaking]
  807. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  808. The loop-breaking algorithm knocks out one binder at a time, and
  809. performs a new SCC analysis on the remaining binders. That can
  810. behave very badly in tightly-coupled groups of bindings; in the
  811. worst case it can be (N**2)*log N, because it does a full SCC
  812. on N, then N-1, then N-2 and so on.
  813. To avoid this, we switch plans after 2 (or whatever) attempts:
  814. Plan A: pick one binder with the lowest score, make it
  815. a loop breaker, and try again
  816. Plan B: pick *all* binders with the lowest score, make them
  817. all loop breakers, and try again
  818. Since there are only a small finite number of scores, this will
  819. terminate in a constant number of iterations, rather than O(N)
  820. iterations.
  821. You might thing that it's very unlikely, but RULES make it much
  822. more likely. Here's a real example from Trac #1969:
  823. Rec { $dm = \d.\x. op d
  824. {-# RULES forall d. $dm Int d = $s$dm1
  825. forall d. $dm Bool d = $s$dm2 #-}
  826. dInt = MkD .... opInt ...
  827. dInt = MkD .... opBool ...
  828. opInt = $dm dInt
  829. opBool = $dm dBool
  830. $s$dm1 = \x. op dInt
  831. $s$dm2 = \x. op dBool }
  832. The RULES stuff means that we can't choose $dm as a loop breaker
  833. (Note [Choosing loop breakers]), so we must choose at least (say)
  834. opInt *and* opBool, and so on. The number of loop breakders is
  835. linear in the number of instance declarations.
  836. Note [Loop breakers and INLINE/INLINEABLE pragmas]
  837. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  838. Avoid choosing a function with an INLINE pramga as the loop breaker!
  839. If such a function is mutually-recursive with a non-INLINE thing,
  840. then the latter should be the loop-breaker.
  841. It's vital to distinguish between INLINE and INLINEABLE (the
  842. Bool returned by hasStableCoreUnfolding_maybe). If we start with
  843. Rec { {-# INLINEABLE f #-}
  844. f x = ...f... }
  845. and then worker/wrapper it through strictness analysis, we'll get
  846. Rec { {-# INLINEABLE $wf #-}
  847. $wf p q = let x = (p,q) in ...f...
  848. {-# INLINE f #-}
  849. f x = case x of (p,q) -> $wf p q }
  850. Now it is vital that we choose $wf as the loop breaker, so we can
  851. inline 'f' in '$wf'.
  852. Note [DFuns should not be loop breakers]
  853. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  854. It's particularly bad to make a DFun into a loop breaker. See
  855. Note [How instance declarations are translated] in TcInstDcls
  856. We give DFuns a higher score than ordinary CONLIKE things because
  857. if there's a choice we want the DFun to be the non-looop breker. Eg
  858. rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
  859. $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
  860. {-# DFUN #-}
  861. $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
  862. }
  863. Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
  864. if we can't unravel the DFun first.
  865. Note [Constructor applications]
  866. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  867. It's really really important to inline dictionaries. Real
  868. example (the Enum Ordering instance from GHC.Base):
  869. rec f = \ x -> case d of (p,q,r) -> p x
  870. g = \ x -> case d of (p,q,r) -> q x
  871. d = (v, f, g)
  872. Here, f and g occur just once; but we can't inline them into d.
  873. On the other hand we *could* simplify those case expressions if
  874. we didn't stupidly choose d as the loop breaker.
  875. But we won't because constructor args are marked "Many".
  876. Inlining dictionaries is really essential to unravelling
  877. the loops in static numeric dictionaries, see GHC.Float.
  878. Note [Closure conversion]
  879. ~~~~~~~~~~~~~~~~~~~~~~~~~
  880. We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
  881. The immediate motivation came from the result of a closure-conversion transformation
  882. which generated code like this:
  883. data Clo a b = forall c. Clo (c -> a -> b) c
  884. ($:) :: Clo a b -> a -> b
  885. Clo f env $: x = f env x
  886. rec { plus = Clo plus1 ()
  887. ; plus1 _ n = Clo plus2 n
  888. ; plus2 Zero n = n
  889. ; plus2 (Succ m) n = Succ (plus $: m $: n) }
  890. If we inline 'plus' and 'plus1', everything unravels nicely. But if
  891. we choose 'plus1' as the loop breaker (which is entirely possible
  892. otherwise), the loop does not unravel nicely.
  893. @occAnalRhs@ deals with the question of bindings where the Id is marked
  894. by an INLINE pragma. For these we record that anything which occurs
  895. in its RHS occurs many times. This pessimistically assumes that ths
  896. inlined binder also occurs many times in its scope, but if it doesn't
  897. we'll catch it next time round. At worst this costs an extra simplifier pass.
  898. ToDo: try using the occurrence info for the inline'd binder.
  899. [March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC.
  900. [June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC.
  901. -}
  902. occAnalRecRhs :: OccEnv -> CoreExpr -- Rhs
  903. -> (UsageDetails, CoreExpr)
  904. -- Returned usage details covers only the RHS,
  905. -- and *not* the RULE or INLINE template for the Id
  906. occAnalRecRhs env rhs = occAnal (rhsCtxt env) rhs
  907. occAnalNonRecRhs :: OccEnv
  908. -> Id -> CoreExpr -- Binder and rhs
  909. -- Binder is already tagged with occurrence info
  910. -> (UsageDetails, CoreExpr)
  911. -- Returned usage details covers only the RHS,
  912. -- and *not* the RULE or INLINE template for the Id
  913. occAnalNonRecRhs env bndr rhs
  914. = occAnal rhs_env rhs
  915. where
  916. -- See Note [Cascading inlines]
  917. env1 | certainly_inline = env
  918. | otherwise = rhsCtxt env
  919. -- See Note [Use one-shot info]
  920. rhs_env = env1 { occ_one_shots = argOneShots OneShotLam dmd }
  921. certainly_inline -- See Note [Cascading inlines]
  922. = case idOccInfo bndr of
  923. OneOcc in_lam one_br _ -> not in_lam && one_br && active && not_stable
  924. _ -> False
  925. dmd = idDemandInfo bndr
  926. active = isAlwaysActive (idInlineActivation bndr)
  927. not_stable = not (isStableUnfolding (idUnfolding bndr))
  928. addIdOccs :: UsageDetails -> VarSet -> UsageDetails
  929. addIdOccs usage id_set = nonDetFoldUFM addIdOcc usage id_set
  930. -- It's OK to use nonDetFoldUFM here because addIdOcc commutes
  931. addIdOcc :: Id -> UsageDetails -> UsageDetails
  932. addIdOcc v u | isId v = addOneOcc u v NoOccInfo
  933. | otherwise = u
  934. -- Give a non-committal binder info (i.e NoOccInfo) because
  935. -- a) Many copies of the specialised thing can appear
  936. -- b) We don't want to substitute a BIG expression inside a RULE
  937. -- even if that's the only occurrence of the thing
  938. -- (Same goes for INLINE.)
  939. {-
  940. Note [Cascading inlines]
  941. ~~~~~~~~~~~~~~~~~~~~~~~~
  942. By default we use an rhsCtxt for the RHS of a binding. This tells the
  943. occ anal n that it's looking at an RHS, which has an effect in
  944. occAnalApp. In particular, for constructor applications, it makes
  945. the arguments appear to have NoOccInfo, so that we don't inline into
  946. them. Thus x = f y
  947. k = Just x
  948. we do not want to inline x.
  949. But there's a problem. Consider
  950. x1 = a0 : []
  951. x2 = a1 : x1
  952. x3 = a2 : x2
  953. g = f x3
  954. First time round, it looks as if x1 and x2 occur as an arg of a
  955. let-bound constructor ==> give them a many-occurrence.
  956. But then x3 is inlined (unconditionally as it happens) and
  957. next time round, x2 will be, and the next time round x1 will be
  958. Result: multiple simplifier iterations. Sigh.
  959. So, when analysing the RHS of x3 we notice that x3 will itself
  960. definitely inline the next time round, and so we analyse x3's rhs in
  961. an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff.
  962. Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
  963. If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
  964. indefinitely:
  965. x = f y
  966. k = Just x
  967. inline ==>
  968. k = Just (f y)
  969. float ==>
  970. x1 = f y
  971. k = Just x1
  972. This is worse than the slow cascade, so we only want to say "certainly_inline"
  973. if it really is certain. Look at the note with preInlineUnconditionally
  974. for the various clauses.
  975. Expressions
  976. ~~~~~~~~~~~
  977. -}
  978. occAnal :: OccEnv
  979. -> CoreExpr
  980. -> (UsageDetails, -- Gives info only about the "interesting" Ids
  981. CoreExpr)
  982. occAnal _ expr@(Type _) = (emptyDetails, expr)
  983. occAnal _ expr@(Lit _) = (emptyDetails, expr)
  984. occAnal env expr@(Var v) = (mkOneOcc env v False, expr)
  985. -- At one stage, I gathered the idRuleVars for v here too,
  986. -- which in a way is the right thing to do.
  987. -- But that went wrong right after specialisation, when
  988. -- the *occurrences* of the overloaded function didn't have any
  989. -- rules in them, so the *specialised* versions looked as if they
  990. -- weren't used at all.
  991. occAnal _ (Coercion co)
  992. = (addIdOccs emptyDetails (coVarsOfCo co), Coercion co)
  993. -- See Note [Gather occurrences of coercion variables]
  994. {-
  995. Note [Gather occurrences of coercion variables]
  996. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  997. We need to gather info about what coercion variables appear, so that
  998. we can sort them into the right place when doing dependency analysis.
  999. -}
  1000. occAnal env (Tick tickish body)
  1001. | tickish `tickishScopesLike` SoftScope
  1002. = (usage, Tick tickish body')
  1003. | Breakpoint _ ids <- tickish
  1004. = (usage_lam +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body')
  1005. -- never substitute for any of the Ids in a Breakpoint
  1006. | otherwise
  1007. = (usage_lam, Tick tickish body')
  1008. where
  1009. !(usage,body') = occAnal env body
  1010. -- for a non-soft tick scope, we can inline lambdas only
  1011. usage_lam = mapVarEnv markInsideLam usage
  1012. occAnal env (Cast expr co)
  1013. = case occAnal env expr of { (usage, expr') ->
  1014. let usage1 = markManyIf (isRhsEnv env) usage
  1015. usage2 = addIdOccs usage1 (coVarsOfCo co)
  1016. -- See Note [Gather occurrences of coercion variables]
  1017. in (usage2, Cast expr' co)
  1018. -- If we see let x = y `cast` co
  1019. -- then mark y as 'Many' so that we don't
  1020. -- immediately inline y again.
  1021. }
  1022. occAnal env app@(App _ _)
  1023. = occAnalApp env (collectArgsTicks tickishFloatable app)
  1024. -- Ignore type variables altogether
  1025. -- (a) occurrences inside type lambdas only not marked as InsideLam
  1026. -- (b) type variables not in environment
  1027. occAnal env (Lam x body) | isTyVar x
  1028. = case occAnal env body of { (body_usage, body') ->
  1029. (body_usage, Lam x body')
  1030. }
  1031. -- For value lambdas we do a special hack. Consider
  1032. -- (\x. \y. ...x...)
  1033. -- If we did nothing, x is used inside the \y, so would be marked
  1034. -- as dangerous to dup. But in the common case where the abstraction
  1035. -- is applied to two arguments this is over-pessimistic.
  1036. -- So instead, we just mark each binder with its occurrence
  1037. -- info in the *body* of the multiple lambda.
  1038. -- Then, the simplifier is careful when partially applying lambdas.
  1039. occAnal env expr@(Lam _ _)
  1040. = case occAnal env_body body of { (body_usage, body') ->
  1041. let
  1042. (final_usage, tagged_binders) = tagLamBinders body_usage binders'
  1043. -- Use binders' to put one-shot info on the lambdas
  1044. really_final_usage
  1045. | all isOneShotBndr binders' = final_usage
  1046. | otherwise = mapVarEnv markInsideLam final_usage
  1047. in
  1048. (really_final_usage, mkLams tagged_binders body') }
  1049. where
  1050. (binders, body) = collectBinders expr
  1051. (env_body, binders') = oneShotGroup env binders
  1052. occAnal env (Case scrut bndr ty alts)
  1053. = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
  1054. case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') ->
  1055. let
  1056. alts_usage = foldr combineAltsUsageDetails emptyDetails alts_usage_s
  1057. (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
  1058. total_usage = scrut_usage +++ alts_usage1
  1059. in
  1060. total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
  1061. where
  1062. -- Note [Case binder usage]
  1063. -- ~~~~~~~~~~~~~~~~~~~~~~~~
  1064. -- The case binder gets a usage of either "many" or "dead", never "one".
  1065. -- Reason: we like to inline single occurrences, to eliminate a binding,
  1066. -- but inlining a case binder *doesn't* eliminate a binding.
  1067. -- We *don't* want to transform
  1068. -- case x of w { (p,q) -> f w }
  1069. -- into
  1070. -- case x of w { (p,q) -> f (p,q) }
  1071. tag_case_bndr usage bndr
  1072. = case lookupVarEnv usage bndr of
  1073. Nothing -> (usage, setIdOccInfo bndr IAmDead)
  1074. Just _ -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
  1075. alt_env = mkAltEnv env scrut bndr
  1076. occ_anal_alt = occAnalAlt alt_env
  1077. occ_anal_scrut (Var v) (alt1 : other_alts)
  1078. | not (null other_alts) || not (isDefaultAlt alt1)
  1079. = (mkOneOcc env v True, Var v) -- The 'True' says that the variable occurs
  1080. -- in an interesting context; the case has
  1081. -- at least one non-default alternative
  1082. occ_anal_scrut (Tick t e) alts
  1083. | t `tickishScopesLike` SoftScope
  1084. -- No reason to not look through all ticks here, but only
  1085. -- for soft-scoped ticks we can do so without having to
  1086. -- update returned occurance info (see occAnal)
  1087. = second (Tick t) $ occ_anal_scrut e alts
  1088. occ_anal_scrut scrut _alts
  1089. = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt
  1090. occAnal env (Let bind body)
  1091. = case occAnal env body of { (body_usage, body') ->
  1092. case occAnalBind env noImpRuleEdges bind body_usage of { (final_usage, new_binds) ->
  1093. (final_usage, mkLets new_binds body') }}
  1094. occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
  1095. occAnalArgs _ [] _
  1096. = (emptyDetails, [])
  1097. occAnalArgs env (arg:args) one_shots
  1098. | isTypeArg arg
  1099. = case occAnalArgs env args one_shots of { (uds, args') ->
  1100. (uds, arg:args') }
  1101. | otherwise
  1102. = case argCtxt env one_shots of { (arg_env, one_shots') ->
  1103. case occAnal arg_env arg of { (uds1, arg') ->
  1104. case occAnalArgs env args one_shots' of { (uds2, args') ->
  1105. (uds1 +++ uds2, arg':args') }}}
  1106. {-
  1107. Applications are dealt with specially because we want
  1108. the "build hack" to work.
  1109. Note [Arguments of let-bound constructors]
  1110. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1111. Consider
  1112. f x = let y = expensive x in
  1113. let z = (True,y) in
  1114. (case z of {(p,q)->q}, case z of {(p,q)->q})
  1115. We feel free to duplicate the WHNF (True,y), but that means
  1116. that y may be duplicated thereby.
  1117. If we aren't careful we duplicate the (expensive x) call!
  1118. Constructors are rather like lambdas in this way.
  1119. -}
  1120. occAnalApp :: OccEnv
  1121. -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
  1122. -> (UsageDetails, Expr CoreBndr)
  1123. occAnalApp env (Var fun, args, ticks)
  1124. | null ticks = (uds, mkApps (Var fun) args')
  1125. | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args')
  1126. where
  1127. uds = fun_uds +++ final_args_uds
  1128. !(args_uds, args') = occAnalArgs env args one_shots
  1129. !final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
  1130. -- We mark the free vars of the argument of a constructor or PAP
  1131. -- as "many", if it is the RHS of a let(rec).
  1132. -- This means that nothing gets inlined into a constructor argument
  1133. -- position, which is what we want. Typically those constructor
  1134. -- arguments are just variables, or trivial expressions.
  1135. --
  1136. -- This is the *whole point* of the isRhsEnv predicate
  1137. -- See Note [Arguments of let-bound constructors]
  1138. n_val_args = valArgCount args
  1139. fun_uds = mkOneOcc env fun (n_val_args > 0)
  1140. is_exp = isExpandableApp fun n_val_args
  1141. -- See Note [CONLIKE pragma] in BasicTypes
  1142. -- The definition of is_exp should match that in
  1143. -- Simplify.prepareRhs
  1144. one_shots = argsOneShots (idStrictness fun) n_val_args
  1145. -- See Note [Use one-shot info]
  1146. occAnalApp env (fun, args, ticks)
  1147. = (fun_uds +++ args_uds, mkTicks ticks $ mkApps fun' args')
  1148. where
  1149. !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
  1150. -- The addAppCtxt is a bit cunning. One iteration of the simplifier
  1151. -- often leaves behind beta redexs like
  1152. -- (\x y -> e) a1 a2
  1153. -- Here we would like to mark x,y as one-shot, and treat the whole
  1154. -- thing much like a let. We do this by pushing some True items
  1155. -- onto the context stack.
  1156. !(args_uds, args') = occAnalArgs env args []
  1157. markManyIf :: Bool -- If this is true
  1158. -> UsageDetails -- Then do markMany on this
  1159. -> UsageDetails
  1160. markManyIf True uds = mapVarEnv markMany uds
  1161. markManyIf False uds = uds
  1162. {-
  1163. Note [Use one-shot information]
  1164. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1165. The occurrrence analyser propagates one-shot-lambda information in two
  1166. situations:
  1167. * Applications: eg build (\c n -> blah)
  1168. Propagate one-shot info from the strictness signature of 'build' to
  1169. the \c n.
  1170. This strictness signature can come from a module interface, in the case of
  1171. an imported function, or from a previous run of the demand analyser.
  1172. * Let-bindings: eg let f = \c. let ... in \n -> blah
  1173. in (build f, build f)
  1174. Propagate one-shot info from the demanand-info on 'f' to the
  1175. lambdas in its RHS (which may not be syntactically at the top)
  1176. This information must have come from a previous run of the demanand
  1177. analyser.
  1178. Previously, the demand analyser would *also* set the one-shot information, but
  1179. that code was buggy (see #11770), so doing it only in on place, namely here, is
  1180. saner.
  1181. Note [Binders in case alternatives]
  1182. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1183. Consider
  1184. case x of y { (a,b) -> f y }
  1185. We treat 'a', 'b' as dead, because they don't physically occur in the
  1186. case alternative. (Indeed, a variable is dead iff it doesn't occur in
  1187. its scope in the output of OccAnal.) It really helps to know when
  1188. binders are unused. See esp the call to isDeadBinder in
  1189. Simplify.mkDupableAlt
  1190. In this example, though, the Simplifier will bring 'a' and 'b' back to
  1191. life, beause it binds 'y' to (a,b) (imagine got inlined and
  1192. scrutinised y).
  1193. -}
  1194. occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
  1195. -> CoreAlt
  1196. -> (UsageDetails, Alt IdWithOccInfo)
  1197. occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
  1198. = case occAnal env rhs of { (rhs_usage1, rhs1) ->
  1199. let
  1200. (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
  1201. -- See Note [Binders in case alternatives]
  1202. (alt_usg', rhs2) =
  1203. wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
  1204. in
  1205. (alt_usg', (con, tagged_bndrs, rhs2)) }
  1206. wrapAltRHS :: OccEnv
  1207. -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv
  1208. -> UsageDetails -- usage for entire alt (p -> rhs)
  1209. -> [Var] -- alt binders
  1210. -> CoreExpr -- alt RHS
  1211. -> (UsageDetails, CoreExpr)
  1212. wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
  1213. | occ_binder_swap env
  1214. , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
  1215. -- handles condition (a) in Note [Binder swap]
  1216. , not captured -- See condition (b) in Note [Binder swap]
  1217. = ( alt_usg' +++ let_rhs_usg
  1218. , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
  1219. where
  1220. captured = any (`usedIn` let_rhs_usg) bndrs
  1221. -- The rhs of the let may include coercion variables
  1222. -- if the scrutinee was a cast, so we must gather their
  1223. -- usage. See Note [Gather occurrences of coercion variables]
  1224. (let_rhs_usg, let_rhs') = occAnal env let_rhs
  1225. (alt_usg', tagged_scrut_var) = tagBinder alt_usg scrut_var
  1226. wrapAltRHS _ _ alt_usg _ alt_rhs
  1227. = (alt_usg, alt_rhs)
  1228. {-
  1229. ************************************************************************
  1230. * *
  1231. OccEnv
  1232. * *
  1233. ************************************************************************
  1234. -}
  1235. data OccEnv
  1236. = OccEnv { occ_encl :: !OccEncl -- Enclosing context information
  1237. , occ_one_shots :: !OneShots -- Tells about linearity
  1238. , occ_gbl_scrut :: GlobalScruts
  1239. , occ_rule_act :: Activation -> Bool -- Which rules are active
  1240. -- See Note [Finding rule RHS free vars]
  1241. , occ_binder_swap :: !Bool -- enable the binder_swap
  1242. -- See CorePrep Note [Dead code in CorePrep]
  1243. }
  1244. type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees]
  1245. -----------------------------
  1246. -- OccEncl is used to control whether to inline into constructor arguments
  1247. -- For example:
  1248. -- x = (p,q) -- Don't inline p or q
  1249. -- y = /\a -> (p a, q a) -- Still don't inline p or q
  1250. -- z = f (p,q) -- Do inline p,q; it may make a rule fire
  1251. -- So OccEncl tells enought about the context to know what to do when
  1252. -- we encounter a constructor application or PAP.
  1253. data OccEncl
  1254. = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda
  1255. -- Don't inline into constructor args here
  1256. | OccVanilla -- Argument of function, body of lambda, scruintee of case etc.
  1257. -- Do inline into constructor args here
  1258. instance Outputable OccEncl where
  1259. ppr OccRhs = text "occRhs"
  1260. ppr OccVanilla = text "occVanilla"
  1261. type OneShots = [OneShotInfo]
  1262. -- [] No info
  1263. --
  1264. -- one_shot_info:ctxt Analysing a function-valued expression that
  1265. -- will be applied as described by one_shot_info
  1266. initOccEnv :: (Activation -> Bool) -> OccEnv
  1267. initOccEnv active_rule
  1268. = OccEnv { occ_encl = OccVanilla
  1269. , occ_one_shots = []
  1270. , occ_gbl_scrut = emptyVarSet
  1271. , occ_rule_act = active_rule
  1272. , occ_binder_swap = True }
  1273. vanillaCtxt :: OccEnv -> OccEnv
  1274. vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
  1275. rhsCtxt :: OccEnv -> OccEnv
  1276. rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
  1277. argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
  1278. argCtxt env []
  1279. = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
  1280. argCtxt env (one_shots:one_shots_s)
  1281. = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
  1282. isRhsEnv :: OccEnv -> Bool
  1283. isRhsEnv (OccEnv { occ_encl = OccRhs }) = True
  1284. isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
  1285. oneShotGroup :: OccEnv -> [CoreBndr]
  1286. -> ( OccEnv
  1287. , [CoreBndr] )
  1288. -- The result binders have one-shot-ness set that they might not have had originally.
  1289. -- This happens in (build (\c n -> e)). Here the occurrence analyser
  1290. -- linearity context knows that c,n are one-shot, and it records that fact in
  1291. -- the binder. This is useful to guide subsequent float-in/float-out tranformations
  1292. oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
  1293. = go ctxt bndrs []
  1294. where
  1295. go ctxt [] rev_bndrs
  1296. = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
  1297. , reverse rev_bndrs )
  1298. go [] bndrs rev_bndrs
  1299. = ( env { occ_one_shots = [], occ_encl = OccVanilla }
  1300. , reverse rev_bndrs ++ bndrs )
  1301. go ctxt (bndr:bndrs) rev_bndrs
  1302. | isId bndr
  1303. = case ctxt of
  1304. [] -> go [] bndrs (bndr : rev_bndrs)
  1305. (one_shot : ctxt) -> go ctxt bndrs (bndr': rev_bndrs)
  1306. where
  1307. bndr' = updOneShotInfo bndr one_shot
  1308. -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
  1309. -- one-shot info might be better than what we can infer, e.g.
  1310. -- due to explicit use of the magic 'oneShot' function.
  1311. -- See Note [The oneShot function]
  1312. | otherwise
  1313. = go ctxt bndrs (bndr:rev_bndrs)
  1314. addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
  1315. addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
  1316. = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
  1317. transClosureFV :: UniqFM VarSet -> UniqFM VarSet
  1318. -- If (f,g), (g,h) are in the input, then (f,h) is in the output
  1319. -- as well as (f,g), (g,h)
  1320. transClosureFV env
  1321. | no_change = env
  1322. | otherwise = transClosureFV (listToUFM new_fv_list)
  1323. where
  1324. (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
  1325. -- It's OK to use nonDetUFMToList here because we'll forget the
  1326. -- ordering by creating a new set with listToUFM
  1327. bump no_change (b,fvs)
  1328. | no_change_here = (no_change, (b,fvs))
  1329. | otherwise = (False, (b,new_fvs))
  1330. where
  1331. (new_fvs, no_change_here) = extendFvs env fvs
  1332. -------------
  1333. extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
  1334. extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag
  1335. extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
  1336. -- (extendFVs env s) returns
  1337. -- (s `union` env(s), env(s) `subset` s)
  1338. extendFvs env s
  1339. | isNullUFM env
  1340. = (s, True)
  1341. | otherwise
  1342. = (s `unionVarSet` extras, extras `subVarSet` s)
  1343. where
  1344. extras :: VarSet -- env(s)
  1345. extras = nonDetFoldUFM unionVarSet emptyVarSet $
  1346. -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
  1347. intersectUFM_C (\x _ -> x) env s
  1348. {-
  1349. ************************************************************************
  1350. * *
  1351. Binder swap
  1352. * *
  1353. ************************************************************************
  1354. Note [Binder swap]
  1355. ~~~~~~~~~~~~~~~~~~
  1356. We do these two transformations right here:
  1357. (1) case x of b { pi -> ri }
  1358. ==>
  1359. case x of b { pi -> let x=b in ri }
  1360. (2) case (x |> co) of b { pi -> ri }
  1361. ==>
  1362. case (x |> co) of b { pi -> let x = b |> sym co in ri }
  1363. Why (2)? See Note [Case of cast]
  1364. In both cases, in a particular alternative (pi -> ri), we only
  1365. add the binding if
  1366. (a) x occurs free in (pi -> ri)
  1367. (ie it occurs in ri, but is not bound in pi)
  1368. (b) the pi does not bind b (or the free vars of co)
  1369. We need (a) and (b) for the inserted binding to be correct.
  1370. For the alternatives where we inject the binding, we can transfer
  1371. all x's OccInfo to b. And that is the point.
  1372. Notice that
  1373. * The deliberate shadowing of 'x'.
  1374. * That (a) rapidly becomes false, so no bindings are injected.
  1375. The reason for doing these transformations here is because it allows
  1376. us to adjust the OccInfo for 'x' and 'b' as we go.
  1377. * Suppose the only occurrences of 'x' are the scrutinee and in the
  1378. ri; then this transformation makes it occur just once, and hence
  1379. get inlined right away.
  1380. * If we do this in the Simplifier, we don't know whether 'x' is used
  1381. in ri, so we are forced to pessimistically zap b's OccInfo even
  1382. though it is typically dead (ie neither it nor x appear in the
  1383. ri). There's nothing actually wrong with zapping it, except that
  1384. it's kind of nice to know which variables are dead. My nose
  1385. tells me to keep this information as robustly as possible.
  1386. The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
  1387. {x=b}; it's Nothing if the binder-swap doesn't happen.
  1388. There is a danger though. Consider
  1389. let v = x +# y
  1390. in case (f v) of w -> ...v...v...
  1391. And suppose that (f v) expands to just v. Then we'd like to
  1392. use 'w' instead of 'v' in the alternative. But it may be too
  1393. late; we may have substituted the (cheap) x+#y for v in the
  1394. same simplifier pass that reduced (f v) to v.
  1395. I think this is just too bad. CSE will recover some of it.
  1396. Note [Case of cast]
  1397. ~~~~~~~~~~~~~~~~~~~
  1398. Consider case (x `cast` co) of b { I# ->
  1399. ... (case (x `cast` co) of {...}) ...
  1400. We'd like to eliminate the inner case. That is the motivation for
  1401. equation (2) in Note [Binder swap]. When we get to the inner case, we
  1402. inline x, cancel the casts, and away we go.
  1403. Note [Binder swap on GlobalId scrutinees]
  1404. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1405. When the scrutinee is a GlobalId we must take care in two ways
  1406. i) In order to *know* whether 'x' occurs free in the RHS, we need its
  1407. occurrence info. BUT, we don't gather occurrence info for
  1408. GlobalIds. That's the reason for the (small) occ_gbl_scrut env in
  1409. OccEnv is for: it says "gather occurrence info for these".
  1410. ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
  1411. has an External Name. See, for example, SimplEnv Note [Global Ids in
  1412. the substitution].
  1413. Note [Zap case binders in proxy bindings]
  1414. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1415. From the original
  1416. case x of cb(dead) { p -> ...x... }
  1417. we will get
  1418. case x of cb(live) { p -> let x = cb in ...x... }
  1419. Core Lint never expects to find an *occurrence* of an Id marked
  1420. as Dead, so we must zap the OccInfo on cb before making the
  1421. binding x = cb. See Trac #5028.
  1422. Historical note [no-case-of-case]
  1423. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1424. We *used* to suppress the binder-swap in case expressions when
  1425. -fno-case-of-case is on. Old remarks:
  1426. "This happens in the first simplifier pass,
  1427. and enhances full laziness. Here's the bad case:
  1428. f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
  1429. If we eliminate the inner case, we trap it inside the I# v -> arm,
  1430. which might prevent some full laziness happening. I've seen this
  1431. in action in spectral/cichelli/Prog.hs:
  1432. [(m,n) | m <- [1..max], n <- [1..max]]
  1433. Hence the check for NoCaseOfCase."
  1434. However, now the full-laziness pass itself reverses the binder-swap, so this
  1435. check is no longer necessary.
  1436. Historical note [Suppressing the case binder-swap]
  1437. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1438. This old note describes a problem that is also fixed by doing the
  1439. binder-swap in OccAnal:
  1440. There is another situation when it might make sense to suppress the
  1441. case-expression binde-swap. If we have
  1442. case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
  1443. ...other cases .... }
  1444. We'll perform the binder-swap for the outer case, giving
  1445. case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
  1446. ...other cases .... }
  1447. But there is no point in doing it for the inner case, because w1 can't
  1448. be inlined anyway. Furthermore, doing the case-swapping involves
  1449. zapping w2's occurrence info (see paragraphs that follow), and that
  1450. forces us to bind w2 when doing case merging. So we get
  1451. case x of w1 { A -> let w2 = w1 in e1
  1452. B -> let w2 = w1 in e2
  1453. ...other cases .... }
  1454. This is plain silly in the common case where w2 is dead.
  1455. Even so, I can't see a good way to implement this idea. I tried
  1456. not doing the binder-swap if the scrutinee was already evaluated
  1457. but that failed big-time:
  1458. data T = MkT !Int
  1459. case v of w { MkT x ->
  1460. case x of x1 { I# y1 ->
  1461. case x of x2 { I# y2 -> ...
  1462. Notice that because MkT is strict, x is marked "evaluated". But to
  1463. eliminate the last case, we must either make sure that x (as well as
  1464. x1) has unfolding MkT y1. The straightforward thing to do is to do
  1465. the binder-swap. So this whole note is a no-op.
  1466. It's fixed by doing the binder-swap in OccAnal because we can do the
  1467. binder-swap unconditionally and still get occurrence analysis
  1468. information right.
  1469. -}
  1470. mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
  1471. -- Does two things: a) makes the occ_one_shots = OccVanilla
  1472. -- b) extends the GlobalScruts if possible
  1473. -- c) returns a proxy mapping, binding the scrutinee
  1474. -- to the case binder, if possible
  1475. mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
  1476. = case stripTicksTopE (const True) scrut of
  1477. Var v -> add_scrut v case_bndr'
  1478. Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
  1479. -- See Note [Case of cast]
  1480. _ -> (env { occ_encl = OccVanilla }, Nothing)
  1481. where
  1482. add_scrut v rhs = ( env { occ_encl = OccVanilla, occ_gbl_scrut = pe `extendVarSet` v }
  1483. , Just (localise v, rhs) )
  1484. case_bndr' = Var (zapIdOccInfo case_bndr) -- See Note [Zap case binders in proxy bindings]
  1485. localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) (idType scrut_var)
  1486. -- Localise the scrut_var before shadowing it; we're making a
  1487. -- new binding for it, and it might have an External Name, or
  1488. -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
  1489. -- Also we don't want any INLINE or NOINLINE pragmas!
  1490. {-
  1491. ************************************************************************
  1492. * *
  1493. \subsection[OccurAnal-types]{OccEnv}
  1494. * *
  1495. ************************************************************************
  1496. -}
  1497. type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage
  1498. -- INVARIANT: never IAmDead
  1499. -- (Deadness is signalled by not being in the map at all)
  1500. (+++), combineAltsUsageDetails
  1501. :: UsageDetails -> UsageDetails -> UsageDetails
  1502. (+++) usage1 usage2
  1503. = plusVarEnv_C addOccInfo usage1 usage2
  1504. combineAltsUsageDetails usage1 usage2
  1505. = plusVarEnv_C orOccInfo usage1 usage2
  1506. addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
  1507. addOneOcc usage id info
  1508. = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
  1509. -- ToDo: make this more efficient
  1510. emptyDetails :: UsageDetails
  1511. emptyDetails = (emptyVarEnv :: UsageDetails)
  1512. usedIn :: Id -> UsageDetails -> Bool
  1513. v `usedIn` details = isExportedId v || v `elemVarEnv` details
  1514. type IdWithOccInfo = Id
  1515. tagLamBinders :: UsageDetails -- Of scope
  1516. -> [Id] -- Binders
  1517. -> (UsageDetails, -- Details with binders removed
  1518. [IdWithOccInfo]) -- Tagged binders
  1519. -- Used for lambda and case binders
  1520. -- It copes with the fact that lambda bindings can have a
  1521. -- stable unfolding, used for join points
  1522. tagLamBinders usage binders = usage' `seq` (usage', bndrs')
  1523. where
  1524. (usage', bndrs') = mapAccumR tag_lam usage binders
  1525. tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
  1526. where
  1527. usage1 = usage `delVarEnv` bndr
  1528. usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
  1529. | otherwise = usage1
  1530. tagBinder :: UsageDetails -- Of scope
  1531. -> Id -- Binders
  1532. -> (UsageDetails, -- Details with binders removed
  1533. IdWithOccInfo) -- Tagged binders
  1534. tagBinder usage binder
  1535. = let
  1536. usage' = usage `delVarEnv` binder
  1537. binder' = setBinderOcc usage binder
  1538. in
  1539. usage' `seq` (usage', binder')
  1540. setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
  1541. setBinderOcc usage bndr
  1542. | isTyVar bndr = bndr
  1543. | isExportedId bndr = case idOccInfo bndr of
  1544. NoOccInfo -> bndr
  1545. _ -> setIdOccInfo bndr NoOccInfo
  1546. -- Don't use local usage info for visible-elsewhere things
  1547. -- BUT *do* erase any IAmALoopBreaker annotation, because we're
  1548. -- about to re-generate it and it shouldn't be "sticky"
  1549. | otherwise = setIdOccInfo bndr occ_info
  1550. where
  1551. occ_info = lookupVarEnv usage bndr `orElse` IAmDead
  1552. {-
  1553. ************************************************************************
  1554. * *
  1555. \subsection{Operations over OccInfo}
  1556. * *
  1557. ************************************************************************
  1558. -}
  1559. mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
  1560. mkOneOcc env id int_cxt
  1561. | isLocalId id
  1562. = unitVarEnv id (OneOcc False True int_cxt)
  1563. | id `elemVarEnv` occ_gbl_scrut env
  1564. = unitVarEnv id NoOccInfo
  1565. | otherwise
  1566. = emptyDetails
  1567. markMany, markInsideLam :: OccInfo -> OccInfo
  1568. markMany _ = NoOccInfo
  1569. markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
  1570. markInsideLam occ = occ
  1571. addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
  1572. addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
  1573. NoOccInfo -- Both branches are at least One
  1574. -- (Argument is never IAmDead)
  1575. -- (orOccInfo orig new) is used
  1576. -- when combining occurrence info from branches of a case
  1577. orOccInfo (OneOcc in_lam1 _ int_cxt1)
  1578. (OneOcc in_lam2 _ int_cxt2)
  1579. = OneOcc (in_lam1 || in_lam2)
  1580. False -- False, because it occurs in both branches
  1581. (int_cxt1 && int_cxt2)
  1582. orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
  1583. NoOccInfo