PageRenderTime 132ms CodeModel.GetById 106ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 2ms

/compiler/simplCore/OccurAnal.lhs

https://github.com/crdueck/ghc
Haskell | 1871 lines | 1298 code | 332 blank | 241 comment | 99 complexity | 02e865b7738e52239e1b418a1c0c111f MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1
   2%
   3% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4%
   5%************************************************************************
   6%*                                                                      *
   7\section[OccurAnal]{Occurrence analysis pass}
   8%*                                                                      *
   9%************************************************************************
  10
  11The occurrence analyser re-typechecks a core expression, returning a new
  12core expression with (hopefully) improved usage information.
  13
  14\begin{code}
  15{-# LANGUAGE BangPatterns #-}
  16module OccurAnal (
  17        occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
  18    ) where
  19
  20#include "HsVersions.h"
  21
  22import CoreSyn
  23import CoreFVs
  24import CoreUtils        ( exprIsTrivial, isDefaultAlt, isExpandableApp )
  25import Id
  26import Name( localiseName )
  27import BasicTypes
  28import Module( Module )
  29import Coercion
  30
  31import VarSet
  32import VarEnv
  33import Var
  34import Demand           ( argOneShots, argsOneShots )
  35import Maybes           ( orElse )
  36import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesR )
  37import Unique
  38import UniqFM
  39import Util
  40import Outputable
  41import FastString
  42import Data.List
  43\end{code}
  44
  45
  46%************************************************************************
  47%*                                                                      *
  48\subsection[OccurAnal-main]{Counting occurrences: main function}
  49%*                                                                      *
  50%************************************************************************
  51
  52Here's the externally-callable interface:
  53
  54\begin{code}
  55occurAnalysePgm :: Module       -- Used only in debug output
  56                -> (Activation -> Bool) 
  57                -> [CoreRule] -> [CoreVect] -> VarSet
  58                -> CoreProgram -> CoreProgram
  59occurAnalysePgm this_mod active_rule imp_rules vects vectVars binds
  60  | isEmptyVarEnv final_usage
  61  = binds'
  62  | otherwise   -- See Note [Glomming]
  63  = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
  64                   2 (ppr final_usage ) )
  65    [Rec (flattenBinds binds')]
  66  where
  67    (final_usage, binds') = go (initOccEnv active_rule) binds
  68
  69    initial_uds = addIdOccs emptyDetails 
  70                            (rulesFreeVars imp_rules `unionVarSet` 
  71                             vectsFreeVars vects `unionVarSet`
  72                             vectVars)
  73    -- The RULES and VECTORISE declarations keep things alive! (For VECTORISE declarations,
  74    -- we only get them *until* the vectoriser runs. Afterwards, these dependencies are
  75    -- reflected in 'vectors' — see Note [Vectorisation declarations and occurences].)
  76
  77    -- Note [Preventing loops due to imported functions rules]
  78    imp_rules_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
  79                            [ mapVarEnv (const maps_to) (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
  80                            | imp_rule <- imp_rules
  81                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
  82                                             `delVarSetList` ru_bndrs imp_rule
  83                            , arg <- ru_args imp_rule ]
  84
  85    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
  86    go _ []
  87        = (initial_uds, [])
  88    go env (bind:binds)
  89        = (final_usage, bind' ++ binds')
  90        where
  91           (bs_usage, binds')   = go env binds
  92           (final_usage, bind') = occAnalBind env env imp_rules_edges bind bs_usage
  93
  94occurAnalyseExpr :: CoreExpr -> CoreExpr
  95        -- Do occurrence analysis, and discard occurence info returned
  96occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
  97
  98occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
  99occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
 100
 101occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
 102occurAnalyseExpr' enable_binder_swap expr
 103  = snd (occAnal env expr)
 104  where
 105    env = (initOccEnv all_active_rules) {occ_binder_swap = enable_binder_swap}
 106    -- To be conservative, we say that all inlines and rules are active
 107    all_active_rules = \_ -> True
 108\end{code}
 109
 110
 111%************************************************************************
 112%*                                                                      *
 113\subsection[OccurAnal-main]{Counting occurrences: main function}
 114%*                                                                      *
 115%************************************************************************
 116
 117Bindings
 118~~~~~~~~
 119
 120\begin{code}
 121occAnalBind :: OccEnv           -- The incoming OccEnv
 122            -> OccEnv           -- Same, but trimmed by (binderOf bind)
 123            -> IdEnv IdSet      -- Mapping from FVs of imported RULE LHSs to RHS FVs
 124            -> CoreBind
 125            -> UsageDetails             -- Usage details of scope
 126            -> (UsageDetails,           -- Of the whole let(rec)
 127                [CoreBind])
 128
 129occAnalBind env _ imp_rules_edges (NonRec binder rhs) body_usage
 130  | isTyVar binder      -- A type let; we don't gather usage info
 131  = (body_usage, [NonRec binder rhs])
 132
 133  | not (binder `usedIn` body_usage)    -- It's not mentioned
 134  = (body_usage, [])
 135
 136  | otherwise                   -- It's mentioned in the body
 137  = (body_usage' +++ rhs_usage4, [NonRec tagged_binder rhs'])
 138  where
 139    (body_usage', tagged_binder) = tagBinder body_usage binder
 140    (rhs_usage1, rhs')           = occAnalNonRecRhs env tagged_binder rhs
 141    rhs_usage2 = addIdOccs rhs_usage1 (idUnfoldingVars binder)
 142    rhs_usage3 = addIdOccs rhs_usage2 (idRuleVars binder)
 143       -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
 144    rhs_usage4 = maybe rhs_usage3 (addIdOccs rhs_usage3) $ lookupVarEnv imp_rules_edges binder
 145       -- See Note [Preventing loops due to imported functions rules]
 146
 147occAnalBind _ env imp_rules_edges (Rec pairs) body_usage
 148  = foldr occAnalRec (body_usage, []) sccs
 149        -- For a recursive group, we
 150        --      * occ-analyse all the RHSs
 151        --      * compute strongly-connected components
 152        --      * feed those components to occAnalRec
 153  where
 154    bndr_set = mkVarSet (map fst pairs)
 155
 156    sccs :: [SCC (Node Details)]
 157    sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR nodes
 158
 159    nodes :: [Node Details]
 160    nodes = {-# SCC "occAnalBind.assoc" #-} map (makeNode env imp_rules_edges bndr_set) pairs
 161\end{code}
 162
 163Note [Dead code]
 164~~~~~~~~~~~~~~~~
 165Dropping dead code for a cyclic Strongly Connected Component is done
 166in a very simple way:
 167
 168        the entire SCC is dropped if none of its binders are mentioned
 169        in the body; otherwise the whole thing is kept.
 170
 171The key observation is that dead code elimination happens after
 172dependency analysis: so 'occAnalBind' processes SCCs instead of the
 173original term's binding groups.
 174
 175Thus 'occAnalBind' does indeed drop 'f' in an example like
 176
 177        letrec f = ...g...
 178               g = ...(...g...)...
 179        in
 180           ...g...
 181
 182when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
 183'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
 184'AcyclicSCC f', where 'body_usage' won't contain 'f'.
 185
 186------------------------------------------------------------
 187Note [Forming Rec groups]
 188~~~~~~~~~~~~~~~~~~~~~~~~~
 189We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
 190and "g uses f", no matter how indirectly.  We do a SCC analysis
 191with an edge f -> g if "f uses g".
 192
 193More precisely, "f uses g" iff g should be in scope whereever f is.
 194That is, g is free in:
 195  a) the rhs 'ef'
 196  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
 197  c) or the LHS or a rule for f (Note [Rule dependency info])
 198
 199These conditions apply regardless of the activation of the RULE (eg it might be
 200inactive in this phase but become active later).  Once a Rec is broken up
 201it can never be put back together, so we must be conservative.
 202
 203The principle is that, regardless of rule firings, every variale is
 204always in scope.
 205
 206  * Note [Rules are extra RHSs]
 207    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 208    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
 209    keeps the specialised "children" alive.  If the parent dies
 210    (because it isn't referenced any more), then the children will die
 211    too (unless they are already referenced directly).
 212
 213    To that end, we build a Rec group for each cyclic strongly
 214    connected component,
 215        *treating f's rules as extra RHSs for 'f'*.
 216    More concretely, the SCC analysis runs on a graph with an edge
 217    from f -> g iff g is mentioned in
 218        (a) f's rhs
 219        (b) f's RULES
 220    These are rec_edges.
 221
 222    Under (b) we include variables free in *either* LHS *or* RHS of
 223    the rule.  The former might seems silly, but see Note [Rule
 224    dependency info].  So in Example [eftInt], eftInt and eftIntFB
 225    will be put in the same Rec, even though their 'main' RHSs are
 226    both non-recursive.
 227
 228  * Note [Rule dependency info]
 229    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 230    The VarSet in a SpecInfo is used for dependency analysis in the
 231    occurrence analyser.  We must track free vars in *both* lhs and rhs.
 232    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
 233    Why both? Consider
 234        x = y
 235        RULE f x = v+4
 236    Then if we substitute y for x, we'd better do so in the
 237    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
 238    as well as 'v'
 239
 240  * Note [Rules are visible in their own rec group]
 241    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 242    We want the rules for 'f' to be visible in f's right-hand side.
 243    And we'd like them to be visible in other functions in f's Rec
 244    group.  E.g. in Note [Specialisation rules] we want f' rule
 245    to be visible in both f's RHS, and fs's RHS.
 246
 247    This means that we must simplify the RULEs first, before looking
 248    at any of the definitions.  This is done by Simplify.simplRecBind,
 249    when it calls addLetIdInfo.
 250
 251------------------------------------------------------------
 252Note [Choosing loop breakers]
 253~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 254Loop breaking is surprisingly subtle.  First read the section 4 of
 255"Secrets of the GHC inliner".  This describes our basic plan.
 256We avoid infinite inlinings by choosing loop breakers, and
 257ensuring that a loop breaker cuts each loop.
 258
 259Fundamentally, we do SCC analysis on a graph.  For each recursive
 260group we choose a loop breaker, delete all edges to that node,
 261re-analyse the SCC, and iterate.
 262
 263But what is the graph?  NOT the same graph as was used for Note
 264[Forming Rec groups]!  In particular, a RULE is like an equation for
 265'f' that is *always* inlined if it is applicable.  We do *not* disable
 266rules for loop-breakers.  It's up to whoever makes the rules to make
 267sure that the rules themselves always terminate.  See Note [Rules for
 268recursive functions] in Simplify.lhs
 269
 270Hence, if
 271    f's RHS (or its INLINE template if it has one) mentions g, and
 272    g has a RULE that mentions h, and
 273    h has a RULE that mentions f
 274
 275then we *must* choose f to be a loop breaker.  Example: see Note
 276[Specialisation rules].
 277
 278In general, take the free variables of f's RHS, and augment it with
 279all the variables reachable by RULES from those starting points.  That
 280is the whole reason for computing rule_fv_env in occAnalBind.  (Of
 281course we only consider free vars that are also binders in this Rec
 282group.)  See also Note [Finding rule RHS free vars]
 283
 284Note that when we compute this rule_fv_env, we only consider variables
 285free in the *RHS* of the rule, in contrast to the way we build the
 286Rec group in the first place (Note [Rule dependency info])
 287
 288Note that if 'g' has RHS that mentions 'w', we should add w to
 289g's loop-breaker edges.  More concretely there is an edge from f -> g
 290iff
 291        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
 292            (see Note [Inline rules])
 293        (b) or h is mentioned in f's RHS, and
 294            g appears in the RHS of an active RULE of h
 295            or a transitive sequence of active rules starting with h
 296
 297Why "active rules"?  See Note [Finding rule RHS free vars]
 298
 299Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
 300chosen as a loop breaker, because their RHSs don't mention each other.
 301And indeed both can be inlined safely.
 302
 303Note again that the edges of the graph we use for computing loop breakers
 304are not the same as the edges we use for computing the Rec blocks.
 305That's why we compute
 306
 307- rec_edges          for the Rec block analysis
 308- loop_breaker_edges for the loop breaker analysis
 309
 310  * Note [Finding rule RHS free vars]
 311    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 312    Consider this real example from Data Parallel Haskell
 313         tagZero :: Array Int -> Array Tag
 314         {-# INLINE [1] tagZeroes #-}
 315         tagZero xs = pmap (\x -> fromBool (x==0)) xs
 316
 317         {-# RULES "tagZero" [~1] forall xs n.
 318             pmap fromBool <blah blah> = tagZero xs #-}
 319    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
 320    However, tagZero can only be inlined in phase 1 and later, while
 321    the RULE is only active *before* phase 1.  So there's no problem.
 322
 323    To make this work, we look for the RHS free vars only for
 324    *active* rules. That's the reason for the occ_rule_act field
 325    of the OccEnv.
 326
 327  * Note [Weak loop breakers]
 328    ~~~~~~~~~~~~~~~~~~~~~~~~~
 329    There is a last nasty wrinkle.  Suppose we have
 330
 331        Rec { f = f_rhs
 332              RULE f [] = g
 333
 334              h = h_rhs
 335              g = h
 336              ...more...
 337        }
 338
 339    Remember that we simplify the RULES before any RHS (see Note
 340    [Rules are visible in their own rec group] above).
 341
 342    So we must *not* postInlineUnconditionally 'g', even though
 343    its RHS turns out to be trivial.  (I'm assuming that 'g' is
 344    not choosen as a loop breaker.)  Why not?  Because then we
 345    drop the binding for 'g', which leaves it out of scope in the
 346    RULE!
 347
 348    Here's a somewhat different example of the same thing
 349        Rec { g = h
 350            ; h = ...f...
 351            ; f = f_rhs
 352              RULE f [] = g }
 353    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
 354    g, because the RULE for f is active throughout.  So the RHS of h
 355    might rewrite to     h = ...g...
 356    So g must remain in scope in the output program!
 357
 358    We "solve" this by:
 359
 360        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
 361        iff g is a "missing free variable" of the Rec group
 362
 363    A "missing free variable" x is one that is mentioned in an RHS or
 364    INLINE or RULE of a binding in the Rec group, but where the
 365    dependency on x may not show up in the loop_breaker_edges (see
 366    note [Choosing loop breakers} above).
 367
 368    A normal "strong" loop breaker has IAmLoopBreaker False.  So
 369
 370                                Inline  postInlineUnconditionally
 371        IAmLoopBreaker False    no      no
 372        IAmLoopBreaker True     yes     no
 373        other                   yes     yes
 374
 375    The **sole** reason for this kind of loop breaker is so that
 376    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
 377    inline via the usual callSiteInline stuff, so it'll be dead in the
 378    next pass, so the main Ugh is the tiresome complication.)
 379
 380Note [Rules for imported functions]
 381~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 382Consider this
 383   f = /\a. B.g a
 384   RULE B.g Int = 1 + f Int
 385Note that
 386  * The RULE is for an imported function.
 387  * f is non-recursive
 388Now we
 389can get
 390   f Int --> B.g Int      Inlining f
 391         --> 1 + f Int    Firing RULE
 392and so the simplifier goes into an infinite loop. This
 393would not happen if the RULE was for a local function,
 394because we keep track of dependencies through rules.  But
 395that is pretty much impossible to do for imported Ids.  Suppose
 396f's definition had been
 397   f = /\a. C.h a
 398where (by some long and devious process), C.h eventually inlines to
 399B.g.  We could only spot such loops by exhaustively following
 400unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
 401f.
 402
 403Note that RULES for imported functions are important in practice; they
 404occur a lot in the libraries.
 405
 406We regard this potential infinite loop as a *programmer* error.
 407It's up the programmer not to write silly rules like
 408     RULE f x = f x
 409and the example above is just a more complicated version.
 410
 411Note [Preventing loops due to imported functions rules]
 412~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 413Consider:
 414  import GHC.Base (foldr)
 415
 416  {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
 417  filter p xs = build (\c n -> foldr (filterFB c p) n xs)
 418  filterFB c p = ...
 419
 420  f = filter p xs
 421
 422Note that filter is not a loop-breaker, so what happens is:
 423  f =          filter p xs
 424    = {inline} build (\c n -> foldr (filterFB c p) n xs)
 425    = {inline} foldr (filterFB (:) p) [] xs
 426    = {RULE}   filter p xs
 427
 428We are in an infinite loop.
 429
 430A more elaborate example (that I actually saw in practice when I went to
 431mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
 432  {-# LANGUAGE RankNTypes #-}
 433  module GHCList where
 434
 435  import Prelude hiding (filter)
 436  import GHC.Base (build)
 437
 438  {-# INLINABLE filter #-}
 439  filter :: (a -> Bool) -> [a] -> [a]
 440  filter p [] = []
 441  filter p (x:xs) = if p x then x : filter p xs else filter p xs
 442
 443  {-# NOINLINE [0] filterFB #-}
 444  filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
 445  filterFB c p x r | p x       = x `c` r
 446                   | otherwise = r
 447
 448  {-# RULES
 449  "filter"     [~1] forall p xs.  filter p xs = build (\c n -> foldr
 450  (filterFB c p) n xs)
 451  "filterList" [1]  forall p.     foldr (filterFB (:) p) [] = filter p
 452   #-}
 453
 454Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
 455are not), the unfolding given to "filter" in the interface file will be:
 456  filter p []     = []
 457  filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
 458                           else     build (\c n -> foldr (filterFB c p) n xs
 459
 460Note that because this unfolding does not mention "filter", filter is not
 461marked as a strong loop breaker. Therefore at a use site in another module:
 462  filter p xs
 463    = {inline}
 464      case xs of []     -> []
 465                 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
 466                                  else     build (\c n -> foldr (filterFB c p) n xs)
 467
 468  build (\c n -> foldr (filterFB c p) n xs)
 469    = {inline} foldr (filterFB (:) p) [] xs
 470    = {RULE}   filter p xs
 471
 472And we are in an infinite loop again, except that this time the loop is producing an
 473infinitely large *term* (an unrolling of filter) and so the simplifier finally
 474dies with "ticks exhausted"
 475
 476Because of this problem, we make a small change in the occurrence analyser
 477designed to mark functions like "filter" as strong loop breakers on the basis that:
 478  1. The RHS of filter mentions the local function "filterFB"
 479  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
 480
 481So for each RULE for an *imported* function we are going to add
 482dependency edges between the *local* FVS of the rule LHS and the
 483*local* FVS of the rule RHS. We don't do anything special for RULES on
 484local functions because the standard occurrence analysis stuff is
 485pretty good at getting loop-breakerness correct there.
 486
 487It is important to note that even with this extra hack we aren't always going to get
 488things right. For example, it might be that the rule LHS mentions an imported Id,
 489and another module has a RULE that can rewrite that imported Id to one of our local
 490Ids.
 491
 492Note [Specialising imported functions]
 493~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 494BUT for *automatically-generated* rules, the programmer can't be
 495responsible for the "programmer error" in Note [Rules for imported
 496functions].  In paricular, consider specialising a recursive function
 497defined in another module.  If we specialise a recursive function B.g,
 498we get
 499         g_spec = .....(B.g Int).....
 500         RULE B.g Int = g_spec
 501Here, g_spec doesn't look recursive, but when the rule fires, it
 502becomes so.  And if B.g was mutually recursive, the loop might
 503not be as obvious as it is here.
 504
 505To avoid this,
 506 * When specialising a function that is a loop breaker,
 507   give a NOINLINE pragma to the specialised function
 508
 509Note [Glomming]
 510~~~~~~~~~~~~~~~
 511RULES for imported Ids can make something at the top refer to something at the bottom:
 512        f = \x -> B.g (q x)
 513        h = \y -> 3
 514
 515        RULE:  B.g (q x) = h x
 516
 517Applying this rule makes f refer to h, although f doesn't appear to
 518depend on h.  (And, as in Note [Rules for imported functions], the
 519dependency might be more indirect. For example, f might mention C.t
 520rather than B.g, where C.t eventually inlines to B.g.)
 521
 522NOTICE that this cannot happen for rules whose head is a
 523locally-defined function, because we accurately track dependencies
 524through RULES.  It only happens for rules whose head is an imported
 525function (B.g in the example above).
 526
 527Solution:
 528  - When simplifying, bring all top level identifiers into
 529    scope at the start, ignoring the Rec/NonRec structure, so
 530    that when 'h' pops up in f's rhs, we find it in the in-scope set
 531    (as the simplifier generally expects). This happens in simplTopBinds.
 532
 533  - In the occurrence analyser, if there are any out-of-scope
 534    occurrences that pop out of the top, which will happen after
 535    firing the rule:      f = \x -> h x
 536                          h = \y -> 3
 537    then just glom all the bindings into a single Rec, so that
 538    the *next* iteration of the occurrence analyser will sort
 539    them all out.   This part happens in occurAnalysePgm.
 540
 541------------------------------------------------------------
 542Note [Inline rules]
 543~~~~~~~~~~~~~~~~~~~
 544None of the above stuff about RULES applies to Inline Rules,
 545stored in a CoreUnfolding.  The unfolding, if any, is simplified
 546at the same time as the regular RHS of the function (ie *not* like
 547Note [Rules are visible in their own rec group]), so it should be
 548treated *exactly* like an extra RHS.
 549
 550Or, rather, when computing loop-breaker edges,
 551  * If f has an INLINE pragma, and it is active, we treat the
 552    INLINE rhs as f's rhs
 553  * If it's inactive, we treat f as having no rhs
 554  * If it has no INLINE pragma, we look at f's actual rhs
 555
 556
 557There is a danger that we'll be sub-optimal if we see this
 558     f = ...f...
 559     [INLINE f = ..no f...]
 560where f is recursive, but the INLINE is not. This can just about
 561happen with a sufficiently odd set of rules; eg
 562
 563        foo :: Int -> Int
 564        {-# INLINE [1] foo #-}
 565        foo x = x+1
 566
 567        bar :: Int -> Int
 568        {-# INLINE [1] bar #-}
 569        bar x = foo x + 1
 570
 571        {-# RULES "foo" [~1] forall x. foo x = bar x #-}
 572
 573Here the RULE makes bar recursive; but it's INLINE pragma remains
 574non-recursive. It's tempting to then say that 'bar' should not be
 575a loop breaker, but an attempt to do so goes wrong in two ways:
 576   a) We may get
 577         $df = ...$cfoo...
 578         $cfoo = ...$df....
 579         [INLINE $cfoo = ...no-$df...]
 580      But we want $cfoo to depend on $df explicitly so that we
 581      put the bindings in the right order to inline $df in $cfoo
 582      and perhaps break the loop altogether.  (Maybe this
 583   b)
 584
 585
 586Example [eftInt]
 587~~~~~~~~~~~~~~~
 588Example (from GHC.Enum):
 589
 590  eftInt :: Int# -> Int# -> [Int]
 591  eftInt x y = ...(non-recursive)...
 592
 593  {-# INLINE [0] eftIntFB #-}
 594  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
 595  eftIntFB c n x y = ...(non-recursive)...
 596
 597  {-# RULES
 598  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
 599  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
 600   #-}
 601
 602Note [Specialisation rules]
 603~~~~~~~~~~~~~~~~~~~~~~~~~~~
 604Consider this group, which is typical of what SpecConstr builds:
 605
 606   fs a = ....f (C a)....
 607   f  x = ....f (C a)....
 608   {-# RULE f (C a) = fs a #-}
 609
 610So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
 611
 612But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
 613  - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
 614  - fs is inlined (say it's small)
 615  - now there's another opportunity to apply the RULE
 616
 617This showed up when compiling Control.Concurrent.Chan.getChanContents.
 618
 619
 620\begin{code}
 621type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
 622                                                -- which is gotten from the Id.
 623data Details
 624  = ND { nd_bndr :: Id          -- Binder
 625       , nd_rhs  :: CoreExpr    -- RHS, already occ-analysed
 626
 627       , nd_uds  :: UsageDetails  -- Usage from RHS, and RULES, and InlineRule unfolding
 628                                  -- ignoring phase (ie assuming all are active)
 629                                  -- See Note [Forming Rec groups]
 630
 631       , nd_inl  :: IdSet       -- Free variables of
 632                                --   the InlineRule (if present and active)
 633                                --   or the RHS (ir no InlineRule)
 634                                -- but excluding any RULES
 635                                -- This is the IdSet that may be used if the Id is inlined
 636
 637       , nd_weak :: IdSet       -- Binders of this Rec that are mentioned in nd_uds
 638                                -- but are *not* in nd_inl.  These are the ones whose
 639                                -- dependencies might not be respected by loop_breaker_edges
 640                                -- See Note [Weak loop breakers]
 641
 642       , nd_active_rule_fvs :: IdSet   -- Free variables of the RHS of active RULES
 643  }
 644
 645instance Outputable Details where
 646   ppr nd = ptext (sLit "ND") <> braces
 647             (sep [ ptext (sLit "bndr =") <+> ppr (nd_bndr nd)
 648                  , ptext (sLit "uds =") <+> ppr (nd_uds nd)
 649                  , ptext (sLit "inl =") <+> ppr (nd_inl nd)
 650                  , ptext (sLit "weak =") <+> ppr (nd_weak nd)
 651                  , ptext (sLit "rule =") <+> ppr (nd_active_rule_fvs nd)
 652             ])
 653
 654makeNode :: OccEnv -> IdEnv IdSet -> VarSet -> (Var, CoreExpr) -> Node Details
 655makeNode env imp_rules_edges bndr_set (bndr, rhs)
 656  = (details, varUnique bndr, keysUFM node_fvs)
 657  where
 658    details = ND { nd_bndr = bndr
 659                 , nd_rhs  = rhs'
 660                 , nd_uds  = rhs_usage3
 661                 , nd_weak = node_fvs `minusVarSet` inl_fvs
 662                 , nd_inl  = inl_fvs
 663                 , nd_active_rule_fvs = active_rule_fvs }
 664
 665    -- Constructing the edges for the main Rec computation
 666    -- See Note [Forming Rec groups]
 667    (rhs_usage1, rhs') = occAnalRecRhs env rhs
 668    rhs_usage2 = addIdOccs rhs_usage1 all_rule_fvs   -- Note [Rules are extra RHSs]
 669                                                     -- Note [Rule dependency info]
 670    rhs_usage3 = case mb_unf_fvs of
 671                   Just unf_fvs -> addIdOccs rhs_usage2 unf_fvs
 672                   Nothing      -> rhs_usage2
 673    node_fvs = udFreeVars bndr_set rhs_usage3
 674
 675    -- Finding the free variables of the rules
 676    is_active = occ_rule_act env :: Activation -> Bool
 677    rules = filterOut isBuiltinRule (idCoreRules bndr)
 678    rules_w_fvs :: [(Activation, VarSet)]    -- Find the RHS fvs
 679    rules_w_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) (lookupVarEnv imp_rules_edges bndr)
 680                   -- See Note [Preventing loops due to imported functions rules]
 681                  [ (ru_act rule, fvs)
 682                  | rule <- rules
 683                  , let fvs = exprFreeVars (ru_rhs rule)
 684                              `delVarSetList` ru_bndrs rule
 685                  , not (isEmptyVarSet fvs) ]
 686    all_rule_fvs = foldr (unionVarSet . snd) rule_lhs_fvs rules_w_fvs
 687    rule_lhs_fvs = foldr (unionVarSet . (\ru -> exprsFreeVars (ru_args ru)
 688                                                `delVarSetList` ru_bndrs ru))
 689                         emptyVarSet rules
 690    active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_fvs, is_active a]
 691
 692    -- Finding the free variables of the INLINE pragma (if any)
 693    unf        = realIdUnfolding bndr     -- Ignore any current loop-breaker flag
 694    mb_unf_fvs = stableUnfoldingVars unf
 695
 696    -- Find the "nd_inl" free vars; for the loop-breaker phase
 697    inl_fvs = case mb_unf_fvs of
 698                Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
 699                Just unf_fvs -> unf_fvs
 700                      -- We could check for an *active* INLINE (returning
 701                      -- emptyVarSet for an inactive one), but is_active
 702                      -- isn't the right thing (it tells about
 703                      -- RULE activation), so we'd need more plumbing
 704
 705-----------------------------
 706occAnalRec :: SCC (Node Details)
 707           -> (UsageDetails, [CoreBind])
 708           -> (UsageDetails, [CoreBind])
 709
 710        -- The NonRec case is just like a Let (NonRec ...) above
 711occAnalRec (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs, nd_uds = rhs_uds}, _, _))
 712           (body_uds, binds)
 713  | not (bndr `usedIn` body_uds)
 714  = (body_uds, binds)           -- See Note [Dead code]
 715
 716  | otherwise                   -- It's mentioned in the body
 717  = (body_uds' +++ rhs_uds,
 718     NonRec tagged_bndr rhs : binds)
 719  where
 720    (body_uds', tagged_bndr) = tagBinder body_uds bndr
 721
 722        -- The Rec case is the interesting one
 723        -- See Note [Loop breaking]
 724occAnalRec (CyclicSCC nodes) (body_uds, binds)
 725  | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
 726  = (body_uds, binds)                   -- See Note [Dead code]
 727
 728  | otherwise   -- At this point we always build a single Rec
 729  = -- pprTrace "occAnalRec" (vcat
 730    --   [ text "tagged nodes" <+> ppr tagged_nodes
 731    --   , text "lb edges" <+> ppr loop_breaker_edges])
 732    (final_uds, Rec pairs : binds)
 733
 734  where
 735    bndrs    = [b | (ND { nd_bndr = b }, _, _) <- nodes]
 736    bndr_set = mkVarSet bndrs
 737
 738        ----------------------------
 739        -- Tag the binders with their occurrence info
 740    tagged_nodes = map tag_node nodes
 741    total_uds = foldl add_uds body_uds nodes
 742    final_uds = total_uds `minusVarEnv` bndr_set
 743    add_uds usage_so_far (nd, _, _) = usage_so_far +++ nd_uds nd
 744
 745    tag_node :: Node Details -> Node Details
 746    tag_node (details@ND { nd_bndr = bndr }, k, ks)
 747      = (details { nd_bndr = setBinderOcc total_uds bndr }, k, ks)
 748
 749    ---------------------------
 750    -- Now reconstruct the cycle
 751    pairs :: [(Id,CoreExpr)]
 752    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs tagged_nodes       []
 753          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_edges []
 754          -- If weak_fvs is empty, the loop_breaker_edges will include all
 755          -- the edges in tagged_nodes, so there isn't any point in doing
 756          -- a fresh SCC computation that will yield a single CyclicSCC result.
 757
 758    weak_fvs :: VarSet
 759    weak_fvs = foldr (unionVarSet . nd_weak . fstOf3) emptyVarSet nodes
 760
 761        -- See Note [Choosing loop breakers] for loop_breaker_edges
 762    loop_breaker_edges = map mk_node tagged_nodes
 763    mk_node (details@(ND { nd_inl = inl_fvs }), k, _)
 764      = (details, k, keysUFM (extendFvs_ rule_fv_env inl_fvs))
 765
 766    ------------------------------------
 767    rule_fv_env :: IdEnv IdSet
 768        -- Maps a variable f to the variables from this group
 769        --      mentioned in RHS of active rules for f
 770        -- Domain is *subset* of bound vars (others have no rule fvs)
 771    rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
 772    init_rule_fvs   -- See Note [Finding rule RHS free vars]
 773      = [ (b, trimmed_rule_fvs)
 774        | (ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs },_,_) <- nodes
 775        , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
 776        , not (isEmptyVarSet trimmed_rule_fvs)]
 777\end{code}
 778
 779@loopBreakSCC@ is applied to the list of (binder,rhs) pairs for a cyclic
 780strongly connected component (there's guaranteed to be a cycle).  It returns the
 781same pairs, but
 782        a) in a better order,
 783        b) with some of the Ids having a IAmALoopBreaker pragma
 784
 785The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
 786that the simplifier can guarantee not to loop provided it never records an inlining
 787for these no-inline guys.
 788
 789Furthermore, the order of the binds is such that if we neglect dependencies
 790on the no-inline Ids then the binds are topologically sorted.  This means
 791that the simplifier will generally do a good job if it works from top bottom,
 792recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
 793
 794\begin{code}
 795type Binding = (Id,CoreExpr)
 796
 797mk_loop_breaker :: Node Details -> Binding
 798mk_loop_breaker (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
 799  = (setIdOccInfo bndr strongLoopBreaker, rhs)
 800
 801mk_non_loop_breaker :: VarSet -> Node Details -> Binding
 802-- See Note [Weak loop breakers]
 803mk_non_loop_breaker used_in_rules (ND { nd_bndr = bndr, nd_rhs = rhs}, _, _)
 804  | bndr `elemVarSet` used_in_rules = (setIdOccInfo bndr weakLoopBreaker, rhs)
 805  | otherwise                       = (bndr, rhs)
 806
 807udFreeVars :: VarSet -> UsageDetails -> VarSet
 808-- Find the subset of bndrs that are mentioned in uds
 809udFreeVars bndrs uds = intersectUFM_C (\b _ -> b) bndrs uds
 810
 811loopBreakNodes :: Int
 812               -> VarSet        -- All binders
 813               -> VarSet        -- Binders whose dependencies may be "missing"
 814                                -- See Note [Weak loop breakers]
 815               -> [Node Details]
 816               -> [Binding]             -- Append these to the end
 817               -> [Binding]
 818-- Return the bindings sorted into a plausible order, and marked with loop breakers.
 819loopBreakNodes depth bndr_set weak_fvs nodes binds
 820  = go (stronglyConnCompFromEdgedVerticesR nodes) binds
 821  where
 822    go []         binds = binds
 823    go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
 824
 825    loop_break_scc scc binds
 826      = case scc of
 827          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
 828          CyclicSCC [node] -> mk_loop_breaker node : binds
 829          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
 830
 831reOrderNodes :: Int -> VarSet -> VarSet -> [Node Details] -> [Binding] -> [Binding]
 832    -- Choose a loop breaker, mark it no-inline,
 833    -- do SCC analysis on the rest, and recursively sort them out
 834reOrderNodes _ _ _ [] _  = panic "reOrderNodes"
 835reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
 836  = -- pprTrace "reOrderNodes" (text "unchosen" <+> ppr unchosen $$
 837    --                           text "chosen" <+> ppr chosen_nodes) $
 838    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
 839    (map mk_loop_breaker chosen_nodes ++ binds)
 840  where
 841    (chosen_nodes, unchosen) = choose_loop_breaker (score node) [node] [] nodes
 842
 843    approximate_loop_breaker = depth >= 2
 844    new_depth | approximate_loop_breaker = 0
 845              | otherwise                = depth+1
 846        -- After two iterations (d=0, d=1) give up
 847        -- and approximate, returning to d=0
 848
 849    choose_loop_breaker :: Int                  -- Best score so far
 850                        -> [Node Details]       -- Nodes with this score
 851                        -> [Node Details]       -- Nodes with higher scores
 852                        -> [Node Details]       -- Unprocessed nodes
 853                        -> ([Node Details], [Node Details])
 854        -- This loop looks for the bind with the lowest score
 855        -- to pick as the loop  breaker.  The rest accumulate in
 856    choose_loop_breaker _ loop_nodes acc []
 857        = (loop_nodes, acc)        -- Done
 858
 859        -- If approximate_loop_breaker is True, we pick *all*
 860        -- nodes with lowest score, else just one
 861        -- See Note [Complexity of loop breaking]
 862    choose_loop_breaker loop_sc loop_nodes acc (node : nodes)
 863        | sc < loop_sc  -- Lower score so pick this new one
 864        = choose_loop_breaker sc [node] (loop_nodes ++ acc) nodes
 865
 866        | approximate_loop_breaker && sc == loop_sc
 867        = choose_loop_breaker loop_sc (node : loop_nodes) acc nodes
 868
 869        | otherwise     -- Higher score so don't pick it
 870        = choose_loop_breaker loop_sc loop_nodes (node : acc) nodes
 871        where
 872          sc = score node
 873
 874    score :: Node Details -> Int        -- Higher score => less likely to be picked as loop breaker
 875    score (ND { nd_bndr = bndr, nd_rhs = rhs }, _, _)
 876        | not (isId bndr) = 100     -- A type or cercion variable is never a loop breaker
 877
 878        | isDFunId bndr = 9   -- Never choose a DFun as a loop breaker
 879                              -- Note [DFuns should not be loop breakers]
 880
 881        | Just inl_source <- isStableCoreUnfolding_maybe (idUnfolding bndr)
 882        = case inl_source of
 883             InlineWrapper {} -> 10  -- Note [INLINE pragmas]
 884             _other           ->  3  -- Data structures are more important than this
 885                                     -- so that dictionary/method recursion unravels
 886                -- Note that this case hits all InlineRule things, so we
 887                -- never look at 'rhs' for InlineRule stuff. That's right, because
 888                -- 'rhs' is irrelevant for inlining things with an InlineRule
 889
 890        | is_con_app rhs = 5  -- Data types help with cases: Note [Constructor applications]
 891
 892        | exprIsTrivial rhs = 10  -- Practically certain to be inlined
 893                -- Used to have also: && not (isExportedId bndr)
 894                -- But I found this sometimes cost an extra iteration when we have
 895                --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
 896                -- where df is the exported dictionary. Then df makes a really
 897                -- bad choice for loop breaker
 898
 899
 900-- If an Id is marked "never inline" then it makes a great loop breaker
 901-- The only reason for not checking that here is that it is rare
 902-- and I've never seen a situation where it makes a difference,
 903-- so it probably isn't worth the time to test on every binder
 904--      | isNeverActive (idInlinePragma bndr) = -10
 905
 906        | isOneOcc (idOccInfo bndr) = 2  -- Likely to be inlined
 907
 908        | canUnfold (realIdUnfolding bndr) = 1
 909                -- The Id has some kind of unfolding
 910                -- Ignore loop-breaker-ness here because that is what we are setting!
 911
 912        | otherwise = 0
 913
 914        -- Checking for a constructor application
 915        -- Cheap and cheerful; the simplifer moves casts out of the way
 916        -- The lambda case is important to spot x = /\a. C (f a)
 917        -- which comes up when C is a dictionary constructor and
 918        -- f is a default method.
 919        -- Example: the instance for Show (ST s a) in GHC.ST
 920        --
 921        -- However we *also* treat (\x. C p q) as a con-app-like thing,
 922        --      Note [Closure conversion]
 923    is_con_app (Var v)    = isConLikeId v
 924    is_con_app (App f _)  = is_con_app f
 925    is_con_app (Lam _ e)  = is_con_app e
 926    is_con_app (Tick _ e) = is_con_app e
 927    is_con_app _          = False
 928\end{code}
 929
 930Note [Complexity of loop breaking]
 931~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 932The loop-breaking algorithm knocks out one binder at a time, and
 933performs a new SCC analysis on the remaining binders.  That can
 934behave very badly in tightly-coupled groups of bindings; in the
 935worst case it can be (N**2)*log N, because it does a full SCC
 936on N, then N-1, then N-2 and so on.
 937
 938To avoid this, we switch plans after 2 (or whatever) attempts:
 939  Plan A: pick one binder with the lowest score, make it
 940          a loop breaker, and try again
 941  Plan B: pick *all* binders with the lowest score, make them
 942          all loop breakers, and try again
 943Since there are only a small finite number of scores, this will
 944terminate in a constant number of iterations, rather than O(N)
 945iterations.
 946
 947You might thing that it's very unlikely, but RULES make it much
 948more likely.  Here's a real example from Trac #1969:
 949  Rec { $dm = \d.\x. op d
 950        {-# RULES forall d. $dm Int d  = $s$dm1
 951                  forall d. $dm Bool d = $s$dm2 #-}
 952
 953        dInt = MkD .... opInt ...
 954        dInt = MkD .... opBool ...
 955        opInt  = $dm dInt
 956        opBool = $dm dBool
 957
 958        $s$dm1 = \x. op dInt
 959        $s$dm2 = \x. op dBool }
 960The RULES stuff means that we can't choose $dm as a loop breaker
 961(Note [Choosing loop breakers]), so we must choose at least (say)
 962opInt *and* opBool, and so on.  The number of loop breakders is
 963linear in the number of instance declarations.
 964
 965Note [INLINE pragmas]
 966~~~~~~~~~~~~~~~~~~~~~
 967Avoid choosing a function with an INLINE pramga as the loop breaker!
 968If such a function is mutually-recursive with a non-INLINE thing,
 969then the latter should be the loop-breaker.
 970
 971Usually this is just a question of optimisation. But a particularly
 972bad case is wrappers generated by the demand analyser: if you make
 973then into a loop breaker you may get an infinite inlining loop.  For
 974example:
 975  rec {
 976        $wfoo x = ....foo x....
 977
 978        {-loop brk-} foo x = ...$wfoo x...
 979  }
 980The interface file sees the unfolding for $wfoo, and sees that foo is
 981strict (and hence it gets an auto-generated wrapper).  Result: an
 982infinite inlining in the importing scope.  So be a bit careful if you
 983change this.  A good example is Tree.repTree in
 984nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
 985breaker then compiling Game.hs goes into an infinite loop.  This
 986happened when we gave is_con_app a lower score than inline candidates:
 987
 988  Tree.repTree
 989    = __inline_me (/\a. \w w1 w2 ->
 990                   case Tree.$wrepTree @ a w w1 w2 of
 991                    { (# ww1, ww2 #) -> Branch @ a ww1 ww2 })
 992  Tree.$wrepTree
 993    = /\a w w1 w2 ->
 994      (# w2_smP, map a (Tree a) (Tree.repTree a w1 w) (w w2) #)
 995
 996Here we do *not* want to choose 'repTree' as the loop breaker.
 997
 998Note [DFuns should not be loop breakers]
 999~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1000It's particularly bad to make a DFun into a loop breaker.  See
1001Note [How instance declarations are translated] in TcInstDcls
1002
1003We give DFuns a higher score than ordinary CONLIKE things because
1004if there's a choice we want the DFun to be the non-looop breker. Eg
1005
1006rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1007
1008      $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1009      {-# DFUN #-}
1010      $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1011    }
1012
1013Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1014if we can't unravel the DFun first.
1015
1016Note [Constructor applications]
1017~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1018It's really really important to inline dictionaries.  Real
1019example (the Enum Ordering instance from GHC.Base):
1020
1021     rec     f = \ x -> case d of (p,q,r) -> p x
1022             g = \ x -> case d of (p,q,r) -> q x
1023             d = (v, f, g)
1024
1025Here, f and g occur just once; but we can't inline them into d.
1026On the other hand we *could* simplify those case expressions if
1027we didn't stupidly choose d as the loop breaker.
1028But we won't because constructor args are marked "Many".
1029Inlining dictionaries is really essential to unravelling
1030the loops in static numeric dictionaries, see GHC.Float.
1031
1032Note [Closure conversion]
1033~~~~~~~~~~~~~~~~~~~~~~~~~
1034We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1035The immediate motivation came from the result of a closure-conversion transformation
1036which generated code like this:
1037
1038    data Clo a b = forall c. Clo (c -> a -> b) c
1039
1040    ($:) :: Clo a b -> a -> b
1041    Clo f env $: x = f env x
1042
1043    rec { plus = Clo plus1 ()
1044
1045        ; plus1 _ n = Clo plus2 n
1046
1047        ; plus2 Zero     n = n
1048        ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1049
1050If we inline 'plus' and 'plus1', everything unravels nicely.  But if
1051we choose 'plus1' as the loop breaker (which is entirely possible
1052otherwise), the loop does not unravel nicely.
1053
1054
1055@occAnalRhs@ deals with the question of bindings where the Id is marked
1056by an INLINE pragma.  For these we record that anything which occurs
1057in its RHS occurs many times.  This pessimistically assumes that ths
1058inlined binder also occurs many times in its scope, but if it doesn't
1059we'll catch it next time round.  At worst this costs an extra simplifier pass.
1060ToDo: try using the occurrence info for the inline'd binder.
1061
1062[March 97] We do the same for atomic RHSs.  Reason: see notes with loopBreakSCC.
1063[June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with loopBreakSCC.
1064
1065
1066\begin{code}
1067occAnalRecRhs :: OccEnv -> CoreExpr    -- Rhs
1068           -> (UsageDetails, CoreExpr)
1069              -- Returned usage details covers only the RHS,
1070              -- and *not* the RULE or INLINE template for the Id
1071occAnalRecRhs env rhs = occAnal (rhsCtxt env) rhs
1072
1073occAnalNonRecRhs :: OccEnv
1074                 -> Id -> CoreExpr    -- Binder and rhs
1075                     -- Binder is already tagged with occurrence info
1076                 -> (UsageDetails, CoreExpr)
1077              -- Returned usage details covers only the RHS,
1078              -- and *not* the RULE or INLINE template for the Id
1079occAnalNonRecRhs env bndr rhs
1080  = occAnal rhs_env rhs
1081  where
1082    -- See Note [Use one-shot info]
1083    env1 = env { occ_one_shots = argOneShots dmd }
1084
1085    -- See Note [Cascading inlines]
1086    rhs_env | certainly_inline = env1
1087            | otherwise        = rhsCtxt env1
1088
1089    certainly_inline -- See Note [Cascading inlines]
1090      = case idOccInfo bndr of
1091          OneOcc in_lam one_br _ -> not in_lam && one_br && active && not_stable
1092          _                      -> False
1093
1094    dmd        = idDemandInfo bndr
1095    active     = isAlwaysActive (idInlineActivation bndr)
1096    not_stable = not (isStableUnfolding (idUnfolding bndr))
1097
1098addIdOccs :: UsageDetails -> VarSet -> UsageDetails
1099addIdOccs usage id_set = foldVarSet add usage id_set
1100  where
1101    add v u | isId v    = addOneOcc u v NoOccInfo
1102            | otherwise = u
1103        -- Give a non-committal binder info (i.e NoOccInfo) because
1104        --   a) Many copies of the specialised thing can appear
1105        --   b) We don't want to substitute a BIG expression inside a RULE
1106        --      even if that's the only occurrence of the thing
1107        --      (Same goes for INLINE.)
1108\end{code}
1109
1110Note [Cascading inlines]
1111~~~~~~~~~~~~~~~~~~~~~~~~
1112By default we use an rhsCtxt for the RHS of a binding.  This tells the
1113occ anal n that it's looking at an RHS, which has an effect in
1114occAnalApp.  In particular, for constructor applications, it makes
1115the arguments appear to have NoOccInfo, so that we don't inline into
1116them. Thus    x = f y
1117              k = Just x
1118we do not want to inline x.
1119
1120But there's a problem.  Consider
1121     x1 = a0 : []
1122     x2 = a1 : x1
1123     x3 = a2 : x2
1124     g  = f x3
1125First time round, it looks as if x1 and x2 occur as an arg of a
1126let-bound constructor ==> give them a many-occurrence.
1127But then x3 is inlined (unconditionally as it happens) and
1128next time round, x2 will be, and the next time round x1 will be
1129Result: multiple simplifier iterations.  Sigh.
1130
1131So, when analysing the RHS of x3 we notice that x3 will itself
1132definitely inline the next time round, and so we analyse x3's rhs in
1133an ordinary context, not rhsCtxt.  Hence the "certainly_inline" stuff.
1134
1135Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1136If we say "yes" when preInlineUnconditionally says "no" the simplifier iterates
1137indefinitely:
1138        x = f y
1139        k = Just x
1140inline ==>
1141        k = Just (f y)
1142float ==>
1143        x1 = f y
1144        k = Just x1
1145
1146This is worse than the slow cascade, so we only want to say "certainly_inline"
1147if it really is certain.  Look at the note with preInlineUnconditionally
1148for the various clauses.
1149
1150Expressions
1151~~~~~~~~~~~
1152\begin{code}
1153occAnal :: OccEnv
1154        -> CoreExpr
1155        -> (UsageDetails,       -- Gives info only about the "interesting" Ids
1156            CoreExpr)
1157
1158occAnal _   expr@(Type _) = (emptyDetails,         expr)
1159occAnal _   expr@(Lit _)  = (emptyDetails,         expr)
1160occAnal env expr@(Var v)  = (mkOneOcc env v False, expr)
1161    -- At one stage, I gathered the idRuleVars for v here too,
1162    -- which in a way is the right thing to do.
1163    -- But that went wrong right after specialisation, when
1164    -- the *occurrences* of the overloaded function didn't have any
1165    -- rules in them, so the *specialised* versions looked as if they
1166    -- weren't used at all.
1167
1168occAnal _ (Coercion co)
1169  = (addIdOccs emptyDetails (coVarsOfCo co), Coercion co)
1170        -- See Note [Gather occurrences of coercion veriables]
1171\end{code}
1172
1173Note [Gather occurrences of coercion veriables]
1174~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1175We need to gather info about what coercion variables appear, so that
1176we can sort them into the right place when doing dependency analysis.
1177
1178\begin{code}
1179occAnal env (Tick tickish body)
1180  | Breakpoint _ ids <- tickish
1181  = (mapVarEnv markInsideSCC usage
1182         +++ mkVarEnv (zip ids (repeat NoOccInfo)), Tick tickish body')
1183    -- never substitute for any of the Ids in a Breakpoint
1184
1185  | tickishScoped tickish
1186  = (mapVarEnv markInsideSCC usage, Tick tickish body')
1187
1188  | otherwise
1189  = (usage, Tick tickish body')
1190  where
1191    !(usage,body') = occAnal env body
1192
1193occAnal env (Cast expr co)
1194  = case occAnal env expr of { (usage, expr') ->
1195    let usage1 = markManyIf (isRhsEnv env) usage
1196        usage2 = addIdOccs usage1 (coVarsOfCo co)
1197          -- See Note [Gather occurrences of coercion veriables]
1198    in (usage2, Cast expr' co)
1199        -- If we see let x = y `cast` co
1200        -- then mark y as 'Many' so that we don't
1201        -- immediately inline y again.
1202    }
1203\end{code}
1204
1205\begin{code}
1206occAnal env app@(App _ _)
1207  = occAnalApp env (collectArgs app)
1208
1209-- Ignore type variables altogether
1210--   (a) occurrences inside type lambdas only not marked as InsideLam
1211--   (b) type variables not in environment
1212
1213occAnal env (Lam x body) | isTyVar x
1214  = case occAnal env body of { (body_usage, body') ->
1215    (body_usage, Lam x body')
1216    }
1217
1218-- For value lambdas we do a special hack.  Consider
1219--      (\x. \y. ...x...)
1220-- If we did nothing, x is used inside the \y, so would be marked
1221-- as dangerous to dup.  But in the common case where the abstraction
1222-- is applied to two arguments this is over-pessimistic.
1223-- So instead, we just mark each binder with its occurrence
1224-- info in the *body* of the multiple lambda.
1225-- Then, the…

Large files files are truncated, but you can click here to view the full file