/gcc/loop-unroll.c
C | 2338 lines | 1499 code | 335 blank | 504 comment | 331 complexity | 749acc6eafb3c7a0be8ef025b6f3f217 MD5 | raw file
Possible License(s): AGPL-1.0, GPL-3.0, LGPL-2.0, GPL-2.0, LGPL-2.1
Large files are truncated, but you can click here to view the full file
- /* Loop unrolling and peeling.
- Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010
- Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
- #include "config.h"
- #include "system.h"
- #include "coretypes.h"
- #include "tm.h"
- #include "rtl.h"
- #include "hard-reg-set.h"
- #include "obstack.h"
- #include "basic-block.h"
- #include "cfgloop.h"
- #include "cfglayout.h"
- #include "params.h"
- #include "output.h"
- #include "expr.h"
- #include "hashtab.h"
- #include "recog.h"
- /* This pass performs loop unrolling and peeling. We only perform these
- optimizations on innermost loops (with single exception) because
- the impact on performance is greatest here, and we want to avoid
- unnecessary code size growth. The gain is caused by greater sequentiality
- of code, better code to optimize for further passes and in some cases
- by fewer testings of exit conditions. The main problem is code growth,
- that impacts performance negatively due to effect of caches.
- What we do:
- -- complete peeling of once-rolling loops; this is the above mentioned
- exception, as this causes loop to be cancelled completely and
- does not cause code growth
- -- complete peeling of loops that roll (small) constant times.
- -- simple peeling of first iterations of loops that do not roll much
- (according to profile feedback)
- -- unrolling of loops that roll constant times; this is almost always
- win, as we get rid of exit condition tests.
- -- unrolling of loops that roll number of times that we can compute
- in runtime; we also get rid of exit condition tests here, but there
- is the extra expense for calculating the number of iterations
- -- simple unrolling of remaining loops; this is performed only if we
- are asked to, as the gain is questionable in this case and often
- it may even slow down the code
- For more detailed descriptions of each of those, see comments at
- appropriate function below.
- There are a lot of parameters (defined and described in params.def) that
- control how much we unroll/peel.
- ??? A great problem is that we don't have a good way to determine
- how many times we should unroll the loop; the experiments I have made
- showed that this choice may affect performance on the order of several %.
- */
- /* Information about induction variables to split. One record describes
- one insn; the records form a singly linked list through NEXT. */
- struct iv_to_split
- {
- rtx insn; /* The insn in which the induction variable occurs. */
- rtx base_var; /* The variable on which the values in the further
- iterations are based. */
- rtx step; /* Step of the induction variable. */
- struct iv_to_split *next; /* Next entry in walking order. */
- unsigned n_loc; /* Number of entries of LOC that are in use. */
- unsigned loc[3]; /* Location where the definition of the induction
- variable occurs in the insn. For example if
- N_LOC is 2, the expression is located at
- XEXP (XEXP (single_set, loc[0]), loc[1]). */
- };
- /* Information about accumulators to expand. One record describes one
- accumulating insn; the records form a singly linked list through NEXT. */
- struct var_to_expand
- {
- rtx insn; /* The insn in which the variable expansion occurs. */
- rtx reg; /* The accumulator which is expanded. */
- VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */
- struct var_to_expand *next; /* Next entry in walking order. */
- enum rtx_code op; /* The type of the accumulation - addition, subtraction
- or multiplication. */
- int expansion_count; /* Count the number of expansions generated so far. */
- int reuse_expansion; /* The expansion we intend to reuse to expand
- the accumulator. If REUSE_EXPANSION is 0 reuse
- the original accumulator. Else use
- var_expansions[REUSE_EXPANSION - 1]. */
- unsigned accum_pos; /* The position in which the accumulator is placed in
- the insn src. For example in x = x + something
- accum_pos is 0 while in x = something + x accum_pos
- is 1. */
- };
- /* Information about optimization applied in
- the unrolled loop. Each of the two kinds of records (struct iv_to_split
- and struct var_to_expand) is reachable both through a hash table and
- through a linked list described by a head pointer and a pointer to the
- tail link. */
- struct opt_info
- {
- htab_t insns_to_split; /* A hashtable of insns to split. */
- struct iv_to_split *iv_to_split_head; /* The first iv to split. */
- struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
- htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
- to expand. */
- struct var_to_expand *var_to_expand_head; /* The first var to expand. */
- struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
- unsigned first_new_block; /* The first basic block that was
- duplicated. */
- basic_block loop_exit; /* The loop exit basic block. */
- basic_block loop_preheader; /* The loop preheader basic block. */
- };
- static void decide_unrolling_and_peeling (int); /* Forward declarations of the file-local functions (this line through get_expansion). */
- static void peel_loops_completely (int);
- static void decide_peel_simple (struct loop *, int); /* The decide_* functions take the loop and the FLAGS of the pass. */
- static void decide_peel_once_rolling (struct loop *, int);
- static void decide_peel_completely (struct loop *, int);
- static void decide_unroll_stupid (struct loop *, int);
- static void decide_unroll_constant_iterations (struct loop *, int);
- static void decide_unroll_runtime_iterations (struct loop *, int);
- static void peel_loop_simple (struct loop *); /* The functions performing a transformation take only the loop. */
- static void peel_loop_completely (struct loop *);
- static void unroll_loop_stupid (struct loop *);
- static void unroll_loop_constant_iterations (struct loop *);
- static void unroll_loop_runtime_iterations (struct loop *);
- static struct opt_info *analyze_insns_in_loop (struct loop *); /* Helpers working on struct opt_info and its records. */
- static void opt_info_start_duplication (struct opt_info *);
- static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
- static void free_opt_info (struct opt_info *);
- static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
- static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
- static struct iv_to_split *analyze_iv_to_split_insn (rtx);
- static void expand_var_during_unrolling (struct var_to_expand *, rtx);
- static void insert_var_expansion_initialization (struct var_to_expand *,
- basic_block);
- static void combine_var_copies_in_loop_exit (struct var_to_expand *,
- basic_block);
- static rtx get_expansion (struct var_to_expand *);
- /* Unroll and/or peel (depending on FLAGS) LOOPS. */
- void
- unroll_and_peel_loops (int flags)
- {
- struct loop *loop;
- bool check;
- loop_iterator li;
- /* First perform complete loop peeling (it is almost surely a win,
- and affects parameters for further decision a lot). */
- peel_loops_completely (flags);
- /* Now decide rest of unrolling and peeling. */
- decide_unrolling_and_peeling (flags);
- /* Scan the loops, inner ones first. */
- FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
- {
- check = true;
- /* And perform the appropriate transformations. */
- switch (loop->lpt_decision.decision)
- {
- case LPT_PEEL_COMPLETELY:
- /* Already done. */
- gcc_unreachable ();
- case LPT_PEEL_SIMPLE:
- peel_loop_simple (loop);
- break;
- case LPT_UNROLL_CONSTANT:
- unroll_loop_constant_iterations (loop);
- break;
- case LPT_UNROLL_RUNTIME:
- unroll_loop_runtime_iterations (loop);
- break;
- case LPT_UNROLL_STUPID:
- unroll_loop_stupid (loop);
- break;
- case LPT_NONE:
- check = false;
- break;
- default:
- gcc_unreachable ();
- }
- if (check)
- {
- #ifdef ENABLE_CHECKING
- verify_dominators (CDI_DOMINATORS);
- verify_loop_structure ();
- #endif
- }
- }
- iv_analysis_done ();
- }
- /* Check whether exit of the LOOP is at the end of loop body. */
- static bool
- loop_exit_at_end_p (struct loop *loop)
- {
- struct niter_desc *desc = get_simple_loop_desc (loop);
- rtx insn;
- if (desc->in_edge->dest != loop->latch)
- return false;
- /* Check that the latch is empty. */
- FOR_BB_INSNS (loop->latch, insn)
- {
- if (INSN_P (insn))
- return false;
- }
- return true;
- }
- /* Depending on FLAGS, check whether to peel loops completely and do so. */
- static void
- peel_loops_completely (int flags)
- {
- struct loop *loop;
- loop_iterator li;
- /* Scan the loops, the inner ones first. */
- FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
- {
- loop->lpt_decision.decision = LPT_NONE;
- if (dump_file)
- fprintf (dump_file,
- "\n;; *** Considering loop %d for complete peeling ***\n",
- loop->num);
- loop->ninsns = num_loop_insns (loop);
- decide_peel_once_rolling (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
- decide_peel_completely (loop, flags);
- if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
- {
- peel_loop_completely (loop);
- #ifdef ENABLE_CHECKING
- verify_dominators (CDI_DOMINATORS);
- verify_loop_structure ();
- #endif
- }
- }
- }
- /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
- static void
- decide_unrolling_and_peeling (int flags)
- {
- struct loop *loop;
- loop_iterator li;
- /* Scan the loops, inner ones first. */
- FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
- {
- loop->lpt_decision.decision = LPT_NONE;
- if (dump_file)
- fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
- /* Do not peel cold areas. */
- if (optimize_loop_for_size_p (loop))
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, cold area\n");
- continue;
- }
- /* Can the loop be manipulated? */
- if (!can_duplicate_loop_p (loop))
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Not considering loop, cannot duplicate\n");
- continue;
- }
- /* Skip non-innermost loops. */
- if (loop->inner)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is not innermost\n");
- continue;
- }
- loop->ninsns = num_loop_insns (loop);
- loop->av_ninsns = average_num_loop_insns (loop);
- /* Try transformations one by one in decreasing order of
- priority. */
- decide_unroll_constant_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
- decide_unroll_runtime_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
- decide_unroll_stupid (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
- decide_peel_simple (loop, flags);
- }
- }
- /* Decide whether the LOOP is once rolling and suitable for complete
- peeling. */
- static void
- decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
- {
- struct niter_desc *desc;
- if (dump_file)
- fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
- /* Is the loop small enough? */
- if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check number of iterations. */
- if (!desc->simple_p
- || desc->assumptions
- || desc->infinite
- || !desc->const_iter
- || desc->niter != 0)
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Unable to prove that the loop rolls exactly once\n");
- return;
- }
- /* Success. */
- if (dump_file)
- fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
- loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
- }
- /* Decide whether the LOOP is suitable for complete peeling. */
- static void
- decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
- {
- unsigned npeel;
- struct niter_desc *desc;
- if (dump_file)
- fprintf (dump_file, "\n;; Considering peeling completely\n");
- /* Skip non-innermost loops. */
- if (loop->inner)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is not innermost\n");
- return;
- }
- /* Do not peel cold areas. */
- if (optimize_loop_for_size_p (loop))
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, cold area\n");
- return;
- }
- /* Can the loop be manipulated? */
- if (!can_duplicate_loop_p (loop))
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Not considering loop, cannot duplicate\n");
- return;
- }
- /* npeel = number of iterations to peel. */
- npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
- if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
- npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
- /* Is the loop small enough? */
- if (!npeel)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check number of iterations. */
- if (!desc->simple_p
- || desc->assumptions
- || !desc->const_iter
- || desc->infinite)
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Unable to prove that the loop iterates constant times\n");
- return;
- }
- if (desc->niter > npeel - 1)
- {
- if (dump_file)
- {
- fprintf (dump_file,
- ";; Not peeling loop completely, rolls too much (");
- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
- fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
- }
- return;
- }
- /* Success. */
- if (dump_file)
- fprintf (dump_file, ";; Decided to peel loop completely\n");
- loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
- }
- /* Peel all iterations of LOOP, remove exit edges and cancel the loop
- completely. The transformation done:
- for (i = 0; i < 4; i++)
- body;
- ==>
- i = 0;
- body; i++;
- body; i++;
- body; i++;
- body; i++;
- The number of copies made is DESC->niter, taken from the simple loop
- description of LOOP. When it is zero no copy is made and only the path
- starting at DESC->in_edge is removed. The insn analysis
- (analyze_insns_in_loop / apply_opt_in_copies) is performed only if
- flag_split_ivs_in_unroller is set.
- */
- static void
- peel_loop_completely (struct loop *loop)
- {
- sbitmap wont_exit;
- unsigned HOST_WIDE_INT npeel;
- unsigned i;
- VEC (edge, heap) *remove_edges;
- edge ein;
- struct niter_desc *desc = get_simple_loop_desc (loop);
- struct opt_info *opt_info = NULL;
- npeel = desc->niter;
- if (npeel)
- {
- bool ok;
- wont_exit = sbitmap_alloc (npeel + 1); /* NPEEL + 1 bits, all set except bit 0 and, with noloop assumptions, bit 1. */
- sbitmap_ones (wont_exit);
- RESET_BIT (wont_exit, 0);
- if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
- remove_edges = NULL;
- if (flag_split_ivs_in_unroller)
- opt_info = analyze_insns_in_loop (loop);
- opt_info_start_duplication (opt_info); /* NOTE(review): OPT_INFO is NULL here when the flag is off; the callee presumably accepts NULL -- confirm. */
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- npeel,
- wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | DLTHE_FLAG_COMPLETTE_PEEL
- | (opt_info
- ? DLTHE_RECORD_COPY_NUMBER : 0));
- gcc_assert (ok);
- free (wont_exit);
- if (opt_info)
- {
- apply_opt_in_copies (opt_info, npeel, false, true);
- free_opt_info (opt_info);
- }
- /* Remove the exit edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
- remove_path (ein);
- VEC_free (edge, heap, remove_edges);
- }
- ein = desc->in_edge; /* Fetch the edge before the description is freed by the next call. */
- free_simple_loop_desc (loop);
- /* Now remove the unreachable part of the last iteration and cancel
- the loop. */
- remove_path (ein);
- if (dump_file)
- fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
- }
- /* Decide whether to unroll LOOP iterating constant number of times
- and how much. */
- static void
- decide_unroll_constant_iterations (struct loop *loop, int flags)
- {
- unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
- struct niter_desc *desc;
- if (!(flags & UAP_UNROLL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
- if (dump_file)
- fprintf (dump_file,
- "\n;; Considering unrolling loop with constant "
- "number of iterations\n");
- /* nunroll = total number of copies of the original loop body in
- unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
- nunroll_by_av
- = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
- if (nunroll > nunroll_by_av)
- nunroll = nunroll_by_av;
- if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
- /* Skip big loops. */
- if (nunroll <= 1)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check number of iterations. */
- if (!desc->simple_p || !desc->const_iter || desc->assumptions)
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Unable to prove that the loop iterates constant times\n");
- return;
- }
- /* Check whether the loop rolls enough to consider. */
- if (desc->niter < 2 * nunroll)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
- return;
- }
- /* Success; now compute number of iterations to unroll. We alter
- nunroll so that as few as possible copies of loop body are
- necessary, while still not decreasing the number of unrollings
- too much (at most by 1). */
- best_copies = 2 * nunroll + 10;
- i = 2 * nunroll + 2;
- if (i - 1 >= desc->niter)
- i = desc->niter - 2;
- for (; i >= nunroll - 1; i--)
- {
- unsigned exit_mod = desc->niter % (i + 1);
- if (!loop_exit_at_end_p (loop))
- n_copies = exit_mod + i + 1;
- else if (exit_mod != (unsigned) i
- || desc->noloop_assumptions != NULL_RTX)
- n_copies = exit_mod + i + 2;
- else
- n_copies = i + 1;
- if (n_copies < best_copies)
- {
- best_copies = n_copies;
- best_unroll = i;
- }
- }
- if (dump_file)
- fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
- best_unroll + 1, best_copies, nunroll);
- loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
- loop->lpt_decision.times = best_unroll;
- if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the constant times rolling loop, %d times.\n",
- loop->lpt_decision.times);
- }
- /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
- times. The transformation does this:
- for (i = 0; i < 102; i++)
- body;
- ==>
- i = 0;
- body; i++;
- body; i++;
- while (i < 102)
- {
- body; i++;
- body; i++;
- body; i++;
- body; i++;
- }
- EXIT_MOD is the iteration count modulo the number of body copies
- (MAX_UNROLL + 1). If the exit test is not at the end of the body,
- EXIT_MOD iterations are peeled in front of the loop; otherwise
- EXIT_MOD + 1 iterations are peeled, unless EXIT_MOD equals MAX_UNROLL
- and there are no noloop assumptions. DESC->niter and DESC->niter_max
- are reduced by the number of peeled iterations and finally divided by
- MAX_UNROLL + 1.
- */
- static void
- unroll_loop_constant_iterations (struct loop *loop)
- {
- unsigned HOST_WIDE_INT niter;
- unsigned exit_mod;
- sbitmap wont_exit;
- unsigned i;
- VEC (edge, heap) *remove_edges;
- edge e;
- unsigned max_unroll = loop->lpt_decision.times;
- struct niter_desc *desc = get_simple_loop_desc (loop);
- bool exit_at_end = loop_exit_at_end_p (loop);
- struct opt_info *opt_info = NULL;
- bool ok;
- niter = desc->niter;
- /* Should not get here (such loop should be peeled instead). */
- gcc_assert (niter > max_unroll + 1);
- exit_mod = niter % (max_unroll + 1);
- wont_exit = sbitmap_alloc (max_unroll + 1);
- sbitmap_ones (wont_exit);
- remove_edges = NULL;
- if (flag_split_ivs_in_unroller
- || flag_variable_expansion_in_unroller)
- opt_info = analyze_insns_in_loop (loop);
- if (!exit_at_end)
- {
- /* The exit is not at the end of the loop; leave exit test
- in the first copy, so that the loops that start with test
- of exit condition have continuous body after unrolling. */
- if (dump_file)
- fprintf (dump_file, ";; Condition on beginning of loop.\n");
- /* Peel exit_mod iterations. */
- RESET_BIT (wont_exit, 0);
- if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
- if (exit_mod)
- {
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- exit_mod,
- wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info && exit_mod > 1
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- if (opt_info && exit_mod > 1)
- apply_opt_in_copies (opt_info, exit_mod, false, false);
- desc->noloop_assumptions = NULL_RTX;
- desc->niter -= exit_mod;
- desc->niter_max -= exit_mod;
- }
- SET_BIT (wont_exit, 1); /* Bit 1 may have been cleared above; set it again for the unrolling step. */
- }
- else
- {
- /* Leave exit test in last copy, for the same reason as above if
- the loop tests the condition at the end of loop body. */
- if (dump_file)
- fprintf (dump_file, ";; Condition on end of loop.\n");
- /* We know that niter >= max_unroll + 2; so we do not need to care of
- case when we would exit before reaching the loop. So just peel
- exit_mod + 1 iterations. */
- if (exit_mod != max_unroll
- || desc->noloop_assumptions)
- {
- RESET_BIT (wont_exit, 0);
- if (desc->noloop_assumptions)
- RESET_BIT (wont_exit, 1);
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- exit_mod + 1,
- wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info && exit_mod > 0
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- if (opt_info && exit_mod > 0)
- apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
- desc->niter -= exit_mod + 1;
- desc->niter_max -= exit_mod + 1;
- desc->noloop_assumptions = NULL_RTX;
- SET_BIT (wont_exit, 0);
- SET_BIT (wont_exit, 1);
- }
- RESET_BIT (wont_exit, max_unroll); /* Only the last copy keeps its bit cleared. */
- }
- /* Now unroll the loop. */
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- max_unroll,
- wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- if (opt_info)
- {
- apply_opt_in_copies (opt_info, max_unroll, true, true);
- free_opt_info (opt_info);
- }
- free (wont_exit);
- if (exit_at_end)
- {
- basic_block exit_block = get_bb_copy (desc->in_edge->src);
- /* Find a new in and out edge; they are in the last copy we have made. */
- if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
- {
- desc->out_edge = EDGE_SUCC (exit_block, 0);
- desc->in_edge = EDGE_SUCC (exit_block, 1);
- }
- else
- {
- desc->out_edge = EDGE_SUCC (exit_block, 1);
- desc->in_edge = EDGE_SUCC (exit_block, 0);
- }
- }
- desc->niter /= max_unroll + 1; /* One iteration of the new loop covers MAX_UNROLL + 1 iterations of the old one. */
- desc->niter_max /= max_unroll + 1;
- desc->niter_expr = GEN_INT (desc->niter);
- /* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
- remove_path (e);
- VEC_free (edge, heap, remove_edges);
- if (dump_file)
- fprintf (dump_file,
- ";; Unrolled loop %d times, constant # of iterations %i insns\n",
- max_unroll, num_loop_insns (loop));
- }
- /* Decide whether to unroll LOOP iterating runtime computable number of times
- and how much. */
- static void
- decide_unroll_runtime_iterations (struct loop *loop, int flags)
- {
- unsigned nunroll, nunroll_by_av, i;
- struct niter_desc *desc;
- if (!(flags & UAP_UNROLL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
- if (dump_file)
- fprintf (dump_file,
- "\n;; Considering unrolling loop with runtime "
- "computable number of iterations\n");
- /* nunroll = total number of copies of the original loop body in
- unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
- nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
- if (nunroll > nunroll_by_av)
- nunroll = nunroll_by_av;
- if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
- /* Skip big loops. */
- if (nunroll <= 1)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check simpleness. */
- if (!desc->simple_p || desc->assumptions)
- {
- if (dump_file)
- fprintf (dump_file,
- ";; Unable to prove that the number of iterations "
- "can be counted in runtime\n");
- return;
- }
- if (desc->const_iter)
- {
- if (dump_file)
- fprintf (dump_file, ";; Loop iterates constant times\n");
- return;
- }
- /* If we have profile feedback, check whether the loop rolls. */
- if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
- return;
- }
- /* Success; now force nunroll to be power of 2, as we are unable to
- cope with overflows in computation of number of iterations. */
- for (i = 1; 2 * i <= nunroll; i *= 2)
- continue;
- loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
- loop->lpt_decision.times = i - 1;
- if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the runtime computable "
- "times rolling loop, %d times.\n",
- loop->lpt_decision.times);
- }
- /* Splits edge E and inserts the sequence of instructions INSNS on it, and
- returns the newly created block. If INSNS is NULL_RTX, nothing is changed
- and NULL is returned instead. */
- basic_block
- split_edge_and_insert (edge e, rtx insns)
- {
- basic_block bb;
- if (!insns)
- return NULL;
- bb = split_edge (e);
- emit_insn_after (insns, BB_END (bb));
- /* ??? We used to assume that INSNS can contain control flow insns, and
- that we had to try to find sub basic blocks in BB to maintain a valid
- CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
- and call break_superblocks when going out of cfglayout mode. But it
- turns out that this never happens; and that if it does ever happen,
- the verify_flow_info call in loop_optimizer_finalize would fail.
- There are two reasons why we expected we could have control flow insns
- in INSNS. The first is when a comparison has to be done in parts, and
- the second is when the number of iterations is computed for loops with
- the number of iterations known at runtime. In both cases, test cases
- to get control flow in INSNS appear to be impossible to construct:
- * If do_compare_rtx_and_jump needs several branches to do comparison
- in a mode that needs comparison by parts, we cannot analyze the
- number of iterations of the loop, and we never get to unrolling it.
- * The code in expand_divmod that was suspected to cause creation of
- branching code seems to be only accessed for signed division. The
- divisions used by # of iterations analysis are always unsigned.
- Problems might arise on architectures that emits branching code
- for some operations that may appear in the unroller (especially
- for division), but we have no such architectures.
- Considering all this, it was decided that we should for now assume
- that INSNS can in theory contain control flow insns, but in practice
- it never does. So we don't handle the theoretical case, and should
- a real failure ever show up, we have a pretty good clue for how to
- fix it. */
- return bb;
- }
- /* Unroll LOOP for that we are able to count number of iterations in runtime
- LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
- extra care for case n < 0):
- for (i = 0; i < n; i++)
- body;
- ==>
- i = 0;
- mod = n % 4;
- switch (mod)
- {
- case 3:
- body; i++;
- case 2:
- body; i++;
- case 1:
- body; i++;
- case 0: ;
- }
- while (i < n)
- {
- body; i++;
- body; i++;
- body; i++;
- body; i++;
- }
- The remainder is obtained by ANDing the iteration count with
- MAX_UNROLL, which relies on MAX_UNROLL + 1 being a power of two. The
- switch is built as a chain of compare-and-jump blocks placed in front
- of the peeled copies. When the exit test is not at the end of the
- body, one copy fewer is peeled and an extra branch for a zero
- remainder is added instead.
- */
- static void
- unroll_loop_runtime_iterations (struct loop *loop)
- {
- rtx old_niter, niter, init_code, branch_code, tmp;
- unsigned i, j, p;
- basic_block preheader, *body, swtch, ezc_swtch;
- VEC (basic_block, heap) *dom_bbs;
- sbitmap wont_exit;
- int may_exit_copy;
- unsigned n_peel;
- VEC (edge, heap) *remove_edges;
- edge e;
- bool extra_zero_check, last_may_exit;
- unsigned max_unroll = loop->lpt_decision.times;
- struct niter_desc *desc = get_simple_loop_desc (loop);
- bool exit_at_end = loop_exit_at_end_p (loop);
- struct opt_info *opt_info = NULL;
- bool ok;
- if (flag_split_ivs_in_unroller
- || flag_variable_expansion_in_unroller)
- opt_info = analyze_insns_in_loop (loop);
- /* Remember blocks whose dominators will have to be updated. */
- dom_bbs = NULL;
- body = get_loop_body (loop);
- for (i = 0; i < loop->num_nodes; i++)
- {
- VEC (basic_block, heap) *ldom;
- basic_block bb;
- ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
- for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
- if (!flow_bb_inside_loop_p (loop, bb))
- VEC_safe_push (basic_block, heap, dom_bbs, bb);
- VEC_free (basic_block, heap, ldom);
- }
- free (body);
- if (!exit_at_end)
- {
- /* Leave exit in first copy (for explanation why see comment in
- unroll_loop_constant_iterations). */
- may_exit_copy = 0;
- n_peel = max_unroll - 1;
- extra_zero_check = true;
- last_may_exit = false;
- }
- else
- {
- /* Leave exit in last copy (for explanation why see comment in
- unroll_loop_constant_iterations). */
- may_exit_copy = max_unroll;
- n_peel = max_unroll;
- extra_zero_check = false;
- last_may_exit = true;
- }
- /* Get expression for number of iterations. */
- start_sequence ();
- old_niter = niter = gen_reg_rtx (desc->mode); /* OLD_NITER keeps the full count; NITER is replaced by the remainder below. */
- tmp = force_operand (copy_rtx (desc->niter_expr), niter);
- if (tmp != niter)
- emit_move_insn (niter, tmp);
- /* Count modulo by ANDing it with max_unroll; we use the fact that
- the number of unrollings is a power of two, and thus this is correct
- even if there is overflow in the computation. */
- niter = expand_simple_binop (desc->mode, AND,
- niter,
- GEN_INT (max_unroll),
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
- init_code = get_insns ();
- end_sequence ();
- unshare_all_rtl_in_chain (init_code);
- /* Precondition the loop. */
- split_edge_and_insert (loop_preheader_edge (loop), init_code);
- remove_edges = NULL;
- wont_exit = sbitmap_alloc (max_unroll + 2);
- /* Peel the first copy of loop body (almost always we must leave exit test
- here; the only exception is when we have extra zero check and the number
- of iterations is reliable). Also record the place of (possible) extra
- zero check. */
- sbitmap_zero (wont_exit);
- if (extra_zero_check
- && !desc->noloop_assumptions)
- SET_BIT (wont_exit, 1);
- ezc_swtch = loop_preheader_edge (loop)->src;
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- 1, wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ);
- gcc_assert (ok);
- /* Record the place where switch will be built for preconditioning. */
- swtch = split_edge (loop_preheader_edge (loop));
- for (i = 0; i < n_peel; i++)
- {
- /* Peel the copy. */
- sbitmap_zero (wont_exit);
- if (i != n_peel - 1 || !last_may_exit)
- SET_BIT (wont_exit, 1);
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- 1, wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ);
- gcc_assert (ok);
- /* Create item for switch. */
- j = n_peel - i - (extra_zero_check ? 0 : 1); /* Remainder value that enters at the copy just peeled. */
- p = REG_BR_PROB_BASE / (i + 2);
- preheader = split_edge (loop_preheader_edge (loop));
- branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
- block_label (preheader), p,
- NULL_RTX);
- /* We rely on the fact that the compare and jump cannot be optimized out,
- and hence the cfg we create is correct. */
- gcc_assert (branch_code != NULL_RTX);
- swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
- set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
- single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; /* NOTE(review): the zero-check code below updates single_succ_edge at the corresponding place, this updates single_pred_edge -- confirm the asymmetry is intended. */
- e = make_edge (swtch, preheader,
- single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
- e->probability = p;
- }
- if (extra_zero_check)
- {
- /* Add branch for zero iterations. */
- p = REG_BR_PROB_BASE / (max_unroll + 1);
- swtch = ezc_swtch;
- preheader = split_edge (loop_preheader_edge (loop));
- branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
- block_label (preheader), p,
- NULL_RTX);
- gcc_assert (branch_code != NULL_RTX);
- swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
- set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
- single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
- e = make_edge (swtch, preheader,
- single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
- e->probability = p;
- }
- /* Recount dominators for outer blocks. */
- iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
- /* And unroll loop. */
- sbitmap_ones (wont_exit);
- RESET_BIT (wont_exit, may_exit_copy);
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- max_unroll,
- wont_exit, desc->out_edge,
- &remove_edges,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- if (opt_info)
- {
- apply_opt_in_copies (opt_info, max_unroll, true, true);
- free_opt_info (opt_info);
- }
- free (wont_exit);
- if (exit_at_end)
- {
- basic_block exit_block = get_bb_copy (desc->in_edge->src);
- /* Find a new in and out edge; they are in the last copy we have
- made. */
- if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
- {
- desc->out_edge = EDGE_SUCC (exit_block, 0);
- desc->in_edge = EDGE_SUCC (exit_block, 1);
- }
- else
- {
- desc->out_edge = EDGE_SUCC (exit_block, 1);
- desc->in_edge = EDGE_SUCC (exit_block, 0);
- }
- }
- /* Remove the edges. */
- for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
- remove_path (e);
- VEC_free (edge, heap, remove_edges);
- /* We must be careful when updating the number of iterations due to
- preconditioning and the fact that the value must be valid at entry
- of the loop. After passing through the above code, we see that
- the correct new number of iterations is this: */
- gcc_assert (!desc->const_iter);
- desc->niter_expr =
- simplify_gen_binary (UDIV, desc->mode, old_niter,
- GEN_INT (max_unroll + 1));
- desc->niter_max /= max_unroll + 1;
- if (exit_at_end)
- {
- desc->niter_expr =
- simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
- desc->noloop_assumptions = NULL_RTX;
- desc->niter_max--; /* NOTE(review): no guard against NITER_MAX being zero here -- confirm that cannot happen. */
- }
- if (dump_file)
- fprintf (dump_file,
- ";; Unrolled loop %d times, counting # of iterations "
- "in runtime, %i insns\n",
- max_unroll, num_loop_insns (loop));
- VEC_free (basic_block, heap, dom_bbs);
- }
- /* Decide whether to simply peel LOOP and how much. */
- static void
- decide_peel_simple (struct loop *loop, int flags)
- {
- unsigned npeel;
- struct niter_desc *desc;
- if (!(flags & UAP_PEEL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
- if (dump_file)
- fprintf (dump_file, "\n;; Considering simply peeling loop\n");
- /* npeel = number of iterations to peel. */
- npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
- if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
- npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
- /* Skip big loops. */
- if (!npeel)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check number of iterations. */
- if (desc->simple_p && !desc->assumptions && desc->const_iter)
- {
- if (dump_file)
- fprintf (dump_file, ";; Loop iterates constant times\n");
- return;
- }
- /* Do not simply peel loops with branches inside -- it increases number
- of mispredicts. */
- if (num_loop_branches (loop) > 1)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not peeling, contains branches\n");
- return;
- }
- if (loop->header->count)
- {
- unsigned niter = expected_loop_iterations (loop);
- if (niter + 1 > npeel)
- {
- if (dump_file)
- {
- fprintf (dump_file, ";; Not peeling loop, rolls too much (");
- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
- (HOST_WIDEST_INT) (niter + 1));
- fprintf (dump_file, " iterations > %d [maximum peelings])\n",
- npeel);
- }
- return;
- }
- npeel = niter + 1;
- }
- else
- {
- /* For now we have no good heuristics to decide whether loop peeling
- will be effective, so disable it. */
- if (dump_file)
- fprintf (dump_file,
- ";; Not peeling loop, no evidence it will be profitable\n");
- return;
- }
- /* Success. */
- loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
- loop->lpt_decision.times = npeel;
- if (dump_file)
- fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
- loop->lpt_decision.times);
- }
- /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
- while (cond)
- body;
- ==>
- if (!cond) goto end;
- body;
- if (!cond) goto end;
- body;
- while (cond)
- body;
- end: ;
- */
- static void
- peel_loop_simple (struct loop *loop)
- {
- sbitmap wont_exit;
- unsigned npeel = loop->lpt_decision.times;
- struct niter_desc *desc = get_simple_loop_desc (loop);
- struct opt_info *opt_info = NULL;
- bool ok;
- if (flag_split_ivs_in_unroller && npeel > 1)
- opt_info = analyze_insns_in_loop (loop);
- wont_exit = sbitmap_alloc (npeel + 1);
- sbitmap_zero (wont_exit);
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- npeel, wont_exit, NULL,
- NULL, DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- free (wont_exit);
- if (opt_info)
- {
- apply_opt_in_copies (opt_info, npeel, false, false);
- free_opt_info (opt_info);
- }
- if (desc->simple_p)
- {
- if (desc->const_iter)
- {
- desc->niter -= npeel;
- desc->niter_expr = GEN_INT (desc->niter);
- desc->noloop_assumptions = NULL_RTX;
- }
- else
- {
- /* We cannot just update niter_expr, as its value might be clobbered
- inside loop. We could handle this by counting the number into
- temporary just like we do in runtime unrolling, but it does not
- seem worthwhile. */
- free_simple_loop_desc (loop);
- }
- }
- if (dump_file)
- fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
- }
- /* Decide whether to unroll LOOP stupidly and how much. */
- static void
- decide_unroll_stupid (struct loop *loop, int flags)
- {
- unsigned nunroll, nunroll_by_av, i;
- struct niter_desc *desc;
- if (!(flags & UAP_UNROLL_ALL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
- if (dump_file)
- fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
- /* nunroll = total number of copies of the original loop body in
- unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
- nunroll_by_av
- = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
- if (nunroll > nunroll_by_av)
- nunroll = nunroll_by_av;
- if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
- nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
- /* Skip big loops. */
- if (nunroll <= 1)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
- /* Check simpleness. */
- if (desc->simple_p && !desc->assumptions)
- {
- if (dump_file)
- fprintf (dump_file, ";; The loop is simple\n");
- return;
- }
- /* Do not unroll loops with branches inside -- it increases number
- of mispredicts. */
- if (num_loop_branches (loop) > 1)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not unrolling, contains branches\n");
- return;
- }
- /* If we have profile feedback, check whether the loop rolls. */
- if (loop->header->count
- && expected_loop_iterations (loop) < 2 * nunroll)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
- return;
- }
- /* Success. Now force nunroll to be power of 2, as it seems that this
- improves results (partially because of better alignments, partially
- because of some dark magic). */
- for (i = 1; 2 * i <= nunroll; i *= 2)
- continue;
- loop->lpt_decision.decision = LPT_UNROLL_STUPID;
- loop->lpt_decision.times = i - 1;
- if (dump_file)
- fprintf (dump_file,
- ";; Decided to unroll the loop stupidly, %d times.\n",
- loop->lpt_decision.times);
- }
- /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
- while (cond)
- body;
- ==>
- while (cond)
- {
- body;
- if (!cond) break;
- body;
- if (!cond) break;
- body;
- if (!cond) break;
- body;
- }
- */
- static void
- unroll_loop_stupid (struct loop *loop)
- {
- sbitmap wont_exit;
- unsigned nunroll = loop->lpt_decision.times;
- struct niter_desc *desc = get_simple_loop_desc (loop);
- struct opt_info *opt_info = NULL;
- bool ok;
- if (flag_split_ivs_in_unroller
- || flag_variable_expansion_in_unroller)
- opt_info = analyze_insns_in_loop (loop);
- wont_exit = sbitmap_alloc (nunroll + 1);
- sbitmap_zero (wont_exit);
- opt_info_start_duplication (opt_info);
- ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
- nunroll, wont_exit,
- NULL, NULL,
- DLTHE_FLAG_UPDATE_FREQ
- | (opt_info
- ? DLTHE_RECORD_COPY_NUMBER
- : 0));
- gcc_assert (ok);
- if (opt_info)
- {
- apply_opt_in_copies (opt_info, nunroll, true, true);
- free_opt_info (opt_info);
- }
- free (wont_exit);
- if (desc->simple_p)
- {
- /* We indeed may get here provided that there are nontrivial assumptions
- for a loop to be really simple. We could update the counts, but the
- problem is that we are unable to decide which exit will be taken
- (not really true in case the number of iterations is constant,
- but noone will do anything with this information, so we do not
- worry about it). */
- desc->simple_p = false;
- }
- if (dump_file)
- fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
- nunroll, num_loop_insns (loop));
- }
- /* A hash function for information about insns to split. */
- static hashval_t
- si_info_hash (const void *ivts)
- {
- return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
- }
- /* An equality functions for information about insns to split. */
- static int
- si_info_eq (const void *ivts1, const void *ivts2)
- {
- const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
- const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
- return i1->insn == i2->insn;
- }
- /* Return a hash for VES, which is really a "var_to_expand *". */
- static hashval_t
- ve_info_hash (const void *ves)
- {
- return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
- }
- /* Return true if IVTS1 and IVTS2 (which are really both of type
- "var_to_expand *") refer to the same instruction. */
- static int
- ve_info_eq (const void *ivts1, const void *ivts2)
- {
- const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
- const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
- return i1->insn == i2->insn;
- }
- /* Returns true if REG is referenced in one nondebug insn in LOOP.
- Set *DEBUG_USES to the number of debug insns that reference the
- variable. */
- bool
- referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
- int *debug_uses)
- {
- basic_block *body, bb;
- unsigned i;
- int count_ref = 0;
- rtx insn;
- body = get_loop_body (loop);
- for (i = 0; i < loop->num_nodes; i++)
- {
- bb = body[i];
- FOR_BB_INSNS (bb, insn)
- if (!rtx_referenced_p (reg, insn))
- continue;
- else if (DEBUG_INSN_P (insn))
- ++*debug_uses;
- else if (++count_ref > 1)
- break;
- }
- free (body);
- return (count_ref == 1);
- }
- /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
- static void
- reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
- {
- basic_block *body, bb;
- unsigned i;
- rtx insn;
- body = get_loop_body (loop);
- for (i = 0; debug_uses && i < loop->num_nodes; i++)
- {
- bb = body[i];
- FOR_BB_INSNS (bb, insn)
- if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
- continue;
- else
- {
- validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
- gen_rtx_UNKNOWN_VAR_LOC (), 0);
- if (!--debug_uses)
- break;
- }
- }
- free (body);
- }
- /* Determine whether INSN contains an accumulator
- which can be expanded into separate copies,
- one for each copy of the LOOP body.
- for (i = 0 ; i < n; i++)
- sum += a[i];
- ==>
- sum += a[i]
- ....
- i = i+1;
- sum1 += a[i]
- ....
- i = i+1
- sum2 += a[i];
- ....
- Return NULL if INSN contains no opportunity for expansion of accumulator.
- Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
- information and return a pointer to it.
- */
- static struct var_to_expand *
- analyze_insn_to_expand_var (struct loop *loop, rtx insn)
- {
- rtx set, dest, src, op1, op2, something;
- struct var_to_expand *ves;
- enum machine_mode mode1, mode2;
- unsigned accum_pos;
- int debug_uses = 0;
- set = single_set (insn);
- if (!set)
- return NULL;
- dest = SET_DEST (set);
- src = SET_SRC (set);
- if (GET_CODE (src) != PLUS
- && GET_CODE (src) != MINUS
- && GET_CODE (src) != MULT)
- return NULL;
- /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
- in MD. But if there is no optab to generate the insn, we can not
- perform the variable expansion. This can happen if an MD provides
- an insn but not a named pattern to generate it, for example to avoid
- producing code that needs additional mode switches like for x87/mmx.
- So we check have_insn_for which looks for an optab for the operation
- in SRC. If it doesn't exist, we can't perform the expansion even
- though INSN is valid. */
- if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
- return NULL;
- op1 = XEXP (src, 0);
- op2 = XEXP (src, 1);
- if (!REG_P (dest)
- && !(GET_CODE (dest) == SUBREG
- && REG_P (SUBREG_REG (dest))))
- return NULL;
- if (rtx_equal_p (dest, op1))
- accum_pos = 0;
- else if (rtx_equal_p (dest, op2))
- accum_pos = 1;
- else
- return NULL;
- /* The method of expansion that we are using; which includes
- the initialization of the expansions with zero and the summation of
- the expansions at the end of the computation will yield wrong results
- for (x = something - x) thus avoid using it in that case. */
- if (accum_pos == 1
- && GET_CODE (src) == MINUS)
- return NULL;
- something = (accum_pos == 0) ? op2 : op1;
- if (rtx_referenced_p (dest, something))
- return NULL;
- if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
- return NULL;
- mode1 = GET_MODE (dest);
- mode2 = GET_MODE (something);
- if ((FLOAT_MODE_P (mode1)
- || FLOAT_MODE_P (mode2))
- && !flag_associative_math)
- return NULL;
- if (dump_file)
- {
- fprintf (dump_file,
- "\n;; Expanding Accumulator ");
- print_rtl (dump_file, dest);
- fprintf (dump_file, "\n");
- }
- if (debug_uses)
- /* Instead of resetting the debug insns, we could replace each
- debug use in the loop with the sum or product of all expanded
- accummulators. Since we'll only know of all expansions at the
- end, we'd have to keep track of which vars_to_expand a debug
- insn in the loop references, take note of each copy of the
- debug insn during unrolling, and when it's all done, compute
- the sum or product of each variable and adjust the original
- debug insn and each copy thereof. What a pain! */
- reset_debug_uses_in_loop (loop, dest, debug_uses);
- /* Record the accumulator to expand. */
- ves = XNEW (struct var_to_expand);
- ves->insn = insn;
- ves->reg = copy_rtx (dest);
- ves->var_expansions = VEC_alloc (rtx, heap, 1);
- ves->next = NULL;
- ves->op = GET_CODE (src);
- ves->expansion_count = 0;
- ves->reuse_expansion = 0;
- ves->accum_pos = accum_pos;
- return ves;
- }
- /* Determine whether there is an induction variable in INSN that
- we would like to split during unrolling.
- I.e. replace
- i = i + 1;
- ...
- i = i + 1;
- ...
- i = i + 1;
- ...
- type chains by
- i0 = i + 1
- ...
- i = i0 + 1
- ...
- i = i0 + 2
- ...
- Return NULL if INSN contains no interesting IVs. Otherwise, allocate
- an IV_TO_SPLIT structure, fill it with the relevant information and return a
- pointer to it. */
- static struct iv_to_split *
- analyze_iv_to_split_insn (rtx insn)
- {
- rtx set, dest;
- struct rtx_iv iv;
- struct iv_to_split *ivts;
- bool ok;
- /* For now we just split the basic induction variables. Later this may be
- extended for example by sel…
Large files files are truncated, but you can click here to view the full file