mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
Fall back to single-lane SLP before falling back to no SLP
The following changes the fallback that disables SLP when any of the discovered SLP instances fails vectorization checking into a fallback that emulates, with SLP, what no SLP would do - forcing single-lane discovery for all instances. The patch does not remove the final fallback to disable SLP but it reduces the fallout from failing vectorization when any non-SLP stmt survives analysis. * tree-vectorizer.h (vect_analyze_slp): Add force_single_lane parameter. * tree-vect-slp.cc (vect_analyze_slp_instance): Remove defaulting of force_single_lane. (vect_build_slp_instance): Likewise. Pass down appropriate force_single_lane. (vect_analyze_slp): Add force_single_lane parameter and pass it down appropriately. (vect_slp_analyze_bb_1): Always do multi-lane SLP. * tree-vect-loop.cc (vect_analyze_loop_2): Track two SLP modes and adjust accordingly. (vect_analyze_loop_1): Save the SLP mode when unrolling. * gcc.dg/vect/vect-outer-slp-1.c: Adjust.
This commit is contained in:
parent
d3a7302ec5
commit
77bd23a3e2
@ -29,4 +29,4 @@ void foo (void)
|
||||
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
|
||||
/* We don't yet support SLP inductions for variable length vectors. */
|
||||
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { xfail vect_variable_length } } } */
|
||||
/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-not " = VEC_PERM_EXPR" "vect" } } */
|
||||
|
@ -2718,7 +2718,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
|
||||
static opt_result
|
||||
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
|
||||
unsigned *suggested_unroll_factor,
|
||||
bool& slp_done_for_suggested_uf)
|
||||
unsigned& slp_done_for_suggested_uf)
|
||||
{
|
||||
opt_result ok = opt_result::success ();
|
||||
int res;
|
||||
@ -2787,11 +2787,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
|
||||
/* If the slp decision is false when suggested unroll factor is worked
|
||||
out, and we are applying suggested unroll factor, we can simply skip
|
||||
all slp related analyses this time. */
|
||||
bool slp = !applying_suggested_uf || slp_done_for_suggested_uf;
|
||||
unsigned slp = !applying_suggested_uf ? 2 : slp_done_for_suggested_uf;
|
||||
|
||||
/* Classify all cross-iteration scalar data-flow cycles.
|
||||
Cross-iteration cycles caused by virtual phis are analyzed separately. */
|
||||
vect_analyze_scalar_cycles (loop_vinfo, slp);
|
||||
vect_analyze_scalar_cycles (loop_vinfo, slp == 2);
|
||||
|
||||
vect_pattern_recog (loop_vinfo);
|
||||
|
||||
@ -2854,18 +2854,23 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
|
||||
vect_compute_single_scalar_iteration_cost (loop_vinfo);
|
||||
|
||||
poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
bool saved_can_use_partial_vectors_p
|
||||
= LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
|
||||
|
||||
/* This is the point where we can re-start analysis with SLP forced off. */
|
||||
start_over:
|
||||
|
||||
if (slp)
|
||||
{
|
||||
/* Check the SLP opportunities in the loop, analyze and build
|
||||
SLP trees. */
|
||||
ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
|
||||
ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo),
|
||||
slp == 1);
|
||||
if (!ok)
|
||||
return ok;
|
||||
|
||||
/* If there are any SLP instances mark them as pure_slp. */
|
||||
slp = vect_make_slp_decision (loop_vinfo);
|
||||
if (slp)
|
||||
if (vect_make_slp_decision (loop_vinfo))
|
||||
{
|
||||
/* Find stmts that need to be both vectorized and SLPed. */
|
||||
vect_detect_hybrid_slp (loop_vinfo);
|
||||
@ -2881,16 +2886,10 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
|
||||
}
|
||||
}
|
||||
|
||||
bool saved_can_use_partial_vectors_p
|
||||
= LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
|
||||
|
||||
/* We don't expect to have to roll back to anything other than an empty
|
||||
set of rgroups. */
|
||||
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
|
||||
|
||||
/* This is the point where we can re-start analysis with SLP forced off. */
|
||||
start_over:
|
||||
|
||||
/* When we arrive here with SLP disabled and we are supposed
|
||||
to use SLP for everything fail vectorization. */
|
||||
if (!slp && param_vect_force_slp)
|
||||
@ -3218,15 +3217,14 @@ again:
|
||||
/* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
|
||||
gcc_assert (!ok);
|
||||
|
||||
/* Try again with SLP forced off but if we didn't do any SLP there is
|
||||
/* Try again with SLP degraded but if we didn't do any SLP there is
|
||||
no point in re-trying. */
|
||||
if (!slp)
|
||||
return ok;
|
||||
|
||||
/* If the slp decision is true when suggested unroll factor is worked
|
||||
out, and we are applying suggested unroll factor, we don't need to
|
||||
re-try any more. */
|
||||
if (applying_suggested_uf && slp_done_for_suggested_uf)
|
||||
/* If we are applying suggested unroll factor, we don't need to
|
||||
re-try any more as we want to keep the SLP mode fixed. */
|
||||
if (applying_suggested_uf)
|
||||
return ok;
|
||||
|
||||
/* If there are reduction chains re-trying will fail anyway. */
|
||||
@ -3271,11 +3269,18 @@ again:
|
||||
}
|
||||
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
if (slp)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"re-trying with single-lane SLP\n");
|
||||
else
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"re-trying with SLP disabled\n");
|
||||
}
|
||||
|
||||
/* Roll back state appropriately. No SLP this time. */
|
||||
slp = false;
|
||||
/* Roll back state appropriately. Degrade SLP this time. From multi-
|
||||
to single-lane to disabled. */
|
||||
--slp;
|
||||
/* Restore vectorization factor as it were without SLP. */
|
||||
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
|
||||
/* Free the SLP instances. */
|
||||
@ -3420,7 +3425,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
|
||||
machine_mode vector_mode = vector_modes[mode_i];
|
||||
loop_vinfo->vector_mode = vector_mode;
|
||||
unsigned int suggested_unroll_factor = 1;
|
||||
bool slp_done_for_suggested_uf = false;
|
||||
unsigned slp_done_for_suggested_uf = 0;
|
||||
|
||||
/* Run the main analysis. */
|
||||
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
|
||||
|
@ -3488,7 +3488,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
stmt_vec_info stmt_info, slp_instance_kind kind,
|
||||
unsigned max_tree_size, unsigned *limit,
|
||||
bool force_single_lane = false);
|
||||
bool force_single_lane);
|
||||
|
||||
/* Build an interleaving scheme for the store sources RHS_NODES from
|
||||
SCALAR_STMTS. */
|
||||
@ -3684,7 +3684,7 @@ vect_build_slp_instance (vec_info *vinfo,
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
/* ??? We need stmt_info for group splitting. */
|
||||
stmt_vec_info stmt_info_,
|
||||
bool force_single_lane = false)
|
||||
bool force_single_lane)
|
||||
{
|
||||
/* If there's no budget left bail out early. */
|
||||
if (*limit == 0)
|
||||
@ -3891,7 +3891,7 @@ vect_build_slp_instance (vec_info *vinfo,
|
||||
group1_size);
|
||||
bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
|
||||
kind, max_tree_size,
|
||||
limit);
|
||||
limit, false);
|
||||
/* Split the rest at the failure point and possibly
|
||||
re-analyze the remaining matching part if it has
|
||||
at least two lanes. */
|
||||
@ -3904,14 +3904,14 @@ vect_build_slp_instance (vec_info *vinfo,
|
||||
if (i - group1_size > 1)
|
||||
res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
|
||||
kind, max_tree_size,
|
||||
limit);
|
||||
limit, false);
|
||||
}
|
||||
/* Re-analyze the non-matching tail if it has at least
|
||||
two lanes. */
|
||||
if (i + 1 < group_size)
|
||||
res |= vect_analyze_slp_instance (vinfo, bst_map,
|
||||
rest, kind, max_tree_size,
|
||||
limit);
|
||||
limit, false);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@ -4544,7 +4544,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
trees of packed scalar stmts if SLP is possible. */
|
||||
|
||||
opt_result
|
||||
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
|
||||
bool force_single_lane)
|
||||
{
|
||||
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
|
||||
unsigned int i;
|
||||
@ -4561,7 +4562,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
/* Find SLP sequences starting from groups of grouped stores. */
|
||||
FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
|
||||
vect_analyze_slp_instance (vinfo, bst_map, first_element,
|
||||
slp_inst_kind_store, max_tree_size, &limit);
|
||||
slp_inst_kind_store, max_tree_size, &limit,
|
||||
force_single_lane);
|
||||
|
||||
/* For loops also start SLP discovery from non-grouped stores. */
|
||||
if (loop_vinfo)
|
||||
@ -4581,7 +4583,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
stmts.quick_push (stmt_info);
|
||||
vect_build_slp_instance (vinfo, slp_inst_kind_store,
|
||||
stmts, roots, remain, max_tree_size,
|
||||
&limit, bst_map, NULL);
|
||||
&limit, bst_map, NULL, force_single_lane);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4598,7 +4600,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
bb_vinfo->roots[i].stmts,
|
||||
bb_vinfo->roots[i].roots,
|
||||
bb_vinfo->roots[i].remain,
|
||||
max_tree_size, &limit, bst_map, NULL))
|
||||
max_tree_size, &limit, bst_map, NULL,
|
||||
false))
|
||||
{
|
||||
bb_vinfo->roots[i].stmts = vNULL;
|
||||
bb_vinfo->roots[i].roots = vNULL;
|
||||
@ -4614,9 +4617,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
if (! STMT_VINFO_RELEVANT_P (first_element)
|
||||
&& ! STMT_VINFO_LIVE_P (first_element))
|
||||
;
|
||||
else if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
|
||||
else if (force_single_lane
|
||||
|| ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
|
||||
slp_inst_kind_reduc_chain,
|
||||
max_tree_size, &limit))
|
||||
max_tree_size, &limit,
|
||||
force_single_lane))
|
||||
{
|
||||
/* Dissolve reduction chain group. */
|
||||
stmt_vec_info vinfo = first_element;
|
||||
@ -4656,7 +4661,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
{
|
||||
/* Do not discover SLP reductions combining lane-reducing
|
||||
ops, that will fail later. */
|
||||
if (!lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
|
||||
if (!force_single_lane
|
||||
&& !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
|
||||
scalar_stmts.quick_push (next_info);
|
||||
else
|
||||
{
|
||||
@ -4670,7 +4676,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
slp_inst_kind_reduc_group,
|
||||
stmts, roots, remain,
|
||||
max_tree_size, &limit,
|
||||
bst_map, NULL);
|
||||
bst_map, NULL,
|
||||
force_single_lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4683,7 +4690,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
slp_inst_kind_reduc_group,
|
||||
scalar_stmts, roots, remain,
|
||||
max_tree_size, &limit, bst_map,
|
||||
NULL))
|
||||
NULL, force_single_lane))
|
||||
{
|
||||
if (scalar_stmts.length () <= 1)
|
||||
scalar_stmts.release ();
|
||||
@ -4699,7 +4706,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
slp_inst_kind_reduc_group,
|
||||
stmts, roots, remain,
|
||||
max_tree_size, &limit,
|
||||
bst_map, NULL);
|
||||
bst_map, NULL, force_single_lane);
|
||||
}
|
||||
saved_stmts.release ();
|
||||
}
|
||||
@ -4731,7 +4738,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
slp_inst_kind_reduc_group,
|
||||
stmts, roots, remain,
|
||||
max_tree_size, &limit,
|
||||
bst_map, NULL);
|
||||
bst_map, NULL, force_single_lane);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -8934,7 +8941,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
|
||||
|
||||
/* Check the SLP opportunities in the basic block, analyze and build SLP
|
||||
trees. */
|
||||
if (!vect_analyze_slp (bb_vinfo, n_stmts))
|
||||
if (!vect_analyze_slp (bb_vinfo, n_stmts, false))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
|
@ -2538,7 +2538,7 @@ extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree>
|
||||
unsigned * = nullptr, bool = false);
|
||||
extern bool vect_slp_analyze_operations (vec_info *);
|
||||
extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
|
||||
extern opt_result vect_analyze_slp (vec_info *, unsigned);
|
||||
extern opt_result vect_analyze_slp (vec_info *, unsigned, bool);
|
||||
extern bool vect_make_slp_decision (loop_vec_info);
|
||||
extern void vect_detect_hybrid_slp (loop_vec_info);
|
||||
extern void vect_optimize_slp (vec_info *);
|
||||
|
Loading…
Reference in New Issue
Block a user