mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
tree-optimization/116973 - SLP permute lower heuristic and single-lane SLP
When forcing single-lane SLP to emulate non-SLP behavior, we need to disable heuristics designed to optimize SLP loads and instead in all cases resort to an interleaving scheme, as requested by forcefully doing single-lane SLP.

This fixes the remaining fallout for --param vect-force-slp=1 on x86.

	PR tree-optimization/116973
	* tree-vect-slp.cc (vect_lower_load_permutations): Add force_single_lane parameter.  Disable heuristic that keeps some load-permutations.
	(vect_analyze_slp): Pass force_single_lane to vect_lower_load_permutations.
This commit is contained in:
parent
1b35b92935
commit
0d4b254b20
@ -4402,7 +4402,8 @@ vllp_cmp (const void *a_, const void *b_)
|
||||
static void
|
||||
vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
const array_slice<slp_tree> &loads)
|
||||
const array_slice<slp_tree> &loads,
|
||||
bool force_single_lane)
|
||||
{
|
||||
/* We at this point want to lower without a fixed VF or vector
|
||||
size in mind which means we cannot actually compute whether we
|
||||
@ -4494,7 +4495,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
extracting it from the larger load.
|
||||
??? Long-term some of the lowering should move to where
|
||||
the vector types involved are fixed. */
|
||||
if (ld_lanes_lanes == 0
|
||||
if (!force_single_lane
|
||||
&& ld_lanes_lanes == 0
|
||||
&& contiguous
|
||||
&& (SLP_TREE_LANES (load) > 1 || loads.size () == 1)
|
||||
&& pow2p_hwi (SLP_TREE_LANES (load))
|
||||
@ -4668,7 +4670,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
|
||||
static void
|
||||
vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map)
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
bool force_single_lane)
|
||||
{
|
||||
/* Gather and sort loads across all instances. */
|
||||
hash_set<slp_tree> visited;
|
||||
@ -4696,14 +4699,16 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
|
||||
if (STMT_VINFO_GROUPED_ACCESS (a0))
|
||||
vect_lower_load_permutations (loop_vinfo, bst_map,
|
||||
make_array_slice (&loads[firsti],
|
||||
i - firsti));
|
||||
i - firsti),
|
||||
force_single_lane);
|
||||
firsti = i;
|
||||
}
|
||||
if (firsti < loads.length ()
|
||||
&& STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (loads[firsti])[0]))
|
||||
vect_lower_load_permutations (loop_vinfo, bst_map,
|
||||
make_array_slice (&loads[firsti],
|
||||
loads.length () - firsti));
|
||||
loads.length () - firsti),
|
||||
force_single_lane);
|
||||
}
|
||||
|
||||
/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
|
||||
@ -5097,7 +5102,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
|
||||
like schemes. */
|
||||
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
|
||||
{
|
||||
vect_lower_load_permutations (loop_vinfo, bst_map);
|
||||
vect_lower_load_permutations (loop_vinfo, bst_map, force_single_lane);
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
|
Loading…
Reference in New Issue
Block a user