tree-optimization/116973 - SLP permute lower heuristic and single-lane SLP

When forcing single-lane SLP to emulate non-SLP behavior we need to
disable the heuristics designed to optimize SLP loads and instead
resort to an interleaving scheme in all cases, as requested by
forcing single-lane SLP.

This fixes the remaining fallout for --param vect-force-slp=1 on x86.
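
A minimal standalone sketch of the gating this patch changes, not the actual
GCC code: keep_load_permutation_p, lanes and nloads are made-up names standing
in for the condition on SLP_TREE_LANES (load) and loads.size () in the first
hunk below, and pow2p_hwi is written out as a plain power-of-two test.  With
force_single_lane set the permutation is never kept, so lowering always uses
the interleaving scheme.

  /* Sketch only; the real condition carries further conjuncts that the
     hunk below does not show.  */
  static bool
  keep_load_permutation_p (bool force_single_lane, unsigned ld_lanes_lanes,
                           bool contiguous, unsigned lanes, unsigned nloads)
  {
    if (force_single_lane)
      /* Emulate non-SLP behavior: always lower via interleaving.  */
      return false;
    return (ld_lanes_lanes == 0
            && contiguous
            && (lanes > 1 || nloads == 1)
            && lanes != 0
            && (lanes & (lanes - 1)) == 0);
  }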

	PR tree-optimization/116973
	* tree-vect-slp.cc (vect_lower_load_permutations): Add
	force_single_lane parameter.  Disable heuristic that keeps
	some load-permutations.
	(vect_analyze_slp): Pass force_single_lane to
	vect_lower_load_permutations.
Richard Biener, 2024-11-12 10:31:30 +01:00 (committed by Richard Biener)
parent 1b35b92935
commit 0d4b254b20

--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4402,7 +4402,8 @@ vllp_cmp (const void *a_, const void *b_)
 static void
 vect_lower_load_permutations (loop_vec_info loop_vinfo,
                               scalar_stmts_to_slp_tree_map_t *bst_map,
-                              const array_slice<slp_tree> &loads)
+                              const array_slice<slp_tree> &loads,
+                              bool force_single_lane)
 {
   /* We at this point want to lower without a fixed VF or vector
      size in mind which means we cannot actually compute whether we
@@ -4494,7 +4495,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
          extracting it from the larger load.
          ??? Long-term some of the lowering should move to where
          the vector types involved are fixed. */
-      if (ld_lanes_lanes == 0
+      if (!force_single_lane
+          && ld_lanes_lanes == 0
           && contiguous
           && (SLP_TREE_LANES (load) > 1 || loads.size () == 1)
           && pow2p_hwi (SLP_TREE_LANES (load))
@@ -4668,7 +4670,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
 static void
 vect_lower_load_permutations (loop_vec_info loop_vinfo,
-                              scalar_stmts_to_slp_tree_map_t *bst_map)
+                              scalar_stmts_to_slp_tree_map_t *bst_map,
+                              bool force_single_lane)
 {
   /* Gather and sort loads across all instances. */
   hash_set<slp_tree> visited;
@@ -4696,14 +4699,16 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
           if (STMT_VINFO_GROUPED_ACCESS (a0))
             vect_lower_load_permutations (loop_vinfo, bst_map,
                                           make_array_slice (&loads[firsti],
-                                                            i - firsti));
+                                                            i - firsti),
+                                          force_single_lane);
           firsti = i;
         }
   if (firsti < loads.length ()
       && STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (loads[firsti])[0]))
     vect_lower_load_permutations (loop_vinfo, bst_map,
                                   make_array_slice (&loads[firsti],
-                                                    loads.length () - firsti));
+                                                    loads.length () - firsti),
+                                  force_single_lane);
 }
 
 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
@@ -5097,7 +5102,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
      like schemes. */
   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     {
-      vect_lower_load_permutations (loop_vinfo, bst_map);
+      vect_lower_load_permutations (loop_vinfo, bst_map, force_single_lane);
       if (dump_enabled_p ())
         {
           dump_printf_loc (MSG_NOTE, vect_location,