mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
tree-optimization/116974 - Handle single-lane SLP for OMP scan store
The following massages the GIMPLE matching way of handling scan stores to work with single-lane SLP. I do not fully understand all the cases that can happen and the stmt matching at vectorizable_store time is less than ideal - but the following gets me all the testcases to pass with and without forced SLP. Long term we want to perform the matching at SLP discovery time, properly chaining the various SLP instances the current state ends up with.

PR tree-optimization/116974
* tree-vect-stmts.cc (check_scan_store): Pass in the SLP node instead of just a flag. Allow single-lane scan stores.
(vectorizable_store): Adjust.
* tree-vect-loop.cc (vect_analyze_loop_2): Empty scan_map before re-trying.
This commit is contained in:
parent
dc90578f0b
commit
9df0772d50
@ -3369,6 +3369,8 @@ again:
|
||||
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
|
||||
= saved_can_use_partial_vectors_p;
|
||||
LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
|
||||
if (loop_vinfo->scan_map)
|
||||
loop_vinfo->scan_map->empty ();
|
||||
|
||||
goto start_over;
|
||||
}
|
||||
|
@ -7445,7 +7445,7 @@ scan_store_can_perm_p (tree vectype, tree init,
|
||||
|
||||
static bool
|
||||
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
|
||||
enum vect_def_type rhs_dt, bool slp, tree mask,
|
||||
enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
|
||||
vect_memory_access_type memory_access_type)
|
||||
{
|
||||
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
|
||||
@ -7453,7 +7453,7 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
|
||||
tree ref_type;
|
||||
|
||||
gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
|
||||
if (slp
|
||||
if ((slp_node && SLP_TREE_LANES (slp_node) > 1)
|
||||
|| mask
|
||||
|| memory_access_type != VMAT_CONTIGUOUS
|
||||
|| TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
|
||||
@ -7848,8 +7848,8 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
|
||||
Handle only the transformation, checking is done in check_scan_store. */
|
||||
|
||||
static bool
|
||||
vectorizable_scan_store (vec_info *vinfo,
|
||||
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||
vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
|
||||
slp_tree slp_node, gimple_stmt_iterator *gsi,
|
||||
gimple **vec_stmt, int ncopies)
|
||||
{
|
||||
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
|
||||
@ -7961,16 +7961,34 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
tree orig = NULL_TREE;
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
|
||||
ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
|
||||
auto_vec<tree> vec_oprnds1;
|
||||
/* The initialization is invariant. */
|
||||
vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
|
||||
auto_vec<tree> vec_oprnds2;
|
||||
auto_vec<tree> vec_oprnds3;
|
||||
vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
|
||||
*init, &vec_oprnds1,
|
||||
ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
|
||||
rhs2, &vec_oprnds3);
|
||||
for (int j = 0; j < ncopies; j++)
|
||||
if (ldataref_ptr == NULL)
|
||||
{
|
||||
/* We want to lookup the vector operands of the reduction, not those
|
||||
of the store - for SLP we have to use the proper SLP node for the
|
||||
lookup, which should be the single child of the scan store. */
|
||||
vect_get_vec_defs (vinfo, stmt_info, SLP_TREE_CHILDREN (slp_node)[0],
|
||||
ncopies, rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
|
||||
/* ??? For SLP we do not key the def on 'rhs1' or 'rhs2' but get
|
||||
them in SLP child order. So we have to swap here with logic
|
||||
similar to above. */
|
||||
stmt_vec_info load
|
||||
= SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
|
||||
(SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
|
||||
dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
|
||||
tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
|
||||
if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
|
||||
for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
|
||||
std::swap (vec_oprnds2[i], vec_oprnds3[i]);;
|
||||
}
|
||||
else
|
||||
vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
|
||||
rhs2, &vec_oprnds3);
|
||||
for (unsigned j = 0; j < vec_oprnds3.length (); j++)
|
||||
{
|
||||
vec_oprnd1 = vec_oprnds1[j];
|
||||
if (ldataref_ptr == NULL)
|
||||
vec_oprnd2 = vec_oprnds2[j];
|
||||
vec_oprnd3 = vec_oprnds3[j];
|
||||
@ -7988,8 +8006,11 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
|
||||
gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
|
||||
if (! slp_node)
|
||||
{
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
|
||||
}
|
||||
}
|
||||
|
||||
tree v = vec_oprnd2;
|
||||
@ -8003,8 +8024,11 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
? zero_vec : vec_oprnd1, v,
|
||||
perms[i]);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
|
||||
if (! slp_node)
|
||||
{
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
|
||||
}
|
||||
|
||||
if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
|
||||
{
|
||||
@ -8021,7 +8045,8 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
new_temp, vec_oprnd1);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info,
|
||||
g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
new_temp = new_temp2;
|
||||
}
|
||||
|
||||
@ -8039,7 +8064,8 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
tree new_temp2 = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (new_temp2, code, v, new_temp);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
|
||||
v = new_temp2;
|
||||
}
|
||||
@ -8047,7 +8073,8 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
tree new_temp = make_ssa_name (vectype);
|
||||
gimple *g = gimple_build_assign (new_temp, code, orig, v);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
|
||||
tree last_perm_arg = new_temp;
|
||||
/* For exclusive scan, new_temp computed above is the exclusive scan
|
||||
@ -8058,14 +8085,16 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
last_perm_arg = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
}
|
||||
|
||||
orig = make_ssa_name (vectype);
|
||||
g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
|
||||
last_perm_arg, perms[units_log2]);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
|
||||
if (!inscan_var_store)
|
||||
{
|
||||
@ -8075,12 +8104,13 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
g = gimple_build_assign (data_ref, new_temp);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
}
|
||||
}
|
||||
|
||||
if (inscan_var_store)
|
||||
for (int j = 0; j < ncopies; j++)
|
||||
for (unsigned j = 0; j < vec_oprnds3.length (); j++)
|
||||
{
|
||||
if (j != 0)
|
||||
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
|
||||
@ -8091,7 +8121,8 @@ vectorizable_scan_store (vec_info *vinfo,
|
||||
vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
|
||||
gimple *g = gimple_build_assign (data_ref, orig);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
if (! slp_node)
|
||||
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -8308,7 +8339,7 @@ vectorizable_store (vec_info *vinfo,
|
||||
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
|
||||
{
|
||||
if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
|
||||
if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask,
|
||||
memory_access_type))
|
||||
return false;
|
||||
}
|
||||
@ -8366,7 +8397,7 @@ vectorizable_store (vec_info *vinfo,
|
||||
if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
|
||||
{
|
||||
gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
|
||||
gcc_assert (!slp);
|
||||
gcc_assert (!slp || SLP_TREE_LANES (slp_node) == 1);
|
||||
if (costing_p)
|
||||
{
|
||||
unsigned int inside_cost = 0, prologue_cost = 0;
|
||||
@ -8385,7 +8416,8 @@ vectorizable_store (vec_info *vinfo,
|
||||
|
||||
return true;
|
||||
}
|
||||
return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
|
||||
return vectorizable_scan_store (vinfo, stmt_info, slp_node,
|
||||
gsi, vec_stmt, ncopies);
|
||||
}
|
||||
|
||||
if (grouped_store || slp)
|
||||
|
Loading…
Reference in New Issue
Block a user