openmp: Tune omp_max_vf for offload targets

If the caller requests it, omp_max_vf returns the vectorization factor
appropriate for the offload device, if there is one.

This change gives a significant speedup in the BabelStream "dot" benchmark on
amdgcn.

The omp_adjust_chunk_size use case passes "false" for now, but I intend to
change that in a follow-up patch.

Note that NVPTX SIMT offload does not use this code-path.

gcc/ChangeLog:

	* gimple-loop-versioning.cc (loop_versioning::loop_versioning): Call
	omp_max_vf with offload == false.
	* omp-expand.cc (omp_adjust_chunk_size): Likewise.
	* omp-general.cc (omp_max_vf): Add "offload" parameter, and detect
	amdgcn offload devices.
	* omp-general.h (omp_max_vf): Likewise.
	* omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to
	omp_max_vf.
This commit is contained in:
Andrew Stubbs 2024-10-21 12:29:54 +00:00
parent 137b26412f
commit 5c9de3df85
5 changed files with 20 additions and 6 deletions

View File

@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn)
handled efficiently by scalar code. omp_max_vf calculates the handled efficiently by scalar code. omp_max_vf calculates the
maximum number of bytes in a vector, when such a value is relevant maximum number of bytes in a vector, when such a value is relevant
to loop optimization. */ to loop optimization. */
m_maximum_scale = estimated_poly_value (omp_max_vf ()); m_maximum_scale = estimated_poly_value (omp_max_vf (false));
m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
} }

View File

@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
if (!simd_schedule || integer_zerop (chunk_size)) if (!simd_schedule || integer_zerop (chunk_size))
return chunk_size; return chunk_size;
poly_uint64 vf = omp_max_vf (); poly_uint64 vf = omp_max_vf (false);
if (known_eq (vf, 1U)) if (known_eq (vf, 1U))
return chunk_size; return chunk_size;

View File

@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
return NULL_TREE; return NULL_TREE;
} }
/* Return maximum possible vectorization factor for the target. */ /* Return maximum possible vectorization factor for the target, or for
the OpenMP offload target if one exists. */
poly_uint64 poly_uint64
omp_max_vf (void) omp_max_vf (bool offload)
{ {
if (!optimize if (!optimize
|| optimize_debug || optimize_debug
@ -999,6 +1000,18 @@ omp_max_vf (void)
&& OPTION_SET_P (flag_tree_loop_vectorize))) && OPTION_SET_P (flag_tree_loop_vectorize)))
return 1; return 1;
if (ENABLE_OFFLOADING && offload)
{
for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
{
if (startswith (c, "amdgcn"))
return ordered_max (64, omp_max_vf (false));
else if ((c = strchr (c, ':')))
c++;
}
/* Otherwise, fall through to host VF. */
}
auto_vector_modes modes; auto_vector_modes modes;
targetm.vectorize.autovectorize_vector_modes (&modes, true); targetm.vectorize.autovectorize_vector_modes (&modes, true);
if (!modes.is_empty ()) if (!modes.is_empty ())

View File

@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
struct omp_for_data_loop *loops); struct omp_for_data_loop *loops);
extern gimple *omp_build_barrier (tree lhs); extern gimple *omp_build_barrier (tree lhs);
extern tree find_combined_omp_for (tree *, int *, void *); extern tree find_combined_omp_for (tree *, int *, void *);
extern poly_uint64 omp_max_vf (void); extern poly_uint64 omp_max_vf (bool);
extern int omp_max_simt_vf (void); extern int omp_max_simt_vf (void);
extern const char *omp_context_name_list_prop (tree); extern const char *omp_context_name_list_prop (tree);
extern void omp_construct_traits_to_codes (tree, int, enum tree_code *); extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);

View File

@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
{ {
if (known_eq (sctx->max_vf, 0U)) if (known_eq (sctx->max_vf, 0U))
{ {
sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf ()
: omp_max_vf (omp_maybe_offloaded_ctx (ctx)));
if (maybe_gt (sctx->max_vf, 1U)) if (maybe_gt (sctx->max_vf, 1U))
{ {
tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),