mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
openmp: Tune omp_max_vf for offload targets
If requested, return the vectorization factor appropriate for the offload device, if any. This change gives a significant speedup in the BabelStream "dot" benchmark on amdgcn. The omp_adjust_chunk_size use case passes "false" for now, but I intend to change that in a follow-up patch. Note that NVPTX SIMT offload does not use this code path. gcc/ChangeLog: * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Call omp_max_vf with offload == false. * omp-expand.cc (omp_adjust_chunk_size): Likewise. * omp-general.cc (omp_max_vf): Add "offload" parameter, and detect amdgcn offload devices. * omp-general.h (omp_max_vf): Likewise. * omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to omp_max_vf.
This commit is contained in:
parent
137b26412f
commit
5c9de3df85
@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn)
|
||||
handled efficiently by scalar code. omp_max_vf calculates the
|
||||
maximum number of bytes in a vector, when such a value is relevant
|
||||
to loop optimization. */
|
||||
m_maximum_scale = estimated_poly_value (omp_max_vf ());
|
||||
m_maximum_scale = estimated_poly_value (omp_max_vf (false));
|
||||
m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
|
||||
}
|
||||
|
||||
|
@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
|
||||
if (!simd_schedule || integer_zerop (chunk_size))
|
||||
return chunk_size;
|
||||
|
||||
poly_uint64 vf = omp_max_vf ();
|
||||
poly_uint64 vf = omp_max_vf (false);
|
||||
if (known_eq (vf, 1U))
|
||||
return chunk_size;
|
||||
|
||||
|
@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Return maximum possible vectorization factor for the target. */
|
||||
/* Return maximum possible vectorization factor for the target, or for
|
||||
the OpenMP offload target if one exists. */
|
||||
|
||||
poly_uint64
|
||||
omp_max_vf (void)
|
||||
omp_max_vf (bool offload)
|
||||
{
|
||||
if (!optimize
|
||||
|| optimize_debug
|
||||
@ -999,6 +1000,18 @@ omp_max_vf (void)
|
||||
&& OPTION_SET_P (flag_tree_loop_vectorize)))
|
||||
return 1;
|
||||
|
||||
if (ENABLE_OFFLOADING && offload)
|
||||
{
|
||||
for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
|
||||
{
|
||||
if (startswith (c, "amdgcn"))
|
||||
return ordered_max (64, omp_max_vf (false));
|
||||
else if ((c = strchr (c, ':')))
|
||||
c++;
|
||||
}
|
||||
/* Otherwise, fall through to host VF. */
|
||||
}
|
||||
|
||||
auto_vector_modes modes;
|
||||
targetm.vectorize.autovectorize_vector_modes (&modes, true);
|
||||
if (!modes.is_empty ())
|
||||
|
@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
|
||||
struct omp_for_data_loop *loops);
|
||||
extern gimple *omp_build_barrier (tree lhs);
|
||||
extern tree find_combined_omp_for (tree *, int *, void *);
|
||||
extern poly_uint64 omp_max_vf (void);
|
||||
extern poly_uint64 omp_max_vf (bool);
|
||||
extern int omp_max_simt_vf (void);
|
||||
extern const char *omp_context_name_list_prop (tree);
|
||||
extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);
|
||||
|
@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
||||
{
|
||||
if (known_eq (sctx->max_vf, 0U))
|
||||
{
|
||||
sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
|
||||
sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf ()
|
||||
: omp_max_vf (omp_maybe_offloaded_ctx (ctx)));
|
||||
if (maybe_gt (sctx->max_vf, 1U))
|
||||
{
|
||||
tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
|
||||
|
Loading…
Reference in New Issue
Block a user