openmp: Add IFN_GOMP_MAX_VF

Delay omp_max_vf call until after the host and device compilers have diverged
so that the max_vf value can be tuned exactly right on both variants.

This change means that the ompdevlow pass must be enabled for functions that
use OpenMP directives with both "simd" and "schedule" enabled.

gcc/ChangeLog:

	* internal-fn.cc (expand_GOMP_MAX_VF): New function.
	* internal-fn.def (GOMP_MAX_VF): New internal function.
	* omp-expand.cc (omp_adjust_chunk_size): Emit IFN_GOMP_MAX_VF when
	called in offload context, otherwise assume host context.
	* omp-offload.cc (execute_omp_device_lower): Expand IFN_GOMP_MAX_VF.
This commit is contained in:
Andrew Stubbs 2024-11-01 15:00:25 +00:00
parent 896c6c2893
commit 2a2e6e9894
4 changed files with 34 additions and 8 deletions

View File

@ -510,6 +510,14 @@ expand_GOMP_SIMT_VF (internal_fn, gcall *)
/* IFN_GOMP_MAX_VF calls are expected to have been replaced by the
   omp_device_lower pass already, so this expander must never be
   reached.  */
static void
expand_GOMP_MAX_VF (internal_fn, gcall *)
{
  gcc_unreachable ();
}
/* This should get expanded in omp_device_lower pass. */
static void
expand_GOMP_TARGET_REV (internal_fn, gcall *)
{

View File

@ -465,6 +465,7 @@ DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
/* GOMP_MAX_VF is a placeholder that the omp_device_lower pass replaces with
   a constant, like GOMP_SIMT_VF; keep its flags identical to that entry.  */
DEF_INTERNAL_FN (GOMP_MAX_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)

View File

@ -229,15 +229,29 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule, bool offload)
if (!simd_schedule || integer_zerop (chunk_size))
return chunk_size;
poly_uint64 vf = omp_max_vf (offload);
if (known_eq (vf, 1U))
return chunk_size;
tree vf;
tree type = TREE_TYPE (chunk_size);
chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
build_int_cst (type, vf - 1));
return fold_build2 (BIT_AND_EXPR, type, chunk_size,
build_int_cst (type, -vf));
if (offload)
{
cfun->curr_properties &= ~PROP_gimple_lomp_dev;
vf = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_MAX_VF,
unsigned_type_node, 0);
vf = fold_convert (type, vf);
}
else
{
poly_uint64 vf_num = omp_max_vf (false);
if (known_eq (vf_num, 1U))
return chunk_size;
vf = build_int_cst (type, vf_num);
}
tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf,
build_int_cst (type, 1));
tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf);
chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one);
return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf);
}
/* Collect additional arguments needed to emit a combined

View File

@ -2754,6 +2754,9 @@ execute_omp_device_lower ()
case IFN_GOMP_SIMT_VF:
rhs = build_int_cst (type, vf);
break;
case IFN_GOMP_MAX_VF:
rhs = build_int_cst (type, omp_max_vf (false));
break;
case IFN_GOMP_SIMT_ORDERED_PRED:
rhs = vf == 1 ? integer_zero_node : NULL_TREE;
if (rhs || !lhs)