mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
openmp: Add IFN_GOMP_MAX_VF
Delay omp_max_vf call until after the host and device compilers have diverged
so that the max_vf value can be tuned exactly right on both variants.

This change means that the ompdevlow pass must be enabled for functions that
use OpenMP directives with both "simd" and "schedule" enabled.

gcc/ChangeLog:

	* internal-fn.cc (expand_GOMP_MAX_VF): New function.
	* internal-fn.def (GOMP_MAX_VF): New internal function.
	* omp-expand.cc (omp_adjust_chunk_size): Emit IFN_GOMP_MAX_VF when called
	in offload context, otherwise assume host context.
	* omp-offload.cc (execute_omp_device_lower): Expand IFN_GOMP_MAX_VF.
This commit is contained in:
parent
896c6c2893
commit
2a2e6e9894
@ -510,6 +510,14 @@ expand_GOMP_SIMT_VF (internal_fn, gcall *)
|
|||||||
|
|
||||||
/* This should get expanded in omp_device_lower pass. */
|
/* This should get expanded in omp_device_lower pass. */
|
||||||
|
|
||||||
|
/* Expander for the GOMP_MAX_VF internal function.  Both parameters are
   unnamed and unused: the body only calls gcc_unreachable, because the
   call is expected to have been expanded earlier, in the
   omp_device_lower pass.  */
static void
|
||||||
|
expand_GOMP_MAX_VF (internal_fn, gcall *)
|
||||||
|
{
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This should get expanded in omp_device_lower pass. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
expand_GOMP_TARGET_REV (internal_fn, gcall *)
|
expand_GOMP_TARGET_REV (internal_fn, gcall *)
|
||||||
{
|
{
|
||||||
|
@ -465,6 +465,7 @@ DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
|
|||||||
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
DEF_INTERNAL_FN (GOMP_MAX_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
|
||||||
|
@ -229,15 +229,29 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule, bool offload)
|
|||||||
if (!simd_schedule || integer_zerop (chunk_size))
|
if (!simd_schedule || integer_zerop (chunk_size))
|
||||||
return chunk_size;
|
return chunk_size;
|
||||||
|
|
||||||
poly_uint64 vf = omp_max_vf (offload);
|
tree vf;
|
||||||
if (known_eq (vf, 1U))
|
|
||||||
return chunk_size;
|
|
||||||
|
|
||||||
tree type = TREE_TYPE (chunk_size);
|
tree type = TREE_TYPE (chunk_size);
|
||||||
chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
|
|
||||||
build_int_cst (type, vf - 1));
|
if (offload)
|
||||||
return fold_build2 (BIT_AND_EXPR, type, chunk_size,
|
{
|
||||||
build_int_cst (type, -vf));
|
cfun->curr_properties &= ~PROP_gimple_lomp_dev;
|
||||||
|
vf = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_MAX_VF,
|
||||||
|
unsigned_type_node, 0);
|
||||||
|
vf = fold_convert (type, vf);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
poly_uint64 vf_num = omp_max_vf (false);
|
||||||
|
if (known_eq (vf_num, 1U))
|
||||||
|
return chunk_size;
|
||||||
|
vf = build_int_cst (type, vf_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf,
|
||||||
|
build_int_cst (type, 1));
|
||||||
|
tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf);
|
||||||
|
chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one);
|
||||||
|
return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Collect additional arguments needed to emit a combined
|
/* Collect additional arguments needed to emit a combined
|
||||||
|
@ -2754,6 +2754,9 @@ execute_omp_device_lower ()
|
|||||||
case IFN_GOMP_SIMT_VF:
|
case IFN_GOMP_SIMT_VF:
|
||||||
rhs = build_int_cst (type, vf);
|
rhs = build_int_cst (type, vf);
|
||||||
break;
|
break;
|
||||||
|
case IFN_GOMP_MAX_VF:
|
||||||
|
rhs = build_int_cst (type, omp_max_vf (false));
|
||||||
|
break;
|
||||||
case IFN_GOMP_SIMT_ORDERED_PRED:
|
case IFN_GOMP_SIMT_ORDERED_PRED:
|
||||||
rhs = vf == 1 ? integer_zero_node : NULL_TREE;
|
rhs = vf == 1 ? integer_zero_node : NULL_TREE;
|
||||||
if (rhs || !lhs)
|
if (rhs || !lhs)
|
||||||
|
Loading…
Reference in New Issue
Block a user