From 2a2e6e9894f42fef9315aaad80c36843718ca0cb Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Fri, 1 Nov 2024 15:00:25 +0000 Subject: [PATCH] openmp: Add IFN_GOMP_MAX_VF Delay omp_max_vf call until after the host and device compilers have diverged so that the max_vf value can be tuned exactly right on both variants. This change means that the ompdevlow pass must be enabled for functions that use OpenMP directives with both "simd" and "schedule" enabled. gcc/ChangeLog: * internal-fn.cc (expand_GOMP_MAX_VF): New function. * internal-fn.def (GOMP_MAX_VF): New internal function. * omp-expand.cc (omp_adjust_chunk_size): Emit IFN_GOMP_MAX_VF when called in offload context, otherwise assume host context. * omp-offload.cc (execute_omp_device_lower): Expand IFN_GOMP_MAX_VF. --- gcc/internal-fn.cc | 8 ++++++++ gcc/internal-fn.def | 1 + gcc/omp-expand.cc | 30 ++++++++++++++++++++++-------- gcc/omp-offload.cc | 3 +++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 1b3fe7be047..0ee5f5bc7c5 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -510,6 +510,14 @@ expand_GOMP_SIMT_VF (internal_fn, gcall *) /* This should get expanded in omp_device_lower pass. */ +static void +expand_GOMP_MAX_VF (internal_fn, gcall *) +{ + gcc_unreachable (); +} + +/* This should get expanded in omp_device_lower pass. */ + static void expand_GOMP_TARGET_REV (internal_fn, gcall *) { diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 2d455938271..c3d0efc0f2c 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -465,6 +465,7 @@ DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_MAX_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc index b0f9d375b6c..80fb1843445 100644 --- a/gcc/omp-expand.cc +++ b/gcc/omp-expand.cc @@ -229,15 +229,29 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule, bool offload) if (!simd_schedule || integer_zerop (chunk_size)) return chunk_size; - poly_uint64 vf = omp_max_vf (offload); - if (known_eq (vf, 1U)) - return chunk_size; - + tree vf; tree type = TREE_TYPE (chunk_size); - chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, - build_int_cst (type, vf - 1)); - return fold_build2 (BIT_AND_EXPR, type, chunk_size, - build_int_cst (type, -vf)); + + if (offload) + { + cfun->curr_properties &= ~PROP_gimple_lomp_dev; + vf = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_MAX_VF, + unsigned_type_node, 0); + vf = fold_convert (type, vf); + } + else + { + poly_uint64 vf_num = omp_max_vf (false); + if (known_eq (vf_num, 1U)) + return chunk_size; + vf = build_int_cst (type, vf_num); + } + + tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf, + build_int_cst (type, 1)); + tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf); + chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one); + return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf); } /* Collect additional arguments needed to emit a combined diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc index 25ce8133fe5..372b019f9d6 100644 --- a/gcc/omp-offload.cc +++ b/gcc/omp-offload.cc @@ -2754,6 +2754,9 @@ execute_omp_device_lower () case IFN_GOMP_SIMT_VF: rhs = build_int_cst (type, vf); break; + case IFN_GOMP_MAX_VF: + rhs = build_int_cst (type, omp_max_vf (false)); + break; case IFN_GOMP_SIMT_ORDERED_PRED: rhs = vf == 1 ? integer_zero_node : NULL_TREE; if (rhs || !lhs)