libgomp, openmp: Add ompx_gnu_pinned_mem_alloc

This creates a new predefined allocator as a shortcut for using pinned
memory with OpenMP.  This is not in the OpenMP standard so it uses the "ompx"
namespace and an independent enum baseline of 200 (selected to not clash with
other known implementations).

The allocator is equivalent to using a custom allocator with the pinned
trait and the null fallback trait.  One motivation for having this feature is
for use by the (planned) -foffload-memory=pinned feature.

gcc/fortran/ChangeLog:

	* openmp.cc (is_predefined_allocator): Update valid ranges to
	incorporate ompx_gnu_pinned_mem_alloc.

libgomp/ChangeLog:

	* allocator.c (ompx_gnu_min_predefined_alloc): New.
	(ompx_gnu_max_predefined_alloc): New.
	(predefined_alloc_mapping): Rename to ...
	(predefined_omp_alloc_mapping): ... this.
	(predefined_ompx_gnu_alloc_mapping): New.
	(_Static_assert): Adjust for the new name, and add a new assert for the
	new table.
	(predefined_allocator_p): New.
	(predefined_alloc_mapping): New.
	(omp_aligned_alloc): Support ompx_gnu_pinned_mem_alloc.
	Use predefined_allocator_p and predefined_alloc_mapping.
	(omp_free): Likewise.
	(omp_aligned_calloc): Likewise.
	(omp_realloc): Likewise.
	* env.c (parse_allocator): Add ompx_gnu_pinned_mem_alloc.
	* libgomp.texi: Document ompx_gnu_pinned_mem_alloc.
	* omp.h.in (omp_allocator_handle_t): Add ompx_gnu_pinned_mem_alloc.
	* omp_lib.f90.in: Add ompx_gnu_pinned_mem_alloc.
	* omp_lib.h.in: Add ompx_gnu_pinned_mem_alloc.
	* testsuite/libgomp.c/alloc-pinned-5.c: New test.
	* testsuite/libgomp.c/alloc-pinned-6.c: New test.
	* testsuite/libgomp.fortran/alloc-pinned-1.f90: New test.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/allocate-pinned-1.f90: New test.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
This commit is contained in:
Andrew Stubbs 2024-06-12 11:09:33 +00:00
parent 90efaebf95
commit 64001441ec
11 changed files with 336 additions and 37 deletions

View File

@ -7423,8 +7423,9 @@ resolve_omp_udr_clause (gfc_omp_namelist *n, gfc_namespace *ns,
}
/* Assume that a constant expression in the range 1 (omp_default_mem_alloc)
to 8 (omp_thread_mem_alloc) range is fine. The original symbol name is
already lost during matching via gfc_match_expr. */
to 8 (omp_thread_mem_alloc) range, or 200 (ompx_gnu_pinned_mem_alloc) is
fine. The original symbol name is already lost during matching via
gfc_match_expr. */
static bool
is_predefined_allocator (gfc_expr *expr)
{
@ -7433,8 +7434,10 @@ is_predefined_allocator (gfc_expr *expr)
&& expr->ts.type == BT_INTEGER
&& expr->ts.kind == gfc_c_intptr_kind
&& expr->expr_type == EXPR_CONSTANT
&& mpz_sgn (expr->value.integer) > 0
&& mpz_cmp_si (expr->value.integer, 8) <= 0);
&& ((mpz_sgn (expr->value.integer) > 0
&& mpz_cmp_si (expr->value.integer, 8) <= 0)
|| (mpz_cmp_si (expr->value.integer, 200) >= 0
&& mpz_cmp_si (expr->value.integer, 200) <= 0)));
}
/* Resolve declarative ALLOCATE statement. Note: Common block vars only appear

View File

@ -0,0 +1,16 @@
! Test that the ompx_gnu_pinned_mem_alloc is accepted by the parser
! Minimal stand-in declarations for this test: the allocator handle kind and
! the ompx_gnu_pinned_mem_alloc constant (value 200).
module m
use iso_c_binding
! Allocator handles are integers of kind c_intptr_t.
integer, parameter :: omp_allocator_handle_kind = c_intptr_t
integer (kind=omp_allocator_handle_kind), &
parameter :: ompx_gnu_pinned_mem_alloc = 200
end
! Declares a SAVE variable and names ompx_gnu_pinned_mem_alloc as its
! allocator in an "omp allocate" directive.
subroutine f ()
use m
implicit none
! The "Sorry" is here temporarily only to avoid excess error failures.
integer, save :: i ! { dg-error "Sorry, !.OMP allocate for variable 'i' at .1. with SAVE attribute not yet implemented" }
!$omp allocate(i) allocator(ompx_gnu_pinned_mem_alloc)
end

View File

@ -99,6 +99,8 @@ GOMP_is_alloc (void *ptr)
#define omp_max_predefined_alloc omp_thread_mem_alloc
#define ompx_gnu_min_predefined_alloc ompx_gnu_pinned_mem_alloc
#define ompx_gnu_max_predefined_alloc ompx_gnu_pinned_mem_alloc
/* These macros may be overridden in config/<target>/allocator.c.
The defaults (no override) are to return NULL for pinned memory requests
@ -131,7 +133,7 @@ GOMP_is_alloc (void *ptr)
The index to this table is the omp_allocator_handle_t enum value.
When the user calls omp_alloc with a predefined allocator this
table determines what memory they get. */
static const omp_memspace_handle_t predefined_alloc_mapping[] = {
static const omp_memspace_handle_t predefined_omp_alloc_mapping[] = {
omp_default_mem_space, /* omp_null_allocator doesn't actually use this. */
omp_default_mem_space, /* omp_default_mem_alloc. */
omp_large_cap_mem_space, /* omp_large_cap_mem_alloc. */
@ -142,11 +144,41 @@ static const omp_memspace_handle_t predefined_alloc_mapping[] = {
omp_low_lat_mem_space, /* omp_pteam_mem_alloc (implementation defined). */
omp_low_lat_mem_space, /* omp_thread_mem_alloc (implementation defined). */
};
/* Memory spaces used by the GNU-extension (ompx_gnu_*) predefined
   allocators.  predefined_alloc_mapping indexes this table with the
   allocator handle minus ompx_gnu_min_predefined_alloc.  */
static const omp_memspace_handle_t predefined_ompx_gnu_alloc_mapping[] = {
omp_default_mem_space, /* ompx_gnu_pinned_mem_alloc. */
};
#define ARRAY_SIZE(A) (sizeof (A) / sizeof ((A)[0]))
_Static_assert (ARRAY_SIZE (predefined_alloc_mapping)
_Static_assert (ARRAY_SIZE (predefined_omp_alloc_mapping)
== omp_max_predefined_alloc + 1,
"predefined_alloc_mapping must match omp_memspace_handle_t");
"predefined_omp_alloc_mapping must match omp_memspace_handle_t");
_Static_assert (ARRAY_SIZE (predefined_ompx_gnu_alloc_mapping)
== (ompx_gnu_max_predefined_alloc
- ompx_gnu_min_predefined_alloc) + 1,
"predefined_ompx_gnu_alloc_mapping must match"
" omp_memspace_handle_t");
/* Return true if ALLOCATOR is to be handled as a predefined allocator,
   i.e. its value does not exceed ompx_gnu_max_predefined_alloc.  Note that
   this also holds for values lying between omp_max_predefined_alloc and
   ompx_gnu_min_predefined_alloc.  Callers in this file cast any other
   handle to a struct omp_allocator_data pointer.  */
static inline bool
predefined_allocator_p (omp_allocator_handle_t allocator)
{
  if (allocator > ompx_gnu_max_predefined_alloc)
    return false;
  return true;
}
/* Return the memory space backing the predefined allocator ALLOCATOR.
   Handles up to omp_max_predefined_alloc index predefined_omp_alloc_mapping
   directly; handles in the ompx_gnu_* range index
   predefined_ompx_gnu_alloc_mapping relative to
   ompx_gnu_min_predefined_alloc.  Anything else yields
   omp_default_mem_space.  */
static inline omp_memspace_handle_t
predefined_alloc_mapping (omp_allocator_handle_t allocator)
{
  /* Standard predefined allocators: the handle itself is the index.  */
  if (allocator <= omp_max_predefined_alloc)
    return predefined_omp_alloc_mapping[allocator];

  /* This should never happen.  */
  if (allocator < ompx_gnu_min_predefined_alloc
      || allocator > ompx_gnu_max_predefined_alloc)
    return omp_default_mem_space;

  /* GNU extension allocators: rebase the handle onto the table.  */
  int offset = allocator - ompx_gnu_min_predefined_alloc;
  return predefined_ompx_gnu_alloc_mapping[offset];
}
enum gomp_numa_memkind_kind
{
@ -556,7 +588,7 @@ retry:
allocator = (omp_allocator_handle_t) thr->ts.def_allocator;
}
if (allocator > omp_max_predefined_alloc)
if (!predefined_allocator_p (allocator))
{
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
@ -685,9 +717,11 @@ retry:
omp_memspace_handle_t memspace;
memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
ptr = MEMSPACE_ALLOC (memspace, new_size,
allocator_data && allocator_data->pinned);
: predefined_alloc_mapping (allocator));
int pinned = (allocator_data
? allocator_data->pinned
: allocator == ompx_gnu_pinned_mem_alloc);
ptr = MEMSPACE_ALLOC (memspace, new_size, pinned);
}
if (ptr == NULL)
goto fail;
@ -708,7 +742,8 @@ retry:
fail:;
int fallback = (allocator_data
? allocator_data->fallback
: allocator == omp_default_mem_alloc
: (allocator == omp_default_mem_alloc
|| allocator == ompx_gnu_pinned_mem_alloc)
? omp_atv_null_fb
: omp_atv_default_mem_fb);
switch (fallback)
@ -764,7 +799,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator)
return;
(void) allocator;
data = &((struct omp_mem_header *) ptr)[-1];
if (data->allocator > omp_max_predefined_alloc)
if (!predefined_allocator_p (data->allocator))
{
struct omp_allocator_data *allocator_data
= (struct omp_allocator_data *) (data->allocator);
@ -822,7 +857,8 @@ omp_free (void *ptr, omp_allocator_handle_t allocator)
}
#endif
memspace = predefined_alloc_mapping[data->allocator];
memspace = predefined_alloc_mapping (data->allocator);
pinned = (data->allocator == ompx_gnu_pinned_mem_alloc);
}
MEMSPACE_FREE (memspace, data->ptr, data->size, pinned);
@ -860,7 +896,7 @@ retry:
allocator = (omp_allocator_handle_t) thr->ts.def_allocator;
}
if (allocator > omp_max_predefined_alloc)
if (!predefined_allocator_p (allocator))
{
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
@ -995,9 +1031,11 @@ retry:
omp_memspace_handle_t memspace;
memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
ptr = MEMSPACE_CALLOC (memspace, new_size,
allocator_data && allocator_data->pinned);
: predefined_alloc_mapping (allocator));
int pinned = (allocator_data
? allocator_data->pinned
: allocator == ompx_gnu_pinned_mem_alloc);
ptr = MEMSPACE_CALLOC (memspace, new_size, pinned);
}
if (ptr == NULL)
goto fail;
@ -1018,7 +1056,8 @@ retry:
fail:;
int fallback = (allocator_data
? allocator_data->fallback
: allocator == omp_default_mem_alloc
: (allocator == omp_default_mem_alloc
|| allocator == ompx_gnu_pinned_mem_alloc)
? omp_atv_null_fb
: omp_atv_default_mem_fb);
switch (fallback)
@ -1076,7 +1115,7 @@ retry:
if (allocator == omp_null_allocator)
allocator = free_allocator;
if (allocator > omp_max_predefined_alloc)
if (!predefined_allocator_p (allocator))
{
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
@ -1104,7 +1143,7 @@ retry:
}
#endif
}
if (free_allocator > omp_max_predefined_alloc)
if (!predefined_allocator_p (free_allocator))
{
free_allocator_data = (struct omp_allocator_data *) free_allocator;
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
@ -1228,11 +1267,14 @@ retry:
else
#endif
if (prev_size)
{
int was_pinned = (free_allocator_data
? free_allocator_data->pinned
: free_allocator == ompx_gnu_pinned_mem_alloc);
new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
data->size, new_size,
(free_allocator_data
&& free_allocator_data->pinned),
data->size, new_size, was_pinned,
allocator_data->pinned);
}
else
new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size,
allocator_data->pinned);
@ -1287,11 +1329,15 @@ retry:
omp_memspace_handle_t memspace;
memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
: predefined_alloc_mapping (allocator));
int was_pinned = (free_allocator_data
? free_allocator_data->pinned
: free_allocator == ompx_gnu_pinned_mem_alloc);
int pinned = (allocator_data
? allocator_data->pinned
: allocator == ompx_gnu_pinned_mem_alloc);
new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size,
(free_allocator_data
&& free_allocator_data->pinned),
allocator_data && allocator_data->pinned);
was_pinned, pinned);
}
if (new_ptr == NULL)
goto fail;
@ -1324,9 +1370,11 @@ retry:
omp_memspace_handle_t memspace;
memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
new_ptr = MEMSPACE_ALLOC (memspace, new_size,
allocator_data && allocator_data->pinned);
: predefined_alloc_mapping (allocator));
int pinned = (allocator_data
? allocator_data->pinned
: allocator == ompx_gnu_pinned_mem_alloc);
new_ptr = MEMSPACE_ALLOC (memspace, new_size, pinned);
}
if (new_ptr == NULL)
goto fail;
@ -1380,8 +1428,10 @@ retry:
omp_memspace_handle_t was_memspace;
was_memspace = (free_allocator_data
? free_allocator_data->memspace
: predefined_alloc_mapping[free_allocator]);
int was_pinned = (free_allocator_data && free_allocator_data->pinned);
: predefined_alloc_mapping (free_allocator));
int was_pinned = (free_allocator_data
? free_allocator_data->pinned
: free_allocator == ompx_gnu_pinned_mem_alloc);
MEMSPACE_FREE (was_memspace, data->ptr, data->size, was_pinned);
}
return ret;
@ -1389,7 +1439,8 @@ retry:
fail:;
int fallback = (allocator_data
? allocator_data->fallback
: allocator == omp_default_mem_alloc
: (allocator == omp_default_mem_alloc
|| allocator == ompx_gnu_pinned_mem_alloc)
? omp_atv_null_fb
: omp_atv_default_mem_fb);
switch (fallback)

View File

@ -1264,6 +1264,7 @@ parse_allocator (const char *env, const char *val, void *const params[])
C (omp_cgroup_mem_alloc, false)
C (omp_pteam_mem_alloc, false)
C (omp_thread_mem_alloc, false)
C (ompx_gnu_pinned_mem_alloc, false)
C (omp_default_mem_space, true)
C (omp_large_cap_mem_space, true)
C (omp_const_mem_space, true)

View File

@ -3440,6 +3440,7 @@ value.
@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined)
@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined)
@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined)
@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension)
@end multitable
The predefined allocators use the default values for the traits,
@ -3465,7 +3466,7 @@ as listed below. Except that the last three allocators have the
@item @code{fb_data} @tab @emph{unsupported as it needs an allocator handle}
@tab (none)
@item @code{pinned} @tab @code{true}, @code{false}
@tab @code{false}
@tab See below
@item @code{partition} @tab @code{environment}, @code{nearest},
@code{blocked}, @code{interleaved}
@tab @code{environment}
@ -3476,6 +3477,10 @@ For the @code{fallback} trait, the default value is @code{null_fb} for the
with device memory; for all other allocators, it is @code{default_mem_fb}
by default.
For the @code{pinned} trait, the default value is @code{true} for
predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and
@code{false} for all others.
Examples:
@smallexample
OMP_ALLOCATOR=omp_high_bw_mem_alloc

View File

@ -134,6 +134,7 @@ typedef enum omp_allocator_handle_t __GOMP_UINTPTR_T_ENUM
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
ompx_gnu_pinned_mem_alloc = 200,
__omp_allocator_handle_t_max__ = __UINTPTR_MAX__
} omp_allocator_handle_t;

View File

@ -158,6 +158,8 @@
parameter :: omp_pteam_mem_alloc = 7
integer (kind=omp_allocator_handle_kind), &
parameter :: omp_thread_mem_alloc = 8
integer (kind=omp_allocator_handle_kind), &
parameter :: ompx_gnu_pinned_mem_alloc = 200
integer (omp_memspace_handle_kind), &
parameter :: omp_default_mem_space = 0
integer (omp_memspace_handle_kind), &

View File

@ -155,6 +155,7 @@
integer (omp_allocator_handle_kind) omp_cgroup_mem_alloc
integer (omp_allocator_handle_kind) omp_pteam_mem_alloc
integer (omp_allocator_handle_kind) omp_thread_mem_alloc
integer (omp_allocator_handle_kind) ompx_gnu_pinned_mem_alloc
parameter (omp_null_allocator = 0)
parameter (omp_default_mem_alloc = 1)
parameter (omp_large_cap_mem_alloc = 2)
@ -164,6 +165,7 @@
parameter (omp_cgroup_mem_alloc = 6)
parameter (omp_pteam_mem_alloc = 7)
parameter (omp_thread_mem_alloc = 8)
parameter (ompx_gnu_pinned_mem_alloc = 200)
integer (omp_memspace_handle_kind) omp_default_mem_space
integer (omp_memspace_handle_kind) omp_large_cap_mem_space
integer (omp_memspace_handle_kind) omp_const_mem_space

View File

@ -0,0 +1,100 @@
/* { dg-do run } */
/* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
/* Test that ompx_gnu_pinned_mem_alloc works. */
#include <stdio.h>
#include <stdlib.h>
#ifdef __linux__
#include <sys/types.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Abort with a message on stderr unless getrlimit succeeds and the
   RLIMIT_MEMLOCK soft limit is strictly greater than SIZE.
   Wrapped in do/while (0) so that "CHECK_SIZE (x);" is a single statement
   (safe in an unbraced if/else), and SIZE is parenthesized so that any
   expression may be passed.  */
#define CHECK_SIZE(SIZE) \
  do \
    { \
      struct rlimit limit; \
      if (getrlimit (RLIMIT_MEMLOCK, &limit) \
          || limit.rlim_cur <= (SIZE)) \
        { \
          fprintf (stderr, "insufficient lockable memory; please increase ulimit\n"); \
          abort (); \
        } \
    } \
  while (0)
/* Return the integer that follows "VmLck:" in /proc/<pid>/status for the
   calling process.  Aborts if the file cannot be opened or if no such line
   can be parsed.  */
int
get_pinned_mem ()
{
  char buf[100];

  snprintf (buf, sizeof buf, "/proc/%d/status", (int) getpid ());

  FILE *proc = fopen (buf, "r");
  if (!proc)
    abort ();

  while (fgets (buf, sizeof buf, proc))
    {
      int val;
      /* sscanf returns EOF (which is non-zero) on input failure, so require
         exactly one successful conversion before trusting VAL.  */
      if (sscanf (buf, "VmLck: %d", &val) == 1)
        {
          fclose (proc);
          return val;
        }
    }

  /* No VmLck line found.  */
  fclose (proc);
  abort ();
}
#else
#error "OS unsupported"
#endif
/* Abort unless each of the S bytes starting at P is zero.  */
static void
verify0 (char *p, size_t s)
{
  const char *cursor = p;
  size_t remaining = s;

  while (remaining > 0)
    {
      if (*cursor != 0)
        abort ();
      ++cursor;
      --remaining;
    }
}
#include <omp.h>
/* Exercise omp_alloc, omp_realloc and omp_calloc with
   ompx_gnu_pinned_mem_alloc, aborting unless the value reported by
   get_pinned_mem grows after each step.  */
int
main ()
{
  /* Allocate at least a page each time, allowing space for overhead,
     but stay within the ulimit.  */
  const int SIZE = PAGE_SIZE - 128;
  CHECK_SIZE (SIZE * 5);

  /* Sanity check: nothing may be reported as locked yet.  */
  if (get_pinned_mem () != 0)
    abort ();

  /* First pinned allocation must succeed and be visible as locked.  */
  void *p = omp_alloc (SIZE, ompx_gnu_pinned_mem_alloc);
  if (!p)
    abort ();

  int locked_after_alloc = get_pinned_mem ();
  if (locked_after_alloc == 0)
    abort ();

  /* Growing the block must increase the locked amount.  */
  p = omp_realloc (p, SIZE * 2, ompx_gnu_pinned_mem_alloc,
                   ompx_gnu_pinned_mem_alloc);

  int locked_after_realloc = get_pinned_mem ();
  if (locked_after_realloc <= locked_after_alloc)
    abort ();

  /* SIZE*2 ensures that it doesn't slot into the space possibly
     vacated by realloc.  */
  p = omp_calloc (1, SIZE * 2, ompx_gnu_pinned_mem_alloc);
  if (get_pinned_mem () <= locked_after_realloc)
    abort ();

  /* calloc'd memory must read back as zero.  */
  verify0 (p, SIZE * 2);

  return 0;
}

View File

@ -0,0 +1,102 @@
/* { dg-do run } */
/* Test that ompx_gnu_pinned_mem_alloc fails correctly. */
#include <stdio.h>
#include <stdlib.h>
#ifdef __linux__
#include <sys/types.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Return the integer that follows "VmLck:" in /proc/<pid>/status for the
   calling process.  Aborts if the file cannot be opened or if no such line
   can be parsed.  */
int
get_pinned_mem ()
{
  char buf[100];

  snprintf (buf, sizeof buf, "/proc/%d/status", (int) getpid ());

  FILE *proc = fopen (buf, "r");
  if (!proc)
    abort ();

  while (fgets (buf, sizeof buf, proc))
    {
      int val;
      /* sscanf returns EOF (which is non-zero) on input failure, so require
         exactly one successful conversion before trusting VAL.  */
      if (sscanf (buf, "VmLck: %d", &val) == 1)
        {
          fclose (proc);
          return val;
        }
    }

  /* No VmLck line found.  */
  fclose (proc);
  abort ();
}
/* Set the RLIMIT_MEMLOCK soft limit to SIZE, or to the current hard limit
   if that is smaller.  Aborts if the limit cannot be read or written.  */
void
set_pin_limit (int size)
{
  struct rlimit memlock;

  if (getrlimit (RLIMIT_MEMLOCK, &memlock) != 0)
    abort ();

  /* Never ask for more than the hard limit allows.  */
  if (memlock.rlim_max < size)
    memlock.rlim_cur = memlock.rlim_max;
  else
    memlock.rlim_cur = size;

  if (setrlimit (RLIMIT_MEMLOCK, &memlock) != 0)
    abort ();
}
#else
#define PAGE_SIZE 10000 * 1024 /* unknown */
/* Fallback used when __linux__ is not defined: always reports zero.  */
int
get_pinned_mem ()
{
  enum { no_locked_memory = 0 };
  return no_locked_memory;
}
/* Fallback used when __linux__ is not defined: does nothing.
   It takes the same parameter as the Linux variant because main calls
   set_pin_limit with one argument; an empty parameter list would make that
   call undefined before C23 and a compile error in C23, where "()" means
   "(void)".  */
void
set_pin_limit (int size)
{
  (void) size;
}
#endif
#include <omp.h>
/* Check that omp_alloc, omp_calloc and omp_realloc with
   ompx_gnu_pinned_mem_alloc return NULL when the request is larger than the
   lock limit installed by set_pin_limit, and that get_pinned_mem still
   reports zero afterwards.  */
int
main ()
{
  /* Request several pages at once; the limit set below is half of this.  */
  const int SIZE = PAGE_SIZE * 4;

  /* Ensure that the limit is smaller than the allocation.  */
  set_pin_limit (SIZE / 2);

  /* Sanity check.  */
  if (get_pinned_mem () != 0)
    abort ();

  /* Should fail.  */
  void *pinned = omp_alloc (SIZE, ompx_gnu_pinned_mem_alloc);
  if (pinned)
    abort ();

  /* Should fail.  */
  pinned = omp_calloc (1, SIZE, ompx_gnu_pinned_mem_alloc);
  if (pinned)
    abort ();

  /* Should fail to realloc: the source block is ordinary memory, the
     requested destination is pinned.  */
  void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
  pinned = omp_realloc (notpinned, SIZE, ompx_gnu_pinned_mem_alloc,
                        omp_default_mem_alloc);
  if (!notpinned)
    abort ();
  if (pinned)
    abort ();

  /* No memory should have been pinned.  */
  if (get_pinned_mem () != 0)
    abort ();

  return 0;
}

View File

@ -0,0 +1,16 @@
! Ensure that the ompx_gnu_pinned_mem_alloc predefined allocator is present and
! accepted. The majority of the functionality testing lives in the C tests.
!
! { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } }
program main
  use omp_lib
  use ISO_C_Binding
  implicit none (external, type)

  type(c_ptr) :: pinned_ptr

  ! Request a small block from the pinned predefined allocator; a null
  ! result stops the program with code 1.
  pinned_ptr = omp_alloc (10_c_size_t, ompx_gnu_pinned_mem_alloc)
  if (.not. c_associated (pinned_ptr)) then
    stop 1
  end if

  ! Hand the block back to the same allocator.
  call omp_free (pinned_ptr, ompx_gnu_pinned_mem_alloc)
end program main