RISC-V: Introduce -mvector-strict-align.

This patch disables movmisalign by default and introduces
the -mno-vector-strict-align option to override it and re-enable
movmisalign.  For now, generic-ooo is the only uarch that supports
misaligned vector access.

The patch also adds a check_effective_target_riscv_v_misalign_ok to
the testsuite which enables or disables the vector misalignment tests
depending on whether the target under test can execute a misaligned
vle32.

Changes from v3:
 - Addressed Kito's comments.
 - Made -mscalar-strict-align a real alias.

gcc/ChangeLog:

	* config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
	Move from here...
	* config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
	...to here and map to riscv_vector_unaligned_access_p.
	* config/riscv/riscv.opt: Add -mvector-strict-align.
	* config/riscv/riscv.cc (struct riscv_tune_param): Add
	vector_unaligned_access.
	(riscv_override_options_internal): Set
	riscv_vector_unaligned_access_p.
	* doc/invoke.texi: Document -mvector-strict-align.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp: Add
	check_effective_target_riscv_v_misalign_ok.
	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
	-mno-vector-strict-align.
	* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
	* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
	* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
	* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
	* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.
This commit is contained in:
Robin Dapp 2024-05-28 21:19:26 +02:00 committed by Robin Dapp
parent 3eb9f6eab9
commit 68b0742a49
13 changed files with 89 additions and 12 deletions

View File

@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
? 0 \
: 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
/* TODO: Enable RVV movmisalign by default for now. */
#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
/* The maximum LMUL according to user configuration. */
#define TARGET_MAX_LMUL \
(int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)

View File

@ -288,6 +288,7 @@ struct riscv_tune_param
unsigned short memory_cost;
unsigned short fmv_cost;
bool slow_unaligned_access;
bool vector_unaligned_access;
bool use_divmod_expansion;
bool overlap_op_by_pieces;
unsigned int fusible_ops;
@ -300,6 +301,10 @@ struct riscv_tune_param
/* Whether unaligned accesses execute very slowly. */
bool riscv_slow_unaligned_access_p;
/* Whether misaligned vector accesses are supported (i.e. do not
throw an exception). */
bool riscv_vector_unaligned_access_p;
/* Whether user explicitly passed -mstrict-align. */
bool riscv_user_wants_strict_align;
@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = {
5, /* memory_cost */
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
@ -460,6 +466,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
3, /* memory_cost */
8, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
@ -478,6 +485,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
3, /* memory_cost */
4, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
@ -496,6 +504,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
3, /* memory_cost */
4, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
@ -514,6 +523,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
5, /* memory_cost */
8, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
@ -532,6 +542,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
3, /* memory_cost */
3, /* fmv_cost */
true, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
@ -550,6 +561,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
4, /* memory_cost */
4, /* fmv_cost */
false, /* slow_unaligned_access */
true, /* vector_unaligned_access */
false, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
@ -568,6 +580,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
2, /* memory_cost */
8, /* fmv_cost */
false, /* slow_unaligned_access */
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
@ -9714,6 +9727,12 @@ riscv_override_options_internal (struct gcc_options *opts)
riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
|| TARGET_STRICT_ALIGN);
/* By default, when -mno-vector-strict-align is not specified, do not allow
unaligned vector memory accesses except if -mtune's setting explicitly
allows it. */
riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
|| cpu->tune_param->vector_unaligned_access;
/* Make a note if user explicitly passed -mstrict-align for later
builtin macro generation. Can't use target_flags_explicitly since
it is set even for -mno-strict-align. */

View File

@ -934,6 +934,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
|| (riscv_microarchitecture == sifive_p400) \
|| (riscv_microarchitecture == sifive_p600))
/* True if the target supports misaligned vector loads and stores. */
#define TARGET_VECTOR_MISALIGN_SUPPORTED \
riscv_vector_unaligned_access_p
#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
/* Control the assembler format that we output. */
@ -1161,6 +1165,7 @@ while (0)
#ifndef USED_FOR_TARGET
extern const enum reg_class riscv_regno_to_class[];
extern bool riscv_slow_unaligned_access_p;
extern bool riscv_vector_unaligned_access_p;
extern bool riscv_user_wants_strict_align;
extern unsigned riscv_stack_boundary;
extern unsigned riscv_bytes_per_vector_chunk;

View File

@ -128,6 +128,14 @@ mstrict-align
Target Mask(STRICT_ALIGN) Save
Do not generate unaligned memory accesses.
mscalar-strict-align
Target Save Alias(mstrict-align)
Do not generate unaligned scalar memory accesses.
mvector-strict-align
Target Save Var(rvv_vector_strict_align) Init(1)
Do not create element-misaligned vector memory accesses.
Enum
Name(code_model) Type(enum riscv_code_model)
Known code models (for use with the -mcmodel= option):

View File

@ -31104,6 +31104,23 @@ Do not or do generate unaligned memory accesses. The default is set depending
on whether the processor we are optimizing for supports fast unaligned access
or not.
@opindex mscalar-strict-align
@opindex mno-scalar-strict-align
@item -mscalar-strict-align
@itemx -mno-scalar-strict-align
Do not or do generate unaligned memory accesses. The default is set depending
on whether the processor we are optimizing for supports fast unaligned access
or not. This is an alias for @option{-mstrict-align}.
@opindex mvector-strict-align
@opindex mno-vector-strict-align
@item -mvector-strict-align
@itemx -mno-vector-strict-align
Do not or do generate unaligned vector memory accesses. By default,
unaligned vector memory accesses are not generated unless the processor we
are optimizing for explicitly supports element-misaligned vector memory access.
@opindex mcmodel=
@opindex mcmodel=medlow
@item -mcmodel=medlow

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mno-vector-strict-align" } */
int
x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2)

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
#include <stdint-gcc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
#include <stdint-gcc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
#include <stdint-gcc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mno-vector-strict-align" } */
#include <stdint-gcc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -mno-vector-strict-align" } */
#include <stdint-gcc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns -mno-vector-strict-align" } */
#include <stdlib.h>

View File

@ -2044,7 +2044,7 @@ proc check_effective_target_riscv_zvfh_ok { } {
# check if we can execute vector insns with the given hardware or
# simulator
set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
if { [check_runtime ${gcc_march}_exec {
if { [check_runtime ${gcc_march}_zvfh_exec {
int main()
{
asm ("vsetivli zero,8,e16,m1,ta,ma");
@ -2057,6 +2057,8 @@ proc check_effective_target_riscv_zvfh_ok { } {
return 0
}
# Return 1 if we can execute code when using dg-add-options riscv_zvbb
proc check_effective_target_riscv_zvbb_ok { } {
# If the target already supports v without any added options,
# we may assume we can execute just fine.
@ -2076,6 +2078,28 @@ proc check_effective_target_riscv_zvbb_ok { } {
} } "-march=${gcc_march}"] } {
return 1
}
return 0
}
# Return 1 if the target can execute an element-misaligned vector load,
# i.e. a vle32.v from an address that is only 1-byte aligned.
proc check_effective_target_riscv_v_misalign_ok { } {
    # Nothing to probe unless the riscv_v_ok check passes.
    if { ![check_effective_target_riscv_v_ok] } {
	return 0
    }
    set gcc_march [riscv_get_arch]
    # Run a small program that loads 32-bit elements from a + 1.  The
    # element width has to be larger than one byte: a vle8.v is
    # element-aligned at every address, so it would make this check pass
    # regardless of whether misaligned accesses are supported.
    if { [check_runtime ${gcc_march}_misalign_exec {
	int main() {
	    unsigned char a[16]
	      = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
	    asm ("vsetivli zero,7,e8,m1,ta,ma");
	    asm ("addi a7,%0,1" : : "r" (a) : "a7" );
	    asm ("vle32.v v8,0(a7)" : : : "v8");
	    return 0; } } "-march=${gcc_march}"] } {
	return 1
    }
    return 0
}
@ -8184,7 +8208,6 @@ proc check_effective_target_vect_hw_misalign { } {
|| ([istarget mips*-*-*] && [et-is-effective-target mips_msa])
|| ([istarget s390*-*-*]
&& [check_effective_target_s390_vx])
|| ([istarget riscv*-*-*])
|| ([istarget loongarch*-*-*])
|| [istarget amdgcn*-*-*] } {
return 1
@ -8193,6 +8216,11 @@ proc check_effective_target_vect_hw_misalign { } {
&& ![check_effective_target_arm_vect_no_misalign] } {
return 1
}
if { [istarget riscv*-*-*]
&& [check_effective_target_riscv_v_misalign_ok] } {
return 1
}
return 0
}]
}
@ -11610,6 +11638,9 @@ proc check_vect_support_and_set_flags { } {
} elseif [istarget riscv*-*-*] {
if [check_effective_target_riscv_v] {
set dg-do-what-default run
if [check_effective_target_riscv_v_misalign_ok] {
lappend DEFAULT_VECTCFLAGS "-mno-vector-strict-align"
}
} else {
foreach item [add_options_for_riscv_v ""] {
lappend DEFAULT_VECTCFLAGS $item