Mirror of https://github.com/gcc-mirror/gcc.git (synced 2024-11-21 13:40:47 +00:00)
RISC-V: Introduce -mvector-strict-align.
This patch disables movmisalign by default and introduces the
-mno-vector-strict-align option to override that and re-enable movmisalign.
For now, generic-ooo is the only uarch that supports misaligned vector
access.

The patch also adds a check_effective_target_riscv_v_misalign_ok to the
testsuite which enables or disables the vector misalignment tests depending
on whether the target under test can execute a misaligned vle32.

Changes from v3:
 - Addressed Kito's comments.
 - Made -mscalar-strict-align a real alias.

gcc/ChangeLog:

        * config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
        Move from here...
        * config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED): ...to
        here and map to riscv_vector_unaligned_access_p.
        * config/riscv/riscv.opt: Add -mvector-strict-align.
        * config/riscv/riscv.cc (struct riscv_tune_param): Add
        vector_unaligned_access.
        (riscv_override_options_internal): Set
        riscv_vector_unaligned_access_p.
        * doc/invoke.texi: Document -mvector-strict-align.

gcc/testsuite/ChangeLog:

        * lib/target-supports.exp: Add
        check_effective_target_riscv_v_misalign_ok.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
        -mno-vector-strict-align.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
        * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.
This commit is contained in:
parent 3eb9f6eab9
commit 68b0742a49
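To illustrate the kind of code this affects (a hypothetical example, not part of the patch): a loop that reads wider elements from a byte buffer of unknown alignment can only be vectorized with misaligned vector loads. By default the compiler now stays strict; passing -mno-vector-strict-align (or tuning for generic-ooo, which opts in) tells it such accesses are supported:

/* Hypothetical example, not from the patch.  Each iteration reads an
   unaligned 32-bit value from a byte buffer, so vectorizing the loop needs
   element-misaligned vector loads (movmisalign).  Illustrative invocations:
     gcc -O3 -march=rv64gcv -mabi=lp64d foo.c                            (strict by default)
     gcc -O3 -march=rv64gcv -mabi=lp64d -mno-vector-strict-align foo.c   (misaligned allowed)  */
unsigned int
sum_u32 (const unsigned char *buf, int n)
{
  unsigned int sum = 0;
  for (int i = 0; i < n; i++)
    {
      unsigned int v;
      __builtin_memcpy (&v, buf + 4 * i, sizeof v);  /* unaligned 32-bit read */
      sum += v;
    }
  return sum;
}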
@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
|
||||
? 0 \
|
||||
: 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
|
||||
|
||||
/* TODO: Enable RVV movmisalign by default for now. */
|
||||
#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
|
||||
|
||||
/* The maximmum LMUL according to user configuration. */
|
||||
#define TARGET_MAX_LMUL \
|
||||
(int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
|
||||
|
gcc/config/riscv/riscv.cc

@@ -288,6 +288,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool vector_unaligned_access;
   bool use_divmod_expansion;
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
@@ -300,6 +301,10 @@ struct riscv_tune_param
 /* Whether unaligned accesses execute very slowly. */
 bool riscv_slow_unaligned_access_p;
 
+/* Whether misaligned vector accesses are supported (i.e. do not
+   throw an exception). */
+bool riscv_vector_unaligned_access_p;
+
 /* Whether user explicitly passed -mstrict-align. */
 bool riscv_user_wants_strict_align;
 
@@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5, /* memory_cost */
   8, /* fmv_cost */
   true, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING, /* fusible_ops */
@@ -460,6 +466,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   3, /* memory_cost */
   8, /* fmv_cost */
   true, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING, /* fusible_ops */
@@ -478,6 +485,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   3, /* memory_cost */
   4, /* fmv_cost */
   true, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
@@ -496,6 +504,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   3, /* memory_cost */
   4, /* fmv_cost */
   true, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
@@ -514,6 +523,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   5, /* memory_cost */
   8, /* fmv_cost */
   false, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING, /* fusible_ops */
@@ -532,6 +542,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   3, /* memory_cost */
   3, /* fmv_cost */
   true, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
@@ -550,6 +561,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   4, /* memory_cost */
   4, /* fmv_cost */
   false, /* slow_unaligned_access */
+  true, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   true, /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING, /* fusible_ops */
@@ -568,6 +580,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   2, /* memory_cost */
   8, /* fmv_cost */
   false, /* slow_unaligned_access */
+  false, /* vector_unaligned_access */
   false, /* use_divmod_expansion */
   false, /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING, /* fusible_ops */
@@ -9714,6 +9727,12 @@ riscv_override_options_internal (struct gcc_options *opts)
   riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
				   || TARGET_STRICT_ALIGN);
 
+  /* By default, when -mno-vector-strict-align is not specified, do not allow
+     unaligned vector memory accesses except if -mtune's setting explicitly
+     allows it. */
+  riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
+    || cpu->tune_param->vector_unaligned_access;
+
   /* Make a note if user explicitly passed -mstrict-align for later
      builtin macro generation.  Can't use target_flags_explicitly since
      it is set even for -mno-strict-align. */
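Restated as plain C (an illustrative sketch, not GCC source; the function name is made up, the variable names mirror the patch), the net effect of the assignment above is:

#include <stdbool.h>

/* Sketch of the decision made in riscv_override_options_internal:
   misaligned vector accesses end up enabled either because the user passed
   -mno-vector-strict-align (the option variable, Init(1) in riscv.opt,
   becomes 0) or because the selected -mtune opts in.  */
static bool
vector_misalign_enabled (int rvv_vector_strict_align,
                         bool tune_vector_unaligned_access)
{
  return rvv_vector_strict_align == 0 || tune_vector_unaligned_access;
}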
gcc/config/riscv/riscv.h

@@ -934,6 +934,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
       || (riscv_microarchitecture == sifive_p400) \
       || (riscv_microarchitecture == sifive_p600))
 
+/* True if the target supports misaligned vector loads and stores. */
+#define TARGET_VECTOR_MISALIGN_SUPPORTED \
+  riscv_vector_unaligned_access_p
+
 #define LOGICAL_OP_NON_SHORT_CIRCUIT 0
 
 /* Control the assembler format that we output. */
@@ -1161,6 +1165,7 @@ while (0)
 #ifndef USED_FOR_TARGET
 extern const enum reg_class riscv_regno_to_class[];
 extern bool riscv_slow_unaligned_access_p;
+extern bool riscv_vector_unaligned_access_p;
 extern bool riscv_user_wants_strict_align;
 extern unsigned riscv_stack_boundary;
 extern unsigned riscv_bytes_per_vector_chunk;
 
gcc/config/riscv/riscv.opt

@@ -128,6 +128,14 @@ mstrict-align
 Target Mask(STRICT_ALIGN) Save
 Do not generate unaligned memory accesses.
 
+mscalar-strict-align
+Target Save Alias(mstrict-align)
+Do not generate unaligned scalar memory accesses.
+
+mvector-strict-align
+Target Save Var(rvv_vector_strict_align) Init(1)
+Do not create element-misaligned vector memory accesses.
+
 Enum
 Name(code_model) Type(enum riscv_code_model)
 Known code models (for use with the -mcmodel= option):
gcc/doc/invoke.texi

@@ -31104,6 +31104,23 @@ Do not or do generate unaligned memory accesses. The default is set depending
 on whether the processor we are optimizing for supports fast unaligned access
 or not.
 
+@opindex mscalar-strict-align
+@opindex mno-scalar-strict-align
+@item -mscalar-strict-align
+@itemx -mno-scalar-strict-align
+Do not or do generate unaligned memory accesses. The default is set depending
+on whether the processor we are optimizing for supports fast unaligned access
+or not. This is an alias for @option{-mstrict-align}.
+
+@opindex mvector-strict-align
+@opindex mno-vector-strict-align
+@item -mvector-strict-align
+@itemx -mno-vector-strict-align
+Do not or do generate unaligned vector memory accesses. The default is set
+to off unless the processor we are optimizing for explicitly supports
+element-misaligned vector memory access.
+
+
 @opindex mcmodel=
 @opindex mcmodel=medlow
 @item -mcmodel=medlow
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mno-vector-strict-align" } */
 
 int
 x264_pixel_8x8 (unsigned char *pix1, unsigned char *pix2, int i_stride_pix2)
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m4 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m8 -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -mno-vector-strict-align" } */
 
 #include <stdint-gcc.h>
 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c

@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 -mrvv-max-lmul=m4 -fno-tree-loop-distribute-patterns -mno-vector-strict-align" } */
 
 #include <stdlib.h>
 
gcc/testsuite/lib/target-supports.exp

@@ -2044,7 +2044,7 @@ proc check_effective_target_riscv_zvfh_ok { } {
     # check if we can execute vector insns with the given hardware or
     # simulator
     set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
-    if { [check_runtime ${gcc_march}_exec {
+    if { [check_runtime ${gcc_march}_zvfh_exec {
	int main()
	{
	  asm ("vsetivli zero,8,e16,m1,ta,ma");
@@ -2057,6 +2057,8 @@ proc check_effective_target_riscv_zvfh_ok { } {
     return 0
 }
 
+# Return 1 if we can execute code when using dg-add-options riscv_zvbb
+
 proc check_effective_target_riscv_zvbb_ok { } {
     # If the target already supports v without any added options,
     # we may assume we can execute just fine.
@@ -2076,6 +2078,28 @@ proc check_effective_target_riscv_zvbb_ok { } {
	} } "-march=${gcc_march}"] } {
	return 1
     }
     return 0
 }
 
+# Return 1 if we can load a vector from a 1-byte aligned address.
+
+proc check_effective_target_riscv_v_misalign_ok { } {
+
+    if { ![check_effective_target_riscv_v_ok] } {
+	return 0
+    }
+
+    set gcc_march [riscv_get_arch]
+    if { [check_runtime ${gcc_march}_misalign_exec {
+	int main() {
+		unsigned char a[16]
+		  = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+		asm ("vsetivli zero,7,e8,m1,ta,ma");
+		asm ("addi a7,%0,1" : : "r" (a) : "a7" );
+		asm ("vle8.v v8,0(a7)" : : : "v8");
+		return 0; } } "-march=${gcc_march}"] } {
+	return 1
+    }
+
+    return 0
+}
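For reference, the C program embedded in the check_runtime call above is, reformatted here with comments added (the Tcl string in the diff is authoritative):

/* Probe compiled and run by check_effective_target_riscv_v_misalign_ok:
   perform a vector load from &a[1], an address offset by one byte from the
   array start.  If the execution environment fails on this program, the
   runtime check fails and the misalignment tests remain disabled.  */
int
main (void)
{
  unsigned char a[16]
    = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  asm ("vsetivli zero,7,e8,m1,ta,ma");      /* 7 elements, SEW=8, LMUL=1.  */
  asm ("addi a7,%0,1" : : "r" (a) : "a7");  /* a7 = &a[1].  */
  asm ("vle8.v v8,0(a7)" : : : "v8");       /* vector load from a7.  */
  return 0;
}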
@@ -8184,7 +8208,6 @@ proc check_effective_target_vect_hw_misalign { } {
	 || ([istarget mips*-*-*] && [et-is-effective-target mips_msa])
	 || ([istarget s390*-*-*]
	     && [check_effective_target_s390_vx])
-	 || ([istarget riscv*-*-*])
	 || ([istarget loongarch*-*-*])
	 || [istarget amdgcn*-*-*] } {
	return 1
@@ -8193,6 +8216,11 @@ proc check_effective_target_vect_hw_misalign { } {
	 && ![check_effective_target_arm_vect_no_misalign] } {
	return 1
     }
+    if { [istarget riscv*-*-*]
+	 && [check_effective_target_riscv_v_misalign_ok] } {
+	return 1
+    }
+
     return 0
   }]
 }
@@ -11610,6 +11638,9 @@ proc check_vect_support_and_set_flags { } {
     } elseif [istarget riscv*-*-*] {
	if [check_effective_target_riscv_v] {
	    set dg-do-what-default run
+	    if [check_effective_target_riscv_v_misalign_ok] {
+		lappend DEFAULT_VECTCFLAGS "-mno-vector-strict-align"
+	    }
	} else {
	    foreach item [add_options_for_riscv_v ""] {
		lappend DEFAULT_VECTCFLAGS $item
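Beyond the automatic wiring through vect_hw_misalign and DEFAULT_VECTCFLAGS above, an individual test could also gate on the new effective target directly. A hypothetical skeleton, not part of this commit:

/* Hypothetical test skeleton, not from this commit.  */
/* { dg-do run } */
/* { dg-require-effective-target riscv_v_misalign_ok } */
/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mno-vector-strict-align" } */

int
main (void)
{
  /* A real test would exercise misaligned vector loads/stores here.  */
  return 0;
}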