mirror of
https://github.com/gcc-mirror/gcc.git
synced 2024-11-21 13:40:47 +00:00
IBM Z: Handle hard registers in s390_md_asm_adjust()
gen_fprx2_to_tf() and gen_tf_to_fprx2() cannot handle hard registers, since the subregs they create do not pass validation. Change s390_md_asm_adjust() to manually copy between hard VRs and FPRs instead of using these two functions. gcc/ChangeLog: PR target/100217 * config/s390/s390.c (s390_hard_fp_reg_p): New function. (s390_md_asm_adjust): Handle hard registers. gcc/testsuite/ChangeLog: PR target/100217 * gcc.target/s390/vector/long-double-asm-in-out-hard-fp-reg.c: New test. * gcc.target/s390/vector/long-double-asm-inout-hard-fp-reg.c: New test.
This commit is contained in:
parent
80bbb7ff83
commit
4f48c335d3
@ -16754,6 +16754,23 @@ f_constraint_p (const char *constraint)
|
||||
return seen_f_p && !seen_v_p;
|
||||
}
|
||||
|
||||
/* Return TRUE iff X is a hard floating-point (and not a vector) register. */
|
||||
|
||||
static bool
|
||||
s390_hard_fp_reg_p (rtx x)
|
||||
{
|
||||
if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
|
||||
return false;
|
||||
|
||||
tree decl = REG_EXPR (x);
|
||||
if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
|
||||
return false;
|
||||
|
||||
const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
|
||||
|
||||
return name[0] == '*' && name[1] == 'f';
|
||||
}
|
||||
|
||||
/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
|
||||
constraints when long doubles are stored in vector registers. */
|
||||
|
||||
@ -16787,9 +16804,24 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
|
||||
gcc_assert (allows_reg);
|
||||
gcc_assert (!is_inout);
|
||||
/* Copy output value from a FPR pair into a vector register. */
|
||||
rtx fprx2 = gen_reg_rtx (FPRX2mode);
|
||||
rtx fprx2;
|
||||
push_to_sequence2 (after_md_seq, after_md_end);
|
||||
if (s390_hard_fp_reg_p (outputs[i]))
|
||||
{
|
||||
fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
|
||||
/* The first half is already at the correct location, copy only the
|
||||
* second one. Use the UNSPEC pattern instead of the SUBREG one,
|
||||
* since s390_can_change_mode_class() rejects
|
||||
* (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */
|
||||
rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
|
||||
rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
|
||||
emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
|
||||
}
|
||||
else
|
||||
{
|
||||
fprx2 = gen_reg_rtx (FPRX2mode);
|
||||
emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
|
||||
}
|
||||
after_md_seq = get_insns ();
|
||||
after_md_end = get_last_insn ();
|
||||
end_sequence ();
|
||||
@ -16813,8 +16845,20 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
|
||||
continue;
|
||||
gcc_assert (allows_reg);
|
||||
/* Copy input value from a vector register into a FPR pair. */
|
||||
rtx fprx2 = gen_reg_rtx (FPRX2mode);
|
||||
rtx fprx2;
|
||||
if (s390_hard_fp_reg_p (inputs[i]))
|
||||
{
|
||||
fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
|
||||
/* Copy only the second half. */
|
||||
rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
|
||||
rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
|
||||
emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
|
||||
}
|
||||
else
|
||||
{
|
||||
fprx2 = gen_reg_rtx (FPRX2mode);
|
||||
emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
|
||||
}
|
||||
inputs[i] = fprx2;
|
||||
input_modes[i] = FPRX2mode;
|
||||
}
|
||||
|
@ -0,0 +1,33 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
|
||||
/* { dg-do run { target { s390_z14_hw } } } */
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Test helper: run the sqxbr instruction on X and return its result.
   (sqxbr is presumably the z/Architecture extended-BFP square root; the
   expected value checked in main is consistent with that.)

   The input and the output are bound to two different hard registers
   (f0 and f1), so both "f"-constrained asm operands are hard
   floating-point registers.  The dg-final directives in this file count
   the vpdi instructions generated around the asm statements, so the
   statements below must stay exactly as written.  */
__attribute__ ((noipa)) static long double
|
||||
sqxbr (long double x)
|
||||
{
|
||||
/* Input operand, pinned to f0.  */
register long double in asm("f0") = x;
|
||||
/* Output operand, pinned to f1; written by the first asm below.  */
register long double out asm("f1");
|
||||
|
||||
/* Pure output ("=f") and pure input ("f") operands.  */
asm("sqxbr\t%0,%1" : "=f"(out) : "f"(in));
|
||||
/* Emits only an assembler comment; the "+f" constraint makes OUT both an
   input and an output of this statement.  */
asm("# %0" : "+f"(out));
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/* Ideally `vpdi %v3,%v1,%v3,5` should be optimized away, but the compiler
|
||||
* can't do it, because the UNSPEC pattern operates on the whole register.
|
||||
* Using the SUBREG pattern solves this problem, but it's fragile.
|
||||
*/
|
||||
/* { dg-final { scan-assembler-times {\n\tvpdi\t%v2,%v0,%v2,5\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\n\tvpdi\t%v1,%v1,%v3,0\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\n\tvpdi\t%v3,%v1,%v3,5\n} 1 } } */
|
||||
|
||||
/* Runtime check: call sqxbr on a fixed input and assert that the result
   equals the expected long double literal exactly.  */
int
|
||||
main (void)
|
||||
{
|
||||
/* X is 1 + 2^-52 (the hex literal has 13 fractional hex digits); EXP is
   the value sqxbr is expected to return for it.  */
long double x = 0x1.0000000000001p+0L,
|
||||
exp = 1.00000000000000011102230246251564788e+0L;
|
||||
assert (sqxbr (x) == exp);
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=z14 -mzarch --save-temps" } */
|
||||
/* { dg-do run { target { s390_z14_hw } } } */
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Test helper: run the sqxbr instruction on X in place and return the
   result.  (sqxbr is presumably the z/Architecture extended-BFP square
   root; the expected value checked in main is consistent with that.)

   A single variable bound to hard register f4 serves as both the input
   and the output, so the "+f"-constrained asm operand is a hard
   floating-point register.  The dg-final directives in this file count
   the vpdi instructions generated around the asm statements, so the
   statements below must stay exactly as written.  */
__attribute__ ((noipa)) static long double
|
||||
sqxbr (long double x)
|
||||
{
|
||||
/* In-out operand, pinned to f4.  */
register long double inout asm("f4") = x;
|
||||
|
||||
/* The same operand (%0) is used as source and destination.  */
asm("sqxbr\t%0,%0" : "+f"(inout));
|
||||
/* Emits only an assembler comment; INOUT is again an in-out operand.  */
asm("# %0" : "+f"(inout));
|
||||
|
||||
return inout;
|
||||
}
|
||||
|
||||
/* Ideally there should be just one `vpdi %v6,%v4,%v6,5`, but the compiler
|
||||
* can't optimize it away, because the UNSPEC pattern operates on the whole
|
||||
* register. Using the SUBREG pattern solves this problem, but it's fragile.
|
||||
*/
|
||||
/* { dg-final { scan-assembler-times {\n\tvpdi\t%v6,%v4,%v6,5\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\n\tvpdi\t%v4,%v4,%v6,0\n} 2 } } */
|
||||
|
||||
/* Runtime check: call sqxbr on a fixed input and assert that the result
   equals the expected long double literal exactly.  */
int
|
||||
main (void)
|
||||
{
|
||||
/* X is 1 + 2^-52 (the hex literal has 13 fractional hex digits); EXP is
   the value sqxbr is expected to return for it.  */
long double x = 0x1.0000000000001p+0L,
|
||||
exp = 1.00000000000000011102230246251564788e+0L;
|
||||
assert (sqxbr (x) == exp);
|
||||
}
|
Loading…
Reference in New Issue
Block a user