From 481ba4fb5fce8257f5dbeb994dac2748c0237fa2 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 9 Apr 2024 08:17:25 +0200
Subject: [PATCH] libquadmath: Use soft-fp for sqrtq finite positive arguments
 [PR114623]

sqrt should be 0.5ulp precise, but the current implementation is less
precise than that.
The following patch uses the soft-fp code (like e.g. glibc for x86) for it
if possible.  I didn't want to replicate the libgcc infrastructure for
choosing the right sfp-machine.h, so the patch just uses a single generic
implementation.  As the code is used solely for the finite positive arguments,
it shouldn't generate NaNs (so the exact form of canonical QNaN/SNaN is
irrelevant), and sqrt for these shouldn't produce underflows/overflows either,
for < 1.0 arguments it always returns larger values than the argument and for
> 1.0 smaller values than the argument.

2024-04-09  Jakub Jelinek  <jakub@redhat.com>

	PR libquadmath/114623
	* sfp-machine.h: New file.
	* math/sqrtq.c: Include from libgcc/soft-fp also soft-fp.h and quad.h
	if possible.
	(USE_SOFT_FP): Define in that case.
	(sqrtq): Use soft-fp based implementation for the finite positive
	arguments if possible.
---
 libquadmath/math/sqrtq.c  | 25 +++++++++++++++++-
 libquadmath/sfp-machine.h | 54 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 libquadmath/sfp-machine.h

diff --git a/libquadmath/math/sqrtq.c b/libquadmath/math/sqrtq.c
index 56ea5d3243c..8ca2828d42c 100644
--- a/libquadmath/math/sqrtq.c
+++ b/libquadmath/math/sqrtq.c
@@ -1,6 +1,17 @@
 #include "quadmath-imp.h"
 #include <math.h>
 #include <float.h>
+#if __has_include("../../libgcc/soft-fp/soft-fp.h") \
+    && __has_include("../../libgcc/soft-fp/quad.h") \
+    && defined(FE_TONEAREST) \
+    && defined(FE_UPWARD) \
+    && defined(FE_DOWNWARD) \
+    && defined(FE_TOWARDZERO) \
+    && defined(FE_INEXACT)
+#define USE_SOFT_FP 1
+#include "../../libgcc/soft-fp/soft-fp.h"
+#include "../../libgcc/soft-fp/quad.h"
+#endif
 
 __float128
 sqrtq (const __float128 x)
@@ -20,6 +31,18 @@ sqrtq (const __float128 x)
       return (x - x) / (x - x);
     }
 
+#if USE_SOFT_FP
+  FP_DECL_EX;
+  FP_DECL_Q (X);
+  FP_DECL_Q (Y);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q (X, x);
+  FP_SQRT_Q (Y, X);
+  FP_PACK_Q (y, Y);
+  FP_HANDLE_EXCEPTIONS;
+  return y;
+#else
   if (x <= DBL_MAX && x >= DBL_MIN)
   {
     /* Use double result as starting point.  */
@@ -59,5 +82,5 @@ sqrtq (const __float128 x)
   y -= 0.5q * (y - x / y);
   y -= 0.5q * (y - x / y);
   return y;
+#endif
 }
-
diff --git a/libquadmath/sfp-machine.h b/libquadmath/sfp-machine.h
new file mode 100644
index 00000000000..e37d5b3c656
--- /dev/null
+++ b/libquadmath/sfp-machine.h
@@ -0,0 +1,54 @@
+/* libquadmath uses soft-fp only for sqrtq and only for
+   the positive finite case, so it doesn't care about
+   NaN representation, nor tininess after rounding vs.
+   before rounding, all it cares about is current rounding
+   mode and raising inexact exceptions.  */
+#if __SIZEOF_LONG__ == 8
+#define _FP_W_TYPE_SIZE 64
+#define _FP_I_TYPE long long
+#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0
+#else
+#define _FP_W_TYPE_SIZE 32
+#define _FP_I_TYPE int
+#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0
+#endif
+#define _FP_W_TYPE unsigned _FP_I_TYPE
+#define _FP_WS_TYPE signed _FP_I_TYPE
+#define _FP_QNANNEGATEDP 0
+#define _FP_NANSIGN_Q 1
+#define _FP_KEEPNANFRACP 1
+#define _FP_TININESS_AFTER_ROUNDING 0
+#define _FP_DECL_EX \
+  unsigned int fp_roundmode __attribute__ ((unused)) = FP_RND_NEAREST;
+#define FP_ROUNDMODE fp_roundmode
+#define FP_INIT_ROUNDMODE \
+  do					\
+    {					\
+      switch (fegetround ())		\
+        {				\
+        case FE_UPWARD:			\
+          fp_roundmode = FP_RND_PINF;	\
+          break;			\
+        case FE_DOWNWARD:		\
+          fp_roundmode = FP_RND_MINF;	\
+          break;			\
+        case FE_TOWARDZERO:		\
+          fp_roundmode = FP_RND_ZERO;	\
+          break;			\
+	default:			\
+	  break;			\
+        }				\
+    }					\
+  while (0)
+#define FP_HANDLE_EXCEPTIONS \
+  do					\
+    {					\
+      if (_fex & FP_EX_INEXACT)		\
+	{				\
+	  volatile double eight = 8.0;	\
+	  volatile double eps		\
+	    = DBL_EPSILON;		\
+	  eight += eps;			\
+	}				\
+    }					\
+  while (0)