diff --git a/gcc/testsuite/gcc.target/aarch64/pr116139.c b/gcc/testsuite/gcc.target/aarch64/pr116139.c
new file mode 100644
index 00000000000..78a21323030
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116139.c
@@ -0,0 +1,35 @@
+/* PR tree-optimization/116139 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param fully-pipelined-fma=1 -mcpu=neoverse-n3" } */
+
+#define LOOP_COUNT 800000000  /* Trip count of the loop in foo.  */
+typedef double data_e;  /* Floating-point type used by foo.  */
+/* PR116139 reproducer; compile-only (dg-do compile), so it is never run.  */
+data_e
+foo (data_e in)
+{
+  data_e a1, a2, a3, a4;
+  data_e tmp, result = 0;
+  a1 = in + 0.1;
+  a2 = in * 0.1;
+  a3 = in + 0.01;
+  a4 = in * 0.59;
+  /* Second accumulator; unlike result it is halved before each update.  */
+  data_e result2 = 0;
+  /* NOTE(review): tmp below is presumably the FMA chain the PR is about.  */
+  for (int ic = 0; ic < LOOP_COUNT; ic++)
+    {
+      tmp = a1 + a2 * a2 + a3 * a3 + a4 * a4 ;
+      result += tmp - ic;
+      result2 = result2 / 2 - tmp;
+      /* Step all four inputs of tmp for the next iteration.  */
+      a1 += 0.91;
+      a2 += 0.1;
+      a3 -= 0.01;
+      a4 -= 0.89;
+
+    }
+
+  return result + result2;
+}
+
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index d74352268b5..70c810c5198 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -5509,16 +5509,15 @@ get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
      , it is latency(MULT)*2 + latency(ADD)*2.  Assuming latency(MULT) >=
      latency(ADD), the first variant is preferred.
 
-     Find out if we can get a smaller width considering FMA.  */
-  if (width > 1 && mult_num && param_fully_pipelined_fma)
-    {
-      /* When param_fully_pipelined_fma is set, assume FMUL and FMA use the
-	 same units that can also do FADD.  For other scenarios, such as when
-	 FMUL and FADD are using separated units, the following code may not
-	 appy.  */
-      int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
-      gcc_checking_assert (width_mult <= width);
+     Find out if we can get a smaller width considering FMA.
+     When param_fully_pipelined_fma is set, assume FMUL and FMA use the same
+     units that can also do FADD.  For other scenarios, such as when FMUL and
+     FADD are using separated units, the following code may not apply.  */
 
+  int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
+  if (width > 1 && mult_num && param_fully_pipelined_fma
+      && width_mult <= width)
+    {
       /* Latency of MULT_EXPRs.  */
       int lat_mul
 	= get_mult_latency_consider_fma (ops_num, mult_num, width_mult);