Re: BUG #19340: Wrong result from CORR() function

Поиск
Список
Период
Сортировка
От	Tom Lane
Тема	Re: BUG #19340: Wrong result from CORR() function
Дата	4 декабря 01:52:31
Msg-id	923070.1764802351@sss.pgh.pa.us обсуждение исходный текст
Ответ на	Re: BUG #19340: Wrong result from CORR() function (Dean Rasheed <dean.a.rasheed@gmail.com>)
Ответы	Re: BUG #19340: Wrong result from CORR() function Re: BUG #19340: Wrong result from CORR() function Re: BUG #19340: Wrong result from CORR() function
Список	pgsql-bugs
Дерево обсуждения
Attached is a fleshed-out patch proposal that fixes the related
aggregates and adds test cases.

Dean Rasheed <dean.a.rasheed@gmail.com> writes:
> Also, given that any NaN input results in NaN output, regardless of
> whether or not the inputs are all the same, the values for commonX and
> commonY don't matter if any input is NaN. I think that allows the
> accumulator function's tests to be simplified (no need to test if new
> values are NaN).

I'm not convinced about that.  I have it as

        if (newvalX != commonX || isnan(newvalX))
            commonX = get_float8_nan();

and I think you're saying we could just write

        if (newvalX != commonX)
            commonX = get_float8_nan();

My concern is that if newvalX is NaN but commonX isn't, then
I believe that the non-NaN-aware "!=" test is supposed to return
false, which'd cause us to not update commonX to NaN as required.
Maybe we could make it work by writing

        if (!(newvalX == commonX))
            commonX = get_float8_nan();

but I don't have a lot of faith in C compilers getting that right.

> Then there's this:
> SELECT corr(1e-100 + x*1e-105, 1e-100 + x*1e-105) FROM generate_series(1, 3) x;
> The correct answer in this case is also 1, which could be achieved by
> taking the square roots of Sxx and Syy separately, before multiplying,
> but it might also be sensible to clamp the result to the range [-1,1].

Poking at this, I soon found a test case where even with the separate
sqrt() calls we'd produce a result slightly outside [-1, 1] (running
this test over more values of x is sufficient).  So now I think we
should do both the separate sqrt and the clamp.

            regards, tom lane

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 7b97d2be6ca..fddff3f878a 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -3319,9 +3319,16 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
  * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
  * reduce rounding errors in the aggregate final functions.
  *
- * The transition datatype for all these aggregates is a 6-element array of
+ * The transition datatype for all these aggregates is an 8-element array of
  * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
- * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)), commonX, and commonY
+ * in that order.
+ *
+ * commonX is defined as the common X value if all the X values were the same,
+ * else NaN; likewise for commonY.  This is useful for deciding whether corr()
+ * and related functions should return NULL.  This representation cannot
+ * distinguish all-the-values-were-NaN from the-values-weren't-all-the-same,
+ * but that's okay because we want to return NaN for all-NaN input.
  *
  * Note that Y is the first argument to all these aggregates!
  *
@@ -3345,17 +3352,21 @@ float8_regr_accum(PG_FUNCTION_ARGS)
                 Sy,
                 Syy,
                 Sxy,
+                commonX,
+                commonY,
                 tmpX,
                 tmpY,
                 scale;

-    transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_accum", 8);
     N = transvalues[0];
     Sx = transvalues[1];
     Sxx = transvalues[2];
     Sy = transvalues[3];
     Syy = transvalues[4];
     Sxy = transvalues[5];
+    commonX = transvalues[6];
+    commonY = transvalues[7];

     /*
      * Use the Youngs-Cramer algorithm to incorporate the new values into the
@@ -3397,6 +3408,16 @@ float8_regr_accum(PG_FUNCTION_ARGS)
             if (isinf(Sxy))
                 Sxy = get_float8_nan();
         }
+
+        /*
+         * Check to see if we have seen distinct inputs.  We can use a test
+         * that's a bit cheaper than float8_ne() because if commonX is already
+         * NaN, it does not matter whether the != test returns true or not.
+         */
+        if (newvalX != commonX || isnan(newvalX))
+            commonX = get_float8_nan();
+        if (newvalY != commonY || isnan(newvalY))
+            commonY = get_float8_nan();
     }
     else
     {
@@ -3410,6 +3431,9 @@ float8_regr_accum(PG_FUNCTION_ARGS)
             Sxx = Sxy = get_float8_nan();
         if (isnan(newvalY) || isinf(newvalY))
             Syy = Sxy = get_float8_nan();
+
+        commonX = newvalX;
+        commonY = newvalY;
     }

     /*
@@ -3425,12 +3449,14 @@ float8_regr_accum(PG_FUNCTION_ARGS)
         transvalues[3] = Sy;
         transvalues[4] = Syy;
         transvalues[5] = Sxy;
+        transvalues[6] = commonX;
+        transvalues[7] = commonY;

         PG_RETURN_ARRAYTYPE_P(transarray);
     }
     else
     {
-        Datum        transdatums[6];
+        Datum        transdatums[8];
         ArrayType  *result;

         transdatums[0] = Float8GetDatumFast(N);
@@ -3439,8 +3465,10 @@ float8_regr_accum(PG_FUNCTION_ARGS)
         transdatums[3] = Float8GetDatumFast(Sy);
         transdatums[4] = Float8GetDatumFast(Syy);
         transdatums[5] = Float8GetDatumFast(Sxy);
+        transdatums[6] = Float8GetDatumFast(commonX);
+        transdatums[7] = Float8GetDatumFast(commonY);

-        result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+        result = construct_array_builtin(transdatums, 8, FLOAT8OID);

         PG_RETURN_ARRAYTYPE_P(result);
     }
@@ -3449,7 +3477,7 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 /*
  * float8_regr_combine
  *
- * An aggregate combine function used to combine two 6 fields
+ * An aggregate combine function used to combine two 8-fields
  * aggregate transition data into a single transition data.
  * This function is used only in two stage aggregation and
  * shouldn't be called outside aggregate context.
@@ -3467,12 +3495,16 @@ float8_regr_combine(PG_FUNCTION_ARGS)
                 Sy1,
                 Syy1,
                 Sxy1,
+                Cx1,
+                Cy1,
                 N2,
                 Sx2,
                 Sxx2,
                 Sy2,
                 Syy2,
                 Sxy2,
+                Cx2,
+                Cy2,
                 tmp1,
                 tmp2,
                 N,
@@ -3480,10 +3512,12 @@ float8_regr_combine(PG_FUNCTION_ARGS)
                 Sxx,
                 Sy,
                 Syy,
-                Sxy;
+                Sxy,
+                Cx,
+                Cy;

-    transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
-    transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
+    transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 8);
+    transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 8);

     N1 = transvalues1[0];
     Sx1 = transvalues1[1];
@@ -3491,6 +3525,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
     Sy1 = transvalues1[3];
     Syy1 = transvalues1[4];
     Sxy1 = transvalues1[5];
+    Cx1 = transvalues1[6];
+    Cy1 = transvalues1[7];

     N2 = transvalues2[0];
     Sx2 = transvalues2[1];
@@ -3498,6 +3534,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
     Sy2 = transvalues2[3];
     Syy2 = transvalues2[4];
     Sxy2 = transvalues2[5];
+    Cx2 = transvalues2[6];
+    Cy2 = transvalues2[7];

     /*--------------------
      * The transition values combine using a generalization of the
@@ -3523,6 +3561,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         Sy = Sy2;
         Syy = Syy2;
         Sxy = Sxy2;
+        Cx = Cx2;
+        Cy = Cy2;
     }
     else if (N2 == 0.0)
     {
@@ -3532,6 +3572,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         Sy = Sy1;
         Syy = Syy1;
         Sxy = Sxy1;
+        Cx = Cx1;
+        Cy = Cy1;
     }
     else
     {
@@ -3549,6 +3591,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
         if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2))
             float_overflow_error();
+        if (float8_eq(Cx1, Cx2))
+            Cx = Cx1;
+        else
+            Cx = get_float8_nan();
+        if (float8_eq(Cy1, Cy2))
+            Cy = Cy1;
+        else
+            Cy = get_float8_nan();
     }

     /*
@@ -3564,12 +3614,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         transvalues1[3] = Sy;
         transvalues1[4] = Syy;
         transvalues1[5] = Sxy;
+        transvalues1[6] = Cx;
+        transvalues1[7] = Cy;

         PG_RETURN_ARRAYTYPE_P(transarray1);
     }
     else
     {
-        Datum        transdatums[6];
+        Datum        transdatums[8];
         ArrayType  *result;

         transdatums[0] = Float8GetDatumFast(N);
@@ -3578,8 +3630,10 @@ float8_regr_combine(PG_FUNCTION_ARGS)
         transdatums[3] = Float8GetDatumFast(Sy);
         transdatums[4] = Float8GetDatumFast(Syy);
         transdatums[5] = Float8GetDatumFast(Sxy);
+        transdatums[6] = Float8GetDatumFast(Cx);
+        transdatums[7] = Float8GetDatumFast(Cy);

-        result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+        result = construct_array_builtin(transdatums, 8, FLOAT8OID);

         PG_RETURN_ARRAYTYPE_P(result);
     }
@@ -3594,7 +3648,7 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
     float8        N,
                 Sxx;

-    transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_sxx", 8);
     N = transvalues[0];
     Sxx = transvalues[2];

@@ -3615,7 +3669,7 @@ float8_regr_syy(PG_FUNCTION_ARGS)
     float8        N,
                 Syy;

-    transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_syy", 8);
     N = transvalues[0];
     Syy = transvalues[4];

@@ -3636,7 +3690,7 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
     float8        N,
                 Sxy;

-    transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_sxy", 8);
     N = transvalues[0];
     Sxy = transvalues[5];

@@ -3657,7 +3711,7 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
     float8        N,
                 Sx;

-    transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_avgx", 8);
     N = transvalues[0];
     Sx = transvalues[1];

@@ -3676,7 +3730,7 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
     float8        N,
                 Sy;

-    transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_avgy", 8);
     N = transvalues[0];
     Sy = transvalues[3];

@@ -3695,7 +3749,7 @@ float8_covar_pop(PG_FUNCTION_ARGS)
     float8        N,
                 Sxy;

-    transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+    transvalues = check_float8_array(transarray, "float8_covar_pop", 8);
     N = transvalues[0];
     Sxy = transvalues[5];

@@ -3714,7 +3768,7 @@ float8_covar_samp(PG_FUNCTION_ARGS)
     float8        N,
                 Sxy;

-    transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+    transvalues = check_float8_array(transarray, "float8_covar_samp", 8);
     N = transvalues[0];
     Sxy = transvalues[5];

@@ -3733,13 +3787,18 @@ float8_corr(PG_FUNCTION_ARGS)
     float8        N,
                 Sxx,
                 Syy,
-                Sxy;
+                Sxy,
+                commonX,
+                commonY,
+                result;

-    transvalues = check_float8_array(transarray, "float8_corr", 6);
+    transvalues = check_float8_array(transarray, "float8_corr", 8);
     N = transvalues[0];
     Sxx = transvalues[2];
     Syy = transvalues[4];
     Sxy = transvalues[5];
+    commonX = transvalues[6];
+    commonY = transvalues[7];

     /* if N is 0 we should return NULL */
     if (N < 1.0)
@@ -3747,11 +3806,37 @@ float8_corr(PG_FUNCTION_ARGS)

     /* Note that Sxx and Syy are guaranteed to be non-negative */

-    /* per spec, return NULL for horizontal and vertical lines */
+    /*
+     * Per spec, return NULL for horizontal and vertical lines.  We can detect
+     * constant inputs exactly by checking commonX and commonY, even though
+     * Sxx and/or Syy might be nonzero due to roundoff error.
+     */
+    if (!isnan(commonX) || !isnan(commonY))
+        PG_RETURN_NULL();
+
+    /*
+     * Although we now know that the inputs weren't all equal, Sxx and/or Syy
+     * could be zero due to roundoff error, if the inputs were close together.
+     * It seems best to return NULL in that case; blindly applying the result
+     * formula would yield NaN, which seems wrong if the inputs did not
+     * include any NaN.
+     */
     if (Sxx == 0 || Syy == 0)
         PG_RETURN_NULL();

-    PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
+    /*
+     * We compute sqrt(Sxx) * sqrt(Syy) not sqrt(Sxx * Syy) because the raw
+     * product could underflow or overflow.  Despite all these precautions,
+     * this formula can yield results outside [-1, 1] due to roundoff error.
+     * Clamp it to the expected range.
+     */
+    result = Sxy / (sqrt(Sxx) * sqrt(Syy));
+    if (result < -1)
+        result = -1;
+    else if (result > 1)
+        result = 1;
+
+    PG_RETURN_FLOAT8(result);
 }

 Datum
@@ -3762,13 +3847,17 @@ float8_regr_r2(PG_FUNCTION_ARGS)
     float8        N,
                 Sxx,
                 Syy,
-                Sxy;
+                Sxy,
+                commonX,
+                commonY;

-    transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_r2", 8);
     N = transvalues[0];
     Sxx = transvalues[2];
     Syy = transvalues[4];
     Sxy = transvalues[5];
+    commonX = transvalues[6];
+    commonY = transvalues[7];

     /* if N is 0 we should return NULL */
     if (N < 1.0)
@@ -3776,12 +3865,12 @@ float8_regr_r2(PG_FUNCTION_ARGS)

     /* Note that Sxx and Syy are guaranteed to be non-negative */

-    /* per spec, return NULL for a vertical line */
-    if (Sxx == 0)
+    /* per spec, return NULL for a vertical line (see float8_corr comments) */
+    if (!isnan(commonX) || Sxx == 0)
         PG_RETURN_NULL();

-    /* per spec, return 1.0 for a horizontal line */
-    if (Syy == 0)
+    /* per spec, return 1.0 for a horizontal line (see float8_corr comments) */
+    if (!isnan(commonY) || Syy == 0)
         PG_RETURN_FLOAT8(1.0);

     PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
@@ -3794,12 +3883,14 @@ float8_regr_slope(PG_FUNCTION_ARGS)
     float8       *transvalues;
     float8        N,
                 Sxx,
-                Sxy;
+                Sxy,
+                commonX;

-    transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_slope", 8);
     N = transvalues[0];
     Sxx = transvalues[2];
     Sxy = transvalues[5];
+    commonX = transvalues[6];

     /* if N is 0 we should return NULL */
     if (N < 1.0)
@@ -3807,8 +3898,8 @@ float8_regr_slope(PG_FUNCTION_ARGS)

     /* Note that Sxx is guaranteed to be non-negative */

-    /* per spec, return NULL for a vertical line */
-    if (Sxx == 0)
+    /* per spec, return NULL for a vertical line (see float8_corr comments) */
+    if (!isnan(commonX) || Sxx == 0)
         PG_RETURN_NULL();

     PG_RETURN_FLOAT8(Sxy / Sxx);
@@ -3823,14 +3914,16 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
                 Sx,
                 Sxx,
                 Sy,
-                Sxy;
+                Sxy,
+                commonX;

-    transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+    transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
     N = transvalues[0];
     Sx = transvalues[1];
     Sxx = transvalues[2];
     Sy = transvalues[3];
     Sxy = transvalues[5];
+    commonX = transvalues[6];

     /* if N is 0 we should return NULL */
     if (N < 1.0)
@@ -3838,8 +3931,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)

     /* Note that Sxx is guaranteed to be non-negative */

-    /* per spec, return NULL for a vertical line */
-    if (Sxx == 0)
+    /* per spec, return NULL for a vertical line (see float8_corr comments) */
+    if (!isnan(commonX) || Sxx == 0)
         PG_RETURN_NULL();

     PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 870769e8f14..f22ccfbf49f 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -475,37 +475,37 @@
   aggcombinefn => 'int8pl', aggtranstype => 'int8', agginitval => '0' },
 { aggfnoid => 'regr_sxx', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_sxx', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_syy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_syy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_sxy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_sxy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_avgx', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_avgx', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_avgy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_avgy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_r2', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_r2', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_slope', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_slope', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_intercept', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_intercept', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'covar_pop', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_covar_pop', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'covar_samp', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_covar_samp', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'corr', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_corr', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },

 # boolean-and and boolean-or
 { aggfnoid => 'bool_and', aggtransfn => 'booland_statefunc',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index be0e1573183..7ba2e613d1e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -515,6 +515,57 @@ SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
        NaN |
 (1 row)

+-- check some cases that formerly had poor roundoff-error behavior
+SET extra_float_digits = 1;
+SELECT corr(0.09, g), regr_r2(0.09, g)
+  FROM generate_series(1, 30) g;
+ corr | regr_r2
+------+---------
+      |       1
+(1 row)
+
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+  FROM generate_series(1, 30) g;
+ corr | regr_r2 | regr_slope | regr_intercept
+------+---------+------------+----------------
+      |         |            |
+(1 row)
+
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+  FROM generate_series(1, 3) g;
+ corr
+------
+
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 3) g;
+ corr
+------
+    1
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 30) g;
+ corr
+------
+    1
+(1 row)
+
+SET extra_float_digits = 0;
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+  NaN
+(1 row)
+
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+
+(1 row)
+
 -- test accum and combine functions directly
 CREATE TABLE regr_test (x float8, y float8);
 INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -538,10 +589,10 @@ SELECT float8_accum('{4,140,2900}'::float8[], 100);
  {5,240,6280}
 (1 row)

-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
-      float8_regr_accum
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
+          float8_regr_accum
+--------------------------------------
+ {5,240,6280,1490,95080,8680,100,NaN}
 (1 row)

 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -576,25 +627,25 @@ SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
  {5,240,6280}
 (1 row)

-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{0,0,0,0,0,0}'::float8[]);
-    float8_regr_combine
----------------------------
- {3,60,200,750,20000,2000}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+                           '{0,0,0,0,0,0,0,0}'::float8[]);
+       float8_regr_combine
+---------------------------------
+ {3,60,200,750,20000,2000,1,NaN}
 (1 row)

-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-     float8_regr_combine
------------------------------
- {2,180,200,740,57800,-3400}
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+        float8_regr_combine
+-----------------------------------
+ {2,180,200,740,57800,-3400,NaN,1}
 (1 row)

-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-     float8_regr_combine
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+                           '{2,180,200,740,57800,-3400,7,9}'::float8[]);
+        float8_regr_combine
+------------------------------------
+ {5,240,6280,1490,95080,8680,7,NaN}
 (1 row)

 DROP TABLE regr_test;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 77ca6ffa3a9..5eb412bd43b 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -140,6 +140,24 @@ SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
 SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
 SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);

+-- check some cases that formerly had poor roundoff-error behavior
+SET extra_float_digits = 1;
+SELECT corr(0.09, g), regr_r2(0.09, g)
+  FROM generate_series(1, 30) g;
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+  FROM generate_series(1, 30) g;
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+  FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 30) g;
+SET extra_float_digits = 0;
+
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+
 -- test accum and combine functions directly
 CREATE TABLE regr_test (x float8, y float8);
 INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -148,7 +166,7 @@ FROM regr_test WHERE x IN (10,20,30,80);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
 FROM regr_test;
 SELECT float8_accum('{4,140,2900}'::float8[], 100);
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
 FROM regr_test WHERE x IN (10,20,30);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -156,12 +174,12 @@ FROM regr_test WHERE x IN (80,100);
 SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
 SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
 SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{0,0,0,0,0,0}'::float8[]);
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+                           '{0,0,0,0,0,0,0,0}'::float8[]);
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+                           '{2,180,200,740,57800,-3400,7,9}'::float8[]);
 DROP TABLE regr_test;

 -- test count, distinct
В списке pgsql-bugs по дате отправления:
Вход в личный кабинет

Восстановление пароля

Подтверждение аккаунта

Изменение пароля

Re: BUG #19340: Wrong result from CORR() function