Re: BUG #19340: Wrong result from CORR() function
| От | Tom Lane |
|---|---|
| Тема | Re: BUG #19340: Wrong result from CORR() function |
| Дата | |
| Msg-id | 923070.1764802351@sss.pgh.pa.us обсуждение исходный текст |
| Ответ на | Re: BUG #19340: Wrong result from CORR() function (Dean Rasheed <dean.a.rasheed@gmail.com>) |
| Ответы |
Re: BUG #19340: Wrong result from CORR() function
Re: BUG #19340: Wrong result from CORR() function Re: BUG #19340: Wrong result from CORR() function |
| Список | pgsql-bugs |
Attached is a fleshed-out patch proposal that fixes the related
aggregates and adds test cases.
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
> Also, given that any NaN input results in NaN output, regardless of
> whether or not the inputs are all the same, the values for commonX and
> commonY don't matter if any input is NaN. I think that allows the
> accumulator function's tests to be simplified (no need to test if new
> values are NaN).
I'm not convinced about that. I have it as
if (newvalX != commonX || isnan(newvalX))
commonX = get_float8_nan();
and I think you're saying we could just write
if (newvalX != commonX)
commonX = get_float8_nan();
My concern is that if newvalX is NaN but commonX isn't, then
I believe that the non-NaN-aware "!=" test is supposed to return
false, which'd cause us to not update commonX to NaN as required.
Maybe we could make it work by writing
if (!(newvalX == commonX))
commonX = get_float8_nan();
but I don't have a lot of faith in C compilers getting that right.
> Then there's this:
> SELECT corr(1e-100 + x*1e-105, 1e-100 + x*1e-105) FROM generate_series(1, 3) x;
> The correct answer in this case is also 1, which could be achieved by
> taking the square roots of Sxx and Syy separately, before multiplying,
> but it might also be sensible to clamp the result to the range [-1,1].
Poking at this, I soon found a test case where even with the separate
sqrt() calls we'd produce a result slightly outside [-1, 1] (running
this test over more values of x is sufficient). So now I think we
should do both the separate sqrt and the clamp.
regards, tom lane
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 7b97d2be6ca..fddff3f878a 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -3319,9 +3319,16 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
* As with the preceding aggregates, we use the Youngs-Cramer algorithm to
* reduce rounding errors in the aggregate final functions.
*
- * The transition datatype for all these aggregates is a 6-element array of
+ * The transition datatype for all these aggregates is an 8-element array of
* float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
- * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)), commonX, and commonY
+ * in that order.
+ *
+ * commonX is defined as the common X value if all the X values were the same,
+ * else NaN; likewise for commonY. This is useful for deciding whether corr()
+ * and related functions should return NULL. This representation cannot
+ * distinguish all-the-values-were-NaN from the-values-weren't-all-the-same,
+ * but that's okay because we want to return NaN for all-NaN input.
*
* Note that Y is the first argument to all these aggregates!
*
@@ -3345,17 +3352,21 @@ float8_regr_accum(PG_FUNCTION_ARGS)
Sy,
Syy,
Sxy,
+ commonX,
+ commonY,
tmpX,
tmpY,
scale;
- transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_accum", 8);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
Sy = transvalues[3];
Syy = transvalues[4];
Sxy = transvalues[5];
+ commonX = transvalues[6];
+ commonY = transvalues[7];
/*
* Use the Youngs-Cramer algorithm to incorporate the new values into the
@@ -3397,6 +3408,16 @@ float8_regr_accum(PG_FUNCTION_ARGS)
if (isinf(Sxy))
Sxy = get_float8_nan();
}
+
+ /*
+ * Check to see if we have seen distinct inputs. We can use a test
+ * that's a bit cheaper than float8_ne() because if commonX is already
+ * NaN, it does not matter whether the != test returns true or not.
+ */
+ if (newvalX != commonX || isnan(newvalX))
+ commonX = get_float8_nan();
+ if (newvalY != commonY || isnan(newvalY))
+ commonY = get_float8_nan();
}
else
{
@@ -3410,6 +3431,9 @@ float8_regr_accum(PG_FUNCTION_ARGS)
Sxx = Sxy = get_float8_nan();
if (isnan(newvalY) || isinf(newvalY))
Syy = Sxy = get_float8_nan();
+
+ commonX = newvalX;
+ commonY = newvalY;
}
/*
@@ -3425,12 +3449,14 @@ float8_regr_accum(PG_FUNCTION_ARGS)
transvalues[3] = Sy;
transvalues[4] = Syy;
transvalues[5] = Sxy;
+ transvalues[6] = commonX;
+ transvalues[7] = commonY;
PG_RETURN_ARRAYTYPE_P(transarray);
}
else
{
- Datum transdatums[6];
+ Datum transdatums[8];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
@@ -3439,8 +3465,10 @@ float8_regr_accum(PG_FUNCTION_ARGS)
transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(Sxy);
+ transdatums[6] = Float8GetDatumFast(commonX);
+ transdatums[7] = Float8GetDatumFast(commonY);
- result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+ result = construct_array_builtin(transdatums, 8, FLOAT8OID);
PG_RETURN_ARRAYTYPE_P(result);
}
@@ -3449,7 +3477,7 @@ float8_regr_accum(PG_FUNCTION_ARGS)
/*
* float8_regr_combine
*
- * An aggregate combine function used to combine two 6 fields
+ * An aggregate combine function used to combine two 8-fields
* aggregate transition data into a single transition data.
* This function is used only in two stage aggregation and
* shouldn't be called outside aggregate context.
@@ -3467,12 +3495,16 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy1,
Syy1,
Sxy1,
+ Cx1,
+ Cy1,
N2,
Sx2,
Sxx2,
Sy2,
Syy2,
Sxy2,
+ Cx2,
+ Cy2,
tmp1,
tmp2,
N,
@@ -3480,10 +3512,12 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sxx,
Sy,
Syy,
- Sxy;
+ Sxy,
+ Cx,
+ Cy;
- transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
- transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
+ transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 8);
+ transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 8);
N1 = transvalues1[0];
Sx1 = transvalues1[1];
@@ -3491,6 +3525,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy1 = transvalues1[3];
Syy1 = transvalues1[4];
Sxy1 = transvalues1[5];
+ Cx1 = transvalues1[6];
+ Cy1 = transvalues1[7];
N2 = transvalues2[0];
Sx2 = transvalues2[1];
@@ -3498,6 +3534,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy2 = transvalues2[3];
Syy2 = transvalues2[4];
Sxy2 = transvalues2[5];
+ Cx2 = transvalues2[6];
+ Cy2 = transvalues2[7];
/*--------------------
* The transition values combine using a generalization of the
@@ -3523,6 +3561,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy = Sy2;
Syy = Syy2;
Sxy = Sxy2;
+ Cx = Cx2;
+ Cy = Cy2;
}
else if (N2 == 0.0)
{
@@ -3532,6 +3572,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy = Sy1;
Syy = Syy1;
Sxy = Sxy1;
+ Cx = Cx1;
+ Cy = Cy1;
}
else
{
@@ -3549,6 +3591,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2))
float_overflow_error();
+ if (float8_eq(Cx1, Cx2))
+ Cx = Cx1;
+ else
+ Cx = get_float8_nan();
+ if (float8_eq(Cy1, Cy2))
+ Cy = Cy1;
+ else
+ Cy = get_float8_nan();
}
/*
@@ -3564,12 +3614,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
transvalues1[3] = Sy;
transvalues1[4] = Syy;
transvalues1[5] = Sxy;
+ transvalues1[6] = Cx;
+ transvalues1[7] = Cy;
PG_RETURN_ARRAYTYPE_P(transarray1);
}
else
{
- Datum transdatums[6];
+ Datum transdatums[8];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
@@ -3578,8 +3630,10 @@ float8_regr_combine(PG_FUNCTION_ARGS)
transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(Sxy);
+ transdatums[6] = Float8GetDatumFast(Cx);
+ transdatums[7] = Float8GetDatumFast(Cy);
- result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+ result = construct_array_builtin(transdatums, 8, FLOAT8OID);
PG_RETURN_ARRAYTYPE_P(result);
}
@@ -3594,7 +3648,7 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
float8 N,
Sxx;
- transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_sxx", 8);
N = transvalues[0];
Sxx = transvalues[2];
@@ -3615,7 +3669,7 @@ float8_regr_syy(PG_FUNCTION_ARGS)
float8 N,
Syy;
- transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_syy", 8);
N = transvalues[0];
Syy = transvalues[4];
@@ -3636,7 +3690,7 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_sxy", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3657,7 +3711,7 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
float8 N,
Sx;
- transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_avgx", 8);
N = transvalues[0];
Sx = transvalues[1];
@@ -3676,7 +3730,7 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
float8 N,
Sy;
- transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_avgy", 8);
N = transvalues[0];
Sy = transvalues[3];
@@ -3695,7 +3749,7 @@ float8_covar_pop(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+ transvalues = check_float8_array(transarray, "float8_covar_pop", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3714,7 +3768,7 @@ float8_covar_samp(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+ transvalues = check_float8_array(transarray, "float8_covar_samp", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3733,13 +3787,18 @@ float8_corr(PG_FUNCTION_ARGS)
float8 N,
Sxx,
Syy,
- Sxy;
+ Sxy,
+ commonX,
+ commonY,
+ result;
- transvalues = check_float8_array(transarray, "float8_corr", 6);
+ transvalues = check_float8_array(transarray, "float8_corr", 8);
N = transvalues[0];
Sxx = transvalues[2];
Syy = transvalues[4];
Sxy = transvalues[5];
+ commonX = transvalues[6];
+ commonY = transvalues[7];
/* if N is 0 we should return NULL */
if (N < 1.0)
@@ -3747,11 +3806,37 @@ float8_corr(PG_FUNCTION_ARGS)
/* Note that Sxx and Syy are guaranteed to be non-negative */
- /* per spec, return NULL for horizontal and vertical lines */
+ /*
+ * Per spec, return NULL for horizontal and vertical lines. We can detect
+ * constant inputs exactly by checking commonX and commonY, even though
+ * Sxx and/or Syy might be nonzero due to roundoff error.
+ */
+ if (!isnan(commonX) || !isnan(commonY))
+ PG_RETURN_NULL();
+
+ /*
+ * Although we now know that the inputs weren't all equal, Sxx and/or Syy
+ * could be zero due to roundoff error, if the inputs were close together.
+ * It seems best to return NULL in that case; blindly applying the result
+ * formula would yield NaN, which seems wrong if the inputs did not
+ * include any NaN.
+ */
if (Sxx == 0 || Syy == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
+ /*
+ * We compute sqrt(Sxx) * sqrt(Syy) not sqrt(Sxx * Syy) because the raw
+ * product could underflow or overflow. Despite all these precautions,
+ * this formula can yield results outside [-1, 1] due to roundoff error.
+ * Clamp it to the expected range.
+ */
+ result = Sxy / (sqrt(Sxx) * sqrt(Syy));
+ if (result < -1)
+ result = -1;
+ else if (result > 1)
+ result = 1;
+
+ PG_RETURN_FLOAT8(result);
}
Datum
@@ -3762,13 +3847,17 @@ float8_regr_r2(PG_FUNCTION_ARGS)
float8 N,
Sxx,
Syy,
- Sxy;
+ Sxy,
+ commonX,
+ commonY;
- transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_r2", 8);
N = transvalues[0];
Sxx = transvalues[2];
Syy = transvalues[4];
Sxy = transvalues[5];
+ commonX = transvalues[6];
+ commonY = transvalues[7];
/* if N is 0 we should return NULL */
if (N < 1.0)
@@ -3776,12 +3865,12 @@ float8_regr_r2(PG_FUNCTION_ARGS)
/* Note that Sxx and Syy are guaranteed to be non-negative */
- /* per spec, return NULL for a vertical line */
- if (Sxx == 0)
+ /* per spec, return NULL for a vertical line (see float8_corr comments) */
+ if (!isnan(commonX) || Sxx == 0)
PG_RETURN_NULL();
- /* per spec, return 1.0 for a horizontal line */
- if (Syy == 0)
+ /* per spec, return 1.0 for a horizontal line (see float8_corr comments) */
+ if (!isnan(commonY) || Syy == 0)
PG_RETURN_FLOAT8(1.0);
PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
@@ -3794,12 +3883,14 @@ float8_regr_slope(PG_FUNCTION_ARGS)
float8 *transvalues;
float8 N,
Sxx,
- Sxy;
+ Sxy,
+ commonX;
- transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_slope", 8);
N = transvalues[0];
Sxx = transvalues[2];
Sxy = transvalues[5];
+ commonX = transvalues[6];
/* if N is 0 we should return NULL */
if (N < 1.0)
@@ -3807,8 +3898,8 @@ float8_regr_slope(PG_FUNCTION_ARGS)
/* Note that Sxx is guaranteed to be non-negative */
- /* per spec, return NULL for a vertical line */
- if (Sxx == 0)
+ /* per spec, return NULL for a vertical line (see float8_corr comments) */
+ if (!isnan(commonX) || Sxx == 0)
PG_RETURN_NULL();
PG_RETURN_FLOAT8(Sxy / Sxx);
@@ -3823,14 +3914,16 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
Sx,
Sxx,
Sy,
- Sxy;
+ Sxy,
+ commonX;
- transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
Sy = transvalues[3];
Sxy = transvalues[5];
+ commonX = transvalues[6];
/* if N is 0 we should return NULL */
if (N < 1.0)
@@ -3838,8 +3931,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
/* Note that Sxx is guaranteed to be non-negative */
- /* per spec, return NULL for a vertical line */
- if (Sxx == 0)
+ /* per spec, return NULL for a vertical line (see float8_corr comments) */
+ if (!isnan(commonX) || Sxx == 0)
PG_RETURN_NULL();
PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 870769e8f14..f22ccfbf49f 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -475,37 +475,37 @@
aggcombinefn => 'int8pl', aggtranstype => 'int8', agginitval => '0' },
{ aggfnoid => 'regr_sxx', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_sxx', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_syy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_syy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_sxy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_sxy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_avgx', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_avgx', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_avgy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_avgy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_r2', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_r2', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_slope', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_slope', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_intercept', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_intercept', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'covar_pop', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_covar_pop', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'covar_samp', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_covar_samp', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'corr', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_corr', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
# boolean-and and boolean-or
{ aggfnoid => 'bool_and', aggtransfn => 'booland_statefunc',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index be0e1573183..7ba2e613d1e 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -515,6 +515,57 @@ SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
NaN |
(1 row)
+-- check some cases that formerly had poor roundoff-error behavior
+SET extra_float_digits = 1;
+SELECT corr(0.09, g), regr_r2(0.09, g)
+ FROM generate_series(1, 30) g;
+ corr | regr_r2
+------+---------
+ | 1
+(1 row)
+
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+ FROM generate_series(1, 30) g;
+ corr | regr_r2 | regr_slope | regr_intercept
+------+---------+------------+----------------
+ | | |
+(1 row)
+
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+ FROM generate_series(1, 3) g;
+ corr
+------
+
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 3) g;
+ corr
+------
+ 1
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 30) g;
+ corr
+------
+ 1
+(1 row)
+
+SET extra_float_digits = 0;
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+ NaN
+(1 row)
+
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+
+(1 row)
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -538,10 +589,10 @@ SELECT float8_accum('{4,140,2900}'::float8[], 100);
{5,240,6280}
(1 row)
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
- float8_regr_accum
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
+ float8_regr_accum
+--------------------------------------
+ {5,240,6280,1490,95080,8680,100,NaN}
(1 row)
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -576,25 +627,25 @@ SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
{5,240,6280}
(1 row)
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{0,0,0,0,0,0}'::float8[]);
- float8_regr_combine
----------------------------
- {3,60,200,750,20000,2000}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+ '{0,0,0,0,0,0,0,0}'::float8[]);
+ float8_regr_combine
+---------------------------------
+ {3,60,200,750,20000,2000,1,NaN}
(1 row)
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
- float8_regr_combine
------------------------------
- {2,180,200,740,57800,-3400}
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+ '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+ float8_regr_combine
+-----------------------------------
+ {2,180,200,740,57800,-3400,NaN,1}
(1 row)
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
- float8_regr_combine
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+ '{2,180,200,740,57800,-3400,7,9}'::float8[]);
+ float8_regr_combine
+------------------------------------
+ {5,240,6280,1490,95080,8680,7,NaN}
(1 row)
DROP TABLE regr_test;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 77ca6ffa3a9..5eb412bd43b 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -140,6 +140,24 @@ SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
+-- check some cases that formerly had poor roundoff-error behavior
+SET extra_float_digits = 1;
+SELECT corr(0.09, g), regr_r2(0.09, g)
+ FROM generate_series(1, 30) g;
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+ FROM generate_series(1, 30) g;
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+ FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 30) g;
+SET extra_float_digits = 0;
+
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -148,7 +166,7 @@ FROM regr_test WHERE x IN (10,20,30,80);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test;
SELECT float8_accum('{4,140,2900}'::float8[], 100);
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -156,12 +174,12 @@ FROM regr_test WHERE x IN (80,100);
SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{0,0,0,0,0,0}'::float8[]);
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+ '{0,0,0,0,0,0,0,0}'::float8[]);
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+ '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+ '{2,180,200,740,57800,-3400,7,9}'::float8[]);
DROP TABLE regr_test;
-- test count, distinct
В списке pgsql-bugs по дате отправления: