Re: BUG #19340: Wrong result from CORR() function
| От | Tom Lane |
|---|---|
| Тема | Re: BUG #19340: Wrong result from CORR() function |
| Дата | |
| Msg-id | 1872685.1764973053@sss.pgh.pa.us обсуждение исходный текст |
| Ответ на | Re: BUG #19340: Wrong result from CORR() function (Dean Rasheed <dean.a.rasheed@gmail.com>) |
| Ответы |
Re: BUG #19340: Wrong result from CORR() function
|
| Список | pgsql-bugs |
Dean Rasheed <dean.a.rasheed@gmail.com> writes:
> Looking at float8_regr_accum(), I think it would be preferable to
> arrange for it to leave Sxx, Syy, and Sxy zero until distinct X and Y
> values are seen. I.e., something like this:
That seems like a good idea. I was initially worried that the extra
isnan() checks would slow down aggregation noticeably in the normal
case where we soon discover that the inputs aren't all equal. They
don't seem to though. For me, the attached v2 clocks in at only 0.9%
slower than HEAD, IOW actually faster than v1; which I suspect just
proves that what we're trying to measure here is comparable to the
noise threshold.
I took your other suggestions too, except for:
> I also wonder if it would be worth doing something similar for the
> single-variable aggregates so that var_pop(), var_samp(),
> stddev_pop(), and stddev_samp() would all return exactly zero, and
> avg() would return the exact common value, if all the inputs were
> constant.
I'm less excited about this, because for all those aggregates,
you have the option of using the numeric variant if you're
dissatisified with the accuracy of the float8 variant. Also,
given that the per-input work is substantially less, the overhead
of tracking the common input value would probably be noticeably
greater. If somebody else wants to investigate that, I won't
stand in the way, but I don't want to do it.
regards, tom lane
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 7b97d2be6ca..05f920e3aec 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -3319,9 +3319,16 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
* As with the preceding aggregates, we use the Youngs-Cramer algorithm to
* reduce rounding errors in the aggregate final functions.
*
- * The transition datatype for all these aggregates is a 6-element array of
+ * The transition datatype for all these aggregates is an 8-element array of
* float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
- * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)), commonX, and commonY
+ * in that order.
+ *
+ * commonX is defined as the common X value if all the X values were the same,
+ * else NaN; likewise for commonY. This is useful for deciding whether corr()
+ * and related functions should return NULL. This representation cannot
+ * distinguish all-the-values-were-NaN from the-values-weren't-all-the-same,
+ * but that's okay because we want to return NaN for all-NaN input.
*
* Note that Y is the first argument to all these aggregates!
*
@@ -3345,17 +3352,21 @@ float8_regr_accum(PG_FUNCTION_ARGS)
Sy,
Syy,
Sxy,
+ commonX,
+ commonY,
tmpX,
tmpY,
scale;
- transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_accum", 8);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
Sy = transvalues[3];
Syy = transvalues[4];
Sxy = transvalues[5];
+ commonX = transvalues[6];
+ commonY = transvalues[7];
/*
* Use the Youngs-Cramer algorithm to incorporate the new values into the
@@ -3366,36 +3377,58 @@ float8_regr_accum(PG_FUNCTION_ARGS)
Sy += newvalY;
if (transvalues[0] > 0.0)
{
- tmpX = newvalX * N - Sx;
- tmpY = newvalY * N - Sy;
- scale = 1.0 / (N * transvalues[0]);
- Sxx += tmpX * tmpX * scale;
- Syy += tmpY * tmpY * scale;
- Sxy += tmpX * tmpY * scale;
+ /*
+ * Check to see if we have seen distinct inputs. We can use a test
+ * that's a bit cheaper than float8_ne() because if commonX is already
+ * NaN, it does not matter whether the != test returns true or not.
+ */
+ if (newvalX != commonX || isnan(newvalX))
+ commonX = get_float8_nan();
+ if (newvalY != commonY || isnan(newvalY))
+ commonY = get_float8_nan();
/*
- * Overflow check. We only report an overflow error when finite
- * inputs lead to infinite results. Note also that Sxx, Syy and Sxy
- * should be NaN if any of the relevant inputs are infinite, so we
- * intentionally prevent them from becoming infinite.
+ * If we have not seen distinct inputs, then Sxx, Syy, and/or Sxy
+ * should remain exactly zero (since Sx's exact value would be N *
+ * commonX, etc). Carrying out these calculations would just add the
+ * possibility of injecting roundoff error.
*/
- if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
+ if (isnan(commonX) || isnan(commonY))
{
- if (((isinf(Sx) || isinf(Sxx)) &&
- !isinf(transvalues[1]) && !isinf(newvalX)) ||
- ((isinf(Sy) || isinf(Syy)) &&
- !isinf(transvalues[3]) && !isinf(newvalY)) ||
- (isinf(Sxy) &&
- !isinf(transvalues[1]) && !isinf(newvalX) &&
- !isinf(transvalues[3]) && !isinf(newvalY)))
- float_overflow_error();
+ tmpX = newvalX * N - Sx;
+ tmpY = newvalY * N - Sy;
+ scale = 1.0 / (N * transvalues[0]);
+ if (isnan(commonX))
+ Sxx += tmpX * tmpX * scale;
+ if (isnan(commonY))
+ Syy += tmpY * tmpY * scale;
+ if (isnan(commonX) && isnan(commonY))
+ Sxy += tmpX * tmpY * scale;
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx, Syy and
+ * Sxy should be NaN if any of the relevant inputs are infinite,
+ * so we intentionally prevent them from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
+ {
+ if (((isinf(Sx) || isinf(Sxx)) &&
+ !isinf(transvalues[1]) && !isinf(newvalX)) ||
+ ((isinf(Sy) || isinf(Syy)) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)) ||
+ (isinf(Sxy) &&
+ !isinf(transvalues[1]) && !isinf(newvalX) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)))
+ float_overflow_error();
- if (isinf(Sxx))
- Sxx = get_float8_nan();
- if (isinf(Syy))
- Syy = get_float8_nan();
- if (isinf(Sxy))
- Sxy = get_float8_nan();
+ if (isinf(Sxx))
+ Sxx = get_float8_nan();
+ if (isinf(Syy))
+ Syy = get_float8_nan();
+ if (isinf(Sxy))
+ Sxy = get_float8_nan();
+ }
}
}
else
@@ -3410,6 +3443,9 @@ float8_regr_accum(PG_FUNCTION_ARGS)
Sxx = Sxy = get_float8_nan();
if (isnan(newvalY) || isinf(newvalY))
Syy = Sxy = get_float8_nan();
+
+ commonX = newvalX;
+ commonY = newvalY;
}
/*
@@ -3425,12 +3461,14 @@ float8_regr_accum(PG_FUNCTION_ARGS)
transvalues[3] = Sy;
transvalues[4] = Syy;
transvalues[5] = Sxy;
+ transvalues[6] = commonX;
+ transvalues[7] = commonY;
PG_RETURN_ARRAYTYPE_P(transarray);
}
else
{
- Datum transdatums[6];
+ Datum transdatums[8];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
@@ -3439,8 +3477,10 @@ float8_regr_accum(PG_FUNCTION_ARGS)
transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(Sxy);
+ transdatums[6] = Float8GetDatumFast(commonX);
+ transdatums[7] = Float8GetDatumFast(commonY);
- result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+ result = construct_array_builtin(transdatums, 8, FLOAT8OID);
PG_RETURN_ARRAYTYPE_P(result);
}
@@ -3449,7 +3489,7 @@ float8_regr_accum(PG_FUNCTION_ARGS)
/*
* float8_regr_combine
*
- * An aggregate combine function used to combine two 6 fields
+ * An aggregate combine function used to combine two 8-fields
* aggregate transition data into a single transition data.
* This function is used only in two stage aggregation and
* shouldn't be called outside aggregate context.
@@ -3467,12 +3507,16 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy1,
Syy1,
Sxy1,
+ Cx1,
+ Cy1,
N2,
Sx2,
Sxx2,
Sy2,
Syy2,
Sxy2,
+ Cx2,
+ Cy2,
tmp1,
tmp2,
N,
@@ -3480,10 +3524,12 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sxx,
Sy,
Syy,
- Sxy;
+ Sxy,
+ Cx,
+ Cy;
- transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
- transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
+ transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 8);
+ transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 8);
N1 = transvalues1[0];
Sx1 = transvalues1[1];
@@ -3491,6 +3537,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy1 = transvalues1[3];
Syy1 = transvalues1[4];
Sxy1 = transvalues1[5];
+ Cx1 = transvalues1[6];
+ Cy1 = transvalues1[7];
N2 = transvalues2[0];
Sx2 = transvalues2[1];
@@ -3498,6 +3546,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy2 = transvalues2[3];
Syy2 = transvalues2[4];
Sxy2 = transvalues2[5];
+ Cx2 = transvalues2[6];
+ Cy2 = transvalues2[7];
/*--------------------
* The transition values combine using a generalization of the
@@ -3523,6 +3573,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy = Sy2;
Syy = Syy2;
Sxy = Sxy2;
+ Cx = Cx2;
+ Cy = Cy2;
}
else if (N2 == 0.0)
{
@@ -3532,6 +3584,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sy = Sy1;
Syy = Syy1;
Sxy = Sxy1;
+ Cx = Cx1;
+ Cy = Cy1;
}
else
{
@@ -3549,6 +3603,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2))
float_overflow_error();
+ if (float8_eq(Cx1, Cx2))
+ Cx = Cx1;
+ else
+ Cx = get_float8_nan();
+ if (float8_eq(Cy1, Cy2))
+ Cy = Cy1;
+ else
+ Cy = get_float8_nan();
}
/*
@@ -3564,12 +3626,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
transvalues1[3] = Sy;
transvalues1[4] = Syy;
transvalues1[5] = Sxy;
+ transvalues1[6] = Cx;
+ transvalues1[7] = Cy;
PG_RETURN_ARRAYTYPE_P(transarray1);
}
else
{
- Datum transdatums[6];
+ Datum transdatums[8];
ArrayType *result;
transdatums[0] = Float8GetDatumFast(N);
@@ -3578,8 +3642,10 @@ float8_regr_combine(PG_FUNCTION_ARGS)
transdatums[3] = Float8GetDatumFast(Sy);
transdatums[4] = Float8GetDatumFast(Syy);
transdatums[5] = Float8GetDatumFast(Sxy);
+ transdatums[6] = Float8GetDatumFast(Cx);
+ transdatums[7] = Float8GetDatumFast(Cy);
- result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+ result = construct_array_builtin(transdatums, 8, FLOAT8OID);
PG_RETURN_ARRAYTYPE_P(result);
}
@@ -3594,7 +3660,7 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
float8 N,
Sxx;
- transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_sxx", 8);
N = transvalues[0];
Sxx = transvalues[2];
@@ -3615,7 +3681,7 @@ float8_regr_syy(PG_FUNCTION_ARGS)
float8 N,
Syy;
- transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_syy", 8);
N = transvalues[0];
Syy = transvalues[4];
@@ -3636,7 +3702,7 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_sxy", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3655,16 +3721,22 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- Sx;
+ Sx,
+ commonX;
- transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_avgx", 8);
N = transvalues[0];
Sx = transvalues[1];
+ commonX = transvalues[6];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
+ /* if all inputs were the same just return that, avoiding roundoff error */
+ if (!isnan(commonX))
+ PG_RETURN_FLOAT8(commonX);
+
PG_RETURN_FLOAT8(Sx / N);
}
@@ -3674,16 +3746,22 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
float8 *transvalues;
float8 N,
- Sy;
+ Sy,
+ commonY;
- transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_avgy", 8);
N = transvalues[0];
Sy = transvalues[3];
+ commonY = transvalues[7];
/* if N is 0 we should return NULL */
if (N < 1.0)
PG_RETURN_NULL();
+ /* if all inputs were the same just return that, avoiding roundoff error */
+ if (!isnan(commonY))
+ PG_RETURN_FLOAT8(commonY);
+
PG_RETURN_FLOAT8(Sy / N);
}
@@ -3695,7 +3773,7 @@ float8_covar_pop(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+ transvalues = check_float8_array(transarray, "float8_covar_pop", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3714,7 +3792,7 @@ float8_covar_samp(PG_FUNCTION_ARGS)
float8 N,
Sxy;
- transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+ transvalues = check_float8_array(transarray, "float8_covar_samp", 8);
N = transvalues[0];
Sxy = transvalues[5];
@@ -3733,9 +3811,12 @@ float8_corr(PG_FUNCTION_ARGS)
float8 N,
Sxx,
Syy,
- Sxy;
+ Sxy,
+ product,
+ sqrtproduct,
+ result;
- transvalues = check_float8_array(transarray, "float8_corr", 6);
+ transvalues = check_float8_array(transarray, "float8_corr", 8);
N = transvalues[0];
Sxx = transvalues[2];
Syy = transvalues[4];
@@ -3751,7 +3832,29 @@ float8_corr(PG_FUNCTION_ARGS)
if (Sxx == 0 || Syy == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
+ /*
+ * The product Sxx * Syy might underflow or overflow. If so, we can
+ * recover by computing sqrt(Sxx) * sqrt(Syy) instead of sqrt(Sxx * Syy).
+ * However, the double sqrt() calculation is a bit slower and less
+ * accurate, so don't do it if we don't have to.
+ */
+ product = Sxx * Syy;
+ if (product == 0 || isinf(product))
+ sqrtproduct = sqrt(Sxx) * sqrt(Syy);
+ else
+ sqrtproduct = sqrt(product);
+ result = Sxy / sqrtproduct;
+
+ /*
+ * Despite all these precautions, this formula can yield results outside
+ * [-1, 1] due to roundoff error. Clamp it to the expected range.
+ */
+ if (result < -1)
+ result = -1;
+ else if (result > 1)
+ result = 1;
+
+ PG_RETURN_FLOAT8(result);
}
Datum
@@ -3764,7 +3867,7 @@ float8_regr_r2(PG_FUNCTION_ARGS)
Syy,
Sxy;
- transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_r2", 8);
N = transvalues[0];
Sxx = transvalues[2];
Syy = transvalues[4];
@@ -3796,7 +3899,7 @@ float8_regr_slope(PG_FUNCTION_ARGS)
Sxx,
Sxy;
- transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_slope", 8);
N = transvalues[0];
Sxx = transvalues[2];
Sxy = transvalues[5];
@@ -3825,7 +3928,7 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
Sy,
Sxy;
- transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+ transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
N = transvalues[0];
Sx = transvalues[1];
Sxx = transvalues[2];
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 870769e8f14..f22ccfbf49f 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -475,37 +475,37 @@
aggcombinefn => 'int8pl', aggtranstype => 'int8', agginitval => '0' },
{ aggfnoid => 'regr_sxx', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_sxx', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_syy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_syy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_sxy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_sxy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_avgx', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_avgx', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_avgy', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_avgy', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_r2', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_r2', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_slope', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_slope', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'regr_intercept', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_regr_intercept', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'covar_pop', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_covar_pop', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'covar_samp', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_covar_samp', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
{ aggfnoid => 'corr', aggtransfn => 'float8_regr_accum',
aggfinalfn => 'float8_corr', aggcombinefn => 'float8_regr_combine',
- aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+ aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
# boolean-and and boolean-or
{ aggfnoid => 'bool_and', aggtransfn => 'booland_statefunc',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index be0e1573183..464a5ce39da 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -515,6 +515,55 @@ SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
NaN |
(1 row)
+-- check some cases that formerly had poor roundoff-error behavior
+SELECT corr(0.09, g), regr_r2(0.09, g)
+ FROM generate_series(1, 30) g;
+ corr | regr_r2
+------+---------
+ | 1
+(1 row)
+
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+ FROM generate_series(1, 30) g;
+ corr | regr_r2 | regr_slope | regr_intercept
+------+---------+------------+----------------
+ | | |
+(1 row)
+
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+ FROM generate_series(1, 3) g;
+ corr
+------
+
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 3) g;
+ corr
+------
+ 1
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 30) g;
+ corr
+------
+ 1
+(1 row)
+
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+ NaN
+(1 row)
+
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+ corr
+------
+
+(1 row)
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -538,10 +587,10 @@ SELECT float8_accum('{4,140,2900}'::float8[], 100);
{5,240,6280}
(1 row)
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
- float8_regr_accum
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
+ float8_regr_accum
+---------------------------------------
+ {5,240,2900,1490,95080,15050,100,NaN}
(1 row)
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -576,25 +625,25 @@ SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
{5,240,6280}
(1 row)
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{0,0,0,0,0,0}'::float8[]);
- float8_regr_combine
----------------------------
- {3,60,200,750,20000,2000}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+ '{0,0,0,0,0,0,0,0}'::float8[]);
+ float8_regr_combine
+---------------------------------
+ {3,60,200,750,20000,2000,1,NaN}
(1 row)
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
- float8_regr_combine
------------------------------
- {2,180,200,740,57800,-3400}
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+ '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+ float8_regr_combine
+-----------------------------------
+ {2,180,200,740,57800,-3400,NaN,1}
(1 row)
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
- float8_regr_combine
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+ '{2,180,200,740,57800,-3400,7,9}'::float8[]);
+ float8_regr_combine
+------------------------------------
+ {5,240,6280,1490,95080,8680,7,NaN}
(1 row)
DROP TABLE regr_test;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 77ca6ffa3a9..0bc7fea8186 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -140,6 +140,22 @@ SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
+-- check some cases that formerly had poor roundoff-error behavior
+SELECT corr(0.09, g), regr_r2(0.09, g)
+ FROM generate_series(1, 30) g;
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+ FROM generate_series(1, 30) g;
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+ FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+ FROM generate_series(1, 30) g;
+
+-- these examples pose definitional questions for NaN inputs
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+
-- test accum and combine functions directly
CREATE TABLE regr_test (x float8, y float8);
INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -148,7 +164,7 @@ FROM regr_test WHERE x IN (10,20,30,80);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test;
SELECT float8_accum('{4,140,2900}'::float8[], 100);
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
FROM regr_test WHERE x IN (10,20,30);
SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -156,12 +172,12 @@ FROM regr_test WHERE x IN (80,100);
SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{0,0,0,0,0,0}'::float8[]);
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
- '{2,180,200,740,57800,-3400}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+ '{0,0,0,0,0,0,0,0}'::float8[]);
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+ '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+ '{2,180,200,740,57800,-3400,7,9}'::float8[]);
DROP TABLE regr_test;
-- test count, distinct
В списке pgsql-bugs по дате отправления: