Обсуждение: Reduce palloc's in numeric operations.
Hello, I will propose reduce palloc's in numeric operations. The numeric operations are slow by nature, but usually it is not a problem for on-disk operations. Altough the slowdown is enhanced on on-memory operations. I inspcted them and found some very short term pallocs. These palloc's are used for temporary storage for digits of unpaked numerics. The formats of numeric digits in packed and unpaked forms are same. So we can kicked out a part of palloc's using digits in packed numeric in-place to make unpakced one. In this patch, I added new function set_var_from_num_nocopy() to do this. And make use of it for operands which won't modified. The performance gain seems quite moderate.... 'SELECT SUM(numeric_column) FROM on_memory_table' for ten million rows and about 8 digits numeric runs for 3480 ms aganst original 3930 ms. It's 11% gain. 'SELECT SUM(int_column) FROM on_memory_table' needed 1570 ms. Similary 8% gain for about 30 - 50 digits numeric. Performance of avg(numeric) made no gain in contrast. Do you think this worth doing? regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 68c1f1d..8e88bd5 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -367,6 +367,7 @@ static void zero_var(NumericVar *var);static const char *set_var_from_str(const char *str, const char*cp, NumericVar *dest);static void set_var_from_num(Numeric value, NumericVar *dest); +static void set_var_from_num_nocopy(Numeric num, NumericVar *dest);static void set_var_from_var(NumericVar *value, NumericVar*dest);static char *get_str_from_var(NumericVar *var, int dscale);static char *get_str_from_var_sci(NumericVar*var, int rscale); @@ -540,12 +541,10 @@ numeric_out(PG_FUNCTION_ARGS) * from rounding. */ init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); str = get_str_from_var(&x, x.dscale); - free_var(&x); - PG_RETURN_CSTRING(str);} @@ -617,11 +616,10 @@ numeric_out_sci(Numeric num, int scale) return pstrdup("NaN"); init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); str = get_str_from_var_sci(&x, scale); - free_var(&x); return str;} @@ -696,7 +694,7 @@ numeric_send(PG_FUNCTION_ARGS) int i; init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); pq_begintypsend(&buf); @@ -707,8 +705,6 @@ numeric_send(PG_FUNCTION_ARGS) for (i = 0; i < x.ndigits; i++) pq_sendint(&buf, x.digits[i],sizeof(NumericDigit)); - free_var(&x); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf));} @@ -1577,15 +1573,13 @@ numeric_add(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); add_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1620,15 +1614,13 @@ numeric_sub(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); sub_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1667,15 +1659,13 @@ numeric_mul(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); mul_var(&arg1, &arg2, &result, arg1.dscale + arg2.dscale); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1711,8 +1701,8 @@ numeric_div(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Select scale for division result @@ -1726,8 +1716,6 @@ numeric_div(PG_FUNCTION_ARGS) res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1762,8 +1750,8 @@ numeric_div_trunc(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Do the divide and return the result @@ -1772,8 +1760,6 @@ numeric_div_trunc(PG_FUNCTION_ARGS) res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1802,15 +1788,13 @@ numeric_mod(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); mod_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&result); - free_var(&arg2); free_var(&arg1); PG_RETURN_NUMERIC(res); @@ -1980,7 +1964,7 @@ numeric_sqrt(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* Assume the input was normalized, so arg.weight is accurate */ sweight =(arg.weight + 1) * DEC_DIGITS / 2 - 1; @@ -1998,7 +1982,6 @@ numeric_sqrt(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2033,7 +2016,7 @@ numeric_exp(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* convert input to float8, ignoring overflow */ val = numericvar_to_double_no_overflow(&arg); @@ -2061,7 +2044,6 @@ numeric_exp(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2091,7 +2073,7 @@ numeric_ln(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* Approx decimal digits before decimal point */ dec_digits = (arg.weight+ 1) * DEC_DIGITS; @@ -2112,7 +2094,6 @@ numeric_ln(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2146,8 +2127,8 @@ numeric_log(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Call log_var() to compute and return the result; note it handles scale @@ -2158,8 +2139,6 @@ numeric_log(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg2); - free_var(&arg1); PG_RETURN_NUMERIC(res);} @@ -2195,8 +2174,8 @@ numeric_power(PG_FUNCTION_ARGS) init_var(&arg2_trunc); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); set_var_from_var(&arg2, &arg2_trunc); trunc_var(&arg2_trunc, 0); @@ -2227,9 +2206,7 @@ numeric_power(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg2); free_var(&arg2_trunc); - free_var(&arg1); PG_RETURN_NUMERIC(res);} @@ -2277,9 +2254,8 @@ numeric_int4(PG_FUNCTION_ARGS) /* Convert to variable format, then convert to int4 */ init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); result = numericvar_to_int4(&x); - free_var(&x); PG_RETURN_INT32(result);} @@ -2345,15 +2321,13 @@ numeric_int8(PG_FUNCTION_ARGS) /* Convert to variable format and thence to int8 */ init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); if (!numericvar_to_int8(&x, &result)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("bigint out of range"))); - free_var(&x); - PG_RETURN_INT64(result);} @@ -2393,15 +2367,13 @@ numeric_int2(PG_FUNCTION_ARGS) /* Convert to variable format and thence to int8 */ init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); if (!numericvar_to_int8(&x, &val)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("smallint out of range"))); - free_var(&x); - /* Down-convert to int2 */ result = (int16) val; @@ -2764,7 +2736,7 @@ numeric_stddev_internal(ArrayType *transarray, return make_result(&const_nan); init_var(&vN); - set_var_from_num(N, &vN); + set_var_from_num_nocopy(N, &vN); /* * Sample stddev and variance are undefined when N <= 1; population stddev @@ -2786,9 +2758,9 @@ numeric_stddev_internal(ArrayType *transarray, sub_var(&vN, &const_one, &vNminus1); init_var(&vsumX); - set_var_from_num(sumX, &vsumX); + set_var_from_num_nocopy(sumX, &vsumX); init_var(&vsumX2); - set_var_from_num(sumX2, &vsumX2); + set_var_from_num_nocopy(sumX2, &vsumX2); /* compute rscale for mul_var calls */ rscale = vsumX.dscale * 2; @@ -2816,10 +2788,7 @@ numeric_stddev_internal(ArrayType *transarray, res = make_result(&vsumX); } - free_var(&vN); free_var(&vNminus1); - free_var(&vsumX); - free_var(&vsumX2); return res;} @@ -3448,6 +3417,21 @@ set_var_from_num(Numeric num, NumericVar *dest) memcpy(dest->digits, NUMERIC_DIGITS(num), ndigits* sizeof(NumericDigit));} +/* + * set_var_from_num_nocopy() - + * + * Convert the packed db format into a variable - without copying digits + */ +static void +set_var_from_num_nocopy(Numeric num, NumericVar *dest) +{ + dest->ndigits = NUMERIC_NDIGITS(num); + dest->weight = NUMERIC_WEIGHT(num); + dest->sign = NUMERIC_SIGN(num); + dest->dscale = NUMERIC_DSCALE(num); + dest->digits = NUMERIC_DIGITS(num); +} +/* * set_var_from_var() -
On 14.09.2012 11:25, Kyotaro HORIGUCHI wrote: > Hello, I will propose reduce palloc's in numeric operations. > > The numeric operations are slow by nature, but usually it is not > a problem for on-disk operations. Altough the slowdown is > enhanced on on-memory operations. > > I inspcted them and found some very short term pallocs. These > palloc's are used for temporary storage for digits of unpaked > numerics. > > The formats of numeric digits in packed and unpaked forms are > same. So we can kicked out a part of palloc's using digits in > packed numeric in-place to make unpakced one. > > In this patch, I added new function set_var_from_num_nocopy() to > do this. And make use of it for operands which won't modified. Have to be careful to really not modify the operands. numeric_out() and numeric_out_sci() are wrong; they call get_str_from_var(), which modifies the argument. Same with numeric_expr(): it passes the argument to numericvar_to_double_no_overflow(), which passes it to get_str_from_var(). numericvar_to_int8() also modifies its argument, so all the functions that use that, directly or indirectly, must make a copy. Perhaps get_str_from_var(), and the other functions that currently scribble on the arguments, should be modified to not do so. They could easily make a copy of the argument within the function. Then the callers could safely use set_var_from_num_nocopy(). The performance would be the same, you would have the same number of pallocs, but you would get rid of the surprising argument-modifying behavior of those functions. > The performance gain seems quite moderate.... > > 'SELECT SUM(numeric_column) FROM on_memory_table' for ten million > rows and about 8 digits numeric runs for 3480 ms aganst original > 3930 ms. It's 11% gain. 'SELECT SUM(int_column) FROM > on_memory_table' needed 1570 ms. > > Similary 8% gain for about 30 - 50 digits numeric. Performance of > avg(numeric) made no gain in contrast. > > Do you think this worth doing? Yes, I think this is worthwhile. I'm seeing an even bigger gain, with smaller numerics. I created a table with this: CREATE TABLE numtest AS SELECT a::numeric AS col FROM generate_series(1, 10000000) a; And repeated this query with \timing: SELECT SUM(col) FROM numtest; The execution time of that query fell from about 5300 ms to 4300 ms, ie. about 20%. - Heikki
Kyotaro HORIGUCHI wrote: > Hello, I will propose reduce palloc's in numeric operations. > > The numeric operations are slow by nature, but usually it is not > a problem for on-disk operations. Altough the slowdown is > enhanced on on-memory operations. > > I inspcted them and found some very short term pallocs. These > palloc's are used for temporary storage for digits of unpaked > numerics. This looks like a neat little patch. Some feedback has been provided by Heikki (thanks!) and since we're still waiting for an updated version, I have marked this Returned with Feedback for the time being. Please make sure to address the remaining issues and submit to the next commitfest. Thanks. -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
Hello. My colleague found that pg_stat_replication.sync_state shows false state for some condition. This prevents Pacemaker from completing fail-over that could safely be done. The point is in walsender.c, pg_stat_get_wal_senders() below, (as of REL9_2_1) 1555: if (walsnd->pid != 0) 1556: { 1557: /* 1558: * Treat a standby such as a pg_basebackup backgroundprocess 1559: * which always returns an invalid flush location, as an 1560: * asynchronous standby.1561: */ ! 1562: sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush) ? 1563: 0 : walsnd->sync_standby_priority; Here, XLogRecPtrIsInvalid(walsnd->flush) is defined as (walsnd->flush.xrecoff == 0) which becomes true as usual at every WAL 'file's (not segments) boundary. xrecoff == 0 is certainly invalid for the start point of WAL *RECORD*, but should be considered valid in replication stream. This check was introduced at 9.2.0 and the version up between 9.1.4 and 9.1.5. | DEBUG: write 4/0 flush 3/FEFFEC68 apply 3/FEFFEC68| DEBUG: write 4/0 flush 4/0 apply 3/FEFFEC68| DEBUG: HOGE: flush= 3/FEFFEC68 sync_priority[0] = 1| DEBUG: write 4/111C0 flush 4/0 apply 3/FEFFECC0 !| DEBUG: HOGE: flush = 4/0 sync_priority[0] = 0 This value zero of sync_priority[0] makes sync_status 'async' errorneously and confuses Pacemaker. # The log line marked with 'HOGE' above printed by applying the # patch at the bottom of this message and invoking 'select # sync_state from pg_stat_replication' periodically. To increase # the chance to see the symptom, sleep 1 second for 'file' # boundaries :-) The Heikki's recent(?) commit 0ab9d1c4b31622e9176472b4276f3e9831e3d6ba which changes the format of XLogRecPtr from logid:xrecoff struct to 64 bit linear address would fix the false indication. But I suppose this patch won't be applied to existing 9.1.x and 9.2.x because of the modification onto streaming protocol. As far as I see the patch, it would'nt change the meaning of XLogRecPtr to change XLogRecPtrIsInvalid from (xrecoff == 0) to (xrecoff == 0 && xlogid == 0). But this change affects rather wide portion where handling WAL nevertheless what is needed here is only to stop the false indication. On the other hand, pg_basebackup seems return 0/0 as flush and apply positions so it seems enough only to add xlogid == 0 into the condition. The patch attached for REL9_2_1 does this and yields the result following. | DEBUG: write 2/FEFFFD48 flush 2/FEFFFD48 apply 2/FEFF7AB0| DEBUG: write 3/0 flush 2/FEFFFD48 apply 2/FEFF7E88| DEBUG: write 3/0 flush 3/0 apply 2/FEFFFD48| DEBUG: HOGE: flush = 2/FEFFFD48 sync_priority[0] = 1| DEBUG: write 3/E338flush 3/0 apply 2/FEFFFF80 !| DEBUG: HOGE: flush = 3/0 sync_priority[0] = 1 I think this patch should be applied for 9.2.2 and 9.1.7. regards, -- Kyotaro Horiguchi NTT Open Source Software Center ============================================================ ===== The patch for this test. ============================================================ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 064ddd5..19f79d1 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -618,6 +618,10 @@ ProcessStandbyReplyMessage(void) reply.flush.xlogid, reply.flush.xrecoff, reply.apply.xlogid,reply.apply.xrecoff); + if (reply.write.xrecoff == 0 || + reply.flush.xrecoff == 0) + sleep(1); + /* * Update shared state for this WalSender process based on reply data from * standby. @@ -1561,7 +1565,10 @@ pg_stat_get_wal_senders(PG_FUNCTION_ARGS) */ sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush)? 0 : walsnd->sync_standby_priority; - + elog(DEBUG1, "HOGE: flush = %X/%X sync_priority[%d] = %d", + walsnd->flush.xlogid, walsnd->flush.xrecoff, + i, sync_priority[i]); + if (walsnd->state == WALSNDSTATE_STREAMING && walsnd->sync_standby_priority > 0 && (priority == 0 || diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 064ddd5..1d4cbc4 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1555,11 +1555,11 @@ pg_stat_get_wal_senders(PG_FUNCTION_ARGS) if (walsnd->pid != 0) { /* - * Treat a standby such as a pg_basebackup background process - * which always returns an invalid flush location, as an + * Treat a standby such as a pg_basebackup background process which + * always returns 0/0 (InvalidXLogRecPtr) as flush location, as an * asynchronous standby. */ - sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush) ? + sync_priority[i] = XLByteEQ(walsnd->flush, InvalidXLogRecPtr) ? 0 : walsnd->sync_standby_priority; if (walsnd->state == WALSNDSTATE_STREAMING &&
On Thu, Oct 18, 2012 at 5:42 PM, Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp> wrote: > Hello. My colleague found that pg_stat_replication.sync_state > shows false state for some condition. > > This prevents Pacemaker from completing fail-over that could > safely be done. > > The point is in walsender.c, pg_stat_get_wal_senders() below, (as > of REL9_2_1) > > 1555: if (walsnd->pid != 0) > 1556: { > 1557: /* > 1558: * Treat a standby such as a pg_basebackup background process > 1559: * which always returns an invalid flush location, as an > 1560: * asynchronous standby. > 1561: */ > ! 1562: sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush) ? > 1563: 0 : walsnd->sync_standby_priority; > > Here, XLogRecPtrIsInvalid(walsnd->flush) is defined as > (walsnd->flush.xrecoff == 0) which becomes true as usual at every > WAL 'file's (not segments) boundary. xrecoff == 0 is certainly > invalid for the start point of WAL *RECORD*, but should be > considered valid in replication stream. This check was introduced > at 9.2.0 and the version up between 9.1.4 and 9.1.5. > > | DEBUG: write 4/0 flush 3/FEFFEC68 apply 3/FEFFEC68 > | DEBUG: write 4/0 flush 4/0 apply 3/FEFFEC68 > | DEBUG: HOGE: flush = 3/FEFFEC68 sync_priority[0] = 1 > | DEBUG: write 4/111C0 flush 4/0 apply 3/FEFFECC0 > !| DEBUG: HOGE: flush = 4/0 sync_priority[0] = 0 > > This value zero of sync_priority[0] makes sync_status 'async' > errorneously and confuses Pacemaker. > > # The log line marked with 'HOGE' above printed by applying the > # patch at the bottom of this message and invoking 'select > # sync_state from pg_stat_replication' periodically. To increase > # the chance to see the symptom, sleep 1 second for 'file' > # boundaries :-) > > The Heikki's recent(?) commit > 0ab9d1c4b31622e9176472b4276f3e9831e3d6ba which changes the format > of XLogRecPtr from logid:xrecoff struct to 64 bit linear address > would fix the false indication. But I suppose this patch won't be > applied to existing 9.1.x and 9.2.x because of the modification > onto streaming protocol. > > As far as I see the patch, it would'nt change the meaning of > XLogRecPtr to change XLogRecPtrIsInvalid from (xrecoff == 0) to > (xrecoff == 0 && xlogid == 0). But this change affects rather > wide portion where handling WAL nevertheless what is needed here > is only to stop the false indication. > > On the other hand, pg_basebackup seems return 0/0 as flush and > apply positions so it seems enough only to add xlogid == 0 into > the condition. The patch attached for REL9_2_1 does this and > yields the result following. > > | DEBUG: write 2/FEFFFD48 flush 2/FEFFFD48 apply 2/FEFF7AB0 > | DEBUG: write 3/0 flush 2/FEFFFD48 apply 2/FEFF7E88 > | DEBUG: write 3/0 flush 3/0 apply 2/FEFFFD48 > | DEBUG: HOGE: flush = 2/FEFFFD48 sync_priority[0] = 1 > | DEBUG: write 3/E338 flush 3/0 apply 2/FEFFFF80 > !| DEBUG: HOGE: flush = 3/0 sync_priority[0] = 1 > > I think this patch should be applied for 9.2.2 and 9.1.7. Looks good to me, though I don't think the source code comment needs to be updated in the way the patch does. Regards, -- Fujii Masao
Thank you for comment. > > I think this patch should be applied for 9.2.2 and 9.1.7. > > Looks good to me, though I don't think the source code comment needs > to be updated in the way the patch does. Ok, the patch for walsender.c becomes 1 liner, quite simple. However, I've forgotten to treat other three portions in walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr which comes from WAL receiver>). This new patch includes the changes for them. By the way, XLogRecPtrIsInvliad() seems to be used also in gist.c, gistget.c, gistvacuum.c, visibilitymap.c, clog.c, slru.c, xlog.c. In these files, LSN's fed to XLogRecPtrIsInvalid() looks to be *valid* start point of WAL records, but I'm not sure of that. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 088f7b6..148756d 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -382,7 +382,7 @@ SyncRepReleaseWaiters(void) */ if (MyWalSnd->sync_standby_priority == 0 || MyWalSnd->state< WALSNDSTATE_STREAMING || - XLogRecPtrIsInvalid(MyWalSnd->flush)) + XLByteEQ(MyWalSnd->flush, InvalidXLogRecPtr)) return; /* @@ -403,7 +403,7 @@ SyncRepReleaseWaiters(void) walsnd->sync_standby_priority > 0 && (priority == 0|| priority > walsnd->sync_standby_priority) && - !XLogRecPtrIsInvalid(walsnd->flush)) + !XLogByteEQ(walsnd->flush, InvalidXLogRecPtr)) { priority = walsnd->sync_standby_priority; syncWalSnd = walsnd; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 064ddd5..6c27449 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1559,14 +1559,14 @@ pg_stat_get_wal_senders(PG_FUNCTION_ARGS) * which always returns an invalid flush location,as an * asynchronous standby. */ - sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush) ? + sync_priority[i] = XLByteEQ(walsnd->flush, InvalidXLogRecPtr) ? 0 : walsnd->sync_standby_priority; if (walsnd->state == WALSNDSTATE_STREAMING && walsnd->sync_standby_priority> 0 && (priority == 0 || priority > walsnd->sync_standby_priority)&& - !XLogRecPtrIsInvalid(walsnd->flush)) + !XLByteEQ(walsnd->flush, InvalidXLogRecPtr)) { priority = walsnd->sync_standby_priority; sync_standby = i;
Ouch! I'm sorry to have sent truly buggy version, please abandon v2 patch sent just before. Added include "access/transam.h" to syncrep.c and corrected the name of XLByteEQ. > Thank you for comment. > > > > I think this patch should be applied for 9.2.2 and 9.1.7. > > > > Looks good to me, though I don't think the source code comment needs > > to be updated in the way the patch does. > > Ok, the patch for walsender.c becomes 1 liner, quite simple. > > However, I've forgotten to treat other three portions in > walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr > which comes from WAL receiver>). This new patch includes the > changes for them. > > By the way, XLogRecPtrIsInvliad() seems to be used also in > gist.c, gistget.c, gistvacuum.c, visibilitymap.c, clog.c, slru.c, > xlog.c. > > In these files, LSN's fed to XLogRecPtrIsInvalid() looks to be > *valid* start point of WAL records, but I'm not sure of that. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index 088f7b6..6caf586 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -46,6 +46,7 @@#include <unistd.h>#include "access/xact.h" +#include "access/transam.h"#include "miscadmin.h"#include "replication/syncrep.h"#include "replication/walsender.h" @@ -382,7 +383,7 @@ SyncRepReleaseWaiters(void) */ if (MyWalSnd->sync_standby_priority == 0 || MyWalSnd->state< WALSNDSTATE_STREAMING || - XLogRecPtrIsInvalid(MyWalSnd->flush)) + XLByteEQ(MyWalSnd->flush, InvalidXLogRecPtr)) return; /* @@ -403,7 +404,7 @@ SyncRepReleaseWaiters(void) walsnd->sync_standby_priority > 0 && (priority == 0|| priority > walsnd->sync_standby_priority) && - !XLogRecPtrIsInvalid(walsnd->flush)) + !XLByteEQ(walsnd->flush, InvalidXLogRecPtr)) { priority = walsnd->sync_standby_priority; syncWalSnd = walsnd; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 064ddd5..6c27449 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1559,14 +1559,14 @@ pg_stat_get_wal_senders(PG_FUNCTION_ARGS) * which always returns an invalid flush location,as an * asynchronous standby. */ - sync_priority[i] = XLogRecPtrIsInvalid(walsnd->flush) ? + sync_priority[i] = XLByteEQ(walsnd->flush, InvalidXLogRecPtr) ? 0 : walsnd->sync_standby_priority; if (walsnd->state == WALSNDSTATE_STREAMING && walsnd->sync_standby_priority> 0 && (priority == 0 || priority > walsnd->sync_standby_priority)&& - !XLogRecPtrIsInvalid(walsnd->flush)) + !XLByteEQ(walsnd->flush, InvalidXLogRecPtr)) { priority = walsnd->sync_standby_priority; sync_standby = i;
On Fri, Oct 19, 2012 at 5:46 PM, Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp> wrote: > Ouch! I'm sorry to have sent truly buggy version, please abandon > v2 patch sent just before. > > Added include "access/transam.h" to syncrep.c and corrected the > name of XLByteEQ. Thanks for updating the patch! This looks good to me. >> Thank you for comment. >> >> > > I think this patch should be applied for 9.2.2 and 9.1.7. >> > >> > Looks good to me, though I don't think the source code comment needs >> > to be updated in the way the patch does. >> >> Ok, the patch for walsender.c becomes 1 liner, quite simple. >> >> However, I've forgotten to treat other three portions in >> walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr >> which comes from WAL receiver>). This new patch includes the >> changes for them. Good catch. Regards, -- Fujii Masao
On Fri, Oct 19, 2012 at 10:29 PM, Fujii Masao <masao.fujii@gmail.com> wrote: > On Fri, Oct 19, 2012 at 5:46 PM, Kyotaro HORIGUCHI > <horiguchi.kyotaro@lab.ntt.co.jp> wrote: >> Ouch! I'm sorry to have sent truly buggy version, please abandon >> v2 patch sent just before. >> >> Added include "access/transam.h" to syncrep.c and corrected the >> name of XLByteEQ. > > Thanks for updating the patch! This looks good to me. > >>> Thank you for comment. >>> >>> > > I think this patch should be applied for 9.2.2 and 9.1.7. >>> > >>> > Looks good to me, though I don't think the source code comment needs >>> > to be updated in the way the patch does. >>> >>> Ok, the patch for walsender.c becomes 1 liner, quite simple. >>> >>> However, I've forgotten to treat other three portions in >>> walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr >>> which comes from WAL receiver>). This new patch includes the >>> changes for them. > > Good catch. Does any commiter pick up this? Regards, -- Fujii Masao
Fujii Masao escribió: > On Fri, Oct 19, 2012 at 10:29 PM, Fujii Masao <masao.fujii@gmail.com> wrote: > >>> However, I've forgotten to treat other three portions in > >>> walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr > >>> which comes from WAL receiver>). This new patch includes the > >>> changes for them. > > > > Good catch. > > Does any commiter pick up this? If not, please add to next commitfest so that we don't forget. -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
On Fri, Nov 9, 2012 at 4:06 AM, Alvaro Herrera <alvherre@2ndquadrant.com> wrote: > Fujii Masao escribió: >> On Fri, Oct 19, 2012 at 10:29 PM, Fujii Masao <masao.fujii@gmail.com> wrote: > >> >>> However, I've forgotten to treat other three portions in >> >>> walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr >> >>> which comes from WAL receiver>). This new patch includes the >> >>> changes for them. >> > >> > Good catch. >> >> Does any commiter pick up this? > > If not, please add to next commitfest so that we don't forget. Yep, I added this to next CF. This is just a bug fix, so please feel free to pick up this even before CF. Regards, -- Fujii Masao
Thanks for comments, > Have to be careful to really not modify the > operands. numeric_out() and numeric_out_sci() are wrong; they > call get_str_from_var(), which modifies the argument. Same with > numeric_expr(): it passes the argument to > numericvar_to_double_no_overflow(), which passes it to > get_str_from_var(). numericvar_to_int8() also modifies its > argument, so all the functions that use that, directly or > indirectly, must make a copy. mmm. My carefulness was a bit short to pick up them... I overlooked that get_str_from_var() and numeric_to_int8() calls round_var() which rewrites the operand. I reverted numeric_out() and numeric_int8(), numeric_int2(). Altough, I couldn't find in get_str_from_var_sci() where the operand would be modified. The lines where var showing in get_str_from_var_sci() is listed below. | 2:get_str_from_var_sci(NumericVar *var, int rscale) | 21: if (var->ndigits > 0) | 23: exponent = (var->weight + 1) * DEC_DIGITS; | 29: exponent -= DEC_DIGITS - (int) log10(var->digits[0]); | 59: div_var(var, &denominator, &significand, rscale, true); The only suspect is div_var at this level, and do the same thing for var1 in div_var. | 2:div_var(NumericVar *var1, NumericVar *var2, NumericVar *result, | 20: int var1ndigits = var1->ndigits; | 35: if (var1ndigits == 0) | 47: if (var1->sign == var2->sign) | 51: res_weight = var1->weight - var2->weight; | 68: div_ndigits = Max(div_ndigits, var1ndigits); | 83: memcpy(dividend + 1, var1->digits, var1ndigits * sizeof(NumericDigit)); | 132: for (i = var1ndigits; i >= 0; i--) No line seems to modify var1 as far as I see so I've left numeric_out_sci() modified in this patch. Well, I found some other bugs in numeric_stddev_internal. vN was errorniously freed and vsumX2 in is used as work. They are fixed in this new patch. > Perhaps get_str_from_var(), and the other functions that > currently scribble on the arguments, should be modified to not > do so. They could easily make a copy of the argument within the > function. Then the callers could safely use > set_var_from_num_nocopy(). The performance would be the same, > you would have the same number of pallocs, but you would get > rid of the surprising argument-modifying behavior of those > functions. I agree with that. const qualifiers on parameters would rule this mechanically. I try this for the next version of this patch. > SELECT SUM(col) FROM numtest; > > The execution time of that query fell from about 5300 ms to 4300 ms, ie. about 20%. Wow, it seems more promising than I expected. Thanks. regards, -- 堀口恭太郎 日本電信電話株式会社 NTTオープンソフトウェアセンタ Phone: 03-5860-5115 / Fax: 03-5463-5490 diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 68c1f1d..fcff325 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -367,6 +367,7 @@ static void zero_var(NumericVar *var);static const char *set_var_from_str(const char *str, const char*cp, NumericVar *dest);static void set_var_from_num(Numeric value, NumericVar *dest); +static void set_var_from_num_nocopy(Numeric num, NumericVar *dest);static void set_var_from_var(NumericVar *value, NumericVar*dest);static char *get_str_from_var(NumericVar *var, int dscale);static char *get_str_from_var_sci(NumericVar*var, int rscale); @@ -617,11 +618,10 @@ numeric_out_sci(Numeric num, int scale) return pstrdup("NaN"); init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); str = get_str_from_var_sci(&x, scale); - free_var(&x); return str;} @@ -696,7 +696,7 @@ numeric_send(PG_FUNCTION_ARGS) int i; init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); pq_begintypsend(&buf); @@ -707,8 +707,6 @@ numeric_send(PG_FUNCTION_ARGS) for (i = 0; i < x.ndigits; i++) pq_sendint(&buf, x.digits[i],sizeof(NumericDigit)); - free_var(&x); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf));} @@ -1577,15 +1575,13 @@ numeric_add(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); add_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1620,15 +1616,13 @@ numeric_sub(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); sub_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1667,15 +1661,13 @@ numeric_mul(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); mul_var(&arg1, &arg2, &result, arg1.dscale + arg2.dscale); res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1711,8 +1703,8 @@ numeric_div(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Select scale for division result @@ -1726,8 +1718,6 @@ numeric_div(PG_FUNCTION_ARGS) res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1762,8 +1752,8 @@ numeric_div_trunc(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Do the divide and return the result @@ -1772,8 +1762,6 @@ numeric_div_trunc(PG_FUNCTION_ARGS) res = make_result(&result); - free_var(&arg1); - free_var(&arg2); free_var(&result); PG_RETURN_NUMERIC(res); @@ -1802,15 +1790,13 @@ numeric_mod(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); mod_var(&arg1, &arg2, &result); res = make_result(&result); - free_var(&result); - free_var(&arg2); free_var(&arg1); PG_RETURN_NUMERIC(res); @@ -1980,7 +1966,7 @@ numeric_sqrt(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* Assume the input was normalized, so arg.weight is accurate */ sweight =(arg.weight + 1) * DEC_DIGITS / 2 - 1; @@ -1998,7 +1984,6 @@ numeric_sqrt(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2033,7 +2018,7 @@ numeric_exp(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* convert input to float8, ignoring overflow */ val = numericvar_to_double_no_overflow(&arg); @@ -2061,7 +2046,6 @@ numeric_exp(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2091,7 +2075,7 @@ numeric_ln(PG_FUNCTION_ARGS) init_var(&arg); init_var(&result); - set_var_from_num(num, &arg); + set_var_from_num_nocopy(num, &arg); /* Approx decimal digits before decimal point */ dec_digits = (arg.weight+ 1) * DEC_DIGITS; @@ -2112,7 +2096,6 @@ numeric_ln(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg); PG_RETURN_NUMERIC(res);} @@ -2146,8 +2129,8 @@ numeric_log(PG_FUNCTION_ARGS) init_var(&arg2); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); /* * Call log_var() to compute and return the result; note it handles scale @@ -2158,8 +2141,6 @@ numeric_log(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg2); - free_var(&arg1); PG_RETURN_NUMERIC(res);} @@ -2195,8 +2176,8 @@ numeric_power(PG_FUNCTION_ARGS) init_var(&arg2_trunc); init_var(&result); - set_var_from_num(num1, &arg1); - set_var_from_num(num2, &arg2); + set_var_from_num_nocopy(num1, &arg1); + set_var_from_num_nocopy(num2, &arg2); set_var_from_var(&arg2, &arg2_trunc); trunc_var(&arg2_trunc, 0); @@ -2227,9 +2208,7 @@ numeric_power(PG_FUNCTION_ARGS) res = make_result(&result); free_var(&result); - free_var(&arg2); free_var(&arg2_trunc); - free_var(&arg1); PG_RETURN_NUMERIC(res);} @@ -2277,9 +2256,8 @@ numeric_int4(PG_FUNCTION_ARGS) /* Convert to variable format, then convert to int4 */ init_var(&x); - set_var_from_num(num, &x); + set_var_from_num_nocopy(num, &x); result = numericvar_to_int4(&x); - free_var(&x); PG_RETURN_INT32(result);} @@ -2400,8 +2378,6 @@ numeric_int2(PG_FUNCTION_ARGS) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("smallint out of range"))); - free_var(&x); - /* Down-convert to int2 */ result = (int16) val; @@ -2411,6 +2387,8 @@ numeric_int2(PG_FUNCTION_ARGS) (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("smallint out of range"))); + free_var(&x); + PG_RETURN_INT16(result);} @@ -2764,7 +2742,7 @@ numeric_stddev_internal(ArrayType *transarray, return make_result(&const_nan); init_var(&vN); - set_var_from_num(N, &vN); + set_var_from_num_nocopy(N, &vN); /* * Sample stddev and variance are undefined when N <= 1; population stddev @@ -2777,7 +2755,6 @@ numeric_stddev_internal(ArrayType *transarray, if (cmp_var(&vN, comp) <= 0) { - free_var(&vN); *is_null = true; return NULL; } @@ -2786,7 +2763,7 @@ numeric_stddev_internal(ArrayType *transarray, sub_var(&vN, &const_one, &vNminus1); init_var(&vsumX); - set_var_from_num(sumX, &vsumX); + set_var_from_num_nocopy(sumX, &vsumX); init_var(&vsumX2); set_var_from_num(sumX2, &vsumX2); @@ -2816,9 +2793,7 @@ numeric_stddev_internal(ArrayType *transarray, res = make_result(&vsumX); } - free_var(&vN); free_var(&vNminus1); - free_var(&vsumX); free_var(&vsumX2); return res; @@ -3448,6 +3423,21 @@ set_var_from_num(Numeric num, NumericVar *dest) memcpy(dest->digits, NUMERIC_DIGITS(num), ndigits* sizeof(NumericDigit));} +/* + * set_var_from_num_nocopy() - + * + * Convert the packed db format into a variable - without copying digits + */ +static void +set_var_from_num_nocopy(Numeric num, NumericVar *dest) +{ + dest->ndigits = NUMERIC_NDIGITS(num); + dest->weight = NUMERIC_WEIGHT(num); + dest->sign = NUMERIC_SIGN(num); + dest->dscale = NUMERIC_DSCALE(num); + dest->digits = NUMERIC_DIGITS(num); +} +/* * set_var_from_var() -
On 09.11.2012 15:28, Fujii Masao wrote: > On Fri, Nov 9, 2012 at 4:06 AM, Alvaro Herrera<alvherre@2ndquadrant.com> wrote: >> Fujii Masao escribió: >>> On Fri, Oct 19, 2012 at 10:29 PM, Fujii Masao<masao.fujii@gmail.com> wrote: >> >>>>>> However, I've forgotten to treat other three portions in >>>>>> walsender.c and syncrep.c also does XLogRecPtrIsInvalid(<XLogPtr >>>>>> which comes from WAL receiver>). This new patch includes the >>>>>> changes for them. >>>> >>>> Good catch. >>> >>> Does any commiter pick up this? >> >> If not, please add to next commitfest so that we don't forget. > > Yep, I added this to next CF. This is just a bug fix, so please feel free to > pick up this even before CF. Committed, thanks. - Heikki
Heikki Linnakangas <hlinnakangas@vmware.com> writes: > Committed, thanks. Doesn't seem to have been pushed to master? regards, tom lane
On 23.11.2012 19:55, Tom Lane wrote: > Heikki Linnakangas<hlinnakangas@vmware.com> writes: >> Committed, thanks. > > Doesn't seem to have been pushed to master? On purpose. Per commit message: > 9.3 doesn't have this problem because XLogRecPtr is now a single 64-bit > integer, so XLogRecPtrIsInvalid() does the right thing. Apply to 9.2, and > 9.1 where pg_stat_replication view was introduced. I considered applying it to master anyway, just to keep the branches in sync. But XLogRecPtrIsInvalid(var) seems slightly more readable than XLByteEQ(var, InvalidXLogRecPtr), so I decided not to. - Heikki