Re: Wrong usage of RelationNeedsWAL
| От | Kyotaro Horiguchi |
|---|---|
| Тема | Re: Wrong usage of RelationNeedsWAL |
| Дата | |
| Msg-id | 20210121.002844.690876870629206205.horikyota.ntt@gmail.com обсуждение исходный текст |
| Ответ на | Re: Wrong usage of RelationNeedsWAL (Kyotaro Horiguchi <horikyota.ntt@gmail.com>) |
| Ответы |
Re: Wrong usage of RelationNeedsWAL
|
| Список | pgsql-hackers |
At Wed, 20 Jan 2021 17:34:44 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in
> Anyway, it seems actually dangerous that cause pruning on wal-skipped
> relation.
>
> > with your patch versions. Could you try implementing both test procedures in
> > src/test/modules/snapshot_too_old? There's no need to make the test use
> > wal_level=minimal explicitly; it's sufficient to catch these bugs when
> > wal_level=minimal is in the TEMP_CONFIG file.
>
> In the attached, TestForOldSnapshot() considers XLogIsNeeded(),
> instead of moving the condition on LSN to TestForOldSnapshot_impl for
> performance.
>
> I'll add the test part in the next version.
This is it.
However, with the previous patch, two existing tests sto_using_cursor
and sto_using_select behaves differently from the master. That change
is coming from the omission of actual LSN check in TestForOldSnapshot
while wal_level=minimal. So we have no choice other than actually
updating page LSN.
In the scenario under discussion there are two places we need to do
that. one is heap_page_prune, and the other is RelationCopyStorge. As
a PoC, I used gistXLogAssignLSN() as is for thie purpose. See the
attached third file.
If it is the right direction, I will move XLOG_GIST_ASSIGN_LSN to
XLOG_ASSIGN_LSN and move gistXLogAssignLSN() to XLogAdvanceLSN() or
XLogNop() or such.
With the third patch, the test succedds both wal_level = replica and
minimal.
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
From 332ac10844befb8a9c00df9f68993eb3696f0a85 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Wed, 20 Jan 2021 20:57:03 +0900
Subject: [PATCH v5 1/3] Test for snapshot too old and wal_level=minimal
---
src/test/modules/snapshot_too_old/Makefile | 12 ++++-
.../expected/sto_wal_optimized.out | 24 ++++++++++
.../input_sto/sto_wal_optimized.spec | 44 +++++++++++++++++++
src/test/modules/snapshot_too_old/sto.conf | 3 ++
4 files changed, 82 insertions(+), 1 deletion(-)
create mode 100644 src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
create mode 100644 src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index dfb4537f63..1e0b0b6efc 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -4,7 +4,7 @@
# we have to clean those result files explicitly
EXTRA_CLEAN = $(pg_regress_clean_files)
-ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
+ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index sto_wal_optimized
ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
# Disabled because these tests require "old_snapshot_threshold" >= 0, which
@@ -22,6 +22,16 @@ include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
+.PHONY: tablespace-setup specfile-setup
+tablespace-setup:
+ rm -rf ./testtablespace
+ mkdir ./testtablespace
+
+specfile-setup: input_sto/*.spec
+ sed 's!@srcdir@!$(realpath $(top_srcdir))!g' $? > specs/$(notdir $?)
+
+check: tablespace-setup specfile-setup
+
# But it can nonetheless be very helpful to run tests on preexisting
# installation, allow to do so, but only if requested explicitly.
installcheck-force:
diff --git a/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
new file mode 100644
index 0000000000..4038d0a713
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
@@ -0,0 +1,24 @@
+Parsed test spec with 3 sessions
+
+starting permutation: s1a1 s1a2 s2a1 s2a2 s1b1 a3-0 a3-1 s3-2 s3-3 s3-4 s2b1
+step s1a1: BEGIN;
+step s1a2: DELETE FROM t;
+step s2a1: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2a2: SELECT 1;
+?column?
+
+1
+step s1b1: COMMIT;
+step a3-0: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
+setting pg_sleep
+
+0
+step a3-1: BEGIN;
+step s3-2: ALTER TABLE t SET TABLESPACE tsp1;
+step s3-3: SELECT count(*) FROM t;
+count
+
+0
+step s3-4: COMMIT;
+step s2b1: SELECT count(*) FROM t;
+ERROR: snapshot too old
diff --git a/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
new file mode 100644
index 0000000000..02adb50581
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
@@ -0,0 +1,44 @@
+# This test provokes a "snapshot too old" error
+#
+# The sleep is needed because with a threshold of zero a statement could error
+# on changes it made. With more normal settings no external delay is needed,
+# but we don't want these tests to run long enough to see that, since
+# granularity is in minutes.
+#
+# Since results depend on the value of old_snapshot_threshold, sneak that into
+# the line generated by the sleep, so that a surprising values isn't so hard
+# to identify.
+
+setup
+{
+ CREATE TABLESPACE tsp1 LOCATION '@srcdir@/src/test/modules/snapshot_too_old/testtablespace';
+}
+
+setup
+{
+ CREATE TABLE t AS SELECT a FROM generate_series(0, 9) a;
+}
+
+session "s1"
+step "s1a1" { BEGIN; }
+step "s1a2" { DELETE FROM t; }
+step "s1b1" { COMMIT; }
+
+session "s2"
+# s2a : take snapshot
+step "s2a1" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s2a2" { SELECT 1; }
+# s2b : wanted "snapshot too old"
+step "s2b1" { SELECT count(*) FROM t; }
+
+session "s3"
+# s3-0 : wait for snapshot is expired
+step "a3-0" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
+step "a3-1" { BEGIN; }
+# s3-2 : start skipping WAL when wal_level=minimal
+step "s3-2" { ALTER TABLE t SET TABLESPACE tsp1; }
+# s3-3 : early pruning. w/o WAL and LSN changes when wal_level=minimal
+step "s3-3" { SELECT count(*) FROM t; }
+step "s3-4" { COMMIT; }
+
+permutation "s1a1" "s1a2" "s2a1" "s2a2" "s1b1" "a3-0" "a3-1" "s3-2" "s3-3" "s3-4" "s2b1"
diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf
index 7eeaeeb0dc..09887fc725 100644
--- a/src/test/modules/snapshot_too_old/sto.conf
+++ b/src/test/modules/snapshot_too_old/sto.conf
@@ -1,2 +1,5 @@
autovacuum = off
old_snapshot_threshold = 0
+
+# needed when setting wal_level=minimal
+wal_skip_threshold = 0
--
2.27.0
From 2e6e3930fdd495ec4b23150b7577889a90b56bf4 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Mon, 18 Jan 2021 14:47:21 +0900
Subject: [PATCH v5 2/3] Do not use RelationNeedsWAL to identify relation
persistence
RelationNeedsWAL() may return false for a permanent relation when
wal_level=minimal and the relation is created or truncated in the
current transaction. Directly examine relpersistence instead of using
the function to know relation persistence.
---
src/backend/catalog/pg_publication.c | 2 +-
src/backend/optimizer/util/plancat.c | 3 ++-
src/include/storage/bufmgr.h | 8 +++++++-
src/include/utils/rel.h | 2 +-
src/include/utils/snapmgr.h | 2 +-
5 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 5f8e1c64e1..84d2efcfd2 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -67,7 +67,7 @@ check_publication_add_relation(Relation targetrel)
errdetail("System tables cannot be added to publications.")));
/* UNLOGGED and TEMP relations cannot be part of publication. */
- if (!RelationNeedsWAL(targetrel))
+ if (targetrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("table \"%s\" cannot be replicated",
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index da322b453e..177e6e336a 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -126,7 +126,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
relation = table_open(relationObjectId, NoLock);
/* Temporary and unlogged relations are inaccessible during recovery. */
- if (!RelationNeedsWAL(relation) && RecoveryInProgress())
+ if (relation->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT &&
+ RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary or unlogged relations during recovery")));
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index fb00fda6a7..e641174798 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -14,6 +14,7 @@
#ifndef BUFMGR_H
#define BUFMGR_H
+#include "access/xlog.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
@@ -278,12 +279,17 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
{
Assert(relation != NULL);
+ /*
+ * While wal_level=minimal, early pruning can happen without updating page
+ * LSN. Don't take the fast-return path while wal_level=minimal so that we
+ * don't miss early pruning.
+ */
if (old_snapshot_threshold >= 0
&& (snapshot) != NULL
&& ((snapshot)->snapshot_type == SNAPSHOT_MVCC
|| (snapshot)->snapshot_type == SNAPSHOT_TOAST)
&& !XLogRecPtrIsInvalid((snapshot)->lsn)
- && PageGetLSN(page) > (snapshot)->lsn)
+ && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn))
TestForOldSnapshot_impl(snapshot, relation);
}
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 10b63982c0..f58d65cf28 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -554,7 +554,7 @@ typedef struct ViewOptions
/*
* RelationNeedsWAL
- * True if relation needs WAL.
+ * True if relation needs WAL at the time.
*
* Returns false if wal_level = minimal and this relation is created or
* truncated in the current transaction. See "Skipping WAL for New
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 579be352c5..c21ee3c289 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -37,7 +37,7 @@
*/
#define RelationAllowsEarlyPruning(rel) \
( \
- RelationNeedsWAL(rel) \
+ (rel)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT \
&& !IsCatalogRelation(rel) \
&& !RelationIsAccessibleInLogicalDecoding(rel) \
)
--
2.27.0
From 67bf139a9355adc93801c5b35b912154504075c0 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Wed, 20 Jan 2021 23:59:39 +0900
Subject: [PATCH v5 3/3] Poc: Keep page-LSN updated while WAL-skipping.
WAL-skipping optimization omits bumping page LSN when ALTER TABLE SET
TABLESPACE. Also heap_page_prune does the same. However,
old_snapshot_threshold feature needs page LSN to be kept updated on
these operations so that TestForOldSnapshot properly find identify
whether a snapshot is invalidated.
---
src/backend/access/heap/pruneheap.c | 14 ++++++++++++++
src/backend/catalog/storage.c | 8 ++++++++
src/include/storage/bufmgr.h | 3 ++-
src/include/utils/rel.h | 4 ++--
4 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index e3a716a2a2..33c0841b36 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -70,6 +70,8 @@ static void heap_prune_record_redirect(PruneState *prstate,
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
+/* XXXXXXXX tmporary modification */
+extern XLogRecPtr gistXLogAssignLSN(void);
/*
* Optionally prune and repair fragmentation in the specified page.
@@ -331,6 +333,18 @@ heap_page_prune(Relation relation, Buffer buffer,
PageSetLSN(BufferGetPage(buffer), recptr);
}
+ else if (relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
+ {
+ XLogRecPtr currlsn = GetXLogInsertRecPtr();
+ Page page = BufferGetPage(buffer);
+
+ Assert(wal_level < WAL_LEVEL_REPLICA);
+
+ /* we need to update page LSN to notify reader of pruning */
+ if (PageGetLSN(page) == currlsn)
+ currlsn = gistXLogAssignLSN();
+ PageSetLSN(page, currlsn);
+ }
}
else
{
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index cba7a9ada0..488ac6f760 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -75,6 +75,8 @@ typedef struct PendingRelSync
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
HTAB *pendingSyncHash = NULL;
+/* XXXXXXXX tmporary modification */
+extern XLogRecPtr gistXLogAssignLSN(void);
/*
* AddPendingSync
@@ -414,6 +416,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
bool copying_initfork;
BlockNumber nblocks;
BlockNumber blkno;
+ XLogRecPtr fakepagelsn;
page = (Page) buf.data;
@@ -434,6 +437,9 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
use_wal = XLogIsNeeded() &&
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
+ if (!use_wal)
+ fakepagelsn = gistXLogAssignLSN();
+
nblocks = smgrnblocks(src, forkNum);
for (blkno = 0; blkno < nblocks; blkno++)
@@ -460,6 +466,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
*/
if (use_wal)
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
+ else
+ PageSetLSN(page, fakepagelsn);
PageSetChecksumInplace(page, blkno);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index e641174798..9ab9e7394c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -19,6 +19,7 @@
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilenode.h"
+#include "utils/rel.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
@@ -289,7 +290,7 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
&& ((snapshot)->snapshot_type == SNAPSHOT_MVCC
|| (snapshot)->snapshot_type == SNAPSHOT_TOAST)
&& !XLogRecPtrIsInvalid((snapshot)->lsn)
- && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn))
+ && PageGetLSN(page) > (snapshot)->lsn)
TestForOldSnapshot_impl(snapshot, relation);
}
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index f58d65cf28..35e53c8d74 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -618,8 +618,8 @@ typedef struct ViewOptions
* decoding snapshot.
*/
#define RelationIsAccessibleInLogicalDecoding(relation) \
- (XLogLogicalInfoActive() && \
- RelationNeedsWAL(relation) && \
+ (XLogLogicalInfoActive() && \
+ (relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \
(IsCatalogRelation(relation) || RelationIsUsedAsCatalogTable(relation)))
/*
--
2.27.0
В списке pgsql-hackers по дате отправления: