Обсуждение: BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE
BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE
От
PG Bug reporting form
Дата:
The following bug has been logged on the website: Bug reference: 16378 Logged by: Alexander Lakhin Email address: exclusion@gmail.com PostgreSQL version: 12.2 Operating system: Ubuntu 18.04 Description: The following script: valgrind --leak-check=no --time-stamp=yes --read-var-info=yes \ --gen-suppressions=all --suppressions=src/tools/valgrind.supp \ --trace-children=yes \ pg_ctl -w -t 30 -D "$PGDB" -l postmaster.log start echo " CREATE TEMP TABLE temp_table(i int); CREATE TABLE clstr(id serial primary key, a int); INSERT INTO clstr(a) SELECT g.i % 42 FROM generate_series(1, 133000) g(i); CREATE INDEX clstr_a ON clstr(a); CLUSTER clstr USING clstr_a; " | time psql postgres >>psql.log 2>&1 & sleep 28 echo " SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE query LIKE 'CLUSTER%' " | psql postgres leads to a cassert-enabled server crash with the following messages in the log (for the master branch): 2020-04-19 09:31:16.351 MSK [28410] STATEMENT: INSERT INTO clstr(a) SELECT g.i % 42 FROM generate_series(1, 133000) g(i); 2020-04-19 09:31:21.187 MSK [28410] FATAL: terminating connection due to administrator command 2020-04-19 09:31:21.187 MSK [28410] STATEMENT: CLUSTER clstr USING clstr_a; ==00:00:00:36.289 28410== Invalid read of size 4 ==00:00:00:36.289 28410== at 0x42AFED: list_member_oid (list.c:678) ==00:00:00:36.289 28410== by 0x2B8FCD: ReindexIsProcessingIndex (index.c:3852) ==00:00:00:36.289 28410== by 0x237D42: systable_beginscan (genam.c:367) ==00:00:00:36.289 28410== by 0x2AD82A: findDependentObjects (dependency.c:560) ==00:00:00:36.289 28410== by 0x2AEBB1: performDeletion (dependency.c:340) ==00:00:00:36.289 28410== by 0x2B9833: RemoveTempRelations (namespace.c:4232) ==00:00:00:36.289 28410== by 0x2B9C86: RemoveTempRelationsCallback (namespace.c:4251) ==00:00:00:36.289 28410== by 0x532D2B: shmem_exit (ipc.c:239) ==00:00:00:36.289 28410== by 0x532E28: proc_exit_prepare (ipc.c:194) ==00:00:00:36.289 28410== by 0x532EAD: proc_exit (ipc.c:107) ==00:00:00:36.289 28410== by 0x6892D6: errfinish (elog.c:578) ==00:00:00:36.289 28410== by 0x55BA87: ProcessInterrupts (postgres.c:3076) ==00:00:00:36.289 28410== Address 0x1232af10 is 0 bytes inside a block of size 48 free'd ==00:00:00:36.289 28410== at 0x6B66A9: pfree (mcxt.c:1061) ==00:00:00:36.289 28410== by 0x42A4A6: list_free_private (list.c:1364) ==00:00:00:36.289 28410== by 0x42B6EB: list_free (list.c:1378) ==00:00:00:36.289 28410== by 0x67AD26: RelationGetIndexAttrBitmap (relcache.c:5107) ==00:00:00:36.289 28410== by 0x21D2E6: heap_update (heapam.c:2973) ==00:00:00:36.289 28410== by 0x21EDED: simple_heap_update (heapam.c:3901) ==00:00:00:36.289 28410== by 0x2B97A0: CatalogTupleUpdate (indexing.c:230) ==00:00:00:36.289 28410== by 0x32F8C0: copy_table_data (cluster.c:927) ==00:00:00:36.289 28410== by 0x330E67: rebuild_relation (cluster.c:580) ==00:00:00:36.289 28410== by 0x330FB0: cluster_rel (cluster.c:403) ==00:00:00:36.289 28410== by 0x331271: cluster (cluster.c:173) ==00:00:00:36.289 28410== by 0x563105: standard_ProcessUtility (utility.c:819) ==00:00:00:36.289 28410== Block was alloc'd at ==00:00:00:36.289 28410== at 0x6B6063: palloc (mcxt.c:974) ==00:00:00:36.289 28410== by 0x42A43C: new_list (list.c:134) ==00:00:00:36.289 28410== by 0x42BC71: list_copy (list.c:1410) ==00:00:00:36.289 28410== by 0x67996C: RelationGetIndexList (relcache.c:4521) ==00:00:00:36.289 28410== by 0x67AC49: RelationGetIndexAttrBitmap (relcache.c:5101) ==00:00:00:36.289 28410== by 0x21D2E6: heap_update (heapam.c:2973) ==00:00:00:36.289 28410== by 0x21EDED: simple_heap_update (heapam.c:3901) ==00:00:00:36.289 28410== by 0x2B97A0: CatalogTupleUpdate (indexing.c:230) ==00:00:00:36.289 28410== by 0x32F8C0: copy_table_data (cluster.c:927) ==00:00:00:36.289 28410== by 0x330E67: rebuild_relation (cluster.c:580) ==00:00:00:36.289 28410== by 0x330FB0: cluster_rel (cluster.c:403) ==00:00:00:36.289 28410== by 0x331271: cluster (cluster.c:173) ==00:00:00:36.289 28410== { <insert_a_suppression_name_here> Memcheck:Addr4 fun:list_member_oid fun:ReindexIsProcessingIndex fun:systable_beginscan fun:findDependentObjects fun:performDeletion fun:RemoveTempRelations fun:RemoveTempRelationsCallback fun:shmem_exit fun:proc_exit_prepare fun:proc_exit fun:errfinish fun:ProcessInterrupts } TRAP: FailedAssertion("IsOidList(list)", File: "list.c", Line: 678) ==00:00:00:36.291 28481== ==00:00:00:36.291 28481== HEAP SUMMARY: ==00:00:00:36.291 28481== in use at exit: 1,302,653 bytes in 440 blocks ==00:00:00:36.291 28481== total heap usage: 9,446 allocs, 312 frees, 9,487,792 bytes allocated ==00:00:00:36.291 28481== ==00:00:00:36.291 28481== For a detailed leak analysis, rerun with: --leak-check=full ==00:00:00:36.291 28481== ==00:00:00:36.291 28481== For counts of detected and suppressed errors, rerun with: -v ==00:00:00:36.291 28481== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 7 from 3) postgres: law postgres [local] CLUSTER(ExceptionalCondition+0x7c)[0x685026] postgres: law postgres [local] CLUSTER(list_member_oid+0x46)[0x42b023] postgres: law postgres [local] CLUSTER(ReindexIsProcessingIndex+0x1a)[0x2b8fce] postgres: law postgres [local] CLUSTER(systable_beginscan+0x37)[0x237d43] postgres: law postgres [local] CLUSTER(+0x1a582b)[0x2ad82b] postgres: law postgres [local] CLUSTER(performDeletion+0x74)[0x2aebb2] postgres: law postgres [local] CLUSTER(+0x1b1834)[0x2b9834] postgres: law postgres [local] CLUSTER(+0x1b1c87)[0x2b9c87] postgres: law postgres [local] CLUSTER(shmem_exit+0x68)[0x532d2c] postgres: law postgres [local] CLUSTER(+0x42ae29)[0x532e29] postgres: law postgres [local] CLUSTER(proc_exit+0x8)[0x532eae] postgres: law postgres [local] CLUSTER(errfinish+0x3d7)[0x6892d7] postgres: law postgres [local] CLUSTER(ProcessInterrupts+0x35b)[0x55ba88] postgres: law postgres [local] CLUSTER(+0x120394)[0x228394] postgres: law postgres [local] CLUSTER(+0x147603)[0x24f603] postgres: law postgres [local] CLUSTER(btbuild+0x7b)[0x251053] postgres: law postgres [local] CLUSTER(index_build+0x14a)[0x2b71e6] postgres: law postgres [local] CLUSTER(reindex_index+0x3c7)[0x2b7ad6] postgres: law postgres [local] CLUSTER(reindex_relation+0x1b9)[0x2b91ac] postgres: law postgres [local] CLUSTER(finish_heap_swap+0xe0)[0x330b84] postgres: law postgres [local] CLUSTER(+0x228e95)[0x330e95] postgres: law postgres [local] CLUSTER(cluster_rel+0xe5)[0x330fb1] postgres: law postgres [local] CLUSTER(cluster+0x9d)[0x331272] postgres: law postgres [local] CLUSTER(standard_ProcessUtility+0x638)[0x563106] postgres: law postgres [local] CLUSTER(ProcessUtility+0xcd)[0x5636ad] postgres: law postgres [local] CLUSTER(+0x457bfb)[0x55fbfb] postgres: law postgres [local] CLUSTER(+0x458862)[0x560862] postgres: law postgres [local] CLUSTER(PortalRun+0x2b8)[0x56153d] postgres: law postgres [local] CLUSTER(+0x455821)[0x55d821] postgres: law postgres [local] CLUSTER(PostgresMain+0x8b8)[0x55f852] postgres: law postgres [local] CLUSTER(+0x3c474d)[0x4cc74d] postgres: law postgres [local] CLUSTER(+0x3c77e6)[0x4cf7e6] postgres: law postgres [local] CLUSTER(+0x3c7aea)[0x4cfaea] postgres: law postgres [local] CLUSTER(PostmasterMain+0x109b)[0x4d0e63] postgres: law postgres [local] CLUSTER(main+0x20c)[0x4194d6] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x6285b97] postgres: law postgres [local] CLUSTER(_start+0x2a)[0x1c0b6a] The required sleep delay may vary (25-30 seconds delay works for me), but I've managed to reproduce it reliably on REL_10_STABLE..master.
PG Bug reporting form <noreply@postgresql.org> writes: > The following script: > ... > leads to a cassert-enabled server crash with the following messages in the > log (for the master branch): Hm, I can get a crash here without valgrind, as long as it's a cassert build. We're accessing a list that's been thrown away by memory context cleanup, so this results: TRAP: FailedAssertion("IsOidList(list)", File: "list.c", Line: 678) The timing is a bit finicky, but no more so than you report for the valgrind case. The difficulty is that the pendingReindexedIndexes list is kept in some transaction-local context, so it gets flushed during the transaction abort that is the first step of proc_exit processing. But the static pointer to it is still set, causing big problems if we do any system catalog accesses later --- like, say, while dropping the session's temp tables. One idea would be to keep the list in TopMemoryContext, but that feels like a band-aid solution. I think more likely what we ought to do is stop trying to use a PG_TRY in reindex_relation to drive cleanup, and instead hook ResetReindexPending into transaction abort processing honestly. I wonder how many other uses of PG_TRY have similar issues? It's not really obvious that this is an unsafe coding pattern. regards, tom lane
I wrote: > The difficulty is that the pendingReindexedIndexes list is kept in > some transaction-local context, so it gets flushed during the transaction > abort that is the first step of proc_exit processing. But the static > pointer to it is still set, causing big problems if we do any system > catalog accesses later --- like, say, while dropping the session's > temp tables. > One idea would be to keep the list in TopMemoryContext, but that feels > like a band-aid solution. I think more likely what we ought to do is > stop trying to use a PG_TRY in reindex_relation to drive cleanup, > and instead hook ResetReindexPending into transaction abort processing > honestly. I spent some more time looking at this. The "band-aid solution" is no solution at all; consider the situation where we have some temp tables and we get a SIGTERM while doing a REINDEX on pg_class. If we don't reset the reindex state then our attempts to access pg_class indexes while clearing out temp tables will fail. There seem to be two distinct routes to a non-band-aid solution: 1. Instead of PG_TRY, use PG_ENSURE_ERROR_CLEANUP to ensure the reindex state gets cleared. This will work since the cleanup code will get run before any other proc_exit callbacks. However, the fact that the reindex state is getting propagated to parallel workers makes me fear that this solution is inadequate, because the parallel workers won't have any such cleanup callback. If they try to do any catalog access during transaction abort they might have problems. (Parallel reindex of a system catalog is mind-bendingly scary anyway, but I don't want this patch to make it worse.) 2. Clear the reindex state early in transaction abort, as suggested above and as implemented by the patch below. This adds a few cycles to xact abort, which is what the previous coding was trying to avoid, but I think we are just stuck with doing that. Thoughts? regards, tom lane diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 6b1ae1f..a979256 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -30,6 +30,7 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "access/xlogutils.h" +#include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/pg_enum.h" #include "catalog/storage.h" @@ -2662,6 +2663,9 @@ AbortTransaction(void) */ SetUserIdAndSecContext(s->prevUser, s->prevSecContext); + /* Forget about any active REINDEX. */ + ResetReindexState(); + /* If in parallel mode, clean up workers and exit parallel mode. */ if (IsInParallelMode()) { @@ -4961,6 +4965,9 @@ AbortSubTransaction(void) */ SetUserIdAndSecContext(s->prevUser, s->prevSecContext); + /* Forget about any active REINDEX. */ + ResetReindexState(); + /* Exit from parallel mode, if necessary. */ if (IsInParallelMode()) { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index bd7ec92..321d9d6 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -131,7 +131,6 @@ static void SetReindexProcessing(Oid heapOid, Oid indexOid); static void ResetReindexProcessing(void); static void SetReindexPending(List *indexes); static void RemoveReindexPending(Oid indexOid); -static void ResetReindexPending(void); /* @@ -3525,9 +3524,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, indexInfo->ii_ExclusionStrats = NULL; } - /* ensure SetReindexProcessing state isn't leaked */ - PG_TRY(); - { /* Suppress use of the target index while rebuilding it */ SetReindexProcessing(heapId, indexId); @@ -3537,13 +3533,9 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, /* Initialize the index and rebuild */ /* Note: we do not need to re-establish pkey setting */ index_build(heapRelation, iRel, indexInfo, true, true); - } - PG_FINALLY(); - { - /* Make sure flag gets cleared on error exit */ + + /* Re-allow use of target index */ ResetReindexProcessing(); - } - PG_END_TRY(); /* * If the index is marked invalid/not-ready/dead (ie, it's from a failed @@ -3715,7 +3707,6 @@ reindex_relation(Oid relid, int flags, int options) */ indexIds = RelationGetIndexList(rel); - PG_TRY(); { ListCell *indexId; char persistence; @@ -3780,12 +3771,6 @@ reindex_relation(Oid relid, int flags, int options) i++; } } - PG_FINALLY(); - { - /* Make sure list gets cleared on error exit */ - ResetReindexPending(); - } - PG_END_TRY(); /* * Close rel, but continue to hold the lock. @@ -3819,6 +3804,7 @@ reindex_relation(Oid relid, int flags, int options) static Oid currentlyReindexedHeap = InvalidOid; static Oid currentlyReindexedIndex = InvalidOid; static List *pendingReindexedIndexes = NIL; +static int reindexingNestLevel = 0; /* * ReindexIsProcessingHeap @@ -3855,8 +3841,6 @@ ReindexIsProcessingIndex(Oid indexOid) /* * SetReindexProcessing * Set flag that specified heap/index are being reindexed. - * - * NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done. */ static void SetReindexProcessing(Oid heapOid, Oid indexOid) @@ -3869,6 +3853,8 @@ SetReindexProcessing(Oid heapOid, Oid indexOid) currentlyReindexedIndex = indexOid; /* Index is no longer "pending" reindex. */ RemoveReindexPending(indexOid); + /* This may have been set already, but in case it isn't, do so now. */ + reindexingNestLevel = GetCurrentTransactionNestLevel(); } /* @@ -3878,17 +3864,16 @@ SetReindexProcessing(Oid heapOid, Oid indexOid) static void ResetReindexProcessing(void) { - /* This may be called in leader error path */ currentlyReindexedHeap = InvalidOid; currentlyReindexedIndex = InvalidOid; + /* reindexingNestLevel remains set till end of (sub)transaction */ } /* * SetReindexPending * Mark the given indexes as pending reindex. * - * NB: caller must use a PG_TRY block to ensure ResetReindexPending is done. - * Also, we assume that the current memory context stays valid throughout. + * NB: we assume that the current memory context stays valid throughout. */ static void SetReindexPending(List *indexes) @@ -3899,6 +3884,7 @@ SetReindexPending(List *indexes) if (IsInParallelMode()) elog(ERROR, "cannot modify reindex state during a parallel operation"); pendingReindexedIndexes = list_copy(indexes); + reindexingNestLevel = GetCurrentTransactionNestLevel(); } /* @@ -3915,14 +3901,32 @@ RemoveReindexPending(Oid indexOid) } /* - * ResetReindexPending - * Unset reindex-pending status. + * ResetReindexState + * Clear all reindexing state during (sub)transaction abort. */ -static void -ResetReindexPending(void) +void +ResetReindexState(void) { - /* This may be called in leader error path */ - pendingReindexedIndexes = NIL; + /* + * Because reindexing is not re-entrant, we don't need to cope with nested + * reindexing states. We just need to avoid messing up the outer-level + * state in case a subtransaction fails within a REINDEX. So checking the + * current nest level against that of the reindex operation is sufficient. + */ + if (reindexingNestLevel >= GetCurrentTransactionNestLevel()) + { + currentlyReindexedHeap = InvalidOid; + currentlyReindexedIndex = InvalidOid; + + /* + * We needn't try to release the contents of pendingReindexedIndexes; + * that list should be in a transaction-lifespan context, so it will + * go away automatically. + */ + pendingReindexedIndexes = NIL; + + reindexingNestLevel = 0; + } } /* @@ -3975,4 +3979,7 @@ RestoreReindexState(void *reindexstate) lappend_oid(pendingReindexedIndexes, sistate->pendingReindexedIndexes[c]); MemoryContextSwitchTo(oldcontext); + + /* Note the worker has its own transaction nesting level */ + reindexingNestLevel = GetCurrentTransactionNestLevel(); } diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index a2890c1..887d300 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -131,6 +131,8 @@ extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action); +extern Oid IndexGetRelation(Oid indexId, bool missing_ok); + extern void reindex_index(Oid indexId, bool skip_constraint_checks, char relpersistence, int options); @@ -145,8 +147,8 @@ extern bool reindex_relation(Oid relid, int flags, int options); extern bool ReindexIsProcessingHeap(Oid heapOid); extern bool ReindexIsProcessingIndex(Oid indexOid); -extern Oid IndexGetRelation(Oid indexId, bool missing_ok); +extern void ResetReindexState(void); extern Size EstimateReindexStateSpace(void); extern void SerializeReindexState(Size maxsize, char *start_address); extern void RestoreReindexState(void *reindexstate);