Обсуждение: BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE

Поиск
Список
Период
Сортировка

BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE

От
PG Bug reporting form
Дата:
The following bug has been logged on the website:

Bug reference:      16378
Logged by:          Alexander Lakhin
Email address:      exclusion@gmail.com
PostgreSQL version: 12.2
Operating system:   Ubuntu 18.04
Description:

The following script:
valgrind --leak-check=no --time-stamp=yes --read-var-info=yes  \
 --gen-suppressions=all  --suppressions=src/tools/valgrind.supp \
 --trace-children=yes \
pg_ctl -w -t 30 -D "$PGDB" -l postmaster.log start

echo "
CREATE TEMP TABLE temp_table(i int);

CREATE TABLE clstr(id serial primary key, a int);
INSERT INTO clstr(a) SELECT g.i % 42 FROM generate_series(1, 133000) g(i);
CREATE INDEX clstr_a ON clstr(a);
CLUSTER clstr USING clstr_a;
" | time psql postgres >>psql.log 2>&1 &

sleep 28

echo "
SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE query LIKE
'CLUSTER%'
" | psql postgres

leads to a cassert-enabled server crash with the following messages in the
log (for the master branch):
2020-04-19 09:31:16.351 MSK [28410] STATEMENT:  INSERT INTO clstr(a) SELECT
g.i % 42 FROM generate_series(1, 133000) g(i);
2020-04-19 09:31:21.187 MSK [28410] FATAL:  terminating connection due to
administrator command
2020-04-19 09:31:21.187 MSK [28410] STATEMENT:  CLUSTER clstr USING
clstr_a;
==00:00:00:36.289 28410== Invalid read of size 4
==00:00:00:36.289 28410==    at 0x42AFED: list_member_oid (list.c:678)
==00:00:00:36.289 28410==    by 0x2B8FCD: ReindexIsProcessingIndex
(index.c:3852)
==00:00:00:36.289 28410==    by 0x237D42: systable_beginscan (genam.c:367)
==00:00:00:36.289 28410==    by 0x2AD82A: findDependentObjects
(dependency.c:560)
==00:00:00:36.289 28410==    by 0x2AEBB1: performDeletion
(dependency.c:340)
==00:00:00:36.289 28410==    by 0x2B9833: RemoveTempRelations
(namespace.c:4232)
==00:00:00:36.289 28410==    by 0x2B9C86: RemoveTempRelationsCallback
(namespace.c:4251)
==00:00:00:36.289 28410==    by 0x532D2B: shmem_exit (ipc.c:239)
==00:00:00:36.289 28410==    by 0x532E28: proc_exit_prepare (ipc.c:194)
==00:00:00:36.289 28410==    by 0x532EAD: proc_exit (ipc.c:107)
==00:00:00:36.289 28410==    by 0x6892D6: errfinish (elog.c:578)
==00:00:00:36.289 28410==    by 0x55BA87: ProcessInterrupts
(postgres.c:3076)
==00:00:00:36.289 28410==  Address 0x1232af10 is 0 bytes inside a block of
size 48 free'd
==00:00:00:36.289 28410==    at 0x6B66A9: pfree (mcxt.c:1061)
==00:00:00:36.289 28410==    by 0x42A4A6: list_free_private (list.c:1364)
==00:00:00:36.289 28410==    by 0x42B6EB: list_free (list.c:1378)
==00:00:00:36.289 28410==    by 0x67AD26: RelationGetIndexAttrBitmap
(relcache.c:5107)
==00:00:00:36.289 28410==    by 0x21D2E6: heap_update (heapam.c:2973)
==00:00:00:36.289 28410==    by 0x21EDED: simple_heap_update
(heapam.c:3901)
==00:00:00:36.289 28410==    by 0x2B97A0: CatalogTupleUpdate
(indexing.c:230)
==00:00:00:36.289 28410==    by 0x32F8C0: copy_table_data (cluster.c:927)
==00:00:00:36.289 28410==    by 0x330E67: rebuild_relation (cluster.c:580)
==00:00:00:36.289 28410==    by 0x330FB0: cluster_rel (cluster.c:403)
==00:00:00:36.289 28410==    by 0x331271: cluster (cluster.c:173)
==00:00:00:36.289 28410==    by 0x563105: standard_ProcessUtility
(utility.c:819)
==00:00:00:36.289 28410==  Block was alloc'd at
==00:00:00:36.289 28410==    at 0x6B6063: palloc (mcxt.c:974)
==00:00:00:36.289 28410==    by 0x42A43C: new_list (list.c:134)
==00:00:00:36.289 28410==    by 0x42BC71: list_copy (list.c:1410)
==00:00:00:36.289 28410==    by 0x67996C: RelationGetIndexList
(relcache.c:4521)
==00:00:00:36.289 28410==    by 0x67AC49: RelationGetIndexAttrBitmap
(relcache.c:5101)
==00:00:00:36.289 28410==    by 0x21D2E6: heap_update (heapam.c:2973)
==00:00:00:36.289 28410==    by 0x21EDED: simple_heap_update
(heapam.c:3901)
==00:00:00:36.289 28410==    by 0x2B97A0: CatalogTupleUpdate
(indexing.c:230)
==00:00:00:36.289 28410==    by 0x32F8C0: copy_table_data (cluster.c:927)
==00:00:00:36.289 28410==    by 0x330E67: rebuild_relation (cluster.c:580)
==00:00:00:36.289 28410==    by 0x330FB0: cluster_rel (cluster.c:403)
==00:00:00:36.289 28410==    by 0x331271: cluster (cluster.c:173)
==00:00:00:36.289 28410== 
{
   <insert_a_suppression_name_here>
   Memcheck:Addr4
   fun:list_member_oid
   fun:ReindexIsProcessingIndex
   fun:systable_beginscan
   fun:findDependentObjects
   fun:performDeletion
   fun:RemoveTempRelations
   fun:RemoveTempRelationsCallback
   fun:shmem_exit
   fun:proc_exit_prepare
   fun:proc_exit
   fun:errfinish
   fun:ProcessInterrupts
}
TRAP: FailedAssertion("IsOidList(list)", File: "list.c", Line: 678)
==00:00:00:36.291 28481== 
==00:00:00:36.291 28481== HEAP SUMMARY:
==00:00:00:36.291 28481==     in use at exit: 1,302,653 bytes in 440
blocks
==00:00:00:36.291 28481==   total heap usage: 9,446 allocs, 312 frees,
9,487,792 bytes allocated
==00:00:00:36.291 28481== 
==00:00:00:36.291 28481== For a detailed leak analysis, rerun with:
--leak-check=full
==00:00:00:36.291 28481== 
==00:00:00:36.291 28481== For counts of detected and suppressed errors,
rerun with: -v
==00:00:00:36.291 28481== ERROR SUMMARY: 0 errors from 0 contexts
(suppressed: 7 from 3)
postgres: law postgres [local]
CLUSTER(ExceptionalCondition+0x7c)[0x685026]
postgres: law postgres [local] CLUSTER(list_member_oid+0x46)[0x42b023]
postgres: law postgres [local]
CLUSTER(ReindexIsProcessingIndex+0x1a)[0x2b8fce]
postgres: law postgres [local] CLUSTER(systable_beginscan+0x37)[0x237d43]
postgres: law postgres [local] CLUSTER(+0x1a582b)[0x2ad82b]
postgres: law postgres [local] CLUSTER(performDeletion+0x74)[0x2aebb2]
postgres: law postgres [local] CLUSTER(+0x1b1834)[0x2b9834]
postgres: law postgres [local] CLUSTER(+0x1b1c87)[0x2b9c87]
postgres: law postgres [local] CLUSTER(shmem_exit+0x68)[0x532d2c]
postgres: law postgres [local] CLUSTER(+0x42ae29)[0x532e29]
postgres: law postgres [local] CLUSTER(proc_exit+0x8)[0x532eae]
postgres: law postgres [local] CLUSTER(errfinish+0x3d7)[0x6892d7]
postgres: law postgres [local] CLUSTER(ProcessInterrupts+0x35b)[0x55ba88]
postgres: law postgres [local] CLUSTER(+0x120394)[0x228394]
postgres: law postgres [local] CLUSTER(+0x147603)[0x24f603]
postgres: law postgres [local] CLUSTER(btbuild+0x7b)[0x251053]
postgres: law postgres [local] CLUSTER(index_build+0x14a)[0x2b71e6]
postgres: law postgres [local] CLUSTER(reindex_index+0x3c7)[0x2b7ad6]
postgres: law postgres [local] CLUSTER(reindex_relation+0x1b9)[0x2b91ac]
postgres: law postgres [local] CLUSTER(finish_heap_swap+0xe0)[0x330b84]
postgres: law postgres [local] CLUSTER(+0x228e95)[0x330e95]
postgres: law postgres [local] CLUSTER(cluster_rel+0xe5)[0x330fb1]
postgres: law postgres [local] CLUSTER(cluster+0x9d)[0x331272]
postgres: law postgres [local]
CLUSTER(standard_ProcessUtility+0x638)[0x563106]
postgres: law postgres [local] CLUSTER(ProcessUtility+0xcd)[0x5636ad]
postgres: law postgres [local] CLUSTER(+0x457bfb)[0x55fbfb]
postgres: law postgres [local] CLUSTER(+0x458862)[0x560862]
postgres: law postgres [local] CLUSTER(PortalRun+0x2b8)[0x56153d]
postgres: law postgres [local] CLUSTER(+0x455821)[0x55d821]
postgres: law postgres [local] CLUSTER(PostgresMain+0x8b8)[0x55f852]
postgres: law postgres [local] CLUSTER(+0x3c474d)[0x4cc74d]
postgres: law postgres [local] CLUSTER(+0x3c77e6)[0x4cf7e6]
postgres: law postgres [local] CLUSTER(+0x3c7aea)[0x4cfaea]
postgres: law postgres [local] CLUSTER(PostmasterMain+0x109b)[0x4d0e63]
postgres: law postgres [local] CLUSTER(main+0x20c)[0x4194d6]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x6285b97]
postgres: law postgres [local] CLUSTER(_start+0x2a)[0x1c0b6a]

The required sleep delay may vary (25-30 seconds delay works for me), but
I've managed to reproduce it reliably on REL_10_STABLE..master.


Re: BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE

От
Tom Lane
Дата:
PG Bug reporting form <noreply@postgresql.org> writes:
> The following script:
> ...
> leads to a cassert-enabled server crash with the following messages in the
> log (for the master branch):

Hm, I can get a crash here without valgrind, as long as it's a cassert
build.  We're accessing a list that's been thrown away by memory context
cleanup, so this results:
TRAP: FailedAssertion("IsOidList(list)", File: "list.c", Line: 678)
The timing is a bit finicky, but no more so than you report for the
valgrind case.

The difficulty is that the pendingReindexedIndexes list is kept in
some transaction-local context, so it gets flushed during the transaction
abort that is the first step of proc_exit processing.  But the static
pointer to it is still set, causing big problems if we do any system
catalog accesses later --- like, say, while dropping the session's
temp tables.

One idea would be to keep the list in TopMemoryContext, but that feels
like a band-aid solution.  I think more likely what we ought to do is
stop trying to use a PG_TRY in reindex_relation to drive cleanup,
and instead hook ResetReindexPending into transaction abort processing
honestly.

I wonder how many other uses of PG_TRY have similar issues?  It's
not really obvious that this is an unsafe coding pattern.

            regards, tom lane



Re: BUG #16378: Invalid memory access on interrupting CLUSTER after CREATE TEMP TABLE

От
Tom Lane
Дата:
I wrote:
> The difficulty is that the pendingReindexedIndexes list is kept in
> some transaction-local context, so it gets flushed during the transaction
> abort that is the first step of proc_exit processing.  But the static
> pointer to it is still set, causing big problems if we do any system
> catalog accesses later --- like, say, while dropping the session's
> temp tables.

> One idea would be to keep the list in TopMemoryContext, but that feels
> like a band-aid solution.  I think more likely what we ought to do is
> stop trying to use a PG_TRY in reindex_relation to drive cleanup,
> and instead hook ResetReindexPending into transaction abort processing
> honestly.

I spent some more time looking at this.  The "band-aid solution" is no
solution at all; consider the situation where we have some temp tables
and we get a SIGTERM while doing a REINDEX on pg_class.  If we don't
reset the reindex state then our attempts to access pg_class indexes
while clearing out temp tables will fail.

There seem to be two distinct routes to a non-band-aid solution:

1. Instead of PG_TRY, use PG_ENSURE_ERROR_CLEANUP to ensure the reindex
state gets cleared.  This will work since the cleanup code will get
run before any other proc_exit callbacks.  However, the fact that the
reindex state is getting propagated to parallel workers makes me fear
that this solution is inadequate, because the parallel workers won't
have any such cleanup callback.  If they try to do any catalog access
during transaction abort they might have problems.  (Parallel reindex
of a system catalog is mind-bendingly scary anyway, but I don't want
this patch to make it worse.)

2. Clear the reindex state early in transaction abort, as suggested above
and as implemented by the patch below.  This adds a few cycles to xact
abort, which is what the previous coding was trying to avoid, but I think
we are just stuck with doing that.

Thoughts?

            regards, tom lane

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 6b1ae1f..a979256 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -30,6 +30,7 @@
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
+#include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_enum.h"
 #include "catalog/storage.h"
@@ -2662,6 +2663,9 @@ AbortTransaction(void)
      */
     SetUserIdAndSecContext(s->prevUser, s->prevSecContext);

+    /* Forget about any active REINDEX. */
+    ResetReindexState();
+
     /* If in parallel mode, clean up workers and exit parallel mode. */
     if (IsInParallelMode())
     {
@@ -4961,6 +4965,9 @@ AbortSubTransaction(void)
      */
     SetUserIdAndSecContext(s->prevUser, s->prevSecContext);

+    /* Forget about any active REINDEX. */
+    ResetReindexState();
+
     /* Exit from parallel mode, if necessary. */
     if (IsInParallelMode())
     {
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index bd7ec92..321d9d6 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -131,7 +131,6 @@ static void SetReindexProcessing(Oid heapOid, Oid indexOid);
 static void ResetReindexProcessing(void);
 static void SetReindexPending(List *indexes);
 static void RemoveReindexPending(Oid indexOid);
-static void ResetReindexPending(void);


 /*
@@ -3525,9 +3524,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
         indexInfo->ii_ExclusionStrats = NULL;
     }

-    /* ensure SetReindexProcessing state isn't leaked */
-    PG_TRY();
-    {
         /* Suppress use of the target index while rebuilding it */
         SetReindexProcessing(heapId, indexId);

@@ -3537,13 +3533,9 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
         /* Initialize the index and rebuild */
         /* Note: we do not need to re-establish pkey setting */
         index_build(heapRelation, iRel, indexInfo, true, true);
-    }
-    PG_FINALLY();
-    {
-        /* Make sure flag gets cleared on error exit */
+
+        /* Re-allow use of target index */
         ResetReindexProcessing();
-    }
-    PG_END_TRY();

     /*
      * If the index is marked invalid/not-ready/dead (ie, it's from a failed
@@ -3715,7 +3707,6 @@ reindex_relation(Oid relid, int flags, int options)
      */
     indexIds = RelationGetIndexList(rel);

-    PG_TRY();
     {
         ListCell   *indexId;
         char        persistence;
@@ -3780,12 +3771,6 @@ reindex_relation(Oid relid, int flags, int options)
             i++;
         }
     }
-    PG_FINALLY();
-    {
-        /* Make sure list gets cleared on error exit */
-        ResetReindexPending();
-    }
-    PG_END_TRY();

     /*
      * Close rel, but continue to hold the lock.
@@ -3819,6 +3804,7 @@ reindex_relation(Oid relid, int flags, int options)
 static Oid    currentlyReindexedHeap = InvalidOid;
 static Oid    currentlyReindexedIndex = InvalidOid;
 static List *pendingReindexedIndexes = NIL;
+static int    reindexingNestLevel = 0;

 /*
  * ReindexIsProcessingHeap
@@ -3855,8 +3841,6 @@ ReindexIsProcessingIndex(Oid indexOid)
 /*
  * SetReindexProcessing
  *        Set flag that specified heap/index are being reindexed.
- *
- * NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
  */
 static void
 SetReindexProcessing(Oid heapOid, Oid indexOid)
@@ -3869,6 +3853,8 @@ SetReindexProcessing(Oid heapOid, Oid indexOid)
     currentlyReindexedIndex = indexOid;
     /* Index is no longer "pending" reindex. */
     RemoveReindexPending(indexOid);
+    /* This may have been set already, but in case it isn't, do so now. */
+    reindexingNestLevel = GetCurrentTransactionNestLevel();
 }

 /*
@@ -3878,17 +3864,16 @@ SetReindexProcessing(Oid heapOid, Oid indexOid)
 static void
 ResetReindexProcessing(void)
 {
-    /* This may be called in leader error path */
     currentlyReindexedHeap = InvalidOid;
     currentlyReindexedIndex = InvalidOid;
+    /* reindexingNestLevel remains set till end of (sub)transaction */
 }

 /*
  * SetReindexPending
  *        Mark the given indexes as pending reindex.
  *
- * NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
- * Also, we assume that the current memory context stays valid throughout.
+ * NB: we assume that the current memory context stays valid throughout.
  */
 static void
 SetReindexPending(List *indexes)
@@ -3899,6 +3884,7 @@ SetReindexPending(List *indexes)
     if (IsInParallelMode())
         elog(ERROR, "cannot modify reindex state during a parallel operation");
     pendingReindexedIndexes = list_copy(indexes);
+    reindexingNestLevel = GetCurrentTransactionNestLevel();
 }

 /*
@@ -3915,14 +3901,32 @@ RemoveReindexPending(Oid indexOid)
 }

 /*
- * ResetReindexPending
- *        Unset reindex-pending status.
+ * ResetReindexState
+ *        Clear all reindexing state during (sub)transaction abort.
  */
-static void
-ResetReindexPending(void)
+void
+ResetReindexState(void)
 {
-    /* This may be called in leader error path */
-    pendingReindexedIndexes = NIL;
+    /*
+     * Because reindexing is not re-entrant, we don't need to cope with nested
+     * reindexing states.  We just need to avoid messing up the outer-level
+     * state in case a subtransaction fails within a REINDEX.  So checking the
+     * current nest level against that of the reindex operation is sufficient.
+     */
+    if (reindexingNestLevel >= GetCurrentTransactionNestLevel())
+    {
+        currentlyReindexedHeap = InvalidOid;
+        currentlyReindexedIndex = InvalidOid;
+
+        /*
+         * We needn't try to release the contents of pendingReindexedIndexes;
+         * that list should be in a transaction-lifespan context, so it will
+         * go away automatically.
+         */
+        pendingReindexedIndexes = NIL;
+
+        reindexingNestLevel = 0;
+    }
 }

 /*
@@ -3975,4 +3979,7 @@ RestoreReindexState(void *reindexstate)
             lappend_oid(pendingReindexedIndexes,
                         sistate->pendingReindexedIndexes[c]);
     MemoryContextSwitchTo(oldcontext);
+
+    /* Note the worker has its own transaction nesting level */
+    reindexingNestLevel = GetCurrentTransactionNestLevel();
 }
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a2890c1..887d300 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -131,6 +131,8 @@ extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);

 extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);

+extern Oid    IndexGetRelation(Oid indexId, bool missing_ok);
+
 extern void reindex_index(Oid indexId, bool skip_constraint_checks,
                           char relpersistence, int options);

@@ -145,8 +147,8 @@ extern bool reindex_relation(Oid relid, int flags, int options);

 extern bool ReindexIsProcessingHeap(Oid heapOid);
 extern bool ReindexIsProcessingIndex(Oid indexOid);
-extern Oid    IndexGetRelation(Oid indexId, bool missing_ok);

+extern void ResetReindexState(void);
 extern Size EstimateReindexStateSpace(void);
 extern void SerializeReindexState(Size maxsize, char *start_address);
 extern void RestoreReindexState(void *reindexstate);