Re: pg_dump versus hash partitioning

From: Tom Lane
Subject: Re: pg_dump versus hash partitioning
Date:
Msg-id: 2441413.1678750752@sss.pgh.pa.us
In reply to: Re: pg_dump versus hash partitioning  (Julien Rouhaud <rjuju123@gmail.com>)
Responses: Re: pg_dump versus hash partitioning  (Julien Rouhaud <rjuju123@gmail.com>)
List: pgsql-hackers
Julien Rouhaud <rjuju123@gmail.com> writes:
> On Sun, Mar 12, 2023 at 03:46:52PM -0400, Tom Lane wrote:
>> The trick is to detect in pg_restore whether pg_dump chose to do
>> load-via-partition-root.

> Given that this approach wouldn't help with existing dump files (at least
> those using COPY; in any case the ones using INSERT are doomed), I'm
> slightly in favor of the first approach, and later adding an easy and
> non-magic way to produce dumps that don't depend on partitioning.

Yeah, we need to do both.  Attached find an updated patch series:

0001: TAP test that exhibits both this deadlock problem and the
different-hash-codes problem.  I'm not sure if we want to commit
this, or if it should be in exactly this form --- the second set
of tests with a manual --load-via-partition-root switch will be
pretty redundant after this patch series.

0002: Make pg_restore detect load-via-partition-root by examining the
COPY commands embedded in the dump, and skip the TRUNCATE if so,
thereby fixing the deadlock issue.  This is the best we can do for
legacy dump files, I think, but it should be good enough.
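
To make the detection concrete: for the test's t0_p1 partition, a
normal dump embeds roughly

    COPY public.t0_p1 (en, data) FROM stdin;

while a --load-via-partition-root dump embeds

    COPY public.t0 (en, data) FROM stdin;

so pg_restore can rebuild the expected "COPY <namespace>.<tag> " prefix
for the TOC entry and treat any mismatch as load-via-partition-root.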

0003: Also detect load-via-partition-root by adding a label in the
dump.  This is a more bulletproof solution going forward.
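
With that, a text-format dump's data item comes out along the lines of

    -- load via partition root public.t0
    COPY public.t0 (en, data) FROM stdin;
    ...
    \.

and pg_restore no longer has to infer anything from the COPY syntax.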

0004-0006: same as previous patches, but rebased over these.
This gets us to a place where the new TAP test passes.
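
(If you want to see the hazard 0004 guards against first-hand:
hashenum() just hashes the enum value's OID, which is assigned afresh
by each CREATE TYPE, so for example

    create type digit as enum
      ('0','1','2','3','4','5','6','7','8','9');
    select hashenum('3'::digit);  -- result depends on the OID '3' got

can route the same label to different hash partitions in the source
and destination databases.)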

I've not done anything about modifying the documentation, but I still
think we could remove the warning label on --load-via-partition-root.

            regards, tom lane

From 33acd81f90ccd1ebdd75fbdf58024edb8f10f6a1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:01:42 -0400
Subject: [PATCH v2 1/6] Create a test case for dump-and-restore of hashed-enum
 partitioning.

I'm not sure we want to commit this, at least not in this form,
but it's able to demonstrate both the deadlock problem (when using
--load-via-partition-root) and the change-of-hash-code problem
(when not).
---
 src/bin/pg_dump/meson.build               |  1 +
 src/bin/pg_dump/t/004_pg_dump_parallel.pl | 66 +++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 src/bin/pg_dump/t/004_pg_dump_parallel.pl

diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build
index ab4c25c781..b2fb7ac77f 100644
--- a/src/bin/pg_dump/meson.build
+++ b/src/bin/pg_dump/meson.build
@@ -96,6 +96,7 @@ tests += {
       't/001_basic.pl',
       't/002_pg_dump.pl',
       't/003_pg_dump_with_server.pl',
+      't/004_pg_dump_parallel.pl',
       't/010_dump_connstr.pl',
     ],
   },
diff --git a/src/bin/pg_dump/t/004_pg_dump_parallel.pl b/src/bin/pg_dump/t/004_pg_dump_parallel.pl
new file mode 100644
index 0000000000..c3f7d20b13
--- /dev/null
+++ b/src/bin/pg_dump/t/004_pg_dump_parallel.pl
@@ -0,0 +1,66 @@
+
+# Copyright (c) 2021-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $dbname1 = 'regression_src';
+my $dbname2 = 'regression_dest1';
+my $dbname3 = 'regression_dest2';
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+
+my $backupdir = $node->backup_dir;
+
+$node->run_log([ 'createdb', $dbname1 ]);
+$node->run_log([ 'createdb', $dbname2 ]);
+$node->run_log([ 'createdb', $dbname3 ]);
+
+$node->safe_psql(
+    $dbname1,
+    qq{
+create type digit as enum ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
+create table t0 (en digit, data int) partition by hash(en);
+create table t0_p1 partition of t0 for values with (modulus 3, remainder 0);
+create table t0_p2 partition of t0 for values with (modulus 3, remainder 1);
+create table t0_p3 partition of t0 for values with (modulus 3, remainder 2);
+insert into t0 select (x%10)::text::digit, x from generate_series(1,1000) x;
+    });
+
+$node->command_ok(
+    [
+        'pg_dump', '-Fd', '--no-sync', '-j2', '-f', "$backupdir/dump1",
+        $node->connstr($dbname1)
+    ],
+    'parallel dump');
+
+$node->command_ok(
+    [
+        'pg_restore', '-v',
+        '-d',         $node->connstr($dbname2),
+        '-j3',        "$backupdir/dump1"
+    ],
+    'parallel restore');
+
+$node->command_ok(
+    [
+        'pg_dump', '-Fd', '--no-sync', '-j2', '-f', "$backupdir/dump2",
+        '--load-via-partition-root', $node->connstr($dbname1)
+    ],
+    'parallel dump via partition root');
+
+$node->command_ok(
+    [
+        'pg_restore', '-v',
+        '-d',         $node->connstr($dbname3),
+        '-j3',        "$backupdir/dump2"
+    ],
+    'parallel restore via partition root');
+
+done_testing();
--
2.31.1

From 685b4a1c0aaa59b2b6047051d082838b88528587 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:05:05 -0400
Subject: [PATCH v2 2/6] Avoid TRUNCATE when restoring load-via-partition-root
 data.

This fixes the deadlock problem, allowing 004_pg_dump_parallel.pl to
succeed in the test where there's a manual --load-via-partition-root
switch.  It relies on the dump having used COPY commands, though.
---
 src/bin/pg_dump/pg_backup_archiver.c | 61 ++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 61ebb8fe85..cb3a2c21f5 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -86,6 +86,7 @@ static RestorePass _tocEntryRestorePass(TocEntry *te);
 static bool _tocEntryIsACL(TocEntry *te);
 static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
 static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
+static bool is_load_via_partition_root(TocEntry *te);
 static void buildTocEntryArrays(ArchiveHandle *AH);
 static void _moveBefore(TocEntry *pos, TocEntry *te);
 static int    _discoverArchiveFormat(ArchiveHandle *AH);
@@ -887,6 +888,8 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
                 }
                 else
                 {
+                    bool        use_truncate;
+
                     _disableTriggersIfNecessary(AH, te);

                     /* Select owner and schema as necessary */
@@ -898,13 +901,23 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)

                     /*
                      * In parallel restore, if we created the table earlier in
-                     * the run then we wrap the COPY in a transaction and
-                     * precede it with a TRUNCATE.  If archiving is not on
-                     * this prevents WAL-logging the COPY.  This obtains a
-                     * speedup similar to that from using single_txn mode in
-                     * non-parallel restores.
+                     * the run and we are not restoring a
+                     * load-via-partition-root data item then we wrap the COPY
+                     * in a transaction and precede it with a TRUNCATE.  If
+                     * archiving is not on this prevents WAL-logging the COPY.
+                     * This obtains a speedup similar to that from using
+                     * single_txn mode in non-parallel restores.
+                     *
+                     * We mustn't do this for load-via-partition-root cases
+                     * because some data might get moved across partition
+                     * boundaries, risking deadlock and/or loss of previously
+                     * loaded data.  (We assume that all partitions of a
+                     * partitioned table will be treated the same way.)
                      */
-                    if (is_parallel && te->created)
+                    use_truncate = is_parallel && te->created &&
+                        !is_load_via_partition_root(te);
+
+                    if (use_truncate)
                     {
                         /*
                          * Parallel restore is always talking directly to a
@@ -942,7 +955,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
                     AH->outputKind = OUTPUT_SQLCMDS;

                     /* close out the transaction started above */
-                    if (is_parallel && te->created)
+                    if (use_truncate)
                         CommitTransaction(&AH->public);

                     _enableTriggersIfNecessary(AH, te);
@@ -1036,6 +1049,40 @@ _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te)
              fmtQualifiedId(te->namespace, te->tag));
 }

+/*
+ * Detect whether a TABLE DATA TOC item is performing "load via partition
+ * root", that is the target table is an ancestor partition rather than the
+ * table the TOC item is nominally for.
+ *
+ * In newer archive files this can be detected by checking for a special
+ * comment placed in te->defn.  In older files we have to fall back to seeing
+ * if the COPY statement targets the named table or some other one.  This
+ * will not work for data dumped as INSERT commands, so we could give a false
+ * negative in that case; fortunately, that's a rarely-used option.
+ */
+static bool
+is_load_via_partition_root(TocEntry *te)
+{
+    if (te->copyStmt && *te->copyStmt)
+    {
+        PQExpBuffer copyStmt = createPQExpBuffer();
+        bool        result;
+
+        /*
+         * Build the initial part of the COPY as it would appear if the
+         * nominal target table is the actual target.  If we see anything
+         * else, it must be a load-via-partition-root case.
+         */
+        appendPQExpBuffer(copyStmt, "COPY %s ",
+                          fmtQualifiedId(te->namespace, te->tag));
+        result = strncmp(te->copyStmt, copyStmt->data, copyStmt->len) != 0;
+        destroyPQExpBuffer(copyStmt);
+        return result;
+    }
+    /* Assume it's not load-via-partition-root */
+    return false;
+}
+
 /*
  * This is a routine that is part of the dumper interface, hence the 'Archive*' parameter.
  */
--
2.31.1

From 28b122cb9be438e08e3754672559a9e59ea39711 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:08:40 -0400
Subject: [PATCH v2 3/6] Add a marker to TABLE DATA items using
 load-via-partition-root.

Set the te->defn field of such an item to a comment like

-- load via partition root <table name>

This provides a mechanism for load-via-partition-root detection
that works even with --inserts mode, and as a side benefit the
dump contents are a bit less confusing.  We still need the COPY
test to work with old dump files, though.
---
 src/bin/pg_dump/pg_backup_archiver.c | 11 ++++++--
 src/bin/pg_dump/pg_dump.c            | 39 ++++++++++++++++------------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index cb3a2c21f5..1ecd719db2 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -1063,6 +1063,9 @@ _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te)
 static bool
 is_load_via_partition_root(TocEntry *te)
 {
+    if (te->defn &&
+        strncmp(te->defn, "-- load via partition root ", 27) == 0)
+        return true;
     if (te->copyStmt && *te->copyStmt)
     {
         PQExpBuffer copyStmt = createPQExpBuffer();
@@ -3002,8 +3005,12 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
             res = res & ~REQ_DATA;
     }

-    /* If there's no definition command, there's no schema component */
-    if (!te->defn || !te->defn[0])
+    /*
+     * If there's no definition command, there's no schema component.  Treat
+     * "load via partition root" comments as not schema.
+     */
+    if (!te->defn || !te->defn[0] ||
+        strncmp(te->defn, "-- load via partition root ", 27) == 0)
         res = res & ~REQ_SCHEMA;

     /*
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 4217908f84..b0376ee56c 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2443,34 +2443,38 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
     PQExpBuffer copyBuf = createPQExpBuffer();
     PQExpBuffer clistBuf = createPQExpBuffer();
     DataDumperPtr dumpFn;
+    char       *tdDefn = NULL;
     char       *copyStmt;
     const char *copyFrom;

     /* We had better have loaded per-column details about this table */
     Assert(tbinfo->interesting);

+    /*
+     * When load-via-partition-root is set, get the root table name for the
+     * partition table, so that we can reload data through the root table.
+     * Then construct a comment to be inserted into the TOC entry's defn
+     * field, so that such cases can be identified reliably.
+     */
+    if (dopt->load_via_partition_root && tbinfo->ispartition)
+    {
+        TableInfo  *parentTbinfo;
+
+        parentTbinfo = getRootTableInfo(tbinfo);
+        copyFrom = fmtQualifiedDumpable(parentTbinfo);
+        printfPQExpBuffer(copyBuf, "-- load via partition root %s",
+                          copyFrom);
+        tdDefn = pg_strdup(copyBuf->data);
+    }
+    else
+        copyFrom = fmtQualifiedDumpable(tbinfo);
+
     if (dopt->dump_inserts == 0)
     {
         /* Dump/restore using COPY */
         dumpFn = dumpTableData_copy;
-
-        /*
-         * When load-via-partition-root is set, get the root table name for
-         * the partition table, so that we can reload data through the root
-         * table.
-         */
-        if (dopt->load_via_partition_root && tbinfo->ispartition)
-        {
-            TableInfo  *parentTbinfo;
-
-            parentTbinfo = getRootTableInfo(tbinfo);
-            copyFrom = fmtQualifiedDumpable(parentTbinfo);
-        }
-        else
-            copyFrom = fmtQualifiedDumpable(tbinfo);
-
         /* must use 2 steps here 'cause fmtId is nonreentrant */
-        appendPQExpBuffer(copyBuf, "COPY %s ",
+        printfPQExpBuffer(copyBuf, "COPY %s ",
                           copyFrom);
         appendPQExpBuffer(copyBuf, "%s FROM stdin;\n",
                           fmtCopyColumnList(tbinfo, clistBuf));
@@ -2498,6 +2502,7 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
                                        .owner = tbinfo->rolname,
                                        .description = "TABLE DATA",
                                        .section = SECTION_DATA,
+                                       .createStmt = tdDefn,
                                        .copyStmt = copyStmt,
                                        .deps = &(tbinfo->dobj.dumpId),
                                        .nDeps = 1,
--
2.31.1

From e68c58928285679663dcea32ea98409a31f15a5a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:11:44 -0400
Subject: [PATCH v2 4/6] Force load-via-partition-root for hash partitioning on
 an enum column.

Without this, dump and restore will almost always fail because the
enum values will receive different OIDs in the destination database,
and their hash codes will therefore be different.  (Improving the
hash algorithm would not make this situation better, and would indeed
break other cases as well.)  This allows 004_pg_dump_parallel.pl to
pass in full.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
---
 src/bin/pg_dump/common.c  |  18 +++---
 src/bin/pg_dump/pg_dump.c | 122 +++++++++++++++++++++++++++++++++++---
 src/bin/pg_dump/pg_dump.h |   2 +
 3 files changed, 125 insertions(+), 17 deletions(-)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index a2b74901e4..44558b60e8 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -230,6 +230,9 @@ getSchemaData(Archive *fout, int *numTablesPtr)
     pg_log_info("flagging inherited columns in subtables");
     flagInhAttrs(fout->dopt, tblinfo, numTables);

+    pg_log_info("reading partitioning data");
+    getPartitioningInfo(fout);
+
     pg_log_info("reading indexes");
     getIndexes(fout, tblinfo, numTables);

@@ -285,7 +288,6 @@ static void
 flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
               InhInfo *inhinfo, int numInherits)
 {
-    DumpOptions *dopt = fout->dopt;
     int            i,
                 j;

@@ -301,18 +303,18 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
             continue;

         /*
-         * Normally, we don't bother computing anything for non-target tables,
-         * but if load-via-partition-root is specified, we gather information
-         * on every partition in the system so that getRootTableInfo can trace
-         * from any given to leaf partition all the way up to the root.  (We
-         * don't need to mark them as interesting for getTableAttrs, though.)
+         * Normally, we don't bother computing anything for non-target tables.
+         * However, we must find the parents of non-root partitioned tables in
+         * any case, so that we can trace from leaf partitions up to the root
+         * (in case a leaf is to be dumped but its parents are not).  We need
+         * not mark such parents interesting for getTableAttrs, though.
          */
         if (!tblinfo[i].dobj.dump)
         {
             mark_parents = false;

-            if (!dopt->load_via_partition_root ||
-                !tblinfo[i].ispartition)
+            if (!(tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE &&
+                  tblinfo[i].ispartition))
                 find_parents = false;
         }

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index b0376ee56c..7f40b455af 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -320,6 +320,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions,
 static char *get_synchronized_snapshot(Archive *fout);
 static void setupDumpWorker(Archive *AH);
 static TableInfo *getRootTableInfo(const TableInfo *tbinfo);
+static bool forcePartitionRootLoad(const TableInfo *tbinfo);


 int
@@ -2227,11 +2228,13 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
             insertStmt = createPQExpBuffer();

             /*
-             * When load-via-partition-root is set, get the root table name
-             * for the partition table, so that we can reload data through the
-             * root table.
+             * When load-via-partition-root is set or forced, get the root
+             * table name for the partition table, so that we can reload data
+             * through the root table.
              */
-            if (dopt->load_via_partition_root && tbinfo->ispartition)
+            if (tbinfo->ispartition &&
+                (dopt->load_via_partition_root ||
+                 forcePartitionRootLoad(tbinfo)))
                 targettab = getRootTableInfo(tbinfo);
             else
                 targettab = tbinfo;
@@ -2429,6 +2432,35 @@ getRootTableInfo(const TableInfo *tbinfo)
     return parentTbinfo;
 }

+/*
+ * forcePartitionRootLoad
+ *     Check if we must force load_via_partition_root for this partition.
+ *
+ * This is required if any level of ancestral partitioned table has an
+ * unsafe partitioning scheme.
+ */
+static bool
+forcePartitionRootLoad(const TableInfo *tbinfo)
+{
+    TableInfo  *parentTbinfo;
+
+    Assert(tbinfo->ispartition);
+    Assert(tbinfo->numParents == 1);
+
+    parentTbinfo = tbinfo->parents[0];
+    if (parentTbinfo->unsafe_partitions)
+        return true;
+    while (parentTbinfo->ispartition)
+    {
+        Assert(parentTbinfo->numParents == 1);
+        parentTbinfo = parentTbinfo->parents[0];
+        if (parentTbinfo->unsafe_partitions)
+            return true;
+    }
+
+    return false;
+}
+
 /*
  * dumpTableData -
  *      dump the contents of a single table
@@ -2451,12 +2483,14 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
     Assert(tbinfo->interesting);

     /*
-     * When load-via-partition-root is set, get the root table name for the
-     * partition table, so that we can reload data through the root table.
-     * Then construct a comment to be inserted into the TOC entry's defn
-     * field, so that such cases can be identified reliably.
+     * When load-via-partition-root is set or forced, get the root table name
+     * for the partition table, so that we can reload data through the root
+     * table.  Then construct a comment to be inserted into the TOC entry's
+     * defn field, so that such cases can be identified reliably.
      */
-    if (dopt->load_via_partition_root && tbinfo->ispartition)
+    if (tbinfo->ispartition &&
+        (dopt->load_via_partition_root ||
+         forcePartitionRootLoad(tbinfo)))
     {
         TableInfo  *parentTbinfo;

@@ -6764,6 +6798,76 @@ getInherits(Archive *fout, int *numInherits)
     return inhinfo;
 }

+/*
+ * getPartitioningInfo
+ *      get information about partitioning
+ *
+ * For the most part, we only collect partitioning info about tables we
+ * intend to dump.  However, this function has to consider all partitioned
+ * tables in the database, because we need to know about parents of partitions
+ * we are going to dump even if the parents themselves won't be dumped.
+ *
+ * Specifically, what we need to know is whether each partitioned table
+ * has an "unsafe" partitioning scheme that requires us to force
+ * load-via-partition-root mode for its children.  Currently the only case
+ * for which we force that is hash partitioning on enum columns, since the
+ * hash codes depend on enum value OIDs which won't be replicated across
+ * dump-and-reload.  There are other cases in which load-via-partition-root
+ * might be necessary, but we expect users to cope with them.
+ */
+void
+getPartitioningInfo(Archive *fout)
+{
+    PQExpBuffer query;
+    PGresult   *res;
+    int            ntups;
+
+    /* no partitions before v10 */
+    if (fout->remoteVersion < 100000)
+        return;
+    /* needn't bother if schema-only dump */
+    if (fout->dopt->schemaOnly)
+        return;
+
+    query = createPQExpBuffer();
+
+    /*
+     * Unsafe partitioning schemes are exactly those for which hash enum_ops
+     * appears among the partition opclasses.  We needn't check partstrat.
+     *
+     * Note that this query may well retrieve info about tables we aren't
+     * going to dump and hence have no lock on.  That's okay since we need not
+     * invoke any unsafe server-side functions.
+     */
+    appendPQExpBufferStr(query,
+                         "SELECT partrelid FROM pg_partitioned_table WHERE\n"
+                         "(SELECT c.oid FROM pg_opclass c JOIN pg_am a "
+                         "ON c.opcmethod = a.oid\n"
+                         "WHERE opcname = 'enum_ops' "
+                         "AND opcnamespace = 'pg_catalog'::regnamespace "
+                         "AND amname = 'hash') = ANY(partclass)");
+
+    res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+    ntups = PQntuples(res);
+
+    for (int i = 0; i < ntups; i++)
+    {
+        Oid            tabrelid = atooid(PQgetvalue(res, i, 0));
+        TableInfo  *tbinfo;
+
+        tbinfo = findTableByOid(tabrelid);
+        if (tbinfo == NULL)
+            pg_fatal("failed sanity check, table OID %u appearing in pg_partitioned_table not found",
+                     tabrelid);
+        tbinfo->unsafe_partitions = true;
+    }
+
+    PQclear(res);
+
+    destroyPQExpBuffer(query);
+}
+
 /*
  * getIndexes
  *      get information about every index on a dumpable table
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index cdca0b993d..ffa37265c8 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -318,6 +318,7 @@ typedef struct _tableInfo
     bool        dummy_view;        /* view's real definition must be postponed */
     bool        postponed_def;    /* matview must be postponed into post-data */
     bool        ispartition;    /* is table a partition? */
+    bool        unsafe_partitions;    /* is it an unsafe partitioned table? */

     /*
      * These fields are computed only if we decide the table is interesting
@@ -714,6 +715,7 @@ extern ConvInfo *getConversions(Archive *fout, int *numConversions);
 extern TableInfo *getTables(Archive *fout, int *numTables);
 extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
 extern InhInfo *getInherits(Archive *fout, int *numInherits);
+extern void getPartitioningInfo(Archive *fout);
 extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
 extern void getExtendedStatistics(Archive *fout);
 extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
--
2.31.1

From 5836a653373a382ee3d3b287ab5377230852a554 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:13:35 -0400
Subject: [PATCH v2 5/6] Don't create useless TableAttachInfo objects.

It's silly to create a TableAttachInfo object that we're not
going to dump, when we know perfectly well at creation time
that it won't be dumped.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
---
 src/bin/pg_dump/common.c  | 3 ++-
 src/bin/pg_dump/pg_dump.c | 3 ---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 44558b60e8..df48d0bb17 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -336,7 +336,8 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
         }

         /* Create TableAttachInfo object if needed */
-        if (tblinfo[i].dobj.dump && tblinfo[i].ispartition)
+        if ((tblinfo[i].dobj.dump & DUMP_COMPONENT_DEFINITION) &&
+            tblinfo[i].ispartition)
         {
             TableAttachInfo *attachinfo;

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 7f40b455af..a4a06b4a20 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -16120,9 +16120,6 @@ dumpTableAttach(Archive *fout, const TableAttachInfo *attachinfo)
     if (dopt->dataOnly)
         return;

-    if (!(attachinfo->partitionTbl->dobj.dump & DUMP_COMPONENT_DEFINITION))
-        return;
-
     q = createPQExpBuffer();

     if (!fout->is_prepared[PREPQUERY_DUMPTABLEATTACH])
--
2.31.1

From dcdf1c516d06afa75759d38e0903b8585b95ef72 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Mar 2023 19:16:48 -0400
Subject: [PATCH v2 6/6] Simplify pg_dump's creation of parent-table links.

Instead of trying to optimize this by skipping creation of the
links for tables we don't plan to dump, just create them all in
bulk with a single scan over the pg_inherits data.  The previous
approach was more or less O(N^2) in the number of pg_inherits
entries, not to mention being way too complicated.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
---
 src/bin/pg_dump/common.c  | 125 ++++++++++++++++----------------------
 src/bin/pg_dump/pg_dump.h |   5 +-
 2 files changed, 54 insertions(+), 76 deletions(-)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index df48d0bb17..5d988986ed 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -83,8 +83,6 @@ static void flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
                           InhInfo *inhinfo, int numInherits);
 static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
-static void findParentsByOid(TableInfo *self,
-                             InhInfo *inhinfo, int numInherits);
 static int    strInArray(const char *pattern, char **arr, int arr_size);
 static IndxInfo *findIndexByOid(Oid oid);

@@ -288,45 +286,70 @@ static void
 flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
               InhInfo *inhinfo, int numInherits)
 {
+    TableInfo  *child = NULL;
+    TableInfo  *parent = NULL;
     int            i,
                 j;

-    for (i = 0; i < numTables; i++)
+    /*
+     * Set up links from child tables to their parents.
+     *
+     * We used to attempt to skip this work for tables that are not to be
+     * dumped; but the optimizable cases are rare in practice, and setting up
+     * these links in bulk is cheaper than the old way.  (Note in particular
+     * that it's very rare for a child to have more than one parent.)
+     */
+    for (i = 0; i < numInherits; i++)
     {
-        bool        find_parents = true;
-        bool        mark_parents = true;
-
-        /* Some kinds never have parents */
-        if (tblinfo[i].relkind == RELKIND_SEQUENCE ||
-            tblinfo[i].relkind == RELKIND_VIEW ||
-            tblinfo[i].relkind == RELKIND_MATVIEW)
-            continue;
-
         /*
-         * Normally, we don't bother computing anything for non-target tables.
-         * However, we must find the parents of non-root partitioned tables in
-         * any case, so that we can trace from leaf partitions up to the root
-         * (in case a leaf is to be dumped but its parents are not).  We need
-         * not mark such parents interesting for getTableAttrs, though.
+         * Skip a hashtable lookup if it's same table as last time.  This is
+         * unlikely for the child, but less so for the parent.  (Maybe we
+         * should ask the backend for a sorted array to make it more likely?
+         * Not clear the sorting effort would be repaid, though.)
          */
-        if (!tblinfo[i].dobj.dump)
+        if (child == NULL ||
+            child->dobj.catId.oid != inhinfo[i].inhrelid)
         {
-            mark_parents = false;
+            child = findTableByOid(inhinfo[i].inhrelid);

-            if (!(tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE &&
-                  tblinfo[i].ispartition))
-                find_parents = false;
+            /*
+             * If we find no TableInfo, assume the pg_inherits entry is for a
+             * partitioned index, which we don't need to track.
+             */
+            if (child == NULL)
+                continue;
         }
+        if (parent == NULL ||
+            parent->dobj.catId.oid != inhinfo[i].inhparent)
+        {
+            parent = findTableByOid(inhinfo[i].inhparent);
+            if (parent == NULL)
+                pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
+                         inhinfo[i].inhparent,
+                         child->dobj.name,
+                         child->dobj.catId.oid);
+        }
+        /* Add this parent to the child's list of parents. */
+        if (child->numParents > 0)
+            child->parents = pg_realloc_array(child->parents,
+                                              TableInfo *,
+                                              child->numParents + 1);
+        else
+            child->parents = pg_malloc_array(TableInfo *, 1);
+        child->parents[child->numParents++] = parent;
+    }

-        /* If needed, find all the immediate parent tables. */
-        if (find_parents)
-            findParentsByOid(&tblinfo[i], inhinfo, numInherits);
-
+    /*
+     * Now consider all child tables and mark parents interesting as needed.
+     */
+    for (i = 0; i < numTables; i++)
+    {
         /*
          * If needed, mark the parents as interesting for getTableAttrs and
-         * getIndexes.
+         * getIndexes.  We only need this for direct parents of dumpable
+         * tables.
          */
-        if (mark_parents)
+        if (tblinfo[i].dobj.dump)
         {
             int            numParents = tblinfo[i].numParents;
             TableInfo **parents = tblinfo[i].parents;
@@ -996,52 +1019,6 @@ findOwningExtension(CatalogId catalogId)
 }


-/*
- * findParentsByOid
- *      find a table's parents in tblinfo[]
- */
-static void
-findParentsByOid(TableInfo *self,
-                 InhInfo *inhinfo, int numInherits)
-{
-    Oid            oid = self->dobj.catId.oid;
-    int            i,
-                j;
-    int            numParents;
-
-    numParents = 0;
-    for (i = 0; i < numInherits; i++)
-    {
-        if (inhinfo[i].inhrelid == oid)
-            numParents++;
-    }
-
-    self->numParents = numParents;
-
-    if (numParents > 0)
-    {
-        self->parents = pg_malloc_array(TableInfo *, numParents);
-        j = 0;
-        for (i = 0; i < numInherits; i++)
-        {
-            if (inhinfo[i].inhrelid == oid)
-            {
-                TableInfo  *parent;
-
-                parent = findTableByOid(inhinfo[i].inhparent);
-                if (parent == NULL)
-                    pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
-                             inhinfo[i].inhparent,
-                             self->dobj.name,
-                             oid);
-                self->parents[j++] = parent;
-            }
-        }
-    }
-    else
-        self->parents = NULL;
-}
-
 /*
  * parseOidArray
  *      parse a string of numbers delimited by spaces into a character array
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index ffa37265c8..283cd1a602 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -320,6 +320,9 @@ typedef struct _tableInfo
     bool        ispartition;    /* is table a partition? */
     bool        unsafe_partitions;    /* is it an unsafe partitioned table? */

+    int            numParents;        /* number of (immediate) parent tables */
+    struct _tableInfo **parents;    /* TableInfos of immediate parents */
+
     /*
      * These fields are computed only if we decide the table is interesting
      * (it's either a table to dump, or a direct parent of a dumpable table).
@@ -351,8 +354,6 @@ typedef struct _tableInfo
     /*
      * Stuff computed only for dumpable tables.
      */
-    int            numParents;        /* number of (immediate) parent tables */
-    struct _tableInfo **parents;    /* TableInfos of immediate parents */
     int            numIndexes;        /* number of indexes */
     struct _indxInfo *indexes;    /* indexes */
     struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */
--
2.31.1

