Обсуждение: Is the member name of hashctl inappropriate?

Поиск
Список
Период
Сортировка

Is the member name of hashctl inappropriate?

От
ywgrit
Дата:

The definition of hashctl is shown below

typedef struct HASHCTL
{
      long           num_partitions; /* # partitions (must be power of 2) */
      long           ssize;                 /* segment size */
      long           dsize;                 /* (initial) directory size */
      long           max_dsize;             /* limit to dsize if dir size is limited */
      long           ffactor;               /* fill factor */
      Size           keysize;               /* hash key length in bytes */
      Size           entrysize;             /* total user element size in bytes */
      HashValueFunc hash;                     /* hash function */
      HashCompareFunc match;         /* key comparison function */
      HashCopyFunc keycopy;           /* key copying function */
      HashAllocFunc alloc;           /* memory allocator */
      MemoryContext hcxt;                     /* memory context to use for allocations */
      HASHHDR   *hctl;                       /* location of header in shared mem */
} HASHCTL;


/*
* Key copying functions must have this signature. The return value is not
* used. (The definition is set up to allow memcpy() and strlcpy() to be
* used directly.)
*/
typedef void *(*HashCopyFunc) (void *dest, const void *src, Size keysize);

According to the description, the keycopy function only copies the key, but in reality it copies the entire entry, i.e., the key and the value, is the name wrong? This may make the developer pass in an inappropriate keycopy parameter when creating the htab.

Thanks

Re: Is the member name of hashctl inappropriate?

От
Tom Lane
Дата:
ywgrit <yw987194828@gmail.com> writes:
> According to the description, the keycopy function only copies the key, but
> in reality it copies the entire entry, i.e., the key and the value,

On what grounds do you claim that?  dynahash.c only ever passes "keysize"
as the size parameter.

            regards, tom lane



Re: Is the member name of hashctl inappropriate?

От
ywgrit
Дата:

You are right, I came to this erroneous conclusion based on the following mishandled experiment: My test program is shown below.

typedef struct ColumnIdentifier
{
       Oid        relid;
       AttrNumber resno;
ColumnIdentifier;

typedef struct ColumnType
{
       ColumnIdentifier colId;
       Oid              vartype;
ColumnType;


int
ColumnIdentifier_compare(const void *key1const void *key2Size keysize)
{
const ColumnIdentifier *colId_key1 = (const ColumnIdentifier *key1;
const ColumnIdentifier *colId_key2 = (const ColumnIdentifier *key2;

return colId_key1->relid == colId_key2->relid && colId_key1->resno == colId_key2->resno ? 0 : 1;
}


void *
ColumnIdentifier_copy(void *destconst void *srcSize keysize)
{
ColumnIdentifier *colId_dest = (ColumnIdentifier *dest;
ColumnIdentifier *colId_src = (ColumnIdentifier *src;

colId_dest->relid = colId_src->relid;
colId_dest->resno = colId_src->resno;

       return NULL;                            /* not used */
}


                       HASHCTL       hashctl;
                       hashctl.hash = tag_hash;
                       hashctl.match = ColumnIdentifier_compare;
                       hashctl.keycopy = ColumnIdentifier_copy;
                       hashctl.keysize = sizeof(ColumnIdentifier);
                       hashctl.entrysize = sizeof(ColumnType);
                       HTAB *htab = hash_create("type of column",
                                                 512 /* nelem */,
                                                 &hashctl,
                                                 HASH_ELEM | HASH_FUNCTION |
                                                 HASH_COMPARE | HASH_KEYCOPY);
ColumnType *entry = NULL;

                       ColumnIdentifier *colId = (ColumnIdentifier *MemoryContextAllocZero(CurrentMemoryContextsizeof(ColumnIdentifier));
                       ColumnType *coltype = (ColumnType *MemoryContextAllocZero(CurrentMemoryContextsizeof(ColumnType));
                       
                       coltype->colId.relid = colId->relid = 16384;
                       coltype->colId.resno = colId->resno = 1;
                       coltype->vartype = INT4OID;
                       
                       hash_search(htabcoltypeHASH_ENTERNULL);
                       entry = hash_search(htabcolIdHASH_FINDNULL);
                       
                       Assert(entry->colId.relid == colId->relid);
                       Assert(entry->colId.resno == colId->resno);
                       Assert(entry->vartype == INT4OID); // entry->vartype == 0

As shown above, entry->vartype is not assigned when keycopy copies only the key. I modified ColumnIdentifier_copy as shown below, the keycopy copies the entire entry.

void *
ColumnIdentifier_copy(void *destconst void *srcSize keysize)
{
const ColumnType *coltype_src = (const ColumnType *src;
const ColumnType *coltype_dest = (const ColumnType *dest;
  
  coltype_dest->colId->relid = coltype_src->colId->relid;
  coltype_dest->colId->resno = coltype_src->colId->resno;
  coltype_dest->vartype = coltype_src->vartype;
   
       return NULL;                            /* not used */
}

The result is that entry->vartype is now the same as var->vartype, which leads me to believe that keycopy "should" copy the entire entry. Before sending the initial email, I looked at the implementation of "hash_search_with_hash_value" and found the line "hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize)", which made me wonder how data field is copied into the HTAB?

But at the time I ignored a note above: "Caller is expected to fill the data field on return". Now I know that the data field needs to be filled manually, so it was my misuse. Thanks for the correction!

Thanks


Tom Lane <tgl@sss.pgh.pa.us> 于2023年9月13日周三 09:36写道:
ywgrit <yw987194828@gmail.com> writes:
> According to the description, the keycopy function only copies the key, but
> in reality it copies the entire entry, i.e., the key and the value,

On what grounds do you claim that?  dynahash.c only ever passes "keysize"
as the size parameter.

                        regards, tom lane