Re: jsonb format is pessimal for toast compression
| От | Tom Lane |
|---|---|
| Тема | Re: jsonb format is pessimal for toast compression |
| Дата | |
| Msg-id | 16663.1411701009@sss.pgh.pa.us обсуждение исходный текст |
| Ответ на | Re: jsonb format is pessimal for toast compression (Tom Lane <tgl@sss.pgh.pa.us>) |
| Список | pgsql-hackers |
I wrote:
> The "offsets-and-lengths" patch seems like the approach we ought to
> compare to my patch, but it looks pretty unfinished to me: AFAICS it
> includes logic to understand offsets sprinkled into a mostly-lengths
> array, but no logic that would actually *store* any such offsets,
> which means it's going to act just like my patch for performance
> purposes.
> In the interests of pushing this forward, I will work today on
> trying to finish and review Heikki's offsets-and-lengths patch
> so that we have something we can do performance testing on.
> I doubt that the performance testing will tell us anything we
> don't expect, but we should do it anyway.
I've now done that, and attached is what I think would be a committable
version. Having done this work, I no longer think that this approach
is significantly messier code-wise than the all-lengths version, and
it does have the merit of not degrading on very large objects/arrays.
So at the moment I'm leaning to this solution not the all-lengths one.
To get a sense of the compression effects of varying the stride distance,
I repeated the compression measurements I'd done on 14 August with Pavel's
geometry data (<24077.1408052877@sss.pgh.pa.us>). The upshot of that was
min max avg
external text representation 220 172685 880.3
JSON representation (compressed text) 224 78565 541.3
pg_column_size, JSONB HEAD repr. 225 82540 639.0
pg_column_size, all-lengths repr. 225 66794 531.1
Here's what I get with this patch and different stride distances:
JB_OFFSET_STRIDE = 8 225 68551 559.7
JB_OFFSET_STRIDE = 16 225 67601 552.3
JB_OFFSET_STRIDE = 32 225 67120 547.4
JB_OFFSET_STRIDE = 64 225 66886 546.9
JB_OFFSET_STRIDE = 128 225 66879 546.9
JB_OFFSET_STRIDE = 256 225 66846 546.8
So at least for that test data, 32 seems like the sweet spot.
We are giving up a couple percent of space in comparison to the
all-lengths version, but this is probably an acceptable tradeoff
for not degrading on very large arrays.
I've not done any speed testing.
regards, tom lane
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 2fd87fc..9beebb3 100644
*** a/src/backend/utils/adt/jsonb.c
--- b/src/backend/utils/adt/jsonb.c
*************** jsonb_from_cstring(char *json, int len)
*** 196,207 ****
static size_t
checkStringLen(size_t len)
{
! if (len > JENTRY_POSMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string too long to represent as jsonb string"),
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
! JENTRY_POSMASK)));
return len;
}
--- 196,207 ----
static size_t
checkStringLen(size_t len)
{
! if (len > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("string too long to represent as jsonb string"),
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
! JENTRY_OFFLENMASK)));
return len;
}
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index 04f35bf..f157df3 100644
*** a/src/backend/utils/adt/jsonb_util.c
--- b/src/backend/utils/adt/jsonb_util.c
***************
*** 26,40 ****
* in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
* reserved for that in the JsonbContainer.header field.
*
! * (the total size of an array's elements is also limited by JENTRY_POSMASK,
! * but we're not concerned about that here)
*/
#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
! static void fillJsonbValue(JEntry *array, int index, char *base_addr,
JsonbValue *result);
! static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static Jsonb *convertToJsonb(JsonbValue *val);
static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
--- 26,41 ----
* in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
* reserved for that in the JsonbContainer.header field.
*
! * (The total size of an array's or object's elements is also limited by
! * JENTRY_OFFLENMASK, but we're not concerned about that here.)
*/
#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
! static void fillJsonbValue(JsonbContainer *container, int index,
! char *base_addr, uint32 offset,
JsonbValue *result);
! static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
static Jsonb *convertToJsonb(JsonbValue *val);
static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
*************** static void convertJsonbArray(StringInfo
*** 42,48 ****
static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
! static int reserveFromBuffer(StringInfo buffer, int len);
static void appendToBuffer(StringInfo buffer, const char *data, int len);
static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
static short padBufferToInt(StringInfo buffer);
--- 43,49 ----
static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
! static int reserveFromBuffer(StringInfo buffer, int len);
static void appendToBuffer(StringInfo buffer, const char *data, int len);
static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
static short padBufferToInt(StringInfo buffer);
*************** JsonbValueToJsonb(JsonbValue *val)
*** 108,113 ****
--- 109,166 ----
}
/*
+ * Get the offset of the variable-length portion of a Jsonb node within
+ * the variable-length-data part of its container. The node is identified
+ * by index within the container's JEntry array.
+ */
+ uint32
+ getJsonbOffset(const JsonbContainer *jc, int index)
+ {
+ uint32 offset = 0;
+ int i;
+
+ /*
+ * Start offset of this entry is equal to the end offset of the previous
+ * entry. Walk backwards to the most recent entry stored as an end
+ * offset, returning that offset plus any lengths in between.
+ */
+ for (i = index - 1; i >= 0; i--)
+ {
+ offset += JBE_OFFLENFLD(jc->children[i]);
+ if (JBE_HAS_OFF(jc->children[i]))
+ break;
+ }
+
+ return offset;
+ }
+
+ /*
+ * Get the length of the variable-length portion of a Jsonb node.
+ * The node is identified by index within the container's JEntry array.
+ */
+ uint32
+ getJsonbLength(const JsonbContainer *jc, int index)
+ {
+ uint32 off;
+ uint32 len;
+
+ /*
+ * If the length is stored directly in the JEntry, just return it.
+ * Otherwise, get the begin offset of the entry, and subtract that from
+ * the stored end+1 offset.
+ */
+ if (JBE_HAS_OFF(jc->children[index]))
+ {
+ off = getJsonbOffset(jc, index);
+ len = JBE_OFFLENFLD(jc->children[index]) - off;
+ }
+ else
+ len = JBE_OFFLENFLD(jc->children[index]);
+
+ return len;
+ }
+
+ /*
* BT comparator worker function. Returns an integer less than, equal to, or
* greater than zero, indicating whether a is less than, equal to, or greater
* than b. Consistent with the requirements for a B-Tree operator class
*************** compareJsonbContainers(JsonbContainer *a
*** 201,207 ****
*
* If the two values were of the same container type, then there'd
* have been a chance to observe the variation in the number of
! * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
* either two heterogeneously-typed containers, or a container and
* some scalar type.
*
--- 254,260 ----
*
* If the two values were of the same container type, then there'd
* have been a chance to observe the variation in the number of
! * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
* either two heterogeneously-typed containers, or a container and
* some scalar type.
*
*************** findJsonbValueFromContainer(JsonbContain
*** 272,295 ****
{
JEntry *children = container->children;
int count = (container->header & JB_CMASK);
! JsonbValue *result = palloc(sizeof(JsonbValue));
Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
if (flags & JB_FARRAY & container->header)
{
char *base_addr = (char *) (children + count);
int i;
for (i = 0; i < count; i++)
{
! fillJsonbValue(children, i, base_addr, result);
if (key->type == result->type)
{
if (equalsJsonbScalarValue(key, result))
return result;
}
}
}
else if (flags & JB_FOBJECT & container->header)
--- 325,357 ----
{
JEntry *children = container->children;
int count = (container->header & JB_CMASK);
! JsonbValue *result;
Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
+ /* Quick out without a palloc cycle if object/array is empty */
+ if (count <= 0)
+ return NULL;
+
+ result = palloc(sizeof(JsonbValue));
+
if (flags & JB_FARRAY & container->header)
{
char *base_addr = (char *) (children + count);
+ uint32 offset = 0;
int i;
for (i = 0; i < count; i++)
{
! fillJsonbValue(container, i, base_addr, offset, result);
if (key->type == result->type)
{
if (equalsJsonbScalarValue(key, result))
return result;
}
+
+ JBE_ADVANCE_OFFSET(offset, children[i]);
}
}
else if (flags & JB_FOBJECT & container->header)
*************** findJsonbValueFromContainer(JsonbContain
*** 297,332 ****
/* Since this is an object, account for *Pairs* of Jentrys */
char *base_addr = (char *) (children + count * 2);
uint32 stopLow = 0,
! stopMiddle;
! /* Object key past by caller must be a string */
Assert(key->type == jbvString);
/* Binary search on object/pair keys *only* */
! while (stopLow < count)
{
! int index;
int difference;
JsonbValue candidate;
! /*
! * Note how we compensate for the fact that we're iterating
! * through pairs (not entries) throughout.
! */
! stopMiddle = stopLow + (count - stopLow) / 2;
!
! index = stopMiddle * 2;
candidate.type = jbvString;
! candidate.val.string.val = base_addr + JBE_OFF(children, index);
! candidate.val.string.len = JBE_LEN(children, index);
difference = lengthCompareJsonbStringValue(&candidate, key);
if (difference == 0)
{
! /* Found our key, return value */
! fillJsonbValue(children, index + 1, base_addr, result);
return result;
}
--- 359,393 ----
/* Since this is an object, account for *Pairs* of Jentrys */
char *base_addr = (char *) (children + count * 2);
uint32 stopLow = 0,
! stopHigh = count;
! /* Object key passed by caller must be a string */
Assert(key->type == jbvString);
/* Binary search on object/pair keys *only* */
! while (stopLow < stopHigh)
{
! uint32 stopMiddle;
int difference;
JsonbValue candidate;
! stopMiddle = stopLow + (stopHigh - stopLow) / 2;
candidate.type = jbvString;
! candidate.val.string.val =
! base_addr + getJsonbOffset(container, stopMiddle);
! candidate.val.string.len = getJsonbLength(container, stopMiddle);
difference = lengthCompareJsonbStringValue(&candidate, key);
if (difference == 0)
{
! /* Found our key, return corresponding value */
! int index = stopMiddle + count;
!
! fillJsonbValue(container, index, base_addr,
! getJsonbOffset(container, index),
! result);
return result;
}
*************** findJsonbValueFromContainer(JsonbContain
*** 335,341 ****
if (difference < 0)
stopLow = stopMiddle + 1;
else
! count = stopMiddle;
}
}
}
--- 396,402 ----
if (difference < 0)
stopLow = stopMiddle + 1;
else
! stopHigh = stopMiddle;
}
}
}
*************** getIthJsonbValueFromContainer(JsonbConta
*** 368,374 ****
result = palloc(sizeof(JsonbValue));
! fillJsonbValue(container->children, i, base_addr, result);
return result;
}
--- 429,437 ----
result = palloc(sizeof(JsonbValue));
! fillJsonbValue(container, i, base_addr,
! getJsonbOffset(container, i),
! result);
return result;
}
*************** getIthJsonbValueFromContainer(JsonbConta
*** 377,389 ****
* A helper function to fill in a JsonbValue to represent an element of an
* array, or a key or value of an object.
*
* A nested array or object will be returned as jbvBinary, ie. it won't be
* expanded.
*/
static void
! fillJsonbValue(JEntry *children, int index, char *base_addr, JsonbValue *result)
{
! JEntry entry = children[index];
if (JBE_ISNULL(entry))
{
--- 440,459 ----
* A helper function to fill in a JsonbValue to represent an element of an
* array, or a key or value of an object.
*
+ * The node's JEntry is at container->children[index], and its variable-length
+ * data is at base_addr + offset. We make the caller determine the offset
+ * since in many cases the caller can amortize that work across multiple
+ * children. When it can't, it can just call getJsonbOffset().
+ *
* A nested array or object will be returned as jbvBinary, ie. it won't be
* expanded.
*/
static void
! fillJsonbValue(JsonbContainer *container, int index,
! char *base_addr, uint32 offset,
! JsonbValue *result)
{
! JEntry entry = container->children[index];
if (JBE_ISNULL(entry))
{
*************** fillJsonbValue(JEntry *children, int ind
*** 392,405 ****
else if (JBE_ISSTRING(entry))
{
result->type = jbvString;
! result->val.string.val = base_addr + JBE_OFF(children, index);
! result->val.string.len = JBE_LEN(children, index);
Assert(result->val.string.len >= 0);
}
else if (JBE_ISNUMERIC(entry))
{
result->type = jbvNumeric;
! result->val.numeric = (Numeric) (base_addr + INTALIGN(JBE_OFF(children, index)));
}
else if (JBE_ISBOOL_TRUE(entry))
{
--- 462,475 ----
else if (JBE_ISSTRING(entry))
{
result->type = jbvString;
! result->val.string.val = base_addr + offset;
! result->val.string.len = getJsonbLength(container, index);
Assert(result->val.string.len >= 0);
}
else if (JBE_ISNUMERIC(entry))
{
result->type = jbvNumeric;
! result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
}
else if (JBE_ISBOOL_TRUE(entry))
{
*************** fillJsonbValue(JEntry *children, int ind
*** 415,422 ****
{
Assert(JBE_ISCONTAINER(entry));
result->type = jbvBinary;
! result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(JBE_OFF(children, index)));
! result->val.binary.len = JBE_LEN(children, index) - (INTALIGN(JBE_OFF(children, index)) - JBE_OFF(children,
index));
}
}
--- 485,494 ----
{
Assert(JBE_ISCONTAINER(entry));
result->type = jbvBinary;
! /* Remove alignment padding from data pointer and length */
! result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
! result->val.binary.len = getJsonbLength(container, index) -
! (INTALIGN(offset) - offset);
}
}
*************** recurse:
*** 668,680 ****
* a full conversion
*/
val->val.array.rawScalar = (*it)->isScalar;
! (*it)->i = 0;
/* Set state for next call */
(*it)->state = JBI_ARRAY_ELEM;
return WJB_BEGIN_ARRAY;
case JBI_ARRAY_ELEM:
! if ((*it)->i >= (*it)->nElems)
{
/*
* All elements within array already processed. Report this
--- 740,754 ----
* a full conversion
*/
val->val.array.rawScalar = (*it)->isScalar;
! (*it)->curIndex = 0;
! (*it)->curDataOffset = 0;
! (*it)->curValueOffset = 0; /* not actually used */
/* Set state for next call */
(*it)->state = JBI_ARRAY_ELEM;
return WJB_BEGIN_ARRAY;
case JBI_ARRAY_ELEM:
! if ((*it)->curIndex >= (*it)->nElems)
{
/*
* All elements within array already processed. Report this
*************** recurse:
*** 686,692 ****
return WJB_END_ARRAY;
}
! fillJsonbValue((*it)->children, (*it)->i++, (*it)->dataProper, val);
if (!IsAJsonbScalar(val) && !skipNested)
{
--- 760,772 ----
return WJB_END_ARRAY;
}
! fillJsonbValue((*it)->container, (*it)->curIndex,
! (*it)->dataProper, (*it)->curDataOffset,
! val);
!
! JBE_ADVANCE_OFFSET((*it)->curDataOffset,
! (*it)->children[(*it)->curIndex]);
! (*it)->curIndex++;
if (!IsAJsonbScalar(val) && !skipNested)
{
*************** recurse:
*** 697,704 ****
else
{
/*
! * Scalar item in array, or a container and caller didn't
! * want us to recurse into it.
*/
return WJB_ELEM;
}
--- 777,784 ----
else
{
/*
! * Scalar item in array, or a container and caller didn't want
! * us to recurse into it.
*/
return WJB_ELEM;
}
*************** recurse:
*** 712,724 ****
* v->val.object.pairs is not actually set, because we aren't
* doing a full conversion
*/
! (*it)->i = 0;
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
return WJB_BEGIN_OBJECT;
case JBI_OBJECT_KEY:
! if ((*it)->i >= (*it)->nElems)
{
/*
* All pairs within object already processed. Report this to
--- 792,807 ----
* v->val.object.pairs is not actually set, because we aren't
* doing a full conversion
*/
! (*it)->curIndex = 0;
! (*it)->curDataOffset = 0;
! (*it)->curValueOffset = getJsonbOffset((*it)->container,
! (*it)->nElems);
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
return WJB_BEGIN_OBJECT;
case JBI_OBJECT_KEY:
! if ((*it)->curIndex >= (*it)->nElems)
{
/*
* All pairs within object already processed. Report this to
*************** recurse:
*** 732,738 ****
else
{
/* Return key of a key/value pair. */
! fillJsonbValue((*it)->children, (*it)->i * 2, (*it)->dataProper, val);
if (val->type != jbvString)
elog(ERROR, "unexpected jsonb type as object key");
--- 815,823 ----
else
{
/* Return key of a key/value pair. */
! fillJsonbValue((*it)->container, (*it)->curIndex,
! (*it)->dataProper, (*it)->curDataOffset,
! val);
if (val->type != jbvString)
elog(ERROR, "unexpected jsonb type as object key");
*************** recurse:
*** 745,752 ****
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
! fillJsonbValue((*it)->children, ((*it)->i++) * 2 + 1,
! (*it)->dataProper, val);
/*
* Value may be a container, in which case we recurse with new,
--- 830,844 ----
/* Set state for next call */
(*it)->state = JBI_OBJECT_KEY;
! fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
! (*it)->dataProper, (*it)->curValueOffset,
! val);
!
! JBE_ADVANCE_OFFSET((*it)->curDataOffset,
! (*it)->children[(*it)->curIndex]);
! JBE_ADVANCE_OFFSET((*it)->curValueOffset,
! (*it)->children[(*it)->curIndex + (*it)->nElems]);
! (*it)->curIndex++;
/*
* Value may be a container, in which case we recurse with new,
*************** iteratorFromContainer(JsonbContainer *co
*** 795,805 ****
break;
case JB_FOBJECT:
-
- /*
- * Offset reflects that nElems indicates JsonbPairs in an object.
- * Each key and each value contain Jentry metadata just the same.
- */
it->dataProper =
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
it->state = JBI_OBJECT_START;
--- 887,892 ----
*************** reserveFromBuffer(StringInfo buffer, int
*** 1209,1216 ****
buffer->len += len;
/*
! * Keep a trailing null in place, even though it's not useful for us;
! * it seems best to preserve the invariants of StringInfos.
*/
buffer->data[buffer->len] = '\0';
--- 1296,1303 ----
buffer->len += len;
/*
! * Keep a trailing null in place, even though it's not useful for us; it
! * seems best to preserve the invariants of StringInfos.
*/
buffer->data[buffer->len] = '\0';
*************** convertToJsonb(JsonbValue *val)
*** 1284,1291 ****
/*
* Note: the JEntry of the root is discarded. Therefore the root
! * JsonbContainer struct must contain enough information to tell what
! * kind of value it is.
*/
res = (Jsonb *) buffer.data;
--- 1371,1378 ----
/*
* Note: the JEntry of the root is discarded. Therefore the root
! * JsonbContainer struct must contain enough information to tell what kind
! * of value it is.
*/
res = (Jsonb *) buffer.data;
*************** convertToJsonb(JsonbValue *val)
*** 1298,1307 ****
/*
* Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
*
! * The JEntry header for this node is returned in *header. It is filled in
! * with the length of this value, but if it is stored in an array or an
! * object (which is always, except for the root node), it is the caller's
! * responsibility to adjust it with the offset within the container.
*
* If the value is an array or an object, this recurses. 'level' is only used
* for debugging purposes.
--- 1385,1394 ----
/*
* Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
*
! * The JEntry header for this node is returned in *header. It is filled in
! * with the length of this value and appropriate type bits. If we wish to
! * store an end offset rather than a length, it is the caller's responsibility
! * to adjust for that.
*
* If the value is an array or an object, this recurses. 'level' is only used
* for debugging purposes.
*************** convertJsonbValue(StringInfo buffer, JEn
*** 1315,1324 ****
return;
/*
! * A JsonbValue passed as val should never have a type of jbvBinary,
! * and neither should any of its sub-components. Those values will be
! * produced by convertJsonbArray and convertJsonbObject, the results of
! * which will not be passed back to this function as an argument.
*/
if (IsAJsonbScalar(val))
--- 1402,1411 ----
return;
/*
! * A JsonbValue passed as val should never have a type of jbvBinary, and
! * neither should any of its sub-components. Those values will be produced
! * by convertJsonbArray and convertJsonbObject, the results of which will
! * not be passed back to this function as an argument.
*/
if (IsAJsonbScalar(val))
*************** convertJsonbValue(StringInfo buffer, JEn
*** 1334,1457 ****
static void
convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
! int offset;
! int metaoffset;
int i;
int totallen;
uint32 header;
! /* Initialize pointer into conversion buffer at this level */
! offset = buffer->len;
padBufferToInt(buffer);
/*
! * Construct the header Jentry, stored in the beginning of the variable-
! * length payload.
*/
! header = val->val.array.nElems | JB_FARRAY;
if (val->val.array.rawScalar)
{
! Assert(val->val.array.nElems == 1);
Assert(level == 0);
header |= JB_FSCALAR;
}
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
! /* reserve space for the JEntries of the elements. */
! metaoffset = reserveFromBuffer(buffer, sizeof(JEntry) * val->val.array.nElems);
totallen = 0;
! for (i = 0; i < val->val.array.nElems; i++)
{
JsonbValue *elem = &val->val.array.elems[i];
int len;
JEntry meta;
convertJsonbValue(buffer, &meta, elem, level + 1);
! len = meta & JENTRY_POSMASK;
totallen += len;
! if (totallen > JENTRY_POSMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
! JENTRY_POSMASK)));
! if (i > 0)
! meta = (meta & ~JENTRY_POSMASK) | totallen;
! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry));
! metaoffset += sizeof(JEntry);
}
! totallen = buffer->len - offset;
! /* Initialize the header of this node, in the container's JEntry array */
*pheader = JENTRY_ISCONTAINER | totallen;
}
static void
convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
! uint32 header;
! int offset;
! int metaoffset;
int i;
int totallen;
! /* Initialize pointer into conversion buffer at this level */
! offset = buffer->len;
padBufferToInt(buffer);
! /* Initialize header */
! header = val->val.object.nPairs | JB_FOBJECT;
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
! /* reserve space for the JEntries of the keys and values */
! metaoffset = reserveFromBuffer(buffer, sizeof(JEntry) * val->val.object.nPairs * 2);
totallen = 0;
! for (i = 0; i < val->val.object.nPairs; i++)
{
! JsonbPair *pair = &val->val.object.pairs[i];
! int len;
! JEntry meta;
! /* put key */
convertJsonbScalar(buffer, &meta, &pair->key);
! len = meta & JENTRY_POSMASK;
totallen += len;
! if (totallen > JENTRY_POSMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
! JENTRY_POSMASK)));
! if (i > 0)
! meta = (meta & ~JENTRY_POSMASK) | totallen;
! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry));
! metaoffset += sizeof(JEntry);
! convertJsonbValue(buffer, &meta, &pair->value, level);
! len = meta & JENTRY_POSMASK;
totallen += len;
! if (totallen > JENTRY_POSMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
! JENTRY_POSMASK)));
! meta = (meta & ~JENTRY_POSMASK) | totallen;
! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry));
! metaoffset += sizeof(JEntry);
}
! totallen = buffer->len - offset;
*pheader = JENTRY_ISCONTAINER | totallen;
}
--- 1421,1620 ----
static void
convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
! int base_offset;
! int jentry_offset;
int i;
int totallen;
uint32 header;
+ int nElems = val->val.array.nElems;
! /* Remember where in the buffer this array starts. */
! base_offset = buffer->len;
+ /* Align to 4-byte boundary (any padding counts as part of my data) */
padBufferToInt(buffer);
/*
! * Construct the header Jentry and store it in the beginning of the
! * variable-length payload.
*/
! header = nElems | JB_FARRAY;
if (val->val.array.rawScalar)
{
! Assert(nElems == 1);
Assert(level == 0);
header |= JB_FSCALAR;
}
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
!
! /* Reserve space for the JEntries of the elements. */
! jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
totallen = 0;
! for (i = 0; i < nElems; i++)
{
JsonbValue *elem = &val->val.array.elems[i];
int len;
JEntry meta;
+ /*
+ * Convert element, producing a JEntry and appending its
+ * variable-length data to buffer
+ */
convertJsonbValue(buffer, &meta, elem, level + 1);
!
! len = JBE_OFFLENFLD(meta);
totallen += len;
! /*
! * Bail out if total variable-length data exceeds what will fit in a
! * JEntry length field. We check this in each iteration, not just
! * once at the end, to forestall possible integer overflow.
! */
! if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
! JENTRY_OFFLENMASK)));
! /*
! * Convert each JB_OFFSET_STRIDE'th length to an offset.
! */
! if ((i % JB_OFFSET_STRIDE) == 0)
! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
!
! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
! jentry_offset += sizeof(JEntry);
}
! /* Total data size is everything we've appended to buffer */
! totallen = buffer->len - base_offset;
! /* Check length again, since we didn't include the metadata above */
! if (totallen > JENTRY_OFFLENMASK)
! ereport(ERROR,
! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
! JENTRY_OFFLENMASK)));
!
! /* Initialize the header of this node in the container's JEntry array */
*pheader = JENTRY_ISCONTAINER | totallen;
}
static void
convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
{
! int base_offset;
! int jentry_offset;
int i;
int totallen;
+ uint32 header;
+ int nPairs = val->val.object.nPairs;
! /* Remember where in the buffer this object starts. */
! base_offset = buffer->len;
+ /* Align to 4-byte boundary (any padding counts as part of my data) */
padBufferToInt(buffer);
! /*
! * Construct the header Jentry and store it in the beginning of the
! * variable-length payload.
! */
! header = nPairs | JB_FOBJECT;
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
! /* Reserve space for the JEntries of the keys and values. */
! jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
+ /*
+ * Iterate over the keys, then over the values, since that is the ordering
+ * we want in the on-disk representation.
+ */
totallen = 0;
! for (i = 0; i < nPairs; i++)
{
! JsonbPair *pair = &val->val.object.pairs[i];
! int len;
! JEntry meta;
! /*
! * Convert key, producing a JEntry and appending its variable-length
! * data to buffer
! */
convertJsonbScalar(buffer, &meta, &pair->key);
! len = JBE_OFFLENFLD(meta);
totallen += len;
! /*
! * Bail out if total variable-length data exceeds what will fit in a
! * JEntry length field. We check this in each iteration, not just
! * once at the end, to forestall possible integer overflow.
! */
! if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
! errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
! JENTRY_OFFLENMASK)));
! /*
! * Convert each JB_OFFSET_STRIDE'th length to an offset.
! */
! if ((i % JB_OFFSET_STRIDE) == 0)
! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
! jentry_offset += sizeof(JEntry);
! }
! for (i = 0; i < nPairs; i++)
! {
! JsonbPair *pair = &val->val.object.pairs[i];
! int len;
! JEntry meta;
!
! /*
! * Convert value, producing a JEntry and appending its variable-length
! * data to buffer
! */
! convertJsonbValue(buffer, &meta, &pair->value, level + 1);
!
! len = JBE_OFFLENFLD(meta);
totallen += len;
! /*
! * Bail out if total variable-length data exceeds what will fit in a
! * JEntry length field. We check this in each iteration, not just
! * once at the end, to forestall possible integer overflow.
! */
! if (totallen > JENTRY_OFFLENMASK)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
! errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
! JENTRY_OFFLENMASK)));
! /*
! * Convert each JB_OFFSET_STRIDE'th length to an offset.
! */
! if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
!
! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
! jentry_offset += sizeof(JEntry);
}
! /* Total data size is everything we've appended to buffer */
! totallen = buffer->len - base_offset;
+ /* Check length again, since we didn't include the metadata above */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /* Initialize the header of this node in the container's JEntry array */
*pheader = JENTRY_ISCONTAINER | totallen;
}
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index 91e3e14..b89e4cb 100644
*** a/src/include/utils/jsonb.h
--- b/src/include/utils/jsonb.h
*************** typedef struct JsonbValue JsonbValue;
*** 83,91 ****
* buffer is accessed, but they can also be deep copied and passed around.
*
* Jsonb is a tree structure. Each node in the tree consists of a JEntry
! * header, and a variable-length content. The JEntry header indicates what
! * kind of a node it is, e.g. a string or an array, and the offset and length
! * of its variable-length portion within the container.
*
* The JEntry and the content of a node are not stored physically together.
* Instead, the container array or object has an array that holds the JEntrys
--- 83,91 ----
* buffer is accessed, but they can also be deep copied and passed around.
*
* Jsonb is a tree structure. Each node in the tree consists of a JEntry
! * header and a variable-length content (possibly of zero size). The JEntry
! * header indicates what kind of a node it is, e.g. a string or an array,
! * and provides the length of its variable-length portion.
*
* The JEntry and the content of a node are not stored physically together.
* Instead, the container array or object has an array that holds the JEntrys
*************** typedef struct JsonbValue JsonbValue;
*** 95,134 ****
* hold its JEntry. Hence, no JEntry header is stored for the root node. It
* is implicitly known that the root node must be an array or an object,
* so we can get away without the type indicator as long as we can distinguish
! * the two. For that purpose, both an array and an object begins with a uint32
* header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked
* scalar value needs to be stored as a Jsonb value, what we actually store is
* an array with one element, with the flags in the array's header field set
* to JB_FSCALAR | JB_FARRAY.
*
- * To encode the length and offset of the variable-length portion of each
- * node in a compact way, the JEntry stores only the end offset within the
- * variable-length portion of the container node. For the first JEntry in the
- * container's JEntry array, that equals to the length of the node data. The
- * begin offset and length of the rest of the entries can be calculated using
- * the end offset of the previous JEntry in the array.
- *
* Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
* the variable-length portion of some node types is aligned to a 4-byte
* boundary, while others are not. When alignment is needed, the padding is
* in the beginning of the node that requires it. For example, if a numeric
* node is stored after a string node, so that the numeric node begins at
* offset 3, the variable-length portion of the numeric node will begin with
! * one padding byte.
*/
/*
! * Jentry format.
*
! * The least significant 28 bits store the end offset of the entry (see
! * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits
! * are used to store the type of the entry. The most significant bit
! * is unused, and should be set to zero.
*/
typedef uint32 JEntry;
! #define JENTRY_POSMASK 0x0FFFFFFF
#define JENTRY_TYPEMASK 0x70000000
/* values stored in the type bits */
#define JENTRY_ISSTRING 0x00000000
--- 95,146 ----
* hold its JEntry. Hence, no JEntry header is stored for the root node. It
* is implicitly known that the root node must be an array or an object,
* so we can get away without the type indicator as long as we can distinguish
! * the two. For that purpose, both an array and an object begin with a uint32
* header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked
* scalar value needs to be stored as a Jsonb value, what we actually store is
* an array with one element, with the flags in the array's header field set
* to JB_FSCALAR | JB_FARRAY.
*
* Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
* the variable-length portion of some node types is aligned to a 4-byte
* boundary, while others are not. When alignment is needed, the padding is
* in the beginning of the node that requires it. For example, if a numeric
* node is stored after a string node, so that the numeric node begins at
* offset 3, the variable-length portion of the numeric node will begin with
! * one padding byte so that the actual numeric data is 4-byte aligned.
*/
/*
! * JEntry format.
*
! * The least significant 28 bits store either the data length of the entry,
! * or its end+1 offset from the start of the variable-length portion of the
! * containing object. The next three bits store the type of the entry, and
! * the high-order bit tells whether the least significant bits store a length
! * or an offset.
! *
! * The reason for the offset-or-length complication is to compromise between
! * access speed and data compressibility. In the initial design each JEntry
! * always stored an offset, but this resulted in JEntry arrays with horrible
! * compressibility properties, so that TOAST compression of a JSONB did not
! * work well. Storing only lengths would greatly improve compressibility,
! * but it makes random access into large arrays expensive (O(N) not O(1)).
! * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
! * a length in the rest. This results in reasonably compressible data (as
! * long as the stride isn't too small). We may have to examine as many as
! * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
! * given item, but that's still O(1) no matter how large the container is.
! *
! * We could avoid eating a flag bit for this purpose if we were to store
! * the stride in the container header, or if we were willing to treat the
! * stride as an unchangeable constant. Neither of those options is very
! * attractive though.
*/
typedef uint32 JEntry;
! #define JENTRY_OFFLENMASK 0x0FFFFFFF
#define JENTRY_TYPEMASK 0x70000000
+ #define JENTRY_HAS_OFF 0x80000000
/* values stored in the type bits */
#define JENTRY_ISSTRING 0x00000000
*************** typedef uint32 JEntry;
*** 138,144 ****
#define JENTRY_ISNULL 0x40000000
#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
! /* Note possible multiple evaluations */
#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
--- 150,158 ----
#define JENTRY_ISNULL 0x40000000
#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
! /* Access macros. Note possible multiple evaluations */
! #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
! #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
*************** typedef uint32 JEntry;
*** 147,166 ****
#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
/*
! * Macros for getting the offset and length of an element. Note multiple
! * evaluations and access to prior array element.
*/
! #define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK)
! #define JBE_OFF(ja, i) ((i) == 0 ? 0 : JBE_ENDPOS((ja)[i - 1]))
! #define JBE_LEN(ja, i) ((i) == 0 ? JBE_ENDPOS((ja)[i]) \
! : JBE_ENDPOS((ja)[i]) - JBE_ENDPOS((ja)[i - 1]))
/*
* A jsonb array or object node, within a Jsonb Datum.
*
! * An array has one child for each element. An object has two children for
! * each key/value pair.
*/
typedef struct JsonbContainer
{
--- 161,194 ----
#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
+ /* Macro for advancing an offset variable to the next JEntry */
+ #define JBE_ADVANCE_OFFSET(offset, je) \
+ do { \
+ JEntry je_ = (je); \
+ if (JBE_HAS_OFF(je_)) \
+ (offset) = JBE_OFFLENFLD(je_); \
+ else \
+ (offset) += JBE_OFFLENFLD(je_); \
+ } while(0)
+
/*
! * We store an offset, not a length, every JB_OFFSET_STRIDE children.
! * Caution: this macro should only be referenced when creating a JSONB
! * value. When examining an existing value, pay attention to the HAS_OFF
! * bits instead. This allows changes in the offset-placement heuristic
! * without breaking on-disk compatibility.
*/
! #define JB_OFFSET_STRIDE 32
/*
* A jsonb array or object node, within a Jsonb Datum.
*
! * An array has one child for each element, stored in array order.
! *
! * An object has two children for each key/value pair. The keys all appear
! * first, in key sort order; then the values appear, in an order matching the
! * key order. This arrangement keeps the keys compact in memory, making a
! * search for a particular key more cache-friendly.
*/
typedef struct JsonbContainer
{
*************** typedef struct JsonbContainer
*** 172,179 ****
} JsonbContainer;
/* flags for the header-field in JsonbContainer */
! #define JB_CMASK 0x0FFFFFFF
! #define JB_FSCALAR 0x10000000
#define JB_FOBJECT 0x20000000
#define JB_FARRAY 0x40000000
--- 200,207 ----
} JsonbContainer;
/* flags for the header-field in JsonbContainer */
! #define JB_CMASK 0x0FFFFFFF /* mask for count field */
! #define JB_FSCALAR 0x10000000 /* flag bits */
#define JB_FOBJECT 0x20000000
#define JB_FARRAY 0x40000000
*************** struct JsonbValue
*** 248,265 ****
(jsonbval)->type <= jbvBool)
/*
! * Pair within an Object.
*
! * Pairs with duplicate keys are de-duplicated. We store the order for the
! * benefit of doing so in a well-defined way with respect to the original
! * observed order (which is "last observed wins"). This is only used briefly
! * when originally constructing a Jsonb.
*/
struct JsonbPair
{
JsonbValue key; /* Must be a jbvString */
JsonbValue value; /* May be of any type */
! uint32 order; /* preserves order of pairs with equal keys */
};
/* Conversion state used when parsing Jsonb from text, or for type coercion */
--- 276,295 ----
(jsonbval)->type <= jbvBool)
/*
! * Key/value pair within an Object.
*
! * This struct type is only used briefly while constructing a Jsonb; it is
! * *not* the on-disk representation.
! *
! * Pairs with duplicate keys are de-duplicated. We store the originally
! * observed pair ordering for the purpose of removing duplicates in a
! * well-defined way (which is "last observed wins").
*/
struct JsonbPair
{
JsonbValue key; /* Must be a jbvString */
JsonbValue value; /* May be of any type */
! uint32 order; /* Pair's index in original sequence */
};
/* Conversion state used when parsing Jsonb from text, or for type coercion */
*************** typedef struct JsonbIterator
*** 287,306 ****
{
/* Container being iterated */
JsonbContainer *container;
! uint32 nElems; /* Number of elements in children array (will be
! * nPairs for objects) */
bool isScalar; /* Pseudo-array scalar value? */
! JEntry *children;
! /* Current item in buffer (up to nElems, but must * 2 for objects) */
! int i;
/*
! * Data proper. This points just past end of children array.
! * We use the JBE_OFF() macro on the Jentrys to find offsets of each
! * child in this area.
*/
! char *dataProper;
/* Private state */
JsonbIterState state;
--- 317,341 ----
{
/* Container being iterated */
JsonbContainer *container;
! uint32 nElems; /* Number of elements in children array (will
! * be nPairs for objects) */
bool isScalar; /* Pseudo-array scalar value? */
! JEntry *children; /* JEntrys for child nodes */
! /* Data proper. This points to the beginning of the variable-length data */
! char *dataProper;
! /* Current item in buffer (up to nElems) */
! int curIndex;
!
! /* Data offset corresponding to current item */
! uint32 curDataOffset;
/*
! * If the container is an object, we want to return keys and values
! * alternately; so curDataOffset points to the current key, and
! * curValueOffset points to the current value.
*/
! uint32 curValueOffset;
/* Private state */
JsonbIterState state;
*************** extern Datum gin_consistent_jsonb_path(P
*** 344,349 ****
--- 379,386 ----
extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
/* Support functions */
+ extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
+ extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
uint32 flags,
В списке pgsql-hackers по дате отправления: