1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2016 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#ifndef QBINARYJSON_P_H
6#define QBINARYJSON_P_H
7
8//
9// W A R N I N G
10// -------------
11//
12// This file is not part of the Qt API. It exists purely as an
13// implementation detail. This header file may change from version to
14// version without notice, or even be removed.
15//
16// We mean it.
17//
18
19#include "qbinaryjson.h"
20
21#include <private/qbinaryjsonvalue_p.h>
22#include <private/qendian_p.h>
23
24#include <limits>
25
26QT_BEGIN_NAMESPACE
27
28/*
29 This defines a binary data structure for Json data. The data structure is optimised for fast reading
30 and minimum allocations. The whole data structure can be mmap'ed and used directly.
31
32 In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but
33 much faster to access.
34
35 The size requirements are:
36
37 String:
38 Latin1 data: 2 bytes header + string.length()
39 Full Unicode: 4 bytes header + 2*(string.length())
40
41 Values: 4 bytes + size of data (size can be 0 for some data)
42 bool: 0 bytes
43 double: 8 bytes (0 if integer with less than 27bits)
44 string: see above
45 array: size of array
46 object: size of object
47 Array: 12 bytes + 4*length + size of Value data
48 Object: 12 bytes + 8*length + size of Key Strings + size of Value data
49
50 For an example such as
51
52 { // object: 12 + 5*8 = 52
53 "firstName": "John", // key 12, value 8 = 20
54 "lastName" : "Smith", // key 12, value 8 = 20
55 "age" : 25, // key 8, value 0 = 8
56 "address" : // key 12, object below = 140
57 { // object: 12 + 4*8
58 "streetAddress": "21 2nd Street", // key 16, value 16
59 "city" : "New York", // key 8, value 12
60 "state" : "NY", // key 8, value 4
61 "postalCode" : "10021" // key 12, value 8
62 }, // object total: 128
63 "phoneNumber": // key: 16, value array below = 172
64 [ // array: 12 + 2*4 + values below: 156
65 { // object 12 + 2*8
66 "type" : "home", // key 8, value 8
67 "number": "212 555-1234" // key 8, value 16
68 }, // object total: 68
69 { // object 12 + 2*8
70 "type" : "fax", // key 8, value 8
71 "number": "646 555-4567" // key 8, value 16
72 } // object total: 68
73 ] // array total: 156
74 } // great total: 412 bytes
75
76 The uncompressed text file used roughly 500 bytes, so in this case we end up using about
77 the same space as the text representation.
78
79 Other measurements have shown a slightly bigger binary size than a compact text
80 representation where all possible whitespace was stripped out.
81*/
82
83namespace QBinaryJsonPrivate {
84
85class Array;
86class Object;
87class Value;
88class Entry;
89
90template<typename T>
91using q_littleendian = QLEInteger<T>;
92
93using qle_short = q_littleendian<short>;
94using qle_ushort = q_littleendian<unsigned short>;
95using qle_int = q_littleendian<int>;
96using qle_uint = q_littleendian<unsigned int>;
97
98template<typename... Accessors>
99using qle_bitfield = QLEIntegerBitfieldUnion<uint, Accessors...>;
100
101template<int pos, int width>
102using qle_bitfield_accessor
103 = QSpecialIntegerAccessor<QLittleEndianStorageType<uint>, pos, width>;
104
105template<int pos, int width>
106using qle_signedbitfield_accessor
107 = QSpecialIntegerAccessor<QLittleEndianStorageType<uint>, pos, width, int>;
108
109using offset = qle_uint;
110
111// round the size up to the next 4 byte boundary
112inline uint alignedSize(uint size) { return (size + 3) & ~3; }
113
114const int MaxLatin1Length = 0x7fff;
115
116static inline bool useCompressed(QStringView s)
117{
118 if (s.size() > MaxLatin1Length)
119 return false;
120 return QtPrivate::isLatin1(s);
121}
122
123static inline bool useCompressed(QLatin1String s)
124{
125 return s.size() <= MaxLatin1Length;
126}
127
128static inline uint qStringSize(const QString &string, bool compress)
129{
130 uint l = 2 + string.size();
131 if (!compress)
132 l *= 2;
133 return alignedSize(size: l);
134}
135
136// returns INT_MAX if it can't compress it into 28 bits
137static inline int compressedNumber(double d)
138{
139 // this relies on details of how ieee floats are represented
140 const int exponent_off = 52;
141 const quint64 fraction_mask = 0x000fffffffffffffULL;
142 const quint64 exponent_mask = 0x7ff0000000000000ULL;
143
144 quint64 val;
145 memcpy (dest: &val, src: &d, n: sizeof(double));
146 int exp = (int)((val & exponent_mask) >> exponent_off) - 1023;
147 if (exp < 0 || exp > 25)
148 return std::numeric_limits<int>::max();
149
150 quint64 non_int = val & (fraction_mask >> exp);
151 if (non_int)
152 return std::numeric_limits<int>::max();
153
154 bool neg = (val >> 63) != 0;
155 val &= fraction_mask;
156 val |= ((quint64)1 << 52);
157 int res = (int)(val >> (52 - exp));
158 return neg ? -res : res;
159}
160
161class Latin1String;
162
163class String
164{
165public:
166 explicit String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
167
168 struct Data {
169 qle_uint length;
170 qle_ushort utf16[1];
171 };
172 const Data *d;
173
174 uint byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; }
175 bool isValid(uint maxSize) const
176 {
177 // Check byteSize() <= maxSize, avoiding integer overflow
178 return maxSize >= sizeof(uint)
179 && uint(d->length) <= (maxSize - sizeof(uint)) / sizeof(ushort);
180 }
181
182 static void copy(char *dest, QStringView str)
183 {
184 Data *data = reinterpret_cast<Data *>(dest);
185 data->length = str.size();
186 qToLittleEndian<quint16>(source: str.utf16(), count: str.size(), dest: data->utf16);
187 fillTrailingZeros(data);
188 }
189
190 static void fillTrailingZeros(Data *data)
191 {
192 if (data->length & 1)
193 data->utf16[data->length] = 0;
194 }
195
196 bool operator ==(QStringView str) const
197 {
198 int slen = str.size();
199 int l = d->length;
200 if (slen != l)
201 return false;
202 const auto *s = reinterpret_cast<const ushort *>(str.utf16());
203 const qle_ushort *a = d->utf16;
204 const ushort *b = s;
205 while (l-- && *a == *b)
206 a++,b++;
207 return (l == -1);
208 }
209
210 bool operator ==(const String &str) const
211 {
212 if (d->length != str.d->length)
213 return false;
214 return !memcmp(s1: d->utf16, s2: str.d->utf16, n: d->length * sizeof(ushort));
215 }
216
217 QString toString() const
218 {
219#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
220 return QString(reinterpret_cast<const QChar *>(d->utf16), d->length);
221#else
222 const uint l = d->length;
223 QString str(l, Qt::Uninitialized);
224 QChar *ch = str.data();
225 for (uint i = 0; i < l; ++i)
226 ch[i] = QChar(d->utf16[i]);
227 return str;
228#endif
229 }
230};
231
232class Latin1String
233{
234public:
235 explicit Latin1String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
236
237 struct Data {
238 qle_ushort length;
239 char latin1[1];
240 };
241 const Data *d;
242
243 uint byteSize() const { return sizeof(ushort) + sizeof(char) * (d->length); }
244 bool isValid(uint maxSize) const { return byteSize() <= maxSize; }
245
246 static void copy(char *dest, QStringView src)
247 {
248 Data *data = reinterpret_cast<Data *>(dest);
249 data->length = src.size(); // ### narrows from int to ushort
250 auto *l = data->latin1;
251 const auto srcData = src.utf16();
252 for (qsizetype i = 0; i < src.size(); ++i)
253 l[i] = char(srcData[i]);
254
255 for (uint len = data->length; quintptr(l + len) & 0x3; ++len)
256 l[len] = 0;
257 }
258
259 QLatin1String toQLatin1String() const noexcept { return QLatin1String(d->latin1, d->length); }
260 QString toString() const { return QString::fromLatin1(str: d->latin1, size: d->length); }
261};
262
263static inline void copyString(char *dest, QStringView str, bool compress)
264{
265 if (compress)
266 Latin1String::copy(dest, src: str);
267 else
268 String::copy(dest, str);
269}
270
271/*
272 Base is the base class for both Object and Array. Both classes work more or less the same way.
273 The class starts with a header (defined by the struct below), then followed by data (the data for
274 values in the Array case and Entry's (see below) for objects.
275
276 After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and
277 offsets from the beginning of the object to Entry's in the case of Object.
278
279 Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage
280 of a binary search over the keys in an Object.
281 */
282class Base
283{
284private:
285 using IsObjectAccessor = qle_bitfield_accessor<0, 1>;
286 using LengthAccessor = qle_bitfield_accessor<1, 31>;
287public:
288 qle_uint size;
289 qle_bitfield<IsObjectAccessor, LengthAccessor> isObjectAndLength;
290 offset tableOffset;
291 // content follows here
292
293 void setIsObject() { isObjectAndLength.set<IsObjectAccessor>(1); }
294 bool isObject() const { return !!isObjectAndLength.get<IsObjectAccessor>(); }
295
296 void setIsArray() { isObjectAndLength.set<IsObjectAccessor>(0); }
297 bool isArray() const { return !isObject(); }
298
299 void setLength(uint length) { isObjectAndLength.set<LengthAccessor>(length); }
300 uint length() const { return isObjectAndLength.get<LengthAccessor>(); }
301
302 offset *table()
303 {
304 return reinterpret_cast<offset *>(reinterpret_cast<char *>(this) + tableOffset);
305 }
306
307 const offset *table() const
308 {
309 return reinterpret_cast<const offset *>(reinterpret_cast<const char *>(this) + tableOffset);
310 }
311
312 uint reserveSpace(uint dataSize, uint posInTable, uint numItems, bool replace);
313};
314
315class Object : public Base
316{
317public:
318 const Entry *entryAt(uint i) const
319 {
320 return reinterpret_cast<const Entry *>(reinterpret_cast<const char *>(this) + table()[i]);
321 }
322
323 Entry *entryAt(uint i)
324 {
325 return reinterpret_cast<Entry *>(reinterpret_cast<char *>(this) + table()[i]);
326 }
327
328 uint indexOf(QStringView key, bool *exists) const;
329 QJsonObject toJsonObject() const;
330 bool isValid(uint maxSize) const;
331};
332
333class Array : public Base
334{
335public:
336 const Value *at(uint i) const { return reinterpret_cast<const Value *>(table() + i); }
337 Value *at(uint i) { return reinterpret_cast<Value *>(table() + i); }
338
339 QJsonArray toJsonArray() const;
340 bool isValid(uint maxSize) const;
341};
342
343class Value
344{
345private:
346 using TypeAccessor = qle_bitfield_accessor<0, 3>;
347 using LatinOrIntValueAccessor = qle_bitfield_accessor<3, 1>;
348 using LatinKeyAccessor = qle_bitfield_accessor<4, 1>;
349 using ValueAccessor = qle_bitfield_accessor<5, 27>;
350 using IntValueAccessor = qle_signedbitfield_accessor<5, 27>;
351
352 qle_bitfield<
353 TypeAccessor,
354 LatinOrIntValueAccessor,
355 LatinKeyAccessor,
356 ValueAccessor,
357 IntValueAccessor
358 > m_data;
359
360
361 int intValue() const { return m_data.get<IntValueAccessor>(); }
362
363public:
364 enum {
365 MaxSize = (1 << 27) - 1
366 };
367
368 inline const char *data(const Base *b) const
369 {
370 return reinterpret_cast<const char *>(b) + value();
371 }
372
373 uint usedStorage(const Base *b) const;
374
375 bool toBoolean() const
376 {
377 Q_ASSERT(type() == QJsonValue::Bool);
378 return value() != 0;
379 }
380
381 double toDouble(const Base *b) const
382 {
383 Q_ASSERT(type() == QJsonValue::Double);
384 if (isLatinOrIntValue())
385 return intValue();
386
387 auto i = qFromLittleEndian<quint64>(src: reinterpret_cast<const uchar *>(b) + value());
388 double d;
389 memcpy(dest: &d, src: &i, n: sizeof(double));
390 return d;
391 }
392
393 QString toString(const Base *b) const
394 {
395 return isLatinOrIntValue()
396 ? asLatin1String(b).toString()
397 : asString(b).toString();
398 }
399
400 String asString(const Base *b) const
401 {
402 Q_ASSERT(type() == QJsonValue::String && !isLatinOrIntValue());
403 return String(data(b));
404 }
405
406 Latin1String asLatin1String(const Base *b) const
407 {
408 Q_ASSERT(type() == QJsonValue::String && isLatinOrIntValue());
409 return Latin1String(data(b));
410 }
411
412 const Base *base(const Base *b) const
413 {
414 Q_ASSERT(type() == QJsonValue::Array || type() == QJsonValue::Object);
415 return reinterpret_cast<const Base *>(data(b));
416 }
417
418 QJsonValue toJsonValue(const Base *b) const;
419 bool isValid(const Base *b) const;
420
421 static uint requiredStorage(const QBinaryJsonValue &v, bool *compressed);
422 static uint valueToStore(const QBinaryJsonValue &v, uint offset);
423 static void copyData(const QBinaryJsonValue &v, char *dest, bool compressed);
424
425 void setIsLatinKey(bool isLatinKey) { m_data.set<LatinKeyAccessor>(isLatinKey); }
426 bool isLatinKey() const { return m_data.get<LatinKeyAccessor>(); }
427
428 void setIsLatinOrIntValue(bool v) { m_data.set<LatinOrIntValueAccessor>(v); }
429 bool isLatinOrIntValue() const { return m_data.get<LatinOrIntValueAccessor>(); }
430
431 void setType(uint type) { m_data.set<TypeAccessor>(type); }
432 uint type() const { return m_data.get<TypeAccessor>(); }
433
434 void setValue(uint value) { m_data.set<ValueAccessor>(value); }
435 uint value() const { return m_data.get<ValueAccessor>(); }
436};
437
438class Entry {
439public:
440 Value value;
441 // key
442 // value data follows key
443
444 uint size() const
445 {
446 uint s = sizeof(Entry);
447 if (value.isLatinKey())
448 s += shallowLatin1Key().byteSize();
449 else
450 s += shallowKey().byteSize();
451 return alignedSize(size: s);
452 }
453
454 uint usedStorage(Base *b) const
455 {
456 return size() + value.usedStorage(b);
457 }
458
459 String shallowKey() const
460 {
461 Q_ASSERT(!value.isLatinKey());
462 return String(reinterpret_cast<const char *>(this) + sizeof(Entry));
463 }
464
465 Latin1String shallowLatin1Key() const
466 {
467 Q_ASSERT(value.isLatinKey());
468 return Latin1String(reinterpret_cast<const char *>(this) + sizeof(Entry));
469 }
470
471 QString key() const
472 {
473 return value.isLatinKey()
474 ? shallowLatin1Key().toString()
475 : shallowKey().toString();
476 }
477
478 bool isValid(uint maxSize) const
479 {
480 if (maxSize < sizeof(Entry))
481 return false;
482 maxSize -= sizeof(Entry);
483 return value.isLatinKey()
484 ? shallowLatin1Key().isValid(maxSize)
485 : shallowKey().isValid(maxSize);
486 }
487
488 bool operator ==(QStringView key) const
489 {
490 return value.isLatinKey()
491 ? (shallowLatin1Key().toQLatin1String() == key)
492 : (shallowKey() == key);
493 }
494
495 bool operator >=(QStringView key) const
496 {
497 return value.isLatinKey()
498 ? (shallowLatin1Key().toQLatin1String() >= key)
499 : (shallowKey().toString() >= key);
500 }
501};
502
503class Header {
504public:
505 qle_uint tag; // 'qbjs'
506 qle_uint version; // 1
507 Base *root() { return reinterpret_cast<Base *>(this + 1); }
508 const Base *root() const { return reinterpret_cast<const Base *>(this + 1); }
509};
510
511class ConstData
512{
513 Q_DISABLE_COPY_MOVE(ConstData)
514public:
515 const uint alloc;
516 union {
517 const char *rawData;
518 const Header *header;
519 };
520
521 ConstData(const char *raw, uint a) : alloc(a), rawData(raw) {}
522 bool isValid() const;
523 QJsonDocument toJsonDocument() const;
524};
525
526class MutableData
527{
528 Q_DISABLE_COPY_MOVE(MutableData)
529public:
530 QAtomicInt ref;
531 uint alloc;
532 union {
533 char *rawData;
534 Header *header;
535 };
536 uint compactionCounter : 31;
537
538 MutableData(char *raw, uint a)
539 : alloc(a), rawData(raw), compactionCounter(0)
540 {
541 }
542
543 MutableData(uint reserved, QJsonValue::Type valueType)
544 : rawData(nullptr), compactionCounter(0)
545 {
546 Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object);
547
548 alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset);
549 header = reinterpret_cast<Header *>(malloc(size: alloc));
550 Q_CHECK_PTR(header);
551 header->tag = QJsonDocument::BinaryFormatTag;
552 header->version = 1;
553 Base *b = header->root();
554 b->size = sizeof(Base);
555 if (valueType == QJsonValue::Object)
556 b->setIsObject();
557 else
558 b->setIsArray();
559 b->tableOffset = sizeof(Base);
560 b->setLength(0);
561 }
562
563 ~MutableData()
564 {
565 free(ptr: rawData);
566 }
567
568 MutableData *clone(const Base *b, uint reserve = 0)
569 {
570 uint size = sizeof(Header) + b->size;
571 if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve)
572 return this;
573
574 if (reserve) {
575 if (reserve < 128)
576 reserve = 128;
577 size = qMax(a: size + reserve, b: qMin(a: size *2, b: uint(Value::MaxSize)));
578 if (size > Value::MaxSize) {
579 qWarning(msg: "QJson: Document too large to store in data structure");
580 return nullptr;
581 }
582 }
583 char *raw = reinterpret_cast<char *>(malloc(size: size));
584 Q_CHECK_PTR(raw);
585 memcpy(dest: raw + sizeof(Header), src: b, n: b->size);
586 auto *h = reinterpret_cast<Header *>(raw);
587 h->tag = QJsonDocument::BinaryFormatTag;
588 h->version = 1;
589 auto *d = new MutableData(raw, size);
590 d->compactionCounter = (b == header->root()) ? compactionCounter : 0;
591 return d;
592 }
593
594 char *takeRawData(uint *size)
595 {
596 *size = alloc;
597 char *result = rawData;
598 rawData = nullptr;
599 alloc = 0;
600 return result;
601 }
602
603 void compact();
604};
605
606} // namespace QBinaryJsonPrivate
607
608Q_DECLARE_TYPEINFO(QBinaryJsonPrivate::Value, Q_PRIMITIVE_TYPE);
609
610QT_END_NAMESPACE
611
612#endif // QBINARYJSON_P_H
613

source code of qt5compat/src/core5/serialization/qbinaryjson_p.h