1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // Copyright (C) 2016 Intel Corporation. |
3 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
4 | |
5 | #ifndef QBINARYJSON_P_H |
6 | #define QBINARYJSON_P_H |
7 | |
8 | // |
9 | // W A R N I N G |
10 | // ------------- |
11 | // |
12 | // This file is not part of the Qt API. It exists purely as an |
13 | // implementation detail. This header file may change from version to |
14 | // version without notice, or even be removed. |
15 | // |
16 | // We mean it. |
17 | // |
18 | |
19 | #include "qbinaryjson.h" |
20 | |
21 | #include <private/qbinaryjsonvalue_p.h> |
22 | #include <private/qendian_p.h> |
23 | |
24 | #include <limits> |
25 | |
26 | QT_BEGIN_NAMESPACE |
27 | |
28 | /* |
29 | This defines a binary data structure for Json data. The data structure is optimised for fast reading |
30 | and minimum allocations. The whole data structure can be mmap'ed and used directly. |
31 | |
32 | In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but |
33 | much faster to access. |
34 | |
35 | The size requirements are: |
36 | |
37 | String: |
38 | Latin1 data: 2 bytes header + string.length() |
39 | Full Unicode: 4 bytes header + 2*(string.length()) |
40 | |
41 | Values: 4 bytes + size of data (size can be 0 for some data) |
42 | bool: 0 bytes |
43 | double: 8 bytes (0 if integer with less than 27bits) |
44 | string: see above |
45 | array: size of array |
46 | object: size of object |
47 | Array: 12 bytes + 4*length + size of Value data |
48 | Object: 12 bytes + 8*length + size of Key Strings + size of Value data |
49 | |
50 | For an example such as |
51 | |
52 | { // object: 12 + 5*8 = 52 |
53 | "firstName": "John", // key 12, value 8 = 20 |
54 | "lastName" : "Smith", // key 12, value 8 = 20 |
55 | "age" : 25, // key 8, value 0 = 8 |
56 | "address" : // key 12, object below = 140 |
57 | { // object: 12 + 4*8 |
58 | "streetAddress": "21 2nd Street", // key 16, value 16 |
59 | "city" : "New York", // key 8, value 12 |
60 | "state" : "NY", // key 8, value 4 |
61 | "postalCode" : "10021" // key 12, value 8 |
62 | }, // object total: 128 |
63 | "phoneNumber": // key: 16, value array below = 172 |
64 | [ // array: 12 + 2*4 + values below: 156 |
65 | { // object 12 + 2*8 |
66 | "type" : "home", // key 8, value 8 |
67 | "number": "212 555-1234" // key 8, value 16 |
68 | }, // object total: 68 |
69 | { // object 12 + 2*8 |
70 | "type" : "fax", // key 8, value 8 |
71 | "number": "646 555-4567" // key 8, value 16 |
72 | } // object total: 68 |
73 | ] // array total: 156 |
74 | } // great total: 412 bytes |
75 | |
76 | The uncompressed text file used roughly 500 bytes, so in this case we end up using about |
77 | the same space as the text representation. |
78 | |
79 | Other measurements have shown a slightly bigger binary size than a compact text |
80 | representation where all possible whitespace was stripped out. |
81 | */ |
82 | |
83 | namespace QBinaryJsonPrivate { |
84 | |
85 | class Array; |
86 | class Object; |
87 | class Value; |
88 | class Entry; |
89 | |
90 | template<typename T> |
91 | using q_littleendian = QLEInteger<T>; |
92 | |
93 | using qle_short = q_littleendian<short>; |
94 | using qle_ushort = q_littleendian<unsigned short>; |
95 | using qle_int = q_littleendian<int>; |
96 | using qle_uint = q_littleendian<unsigned int>; |
97 | |
98 | template<typename... Accessors> |
99 | using qle_bitfield = QLEIntegerBitfieldUnion<uint, Accessors...>; |
100 | |
101 | template<int pos, int width> |
102 | using qle_bitfield_accessor |
103 | = QSpecialIntegerAccessor<QLittleEndianStorageType<uint>, pos, width>; |
104 | |
105 | template<int pos, int width> |
106 | using qle_signedbitfield_accessor |
107 | = QSpecialIntegerAccessor<QLittleEndianStorageType<uint>, pos, width, int>; |
108 | |
109 | using offset = qle_uint; |
110 | |
111 | // round the size up to the next 4 byte boundary |
112 | inline uint alignedSize(uint size) { return (size + 3) & ~3; } |
113 | |
114 | const int MaxLatin1Length = 0x7fff; |
115 | |
116 | static inline bool useCompressed(QStringView s) |
117 | { |
118 | if (s.size() > MaxLatin1Length) |
119 | return false; |
120 | return QtPrivate::isLatin1(s); |
121 | } |
122 | |
123 | static inline bool useCompressed(QLatin1String s) |
124 | { |
125 | return s.size() <= MaxLatin1Length; |
126 | } |
127 | |
128 | static inline uint qStringSize(const QString &string, bool compress) |
129 | { |
130 | uint l = 2 + string.size(); |
131 | if (!compress) |
132 | l *= 2; |
133 | return alignedSize(size: l); |
134 | } |
135 | |
136 | // returns INT_MAX if it can't compress it into 28 bits |
137 | static inline int compressedNumber(double d) |
138 | { |
139 | // this relies on details of how ieee floats are represented |
140 | const int exponent_off = 52; |
141 | const quint64 fraction_mask = 0x000fffffffffffffULL; |
142 | const quint64 exponent_mask = 0x7ff0000000000000ULL; |
143 | |
144 | quint64 val; |
145 | memcpy (dest: &val, src: &d, n: sizeof(double)); |
146 | int exp = (int)((val & exponent_mask) >> exponent_off) - 1023; |
147 | if (exp < 0 || exp > 25) |
148 | return std::numeric_limits<int>::max(); |
149 | |
150 | quint64 non_int = val & (fraction_mask >> exp); |
151 | if (non_int) |
152 | return std::numeric_limits<int>::max(); |
153 | |
154 | bool neg = (val >> 63) != 0; |
155 | val &= fraction_mask; |
156 | val |= ((quint64)1 << 52); |
157 | int res = (int)(val >> (52 - exp)); |
158 | return neg ? -res : res; |
159 | } |
160 | |
161 | class Latin1String; |
162 | |
163 | class String |
164 | { |
165 | public: |
166 | explicit String(const char *data) : d(reinterpret_cast<const Data *>(data)) {} |
167 | |
168 | struct Data { |
169 | qle_uint length; |
170 | qle_ushort utf16[1]; |
171 | }; |
172 | const Data *d; |
173 | |
174 | uint byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; } |
175 | bool isValid(uint maxSize) const |
176 | { |
177 | // Check byteSize() <= maxSize, avoiding integer overflow |
178 | return maxSize >= sizeof(uint) |
179 | && uint(d->length) <= (maxSize - sizeof(uint)) / sizeof(ushort); |
180 | } |
181 | |
182 | static void copy(char *dest, QStringView str) |
183 | { |
184 | Data *data = reinterpret_cast<Data *>(dest); |
185 | data->length = str.size(); |
186 | qToLittleEndian<quint16>(source: str.utf16(), count: str.size(), dest: data->utf16); |
187 | fillTrailingZeros(data); |
188 | } |
189 | |
190 | static void fillTrailingZeros(Data *data) |
191 | { |
192 | if (data->length & 1) |
193 | data->utf16[data->length] = 0; |
194 | } |
195 | |
196 | bool operator ==(QStringView str) const |
197 | { |
198 | int slen = str.size(); |
199 | int l = d->length; |
200 | if (slen != l) |
201 | return false; |
202 | const auto *s = reinterpret_cast<const ushort *>(str.utf16()); |
203 | const qle_ushort *a = d->utf16; |
204 | const ushort *b = s; |
205 | while (l-- && *a == *b) |
206 | a++,b++; |
207 | return (l == -1); |
208 | } |
209 | |
210 | bool operator ==(const String &str) const |
211 | { |
212 | if (d->length != str.d->length) |
213 | return false; |
214 | return !memcmp(s1: d->utf16, s2: str.d->utf16, n: d->length * sizeof(ushort)); |
215 | } |
216 | |
217 | QString toString() const |
218 | { |
219 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
220 | return QString(reinterpret_cast<const QChar *>(d->utf16), d->length); |
221 | #else |
222 | const uint l = d->length; |
223 | QString str(l, Qt::Uninitialized); |
224 | QChar *ch = str.data(); |
225 | for (uint i = 0; i < l; ++i) |
226 | ch[i] = QChar(d->utf16[i]); |
227 | return str; |
228 | #endif |
229 | } |
230 | }; |
231 | |
232 | class Latin1String |
233 | { |
234 | public: |
235 | explicit Latin1String(const char *data) : d(reinterpret_cast<const Data *>(data)) {} |
236 | |
237 | struct Data { |
238 | qle_ushort length; |
239 | char latin1[1]; |
240 | }; |
241 | const Data *d; |
242 | |
243 | uint byteSize() const { return sizeof(ushort) + sizeof(char) * (d->length); } |
244 | bool isValid(uint maxSize) const { return byteSize() <= maxSize; } |
245 | |
246 | static void copy(char *dest, QStringView src) |
247 | { |
248 | Data *data = reinterpret_cast<Data *>(dest); |
249 | data->length = src.size(); // ### narrows from int to ushort |
250 | auto *l = data->latin1; |
251 | const auto srcData = src.utf16(); |
252 | for (qsizetype i = 0; i < src.size(); ++i) |
253 | l[i] = char(srcData[i]); |
254 | |
255 | for (uint len = data->length; quintptr(l + len) & 0x3; ++len) |
256 | l[len] = 0; |
257 | } |
258 | |
259 | QLatin1String toQLatin1String() const noexcept { return QLatin1String(d->latin1, d->length); } |
260 | QString toString() const { return QString::fromLatin1(str: d->latin1, size: d->length); } |
261 | }; |
262 | |
263 | static inline void copyString(char *dest, QStringView str, bool compress) |
264 | { |
265 | if (compress) |
266 | Latin1String::copy(dest, src: str); |
267 | else |
268 | String::copy(dest, str); |
269 | } |
270 | |
271 | /* |
272 | Base is the base class for both Object and Array. Both classes work more or less the same way. |
273 | The class starts with a header (defined by the struct below), then followed by data (the data for |
274 | values in the Array case and Entry's (see below) for objects. |
275 | |
276 | After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and |
277 | offsets from the beginning of the object to Entry's in the case of Object. |
278 | |
279 | Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage |
280 | of a binary search over the keys in an Object. |
281 | */ |
282 | class Base |
283 | { |
284 | private: |
285 | using IsObjectAccessor = qle_bitfield_accessor<0, 1>; |
286 | using LengthAccessor = qle_bitfield_accessor<1, 31>; |
287 | public: |
288 | qle_uint size; |
289 | qle_bitfield<IsObjectAccessor, LengthAccessor> isObjectAndLength; |
290 | offset tableOffset; |
291 | // content follows here |
292 | |
293 | void setIsObject() { isObjectAndLength.set<IsObjectAccessor>(1); } |
294 | bool isObject() const { return !!isObjectAndLength.get<IsObjectAccessor>(); } |
295 | |
296 | void setIsArray() { isObjectAndLength.set<IsObjectAccessor>(0); } |
297 | bool isArray() const { return !isObject(); } |
298 | |
299 | void setLength(uint length) { isObjectAndLength.set<LengthAccessor>(length); } |
300 | uint length() const { return isObjectAndLength.get<LengthAccessor>(); } |
301 | |
302 | offset *table() |
303 | { |
304 | return reinterpret_cast<offset *>(reinterpret_cast<char *>(this) + tableOffset); |
305 | } |
306 | |
307 | const offset *table() const |
308 | { |
309 | return reinterpret_cast<const offset *>(reinterpret_cast<const char *>(this) + tableOffset); |
310 | } |
311 | |
312 | uint reserveSpace(uint dataSize, uint posInTable, uint numItems, bool replace); |
313 | }; |
314 | |
315 | class Object : public Base |
316 | { |
317 | public: |
318 | const Entry *entryAt(uint i) const |
319 | { |
320 | return reinterpret_cast<const Entry *>(reinterpret_cast<const char *>(this) + table()[i]); |
321 | } |
322 | |
323 | Entry *entryAt(uint i) |
324 | { |
325 | return reinterpret_cast<Entry *>(reinterpret_cast<char *>(this) + table()[i]); |
326 | } |
327 | |
328 | uint indexOf(QStringView key, bool *exists) const; |
329 | QJsonObject toJsonObject() const; |
330 | bool isValid(uint maxSize) const; |
331 | }; |
332 | |
333 | class Array : public Base |
334 | { |
335 | public: |
336 | const Value *at(uint i) const { return reinterpret_cast<const Value *>(table() + i); } |
337 | Value *at(uint i) { return reinterpret_cast<Value *>(table() + i); } |
338 | |
339 | QJsonArray toJsonArray() const; |
340 | bool isValid(uint maxSize) const; |
341 | }; |
342 | |
343 | class Value |
344 | { |
345 | private: |
346 | using TypeAccessor = qle_bitfield_accessor<0, 3>; |
347 | using LatinOrIntValueAccessor = qle_bitfield_accessor<3, 1>; |
348 | using LatinKeyAccessor = qle_bitfield_accessor<4, 1>; |
349 | using ValueAccessor = qle_bitfield_accessor<5, 27>; |
350 | using IntValueAccessor = qle_signedbitfield_accessor<5, 27>; |
351 | |
352 | qle_bitfield< |
353 | TypeAccessor, |
354 | LatinOrIntValueAccessor, |
355 | LatinKeyAccessor, |
356 | ValueAccessor, |
357 | IntValueAccessor |
358 | > m_data; |
359 | |
360 | |
361 | int intValue() const { return m_data.get<IntValueAccessor>(); } |
362 | |
363 | public: |
364 | enum { |
365 | MaxSize = (1 << 27) - 1 |
366 | }; |
367 | |
368 | inline const char *data(const Base *b) const |
369 | { |
370 | return reinterpret_cast<const char *>(b) + value(); |
371 | } |
372 | |
373 | uint usedStorage(const Base *b) const; |
374 | |
375 | bool toBoolean() const |
376 | { |
377 | Q_ASSERT(type() == QJsonValue::Bool); |
378 | return value() != 0; |
379 | } |
380 | |
381 | double toDouble(const Base *b) const |
382 | { |
383 | Q_ASSERT(type() == QJsonValue::Double); |
384 | if (isLatinOrIntValue()) |
385 | return intValue(); |
386 | |
387 | auto i = qFromLittleEndian<quint64>(src: reinterpret_cast<const uchar *>(b) + value()); |
388 | double d; |
389 | memcpy(dest: &d, src: &i, n: sizeof(double)); |
390 | return d; |
391 | } |
392 | |
393 | QString toString(const Base *b) const |
394 | { |
395 | return isLatinOrIntValue() |
396 | ? asLatin1String(b).toString() |
397 | : asString(b).toString(); |
398 | } |
399 | |
400 | String asString(const Base *b) const |
401 | { |
402 | Q_ASSERT(type() == QJsonValue::String && !isLatinOrIntValue()); |
403 | return String(data(b)); |
404 | } |
405 | |
406 | Latin1String asLatin1String(const Base *b) const |
407 | { |
408 | Q_ASSERT(type() == QJsonValue::String && isLatinOrIntValue()); |
409 | return Latin1String(data(b)); |
410 | } |
411 | |
412 | const Base *base(const Base *b) const |
413 | { |
414 | Q_ASSERT(type() == QJsonValue::Array || type() == QJsonValue::Object); |
415 | return reinterpret_cast<const Base *>(data(b)); |
416 | } |
417 | |
418 | QJsonValue toJsonValue(const Base *b) const; |
419 | bool isValid(const Base *b) const; |
420 | |
421 | static uint requiredStorage(const QBinaryJsonValue &v, bool *compressed); |
422 | static uint valueToStore(const QBinaryJsonValue &v, uint offset); |
423 | static void copyData(const QBinaryJsonValue &v, char *dest, bool compressed); |
424 | |
425 | void setIsLatinKey(bool isLatinKey) { m_data.set<LatinKeyAccessor>(isLatinKey); } |
426 | bool isLatinKey() const { return m_data.get<LatinKeyAccessor>(); } |
427 | |
428 | void setIsLatinOrIntValue(bool v) { m_data.set<LatinOrIntValueAccessor>(v); } |
429 | bool isLatinOrIntValue() const { return m_data.get<LatinOrIntValueAccessor>(); } |
430 | |
431 | void setType(uint type) { m_data.set<TypeAccessor>(type); } |
432 | uint type() const { return m_data.get<TypeAccessor>(); } |
433 | |
434 | void setValue(uint value) { m_data.set<ValueAccessor>(value); } |
435 | uint value() const { return m_data.get<ValueAccessor>(); } |
436 | }; |
437 | |
438 | class Entry { |
439 | public: |
440 | Value value; |
441 | // key |
442 | // value data follows key |
443 | |
444 | uint size() const |
445 | { |
446 | uint s = sizeof(Entry); |
447 | if (value.isLatinKey()) |
448 | s += shallowLatin1Key().byteSize(); |
449 | else |
450 | s += shallowKey().byteSize(); |
451 | return alignedSize(size: s); |
452 | } |
453 | |
454 | uint usedStorage(Base *b) const |
455 | { |
456 | return size() + value.usedStorage(b); |
457 | } |
458 | |
459 | String shallowKey() const |
460 | { |
461 | Q_ASSERT(!value.isLatinKey()); |
462 | return String(reinterpret_cast<const char *>(this) + sizeof(Entry)); |
463 | } |
464 | |
465 | Latin1String shallowLatin1Key() const |
466 | { |
467 | Q_ASSERT(value.isLatinKey()); |
468 | return Latin1String(reinterpret_cast<const char *>(this) + sizeof(Entry)); |
469 | } |
470 | |
471 | QString key() const |
472 | { |
473 | return value.isLatinKey() |
474 | ? shallowLatin1Key().toString() |
475 | : shallowKey().toString(); |
476 | } |
477 | |
478 | bool isValid(uint maxSize) const |
479 | { |
480 | if (maxSize < sizeof(Entry)) |
481 | return false; |
482 | maxSize -= sizeof(Entry); |
483 | return value.isLatinKey() |
484 | ? shallowLatin1Key().isValid(maxSize) |
485 | : shallowKey().isValid(maxSize); |
486 | } |
487 | |
488 | bool operator ==(QStringView key) const |
489 | { |
490 | return value.isLatinKey() |
491 | ? (shallowLatin1Key().toQLatin1String() == key) |
492 | : (shallowKey() == key); |
493 | } |
494 | |
495 | bool operator >=(QStringView key) const |
496 | { |
497 | return value.isLatinKey() |
498 | ? (shallowLatin1Key().toQLatin1String() >= key) |
499 | : (shallowKey().toString() >= key); |
500 | } |
501 | }; |
502 | |
503 | class { |
504 | public: |
505 | qle_uint ; // 'qbjs' |
506 | qle_uint ; // 1 |
507 | Base *() { return reinterpret_cast<Base *>(this + 1); } |
508 | const Base *() const { return reinterpret_cast<const Base *>(this + 1); } |
509 | }; |
510 | |
511 | class ConstData |
512 | { |
513 | Q_DISABLE_COPY_MOVE(ConstData) |
514 | public: |
515 | const uint alloc; |
516 | union { |
517 | const char *rawData; |
518 | const Header *; |
519 | }; |
520 | |
521 | ConstData(const char *raw, uint a) : alloc(a), rawData(raw) {} |
522 | bool isValid() const; |
523 | QJsonDocument toJsonDocument() const; |
524 | }; |
525 | |
526 | class MutableData |
527 | { |
528 | Q_DISABLE_COPY_MOVE(MutableData) |
529 | public: |
530 | QAtomicInt ref; |
531 | uint alloc; |
532 | union { |
533 | char *rawData; |
534 | Header *; |
535 | }; |
536 | uint compactionCounter : 31; |
537 | |
538 | MutableData(char *raw, uint a) |
539 | : alloc(a), rawData(raw), compactionCounter(0) |
540 | { |
541 | } |
542 | |
543 | MutableData(uint reserved, QJsonValue::Type valueType) |
544 | : rawData(nullptr), compactionCounter(0) |
545 | { |
546 | Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object); |
547 | |
548 | alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset); |
549 | header = reinterpret_cast<Header *>(malloc(size: alloc)); |
550 | Q_CHECK_PTR(header); |
551 | header->tag = QJsonDocument::BinaryFormatTag; |
552 | header->version = 1; |
553 | Base *b = header->root(); |
554 | b->size = sizeof(Base); |
555 | if (valueType == QJsonValue::Object) |
556 | b->setIsObject(); |
557 | else |
558 | b->setIsArray(); |
559 | b->tableOffset = sizeof(Base); |
560 | b->setLength(0); |
561 | } |
562 | |
563 | ~MutableData() |
564 | { |
565 | free(ptr: rawData); |
566 | } |
567 | |
568 | MutableData *clone(const Base *b, uint reserve = 0) |
569 | { |
570 | uint size = sizeof(Header) + b->size; |
571 | if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve) |
572 | return this; |
573 | |
574 | if (reserve) { |
575 | if (reserve < 128) |
576 | reserve = 128; |
577 | size = qMax(a: size + reserve, b: qMin(a: size *2, b: uint(Value::MaxSize))); |
578 | if (size > Value::MaxSize) { |
579 | qWarning(msg: "QJson: Document too large to store in data structure" ); |
580 | return nullptr; |
581 | } |
582 | } |
583 | char *raw = reinterpret_cast<char *>(malloc(size: size)); |
584 | Q_CHECK_PTR(raw); |
585 | memcpy(dest: raw + sizeof(Header), src: b, n: b->size); |
586 | auto *h = reinterpret_cast<Header *>(raw); |
587 | h->tag = QJsonDocument::BinaryFormatTag; |
588 | h->version = 1; |
589 | auto *d = new MutableData(raw, size); |
590 | d->compactionCounter = (b == header->root()) ? compactionCounter : 0; |
591 | return d; |
592 | } |
593 | |
594 | char *takeRawData(uint *size) |
595 | { |
596 | *size = alloc; |
597 | char *result = rawData; |
598 | rawData = nullptr; |
599 | alloc = 0; |
600 | return result; |
601 | } |
602 | |
603 | void compact(); |
604 | }; |
605 | |
606 | } // namespace QBinaryJsonPrivate |
607 | |
608 | Q_DECLARE_TYPEINFO(QBinaryJsonPrivate::Value, Q_PRIMITIVE_TYPE); |
609 | |
610 | QT_END_NAMESPACE |
611 | |
612 | #endif // QBINARYJSON_P_H |
613 | |