1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2016 Intel Corporation. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtCore module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #ifndef QBINARYJSON_P_H |
42 | #define QBINARYJSON_P_H |
43 | |
44 | // |
45 | // W A R N I N G |
46 | // ------------- |
47 | // |
48 | // This file is not part of the Qt API. It exists purely as an |
49 | // implementation detail. This header file may change from version to |
50 | // version without notice, or even be removed. |
51 | // |
52 | // We mean it. |
53 | // |
54 | |
55 | #include <private/qbinaryjsonvalue_p.h> |
56 | #include <private/qendian_p.h> |
57 | |
58 | #include <qjsondocument.h> |
59 | |
60 | #include <limits> |
61 | |
62 | QT_REQUIRE_CONFIG(binaryjson); |
63 | |
64 | QT_BEGIN_NAMESPACE |
65 | |
66 | // in qstring.cpp |
67 | void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len); |
68 | void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept; |
69 | |
70 | /* |
71 | This defines a binary data structure for Json data. The data structure is optimised for fast reading |
72 | and minimum allocations. The whole data structure can be mmap'ed and used directly. |
73 | |
74 | In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but |
75 | much faster to access. |
76 | |
77 | The size requirements are: |
78 | |
79 | String: |
80 | Latin1 data: 2 bytes header + string.length() |
81 | Full Unicode: 4 bytes header + 2*(string.length()) |
82 | |
83 | Values: 4 bytes + size of data (size can be 0 for some data) |
84 | bool: 0 bytes |
85 | double: 8 bytes (0 if integer with less than 27bits) |
86 | string: see above |
87 | array: size of array |
88 | object: size of object |
89 | Array: 12 bytes + 4*length + size of Value data |
90 | Object: 12 bytes + 8*length + size of Key Strings + size of Value data |
91 | |
92 | For an example such as |
93 | |
94 | { // object: 12 + 5*8 = 52 |
95 | "firstName": "John", // key 12, value 8 = 20 |
96 | "lastName" : "Smith", // key 12, value 8 = 20 |
97 | "age" : 25, // key 8, value 0 = 8 |
98 | "address" : // key 12, object below = 140 |
99 | { // object: 12 + 4*8 |
100 | "streetAddress": "21 2nd Street", // key 16, value 16 |
101 | "city" : "New York", // key 8, value 12 |
102 | "state" : "NY", // key 8, value 4 |
103 | "postalCode" : "10021" // key 12, value 8 |
104 | }, // object total: 128 |
105 | "phoneNumber": // key: 16, value array below = 172 |
106 | [ // array: 12 + 2*4 + values below: 156 |
107 | { // object 12 + 2*8 |
108 | "type" : "home", // key 8, value 8 |
109 | "number": "212 555-1234" // key 8, value 16 |
110 | }, // object total: 68 |
111 | { // object 12 + 2*8 |
112 | "type" : "fax", // key 8, value 8 |
113 | "number": "646 555-4567" // key 8, value 16 |
114 | } // object total: 68 |
115 | ] // array total: 156 |
116 | } // great total: 412 bytes |
117 | |
118 | The uncompressed text file used roughly 500 bytes, so in this case we end up using about |
119 | the same space as the text representation. |
120 | |
121 | Other measurements have shown a slightly bigger binary size than a compact text |
122 | representation where all possible whitespace was stripped out. |
123 | */ |
124 | namespace QBinaryJsonPrivate { |
125 | |
126 | class Array; |
127 | class Object; |
128 | class Value; |
129 | class Entry; |
130 | |
131 | template<typename T> |
132 | using q_littleendian = QLEInteger<T>; |
133 | |
134 | using qle_short = q_littleendian<short>; |
135 | using qle_ushort = q_littleendian<unsigned short>; |
136 | using qle_int = q_littleendian<int>; |
137 | using qle_uint = q_littleendian<unsigned int>; |
138 | |
139 | template<int pos, int width> |
140 | using qle_bitfield = QLEIntegerBitfield<uint, pos, width>; |
141 | |
142 | template<int pos, int width> |
143 | using qle_signedbitfield = QLEIntegerBitfield<int, pos, width>; |
144 | |
145 | using offset = qle_uint; |
146 | |
147 | // round the size up to the next 4 byte boundary |
148 | inline uint alignedSize(uint size) { return (size + 3) & ~3; } |
149 | |
150 | const int MaxLatin1Length = 0x7fff; |
151 | |
152 | static inline bool useCompressed(QStringView s) |
153 | { |
154 | if (s.length() > MaxLatin1Length) |
155 | return false; |
156 | return QtPrivate::isLatin1(s); |
157 | } |
158 | |
159 | static inline bool useCompressed(QLatin1String s) |
160 | { |
161 | return s.size() <= MaxLatin1Length; |
162 | } |
163 | |
164 | static inline uint qStringSize(const QString &string, bool compress) |
165 | { |
166 | uint l = 2 + string.size(); |
167 | if (!compress) |
168 | l *= 2; |
169 | return alignedSize(size: l); |
170 | } |
171 | |
172 | // returns INT_MAX if it can't compress it into 28 bits |
173 | static inline int compressedNumber(double d) |
174 | { |
175 | // this relies on details of how ieee floats are represented |
176 | const int exponent_off = 52; |
177 | const quint64 fraction_mask = 0x000fffffffffffffULL; |
178 | const quint64 exponent_mask = 0x7ff0000000000000ULL; |
179 | |
180 | quint64 val; |
181 | memcpy (dest: &val, src: &d, n: sizeof(double)); |
182 | int exp = (int)((val & exponent_mask) >> exponent_off) - 1023; |
183 | if (exp < 0 || exp > 25) |
184 | return std::numeric_limits<int>::max(); |
185 | |
186 | quint64 non_int = val & (fraction_mask >> exp); |
187 | if (non_int) |
188 | return std::numeric_limits<int>::max(); |
189 | |
190 | bool neg = (val >> 63) != 0; |
191 | val &= fraction_mask; |
192 | val |= ((quint64)1 << 52); |
193 | int res = (int)(val >> (52 - exp)); |
194 | return neg ? -res : res; |
195 | } |
196 | |
197 | class Latin1String; |
198 | |
199 | class String |
200 | { |
201 | public: |
202 | explicit String(const char *data) : d(reinterpret_cast<const Data *>(data)) {} |
203 | |
204 | struct Data { |
205 | qle_uint length; |
206 | qle_ushort utf16[1]; |
207 | }; |
208 | const Data *d; |
209 | |
210 | uint byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; } |
211 | bool isValid(uint maxSize) const |
212 | { |
213 | // Check byteSize() <= maxSize, avoiding integer overflow |
214 | return maxSize >= sizeof(uint) |
215 | && uint(d->length) <= (maxSize - sizeof(uint)) / sizeof(ushort); |
216 | } |
217 | |
218 | static void copy(char *dest, QStringView str) |
219 | { |
220 | Data *data = reinterpret_cast<Data *>(dest); |
221 | data->length = str.length(); |
222 | qToLittleEndian<quint16>(source: str.utf16(), count: str.length(), dest: data->utf16); |
223 | fillTrailingZeros(data); |
224 | } |
225 | |
226 | static void fillTrailingZeros(Data *data) |
227 | { |
228 | if (data->length & 1) |
229 | data->utf16[data->length] = 0; |
230 | } |
231 | |
232 | bool operator ==(QStringView str) const |
233 | { |
234 | int slen = str.length(); |
235 | int l = d->length; |
236 | if (slen != l) |
237 | return false; |
238 | const auto *s = reinterpret_cast<const ushort *>(str.utf16()); |
239 | const qle_ushort *a = d->utf16; |
240 | const ushort *b = s; |
241 | while (l-- && *a == *b) |
242 | a++,b++; |
243 | return (l == -1); |
244 | } |
245 | |
246 | bool operator ==(const String &str) const |
247 | { |
248 | if (d->length != str.d->length) |
249 | return false; |
250 | return !memcmp(s1: d->utf16, s2: str.d->utf16, n: d->length * sizeof(ushort)); |
251 | } |
252 | |
253 | QString toString() const |
254 | { |
255 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
256 | return QString(reinterpret_cast<const QChar *>(d->utf16), d->length); |
257 | #else |
258 | const uint l = d->length; |
259 | QString str(l, Qt::Uninitialized); |
260 | QChar *ch = str.data(); |
261 | for (uint i = 0; i < l; ++i) |
262 | ch[i] = QChar(d->utf16[i]); |
263 | return str; |
264 | #endif |
265 | } |
266 | }; |
267 | |
268 | class Latin1String |
269 | { |
270 | public: |
271 | explicit Latin1String(const char *data) : d(reinterpret_cast<const Data *>(data)) {} |
272 | |
273 | struct Data { |
274 | qle_ushort length; |
275 | char latin1[1]; |
276 | }; |
277 | const Data *d; |
278 | |
279 | uint byteSize() const { return sizeof(ushort) + sizeof(char) * (d->length); } |
280 | bool isValid(uint maxSize) const { return byteSize() <= maxSize; } |
281 | |
282 | static void copy(char *dest, QStringView src) |
283 | { |
284 | Data *data = reinterpret_cast<Data *>(dest); |
285 | data->length = src.length(); |
286 | auto *l = reinterpret_cast<uchar *>(data->latin1); |
287 | const auto *uc = reinterpret_cast<const ushort *>(src.utf16()); |
288 | qt_to_latin1_unchecked(dst: l, uc, len: data->length); |
289 | |
290 | for (uint len = data->length; quintptr(l + len) & 0x3; ++len) |
291 | l[len] = 0; |
292 | } |
293 | |
294 | QLatin1String toQLatin1String() const noexcept { return QLatin1String(d->latin1, d->length); } |
295 | QString toString() const { return QString::fromLatin1(str: d->latin1, size: d->length); } |
296 | }; |
297 | |
298 | static inline void copyString(char *dest, QStringView str, bool compress) |
299 | { |
300 | if (compress) |
301 | Latin1String::copy(dest, src: str); |
302 | else |
303 | String::copy(dest, str); |
304 | } |
305 | |
306 | /* |
307 | Base is the base class for both Object and Array. Both classes work more or less the same way. |
308 | The class starts with a header (defined by the struct below), then followed by data (the data for |
309 | values in the Array case and Entry's (see below) for objects. |
310 | |
311 | After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and |
312 | offsets from the beginning of the object to Entry's in the case of Object. |
313 | |
314 | Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage |
315 | of a binary search over the keys in an Object. |
316 | */ |
317 | class Base |
318 | { |
319 | public: |
320 | qle_uint size; |
321 | union { |
322 | uint _dummy; |
323 | qle_bitfield<0, 1> is_object; |
324 | qle_bitfield<1, 31> length; |
325 | }; |
326 | offset tableOffset; |
327 | // content follows here |
328 | |
329 | bool isObject() const { return !!is_object; } |
330 | bool isArray() const { return !isObject(); } |
331 | |
332 | offset *table() |
333 | { |
334 | return reinterpret_cast<offset *>(reinterpret_cast<char *>(this) + tableOffset); |
335 | } |
336 | |
337 | const offset *table() const |
338 | { |
339 | return reinterpret_cast<const offset *>(reinterpret_cast<const char *>(this) + tableOffset); |
340 | } |
341 | |
342 | uint reserveSpace(uint dataSize, uint posInTable, uint numItems, bool replace); |
343 | }; |
344 | |
345 | class Object : public Base |
346 | { |
347 | public: |
348 | const Entry *entryAt(uint i) const |
349 | { |
350 | return reinterpret_cast<const Entry *>(reinterpret_cast<const char *>(this) + table()[i]); |
351 | } |
352 | |
353 | Entry *entryAt(uint i) |
354 | { |
355 | return reinterpret_cast<Entry *>(reinterpret_cast<char *>(this) + table()[i]); |
356 | } |
357 | |
358 | uint indexOf(QStringView key, bool *exists) const; |
359 | QJsonObject toJsonObject() const; |
360 | bool isValid(uint maxSize) const; |
361 | }; |
362 | |
363 | class Array : public Base |
364 | { |
365 | public: |
366 | const Value *at(uint i) const { return reinterpret_cast<const Value *>(table() + i); } |
367 | Value *at(uint i) { return reinterpret_cast<Value *>(table() + i); } |
368 | |
369 | QJsonArray toJsonArray() const; |
370 | bool isValid(uint maxSize) const; |
371 | }; |
372 | |
373 | class Value |
374 | { |
375 | public: |
376 | enum { |
377 | MaxSize = (1 << 27) - 1 |
378 | }; |
379 | union { |
380 | uint _dummy; |
381 | qle_bitfield<0, 3> type; |
382 | qle_bitfield<3, 1> latinOrIntValue; |
383 | qle_bitfield<4, 1> latinKey; |
384 | qle_bitfield<5, 27> value; |
385 | qle_signedbitfield<5, 27> int_value; |
386 | }; |
387 | |
388 | inline const char *data(const Base *b) const |
389 | { |
390 | return reinterpret_cast<const char *>(b) + value; |
391 | } |
392 | |
393 | uint usedStorage(const Base *b) const; |
394 | |
395 | bool toBoolean() const |
396 | { |
397 | Q_ASSERT(type == QJsonValue::Bool); |
398 | return value != 0; |
399 | } |
400 | |
401 | double toDouble(const Base *b) const |
402 | { |
403 | Q_ASSERT(type == QJsonValue::Double); |
404 | if (latinOrIntValue) |
405 | return int_value; |
406 | |
407 | auto i = qFromLittleEndian<quint64>(src: reinterpret_cast<const uchar *>(b) + value); |
408 | double d; |
409 | memcpy(dest: &d, src: &i, n: sizeof(double)); |
410 | return d; |
411 | } |
412 | |
413 | QString toString(const Base *b) const |
414 | { |
415 | return latinOrIntValue |
416 | ? asLatin1String(b).toString() |
417 | : asString(b).toString(); |
418 | } |
419 | |
420 | String asString(const Base *b) const |
421 | { |
422 | Q_ASSERT(type == QJsonValue::String && !latinOrIntValue); |
423 | return String(data(b)); |
424 | } |
425 | |
426 | Latin1String asLatin1String(const Base *b) const |
427 | { |
428 | Q_ASSERT(type == QJsonValue::String && latinOrIntValue); |
429 | return Latin1String(data(b)); |
430 | } |
431 | |
432 | const Base *base(const Base *b) const |
433 | { |
434 | Q_ASSERT(type == QJsonValue::Array || type == QJsonValue::Object); |
435 | return reinterpret_cast<const Base *>(data(b)); |
436 | } |
437 | |
438 | QJsonValue toJsonValue(const Base *b) const; |
439 | bool isValid(const Base *b) const; |
440 | |
441 | static uint requiredStorage(const QBinaryJsonValue &v, bool *compressed); |
442 | static uint valueToStore(const QBinaryJsonValue &v, uint offset); |
443 | static void copyData(const QBinaryJsonValue &v, char *dest, bool compressed); |
444 | }; |
445 | |
446 | class Entry { |
447 | public: |
448 | Value value; |
449 | // key |
450 | // value data follows key |
451 | |
452 | uint size() const |
453 | { |
454 | uint s = sizeof(Entry); |
455 | if (value.latinKey) |
456 | s += shallowLatin1Key().byteSize(); |
457 | else |
458 | s += shallowKey().byteSize(); |
459 | return alignedSize(size: s); |
460 | } |
461 | |
462 | uint usedStorage(Base *b) const |
463 | { |
464 | return size() + value.usedStorage(b); |
465 | } |
466 | |
467 | String shallowKey() const |
468 | { |
469 | Q_ASSERT(!value.latinKey); |
470 | return String(reinterpret_cast<const char *>(this) + sizeof(Entry)); |
471 | } |
472 | |
473 | Latin1String shallowLatin1Key() const |
474 | { |
475 | Q_ASSERT(value.latinKey); |
476 | return Latin1String(reinterpret_cast<const char *>(this) + sizeof(Entry)); |
477 | } |
478 | |
479 | QString key() const |
480 | { |
481 | return value.latinKey |
482 | ? shallowLatin1Key().toString() |
483 | : shallowKey().toString(); |
484 | } |
485 | |
486 | bool isValid(uint maxSize) const |
487 | { |
488 | if (maxSize < sizeof(Entry)) |
489 | return false; |
490 | maxSize -= sizeof(Entry); |
491 | return value.latinKey |
492 | ? shallowLatin1Key().isValid(maxSize) |
493 | : shallowKey().isValid(maxSize); |
494 | } |
495 | |
496 | bool operator ==(QStringView key) const |
497 | { |
498 | return value.latinKey |
499 | ? (shallowLatin1Key().toQLatin1String() == key) |
500 | : (shallowKey() == key); |
501 | } |
502 | |
503 | bool operator >=(QStringView key) const |
504 | { |
505 | return value.latinKey |
506 | ? (shallowLatin1Key().toQLatin1String() >= key) |
507 | : (shallowKey().toString() >= key); |
508 | } |
509 | }; |
510 | |
511 | class { |
512 | public: |
513 | qle_uint ; // 'qbjs' |
514 | qle_uint ; // 1 |
515 | Base *() { return reinterpret_cast<Base *>(this + 1); } |
516 | const Base *() const { return reinterpret_cast<const Base *>(this + 1); } |
517 | }; |
518 | |
519 | class ConstData |
520 | { |
521 | Q_DISABLE_COPY_MOVE(ConstData) |
522 | public: |
523 | const uint alloc; |
524 | union { |
525 | const char *rawData; |
526 | const Header *; |
527 | }; |
528 | |
529 | ConstData(const char *raw, uint a) : alloc(a), rawData(raw) {} |
530 | bool isValid() const; |
531 | QJsonDocument toJsonDocument() const; |
532 | }; |
533 | |
534 | class MutableData |
535 | { |
536 | Q_DISABLE_COPY_MOVE(MutableData) |
537 | public: |
538 | QAtomicInt ref; |
539 | uint alloc; |
540 | union { |
541 | char *rawData; |
542 | Header *; |
543 | }; |
544 | uint compactionCounter : 31; |
545 | |
546 | MutableData(char *raw, uint a) |
547 | : alloc(a), rawData(raw), compactionCounter(0) |
548 | { |
549 | } |
550 | |
551 | MutableData(uint reserved, QJsonValue::Type valueType) |
552 | : rawData(nullptr), compactionCounter(0) |
553 | { |
554 | Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object); |
555 | |
556 | alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset); |
557 | header = reinterpret_cast<Header *>(malloc(size: alloc)); |
558 | Q_CHECK_PTR(header); |
559 | header->tag = QJsonDocument::BinaryFormatTag; |
560 | header->version = 1; |
561 | Base *b = header->root(); |
562 | b->size = sizeof(Base); |
563 | b->is_object = (valueType == QJsonValue::Object); |
564 | b->tableOffset = sizeof(Base); |
565 | b->length = 0; |
566 | } |
567 | |
568 | ~MutableData() |
569 | { |
570 | free(ptr: rawData); |
571 | } |
572 | |
573 | MutableData *clone(const Base *b, uint reserve = 0) |
574 | { |
575 | uint size = sizeof(Header) + b->size; |
576 | if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve) |
577 | return this; |
578 | |
579 | if (reserve) { |
580 | if (reserve < 128) |
581 | reserve = 128; |
582 | size = qMax(a: size + reserve, b: qMin(a: size *2, b: uint(Value::MaxSize))); |
583 | if (size > Value::MaxSize) { |
584 | qWarning(msg: "QJson: Document too large to store in data structure" ); |
585 | return nullptr; |
586 | } |
587 | } |
588 | char *raw = reinterpret_cast<char *>(malloc(size: size)); |
589 | Q_CHECK_PTR(raw); |
590 | memcpy(dest: raw + sizeof(Header), src: b, n: b->size); |
591 | auto *h = reinterpret_cast<Header *>(raw); |
592 | h->tag = QJsonDocument::BinaryFormatTag; |
593 | h->version = 1; |
594 | auto *d = new MutableData(raw, size); |
595 | d->compactionCounter = (b == header->root()) ? compactionCounter : 0; |
596 | return d; |
597 | } |
598 | |
599 | char *takeRawData(uint *size) |
600 | { |
601 | *size = alloc; |
602 | char *result = rawData; |
603 | rawData = nullptr; |
604 | alloc = 0; |
605 | return result; |
606 | } |
607 | |
608 | void compact(); |
609 | }; |
610 | |
611 | } // namespace QBinaryJsonPrivate |
612 | |
613 | Q_DECLARE_TYPEINFO(QBinaryJsonPrivate::Value, Q_PRIMITIVE_TYPE); |
614 | |
615 | QT_END_NAMESPACE |
616 | |
617 | #endif // QBINARYJSON_P_H |
618 | |