1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtNetwork module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "bitstreams_p.h" |
41 | #include "huffman_p.h" |
42 | |
43 | #include <QtCore/qbytearray.h> |
44 | |
45 | #include <algorithm> |
46 | #include <limits> |
47 | |
48 | QT_BEGIN_NAMESPACE |
49 | |
50 | namespace HPack |
51 | { |
52 | |
53 | /* |
54 | The static Huffman code used here was extracted from: |
55 | https://http2.github.io/http2-spec/compression.html#huffman.code |
56 | |
57 | This code was generated from statistics obtained on a large |
58 | sample of HTTP headers. It is a canonical Huffman code |
59 | with some tweaking to ensure that no symbol has a unique |
60 | code length. All codes were left-aligned - for implementation |
61 | convenience. |
62 | |
63 | Using binary trees to implement decoding would be prohibitively |
64 | expensive (both memory and time-wise). Instead we use a table-based |
65 | approach and any given code itself works as an index into such table(s). |
66 | We have 256 possible byte values and code lengths in |
67 | a range [5, 26]. This would require a huge table (and most of entries |
68 | would be 'wasted', since we only have to encode 256 elements). |
69 | Instead we use multi-level tables. The first level table |
70 | is using 9-bit length index; some entries in this table are 'terminal', |
71 | some reference the next level table(s). |
72 | |
73 | For example, bytes with values 48 and 49 (ASCII codes for '0' and '1') |
74 | both have code length 5, Huffman codes are: 00000 and 00001. They |
75 | both are placed in the 'root' table, |
76 | the 'root' table has index length == 9: |
77 | [00000 | 4 remaining bits] |
78 | ... |
79 | [00001 | 4 remaining bits] |
80 | |
81 | All entires with indices between these two will 'point' to value 48 |
82 | with bitLength == 5 so that bit stream (for example) 000001010 will be |
83 | decoded as: 48 + "put 1010 back into bitstream". |
84 | |
85 | A good description can be found here: |
86 | http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007 |
87 | or just google "Efficient Huffman Decoding". |
88 | Also see comments below about 'filling holes'. |
89 | */ |
90 | |
91 | namespace |
92 | { |
93 | |
94 | const CodeEntry staticHuffmanCodeTable[] |
95 | { |
96 | { .byteValue: 0, .huffmanCode: 0xffc00000ul, .bitLength: 13}, // 11111111|11000 |
97 | { .byteValue: 1, .huffmanCode: 0xffffb000ul, .bitLength: 23}, // 11111111|11111111|1011000 |
98 | { .byteValue: 2, .huffmanCode: 0xfffffe20ul, .bitLength: 28}, // 11111111|11111111|11111110|0010 |
99 | { .byteValue: 3, .huffmanCode: 0xfffffe30ul, .bitLength: 28}, // 11111111|11111111|11111110|0011 |
100 | { .byteValue: 4, .huffmanCode: 0xfffffe40ul, .bitLength: 28}, // 11111111|11111111|11111110|0100 |
101 | { .byteValue: 5, .huffmanCode: 0xfffffe50ul, .bitLength: 28}, // 11111111|11111111|11111110|0101 |
102 | { .byteValue: 6, .huffmanCode: 0xfffffe60ul, .bitLength: 28}, // 11111111|11111111|11111110|0110 |
103 | { .byteValue: 7, .huffmanCode: 0xfffffe70ul, .bitLength: 28}, // 11111111|11111111|11111110|0111 |
104 | { .byteValue: 8, .huffmanCode: 0xfffffe80ul, .bitLength: 28}, // 11111111|11111111|11111110|1000 |
105 | { .byteValue: 9, .huffmanCode: 0xffffea00ul, .bitLength: 24}, // 11111111|11111111|11101010 |
106 | { .byteValue: 10, .huffmanCode: 0xfffffff0ul, .bitLength: 30}, // 11111111|11111111|11111111|111100 |
107 | { .byteValue: 11, .huffmanCode: 0xfffffe90ul, .bitLength: 28}, // 11111111|11111111|11111110|1001 |
108 | { .byteValue: 12, .huffmanCode: 0xfffffea0ul, .bitLength: 28}, // 11111111|11111111|11111110|1010 |
109 | { .byteValue: 13, .huffmanCode: 0xfffffff4ul, .bitLength: 30}, // 11111111|11111111|11111111|111101 |
110 | { .byteValue: 14, .huffmanCode: 0xfffffeb0ul, .bitLength: 28}, // 11111111|11111111|11111110|1011 |
111 | { .byteValue: 15, .huffmanCode: 0xfffffec0ul, .bitLength: 28}, // 11111111|11111111|11111110|1100 |
112 | { .byteValue: 16, .huffmanCode: 0xfffffed0ul, .bitLength: 28}, // 11111111|11111111|11111110|1101 |
113 | { .byteValue: 17, .huffmanCode: 0xfffffee0ul, .bitLength: 28}, // 11111111|11111111|11111110|1110 |
114 | { .byteValue: 18, .huffmanCode: 0xfffffef0ul, .bitLength: 28}, // 11111111|11111111|11111110|1111 |
115 | { .byteValue: 19, .huffmanCode: 0xffffff00ul, .bitLength: 28}, // 11111111|11111111|11111111|0000 |
116 | { .byteValue: 20, .huffmanCode: 0xffffff10ul, .bitLength: 28}, // 11111111|11111111|11111111|0001 |
117 | { .byteValue: 21, .huffmanCode: 0xffffff20ul, .bitLength: 28}, // 11111111|11111111|11111111|0010 |
118 | { .byteValue: 22, .huffmanCode: 0xfffffff8ul, .bitLength: 30}, // 11111111|11111111|11111111|111110 |
119 | { .byteValue: 23, .huffmanCode: 0xffffff30ul, .bitLength: 28}, // 11111111|11111111|11111111|0011 |
120 | { .byteValue: 24, .huffmanCode: 0xffffff40ul, .bitLength: 28}, // 11111111|11111111|11111111|0100 |
121 | { .byteValue: 25, .huffmanCode: 0xffffff50ul, .bitLength: 28}, // 11111111|11111111|11111111|0101 |
122 | { .byteValue: 26, .huffmanCode: 0xffffff60ul, .bitLength: 28}, // 11111111|11111111|11111111|0110 |
123 | { .byteValue: 27, .huffmanCode: 0xffffff70ul, .bitLength: 28}, // 11111111|11111111|11111111|0111 |
124 | { .byteValue: 28, .huffmanCode: 0xffffff80ul, .bitLength: 28}, // 11111111|11111111|11111111|1000 |
125 | { .byteValue: 29, .huffmanCode: 0xffffff90ul, .bitLength: 28}, // 11111111|11111111|11111111|1001 |
126 | { .byteValue: 30, .huffmanCode: 0xffffffa0ul, .bitLength: 28}, // 11111111|11111111|11111111|1010 |
127 | { .byteValue: 31, .huffmanCode: 0xffffffb0ul, .bitLength: 28}, // 11111111|11111111|11111111|1011 |
128 | { .byteValue: 32, .huffmanCode: 0x50000000ul, .bitLength: 6}, // ' ' 010100 |
129 | { .byteValue: 33, .huffmanCode: 0xfe000000ul, .bitLength: 10}, // '!' 11111110|00 |
130 | { .byteValue: 34, .huffmanCode: 0xfe400000ul, .bitLength: 10}, // '"' 11111110|01 |
131 | { .byteValue: 35, .huffmanCode: 0xffa00000ul, .bitLength: 12}, // '#' 11111111|1010 |
132 | { .byteValue: 36, .huffmanCode: 0xffc80000ul, .bitLength: 13}, // '$' 11111111|11001 |
133 | { .byteValue: 37, .huffmanCode: 0x54000000ul, .bitLength: 6}, // '%' 010101 |
134 | { .byteValue: 38, .huffmanCode: 0xf8000000ul, .bitLength: 8}, // '&' 11111000 |
135 | { .byteValue: 39, .huffmanCode: 0xff400000ul, .bitLength: 11}, // ''' 11111111|010 |
136 | { .byteValue: 40, .huffmanCode: 0xfe800000ul, .bitLength: 10}, // '(' 11111110|10 |
137 | { .byteValue: 41, .huffmanCode: 0xfec00000ul, .bitLength: 10}, // ')' 11111110|11 |
138 | { .byteValue: 42, .huffmanCode: 0xf9000000ul, .bitLength: 8}, // '*' 11111001 |
139 | { .byteValue: 43, .huffmanCode: 0xff600000ul, .bitLength: 11}, // '+' 11111111|011 |
140 | { .byteValue: 44, .huffmanCode: 0xfa000000ul, .bitLength: 8}, // ',' 11111010 |
141 | { .byteValue: 45, .huffmanCode: 0x58000000ul, .bitLength: 6}, // '-' 010110 |
142 | { .byteValue: 46, .huffmanCode: 0x5c000000ul, .bitLength: 6}, // '.' 010111 |
143 | { .byteValue: 47, .huffmanCode: 0x60000000ul, .bitLength: 6}, // '/' 011000 |
144 | { .byteValue: 48, .huffmanCode: 0x00000000ul, .bitLength: 5}, // '0' 00000 |
145 | { .byteValue: 49, .huffmanCode: 0x08000000ul, .bitLength: 5}, // '1' 00001 |
146 | { .byteValue: 50, .huffmanCode: 0x10000000ul, .bitLength: 5}, // '2' 00010 |
147 | { .byteValue: 51, .huffmanCode: 0x64000000ul, .bitLength: 6}, // '3' 011001 |
148 | { .byteValue: 52, .huffmanCode: 0x68000000ul, .bitLength: 6}, // '4' 011010 |
149 | { .byteValue: 53, .huffmanCode: 0x6c000000ul, .bitLength: 6}, // '5' 011011 |
150 | { .byteValue: 54, .huffmanCode: 0x70000000ul, .bitLength: 6}, // '6' 011100 |
151 | { .byteValue: 55, .huffmanCode: 0x74000000ul, .bitLength: 6}, // '7' 011101 |
152 | { .byteValue: 56, .huffmanCode: 0x78000000ul, .bitLength: 6}, // '8' 011110 |
153 | { .byteValue: 57, .huffmanCode: 0x7c000000ul, .bitLength: 6}, // '9' 011111 |
154 | { .byteValue: 58, .huffmanCode: 0xb8000000ul, .bitLength: 7}, // ':' 1011100 |
155 | { .byteValue: 59, .huffmanCode: 0xfb000000ul, .bitLength: 8}, // ';' 11111011 |
156 | { .byteValue: 60, .huffmanCode: 0xfff80000ul, .bitLength: 15}, // '<' 11111111|1111100 |
157 | { .byteValue: 61, .huffmanCode: 0x80000000ul, .bitLength: 6}, // '=' 100000 |
158 | { .byteValue: 62, .huffmanCode: 0xffb00000ul, .bitLength: 12}, // '>' 11111111|1011 |
159 | { .byteValue: 63, .huffmanCode: 0xff000000ul, .bitLength: 10}, // '?' 11111111|00 |
160 | { .byteValue: 64, .huffmanCode: 0xffd00000ul, .bitLength: 13}, // '@' 11111111|11010 |
161 | { .byteValue: 65, .huffmanCode: 0x84000000ul, .bitLength: 6}, // 'A' 100001 |
162 | { .byteValue: 66, .huffmanCode: 0xba000000ul, .bitLength: 7}, // 'B' 1011101 |
163 | { .byteValue: 67, .huffmanCode: 0xbc000000ul, .bitLength: 7}, // 'C' 1011110 |
164 | { .byteValue: 68, .huffmanCode: 0xbe000000ul, .bitLength: 7}, // 'D' 1011111 |
165 | { .byteValue: 69, .huffmanCode: 0xc0000000ul, .bitLength: 7}, // 'E' 1100000 |
166 | { .byteValue: 70, .huffmanCode: 0xc2000000ul, .bitLength: 7}, // 'F' 1100001 |
167 | { .byteValue: 71, .huffmanCode: 0xc4000000ul, .bitLength: 7}, // 'G' 1100010 |
168 | { .byteValue: 72, .huffmanCode: 0xc6000000ul, .bitLength: 7}, // 'H' 1100011 |
169 | { .byteValue: 73, .huffmanCode: 0xc8000000ul, .bitLength: 7}, // 'I' 1100100 |
170 | { .byteValue: 74, .huffmanCode: 0xca000000ul, .bitLength: 7}, // 'J' 1100101 |
171 | { .byteValue: 75, .huffmanCode: 0xcc000000ul, .bitLength: 7}, // 'K' 1100110 |
172 | { .byteValue: 76, .huffmanCode: 0xce000000ul, .bitLength: 7}, // 'L' 1100111 |
173 | { .byteValue: 77, .huffmanCode: 0xd0000000ul, .bitLength: 7}, // 'M' 1101000 |
174 | { .byteValue: 78, .huffmanCode: 0xd2000000ul, .bitLength: 7}, // 'N' 1101001 |
175 | { .byteValue: 79, .huffmanCode: 0xd4000000ul, .bitLength: 7}, // 'O' 1101010 |
176 | { .byteValue: 80, .huffmanCode: 0xd6000000ul, .bitLength: 7}, // 'P' 1101011 |
177 | { .byteValue: 81, .huffmanCode: 0xd8000000ul, .bitLength: 7}, // 'Q' 1101100 |
178 | { .byteValue: 82, .huffmanCode: 0xda000000ul, .bitLength: 7}, // 'R' 1101101 |
179 | { .byteValue: 83, .huffmanCode: 0xdc000000ul, .bitLength: 7}, // 'S' 1101110 |
180 | { .byteValue: 84, .huffmanCode: 0xde000000ul, .bitLength: 7}, // 'T' 1101111 |
181 | { .byteValue: 85, .huffmanCode: 0xe0000000ul, .bitLength: 7}, // 'U' 1110000 |
182 | { .byteValue: 86, .huffmanCode: 0xe2000000ul, .bitLength: 7}, // 'V' 1110001 |
183 | { .byteValue: 87, .huffmanCode: 0xe4000000ul, .bitLength: 7}, // 'W' 1110010 |
184 | { .byteValue: 88, .huffmanCode: 0xfc000000ul, .bitLength: 8}, // 'X' 11111100 |
185 | { .byteValue: 89, .huffmanCode: 0xe6000000ul, .bitLength: 7}, // 'Y' 1110011 |
186 | { .byteValue: 90, .huffmanCode: 0xfd000000ul, .bitLength: 8}, // 'Z' 11111101 |
187 | { .byteValue: 91, .huffmanCode: 0xffd80000ul, .bitLength: 13}, // '[' 11111111|11011 |
188 | { .byteValue: 92, .huffmanCode: 0xfffe0000ul, .bitLength: 19}, // '\' 11111111|11111110|000 |
189 | { .byteValue: 93, .huffmanCode: 0xffe00000ul, .bitLength: 13}, // ']' 11111111|11100 |
190 | { .byteValue: 94, .huffmanCode: 0xfff00000ul, .bitLength: 14}, // '^' 11111111|111100 |
191 | { .byteValue: 95, .huffmanCode: 0x88000000ul, .bitLength: 6}, // '_' 100010 |
192 | { .byteValue: 96, .huffmanCode: 0xfffa0000ul, .bitLength: 15}, // '`' 11111111|1111101 |
193 | { .byteValue: 97, .huffmanCode: 0x18000000ul, .bitLength: 5}, // 'a' 00011 |
194 | { .byteValue: 98, .huffmanCode: 0x8c000000ul, .bitLength: 6}, // 'b' 100011 |
195 | { .byteValue: 99, .huffmanCode: 0x20000000ul, .bitLength: 5}, // 'c' 00100 |
196 | {.byteValue: 100, .huffmanCode: 0x90000000ul, .bitLength: 6}, // 'd' 100100 |
197 | {.byteValue: 101, .huffmanCode: 0x28000000ul, .bitLength: 5}, // 'e' 00101 |
198 | {.byteValue: 102, .huffmanCode: 0x94000000ul, .bitLength: 6}, // 'f' 100101 |
199 | {.byteValue: 103, .huffmanCode: 0x98000000ul, .bitLength: 6}, // 'g' 100110 |
200 | {.byteValue: 104, .huffmanCode: 0x9c000000ul, .bitLength: 6}, // 'h' 100111 |
201 | {.byteValue: 105, .huffmanCode: 0x30000000ul, .bitLength: 5}, // 'i' 00110 |
202 | {.byteValue: 106, .huffmanCode: 0xe8000000ul, .bitLength: 7}, // 'j' 1110100 |
203 | {.byteValue: 107, .huffmanCode: 0xea000000ul, .bitLength: 7}, // 'k' 1110101 |
204 | {.byteValue: 108, .huffmanCode: 0xa0000000ul, .bitLength: 6}, // 'l' 101000 |
205 | {.byteValue: 109, .huffmanCode: 0xa4000000ul, .bitLength: 6}, // 'm' 101001 |
206 | {.byteValue: 110, .huffmanCode: 0xa8000000ul, .bitLength: 6}, // 'n' 101010 |
207 | {.byteValue: 111, .huffmanCode: 0x38000000ul, .bitLength: 5}, // 'o' 00111 |
208 | {.byteValue: 112, .huffmanCode: 0xac000000ul, .bitLength: 6}, // 'p' 101011 |
209 | {.byteValue: 113, .huffmanCode: 0xec000000ul, .bitLength: 7}, // 'q' 1110110 |
210 | {.byteValue: 114, .huffmanCode: 0xb0000000ul, .bitLength: 6}, // 'r' 101100 |
211 | {.byteValue: 115, .huffmanCode: 0x40000000ul, .bitLength: 5}, // 's' 01000 |
212 | {.byteValue: 116, .huffmanCode: 0x48000000ul, .bitLength: 5}, // 't' 01001 |
213 | {.byteValue: 117, .huffmanCode: 0xb4000000ul, .bitLength: 6}, // 'u' 101101 |
214 | {.byteValue: 118, .huffmanCode: 0xee000000ul, .bitLength: 7}, // 'v' 1110111 |
215 | {.byteValue: 119, .huffmanCode: 0xf0000000ul, .bitLength: 7}, // 'w' 1111000 |
216 | {.byteValue: 120, .huffmanCode: 0xf2000000ul, .bitLength: 7}, // 'x' 1111001 |
217 | {.byteValue: 121, .huffmanCode: 0xf4000000ul, .bitLength: 7}, // 'y' 1111010 |
218 | {.byteValue: 122, .huffmanCode: 0xf6000000ul, .bitLength: 7}, // 'z' 1111011 |
219 | {.byteValue: 123, .huffmanCode: 0xfffc0000ul, .bitLength: 15}, // '{' 11111111|1111110 |
220 | {.byteValue: 124, .huffmanCode: 0xff800000ul, .bitLength: 11}, // '|' 11111111|100 |
221 | {.byteValue: 125, .huffmanCode: 0xfff40000ul, .bitLength: 14}, // '}' 11111111|111101 |
222 | {.byteValue: 126, .huffmanCode: 0xffe80000ul, .bitLength: 13}, // '~' 11111111|11101 |
223 | {.byteValue: 127, .huffmanCode: 0xffffffc0ul, .bitLength: 28}, // 11111111|11111111|11111111|1100 |
224 | {.byteValue: 128, .huffmanCode: 0xfffe6000ul, .bitLength: 20}, // 11111111|11111110|0110 |
225 | {.byteValue: 129, .huffmanCode: 0xffff4800ul, .bitLength: 22}, // 11111111|11111111|010010 |
226 | {.byteValue: 130, .huffmanCode: 0xfffe7000ul, .bitLength: 20}, // 11111111|11111110|0111 |
227 | {.byteValue: 131, .huffmanCode: 0xfffe8000ul, .bitLength: 20}, // 11111111|11111110|1000 |
228 | {.byteValue: 132, .huffmanCode: 0xffff4c00ul, .bitLength: 22}, // 11111111|11111111|010011 |
229 | {.byteValue: 133, .huffmanCode: 0xffff5000ul, .bitLength: 22}, // 11111111|11111111|010100 |
230 | {.byteValue: 134, .huffmanCode: 0xffff5400ul, .bitLength: 22}, // 11111111|11111111|010101 |
231 | {.byteValue: 135, .huffmanCode: 0xffffb200ul, .bitLength: 23}, // 11111111|11111111|1011001 |
232 | {.byteValue: 136, .huffmanCode: 0xffff5800ul, .bitLength: 22}, // 11111111|11111111|010110 |
233 | {.byteValue: 137, .huffmanCode: 0xffffb400ul, .bitLength: 23}, // 11111111|11111111|1011010 |
234 | {.byteValue: 138, .huffmanCode: 0xffffb600ul, .bitLength: 23}, // 11111111|11111111|1011011 |
235 | {.byteValue: 139, .huffmanCode: 0xffffb800ul, .bitLength: 23}, // 11111111|11111111|1011100 |
236 | {.byteValue: 140, .huffmanCode: 0xffffba00ul, .bitLength: 23}, // 11111111|11111111|1011101 |
237 | {.byteValue: 141, .huffmanCode: 0xffffbc00ul, .bitLength: 23}, // 11111111|11111111|1011110 |
238 | {.byteValue: 142, .huffmanCode: 0xffffeb00ul, .bitLength: 24}, // 11111111|11111111|11101011 |
239 | {.byteValue: 143, .huffmanCode: 0xffffbe00ul, .bitLength: 23}, // 11111111|11111111|1011111 |
240 | {.byteValue: 144, .huffmanCode: 0xffffec00ul, .bitLength: 24}, // 11111111|11111111|11101100 |
241 | {.byteValue: 145, .huffmanCode: 0xffffed00ul, .bitLength: 24}, // 11111111|11111111|11101101 |
242 | {.byteValue: 146, .huffmanCode: 0xffff5c00ul, .bitLength: 22}, // 11111111|11111111|010111 |
243 | {.byteValue: 147, .huffmanCode: 0xffffc000ul, .bitLength: 23}, // 11111111|11111111|1100000 |
244 | {.byteValue: 148, .huffmanCode: 0xffffee00ul, .bitLength: 24}, // 11111111|11111111|11101110 |
245 | {.byteValue: 149, .huffmanCode: 0xffffc200ul, .bitLength: 23}, // 11111111|11111111|1100001 |
246 | {.byteValue: 150, .huffmanCode: 0xffffc400ul, .bitLength: 23}, // 11111111|11111111|1100010 |
247 | {.byteValue: 151, .huffmanCode: 0xffffc600ul, .bitLength: 23}, // 11111111|11111111|1100011 |
248 | {.byteValue: 152, .huffmanCode: 0xffffc800ul, .bitLength: 23}, // 11111111|11111111|1100100 |
249 | {.byteValue: 153, .huffmanCode: 0xfffee000ul, .bitLength: 21}, // 11111111|11111110|11100 |
250 | {.byteValue: 154, .huffmanCode: 0xffff6000ul, .bitLength: 22}, // 11111111|11111111|011000 |
251 | {.byteValue: 155, .huffmanCode: 0xffffca00ul, .bitLength: 23}, // 11111111|11111111|1100101 |
252 | {.byteValue: 156, .huffmanCode: 0xffff6400ul, .bitLength: 22}, // 11111111|11111111|011001 |
253 | {.byteValue: 157, .huffmanCode: 0xffffcc00ul, .bitLength: 23}, // 11111111|11111111|1100110 |
254 | {.byteValue: 158, .huffmanCode: 0xffffce00ul, .bitLength: 23}, // 11111111|11111111|1100111 |
255 | {.byteValue: 159, .huffmanCode: 0xffffef00ul, .bitLength: 24}, // 11111111|11111111|11101111 |
256 | {.byteValue: 160, .huffmanCode: 0xffff6800ul, .bitLength: 22}, // 11111111|11111111|011010 |
257 | {.byteValue: 161, .huffmanCode: 0xfffee800ul, .bitLength: 21}, // 11111111|11111110|11101 |
258 | {.byteValue: 162, .huffmanCode: 0xfffe9000ul, .bitLength: 20}, // 11111111|11111110|1001 |
259 | {.byteValue: 163, .huffmanCode: 0xffff6c00ul, .bitLength: 22}, // 11111111|11111111|011011 |
260 | {.byteValue: 164, .huffmanCode: 0xffff7000ul, .bitLength: 22}, // 11111111|11111111|011100 |
261 | {.byteValue: 165, .huffmanCode: 0xffffd000ul, .bitLength: 23}, // 11111111|11111111|1101000 |
262 | {.byteValue: 166, .huffmanCode: 0xffffd200ul, .bitLength: 23}, // 11111111|11111111|1101001 |
263 | {.byteValue: 167, .huffmanCode: 0xfffef000ul, .bitLength: 21}, // 11111111|11111110|11110 |
264 | {.byteValue: 168, .huffmanCode: 0xffffd400ul, .bitLength: 23}, // 11111111|11111111|1101010 |
265 | {.byteValue: 169, .huffmanCode: 0xffff7400ul, .bitLength: 22}, // 11111111|11111111|011101 |
266 | {.byteValue: 170, .huffmanCode: 0xffff7800ul, .bitLength: 22}, // 11111111|11111111|011110 |
267 | {.byteValue: 171, .huffmanCode: 0xfffff000ul, .bitLength: 24}, // 11111111|11111111|11110000 |
268 | {.byteValue: 172, .huffmanCode: 0xfffef800ul, .bitLength: 21}, // 11111111|11111110|11111 |
269 | {.byteValue: 173, .huffmanCode: 0xffff7c00ul, .bitLength: 22}, // 11111111|11111111|011111 |
270 | {.byteValue: 174, .huffmanCode: 0xffffd600ul, .bitLength: 23}, // 11111111|11111111|1101011 |
271 | {.byteValue: 175, .huffmanCode: 0xffffd800ul, .bitLength: 23}, // 11111111|11111111|1101100 |
272 | {.byteValue: 176, .huffmanCode: 0xffff0000ul, .bitLength: 21}, // 11111111|11111111|00000 |
273 | {.byteValue: 177, .huffmanCode: 0xffff0800ul, .bitLength: 21}, // 11111111|11111111|00001 |
274 | {.byteValue: 178, .huffmanCode: 0xffff8000ul, .bitLength: 22}, // 11111111|11111111|100000 |
275 | {.byteValue: 179, .huffmanCode: 0xffff1000ul, .bitLength: 21}, // 11111111|11111111|00010 |
276 | {.byteValue: 180, .huffmanCode: 0xffffda00ul, .bitLength: 23}, // 11111111|11111111|1101101 |
277 | {.byteValue: 181, .huffmanCode: 0xffff8400ul, .bitLength: 22}, // 11111111|11111111|100001 |
278 | {.byteValue: 182, .huffmanCode: 0xffffdc00ul, .bitLength: 23}, // 11111111|11111111|1101110 |
279 | {.byteValue: 183, .huffmanCode: 0xffffde00ul, .bitLength: 23}, // 11111111|11111111|1101111 |
280 | {.byteValue: 184, .huffmanCode: 0xfffea000ul, .bitLength: 20}, // 11111111|11111110|1010 |
281 | {.byteValue: 185, .huffmanCode: 0xffff8800ul, .bitLength: 22}, // 11111111|11111111|100010 |
282 | {.byteValue: 186, .huffmanCode: 0xffff8c00ul, .bitLength: 22}, // 11111111|11111111|100011 |
283 | {.byteValue: 187, .huffmanCode: 0xffff9000ul, .bitLength: 22}, // 11111111|11111111|100100 |
284 | {.byteValue: 188, .huffmanCode: 0xffffe000ul, .bitLength: 23}, // 11111111|11111111|1110000 |
285 | {.byteValue: 189, .huffmanCode: 0xffff9400ul, .bitLength: 22}, // 11111111|11111111|100101 |
286 | {.byteValue: 190, .huffmanCode: 0xffff9800ul, .bitLength: 22}, // 11111111|11111111|100110 |
287 | {.byteValue: 191, .huffmanCode: 0xffffe200ul, .bitLength: 23}, // 11111111|11111111|1110001 |
288 | {.byteValue: 192, .huffmanCode: 0xfffff800ul, .bitLength: 26}, // 11111111|11111111|11111000|00 |
289 | {.byteValue: 193, .huffmanCode: 0xfffff840ul, .bitLength: 26}, // 11111111|11111111|11111000|01 |
290 | {.byteValue: 194, .huffmanCode: 0xfffeb000ul, .bitLength: 20}, // 11111111|11111110|1011 |
291 | {.byteValue: 195, .huffmanCode: 0xfffe2000ul, .bitLength: 19}, // 11111111|11111110|001 |
292 | {.byteValue: 196, .huffmanCode: 0xffff9c00ul, .bitLength: 22}, // 11111111|11111111|100111 |
293 | {.byteValue: 197, .huffmanCode: 0xffffe400ul, .bitLength: 23}, // 11111111|11111111|1110010 |
294 | {.byteValue: 198, .huffmanCode: 0xffffa000ul, .bitLength: 22}, // 11111111|11111111|101000 |
295 | {.byteValue: 199, .huffmanCode: 0xfffff600ul, .bitLength: 25}, // 11111111|11111111|11110110|0 |
296 | {.byteValue: 200, .huffmanCode: 0xfffff880ul, .bitLength: 26}, // 11111111|11111111|11111000|10 |
297 | {.byteValue: 201, .huffmanCode: 0xfffff8c0ul, .bitLength: 26}, // 11111111|11111111|11111000|11 |
298 | {.byteValue: 202, .huffmanCode: 0xfffff900ul, .bitLength: 26}, // 11111111|11111111|11111001|00 |
299 | {.byteValue: 203, .huffmanCode: 0xfffffbc0ul, .bitLength: 27}, // 11111111|11111111|11111011|110 |
300 | {.byteValue: 204, .huffmanCode: 0xfffffbe0ul, .bitLength: 27}, // 11111111|11111111|11111011|111 |
301 | {.byteValue: 205, .huffmanCode: 0xfffff940ul, .bitLength: 26}, // 11111111|11111111|11111001|01 |
302 | {.byteValue: 206, .huffmanCode: 0xfffff100ul, .bitLength: 24}, // 11111111|11111111|11110001 |
303 | {.byteValue: 207, .huffmanCode: 0xfffff680ul, .bitLength: 25}, // 11111111|11111111|11110110|1 |
304 | {.byteValue: 208, .huffmanCode: 0xfffe4000ul, .bitLength: 19}, // 11111111|11111110|010 |
305 | {.byteValue: 209, .huffmanCode: 0xffff1800ul, .bitLength: 21}, // 11111111|11111111|00011 |
306 | {.byteValue: 210, .huffmanCode: 0xfffff980ul, .bitLength: 26}, // 11111111|11111111|11111001|10 |
307 | {.byteValue: 211, .huffmanCode: 0xfffffc00ul, .bitLength: 27}, // 11111111|11111111|11111100|000 |
308 | {.byteValue: 212, .huffmanCode: 0xfffffc20ul, .bitLength: 27}, // 11111111|11111111|11111100|001 |
309 | {.byteValue: 213, .huffmanCode: 0xfffff9c0ul, .bitLength: 26}, // 11111111|11111111|11111001|11 |
310 | {.byteValue: 214, .huffmanCode: 0xfffffc40ul, .bitLength: 27}, // 11111111|11111111|11111100|010 |
311 | {.byteValue: 215, .huffmanCode: 0xfffff200ul, .bitLength: 24}, // 11111111|11111111|11110010 |
312 | {.byteValue: 216, .huffmanCode: 0xffff2000ul, .bitLength: 21}, // 11111111|11111111|00100 |
313 | {.byteValue: 217, .huffmanCode: 0xffff2800ul, .bitLength: 21}, // 11111111|11111111|00101 |
314 | {.byteValue: 218, .huffmanCode: 0xfffffa00ul, .bitLength: 26}, // 11111111|11111111|11111010|00 |
315 | {.byteValue: 219, .huffmanCode: 0xfffffa40ul, .bitLength: 26}, // 11111111|11111111|11111010|01 |
316 | {.byteValue: 220, .huffmanCode: 0xffffffd0ul, .bitLength: 28}, // 11111111|11111111|11111111|1101 |
317 | {.byteValue: 221, .huffmanCode: 0xfffffc60ul, .bitLength: 27}, // 11111111|11111111|11111100|011 |
318 | {.byteValue: 222, .huffmanCode: 0xfffffc80ul, .bitLength: 27}, // 11111111|11111111|11111100|100 |
319 | {.byteValue: 223, .huffmanCode: 0xfffffca0ul, .bitLength: 27}, // 11111111|11111111|11111100|101 |
320 | {.byteValue: 224, .huffmanCode: 0xfffec000ul, .bitLength: 20}, // 11111111|11111110|1100 |
321 | {.byteValue: 225, .huffmanCode: 0xfffff300ul, .bitLength: 24}, // 11111111|11111111|11110011 |
322 | {.byteValue: 226, .huffmanCode: 0xfffed000ul, .bitLength: 20}, // 11111111|11111110|1101 |
323 | {.byteValue: 227, .huffmanCode: 0xffff3000ul, .bitLength: 21}, // 11111111|11111111|00110 |
324 | {.byteValue: 228, .huffmanCode: 0xffffa400ul, .bitLength: 22}, // 11111111|11111111|101001 |
325 | {.byteValue: 229, .huffmanCode: 0xffff3800ul, .bitLength: 21}, // 11111111|11111111|00111 |
326 | {.byteValue: 230, .huffmanCode: 0xffff4000ul, .bitLength: 21}, // 11111111|11111111|01000 |
327 | {.byteValue: 231, .huffmanCode: 0xffffe600ul, .bitLength: 23}, // 11111111|11111111|1110011 |
328 | {.byteValue: 232, .huffmanCode: 0xffffa800ul, .bitLength: 22}, // 11111111|11111111|101010 |
329 | {.byteValue: 233, .huffmanCode: 0xffffac00ul, .bitLength: 22}, // 11111111|11111111|101011 |
330 | {.byteValue: 234, .huffmanCode: 0xfffff700ul, .bitLength: 25}, // 11111111|11111111|11110111|0 |
331 | {.byteValue: 235, .huffmanCode: 0xfffff780ul, .bitLength: 25}, // 11111111|11111111|11110111|1 |
332 | {.byteValue: 236, .huffmanCode: 0xfffff400ul, .bitLength: 24}, // 11111111|11111111|11110100 |
333 | {.byteValue: 237, .huffmanCode: 0xfffff500ul, .bitLength: 24}, // 11111111|11111111|11110101 |
334 | {.byteValue: 238, .huffmanCode: 0xfffffa80ul, .bitLength: 26}, // 11111111|11111111|11111010|10 |
335 | {.byteValue: 239, .huffmanCode: 0xffffe800ul, .bitLength: 23}, // 11111111|11111111|1110100 |
336 | {.byteValue: 240, .huffmanCode: 0xfffffac0ul, .bitLength: 26}, // 11111111|11111111|11111010|11 |
337 | {.byteValue: 241, .huffmanCode: 0xfffffcc0ul, .bitLength: 27}, // 11111111|11111111|11111100|110 |
338 | {.byteValue: 242, .huffmanCode: 0xfffffb00ul, .bitLength: 26}, // 11111111|11111111|11111011|00 |
339 | {.byteValue: 243, .huffmanCode: 0xfffffb40ul, .bitLength: 26}, // 11111111|11111111|11111011|01 |
340 | {.byteValue: 244, .huffmanCode: 0xfffffce0ul, .bitLength: 27}, // 11111111|11111111|11111100|111 |
341 | {.byteValue: 245, .huffmanCode: 0xfffffd00ul, .bitLength: 27}, // 11111111|11111111|11111101|000 |
342 | {.byteValue: 246, .huffmanCode: 0xfffffd20ul, .bitLength: 27}, // 11111111|11111111|11111101|001 |
343 | {.byteValue: 247, .huffmanCode: 0xfffffd40ul, .bitLength: 27}, // 11111111|11111111|11111101|010 |
344 | {.byteValue: 248, .huffmanCode: 0xfffffd60ul, .bitLength: 27}, // 11111111|11111111|11111101|011 |
345 | {.byteValue: 249, .huffmanCode: 0xffffffe0ul, .bitLength: 28}, // 11111111|11111111|11111111|1110 |
346 | {.byteValue: 250, .huffmanCode: 0xfffffd80ul, .bitLength: 27}, // 11111111|11111111|11111101|100 |
347 | {.byteValue: 251, .huffmanCode: 0xfffffda0ul, .bitLength: 27}, // 11111111|11111111|11111101|101 |
348 | {.byteValue: 252, .huffmanCode: 0xfffffdc0ul, .bitLength: 27}, // 11111111|11111111|11111101|110 |
349 | {.byteValue: 253, .huffmanCode: 0xfffffde0ul, .bitLength: 27}, // 11111111|11111111|11111101|111 |
350 | {.byteValue: 254, .huffmanCode: 0xfffffe00ul, .bitLength: 27}, // 11111111|11111111|11111110|000 |
351 | {.byteValue: 255, .huffmanCode: 0xfffffb80ul, .bitLength: 26}, // 11111111|11111111|11111011|10 |
352 | {.byteValue: 256, .huffmanCode: 0xfffffffcul, .bitLength: 30} // EOS 11111111|11111111|11111111|111111 |
353 | }; |
354 | |
355 | void write_huffman_code(BitOStream &outputStream, const CodeEntry &code) |
356 | { |
357 | // Append octet by octet. |
358 | auto bitLength = code.bitLength; |
359 | const auto hc = code.huffmanCode >> (32 - bitLength); |
360 | |
361 | if (bitLength > 24) { |
362 | outputStream.writeBits(bits: uchar(hc >> 24), bitLength: bitLength - 24); |
363 | bitLength = 24; |
364 | } |
365 | |
366 | if (bitLength > 16) { |
367 | outputStream.writeBits(bits: uchar(hc >> 16), bitLength: bitLength - 16); |
368 | bitLength = 16; |
369 | } |
370 | |
371 | if (bitLength > 8) { |
372 | outputStream.writeBits(bits: uchar(hc >> 8), bitLength: bitLength - 8); |
373 | bitLength = 8; |
374 | } |
375 | |
376 | outputStream.writeBits(bits: uchar(hc), bitLength); |
377 | } |
378 | |
379 | } |
380 | |
381 | // That's from HPACK's specs - we deal with octets. |
382 | static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected" ); |
383 | |
384 | quint64 huffman_encoded_bit_length(const QByteArray &inputData) |
385 | { |
386 | quint64 bitLength = 0; |
387 | for (int i = 0, e = inputData.size(); i < e; ++i) |
388 | bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength; |
389 | |
390 | return bitLength; |
391 | } |
392 | |
393 | void huffman_encode_string(const QByteArray &inputData, BitOStream &outputStream) |
394 | { |
395 | for (int i = 0, e = inputData.size(); i < e; ++i) { |
396 | const auto value = uchar(inputData[i]); |
397 | write_huffman_code(outputStream, code: staticHuffmanCodeTable[value]); |
398 | } |
399 | |
400 | // Pad bits ... |
401 | if (outputStream.bitLength() % 8) |
402 | outputStream.writeBits(bits: 0xff, bitLength: 8 - outputStream.bitLength() % 8); |
403 | } |
404 | |
405 | bool padding_is_valid(quint32 chunk, quint32 nBits) |
406 | { |
407 | Q_ASSERT(nBits); |
408 | |
409 | // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be |
410 | // treated as a decoding error." |
411 | if (nBits > 7) |
412 | return false; |
413 | // HPACK, 5.2: |
414 | // "A padding not corresponding to the most significant bits |
415 | // of the code for the EOS symbol MUST be treated as a decoding error." |
416 | return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1); |
417 | } |
418 | |
419 | HuffmanDecoder::HuffmanDecoder() |
420 | : minCodeLength() |
421 | { |
422 | const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0]; |
423 | |
424 | std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes); |
425 | // Now we sort: by bit length first (in the descending order) and by the symbol |
426 | // value (descending). Descending order: to make sure we do not create prefix tables with |
427 | // short 'indexLength' first and having longer codes that do not fit into such tables later. |
428 | std::sort(first: symbols.begin(), last: symbols.end(), comp: [](const CodeEntry &code1, const CodeEntry &code2) { |
429 | if (code1.bitLength == code2.bitLength) |
430 | return code1.byteValue > code2.byteValue; |
431 | return code1.bitLength > code2.bitLength; |
432 | }); |
433 | |
434 | minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5. |
435 | |
436 | // TODO: add a verification - Huffman codes |
437 | // within a given bit length range also |
438 | // should be in descending order. |
439 | |
440 | // Initialize 'prefixTables' and 'tableData'. |
441 | addTable(prefixLength: 0, indexLength: quint32(BitConstants::rootPrefix)); // 'root' table. |
442 | |
443 | for (const auto &s : symbols) { |
444 | quint32 tableIndex = 0; |
445 | while (true) { |
446 | Q_ASSERT(tableIndex < prefixTables.size()); |
447 | // Note, by value - since prefixTables will be updated in between. |
448 | const auto table = prefixTables[tableIndex]; |
449 | // We skip prefixed bits (if any) and use indexed bits only: |
450 | const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength); |
451 | // Again, by value. |
452 | PrefixTableEntry entry = tableEntry(table, index: entryIndex); |
453 | // How many bits were coded by previous tables and this table: |
454 | const auto codedLength = table.prefixLength + table.indexLength; |
455 | if (codedLength < s.bitLength) { |
456 | // We have to add a new prefix table ... (if it's not done yet). |
457 | if (!entry.bitLength) { |
458 | entry.nextTable = addTable(prefixLength: codedLength, indexLength: std::min<quint32>(a: quint32(BitConstants::childPrefix), |
459 | b: s.bitLength - codedLength)); |
460 | entry.bitLength = s.bitLength; |
461 | entry.byteValue = s.byteValue; |
462 | setTableEntry(table, index: entryIndex, entry); |
463 | } |
464 | tableIndex = entry.nextTable; |
465 | } else { |
466 | // We found the slot for our code (terminal entry): |
467 | entry.byteValue = s.byteValue; |
468 | entry.bitLength = s.bitLength; |
469 | // Refer to our own table as 'nextTable': |
470 | entry.nextTable = tableIndex; |
471 | setTableEntry(table, index: entryIndex, entry); |
472 | break; |
473 | } |
474 | } |
475 | } |
476 | |
477 | // Now, we have a table(s) and have to fill 'holes' to |
478 | // 'fix' terminal entries. |
479 | for (const auto &table : prefixTables) { |
480 | const quint32 codedLength = table.prefixLength + table.indexLength; |
481 | for (quint32 j = 0; j < table.size();) { |
482 | const PrefixTableEntry &entry = tableEntry(table, index: j); |
483 | if (entry.bitLength && entry.bitLength < codedLength) { |
484 | const quint32 range = 1 << (codedLength - entry.bitLength); |
485 | for (quint32 k = 1; k < range; ++k) |
486 | setTableEntry(table, index: j + k, entry); |
487 | j += range; |
488 | } else { |
489 | ++j; |
490 | } |
491 | } |
492 | } |
493 | } |
494 | |
495 | bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer) |
496 | { |
497 | while (true) { |
498 | quint32 chunk = 0; |
499 | const quint32 readBits = inputStream.peekBits(from: inputStream.streamOffset(), length: 32, dstPtr: &chunk); |
500 | if (!readBits) |
501 | return !inputStream.hasMoreBits(); |
502 | |
503 | if (readBits < minCodeLength) { |
504 | inputStream.skipBits(nBits: readBits); |
505 | return padding_is_valid(chunk, nBits: readBits); |
506 | } |
507 | |
508 | quint32 tableIndex = 0; |
509 | const PrefixTable *table = &prefixTables[tableIndex]; |
510 | quint32 entryIndex = chunk >> (32 - table->indexLength); |
511 | PrefixTableEntry entry = tableEntry(table: *table, index: entryIndex); |
512 | |
513 | while (true) { |
514 | if (entry.nextTable == tableIndex) |
515 | break; |
516 | |
517 | tableIndex = entry.nextTable; |
518 | table = &prefixTables[tableIndex]; |
519 | entryIndex = chunk << table->prefixLength >> (32 - table->indexLength); |
520 | entry = tableEntry(table: *table, index: entryIndex); |
521 | } |
522 | |
523 | if (entry.bitLength > readBits) { |
524 | inputStream.skipBits(nBits: readBits); |
525 | return padding_is_valid(chunk, nBits: readBits); |
526 | } |
527 | |
528 | if (!entry.bitLength || entry.byteValue == 256) { |
529 | //EOS (256) == compression error (HPACK). |
530 | inputStream.skipBits(nBits: readBits); |
531 | return false; |
532 | } |
533 | |
534 | outputBuffer.append(c: entry.byteValue); |
535 | inputStream.skipBits(nBits: entry.bitLength); |
536 | } |
537 | |
538 | return false; |
539 | } |
540 | |
541 | quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index) |
542 | { |
543 | PrefixTable newTable{prefix, index}; |
544 | newTable.offset = quint32(tableData.size()); |
545 | prefixTables.push_back(x: newTable); |
546 | // Add entries for this table: |
547 | tableData.resize(new_size: tableData.size() + newTable.size()); |
548 | |
549 | return quint32(prefixTables.size() - 1); |
550 | } |
551 | |
552 | PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index) |
553 | { |
554 | Q_ASSERT(index < table.size()); |
555 | return tableData[table.offset + index]; |
556 | } |
557 | |
558 | void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index, |
559 | const PrefixTableEntry &entry) |
560 | { |
561 | Q_ASSERT(index < table.size()); |
562 | tableData[table.offset + index] = entry; |
563 | } |
564 | |
565 | bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer) |
566 | { |
567 | Q_ASSERT(outputBuffer); |
568 | |
569 | static HuffmanDecoder decoder; |
570 | return decoder.decodeStream(inputStream, outputBuffer&: *outputBuffer); |
571 | } |
572 | |
573 | } |
574 | |
575 | QT_END_NAMESPACE |
576 | |