1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "bitstreams_p.h" |
5 | #include "huffman_p.h" |
6 | |
7 | #include <QtCore/qbytearray.h> |
8 | |
9 | #include <algorithm> |
10 | #include <limits> |
11 | |
12 | QT_BEGIN_NAMESPACE |
13 | |
14 | namespace HPack |
15 | { |
16 | |
17 | /* |
18 | The static Huffman code used here was extracted from: |
19 | https://http2.github.io/http2-spec/compression.html#huffman.code |
20 | |
21 | This code was generated from statistics obtained on a large |
22 | sample of HTTP headers. It is a canonical Huffman code |
23 | with some tweaking to ensure that no symbol has a unique |
24 | code length. All codes were left-aligned - for implementation |
25 | convenience. |
26 | |
27 | Using binary trees to implement decoding would be prohibitively |
28 | expensive (both memory and time-wise). Instead we use a table-based |
29 | approach and any given code itself works as an index into such table(s). |
30 | We have 256 possible byte values and code lengths in |
31 | a range [5, 26]. This would require a huge table (and most of entries |
32 | would be 'wasted', since we only have to encode 256 elements). |
33 | Instead we use multi-level tables. The first level table |
34 | is using 9-bit length index; some entries in this table are 'terminal', |
35 | some reference the next level table(s). |
36 | |
37 | For example, bytes with values 48 and 49 (ASCII codes for '0' and '1') |
38 | both have code length 5, Huffman codes are: 00000 and 00001. They |
39 | both are placed in the 'root' table, |
40 | the 'root' table has index length == 9: |
41 | [00000 | 4 remaining bits] |
42 | ... |
43 | [00001 | 4 remaining bits] |
44 | |
45 | All entries with indices between these two will 'point' to value 48 |
46 | with bitLength == 5 so that bit stream (for example) 000001010 will be |
47 | decoded as: 48 + "put 1010 back into bitstream". |
48 | |
49 | A good description can be found here: |
50 | http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007 |
51 | or just google "Efficient Huffman Decoding". |
52 | Also see comments below about 'filling holes'. |
53 | */ |
54 | |
55 | namespace |
56 | { |
57 | |
58 | const CodeEntry staticHuffmanCodeTable[] |
59 | { |
60 | { .byteValue: 0, .huffmanCode: 0xffc00000ul, .bitLength: 13}, // 11111111|11000 |
61 | { .byteValue: 1, .huffmanCode: 0xffffb000ul, .bitLength: 23}, // 11111111|11111111|1011000 |
62 | { .byteValue: 2, .huffmanCode: 0xfffffe20ul, .bitLength: 28}, // 11111111|11111111|11111110|0010 |
63 | { .byteValue: 3, .huffmanCode: 0xfffffe30ul, .bitLength: 28}, // 11111111|11111111|11111110|0011 |
64 | { .byteValue: 4, .huffmanCode: 0xfffffe40ul, .bitLength: 28}, // 11111111|11111111|11111110|0100 |
65 | { .byteValue: 5, .huffmanCode: 0xfffffe50ul, .bitLength: 28}, // 11111111|11111111|11111110|0101 |
66 | { .byteValue: 6, .huffmanCode: 0xfffffe60ul, .bitLength: 28}, // 11111111|11111111|11111110|0110 |
67 | { .byteValue: 7, .huffmanCode: 0xfffffe70ul, .bitLength: 28}, // 11111111|11111111|11111110|0111 |
68 | { .byteValue: 8, .huffmanCode: 0xfffffe80ul, .bitLength: 28}, // 11111111|11111111|11111110|1000 |
69 | { .byteValue: 9, .huffmanCode: 0xffffea00ul, .bitLength: 24}, // 11111111|11111111|11101010 |
70 | { .byteValue: 10, .huffmanCode: 0xfffffff0ul, .bitLength: 30}, // 11111111|11111111|11111111|111100 |
71 | { .byteValue: 11, .huffmanCode: 0xfffffe90ul, .bitLength: 28}, // 11111111|11111111|11111110|1001 |
72 | { .byteValue: 12, .huffmanCode: 0xfffffea0ul, .bitLength: 28}, // 11111111|11111111|11111110|1010 |
73 | { .byteValue: 13, .huffmanCode: 0xfffffff4ul, .bitLength: 30}, // 11111111|11111111|11111111|111101 |
74 | { .byteValue: 14, .huffmanCode: 0xfffffeb0ul, .bitLength: 28}, // 11111111|11111111|11111110|1011 |
75 | { .byteValue: 15, .huffmanCode: 0xfffffec0ul, .bitLength: 28}, // 11111111|11111111|11111110|1100 |
76 | { .byteValue: 16, .huffmanCode: 0xfffffed0ul, .bitLength: 28}, // 11111111|11111111|11111110|1101 |
77 | { .byteValue: 17, .huffmanCode: 0xfffffee0ul, .bitLength: 28}, // 11111111|11111111|11111110|1110 |
78 | { .byteValue: 18, .huffmanCode: 0xfffffef0ul, .bitLength: 28}, // 11111111|11111111|11111110|1111 |
79 | { .byteValue: 19, .huffmanCode: 0xffffff00ul, .bitLength: 28}, // 11111111|11111111|11111111|0000 |
80 | { .byteValue: 20, .huffmanCode: 0xffffff10ul, .bitLength: 28}, // 11111111|11111111|11111111|0001 |
81 | { .byteValue: 21, .huffmanCode: 0xffffff20ul, .bitLength: 28}, // 11111111|11111111|11111111|0010 |
82 | { .byteValue: 22, .huffmanCode: 0xfffffff8ul, .bitLength: 30}, // 11111111|11111111|11111111|111110 |
83 | { .byteValue: 23, .huffmanCode: 0xffffff30ul, .bitLength: 28}, // 11111111|11111111|11111111|0011 |
84 | { .byteValue: 24, .huffmanCode: 0xffffff40ul, .bitLength: 28}, // 11111111|11111111|11111111|0100 |
85 | { .byteValue: 25, .huffmanCode: 0xffffff50ul, .bitLength: 28}, // 11111111|11111111|11111111|0101 |
86 | { .byteValue: 26, .huffmanCode: 0xffffff60ul, .bitLength: 28}, // 11111111|11111111|11111111|0110 |
87 | { .byteValue: 27, .huffmanCode: 0xffffff70ul, .bitLength: 28}, // 11111111|11111111|11111111|0111 |
88 | { .byteValue: 28, .huffmanCode: 0xffffff80ul, .bitLength: 28}, // 11111111|11111111|11111111|1000 |
89 | { .byteValue: 29, .huffmanCode: 0xffffff90ul, .bitLength: 28}, // 11111111|11111111|11111111|1001 |
90 | { .byteValue: 30, .huffmanCode: 0xffffffa0ul, .bitLength: 28}, // 11111111|11111111|11111111|1010 |
91 | { .byteValue: 31, .huffmanCode: 0xffffffb0ul, .bitLength: 28}, // 11111111|11111111|11111111|1011 |
92 | { .byteValue: 32, .huffmanCode: 0x50000000ul, .bitLength: 6}, // ' ' 010100 |
93 | { .byteValue: 33, .huffmanCode: 0xfe000000ul, .bitLength: 10}, // '!' 11111110|00 |
94 | { .byteValue: 34, .huffmanCode: 0xfe400000ul, .bitLength: 10}, // '"' 11111110|01 |
95 | { .byteValue: 35, .huffmanCode: 0xffa00000ul, .bitLength: 12}, // '#' 11111111|1010 |
96 | { .byteValue: 36, .huffmanCode: 0xffc80000ul, .bitLength: 13}, // '$' 11111111|11001 |
97 | { .byteValue: 37, .huffmanCode: 0x54000000ul, .bitLength: 6}, // '%' 010101 |
98 | { .byteValue: 38, .huffmanCode: 0xf8000000ul, .bitLength: 8}, // '&' 11111000 |
99 | { .byteValue: 39, .huffmanCode: 0xff400000ul, .bitLength: 11}, // ''' 11111111|010 |
100 | { .byteValue: 40, .huffmanCode: 0xfe800000ul, .bitLength: 10}, // '(' 11111110|10 |
101 | { .byteValue: 41, .huffmanCode: 0xfec00000ul, .bitLength: 10}, // ')' 11111110|11 |
102 | { .byteValue: 42, .huffmanCode: 0xf9000000ul, .bitLength: 8}, // '*' 11111001 |
103 | { .byteValue: 43, .huffmanCode: 0xff600000ul, .bitLength: 11}, // '+' 11111111|011 |
104 | { .byteValue: 44, .huffmanCode: 0xfa000000ul, .bitLength: 8}, // ',' 11111010 |
105 | { .byteValue: 45, .huffmanCode: 0x58000000ul, .bitLength: 6}, // '-' 010110 |
106 | { .byteValue: 46, .huffmanCode: 0x5c000000ul, .bitLength: 6}, // '.' 010111 |
107 | { .byteValue: 47, .huffmanCode: 0x60000000ul, .bitLength: 6}, // '/' 011000 |
108 | { .byteValue: 48, .huffmanCode: 0x00000000ul, .bitLength: 5}, // '0' 00000 |
109 | { .byteValue: 49, .huffmanCode: 0x08000000ul, .bitLength: 5}, // '1' 00001 |
110 | { .byteValue: 50, .huffmanCode: 0x10000000ul, .bitLength: 5}, // '2' 00010 |
111 | { .byteValue: 51, .huffmanCode: 0x64000000ul, .bitLength: 6}, // '3' 011001 |
112 | { .byteValue: 52, .huffmanCode: 0x68000000ul, .bitLength: 6}, // '4' 011010 |
113 | { .byteValue: 53, .huffmanCode: 0x6c000000ul, .bitLength: 6}, // '5' 011011 |
114 | { .byteValue: 54, .huffmanCode: 0x70000000ul, .bitLength: 6}, // '6' 011100 |
115 | { .byteValue: 55, .huffmanCode: 0x74000000ul, .bitLength: 6}, // '7' 011101 |
116 | { .byteValue: 56, .huffmanCode: 0x78000000ul, .bitLength: 6}, // '8' 011110 |
117 | { .byteValue: 57, .huffmanCode: 0x7c000000ul, .bitLength: 6}, // '9' 011111 |
118 | { .byteValue: 58, .huffmanCode: 0xb8000000ul, .bitLength: 7}, // ':' 1011100 |
119 | { .byteValue: 59, .huffmanCode: 0xfb000000ul, .bitLength: 8}, // ';' 11111011 |
120 | { .byteValue: 60, .huffmanCode: 0xfff80000ul, .bitLength: 15}, // '<' 11111111|1111100 |
121 | { .byteValue: 61, .huffmanCode: 0x80000000ul, .bitLength: 6}, // '=' 100000 |
122 | { .byteValue: 62, .huffmanCode: 0xffb00000ul, .bitLength: 12}, // '>' 11111111|1011 |
123 | { .byteValue: 63, .huffmanCode: 0xff000000ul, .bitLength: 10}, // '?' 11111111|00 |
124 | { .byteValue: 64, .huffmanCode: 0xffd00000ul, .bitLength: 13}, // '@' 11111111|11010 |
125 | { .byteValue: 65, .huffmanCode: 0x84000000ul, .bitLength: 6}, // 'A' 100001 |
126 | { .byteValue: 66, .huffmanCode: 0xba000000ul, .bitLength: 7}, // 'B' 1011101 |
127 | { .byteValue: 67, .huffmanCode: 0xbc000000ul, .bitLength: 7}, // 'C' 1011110 |
128 | { .byteValue: 68, .huffmanCode: 0xbe000000ul, .bitLength: 7}, // 'D' 1011111 |
129 | { .byteValue: 69, .huffmanCode: 0xc0000000ul, .bitLength: 7}, // 'E' 1100000 |
130 | { .byteValue: 70, .huffmanCode: 0xc2000000ul, .bitLength: 7}, // 'F' 1100001 |
131 | { .byteValue: 71, .huffmanCode: 0xc4000000ul, .bitLength: 7}, // 'G' 1100010 |
132 | { .byteValue: 72, .huffmanCode: 0xc6000000ul, .bitLength: 7}, // 'H' 1100011 |
133 | { .byteValue: 73, .huffmanCode: 0xc8000000ul, .bitLength: 7}, // 'I' 1100100 |
134 | { .byteValue: 74, .huffmanCode: 0xca000000ul, .bitLength: 7}, // 'J' 1100101 |
135 | { .byteValue: 75, .huffmanCode: 0xcc000000ul, .bitLength: 7}, // 'K' 1100110 |
136 | { .byteValue: 76, .huffmanCode: 0xce000000ul, .bitLength: 7}, // 'L' 1100111 |
137 | { .byteValue: 77, .huffmanCode: 0xd0000000ul, .bitLength: 7}, // 'M' 1101000 |
138 | { .byteValue: 78, .huffmanCode: 0xd2000000ul, .bitLength: 7}, // 'N' 1101001 |
139 | { .byteValue: 79, .huffmanCode: 0xd4000000ul, .bitLength: 7}, // 'O' 1101010 |
140 | { .byteValue: 80, .huffmanCode: 0xd6000000ul, .bitLength: 7}, // 'P' 1101011 |
141 | { .byteValue: 81, .huffmanCode: 0xd8000000ul, .bitLength: 7}, // 'Q' 1101100 |
142 | { .byteValue: 82, .huffmanCode: 0xda000000ul, .bitLength: 7}, // 'R' 1101101 |
143 | { .byteValue: 83, .huffmanCode: 0xdc000000ul, .bitLength: 7}, // 'S' 1101110 |
144 | { .byteValue: 84, .huffmanCode: 0xde000000ul, .bitLength: 7}, // 'T' 1101111 |
145 | { .byteValue: 85, .huffmanCode: 0xe0000000ul, .bitLength: 7}, // 'U' 1110000 |
146 | { .byteValue: 86, .huffmanCode: 0xe2000000ul, .bitLength: 7}, // 'V' 1110001 |
147 | { .byteValue: 87, .huffmanCode: 0xe4000000ul, .bitLength: 7}, // 'W' 1110010 |
148 | { .byteValue: 88, .huffmanCode: 0xfc000000ul, .bitLength: 8}, // 'X' 11111100 |
149 | { .byteValue: 89, .huffmanCode: 0xe6000000ul, .bitLength: 7}, // 'Y' 1110011 |
150 | { .byteValue: 90, .huffmanCode: 0xfd000000ul, .bitLength: 8}, // 'Z' 11111101 |
151 | { .byteValue: 91, .huffmanCode: 0xffd80000ul, .bitLength: 13}, // '[' 11111111|11011 |
152 | { .byteValue: 92, .huffmanCode: 0xfffe0000ul, .bitLength: 19}, // '\' 11111111|11111110|000 |
153 | { .byteValue: 93, .huffmanCode: 0xffe00000ul, .bitLength: 13}, // ']' 11111111|11100 |
154 | { .byteValue: 94, .huffmanCode: 0xfff00000ul, .bitLength: 14}, // '^' 11111111|111100 |
155 | { .byteValue: 95, .huffmanCode: 0x88000000ul, .bitLength: 6}, // '_' 100010 |
156 | { .byteValue: 96, .huffmanCode: 0xfffa0000ul, .bitLength: 15}, // '`' 11111111|1111101 |
157 | { .byteValue: 97, .huffmanCode: 0x18000000ul, .bitLength: 5}, // 'a' 00011 |
158 | { .byteValue: 98, .huffmanCode: 0x8c000000ul, .bitLength: 6}, // 'b' 100011 |
159 | { .byteValue: 99, .huffmanCode: 0x20000000ul, .bitLength: 5}, // 'c' 00100 |
160 | {.byteValue: 100, .huffmanCode: 0x90000000ul, .bitLength: 6}, // 'd' 100100 |
161 | {.byteValue: 101, .huffmanCode: 0x28000000ul, .bitLength: 5}, // 'e' 00101 |
162 | {.byteValue: 102, .huffmanCode: 0x94000000ul, .bitLength: 6}, // 'f' 100101 |
163 | {.byteValue: 103, .huffmanCode: 0x98000000ul, .bitLength: 6}, // 'g' 100110 |
164 | {.byteValue: 104, .huffmanCode: 0x9c000000ul, .bitLength: 6}, // 'h' 100111 |
165 | {.byteValue: 105, .huffmanCode: 0x30000000ul, .bitLength: 5}, // 'i' 00110 |
166 | {.byteValue: 106, .huffmanCode: 0xe8000000ul, .bitLength: 7}, // 'j' 1110100 |
167 | {.byteValue: 107, .huffmanCode: 0xea000000ul, .bitLength: 7}, // 'k' 1110101 |
168 | {.byteValue: 108, .huffmanCode: 0xa0000000ul, .bitLength: 6}, // 'l' 101000 |
169 | {.byteValue: 109, .huffmanCode: 0xa4000000ul, .bitLength: 6}, // 'm' 101001 |
170 | {.byteValue: 110, .huffmanCode: 0xa8000000ul, .bitLength: 6}, // 'n' 101010 |
171 | {.byteValue: 111, .huffmanCode: 0x38000000ul, .bitLength: 5}, // 'o' 00111 |
172 | {.byteValue: 112, .huffmanCode: 0xac000000ul, .bitLength: 6}, // 'p' 101011 |
173 | {.byteValue: 113, .huffmanCode: 0xec000000ul, .bitLength: 7}, // 'q' 1110110 |
174 | {.byteValue: 114, .huffmanCode: 0xb0000000ul, .bitLength: 6}, // 'r' 101100 |
175 | {.byteValue: 115, .huffmanCode: 0x40000000ul, .bitLength: 5}, // 's' 01000 |
176 | {.byteValue: 116, .huffmanCode: 0x48000000ul, .bitLength: 5}, // 't' 01001 |
177 | {.byteValue: 117, .huffmanCode: 0xb4000000ul, .bitLength: 6}, // 'u' 101101 |
178 | {.byteValue: 118, .huffmanCode: 0xee000000ul, .bitLength: 7}, // 'v' 1110111 |
179 | {.byteValue: 119, .huffmanCode: 0xf0000000ul, .bitLength: 7}, // 'w' 1111000 |
180 | {.byteValue: 120, .huffmanCode: 0xf2000000ul, .bitLength: 7}, // 'x' 1111001 |
181 | {.byteValue: 121, .huffmanCode: 0xf4000000ul, .bitLength: 7}, // 'y' 1111010 |
182 | {.byteValue: 122, .huffmanCode: 0xf6000000ul, .bitLength: 7}, // 'z' 1111011 |
183 | {.byteValue: 123, .huffmanCode: 0xfffc0000ul, .bitLength: 15}, // '{' 11111111|1111110 |
184 | {.byteValue: 124, .huffmanCode: 0xff800000ul, .bitLength: 11}, // '|' 11111111|100 |
185 | {.byteValue: 125, .huffmanCode: 0xfff40000ul, .bitLength: 14}, // '}' 11111111|111101 |
186 | {.byteValue: 126, .huffmanCode: 0xffe80000ul, .bitLength: 13}, // '~' 11111111|11101 |
187 | {.byteValue: 127, .huffmanCode: 0xffffffc0ul, .bitLength: 28}, // 11111111|11111111|11111111|1100 |
188 | {.byteValue: 128, .huffmanCode: 0xfffe6000ul, .bitLength: 20}, // 11111111|11111110|0110 |
189 | {.byteValue: 129, .huffmanCode: 0xffff4800ul, .bitLength: 22}, // 11111111|11111111|010010 |
190 | {.byteValue: 130, .huffmanCode: 0xfffe7000ul, .bitLength: 20}, // 11111111|11111110|0111 |
191 | {.byteValue: 131, .huffmanCode: 0xfffe8000ul, .bitLength: 20}, // 11111111|11111110|1000 |
192 | {.byteValue: 132, .huffmanCode: 0xffff4c00ul, .bitLength: 22}, // 11111111|11111111|010011 |
193 | {.byteValue: 133, .huffmanCode: 0xffff5000ul, .bitLength: 22}, // 11111111|11111111|010100 |
194 | {.byteValue: 134, .huffmanCode: 0xffff5400ul, .bitLength: 22}, // 11111111|11111111|010101 |
195 | {.byteValue: 135, .huffmanCode: 0xffffb200ul, .bitLength: 23}, // 11111111|11111111|1011001 |
196 | {.byteValue: 136, .huffmanCode: 0xffff5800ul, .bitLength: 22}, // 11111111|11111111|010110 |
197 | {.byteValue: 137, .huffmanCode: 0xffffb400ul, .bitLength: 23}, // 11111111|11111111|1011010 |
198 | {.byteValue: 138, .huffmanCode: 0xffffb600ul, .bitLength: 23}, // 11111111|11111111|1011011 |
199 | {.byteValue: 139, .huffmanCode: 0xffffb800ul, .bitLength: 23}, // 11111111|11111111|1011100 |
200 | {.byteValue: 140, .huffmanCode: 0xffffba00ul, .bitLength: 23}, // 11111111|11111111|1011101 |
201 | {.byteValue: 141, .huffmanCode: 0xffffbc00ul, .bitLength: 23}, // 11111111|11111111|1011110 |
202 | {.byteValue: 142, .huffmanCode: 0xffffeb00ul, .bitLength: 24}, // 11111111|11111111|11101011 |
203 | {.byteValue: 143, .huffmanCode: 0xffffbe00ul, .bitLength: 23}, // 11111111|11111111|1011111 |
204 | {.byteValue: 144, .huffmanCode: 0xffffec00ul, .bitLength: 24}, // 11111111|11111111|11101100 |
205 | {.byteValue: 145, .huffmanCode: 0xffffed00ul, .bitLength: 24}, // 11111111|11111111|11101101 |
206 | {.byteValue: 146, .huffmanCode: 0xffff5c00ul, .bitLength: 22}, // 11111111|11111111|010111 |
207 | {.byteValue: 147, .huffmanCode: 0xffffc000ul, .bitLength: 23}, // 11111111|11111111|1100000 |
208 | {.byteValue: 148, .huffmanCode: 0xffffee00ul, .bitLength: 24}, // 11111111|11111111|11101110 |
209 | {.byteValue: 149, .huffmanCode: 0xffffc200ul, .bitLength: 23}, // 11111111|11111111|1100001 |
210 | {.byteValue: 150, .huffmanCode: 0xffffc400ul, .bitLength: 23}, // 11111111|11111111|1100010 |
211 | {.byteValue: 151, .huffmanCode: 0xffffc600ul, .bitLength: 23}, // 11111111|11111111|1100011 |
212 | {.byteValue: 152, .huffmanCode: 0xffffc800ul, .bitLength: 23}, // 11111111|11111111|1100100 |
213 | {.byteValue: 153, .huffmanCode: 0xfffee000ul, .bitLength: 21}, // 11111111|11111110|11100 |
214 | {.byteValue: 154, .huffmanCode: 0xffff6000ul, .bitLength: 22}, // 11111111|11111111|011000 |
215 | {.byteValue: 155, .huffmanCode: 0xffffca00ul, .bitLength: 23}, // 11111111|11111111|1100101 |
216 | {.byteValue: 156, .huffmanCode: 0xffff6400ul, .bitLength: 22}, // 11111111|11111111|011001 |
217 | {.byteValue: 157, .huffmanCode: 0xffffcc00ul, .bitLength: 23}, // 11111111|11111111|1100110 |
218 | {.byteValue: 158, .huffmanCode: 0xffffce00ul, .bitLength: 23}, // 11111111|11111111|1100111 |
219 | {.byteValue: 159, .huffmanCode: 0xffffef00ul, .bitLength: 24}, // 11111111|11111111|11101111 |
220 | {.byteValue: 160, .huffmanCode: 0xffff6800ul, .bitLength: 22}, // 11111111|11111111|011010 |
221 | {.byteValue: 161, .huffmanCode: 0xfffee800ul, .bitLength: 21}, // 11111111|11111110|11101 |
222 | {.byteValue: 162, .huffmanCode: 0xfffe9000ul, .bitLength: 20}, // 11111111|11111110|1001 |
223 | {.byteValue: 163, .huffmanCode: 0xffff6c00ul, .bitLength: 22}, // 11111111|11111111|011011 |
224 | {.byteValue: 164, .huffmanCode: 0xffff7000ul, .bitLength: 22}, // 11111111|11111111|011100 |
225 | {.byteValue: 165, .huffmanCode: 0xffffd000ul, .bitLength: 23}, // 11111111|11111111|1101000 |
226 | {.byteValue: 166, .huffmanCode: 0xffffd200ul, .bitLength: 23}, // 11111111|11111111|1101001 |
227 | {.byteValue: 167, .huffmanCode: 0xfffef000ul, .bitLength: 21}, // 11111111|11111110|11110 |
228 | {.byteValue: 168, .huffmanCode: 0xffffd400ul, .bitLength: 23}, // 11111111|11111111|1101010 |
229 | {.byteValue: 169, .huffmanCode: 0xffff7400ul, .bitLength: 22}, // 11111111|11111111|011101 |
230 | {.byteValue: 170, .huffmanCode: 0xffff7800ul, .bitLength: 22}, // 11111111|11111111|011110 |
231 | {.byteValue: 171, .huffmanCode: 0xfffff000ul, .bitLength: 24}, // 11111111|11111111|11110000 |
232 | {.byteValue: 172, .huffmanCode: 0xfffef800ul, .bitLength: 21}, // 11111111|11111110|11111 |
233 | {.byteValue: 173, .huffmanCode: 0xffff7c00ul, .bitLength: 22}, // 11111111|11111111|011111 |
234 | {.byteValue: 174, .huffmanCode: 0xffffd600ul, .bitLength: 23}, // 11111111|11111111|1101011 |
235 | {.byteValue: 175, .huffmanCode: 0xffffd800ul, .bitLength: 23}, // 11111111|11111111|1101100 |
236 | {.byteValue: 176, .huffmanCode: 0xffff0000ul, .bitLength: 21}, // 11111111|11111111|00000 |
237 | {.byteValue: 177, .huffmanCode: 0xffff0800ul, .bitLength: 21}, // 11111111|11111111|00001 |
238 | {.byteValue: 178, .huffmanCode: 0xffff8000ul, .bitLength: 22}, // 11111111|11111111|100000 |
239 | {.byteValue: 179, .huffmanCode: 0xffff1000ul, .bitLength: 21}, // 11111111|11111111|00010 |
240 | {.byteValue: 180, .huffmanCode: 0xffffda00ul, .bitLength: 23}, // 11111111|11111111|1101101 |
241 | {.byteValue: 181, .huffmanCode: 0xffff8400ul, .bitLength: 22}, // 11111111|11111111|100001 |
242 | {.byteValue: 182, .huffmanCode: 0xffffdc00ul, .bitLength: 23}, // 11111111|11111111|1101110 |
243 | {.byteValue: 183, .huffmanCode: 0xffffde00ul, .bitLength: 23}, // 11111111|11111111|1101111 |
244 | {.byteValue: 184, .huffmanCode: 0xfffea000ul, .bitLength: 20}, // 11111111|11111110|1010 |
245 | {.byteValue: 185, .huffmanCode: 0xffff8800ul, .bitLength: 22}, // 11111111|11111111|100010 |
246 | {.byteValue: 186, .huffmanCode: 0xffff8c00ul, .bitLength: 22}, // 11111111|11111111|100011 |
247 | {.byteValue: 187, .huffmanCode: 0xffff9000ul, .bitLength: 22}, // 11111111|11111111|100100 |
248 | {.byteValue: 188, .huffmanCode: 0xffffe000ul, .bitLength: 23}, // 11111111|11111111|1110000 |
249 | {.byteValue: 189, .huffmanCode: 0xffff9400ul, .bitLength: 22}, // 11111111|11111111|100101 |
250 | {.byteValue: 190, .huffmanCode: 0xffff9800ul, .bitLength: 22}, // 11111111|11111111|100110 |
251 | {.byteValue: 191, .huffmanCode: 0xffffe200ul, .bitLength: 23}, // 11111111|11111111|1110001 |
252 | {.byteValue: 192, .huffmanCode: 0xfffff800ul, .bitLength: 26}, // 11111111|11111111|11111000|00 |
253 | {.byteValue: 193, .huffmanCode: 0xfffff840ul, .bitLength: 26}, // 11111111|11111111|11111000|01 |
254 | {.byteValue: 194, .huffmanCode: 0xfffeb000ul, .bitLength: 20}, // 11111111|11111110|1011 |
255 | {.byteValue: 195, .huffmanCode: 0xfffe2000ul, .bitLength: 19}, // 11111111|11111110|001 |
256 | {.byteValue: 196, .huffmanCode: 0xffff9c00ul, .bitLength: 22}, // 11111111|11111111|100111 |
257 | {.byteValue: 197, .huffmanCode: 0xffffe400ul, .bitLength: 23}, // 11111111|11111111|1110010 |
258 | {.byteValue: 198, .huffmanCode: 0xffffa000ul, .bitLength: 22}, // 11111111|11111111|101000 |
259 | {.byteValue: 199, .huffmanCode: 0xfffff600ul, .bitLength: 25}, // 11111111|11111111|11110110|0 |
260 | {.byteValue: 200, .huffmanCode: 0xfffff880ul, .bitLength: 26}, // 11111111|11111111|11111000|10 |
261 | {.byteValue: 201, .huffmanCode: 0xfffff8c0ul, .bitLength: 26}, // 11111111|11111111|11111000|11 |
262 | {.byteValue: 202, .huffmanCode: 0xfffff900ul, .bitLength: 26}, // 11111111|11111111|11111001|00 |
263 | {.byteValue: 203, .huffmanCode: 0xfffffbc0ul, .bitLength: 27}, // 11111111|11111111|11111011|110 |
264 | {.byteValue: 204, .huffmanCode: 0xfffffbe0ul, .bitLength: 27}, // 11111111|11111111|11111011|111 |
265 | {.byteValue: 205, .huffmanCode: 0xfffff940ul, .bitLength: 26}, // 11111111|11111111|11111001|01 |
266 | {.byteValue: 206, .huffmanCode: 0xfffff100ul, .bitLength: 24}, // 11111111|11111111|11110001 |
267 | {.byteValue: 207, .huffmanCode: 0xfffff680ul, .bitLength: 25}, // 11111111|11111111|11110110|1 |
268 | {.byteValue: 208, .huffmanCode: 0xfffe4000ul, .bitLength: 19}, // 11111111|11111110|010 |
269 | {.byteValue: 209, .huffmanCode: 0xffff1800ul, .bitLength: 21}, // 11111111|11111111|00011 |
270 | {.byteValue: 210, .huffmanCode: 0xfffff980ul, .bitLength: 26}, // 11111111|11111111|11111001|10 |
271 | {.byteValue: 211, .huffmanCode: 0xfffffc00ul, .bitLength: 27}, // 11111111|11111111|11111100|000 |
272 | {.byteValue: 212, .huffmanCode: 0xfffffc20ul, .bitLength: 27}, // 11111111|11111111|11111100|001 |
273 | {.byteValue: 213, .huffmanCode: 0xfffff9c0ul, .bitLength: 26}, // 11111111|11111111|11111001|11 |
274 | {.byteValue: 214, .huffmanCode: 0xfffffc40ul, .bitLength: 27}, // 11111111|11111111|11111100|010 |
275 | {.byteValue: 215, .huffmanCode: 0xfffff200ul, .bitLength: 24}, // 11111111|11111111|11110010 |
276 | {.byteValue: 216, .huffmanCode: 0xffff2000ul, .bitLength: 21}, // 11111111|11111111|00100 |
277 | {.byteValue: 217, .huffmanCode: 0xffff2800ul, .bitLength: 21}, // 11111111|11111111|00101 |
278 | {.byteValue: 218, .huffmanCode: 0xfffffa00ul, .bitLength: 26}, // 11111111|11111111|11111010|00 |
279 | {.byteValue: 219, .huffmanCode: 0xfffffa40ul, .bitLength: 26}, // 11111111|11111111|11111010|01 |
280 | {.byteValue: 220, .huffmanCode: 0xffffffd0ul, .bitLength: 28}, // 11111111|11111111|11111111|1101 |
281 | {.byteValue: 221, .huffmanCode: 0xfffffc60ul, .bitLength: 27}, // 11111111|11111111|11111100|011 |
282 | {.byteValue: 222, .huffmanCode: 0xfffffc80ul, .bitLength: 27}, // 11111111|11111111|11111100|100 |
283 | {.byteValue: 223, .huffmanCode: 0xfffffca0ul, .bitLength: 27}, // 11111111|11111111|11111100|101 |
284 | {.byteValue: 224, .huffmanCode: 0xfffec000ul, .bitLength: 20}, // 11111111|11111110|1100 |
285 | {.byteValue: 225, .huffmanCode: 0xfffff300ul, .bitLength: 24}, // 11111111|11111111|11110011 |
286 | {.byteValue: 226, .huffmanCode: 0xfffed000ul, .bitLength: 20}, // 11111111|11111110|1101 |
287 | {.byteValue: 227, .huffmanCode: 0xffff3000ul, .bitLength: 21}, // 11111111|11111111|00110 |
288 | {.byteValue: 228, .huffmanCode: 0xffffa400ul, .bitLength: 22}, // 11111111|11111111|101001 |
289 | {.byteValue: 229, .huffmanCode: 0xffff3800ul, .bitLength: 21}, // 11111111|11111111|00111 |
290 | {.byteValue: 230, .huffmanCode: 0xffff4000ul, .bitLength: 21}, // 11111111|11111111|01000 |
291 | {.byteValue: 231, .huffmanCode: 0xffffe600ul, .bitLength: 23}, // 11111111|11111111|1110011 |
292 | {.byteValue: 232, .huffmanCode: 0xffffa800ul, .bitLength: 22}, // 11111111|11111111|101010 |
293 | {.byteValue: 233, .huffmanCode: 0xffffac00ul, .bitLength: 22}, // 11111111|11111111|101011 |
294 | {.byteValue: 234, .huffmanCode: 0xfffff700ul, .bitLength: 25}, // 11111111|11111111|11110111|0 |
295 | {.byteValue: 235, .huffmanCode: 0xfffff780ul, .bitLength: 25}, // 11111111|11111111|11110111|1 |
296 | {.byteValue: 236, .huffmanCode: 0xfffff400ul, .bitLength: 24}, // 11111111|11111111|11110100 |
297 | {.byteValue: 237, .huffmanCode: 0xfffff500ul, .bitLength: 24}, // 11111111|11111111|11110101 |
298 | {.byteValue: 238, .huffmanCode: 0xfffffa80ul, .bitLength: 26}, // 11111111|11111111|11111010|10 |
299 | {.byteValue: 239, .huffmanCode: 0xffffe800ul, .bitLength: 23}, // 11111111|11111111|1110100 |
300 | {.byteValue: 240, .huffmanCode: 0xfffffac0ul, .bitLength: 26}, // 11111111|11111111|11111010|11 |
301 | {.byteValue: 241, .huffmanCode: 0xfffffcc0ul, .bitLength: 27}, // 11111111|11111111|11111100|110 |
302 | {.byteValue: 242, .huffmanCode: 0xfffffb00ul, .bitLength: 26}, // 11111111|11111111|11111011|00 |
303 | {.byteValue: 243, .huffmanCode: 0xfffffb40ul, .bitLength: 26}, // 11111111|11111111|11111011|01 |
304 | {.byteValue: 244, .huffmanCode: 0xfffffce0ul, .bitLength: 27}, // 11111111|11111111|11111100|111 |
305 | {.byteValue: 245, .huffmanCode: 0xfffffd00ul, .bitLength: 27}, // 11111111|11111111|11111101|000 |
306 | {.byteValue: 246, .huffmanCode: 0xfffffd20ul, .bitLength: 27}, // 11111111|11111111|11111101|001 |
307 | {.byteValue: 247, .huffmanCode: 0xfffffd40ul, .bitLength: 27}, // 11111111|11111111|11111101|010 |
308 | {.byteValue: 248, .huffmanCode: 0xfffffd60ul, .bitLength: 27}, // 11111111|11111111|11111101|011 |
309 | {.byteValue: 249, .huffmanCode: 0xffffffe0ul, .bitLength: 28}, // 11111111|11111111|11111111|1110 |
310 | {.byteValue: 250, .huffmanCode: 0xfffffd80ul, .bitLength: 27}, // 11111111|11111111|11111101|100 |
311 | {.byteValue: 251, .huffmanCode: 0xfffffda0ul, .bitLength: 27}, // 11111111|11111111|11111101|101 |
312 | {.byteValue: 252, .huffmanCode: 0xfffffdc0ul, .bitLength: 27}, // 11111111|11111111|11111101|110 |
313 | {.byteValue: 253, .huffmanCode: 0xfffffde0ul, .bitLength: 27}, // 11111111|11111111|11111101|111 |
314 | {.byteValue: 254, .huffmanCode: 0xfffffe00ul, .bitLength: 27}, // 11111111|11111111|11111110|000 |
315 | {.byteValue: 255, .huffmanCode: 0xfffffb80ul, .bitLength: 26}, // 11111111|11111111|11111011|10 |
316 | {.byteValue: 256, .huffmanCode: 0xfffffffcul, .bitLength: 30} // EOS 11111111|11111111|11111111|111111 |
317 | }; |
318 | |
319 | void write_huffman_code(BitOStream &outputStream, const CodeEntry &code) |
320 | { |
321 | // Append octet by octet. |
322 | auto bitLength = code.bitLength; |
323 | const auto hc = code.huffmanCode >> (32 - bitLength); |
324 | |
325 | if (bitLength > 24) { |
326 | outputStream.writeBits(bits: uchar(hc >> 24), bitLength: bitLength - 24); |
327 | bitLength = 24; |
328 | } |
329 | |
330 | if (bitLength > 16) { |
331 | outputStream.writeBits(bits: uchar(hc >> 16), bitLength: bitLength - 16); |
332 | bitLength = 16; |
333 | } |
334 | |
335 | if (bitLength > 8) { |
336 | outputStream.writeBits(bits: uchar(hc >> 8), bitLength: bitLength - 8); |
337 | bitLength = 8; |
338 | } |
339 | |
340 | outputStream.writeBits(bits: uchar(hc), bitLength); |
341 | } |
342 | |
343 | } |
344 | |
345 | // That's from HPACK's specs - we deal with octets. |
346 | static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected" ); |
347 | |
348 | quint64 huffman_encoded_bit_length(QByteArrayView inputData) |
349 | { |
350 | quint64 bitLength = 0; |
351 | for (int i = 0, e = inputData.size(); i < e; ++i) |
352 | bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength; |
353 | |
354 | return bitLength; |
355 | } |
356 | |
357 | void huffman_encode_string(QByteArrayView inputData, BitOStream &outputStream) |
358 | { |
359 | for (int i = 0, e = inputData.size(); i < e; ++i) { |
360 | const auto value = uchar(inputData[i]); |
361 | write_huffman_code(outputStream, code: staticHuffmanCodeTable[value]); |
362 | } |
363 | |
364 | // Pad bits ... |
365 | if (outputStream.bitLength() % 8) |
366 | outputStream.writeBits(bits: 0xff, bitLength: 8 - outputStream.bitLength() % 8); |
367 | } |
368 | |
369 | static constexpr |
370 | bool padding_is_valid(quint32 chunk, quint32 nBits) |
371 | { |
372 | Q_ASSERT(nBits); |
373 | |
374 | // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be |
375 | // treated as a decoding error." |
376 | if (nBits > 7) |
377 | return false; |
378 | // HPACK, 5.2: |
379 | // "A padding not corresponding to the most significant bits |
380 | // of the code for the EOS symbol MUST be treated as a decoding error." |
381 | return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1); |
382 | } |
383 | |
384 | HuffmanDecoder::HuffmanDecoder() |
385 | : minCodeLength() |
386 | { |
387 | const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0]; |
388 | |
389 | std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes); |
390 | // Now we sort: by bit length first (in the descending order) and by the symbol |
391 | // value (descending). Descending order: to make sure we do not create prefix tables with |
392 | // short 'indexLength' first and having longer codes that do not fit into such tables later. |
393 | std::sort(first: symbols.begin(), last: symbols.end(), comp: [](const CodeEntry &code1, const CodeEntry &code2) { |
394 | if (code1.bitLength == code2.bitLength) |
395 | return code1.byteValue > code2.byteValue; |
396 | return code1.bitLength > code2.bitLength; |
397 | }); |
398 | |
399 | minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5. |
400 | |
401 | // TODO: add a verification - Huffman codes |
402 | // within a given bit length range also |
403 | // should be in descending order. |
404 | |
405 | // Initialize 'prefixTables' and 'tableData'. |
406 | addTable(prefixLength: 0, indexLength: quint32(BitConstants::rootPrefix)); // 'root' table. |
407 | |
408 | for (const auto &s : symbols) { |
409 | quint32 tableIndex = 0; |
410 | while (true) { |
411 | Q_ASSERT(tableIndex < prefixTables.size()); |
412 | // Note, by value - since prefixTables will be updated in between. |
413 | const auto table = prefixTables[tableIndex]; |
414 | // We skip prefixed bits (if any) and use indexed bits only: |
415 | const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength); |
416 | // Again, by value. |
417 | PrefixTableEntry entry = tableEntry(table, index: entryIndex); |
418 | // How many bits were coded by previous tables and this table: |
419 | const auto codedLength = table.prefixLength + table.indexLength; |
420 | if (codedLength < s.bitLength) { |
421 | // We have to add a new prefix table ... (if it's not done yet). |
422 | if (!entry.bitLength) { |
423 | entry.nextTable = addTable(prefixLength: codedLength, indexLength: std::min<quint32>(a: quint32(BitConstants::childPrefix), |
424 | b: s.bitLength - codedLength)); |
425 | entry.bitLength = s.bitLength; |
426 | entry.byteValue = s.byteValue; |
427 | setTableEntry(table, index: entryIndex, entry); |
428 | } |
429 | tableIndex = entry.nextTable; |
430 | } else { |
431 | // We found the slot for our code (terminal entry): |
432 | entry.byteValue = s.byteValue; |
433 | entry.bitLength = s.bitLength; |
434 | // Refer to our own table as 'nextTable': |
435 | entry.nextTable = tableIndex; |
436 | setTableEntry(table, index: entryIndex, entry); |
437 | break; |
438 | } |
439 | } |
440 | } |
441 | |
442 | // Now, we have a table(s) and have to fill 'holes' to |
443 | // 'fix' terminal entries. |
444 | for (const auto &table : prefixTables) { |
445 | const quint32 codedLength = table.prefixLength + table.indexLength; |
446 | for (quint32 j = 0; j < table.size();) { |
447 | const PrefixTableEntry &entry = tableEntry(table, index: j); |
448 | if (entry.bitLength && entry.bitLength < codedLength) { |
449 | const quint32 range = 1 << (codedLength - entry.bitLength); |
450 | for (quint32 k = 1; k < range; ++k) |
451 | setTableEntry(table, index: j + k, entry); |
452 | j += range; |
453 | } else { |
454 | ++j; |
455 | } |
456 | } |
457 | } |
458 | } |
459 | |
460 | bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer) |
461 | { |
462 | while (true) { |
463 | quint32 chunk = 0; |
464 | const quint32 readBits = inputStream.peekBits(from: inputStream.streamOffset(), length: 32, dstPtr: &chunk); |
465 | if (!readBits) |
466 | return !inputStream.hasMoreBits(); |
467 | |
468 | if (readBits < minCodeLength) { |
469 | inputStream.skipBits(nBits: readBits); |
470 | return padding_is_valid(chunk, nBits: readBits); |
471 | } |
472 | |
473 | quint32 tableIndex = 0; |
474 | const PrefixTable *table = &prefixTables[tableIndex]; |
475 | quint32 entryIndex = chunk >> (32 - table->indexLength); |
476 | PrefixTableEntry entry = tableEntry(table: *table, index: entryIndex); |
477 | |
478 | while (true) { |
479 | if (entry.nextTable == tableIndex) |
480 | break; |
481 | |
482 | tableIndex = entry.nextTable; |
483 | table = &prefixTables[tableIndex]; |
484 | entryIndex = chunk << table->prefixLength >> (32 - table->indexLength); |
485 | entry = tableEntry(table: *table, index: entryIndex); |
486 | } |
487 | |
488 | if (entry.bitLength > readBits) { |
489 | inputStream.skipBits(nBits: readBits); |
490 | return padding_is_valid(chunk, nBits: readBits); |
491 | } |
492 | |
493 | if (!entry.bitLength || entry.byteValue == 256) { |
494 | //EOS (256) == compression error (HPACK). |
495 | inputStream.skipBits(nBits: readBits); |
496 | return false; |
497 | } |
498 | |
499 | outputBuffer.append(c: entry.byteValue); |
500 | inputStream.skipBits(nBits: entry.bitLength); |
501 | } |
502 | |
503 | return false; |
504 | } |
505 | |
506 | quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index) |
507 | { |
508 | PrefixTable newTable{prefix, index}; |
509 | newTable.offset = quint32(tableData.size()); |
510 | prefixTables.push_back(x: newTable); |
511 | // Add entries for this table: |
512 | tableData.resize(new_size: tableData.size() + newTable.size()); |
513 | |
514 | return quint32(prefixTables.size() - 1); |
515 | } |
516 | |
517 | PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index) |
518 | { |
519 | Q_ASSERT(index < table.size()); |
520 | return tableData[table.offset + index]; |
521 | } |
522 | |
523 | void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index, |
524 | const PrefixTableEntry &entry) |
525 | { |
526 | Q_ASSERT(index < table.size()); |
527 | tableData[table.offset + index] = entry; |
528 | } |
529 | |
530 | bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer) |
531 | { |
532 | Q_ASSERT(outputBuffer); |
533 | |
534 | static HuffmanDecoder decoder; |
535 | return decoder.decodeStream(inputStream, outputBuffer&: *outputBuffer); |
536 | } |
537 | |
538 | } |
539 | |
540 | QT_END_NAMESPACE |
541 | |