1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Rockchip RK3288 VPU codec driver |
4 | * |
5 | * Copyright (c) 2014 Rockchip Electronics Co., Ltd. |
6 | * Hertz Wong <hertz.wong@rock-chips.com> |
7 | * Herman Chen <herman.chen@rock-chips.com> |
8 | * |
9 | * Copyright (C) 2014 Google, Inc. |
10 | * Tomasz Figa <tfiga@chromium.org> |
11 | */ |
12 | |
13 | #include <linux/types.h> |
14 | #include <media/v4l2-h264.h> |
15 | #include <media/v4l2-mem2mem.h> |
16 | |
17 | #include "hantro.h" |
18 | #include "hantro_hw.h" |
19 | |
/*
 * Element counts of the arrays in struct hantro_h264_dec_priv_tbl.
 * The CABAC and POC counts are in u32 units; the scaling list count is in
 * bytes, because that array is declared as u8.
 */
#define CABAC_INIT_BUFFER_SIZE	(2 * 460)
#define POC_BUFFER_SIZE		34
#define SCALING_LIST_SIZE	(2 * 64 + 6 * 16)

/*
 * The valid and long term reference bitmaps are bit-reversed: picture 0 is
 * reported in bit 31, picture 1 in bit 30, and so on.
 */
#define REF_BIT(i)		BIT(31 - (i))
30 | |
31 | /*
 * Data structure describing auxiliary buffer format.
 *
 * One instance is allocated with dma_alloc_coherent() in
 * hantro_h264_dec_init() and accessed by the CPU through
 * ctx->h264_dec.priv.cpu.
 *
 * NOTE(review): presumably the hardware reads this buffer through the
 * matching DMA handle, in which case field order and sizes are fixed by the
 * hardware - confirm before changing either.
 */ |
32 | struct hantro_h264_dec_priv_tbl { |
33 | u32 cabac_table[CABAC_INIT_BUFFER_SIZE]; /* seeded once from h264_cabac_table by hantro_h264_dec_init() */ |
34 | u32 poc[POC_BUFFER_SIZE]; /* written by prepare_table(): top/bottom POC pair per DPB entry, then [32]/[33] for the current picture */ |
35 | u8 scaling_list[SCALING_LIST_SIZE]; /* written by assemble_scaling_list() as byte-swapped 32-bit words */ |
36 | }; |
37 | |
38 | /* |
39 | * Constant CABAC table, copied into the cabac_table member of the private
 * auxiliary buffer by hantro_h264_dec_init(). That copy reads
 * sizeof(tbl->cabac_table) bytes, i.e. CABAC_INIT_BUFFER_SIZE u32 words, so
 * this array must never hold fewer entries than that.
 *
 * The values are opaque to this driver; do not reorder or reformat them
 * by hand.
 * |
40 | * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c |
41 | * in https://chromium.googlesource.com/chromiumos/third_party/kernel, |
42 | * chromeos-3.14 branch. |
43 | */ |
44 | static const u32 h264_cabac_table[] = { |
45 | 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000, |
46 | 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
47 | 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
48 | 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
49 | 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, |
50 | 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137, |
51 | 0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72, |
52 | 0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a, |
53 | 0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d, |
54 | 0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e, |
55 | 0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13, |
56 | 0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357, |
57 | 0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47, |
58 | 0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09, |
59 | 0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e, |
60 | 0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37, |
61 | 0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4, |
62 | 0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8, |
63 | 0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47, |
64 | 0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27, |
65 | 0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41, |
66 | 0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360, |
67 | 0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261, |
68 | 0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a, |
69 | 0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424, |
70 | 0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955, |
71 | 0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f, |
72 | 0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37, |
73 | 0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17, |
74 | 0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32, |
75 | 0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0, |
76 | 0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00, |
77 | 0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d, |
78 | 0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259, |
79 | 0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1, |
80 | 0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37, |
81 | 0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256, |
82 | 0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03, |
83 | 0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, |
84 | 0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541, |
85 | 0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68, |
86 | 0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637, |
87 | 0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a, |
88 | 0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, |
89 | 0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053, |
90 | 0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39, |
91 | 0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45, |
92 | 0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f, |
93 | 0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24, |
94 | 0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038, |
95 | 0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f, |
96 | 0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e, |
97 | 0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e, |
98 | 0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24, |
99 | 0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000, |
100 | 0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b, |
101 | 0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940, |
102 | 0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852, |
103 | 0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a, |
104 | 0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f, |
105 | 0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228, |
106 | 0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e, |
107 | 0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943, |
108 | 0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e, |
109 | 0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f, |
110 | 0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28, |
111 | 0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe, |
112 | 0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6, |
113 | 0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00, |
114 | 0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f, |
115 | 0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728, |
116 | 0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947, |
117 | 0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41, |
118 | 0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923, |
119 | 0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951, |
120 | 0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3, |
121 | 0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1, |
122 | 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429, |
123 | 0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902, |
124 | 0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b, |
125 | 0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51, |
126 | 0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f, |
127 | 0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60, |
128 | 0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e, |
129 | 0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737, |
130 | 0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e, |
131 | 0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848, |
132 | 0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d, |
133 | 0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546, |
134 | 0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e, |
135 | 0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f, |
136 | 0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb, |
137 | 0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7, |
138 | 0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12, |
139 | 0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d, |
140 | 0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42, |
141 | 0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b, |
142 | 0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a, |
143 | 0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704, |
144 | 0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e, |
145 | 0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f, |
146 | 0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045, |
147 | 0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42, |
148 | 0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25, |
149 | 0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa, |
150 | 0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef, |
151 | 0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a, |
152 | 0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4, |
153 | 0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15, |
154 | 0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920, |
155 | 0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743, |
156 | 0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, |
157 | 0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952, |
158 | 0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d, |
159 | 0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39, |
160 | 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10, |
161 | 0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39, |
162 | 0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539, |
163 | 0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f, |
164 | 0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c, |
165 | 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758, |
166 | 0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a, |
167 | 0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b, |
168 | 0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52, |
169 | 0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a, |
170 | 0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934, |
171 | 0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b, |
172 | 0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51, |
173 | 0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a, |
174 | 0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a, |
175 | 0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d, |
176 | 0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311, |
177 | 0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24, |
178 | 0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873, |
179 | 0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443, |
180 | 0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946, |
181 | 0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753, |
182 | 0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657, |
183 | 0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178, |
184 | 0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d, |
185 | 0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240, |
186 | 0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46, |
187 | 0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d, |
188 | 0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8, |
189 | 0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f, |
190 | 0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f, |
191 | 0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a, |
192 | 0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b, |
193 | 0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447, |
194 | 0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, |
195 | 0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b, |
196 | 0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46, |
197 | 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107, |
198 | 0x1f0c2517, 0x1f261440 |
199 | }; |
200 | |
201 | static void |
202 | assemble_scaling_list(struct hantro_ctx *ctx) |
203 | { |
204 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
205 | const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling; |
206 | const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; |
207 | const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4); |
208 | const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]); |
209 | const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]); |
210 | struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; |
211 | u32 *dst = (u32 *)tbl->scaling_list; |
212 | const u32 *src; |
213 | int i, j; |
214 | |
215 | if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)) |
216 | return; |
217 | |
218 | for (i = 0; i < num_list_4x4; i++) { |
219 | src = (u32 *)&scaling->scaling_list_4x4[i]; |
220 | for (j = 0; j < list_len_4x4 / 4; j++) |
221 | *dst++ = swab32(src[j]); |
222 | } |
223 | |
224 | /* Only Intra/Inter Y lists */ |
225 | for (i = 0; i < 2; i++) { |
226 | src = (u32 *)&scaling->scaling_list_8x8[i]; |
227 | for (j = 0; j < list_len_8x8 / 4; j++) |
228 | *dst++ = swab32(src[j]); |
229 | } |
230 | } |
231 | |
232 | static void prepare_table(struct hantro_ctx *ctx) |
233 | { |
234 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
235 | const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; |
236 | const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; |
237 | struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu; |
238 | const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; |
239 | u32 dpb_longterm = 0; |
240 | u32 dpb_valid = 0; |
241 | int i; |
242 | |
243 | for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) { |
244 | tbl->poc[i * 2] = dpb[i].top_field_order_cnt; |
245 | tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt; |
246 | |
247 | if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) |
248 | continue; |
249 | |
250 | /* |
251 | * Set up bit maps of valid and long term DPBs. |
252 | * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame |
253 | * decoding, bit 31 to 15 are used, while for field decoding, |
254 | * all bits are used, with bit 31 being a top field, 30 a bottom |
255 | * field and so on. |
256 | */ |
257 | if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) { |
258 | if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF) |
259 | dpb_valid |= REF_BIT(i * 2); |
260 | |
261 | if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF) |
262 | dpb_valid |= REF_BIT(i * 2 + 1); |
263 | |
264 | if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) { |
265 | dpb_longterm |= REF_BIT(i * 2); |
266 | dpb_longterm |= REF_BIT(i * 2 + 1); |
267 | } |
268 | } else { |
269 | dpb_valid |= REF_BIT(i); |
270 | |
271 | if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) |
272 | dpb_longterm |= REF_BIT(i); |
273 | } |
274 | } |
275 | ctx->h264_dec.dpb_valid = dpb_valid; |
276 | ctx->h264_dec.dpb_longterm = dpb_longterm; |
277 | |
278 | if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) || |
279 | !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) { |
280 | tbl->poc[32] = ctx->h264_dec.cur_poc; |
281 | tbl->poc[33] = 0; |
282 | } else { |
283 | tbl->poc[32] = dec_param->top_field_order_cnt; |
284 | tbl->poc[33] = dec_param->bottom_field_order_cnt; |
285 | } |
286 | |
287 | assemble_scaling_list(ctx); |
288 | } |
289 | |
290 | static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a, |
291 | const struct v4l2_h264_dpb_entry *b) |
292 | { |
293 | return a->reference_ts == b->reference_ts; |
294 | } |
295 | |
296 | static void update_dpb(struct hantro_ctx *ctx) |
297 | { |
298 | const struct v4l2_ctrl_h264_decode_params *dec_param; |
299 | DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, }; |
300 | DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, }; |
301 | unsigned int i, j; |
302 | |
303 | dec_param = ctx->h264_dec.ctrls.decode; |
304 | |
305 | /* Disable all entries by default. */ |
306 | for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) |
307 | ctx->h264_dec.dpb[i].flags = 0; |
308 | |
309 | /* Try to match new DPB entries with existing ones by their POCs. */ |
310 | for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) { |
311 | const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; |
312 | |
313 | if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID)) |
314 | continue; |
315 | |
316 | /* |
317 | * To cut off some comparisons, iterate only on target DPB |
318 | * entries which are not used yet. |
319 | */ |
320 | for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) { |
321 | struct v4l2_h264_dpb_entry *cdpb; |
322 | |
323 | cdpb = &ctx->h264_dec.dpb[j]; |
324 | if (!dpb_entry_match(a: cdpb, b: ndpb)) |
325 | continue; |
326 | |
327 | *cdpb = *ndpb; |
328 | set_bit(nr: j, addr: used); |
329 | break; |
330 | } |
331 | |
332 | if (j == ARRAY_SIZE(ctx->h264_dec.dpb)) |
333 | set_bit(nr: i, addr: new); |
334 | } |
335 | |
336 | /* For entries that could not be matched, use remaining free slots. */ |
337 | for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) { |
338 | const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i]; |
339 | struct v4l2_h264_dpb_entry *cdpb; |
340 | |
341 | /* |
342 | * Both arrays are of the same sizes, so there is no way |
343 | * we can end up with no space in target array, unless |
344 | * something is buggy. |
345 | */ |
346 | j = find_first_zero_bit(addr: used, ARRAY_SIZE(ctx->h264_dec.dpb)); |
347 | if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb))) |
348 | return; |
349 | |
350 | cdpb = &ctx->h264_dec.dpb[j]; |
351 | *cdpb = *ndpb; |
352 | set_bit(nr: j, addr: used); |
353 | } |
354 | } |
355 | |
356 | dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx, |
357 | unsigned int dpb_idx) |
358 | { |
359 | struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb; |
360 | dma_addr_t dma_addr = 0; |
361 | s32 cur_poc = ctx->h264_dec.cur_poc; |
362 | u32 flags; |
363 | |
364 | if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE) |
365 | dma_addr = hantro_get_ref(ctx, ts: dpb[dpb_idx].reference_ts); |
366 | |
367 | if (!dma_addr) { |
368 | struct vb2_v4l2_buffer *dst_buf; |
369 | struct vb2_buffer *buf; |
370 | |
371 | /* |
372 | * If a DPB entry is unused or invalid, address of current |
373 | * destination buffer is returned. |
374 | */ |
375 | dst_buf = hantro_get_dst_buf(ctx); |
376 | buf = &dst_buf->vb2_buf; |
377 | dma_addr = hantro_get_dec_buf_addr(ctx, vb: buf); |
378 | } |
379 | |
380 | flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0; |
381 | flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) < |
382 | abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ? |
383 | 0x1 : 0; |
384 | |
385 | return dma_addr | flags; |
386 | } |
387 | |
388 | u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx) |
389 | { |
390 | const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx]; |
391 | |
392 | if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) |
393 | return 0; |
394 | return dpb->frame_num; |
395 | } |
396 | |
397 | /* |
398 | * Removes all references with the same parity as the current picture from the |
399 | * reference list. The remaining list will have references with the opposite |
400 | * parity. This is effectively a deduplication of references since each buffer |
401 | * stores two fields. For this reason, each buffer is found twice in the |
402 | * reference list. |
403 | * |
404 | * This technique has been chosen through trial and error. This simple approach |
405 | * resulted in the highest conformance score. Note that this method may suffer |
406 | * worse quality in the case an opposite reference frame has been lost. If this |
407 | * becomes a problem in the future, it should be possible to add a preprocessing |
408 | * to identify un-paired fields and avoid removing them. |
409 | */ |
410 | static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b, |
411 | struct v4l2_h264_reference *reflist) |
412 | { |
413 | int write_idx = 0; |
414 | int i; |
415 | |
416 | if (b->cur_pic_fields == V4L2_H264_FRAME_REF) { |
417 | write_idx = b->num_valid; |
418 | goto done; |
419 | } |
420 | |
421 | for (i = 0; i < b->num_valid; i++) { |
422 | if (!(b->cur_pic_fields == reflist[i].fields)) { |
423 | reflist[write_idx++] = reflist[i]; |
424 | continue; |
425 | } |
426 | } |
427 | |
428 | done: |
429 | /* Should not happen unless we have a bug in the reflist builder. */ |
430 | if (WARN_ON(write_idx > 16)) |
431 | write_idx = 16; |
432 | |
433 | /* Clear the remaining, some streams fails otherwise */ |
434 | for (; write_idx < 16; write_idx++) |
435 | reflist[write_idx].index = 15; |
436 | } |
437 | |
438 | int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx) |
439 | { |
440 | struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec; |
441 | struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls; |
442 | struct v4l2_h264_reflist_builder reflist_builder; |
443 | |
444 | hantro_start_prepare_run(ctx); |
445 | |
446 | ctrls->scaling = |
447 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX); |
448 | if (WARN_ON(!ctrls->scaling)) |
449 | return -EINVAL; |
450 | |
451 | ctrls->decode = |
452 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS); |
453 | if (WARN_ON(!ctrls->decode)) |
454 | return -EINVAL; |
455 | |
456 | ctrls->sps = |
457 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS); |
458 | if (WARN_ON(!ctrls->sps)) |
459 | return -EINVAL; |
460 | |
461 | ctrls->pps = |
462 | hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS); |
463 | if (WARN_ON(!ctrls->pps)) |
464 | return -EINVAL; |
465 | |
466 | /* Update the DPB with new refs. */ |
467 | update_dpb(ctx); |
468 | |
469 | /* Build the P/B{0,1} ref lists. */ |
470 | v4l2_h264_init_reflist_builder(b: &reflist_builder, dec_params: ctrls->decode, |
471 | sps: ctrls->sps, dpb: ctx->h264_dec.dpb); |
472 | h264_ctx->cur_poc = reflist_builder.cur_pic_order_count; |
473 | |
474 | /* Prepare data in memory. */ |
475 | prepare_table(ctx); |
476 | |
477 | v4l2_h264_build_p_ref_list(builder: &reflist_builder, reflist: h264_ctx->reflists.p); |
478 | v4l2_h264_build_b_ref_lists(builder: &reflist_builder, b0_reflist: h264_ctx->reflists.b0, |
479 | b1_reflist: h264_ctx->reflists.b1); |
480 | |
481 | /* |
482 | * Reduce ref lists to at most 16 entries, Hantro hardware will deduce |
483 | * the actual picture lists in field through the dpb_valid, |
484 | * dpb_longterm bitmap along with the current frame parity. |
485 | */ |
486 | if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) { |
487 | deduplicate_reflist(b: &reflist_builder, reflist: h264_ctx->reflists.p); |
488 | deduplicate_reflist(b: &reflist_builder, reflist: h264_ctx->reflists.b0); |
489 | deduplicate_reflist(b: &reflist_builder, reflist: h264_ctx->reflists.b1); |
490 | } |
491 | |
492 | return 0; |
493 | } |
494 | |
495 | void hantro_h264_dec_exit(struct hantro_ctx *ctx) |
496 | { |
497 | struct hantro_dev *vpu = ctx->dev; |
498 | struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec; |
499 | struct hantro_aux_buf *priv = &h264_dec->priv; |
500 | |
501 | dma_free_coherent(dev: vpu->dev, size: priv->size, cpu_addr: priv->cpu, dma_handle: priv->dma); |
502 | } |
503 | |
504 | int hantro_h264_dec_init(struct hantro_ctx *ctx) |
505 | { |
506 | struct hantro_dev *vpu = ctx->dev; |
507 | struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec; |
508 | struct hantro_aux_buf *priv = &h264_dec->priv; |
509 | struct hantro_h264_dec_priv_tbl *tbl; |
510 | |
511 | priv->cpu = dma_alloc_coherent(dev: vpu->dev, size: sizeof(*tbl), dma_handle: &priv->dma, |
512 | GFP_KERNEL); |
513 | if (!priv->cpu) |
514 | return -ENOMEM; |
515 | |
516 | priv->size = sizeof(*tbl); |
517 | tbl = priv->cpu; |
518 | memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table)); |
519 | |
520 | return 0; |
521 | } |
522 | |