1 | /* |
2 | * Copyright 2021 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: AMD |
23 | * |
24 | */ |
25 | |
26 | #include "rc_calc_fpu.h" |
27 | |
28 | #include "qp_tables.h" |
29 | #include "amdgpu_dm/dc_fpu.h" |
30 | |
/*
 * Pack a (colour mode, bits-per-component, max/min selector) triple into a
 * single integer so it can be used as a switch label. Every argument is
 * parenthesized so that a compound expression is packed as a whole.
 */
#define table_hash(mode, bpc, max_min) (((mode) << 16) | ((bpc) << 8) | (max_min))

/*
 * Select one of three values by colour mode. Relies on a variable named `cm`
 * being in scope where the macro is used: CM_444 and CM_RGB yield val444,
 * CM_422 yields val422 and anything else yields val420. The expansion is
 * wrapped in parentheses so it can be used inside a larger expression.
 */
#define MODE_SELECT(val444, val422, val420) \
	((cm == CM_444 || cm == CM_RGB) ? (val444) : (cm == CM_422 ? (val422) : (val420)))

/*
 * Expand to one complete `case` of a switch over table_hash() values. Relies
 * on `table` and `table_size` being in scope; they receive the matching
 * qp_table_<mode>_<bpc>bpc_<max> array and its element count.
 */
#define TABLE_CASE(mode, bpc, max)   case (table_hash(mode, BPC_##bpc, max)): \
	table = qp_table_##mode##_##bpc##bpc_##max; \
	table_size = sizeof(qp_table_##mode##_##bpc##bpc_##max)/sizeof(*qp_table_##mode##_##bpc##bpc_##max); \
	break
41 | |
/*
 * median3() - return the middle value of three integers.
 *
 * Orders @a and @b, then clamps @c into the range they span. The result is
 * the median for every ordering of the inputs, including ties.
 */
static int median3(int a, int b, int c)
{
	int lo = (a < b) ? a : b;
	int hi = (a < b) ? b : a;

	if (c <= lo)
		return lo;
	if (c >= hi)
		return hi;

	return c;
}
53 | |
/*
 * dsc_roundf() - round to the nearest integer, halves away from zero.
 *
 * Half a unit is added to non-negative inputs and subtracted from negative
 * ones; the sum is then truncated through an int conversion, and that integer
 * is handed back as a double.
 */
static double dsc_roundf(double num)
{
	const double nudged = (num < 0.0) ? (num - 0.5) : (num + 0.5);

	return (int)nudged;
}
63 | |
64 | static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, |
65 | enum max_min max_min, float bpp) |
66 | { |
67 | int mode = MODE_SELECT(444, 422, 420); |
68 | int sel = table_hash(mode, bpc, max_min); |
69 | int table_size = 0; |
70 | int index; |
71 | const struct qp_entry *table = NULL; |
72 | |
73 | // alias enum |
74 | enum { min = DAL_MM_MIN, max = DAL_MM_MAX }; |
75 | switch (sel) { |
76 | TABLE_CASE(444, 8, max); |
77 | TABLE_CASE(444, 8, min); |
78 | TABLE_CASE(444, 10, max); |
79 | TABLE_CASE(444, 10, min); |
80 | TABLE_CASE(444, 12, max); |
81 | TABLE_CASE(444, 12, min); |
82 | TABLE_CASE(422, 8, max); |
83 | TABLE_CASE(422, 8, min); |
84 | TABLE_CASE(422, 10, max); |
85 | TABLE_CASE(422, 10, min); |
86 | TABLE_CASE(422, 12, max); |
87 | TABLE_CASE(422, 12, min); |
88 | TABLE_CASE(420, 8, max); |
89 | TABLE_CASE(420, 8, min); |
90 | TABLE_CASE(420, 10, max); |
91 | TABLE_CASE(420, 10, min); |
92 | TABLE_CASE(420, 12, max); |
93 | TABLE_CASE(420, 12, min); |
94 | } |
95 | |
96 | if (!table) |
97 | return; |
98 | |
99 | index = (bpp - table[0].bpp) * 2; |
100 | |
101 | /* requested size is bigger than the table */ |
102 | if (index >= table_size) { |
103 | dm_error("ERROR: Requested rc_calc to find a bpp entry that exceeds the table size\n" ); |
104 | return; |
105 | } |
106 | |
107 | memcpy(qps, table[index].qps, sizeof(qp_set)); |
108 | } |
109 | |
110 | static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp) |
111 | { |
112 | int *p = ofs; |
113 | |
114 | if (mode == CM_444 || mode == CM_RGB) { |
115 | *p++ = (bpp <= 6) ? (0) : ((((bpp >= 8) && (bpp <= 12))) ? (2) : ((bpp >= 15) ? (10) : ((((bpp > 6) && (bpp < 8))) ? (0 + dsc_roundf(num: (bpp - 6) * (2 / 2.0))) : (2 + dsc_roundf(num: (bpp - 12) * (8 / 3.0)))))); |
116 | *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (8) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf(num: (bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf(num: (bpp - 12) * (8 / 3.0)))))); |
117 | *p++ = (bpp <= 6) ? (-2) : ((((bpp >= 8) && (bpp <= 12))) ? (0) : ((bpp >= 15) ? (6) : ((((bpp > 6) && (bpp < 8))) ? (-2 + dsc_roundf(num: (bpp - 6) * (2 / 2.0))) : (0 + dsc_roundf(num: (bpp - 12) * (6 / 3.0)))))); |
118 | *p++ = (bpp <= 6) ? (-4) : ((((bpp >= 8) && (bpp <= 12))) ? (-2) : ((bpp >= 15) ? (4) : ((((bpp > 6) && (bpp < 8))) ? (-4 + dsc_roundf(num: (bpp - 6) * (2 / 2.0))) : (-2 + dsc_roundf(num: (bpp - 12) * (6 / 3.0)))))); |
119 | *p++ = (bpp <= 6) ? (-6) : ((((bpp >= 8) && (bpp <= 12))) ? (-4) : ((bpp >= 15) ? (2) : ((((bpp > 6) && (bpp < 8))) ? (-6 + dsc_roundf(num: (bpp - 6) * (2 / 2.0))) : (-4 + dsc_roundf(num: (bpp - 12) * (6 / 3.0)))))); |
120 | *p++ = (bpp <= 12) ? (-6) : ((bpp >= 15) ? (0) : (-6 + dsc_roundf(num: (bpp - 12) * (6 / 3.0)))); |
121 | *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-2) : (-8 + dsc_roundf(num: (bpp - 12) * (6 / 3.0)))); |
122 | *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-4) : (-8 + dsc_roundf(num: (bpp - 12) * (4 / 3.0)))); |
123 | *p++ = (bpp <= 12) ? (-8) : ((bpp >= 15) ? (-6) : (-8 + dsc_roundf(num: (bpp - 12) * (2 / 3.0)))); |
124 | *p++ = (bpp <= 12) ? (-10) : ((bpp >= 15) ? (-8) : (-10 + dsc_roundf(num: (bpp - 12) * (2 / 3.0)))); |
125 | *p++ = -10; |
126 | *p++ = (bpp <= 6) ? (-12) : ((bpp >= 8) ? (-10) : (-12 + dsc_roundf(num: (bpp - 6) * (2 / 2.0)))); |
127 | *p++ = -12; |
128 | *p++ = -12; |
129 | *p++ = -12; |
130 | } else if (mode == CM_422) { |
131 | *p++ = (bpp <= 8) ? (2) : ((bpp >= 10) ? (10) : (2 + dsc_roundf(num: (bpp - 8) * (8 / 2.0)))); |
132 | *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (8) : (0 + dsc_roundf(num: (bpp - 8) * (8 / 2.0)))); |
133 | *p++ = (bpp <= 8) ? (0) : ((bpp >= 10) ? (6) : (0 + dsc_roundf(num: (bpp - 8) * (6 / 2.0)))); |
134 | *p++ = (bpp <= 8) ? (-2) : ((bpp >= 10) ? (4) : (-2 + dsc_roundf(num: (bpp - 8) * (6 / 2.0)))); |
135 | *p++ = (bpp <= 8) ? (-4) : ((bpp >= 10) ? (2) : (-4 + dsc_roundf(num: (bpp - 8) * (6 / 2.0)))); |
136 | *p++ = (bpp <= 8) ? (-6) : ((bpp >= 10) ? (0) : (-6 + dsc_roundf(num: (bpp - 8) * (6 / 2.0)))); |
137 | *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-2) : (-8 + dsc_roundf(num: (bpp - 8) * (6 / 2.0)))); |
138 | *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-4) : (-8 + dsc_roundf(num: (bpp - 8) * (4 / 2.0)))); |
139 | *p++ = (bpp <= 8) ? (-8) : ((bpp >= 10) ? (-6) : (-8 + dsc_roundf(num: (bpp - 8) * (2 / 2.0)))); |
140 | *p++ = (bpp <= 8) ? (-10) : ((bpp >= 10) ? (-8) : (-10 + dsc_roundf(num: (bpp - 8) * (2 / 2.0)))); |
141 | *p++ = -10; |
142 | *p++ = (bpp <= 6) ? (-12) : ((bpp >= 7) ? (-10) : (-12 + dsc_roundf(num: (bpp - 6) * (2.0 / 1)))); |
143 | *p++ = -12; |
144 | *p++ = -12; |
145 | *p++ = -12; |
146 | } else { |
147 | *p++ = (bpp <= 6) ? (2) : ((bpp >= 8) ? (10) : (2 + dsc_roundf(num: (bpp - 6) * (8 / 2.0)))); |
148 | *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (8) : (0 + dsc_roundf(num: (bpp - 6) * (8 / 2.0)))); |
149 | *p++ = (bpp <= 6) ? (0) : ((bpp >= 8) ? (6) : (0 + dsc_roundf(num: (bpp - 6) * (6 / 2.0)))); |
150 | *p++ = (bpp <= 6) ? (-2) : ((bpp >= 8) ? (4) : (-2 + dsc_roundf(num: (bpp - 6) * (6 / 2.0)))); |
151 | *p++ = (bpp <= 6) ? (-4) : ((bpp >= 8) ? (2) : (-4 + dsc_roundf(num: (bpp - 6) * (6 / 2.0)))); |
152 | *p++ = (bpp <= 6) ? (-6) : ((bpp >= 8) ? (0) : (-6 + dsc_roundf(num: (bpp - 6) * (6 / 2.0)))); |
153 | *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-2) : (-8 + dsc_roundf(num: (bpp - 6) * (6 / 2.0)))); |
154 | *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-4) : (-8 + dsc_roundf(num: (bpp - 6) * (4 / 2.0)))); |
155 | *p++ = (bpp <= 6) ? (-8) : ((bpp >= 8) ? (-6) : (-8 + dsc_roundf(num: (bpp - 6) * (2 / 2.0)))); |
156 | *p++ = (bpp <= 6) ? (-10) : ((bpp >= 8) ? (-8) : (-10 + dsc_roundf(num: (bpp - 6) * (2 / 2.0)))); |
157 | *p++ = -10; |
158 | *p++ = (bpp <= 4) ? (-12) : ((bpp >= 5) ? (-10) : (-12 + dsc_roundf(num: (bpp - 4) * (2 / 1.0)))); |
159 | *p++ = -12; |
160 | *p++ = -12; |
161 | *p++ = -12; |
162 | } |
163 | } |
164 | |
165 | void _do_calc_rc_params(struct rc_params *rc, |
166 | enum colour_mode cm, |
167 | enum bits_per_comp bpc, |
168 | u16 drm_bpp, |
169 | bool is_navite_422_or_420, |
170 | int slice_width, |
171 | int slice_height, |
172 | int minor_version) |
173 | { |
174 | float bpp; |
175 | float bpp_group; |
176 | float initial_xmit_delay_factor; |
177 | int padding_pixels; |
178 | int i; |
179 | |
180 | dc_assert_fp_enabled(); |
181 | |
182 | bpp = ((float)drm_bpp / 16.0); |
183 | /* in native_422 or native_420 modes, the bits_per_pixel is double the |
184 | * target bpp (the latter is what calc_rc_params expects) |
185 | */ |
186 | if (is_navite_422_or_420) |
187 | bpp /= 2.0; |
188 | |
189 | rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); |
190 | rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); |
191 | |
192 | bpp_group = MODE_SELECT(bpp, bpp * 2.0, bpp * 2.0); |
193 | |
194 | switch (cm) { |
195 | case CM_420: |
196 | rc->initial_fullness_offset = (bpp >= 6) ? (2048) : ((bpp <= 4) ? (6144) : ((((bpp > 4) && (bpp <= 5))) ? (6144 - dsc_roundf(num: (bpp - 4) * (512))) : (5632 - dsc_roundf(num: (bpp - 5) * (3584))))); |
197 | rc->first_line_bpg_offset = median3(a: 0, b: (12 + (int) (0.09 * min(34, slice_height - 8))), c: (int)((3 * bpc * 3) - (3 * bpp_group))); |
198 | rc->second_line_bpg_offset = median3(a: 0, b: 12, c: (int)((3 * bpc * 3) - (3 * bpp_group))); |
199 | break; |
200 | case CM_422: |
201 | rc->initial_fullness_offset = (bpp >= 8) ? (2048) : ((bpp <= 7) ? (5632) : (5632 - dsc_roundf(num: (bpp - 7) * (3584)))); |
202 | rc->first_line_bpg_offset = median3(a: 0, b: (12 + (int) (0.09 * min(34, slice_height - 8))), c: (int)((3 * bpc * 4) - (3 * bpp_group))); |
203 | rc->second_line_bpg_offset = 0; |
204 | break; |
205 | case CM_444: |
206 | case CM_RGB: |
207 | rc->initial_fullness_offset = (bpp >= 12) ? (2048) : ((bpp <= 8) ? (6144) : ((((bpp > 8) && (bpp <= 10))) ? (6144 - dsc_roundf(num: (bpp - 8) * (512 / 2))) : (5632 - dsc_roundf(num: (bpp - 10) * (3584 / 2))))); |
208 | rc->first_line_bpg_offset = median3(a: 0, b: (12 + (int) (0.09 * min(34, slice_height - 8))), c: (int)(((3 * bpc + (cm == CM_444 ? 0 : 2)) * 3) - (3 * bpp_group))); |
209 | rc->second_line_bpg_offset = 0; |
210 | break; |
211 | } |
212 | |
213 | initial_xmit_delay_factor = (cm == CM_444 || cm == CM_RGB) ? 1.0 : 2.0; |
214 | rc->initial_xmit_delay = dsc_roundf(num: 8192.0/2.0/bpp/initial_xmit_delay_factor); |
215 | |
216 | if (cm == CM_422 || cm == CM_420) |
217 | slice_width /= 2; |
218 | |
219 | padding_pixels = ((slice_width % 3) != 0) ? (3 - (slice_width % 3)) * (rc->initial_xmit_delay / slice_width) : 0; |
220 | if (3 * bpp_group >= (((rc->initial_xmit_delay + 2) / 3) * (3 + (cm == CM_422)))) { |
221 | if ((rc->initial_xmit_delay + padding_pixels) % 3 == 1) |
222 | rc->initial_xmit_delay++; |
223 | } |
224 | |
225 | rc->flatness_min_qp = ((bpc == BPC_8) ? (3) : ((bpc == BPC_10) ? (7) : (11))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); |
226 | rc->flatness_max_qp = ((bpc == BPC_8) ? (12) : ((bpc == BPC_10) ? (16) : (20))) - ((minor_version == 1 && cm == CM_444) ? 1 : 0); |
227 | rc->flatness_det_thresh = 2 << (bpc - 8); |
228 | |
229 | get_qp_set(qps: rc->qp_min, cm, bpc, max_min: DAL_MM_MIN, bpp); |
230 | get_qp_set(qps: rc->qp_max, cm, bpc, max_min: DAL_MM_MAX, bpp); |
231 | if (cm == CM_444 && minor_version == 1) { |
232 | for (i = 0; i < QP_SET_SIZE; ++i) { |
233 | rc->qp_min[i] = rc->qp_min[i] > 0 ? rc->qp_min[i] - 1 : 0; |
234 | rc->qp_max[i] = rc->qp_max[i] > 0 ? rc->qp_max[i] - 1 : 0; |
235 | } |
236 | } |
237 | get_ofs_set(ofs: rc->ofs, mode: cm, bpp); |
238 | |
239 | /* fixed parameters */ |
240 | rc->rc_model_size = 8192; |
241 | rc->rc_edge_factor = 6; |
242 | rc->rc_tgt_offset_hi = 3; |
243 | rc->rc_tgt_offset_lo = 3; |
244 | |
245 | rc->rc_buf_thresh[0] = 896; |
246 | rc->rc_buf_thresh[1] = 1792; |
247 | rc->rc_buf_thresh[2] = 2688; |
248 | rc->rc_buf_thresh[3] = 3584; |
249 | rc->rc_buf_thresh[4] = 4480; |
250 | rc->rc_buf_thresh[5] = 5376; |
251 | rc->rc_buf_thresh[6] = 6272; |
252 | rc->rc_buf_thresh[7] = 6720; |
253 | rc->rc_buf_thresh[8] = 7168; |
254 | rc->rc_buf_thresh[9] = 7616; |
255 | rc->rc_buf_thresh[10] = 7744; |
256 | rc->rc_buf_thresh[11] = 7872; |
257 | rc->rc_buf_thresh[12] = 8000; |
258 | rc->rc_buf_thresh[13] = 8064; |
259 | } |
260 | |
261 | |