1 | /* |
2 | * Copyright 2019-2021 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: AMD |
23 | * |
24 | */ |
25 | #include "resource.h" |
26 | #include "clk_mgr.h" |
27 | #include "dcn20/dcn20_resource.h" |
28 | #include "dcn303/dcn303_resource.h" |
29 | |
30 | #include "dml/dcn20/dcn20_fpu.h" |
31 | #include "dcn303_fpu.h" |
32 | |
33 | struct _vcs_dpi_ip_params_st dcn3_03_ip = { |
34 | .use_min_dcfclk = 0, |
35 | .clamp_min_dcfclk = 0, |
36 | .odm_capable = 1, |
37 | .gpuvm_enable = 1, |
38 | .hostvm_enable = 0, |
39 | .gpuvm_max_page_table_levels = 4, |
40 | .hostvm_max_page_table_levels = 4, |
41 | .hostvm_cached_page_table_levels = 0, |
42 | .pte_group_size_bytes = 2048, |
43 | .num_dsc = 2, |
44 | .rob_buffer_size_kbytes = 184, |
45 | .det_buffer_size_kbytes = 184, |
46 | .dpte_buffer_size_in_pte_reqs_luma = 64, |
47 | .dpte_buffer_size_in_pte_reqs_chroma = 34, |
48 | .pde_proc_buffer_size_64k_reqs = 48, |
49 | .dpp_output_buffer_pixels = 2560, |
50 | .opp_output_buffer_lines = 1, |
51 | .pixel_chunk_size_kbytes = 8, |
52 | .pte_enable = 1, |
53 | .max_page_table_levels = 2, |
54 | .pte_chunk_size_kbytes = 2, // ? |
55 | .meta_chunk_size_kbytes = 2, |
56 | .writeback_chunk_size_kbytes = 8, |
57 | .line_buffer_size_bits = 789504, |
58 | .is_line_buffer_bpp_fixed = 0, // ? |
59 | .line_buffer_fixed_bpp = 0, // ? |
60 | .dcc_supported = true, |
61 | .writeback_interface_buffer_size_kbytes = 90, |
62 | .writeback_line_buffer_buffer_size = 0, |
63 | .max_line_buffer_lines = 12, |
64 | .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 |
65 | .writeback_chroma_buffer_size_kbytes = 8, |
66 | .writeback_chroma_line_buffer_width_pixels = 4, |
67 | .writeback_max_hscl_ratio = 1, |
68 | .writeback_max_vscl_ratio = 1, |
69 | .writeback_min_hscl_ratio = 1, |
70 | .writeback_min_vscl_ratio = 1, |
71 | .writeback_max_hscl_taps = 1, |
72 | .writeback_max_vscl_taps = 1, |
73 | .writeback_line_buffer_luma_buffer_size = 0, |
74 | .writeback_line_buffer_chroma_buffer_size = 14643, |
75 | .cursor_buffer_size = 8, |
76 | .cursor_chunk_size = 2, |
77 | .max_num_otg = 2, |
78 | .max_num_dpp = 2, |
79 | .max_num_wb = 1, |
80 | .max_dchub_pscl_bw_pix_per_clk = 4, |
81 | .max_pscl_lb_bw_pix_per_clk = 2, |
82 | .max_lb_vscl_bw_pix_per_clk = 4, |
83 | .max_vscl_hscl_bw_pix_per_clk = 4, |
84 | .max_hscl_ratio = 6, |
85 | .max_vscl_ratio = 6, |
86 | .hscl_mults = 4, |
87 | .vscl_mults = 4, |
88 | .max_hscl_taps = 8, |
89 | .max_vscl_taps = 8, |
90 | .dispclk_ramp_margin_percent = 1, |
91 | .underscan_factor = 1.11, |
92 | .min_vblank_lines = 32, |
93 | .dppclk_delay_subtotal = 46, |
94 | .dynamic_metadata_vm_enabled = true, |
95 | .dppclk_delay_scl_lb_only = 16, |
96 | .dppclk_delay_scl = 50, |
97 | .dppclk_delay_cnvc_formatter = 27, |
98 | .dppclk_delay_cnvc_cursor = 6, |
99 | .dispclk_delay_subtotal = 119, |
100 | .dcfclk_cstate_latency = 5.2, // SRExitTime |
101 | .max_inter_dcn_tile_repeaters = 8, |
102 | .max_num_hdmi_frl_outputs = 1, |
103 | .odm_combine_4to1_supported = false, |
104 | |
105 | .xfc_supported = false, |
106 | .xfc_fill_bw_overhead_percent = 10.0, |
107 | .xfc_fill_constant_bytes = 0, |
108 | .gfx7_compat_tiling_supported = 0, |
109 | .number_of_cursors = 1, |
110 | }; |
111 | |
112 | struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc = { |
113 | .clock_limits = { |
114 | { |
115 | .state = 0, |
116 | .dispclk_mhz = 562.0, |
117 | .dppclk_mhz = 300.0, |
118 | .phyclk_mhz = 300.0, |
119 | .phyclk_d18_mhz = 667.0, |
120 | .dscclk_mhz = 405.6, |
121 | }, |
122 | }, |
123 | |
124 | .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ |
125 | .num_states = 1, |
126 | .sr_exit_time_us = 35.5, |
127 | .sr_enter_plus_exit_time_us = 40, |
128 | .urgent_latency_us = 4.0, |
129 | .urgent_latency_pixel_data_only_us = 4.0, |
130 | .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, |
131 | .urgent_latency_vm_data_only_us = 4.0, |
132 | .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, |
133 | .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, |
134 | .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, |
135 | .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, |
136 | .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, |
137 | .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, |
138 | .max_avg_sdp_bw_use_normal_percent = 60.0, |
139 | .max_avg_dram_bw_use_normal_percent = 40.0, |
140 | .writeback_latency_us = 12.0, |
141 | .max_request_size_bytes = 256, |
142 | .fabric_datapath_to_dcn_data_return_bytes = 64, |
143 | .dcn_downspread_percent = 0.5, |
144 | .downspread_percent = 0.38, |
145 | .dram_page_open_time_ns = 50.0, |
146 | .dram_rw_turnaround_time_ns = 17.5, |
147 | .dram_return_buffer_per_channel_bytes = 8192, |
148 | .round_trip_ping_latency_dcfclk_cycles = 156, |
149 | .urgent_out_of_order_return_per_channel_bytes = 4096, |
150 | .channel_interleave_bytes = 256, |
151 | .num_banks = 8, |
152 | .gpuvm_min_page_size_bytes = 4096, |
153 | .hostvm_min_page_size_bytes = 4096, |
154 | .dram_clock_change_latency_us = 404, |
155 | .dummy_pstate_latency_us = 5, |
156 | .writeback_dram_clock_change_latency_us = 23.0, |
157 | .return_bus_width_bytes = 64, |
158 | .dispclk_dppclk_vco_speed_mhz = 3650, |
159 | .xfc_bus_transport_time_us = 20, // ? |
160 | .xfc_xbuf_latency_tolerance_us = 4, // ? |
161 | .use_urgent_burst_bw = 1, // ? |
162 | .do_urgent_latency_adjustment = true, |
163 | .urgent_latency_adjustment_fabric_clock_component_us = 1.0, |
164 | .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, |
165 | }; |
166 | |
167 | static void dcn303_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, |
168 | unsigned int *optimal_dcfclk, |
169 | unsigned int *optimal_fclk) |
170 | { |
171 | double bw_from_dram, bw_from_dram1, bw_from_dram2; |
172 | |
173 | bw_from_dram1 = uclk_mts * dcn3_03_soc.num_chans * |
174 | dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_dram_bw_use_normal_percent / 100); |
175 | bw_from_dram2 = uclk_mts * dcn3_03_soc.num_chans * |
176 | dcn3_03_soc.dram_channel_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100); |
177 | |
178 | bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; |
179 | |
180 | if (optimal_fclk) |
181 | *optimal_fclk = bw_from_dram / |
182 | (dcn3_03_soc.fabric_datapath_to_dcn_data_return_bytes * |
183 | (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100)); |
184 | |
185 | if (optimal_dcfclk) |
186 | *optimal_dcfclk = bw_from_dram / |
187 | (dcn3_03_soc.return_bus_width_bytes * (dcn3_03_soc.max_avg_sdp_bw_use_normal_percent / 100)); |
188 | } |
189 | |
190 | |
191 | void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) |
192 | { |
193 | unsigned int i, j; |
194 | unsigned int num_states = 0; |
195 | |
196 | unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; |
197 | unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; |
198 | unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; |
199 | unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; |
200 | |
201 | unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200}; |
202 | unsigned int num_dcfclk_sta_targets = 4; |
203 | unsigned int num_uclk_states; |
204 | |
205 | dc_assert_fp_enabled(); |
206 | |
207 | if (dc->ctx->dc_bios->vram_info.num_chans) |
208 | dcn3_03_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; |
209 | |
210 | if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) |
211 | dcn3_03_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; |
212 | |
213 | dcn3_03_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; |
214 | dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; |
215 | |
216 | if (bw_params->clk_table.entries[0].memclk_mhz) { |
217 | int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; |
218 | |
219 | for (i = 0; i < MAX_NUM_DPM_LVL; i++) { |
220 | if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) |
221 | max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; |
222 | if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) |
223 | max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; |
224 | if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) |
225 | max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; |
226 | if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) |
227 | max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; |
228 | } |
229 | if (!max_dcfclk_mhz) |
230 | max_dcfclk_mhz = dcn3_03_soc.clock_limits[0].dcfclk_mhz; |
231 | if (!max_dispclk_mhz) |
232 | max_dispclk_mhz = dcn3_03_soc.clock_limits[0].dispclk_mhz; |
233 | if (!max_dppclk_mhz) |
234 | max_dppclk_mhz = dcn3_03_soc.clock_limits[0].dppclk_mhz; |
235 | if (!max_phyclk_mhz) |
236 | max_phyclk_mhz = dcn3_03_soc.clock_limits[0].phyclk_mhz; |
237 | |
238 | if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { |
239 | dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; |
240 | num_dcfclk_sta_targets++; |
241 | } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { |
242 | for (i = 0; i < num_dcfclk_sta_targets; i++) { |
243 | if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { |
244 | dcfclk_sta_targets[i] = max_dcfclk_mhz; |
245 | break; |
246 | } |
247 | } |
248 | /* Update size of array since we "removed" duplicates */ |
249 | num_dcfclk_sta_targets = i + 1; |
250 | } |
251 | |
252 | num_uclk_states = bw_params->clk_table.num_entries; |
253 | |
254 | /* Calculate optimal dcfclk for each uclk */ |
255 | for (i = 0; i < num_uclk_states; i++) { |
256 | dcn303_get_optimal_dcfclk_fclk_for_uclk(uclk_mts: bw_params->clk_table.entries[i].memclk_mhz * 16, |
257 | optimal_dcfclk: &optimal_dcfclk_for_uclk[i], NULL); |
258 | if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) |
259 | optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; |
260 | } |
261 | |
262 | /* Calculate optimal uclk for each dcfclk sta target */ |
263 | for (i = 0; i < num_dcfclk_sta_targets; i++) { |
264 | for (j = 0; j < num_uclk_states; j++) { |
265 | if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { |
266 | optimal_uclk_for_dcfclk_sta_targets[i] = |
267 | bw_params->clk_table.entries[j].memclk_mhz * 16; |
268 | break; |
269 | } else { |
270 | /* condition where (dcfclk_sta_targets[i] >= optimal_dcfclk_for_uclk[j]): |
271 | * This is required for dcn303 because it just so happens that the memory |
272 | * bandwidth is low enough such that all the optimal DCFCLK for each UCLK |
273 | * is lower than the smallest DCFCLK STA target. In this case we need to |
274 | * populate the optimal UCLK for each DCFCLK STA target to be the max UCLK. |
275 | */ |
276 | if (j == num_uclk_states - 1) { |
277 | optimal_uclk_for_dcfclk_sta_targets[i] = |
278 | bw_params->clk_table.entries[j].memclk_mhz * 16; |
279 | } |
280 | } |
281 | } |
282 | } |
283 | |
284 | i = 0; |
285 | j = 0; |
286 | /* create the final dcfclk and uclk table */ |
287 | while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { |
288 | if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { |
289 | dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; |
290 | dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; |
291 | } else { |
292 | if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { |
293 | dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; |
294 | dram_speed_mts[num_states++] = |
295 | bw_params->clk_table.entries[j++].memclk_mhz * 16; |
296 | } else { |
297 | j = num_uclk_states; |
298 | } |
299 | } |
300 | } |
301 | |
302 | while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { |
303 | dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; |
304 | dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; |
305 | } |
306 | |
307 | while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && |
308 | optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { |
309 | dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; |
310 | dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; |
311 | } |
312 | |
313 | dcn3_03_soc.num_states = num_states; |
314 | for (i = 0; i < dcn3_03_soc.num_states; i++) { |
315 | dcn3_03_soc.clock_limits[i].state = i; |
316 | dcn3_03_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; |
317 | dcn3_03_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; |
318 | dcn3_03_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; |
319 | |
320 | /* Fill all states with max values of all other clocks */ |
321 | dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; |
322 | dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; |
323 | dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; |
324 | /* Populate from bw_params for DTBCLK, SOCCLK */ |
325 | if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) |
326 | dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz; |
327 | else |
328 | dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; |
329 | if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) |
330 | dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; |
331 | else |
332 | dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; |
333 | /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */ |
334 | /* FCLK, PHYCLK_D18, DSCCLK */ |
335 | dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; |
336 | dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; |
337 | } |
338 | |
339 | if (dcn3_03_soc.num_chans <= 4) { |
340 | for (i = 0; i < dcn3_03_soc.num_states; i++) { |
341 | if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700) |
342 | break; |
343 | |
344 | if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) { |
345 | dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100; |
346 | dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100; |
347 | } |
348 | } |
349 | } |
350 | |
351 | /* re-init DML with updated bb */ |
352 | dml_init_instance(lib: &dc->dml, soc_bb: &dcn3_03_soc, ip_params: &dcn3_03_ip, project: DML_PROJECT_DCN30); |
353 | if (dc->current_state) |
354 | dml_init_instance(lib: &dc->current_state->bw_ctx.dml, soc_bb: &dcn3_03_soc, ip_params: &dcn3_03_ip, project: DML_PROJECT_DCN30); |
355 | } |
356 | } |
357 | |
358 | void dcn303_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) |
359 | { |
360 | dc_assert_fp_enabled(); |
361 | |
362 | if (bb_info.dram_clock_change_latency_100ns > 0) |
363 | dcn3_03_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; |
364 | |
365 | if (bb_info.dram_sr_enter_exit_latency_100ns > 0) |
366 | dcn3_03_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; |
367 | |
368 | if (bb_info.dram_sr_exit_latency_100ns > 0) |
369 | dcn3_03_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; |
370 | } |
371 | |