1 | /* |
2 | * Copyright 2021 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: AMD |
23 | * |
24 | */ |
25 | |
26 | #include "resource.h" |
27 | #include "clk_mgr.h" |
28 | #include "dcn20/dcn20_resource.h" |
29 | #include "dcn302/dcn302_resource.h" |
30 | |
31 | #include "dml/dcn20/dcn20_fpu.h" |
32 | #include "dcn302_fpu.h" |
33 | |
34 | struct _vcs_dpi_ip_params_st dcn3_02_ip = { |
35 | .use_min_dcfclk = 0, |
36 | .clamp_min_dcfclk = 0, |
37 | .odm_capable = 1, |
38 | .gpuvm_enable = 1, |
39 | .hostvm_enable = 0, |
40 | .gpuvm_max_page_table_levels = 4, |
41 | .hostvm_max_page_table_levels = 4, |
42 | .hostvm_cached_page_table_levels = 0, |
43 | .pte_group_size_bytes = 2048, |
44 | .num_dsc = 5, |
45 | .rob_buffer_size_kbytes = 184, |
46 | .det_buffer_size_kbytes = 184, |
47 | .dpte_buffer_size_in_pte_reqs_luma = 64, |
48 | .dpte_buffer_size_in_pte_reqs_chroma = 34, |
49 | .pde_proc_buffer_size_64k_reqs = 48, |
50 | .dpp_output_buffer_pixels = 2560, |
51 | .opp_output_buffer_lines = 1, |
52 | .pixel_chunk_size_kbytes = 8, |
53 | .pte_enable = 1, |
54 | .max_page_table_levels = 2, |
55 | .pte_chunk_size_kbytes = 2, // ? |
56 | .meta_chunk_size_kbytes = 2, |
57 | .writeback_chunk_size_kbytes = 8, |
58 | .line_buffer_size_bits = 789504, |
59 | .is_line_buffer_bpp_fixed = 0, // ? |
60 | .line_buffer_fixed_bpp = 0, // ? |
61 | .dcc_supported = true, |
62 | .writeback_interface_buffer_size_kbytes = 90, |
63 | .writeback_line_buffer_buffer_size = 0, |
64 | .max_line_buffer_lines = 12, |
65 | .writeback_luma_buffer_size_kbytes = 12, // writeback_line_buffer_buffer_size = 656640 |
66 | .writeback_chroma_buffer_size_kbytes = 8, |
67 | .writeback_chroma_line_buffer_width_pixels = 4, |
68 | .writeback_max_hscl_ratio = 1, |
69 | .writeback_max_vscl_ratio = 1, |
70 | .writeback_min_hscl_ratio = 1, |
71 | .writeback_min_vscl_ratio = 1, |
72 | .writeback_max_hscl_taps = 1, |
73 | .writeback_max_vscl_taps = 1, |
74 | .writeback_line_buffer_luma_buffer_size = 0, |
75 | .writeback_line_buffer_chroma_buffer_size = 14643, |
76 | .cursor_buffer_size = 8, |
77 | .cursor_chunk_size = 2, |
78 | .max_num_otg = 5, |
79 | .max_num_dpp = 5, |
80 | .max_num_wb = 1, |
81 | .max_dchub_pscl_bw_pix_per_clk = 4, |
82 | .max_pscl_lb_bw_pix_per_clk = 2, |
83 | .max_lb_vscl_bw_pix_per_clk = 4, |
84 | .max_vscl_hscl_bw_pix_per_clk = 4, |
85 | .max_hscl_ratio = 6, |
86 | .max_vscl_ratio = 6, |
87 | .hscl_mults = 4, |
88 | .vscl_mults = 4, |
89 | .max_hscl_taps = 8, |
90 | .max_vscl_taps = 8, |
91 | .dispclk_ramp_margin_percent = 1, |
92 | .underscan_factor = 1.11, |
93 | .min_vblank_lines = 32, |
94 | .dppclk_delay_subtotal = 46, |
95 | .dynamic_metadata_vm_enabled = true, |
96 | .dppclk_delay_scl_lb_only = 16, |
97 | .dppclk_delay_scl = 50, |
98 | .dppclk_delay_cnvc_formatter = 27, |
99 | .dppclk_delay_cnvc_cursor = 6, |
100 | .dispclk_delay_subtotal = 119, |
101 | .dcfclk_cstate_latency = 5.2, // SRExitTime |
102 | .max_inter_dcn_tile_repeaters = 8, |
103 | .max_num_hdmi_frl_outputs = 1, |
104 | .odm_combine_4to1_supported = true, |
105 | |
106 | .xfc_supported = false, |
107 | .xfc_fill_bw_overhead_percent = 10.0, |
108 | .xfc_fill_constant_bytes = 0, |
109 | .gfx7_compat_tiling_supported = 0, |
110 | .number_of_cursors = 1, |
111 | }; |
112 | |
113 | struct _vcs_dpi_soc_bounding_box_st dcn3_02_soc = { |
114 | .clock_limits = { |
115 | { |
116 | .state = 0, |
117 | .dispclk_mhz = 562.0, |
118 | .dppclk_mhz = 300.0, |
119 | .phyclk_mhz = 300.0, |
120 | .phyclk_d18_mhz = 667.0, |
121 | .dscclk_mhz = 405.6, |
122 | }, |
123 | }, |
124 | |
125 | .min_dcfclk = 500.0, /* TODO: set this to actual min DCFCLK */ |
126 | .num_states = 1, |
127 | .sr_exit_time_us = 26.5, |
128 | .sr_enter_plus_exit_time_us = 31, |
129 | .urgent_latency_us = 4.0, |
130 | .urgent_latency_pixel_data_only_us = 4.0, |
131 | .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, |
132 | .urgent_latency_vm_data_only_us = 4.0, |
133 | .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, |
134 | .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, |
135 | .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, |
136 | .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 80.0, |
137 | .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, |
138 | .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0, |
139 | .max_avg_sdp_bw_use_normal_percent = 60.0, |
140 | .max_avg_dram_bw_use_normal_percent = 40.0, |
141 | .writeback_latency_us = 12.0, |
142 | .max_request_size_bytes = 256, |
143 | .fabric_datapath_to_dcn_data_return_bytes = 64, |
144 | .dcn_downspread_percent = 0.5, |
145 | .downspread_percent = 0.38, |
146 | .dram_page_open_time_ns = 50.0, |
147 | .dram_rw_turnaround_time_ns = 17.5, |
148 | .dram_return_buffer_per_channel_bytes = 8192, |
149 | .round_trip_ping_latency_dcfclk_cycles = 156, |
150 | .urgent_out_of_order_return_per_channel_bytes = 4096, |
151 | .channel_interleave_bytes = 256, |
152 | .num_banks = 8, |
153 | .gpuvm_min_page_size_bytes = 4096, |
154 | .hostvm_min_page_size_bytes = 4096, |
155 | .dram_clock_change_latency_us = 404, |
156 | .dummy_pstate_latency_us = 5, |
157 | .writeback_dram_clock_change_latency_us = 23.0, |
158 | .return_bus_width_bytes = 64, |
159 | .dispclk_dppclk_vco_speed_mhz = 3650, |
160 | .xfc_bus_transport_time_us = 20, // ? |
161 | .xfc_xbuf_latency_tolerance_us = 4, // ? |
162 | .use_urgent_burst_bw = 1, // ? |
163 | .do_urgent_latency_adjustment = true, |
164 | .urgent_latency_adjustment_fabric_clock_component_us = 1.0, |
165 | .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, |
166 | }; |
167 | |
168 | static void dcn302_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, |
169 | unsigned int *optimal_dcfclk, |
170 | unsigned int *optimal_fclk) |
171 | { |
172 | |
173 | double bw_from_dram, bw_from_dram1, bw_from_dram2; |
174 | |
175 | bw_from_dram1 = uclk_mts * dcn3_02_soc.num_chans * |
176 | dcn3_02_soc.dram_channel_width_bytes * |
177 | (dcn3_02_soc.max_avg_dram_bw_use_normal_percent / 100); |
178 | bw_from_dram2 = uclk_mts * dcn3_02_soc.num_chans * |
179 | dcn3_02_soc.dram_channel_width_bytes * |
180 | (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100); |
181 | |
182 | bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; |
183 | |
184 | if (optimal_fclk) |
185 | *optimal_fclk = bw_from_dram / |
186 | (dcn3_02_soc.fabric_datapath_to_dcn_data_return_bytes * |
187 | (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100)); |
188 | |
189 | if (optimal_dcfclk) |
190 | *optimal_dcfclk = bw_from_dram / |
191 | (dcn3_02_soc.return_bus_width_bytes * |
192 | (dcn3_02_soc.max_avg_sdp_bw_use_normal_percent / 100)); |
193 | } |
194 | |
195 | void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) |
196 | { |
197 | unsigned int i, j; |
198 | unsigned int num_states = 0; |
199 | |
200 | unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; |
201 | unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; |
202 | unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; |
203 | unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; |
204 | |
205 | unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {694, 875, 1000, 1200}; |
206 | unsigned int num_dcfclk_sta_targets = 4; |
207 | unsigned int num_uclk_states; |
208 | |
209 | dc_assert_fp_enabled(); |
210 | |
211 | if (dc->ctx->dc_bios->vram_info.num_chans) |
212 | dcn3_02_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; |
213 | |
214 | if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) |
215 | dcn3_02_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; |
216 | |
217 | dcn3_02_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; |
218 | dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; |
219 | |
220 | if (bw_params->clk_table.entries[0].memclk_mhz) { |
221 | int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; |
222 | |
223 | for (i = 0; i < MAX_NUM_DPM_LVL; i++) { |
224 | if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) |
225 | max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; |
226 | if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) |
227 | max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; |
228 | if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) |
229 | max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; |
230 | if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) |
231 | max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; |
232 | } |
233 | if (!max_dcfclk_mhz) |
234 | max_dcfclk_mhz = dcn3_02_soc.clock_limits[0].dcfclk_mhz; |
235 | if (!max_dispclk_mhz) |
236 | max_dispclk_mhz = dcn3_02_soc.clock_limits[0].dispclk_mhz; |
237 | if (!max_dppclk_mhz) |
238 | max_dppclk_mhz = dcn3_02_soc.clock_limits[0].dppclk_mhz; |
239 | if (!max_phyclk_mhz) |
240 | max_phyclk_mhz = dcn3_02_soc.clock_limits[0].phyclk_mhz; |
241 | |
242 | if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { |
243 | /* If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array */ |
244 | dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; |
245 | num_dcfclk_sta_targets++; |
246 | } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { |
247 | /* If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates */ |
248 | for (i = 0; i < num_dcfclk_sta_targets; i++) { |
249 | if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { |
250 | dcfclk_sta_targets[i] = max_dcfclk_mhz; |
251 | break; |
252 | } |
253 | } |
254 | /* Update size of array since we "removed" duplicates */ |
255 | num_dcfclk_sta_targets = i + 1; |
256 | } |
257 | |
258 | num_uclk_states = bw_params->clk_table.num_entries; |
259 | |
260 | /* Calculate optimal dcfclk for each uclk */ |
261 | for (i = 0; i < num_uclk_states; i++) { |
262 | dcn302_get_optimal_dcfclk_fclk_for_uclk(uclk_mts: bw_params->clk_table.entries[i].memclk_mhz * 16, |
263 | optimal_dcfclk: &optimal_dcfclk_for_uclk[i], NULL); |
264 | if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) |
265 | optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; |
266 | } |
267 | |
268 | /* Calculate optimal uclk for each dcfclk sta target */ |
269 | for (i = 0; i < num_dcfclk_sta_targets; i++) { |
270 | for (j = 0; j < num_uclk_states; j++) { |
271 | if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { |
272 | optimal_uclk_for_dcfclk_sta_targets[i] = |
273 | bw_params->clk_table.entries[j].memclk_mhz * 16; |
274 | break; |
275 | } |
276 | } |
277 | } |
278 | |
279 | i = 0; |
280 | j = 0; |
281 | /* create the final dcfclk and uclk table */ |
282 | while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { |
283 | if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { |
284 | dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; |
285 | dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; |
286 | } else { |
287 | if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { |
288 | dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; |
289 | dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; |
290 | } else { |
291 | j = num_uclk_states; |
292 | } |
293 | } |
294 | } |
295 | |
296 | while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { |
297 | dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; |
298 | dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; |
299 | } |
300 | |
301 | while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && |
302 | optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { |
303 | dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; |
304 | dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; |
305 | } |
306 | |
307 | dcn3_02_soc.num_states = num_states; |
308 | for (i = 0; i < dcn3_02_soc.num_states; i++) { |
309 | dcn3_02_soc.clock_limits[i].state = i; |
310 | dcn3_02_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; |
311 | dcn3_02_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; |
312 | dcn3_02_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; |
313 | |
314 | /* Fill all states with max values of all other clocks */ |
315 | dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; |
316 | dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; |
317 | dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; |
318 | /* Populate from bw_params for DTBCLK, SOCCLK */ |
319 | if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) |
320 | dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz; |
321 | else |
322 | dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; |
323 | if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) |
324 | dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; |
325 | else |
326 | dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; |
327 | /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */ |
328 | /* FCLK, PHYCLK_D18, DSCCLK */ |
329 | dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz; |
330 | dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz; |
331 | } |
332 | /* re-init DML with updated bb */ |
333 | dml_init_instance(lib: &dc->dml, soc_bb: &dcn3_02_soc, ip_params: &dcn3_02_ip, project: DML_PROJECT_DCN30); |
334 | if (dc->current_state) |
335 | dml_init_instance(lib: &dc->current_state->bw_ctx.dml, soc_bb: &dcn3_02_soc, ip_params: &dcn3_02_ip, project: DML_PROJECT_DCN30); |
336 | } |
337 | } |
338 | |
339 | void dcn302_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) |
340 | { |
341 | |
342 | dc_assert_fp_enabled(); |
343 | |
344 | if (bb_info.dram_clock_change_latency_100ns > 0) |
345 | dcn3_02_soc.dram_clock_change_latency_us = |
346 | bb_info.dram_clock_change_latency_100ns * 10; |
347 | |
348 | if (bb_info.dram_sr_enter_exit_latency_100ns > 0) |
349 | dcn3_02_soc.sr_enter_plus_exit_time_us = |
350 | bb_info.dram_sr_enter_exit_latency_100ns * 10; |
351 | |
352 | if (bb_info.dram_sr_exit_latency_100ns > 0) |
353 | dcn3_02_soc.sr_exit_time_us = |
354 | bb_info.dram_sr_exit_latency_100ns * 10; |
355 | } |
356 | |
357 | |
358 | |