1 | /* SPDX-License-Identifier: MIT */ |
2 | /* |
3 | * Copyright 2023 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * |
23 | * Authors: AMD |
24 | * |
25 | */ |
26 | |
27 | #include "dml2_policy.h" |
28 | |
29 | static void get_optimal_ntuple( |
30 | const struct soc_bounding_box_st *socbb, |
31 | struct soc_state_bounding_box_st *entry) |
32 | { |
33 | if (entry->dcfclk_mhz > 0) { |
34 | float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); |
35 | |
36 | entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); |
37 | entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans * |
38 | socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); |
39 | } else if (entry->fabricclk_mhz > 0) { |
40 | float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); |
41 | |
42 | entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); |
43 | entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans * |
44 | socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); |
45 | } else if (entry->dram_speed_mts > 0) { |
46 | float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans * |
47 | socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); |
48 | |
49 | entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); |
50 | entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); |
51 | } |
52 | } |
53 | |
54 | static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb, |
55 | struct soc_state_bounding_box_st *entry) |
56 | { |
57 | float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans * |
58 | socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100)); |
59 | |
60 | float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100)); |
61 | |
62 | float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100)); |
63 | |
64 | float limiting_bw_mbytes_sec = memory_bw_mbytes_sec; |
65 | |
66 | if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec) |
67 | limiting_bw_mbytes_sec = fabric_bw_mbytes_sec; |
68 | |
69 | if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec) |
70 | limiting_bw_mbytes_sec = sdp_bw_mbytes_sec; |
71 | |
72 | return limiting_bw_mbytes_sec; |
73 | } |
74 | |
75 | static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb, |
76 | struct soc_states_st *table, |
77 | struct soc_state_bounding_box_st *entry) |
78 | { |
79 | int index = 0; |
80 | int i = 0; |
81 | float net_bw_of_new_state = 0; |
82 | |
83 | get_optimal_ntuple(socbb, entry); |
84 | |
85 | if (table->num_states == 0) { |
86 | index = 0; |
87 | } else { |
88 | net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry); |
89 | while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, entry: &table->state_array[index])) { |
90 | index++; |
91 | if (index >= (int) table->num_states) |
92 | break; |
93 | } |
94 | |
95 | for (i = table->num_states; i > index; i--) { |
96 | table->state_array[i] = table->state_array[i - 1]; |
97 | } |
98 | //ASSERT(index < MAX_CLK_TABLE_SIZE); |
99 | } |
100 | |
101 | table->state_array[index] = *entry; |
102 | table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz; |
103 | table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz; |
104 | table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts; |
105 | table->num_states++; |
106 | } |
107 | |
108 | static void remove_entry_from_table_at_index(struct soc_states_st *table, |
109 | unsigned int index) |
110 | { |
111 | int i; |
112 | |
113 | if (table->num_states == 0) |
114 | return; |
115 | |
116 | for (i = index; i < (int) table->num_states - 1; i++) { |
117 | table->state_array[i] = table->state_array[i + 1]; |
118 | } |
119 | memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st)); |
120 | } |
121 | |
122 | int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s, |
123 | struct dml2_policy_build_synthetic_soc_states_params *p) |
124 | { |
125 | int i, j; |
126 | unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz; |
127 | unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz; |
128 | unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz; |
129 | |
130 | int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, |
131 | max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, |
132 | max_uclk_mhz = 0, max_socclk_mhz = 0; |
133 | |
134 | int num_uclk_dpms = 0, num_fclk_dpms = 0; |
135 | |
136 | for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) { |
137 | if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) |
138 | max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz; |
139 | if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) |
140 | max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz; |
141 | if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) |
142 | max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz; |
143 | if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) |
144 | max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts; |
145 | if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) |
146 | max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz; |
147 | if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) |
148 | max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz; |
149 | if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) |
150 | max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; |
151 | if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) |
152 | max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; |
153 | |
154 | if (p->in_states->state_array[i].fabricclk_mhz > 0) |
155 | num_fclk_dpms++; |
156 | if (p->in_states->state_array[i].dram_speed_mts > 0) |
157 | num_uclk_dpms++; |
158 | } |
159 | |
160 | if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz) |
161 | return -1; |
162 | |
163 | p->out_states->num_states = 0; |
164 | |
165 | s->entry = p->in_states->state_array[0]; |
166 | |
167 | s->entry.dispclk_mhz = max_dispclk_mhz; |
168 | s->entry.dppclk_mhz = max_dppclk_mhz; |
169 | s->entry.dtbclk_mhz = max_dtbclk_mhz; |
170 | s->entry.phyclk_mhz = max_phyclk_mhz; |
171 | |
172 | s->entry.dscclk_mhz = max_dispclk_mhz / 3; |
173 | s->entry.phyclk_mhz = max_phyclk_mhz; |
174 | s->entry.dtbclk_mhz = max_dtbclk_mhz; |
175 | |
176 | // Insert all the DCFCLK STAs first |
177 | for (i = 0; i < p->num_dcfclk_stas; i++) { |
178 | s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i]; |
179 | s->entry.fabricclk_mhz = 0; |
180 | s->entry.dram_speed_mts = 0; |
181 | if (i > 0) |
182 | s->entry.socclk_mhz = max_socclk_mhz; |
183 | |
184 | insert_entry_into_table_sorted(socbb: p->in_bbox, table: p->out_states, entry: &s->entry); |
185 | } |
186 | |
187 | // Insert the UCLK DPMS |
188 | for (i = 0; i < num_uclk_dpms; i++) { |
189 | s->entry.dcfclk_mhz = 0; |
190 | s->entry.fabricclk_mhz = 0; |
191 | s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; |
192 | if (i == 0) { |
193 | s->entry.socclk_mhz = min_socclk_mhz; |
194 | } else { |
195 | s->entry.socclk_mhz = max_socclk_mhz; |
196 | } |
197 | |
198 | insert_entry_into_table_sorted(socbb: p->in_bbox, table: p->out_states, entry: &s->entry); |
199 | } |
200 | |
201 | // Insert FCLK DPMs (if present) |
202 | if (num_fclk_dpms > 2) { |
203 | for (i = 0; i < num_fclk_dpms; i++) { |
204 | s->entry.dcfclk_mhz = 0; |
205 | s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; |
206 | s->entry.dram_speed_mts = 0; |
207 | |
208 | insert_entry_into_table_sorted(socbb: p->in_bbox, table: p->out_states, entry: &s->entry); |
209 | } |
210 | } |
211 | // Add max FCLK |
212 | else { |
213 | s->entry.dcfclk_mhz = 0; |
214 | s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz; |
215 | s->entry.dram_speed_mts = 0; |
216 | |
217 | insert_entry_into_table_sorted(socbb: p->in_bbox, table: p->out_states, entry: &s->entry); |
218 | } |
219 | |
220 | // Remove states that require higher clocks than are supported |
221 | for (i = p->out_states->num_states - 1; i >= 0; i--) { |
222 | if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz || |
223 | p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz || |
224 | p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz) |
225 | remove_entry_from_table_at_index(table: p->out_states, index: i); |
226 | } |
227 | |
228 | // At this point, the table contains all "points of interest" based on |
229 | // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock |
230 | // ratios (by derate, are exact). |
231 | |
232 | // Round up UCLK to DPMs |
233 | for (i = p->out_states->num_states - 1; i >= 0; i--) { |
234 | for (j = 0; j < num_uclk_dpms; j++) { |
235 | if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) { |
236 | p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts; |
237 | break; |
238 | } |
239 | } |
240 | } |
241 | |
242 | // If FCLK is coarse grained, round up to next DPMs |
243 | if (num_fclk_dpms > 2) { |
244 | for (i = p->out_states->num_states - 1; i >= 0; i--) { |
245 | for (j = 0; j < num_fclk_dpms; j++) { |
246 | if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) { |
247 | p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz; |
248 | break; |
249 | } |
250 | } |
251 | } |
252 | } |
253 | |
254 | // Clamp to min FCLK/DCFCLK |
255 | for (i = p->out_states->num_states - 1; i >= 0; i--) { |
256 | if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) { |
257 | p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz; |
258 | } |
259 | if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) { |
260 | p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz; |
261 | } |
262 | } |
263 | |
264 | // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. |
265 | i = 0; |
266 | while (i < (int) p->out_states->num_states - 1) { |
267 | if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz && |
268 | p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz && |
269 | p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts) |
270 | remove_entry_from_table_at_index(table: p->out_states, index: i); |
271 | else |
272 | i++; |
273 | } |
274 | |
275 | return 0; |
276 | } |
277 | |
278 | void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy) |
279 | { |
280 | for (int i = 0; i < __DML_NUM_PLANES__; i++) { |
281 | policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB? |
282 | policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed; |
283 | policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required; |
284 | policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; |
285 | } |
286 | |
287 | /* Change the default policy initializations as per spreadsheet. We might need to |
288 | * review and change them later on as per Jun's earlier comments. |
289 | */ |
290 | policy->UseUnboundedRequesting = dml_unbounded_requesting_enable; |
291 | policy->UseMinimumRequiredDCFCLK = false; |
292 | policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean? |
293 | policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean? |
294 | policy->USRRetrainingRequiredFinal = true; |
295 | policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean? |
296 | policy->NomDETInKByteOverrideEnable = false; |
297 | policy->NomDETInKByteOverrideValue = 0; |
298 | policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true; |
299 | policy->SynchronizeTimingsFinal = true; |
300 | policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true; |
301 | policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean? |
302 | policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean? |
303 | if (project == dml_project_dcn35 || |
304 | project == dml_project_dcn351) { |
305 | policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; |
306 | policy->EnhancedPrefetchScheduleAccelerationFinal = 0; |
307 | policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ |
308 | policy->UseOnlyMaxPrefetchModes = 1; |
309 | } |
310 | } |
311 | |