1/* SPDX-License-Identifier: MIT */
2/*
3 * Copyright 2023 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27#include "display_mode_core.h"
28#include "display_mode_util.h"
29#include "display_mode_lib_defines.h"
30
31#include "dml_assert.h"
32
33#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
34// ---------------------------
35// Declaration Begins
36// ---------------------------
37static void CalculateBytePerPixelAndBlockSizes(
38 enum dml_source_format_class SourcePixelFormat,
39 enum dml_swizzle_mode SurfaceTiling,
40 // Output
41 dml_uint_t *BytePerPixelY,
42 dml_uint_t *BytePerPixelC,
43 dml_float_t *BytePerPixelDETY,
44 dml_float_t *BytePerPixelDETC,
45 dml_uint_t *BlockHeight256BytesY,
46 dml_uint_t *BlockHeight256BytesC,
47 dml_uint_t *BlockWidth256BytesY,
48 dml_uint_t *BlockWidth256BytesC,
49 dml_uint_t *MacroTileHeightY,
50 dml_uint_t *MacroTileHeightC,
51 dml_uint_t *MacroTileWidthY,
52 dml_uint_t *MacroTileWidthC);
53
54static dml_float_t CalculateWriteBackDISPCLK(
55 enum dml_source_format_class WritebackPixelFormat,
56 dml_float_t PixelClock,
57 dml_float_t WritebackHRatio,
58 dml_float_t WritebackVRatio,
59 dml_uint_t WritebackHTaps,
60 dml_uint_t WritebackVTaps,
61 dml_uint_t WritebackSourceWidth,
62 dml_uint_t WritebackDestinationWidth,
63 dml_uint_t HTotal,
64 dml_uint_t WritebackLineBufferSize,
65 dml_float_t DISPCLKDPPCLKVCOSpeed);
66
67static void CalculateVMRowAndSwath(
68 struct display_mode_lib_scratch_st *s,
69 struct CalculateVMRowAndSwath_params_st *p);
70
71static void CalculateOutputLink(
72 dml_float_t PHYCLKPerState,
73 dml_float_t PHYCLKD18PerState,
74 dml_float_t PHYCLKD32PerState,
75 dml_float_t Downspreading,
76 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
77 enum dml_output_encoder_class Output,
78 enum dml_output_format_class OutputFormat,
79 dml_uint_t HTotal,
80 dml_uint_t HActive,
81 dml_float_t PixelClockBackEnd,
82 dml_float_t ForcedOutputLinkBPP,
83 dml_uint_t DSCInputBitPerComponent,
84 dml_uint_t NumberOfDSCSlices,
85 dml_float_t AudioSampleRate,
86 dml_uint_t AudioSampleLayout,
87 enum dml_odm_mode ODMModeNoDSC,
88 enum dml_odm_mode ODMModeDSC,
89 enum dml_dsc_enable DSCEnable,
90 dml_uint_t OutputLinkDPLanes,
91 enum dml_output_link_dp_rate OutputLinkDPRate,
92
93 // Output
94 dml_bool_t *RequiresDSC,
95 dml_bool_t *RequiresFEC,
96 dml_float_t *OutBpp,
97 enum dml_output_type_and_rate__type *OutputType,
98 enum dml_output_type_and_rate__rate *OutputRate,
99 dml_uint_t *RequiredSlots);
100
101static void CalculateODMMode(
102 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
103 dml_uint_t HActive,
104 enum dml_output_encoder_class Output,
105 enum dml_output_format_class OutputFormat,
106 enum dml_odm_use_policy ODMUse,
107 dml_float_t StateDispclk,
108 dml_float_t MaxDispclk,
109 dml_bool_t DSCEnable,
110 dml_uint_t TotalNumberOfActiveDPP,
111 dml_uint_t MaxNumDPP,
112 dml_float_t PixelClock,
113 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
114 dml_float_t DISPCLKRampingMargin,
115 dml_float_t DISPCLKDPPCLKVCOSpeed,
116
117 // Output
118 dml_bool_t *TotalAvailablePipesSupport,
119 dml_uint_t *NumberOfDPP,
120 enum dml_odm_mode *ODMMode,
121 dml_float_t *RequiredDISPCLKPerSurface);
122
123static dml_float_t CalculateRequiredDispclk(
124 enum dml_odm_mode ODMMode,
125 dml_float_t PixelClock,
126 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
127 dml_float_t DISPCLKRampingMargin,
128 dml_float_t DISPCLKDPPCLKVCOSpeed,
129 dml_float_t MaxDispclkSingle);
130
131static void CalculateSinglePipeDPPCLKAndSCLThroughput(
132 dml_float_t HRatio,
133 dml_float_t HRatioChroma,
134 dml_float_t VRatio,
135 dml_float_t VRatioChroma,
136 dml_float_t MaxDCHUBToPSCLThroughput,
137 dml_float_t MaxPSCLToLBThroughput,
138 dml_float_t PixelClock,
139 enum dml_source_format_class SourcePixelFormat,
140 dml_uint_t HTaps,
141 dml_uint_t HTapsChroma,
142 dml_uint_t VTaps,
143 dml_uint_t VTapsChroma,
144
145 // Output
146 dml_float_t *PSCL_THROUGHPUT,
147 dml_float_t *PSCL_THROUGHPUT_CHROMA,
148 dml_float_t *DPPCLKUsingSingleDPP);
149
150static void CalculateDPPCLK(
151 dml_uint_t NumberOfActiveSurfaces,
152 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
153 dml_float_t DISPCLKDPPCLKVCOSpeed,
154 dml_float_t DPPCLKUsingSingleDPP[],
155 dml_uint_t DPPPerSurface[],
156
157 // Output
158 dml_float_t *GlobalDPPCLK,
159 dml_float_t Dppclk[]);
160
161static void CalculateMALLUseForStaticScreen(
162 dml_uint_t NumberOfActiveSurfaces,
163 dml_uint_t MALLAllocatedForDCNFinal,
164 enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
165 dml_uint_t SurfaceSizeInMALL[],
166 dml_bool_t one_row_per_frame_fits_in_buffer[],
167
168 // Output
169 dml_bool_t UsesMALLForStaticScreen[]);
170
171static dml_uint_t dscceComputeDelay(
172 dml_uint_t bpc,
173 dml_float_t BPP,
174 dml_uint_t sliceWidth,
175 dml_uint_t numSlices,
176 enum dml_output_format_class pixelFormat,
177 enum dml_output_encoder_class Output);
178
179static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
180 enum dml_output_encoder_class Output);
181
182static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
183 struct CalculatePrefetchSchedule_params_st *p);
184
185static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
186
187static void CalculateDCCConfiguration(
188 dml_bool_t DCCEnabled,
189 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
190 enum dml_source_format_class SourcePixelFormat,
191 dml_uint_t SurfaceWidthLuma,
192 dml_uint_t SurfaceWidthChroma,
193 dml_uint_t SurfaceHeightLuma,
194 dml_uint_t SurfaceHeightChroma,
195 dml_uint_t nomDETInKByte,
196 dml_uint_t RequestHeight256ByteLuma,
197 dml_uint_t RequestHeight256ByteChroma,
198 enum dml_swizzle_mode TilingFormat,
199 dml_uint_t BytePerPixelY,
200 dml_uint_t BytePerPixelC,
201 dml_float_t BytePerPixelDETY,
202 dml_float_t BytePerPixelDETC,
203 enum dml_rotation_angle SourceScan,
204 // Output
205 dml_uint_t *MaxUncompressedBlockLuma,
206 dml_uint_t *MaxUncompressedBlockChroma,
207 dml_uint_t *MaxCompressedBlockLuma,
208 dml_uint_t *MaxCompressedBlockChroma,
209 dml_uint_t *IndependentBlockLuma,
210 dml_uint_t *IndependentBlockChroma);
211
212static dml_uint_t CalculatePrefetchSourceLines(
213 dml_float_t VRatio,
214 dml_uint_t VTaps,
215 dml_bool_t Interlace,
216 dml_bool_t ProgressiveToInterlaceUnitInOPP,
217 dml_uint_t SwathHeight,
218 enum dml_rotation_angle SourceScan,
219 dml_bool_t ViewportStationary,
220 dml_uint_t SwathWidth,
221 dml_uint_t ViewportHeight,
222 dml_uint_t ViewportXStart,
223 dml_uint_t ViewportYStart,
224
225 // Output
226 dml_uint_t *VInitPreFill,
227 dml_uint_t *MaxNumSwath);
228
229static dml_uint_t CalculateVMAndRowBytes(
230 dml_bool_t ViewportStationary,
231 dml_bool_t DCCEnable,
232 dml_uint_t NumberOfDPPs,
233 dml_uint_t BlockHeight256Bytes,
234 dml_uint_t BlockWidth256Bytes,
235 enum dml_source_format_class SourcePixelFormat,
236 dml_uint_t SurfaceTiling,
237 dml_uint_t BytePerPixel,
238 enum dml_rotation_angle SourceScan,
239 dml_uint_t SwathWidth,
240 dml_uint_t ViewportHeight,
241 dml_uint_t ViewportXStart,
242 dml_uint_t ViewportYStart,
243 dml_bool_t GPUVMEnable,
244 dml_uint_t GPUVMMaxPageTableLevels,
245 dml_uint_t GPUVMMinPageSizeKBytes,
246 dml_uint_t PTEBufferSizeInRequests,
247 dml_uint_t Pitch,
248 dml_uint_t DCCMetaPitch,
249 dml_uint_t MacroTileWidth,
250 dml_uint_t MacroTileHeight,
251
252 // Output
253 dml_uint_t *MetaRowByte,
254 dml_uint_t *PixelPTEBytesPerRow,
255 dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
256 dml_uint_t *dpte_row_width_ub,
257 dml_uint_t *dpte_row_height,
258 dml_uint_t *dpte_row_height_linear,
259 dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
260 dml_uint_t *dpte_row_width_ub_one_row_per_frame,
261 dml_uint_t *dpte_row_height_one_row_per_frame,
262 dml_uint_t *MetaRequestWidth,
263 dml_uint_t *MetaRequestHeight,
264 dml_uint_t *meta_row_width,
265 dml_uint_t *meta_row_height,
266 dml_uint_t *PixelPTEReqWidth,
267 dml_uint_t *PixelPTEReqHeight,
268 dml_uint_t *PTERequestSize,
269 dml_uint_t *DPDE0BytesFrame,
270 dml_uint_t *MetaPTEBytesFrame);
271
272static dml_float_t CalculateTWait(
273 dml_uint_t PrefetchMode,
274 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
275 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
276 dml_bool_t DRRDisplay,
277 dml_float_t DRAMClockChangeLatency,
278 dml_float_t FCLKChangeLatency,
279 dml_float_t UrgentLatency,
280 dml_float_t SREnterPlusExitTime);
281
282static void CalculatePrefetchMode(
283 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
284 dml_uint_t *MinPrefetchMode,
285 dml_uint_t *MaxPrefetchMode);
286
287static void CalculateRowBandwidth(
288 dml_bool_t GPUVMEnable,
289 enum dml_source_format_class SourcePixelFormat,
290 dml_float_t VRatio,
291 dml_float_t VRatioChroma,
292 dml_bool_t DCCEnable,
293 dml_float_t LineTime,
294 dml_uint_t MetaRowByteLuma,
295 dml_uint_t MetaRowByteChroma,
296 dml_uint_t meta_row_height_luma,
297 dml_uint_t meta_row_height_chroma,
298 dml_uint_t PixelPTEBytesPerRowLuma,
299 dml_uint_t PixelPTEBytesPerRowChroma,
300 dml_uint_t dpte_row_height_luma,
301 dml_uint_t dpte_row_height_chroma,
302 // Output
303 dml_float_t *meta_row_bw,
304 dml_float_t *dpte_row_bw);
305
306static void CalculateFlipSchedule(
307 dml_float_t HostVMInefficiencyFactor,
308 dml_float_t UrgentExtraLatency,
309 dml_float_t UrgentLatency,
310 dml_uint_t GPUVMMaxPageTableLevels,
311 dml_bool_t HostVMEnable,
312 dml_uint_t HostVMMaxNonCachedPageTableLevels,
313 dml_bool_t GPUVMEnable,
314 dml_uint_t HostVMMinPageSize,
315 dml_float_t PDEAndMetaPTEBytesPerFrame,
316 dml_float_t MetaRowBytes,
317 dml_float_t DPTEBytesPerRow,
318 dml_float_t BandwidthAvailableForImmediateFlip,
319 dml_uint_t TotImmediateFlipBytes,
320 enum dml_source_format_class SourcePixelFormat,
321 dml_float_t LineTime,
322 dml_float_t VRatio,
323 dml_float_t VRatioChroma,
324 dml_float_t Tno_bw,
325 dml_bool_t DCCEnable,
326 dml_uint_t dpte_row_height,
327 dml_uint_t meta_row_height,
328 dml_uint_t dpte_row_height_chroma,
329 dml_uint_t meta_row_height_chroma,
330 dml_bool_t use_one_row_for_frame_flip,
331
332 // Output
333 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
334 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
335 dml_float_t *final_flip_bw,
336 dml_bool_t *ImmediateFlipSupportedForPipe);
337
338static dml_float_t CalculateWriteBackDelay(
339 enum dml_source_format_class WritebackPixelFormat,
340 dml_float_t WritebackHRatio,
341 dml_float_t WritebackVRatio,
342 dml_uint_t WritebackVTaps,
343 dml_uint_t WritebackDestinationWidth,
344 dml_uint_t WritebackDestinationHeight,
345 dml_uint_t WritebackSourceHeight,
346 dml_uint_t HTotal);
347
348static void CalculateVUpdateAndDynamicMetadataParameters(
349 dml_uint_t MaxInterDCNTileRepeaters,
350 dml_float_t Dppclk,
351 dml_float_t DISPCLK,
352 dml_float_t DCFClkDeepSleep,
353 dml_float_t PixelClock,
354 dml_uint_t HTotal,
355 dml_uint_t VBlank,
356 dml_uint_t DynamicMetadataTransmittedBytes,
357 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
358 dml_uint_t InterlaceEnable,
359 dml_bool_t ProgressiveToInterlaceUnitInOPP,
360 dml_float_t *TSetup,
361 dml_float_t *Tdmbf,
362 dml_float_t *Tdmec,
363 dml_float_t *Tdmsks,
364 dml_uint_t *VUpdateOffsetPix,
365 dml_uint_t *VUpdateWidthPix,
366 dml_uint_t *VReadyOffsetPix);
367
368static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
369
370static dml_float_t TruncToValidBPP(
371 dml_float_t LinkBitRate,
372 dml_uint_t Lanes,
373 dml_uint_t HTotal,
374 dml_uint_t HActive,
375 dml_float_t PixelClock,
376 dml_float_t DesiredBPP,
377 dml_bool_t DSCEnable,
378 enum dml_output_encoder_class Output,
379 enum dml_output_format_class Format,
380 dml_uint_t DSCInputBitPerComponent,
381 dml_uint_t DSCSlices,
382 dml_uint_t AudioRate,
383 dml_uint_t AudioLayout,
384 enum dml_odm_mode ODMModeNoDSC,
385 enum dml_odm_mode ODMModeDSC,
386 // Output
387 dml_uint_t *RequiredSlotsSingle);
388
389static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
390 struct display_mode_lib_scratch_st *s,
391 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
392
393static void CalculateDCFCLKDeepSleep(
394 dml_uint_t NumberOfActiveSurfaces,
395 dml_uint_t BytePerPixelY[],
396 dml_uint_t BytePerPixelC[],
397 dml_float_t VRatio[],
398 dml_float_t VRatioChroma[],
399 dml_uint_t SwathWidthY[],
400 dml_uint_t SwathWidthC[],
401 dml_uint_t DPPPerSurface[],
402 dml_float_t HRatio[],
403 dml_float_t HRatioChroma[],
404 dml_float_t PixelClock[],
405 dml_float_t PSCL_THROUGHPUT[],
406 dml_float_t PSCL_THROUGHPUT_CHROMA[],
407 dml_float_t Dppclk[],
408 dml_float_t ReadBandwidthLuma[],
409 dml_float_t ReadBandwidthChroma[],
410 dml_uint_t ReturnBusWidth,
411
412 // Output
413 dml_float_t *DCFCLKDeepSleep);
414
415static void CalculateUrgentBurstFactor(
416 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
417 dml_uint_t swath_width_luma_ub,
418 dml_uint_t swath_width_chroma_ub,
419 dml_uint_t SwathHeightY,
420 dml_uint_t SwathHeightC,
421 dml_float_t LineTime,
422 dml_float_t UrgentLatency,
423 dml_float_t CursorBufferSize,
424 dml_uint_t CursorWidth,
425 dml_uint_t CursorBPP,
426 dml_float_t VRatio,
427 dml_float_t VRatioC,
428 dml_float_t BytePerPixelInDETY,
429 dml_float_t BytePerPixelInDETC,
430 dml_uint_t DETBufferSizeY,
431 dml_uint_t DETBufferSizeC,
432 // Output
433 dml_float_t *UrgentBurstFactorCursor,
434 dml_float_t *UrgentBurstFactorLuma,
435 dml_float_t *UrgentBurstFactorChroma,
436 dml_bool_t *NotEnoughUrgentLatencyHiding);
437
438static dml_float_t RequiredDTBCLK(
439 dml_bool_t DSCEnable,
440 dml_float_t PixelClock,
441 enum dml_output_format_class OutputFormat,
442 dml_float_t OutputBpp,
443 dml_uint_t DSCSlices,
444 dml_uint_t HTotal,
445 dml_uint_t HActive,
446 dml_uint_t AudioRate,
447 dml_uint_t AudioLayoutSingle);
448
449static void UseMinimumDCFCLK(
450 struct display_mode_lib_scratch_st *scratch,
451 struct UseMinimumDCFCLK_params_st *p);
452
453static void CalculatePixelDeliveryTimes(
454 dml_uint_t NumberOfActiveSurfaces,
455 dml_float_t VRatio[],
456 dml_float_t VRatioChroma[],
457 dml_float_t VRatioPrefetchY[],
458 dml_float_t VRatioPrefetchC[],
459 dml_uint_t swath_width_luma_ub[],
460 dml_uint_t swath_width_chroma_ub[],
461 dml_uint_t DPPPerSurface[],
462 dml_float_t HRatio[],
463 dml_float_t HRatioChroma[],
464 dml_float_t PixelClock[],
465 dml_float_t PSCL_THROUGHPUT[],
466 dml_float_t PSCL_THROUGHPUT_CHROMA[],
467 dml_float_t Dppclk[],
468 dml_uint_t BytePerPixelC[],
469 enum dml_rotation_angle SourceScan[],
470 dml_uint_t NumberOfCursors[],
471 dml_uint_t CursorWidth[],
472 dml_uint_t CursorBPP[],
473 dml_uint_t BlockWidth256BytesY[],
474 dml_uint_t BlockHeight256BytesY[],
475 dml_uint_t BlockWidth256BytesC[],
476 dml_uint_t BlockHeight256BytesC[],
477
478 // Output
479 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
480 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
481 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
482 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
483 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
484 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
485 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
486 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
487 dml_float_t CursorRequestDeliveryTime[],
488 dml_float_t CursorRequestDeliveryTimePrefetch[]);
489
490static void CalculateMetaAndPTETimes(
491 dml_bool_t use_one_row_for_frame[],
492 dml_uint_t NumberOfActiveSurfaces,
493 dml_bool_t GPUVMEnable,
494 dml_uint_t MetaChunkSize,
495 dml_uint_t MinMetaChunkSizeBytes,
496 dml_uint_t HTotal[],
497 dml_float_t VRatio[],
498 dml_float_t VRatioChroma[],
499 dml_float_t DestinationLinesToRequestRowInVBlank[],
500 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
501 dml_bool_t DCCEnable[],
502 dml_float_t PixelClock[],
503 dml_uint_t BytePerPixelY[],
504 dml_uint_t BytePerPixelC[],
505 enum dml_rotation_angle SourceScan[],
506 dml_uint_t dpte_row_height[],
507 dml_uint_t dpte_row_height_chroma[],
508 dml_uint_t meta_row_width[],
509 dml_uint_t meta_row_width_chroma[],
510 dml_uint_t meta_row_height[],
511 dml_uint_t meta_row_height_chroma[],
512 dml_uint_t meta_req_width[],
513 dml_uint_t meta_req_width_chroma[],
514 dml_uint_t meta_req_height[],
515 dml_uint_t meta_req_height_chroma[],
516 dml_uint_t dpte_group_bytes[],
517 dml_uint_t PTERequestSizeY[],
518 dml_uint_t PTERequestSizeC[],
519 dml_uint_t PixelPTEReqWidthY[],
520 dml_uint_t PixelPTEReqHeightY[],
521 dml_uint_t PixelPTEReqWidthC[],
522 dml_uint_t PixelPTEReqHeightC[],
523 dml_uint_t dpte_row_width_luma_ub[],
524 dml_uint_t dpte_row_width_chroma_ub[],
525
526 // Output
527 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
528 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
529 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
530 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
531 dml_float_t TimePerMetaChunkNominal[],
532 dml_float_t TimePerChromaMetaChunkNominal[],
533 dml_float_t TimePerMetaChunkVBlank[],
534 dml_float_t TimePerChromaMetaChunkVBlank[],
535 dml_float_t TimePerMetaChunkFlip[],
536 dml_float_t TimePerChromaMetaChunkFlip[],
537 dml_float_t time_per_pte_group_nom_luma[],
538 dml_float_t time_per_pte_group_vblank_luma[],
539 dml_float_t time_per_pte_group_flip_luma[],
540 dml_float_t time_per_pte_group_nom_chroma[],
541 dml_float_t time_per_pte_group_vblank_chroma[],
542 dml_float_t time_per_pte_group_flip_chroma[]);
543
544static void CalculateVMGroupAndRequestTimes(
545 dml_uint_t NumberOfActiveSurfaces,
546 dml_bool_t GPUVMEnable,
547 dml_uint_t GPUVMMaxPageTableLevels,
548 dml_uint_t HTotal[],
549 dml_uint_t BytePerPixelC[],
550 dml_float_t DestinationLinesToRequestVMInVBlank[],
551 dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
552 dml_bool_t DCCEnable[],
553 dml_float_t PixelClock[],
554 dml_uint_t dpte_row_width_luma_ub[],
555 dml_uint_t dpte_row_width_chroma_ub[],
556 dml_uint_t vm_group_bytes[],
557 dml_uint_t dpde0_bytes_per_frame_ub_l[],
558 dml_uint_t dpde0_bytes_per_frame_ub_c[],
559 dml_uint_t meta_pte_bytes_per_frame_ub_l[],
560 dml_uint_t meta_pte_bytes_per_frame_ub_c[],
561
562 // Output
563 dml_float_t TimePerVMGroupVBlank[],
564 dml_float_t TimePerVMGroupFlip[],
565 dml_float_t TimePerVMRequestVBlank[],
566 dml_float_t TimePerVMRequestFlip[]);
567
568static void CalculateStutterEfficiency(
569 struct display_mode_lib_scratch_st *scratch,
570 struct CalculateStutterEfficiency_params_st *p);
571
572static void CalculateSwathAndDETConfiguration(
573 struct display_mode_lib_scratch_st *scratch,
574 struct CalculateSwathAndDETConfiguration_params_st *p);
575
576static void CalculateSwathWidth(
577 dml_bool_t ForceSingleDPP,
578 dml_uint_t NumberOfActiveSurfaces,
579 enum dml_source_format_class SourcePixelFormat[],
580 enum dml_rotation_angle SourceScan[],
581 dml_bool_t ViewportStationary[],
582 dml_uint_t ViewportWidth[],
583 dml_uint_t ViewportHeight[],
584 dml_uint_t ViewportXStart[],
585 dml_uint_t ViewportYStart[],
586 dml_uint_t ViewportXStartC[],
587 dml_uint_t ViewportYStartC[],
588 dml_uint_t SurfaceWidthY[],
589 dml_uint_t SurfaceWidthC[],
590 dml_uint_t SurfaceHeightY[],
591 dml_uint_t SurfaceHeightC[],
592 enum dml_odm_mode ODMMode[],
593 dml_uint_t BytePerPixY[],
594 dml_uint_t BytePerPixC[],
595 dml_uint_t Read256BytesBlockHeightY[],
596 dml_uint_t Read256BytesBlockHeightC[],
597 dml_uint_t Read256BytesBlockWidthY[],
598 dml_uint_t Read256BytesBlockWidthC[],
599 dml_uint_t BlendingAndTiming[],
600 dml_uint_t HActive[],
601 dml_float_t HRatio[],
602 dml_uint_t DPPPerSurface[],
603
604 // Output
605 dml_uint_t SwathWidthSingleDPPY[],
606 dml_uint_t SwathWidthSingleDPPC[],
607 dml_uint_t SwathWidthY[],
608 dml_uint_t SwathWidthC[],
609 dml_uint_t MaximumSwathHeightY[],
610 dml_uint_t MaximumSwathHeightC[],
611 dml_uint_t swath_width_luma_ub[],
612 dml_uint_t swath_width_chroma_ub[]);
613
614static dml_float_t CalculateExtraLatency(
615 dml_uint_t RoundTripPingLatencyCycles,
616 dml_uint_t ReorderingBytes,
617 dml_float_t DCFCLK,
618 dml_uint_t TotalNumberOfActiveDPP,
619 dml_uint_t PixelChunkSizeInKByte,
620 dml_uint_t TotalNumberOfDCCActiveDPP,
621 dml_uint_t MetaChunkSize,
622 dml_float_t ReturnBW,
623 dml_bool_t GPUVMEnable,
624 dml_bool_t HostVMEnable,
625 dml_uint_t NumberOfActiveSurfaces,
626 dml_uint_t NumberOfDPP[],
627 dml_uint_t dpte_group_bytes[],
628 dml_float_t HostVMInefficiencyFactor,
629 dml_uint_t HostVMMinPageSize,
630 dml_uint_t HostVMMaxNonCachedPageTableLevels);
631
632static dml_uint_t CalculateExtraLatencyBytes(
633 dml_uint_t ReorderingBytes,
634 dml_uint_t TotalNumberOfActiveDPP,
635 dml_uint_t PixelChunkSizeInKByte,
636 dml_uint_t TotalNumberOfDCCActiveDPP,
637 dml_uint_t MetaChunkSize,
638 dml_bool_t GPUVMEnable,
639 dml_bool_t HostVMEnable,
640 dml_uint_t NumberOfActiveSurfaces,
641 dml_uint_t NumberOfDPP[],
642 dml_uint_t dpte_group_bytes[],
643 dml_float_t HostVMInefficiencyFactor,
644 dml_uint_t HostVMMinPageSize,
645 dml_uint_t HostVMMaxNonCachedPageTableLevels);
646
647static dml_float_t CalculateUrgentLatency(
648 dml_float_t UrgentLatencyPixelDataOnly,
649 dml_float_t UrgentLatencyPixelMixedWithVMData,
650 dml_float_t UrgentLatencyVMDataOnly,
651 dml_bool_t DoUrgentLatencyAdjustment,
652 dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
653 dml_float_t UrgentLatencyAdjustmentFabricClockReference,
654 dml_float_t FabricClockSingle);
655
656static dml_bool_t UnboundedRequest(
657 enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
658 dml_uint_t TotalNumberOfActiveDPP,
659 dml_bool_t NoChromaOrLinear,
660 enum dml_output_encoder_class Output);
661
662static void CalculateSurfaceSizeInMall(
663 dml_uint_t NumberOfActiveSurfaces,
664 dml_uint_t MALLAllocatedForDCN,
665 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
666 dml_bool_t DCCEnable[],
667 dml_bool_t ViewportStationary[],
668 dml_uint_t ViewportXStartY[],
669 dml_uint_t ViewportYStartY[],
670 dml_uint_t ViewportXStartC[],
671 dml_uint_t ViewportYStartC[],
672 dml_uint_t ViewportWidthY[],
673 dml_uint_t ViewportHeightY[],
674 dml_uint_t BytesPerPixelY[],
675 dml_uint_t ViewportWidthC[],
676 dml_uint_t ViewportHeightC[],
677 dml_uint_t BytesPerPixelC[],
678 dml_uint_t SurfaceWidthY[],
679 dml_uint_t SurfaceWidthC[],
680 dml_uint_t SurfaceHeightY[],
681 dml_uint_t SurfaceHeightC[],
682 dml_uint_t Read256BytesBlockWidthY[],
683 dml_uint_t Read256BytesBlockWidthC[],
684 dml_uint_t Read256BytesBlockHeightY[],
685 dml_uint_t Read256BytesBlockHeightC[],
686 dml_uint_t ReadBlockWidthY[],
687 dml_uint_t ReadBlockWidthC[],
688 dml_uint_t ReadBlockHeightY[],
689 dml_uint_t ReadBlockHeightC[],
690
691 // Output
692 dml_uint_t SurfaceSizeInMALL[],
693 dml_bool_t *ExceededMALLSize);
694
695static void CalculateDETBufferSize(
696 dml_uint_t DETSizeOverride[],
697 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
698 dml_bool_t ForceSingleDPP,
699 dml_uint_t NumberOfActiveSurfaces,
700 dml_bool_t UnboundedRequestEnabled,
701 dml_uint_t nomDETInKByte,
702 dml_uint_t MaxTotalDETInKByte,
703 dml_uint_t ConfigReturnBufferSizeInKByte,
704 dml_uint_t MinCompressedBufferSizeInKByte,
705 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
706 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
707 enum dml_source_format_class SourcePixelFormat[],
708 dml_float_t ReadBandwidthLuma[],
709 dml_float_t ReadBandwidthChroma[],
710 dml_uint_t RotesY[],
711 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
712 dml_uint_t DPPPerSurface[],
713 // Output
714 dml_uint_t DETBufferSizeInKByte[],
715 dml_uint_t *CompressedBufferSizeInkByte);
716
717static void CalculateMaxDETAndMinCompressedBufferSize(
718 dml_uint_t ConfigReturnBufferSizeInKByte,
719 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
720 dml_uint_t ROBBufferSizeInKByte,
721 dml_uint_t MaxNumDPP,
722 dml_bool_t nomDETInKByteOverrideEnable,
723 dml_uint_t nomDETInKByteOverrideValue,
724
725 // Output
726 dml_uint_t *MaxTotalDETInKByte,
727 dml_uint_t *nomDETInKByte,
728 dml_uint_t *MinCompressedBufferSizeInKByte);
729
730static dml_uint_t DSCDelayRequirement(
731 dml_bool_t DSCEnabled,
732 enum dml_odm_mode ODMMode,
733 dml_uint_t DSCInputBitPerComponent,
734 dml_float_t OutputBpp,
735 dml_uint_t HActive,
736 dml_uint_t HTotal,
737 dml_uint_t NumberOfDSCSlices,
738 enum dml_output_format_class OutputFormat,
739 enum dml_output_encoder_class Output,
740 dml_float_t PixelClock,
741 dml_float_t PixelClockBackEnd);
742
743static dml_bool_t CalculateVActiveBandwithSupport(
744 dml_uint_t NumberOfActiveSurfaces,
745 dml_float_t ReturnBW,
746 dml_bool_t NotUrgentLatencyHiding[],
747 dml_float_t ReadBandwidthLuma[],
748 dml_float_t ReadBandwidthChroma[],
749 dml_float_t cursor_bw[],
750 dml_float_t meta_row_bandwidth[],
751 dml_float_t dpte_row_bandwidth[],
752 dml_uint_t NumberOfDPP[],
753 dml_float_t UrgentBurstFactorLuma[],
754 dml_float_t UrgentBurstFactorChroma[],
755 dml_float_t UrgentBurstFactorCursor[]);
756
757static void CalculatePrefetchBandwithSupport(
758 dml_uint_t NumberOfActiveSurfaces,
759 dml_float_t ReturnBW,
760 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
761 dml_bool_t NotUrgentLatencyHiding[],
762 dml_float_t ReadBandwidthLuma[],
763 dml_float_t ReadBandwidthChroma[],
764 dml_float_t PrefetchBandwidthLuma[],
765 dml_float_t PrefetchBandwidthChroma[],
766 dml_float_t cursor_bw[],
767 dml_float_t meta_row_bandwidth[],
768 dml_float_t dpte_row_bandwidth[],
769 dml_float_t cursor_bw_pre[],
770 dml_float_t prefetch_vmrow_bw[],
771 dml_uint_t NumberOfDPP[],
772 dml_float_t UrgentBurstFactorLuma[],
773 dml_float_t UrgentBurstFactorChroma[],
774 dml_float_t UrgentBurstFactorCursor[],
775 dml_float_t UrgentBurstFactorLumaPre[],
776 dml_float_t UrgentBurstFactorChromaPre[],
777 dml_float_t UrgentBurstFactorCursorPre[],
778
779 // Output
780 dml_float_t *PrefetchBandwidth,
781 dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
782 dml_float_t *FractionOfUrgentBandwidth,
783 dml_bool_t *PrefetchBandwidthSupport);
784
785static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
786 dml_uint_t NumberOfActiveSurfaces,
787 dml_float_t ReturnBW,
788 dml_float_t ReadBandwidthLuma[],
789 dml_float_t ReadBandwidthChroma[],
790 dml_float_t PrefetchBandwidthLuma[],
791 dml_float_t PrefetchBandwidthChroma[],
792 dml_float_t cursor_bw[],
793 dml_float_t cursor_bw_pre[],
794 dml_uint_t NumberOfDPP[],
795 dml_float_t UrgentBurstFactorLuma[],
796 dml_float_t UrgentBurstFactorChroma[],
797 dml_float_t UrgentBurstFactorCursor[],
798 dml_float_t UrgentBurstFactorLumaPre[],
799 dml_float_t UrgentBurstFactorChromaPre[],
800 dml_float_t UrgentBurstFactorCursorPre[]);
801
802static void CalculateImmediateFlipBandwithSupport(
803 dml_uint_t NumberOfActiveSurfaces,
804 dml_float_t ReturnBW,
805 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
806 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
807 dml_float_t final_flip_bw[],
808 dml_float_t ReadBandwidthLuma[],
809 dml_float_t ReadBandwidthChroma[],
810 dml_float_t PrefetchBandwidthLuma[],
811 dml_float_t PrefetchBandwidthChroma[],
812 dml_float_t cursor_bw[],
813 dml_float_t meta_row_bandwidth[],
814 dml_float_t dpte_row_bandwidth[],
815 dml_float_t cursor_bw_pre[],
816 dml_float_t prefetch_vmrow_bw[],
817 dml_uint_t NumberOfDPP[],
818 dml_float_t UrgentBurstFactorLuma[],
819 dml_float_t UrgentBurstFactorChroma[],
820 dml_float_t UrgentBurstFactorCursor[],
821 dml_float_t UrgentBurstFactorLumaPre[],
822 dml_float_t UrgentBurstFactorChromaPre[],
823 dml_float_t UrgentBurstFactorCursorPre[],
824
825 // Output
826 dml_float_t *TotalBandwidth,
827 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
828 dml_float_t *FractionOfUrgentBandwidth,
829 dml_bool_t *ImmediateFlipBandwidthSupport);
830
831// ---------------------------
832// Declaration Ends
833// ---------------------------
834
835static dml_uint_t dscceComputeDelay(
836 dml_uint_t bpc,
837 dml_float_t BPP,
838 dml_uint_t sliceWidth,
839 dml_uint_t numSlices,
840 enum dml_output_format_class pixelFormat,
841 enum dml_output_encoder_class Output)
842{
843 // valid bpc = source bits per component in the set of {8, 10, 12}
844 // valid bpp = increments of 1/16 of a bit
845 // min = 6/7/8 in N420/N422/444, respectively
846 // max = such that compression is 1:1
847 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
848 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
849 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
850
851 // fixed value
852 dml_uint_t rcModelSize = 8192;
853
854 // N422/N420 operate at 2 pixels per clock
855 dml_uint_t pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
856 Delay, pixels;
857
858 if (pixelFormat == dml_420)
859 pixelsPerClock = 2;
860 // #all other modes operate at 1 pixel per clock
861 else if (pixelFormat == dml_444)
862 pixelsPerClock = 1;
863 else if (pixelFormat == dml_n422)
864 pixelsPerClock = 2;
865 else
866 pixelsPerClock = 1;
867
868 //initial transmit delay as per PPS
869 initalXmitDelay = (dml_uint_t)(dml_round(val: rcModelSize / 2.0 / BPP / pixelsPerClock, bankers_rounding: 1));
870
871 //compute ssm delay
872 if (bpc == 8)
873 D = 81;
874 else if (bpc == 10)
875 D = 89;
876 else
877 D = 113;
878
879 //divide by pixel per cycle to compute slice width as seen by DSC
880 w = sliceWidth / pixelsPerClock;
881
882 //422 mode has an additional cycle of delay
883 if (pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422)
884 s = 0;
885 else
886 s = 1;
887
888 //main calculation for the dscce
889 ix = initalXmitDelay + 45;
890 wx = (w + 2) / 3;
891 p = 3 * wx - w;
892 l0 = ix / w;
893 a = ix + p * l0;
894 ax = (a + 2) / 3 + D + 6 + 1;
895 L = (ax + wx - 1) / wx;
896 if ((ix % w) == 0 && p != 0)
897 lstall = 1;
898 else
899 lstall = 0;
900 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
901
902 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
903 pixels = Delay * 3 * pixelsPerClock;
904
905#ifdef __DML_VBA_DEBUG__
906 dml_print("DML::%s: bpc: %u\n", __func__, bpc);
907 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
908 dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
909 dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
910 dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
911 dml_print("DML::%s: Output: %u\n", __func__, Output);
912 dml_print("DML::%s: pixels: %u\n", __func__, pixels);
913#endif
914 return pixels;
915}
916
917static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
918{
919 dml_uint_t Delay = 0;
920
921 if (pixelFormat == dml_420) {
922 // sfr
923 Delay = Delay + 2;
924 // dsccif
925 Delay = Delay + 0;
926 // dscc - input deserializer
927 Delay = Delay + 3;
928 // dscc gets pixels every other cycle
929 Delay = Delay + 2;
930 // dscc - input cdc fifo
931 Delay = Delay + 12;
932 // dscc gets pixels every other cycle
933 Delay = Delay + 13;
934 // dscc - cdc uncertainty
935 Delay = Delay + 2;
936 // dscc - output cdc fifo
937 Delay = Delay + 7;
938 // dscc gets pixels every other cycle
939 Delay = Delay + 3;
940 // dscc - cdc uncertainty
941 Delay = Delay + 2;
942 // dscc - output serializer
943 Delay = Delay + 1;
944 // sft
945 Delay = Delay + 1;
946 } else if (pixelFormat == dml_n422) {
947 // sfr
948 Delay = Delay + 2;
949 // dsccif
950 Delay = Delay + 1;
951 // dscc - input deserializer
952 Delay = Delay + 5;
953 // dscc - input cdc fifo
954 Delay = Delay + 25;
955 // dscc - cdc uncertainty
956 Delay = Delay + 2;
957 // dscc - output cdc fifo
958 Delay = Delay + 10;
959 // dscc - cdc uncertainty
960 Delay = Delay + 2;
961 // dscc - output serializer
962 Delay = Delay + 1;
963 // sft
964 Delay = Delay + 1;
965 } else {
966 // sfr
967 Delay = Delay + 2;
968 // dsccif
969 Delay = Delay + 0;
970 // dscc - input deserializer
971 Delay = Delay + 3;
972 // dscc - input cdc fifo
973 Delay = Delay + 12;
974 // dscc - cdc uncertainty
975 Delay = Delay + 2;
976 // dscc - output cdc fifo
977 Delay = Delay + 7;
978 // dscc - output serializer
979 Delay = Delay + 1;
980 // dscc - cdc uncertainty
981 Delay = Delay + 2;
982 // sft
983 Delay = Delay + 1;
984 }
985#ifdef __DML_VBA_DEBUG__
986 dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
987 dml_print("DML::%s: Delay = %u\n", __func__, Delay);
988#endif
989
990 return Delay;
991}
992
993static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
994 struct CalculatePrefetchSchedule_params_st *p)
995{
996 struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
997
998 s->MyError = false;
999 s->DPPCycles = 0;
1000 s->DISPCLKCycles = 0;
1001 s->DSTTotalPixelsAfterScaler = 0.0;
1002 s->LineTime = 0.0;
1003 s->dst_y_prefetch_equ = 0.0;
1004 s->prefetch_bw_oto = 0.0;
1005 s->Tvm_oto = 0.0;
1006 s->Tr0_oto = 0.0;
1007 s->Tvm_oto_lines = 0.0;
1008 s->Tr0_oto_lines = 0.0;
1009 s->dst_y_prefetch_oto = 0.0;
1010 s->TimeForFetchingMetaPTE = 0.0;
1011 s->TimeForFetchingRowInVBlank = 0.0;
1012 s->LinesToRequestPrefetchPixelData = 0.0;
1013 s->HostVMDynamicLevelsTrips = 0;
1014 s->trip_to_mem = 0.0;
1015 s->Tvm_trips = 0.0;
1016 s->Tr0_trips = 0.0;
1017 s->Tvm_trips_rounded = 0.0;
1018 s->Tr0_trips_rounded = 0.0;
1019 s->max_Tsw = 0.0;
1020 s->Lsw_oto = 0.0;
1021 s->Tpre_rounded = 0.0;
1022 s->prefetch_bw_equ = 0.0;
1023 s->Tvm_equ = 0.0;
1024 s->Tr0_equ = 0.0;
1025 s->Tdmbf = 0.0;
1026 s->Tdmec = 0.0;
1027 s->Tdmsks = 0.0;
1028 s->prefetch_sw_bytes = 0.0;
1029 s->prefetch_bw_pr = 0.0;
1030 s->bytes_pp = 0.0;
1031 s->dep_bytes = 0.0;
1032 s->min_Lsw_oto = 0.0;
1033 s->Tsw_est1 = 0.0;
1034 s->Tsw_est3 = 0.0;
1035
1036 if (p->GPUVMEnable == true && p->HostVMEnable == true) {
1037 s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
1038 } else {
1039 s->HostVMDynamicLevelsTrips = 0;
1040 }
1041#ifdef __DML_VBA_DEBUG__
1042 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1043 dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
1044 dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
1045 dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
1046 dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
1047 dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
1048 dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
1049 dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1050#endif
1051 CalculateVUpdateAndDynamicMetadataParameters(
1052 MaxInterDCNTileRepeaters: p->MaxInterDCNTileRepeaters,
1053 Dppclk: p->myPipe->Dppclk,
1054 DISPCLK: p->myPipe->Dispclk,
1055 DCFClkDeepSleep: p->myPipe->DCFClkDeepSleep,
1056 PixelClock: p->myPipe->PixelClock,
1057 HTotal: p->myPipe->HTotal,
1058 VBlank: p->myPipe->VBlank,
1059 DynamicMetadataTransmittedBytes: p->DynamicMetadataTransmittedBytes,
1060 DynamicMetadataLinesBeforeActiveRequired: p->DynamicMetadataLinesBeforeActiveRequired,
1061 InterlaceEnable: p->myPipe->InterlaceEnable,
1062 ProgressiveToInterlaceUnitInOPP: p->myPipe->ProgressiveToInterlaceUnitInOPP,
1063 TSetup: p->TSetup,
1064
1065 // Output
1066 Tdmbf: &s->Tdmbf,
1067 Tdmec: &s->Tdmec,
1068 Tdmsks: &s->Tdmsks,
1069 VUpdateOffsetPix: p->VUpdateOffsetPix,
1070 VUpdateWidthPix: p->VUpdateWidthPix,
1071 VReadyOffsetPix: p->VReadyOffsetPix);
1072
1073 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
1074 s->trip_to_mem = p->UrgentLatency;
1075 s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
1076
1077 if (p->DynamicMetadataVMEnabled == true) {
1078 *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
1079 } else {
1080 *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
1081 }
1082
1083#ifdef __DML_VBA_ALLOW_DELTA__
1084 if (DynamicMetadataEnable == false) {
1085 *Tdmdl = 0.0;
1086 }
1087#endif
1088
1089 if (p->DynamicMetadataEnable == true) {
1090 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
1091 *p->NotEnoughTimeForDynamicMetadata = true;
1092 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1093 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1094 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1095 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1096 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1097 } else {
1098 *p->NotEnoughTimeForDynamicMetadata = false;
1099 }
1100 } else {
1101 *p->NotEnoughTimeForDynamicMetadata = false;
1102 }
1103
1104 *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
1105
1106 if (p->myPipe->ScalerEnabled)
1107 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
1108 else
1109 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
1110
1111 s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
1112
1113 s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
1114
1115 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
1116 return true;
1117
1118 *p->DSTXAfterScaler = (dml_uint_t) dml_round(val: s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, bankers_rounding: 1.0);
1119 *p->DSTXAfterScaler = (dml_uint_t) dml_round(val: *p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
1120 ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
1121 ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), bankers_rounding: 1.0);
1122
1123#ifdef __DML_VBA_DEBUG__
1124 dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
1125 dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
1126 dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1127 dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
1128 dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
1129 dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
1130 dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
1131 dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
1132 dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
1133#endif
1134
1135 if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
1136 *p->DSTYAfterScaler = 1;
1137 else
1138 *p->DSTYAfterScaler = 0;
1139
1140 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
1141 *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(x: s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, granularity: 1));
1142 *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
1143#ifdef __DML_VBA_DEBUG__
1144 dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
1145 dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
1146#endif
1147
1148 s->MyError = false;
1149
1150 s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
1151
1152 if (p->GPUVMEnable == true) {
1153 s->Tvm_trips_rounded = dml_ceil(x: 4.0 * s->Tvm_trips / s->LineTime, granularity: 1.0) / 4.0 * s->LineTime;
1154 s->Tr0_trips_rounded = dml_ceil(x: 4.0 * s->Tr0_trips / s->LineTime, granularity: 1.0) / 4.0 * s->LineTime;
1155 if (p->GPUVMPageTableLevels >= 3) {
1156 *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
1157 } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
1158 s->Tr0_trips_rounded = dml_ceil(x: 4.0 * p->UrgentExtraLatency / s->LineTime, granularity: 1.0) / 4.0 * s->LineTime;
1159 *p->Tno_bw = p->UrgentExtraLatency;
1160 } else {
1161 *p->Tno_bw = 0;
1162 }
1163 } else if (p->myPipe->DCCEnable == true) {
1164 s->Tvm_trips_rounded = s->LineTime / 4.0;
1165 s->Tr0_trips_rounded = dml_ceil(x: 4.0 * s->Tr0_trips / s->LineTime, granularity: 1.0) / 4.0 * s->LineTime;
1166 *p->Tno_bw = 0;
1167 } else {
1168 s->Tvm_trips_rounded = s->LineTime / 4.0;
1169 s->Tr0_trips_rounded = s->LineTime / 2.0;
1170 *p->Tno_bw = 0;
1171 }
1172 s->Tvm_trips_rounded = dml_max(x: s->Tvm_trips_rounded, y: s->LineTime / 4.0);
1173 s->Tr0_trips_rounded = dml_max(x: s->Tr0_trips_rounded, y: s->LineTime / 4.0);
1174
1175 if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
1176 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
1177 } else {
1178 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
1179 }
1180
1181 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
1182 if (p->myPipe->VRatio < 1.0)
1183 s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
1184
1185 s->max_Tsw = (dml_max(x: p->PrefetchSourceLinesY, y: p->PrefetchSourceLinesC) * s->LineTime);
1186
1187 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
1188 s->prefetch_bw_oto = dml_max(x: s->prefetch_bw_pr, y: s->prefetch_sw_bytes / s->max_Tsw);
1189
1190 s->min_Lsw_oto = dml_max(x: p->PrefetchSourceLinesY, y: p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
1191 s->min_Lsw_oto = dml_max(x: s->min_Lsw_oto, y: 1.0);
1192 s->Lsw_oto = dml_ceil(x: 4.0 * dml_max(x: s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, y: s->min_Lsw_oto), granularity: 1.0) / 4.0;
1193
1194 if (p->GPUVMEnable == true) {
1195 s->Tvm_oto = dml_max3(
1196 x: s->Tvm_trips,
1197 y: *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
1198 z: s->LineTime / 4.0);
1199 } else
1200 s->Tvm_oto = s->LineTime / 4.0;
1201
1202 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1203 s->Tr0_oto = dml_max4(
1204 a: s->Tr0_trips,
1205 b: (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
1206 c: (s->LineTime - s->Tvm_oto)/2.0,
1207 d: s->LineTime / 4.0);
1208#ifdef __DML_VBA_DEBUG__
1209 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
1210 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
1211 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
1212 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
1213#endif
1214 } else
1215 s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
1216
1217 s->Tvm_oto_lines = dml_ceil(x: 4.0 * s->Tvm_oto / s->LineTime, granularity: 1) / 4.0;
1218 s->Tr0_oto_lines = dml_ceil(x: 4.0 * s->Tr0_oto / s->LineTime, granularity: 1) / 4.0;
1219 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
1220
1221 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(x: p->TWait + p->TCalc, y: *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
1222
1223#ifdef __DML_VBA_DEBUG__
1224 dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
1225 dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
1226 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
1227 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
1228 dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
1229 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1230 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1231 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1232 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
1233 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1234 dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
1235 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
1236 dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
1237 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1238 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1239 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1240 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1241 dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
1242 dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
1243 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
1244 dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
1245 dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
1246 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
1247 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
1248 dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
1249 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
1250 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
1251#endif
1252
1253 s->dst_y_prefetch_equ = dml_floor(x: 4.0 * (s->dst_y_prefetch_equ + 0.125), granularity: 1) / 4.0;
1254 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
1255
1256 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
1257
1258 dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
1259 dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
1260 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
1261 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
1262 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
1263 dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
1264 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1265 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1266 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1267 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
1268 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1269 dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
1270 dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
1271
1272 s->dep_bytes = dml_max(x: p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, y: p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
1273
1274 if (s->prefetch_sw_bytes < s->dep_bytes) {
1275 s->prefetch_sw_bytes = 2 * s->dep_bytes;
1276 }
1277
1278 *p->DestinationLinesToRequestVMInVBlank = 0;
1279 *p->DestinationLinesToRequestRowInVBlank = 0;
1280 *p->VRatioPrefetchY = 0;
1281 *p->VRatioPrefetchC = 0;
1282 *p->RequiredPrefetchPixDataBWLuma = 0;
1283 if (s->dst_y_prefetch_equ > 1) {
1284
1285 if (s->Tpre_rounded - *p->Tno_bw > 0) {
1286 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
1287 + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
1288 + s->prefetch_sw_bytes)
1289 / (s->Tpre_rounded - *p->Tno_bw);
1290 s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
1291 } else
1292 s->PrefetchBandwidth1 = 0;
1293
1294 if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
1295 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
1296 (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
1297 }
1298
1299 if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
1300 s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1301 (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
1302 else
1303 s->PrefetchBandwidth2 = 0;
1304
1305 if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
1306 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1307 (s->Tpre_rounded - s->Tvm_trips_rounded);
1308 s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
1309 }
1310 else
1311 s->PrefetchBandwidth3 = 0;
1312
1313
1314 if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
1315 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
1316 }
1317
1318 if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
1319 s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
1320 else
1321 s->PrefetchBandwidth4 = 0;
1322
1323#ifdef __DML_VBA_DEBUG__
1324 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
1325 dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
1326 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
1327 dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
1328 dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
1329 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
1330 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
1331 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
1332 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
1333#endif
1334 {
1335 dml_bool_t Case1OK;
1336 dml_bool_t Case2OK;
1337 dml_bool_t Case3OK;
1338
1339 if (s->PrefetchBandwidth1 > 0) {
1340 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
1341 Case1OK = true;
1342 } else {
1343 Case1OK = false;
1344 }
1345 } else {
1346 Case1OK = false;
1347 }
1348
1349 if (s->PrefetchBandwidth2 > 0) {
1350 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
1351 Case2OK = true;
1352 } else {
1353 Case2OK = false;
1354 }
1355 } else {
1356 Case2OK = false;
1357 }
1358
1359 if (s->PrefetchBandwidth3 > 0) {
1360 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
1361 Case3OK = true;
1362 } else {
1363 Case3OK = false;
1364 }
1365 } else {
1366 Case3OK = false;
1367 }
1368
1369 if (Case1OK) {
1370 s->prefetch_bw_equ = s->PrefetchBandwidth1;
1371 } else if (Case2OK) {
1372 s->prefetch_bw_equ = s->PrefetchBandwidth2;
1373 } else if (Case3OK) {
1374 s->prefetch_bw_equ = s->PrefetchBandwidth3;
1375 } else {
1376 s->prefetch_bw_equ = s->PrefetchBandwidth4;
1377 }
1378
1379#ifdef __DML_VBA_DEBUG__
1380 dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
1381 dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
1382 dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
1383 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
1384#endif
1385
1386 if (s->prefetch_bw_equ > 0) {
1387 if (p->GPUVMEnable == true) {
1388 s->Tvm_equ = dml_max3(x: *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, y: s->Tvm_trips, z: s->LineTime / 4);
1389 } else {
1390 s->Tvm_equ = s->LineTime / 4;
1391 }
1392
1393 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1394 s->Tr0_equ = dml_max4(a: (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, b: s->Tr0_trips, c: (s->LineTime - s->Tvm_equ) / 2, d: s->LineTime / 4);
1395 } else {
1396 s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
1397 }
1398 } else {
1399 s->Tvm_equ = 0;
1400 s->Tr0_equ = 0;
1401 dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
1402 }
1403 }
1404
1405
1406 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
1407 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
1408 s->TimeForFetchingMetaPTE = s->Tvm_oto;
1409 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
1410
1411 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(x: 4.0 * s->TimeForFetchingMetaPTE / s->LineTime, granularity: 1.0) / 4.0;
1412 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(x: 4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, granularity: 1.0) / 4.0;
1413 } else {
1414 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
1415 s->TimeForFetchingMetaPTE = s->Tvm_equ;
1416 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
1417
1418 if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
1419 *p->DestinationLinesToRequestVMInVBlank = dml_floor(x: 4.0 * s->TimeForFetchingMetaPTE / s->LineTime, granularity: 1.0) / 4.0;
1420 *p->DestinationLinesToRequestRowInVBlank = dml_floor(x: 4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, granularity: 1.0) / 4.0;
1421 } else {
1422 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(x: 4.0 * s->TimeForFetchingMetaPTE / s->LineTime, granularity: 1.0) / 4.0;
1423 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(x: 4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, granularity: 1.0) / 4.0;
1424 }
1425 }
1426
1427 s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
1428
1429#ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
1431 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1432 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
1433 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1434 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1435 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1436 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
1437#endif
1438
1439 if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
1440 *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
1441 *p->VRatioPrefetchY = dml_max(x: *p->VRatioPrefetchY, y: 1.0);
1442#ifdef __DML_VBA_DEBUG__
1443 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1444 dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
1445 dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
1446#endif
1447 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
1448 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
1449 *p->VRatioPrefetchY = dml_max(x: *p->VRatioPrefetchY,
1450 y: (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
1451 } else {
1452 s->MyError = true;
1453 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
1454 *p->VRatioPrefetchY = 0;
1455 }
1456#ifdef __DML_VBA_DEBUG__
1457 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1458 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1459 dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
1460#endif
1461 }
1462
1463 *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
1464 *p->VRatioPrefetchC = dml_max(x: *p->VRatioPrefetchC, y: 1.0);
1465
1466#ifdef __DML_VBA_DEBUG__
1467 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1468 dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
1469 dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
1470#endif
1471 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
1472 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
1473 *p->VRatioPrefetchC = dml_max(x: *p->VRatioPrefetchC, y: (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
1474 } else {
1475 s->MyError = true;
1476 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
1477 *p->VRatioPrefetchC = 0;
1478 }
1479#ifdef __DML_VBA_DEBUG__
1480 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1481 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1482 dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
1483#endif
1484 }
1485
1486 *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
1487 * p->myPipe->BytePerPixelY
1488 * p->swath_width_luma_ub / s->LineTime;
1489
1490#ifdef __DML_VBA_DEBUG__
1491 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1492 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1493 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1494 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
1495#endif
1496 *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
1497 *p->myPipe->BytePerPixelC
1498 *p->swath_width_chroma_ub / s->LineTime;
1499 } else {
1500 s->MyError = true;
1501 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
1502 *p->VRatioPrefetchY = 0;
1503 *p->VRatioPrefetchC = 0;
1504 *p->RequiredPrefetchPixDataBWLuma = 0;
1505 *p->RequiredPrefetchPixDataBWChroma = 0;
1506 }
1507
1508 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
1509 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
1510 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
1511 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
1512 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
1513 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1514 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
1515 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
1516
1517 } else {
1518 s->MyError = true;
1519 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
1520 s->TimeForFetchingMetaPTE = 0;
1521 s->TimeForFetchingRowInVBlank = 0;
1522 *p->DestinationLinesToRequestVMInVBlank = 0;
1523 *p->DestinationLinesToRequestRowInVBlank = 0;
1524 s->LinesToRequestPrefetchPixelData = 0;
1525 *p->VRatioPrefetchY = 0;
1526 *p->VRatioPrefetchC = 0;
1527 *p->RequiredPrefetchPixDataBWLuma = 0;
1528 *p->RequiredPrefetchPixDataBWChroma = 0;
1529 }
1530
1531 {
1532 dml_float_t prefetch_vm_bw;
1533 dml_float_t prefetch_row_bw;
1534
1535 if (p->PDEAndMetaPTEBytesFrame == 0) {
1536 prefetch_vm_bw = 0;
1537 } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
1538#ifdef __DML_VBA_DEBUG__
1539 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1540 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1541 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1542 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1543#endif
1544 prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
1545#ifdef __DML_VBA_DEBUG__
1546 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1547#endif
1548 } else {
1549 prefetch_vm_bw = 0;
1550 s->MyError = true;
1551 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1552 }
1553
1554 if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
1555 prefetch_row_bw = 0;
1556 } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
1557 prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
1558
1559#ifdef __DML_VBA_DEBUG__
1560 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1561 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1562 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1563 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1564#endif
1565 } else {
1566 prefetch_row_bw = 0;
1567 s->MyError = true;
1568 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1569 }
1570
1571 *p->prefetch_vmrow_bw = dml_max(x: prefetch_vm_bw, y: prefetch_row_bw);
1572 }
1573
1574 if (s->MyError) {
1575 s->TimeForFetchingMetaPTE = 0;
1576 s->TimeForFetchingRowInVBlank = 0;
1577 *p->DestinationLinesToRequestVMInVBlank = 0;
1578 *p->DestinationLinesToRequestRowInVBlank = 0;
1579 *p->DestinationLinesForPrefetch = 0;
1580 s->LinesToRequestPrefetchPixelData = 0;
1581 *p->VRatioPrefetchY = 0;
1582 *p->VRatioPrefetchC = 0;
1583 *p->RequiredPrefetchPixDataBWLuma = 0;
1584 *p->RequiredPrefetchPixDataBWChroma = 0;
1585 }
1586
1587 return s->MyError;
1588} // CalculatePrefetchSchedule
1589
1590static void CalculateBytePerPixelAndBlockSizes(
1591 enum dml_source_format_class SourcePixelFormat,
1592 enum dml_swizzle_mode SurfaceTiling,
1593
1594 // Output
1595 dml_uint_t *BytePerPixelY,
1596 dml_uint_t *BytePerPixelC,
1597 dml_float_t *BytePerPixelDETY,
1598 dml_float_t *BytePerPixelDETC,
1599 dml_uint_t *BlockHeight256BytesY,
1600 dml_uint_t *BlockHeight256BytesC,
1601 dml_uint_t *BlockWidth256BytesY,
1602 dml_uint_t *BlockWidth256BytesC,
1603 dml_uint_t *MacroTileHeightY,
1604 dml_uint_t *MacroTileHeightC,
1605 dml_uint_t *MacroTileWidthY,
1606 dml_uint_t *MacroTileWidthC)
1607{
1608 if (SourcePixelFormat == dml_444_64) {
1609 *BytePerPixelDETY = 8;
1610 *BytePerPixelDETC = 0;
1611 *BytePerPixelY = 8;
1612 *BytePerPixelC = 0;
1613 } else if (SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_rgbe) {
1614 *BytePerPixelDETY = 4;
1615 *BytePerPixelDETC = 0;
1616 *BytePerPixelY = 4;
1617 *BytePerPixelC = 0;
1618 } else if (SourcePixelFormat == dml_444_16 || SourcePixelFormat == dml_mono_16) {
1619 *BytePerPixelDETY = 2;
1620 *BytePerPixelDETC = 0;
1621 *BytePerPixelY = 2;
1622 *BytePerPixelC = 0;
1623 } else if (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_mono_8) {
1624 *BytePerPixelDETY = 1;
1625 *BytePerPixelDETC = 0;
1626 *BytePerPixelY = 1;
1627 *BytePerPixelC = 0;
1628 } else if (SourcePixelFormat == dml_rgbe_alpha) {
1629 *BytePerPixelDETY = 4;
1630 *BytePerPixelDETC = 1;
1631 *BytePerPixelY = 4;
1632 *BytePerPixelC = 1;
1633 } else if (SourcePixelFormat == dml_420_8) {
1634 *BytePerPixelDETY = 1;
1635 *BytePerPixelDETC = 2;
1636 *BytePerPixelY = 1;
1637 *BytePerPixelC = 2;
1638 } else if (SourcePixelFormat == dml_420_12) {
1639 *BytePerPixelDETY = 2;
1640 *BytePerPixelDETC = 4;
1641 *BytePerPixelY = 2;
1642 *BytePerPixelC = 4;
1643 } else {
1644 *BytePerPixelDETY = (dml_float_t) (4.0 / 3);
1645 *BytePerPixelDETC = (dml_float_t) (8.0 / 3);
1646 *BytePerPixelY = 2;
1647 *BytePerPixelC = 4;
1648 }
1649#ifdef __DML_VBA_DEBUG__
1650 dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
1651 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
1652 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
1653 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
1654 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
1655#endif
1656 if ((SourcePixelFormat == dml_444_64 || SourcePixelFormat == dml_444_32
1657 || SourcePixelFormat == dml_444_16
1658 || SourcePixelFormat == dml_444_8
1659 || SourcePixelFormat == dml_mono_16
1660 || SourcePixelFormat == dml_mono_8
1661 || SourcePixelFormat == dml_rgbe)) {
1662 if (SurfaceTiling == dml_sw_linear) {
1663 *BlockHeight256BytesY = 1;
1664 } else if (SourcePixelFormat == dml_444_64) {
1665 *BlockHeight256BytesY = 4;
1666 } else if (SourcePixelFormat == dml_444_8) {
1667 *BlockHeight256BytesY = 16;
1668 } else {
1669 *BlockHeight256BytesY = 8;
1670 }
1671 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
1672 *BlockHeight256BytesC = 0;
1673 *BlockWidth256BytesC = 0;
1674 } else {
1675 if (SurfaceTiling == dml_sw_linear) {
1676 *BlockHeight256BytesY = 1;
1677 *BlockHeight256BytesC = 1;
1678 } else if (SourcePixelFormat == dml_rgbe_alpha) {
1679 *BlockHeight256BytesY = 8;
1680 *BlockHeight256BytesC = 16;
1681 } else if (SourcePixelFormat == dml_420_8) {
1682 *BlockHeight256BytesY = 16;
1683 *BlockHeight256BytesC = 8;
1684 } else {
1685 *BlockHeight256BytesY = 8;
1686 *BlockHeight256BytesC = 8;
1687 }
1688 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
1689 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
1690 }
1691#ifdef __DML_VBA_DEBUG__
1692 dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
1693 dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
1694 dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
1695 dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
1696#endif
1697
1698 if (SurfaceTiling == dml_sw_linear) {
1699 *MacroTileHeightY = *BlockHeight256BytesY;
1700 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
1701 *MacroTileHeightC = *BlockHeight256BytesC;
1702 if (*MacroTileHeightC == 0) {
1703 *MacroTileWidthC = 0;
1704 } else {
1705 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
1706 }
1707 } else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
1708 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
1709 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
1710 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
1711 if (*MacroTileHeightC == 0) {
1712 *MacroTileWidthC = 0;
1713 } else {
1714 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
1715 }
1716 } else {
1717 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
1718 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
1719 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
1720 if (*MacroTileHeightC == 0) {
1721 *MacroTileWidthC = 0;
1722 } else {
1723 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
1724 }
1725 }
1726
1727#ifdef __DML_VBA_DEBUG__
1728 dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
1729 dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
1730 dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
1731 dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
1732#endif
1733} // CalculateBytePerPixelAndBlockSizes
1734
1735static dml_float_t CalculateTWait(
1736 dml_uint_t PrefetchMode,
1737 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
1738 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
1739 dml_bool_t DRRDisplay,
1740 dml_float_t DRAMClockChangeLatency,
1741 dml_float_t FCLKChangeLatency,
1742 dml_float_t UrgentLatency,
1743 dml_float_t SREnterPlusExitTime)
1744{
1745 dml_float_t TWait = 0.0;
1746
1747 if (PrefetchMode == 0 &&
1748 !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) &&
1749 !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
1750 TWait = dml_max3(x: DRAMClockChangeLatency + UrgentLatency, y: SREnterPlusExitTime, z: UrgentLatency);
1751 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1752 TWait = dml_max3(x: FCLKChangeLatency + UrgentLatency, y: SREnterPlusExitTime, z: UrgentLatency);
1753 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1754 TWait = dml_max(x: SREnterPlusExitTime, y: UrgentLatency);
1755 } else {
1756 TWait = UrgentLatency;
1757 }
1758
1759#ifdef __DML_VBA_DEBUG__
1760 dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
1761 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
1762#endif
1763 return TWait;
1764} // CalculateTWait
1765
1766
1767/// @brief Calculate the "starting point" for prefetch calculation
1768/// if AllowForPStateChangeOrStutterInVBlank is set as a particular requirement, then the mode evalulation
1769/// will only be done at the given mode. If no specific requirement (i.e. *_if_possible), then will just go from
1770/// try all the prefetch mode in decreasing order of "difficulty" (start from 0 which means all power saving
1771/// features).
1772static void CalculatePrefetchMode(
1773 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
1774 dml_uint_t *MinPrefetchMode,
1775 dml_uint_t *MaxPrefetchMode)
1776{
1777 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) {
1778 *MinPrefetchMode = 0; // consider all pwr saving features
1779 *MaxPrefetchMode = 3; // consider just urgent latency
1780 } else {
1781 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) {
1782 *MinPrefetchMode = 3;
1783 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) {
1784 *MinPrefetchMode = 2;
1785 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) {
1786 *MinPrefetchMode = 1;
1787 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) {
1788 *MinPrefetchMode = 0;
1789 } else {
1790 dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
1791 ASSERT(0);
1792 }
1793 *MaxPrefetchMode = *MinPrefetchMode;
1794 }
1795} // CalculatePrefetchMode
1796
1797static dml_float_t CalculateWriteBackDISPCLK(
1798 enum dml_source_format_class WritebackPixelFormat,
1799 dml_float_t PixelClock,
1800 dml_float_t WritebackHRatio,
1801 dml_float_t WritebackVRatio,
1802 dml_uint_t WritebackHTaps,
1803 dml_uint_t WritebackVTaps,
1804 dml_uint_t WritebackSourceWidth,
1805 dml_uint_t WritebackDestinationWidth,
1806 dml_uint_t HTotal,
1807 dml_uint_t WritebackLineBufferSize,
1808 dml_float_t DISPCLKDPPCLKVCOSpeed)
1809{
1810 dml_float_t DISPCLK_H, DISPCLK_V, DISPCLK_HB;
1811
1812 DISPCLK_H = PixelClock * dml_ceil(x: WritebackHTaps / 8.0, granularity: 1) / WritebackHRatio;
1813 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(x: WritebackDestinationWidth / 6.0, granularity: 1) + 8.0) / (dml_float_t) HTotal;
1814 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth;
1815 return RoundToDFSGranularity(Clock: dml_max3(x: DISPCLK_H, y: DISPCLK_V, z: DISPCLK_HB), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
1816}
1817
1818static dml_float_t CalculateWriteBackDelay(
1819 enum dml_source_format_class WritebackPixelFormat,
1820 dml_float_t WritebackHRatio,
1821 dml_float_t WritebackVRatio,
1822 dml_uint_t WritebackVTaps,
1823 dml_uint_t WritebackDestinationWidth,
1824 dml_uint_t WritebackDestinationHeight,
1825 dml_uint_t WritebackSourceHeight,
1826 dml_uint_t HTotal)
1827{
1828 dml_float_t CalculateWriteBackDelay;
1829 dml_float_t Line_length;
1830 dml_float_t Output_lines_last_notclamped;
1831 dml_float_t WritebackVInit;
1832
1833 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
1834 Line_length = dml_max(x: (dml_float_t) WritebackDestinationWidth, y: dml_ceil(x: (dml_float_t)WritebackDestinationWidth / 6.0, granularity: 1.0) * WritebackVTaps);
1835 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(x: ((dml_float_t)WritebackSourceHeight - (dml_float_t) WritebackVInit) / (dml_float_t)WritebackVRatio, granularity: 1.0);
1836 if (Output_lines_last_notclamped < 0) {
1837 CalculateWriteBackDelay = 0;
1838 } else {
1839 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
1840 }
1841 return CalculateWriteBackDelay;
1842}
1843
1844static void CalculateVUpdateAndDynamicMetadataParameters(
1845 dml_uint_t MaxInterDCNTileRepeaters,
1846 dml_float_t Dppclk,
1847 dml_float_t Dispclk,
1848 dml_float_t DCFClkDeepSleep,
1849 dml_float_t PixelClock,
1850 dml_uint_t HTotal,
1851 dml_uint_t VBlank,
1852 dml_uint_t DynamicMetadataTransmittedBytes,
1853 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
1854 dml_uint_t InterlaceEnable,
1855 dml_bool_t ProgressiveToInterlaceUnitInOPP,
1856
1857 // Output
1858 dml_float_t *TSetup,
1859 dml_float_t *Tdmbf,
1860 dml_float_t *Tdmec,
1861 dml_float_t *Tdmsks,
1862 dml_uint_t *VUpdateOffsetPix,
1863 dml_uint_t *VUpdateWidthPix,
1864 dml_uint_t *VReadyOffsetPix)
1865{
1866 dml_float_t TotalRepeaterDelayTime;
1867 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
1868 *VUpdateWidthPix = (dml_uint_t)(dml_ceil(x: (14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, granularity: 1.0));
1869 *VReadyOffsetPix = (dml_uint_t)(dml_ceil(x: dml_max(x: 150.0 / Dppclk, y: TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, granularity: 1.0));
1870 *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(x: HTotal / 4.0, granularity: 1.0));
1871 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
1872 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
1873 *Tdmec = HTotal / PixelClock;
1874
1875 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
1876 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
1877 } else {
1878 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
1879 }
1880 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
1881 *Tdmsks = *Tdmsks / 2;
1882 }
1883#ifdef __DML_VBA_DEBUG__
1884 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
1885 dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
1886 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
1887 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
1888 dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
1889 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
1890 dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
1891 dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);
1892
1893 dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
1894 dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
1895 dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
1896
1897 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
1898#endif
1899}
1900
1901static void CalculateRowBandwidth(
1902 dml_bool_t GPUVMEnable,
1903 enum dml_source_format_class SourcePixelFormat,
1904 dml_float_t VRatio,
1905 dml_float_t VRatioChroma,
1906 dml_bool_t DCCEnable,
1907 dml_float_t LineTime,
1908 dml_uint_t MetaRowByteLuma,
1909 dml_uint_t MetaRowByteChroma,
1910 dml_uint_t meta_row_height_luma,
1911 dml_uint_t meta_row_height_chroma,
1912 dml_uint_t PixelPTEBytesPerRowLuma,
1913 dml_uint_t PixelPTEBytesPerRowChroma,
1914 dml_uint_t dpte_row_height_luma,
1915 dml_uint_t dpte_row_height_chroma,
1916 // Output
1917 dml_float_t *meta_row_bw,
1918 dml_float_t *dpte_row_bw)
1919{
1920 if (DCCEnable != true) {
1921 *meta_row_bw = 0;
1922 } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) {
1923 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
1924 + VRatioChroma * MetaRowByteChroma
1925 / (meta_row_height_chroma * LineTime);
1926 } else {
1927 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
1928 }
1929
1930 if (GPUVMEnable != true) {
1931 *dpte_row_bw = 0;
1932 } else if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha) {
1933 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1934 + VRatioChroma * PixelPTEBytesPerRowChroma
1935 / (dpte_row_height_chroma * LineTime);
1936 } else {
1937 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1938 }
1939}
1940
1941/// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule
1942/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
1943static void CalculateFlipSchedule(
1944 dml_float_t HostVMInefficiencyFactor,
1945 dml_float_t UrgentExtraLatency,
1946 dml_float_t UrgentLatency,
1947 dml_uint_t GPUVMMaxPageTableLevels,
1948 dml_bool_t HostVMEnable,
1949 dml_uint_t HostVMMaxNonCachedPageTableLevels,
1950 dml_bool_t GPUVMEnable,
1951 dml_uint_t HostVMMinPageSize,
1952 dml_float_t PDEAndMetaPTEBytesPerFrame,
1953 dml_float_t MetaRowBytes,
1954 dml_float_t DPTEBytesPerRow,
1955 dml_float_t BandwidthAvailableForImmediateFlip,
1956 dml_uint_t TotImmediateFlipBytes,
1957 enum dml_source_format_class SourcePixelFormat,
1958 dml_float_t LineTime,
1959 dml_float_t VRatio,
1960 dml_float_t VRatioChroma,
1961 dml_float_t Tno_bw,
1962 dml_bool_t DCCEnable,
1963 dml_uint_t dpte_row_height,
1964 dml_uint_t meta_row_height,
1965 dml_uint_t dpte_row_height_chroma,
1966 dml_uint_t meta_row_height_chroma,
1967 dml_bool_t use_one_row_for_frame_flip,
1968
1969 // Output
1970 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
1971 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
1972 dml_float_t *final_flip_bw,
1973 dml_bool_t *ImmediateFlipSupportedForPipe)
1974{
1975 dml_float_t min_row_time = 0.0;
1976 dml_uint_t HostVMDynamicLevelsTrips = 0;
1977 dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0;
1978 dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0;
1979 dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe
1980
1981 if (GPUVMEnable == true && HostVMEnable == true) {
1982 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1983 } else {
1984 HostVMDynamicLevelsTrips = 0;
1985 }
1986
1987#ifdef __DML_VBA_DEBUG__
1988 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
1989 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1990 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
1991 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
1992#endif
1993
1994 if (TotImmediateFlipBytes > 0) {
1995 if (use_one_row_for_frame_flip) {
1996 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
1997 } else {
1998 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
1999 }
2000 if (GPUVMEnable == true) {
2001 TimeForFetchingMetaPTEImmediateFlip = dml_max3(x: Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
2002 y: UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
2003 z: LineTime / 4.0);
2004 } else {
2005 TimeForFetchingMetaPTEImmediateFlip = 0;
2006 }
2007 if ((GPUVMEnable == true || DCCEnable == true)) {
2008 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(x: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, y: UrgentLatency * (HostVMDynamicLevelsTrips + 1), z: LineTime / 4.0);
2009 } else {
2010 TimeForFetchingRowInVBlankImmediateFlip = 0;
2011 }
2012
2013 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(x: 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), granularity: 1.0) / 4.0;
2014 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(x: 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), granularity: 1.0) / 4.0;
2015
2016 if (GPUVMEnable == true) {
2017 *final_flip_bw = dml_max(x: PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
2018 y: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
2019 } else if ((GPUVMEnable == true || DCCEnable == true)) {
2020 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
2021 } else {
2022 *final_flip_bw = 0;
2023 }
2024 } else {
2025 TimeForFetchingMetaPTEImmediateFlip = 0;
2026 TimeForFetchingRowInVBlankImmediateFlip = 0;
2027 *DestinationLinesToRequestVMInImmediateFlip = 0;
2028 *DestinationLinesToRequestRowInImmediateFlip = 0;
2029 *final_flip_bw = 0;
2030 }
2031
2032 if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) {
2033 if (GPUVMEnable == true && DCCEnable != true) {
2034 min_row_time = dml_min(x: dpte_row_height * LineTime / VRatio, y: dpte_row_height_chroma * LineTime / VRatioChroma);
2035 } else if (GPUVMEnable != true && DCCEnable == true) {
2036 min_row_time = dml_min(x: meta_row_height * LineTime / VRatio, y: meta_row_height_chroma * LineTime / VRatioChroma);
2037 } else {
2038 min_row_time = dml_min4(x: dpte_row_height * LineTime / VRatio, y: meta_row_height * LineTime / VRatio, z: dpte_row_height_chroma * LineTime / VRatioChroma, w: meta_row_height_chroma * LineTime / VRatioChroma);
2039 }
2040 } else {
2041 if (GPUVMEnable == true && DCCEnable != true) {
2042 min_row_time = dpte_row_height * LineTime / VRatio;
2043 } else if (GPUVMEnable != true && DCCEnable == true) {
2044 min_row_time = meta_row_height * LineTime / VRatio;
2045 } else {
2046 min_row_time = dml_min(x: dpte_row_height * LineTime / VRatio, y: meta_row_height * LineTime / VRatio);
2047 }
2048 }
2049
2050 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
2051 *ImmediateFlipSupportedForPipe = false;
2052 } else {
2053 *ImmediateFlipSupportedForPipe = true;
2054 }
2055
2056#ifdef __DML_VBA_DEBUG__
2057 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
2058 dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
2059
2060 dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
2061 dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
2062 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
2063 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
2064 dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW);
2065 dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
2066 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
2067 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
2068 dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
2069
2070 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
2071 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
2072 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
2073 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
2074 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
2075 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
2076#endif
2077} // CalculateFlipSchedule
2078
2079static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
2080{
2081 if (Clock <= 0.0)
2082 return 0.0;
2083 else {
2084 if (round_up)
2085 return VCOSpeed * 4.0 / dml_floor(x: VCOSpeed * 4.0 / Clock, granularity: 1.0);
2086 else
2087 return VCOSpeed * 4.0 / dml_ceil(x: VCOSpeed * 4.0 / Clock, granularity: 1.0);
2088 }
2089}
2090
2091static void CalculateDCCConfiguration(
2092 dml_bool_t DCCEnabled,
2093 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
2094 enum dml_source_format_class SourcePixelFormat,
2095 dml_uint_t SurfaceWidthLuma,
2096 dml_uint_t SurfaceWidthChroma,
2097 dml_uint_t SurfaceHeightLuma,
2098 dml_uint_t SurfaceHeightChroma,
2099 dml_uint_t nomDETInKByte,
2100 dml_uint_t RequestHeight256ByteLuma,
2101 dml_uint_t RequestHeight256ByteChroma,
2102 enum dml_swizzle_mode TilingFormat,
2103 dml_uint_t BytePerPixelY,
2104 dml_uint_t BytePerPixelC,
2105 dml_float_t BytePerPixelDETY,
2106 dml_float_t BytePerPixelDETC,
2107 enum dml_rotation_angle SourceScan,
2108 // Output
2109 dml_uint_t *MaxUncompressedBlockLuma,
2110 dml_uint_t *MaxUncompressedBlockChroma,
2111 dml_uint_t *MaxCompressedBlockLuma,
2112 dml_uint_t *MaxCompressedBlockChroma,
2113 dml_uint_t *IndependentBlockLuma,
2114 dml_uint_t *IndependentBlockChroma)
2115{
2116 dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
2117
2118 dml_uint_t yuv420;
2119 dml_uint_t horz_div_l;
2120 dml_uint_t horz_div_c;
2121 dml_uint_t vert_div_l;
2122 dml_uint_t vert_div_c;
2123
2124 dml_uint_t swath_buf_size;
2125 dml_float_t detile_buf_vp_horz_limit;
2126 dml_float_t detile_buf_vp_vert_limit;
2127
2128 dml_uint_t MAS_vp_horz_limit;
2129 dml_uint_t MAS_vp_vert_limit;
2130 dml_uint_t max_vp_horz_width;
2131 dml_uint_t max_vp_vert_height;
2132 dml_uint_t eff_surf_width_l;
2133 dml_uint_t eff_surf_width_c;
2134 dml_uint_t eff_surf_height_l;
2135 dml_uint_t eff_surf_height_c;
2136
2137 dml_uint_t full_swath_bytes_horz_wc_l;
2138 dml_uint_t full_swath_bytes_horz_wc_c;
2139 dml_uint_t full_swath_bytes_vert_wc_l;
2140 dml_uint_t full_swath_bytes_vert_wc_c;
2141
2142 dml_uint_t req128_horz_wc_l;
2143 dml_uint_t req128_horz_wc_c;
2144 dml_uint_t req128_vert_wc_l;
2145 dml_uint_t req128_vert_wc_c;
2146
2147 dml_uint_t segment_order_horz_contiguous_luma;
2148 dml_uint_t segment_order_horz_contiguous_chroma;
2149 dml_uint_t segment_order_vert_contiguous_luma;
2150 dml_uint_t segment_order_vert_contiguous_chroma;
2151
2152 typedef enum{
2153 REQ_256Bytes,
2154 REQ_128BytesNonContiguous,
2155 REQ_128BytesContiguous,
2156 REQ_NA
2157 } RequestType;
2158
2159 RequestType RequestLuma;
2160 RequestType RequestChroma;
2161
2162 yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
2163 horz_div_l = 1;
2164 horz_div_c = 1;
2165 vert_div_l = 1;
2166 vert_div_c = 1;
2167
2168 if (BytePerPixelY == 1)
2169 vert_div_l = 0;
2170 if (BytePerPixelC == 1)
2171 vert_div_c = 0;
2172
2173 if (BytePerPixelC == 0) {
2174 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2175 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2176 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2177 } else {
2178 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2179 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2180 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2181 }
2182
2183 if (SourcePixelFormat == dml_420_10) {
2184 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2185 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2186 }
2187
2188 detile_buf_vp_horz_limit = dml_floor(x: detile_buf_vp_horz_limit - 1, granularity: 16);
2189 detile_buf_vp_vert_limit = dml_floor(x: detile_buf_vp_vert_limit - 1, granularity: 16);
2190
2191 MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
2192 MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2193 max_vp_horz_width = (dml_uint_t)(dml_min(x: (dml_float_t) MAS_vp_horz_limit, y: detile_buf_vp_horz_limit));
2194 max_vp_vert_height = (dml_uint_t)(dml_min(x: (dml_float_t) MAS_vp_vert_limit, y: detile_buf_vp_vert_limit));
2195 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2196 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2197 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2198 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2199
2200 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2201 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2202 if (BytePerPixelC > 0) {
2203 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2204 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2205 } else {
2206 full_swath_bytes_horz_wc_c = 0;
2207 full_swath_bytes_vert_wc_c = 0;
2208 }
2209
2210 if (SourcePixelFormat == dml_420_10) {
2211 full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil(x: (dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, granularity: 256.0));
2212 full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil(x: (dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, granularity: 256.0));
2213 full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil(x: (dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, granularity: 256.0));
2214 full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil(x: (dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, granularity: 256.0));
2215 }
2216
2217 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2218 req128_horz_wc_l = 0;
2219 req128_horz_wc_c = 0;
2220 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2221 req128_horz_wc_l = 0;
2222 req128_horz_wc_c = 1;
2223 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2224 req128_horz_wc_l = 1;
2225 req128_horz_wc_c = 0;
2226 } else {
2227 req128_horz_wc_l = 1;
2228 req128_horz_wc_c = 1;
2229 }
2230
2231 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2232 req128_vert_wc_l = 0;
2233 req128_vert_wc_c = 0;
2234 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2235 req128_vert_wc_l = 0;
2236 req128_vert_wc_c = 1;
2237 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2238 req128_vert_wc_l = 1;
2239 req128_vert_wc_c = 0;
2240 } else {
2241 req128_vert_wc_l = 1;
2242 req128_vert_wc_c = 1;
2243 }
2244
2245 if (BytePerPixelY == 2) {
2246 segment_order_horz_contiguous_luma = 0;
2247 segment_order_vert_contiguous_luma = 1;
2248 } else {
2249 segment_order_horz_contiguous_luma = 1;
2250 segment_order_vert_contiguous_luma = 0;
2251 }
2252
2253 if (BytePerPixelC == 2) {
2254 segment_order_horz_contiguous_chroma = 0;
2255 segment_order_vert_contiguous_chroma = 1;
2256 } else {
2257 segment_order_horz_contiguous_chroma = 1;
2258 segment_order_vert_contiguous_chroma = 0;
2259 }
2260#ifdef __DML_VBA_DEBUG__
2261 dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2262 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2263 dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2264 dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2265 dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2266 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2267 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2268 dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2269 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2270#endif
2271
2272 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2273 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2274 RequestLuma = REQ_256Bytes;
2275 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2276 RequestLuma = REQ_128BytesNonContiguous;
2277 } else {
2278 RequestLuma = REQ_128BytesContiguous;
2279 }
2280 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2281 RequestChroma = REQ_256Bytes;
2282 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2283 RequestChroma = REQ_128BytesNonContiguous;
2284 } else {
2285 RequestChroma = REQ_128BytesContiguous;
2286 }
2287 } else if (!dml_is_vertical_rotation(scan: SourceScan)) {
2288 if (req128_horz_wc_l == 0) {
2289 RequestLuma = REQ_256Bytes;
2290 } else if (segment_order_horz_contiguous_luma == 0) {
2291 RequestLuma = REQ_128BytesNonContiguous;
2292 } else {
2293 RequestLuma = REQ_128BytesContiguous;
2294 }
2295 if (req128_horz_wc_c == 0) {
2296 RequestChroma = REQ_256Bytes;
2297 } else if (segment_order_horz_contiguous_chroma == 0) {
2298 RequestChroma = REQ_128BytesNonContiguous;
2299 } else {
2300 RequestChroma = REQ_128BytesContiguous;
2301 }
2302 } else {
2303 if (req128_vert_wc_l == 0) {
2304 RequestLuma = REQ_256Bytes;
2305 } else if (segment_order_vert_contiguous_luma == 0) {
2306 RequestLuma = REQ_128BytesNonContiguous;
2307 } else {
2308 RequestLuma = REQ_128BytesContiguous;
2309 }
2310 if (req128_vert_wc_c == 0) {
2311 RequestChroma = REQ_256Bytes;
2312 } else if (segment_order_vert_contiguous_chroma == 0) {
2313 RequestChroma = REQ_128BytesNonContiguous;
2314 } else {
2315 RequestChroma = REQ_128BytesContiguous;
2316 }
2317 }
2318
2319 if (RequestLuma == REQ_256Bytes) {
2320 *MaxUncompressedBlockLuma = 256;
2321 *MaxCompressedBlockLuma = 256;
2322 *IndependentBlockLuma = 0;
2323 } else if (RequestLuma == REQ_128BytesContiguous) {
2324 *MaxUncompressedBlockLuma = 256;
2325 *MaxCompressedBlockLuma = 128;
2326 *IndependentBlockLuma = 128;
2327 } else {
2328 *MaxUncompressedBlockLuma = 256;
2329 *MaxCompressedBlockLuma = 64;
2330 *IndependentBlockLuma = 64;
2331 }
2332
2333 if (RequestChroma == REQ_256Bytes) {
2334 *MaxUncompressedBlockChroma = 256;
2335 *MaxCompressedBlockChroma = 256;
2336 *IndependentBlockChroma = 0;
2337 } else if (RequestChroma == REQ_128BytesContiguous) {
2338 *MaxUncompressedBlockChroma = 256;
2339 *MaxCompressedBlockChroma = 128;
2340 *IndependentBlockChroma = 128;
2341 } else {
2342 *MaxUncompressedBlockChroma = 256;
2343 *MaxCompressedBlockChroma = 64;
2344 *IndependentBlockChroma = 64;
2345 }
2346
2347 if (DCCEnabled != true || BytePerPixelC == 0) {
2348 *MaxUncompressedBlockChroma = 0;
2349 *MaxCompressedBlockChroma = 0;
2350 *IndependentBlockChroma = 0;
2351 }
2352
2353 if (DCCEnabled != true) {
2354 *MaxUncompressedBlockLuma = 0;
2355 *MaxCompressedBlockLuma = 0;
2356 *IndependentBlockLuma = 0;
2357 }
2358
2359#ifdef __DML_VBA_DEBUG__
2360 dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2361 dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2362 dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2363 dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2364 dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2365 dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2366#endif
2367
2368} // CalculateDCCConfiguration
2369
2370static dml_uint_t CalculatePrefetchSourceLines(
2371 dml_float_t VRatio,
2372 dml_uint_t VTaps,
2373 dml_bool_t Interlace,
2374 dml_bool_t ProgressiveToInterlaceUnitInOPP,
2375 dml_uint_t SwathHeight,
2376 enum dml_rotation_angle SourceScan,
2377 dml_bool_t ViewportStationary,
2378 dml_uint_t SwathWidth,
2379 dml_uint_t ViewportHeight,
2380 dml_uint_t ViewportXStart,
2381 dml_uint_t ViewportYStart,
2382
2383 // Output
2384 dml_uint_t *VInitPreFill,
2385 dml_uint_t *MaxNumSwath)
2386{
2387
2388 dml_uint_t vp_start_rot = 0;
2389 dml_uint_t sw0_tmp = 0;
2390 dml_uint_t MaxPartialSwath = 0;
2391 dml_float_t numLines = 0;
2392
2393#ifdef __DML_VBA_DEBUG__
2394 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2395 dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
2396 dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
2397 dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
2398 dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
2399 dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
2400#endif
2401 if (ProgressiveToInterlaceUnitInOPP)
2402 *VInitPreFill = (dml_uint_t)(dml_floor(x: (VRatio + (dml_float_t) VTaps + 1) / 2.0, granularity: 1));
2403 else
2404 *VInitPreFill = (dml_uint_t)(dml_floor(x: (VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, granularity: 1));
2405
2406 if (ViewportStationary) {
2407 if (SourceScan == dml_rotation_180 || SourceScan == dml_rotation_180m) {
2408 vp_start_rot = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2409 } else if (SourceScan == dml_rotation_270 || SourceScan == dml_rotation_90m) {
2410 vp_start_rot = ViewportXStart;
2411 } else if (SourceScan == dml_rotation_90 || SourceScan == dml_rotation_270m) {
2412 vp_start_rot = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2413 } else {
2414 vp_start_rot = ViewportYStart;
2415 }
2416 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2417 if (sw0_tmp < *VInitPreFill) {
2418 *MaxNumSwath = (dml_uint_t)(dml_ceil(x: (*VInitPreFill - sw0_tmp) / (dml_float_t) SwathHeight, granularity: 1) + 1);
2419 } else {
2420 *MaxNumSwath = 1;
2421 }
2422 MaxPartialSwath = (dml_uint_t)(dml_max(x: 1, y: (dml_uint_t) (vp_start_rot + *VInitPreFill - 1) % SwathHeight));
2423 } else {
2424 *MaxNumSwath = (dml_uint_t)(dml_ceil(x: (*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, granularity: 1) + 1);
2425 if (*VInitPreFill > 1) {
2426 MaxPartialSwath = (dml_uint_t)(dml_max(x: 1, y: (dml_uint_t) (*VInitPreFill - 2) % SwathHeight));
2427 } else {
2428 MaxPartialSwath = (dml_uint_t)(dml_max(x: 1, y: (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight));
2429 }
2430 }
2431 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2432
2433#ifdef __DML_VBA_DEBUG__
2434 dml_print("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
2435 dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
2436 dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
2437 dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
2438 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2439#endif
2440 return (dml_uint_t)(numLines);
2441
2442} // CalculatePrefetchSourceLines
2443
2444static dml_uint_t CalculateVMAndRowBytes(
2445 dml_bool_t ViewportStationary,
2446 dml_bool_t DCCEnable,
2447 dml_uint_t NumberOfDPPs,
2448 dml_uint_t BlockHeight256Bytes,
2449 dml_uint_t BlockWidth256Bytes,
2450 enum dml_source_format_class SourcePixelFormat,
2451 dml_uint_t SurfaceTiling,
2452 dml_uint_t BytePerPixel,
2453 enum dml_rotation_angle SourceScan,
2454 dml_uint_t SwathWidth,
2455 dml_uint_t ViewportHeight,
2456 dml_uint_t ViewportXStart,
2457 dml_uint_t ViewportYStart,
2458 dml_bool_t GPUVMEnable,
2459 dml_uint_t GPUVMMaxPageTableLevels,
2460 dml_uint_t GPUVMMinPageSizeKBytes,
2461 dml_uint_t PTEBufferSizeInRequests,
2462 dml_uint_t Pitch,
2463 dml_uint_t DCCMetaPitch,
2464 dml_uint_t MacroTileWidth,
2465 dml_uint_t MacroTileHeight,
2466
2467 // Output
2468 dml_uint_t *MetaRowByte,
2469 dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
2470 dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
2471 dml_uint_t *dpte_row_width_ub,
2472 dml_uint_t *dpte_row_height,
2473 dml_uint_t *dpte_row_height_linear,
2474 dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
2475 dml_uint_t *dpte_row_width_ub_one_row_per_frame,
2476 dml_uint_t *dpte_row_height_one_row_per_frame,
2477 dml_uint_t *MetaRequestWidth,
2478 dml_uint_t *MetaRequestHeight,
2479 dml_uint_t *meta_row_width,
2480 dml_uint_t *meta_row_height,
2481 dml_uint_t *PixelPTEReqWidth,
2482 dml_uint_t *PixelPTEReqHeight,
2483 dml_uint_t *PTERequestSize,
2484 dml_uint_t *DPDE0BytesFrame,
2485 dml_uint_t *MetaPTEBytesFrame)
2486{
2487 dml_uint_t MPDEBytesFrame;
2488 dml_uint_t DCCMetaSurfaceBytes;
2489 dml_uint_t ExtraDPDEBytesFrame;
2490 dml_uint_t PDEAndMetaPTEBytesFrame;
2491 dml_uint_t MacroTileSizeBytes;
2492 dml_uint_t vp_height_meta_ub;
2493 dml_uint_t vp_height_dpte_ub;
2494
2495 dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2496
2497 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2498 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2499 if (SurfaceTiling == dml_sw_linear) {
2500 *meta_row_height = 32;
2501 *meta_row_width = (dml_uint_t)(dml_floor(x: ViewportXStart + SwathWidth + *MetaRequestWidth - 1, granularity: *MetaRequestWidth) - dml_floor(x: ViewportXStart, granularity: *MetaRequestWidth));
2502 } else if (!dml_is_vertical_rotation(scan: SourceScan)) {
2503 *meta_row_height = *MetaRequestHeight;
2504 if (ViewportStationary && NumberOfDPPs == 1) {
2505 *meta_row_width = (dml_uint_t)(dml_floor(x: ViewportXStart + SwathWidth + *MetaRequestWidth - 1, granularity: *MetaRequestWidth) - dml_floor(x: ViewportXStart, granularity: *MetaRequestWidth));
2506 } else {
2507 *meta_row_width = (dml_uint_t)(dml_ceil(x: SwathWidth - 1, granularity: *MetaRequestWidth) + *MetaRequestWidth);
2508 }
2509 *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
2510 } else {
2511 *meta_row_height = *MetaRequestWidth;
2512 if (ViewportStationary && NumberOfDPPs == 1) {
2513 *meta_row_width = (dml_uint_t)(dml_floor(x: ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, granularity: *MetaRequestHeight) - dml_floor(x: ViewportYStart, granularity: *MetaRequestHeight));
2514 } else {
2515 *meta_row_width = (dml_uint_t)(dml_ceil(x: SwathWidth - 1, granularity: *MetaRequestHeight) + *MetaRequestHeight);
2516 }
2517 *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
2518 }
2519
2520 if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(scan: SourceScan))) {
2521 vp_height_meta_ub = (dml_uint_t)(dml_floor(x: ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, granularity: 64 * BlockHeight256Bytes) - dml_floor(x: ViewportYStart, granularity: 64 * BlockHeight256Bytes));
2522 } else if (!dml_is_vertical_rotation(scan: SourceScan)) {
2523 vp_height_meta_ub = (dml_uint_t)(dml_ceil(x: ViewportHeight - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
2524 } else {
2525 vp_height_meta_ub = (dml_uint_t)(dml_ceil(x: SwathWidth - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
2526 }
2527
2528 DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);
2529
2530 if (GPUVMEnable == true) {
2531 *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil(x: (dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), granularity: 1) + 1) * 64);
2532 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2533 } else {
2534 *MetaPTEBytesFrame = 0;
2535 MPDEBytesFrame = 0;
2536 }
2537
2538 if (DCCEnable != true) {
2539 *MetaPTEBytesFrame = 0;
2540 MPDEBytesFrame = 0;
2541 *MetaRowByte = 0;
2542 }
2543
2544 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2545
2546 if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(scan: SourceScan))) {
2547 vp_height_dpte_ub = (dml_uint_t)(dml_floor(x: ViewportYStart + ViewportHeight + MacroTileHeight - 1, granularity: MacroTileHeight) - dml_floor(x: ViewportYStart, granularity: MacroTileHeight));
2548 } else if (!dml_is_vertical_rotation(scan: SourceScan)) {
2549 vp_height_dpte_ub = (dml_uint_t)(dml_ceil(x: ViewportHeight - 1, granularity: MacroTileHeight) + MacroTileHeight);
2550 } else {
2551 vp_height_dpte_ub = (dml_uint_t)(dml_ceil(x: SwathWidth - 1, granularity: MacroTileHeight) + MacroTileHeight);
2552 }
2553
2554 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2555 *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil(x: (dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), granularity: 1) + 1));
2556 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2557 } else {
2558 *DPDE0BytesFrame = 0;
2559 ExtraDPDEBytesFrame = 0;
2560 }
2561
2562 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2563
2564#ifdef __DML_VBA_DEBUG__
2565 dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
2566 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
2567 dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
2568 dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
2569 dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
2570 dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
2571 dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
2572 dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
2573 dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
2574 dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
2575 dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
2576 dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
2577 dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
2578 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
2579 dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
2580 dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
2581 dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
2582#endif
2583
2584 if (SurfaceTiling == dml_sw_linear) {
2585 *PixelPTEReqHeight = 1;
2586 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2587 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2588 *PTERequestSize = 64;
2589 } else if (GPUVMMinPageSizeKBytes == 4) {
2590 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2591 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2592 *PTERequestSize = 128;
2593 } else {
2594 *PixelPTEReqHeight = MacroTileHeight;
2595 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2596 *PTERequestSize = 64;
2597 }
2598#ifdef __DML_VBA_DEBUG__
2599 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
2600 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2601 dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
2602 dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
2603 dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
2604 dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
2605 dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
2606#endif
2607
2608 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2609 *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(x: ((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, granularity: 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
2610 *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
2611
2612 if (SurfaceTiling == dml_sw_linear) {
2613 *dpte_row_height = (dml_uint_t)(dml_min(x: 128, y: 1 << (dml_uint_t) dml_floor(x: dml_log2(x: PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), granularity: 1)));
2614 dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2615 dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2616 dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2617 dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2618 dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
2619
2620 *dpte_row_width_ub = (dml_uint_t)(dml_ceil(x: ((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), granularity: (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
2621 *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
2622
2623 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2624 *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(x: dml_log2(x: PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), granularity: 1);
2625 if (*dpte_row_height_linear > 128)
2626 *dpte_row_height_linear = 128;
2627
2628#ifdef __DML_VBA_DEBUG__
2629 dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
2630#endif
2631
2632 } else if (!dml_is_vertical_rotation(scan: SourceScan)) {
2633 *dpte_row_height = *PixelPTEReqHeight;
2634
2635 if (GPUVMMinPageSizeKBytes > 64) {
2636 *dpte_row_width_ub = (dml_uint_t)((dml_ceil(x: ((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth);
2637 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2638 *dpte_row_width_ub = (dml_uint_t)(dml_floor(x: ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, granularity: *PixelPTEReqWidth) - dml_floor(x: ViewportXStart, granularity: *PixelPTEReqWidth));
2639 } else {
2640 *dpte_row_width_ub = (dml_uint_t)((dml_ceil(x: (dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, granularity: 1) + 1.0) * *PixelPTEReqWidth);
2641 }
2642#ifdef __DML_VBA_DEBUG__
2643 dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
2644#endif
2645
2646 ASSERT(*PixelPTEReqWidth);
2647 if (*PixelPTEReqWidth != 0)
2648 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2649 } else {
2650 *dpte_row_height = (dml_uint_t)(dml_min(x: *PixelPTEReqWidth, y: MacroTileWidth));
2651
2652 if (ViewportStationary && (NumberOfDPPs == 1)) {
2653 *dpte_row_width_ub = (dml_uint_t)(dml_floor(x: ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, granularity: *PixelPTEReqHeight) - dml_floor(x: ViewportYStart, granularity: *PixelPTEReqHeight));
2654 } else {
2655 *dpte_row_width_ub = (dml_uint_t)((dml_ceil(x: (dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, granularity: 1) + 1) * *PixelPTEReqHeight);
2656 }
2657
2658 *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
2659#ifdef __DML_VBA_DEBUG__
2660 dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
2661#endif
2662 }
2663
2664 if (GPUVMEnable != true)
2665 *PixelPTEBytesPerRow = 0;
2666
2667 *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;
2668
2669#ifdef __DML_VBA_DEBUG__
2670 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
2671 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
2672 dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
2673 dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
2674 dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
2675 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
2676 dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
2677 dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
2678 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
2679 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
2680 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2681#endif
2682
2683 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2684
2685 return PDEAndMetaPTEBytesFrame;
2686} // CalculateVMAndRowBytes
2687
2688static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported)
2689{
2690 dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg);
2691
2692 //Progressive To Interlace Unit Effect
2693 for (dml_uint_t k = 0; k < num_active_planes; ++k) {
2694 display_cfg->output.PixelClockBackEnd[k] = display_cfg->timing.PixelClock[k];
2695 if (display_cfg->timing.Interlace[k] == 1 && ptoi_supported == true) {
2696 display_cfg->timing.PixelClock[k] = 2 * display_cfg->timing.PixelClock[k];
2697 }
2698 }
2699}
2700
2701static dml_float_t TruncToValidBPP(
2702 dml_float_t LinkBitRate,
2703 dml_uint_t Lanes,
2704 dml_uint_t HTotal,
2705 dml_uint_t HActive,
2706 dml_float_t PixelClock,
2707 dml_float_t DesiredBPP,
2708 dml_bool_t DSCEnable,
2709 enum dml_output_encoder_class Output,
2710 enum dml_output_format_class Format,
2711 dml_uint_t DSCInputBitPerComponent,
2712 dml_uint_t DSCSlices,
2713 dml_uint_t AudioRate,
2714 dml_uint_t AudioLayout,
2715 enum dml_odm_mode ODMModeNoDSC,
2716 enum dml_odm_mode ODMModeDSC,
2717
2718 // Output
2719 dml_uint_t *RequiredSlots)
2720{
2721 dml_float_t MaxLinkBPP;
2722 dml_uint_t MinDSCBPP;
2723 dml_float_t MaxDSCBPP;
2724 dml_uint_t NonDSCBPP0;
2725 dml_uint_t NonDSCBPP1;
2726 dml_uint_t NonDSCBPP2;
2727
2728 if (Format == dml_420) {
2729 NonDSCBPP0 = 12;
2730 NonDSCBPP1 = 15;
2731 NonDSCBPP2 = 18;
2732 MinDSCBPP = 6;
2733 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
2734 } else if (Format == dml_444) {
2735 NonDSCBPP0 = 24;
2736 NonDSCBPP1 = 30;
2737 NonDSCBPP2 = 36;
2738 MinDSCBPP = 8;
2739 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
2740 } else {
2741 if (Output == dml_hdmi) {
2742 NonDSCBPP0 = 24;
2743 NonDSCBPP1 = 24;
2744 NonDSCBPP2 = 24;
2745 } else {
2746 NonDSCBPP0 = 16;
2747 NonDSCBPP1 = 20;
2748 NonDSCBPP2 = 24;
2749 }
2750 if (Format == dml_n422) {
2751 MinDSCBPP = 7;
2752 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
2753 } else {
2754 MinDSCBPP = 8;
2755 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
2756 }
2757 }
2758
2759 if (Output == dml_dp2p0) {
2760 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
2761 } else if (DSCEnable && Output == dml_dp) {
2762 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
2763 } else {
2764 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
2765 }
2766
2767 if (DSCEnable) {
2768 if (ODMModeDSC == dml_odm_mode_combine_4to1) {
2769 MaxLinkBPP = dml_min(x: MaxLinkBPP, y: 16);
2770 } else if (ODMModeDSC == dml_odm_mode_combine_2to1) {
2771 MaxLinkBPP = dml_min(x: MaxLinkBPP, y: 32);
2772 } else if (ODMModeDSC == dml_odm_mode_split_1to2) {
2773 MaxLinkBPP = 2 * MaxLinkBPP;
2774 }
2775 } else {
2776 if (ODMModeNoDSC == dml_odm_mode_combine_4to1) {
2777 MaxLinkBPP = dml_min(x: MaxLinkBPP, y: 16);
2778 } else if (ODMModeNoDSC == dml_odm_mode_combine_2to1) {
2779 MaxLinkBPP = dml_min(x: MaxLinkBPP, y: 32);
2780 } else if (ODMModeNoDSC == dml_odm_mode_split_1to2) {
2781 MaxLinkBPP = 2 * MaxLinkBPP;
2782 }
2783 }
2784
2785 if (DesiredBPP == 0) {
2786 if (DSCEnable) {
2787 if (MaxLinkBPP < MinDSCBPP) {
2788 return __DML_DPP_INVALID__;
2789 } else if (MaxLinkBPP >= MaxDSCBPP) {
2790 return MaxDSCBPP;
2791 } else {
2792 return dml_floor(x: 16.0 * MaxLinkBPP, granularity: 1.0) / 16.0;
2793 }
2794 } else {
2795 if (MaxLinkBPP >= NonDSCBPP2) {
2796 return NonDSCBPP2;
2797 } else if (MaxLinkBPP >= NonDSCBPP1) {
2798 return NonDSCBPP1;
2799 } else if (MaxLinkBPP >= NonDSCBPP0) {
2800 return NonDSCBPP0;
2801 } else {
2802 return __DML_DPP_INVALID__;
2803 }
2804 }
2805 } else {
2806 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
2807 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
2808 return __DML_DPP_INVALID__;
2809 } else {
2810 return DesiredBPP;
2811 }
2812 }
2813
2814 *RequiredSlots = (dml_uint_t)(dml_ceil(x: DesiredBPP / MaxLinkBPP * 64, granularity: 1));
2815
2816 return __DML_DPP_INVALID__;
2817} // TruncToValidBPP
2818
2819static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
2820 struct display_mode_lib_scratch_st *scratch,
2821 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
2822{
2823 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
2824
2825 s->TotalActiveWriteback = 0;
2826 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
2827 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
2828 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
2829 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
2830 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2831 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2832 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2833 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2834
2835#ifdef __DML_VBA_DEBUG__
2836 dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
2837 dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
2838 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
2839 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
2840 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
2841 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
2842 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
2843 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
2844 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
2845 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
2846 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
2847#endif
2848
2849 s->TotalActiveWriteback = 0;
2850 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2851 if (p->WritebackEnable[k] == true) {
2852 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
2853 }
2854 }
2855
2856 if (s->TotalActiveWriteback <= 1) {
2857 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
2858 } else {
2859 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2860 }
2861 if (p->USRRetrainingRequiredFinal)
2862 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
2863
2864 if (s->TotalActiveWriteback <= 1) {
2865 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
2866 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
2867 } else {
2868 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2869 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
2870 }
2871
2872 if (p->USRRetrainingRequiredFinal)
2873 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2874
2875 if (p->USRRetrainingRequiredFinal)
2876 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2877
2878#ifdef __DML_VBA_DEBUG__
2879 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
2880 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
2881 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
2882 dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal);
2883 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
2884#endif
2885
2886 s->TotalPixelBW = 0.0;
2887 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2888 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
2889 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]);
2890 }
2891
2892 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2893
2894 s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min(x: (dml_float_t)p->MaxLineBufferLines, y: dml_floor(x: (dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(x: p->HRatio[k], y: 1.0)), granularity: 1)) - (p->VTaps[k] - 1));
2895 s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min(x: (dml_float_t)p->MaxLineBufferLines, y: dml_floor(x: (dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(x: p->HRatioChroma[k], y: 1.0)), granularity: 1)) - (p->VTapsChroma[k] - 1));
2896
2897
2898#ifdef __DML_VBA_DEBUG__
2899 dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
2900 dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
2901 dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]);
2902 dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]);
2903 dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]);
2904#endif
2905
2906 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]);
2907 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]);
2908
2909 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
2910 if (p->UnboundedRequestEnabled) {
2911 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW;
2912 }
2913
2914 s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
2915 s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(x: s->LinesInDETY[k], granularity: p->SwathHeightY[k]));
2916 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
2917
2918 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2919
2920 if (p->NumberOfActiveSurfaces > 1) {
2921 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k];
2922 }
2923
2924 if (p->BytePerPixelDETC[k] > 0) {
2925 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
2926 s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(x: s->LinesInDETC[k], granularity: p->SwathHeightC[k]));
2927 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k];
2928 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2929 if (p->NumberOfActiveSurfaces > 1) {
2930 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k];
2931 }
2932 s->ActiveClockChangeLatencyHiding = dml_min(x: s->ActiveClockChangeLatencyHidingY, y: s->ActiveClockChangeLatencyHidingC);
2933 } else {
2934 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
2935 }
2936
2937 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark;
2938 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark;
2939 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
2940
2941 if (p->WritebackEnable[k]) {
2942 s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
2943 if (p->WritebackPixelFormat[k] == dml_444_64) {
2944 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
2945 }
2946 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
2947
2948 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
2949
2950 s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(x: s->ActiveDRAMClockChangeLatencyMargin[k], y: s->WritebackFCLKChangeLatencyMargin);
2951 s->ActiveFCLKChangeLatencyMargin[k] = dml_min(x: s->ActiveFCLKChangeLatencyMargin[k], y: s->WritebackDRAMClockChangeLatencyMargin);
2952 }
2953 p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
2954 p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
2955 }
2956
2957 *p->USRRetrainingSupport = true;
2958 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2959 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
2960 *p->USRRetrainingSupport = false;
2961 }
2962 }
2963
2964 s->FoundCriticalSurface = false;
2965 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2966 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
2967 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
2968 s->FoundCriticalSurface = true;
2969 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
2970 }
2971 }
2972
2973 for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
2974 for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
2975 if (i == j ||
2976 (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
2977 (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
2978 (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
2979 (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
2980 (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
2981 s->SynchronizedSurfaces[i][j] = true;
2982 } else {
2983 s->SynchronizedSurfaces[i][j] = false;
2984 }
2985 }
2986 }
2987
2988 s->FCLKChangeSupportNumber = 0;
2989 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2990 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
2991 if (!(p->PrefetchMode[k] <= 1)) {
2992 s->FCLKChangeSupportNumber = 3;
2993 } else if (s->FCLKChangeSupportNumber == 0) {
2994 s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1);
2995 s->LastSurfaceWithoutMargin = k;
2996 } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
2997 s->FCLKChangeSupportNumber = 3;
2998 }
2999 }
3000
3001 if (s->FCLKChangeSupportNumber == 0) {
3002 *p->FCLKChangeSupport = dml_fclock_change_vactive;
3003 } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
3004 *p->FCLKChangeSupport = dml_fclock_change_vblank;
3005 } else {
3006 *p->FCLKChangeSupport = dml_fclock_change_unsupported;
3007 }
3008
3009 s->DRAMClockChangeMethod = 0;
3010 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3011 if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
3012 s->DRAMClockChangeMethod = 1;
3013 else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
3014 s->DRAMClockChangeMethod = 2;
3015 }
3016
3017 s->DRAMClockChangeSupportNumber = 0;
3018 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3019 if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
3020 ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
3021 ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
3022 if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support
3023 s->DRAMClockChangeSupportNumber = 3;
3024 } else if (s->DRAMClockChangeSupportNumber == 0) {
3025 s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
3026 s->LastSurfaceWithoutMargin = k;
3027 } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
3028 s->DRAMClockChangeSupportNumber = 3;
3029 }
3030 }
3031 }
3032
3033 if (s->DRAMClockChangeMethod == 0) { // No MALL usage
3034 if (s->DRAMClockChangeSupportNumber == 0) {
3035 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
3036 } else if (s->DRAMClockChangeSupportNumber == 1) {
3037 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
3038 } else if (s->DRAMClockChangeSupportNumber == 2) {
3039 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
3040 } else {
3041 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3042 }
3043 } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
3044 if (s->DRAMClockChangeSupportNumber == 0) {
3045 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
3046 } else if (s->DRAMClockChangeSupportNumber == 1) {
3047 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
3048 } else if (s->DRAMClockChangeSupportNumber == 2) {
3049 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
3050 } else {
3051 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3052 }
3053 } else { // Any pipe using MALL subviewport
3054 if (s->DRAMClockChangeSupportNumber == 0) {
3055 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
3056 } else if (s->DRAMClockChangeSupportNumber == 1) {
3057 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
3058 } else if (s->DRAMClockChangeSupportNumber == 2) {
3059 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
3060 } else {
3061 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3062 }
3063 }
3064
3065 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3066 s->dst_y_pstate = (dml_uint_t)(dml_ceil(x: (p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), granularity: 1));
3067 s->src_y_pstate_l = (dml_uint_t)(dml_ceil(x: s->dst_y_pstate * p->VRatio[k], granularity: p->SwathHeightY[k]));
3068 s->src_y_ahead_l = (dml_uint_t)(dml_floor(x: p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], granularity: p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
3069 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k];
3070
3071#ifdef __DML_VBA_DEBUG__
3072 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3073 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3074 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3075 dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
3076 dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
3077 dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
3078 dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
3079 dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
3080 dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]);
3081 dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
3082#endif
3083 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
3084
3085 if (p->BytePerPixelDETC[k] > 0) {
3086 s->src_y_pstate_c = (dml_uint_t)(dml_ceil(x: s->dst_y_pstate * p->VRatioChroma[k], granularity: p->SwathHeightC[k]));
3087 s->src_y_ahead_c = (dml_uint_t)(dml_floor(x: p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], granularity: p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
3088 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k];
3089 p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(x: s->sub_vp_lines_l, y: s->sub_vp_lines_c));
3090
3091#ifdef __DML_VBA_DEBUG__
3092 dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
3093 dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
3094 dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]);
3095 dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
3096#endif
3097 }
3098 }
3099
3100#ifdef __DML_VBA_DEBUG__
3101 dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport);
3102 dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport);
3103 dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
3104 dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
3105#endif
3106} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
3107
3108static void CalculateDCFCLKDeepSleep(
3109 dml_uint_t NumberOfActiveSurfaces,
3110 dml_uint_t BytePerPixelY[],
3111 dml_uint_t BytePerPixelC[],
3112 dml_float_t VRatio[],
3113 dml_float_t VRatioChroma[],
3114 dml_uint_t SwathWidthY[],
3115 dml_uint_t SwathWidthC[],
3116 dml_uint_t DPPPerSurface[],
3117 dml_float_t HRatio[],
3118 dml_float_t HRatioChroma[],
3119 dml_float_t PixelClock[],
3120 dml_float_t PSCL_THROUGHPUT[],
3121 dml_float_t PSCL_THROUGHPUT_CHROMA[],
3122 dml_float_t Dppclk[],
3123 dml_float_t ReadBandwidthLuma[],
3124 dml_float_t ReadBandwidthChroma[],
3125 dml_uint_t ReturnBusWidth,
3126
3127 // Output
3128 dml_float_t *DCFClkDeepSleep)
3129{
3130 dml_float_t DisplayPipeLineDeliveryTimeLuma;
3131 dml_float_t DisplayPipeLineDeliveryTimeChroma;
3132 dml_float_t DCFClkDeepSleepPerSurface[__DML_NUM_PLANES__];
3133 dml_float_t ReadBandwidth = 0.0;
3134
3135 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3136
3137 if (VRatio[k] <= 1) {
3138 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3139 } else {
3140 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3141 }
3142 if (BytePerPixelC[k] == 0) {
3143 DisplayPipeLineDeliveryTimeChroma = 0;
3144 } else {
3145 if (VRatioChroma[k] <= 1) {
3146 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3147 } else {
3148 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3149 }
3150 }
3151
3152 if (BytePerPixelC[k] > 0) {
3153 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3154 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3155 } else {
3156 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3157 }
3158 DCFClkDeepSleepPerSurface[k] = dml_max(x: DCFClkDeepSleepPerSurface[k], y: PixelClock[k] / 16);
3159
3160#ifdef __DML_VBA_DEBUG__
3161 dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]);
3162 dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3163#endif
3164 }
3165
3166 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3167 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3168 }
3169
3170 *DCFClkDeepSleep = dml_max(x: 8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (dml_float_t) ReturnBusWidth);
3171
3172#ifdef __DML_VBA_DEBUG__
3173 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
3174 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3175 dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3176 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3177#endif
3178
3179 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3180 *DCFClkDeepSleep = dml_max(x: *DCFClkDeepSleep, y: DCFClkDeepSleepPerSurface[k]);
3181 }
3182 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3183} // CalculateDCFCLKDeepSleep
3184
3185static void CalculateUrgentBurstFactor(
3186 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3187 dml_uint_t swath_width_luma_ub,
3188 dml_uint_t swath_width_chroma_ub,
3189 dml_uint_t SwathHeightY,
3190 dml_uint_t SwathHeightC,
3191 dml_float_t LineTime,
3192 dml_float_t UrgentLatency,
3193 dml_float_t CursorBufferSize,
3194 dml_uint_t CursorWidth,
3195 dml_uint_t CursorBPP,
3196 dml_float_t VRatio,
3197 dml_float_t VRatioC,
3198 dml_float_t BytePerPixelInDETY,
3199 dml_float_t BytePerPixelInDETC,
3200 dml_uint_t DETBufferSizeY,
3201 dml_uint_t DETBufferSizeC,
3202 // Output
3203 dml_float_t *UrgentBurstFactorCursor,
3204 dml_float_t *UrgentBurstFactorLuma,
3205 dml_float_t *UrgentBurstFactorChroma,
3206 dml_bool_t *NotEnoughUrgentLatencyHiding)
3207{
3208 dml_float_t LinesInDETLuma;
3209 dml_float_t LinesInDETChroma;
3210 dml_uint_t LinesInCursorBuffer;
3211 dml_float_t CursorBufferSizeInTime;
3212 dml_float_t DETBufferSizeInTimeLuma;
3213 dml_float_t DETBufferSizeInTimeChroma;
3214
3215 *NotEnoughUrgentLatencyHiding = 0;
3216
3217 if (CursorWidth > 0) {
3218 LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(x: dml_log2(x: CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), granularity: 1.0);
3219 if (VRatio > 0) {
3220 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
3221 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
3222 *NotEnoughUrgentLatencyHiding = 1;
3223 *UrgentBurstFactorCursor = 0;
3224 } else {
3225 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
3226 }
3227 } else {
3228 *UrgentBurstFactorCursor = 1;
3229 }
3230 }
3231
3232 LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
3233
3234 if (VRatio > 0) {
3235 DETBufferSizeInTimeLuma = dml_floor(x: LinesInDETLuma, granularity: SwathHeightY) * LineTime / VRatio;
3236 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
3237 *NotEnoughUrgentLatencyHiding = 1;
3238 *UrgentBurstFactorLuma = 0;
3239 } else {
3240 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
3241 }
3242 } else {
3243 *UrgentBurstFactorLuma = 1;
3244 }
3245
3246 if (BytePerPixelInDETC > 0) {
3247 LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
3248
3249 if (VRatioC > 0) {
3250 DETBufferSizeInTimeChroma = dml_floor(x: LinesInDETChroma, granularity: SwathHeightC) * LineTime / VRatioC;
3251 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
3252 *NotEnoughUrgentLatencyHiding = 1;
3253 *UrgentBurstFactorChroma = 0;
3254 } else {
3255 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
3256 }
3257 } else {
3258 *UrgentBurstFactorChroma = 1;
3259 }
3260 }
3261} // CalculateUrgentBurstFactor
3262
3263static void CalculatePixelDeliveryTimes(
3264 dml_uint_t NumberOfActiveSurfaces,
3265 dml_float_t VRatio[],
3266 dml_float_t VRatioChroma[],
3267 dml_float_t VRatioPrefetchY[],
3268 dml_float_t VRatioPrefetchC[],
3269 dml_uint_t swath_width_luma_ub[],
3270 dml_uint_t swath_width_chroma_ub[],
3271 dml_uint_t DPPPerSurface[],
3272 dml_float_t HRatio[],
3273 dml_float_t HRatioChroma[],
3274 dml_float_t PixelClock[],
3275 dml_float_t PSCL_THROUGHPUT[],
3276 dml_float_t PSCL_THROUGHPUT_CHROMA[],
3277 dml_float_t Dppclk[],
3278 dml_uint_t BytePerPixelC[],
3279 enum dml_rotation_angle SourceScan[],
3280 dml_uint_t NumberOfCursors[],
3281 dml_uint_t CursorWidth[],
3282 dml_uint_t CursorBPP[],
3283 dml_uint_t BlockWidth256BytesY[],
3284 dml_uint_t BlockHeight256BytesY[],
3285 dml_uint_t BlockWidth256BytesC[],
3286 dml_uint_t BlockHeight256BytesC[],
3287
3288 // Output
3289 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
3290 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
3291 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
3292 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
3293 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
3294 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
3295 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
3296 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
3297 dml_float_t CursorRequestDeliveryTime[],
3298 dml_float_t CursorRequestDeliveryTimePrefetch[])
3299{
3300 dml_float_t req_per_swath_ub;
3301
3302 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3303
3304#ifdef __DML_VBA_DEBUG__
3305 dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]);
3306 dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]);
3307 dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
3308 dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
3309 dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
3310 dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
3311 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
3312 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
3313 dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
3314 dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
3315 dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
3316#endif
3317
3318 if (VRatio[k] <= 1) {
3319 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3320 } else {
3321 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3322 }
3323
3324 if (BytePerPixelC[k] == 0) {
3325 DisplayPipeLineDeliveryTimeChroma[k] = 0;
3326 } else {
3327 if (VRatioChroma[k] <= 1) {
3328 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3329 } else {
3330 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3331 }
3332 }
3333
3334 if (VRatioPrefetchY[k] <= 1) {
3335 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3336 } else {
3337 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3338 }
3339
3340 if (BytePerPixelC[k] == 0) {
3341 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
3342 } else {
3343 if (VRatioPrefetchC[k] <= 1) {
3344 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3345 } else {
3346 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3347 }
3348 }
3349#ifdef __DML_VBA_DEBUG__
3350 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
3351 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
3352 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
3353 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
3354#endif
3355 }
3356
3357 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3358 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3359 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
3360 } else {
3361 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
3362 }
3363#ifdef __DML_VBA_DEBUG__
3364 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
3365#endif
3366
3367 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
3368 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
3369 if (BytePerPixelC[k] == 0) {
3370 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
3371 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
3372 } else {
3373 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3374 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
3375 } else {
3376 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
3377 }
3378#ifdef __DML_VBA_DEBUG__
3379 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
3380#endif
3381 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
3382 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
3383 }
3384#ifdef __DML_VBA_DEBUG__
3385 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
3386 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
3387 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
3388 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
3389#endif
3390 }
3391
3392 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3393 dml_uint_t cursor_req_per_width;
3394 cursor_req_per_width = (dml_uint_t)(dml_ceil(x: (dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, granularity: 1.0));
3395 if (NumberOfCursors[k] > 0) {
3396 if (VRatio[k] <= 1) {
3397 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3398 } else {
3399 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3400 }
3401 if (VRatioPrefetchY[k] <= 1) {
3402 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3403 } else {
3404 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3405 }
3406 } else {
3407 CursorRequestDeliveryTime[k] = 0;
3408 CursorRequestDeliveryTimePrefetch[k] = 0;
3409 }
3410#ifdef __DML_VBA_DEBUG__
3411 dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
3412 dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
3413 dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
3414#endif
3415 }
3416} // CalculatePixelDeliveryTimes
3417
3418static void CalculateMetaAndPTETimes(
3419 dml_bool_t use_one_row_for_frame[],
3420 dml_uint_t NumberOfActiveSurfaces,
3421 dml_bool_t GPUVMEnable,
3422 dml_uint_t MetaChunkSize,
3423 dml_uint_t MinMetaChunkSizeBytes,
3424 dml_uint_t HTotal[],
3425 dml_float_t VRatio[],
3426 dml_float_t VRatioChroma[],
3427 dml_float_t DestinationLinesToRequestRowInVBlank[],
3428 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
3429 dml_bool_t DCCEnable[],
3430 dml_float_t PixelClock[],
3431 dml_uint_t BytePerPixelY[],
3432 dml_uint_t BytePerPixelC[],
3433 enum dml_rotation_angle SourceScan[],
3434 dml_uint_t dpte_row_height[],
3435 dml_uint_t dpte_row_height_chroma[],
3436 dml_uint_t meta_row_width[],
3437 dml_uint_t meta_row_width_chroma[],
3438 dml_uint_t meta_row_height[],
3439 dml_uint_t meta_row_height_chroma[],
3440 dml_uint_t meta_req_width[],
3441 dml_uint_t meta_req_width_chroma[],
3442 dml_uint_t meta_req_height[],
3443 dml_uint_t meta_req_height_chroma[],
3444 dml_uint_t dpte_group_bytes[],
3445 dml_uint_t PTERequestSizeY[],
3446 dml_uint_t PTERequestSizeC[],
3447 dml_uint_t PixelPTEReqWidthY[],
3448 dml_uint_t PixelPTEReqHeightY[],
3449 dml_uint_t PixelPTEReqWidthC[],
3450 dml_uint_t PixelPTEReqHeightC[],
3451 dml_uint_t dpte_row_width_luma_ub[],
3452 dml_uint_t dpte_row_width_chroma_ub[],
3453
3454 // Output
3455 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
3456 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
3457 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
3458 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
3459 dml_float_t TimePerMetaChunkNominal[],
3460 dml_float_t TimePerChromaMetaChunkNominal[],
3461 dml_float_t TimePerMetaChunkVBlank[],
3462 dml_float_t TimePerChromaMetaChunkVBlank[],
3463 dml_float_t TimePerMetaChunkFlip[],
3464 dml_float_t TimePerChromaMetaChunkFlip[],
3465 dml_float_t time_per_pte_group_nom_luma[],
3466 dml_float_t time_per_pte_group_vblank_luma[],
3467 dml_float_t time_per_pte_group_flip_luma[],
3468 dml_float_t time_per_pte_group_nom_chroma[],
3469 dml_float_t time_per_pte_group_vblank_chroma[],
3470 dml_float_t time_per_pte_group_flip_chroma[])
3471{
3472 dml_uint_t meta_chunk_width;
3473 dml_uint_t min_meta_chunk_width;
3474 dml_uint_t meta_chunk_per_row_int;
3475 dml_uint_t meta_row_remainder;
3476 dml_uint_t meta_chunk_threshold;
3477 dml_uint_t meta_chunks_per_row_ub;
3478 dml_uint_t meta_chunk_width_chroma;
3479 dml_uint_t min_meta_chunk_width_chroma;
3480 dml_uint_t meta_chunk_per_row_int_chroma;
3481 dml_uint_t meta_row_remainder_chroma;
3482 dml_uint_t meta_chunk_threshold_chroma;
3483 dml_uint_t meta_chunks_per_row_ub_chroma;
3484 dml_uint_t dpte_group_width_luma;
3485 dml_uint_t dpte_groups_per_row_luma_ub;
3486 dml_uint_t dpte_group_width_chroma;
3487 dml_uint_t dpte_groups_per_row_chroma_ub;
3488
3489 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3490 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
3491 if (BytePerPixelC[k] == 0) {
3492 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
3493 } else {
3494 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
3495 }
3496 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
3497 if (BytePerPixelC[k] == 0) {
3498 DST_Y_PER_META_ROW_NOM_C[k] = 0;
3499 } else {
3500 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
3501 }
3502 }
3503
3504 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3505 if (DCCEnable[k] == true) {
3506 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
3507 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
3508 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
3509 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
3510 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3511 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
3512 } else {
3513 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
3514 }
3515 if (meta_row_remainder <= meta_chunk_threshold) {
3516 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
3517 } else {
3518 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
3519 }
3520 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3521 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3522 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3523 if (BytePerPixelC[k] == 0) {
3524 TimePerChromaMetaChunkNominal[k] = 0;
3525 TimePerChromaMetaChunkVBlank[k] = 0;
3526 TimePerChromaMetaChunkFlip[k] = 0;
3527 } else {
3528 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3529 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3530 meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma);
3531 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
3532 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3533 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
3534 } else {
3535 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
3536 }
3537 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
3538 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
3539 } else {
3540 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
3541 }
3542 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3543 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3544 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3545 }
3546 } else {
3547 TimePerMetaChunkNominal[k] = 0;
3548 TimePerMetaChunkVBlank[k] = 0;
3549 TimePerMetaChunkFlip[k] = 0;
3550 TimePerChromaMetaChunkNominal[k] = 0;
3551 TimePerChromaMetaChunkVBlank[k] = 0;
3552 TimePerChromaMetaChunkFlip[k] = 0;
3553 }
3554 }
3555
3556 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3557 if (GPUVMEnable == true) {
3558 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3559 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]);
3560 } else {
3561 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
3562 }
3563
3564 if (use_one_row_for_frame[k]) {
3565 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil(x: (dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, granularity: 1.0));
3566 } else {
3567 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil(x: (dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, granularity: 1.0));
3568 }
3569
3570 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]);
3571 dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
3572 dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
3573 dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
3574 dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
3575 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
3576 dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
3577 dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
3578
3579 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3580 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3581 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3582 if (BytePerPixelC[k] == 0) {
3583 time_per_pte_group_nom_chroma[k] = 0;
3584 time_per_pte_group_vblank_chroma[k] = 0;
3585 time_per_pte_group_flip_chroma[k] = 0;
3586 } else {
3587 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
3588 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
3589 } else {
3590 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
3591 }
3592
3593 if (use_one_row_for_frame[k]) {
3594 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil(x: (dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, granularity: 1.0));
3595 } else {
3596 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil(x: (dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, granularity: 1.0));
3597 }
3598 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]);
3599 dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
3600 dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
3601
3602 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3603 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3604 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3605 }
3606 } else {
3607 time_per_pte_group_nom_luma[k] = 0;
3608 time_per_pte_group_vblank_luma[k] = 0;
3609 time_per_pte_group_flip_luma[k] = 0;
3610 time_per_pte_group_nom_chroma[k] = 0;
3611 time_per_pte_group_vblank_chroma[k] = 0;
3612 time_per_pte_group_flip_chroma[k] = 0;
3613 }
3614#ifdef __DML_VBA_DEBUG__
3615 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
3616 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
3617
3618 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
3619 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
3620 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
3621 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
3622 dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
3623 dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
3624 dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
3625 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
3626 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
3627 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
3628 dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
3629 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
3630 dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
3631 dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
3632 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
3633 dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
3634#endif
3635 }
3636} // CalculateMetaAndPTETimes
3637
3638static void CalculateVMGroupAndRequestTimes(
3639 dml_uint_t NumberOfActiveSurfaces,
3640 dml_bool_t GPUVMEnable,
3641 dml_uint_t GPUVMMaxPageTableLevels,
3642 dml_uint_t HTotal[],
3643 dml_uint_t BytePerPixelC[],
3644 dml_float_t DestinationLinesToRequestVMInVBlank[],
3645 dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
3646 dml_bool_t DCCEnable[],
3647 dml_float_t PixelClock[],
3648 dml_uint_t dpte_row_width_luma_ub[],
3649 dml_uint_t dpte_row_width_chroma_ub[],
3650 dml_uint_t vm_group_bytes[],
3651 dml_uint_t dpde0_bytes_per_frame_ub_l[],
3652 dml_uint_t dpde0_bytes_per_frame_ub_c[],
3653 dml_uint_t meta_pte_bytes_per_frame_ub_l[],
3654 dml_uint_t meta_pte_bytes_per_frame_ub_c[],
3655
3656 // Output
3657 dml_float_t TimePerVMGroupVBlank[],
3658 dml_float_t TimePerVMGroupFlip[],
3659 dml_float_t TimePerVMRequestVBlank[],
3660 dml_float_t TimePerVMRequestFlip[])
3661{
3662 dml_uint_t num_group_per_lower_vm_stage;
3663 dml_uint_t num_req_per_lower_vm_stage;
3664
3665#ifdef __DML_VBA_DEBUG__
3666 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
3667 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
3668#endif
3669 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3670
3671#ifdef __DML_VBA_DEBUG__
3672 dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
3673 dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
3674 dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
3675 dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
3676 dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
3677 dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
3678#endif
3679
3680 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
3681 if (DCCEnable[k] == false) {
3682 if (BytePerPixelC[k] > 0) {
3683 num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil(x: (dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], granularity: 1.0) +
3684 dml_ceil(x: (dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], granularity: 1.0));
3685 } else {
3686 num_group_per_lower_vm_stage = (dml_uint_t) (dml_ceil(x: (dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], granularity: 1.0));
3687 }
3688 } else {
3689 if (GPUVMMaxPageTableLevels == 1) {
3690 if (BytePerPixelC[k] > 0) {
3691 num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1.0) +
3692 dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1.0));
3693 } else {
3694 num_group_per_lower_vm_stage = (dml_uint_t)(dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1.0));
3695 }
3696 } else {
3697 if (BytePerPixelC[k] > 0) {
3698 num_group_per_lower_vm_stage = (dml_uint_t)(2.0 + dml_ceil(x: (dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1) +
3699 dml_ceil(x: (dml_float_t) (dpde0_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1) +
3700 dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1) +
3701 dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_c[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1));
3702 } else {
3703 num_group_per_lower_vm_stage = (dml_uint_t)(1.0 + dml_ceil(x: (dml_float_t) (dpde0_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1) +
3704 dml_ceil(x: (dml_float_t) (meta_pte_bytes_per_frame_ub_l[k]) / (dml_float_t) (vm_group_bytes[k]), granularity: 1));
3705 }
3706 }
3707 }
3708
3709 if (DCCEnable[k] == false) {
3710 if (BytePerPixelC[k] > 0) {
3711 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
3712 } else {
3713 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
3714 }
3715 } else {
3716 if (GPUVMMaxPageTableLevels == 1) {
3717 if (BytePerPixelC[k] > 0) {
3718 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
3719 } else {
3720 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
3721 }
3722 } else {
3723 if (BytePerPixelC[k] > 0) {
3724 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
3725 } else {
3726 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
3727 }
3728 }
3729 }
3730
3731 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
3732 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
3733 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
3734 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
3735
3736 if (GPUVMMaxPageTableLevels > 2) {
3737 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
3738 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
3739 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
3740 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
3741 }
3742
3743 } else {
3744 TimePerVMGroupVBlank[k] = 0;
3745 TimePerVMGroupFlip[k] = 0;
3746 TimePerVMRequestVBlank[k] = 0;
3747 TimePerVMRequestFlip[k] = 0;
3748 }
3749
3750#ifdef __DML_VBA_DEBUG__
3751 dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
3752 dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
3753 dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
3754 dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
3755#endif
3756 }
3757} // CalculateVMGroupAndRequestTimes
3758
3759static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
3760 struct CalculateStutterEfficiency_params_st *p)
3761{
3762 dml_float_t DETBufferingTimeY = 0;
3763 dml_float_t SwathWidthYCriticalSurface = 0;
3764 dml_float_t SwathHeightYCriticalSurface = 0;
3765 dml_float_t VActiveTimeCriticalSurface = 0;
3766 dml_float_t FrameTimeCriticalSurface = 0;
3767 dml_uint_t BytePerPixelYCriticalSurface = 0;
3768 dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
3769 dml_uint_t DETBufferSizeYCriticalSurface = 0;
3770 dml_float_t MinTTUVBlankCriticalSurface = 0;
3771 dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
3772 dml_bool_t SinglePlaneCriticalSurface = 0;
3773 dml_bool_t SinglePipeCriticalSurface = 0;
3774 dml_float_t TotalCompressedReadBandwidth = 0;
3775 dml_float_t TotalRowReadBandwidth = 0;
3776 dml_float_t AverageDCCCompressionRate = 0;
3777 dml_float_t EffectiveCompressedBufferSize = 0;
3778 dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
3779 dml_float_t StutterBurstTime = 0;
3780 dml_uint_t TotalActiveWriteback = 0;
3781 dml_float_t LinesInDETY = 0;
3782 dml_float_t LinesInDETYRoundedDownToSwath = 0;
3783 dml_float_t MaximumEffectiveCompressionLuma = 0;
3784 dml_float_t MaximumEffectiveCompressionChroma = 0;
3785 dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
3786 dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
3787 dml_float_t AverageDCCZeroSizeFraction = 0;
3788 dml_float_t AverageZeroSizeCompressionRate = 0;
3789
3790 dml_bool_t FoundCriticalSurface = false;
3791
3792 dml_uint_t TotalNumberOfActiveOTG = 0;
3793 dml_float_t SinglePixelClock;
3794 dml_uint_t SingleHTotal;
3795 dml_uint_t SingleVTotal;
3796 dml_bool_t SameTiming = true;
3797
3798 dml_float_t LastStutterPeriod = 0.0;
3799 dml_float_t LastZ8StutterPeriod = 0.0;
3800
3801 dml_uint_t SwathSizeCriticalSurface;
3802 dml_uint_t LastChunkOfSwathSize;
3803 dml_uint_t MissingPartOfLastSwathOfDETSize;
3804
3805 TotalZeroSizeRequestReadBandwidth = 0;
3806 TotalZeroSizeCompressedReadBandwidth = 0;
3807 TotalRowReadBandwidth = 0;
3808 TotalCompressedReadBandwidth = 0;
3809
3810 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3811 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3812 if (p->DCCEnable[k] == true) {
3813 if ((dml_is_vertical_rotation(scan: p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(scan: p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
3814 MaximumEffectiveCompressionLuma = 2;
3815 } else {
3816 MaximumEffectiveCompressionLuma = 4;
3817 }
3818 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(x: p->NetDCCRateLuma[k], y: MaximumEffectiveCompressionLuma);
3819#ifdef __DML_VBA_DEBUG__
3820 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3821 dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
3822 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
3823#endif
3824 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
3825 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
3826
3827 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
3828 if ((dml_is_vertical_rotation(scan: p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(scan: p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
3829 MaximumEffectiveCompressionChroma = 2;
3830 } else {
3831 MaximumEffectiveCompressionChroma = 4;
3832 }
3833 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(x: p->NetDCCRateChroma[k], y: MaximumEffectiveCompressionChroma);
3834#ifdef __DML_VBA_DEBUG__
3835 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
3836 dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
3837 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
3838#endif
3839 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
3840 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
3841 }
3842 } else {
3843 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
3844 }
3845 TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
3846 }
3847 }
3848
3849 AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
3850 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
3851
3852#ifdef __DML_VBA_DEBUG__
3853 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
3854 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
3855 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
3856 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
3857 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
3858 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
3859 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3860 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
3861 dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
3862 dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
3863 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
3864#endif
3865 if (AverageDCCZeroSizeFraction == 1) {
3866 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3867 EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
3868 } else if (AverageDCCZeroSizeFraction > 0) {
3869 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3870 EffectiveCompressedBufferSize = dml_min(x: (dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3871 y: (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
3872 dml_min(x: ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
3873 y: ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3874
3875#ifdef __DML_VBA_DEBUG__
3876 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3877 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
3878 dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
3879 dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3880#endif
3881 } else {
3882 EffectiveCompressedBufferSize = dml_min(x: (dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3883 y: (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
3884 ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
3885
3886#ifdef __DML_VBA_DEBUG__
3887 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3888 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
3889#endif
3890 }
3891
3892#ifdef __DML_VBA_DEBUG__
3893 dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
3894 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
3895 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3896#endif
3897
3898 *p->StutterPeriod = 0;
3899
3900 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3901 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3902 LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
3903 LinesInDETYRoundedDownToSwath = dml_floor(x: LinesInDETY, granularity: p->SwathHeightY[k]);
3904 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
3905#ifdef __DML_VBA_DEBUG__
3906 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3907 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3908 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3909 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3910 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
3911 dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
3912 dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
3913 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
3914 dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3915 dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
3916 dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
3917 dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3918#endif
3919
3920 if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
3921 dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
3922
3923 FoundCriticalSurface = true;
3924 *p->StutterPeriod = DETBufferingTimeY;
3925 FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor(x: (dml_float_t)p->VTotal[k]/2.0, granularity: 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3926 VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor(x: (dml_float_t)p->VActive[k]/2.0, granularity: 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3927 BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
3928 SwathWidthYCriticalSurface = p->SwathWidthY[k];
3929 SwathHeightYCriticalSurface = p->SwathHeightY[k];
3930 BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
3931 LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
3932 DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
3933 MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
3934 SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
3935 SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
3936
3937#ifdef __DML_VBA_DEBUG__
3938 dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
3939 dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
3940 dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
3941 dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
3942 dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
3943 dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
3944 dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
3945 dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
3946 dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
3947 dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
3948 dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
3949 dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
3950#endif
3951 }
3952 }
3953 }
3954
3955 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(x: *p->StutterPeriod * p->TotalDataReadBandwidth, y: EffectiveCompressedBufferSize);
3956#ifdef __DML_VBA_DEBUG__
3957 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
3958 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3959 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
3960 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
3961 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3962 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
3963 dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
3964 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
3965 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
3966 dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
3967#endif
3968
3969 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
3970#ifdef __DML_VBA_DEBUG__
3971 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
3972 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
3973 dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
3974 dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
3975 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
3976#endif
3977 StutterBurstTime = dml_max(x: StutterBurstTime, y: LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3978
3979 dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3980
3981 TotalActiveWriteback = 0;
3982 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3983 if (p->WritebackEnable[k]) {
3984 TotalActiveWriteback = TotalActiveWriteback + 1;
3985 }
3986 }
3987
3988 if (TotalActiveWriteback == 0) {
3989#ifdef __DML_VBA_DEBUG__
3990 dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
3991 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
3992 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
3993 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
3994#endif
3995 *p->StutterEfficiencyNotIncludingVBlank = dml_max(x: 0., y: 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
3996 *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(x: 0., y: 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
3997 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(x: VActiveTimeCriticalSurface / *p->StutterPeriod, granularity: 1)) : 0);
3998 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(x: VActiveTimeCriticalSurface / *p->StutterPeriod, granularity: 1)) : 0);
3999 } else {
4000 *p->StutterEfficiencyNotIncludingVBlank = 0.;
4001 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
4002 *p->NumberOfStutterBurstsPerFrame = 0;
4003 *p->Z8NumberOfStutterBurstsPerFrame = 0;
4004 }
4005#ifdef __DML_VBA_DEBUG__
4006 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
4007 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4008 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
4009 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
4010 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4011#endif
4012
4013 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4014 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
4015 if (p->BlendingAndTiming[k] == k) {
4016 if (TotalNumberOfActiveOTG == 0) {
4017 SinglePixelClock = p->PixelClock[k];
4018 SingleHTotal = p->HTotal[k];
4019 SingleVTotal = p->VTotal[k];
4020 } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
4021 SameTiming = false;
4022 }
4023 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
4024 }
4025 }
4026 }
4027
4028 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
4029 LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4030
4031 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
4032 LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
4033 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4034 } else {
4035 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
4036 }
4037 } else {
4038 *p->StutterEfficiency = 0;
4039 }
4040
4041 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
4042 LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4043 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
4044 *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4045 } else {
4046 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
4047 }
4048 } else {
4049 *p->Z8StutterEfficiency = 0.;
4050 }
4051
4052#ifdef __DML_VBA_DEBUG__
4053 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
4054 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
4055 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
4056 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
4057 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
4058 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
4059 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4060 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4061#endif
4062
4063 SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(x: SwathWidthYCriticalSurface, granularity: BlockWidth256BytesYCriticalSurface));
4064 LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
4065 MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(x: DETBufferSizeYCriticalSurface, granularity: SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
4066
4067 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
4068 (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
4069
4070#ifdef __DML_VBA_DEBUG__
4071 dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
4072 dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
4073 dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
4074 dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
4075 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
4076 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
4077#endif
4078} // CalculateStutterEfficiency
4079
4080/// \CalculateSwathAndDETConfiguration
4081/// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.)
4082static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
4083 struct CalculateSwathAndDETConfiguration_params_st *p)
4084{
4085 dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__];
4086 dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__];
4087 dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__];
4088 dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__];
4089 dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__];
4090 dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__];
4091 dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__];
4092 dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__];
4093
4094 dml_uint_t TotalActiveDPP = 0;
4095 dml_bool_t NoChromaOrLinearSurfaces = true;
4096 dml_uint_t SurfaceDoingUnboundedRequest = 0;
4097
4098 dml_uint_t DETBufferSizeInKByteForSwathCalculation;
4099
4100 const long TTUFIFODEPTH = 8;
4101 const long MAXIMUMCOMPRESSION = 4;
4102
4103#ifdef __DML_VBA_DEBUG__
4104 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
4105 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4106 dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
4107 }
4108#endif
4109 CalculateSwathWidth(ForceSingleDPP: p->ForceSingleDPP,
4110 NumberOfActiveSurfaces: p->NumberOfActiveSurfaces,
4111 SourcePixelFormat: p->SourcePixelFormat,
4112 SourceScan: p->SourceScan,
4113 ViewportStationary: p->ViewportStationary,
4114 ViewportWidth: p->ViewportWidth,
4115 ViewportHeight: p->ViewportHeight,
4116 ViewportXStart: p->ViewportXStart,
4117 ViewportYStart: p->ViewportYStart,
4118 ViewportXStartC: p->ViewportXStartC,
4119 ViewportYStartC: p->ViewportYStartC,
4120 SurfaceWidthY: p->SurfaceWidthY,
4121 SurfaceWidthC: p->SurfaceWidthC,
4122 SurfaceHeightY: p->SurfaceHeightY,
4123 SurfaceHeightC: p->SurfaceHeightC,
4124 ODMMode: p->ODMMode,
4125 BytePerPixY: p->BytePerPixY,
4126 BytePerPixC: p->BytePerPixC,
4127 Read256BytesBlockHeightY: p->Read256BytesBlockHeightY,
4128 Read256BytesBlockHeightC: p->Read256BytesBlockHeightC,
4129 Read256BytesBlockWidthY: p->Read256BytesBlockWidthY,
4130 Read256BytesBlockWidthC: p->Read256BytesBlockWidthC,
4131 BlendingAndTiming: p->BlendingAndTiming,
4132 HActive: p->HActive,
4133 HRatio: p->HRatio,
4134 DPPPerSurface: p->DPPPerSurface,
4135
4136 // Output
4137 SwathWidthSingleDPPY: SwathWidthSingleDPP,
4138 SwathWidthSingleDPPC: SwathWidthSingleDPPChroma,
4139 SwathWidthY: p->SwathWidth,
4140 SwathWidthC: p->SwathWidthChroma,
4141 MaximumSwathHeightY,
4142 MaximumSwathHeightC,
4143 swath_width_luma_ub: p->swath_width_luma_ub,
4144 swath_width_chroma_ub: p->swath_width_chroma_ub);
4145
4146 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4147 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
4148 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
4149#ifdef __DML_VBA_DEBUG__
4150 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
4151 dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
4152 dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
4153 dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
4154 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4155 dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
4156 dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
4157 dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
4158 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4159#endif
4160 if (p->SourcePixelFormat[k] == dml_420_10) {
4161 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil(x: (dml_float_t) RoundedUpMaxSwathSizeBytesY[k], granularity: 256));
4162 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil(x: (dml_float_t) RoundedUpMaxSwathSizeBytesC[k], granularity: 256));
4163 }
4164 }
4165
4166 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4167 TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
4168 if (p->DPPPerSurface[k] > 0)
4169 SurfaceDoingUnboundedRequest = k;
4170 if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
4171 p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
4172 || p->SurfaceTiling[k] == dml_sw_linear) {
4173 NoChromaOrLinearSurfaces = false;
4174 }
4175 }
4176
4177 *p->UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal: p->UseUnboundedRequestingFinal, TotalNumberOfActiveDPP: TotalActiveDPP,
4178 NoChromaOrLinear: NoChromaOrLinearSurfaces, Output: p->Output[0]);
4179
4180 CalculateDETBufferSize(DETSizeOverride: p->DETSizeOverride,
4181 UseMALLForPStateChange: p->UseMALLForPStateChange,
4182 ForceSingleDPP: p->ForceSingleDPP,
4183 NumberOfActiveSurfaces: p->NumberOfActiveSurfaces,
4184 UnboundedRequestEnabled: *p->UnboundedRequestEnabled,
4185 nomDETInKByte: p->nomDETInKByte,
4186 MaxTotalDETInKByte: p->MaxTotalDETInKByte,
4187 ConfigReturnBufferSizeInKByte: p->ConfigReturnBufferSizeInKByte,
4188 MinCompressedBufferSizeInKByte: p->MinCompressedBufferSizeInKByte,
4189 ConfigReturnBufferSegmentSizeInkByte: p->ConfigReturnBufferSegmentSizeInkByte,
4190 CompressedBufferSegmentSizeInkByteFinal: p->CompressedBufferSegmentSizeInkByteFinal,
4191 SourcePixelFormat: p->SourcePixelFormat,
4192 ReadBandwidthLuma: p->ReadBandwidthLuma,
4193 ReadBandwidthChroma: p->ReadBandwidthChroma,
4194 RotesY: RoundedUpMaxSwathSizeBytesY,
4195 RoundedUpMaxSwathSizeBytesC,
4196 DPPPerSurface: p->DPPPerSurface,
4197
4198 // Output
4199 DETBufferSizeInKByte: p->DETBufferSizeInKByte, // per hubp pipe
4200 CompressedBufferSizeInkByte: p->CompressedBufferSizeInkByte);
4201
4202#ifdef __DML_VBA_DEBUG__
4203 dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
4204 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
4205 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
4206 dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
4207 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
4208 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
4209#endif
4210
4211 *p->ViewportSizeSupport = true;
4212 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4213
4214 DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
4215#ifdef __DML_VBA_DEBUG__
4216 dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
4217#endif
4218
4219 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4220 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4221 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4222 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4223 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4224 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4225 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4226 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4227 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4228 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4229 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4230 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4231 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4232 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4233 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4234 } else {
4235 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4236 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4237 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4238 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4239 }
4240
4241 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
4242 p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
4243 *p->ViewportSizeSupport = false;
4244 p->ViewportSizeSupportPerSurface[k] = false;
4245 } else {
4246 p->ViewportSizeSupportPerSurface[k] = true;
4247 }
4248
4249 if (p->SwathHeightC[k] == 0) {
4250#ifdef __DML_VBA_DEBUG__
4251 dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
4252#endif
4253 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
4254 p->DETBufferSizeC[k] = 0;
4255 } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
4256#ifdef __DML_VBA_DEBUG__
4257 dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
4258#endif
4259 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4260 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4261 } else {
4262#ifdef __DML_VBA_DEBUG__
4263 dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
4264#endif
4265 p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(x: p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, granularity: 1024));
4266 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
4267 }
4268
4269#ifdef __DML_VBA_DEBUG__
4270 dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
4271 dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
4272 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4273 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4274 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
4275 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
4276 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
4277 dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
4278 dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
4279 dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
4280#endif
4281
4282 }
4283
4284 *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
4285 if (p->UnboundedRequestEnabled) {
4286 *p->compbuf_reserved_space_64b = dml_max(x: *p->compbuf_reserved_space_64b,
4287 y: (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
4288 - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
4289 }
4290 *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
4291} // CalculateSwathAndDETConfiguration
4292
4293static void CalculateSwathWidth(
4294 dml_bool_t ForceSingleDPP,
4295 dml_uint_t NumberOfActiveSurfaces,
4296 enum dml_source_format_class SourcePixelFormat[],
4297 enum dml_rotation_angle SourceScan[],
4298 dml_bool_t ViewportStationary[],
4299 dml_uint_t ViewportWidth[],
4300 dml_uint_t ViewportHeight[],
4301 dml_uint_t ViewportXStart[],
4302 dml_uint_t ViewportYStart[],
4303 dml_uint_t ViewportXStartC[],
4304 dml_uint_t ViewportYStartC[],
4305 dml_uint_t SurfaceWidthY[],
4306 dml_uint_t SurfaceWidthC[],
4307 dml_uint_t SurfaceHeightY[],
4308 dml_uint_t SurfaceHeightC[],
4309 enum dml_odm_mode ODMMode[],
4310 dml_uint_t BytePerPixY[],
4311 dml_uint_t BytePerPixC[],
4312 dml_uint_t Read256BytesBlockHeightY[],
4313 dml_uint_t Read256BytesBlockHeightC[],
4314 dml_uint_t Read256BytesBlockWidthY[],
4315 dml_uint_t Read256BytesBlockWidthC[],
4316 dml_uint_t BlendingAndTiming[],
4317 dml_uint_t HActive[],
4318 dml_float_t HRatio[],
4319 dml_uint_t DPPPerSurface[],
4320
4321 // Output
4322 dml_uint_t SwathWidthSingleDPPY[],
4323 dml_uint_t SwathWidthSingleDPPC[],
4324 dml_uint_t SwathWidthY[], // per-pipe
4325 dml_uint_t SwathWidthC[], // per-pipe
4326 dml_uint_t MaximumSwathHeightY[],
4327 dml_uint_t MaximumSwathHeightC[],
4328 dml_uint_t swath_width_luma_ub[], // per-pipe
4329 dml_uint_t swath_width_chroma_ub[]) // per-pipe
4330{
4331 enum dml_odm_mode MainSurfaceODMMode;
4332 dml_uint_t surface_width_ub_l;
4333 dml_uint_t surface_height_ub_l;
4334 dml_uint_t surface_width_ub_c = 0;
4335 dml_uint_t surface_height_ub_c = 0;
4336
4337#ifdef __DML_VBA_DEBUG__
4338 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4339 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4340#endif
4341
4342 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4343 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
4344 SwathWidthSingleDPPY[k] = ViewportWidth[k];
4345 } else {
4346 SwathWidthSingleDPPY[k] = ViewportHeight[k];
4347 }
4348
4349#ifdef __DML_VBA_DEBUG__
4350 dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
4351 dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
4352 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4353#endif
4354
4355 MainSurfaceODMMode = ODMMode[k];
4356 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4357 if (BlendingAndTiming[k] == j) {
4358 MainSurfaceODMMode = ODMMode[j];
4359 }
4360 }
4361
4362 if (ForceSingleDPP) {
4363 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4364 } else {
4365 if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
4366 SwathWidthY[k] = (dml_uint_t)(dml_min(x: SwathWidthSingleDPPY[k], y: dml_round(val: HActive[k] / 4.0 * HRatio[k], bankers_rounding: true)));
4367 } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
4368 SwathWidthY[k] = (dml_uint_t)(dml_min(x: SwathWidthSingleDPPY[k], y: dml_round(val: HActive[k] / 2.0 * HRatio[k], bankers_rounding: true)));
4369 } else if (DPPPerSurface[k] == 2) {
4370 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
4371 } else {
4372 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4373 }
4374 }
4375
4376#ifdef __DML_VBA_DEBUG__
4377 dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
4378 dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
4379 dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
4380 dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
4381 dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
4382#endif
4383
4384 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4385 SwathWidthC[k] = SwathWidthY[k] / 2;
4386 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
4387 } else {
4388 SwathWidthC[k] = SwathWidthY[k];
4389 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
4390 }
4391
4392 if (ForceSingleDPP == true) {
4393 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4394 SwathWidthC[k] = SwathWidthSingleDPPC[k];
4395 }
4396
4397 surface_width_ub_l = (dml_uint_t)dml_ceil(x: SurfaceWidthY[k], granularity: Read256BytesBlockWidthY[k]);
4398 surface_height_ub_l = (dml_uint_t)dml_ceil(x: SurfaceHeightY[k], granularity: Read256BytesBlockHeightY[k]);
4399
4400 if (!dml_is_vertical_rotation(scan: SourceScan[k])) {
4401 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
4402 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
4403 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4404 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(x: surface_width_ub_l, y: dml_floor(x: ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, granularity: Read256BytesBlockWidthY[k]) - dml_floor(x: ViewportXStart[k], granularity: Read256BytesBlockWidthY[k])));
4405 } else {
4406 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(x: surface_width_ub_l, y: dml_ceil(x: SwathWidthY[k] - 1, granularity: Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
4407 }
4408 if (BytePerPixC[k] > 0) {
4409 surface_width_ub_c = (dml_uint_t)dml_ceil(x: SurfaceWidthC[k], granularity: Read256BytesBlockWidthC[k]);
4410 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4411 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(x: surface_width_ub_c, y: dml_floor(x: ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, granularity: Read256BytesBlockWidthC[k]) - dml_floor(x: ViewportXStartC[k], granularity: Read256BytesBlockWidthC[k])));
4412 } else {
4413 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(x: surface_width_ub_c, y: dml_ceil(x: SwathWidthC[k] - 1, granularity: Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
4414 }
4415 } else {
4416 swath_width_chroma_ub[k] = 0;
4417 }
4418 } else {
4419 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
4420 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
4421
4422 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4423 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(x: surface_height_ub_l, y: dml_floor(x: ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, granularity: Read256BytesBlockHeightY[k]) - dml_floor(x: ViewportYStart[k], granularity: Read256BytesBlockHeightY[k])));
4424 } else {
4425 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(x: surface_height_ub_l, y: dml_ceil(x: SwathWidthY[k] - 1, granularity: Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
4426 }
4427 if (BytePerPixC[k] > 0) {
4428 surface_height_ub_c = (dml_uint_t)dml_ceil(x: SurfaceHeightC[k], granularity: Read256BytesBlockHeightC[k]);
4429 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4430 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(x: surface_height_ub_c, y: dml_floor(x: ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, granularity: Read256BytesBlockHeightC[k]) - dml_floor(x: ViewportYStartC[k], granularity: Read256BytesBlockHeightC[k])));
4431 } else {
4432 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(x: surface_height_ub_c, y: dml_ceil(x: SwathWidthC[k] - 1, granularity: Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
4433 }
4434 } else {
4435 swath_width_chroma_ub[k] = 0;
4436 }
4437 }
4438
4439#ifdef __DML_VBA_DEBUG__
4440 dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
4441 dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
4442 dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
4443 dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
4444 dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
4445 dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
4446 dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
4447 dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
4448 dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
4449 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4450 dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
4451 dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
4452 dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
4453 dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
4454#endif
4455
4456 }
4457} // CalculateSwathWidth
4458
4459static dml_float_t CalculateExtraLatency(
4460 dml_uint_t RoundTripPingLatencyCycles,
4461 dml_uint_t ReorderingBytes,
4462 dml_float_t DCFCLK,
4463 dml_uint_t TotalNumberOfActiveDPP,
4464 dml_uint_t PixelChunkSizeInKByte,
4465 dml_uint_t TotalNumberOfDCCActiveDPP,
4466 dml_uint_t MetaChunkSize,
4467 dml_float_t ReturnBW,
4468 dml_bool_t GPUVMEnable,
4469 dml_bool_t HostVMEnable,
4470 dml_uint_t NumberOfActiveSurfaces,
4471 dml_uint_t NumberOfDPP[],
4472 dml_uint_t dpte_group_bytes[],
4473 dml_float_t HostVMInefficiencyFactor,
4474 dml_uint_t HostVMMinPageSize,
4475 dml_uint_t HostVMMaxNonCachedPageTableLevels)
4476{
4477 dml_float_t ExtraLatencyBytes;
4478 dml_float_t ExtraLatency;
4479
4480 ExtraLatencyBytes = CalculateExtraLatencyBytes(
4481 ReorderingBytes,
4482 TotalNumberOfActiveDPP,
4483 PixelChunkSizeInKByte,
4484 TotalNumberOfDCCActiveDPP,
4485 MetaChunkSize,
4486 GPUVMEnable,
4487 HostVMEnable,
4488 NumberOfActiveSurfaces,
4489 NumberOfDPP,
4490 dpte_group_bytes,
4491 HostVMInefficiencyFactor,
4492 HostVMMinPageSize,
4493 HostVMMaxNonCachedPageTableLevels);
4494
4495 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
4496
4497#ifdef __DML_VBA_DEBUG__
4498 dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
4499 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
4500 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
4501 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
4502 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
4503#endif
4504
4505 return ExtraLatency;
4506} // CalculateExtraLatency
4507
4508static dml_uint_t CalculateHostVMDynamicLevels(
4509 dml_bool_t GPUVMEnable,
4510 dml_bool_t HostVMEnable,
4511 dml_uint_t HostVMMinPageSize,
4512 dml_uint_t HostVMMaxNonCachedPageTableLevels)
4513{
4514 dml_uint_t HostVMDynamicLevels = 0;
4515
4516 if (GPUVMEnable && HostVMEnable) {
4517 if (HostVMMinPageSize < 2048)
4518 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
4519 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
4520 HostVMDynamicLevels = (dml_uint_t) dml_max(x: 0, y: (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1);
4521 else
4522 HostVMDynamicLevels = (dml_uint_t) dml_max(x: 0, y: (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2);
4523 } else {
4524 HostVMDynamicLevels = 0;
4525 }
4526 return HostVMDynamicLevels;
4527}
4528
4529static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
4530 dml_uint_t TotalNumberOfActiveDPP,
4531 dml_uint_t PixelChunkSizeInKByte,
4532 dml_uint_t TotalNumberOfDCCActiveDPP,
4533 dml_uint_t MetaChunkSize,
4534 dml_bool_t GPUVMEnable,
4535 dml_bool_t HostVMEnable,
4536 dml_uint_t NumberOfActiveSurfaces,
4537 dml_uint_t NumberOfDPP[],
4538 dml_uint_t dpte_group_bytes[],
4539 dml_float_t HostVMInefficiencyFactor,
4540 dml_uint_t HostVMMinPageSize,
4541 dml_uint_t HostVMMaxNonCachedPageTableLevels)
4542{
4543 dml_uint_t HostVMDynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
4544 dml_float_t ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
4545
4546 if (GPUVMEnable == true) {
4547 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4548 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4549 }
4550 }
4551 return (dml_uint_t)(ret);
4552}
4553
4554static dml_float_t CalculateUrgentLatency(
4555 dml_float_t UrgentLatencyPixelDataOnly,
4556 dml_float_t UrgentLatencyPixelMixedWithVMData,
4557 dml_float_t UrgentLatencyVMDataOnly,
4558 dml_bool_t DoUrgentLatencyAdjustment,
4559 dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
4560 dml_float_t UrgentLatencyAdjustmentFabricClockReference,
4561 dml_float_t FabricClock)
4562{
4563 dml_float_t ret;
4564
4565 ret = dml_max3(x: UrgentLatencyPixelDataOnly, y: UrgentLatencyPixelMixedWithVMData, z: UrgentLatencyVMDataOnly);
4566 if (DoUrgentLatencyAdjustment == true) {
4567 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
4568 }
4569 return ret;
4570}
4571
4572static dml_float_t RequiredDTBCLK(
4573 dml_bool_t DSCEnable,
4574 dml_float_t PixelClock,
4575 enum dml_output_format_class OutputFormat,
4576 dml_float_t OutputBpp,
4577 dml_uint_t DSCSlices,
4578 dml_uint_t HTotal,
4579 dml_uint_t HActive,
4580 dml_uint_t AudioRate,
4581 dml_uint_t AudioLayout)
4582{
4583 if (DSCEnable != true) {
4584 return dml_max(x: PixelClock / 4.0 * OutputBpp / 24.0, y: 25.0);
4585 } else {
4586 dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2);
4587 dml_float_t HCActive = dml_ceil(x: DSCSlices * dml_ceil(x: OutputBpp * dml_ceil(x: HActive / DSCSlices, granularity: 1) / 8.0, granularity: 1) / 3.0, granularity: 1);
4588 dml_float_t HCBlank = 64 + 32 * dml_ceil(x: AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), granularity: 1);
4589 dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
4590 dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive;
4591 return dml_max4(a: PixelWordRate / 4.0, b: AverageTribyteRate / 4.0, c: HActiveTribyteRate / 4.0, d: 25.0) * 1.002;
4592 }
4593}
4594
4595static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p)
4596{
4597 struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals;
4598
4599 s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
4600 for (dml_uint_t j = 0; j < 2; ++j) {
4601
4602
4603 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0;
4604 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4605 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]);
4606 }
4607
4608 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4609 s->NoOfDPPState[k] = p->NoOfDPP[j][k];
4610 }
4611
4612 s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j];
4613
4614 s->DCFCLKRequiredForAverageBandwidth = dml_max(x: p->ProjectedDCFCLKDeepSleep[j], y: s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth);
4615
4616 s->ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes: p->ReorderingBytes, TotalNumberOfActiveDPP: p->TotalNumberOfActiveDPP[j], PixelChunkSizeInKByte: p->PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP: p->TotalNumberOfDCCActiveDPP[j],
4617 MetaChunkSize: p->MetaChunkSize, GPUVMEnable: p->GPUVMEnable, HostVMEnable: p->HostVMEnable, NumberOfActiveSurfaces: p->NumberOfActiveSurfaces, NumberOfDPP: s->NoOfDPPState, dpte_group_bytes: p->dpte_group_bytes,
4618 HostVMInefficiencyFactor: 1, HostVMMinPageSize: p->HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels: p->HostVMMaxNonCachedPageTableLevels);
4619 s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth;
4620 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4621 dml_float_t DCFCLKCyclesRequiredInPrefetch;
4622 dml_float_t PrefetchTime;
4623
4624 s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth;
4625 DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k];
4626 s->PrefetchPixelLinesTime[k] = dml_max(x: p->PrefetchLinesY[j][k], y: p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k];
4627 s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
4628
4629 s->MinimumTWait = CalculateTWait(PrefetchMode: p->MaxPrefetchMode,
4630 UseMALLForPStateChange: p->UseMALLForPStateChange[k],
4631 SynchronizeDRRDisplaysForUCLKPStateChangeFinal: p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4632 DRRDisplay: p->DRRDisplay[k],
4633 DRAMClockChangeLatency: p->DRAMClockChangeLatencyFinal,
4634 FCLKChangeLatency: p->FCLKChangeLatency,
4635 UrgentLatency: p->UrgLatency,
4636 SREnterPlusExitTime: p->SREnterPlusExitTime);
4637
4638 PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k];
4639
4640 if (PrefetchTime > 0) {
4641 dml_float_t ExpectedVRatioPrefetch;
4642 ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
4643 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(x: 1.0, y: ExpectedVRatioPrefetch) * dml_max(x: 1.0, y: ExpectedVRatioPrefetch / 4);
4644 if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) {
4645 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth;
4646 }
4647 } else {
4648 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4649 }
4650 if (p->DynamicMetadataEnable[k] == true) {
4651 dml_float_t TSetupPipe;
4652 dml_float_t TdmbfPipe;
4653 dml_float_t TdmsksPipe;
4654 dml_float_t TdmecPipe;
4655 dml_float_t AllowedTimeForUrgentExtraLatency;
4656
4657 CalculateVUpdateAndDynamicMetadataParameters(
4658 MaxInterDCNTileRepeaters: p->MaxInterDCNTileRepeaters,
4659 Dppclk: p->RequiredDPPCLKPerSurface[j][k],
4660 Dispclk: p->RequiredDISPCLK[j],
4661 DCFClkDeepSleep: p->ProjectedDCFCLKDeepSleep[j],
4662 PixelClock: p->PixelClock[k],
4663 HTotal: p->HTotal[k],
4664 VBlank: p->VTotal[k] - p->VActive[k],
4665 DynamicMetadataTransmittedBytes: p->DynamicMetadataTransmittedBytes[k],
4666 DynamicMetadataLinesBeforeActiveRequired: p->DynamicMetadataLinesBeforeActiveRequired[k],
4667 InterlaceEnable: p->Interlace[k],
4668 ProgressiveToInterlaceUnitInOPP: p->ProgressiveToInterlaceUnitInOPP,
4669
4670 // Output
4671 TSetup: &TSetupPipe,
4672 Tdmbf: &TdmbfPipe,
4673 Tdmec: &TdmecPipe,
4674 Tdmsks: &TdmsksPipe,
4675 VUpdateOffsetPix: &s->dummy1,
4676 VUpdateWidthPix: &s->dummy2,
4677 VReadyOffsetPix: &s->dummy3);
4678
4679 AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k];
4680 if (AllowedTimeForUrgentExtraLatency > 0) {
4681 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(x: s->DCFCLKRequiredForPeakBandwidthPerSurface[k], y: s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
4682 } else {
4683 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4684 }
4685 }
4686 }
4687 s->DCFCLKRequiredForPeakBandwidth = 0;
4688 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4689 s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k];
4690 }
4691 s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? (p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0);
4692 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4693 dml_float_t MaximumTvmPlus2Tr0PlusTsw;
4694 MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k];
4695 if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) {
4696 s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState;
4697 } else {
4698 s->DCFCLKRequiredForPeakBandwidth = dml_max3(x: s->DCFCLKRequiredForPeakBandwidth,
4699 y: 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4),
4700 z: (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0));
4701 }
4702 }
4703 p->DCFCLKState[j] = dml_min(x: p->DCFCLKPerState, y: 1.05 * dml_max(x: s->DCFCLKRequiredForAverageBandwidth, y: s->DCFCLKRequiredForPeakBandwidth));
4704 }
4705}
4706
4707
4708static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
4709 dml_uint_t TotalNumberOfActiveDPP,
4710 dml_bool_t NoChromaOrLinear,
4711 enum dml_output_encoder_class Output)
4712{
4713 dml_bool_t ret_val = false;
4714
4715 ret_val = (UseUnboundedRequestingFinal != dml_unbounded_requesting_disable
4716 && TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
4717 if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp) {
4718 ret_val = false;
4719 }
4720 return (ret_val);
4721}
4722
4723static void CalculateSurfaceSizeInMall(
4724 dml_uint_t NumberOfActiveSurfaces,
4725 dml_uint_t MALLAllocatedForDCN,
4726 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
4727 dml_bool_t DCCEnable[],
4728 dml_bool_t ViewportStationary[],
4729 dml_uint_t ViewportXStartY[],
4730 dml_uint_t ViewportYStartY[],
4731 dml_uint_t ViewportXStartC[],
4732 dml_uint_t ViewportYStartC[],
4733 dml_uint_t ViewportWidthY[],
4734 dml_uint_t ViewportHeightY[],
4735 dml_uint_t BytesPerPixelY[],
4736 dml_uint_t ViewportWidthC[],
4737 dml_uint_t ViewportHeightC[],
4738 dml_uint_t BytesPerPixelC[],
4739 dml_uint_t SurfaceWidthY[],
4740 dml_uint_t SurfaceWidthC[],
4741 dml_uint_t SurfaceHeightY[],
4742 dml_uint_t SurfaceHeightC[],
4743 dml_uint_t Read256BytesBlockWidthY[],
4744 dml_uint_t Read256BytesBlockWidthC[],
4745 dml_uint_t Read256BytesBlockHeightY[],
4746 dml_uint_t Read256BytesBlockHeightC[],
4747 dml_uint_t ReadBlockWidthY[],
4748 dml_uint_t ReadBlockWidthC[],
4749 dml_uint_t ReadBlockHeightY[],
4750 dml_uint_t ReadBlockHeightC[],
4751
4752 // Output
4753 dml_uint_t SurfaceSizeInMALL[],
4754 dml_bool_t *ExceededMALLSize)
4755{
4756 dml_uint_t TotalSurfaceSizeInMALL = 0;
4757
4758 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4759 if (ViewportStationary[k]) {
4760 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(x: dml_ceil(x: SurfaceWidthY[k], granularity: ReadBlockWidthY[k]), y: dml_floor(x: ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, granularity: ReadBlockWidthY[k]) - dml_floor(x: ViewportXStartY[k], granularity: ReadBlockWidthY[k])) *
4761 dml_min(x: dml_ceil(x: SurfaceHeightY[k], granularity: ReadBlockHeightY[k]), y: dml_floor(x: ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, granularity: ReadBlockHeightY[k]) - dml_floor(x: ViewportYStartY[k], granularity: ReadBlockHeightY[k])) *
4762 BytesPerPixelY[k]);
4763
4764 if (ReadBlockWidthC[k] > 0) {
4765 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4766 dml_min(x: dml_ceil(x: SurfaceWidthC[k], granularity: ReadBlockWidthC[k]), y: dml_floor(x: ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, granularity: ReadBlockWidthC[k]) - dml_floor(x: ViewportXStartC[k], granularity: ReadBlockWidthC[k])) *
4767 dml_min(x: dml_ceil(x: SurfaceHeightC[k], granularity: ReadBlockHeightC[k]), y: dml_floor(x: ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, granularity: ReadBlockHeightC[k]) - dml_floor(x: ViewportYStartC[k], granularity: ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4768 }
4769 if (DCCEnable[k] == true) {
4770 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4771 dml_min(x: dml_ceil(x: SurfaceWidthY[k], granularity: 8 * Read256BytesBlockWidthY[k]), y: dml_floor(x: ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, granularity: 8 * Read256BytesBlockWidthY[k]) - dml_floor(x: ViewportXStartY[k], granularity: 8 * Read256BytesBlockWidthY[k])) *
4772 dml_min(x: dml_ceil(x: SurfaceHeightY[k], granularity: 8 * Read256BytesBlockHeightY[k]), y: dml_floor(x: ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, granularity: 8 * Read256BytesBlockHeightY[k]) - dml_floor(x: ViewportYStartY[k], granularity: 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256);
4773 if (Read256BytesBlockWidthC[k] > 0) {
4774 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4775 dml_min(x: dml_ceil(x: SurfaceWidthC[k], granularity: 8 * Read256BytesBlockWidthC[k]), y: dml_floor(x: ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, granularity: 8 * Read256BytesBlockWidthC[k]) - dml_floor(x: ViewportXStartC[k], granularity: 8 * Read256BytesBlockWidthC[k])) *
4776 dml_min(x: dml_ceil(x: SurfaceHeightC[k], granularity: 8 * Read256BytesBlockHeightC[k]), y: dml_floor(x: ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, granularity: 8 * Read256BytesBlockHeightC[k]) - dml_floor(x: ViewportYStartC[k], granularity: 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
4777 }
4778 }
4779 } else {
4780 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(x: dml_min(x: SurfaceWidthY[k], y: ViewportWidthY[k] + ReadBlockWidthY[k] - 1), granularity: ReadBlockWidthY[k]) * dml_ceil(x: dml_min(x: SurfaceHeightY[k], y: ViewportHeightY[k] + ReadBlockHeightY[k] - 1), granularity: ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4781 if (ReadBlockWidthC[k] > 0) {
4782 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4783 dml_ceil(x: dml_min(x: SurfaceWidthC[k], y: ViewportWidthC[k] + ReadBlockWidthC[k] - 1), granularity: ReadBlockWidthC[k]) *
4784 dml_ceil(x: dml_min(x: SurfaceHeightC[k], y: ViewportHeightC[k] + ReadBlockHeightC[k] - 1), granularity: ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4785 }
4786 if (DCCEnable[k] == true) {
4787 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4788 dml_ceil(x: dml_min(x: SurfaceWidthY[k], y: ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), granularity: 8 * Read256BytesBlockWidthY[k]) *
4789 dml_ceil(x: dml_min(x: SurfaceHeightY[k], y: ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), granularity: 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256);
4790
4791 if (Read256BytesBlockWidthC[k] > 0) {
4792 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4793 dml_ceil(x: dml_min(x: SurfaceWidthC[k], y: ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), granularity: 8 * Read256BytesBlockWidthC[k]) *
4794 dml_ceil(x: dml_min(x: SurfaceHeightC[k], y: ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), granularity: 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
4795 }
4796 }
4797 }
4798 }
4799
4800 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4801 if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable)
4802 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
4803 }
4804 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
4805} // CalculateSurfaceSizeInMall
4806
4807static void CalculateDETBufferSize(
4808 dml_uint_t DETSizeOverride[],
4809 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4810 dml_bool_t ForceSingleDPP,
4811 dml_uint_t NumberOfActiveSurfaces,
4812 dml_bool_t UnboundedRequestEnabled,
4813 dml_uint_t nomDETInKByte,
4814 dml_uint_t MaxTotalDETInKByte,
4815 dml_uint_t ConfigReturnBufferSizeInKByte,
4816 dml_uint_t MinCompressedBufferSizeInKByte,
4817 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
4818 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
4819 enum dml_source_format_class SourcePixelFormat[],
4820 dml_float_t ReadBandwidthLuma[],
4821 dml_float_t ReadBandwidthChroma[],
4822 dml_uint_t RoundedUpMaxSwathSizeBytesY[],
4823 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
4824 dml_uint_t DPPPerSurface[],
4825 // Output
4826 dml_uint_t DETBufferSizeInKByte[],
4827 dml_uint_t *CompressedBufferSizeInkByte)
4828{
4829 dml_uint_t DETBufferSizePoolInKByte;
4830 dml_uint_t NextDETBufferPieceInKByte;
4831 dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__];
4832 dml_bool_t NextPotentialSurfaceToAssignDETPieceFound;
4833 dml_uint_t NextSurfaceToAssignDETPiece;
4834 dml_float_t TotalBandwidth;
4835 dml_float_t BandwidthOfSurfacesNotAssignedDETPiece;
4836 dml_uint_t max_minDET;
4837 dml_uint_t minDET;
4838 dml_uint_t minDET_pipe;
4839
4840#ifdef __DML_VBA_DEBUG__
4841 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4842 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
4843 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4844 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
4845 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
4846 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
4847 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
4848 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal);
4849#endif
4850
4851 // Note: Will use default det size if that fits 2 swaths
4852 if (UnboundedRequestEnabled) {
4853 if (DETSizeOverride[0] > 0) {
4854 DETBufferSizeInKByte[0] = DETSizeOverride[0];
4855 } else {
4856 DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(x: 128.0, y: dml_ceil(x: 2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, granularity: ConfigReturnBufferSegmentSizeInkByte));
4857 }
4858 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
4859 } else {
4860 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
4861 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4862 DETBufferSizeInKByte[k] = 0;
4863 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4864 max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
4865 } else {
4866 max_minDET = nomDETInKByte;
4867 }
4868 minDET = 128;
4869 minDET_pipe = 0;
4870
4871 // add DET resource until can hold 2 full swaths
4872 while (minDET <= max_minDET && minDET_pipe == 0) {
4873 if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
4874 minDET_pipe = minDET;
4875 minDET = minDET + ConfigReturnBufferSegmentSizeInkByte;
4876 }
4877
4878#ifdef __DML_VBA_DEBUG__
4879 dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
4880 dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
4881 dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
4882 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4883 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4884#endif
4885
4886 if (minDET_pipe == 0) {
4887 minDET_pipe = (dml_uint_t)(dml_max(x: 128, y: dml_ceil(x: ((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, granularity: ConfigReturnBufferSegmentSizeInkByte)));
4888#ifdef __DML_VBA_DEBUG__
4889 dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
4890#endif
4891 }
4892
4893 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4894 DETBufferSizeInKByte[k] = 0;
4895 } else if (DETSizeOverride[k] > 0) {
4896 DETBufferSizeInKByte[k] = DETSizeOverride[k];
4897 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
4898 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
4899 DETBufferSizeInKByte[k] = minDET_pipe;
4900 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
4901 }
4902
4903#ifdef __DML_VBA_DEBUG__
4904 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
4905 dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]);
4906 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4907 dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
4908#endif
4909 }
4910
4911 TotalBandwidth = 0;
4912 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4913 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
4914 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
4915 }
4916#ifdef __DML_VBA_DEBUG__
4917 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
4918 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4919 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4920 }
4921 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
4922#endif
4923 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
4924 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
4925 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4926
4927 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4928 DETPieceAssignedToThisSurfaceAlready[k] = true;
4929 } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
4930 DETPieceAssignedToThisSurfaceAlready[k] = true;
4931 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
4932 } else {
4933 DETPieceAssignedToThisSurfaceAlready[k] = false;
4934 }
4935#ifdef __DML_VBA_DEBUG__
4936 dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
4937 dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
4938#endif
4939 }
4940
4941 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4942 NextPotentialSurfaceToAssignDETPieceFound = false;
4943 NextSurfaceToAssignDETPiece = 0;
4944
4945 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4946#ifdef __DML_VBA_DEBUG__
4947 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
4948 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
4949 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
4950 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4951 dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
4952#endif
4953 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
4954 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
4955 NextSurfaceToAssignDETPiece = k;
4956 NextPotentialSurfaceToAssignDETPieceFound = true;
4957 }
4958#ifdef __DML_VBA_DEBUG__
4959 dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
4960 dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
4961#endif
4962 }
4963
4964 if (NextPotentialSurfaceToAssignDETPieceFound) {
4965 // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue
4966 //
4967 //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4968 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4969 //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4970 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4971 //
4972 //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1);
4973 //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2);
4974
4975 NextDETBufferPieceInKByte = (dml_uint_t)(dml_min(
4976 x: dml_round(val: (dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4977 ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), bankers_rounding: true)
4978 * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
4979 y: dml_floor(x: (dml_float_t) DETBufferSizePoolInKByte, granularity: (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
4980
4981#ifdef __DML_VBA_DEBUG__
4982 dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
4983 dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
4984 dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
4985 dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4986 dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
4987 dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
4988 dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
4989#endif
4990
4991 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
4992#ifdef __DML_VBA_DEBUG__
4993 dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
4994#endif
4995
4996 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
4997 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
4998 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4999 }
5000 }
5001 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
5002 }
5003 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte;
5004
5005#ifdef __DML_VBA_DEBUG__
5006 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
5007 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
5008 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5009 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
5010 }
5011#endif
5012} // CalculateDETBufferSize
5013
5014
5015/// @brief Calculate the bound for return buffer sizing
5016static void CalculateMaxDETAndMinCompressedBufferSize(
5017 dml_uint_t ConfigReturnBufferSizeInKByte,
5018 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
5019 dml_uint_t ROBBufferSizeInKByte,
5020 dml_uint_t MaxNumDPP,
5021 dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
5022 dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA
5023
5024 // Output
5025 dml_uint_t *MaxTotalDETInKByte,
5026 dml_uint_t *nomDETInKByte,
5027 dml_uint_t *MinCompressedBufferSizeInKByte)
5028{
5029 *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
5030 *nomDETInKByte = (dml_uint_t)(dml_floor(x: (dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, granularity: ConfigReturnBufferSegmentSizeInKByte));
5031 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
5032
5033#ifdef __DML_VBA_DEBUG__
5034 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
5035 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
5036 dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
5037 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
5038 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
5039 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
5040#endif
5041
5042 if (nomDETInKByteOverrideEnable) {
5043 *nomDETInKByte = nomDETInKByteOverrideValue;
5044 dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
5045 }
5046} // CalculateMaxDETAndMinCompressedBufferSize
5047
5048/// @brief Calculate all the RQ request attributes, like row height and # swath
5049static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch,
5050 struct CalculateVMRowAndSwath_params_st *p)
5051{
5052 struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals;
5053
5054 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable: p->GPUVMEnable, HostVMEnable: p->HostVMEnable, HostVMMinPageSize: p->HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels: p->HostVMMaxNonCachedPageTableLevels);
5055
5056 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5057 if (p->HostVMEnable == true) {
5058 p->vm_group_bytes[k] = 512;
5059 p->dpte_group_bytes[k] = 512;
5060 } else if (p->GPUVMEnable == true) {
5061 p->vm_group_bytes[k] = 2048;
5062 if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(scan: p->myPipe[k].SourceScan)) {
5063 p->dpte_group_bytes[k] = 512;
5064 } else {
5065 p->dpte_group_bytes[k] = 2048;
5066 }
5067 } else {
5068 p->vm_group_bytes[k] = 0;
5069 p->dpte_group_bytes[k] = 0;
5070 }
5071
5072 if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 ||
5073 p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) {
5074 if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(scan: p->myPipe[k].SourceScan)) {
5075 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
5076 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
5077 } else {
5078 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
5079 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
5080 }
5081
5082 s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
5083 ViewportStationary: p->myPipe[k].ViewportStationary,
5084 DCCEnable: p->myPipe[k].DCCEnable,
5085 NumberOfDPPs: p->myPipe[k].DPPPerSurface,
5086 BlockHeight256Bytes: p->myPipe[k].BlockHeight256BytesC,
5087 BlockWidth256Bytes: p->myPipe[k].BlockWidth256BytesC,
5088 SourcePixelFormat: p->myPipe[k].SourcePixelFormat,
5089 SurfaceTiling: p->myPipe[k].SurfaceTiling,
5090 BytePerPixel: p->myPipe[k].BytePerPixelC,
5091 SourceScan: p->myPipe[k].SourceScan,
5092 SwathWidth: p->SwathWidthC[k],
5093 ViewportHeight: p->myPipe[k].ViewportHeightChroma,
5094 ViewportXStart: p->myPipe[k].ViewportXStartC,
5095 ViewportYStart: p->myPipe[k].ViewportYStartC,
5096 GPUVMEnable: p->GPUVMEnable,
5097 GPUVMMaxPageTableLevels: p->GPUVMMaxPageTableLevels,
5098 GPUVMMinPageSizeKBytes: p->GPUVMMinPageSizeKBytes[k],
5099 PTEBufferSizeInRequests: s->PTEBufferSizeInRequestsForChroma[k],
5100 Pitch: p->myPipe[k].PitchC,
5101 DCCMetaPitch: p->myPipe[k].DCCMetaPitchC,
5102 MacroTileWidth: p->myPipe[k].BlockWidthC,
5103 MacroTileHeight: p->myPipe[k].BlockHeightC,
5104
5105 // Output
5106 MetaRowByte: &s->MetaRowByteC[k],
5107 PixelPTEBytesPerRow: &s->PixelPTEBytesPerRowC[k],
5108 PixelPTEBytesPerRowStorage: &s->PixelPTEBytesPerRowStorageC[k],
5109 dpte_row_width_ub: &p->dpte_row_width_chroma_ub[k],
5110 dpte_row_height: &p->dpte_row_height_chroma[k],
5111 dpte_row_height_linear: &p->dpte_row_height_linear_chroma[k],
5112 PixelPTEBytesPerRow_one_row_per_frame: &s->PixelPTEBytesPerRowC_one_row_per_frame[k],
5113 dpte_row_width_ub_one_row_per_frame: &s->dpte_row_width_chroma_ub_one_row_per_frame[k],
5114 dpte_row_height_one_row_per_frame: &s->dpte_row_height_chroma_one_row_per_frame[k],
5115 MetaRequestWidth: &p->meta_req_width_chroma[k],
5116 MetaRequestHeight: &p->meta_req_height_chroma[k],
5117 meta_row_width: &p->meta_row_width_chroma[k],
5118 meta_row_height: &p->meta_row_height_chroma[k],
5119 PixelPTEReqWidth: &p->PixelPTEReqWidthC[k],
5120 PixelPTEReqHeight: &p->PixelPTEReqHeightC[k],
5121 PTERequestSize: &p->PTERequestSizeC[k],
5122 DPDE0BytesFrame: &p->dpde0_bytes_per_frame_ub_c[k],
5123 MetaPTEBytesFrame: &p->meta_pte_bytes_per_frame_ub_c[k]);
5124
5125 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines (
5126 VRatio: p->myPipe[k].VRatioChroma,
5127 VTaps: p->myPipe[k].VTapsChroma,
5128 Interlace: p->myPipe[k].InterlaceEnable,
5129 ProgressiveToInterlaceUnitInOPP: p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5130 SwathHeight: p->myPipe[k].SwathHeightC,
5131 SourceScan: p->myPipe[k].SourceScan,
5132 ViewportStationary: p->myPipe[k].ViewportStationary,
5133 SwathWidth: p->SwathWidthC[k],
5134 ViewportHeight: p->myPipe[k].ViewportHeightChroma,
5135 ViewportXStart: p->myPipe[k].ViewportXStartC,
5136 ViewportYStart: p->myPipe[k].ViewportYStartC,
5137
5138 // Output
5139 VInitPreFill: &p->VInitPreFillC[k],
5140 MaxNumSwath: &p->MaxNumSwathC[k]);
5141 } else {
5142 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
5143 s->PTEBufferSizeInRequestsForChroma[k] = 0;
5144 s->PixelPTEBytesPerRowC[k] = 0;
5145 s->PixelPTEBytesPerRowStorageC[k] = 0;
5146 s->PDEAndMetaPTEBytesFrameC = 0;
5147 s->MetaRowByteC[k] = 0;
5148 p->MaxNumSwathC[k] = 0;
5149 p->PrefetchSourceLinesC[k] = 0;
5150 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
5151 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
5152 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
5153 }
5154
5155 s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
5156 ViewportStationary: p->myPipe[k].ViewportStationary,
5157 DCCEnable: p->myPipe[k].DCCEnable,
5158 NumberOfDPPs: p->myPipe[k].DPPPerSurface,
5159 BlockHeight256Bytes: p->myPipe[k].BlockHeight256BytesY,
5160 BlockWidth256Bytes: p->myPipe[k].BlockWidth256BytesY,
5161 SourcePixelFormat: p->myPipe[k].SourcePixelFormat,
5162 SurfaceTiling: p->myPipe[k].SurfaceTiling,
5163 BytePerPixel: p->myPipe[k].BytePerPixelY,
5164 SourceScan: p->myPipe[k].SourceScan,
5165 SwathWidth: p->SwathWidthY[k],
5166 ViewportHeight: p->myPipe[k].ViewportHeight,
5167 ViewportXStart: p->myPipe[k].ViewportXStart,
5168 ViewportYStart: p->myPipe[k].ViewportYStart,
5169 GPUVMEnable: p->GPUVMEnable,
5170 GPUVMMaxPageTableLevels: p->GPUVMMaxPageTableLevels,
5171 GPUVMMinPageSizeKBytes: p->GPUVMMinPageSizeKBytes[k],
5172 PTEBufferSizeInRequests: s->PTEBufferSizeInRequestsForLuma[k],
5173 Pitch: p->myPipe[k].PitchY,
5174 DCCMetaPitch: p->myPipe[k].DCCMetaPitchY,
5175 MacroTileWidth: p->myPipe[k].BlockWidthY,
5176 MacroTileHeight: p->myPipe[k].BlockHeightY,
5177
5178 // Output
5179 MetaRowByte: &s->MetaRowByteY[k],
5180 PixelPTEBytesPerRow: &s->PixelPTEBytesPerRowY[k],
5181 PixelPTEBytesPerRowStorage: &s->PixelPTEBytesPerRowStorageY[k],
5182 dpte_row_width_ub: &p->dpte_row_width_luma_ub[k],
5183 dpte_row_height: &p->dpte_row_height_luma[k],
5184 dpte_row_height_linear: &p->dpte_row_height_linear_luma[k],
5185 PixelPTEBytesPerRow_one_row_per_frame: &s->PixelPTEBytesPerRowY_one_row_per_frame[k],
5186 dpte_row_width_ub_one_row_per_frame: &s->dpte_row_width_luma_ub_one_row_per_frame[k],
5187 dpte_row_height_one_row_per_frame: &s->dpte_row_height_luma_one_row_per_frame[k],
5188 MetaRequestWidth: &p->meta_req_width[k],
5189 MetaRequestHeight: &p->meta_req_height[k],
5190 meta_row_width: &p->meta_row_width[k],
5191 meta_row_height: &p->meta_row_height[k],
5192 PixelPTEReqWidth: &p->PixelPTEReqWidthY[k],
5193 PixelPTEReqHeight: &p->PixelPTEReqHeightY[k],
5194 PTERequestSize: &p->PTERequestSizeY[k],
5195 DPDE0BytesFrame: &p->dpde0_bytes_per_frame_ub_l[k],
5196 MetaPTEBytesFrame: &p->meta_pte_bytes_per_frame_ub_l[k]);
5197
5198 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
5199 VRatio: p->myPipe[k].VRatio,
5200 VTaps: p->myPipe[k].VTaps,
5201 Interlace: p->myPipe[k].InterlaceEnable,
5202 ProgressiveToInterlaceUnitInOPP: p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5203 SwathHeight: p->myPipe[k].SwathHeightY,
5204 SourceScan: p->myPipe[k].SourceScan,
5205 ViewportStationary: p->myPipe[k].ViewportStationary,
5206 SwathWidth: p->SwathWidthY[k],
5207 ViewportHeight: p->myPipe[k].ViewportHeight,
5208 ViewportXStart: p->myPipe[k].ViewportXStart,
5209 ViewportYStart: p->myPipe[k].ViewportYStart,
5210
5211 // Output
5212 VInitPreFill: &p->VInitPreFillY[k],
5213 MaxNumSwath: &p->MaxNumSwathY[k]);
5214
5215 p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels);
5216 p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k];
5217
5218 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
5219 p->PTEBufferSizeNotExceeded[k] = true;
5220 } else {
5221 p->PTEBufferSizeNotExceeded[k] = false;
5222#ifdef __DML_VBA_DEBUG__
5223 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5224 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5225 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
5226 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
5227 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
5228 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
5229 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5230#endif
5231 }
5232 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
5233 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
5234
5235#ifdef __DML_VBA_DEBUG__
5236 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
5237 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
5238 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
5239 dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
5240 dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
5241 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
5242 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
5243#endif
5244 }
5245
5246 CalculateMALLUseForStaticScreen(
5247 NumberOfActiveSurfaces: p->NumberOfActiveSurfaces,
5248 MALLAllocatedForDCNFinal: p->MALLAllocatedForDCN,
5249 UseMALLForStaticScreen: p->UseMALLForStaticScreen, // mode
5250 SurfaceSizeInMALL: p->SurfaceSizeInMALL,
5251 one_row_per_frame_fits_in_buffer: s->one_row_per_frame_fits_in_buffer,
5252 // Output
5253 UsesMALLForStaticScreen: p->UsesMALLForStaticScreen); // boolen
5254
5255 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5256 if (p->PTEBufferModeOverrideEn[k] == 1) {
5257 p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
5258 }
5259 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5260 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
5261 p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(x: p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
5262 }
5263
5264 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5265#ifdef __DML_VBA_DEBUG__
5266 dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
5267 dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
5268#endif
5269 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5270 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(scan: p->myPipe[k].SourceScan));
5271
5272 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
5273
5274 if (p->use_one_row_for_frame[k]) {
5275 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
5276 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
5277 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
5278 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
5279 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
5280 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
5281 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
5282 }
5283
5284 if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) {
5285 p->DCCMetaBufferSizeNotExceeded[k] = true;
5286 } else {
5287 p->DCCMetaBufferSizeNotExceeded[k] = false;
5288
5289#ifdef __DML_VBA_DEBUG__
5290 dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]);
5291 dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes);
5292 dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
5293#endif
5294 }
5295 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
5296 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
5297 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
5298 if (p->use_one_row_for_frame[k])
5299 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
5300
5301 CalculateRowBandwidth(
5302 GPUVMEnable: p->GPUVMEnable,
5303 SourcePixelFormat: p->myPipe[k].SourcePixelFormat,
5304 VRatio: p->myPipe[k].VRatio,
5305 VRatioChroma: p->myPipe[k].VRatioChroma,
5306 DCCEnable: p->myPipe[k].DCCEnable,
5307 LineTime: p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
5308 MetaRowByteLuma: s->MetaRowByteY[k],
5309 MetaRowByteChroma: s->MetaRowByteC[k],
5310 meta_row_height_luma: p->meta_row_height[k],
5311 meta_row_height_chroma: p->meta_row_height_chroma[k],
5312 PixelPTEBytesPerRowLuma: s->PixelPTEBytesPerRowY[k],
5313 PixelPTEBytesPerRowChroma: s->PixelPTEBytesPerRowC[k],
5314 dpte_row_height_luma: p->dpte_row_height_luma[k],
5315 dpte_row_height_chroma: p->dpte_row_height_chroma[k],
5316
5317 // Output
5318 meta_row_bw: &p->meta_row_bw[k],
5319 dpte_row_bw: &p->dpte_row_bw[k]);
5320#ifdef __DML_VBA_DEBUG__
5321 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
5322 dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
5323 dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]);
5324 dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
5325 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
5326 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5327 dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
5328 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
5329 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5330 dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
5331 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5332 dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
5333 dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
5334#endif
5335 }
5336}
5337
5338static void CalculateOutputLink(
5339 dml_float_t PHYCLKPerState,
5340 dml_float_t PHYCLKD18PerState,
5341 dml_float_t PHYCLKD32PerState,
5342 dml_float_t Downspreading,
5343 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
5344 enum dml_output_encoder_class Output,
5345 enum dml_output_format_class OutputFormat,
5346 dml_uint_t HTotal,
5347 dml_uint_t HActive,
5348 dml_float_t PixelClockBackEnd,
5349 dml_float_t ForcedOutputLinkBPP,
5350 dml_uint_t DSCInputBitPerComponent,
5351 dml_uint_t NumberOfDSCSlices,
5352 dml_float_t AudioSampleRate,
5353 dml_uint_t AudioSampleLayout,
5354 enum dml_odm_mode ODMModeNoDSC,
5355 enum dml_odm_mode ODMModeDSC,
5356 enum dml_dsc_enable DSCEnable,
5357 dml_uint_t OutputLinkDPLanes,
5358 enum dml_output_link_dp_rate OutputLinkDPRate,
5359
5360 // Output
5361 dml_bool_t *RequiresDSC,
5362 dml_bool_t *RequiresFEC,
5363 dml_float_t *OutBpp,
5364 enum dml_output_type_and_rate__type *OutputType,
5365 enum dml_output_type_and_rate__rate *OutputRate,
5366 dml_uint_t *RequiredSlots)
5367{
5368 dml_bool_t LinkDSCEnable;
5369 dml_uint_t dummy;
5370 *RequiresDSC = false;
5371 *RequiresFEC = false;
5372 *OutBpp = 0;
5373
5374 *OutputType = dml_output_type_unknown;
5375 *OutputRate = dml_output_rate_unknown;
5376
5377 if (IsMainSurfaceUsingTheIndicatedTiming) {
5378 if (Output == dml_hdmi) {
5379 *RequiresDSC = false;
5380 *RequiresFEC = false;
5381 *OutBpp = TruncToValidBPP(LinkBitRate: dml_min(x: 600, y: PHYCLKPerState) * 10, Lanes: 3, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: false, Output,
5382 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots: &dummy);
5383 //OutputTypeAndRate = "HDMI";
5384 *OutputType = dml_output_type_hdmi;
5385
5386 } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) {
5387 if (DSCEnable == dml_dsc_enable) {
5388 *RequiresDSC = true;
5389 LinkDSCEnable = true;
5390 if (Output == dml_dp || Output == dml_dp2p0) {
5391 *RequiresFEC = true;
5392 } else {
5393 *RequiresFEC = false;
5394 }
5395 } else {
5396 *RequiresDSC = false;
5397 LinkDSCEnable = false;
5398 if (Output == dml_dp2p0) {
5399 *RequiresFEC = true;
5400 } else {
5401 *RequiresFEC = false;
5402 }
5403 }
5404 if (Output == dml_dp2p0) {
5405 *OutBpp = 0;
5406 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32) {
5407 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 10000, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5408 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5409 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5410 *RequiresDSC = true;
5411 LinkDSCEnable = true;
5412 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 10000, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5413 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5414 }
5415 //OutputTypeAndRate = Output & " UHBR10";
5416 *OutputType = dml_output_type_dp2p0;
5417 *OutputRate = dml_output_rate_dp_rate_uhbr10;
5418 }
5419 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
5420 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 13500, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5421 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5422
5423 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5424 *RequiresDSC = true;
5425 LinkDSCEnable = true;
5426 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 13500, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5427 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5428 }
5429 //OutputTypeAndRate = Output & " UHBR13p5";
5430 *OutputType = dml_output_type_dp2p0;
5431 *OutputRate = dml_output_rate_dp_rate_uhbr13p5;
5432 }
5433 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
5434 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 20000, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5435 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5436 if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5437 *RequiresDSC = true;
5438 LinkDSCEnable = true;
5439 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 20000, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5440 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5441 }
5442 //OutputTypeAndRate = Output & " UHBR20";
5443 *OutputType = dml_output_type_dp2p0;
5444 *OutputRate = dml_output_rate_dp_rate_uhbr20;
5445 }
5446 } else { // output is dp or edp
5447 *OutBpp = 0;
5448 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) {
5449 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 2700, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5450 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5451 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5452 *RequiresDSC = true;
5453 LinkDSCEnable = true;
5454 if (Output == dml_dp) {
5455 *RequiresFEC = true;
5456 }
5457 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 2700, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5458 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5459 }
5460 //OutputTypeAndRate = Output & " HBR";
5461 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5462 *OutputRate = dml_output_rate_dp_rate_hbr;
5463 }
5464 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) {
5465 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 5400, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5466 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5467
5468 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5469 *RequiresDSC = true;
5470 LinkDSCEnable = true;
5471 if (Output == dml_dp) {
5472 *RequiresFEC = true;
5473 }
5474 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 5400, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5475 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5476 }
5477 //OutputTypeAndRate = Output & " HBR2";
5478 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5479 *OutputRate = dml_output_rate_dp_rate_hbr2;
5480 }
5481 if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
5482 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 8100, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5483 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5484
5485 if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
5486 *RequiresDSC = true;
5487 LinkDSCEnable = true;
5488 if (Output == dml_dp) {
5489 *RequiresFEC = true;
5490 }
5491 *OutBpp = TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 8100, Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
5492 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: (dml_uint_t)AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
5493 }
5494 //OutputTypeAndRate = Output & " HBR3";
5495 *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
5496 *OutputRate = dml_output_rate_dp_rate_hbr3;
5497 }
5498 }
5499 }
5500 }
5501}
5502
5503/// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy
5504static void CalculateODMMode(
5505 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
5506 dml_uint_t HActive,
5507 enum dml_output_encoder_class Output,
5508 enum dml_output_format_class OutputFormat,
5509 enum dml_odm_use_policy ODMUse,
5510 dml_float_t StateDispclk,
5511 dml_float_t MaxDispclk,
5512 dml_bool_t DSCEnable,
5513 dml_uint_t TotalNumberOfActiveDPP,
5514 dml_uint_t MaxNumDPP,
5515 dml_float_t PixelClock,
5516 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5517 dml_float_t DISPCLKRampingMargin,
5518 dml_float_t DISPCLKDPPCLKVCOSpeed,
5519
5520 // Output
5521 dml_bool_t *TotalAvailablePipesSupport,
5522 dml_uint_t *NumberOfDPP,
5523 enum dml_odm_mode *ODMMode,
5524 dml_float_t *RequiredDISPCLKPerSurface)
5525{
5526
5527 dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine;
5528 dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5529 dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5530
5531 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(ODMMode: dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclkSingle: MaxDispclk);
5532 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(ODMMode: dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclkSingle: MaxDispclk);
5533 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(ODMMode: dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclkSingle: MaxDispclk);
5534 *TotalAvailablePipesSupport = true;
5535
5536 if (OutputFormat == dml_420) {
5537 if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5538 *TotalAvailablePipesSupport = false;
5539 else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5540 ODMUse = dml_odm_use_policy_combine_4to1;
5541 else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH)
5542 ODMUse = dml_odm_use_policy_combine_2to1;
5543 if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1)
5544 *TotalAvailablePipesSupport = false;
5545 if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1)
5546 *TotalAvailablePipesSupport = false;
5547 }
5548
5549 if (ODMUse == dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed)
5550 *ODMMode = dml_odm_mode_bypass;
5551 else if (ODMUse == dml_odm_use_policy_combine_2to1)
5552 *ODMMode = dml_odm_mode_combine_2to1;
5553 else if (ODMUse == dml_odm_use_policy_combine_4to1)
5554 *ODMMode = dml_odm_mode_combine_4to1;
5555 else if (ODMUse == dml_odm_use_policy_split_1to2)
5556 *ODMMode = dml_odm_mode_split_1to2;
5557 else if (ODMUse == dml_odm_use_policy_mso_1to2)
5558 *ODMMode = dml_odm_mode_mso_1to2;
5559 else if (ODMUse == dml_odm_use_policy_mso_1to4)
5560 *ODMMode = dml_odm_mode_mso_1to4;
5561
5562 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
5563 *NumberOfDPP = 0;
5564
5565 if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5566 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
5567 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
5568 *ODMMode = dml_odm_mode_combine_4to1;
5569 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5570 *NumberOfDPP = 4;
5571 } else {
5572 *TotalAvailablePipesSupport = false;
5573 }
5574 } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5575 ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
5576 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
5577 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
5578 *ODMMode = dml_odm_mode_combine_2to1;
5579 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5580 *NumberOfDPP = 2;
5581 } else {
5582 *TotalAvailablePipesSupport = false;
5583 }
5584 } else {
5585 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
5586 *NumberOfDPP = 1;
5587 } else {
5588 *TotalAvailablePipesSupport = false;
5589 }
5590 }
5591}
5592
5593/// @brief Calculate the required DISPCLK given the odm mode and pixclk
5594static dml_float_t CalculateRequiredDispclk(
5595 enum dml_odm_mode ODMMode,
5596 dml_float_t PixelClock,
5597 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5598 dml_float_t DISPCLKRampingMargin,
5599 dml_float_t DISPCLKDPPCLKVCOSpeed,
5600 dml_float_t MaxDispclk)
5601{
5602 dml_float_t RequiredDispclk = 0.;
5603 dml_float_t PixelClockAfterODM;
5604
5605 dml_float_t DISPCLKWithRampingRoundedToDFSGranularity;
5606 dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity;
5607 dml_float_t MaxDispclkRoundedDownToDFSGranularity;
5608
5609 if (ODMMode == dml_odm_mode_combine_4to1) {
5610 PixelClockAfterODM = PixelClock / 4;
5611 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5612 PixelClockAfterODM = PixelClock / 2;
5613 } else {
5614 PixelClockAfterODM = PixelClock;
5615 }
5616
5617 DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(Clock: PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
5618 DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(Clock: PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
5619 MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(Clock: MaxDispclk, round_up: 0, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
5620
5621 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5622 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
5623 } else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5624 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
5625 } else {
5626 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
5627 }
5628
5629 return RequiredDispclk;
5630}
5631
5632/// @brief Determine DPPCLK if there only one DPP per plane, main factor is the pixel rate and DPP scaling parameter
5633static void CalculateSinglePipeDPPCLKAndSCLThroughput(
5634 dml_float_t HRatio,
5635 dml_float_t HRatioChroma,
5636 dml_float_t VRatio,
5637 dml_float_t VRatioChroma,
5638 dml_float_t MaxDCHUBToPSCLThroughput,
5639 dml_float_t MaxPSCLToLBThroughput,
5640 dml_float_t PixelClock,
5641 enum dml_source_format_class SourcePixelFormat,
5642 dml_uint_t HTaps,
5643 dml_uint_t HTapsChroma,
5644 dml_uint_t VTaps,
5645 dml_uint_t VTapsChroma,
5646
5647 // Output
5648 dml_float_t *PSCL_THROUGHPUT,
5649 dml_float_t *PSCL_THROUGHPUT_CHROMA,
5650 dml_float_t *DPPCLKUsingSingleDPP)
5651{
5652 dml_float_t DPPCLKUsingSingleDPPLuma;
5653 dml_float_t DPPCLKUsingSingleDPPChroma;
5654
5655 if (HRatio > 1) {
5656 *PSCL_THROUGHPUT = dml_min(x: MaxDCHUBToPSCLThroughput, y: MaxPSCLToLBThroughput * HRatio / dml_ceil(x: (dml_float_t) HTaps / 6.0, granularity: 1.0));
5657 } else {
5658 *PSCL_THROUGHPUT = dml_min(x: MaxDCHUBToPSCLThroughput, y: MaxPSCLToLBThroughput);
5659 }
5660
5661 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(x: VTaps / 6 * dml_min(x: 1, y: HRatio), y: HRatio * VRatio / *PSCL_THROUGHPUT, z: 1);
5662
5663 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
5664 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
5665
5666 if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
5667 *PSCL_THROUGHPUT_CHROMA = 0;
5668 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
5669 } else {
5670 if (HRatioChroma > 1) {
5671 *PSCL_THROUGHPUT_CHROMA = dml_min(x: MaxDCHUBToPSCLThroughput, y: MaxPSCLToLBThroughput * HRatioChroma / dml_ceil(x: (dml_float_t) HTapsChroma / 6.0, granularity: 1.0));
5672 } else {
5673 *PSCL_THROUGHPUT_CHROMA = dml_min(x: MaxDCHUBToPSCLThroughput, y: MaxPSCLToLBThroughput);
5674 }
5675 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(x: VTapsChroma / 6 * dml_min(x: 1, y: HRatioChroma),
5676 y: HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, z: 1);
5677 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
5678 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
5679 *DPPCLKUsingSingleDPP = dml_max(x: DPPCLKUsingSingleDPPLuma, y: DPPCLKUsingSingleDPPChroma);
5680 }
5681}
5682
5683/// @brief Calculate the actual dppclk freq
5684/// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
5685/// @param DPPPerSurface Number of DPP for each plane
5686static void CalculateDPPCLK(
5687 dml_uint_t NumberOfActiveSurfaces,
5688 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5689 dml_float_t DISPCLKDPPCLKVCOSpeed,
5690 dml_float_t DPPCLKUsingSingleDPP[],
5691 dml_uint_t DPPPerSurface[],
5692
5693 // Output
5694 dml_float_t *GlobalDPPCLK,
5695 dml_float_t Dppclk[])
5696{
5697 *GlobalDPPCLK = 0;
5698 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5699 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
5700 *GlobalDPPCLK = dml_max(x: *GlobalDPPCLK, y: Dppclk[k]);
5701 }
5702 *GlobalDPPCLK = RoundToDFSGranularity(Clock: *GlobalDPPCLK, round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
5703
5704 dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
5705 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5706 Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(x: Dppclk[k] * 255.0 / *GlobalDPPCLK, granularity: 1.0);
5707 dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]);
5708 }
5709}
5710
5711static void CalculateMALLUseForStaticScreen(
5712 dml_uint_t NumberOfActiveSurfaces,
5713 dml_uint_t MALLAllocatedForDCNFinal,
5714 enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
5715 dml_uint_t SurfaceSizeInMALL[],
5716 dml_bool_t one_row_per_frame_fits_in_buffer[],
5717
5718 // Output
5719 dml_bool_t UsesMALLForStaticScreen[])
5720{
5721
5722 dml_uint_t SurfaceToAddToMALL;
5723 dml_bool_t CanAddAnotherSurfaceToMALL;
5724 dml_uint_t TotalSurfaceSizeInMALL;
5725
5726 TotalSurfaceSizeInMALL = 0;
5727 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5728 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
5729 if (UsesMALLForStaticScreen[k])
5730 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
5731#ifdef __DML_VBA_DEBUG__
5732 dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
5733 dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
5734#endif
5735 }
5736
5737 SurfaceToAddToMALL = 0;
5738 CanAddAnotherSurfaceToMALL = true;
5739 while (CanAddAnotherSurfaceToMALL) {
5740 CanAddAnotherSurfaceToMALL = false;
5741 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5742 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
5743 !UsesMALLForStaticScreen[k] && UseMALLForStaticScreen[k] != dml_use_mall_static_screen_disable && one_row_per_frame_fits_in_buffer[k] &&
5744 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
5745 CanAddAnotherSurfaceToMALL = true;
5746 SurfaceToAddToMALL = k;
5747 dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
5748 }
5749 }
5750 if (CanAddAnotherSurfaceToMALL) {
5751 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
5752 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
5753
5754#ifdef __DML_VBA_DEBUG__
5755 dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
5756 dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
5757#endif
5758 }
5759 }
5760}
5761
5762// @brief Calculate return bw for VM only traffic
5763dml_float_t dml_get_return_bw_mbps_vm_only(
5764 const struct soc_bounding_box_st *soc,
5765 dml_bool_t use_ideal_dram_bw_strobe,
5766 dml_bool_t HostVMEnable,
5767 dml_float_t DCFCLK,
5768 dml_float_t FabricClock,
5769 dml_float_t DRAMSpeed)
5770{
5771 dml_float_t VMDataOnlyReturnBW =
5772 dml_min3(x: soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5773 y: FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5774 z: DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes *
5775 ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0);
5776#ifdef __DML_VBA_DEBUG__
5777 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5778 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5779 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5780 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5781 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5782 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
5783#endif
5784 return VMDataOnlyReturnBW;
5785}
5786
5787// Function: dml_get_return_bw_mbps
5788// Megabyte per second
5789dml_float_t dml_get_return_bw_mbps(
5790 const struct soc_bounding_box_st *soc,
5791 dml_bool_t use_ideal_dram_bw_strobe,
5792 dml_bool_t HostVMEnable,
5793 dml_float_t DCFCLK,
5794 dml_float_t FabricClock,
5795 dml_float_t DRAMSpeed)
5796{
5797 dml_float_t ReturnBW = 0.;
5798 dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK;
5799 dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
5800 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5801 dml_float_t PixelDataOnlyReturnBW = dml_min3(x: IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5802 y: IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5803 z: IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5804 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100);
5805 dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(x: IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5806 y: IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5807 z: IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5808 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100);
5809
5810 if (HostVMEnable != true) {
5811 ReturnBW = PixelDataOnlyReturnBW;
5812 } else {
5813 ReturnBW = PixelMixedWithVMDataReturnBW;
5814 }
5815
5816#ifdef __DML_VBA_DEBUG__
5817 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5818 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5819 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5820 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5821 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5822 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
5823 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
5824 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5825 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5826 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5827 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
5828#endif
5829 return ReturnBW;
5830}
5831
5832// Function: dml_get_return_dram_bw_mbps
5833// Megabyte per second
5834static dml_float_t dml_get_return_dram_bw_mbps(
5835 const struct soc_bounding_box_st *soc,
5836 dml_bool_t use_ideal_dram_bw_strobe,
5837 dml_bool_t HostVMEnable,
5838 dml_float_t DRAMSpeed)
5839{
5840 dml_float_t ReturnDRAMBW = 0.;
5841 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5842 dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5843 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
5844 dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5845 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
5846
5847 if (HostVMEnable != true) {
5848 ReturnDRAMBW = PixelDataOnlyReturnBW;
5849 } else {
5850 ReturnDRAMBW = PixelMixedWithVMDataReturnBW;
5851 }
5852
5853#ifdef __DML_VBA_DEBUG__
5854 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5855 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5856 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5857 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5858 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5859 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5860 dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW);
5861#endif
5862 return ReturnDRAMBW;
5863}
5864
5865/// @brief BACKEND
5866static dml_uint_t DSCDelayRequirement(
5867 dml_bool_t DSCEnabled,
5868 enum dml_odm_mode ODMMode,
5869 dml_uint_t DSCInputBitPerComponent,
5870 dml_float_t OutputBpp,
5871 dml_uint_t HActive,
5872 dml_uint_t HTotal,
5873 dml_uint_t NumberOfDSCSlices,
5874 enum dml_output_format_class OutputFormat,
5875 enum dml_output_encoder_class Output,
5876 dml_float_t PixelClock,
5877 dml_float_t PixelClockBackEnd)
5878{
5879 dml_uint_t DSCDelayRequirement_val = 0;
5880
5881 if (DSCEnabled == true && OutputBpp != 0) {
5882 if (ODMMode == dml_odm_mode_combine_4to1) {
5883 DSCDelayRequirement_val = 4 * (dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp, sliceWidth: (dml_uint_t)(dml_ceil(x: (dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, granularity: 1.0)),
5884 numSlices: (dml_uint_t) (NumberOfDSCSlices / 4.0), pixelFormat: OutputFormat, Output) + dscComputeDelay(pixelFormat: OutputFormat, Output));
5885 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5886 DSCDelayRequirement_val = 2 * (dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp, sliceWidth: (dml_uint_t)(dml_ceil(x: (dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, granularity: 1.0)),
5887 numSlices: (dml_uint_t) (NumberOfDSCSlices / 2.0), pixelFormat: OutputFormat, Output) + dscComputeDelay(pixelFormat: OutputFormat, Output));
5888 } else {
5889 DSCDelayRequirement_val = dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp, sliceWidth: (dml_uint_t)((dml_float_t) dml_ceil(x: HActive / (dml_float_t) NumberOfDSCSlices, granularity: 1.0)),
5890 numSlices: NumberOfDSCSlices, pixelFormat: OutputFormat, Output) + dscComputeDelay(pixelFormat: OutputFormat, Output);
5891 }
5892 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil(x: (dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, granularity: 1.0));
5893 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
5894
5895 } else {
5896 DSCDelayRequirement_val = 0;
5897 }
5898#ifdef __DML_VBA_DEBUG__
5899 dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled);
5900 dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode);
5901 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
5902 dml_print("DML::%s: HActive = %u\n", __func__, HActive);
5903 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
5904 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
5905 dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
5906 dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
5907 dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
5908 dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
5909 dml_print("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
5910#endif
5911
5912 return DSCDelayRequirement_val;
5913}
5914
5915static dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
5916 dml_float_t ReturnBW,
5917 dml_bool_t NotUrgentLatencyHiding[],
5918 dml_float_t ReadBandwidthLuma[],
5919 dml_float_t ReadBandwidthChroma[],
5920 dml_float_t cursor_bw[],
5921 dml_float_t meta_row_bandwidth[],
5922 dml_float_t dpte_row_bandwidth[],
5923 dml_uint_t NumberOfDPP[],
5924 dml_float_t UrgentBurstFactorLuma[],
5925 dml_float_t UrgentBurstFactorChroma[],
5926 dml_float_t UrgentBurstFactorCursor[])
5927{
5928 dml_bool_t NotEnoughUrgentLatencyHiding = false;
5929 dml_bool_t CalculateVActiveBandwithSupport_val = false;
5930 dml_float_t VActiveBandwith = 0;
5931
5932 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5933 if (NotUrgentLatencyHiding[k]) {
5934 NotEnoughUrgentLatencyHiding = true;
5935 }
5936 }
5937
5938 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5939 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
5940 }
5941
5942 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
5943
5944#ifdef __DML_VBA_DEBUG__
5945 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding);
5946 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
5947 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5948 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val);
5949#endif
5950 return CalculateVActiveBandwithSupport_val;
5951}
5952
5953static void CalculatePrefetchBandwithSupport(
5954 dml_uint_t NumberOfActiveSurfaces,
5955 dml_float_t ReturnBW,
5956 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5957 dml_bool_t NotUrgentLatencyHiding[],
5958 dml_float_t ReadBandwidthLuma[],
5959 dml_float_t ReadBandwidthChroma[],
5960 dml_float_t PrefetchBandwidthLuma[],
5961 dml_float_t PrefetchBandwidthChroma[],
5962 dml_float_t cursor_bw[],
5963 dml_float_t meta_row_bandwidth[],
5964 dml_float_t dpte_row_bandwidth[],
5965 dml_float_t cursor_bw_pre[],
5966 dml_float_t prefetch_vmrow_bw[],
5967 dml_uint_t NumberOfDPP[],
5968 dml_float_t UrgentBurstFactorLuma[],
5969 dml_float_t UrgentBurstFactorChroma[],
5970 dml_float_t UrgentBurstFactorCursor[],
5971 dml_float_t UrgentBurstFactorLumaPre[],
5972 dml_float_t UrgentBurstFactorChromaPre[],
5973 dml_float_t UrgentBurstFactorCursorPre[],
5974
5975 // Output
5976 dml_float_t *PrefetchBandwidth,
5977 dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
5978 dml_float_t *FractionOfUrgentBandwidth,
5979 dml_bool_t *PrefetchBandwidthSupport)
5980{
5981 dml_bool_t NotEnoughUrgentLatencyHiding = false;
5982 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5983 if (NotUrgentLatencyHiding[k]) {
5984 NotEnoughUrgentLatencyHiding = true;
5985 }
5986 }
5987
5988 *PrefetchBandwidth = 0;
5989 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5990 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
5991 y: ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
5992 z: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
5993 }
5994
5995 *PrefetchBandwidthNotIncludingMALLPrefetch = 0;
5996 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5997 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
5998 *PrefetchBandwidthNotIncludingMALLPrefetch = *PrefetchBandwidthNotIncludingMALLPrefetch
5999 + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6000 y: ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
6001 + cursor_bw[k] * UrgentBurstFactorCursor[k]
6002 + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
6003 z: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
6004 + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6005 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6006 }
6007
6008 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6009 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
6010
6011#ifdef __DML_VBA_DEBUG__
6012 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6013 dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth);
6014 dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth);
6015 dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport);
6016#endif
6017}
6018
6019static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
6020 dml_uint_t NumberOfActiveSurfaces,
6021 dml_float_t ReturnBW,
6022 dml_float_t ReadBandwidthLuma[],
6023 dml_float_t ReadBandwidthChroma[],
6024 dml_float_t PrefetchBandwidthLuma[],
6025 dml_float_t PrefetchBandwidthChroma[],
6026 dml_float_t cursor_bw[],
6027 dml_float_t cursor_bw_pre[],
6028 dml_uint_t NumberOfDPP[],
6029 dml_float_t UrgentBurstFactorLuma[],
6030 dml_float_t UrgentBurstFactorChroma[],
6031 dml_float_t UrgentBurstFactorCursor[],
6032 dml_float_t UrgentBurstFactorLumaPre[],
6033 dml_float_t UrgentBurstFactorChromaPre[],
6034 dml_float_t UrgentBurstFactorCursorPre[])
6035{
6036 dml_float_t ret_val = ReturnBW;
6037
6038 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6039 ret_val = ret_val - dml_max(x: ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6040 y: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) +
6041 cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6042#ifdef __DML_VBA_DEBUG__
6043 dml_print("DML::%s: k=%u\n", __func__, k);
6044 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6045 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6046 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6047 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6048 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6049 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6050 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6051
6052 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6053 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6054 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6055 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6056 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6057 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6058 dml_print("DML::%s: ret_val = %f\n", __func__, ret_val);
6059#endif
6060 }
6061
6062 return ret_val;
6063}
6064
6065static void CalculateImmediateFlipBandwithSupport(
6066 dml_uint_t NumberOfActiveSurfaces,
6067 dml_float_t ReturnBW,
6068 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
6069 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
6070 dml_float_t final_flip_bw[],
6071 dml_float_t ReadBandwidthLuma[],
6072 dml_float_t ReadBandwidthChroma[],
6073 dml_float_t PrefetchBandwidthLuma[],
6074 dml_float_t PrefetchBandwidthChroma[],
6075 dml_float_t cursor_bw[],
6076 dml_float_t meta_row_bandwidth[],
6077 dml_float_t dpte_row_bandwidth[],
6078 dml_float_t cursor_bw_pre[],
6079 dml_float_t prefetch_vmrow_bw[],
6080 dml_uint_t NumberOfDPP[],
6081 dml_float_t UrgentBurstFactorLuma[],
6082 dml_float_t UrgentBurstFactorChroma[],
6083 dml_float_t UrgentBurstFactorCursor[],
6084 dml_float_t UrgentBurstFactorLumaPre[],
6085 dml_float_t UrgentBurstFactorChromaPre[],
6086 dml_float_t UrgentBurstFactorCursorPre[],
6087
6088 // Output
6089 dml_float_t *TotalBandwidth,
6090 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
6091 dml_float_t *FractionOfUrgentBandwidth,
6092 dml_bool_t *ImmediateFlipBandwidthSupport)
6093{
6094 *TotalBandwidth = 0;
6095 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6096 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
6097
6098
6099
6100 *TotalBandwidth = *TotalBandwidth + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6101 y: NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6102 z: NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6103 } else {
6104 *TotalBandwidth = *TotalBandwidth + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6105 y: NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6106 z: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6107 }
6108#ifdef __DML_VBA_DEBUG__
6109 dml_print("DML::%s: k = %u\n", __func__, k);
6110 dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]);
6111 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6112 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6113 dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]);
6114 dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]);
6115 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6116 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6117 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6118 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6119 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6120 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6121 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6122 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6123 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6124 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6125 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6126 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6127 dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]);
6128 dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]);
6129#endif
6130 }
6131
6132 *TotalBandwidthNotIncludingMALLPrefetch = 0;
6133 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6134 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
6135 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required)
6136 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6137 y: NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6138 z: NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6139 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6140 else
6141 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(x: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6142 y: NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
6143 + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6144 z: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6145 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6146 }
6147 }
6148
6149 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6150 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6151#ifdef __DML_VBA_DEBUG__
6152 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6153 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6154 dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport);
6155#endif
6156}
6157
6158static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock)
6159{
6160 dml_uint_t lines_time_in_ns = 1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0);
6161
6162 return dml_ceil(x: 1000.0 * num_us / lines_time_in_ns, granularity: 1.0);
6163}
6164
6165/// @brief Calculate the maximum vstartup for mode support and mode programming consideration
6166/// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge
6167static dml_uint_t CalculateMaxVStartup(
6168 dml_uint_t plane_idx,
6169 dml_bool_t ptoi_supported,
6170 dml_uint_t vblank_nom_default_us,
6171 struct dml_timing_cfg_st *timing,
6172 dml_float_t write_back_delay_us)
6173{
6174 dml_uint_t vblank_size = 0;
6175 dml_uint_t max_vstartup_lines = 0;
6176 const dml_uint_t max_allowed_vblank_nom = 1023;
6177
6178 dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx];
6179 dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx];
6180
6181 dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(num_us: vblank_nom_default_us, h_total: timing->HTotal[plane_idx],
6182 pixel_clock: timing->PixelClock[plane_idx]);
6183 dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(x: vblank_actual, y: vblank_nom_default_in_line);
6184
6185 // vblank_nom should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2)
6186 // + 2 is because
6187 // 1 -> VStartup_start should be 1 line before VSync
6188 // 1 -> always reserve 1 line between start of VBlank to VStartup signal
6189 dml_uint_t vblank_nom_vsync_capped = dml_max(x: vblank_nom_input,
6190 y: timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2);
6191 dml_uint_t vblank_nom_max_allowed_capped = dml_min(x: vblank_nom_vsync_capped, y: max_allowed_vblank_nom);
6192 dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ?
6193 vblank_nom_default_in_line : vblank_nom_max_allowed_capped;
6194
6195 vblank_size = (dml_uint_t) dml_min(x: vblank_actual, y: vblank_avail);
6196
6197 if (timing->Interlace[plane_idx] && !ptoi_supported)
6198 max_vstartup_lines = (dml_uint_t) (dml_floor(x: vblank_size/2.0, granularity: 1.0));
6199 else
6200 max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(x: 1.0, y: dml_ceil(x: write_back_delay_us/line_time_us, granularity: 1.0));
6201#ifdef __DML_VBA_DEBUG__
6202 dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx);
6203 dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]);
6204 dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
6205 dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us);
6206 dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
6207 dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
6208 dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
6209#endif
6210 return max_vstartup_lines;
6211}
6212
6213static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
6214 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
6215 dml_uint_t j,
6216 dml_uint_t k)
6217{
6218 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k];
6219 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
6220 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
6221 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
6222 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
6223 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
6224 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
6225 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
6226 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
6227 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
6228 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
6229 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
6230 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
6231 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
6232 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
6233 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
6234 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
6235 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
6236 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
6237 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
6238 CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency;
6239 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
6240 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
6241 CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k];
6242 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k];
6243 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k];
6244 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
6245 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k];
6246 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k];
6247 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
6248 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k];
6249 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k];
6250 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k];
6251 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
6252 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
6253 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait;
6254 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k];
6255 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k];
6256 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k];
6257 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k];
6258 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k];
6259 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k];
6260 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k];
6261 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k];
6262 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
6263}
6264
6265static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
6266{
6267 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6268 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
6269 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
6270 struct DmlPipe *myPipe;
6271 dml_uint_t j, k;
6272
6273 for (j = 0; j < 2; ++j) {
6274 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6275
6276 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6277 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
6278 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
6279 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
6280 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
6281 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
6282 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
6283 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
6284 mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j];
6285 mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j];
6286 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
6287 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
6288 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
6289 }
6290
6291 mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport(
6292 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
6293 ReturnBW: mode_lib->ms.ReturnBWPerState[j],
6294 NotUrgentLatencyHiding: mode_lib->ms.NotUrgentLatencyHiding,
6295 ReadBandwidthLuma: mode_lib->ms.ReadBandwidthLuma,
6296 ReadBandwidthChroma: mode_lib->ms.ReadBandwidthChroma,
6297 cursor_bw: mode_lib->ms.cursor_bw,
6298 meta_row_bandwidth: mode_lib->ms.meta_row_bandwidth_this_state,
6299 dpte_row_bandwidth: mode_lib->ms.dpte_row_bandwidth_this_state,
6300 NumberOfDPP: mode_lib->ms.NoOfDPPThisState,
6301 UrgentBurstFactorLuma: mode_lib->ms.UrgentBurstFactorLuma,
6302 UrgentBurstFactorChroma: mode_lib->ms.UrgentBurstFactorChroma,
6303 UrgentBurstFactorCursor: mode_lib->ms.UrgentBurstFactorCursor);
6304
6305 s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
6306 soc: &mode_lib->ms.soc,
6307 use_ideal_dram_bw_strobe: mode_lib->ms.state.use_ideal_dram_bw_strobe,
6308 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6309 DCFCLK: mode_lib->ms.DCFCLKState[j],
6310 FabricClock: mode_lib->ms.state.fabricclk_mhz,
6311 DRAMSpeed: mode_lib->ms.state.dram_speed_mts);
6312
6313 s->HostVMInefficiencyFactor = 1;
6314 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
6315 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState;
6316
6317 mode_lib->ms.ExtraLatency = CalculateExtraLatency(
6318 RoundTripPingLatencyCycles: mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
6319 ReorderingBytes: s->ReorderingBytes,
6320 DCFCLK: mode_lib->ms.DCFCLKState[j],
6321 TotalNumberOfActiveDPP: mode_lib->ms.TotalNumberOfActiveDPP[j],
6322 PixelChunkSizeInKByte: mode_lib->ms.ip.pixel_chunk_size_kbytes,
6323 TotalNumberOfDCCActiveDPP: mode_lib->ms.TotalNumberOfDCCActiveDPP[j],
6324 MetaChunkSize: mode_lib->ms.ip.meta_chunk_size_kbytes,
6325 ReturnBW: mode_lib->ms.ReturnBWPerState[j],
6326 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6327 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6328 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
6329 NumberOfDPP: mode_lib->ms.NoOfDPPThisState,
6330 dpte_group_bytes: mode_lib->ms.dpte_group_bytes,
6331 HostVMInefficiencyFactor: s->HostVMInefficiencyFactor,
6332 HostVMMinPageSize: mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6333 HostVMMaxNonCachedPageTableLevels: mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
6334
6335 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6336 s->MaxVStartup = 0;
6337 s->AllPrefetchModeTested = true;
6338 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6339 CalculatePrefetchMode(AllowForPStateChangeOrStutterInVBlank: mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], MinPrefetchMode: &s->MinPrefetchMode[k], MaxPrefetchMode: &s->MaxPrefetchMode[k]);
6340 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
6341 }
6342
6343 do {
6344 s->MaxVStartup = s->NextMaxVStartup;
6345 s->AllPrefetchModeTested = true;
6346
6347 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6348 mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k];
6349 mode_lib->ms.TWait = CalculateTWait(
6350 PrefetchMode: mode_lib->ms.PrefetchMode[k],
6351 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6352 SynchronizeDRRDisplaysForUCLKPStateChangeFinal: mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
6353 DRRDisplay: mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
6354 DRAMClockChangeLatency: mode_lib->ms.state.dram_clock_change_latency_us,
6355 FCLKChangeLatency: mode_lib->ms.state.fclk_change_latency_us,
6356 UrgentLatency: mode_lib->ms.UrgLatency,
6357 SREnterPlusExitTime: mode_lib->ms.state.sr_enter_plus_exit_time_us);
6358
6359 myPipe = &s->myPipe;
6360 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
6361 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j];
6362 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
6363 myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6364 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
6365 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
6366 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
6367 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
6368 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
6369 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
6370 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
6371 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
6372 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
6373 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
6374 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
6375 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
6376 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
6377 myPipe->ODMMode = mode_lib->ms.ODMModePerState[k];
6378 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
6379 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
6380 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
6381 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
6382
6383#ifdef __DML_VBA_DEBUG__
6384 dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k);
6385 dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]);
6386 dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup);
6387 dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]);
6388 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
6389 dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]);
6390#endif
6391
6392 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
6393 CalculatePrefetchSchedule_params->myPipe = myPipe;
6394 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(x: s->MaxVStartup, y: s->MaximumVStartup[j][k]));
6395 CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k];
6396 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
6397 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
6398 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
6399 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
6400 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
6401 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
6402 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
6403 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
6404 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
6405
6406 set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k);
6407
6408 mode_lib->ms.support.NoTimeForPrefetch[j][k] =
6409 CalculatePrefetchSchedule(scratch: &mode_lib->scratch,
6410 p: CalculatePrefetchSchedule_params);
6411 }
6412
6413 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6414 CalculateUrgentBurstFactor(
6415 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6416 swath_width_luma_ub: mode_lib->ms.swath_width_luma_ub_this_state[k],
6417 swath_width_chroma_ub: mode_lib->ms.swath_width_chroma_ub_this_state[k],
6418 SwathHeightY: mode_lib->ms.SwathHeightYThisState[k],
6419 SwathHeightC: mode_lib->ms.SwathHeightCThisState[k],
6420 LineTime: mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6421 UrgentLatency: mode_lib->ms.UrgLatency,
6422 CursorBufferSize: mode_lib->ms.ip.cursor_buffer_size,
6423 CursorWidth: mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
6424 CursorBPP: mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
6425 VRatio: mode_lib->ms.VRatioPreY[j][k],
6426 VRatioC: mode_lib->ms.VRatioPreC[j][k],
6427 BytePerPixelInDETY: mode_lib->ms.BytePerPixelInDETY[k],
6428 BytePerPixelInDETC: mode_lib->ms.BytePerPixelInDETC[k],
6429 DETBufferSizeY: mode_lib->ms.DETBufferSizeYThisState[k],
6430 DETBufferSizeC: mode_lib->ms.DETBufferSizeCThisState[k],
6431 /* Output */
6432 UrgentBurstFactorCursor: &mode_lib->ms.UrgentBurstFactorCursorPre[k],
6433 UrgentBurstFactorLuma: &mode_lib->ms.UrgentBurstFactorLumaPre[k],
6434 UrgentBurstFactorChroma: &mode_lib->ms.UrgentBurstFactorChroma[k],
6435 NotEnoughUrgentLatencyHiding: &mode_lib->ms.NotUrgentLatencyHidingPre[k]);
6436
6437 mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
6438 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
6439 mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k];
6440 }
6441
6442 {
6443 CalculatePrefetchBandwithSupport(
6444 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
6445 ReturnBW: mode_lib->ms.ReturnBWPerState[j],
6446 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6447 NotUrgentLatencyHiding: mode_lib->ms.NotUrgentLatencyHidingPre,
6448 ReadBandwidthLuma: mode_lib->ms.ReadBandwidthLuma,
6449 ReadBandwidthChroma: mode_lib->ms.ReadBandwidthChroma,
6450 PrefetchBandwidthLuma: mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6451 PrefetchBandwidthChroma: mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6452 cursor_bw: mode_lib->ms.cursor_bw,
6453 meta_row_bandwidth: mode_lib->ms.meta_row_bandwidth_this_state,
6454 dpte_row_bandwidth: mode_lib->ms.dpte_row_bandwidth_this_state,
6455 cursor_bw_pre: mode_lib->ms.cursor_bw_pre,
6456 prefetch_vmrow_bw: mode_lib->ms.prefetch_vmrow_bw,
6457 NumberOfDPP: mode_lib->ms.NoOfDPPThisState,
6458 UrgentBurstFactorLuma: mode_lib->ms.UrgentBurstFactorLuma,
6459 UrgentBurstFactorChroma: mode_lib->ms.UrgentBurstFactorChroma,
6460 UrgentBurstFactorCursor: mode_lib->ms.UrgentBurstFactorCursor,
6461 UrgentBurstFactorLumaPre: mode_lib->ms.UrgentBurstFactorLumaPre,
6462 UrgentBurstFactorChromaPre: mode_lib->ms.UrgentBurstFactorChromaPre,
6463 UrgentBurstFactorCursorPre: mode_lib->ms.UrgentBurstFactorCursorPre,
6464
6465 /* output */
6466 PrefetchBandwidth: &s->dummy_single[0], // dml_float_t *PrefetchBandwidth
6467 PrefetchBandwidthNotIncludingMALLPrefetch: &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
6468 FractionOfUrgentBandwidth: &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth
6469 PrefetchBandwidthSupport: &mode_lib->ms.support.PrefetchSupported[j]);
6470 }
6471
6472 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6473 if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0
6474 || mode_lib->ms.LinesForMetaPTE[k] >= 32.0
6475 || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0
6476 || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) {
6477 mode_lib->ms.support.PrefetchSupported[j] = false;
6478 }
6479 }
6480
6481 mode_lib->ms.support.DynamicMetadataSupported[j] = true;
6482 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6483 if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) {
6484 mode_lib->ms.support.DynamicMetadataSupported[j] = false;
6485 }
6486 }
6487
6488 mode_lib->ms.support.VRatioInPrefetchSupported[j] = true;
6489 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6490 if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true ||
6491 mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6492 mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6493 ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
6494 (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) {
6495 mode_lib->ms.support.VRatioInPrefetchSupported[j] = false;
6496 }
6497 }
6498
6499 s->AnyLinesForVMOrRowTooLarge = false;
6500 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6501 if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) {
6502 s->AnyLinesForVMOrRowTooLarge = true;
6503 }
6504 }
6505
6506 if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) {
6507 mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
6508 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
6509 ReturnBW: mode_lib->ms.ReturnBWPerState[j],
6510 ReadBandwidthLuma: mode_lib->ms.ReadBandwidthLuma,
6511 ReadBandwidthChroma: mode_lib->ms.ReadBandwidthChroma,
6512 PrefetchBandwidthLuma: mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6513 PrefetchBandwidthChroma: mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6514 cursor_bw: mode_lib->ms.cursor_bw,
6515 cursor_bw_pre: mode_lib->ms.cursor_bw_pre,
6516 NumberOfDPP: mode_lib->ms.NoOfDPPThisState,
6517 UrgentBurstFactorLuma: mode_lib->ms.UrgentBurstFactorLuma,
6518 UrgentBurstFactorChroma: mode_lib->ms.UrgentBurstFactorChroma,
6519 UrgentBurstFactorCursor: mode_lib->ms.UrgentBurstFactorCursor,
6520 UrgentBurstFactorLumaPre: mode_lib->ms.UrgentBurstFactorLumaPre,
6521 UrgentBurstFactorChromaPre: mode_lib->ms.UrgentBurstFactorChromaPre,
6522 UrgentBurstFactorCursorPre: mode_lib->ms.UrgentBurstFactorCursorPre);
6523
6524 mode_lib->ms.TotImmediateFlipBytes = 0;
6525 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6526 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
6527 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k];
6528 if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
6529 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
6530 } else {
6531 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k];
6532 }
6533 }
6534 }
6535
6536 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6537 CalculateFlipSchedule(
6538 HostVMInefficiencyFactor: s->HostVMInefficiencyFactor,
6539 UrgentExtraLatency: mode_lib->ms.ExtraLatency,
6540 UrgentLatency: mode_lib->ms.UrgLatency,
6541 GPUVMMaxPageTableLevels: mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
6542 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6543 HostVMMaxNonCachedPageTableLevels: mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
6544 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6545 HostVMMinPageSize: mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6546 PDEAndMetaPTEBytesPerFrame: mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
6547 MetaRowBytes: mode_lib->ms.MetaRowBytes[j][k],
6548 DPTEBytesPerRow: mode_lib->ms.DPTEBytesPerRow[j][k],
6549 BandwidthAvailableForImmediateFlip: mode_lib->ms.BandwidthAvailableForImmediateFlip,
6550 TotImmediateFlipBytes: mode_lib->ms.TotImmediateFlipBytes,
6551 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6552 LineTime: (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]),
6553 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6554 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6555 Tno_bw: mode_lib->ms.Tno_bw[k],
6556 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
6557 dpte_row_height: mode_lib->ms.dpte_row_height[k],
6558 meta_row_height: mode_lib->ms.meta_row_height[k],
6559 dpte_row_height_chroma: mode_lib->ms.dpte_row_height_chroma[k],
6560 meta_row_height_chroma: mode_lib->ms.meta_row_height_chroma[k],
6561 use_one_row_for_frame_flip: mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
6562
6563 /* Output */
6564 DestinationLinesToRequestVMInImmediateFlip: &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
6565 DestinationLinesToRequestRowInImmediateFlip: &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
6566 final_flip_bw: &mode_lib->ms.final_flip_bw[k],
6567 ImmediateFlipSupportedForPipe: &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
6568 }
6569
6570 {
6571 CalculateImmediateFlipBandwithSupport(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
6572 ReturnBW: mode_lib->ms.ReturnBWPerState[j],
6573 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6574 ImmediateFlipRequirement: mode_lib->ms.policy.ImmediateFlipRequirement,
6575 final_flip_bw: mode_lib->ms.final_flip_bw,
6576 ReadBandwidthLuma: mode_lib->ms.ReadBandwidthLuma,
6577 ReadBandwidthChroma: mode_lib->ms.ReadBandwidthChroma,
6578 PrefetchBandwidthLuma: mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6579 PrefetchBandwidthChroma: mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6580 cursor_bw: mode_lib->ms.cursor_bw,
6581 meta_row_bandwidth: mode_lib->ms.meta_row_bandwidth_this_state,
6582 dpte_row_bandwidth: mode_lib->ms.dpte_row_bandwidth_this_state,
6583 cursor_bw_pre: mode_lib->ms.cursor_bw_pre,
6584 prefetch_vmrow_bw: mode_lib->ms.prefetch_vmrow_bw,
6585 NumberOfDPP: mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
6586 UrgentBurstFactorLuma: mode_lib->ms.UrgentBurstFactorLuma,
6587 UrgentBurstFactorChroma: mode_lib->ms.UrgentBurstFactorChroma,
6588 UrgentBurstFactorCursor: mode_lib->ms.UrgentBurstFactorCursor,
6589 UrgentBurstFactorLumaPre: mode_lib->ms.UrgentBurstFactorLumaPre,
6590 UrgentBurstFactorChromaPre: mode_lib->ms.UrgentBurstFactorChromaPre,
6591 UrgentBurstFactorCursorPre: mode_lib->ms.UrgentBurstFactorCursorPre,
6592
6593 /* output */
6594 TotalBandwidth: &s->dummy_single[0], // dml_float_t *TotalBandwidth
6595 TotalBandwidthNotIncludingMALLPrefetch: &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
6596 FractionOfUrgentBandwidth: &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
6597 ImmediateFlipBandwidthSupport: &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
6598 }
6599
6600 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6601 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
6602 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6603 }
6604
6605 } else { // if prefetch not support, assume iflip not supported
6606 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6607 }
6608
6609 if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
6610 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6611 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6612 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
6613
6614 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
6615 s->AllPrefetchModeTested = false;
6616 }
6617 } else {
6618 s->NextMaxVStartup = s->NextMaxVStartup - 1;
6619 }
6620 } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
6621 mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
6622 // consider flip support is okay if when there is no hostvm and the user does't require a iflip OR the flip bw is ok
6623 // If there is hostvm, DCN needs to support iflip for invalidation
6624 ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
6625 (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
6626
6627 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6628 mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
6629 }
6630
6631 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
6632 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
6633 s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
6634 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
6635 s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
6636 s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
6637 s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
6638 s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
6639 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
6640 s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
6641 s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
6642
6643 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
6644 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6645 CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode;
6646 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6647 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
6648 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
6649 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
6650 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j];
6651 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j];
6652 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
6653 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
6654 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
6655 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
6656 CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height;
6657 CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
6658 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
6659 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
6660 CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz;
6661 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6662 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
6663 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
6664 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
6665 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
6666 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
6667 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
6668 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
6669 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
6670 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
6671 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
6672 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
6673 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
6674 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
6675 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
6676 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
6677 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
6678 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
6679 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
6680 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
6681 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
6682 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
6683 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
6684 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
6685 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
6686 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
6687 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
6688 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
6689 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
6690 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState;
6691 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState;
6692
6693 // Output
6694 CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
6695 CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
6696 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
6697 CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
6698 CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
6699 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
6700 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
6701 CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin;
6702
6703 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(scratch: &mode_lib->scratch,
6704 p: CalculateWatermarks_params);
6705
6706 } // for j
6707}
6708
6709/// @brief The Mode Support function.
6710dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
6711{
6712 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6713 struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
6714 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
6715 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
6716
6717 dml_uint_t j, k, m;
6718
6719 mode_lib->ms.num_active_planes = dml_get_num_active_planes(display_cfg: &mode_lib->ms.cache_display_cfg);
6720 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
6721
6722 CalculateMaxDETAndMinCompressedBufferSize(
6723 ConfigReturnBufferSizeInKByte: mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
6724 ConfigReturnBufferSegmentSizeInKByte: mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
6725 ROBBufferSizeInKByte: mode_lib->ms.ip.rob_buffer_size_kbytes,
6726 MaxNumDPP: mode_lib->ms.ip.max_num_dpp,
6727 nomDETInKByteOverrideEnable: mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
6728 nomDETInKByteOverrideValue: mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
6729
6730 /* Output */
6731 MaxTotalDETInKByte: &mode_lib->ms.MaxTotalDETInKByte,
6732 nomDETInKByte: &mode_lib->ms.NomDETInKByte,
6733 MinCompressedBufferSizeInKByte: &mode_lib->ms.MinCompressedBufferSizeInKByte);
6734
6735 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg: &mode_lib->ms.cache_display_cfg, ptoi_supported: mode_lib->ms.ip.ptoi_supported);
6736
6737
6738 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
6739
6740 /*Scale Ratio, taps Support Check*/
6741 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
6742 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6743 if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
6744 && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6745 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6746 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6747 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6748 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6749 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6750 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
6751 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
6752 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
6753 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
6754 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
6755 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6756 } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
6757 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
6758 || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
6759 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
6760 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
6761 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
6762 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
6763 || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6764 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6765 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6766 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6767 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6768 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6769 && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
6770 (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
6771 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
6772 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
6773 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
6774 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
6775 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6776 }
6777 }
6778
6779 /*Source Format, Pixel Format and Scan Support Check*/
6780 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
6781 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6782 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(scan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
6783 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
6784 }
6785 }
6786
6787 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6788 CalculateBytePerPixelAndBlockSizes(
6789 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6790 SurfaceTiling: mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
6791
6792 /* Output */
6793 BytePerPixelY: &mode_lib->ms.BytePerPixelY[k],
6794 BytePerPixelC: &mode_lib->ms.BytePerPixelC[k],
6795 BytePerPixelDETY: &mode_lib->ms.BytePerPixelInDETY[k],
6796 BytePerPixelDETC: &mode_lib->ms.BytePerPixelInDETC[k],
6797 BlockHeight256BytesY: &mode_lib->ms.Read256BlockHeightY[k],
6798 BlockHeight256BytesC: &mode_lib->ms.Read256BlockHeightC[k],
6799 BlockWidth256BytesY: &mode_lib->ms.Read256BlockWidthY[k],
6800 BlockWidth256BytesC: &mode_lib->ms.Read256BlockWidthC[k],
6801 MacroTileHeightY: &mode_lib->ms.MacroTileHeightY[k],
6802 MacroTileHeightC: &mode_lib->ms.MacroTileHeightC[k],
6803 MacroTileWidthY: &mode_lib->ms.MacroTileWidthY[k],
6804 MacroTileWidthC: &mode_lib->ms.MacroTileWidthC[k]);
6805 }
6806
6807 /*Bandwidth Support Check*/
6808 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6809 if (!dml_is_vertical_rotation(scan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
6810 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
6811 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
6812 } else {
6813 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
6814 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
6815 }
6816 }
6817 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6818 mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(x: mode_lib->ms.BytePerPixelInDETY[k], granularity: 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
6819 mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(x: mode_lib->ms.BytePerPixelInDETC[k], granularity: 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
6820 }
6821 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6822 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
6823 && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
6824 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6825 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6826 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6827 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6828 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
6829 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6830 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6831 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6832 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6833 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6834 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
6835 } else {
6836 mode_lib->ms.WriteBandwidth[k] = 0.0;
6837 }
6838 }
6839
6840 /*Writeback Latency support check*/
6841 mode_lib->ms.support.WritebackLatencySupport = true;
6842 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6843 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
6844 (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
6845 mode_lib->ms.support.WritebackLatencySupport = false;
6846 }
6847 }
6848
6849 /*Writeback Mode Support Check*/
6850 s->TotalNumberOfActiveWriteback = 0;
6851 for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
6852 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6853 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
6854 }
6855 }
6856
6857 mode_lib->ms.support.EnoughWritebackUnits = 1;
6858 if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
6859 mode_lib->ms.support.EnoughWritebackUnits = false;
6860 }
6861
6862 /*Writeback Scale Ratio and Taps Support Check*/
6863 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
6864 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6865 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6866 if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
6867 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
6868 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
6869 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
6870 || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
6871 || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
6872 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
6873 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
6874 || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
6875 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6876 }
6877 if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
6878 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6879 }
6880 }
6881 }
6882
6883 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6884 CalculateSinglePipeDPPCLKAndSCLThroughput(
6885 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio[k],
6886 HRatioChroma: mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
6887 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6888 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6889 MaxDCHUBToPSCLThroughput: mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
6890 MaxPSCLToLBThroughput: mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
6891 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6892 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6893 HTaps: mode_lib->ms.cache_display_cfg.plane.HTaps[k],
6894 HTapsChroma: mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
6895 VTaps: mode_lib->ms.cache_display_cfg.plane.VTaps[k],
6896 VTapsChroma: mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
6897 /* Output */
6898 PSCL_THROUGHPUT: &mode_lib->ms.PSCL_FACTOR[k],
6899 PSCL_THROUGHPUT_CHROMA: &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
6900 DPPCLKUsingSingleDPP: &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
6901 }
6902
6903 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6904 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
6905 s->MaximumSwathWidthSupportLuma = 8192;
6906 } else if (!dml_is_vertical_rotation(scan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
6907 s->MaximumSwathWidthSupportLuma = 7680;
6908 } else if (dml_is_vertical_rotation(scan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
6909 s->MaximumSwathWidthSupportLuma = 4320;
6910 } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
6911 s->MaximumSwathWidthSupportLuma = 3840;
6912 } else if (dml_is_vertical_rotation(scan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
6913 s->MaximumSwathWidthSupportLuma = 3072;
6914 } else {
6915 s->MaximumSwathWidthSupportLuma = 6144;
6916 }
6917
6918 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
6919 s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
6920 } else {
6921 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
6922 }
6923 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(x: mode_lib->ms.cache_display_cfg.plane.HRatio[k], y: 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
6924 (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(x: dml_ceil(x: mode_lib->ms.cache_display_cfg.plane.VRatio[k], granularity: 1.0) - 2, y: 0.0));
6925 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
6926 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
6927 } else {
6928 mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
6929 mode_lib->ms.ip.line_buffer_size_bits
6930 * dml_max(x: mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], y: 1.0)
6931 / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
6932 / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
6933 + dml_max(x: dml_ceil(x: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], granularity: 1.0) - 2, y: 0.0));
6934 }
6935 mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(x: s->MaximumSwathWidthSupportLuma, y: mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
6936 mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(x: s->MaximumSwathWidthSupportChroma, y: mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
6937 }
6938
6939 /*Number Of DSC Slices*/
6940 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6941 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
6942 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
6943 mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(x: mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, granularity: 4));
6944 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
6945 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
6946 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
6947 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
6948 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
6949 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
6950 } else {
6951 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
6952 }
6953 } else {
6954 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
6955 }
6956 }
6957
6958 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
6959 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6960 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
6961 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
6962 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
6963 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
6964 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
6965 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
6966 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6967 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
6968 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
6969 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
6970 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
6971 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
6972 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
6973 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
6974 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
6975 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
6976 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
6977 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
6978 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
6979 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
6980 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
6981 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
6982 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
6983 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
6984 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
6985 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
6986 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
6987 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
6988 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
6989 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
6990 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
6991 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
6992 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
6993 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
6994 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
6995 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
6996 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
6997 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
6998 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
6999 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7000 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7001 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7002 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7003 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
7004 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
7005 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
7006 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
7007 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
7008 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
7009 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
7010 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
7011 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7012 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7013 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7014 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7015 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7016 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7017 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7018 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7019
7020 CalculateSwathAndDETConfiguration(scratch: &mode_lib->scratch,
7021 p: CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
7022
7023 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
7024 s->MPCCombineMethodAsPossible = false;
7025 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7026 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
7027 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
7028 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
7029 s->MPCCombineMethodAsPossible = true;
7030 }
7031 mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
7032
7033 for (j = 0; j < 2; j++) {
7034 mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
7035 mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
7036
7037 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7038 CalculateODMMode(
7039 MaximumPixelsPerLinePerDSCUnit: mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7040 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
7041 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7042 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7043 ODMUse: mode_lib->ms.policy.ODMUse[k],
7044 StateDispclk: mode_lib->ms.state.dispclk_mhz,
7045 MaxDispclk: mode_lib->ms.max_state.dispclk_mhz,
7046 DSCEnable: false, // DSCEnable
7047 TotalNumberOfActiveDPP: mode_lib->ms.TotalNumberOfActiveDPP[j],
7048 MaxNumDPP: mode_lib->ms.ip.max_num_dpp,
7049 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7050 DISPCLKDPPCLKDSCCLKDownSpreading: mode_lib->ms.soc.dcn_downspread_percent,
7051 DISPCLKRampingMargin: mode_lib->ms.ip.dispclk_ramp_margin_percent,
7052 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7053
7054 /* Output */
7055 TotalAvailablePipesSupport: &s->TotalAvailablePipesSupportNoDSC,
7056 NumberOfDPP: &s->NumberOfDPPNoDSC,
7057 ODMMode: &s->ODMModeNoDSC,
7058 RequiredDISPCLKPerSurface: &s->RequiredDISPCLKPerSurfaceNoDSC);
7059
7060 CalculateODMMode(
7061 MaximumPixelsPerLinePerDSCUnit: mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7062 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
7063 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7064 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7065 ODMUse: mode_lib->ms.policy.ODMUse[k],
7066 StateDispclk: mode_lib->ms.state.dispclk_mhz,
7067 MaxDispclk: mode_lib->ms.max_state.dispclk_mhz,
7068 DSCEnable: true, // DSCEnable
7069 TotalNumberOfActiveDPP: mode_lib->ms.TotalNumberOfActiveDPP[j],
7070 MaxNumDPP: mode_lib->ms.ip.max_num_dpp,
7071 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7072 DISPCLKDPPCLKDSCCLKDownSpreading: mode_lib->ms.soc.dcn_downspread_percent,
7073 DISPCLKRampingMargin: mode_lib->ms.ip.dispclk_ramp_margin_percent,
7074 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7075
7076 /* Output */
7077 TotalAvailablePipesSupport: &s->TotalAvailablePipesSupportDSC,
7078 NumberOfDPP: &s->NumberOfDPPDSC,
7079 ODMMode: &s->ODMModeDSC,
7080 RequiredDISPCLKPerSurface: &s->RequiredDISPCLKPerSurfaceDSC);
7081
7082 CalculateOutputLink(
7083 PHYCLKPerState: mode_lib->ms.state.phyclk_mhz,
7084 PHYCLKD18PerState: mode_lib->ms.state.phyclk_d18_mhz,
7085 PHYCLKD32PerState: mode_lib->ms.state.phyclk_d32_mhz,
7086 Downspreading: mode_lib->ms.soc.phy_downspread_percent,
7087 IsMainSurfaceUsingTheIndicatedTiming: (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
7088 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7089 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7090 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7091 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
7092 PixelClockBackEnd: mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7093 ForcedOutputLinkBPP: mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
7094 DSCInputBitPerComponent: mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7095 NumberOfDSCSlices: mode_lib->ms.support.NumberOfDSCSlices[k],
7096 AudioSampleRate: mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7097 AudioSampleLayout: mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
7098 ODMModeNoDSC: s->ODMModeNoDSC,
7099 ODMModeDSC: s->ODMModeDSC,
7100 DSCEnable: mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
7101 OutputLinkDPLanes: mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
7102 OutputLinkDPRate: mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
7103
7104 /* Output */
7105 RequiresDSC: &mode_lib->ms.RequiresDSC[k],
7106 RequiresFEC: &mode_lib->ms.RequiresFEC[k],
7107 OutBpp: &mode_lib->ms.OutputBppPerState[k],
7108 OutputType: &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng
7109 OutputRate: &mode_lib->ms.OutputRatePerState[k],
7110 RequiredSlots: &mode_lib->ms.RequiredSlots[k]);
7111
7112 if (mode_lib->ms.RequiresDSC[k] == false) {
7113 mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
7114 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7115 if (!s->TotalAvailablePipesSupportNoDSC)
7116 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7117 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
7118 } else {
7119 mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
7120 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
7121 if (!s->TotalAvailablePipesSupportDSC)
7122 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7123 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
7124 }
7125 }
7126
7127 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7128 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7129 mode_lib->ms.MPCCombine[j][k] = false;
7130 mode_lib->ms.NoOfDPP[j][k] = 4;
7131 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7132 mode_lib->ms.MPCCombine[j][k] = false;
7133 mode_lib->ms.NoOfDPP[j][k] = 2;
7134 } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
7135 mode_lib->ms.MPCCombine[j][k] = false;
7136 mode_lib->ms.NoOfDPP[j][k] = 1;
7137 } else if (RoundToDFSGranularity(Clock: mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
7138 round_up: 1, VCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
7139 mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
7140 mode_lib->ms.MPCCombine[j][k] = false;
7141 mode_lib->ms.NoOfDPP[j][k] = 1;
7142 } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7143 mode_lib->ms.MPCCombine[j][k] = true;
7144 mode_lib->ms.NoOfDPP[j][k] = 2;
7145 mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7146 } else {
7147 mode_lib->ms.MPCCombine[j][k] = false;
7148 mode_lib->ms.NoOfDPP[j][k] = 1;
7149 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7150 }
7151 }
7152
7153 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
7154 s->NoChromaOrLinear = true;
7155 for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
7156 if (mode_lib->ms.NoOfDPP[j][k] == 1)
7157 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
7158 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
7159 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
7160 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
7161 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
7162 || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
7163 s->NoChromaOrLinear = false;
7164 }
7165 }
7166
7167 if (j == 1 && !UnboundedRequest(UseUnboundedRequestingFinal: mode_lib->ms.policy.UseUnboundedRequesting,
7168 TotalNumberOfActiveDPP: mode_lib->ms.TotalNumberOfActiveDPP[j], NoChromaOrLinear: s->NoChromaOrLinear,
7169 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
7170 while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
7171 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7172 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7173 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7174 if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
7175 mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
7176 (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
7177 mode_lib->ms.MPCCombine[j][k] == false) {
7178 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
7179 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
7180 }
7181 }
7182 mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
7183 mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
7184 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7185 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
7186 }
7187 }
7188
7189 //DISPCLK/DPPCLK
7190 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7191 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7192 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
7193 mode_lib->ms.WritebackRequiredDISPCLK = dml_max(x: mode_lib->ms.WritebackRequiredDISPCLK,
7194 y: CalculateWriteBackDISPCLK(WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7195 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7196 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7197 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7198 WritebackHTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
7199 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7200 WritebackSourceWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
7201 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7202 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7203 WritebackLineBufferSize: mode_lib->ms.ip.writeback_line_buffer_buffer_size,
7204 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
7205 }
7206 }
7207
7208 mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
7209 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7210 mode_lib->ms.RequiredDISPCLK[j] = dml_max(x: mode_lib->ms.RequiredDISPCLK[j], y: mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
7211 }
7212
7213 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7214 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7215 }
7216
7217 CalculateDPPCLK(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
7218 DISPCLKDPPCLKDSCCLKDownSpreading: mode_lib->ms.soc.dcn_downspread_percent,
7219 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7220 DPPCLKUsingSingleDPP: mode_lib->ms.MinDPPCLKUsingSingleDPP,
7221 DPPPerSurface: mode_lib->ms.NoOfDPPThisState,
7222 /* Output */
7223 GlobalDPPCLK: &mode_lib->ms.GlobalDPPCLK,
7224 Dppclk: mode_lib->ms.RequiredDPPCLKThisState);
7225
7226 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7227 mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
7228 }
7229
7230 mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
7231
7232 if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7233 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7234 }
7235 } // j
7236
7237 /* Total Available OTG, HDMIFRL, DP Support Check */
7238 s->TotalNumberOfActiveOTG = 0;
7239 s->TotalNumberOfActiveHDMIFRL = 0;
7240 s->TotalNumberOfActiveDP2p0 = 0;
7241 s->TotalNumberOfActiveDP2p0Outputs = 0;
7242
7243 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7244 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7245 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
7246 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
7247 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
7248 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
7249 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
7250 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
7251 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
7252 }
7253 }
7254 }
7255 }
7256
7257 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
7258 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
7259 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
7260
7261 /* Display IO and DSC Support Check */
7262 mode_lib->ms.support.NonsupportedDSCInputBPC = false;
7263 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7264 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7265 !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
7266 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
7267 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
7268 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
7269 )) {
7270 mode_lib->ms.support.NonsupportedDSCInputBPC = true;
7271 }
7272 }
7273
7274 mode_lib->ms.support.ExceededMultistreamSlots = false;
7275 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7276 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
7277 s->TotalSlots = mode_lib->ms.RequiredSlots[k];
7278 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7279 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
7280 s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
7281 }
7282 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
7283 mode_lib->ms.support.ExceededMultistreamSlots = true;
7284 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
7285 mode_lib->ms.support.ExceededMultistreamSlots = true;
7286 }
7287 }
7288 mode_lib->ms.support.LinkCapacitySupport = true;
7289 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7290 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7291 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7292 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
7293 mode_lib->ms.support.LinkCapacitySupport = false;
7294 }
7295 }
7296
7297 mode_lib->ms.support.P2IWith420 = false;
7298 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
7299 mode_lib->ms.support.DSC422NativeNotSupported = false;
7300 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
7301 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
7302 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
7303 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
7304 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
7305 mode_lib->ms.support.NotEnoughLanesForMSO = false;
7306
7307 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7308 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7309 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7310 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
7311 mode_lib->ms.support.P2IWith420 = true;
7312
7313 if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
7314 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
7315 if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
7316 mode_lib->ms.support.DSC422NativeNotSupported = true;
7317
7318 if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
7319 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
7320 ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
7321 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
7322 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
7323
7324 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
7325 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
7326 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
7327 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7328 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7329 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7330 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7331 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7332 }
7333 }
7334
7335 if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7336 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
7337 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7338 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7339 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
7340 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7341 }
7342 }
7343 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
7344 mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
7345 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
7346
7347 if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
7348 (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
7349 mode_lib->ms.support.NotEnoughLanesForMSO = true;
7350 }
7351 }
7352
7353 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
7354 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7355 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
7356 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
7357 RequiredDTBCLK(
7358 DSCEnable: mode_lib->ms.RequiresDSC[k],
7359 PixelClock: mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7360 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7361 OutputBpp: mode_lib->ms.OutputBppPerState[k],
7362 DSCSlices: mode_lib->ms.support.NumberOfDSCSlices[k],
7363 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7364 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
7365 AudioRate: mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7366 AudioLayout: mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
7367 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
7368 }
7369 }
7370
7371 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
7372 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
7373 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7374 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
7375 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
7376 }
7377 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7378 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
7379 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
7380 }
7381 }
7382
7383 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
7384 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7385 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7386 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7387 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
7388 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7389 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7390 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
7391 s->DSCFormatFactor = 2;
7392 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
7393 s->DSCFormatFactor = 1;
7394 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7395 s->DSCFormatFactor = 2;
7396 } else {
7397 s->DSCFormatFactor = 1;
7398 }
7399#ifdef __DML_VBA_DEBUG__
7400 dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7401#endif
7402 if (mode_lib->ms.RequiresDSC[k] == true) {
7403 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7404 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7405#ifdef __DML_VBA_DEBUG__
7406 dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7407 dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
7408 dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
7409#endif
7410 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7411 }
7412 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7413 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7414 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7415 }
7416 } else {
7417 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7418 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7419 }
7420 }
7421 }
7422 }
7423 }
7424 }
7425#ifdef __DML_VBA_DEBUG__
7426 dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
7427#endif
7428
7429 /* Check DSC Unit and Slices Support */
7430 mode_lib->ms.support.NotEnoughDSCUnits = false;
7431 mode_lib->ms.support.NotEnoughDSCSlices = false;
7432 s->TotalDSCUnitsRequired = 0;
7433 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
7434 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7435 if (mode_lib->ms.RequiresDSC[k] == true) {
7436 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7437 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7438 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7439 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
7440 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
7441 mode_lib->ms.support.NotEnoughDSCSlices = true;
7442 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7443 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7444 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7445 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
7446 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
7447 mode_lib->ms.support.NotEnoughDSCSlices = true;
7448 } else {
7449 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7450 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7451 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
7452 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
7453 mode_lib->ms.support.NotEnoughDSCSlices = true;
7454 }
7455 }
7456 }
7457 if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
7458 mode_lib->ms.support.NotEnoughDSCUnits = true;
7459 }
7460
7461 /*DSC Delay per state*/
7462 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7463 mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(DSCEnabled: mode_lib->ms.RequiresDSC[k],
7464 ODMMode: mode_lib->ms.ODMModePerState[k],
7465 DSCInputBitPerComponent: mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7466 OutputBpp: mode_lib->ms.OutputBppPerState[k],
7467 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
7468 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7469 NumberOfDSCSlices: mode_lib->ms.support.NumberOfDSCSlices[k],
7470 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7471 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7472 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7473 PixelClockBackEnd: mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7474 }
7475
7476 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7477 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7478 for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
7479 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
7480 mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
7481 }
7482 }
7483 }
7484 }
7485
7486 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
7487 //
7488 for (j = 0; j < 2; ++j) {
7489 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7490 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7491 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7492 mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
7493 }
7494
7495 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
7496 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7497 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
7498 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
7499 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7500 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7501 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7502 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
7503 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7504 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7505 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
7506 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
7507 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
7508 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
7509 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
7510 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
7511 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7512 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7513 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
7514 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
7515 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
7516 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
7517 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
7518 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
7519 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
7520 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
7521 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
7522 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
7523 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
7524 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
7525 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
7526 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
7527 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7528 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7529 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7530 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7531 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
7532 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
7533 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7534 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7535 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7536 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7537 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7538 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7539 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7540 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
7541 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
7542 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
7543 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
7544 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
7545 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
7546 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
7547 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
7548 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
7549 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
7550 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
7551 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7552 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7553 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
7554 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
7555 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
7556
7557 CalculateSwathAndDETConfiguration(scratch: &mode_lib->scratch,
7558 p: CalculateSwathAndDETConfiguration_params);
7559
7560 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7561 mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
7562 mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
7563 mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
7564 mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
7565 mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
7566 mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
7567 mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
7568 mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
7569 mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
7570 mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
7571 mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
7572 }
7573 }
7574
7575 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7576 mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7577 }
7578
7579 CalculateSurfaceSizeInMall(
7580 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
7581 MALLAllocatedForDCN: mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
7582 UseMALLForStaticScreen: mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
7583 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable,
7584 ViewportStationary: mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
7585 ViewportXStartY: mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
7586 ViewportYStartY: mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
7587 ViewportXStartC: mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
7588 ViewportYStartC: mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
7589 ViewportWidthY: mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
7590 ViewportHeightY: mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
7591 BytesPerPixelY: mode_lib->ms.BytePerPixelY,
7592 ViewportWidthC: mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
7593 ViewportHeightC: mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
7594 BytesPerPixelC: mode_lib->ms.BytePerPixelC,
7595 SurfaceWidthY: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
7596 SurfaceWidthC: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
7597 SurfaceHeightY: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
7598 SurfaceHeightC: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
7599 Read256BytesBlockWidthY: mode_lib->ms.Read256BlockWidthY,
7600 Read256BytesBlockWidthC: mode_lib->ms.Read256BlockWidthC,
7601 Read256BytesBlockHeightY: mode_lib->ms.Read256BlockHeightY,
7602 Read256BytesBlockHeightC: mode_lib->ms.Read256BlockHeightC,
7603 ReadBlockWidthY: mode_lib->ms.MacroTileWidthY,
7604 ReadBlockWidthC: mode_lib->ms.MacroTileWidthC,
7605 ReadBlockHeightY: mode_lib->ms.MacroTileHeightY,
7606 ReadBlockHeightC: mode_lib->ms.MacroTileHeightC,
7607
7608 /* Output */
7609 SurfaceSizeInMALL: mode_lib->ms.SurfaceSizeInMALL,
7610 ExceededMALLSize: &mode_lib->ms.support.ExceededMALLSize);
7611
7612 for (j = 0; j < 2; j++) {
7613 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7614 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
7615 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
7616 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
7617 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
7618 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
7619 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
7620 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
7621 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
7622 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
7623 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7624 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7625 }
7626
7627 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
7628 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7629 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
7630 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
7631 }
7632 }
7633
7634 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7635 s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
7636 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
7637 s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
7638 s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
7639 s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
7640 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7641 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7642 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7643 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7644 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
7645 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
7646 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
7647 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
7648 s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
7649 s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
7650 s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
7651 s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
7652 s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
7653 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
7654 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
7655 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
7656 s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7657 s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
7658 s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
7659 s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
7660 s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
7661 s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
7662 s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
7663 s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
7664 s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
7665 s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
7666 s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
7667 s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
7668 s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
7669 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
7670 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
7671 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
7672 }
7673
7674 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7675 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
7676 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
7677 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
7678 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
7679 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
7680 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
7681 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7682 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
7683 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
7684 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
7685 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
7686 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
7687 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
7688 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
7689 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
7690 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
7691 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
7692 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
7693 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
7694 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
7695 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
7696 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
7697 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
7698 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
7699 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
7700 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
7701 CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
7702 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
7703 CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
7704 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
7705 CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
7706 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
7707 CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
7708 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
7709 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
7710 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7711 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
7712 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
7713 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
7714 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
7715 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
7716 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
7717 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
7718 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
7719 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
7720 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
7721 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
7722 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
7723 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
7724 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
7725 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
7726 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
7727 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
7728 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
7729 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
7730 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
7731 CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
7732 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
7733 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
7734 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
7735 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
7736 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
7737
7738 CalculateVMRowAndSwath(scratch: &mode_lib->scratch,
7739 p: CalculateVMRowAndSwath_params);
7740
7741 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7742 mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
7743 mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
7744 mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
7745 mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
7746 mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
7747 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
7748 mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
7749 mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
7750 mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
7751 }
7752
7753 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
7754
7755 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7756 if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
7757 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
7758#ifdef __DML_VBA_DEBUG__
7759 dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
7760#endif
7761 }
7762#ifdef __DML_VBA_DEBUG__
7763 dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
7764#endif
7765
7766 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
7767 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7768 if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
7769 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
7770 }
7771
7772 mode_lib->ms.UrgLatency = CalculateUrgentLatency(UrgentLatencyPixelDataOnly: mode_lib->ms.state.urgent_latency_pixel_data_only_us,
7773 UrgentLatencyPixelMixedWithVMData: mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
7774 UrgentLatencyVMDataOnly: mode_lib->ms.state.urgent_latency_vm_data_only_us,
7775 DoUrgentLatencyAdjustment: mode_lib->ms.soc.do_urgent_latency_adjustment,
7776 UrgentLatencyAdjustmentFabricClockComponent: mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
7777 UrgentLatencyAdjustmentFabricClockReference: mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
7778 FabricClock: mode_lib->ms.state.fabricclk_mhz);
7779
7780 /* Getter functions work at mp interface so copy the urgent latency to mp*/
7781 mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
7782
7783 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7784 CalculateUrgentBurstFactor(
7785 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
7786 swath_width_luma_ub: mode_lib->ms.swath_width_luma_ub_this_state[k],
7787 swath_width_chroma_ub: mode_lib->ms.swath_width_chroma_ub_this_state[k],
7788 SwathHeightY: mode_lib->ms.SwathHeightYThisState[k],
7789 SwathHeightC: mode_lib->ms.SwathHeightCThisState[k],
7790 LineTime: (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7791 UrgentLatency: mode_lib->ms.UrgLatency,
7792 CursorBufferSize: mode_lib->ms.ip.cursor_buffer_size,
7793 CursorWidth: mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
7794 CursorBPP: mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
7795 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
7796 VRatioC: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
7797 BytePerPixelInDETY: mode_lib->ms.BytePerPixelInDETY[k],
7798 BytePerPixelInDETC: mode_lib->ms.BytePerPixelInDETC[k],
7799 DETBufferSizeY: mode_lib->ms.DETBufferSizeYThisState[k],
7800 DETBufferSizeC: mode_lib->ms.DETBufferSizeCThisState[k],
7801 /* Output */
7802 UrgentBurstFactorCursor: &mode_lib->ms.UrgentBurstFactorCursor[k],
7803 UrgentBurstFactorLuma: &mode_lib->ms.UrgentBurstFactorLuma[k],
7804 UrgentBurstFactorChroma: &mode_lib->ms.UrgentBurstFactorChroma[k],
7805 NotEnoughUrgentLatencyHiding: &mode_lib->ms.NotUrgentLatencyHiding[k]);
7806 }
7807
7808 CalculateDCFCLKDeepSleep(
7809 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
7810 BytePerPixelY: mode_lib->ms.BytePerPixelY,
7811 BytePerPixelC: mode_lib->ms.BytePerPixelC,
7812 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio,
7813 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
7814 SwathWidthY: mode_lib->ms.SwathWidthYThisState,
7815 SwathWidthC: mode_lib->ms.SwathWidthCThisState,
7816 DPPPerSurface: mode_lib->ms.NoOfDPPThisState,
7817 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio,
7818 HRatioChroma: mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
7819 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock,
7820 PSCL_THROUGHPUT: mode_lib->ms.PSCL_FACTOR,
7821 PSCL_THROUGHPUT_CHROMA: mode_lib->ms.PSCL_FACTOR_CHROMA,
7822 Dppclk: mode_lib->ms.RequiredDPPCLKThisState,
7823 ReadBandwidthLuma: mode_lib->ms.ReadBandwidthLuma,
7824 ReadBandwidthChroma: mode_lib->ms.ReadBandwidthChroma,
7825 ReturnBusWidth: mode_lib->ms.soc.return_bus_width_bytes,
7826
7827 /* Output */
7828 DCFClkDeepSleep: &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
7829 }
7830
7831 //Calculate Return BW
7832 for (j = 0; j < 2; ++j) {
7833 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7834 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7835 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
7836 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7837 WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7838 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7839 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7840 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7841 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7842 WritebackDestinationHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
7843 WritebackSourceHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
7844 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
7845 } else {
7846 mode_lib->ms.WritebackDelayTime[k] = 0.0;
7847 }
7848 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7849 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
7850 mode_lib->ms.WritebackDelayTime[k] = dml_max(x: mode_lib->ms.WritebackDelayTime[k],
7851 y: mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7852 WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
7853 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
7854 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
7855 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
7856 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
7857 WritebackDestinationHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
7858 WritebackSourceHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
7859 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
7860 }
7861 }
7862 }
7863 }
7864 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7865 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7866 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
7867 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
7868 }
7869 }
7870 }
7871 s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
7872
7873 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7874 s->MaximumVStartup[j][k] = CalculateMaxVStartup(plane_idx: k,
7875 ptoi_supported: mode_lib->ms.ip.ptoi_supported,
7876 vblank_nom_default_us: mode_lib->ms.ip.vblank_nom_default_us,
7877 timing: &mode_lib->ms.cache_display_cfg.timing,
7878 write_back_delay_us: mode_lib->ms.WritebackDelayTime[k]);
7879
7880 s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(x: s->MaxVStartupAllPlanes[j], y: s->MaximumVStartup[j][k]));
7881#ifdef __DML_VBA_DEBUG__
7882 dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
7883 dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
7884#endif
7885 }
7886 }
7887
7888 s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(x: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
7889 y: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
7890 z: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
7891
7892 for (j = 0; j < 2; ++j) {
7893 mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
7894 }
7895
7896 /* Immediate Flip and MALL parameters */
7897 s->ImmediateFlipRequiredFinal = false;
7898 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7899 s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
7900 }
7901
7902 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
7903 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7904 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
7905 ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
7906 (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
7907 }
7908 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
7909
7910 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
7911 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7912 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
7913 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
7914 (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
7915 }
7916
7917 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
7918 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7919 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
7920 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) ||
7921 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame));
7922 }
7923
7924 s->FullFrameMALLPStateMethod = false;
7925 s->SubViewportMALLPStateMethod = false;
7926 s->PhantomPipeMALLPStateMethod = false;
7927 s->SubViewportMALLRefreshGreaterThan120Hz = false;
7928 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7929 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
7930 s->FullFrameMALLPStateMethod = true;
7931 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) {
7932 s->SubViewportMALLPStateMethod = true;
7933 if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120)
7934 s->SubViewportMALLRefreshGreaterThan120Hz = true;
7935 }
7936 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)
7937 s->PhantomPipeMALLPStateMethod = true;
7938 }
7939 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod)
7940 || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
7941
7942 if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) {
7943 UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7944 UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
7945 UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
7946 UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
7947 UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter;
7948 UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us;
7949 UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
7950 UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
7951 UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes;
7952 UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles;
7953 UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes;
7954 UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7955 UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes;
7956 UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
7957 UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
7958 UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
7959 UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7960 UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
7961 UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
7962 UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
7963 UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
7964 UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
7965 UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
7966 UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
7967 UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
7968 UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
7969 UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
7970 UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
7971 UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
7972 UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
7973 UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
7974 UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
7975 UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
7976 UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
7977 UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
7978 UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
7979 UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
7980 UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7981 UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
7982 UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
7983 UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
7984 UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
7985 UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
7986 UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
7987 UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
7988 UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
7989 UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
7990 UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
7991 UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
7992 UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
7993 UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
7994 UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
7995 UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
7996 UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
7997
7998 UseMinimumDCFCLK(scratch: &mode_lib->scratch,
7999 p: UseMinimumDCFCLK_params);
8000
8001 } // UseMinimumRequiredDCFCLK == true
8002
8003 for (j = 0; j < 2; ++j) {
8004 mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(soc: &mode_lib->ms.soc, use_ideal_dram_bw_strobe: mode_lib->ms.state.use_ideal_dram_bw_strobe,
8005 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable, DCFCLK: mode_lib->ms.DCFCLKState[j], FabricClock: mode_lib->ms.state.fabricclk_mhz,
8006 DRAMSpeed: mode_lib->ms.state.dram_speed_mts);
8007 mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(soc: &mode_lib->ms.soc, use_ideal_dram_bw_strobe: mode_lib->ms.state.use_ideal_dram_bw_strobe,
8008 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8009 DRAMSpeed: mode_lib->ms.state.dram_speed_mts);
8010 }
8011
8012 //Re-ordering Buffer Support Check
8013 for (j = 0; j < 2; ++j) {
8014 if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
8015 (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
8016 mode_lib->ms.support.ROBSupport[j] = true;
8017 } else {
8018 mode_lib->ms.support.ROBSupport[j] = false;
8019 }
8020 dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
8021 }
8022
8023 //Vertical Active BW support check
8024 s->MaxTotalVActiveRDBandwidth = 0;
8025 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8026 s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
8027 }
8028
8029 for (j = 0; j < 2; ++j) {
8030 mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(x: mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
8031 y: mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
8032 z: mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
8033 ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
8034 mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
8035
8036 if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
8037 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
8038 } else {
8039 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
8040 }
8041 }
8042
8043 /* Prefetch Check */
8044 dml_prefetch_check(mode_lib);
8045
8046 // End of Prefetch Check
8047 dml_print("DML::%s: Done prefetch calculation\n", __func__);
8048
8049 /*Cursor Support Check*/
8050 mode_lib->ms.support.CursorSupport = true;
8051 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8052 if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
8053 if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
8054 mode_lib->ms.support.CursorSupport = false;
8055 }
8056 }
8057 }
8058
8059 /*Valid Pitch Check*/
8060 mode_lib->ms.support.PitchSupport = true;
8061 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8062 mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
8063 x: dml_max(x: mode_lib->ms.cache_display_cfg.surface.PitchY[k], y: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
8064 granularity: mode_lib->ms.MacroTileWidthY[k]);
8065 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8066 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(x: dml_max(x: mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], y: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), granularity: 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
8067 } else {
8068 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8069 }
8070 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
8071 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
8072 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
8073 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
8074 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
8075 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
8076 mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(x: dml_max(x: mode_lib->ms.cache_display_cfg.surface.PitchC[k], y: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), granularity: mode_lib->ms.MacroTileWidthC[k]);
8077 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8078 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(x: dml_max(x: mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], y: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), granularity: 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
8079 } else {
8080 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8081 }
8082 } else {
8083 mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8084 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8085 }
8086 if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
8087 mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
8088 mode_lib->ms.support.PitchSupport = false;
8089 }
8090 }
8091
8092 mode_lib->ms.support.ViewportExceedsSurface = false;
8093 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8094 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
8095 mode_lib->ms.support.ViewportExceedsSurface = true;
8096 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
8097 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
8098 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
8099 mode_lib->ms.support.ViewportExceedsSurface = true;
8100 }
8101 }
8102 }
8103 }
8104
8105 /*Mode Support, Voltage State and SOC Configuration*/
8106 for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
8107 dml_print("DML::%s: checking support for j=%u\n", __func__, j);
8108 dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
8109
8110 s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
8111 s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
8112
8113 s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
8114 (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
8115 mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
8116 s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
8117 (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
8118 mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
8119
8120 if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
8121 && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
8122 && mode_lib->ms.support.ViewportSizeSupport[j] == true
8123 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
8124 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
8125 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
8126 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
8127 && !mode_lib->ms.support.ExceededMultistreamSlots
8128 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
8129 && !mode_lib->ms.support.NotEnoughLanesForMSO
8130 && mode_lib->ms.support.LinkCapacitySupport == true
8131 && !mode_lib->ms.support.P2IWith420
8132 && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
8133 && !mode_lib->ms.support.DSC422NativeNotSupported
8134 && !mode_lib->ms.support.MPCCombineMethodIncompatible
8135 && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
8136 && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
8137 && mode_lib->ms.support.NotEnoughDSCUnits == false
8138 && !mode_lib->ms.support.NotEnoughDSCSlices
8139 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
8140 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
8141 && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
8142 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
8143 && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
8144 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
8145 && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
8146 && mode_lib->ms.support.ROBSupport[j] == true
8147 && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
8148 && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
8149 && mode_lib->ms.support.NumberOfOTGSupport == true
8150 && mode_lib->ms.support.NumberOfHDMIFRLSupport == true
8151 && mode_lib->ms.support.NumberOfDP2p0Support == true
8152 && mode_lib->ms.support.EnoughWritebackUnits == true
8153 && mode_lib->ms.support.WritebackLatencySupport == true
8154 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
8155 && mode_lib->ms.support.CursorSupport == true
8156 && mode_lib->ms.support.PitchSupport == true
8157 && mode_lib->ms.support.ViewportExceedsSurface == false
8158 && mode_lib->ms.support.PrefetchSupported[j] == true
8159 && mode_lib->ms.support.VActiveBandwithSupport[j] == true
8160 && mode_lib->ms.support.DynamicMetadataSupported[j] == true
8161 && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
8162 && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
8163 && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
8164 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
8165 && mode_lib->ms.support.NonsupportedDSCInputBPC == false
8166 && !mode_lib->ms.support.ExceededMALLSize
8167 && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
8168 && s->dram_clock_change_support == true
8169 && s->f_clock_change_support == true
8170 && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
8171 dml_print("DML::%s: mode is supported\n", __func__);
8172 mode_lib->ms.support.ModeSupport[j] = true;
8173 } else {
8174 dml_print("DML::%s: mode is NOT supported\n", __func__);
8175 mode_lib->ms.support.ModeSupport[j] = false;
8176 dml_print_mode_support(mode_lib, j);
8177 }
8178 }
8179
8180 mode_lib->ms.support.MaximumMPCCombine = 0;
8181 mode_lib->ms.support.ModeIsSupported = 0;
8182 if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
8183 mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
8184
8185 // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc.
8186 if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
8187 (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
8188 (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
8189 !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
8190 ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
8191 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
8192 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
8193 ) &&
8194 mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
8195 || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
8196 ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
8197 (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
8198 mode_lib->ms.support.MaximumMPCCombine = 1;
8199 } else {
8200 mode_lib->ms.support.MaximumMPCCombine = 0;
8201 }
8202 }
8203
8204 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
8205 mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip
8206 mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8207 mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8208
8209 dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
8210 dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
8211 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8212 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
8213 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
8214
8215 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8216 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
8217 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
8218 mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8219 mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8220 mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8221 mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8222 mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8223 }
8224
8225 mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
8226 mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
8227 mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
8228 mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
8229 mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8230 mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8231
8232 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8233 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8234 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
8235 } else {
8236 mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
8237 }
8238
8239 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
8240 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
8241 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
8242 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
8243 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
8244 mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
8245 }
8246
8247 return mode_lib->ms.support.ModeIsSupported;
8248} // dml_core_mode_support
8249
8250/// @brief This function calculates some parameters thats are needed ahead of the mode programming function all
8251void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
8252{
8253 CalculateMaxDETAndMinCompressedBufferSize(
8254 ConfigReturnBufferSizeInKByte: mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
8255 ConfigReturnBufferSegmentSizeInKByte: mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
8256 ROBBufferSizeInKByte: mode_lib->ms.ip.rob_buffer_size_kbytes,
8257 MaxNumDPP: mode_lib->ms.ip.max_num_dpp,
8258 nomDETInKByteOverrideEnable: mode_lib->ms.policy.NomDETInKByteOverrideEnable,
8259 nomDETInKByteOverrideValue: mode_lib->ms.policy.NomDETInKByteOverrideValue,
8260
8261 /* Output */
8262 MaxTotalDETInKByte: &mode_lib->ms.MaxTotalDETInKByte,
8263 nomDETInKByte: &mode_lib->ms.NomDETInKByte,
8264 MinCompressedBufferSizeInKByte: &mode_lib->ms.MinCompressedBufferSizeInKByte);
8265
8266 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg: &mode_lib->ms.cache_display_cfg, ptoi_supported: mode_lib->ms.ip.ptoi_supported);
8267
8268 mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(soc: &mode_lib->ms.soc,
8269 use_ideal_dram_bw_strobe: mode_lib->ms.state.use_ideal_dram_bw_strobe,
8270 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8271 DCFCLK: mode_lib->ms.DCFCLK,
8272 FabricClock: mode_lib->ms.FabricClock,
8273 DRAMSpeed: mode_lib->ms.DRAMSpeed);
8274 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8275
8276} // dml_core_mode_support_partial
8277
/// @brief This is the mode programming function. It is assumed the display cfg is supported at the given power state
8279void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
8280{
8281 struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
8282 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
8283 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
8284 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
8285 struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
8286 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
8287
8288 struct mode_program_st *locals = &mode_lib->mp;
8289 struct DmlPipe *myPipe;
8290 dml_uint_t j = 0, k = 0;
8291 dml_float_t TWait;
8292 dml_bool_t isInterlaceTiming;
8293
8294 mode_lib->ms.num_active_planes = dml_get_num_active_planes(display_cfg: &mode_lib->ms.cache_display_cfg);
8295 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg: &mode_lib->ms.cache_display_cfg);
8296 dml_calc_pipe_plane_mapping(hw: &mode_lib->ms.cache_display_cfg.hw, pipe_plane: mode_lib->mp.pipe_plane);
8297
8298#ifdef __DML_VBA_DEBUG__
8299 dml_print("DML::%s: --- START --- \n", __func__);
8300 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
8301 dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
8302#endif
8303
8304 s->DSCFormatFactor = 0;
8305
8306 // Unlike dppclk and dispclk which can be calculated in mode_programming
8307 // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation)
8308 if (clk_cfg->dcfclk_option != dml_use_override_freq)
8309 locals->Dcfclk = mode_lib->ms.DCFCLK;
8310 else
8311 locals->Dcfclk = clk_cfg->dcfclk_freq_mhz;
8312
8313#ifdef __DML_VBA_DEBUG__
8314 dml_print_dml_policy(policy: &mode_lib->ms.policy);
8315 dml_print_soc_state_bounding_box(state: &mode_lib->ms.state);
8316 dml_print_soc_bounding_box(soc: &mode_lib->ms.soc);
8317 dml_print_clk_cfg(clk_cfg);
8318
8319 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8320 dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
8321 dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
8322#endif
8323
8324 locals->WritebackDISPCLK = 0.0;
8325 locals->GlobalDPPCLK = 0.0;
8326
8327 // DISPCLK and DPPCLK Calculation
8328 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8329 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
8330 locals->WritebackDISPCLK =
8331 dml_max(
8332 x: locals->WritebackDISPCLK,
8333 y: CalculateWriteBackDISPCLK(
8334 WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8335 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8336 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8337 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8338 WritebackHTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
8339 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8340 WritebackSourceWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
8341 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8342 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8343 WritebackLineBufferSize: mode_lib->ms.ip.writeback_line_buffer_buffer_size,
8344 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
8345 }
8346 }
8347
8348 locals->Dispclk_calculated = locals->WritebackDISPCLK;
8349
8350 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8351 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8352 locals->Dispclk_calculated = dml_max(x: locals->Dispclk_calculated, y: CalculateRequiredDispclk(
8353 ODMMode: mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8354 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8355 DISPCLKDPPCLKDSCCLKDownSpreading: mode_lib->ms.soc.dcn_downspread_percent,
8356 DISPCLKRampingMargin: mode_lib->ms.ip.dispclk_ramp_margin_percent,
8357 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8358 MaxDispclk: mode_lib->ms.max_state.dispclk_mhz));
8359 }
8360 }
8361 if (clk_cfg->dispclk_option == dml_use_required_freq)
8362 locals->Dispclk = locals->Dispclk_calculated;
8363 else if (clk_cfg->dispclk_option == dml_use_override_freq)
8364 locals->Dispclk = clk_cfg->dispclk_freq_mhz;
8365 else
8366 locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
8367#ifdef __DML_VBA_DEBUG__
8368 dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
8369#endif
8370
8371 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8372 CalculateSinglePipeDPPCLKAndSCLThroughput(
8373 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio[k],
8374 HRatioChroma: mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
8375 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8376 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8377 MaxDCHUBToPSCLThroughput: mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
8378 MaxPSCLToLBThroughput: mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
8379 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8380 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8381 HTaps: mode_lib->ms.cache_display_cfg.plane.HTaps[k],
8382 HTapsChroma: mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
8383 VTaps: mode_lib->ms.cache_display_cfg.plane.VTaps[k],
8384 VTapsChroma: mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
8385
8386 /* Output */
8387 PSCL_THROUGHPUT: &locals->PSCL_THROUGHPUT[k],
8388 PSCL_THROUGHPUT_CHROMA: &locals->PSCL_THROUGHPUT_CHROMA[k],
8389 DPPCLKUsingSingleDPP: &locals->DPPCLKUsingSingleDPP[k]);
8390 }
8391
8392 CalculateDPPCLK(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
8393 DISPCLKDPPCLKDSCCLKDownSpreading: mode_lib->ms.soc.dcn_downspread_percent,
8394 DISPCLKDPPCLKVCOSpeed: mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8395 DPPCLKUsingSingleDPP: locals->DPPCLKUsingSingleDPP,
8396 DPPPerSurface: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8397 /* Output */
8398 GlobalDPPCLK: &locals->GlobalDPPCLK,
8399 Dppclk: locals->Dppclk_calculated);
8400
8401 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8402 if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
8403 locals->Dppclk[k] = locals->Dppclk_calculated[k];
8404 else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
8405 locals->Dppclk[k] = clk_cfg->dppclk_freq_mhz[k];
8406 else
8407 locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
8408#ifdef __DML_VBA_DEBUG__
8409 dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
8410#endif
8411 }
8412
8413 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8414 CalculateBytePerPixelAndBlockSizes(
8415 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8416 SurfaceTiling: mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
8417
8418 /* Output */
8419 BytePerPixelY: &locals->BytePerPixelY[k],
8420 BytePerPixelC: &locals->BytePerPixelC[k],
8421 BytePerPixelDETY: &locals->BytePerPixelDETY[k],
8422 BytePerPixelDETC: &locals->BytePerPixelDETC[k],
8423 BlockHeight256BytesY: &locals->BlockHeight256BytesY[k],
8424 BlockHeight256BytesC: &locals->BlockHeight256BytesC[k],
8425 BlockWidth256BytesY: &locals->BlockWidth256BytesY[k],
8426 BlockWidth256BytesC: &locals->BlockWidth256BytesC[k],
8427 MacroTileHeightY: &locals->BlockHeightY[k],
8428 MacroTileHeightC: &locals->BlockHeightC[k],
8429 MacroTileWidthY: &locals->BlockWidthY[k],
8430 MacroTileWidthC: &locals->BlockWidthC[k]);
8431 }
8432
8433
8434 dml_print("DML::%s: %u\n", __func__, __LINE__);
8435 CalculateSwathWidth(
8436 ForceSingleDPP: false, // ForceSingleDPP
8437 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
8438 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
8439 SourceScan: mode_lib->ms.cache_display_cfg.plane.SourceScan,
8440 ViewportStationary: mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8441 ViewportWidth: mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8442 ViewportHeight: mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8443 ViewportXStart: mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8444 ViewportYStart: mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8445 ViewportXStartC: mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8446 ViewportYStartC: mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8447 SurfaceWidthY: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8448 SurfaceWidthC: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8449 SurfaceHeightY: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8450 SurfaceHeightC: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8451 ODMMode: mode_lib->ms.cache_display_cfg.hw.ODMMode,
8452 BytePerPixY: locals->BytePerPixelY,
8453 BytePerPixC: locals->BytePerPixelC,
8454 Read256BytesBlockHeightY: locals->BlockHeight256BytesY,
8455 Read256BytesBlockHeightC: locals->BlockHeight256BytesC,
8456 Read256BytesBlockWidthY: locals->BlockWidth256BytesY,
8457 Read256BytesBlockWidthC: locals->BlockWidth256BytesC,
8458 BlendingAndTiming: mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
8459 HActive: mode_lib->ms.cache_display_cfg.timing.HActive,
8460 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio,
8461 DPPPerSurface: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8462
8463 /* Output */
8464 SwathWidthSingleDPPY: locals->SwathWidthSingleDPPY,
8465 SwathWidthSingleDPPC: locals->SwathWidthSingleDPPC,
8466 SwathWidthY: locals->SwathWidthY,
8467 SwathWidthC: locals->SwathWidthC,
8468 MaximumSwathHeightY: s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
8469 MaximumSwathHeightC: s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
8470 swath_width_luma_ub: locals->swath_width_luma_ub,
8471 swath_width_chroma_ub: locals->swath_width_chroma_ub);
8472
8473 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8474 locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8475 locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8476 dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
8477 dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
8478 }
8479
8480 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
8481 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8482 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
8483 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
8484 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
8485 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
8486 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
8487 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8488 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8489 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
8490 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
8491 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
8492 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
8493 CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
8494 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
8495 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
8496 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
8497 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
8498 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
8499 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
8500 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
8501 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
8502 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
8503 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
8504 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
8505 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
8506 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
8507 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
8508 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
8509 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
8510 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
8511 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
8512 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
8513 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
8514 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
8515 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
8516 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
8517 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
8518 CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
8519 CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
8520 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
8521 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
8522 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
8523 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
8524 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
8525 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
8526 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
8527 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
8528 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
8529 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
8530 CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
8531 CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
8532 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
8533 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
8534 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
8535 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
8536 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
8537 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
8538 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
8539 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
8540 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
8541
8542 // VBA_DELTA
8543 // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
8544 CalculateSwathAndDETConfiguration(scratch: &mode_lib->scratch,
8545 p: CalculateSwathAndDETConfiguration_params);
8546
8547 // DCFCLK Deep Sleep
8548 CalculateDCFCLKDeepSleep(
8549 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
8550 BytePerPixelY: locals->BytePerPixelY,
8551 BytePerPixelC: locals->BytePerPixelC,
8552 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio,
8553 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
8554 SwathWidthY: locals->SwathWidthY,
8555 SwathWidthC: locals->SwathWidthC,
8556 DPPPerSurface: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8557 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio,
8558 HRatioChroma: mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
8559 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock,
8560 PSCL_THROUGHPUT: locals->PSCL_THROUGHPUT,
8561 PSCL_THROUGHPUT_CHROMA: locals->PSCL_THROUGHPUT_CHROMA,
8562 Dppclk: locals->Dppclk,
8563 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
8564 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
8565 ReturnBusWidth: mode_lib->ms.soc.return_bus_width_bytes,
8566
8567 /* Output */
8568 DCFClkDeepSleep: &locals->DCFCLKDeepSleep);
8569
8570 // DSCCLK
8571 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8572 if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
8573 locals->DSCCLK_calculated[k] = 0.0;
8574 } else {
8575 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
8576 s->DSCFormatFactor = 2;
8577 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
8578 s->DSCFormatFactor = 1;
8579 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
8580 s->DSCFormatFactor = 2;
8581 else
8582 s->DSCFormatFactor = 1;
8583 if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
8584 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8585 else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
8586 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8587 else
8588 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8589 }
8590 }
8591
8592 // DSC Delay
8593 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8594 locals->DSCDelay[k] = DSCDelayRequirement(DSCEnabled: mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
8595 ODMMode: mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8596 DSCInputBitPerComponent: mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
8597 OutputBpp: mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
8598 HActive: mode_lib->ms.cache_display_cfg.timing.HActive[k],
8599 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8600 NumberOfDSCSlices: mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
8601 OutputFormat: mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
8602 Output: mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
8603 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8604 PixelClockBackEnd: mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
8605 }
8606
8607 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8608 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
8609 if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
8610 locals->DSCDelay[k] = locals->DSCDelay[j];
8611
8612 // Prefetch
8613 CalculateSurfaceSizeInMall(
8614 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
8615 MALLAllocatedForDCN: mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
8616 UseMALLForStaticScreen: mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
8617 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable,
8618 ViewportStationary: mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8619 ViewportXStartY: mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8620 ViewportYStartY: mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8621 ViewportXStartC: mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8622 ViewportYStartC: mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8623 ViewportWidthY: mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8624 ViewportHeightY: mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8625 BytesPerPixelY: locals->BytePerPixelY,
8626 ViewportWidthC: mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
8627 ViewportHeightC: mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
8628 BytesPerPixelC: locals->BytePerPixelC,
8629 SurfaceWidthY: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8630 SurfaceWidthC: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8631 SurfaceHeightY: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8632 SurfaceHeightC: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8633 Read256BytesBlockWidthY: locals->BlockWidth256BytesY,
8634 Read256BytesBlockWidthC: locals->BlockWidth256BytesC,
8635 Read256BytesBlockHeightY: locals->BlockHeight256BytesY,
8636 Read256BytesBlockHeightC: locals->BlockHeight256BytesC,
8637 ReadBlockWidthY: locals->BlockWidthY,
8638 ReadBlockWidthC: locals->BlockWidthC,
8639 ReadBlockHeightY: locals->BlockHeightY,
8640 ReadBlockHeightC: locals->BlockHeightC,
8641
8642 /* Output */
8643 SurfaceSizeInMALL: locals->SurfaceSizeInTheMALL,
8644 ExceededMALLSize: &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
8645
8646 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8647 s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8648 s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8649 s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8650 s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
8651 s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
8652 s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8653 s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8654 s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8655 s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8656 s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
8657 s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
8658 s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
8659 s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
8660 s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
8661 s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
8662 s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
8663 s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
8664 s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
8665 s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
8666 s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
8667 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8668 s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8669 s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8670 s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
8671 s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
8672 s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
8673 s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8674 s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8675 s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8676 s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
8677 s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
8678 s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
8679 s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
8680 s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
8681 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
8682 s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
8683 s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
8684 }
8685
8686 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8687 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
8688 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
8689 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
8690 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
8691 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
8692 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
8693 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8694 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
8695 CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
8696 CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
8697 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
8698 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
8699 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
8700 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
8701 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
8702 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
8703 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
8704 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
8705 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
8706 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
8707 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
8708 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
8709 CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
8710 CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
8711 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
8712 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
8713 CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
8714 CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
8715 CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
8716 CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
8717 CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
8718 CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
8719 CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
8720 CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
8721 CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
8722 CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
8723 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
8724 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
8725 CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
8726 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
8727 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
8728 CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
8729 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
8730 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
8731 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
8732 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
8733 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
8734 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
8735 CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
8736 CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
8737 CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
8738 CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
8739 CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
8740 CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
8741 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
8742 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
8743 CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
8744 CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
8745 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
8746 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
8747 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
8748 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
8749
8750 CalculateVMRowAndSwath(scratch: &mode_lib->scratch,
8751 p: CalculateVMRowAndSwath_params);
8752
8753 s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
8754 x: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8755 y: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8756 z: mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
8757
8758 s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(soc: &mode_lib->ms.soc,
8759 use_ideal_dram_bw_strobe: mode_lib->ms.state.use_ideal_dram_bw_strobe,
8760 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8761 DCFCLK: locals->Dcfclk,
8762 FabricClock: mode_lib->ms.FabricClock,
8763 DRAMSpeed: mode_lib->ms.DRAMSpeed);
8764
8765#ifdef __DML_VBA_DEBUG__
8766 dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
8767 dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
8768 dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8769 dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
8770 dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
8771 dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
8772 dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
8773 dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
8774 dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
8775 dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
8776 dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
8777 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
8778 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8779#endif
8780
8781 s->HostVMInefficiencyFactor = 1.0;
8782 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
8783 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
8784
8785 s->TotalDCCActiveDPP = 0;
8786 s->TotalActiveDPP = 0;
8787 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8788 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8789 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
8790 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8791 }
8792
8793 locals->UrgentExtraLatency = CalculateExtraLatency(
8794 RoundTripPingLatencyCycles: mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
8795 ReorderingBytes: s->ReorderBytes,
8796 DCFCLK: locals->Dcfclk,
8797 TotalNumberOfActiveDPP: s->TotalActiveDPP,
8798 PixelChunkSizeInKByte: mode_lib->ms.ip.pixel_chunk_size_kbytes,
8799 TotalNumberOfDCCActiveDPP: s->TotalDCCActiveDPP,
8800 MetaChunkSize: mode_lib->ms.ip.meta_chunk_size_kbytes,
8801 ReturnBW: mode_lib->ms.ReturnBW,
8802 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
8803 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8804 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
8805 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8806 dpte_group_bytes: locals->dpte_group_bytes,
8807 HostVMInefficiencyFactor: s->HostVMInefficiencyFactor,
8808 HostVMMinPageSize: mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
8809 HostVMMaxNonCachedPageTableLevels: mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
8810
8811 locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
8812
8813 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8814 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8815 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
8816 locals->WritebackDelay[k] =
8817 mode_lib->ms.state.writeback_latency_us
8818 + CalculateWriteBackDelay(
8819 WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8820 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8821 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8822 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8823 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8824 WritebackDestinationHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
8825 WritebackSourceHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
8826 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
8827 } else
8828 locals->WritebackDelay[k] = 0;
8829 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
8830 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
8831 && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
8832 locals->WritebackDelay[k] =
8833 dml_max(
8834 x: locals->WritebackDelay[k],
8835 y: mode_lib->ms.state.writeback_latency_us
8836 + CalculateWriteBackDelay(
8837 WritebackPixelFormat: mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
8838 WritebackHRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
8839 WritebackVRatio: mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
8840 WritebackVTaps: mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
8841 WritebackDestinationWidth: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
8842 WritebackDestinationHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
8843 WritebackSourceHeight: mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
8844 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
8845 }
8846 }
8847 }
8848 }
8849
8850 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8851 for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
8852 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
8853 locals->WritebackDelay[k] = locals->WritebackDelay[j];
8854
8855 locals->UrgentLatency = CalculateUrgentLatency(UrgentLatencyPixelDataOnly: mode_lib->ms.state.urgent_latency_pixel_data_only_us,
8856 UrgentLatencyPixelMixedWithVMData: mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
8857 UrgentLatencyVMDataOnly: mode_lib->ms.state.urgent_latency_vm_data_only_us,
8858 DoUrgentLatencyAdjustment: mode_lib->ms.soc.do_urgent_latency_adjustment,
8859 UrgentLatencyAdjustmentFabricClockComponent: mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
8860 UrgentLatencyAdjustmentFabricClockReference: mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
8861 FabricClock: mode_lib->ms.FabricClock);
8862
8863 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8864 CalculateUrgentBurstFactor(UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8865 swath_width_luma_ub: locals->swath_width_luma_ub[k],
8866 swath_width_chroma_ub: locals->swath_width_chroma_ub[k],
8867 SwathHeightY: locals->SwathHeightY[k],
8868 SwathHeightC: locals->SwathHeightC[k],
8869 LineTime: mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8870 UrgentLatency: locals->UrgentLatency,
8871 CursorBufferSize: mode_lib->ms.ip.cursor_buffer_size,
8872 CursorWidth: mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
8873 CursorBPP: mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
8874 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8875 VRatioC: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8876 BytePerPixelInDETY: locals->BytePerPixelDETY[k],
8877 BytePerPixelInDETC: locals->BytePerPixelDETC[k],
8878 DETBufferSizeY: locals->DETBufferSizeY[k],
8879 DETBufferSizeC: locals->DETBufferSizeC[k],
8880
8881 /* output */
8882 UrgentBurstFactorCursor: &locals->UrgBurstFactorCursor[k],
8883 UrgentBurstFactorLuma: &locals->UrgBurstFactorLuma[k],
8884 UrgentBurstFactorChroma: &locals->UrgBurstFactorChroma[k],
8885 NotEnoughUrgentLatencyHiding: &locals->NoUrgentLatencyHiding[k]);
8886
8887 locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
8888 ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8889 }
8890
8891 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
8892 s->MaxVStartupAllPlanes = 0;
8893
8894 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8895 s->MaxVStartupLines[k] = CalculateMaxVStartup(plane_idx: k,
8896 ptoi_supported: mode_lib->ms.ip.ptoi_supported,
8897 vblank_nom_default_us: mode_lib->ms.ip.vblank_nom_default_us,
8898 timing: &mode_lib->ms.cache_display_cfg.timing,
8899 write_back_delay_us: locals->WritebackDelay[k]);
8900
8901#ifdef __DML_VBA_DEBUG__
8902 dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
8903 dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
8904#endif
8905 }
8906
8907 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8908 s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(x: s->MaxVStartupAllPlanes, y: s->MaxVStartupLines[k]));
8909
8910 s->ImmediateFlipRequirementFinal = false;
8911 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8912 s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
8913 }
8914#ifdef __DML_VBA_DEBUG__
8915 dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
8916#endif
8917
8918 // The prefetch scheduling should only be calculated once as per the AllowForPStateChangeOrStutterInVBlank requirement.
8919 // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving features
8920 // if possible), then try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
8921 s->iteration = 0;
8922 s->MaxTotalRDBandwidth = 0;
8923 s->AllPrefetchModeTested = false;
8924 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8925 CalculatePrefetchMode(AllowForPStateChangeOrStutterInVBlank: mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], MinPrefetchMode: &s->MinPrefetchMode[k], MaxPrefetchMode: &s->MaxPrefetchMode[k]);
8926 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
8927 }
8928
8929 do {
8930 s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
8931 s->DestinationLineTimesForPrefetchLessThan2 = false;
8932 s->VRatioPrefetchMoreThanMax = false;
8933
8934 dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
8935
8936 s->AllPrefetchModeTested = true;
8937 s->MaxTotalRDBandwidth = 0;
8938 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8939 locals->PrefetchMode[k] = s->NextPrefetchMode[k];
8940 TWait = CalculateTWait(
8941 PrefetchMode: locals->PrefetchMode[k],
8942 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8943 SynchronizeDRRDisplaysForUCLKPStateChangeFinal: mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
8944 DRRDisplay: mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
8945 DRAMClockChangeLatency: mode_lib->ms.state.dram_clock_change_latency_us,
8946 FCLKChangeLatency: mode_lib->ms.state.fclk_change_latency_us,
8947 UrgentLatency: locals->UrgentLatency,
8948 SREnterPlusExitTime: mode_lib->ms.state.sr_enter_plus_exit_time_us);
8949
8950 myPipe = &s->myPipe;
8951 myPipe->Dppclk = locals->Dppclk[k];
8952 myPipe->Dispclk = locals->Dispclk;
8953 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8954 myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
8955 myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8956 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
8957 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8958 myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8959 myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8960 myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8961 myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8962 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
8963 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
8964 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
8965 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
8966 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
8967 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
8968 myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
8969 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
8970 myPipe->BytePerPixelY = locals->BytePerPixelY[k];
8971 myPipe->BytePerPixelC = locals->BytePerPixelC[k];
8972 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8973
8974#ifdef __DML_VBA_DEBUG__
8975 dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
8976 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
8977 dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
8978#endif
8979
8980 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
8981 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
8982 CalculatePrefetchSchedule_params->myPipe = myPipe;
8983 CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
8984 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
8985 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
8986 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
8987 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
8988 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
8989 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
8990 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
8991 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
8992 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(x: s->VStartupLines, y: s->MaxVStartupLines[k]));
8993 CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
8994 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
8995 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
8996 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
8997 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
8998 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
8999 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
9000 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
9001 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
9002 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
9003 CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
9004 CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
9005 CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
9006 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
9007 CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
9008 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
9009 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
9010 CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
9011 CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
9012 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
9013 CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
9014 CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
9015 CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
9016 CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
9017 CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
9018 CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
9019 CalculatePrefetchSchedule_params->TWait = TWait;
9020 CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
9021 CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
9022 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
9023 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
9024 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
9025 CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
9026 CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
9027 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
9028 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
9029 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
9030 CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
9031 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
9032 CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
9033 CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
9034 CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
9035 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
9036 CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
9037 CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
9038
9039 locals->NoTimeToPrefetch[k] =
9040 CalculatePrefetchSchedule(scratch: &mode_lib->scratch,
9041 p: CalculatePrefetchSchedule_params);
9042
9043#ifdef __DML_VBA_DEBUG__
9044 dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9045#endif
9046 locals->VStartup[k] = (dml_uint_t)(dml_min(x: s->VStartupLines, y: s->MaxVStartupLines[k]));
9047 locals->VStartupMin[k] = locals->VStartup[k];
9048 }
9049
9050 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9051 CalculateUrgentBurstFactor(
9052 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
9053 swath_width_luma_ub: locals->swath_width_luma_ub[k],
9054 swath_width_chroma_ub: locals->swath_width_chroma_ub[k],
9055 SwathHeightY: locals->SwathHeightY[k],
9056 SwathHeightC: locals->SwathHeightC[k],
9057 LineTime: mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9058 UrgentLatency: locals->UrgentLatency,
9059 CursorBufferSize: mode_lib->ms.ip.cursor_buffer_size,
9060 CursorWidth: mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
9061 CursorBPP: mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
9062 VRatio: locals->VRatioPrefetchY[k],
9063 VRatioC: locals->VRatioPrefetchC[k],
9064 BytePerPixelInDETY: locals->BytePerPixelDETY[k],
9065 BytePerPixelInDETC: locals->BytePerPixelDETC[k],
9066 DETBufferSizeY: locals->DETBufferSizeY[k],
9067 DETBufferSizeC: locals->DETBufferSizeC[k],
9068 /* Output */
9069 UrgentBurstFactorCursor: &locals->UrgBurstFactorCursorPre[k],
9070 UrgentBurstFactorLuma: &locals->UrgBurstFactorLumaPre[k],
9071 UrgentBurstFactorChroma: &locals->UrgBurstFactorChromaPre[k],
9072 NotEnoughUrgentLatencyHiding: &locals->NoUrgentLatencyHidingPre[k]);
9073
9074 locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
9075
9076#ifdef __DML_VBA_DEBUG__
9077 dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9078 dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
9079 dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
9080 dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
9081 dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
9082
9083 dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
9084 dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
9085
9086 dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
9087 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9088 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9089 dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
9090 dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
9091 dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
9092 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
9093 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
9094 dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
9095 dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
9096#endif
9097 if (locals->DestinationLinesForPrefetch[k] < 2)
9098 s->DestinationLineTimesForPrefetchLessThan2 = true;
9099
9100 if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9101 locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9102 ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
9103 (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
9104 s->VRatioPrefetchMoreThanMax = true;
9105
9106 //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
9107 //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
9108 //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
9109 // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
9110 //}
9111
9112 //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
9113 // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
9114 //}
9115 }
9116
9117 locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
9118
9119#ifdef __DML_VBA_DEBUG__
9120 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
9121 dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
9122 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
9123#endif
9124
9125 CalculatePrefetchBandwithSupport(
9126 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9127 ReturnBW: mode_lib->ms.ReturnBW,
9128 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9129 NotUrgentLatencyHiding: locals->NoUrgentLatencyHidingPre,
9130 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
9131 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
9132 PrefetchBandwidthLuma: locals->RequiredPrefetchPixDataBWLuma,
9133 PrefetchBandwidthChroma: locals->RequiredPrefetchPixDataBWChroma,
9134 cursor_bw: locals->cursor_bw,
9135 meta_row_bandwidth: locals->meta_row_bw,
9136 dpte_row_bandwidth: locals->dpte_row_bw,
9137 cursor_bw_pre: locals->cursor_bw_pre,
9138 prefetch_vmrow_bw: locals->prefetch_vmrow_bw,
9139 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9140 UrgentBurstFactorLuma: locals->UrgBurstFactorLuma,
9141 UrgentBurstFactorChroma: locals->UrgBurstFactorChroma,
9142 UrgentBurstFactorCursor: locals->UrgBurstFactorCursor,
9143 UrgentBurstFactorLumaPre: locals->UrgBurstFactorLumaPre,
9144 UrgentBurstFactorChromaPre: locals->UrgBurstFactorChromaPre,
9145 UrgentBurstFactorCursorPre: locals->UrgBurstFactorCursorPre,
9146
9147 /* output */
9148 PrefetchBandwidth: &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9149 PrefetchBandwidthNotIncludingMALLPrefetch: &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9150 FractionOfUrgentBandwidth: &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9151 PrefetchBandwidthSupport: &locals->PrefetchModeSupported);
9152
9153 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9154 s->dummy_unit_vector[k] = 1.0;
9155
9156 CalculatePrefetchBandwithSupport(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9157 ReturnBW: mode_lib->ms.ReturnBW,
9158 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9159 NotUrgentLatencyHiding: locals->NoUrgentLatencyHidingPre,
9160 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
9161 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
9162 PrefetchBandwidthLuma: locals->RequiredPrefetchPixDataBWLuma,
9163 PrefetchBandwidthChroma: locals->RequiredPrefetchPixDataBWChroma,
9164 cursor_bw: locals->cursor_bw,
9165 meta_row_bandwidth: locals->meta_row_bw,
9166 dpte_row_bandwidth: locals->dpte_row_bw,
9167 cursor_bw_pre: locals->cursor_bw_pre,
9168 prefetch_vmrow_bw: locals->prefetch_vmrow_bw,
9169 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9170 UrgentBurstFactorLuma: s->dummy_unit_vector,
9171 UrgentBurstFactorChroma: s->dummy_unit_vector,
9172 UrgentBurstFactorCursor: s->dummy_unit_vector,
9173 UrgentBurstFactorLumaPre: s->dummy_unit_vector,
9174 UrgentBurstFactorChromaPre: s->dummy_unit_vector,
9175 UrgentBurstFactorCursorPre: s->dummy_unit_vector,
9176
9177 /* output */
9178 PrefetchBandwidth: &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9179 PrefetchBandwidthNotIncludingMALLPrefetch: &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9180 FractionOfUrgentBandwidth: &locals->FractionOfUrgentBandwidth,
9181 PrefetchBandwidthSupport: &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
9182
9183 if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
9184 dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
9185 dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
9186 locals->PrefetchModeSupported = false;
9187 }
9188
9189 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9190 if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
9191 dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9192 dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
9193 locals->PrefetchModeSupported = false;
9194 }
9195 }
9196
9197 if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
9198 locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
9199 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9200 ReturnBW: mode_lib->ms.ReturnBW,
9201 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
9202 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
9203 PrefetchBandwidthLuma: locals->RequiredPrefetchPixDataBWLuma,
9204 PrefetchBandwidthChroma: locals->RequiredPrefetchPixDataBWChroma,
9205 cursor_bw: locals->cursor_bw,
9206 cursor_bw_pre: locals->cursor_bw_pre,
9207 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9208 UrgentBurstFactorLuma: locals->UrgBurstFactorLuma,
9209 UrgentBurstFactorChroma: locals->UrgBurstFactorChroma,
9210 UrgentBurstFactorCursor: locals->UrgBurstFactorCursor,
9211 UrgentBurstFactorLumaPre: locals->UrgBurstFactorLumaPre,
9212 UrgentBurstFactorChromaPre: locals->UrgBurstFactorChromaPre,
9213 UrgentBurstFactorCursorPre: locals->UrgBurstFactorCursorPre);
9214
9215 locals->TotImmediateFlipBytes = 0;
9216 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9217 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
9218 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
9219 if (locals->use_one_row_for_frame_flip[k]) {
9220 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
9221 } else {
9222 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
9223 }
9224#ifdef __DML_VBA_DEBUG__
9225 dml_print("DML::%s: k = %u\n", __func__, k);
9226 dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9227 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
9228 dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
9229 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
9230 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
9231#endif
9232 }
9233 }
9234 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9235 CalculateFlipSchedule(
9236 HostVMInefficiencyFactor: s->HostVMInefficiencyFactor,
9237 UrgentExtraLatency: locals->UrgentExtraLatency,
9238 UrgentLatency: locals->UrgentLatency,
9239 GPUVMMaxPageTableLevels: mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9240 HostVMEnable: mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
9241 HostVMMaxNonCachedPageTableLevels: mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
9242 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9243 HostVMMinPageSize: mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
9244 PDEAndMetaPTEBytesPerFrame: locals->PDEAndMetaPTEBytesFrame[k],
9245 MetaRowBytes: locals->MetaRowByte[k],
9246 DPTEBytesPerRow: locals->PixelPTEBytesPerRow[k],
9247 BandwidthAvailableForImmediateFlip: locals->BandwidthAvailableForImmediateFlip,
9248 TotImmediateFlipBytes: locals->TotImmediateFlipBytes,
9249 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9250 LineTime: mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9251 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio[k],
9252 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
9253 Tno_bw: locals->Tno_bw[k],
9254 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9255 dpte_row_height: locals->dpte_row_height[k],
9256 meta_row_height: locals->meta_row_height[k],
9257 dpte_row_height_chroma: locals->dpte_row_height_chroma[k],
9258 meta_row_height_chroma: locals->meta_row_height_chroma[k],
9259 use_one_row_for_frame_flip: locals->use_one_row_for_frame_flip[k],
9260
9261 /* Output */
9262 DestinationLinesToRequestVMInImmediateFlip: &locals->DestinationLinesToRequestVMInImmediateFlip[k],
9263 DestinationLinesToRequestRowInImmediateFlip: &locals->DestinationLinesToRequestRowInImmediateFlip[k],
9264 final_flip_bw: &locals->final_flip_bw[k],
9265 ImmediateFlipSupportedForPipe: &locals->ImmediateFlipSupportedForPipe[k]);
9266 }
9267
9268 CalculateImmediateFlipBandwithSupport(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9269 ReturnBW: mode_lib->ms.ReturnBW,
9270 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9271 ImmediateFlipRequirement: mode_lib->ms.policy.ImmediateFlipRequirement,
9272 final_flip_bw: locals->final_flip_bw,
9273 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
9274 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
9275 PrefetchBandwidthLuma: locals->RequiredPrefetchPixDataBWLuma,
9276 PrefetchBandwidthChroma: locals->RequiredPrefetchPixDataBWChroma,
9277 cursor_bw: locals->cursor_bw,
9278 meta_row_bandwidth: locals->meta_row_bw,
9279 dpte_row_bandwidth: locals->dpte_row_bw,
9280 cursor_bw_pre: locals->cursor_bw_pre,
9281 prefetch_vmrow_bw: locals->prefetch_vmrow_bw,
9282 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9283 UrgentBurstFactorLuma: locals->UrgBurstFactorLuma,
9284 UrgentBurstFactorChroma: locals->UrgBurstFactorChroma,
9285 UrgentBurstFactorCursor: locals->UrgBurstFactorCursor,
9286 UrgentBurstFactorLumaPre: locals->UrgBurstFactorLumaPre,
9287 UrgentBurstFactorChromaPre: locals->UrgBurstFactorChromaPre,
9288 UrgentBurstFactorCursorPre: locals->UrgBurstFactorCursorPre,
9289
9290 /* output */
9291 TotalBandwidth: &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9292 TotalBandwidthNotIncludingMALLPrefetch: &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9293 FractionOfUrgentBandwidth: &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9294 ImmediateFlipBandwidthSupport: &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
9295
9296 CalculateImmediateFlipBandwithSupport(NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9297 ReturnBW: mode_lib->ms.ReturnBW,
9298 UseMALLForPStateChange: mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9299 ImmediateFlipRequirement: mode_lib->ms.policy.ImmediateFlipRequirement,
9300 final_flip_bw: locals->final_flip_bw,
9301 ReadBandwidthLuma: locals->ReadBandwidthSurfaceLuma,
9302 ReadBandwidthChroma: locals->ReadBandwidthSurfaceChroma,
9303 PrefetchBandwidthLuma: locals->RequiredPrefetchPixDataBWLuma,
9304 PrefetchBandwidthChroma: locals->RequiredPrefetchPixDataBWChroma,
9305 cursor_bw: locals->cursor_bw,
9306 meta_row_bandwidth: locals->meta_row_bw,
9307 dpte_row_bandwidth: locals->dpte_row_bw,
9308 cursor_bw_pre: locals->cursor_bw_pre,
9309 prefetch_vmrow_bw: locals->prefetch_vmrow_bw,
9310 NumberOfDPP: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9311 UrgentBurstFactorLuma: s->dummy_unit_vector,
9312 UrgentBurstFactorChroma: s->dummy_unit_vector,
9313 UrgentBurstFactorCursor: s->dummy_unit_vector,
9314 UrgentBurstFactorLumaPre: s->dummy_unit_vector,
9315 UrgentBurstFactorChromaPre: s->dummy_unit_vector,
9316 UrgentBurstFactorCursorPre: s->dummy_unit_vector,
9317
9318 /* output */
9319 TotalBandwidth: &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9320 TotalBandwidthNotIncludingMALLPrefetch: &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9321 FractionOfUrgentBandwidth: &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
9322 ImmediateFlipBandwidthSupport: &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
9323
9324 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9325 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
9326 locals->ImmediateFlipSupported = false;
9327#ifdef __DML_VBA_DEBUG__
9328 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
9329#endif
9330 }
9331 }
9332 } else {
9333 locals->ImmediateFlipSupported = false;
9334 locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
9335 locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
9336 locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
9337 locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
9338 }
9339
9340 /* consider flip support is okay if the flip bw is ok or (when user doesn't require an iflip and there is no host vm) */
9341 locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
9342 ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
9343 locals->ImmediateFlipSupported)) ? true : false;
9344
9345#ifdef __DML_VBA_DEBUG__
9346 dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
9347 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9348 dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
9349 dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
9350 dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9351 dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
9352 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
9353#endif
9354 dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
9355
9356 s->VStartupLines = s->VStartupLines + 1;
9357
9358 if (s->VStartupLines > s->MaxVStartupAllPlanes) {
9359 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
9360
9361 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9362 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
9363
9364 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
9365 s->AllPrefetchModeTested = false;
9366 dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
9367 }
9368 } else {
9369 s->AllPrefetchModeTested = false;
9370 }
9371 s->iteration++;
9372 if (s->iteration > 2500) {
9373 dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
9374 ASSERT(0);
9375 }
9376 } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
9377
9378 if (locals->PrefetchAndImmediateFlipSupported) {
9379 dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
9380 } else {
9381 dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
9382 }
9383
9384 //Watermarks and NB P-State/DRAM Clock Change Support
9385 {
9386 s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
9387 s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
9388 s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
9389 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
9390 s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
9391 s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9392 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
9393 s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9394 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
9395 s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
9396 s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
9397
9398 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
9399 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9400 CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
9401 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9402 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
9403 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
9404 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
9405 CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
9406 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
9407 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9408 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
9409 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
9410 CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
9411 CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
9412 CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
9413 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
9414 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
9415 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9416 CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
9417 CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
9418 CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
9419 CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
9420 CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
9421 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
9422 CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
9423 CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
9424 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
9425 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
9426 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
9427 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
9428 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9429 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
9430 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9431 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9432 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9433 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9434 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9435 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9436 CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9437 CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
9438 CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
9439 CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
9440 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9441 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
9442 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
9443 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
9444 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
9445 CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9446 CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9447
9448 // Output
9449 CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
9450 CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
9451 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
9452 CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
9453 CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
9454 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
9455 CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
9456
9457 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
9458 scratch: &mode_lib->scratch,
9459 p: CalculateWatermarks_params);
9460
9461 /* Copy the calculated watermarks to mp.Watermark as the getter functions are
9462 * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
9463 */
9464 memcpy(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
9465
9466 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9467 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9468 locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(x: 0, y: locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9469 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
9470 locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(x: 0, y: locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9471 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
9472 } else {
9473 locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
9474 locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
9475 }
9476 }
9477 }
9478
9479 //Display Pipeline Delivery Time in Prefetch, Groups
9480 CalculatePixelDeliveryTimes(
9481 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9482 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio,
9483 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9484 VRatioPrefetchY: locals->VRatioPrefetchY,
9485 VRatioPrefetchC: locals->VRatioPrefetchC,
9486 swath_width_luma_ub: locals->swath_width_luma_ub,
9487 swath_width_chroma_ub: locals->swath_width_chroma_ub,
9488 DPPPerSurface: mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9489 HRatio: mode_lib->ms.cache_display_cfg.plane.HRatio,
9490 HRatioChroma: mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
9491 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock,
9492 PSCL_THROUGHPUT: locals->PSCL_THROUGHPUT,
9493 PSCL_THROUGHPUT_CHROMA: locals->PSCL_THROUGHPUT_CHROMA,
9494 Dppclk: locals->Dppclk,
9495 BytePerPixelC: locals->BytePerPixelC,
9496 SourceScan: mode_lib->ms.cache_display_cfg.plane.SourceScan,
9497 NumberOfCursors: mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
9498 CursorWidth: mode_lib->ms.cache_display_cfg.plane.CursorWidth,
9499 CursorBPP: mode_lib->ms.cache_display_cfg.plane.CursorBPP,
9500 BlockWidth256BytesY: locals->BlockWidth256BytesY,
9501 BlockHeight256BytesY: locals->BlockHeight256BytesY,
9502 BlockWidth256BytesC: locals->BlockWidth256BytesC,
9503 BlockHeight256BytesC: locals->BlockHeight256BytesC,
9504
9505 /* Output */
9506 DisplayPipeLineDeliveryTimeLuma: locals->DisplayPipeLineDeliveryTimeLuma,
9507 DisplayPipeLineDeliveryTimeChroma: locals->DisplayPipeLineDeliveryTimeChroma,
9508 DisplayPipeLineDeliveryTimeLumaPrefetch: locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
9509 DisplayPipeLineDeliveryTimeChromaPrefetch: locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
9510 DisplayPipeRequestDeliveryTimeLuma: locals->DisplayPipeRequestDeliveryTimeLuma,
9511 DisplayPipeRequestDeliveryTimeChroma: locals->DisplayPipeRequestDeliveryTimeChroma,
9512 DisplayPipeRequestDeliveryTimeLumaPrefetch: locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
9513 DisplayPipeRequestDeliveryTimeChromaPrefetch: locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
9514 CursorRequestDeliveryTime: locals->CursorRequestDeliveryTime,
9515 CursorRequestDeliveryTimePrefetch: locals->CursorRequestDeliveryTimePrefetch);
9516
9517 CalculateMetaAndPTETimes(
9518 use_one_row_for_frame: locals->use_one_row_for_frame,
9519 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9520 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9521 MetaChunkSize: mode_lib->ms.ip.meta_chunk_size_kbytes,
9522 MinMetaChunkSizeBytes: mode_lib->ms.ip.min_meta_chunk_size_bytes,
9523 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal,
9524 VRatio: mode_lib->ms.cache_display_cfg.plane.VRatio,
9525 VRatioChroma: mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9526 DestinationLinesToRequestRowInVBlank: locals->DestinationLinesToRequestRowInVBlank,
9527 DestinationLinesToRequestRowInImmediateFlip: locals->DestinationLinesToRequestRowInImmediateFlip,
9528 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9529 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock,
9530 BytePerPixelY: locals->BytePerPixelY,
9531 BytePerPixelC: locals->BytePerPixelC,
9532 SourceScan: mode_lib->ms.cache_display_cfg.plane.SourceScan,
9533 dpte_row_height: locals->dpte_row_height,
9534 dpte_row_height_chroma: locals->dpte_row_height_chroma,
9535 meta_row_width: locals->meta_row_width,
9536 meta_row_width_chroma: locals->meta_row_width_chroma,
9537 meta_row_height: locals->meta_row_height,
9538 meta_row_height_chroma: locals->meta_row_height_chroma,
9539 meta_req_width: locals->meta_req_width,
9540 meta_req_width_chroma: locals->meta_req_width_chroma,
9541 meta_req_height: locals->meta_req_height,
9542 meta_req_height_chroma: locals->meta_req_height_chroma,
9543 dpte_group_bytes: locals->dpte_group_bytes,
9544 PTERequestSizeY: locals->PTERequestSizeY,
9545 PTERequestSizeC: locals->PTERequestSizeC,
9546 PixelPTEReqWidthY: locals->PixelPTEReqWidthY,
9547 PixelPTEReqHeightY: locals->PixelPTEReqHeightY,
9548 PixelPTEReqWidthC: locals->PixelPTEReqWidthC,
9549 PixelPTEReqHeightC: locals->PixelPTEReqHeightC,
9550 dpte_row_width_luma_ub: locals->dpte_row_width_luma_ub,
9551 dpte_row_width_chroma_ub: locals->dpte_row_width_chroma_ub,
9552
9553 /* Output */
9554 DST_Y_PER_PTE_ROW_NOM_L: locals->DST_Y_PER_PTE_ROW_NOM_L,
9555 DST_Y_PER_PTE_ROW_NOM_C: locals->DST_Y_PER_PTE_ROW_NOM_C,
9556 DST_Y_PER_META_ROW_NOM_L: locals->DST_Y_PER_META_ROW_NOM_L,
9557 DST_Y_PER_META_ROW_NOM_C: locals->DST_Y_PER_META_ROW_NOM_C,
9558 TimePerMetaChunkNominal: locals->TimePerMetaChunkNominal,
9559 TimePerChromaMetaChunkNominal: locals->TimePerChromaMetaChunkNominal,
9560 TimePerMetaChunkVBlank: locals->TimePerMetaChunkVBlank,
9561 TimePerChromaMetaChunkVBlank: locals->TimePerChromaMetaChunkVBlank,
9562 TimePerMetaChunkFlip: locals->TimePerMetaChunkFlip,
9563 TimePerChromaMetaChunkFlip: locals->TimePerChromaMetaChunkFlip,
9564 time_per_pte_group_nom_luma: locals->time_per_pte_group_nom_luma,
9565 time_per_pte_group_vblank_luma: locals->time_per_pte_group_vblank_luma,
9566 time_per_pte_group_flip_luma: locals->time_per_pte_group_flip_luma,
9567 time_per_pte_group_nom_chroma: locals->time_per_pte_group_nom_chroma,
9568 time_per_pte_group_vblank_chroma: locals->time_per_pte_group_vblank_chroma,
9569 time_per_pte_group_flip_chroma: locals->time_per_pte_group_flip_chroma);
9570
9571 CalculateVMGroupAndRequestTimes(
9572 NumberOfActiveSurfaces: mode_lib->ms.num_active_planes,
9573 GPUVMEnable: mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9574 GPUVMMaxPageTableLevels: mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9575 HTotal: mode_lib->ms.cache_display_cfg.timing.HTotal,
9576 BytePerPixelC: locals->BytePerPixelC,
9577 DestinationLinesToRequestVMInVBlank: locals->DestinationLinesToRequestVMInVBlank,
9578 DestinationLinesToRequestVMInImmediateFlip: locals->DestinationLinesToRequestVMInImmediateFlip,
9579 DCCEnable: mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9580 PixelClock: mode_lib->ms.cache_display_cfg.timing.PixelClock,
9581 dpte_row_width_luma_ub: locals->dpte_row_width_luma_ub,
9582 dpte_row_width_chroma_ub: locals->dpte_row_width_chroma_ub,
9583 vm_group_bytes: locals->vm_group_bytes,
9584 dpde0_bytes_per_frame_ub_l: locals->dpde0_bytes_per_frame_ub_l,
9585 dpde0_bytes_per_frame_ub_c: locals->dpde0_bytes_per_frame_ub_c,
9586 meta_pte_bytes_per_frame_ub_l: locals->meta_pte_bytes_per_frame_ub_l,
9587 meta_pte_bytes_per_frame_ub_c: locals->meta_pte_bytes_per_frame_ub_c,
9588
9589 /* Output */
9590 TimePerVMGroupVBlank: locals->TimePerVMGroupVBlank,
9591 TimePerVMGroupFlip: locals->TimePerVMGroupFlip,
9592 TimePerVMRequestVBlank: locals->TimePerVMRequestVBlank,
9593 TimePerVMRequestFlip: locals->TimePerVMRequestFlip);
9594
9595 // Min TTUVBlank
9596 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9597 if (locals->PrefetchMode[k] == 0) {
9598 locals->MinTTUVBlank[k] = dml_max4(
9599 a: locals->Watermark.DRAMClockChangeWatermark,
9600 b: locals->Watermark.FCLKChangeWatermark,
9601 c: locals->Watermark.StutterEnterPlusExitWatermark,
9602 d: locals->Watermark.UrgentWatermark);
9603 } else if (locals->PrefetchMode[k] == 1) {
9604 locals->MinTTUVBlank[k] = dml_max3(
9605 x: locals->Watermark.FCLKChangeWatermark,
9606 y: locals->Watermark.StutterEnterPlusExitWatermark,
9607 z: locals->Watermark.UrgentWatermark);
9608 } else if (locals->PrefetchMode[k] == 2) {
9609 locals->MinTTUVBlank[k] = dml_max(
9610 x: locals->Watermark.StutterEnterPlusExitWatermark,
9611 y: locals->Watermark.UrgentWatermark);
9612 } else {
9613 locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
9614 }
9615 if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
9616 locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
9617 }
9618
9619 // DCC Configuration
9620 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9621#ifdef __DML_VBA_DEBUG__
9622 dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
9623#endif
9624 CalculateDCCConfiguration(
9625 DCCEnabled: mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9626 DCCProgrammingAssumesScanDirectionUnknown: mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
9627 SourcePixelFormat: mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9628 SurfaceWidthLuma: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
9629 SurfaceWidthChroma: mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
9630 SurfaceHeightLuma: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
9631 SurfaceHeightChroma: mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
9632 nomDETInKByte: mode_lib->ms.NomDETInKByte,
9633 RequestHeight256ByteLuma: locals->BlockHeight256BytesY[k],
9634 RequestHeight256ByteChroma: locals->BlockHeight256BytesC[k],
9635 TilingFormat: mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
9636 BytePerPixelY: locals->BytePerPixelY[k],
9637 BytePerPixelC: locals->BytePerPixelC[k],
9638 BytePerPixelDETY: locals->BytePerPixelDETY[k],
9639 BytePerPixelDETC: locals->BytePerPixelDETC[k],
9640 SourceScan: mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
9641 /* Output */
9642 MaxUncompressedBlockLuma: &locals->DCCYMaxUncompressedBlock[k],
9643 MaxUncompressedBlockChroma: &locals->DCCCMaxUncompressedBlock[k],
9644 MaxCompressedBlockLuma: &locals->DCCYMaxCompressedBlock[k],
9645 MaxCompressedBlockChroma: &locals->DCCCMaxCompressedBlock[k],
9646 IndependentBlockLuma: &locals->DCCYIndependentBlock[k],
9647 IndependentBlockChroma: &locals->DCCCIndependentBlock[k]);
9648 }
9649
9650 // VStartup Adjustment
9651 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9652 s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
9653#ifdef __DML_VBA_DEBUG__
9654 dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
9655#endif
9656
9657 locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
9658
9659#ifdef __DML_VBA_DEBUG__
9660 dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
9661 dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
9662 dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
9663#endif
9664
9665 locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
9666 if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
9667 locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
9668 }
9669
9670 isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
9671
9672 // The actual positioning of the vstartup
9673 locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
9674
9675 s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor(x: (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, granularity: 1.0) :
9676 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9677 s->LSetup = dml_floor(x: 4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), granularity: 1.0) / 4.0;
9678 s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
9679
9680 if (s->blank_lines_remaining < 0) {
9681 dml_print("ERROR: Vstartup is larger than vblank!?\n");
9682 s->blank_lines_remaining = 0;
9683 ASSERT(0);
9684 }
9685 locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
9686
9687 // debug only
9688 s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor(x: (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, granularity: 1.0) :
9689 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
9690 + dml_max(x: 1.0, y: dml_ceil(x: (dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), granularity: 1.0))
9691 + dml_floor(x: 4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), granularity: 1.0) / 4.0;
9692
9693 if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
9694 (isInterlaceTiming ?
9695 dml_floor(x: (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, granularity: 1.0) :
9696 (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
9697 locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
9698 } else {
9699 locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
9700 }
9701#ifdef __DML_VBA_DEBUG__
9702 dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
9703 dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
9704 dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
9705 dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
9706 dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
9707 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
9708 dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
9709 dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
9710 dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9711 dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
9712 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
9713 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
9714 dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
9715#endif
9716 }
9717
9718 //Maximum Bandwidth Used
9719 s->TotalWRBandwidth = 0;
9720 s->WRBandwidth = 0;
9721 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9722 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
9723 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9724 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
9725 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9726 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9727 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
9728 }
9729 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
9730 }
9731
9732 locals->TotalDataReadBandwidth = 0;
9733 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9734 locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9735
9736#ifdef __DML_VBA_DEBUG__
9737 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
9738 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9739 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9740#endif
9741 }
9742
9743 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
9744 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9745 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
9746 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
9747 + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9748 }
9749 }
9750
9751 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9752 CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9753 CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9754 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
9755 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
9756 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
9757 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9758 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
9759 CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
9760 CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
9761 CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
9762 CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
9763 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
9764 CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9765 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9766 CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9767 CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9768 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
9769 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
9770 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
9771 CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
9772 CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
9773 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9774 CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
9775 CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
9776 CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9777 CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
9778 CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
9779 CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
9780 CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
9781 CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
9782 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
9783 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
9784 CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9785 CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9786 CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9787 CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9788 CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
9789 CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
9790 CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
9791 CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
9792 CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
9793 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
9794 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
9795 CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9796 CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
9797 CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9798 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
9799 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
9800 CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
9801 CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
9802 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
9803 CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
9804 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
9805 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
9806 CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
9807 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
9808 CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
9809 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
9810
9811 // Stutter Efficiency
9812 CalculateStutterEfficiency(scratch: &mode_lib->scratch,
9813 p: CalculateStutterEfficiency_params);
9814
9815#ifdef __DML_VBA_ALLOW_DELTA__
9816 {
9817 dml_float_t dummy_single[2];
9818 dml_uint_t dummy_integer[1];
9819 dml_bool_t dummy_boolean[1];
9820
9821 // Calculate z8 stutter eff assuming 0 reserved space
9822 CalculateStutterEfficiency(
9823 locals->CompressedBufferSizeInkByte,
9824 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9825 locals->UnboundedRequestEnabled,
9826 mode_lib->ms.ip.meta_fifo_size_in_kentries,
9827 mode_lib->ms.ip.zero_size_buffer_entries,
9828 mode_lib->ms.ip.pixel_chunk_size_kbytes,
9829 mode_lib->ms.num_active_planes,
9830 mode_lib->ms.ip.rob_buffer_size_kbytes,
9831 locals->TotalDataReadBandwidth,
9832 locals->Dcfclk,
9833 mode_lib->ms.ReturnBW,
9834 0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
9835 0, //mode_lib->ms.ip.compbuf_reserved_space_zs,
9836 mode_lib->ms.state.sr_exit_time_us,
9837 mode_lib->ms.state.sr_exit_z8_time_us,
9838 mode_lib->ms.policy.SynchronizeTimingsFinal,
9839 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
9840 locals->Watermark.StutterEnterPlusExitWatermark,
9841 locals->Watermark.Z8StutterEnterPlusExitWatermark,
9842 mode_lib->ms.ip.ptoi_supported,
9843 mode_lib->ms.cache_display_cfg.timing.Interlace,
9844 locals->MinTTUVBlank,
9845 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9846 mode_lib->ms.DETBufferSizeY,
9847 locals->BytePerPixelY,
9848 locals->BytePerPixelDETY,
9849 locals->SwathWidthY,
9850 mode_lib->ms.SwathHeightY,
9851 mode_lib->ms.SwathHeightC,
9852 mode_lib->ms.cache_display_cfg.surface.DCCRateLuma,
9853 mode_lib->ms.cache_display_cfg.surface.DCCRateChroma,
9854 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma,
9855 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma,
9856 mode_lib->ms.cache_display_cfg.timing.HTotal,
9857 mode_lib->ms.cache_display_cfg.timing.VTotal,
9858 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9859 mode_lib->ms.cache_display_cfg.plane.VRatio,
9860 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9861 locals->BlockHeight256BytesY,
9862 locals->BlockWidth256BytesY,
9863 locals->BlockHeight256BytesC,
9864 locals->BlockWidth256BytesC,
9865 locals->DCCYMaxUncompressedBlock,
9866 locals->DCCCMaxUncompressedBlock,
9867 mode_lib->ms.cache_display_cfg.timing.VActive,
9868 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9869 mode_lib->ms.cache_display_cfg.writeback.WritebackEnable,
9870 locals->ReadBandwidthSurfaceLuma,
9871 locals->ReadBandwidthSurfaceChroma,
9872 locals->meta_row_bw,
9873 locals->dpte_row_bw,
9874
9875 /* Output */
9876 &dummy_single[0],
9877 &dummy_single[1],
9878 &dummy_integer[0],
9879 &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase,
9880 &locals->Z8StutterEfficiencyBestCase,
9881 &locals->Z8NumberOfStutterBurstsPerFrameBestCase,
9882 &locals->StutterPeriodBestCase,
9883 &dummy_boolean[0]);
9884 }
9885#else
9886 locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank;
9887 locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency;
9888 locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame;
9889 locals->StutterPeriodBestCase = locals->StutterPeriod;
9890#endif
9891
9892#ifdef __DML_VBA_DEBUG__
9893 dml_print("DML::%s: --- END --- \n", __func__);
9894#endif
9895} // dml_core_mode_programming
9896
9897/// Function: dml_core_get_row_heights
9898/// @brief Get row height for DPTE and META with minimal input.
9899void dml_core_get_row_heights(
9900 dml_uint_t *dpte_row_height,
9901 dml_uint_t *meta_row_height,
9902 const struct display_mode_lib_st *mode_lib,
9903 dml_bool_t is_plane1,
9904 enum dml_source_format_class SourcePixelFormat,
9905 enum dml_swizzle_mode SurfaceTiling,
9906 enum dml_rotation_angle ScanDirection,
9907 dml_uint_t pitch,
9908 dml_uint_t GPUVMMinPageSizeKBytes)
9909{
9910 dml_uint_t BytePerPixelY;
9911 dml_uint_t BytePerPixelC;
9912 dml_float_t BytePerPixelInDETY;
9913 dml_float_t BytePerPixelInDETC;
9914 dml_uint_t BlockHeight256BytesY;
9915 dml_uint_t BlockHeight256BytesC;
9916 dml_uint_t BlockWidth256BytesY;
9917 dml_uint_t BlockWidth256BytesC;
9918 dml_uint_t MacroTileWidthY;
9919 dml_uint_t MacroTileWidthC;
9920 dml_uint_t MacroTileHeightY;
9921 dml_uint_t MacroTileHeightC;
9922
9923 dml_uint_t BytePerPixel;
9924 dml_uint_t BlockHeight256Bytes;
9925 dml_uint_t BlockWidth256Bytes;
9926 dml_uint_t MacroTileWidth;
9927 dml_uint_t MacroTileHeight;
9928 dml_uint_t PTEBufferSizeInRequests;
9929
9930 dml_uint_t dummy_integer[16];
9931
9932 CalculateBytePerPixelAndBlockSizes(
9933 SourcePixelFormat,
9934 SurfaceTiling,
9935
9936 /* Output */
9937 BytePerPixelY: &BytePerPixelY,
9938 BytePerPixelC: &BytePerPixelC,
9939 BytePerPixelDETY: &BytePerPixelInDETY,
9940 BytePerPixelDETC: &BytePerPixelInDETC,
9941 BlockHeight256BytesY: &BlockHeight256BytesY,
9942 BlockHeight256BytesC: &BlockHeight256BytesC,
9943 BlockWidth256BytesY: &BlockWidth256BytesY,
9944 BlockWidth256BytesC: &BlockWidth256BytesC,
9945 MacroTileHeightY: &MacroTileHeightY,
9946 MacroTileHeightC: &MacroTileHeightC,
9947 MacroTileWidthY: &MacroTileWidthY,
9948 MacroTileWidthC: &MacroTileWidthC);
9949
9950 BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
9951 BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
9952 BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
9953 MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
9954 MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
9955 PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
9956#ifdef __DML_RQ_DLG_CALC_DEBUG__
9957 dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
9958 dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
9959 dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
9960 dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
9961 dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
9962 dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
9963 dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
9964 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
9965 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
9966 dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
9967#endif
9968
9969 // just suppluy with enough parameters to calculate meta and dte
9970 CalculateVMAndRowBytes(
9971 ViewportStationary: 0, // dml_bool_t ViewportStationary,
9972 DCCEnable: 1, // dml_bool_t DCCEnable,
9973 NumberOfDPPs: 1, // dml_uint_t NumberOfDPPs,
9974 BlockHeight256Bytes,
9975 BlockWidth256Bytes,
9976 SourcePixelFormat,
9977 SurfaceTiling,
9978 BytePerPixel,
9979 SourceScan: ScanDirection,
9980 SwathWidth: 0, // dml_uint_t SwathWidth,
9981 ViewportHeight: 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful)
9982 ViewportXStart: 0, // dml_uint_t ViewportXStart,
9983 ViewportYStart: 0, // dml_uint_t ViewportYStart,
9984 GPUVMEnable: 1, // dml_bool_t GPUVMEnable,
9985 GPUVMMaxPageTableLevels: 4, // dml_uint_t GPUVMMaxPageTableLevels,
9986 GPUVMMinPageSizeKBytes,
9987 PTEBufferSizeInRequests,
9988 Pitch: pitch,
9989 DCCMetaPitch: 0, // dml_uint_t DCCMetaPitch,
9990 MacroTileWidth,
9991 MacroTileHeight,
9992
9993 // /* Output */
9994 MetaRowByte: &dummy_integer[0], // dml_uint_t *MetaRowByte,
9995 PixelPTEBytesPerRow: &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow,
9996 PixelPTEBytesPerRowStorage: &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage,
9997 dpte_row_width_ub: &dummy_integer[3], // dml_uint_t *dpte_row_width_ub,
9998 dpte_row_height,
9999 dpte_row_height_linear: &dummy_integer[4], // dml_uint_t *dpte_row_height_linear
10000 PixelPTEBytesPerRow_one_row_per_frame: &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
10001 dpte_row_width_ub_one_row_per_frame: &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame,
10002 dpte_row_height_one_row_per_frame: &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame,
10003 MetaRequestWidth: &dummy_integer[8], // dml_uint_t *MetaRequestWidth,
10004 MetaRequestHeight: &dummy_integer[9], // dml_uint_t *MetaRequestHeight,
10005 meta_row_width: &dummy_integer[10], // dml_uint_t *meta_row_width,
10006 meta_row_height,
10007 PixelPTEReqWidth: &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth,
10008 PixelPTEReqHeight: &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight,
10009 PTERequestSize: &dummy_integer[13], // dml_uint_t *PTERequestSize,
10010 DPDE0BytesFrame: &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame,
10011 MetaPTEBytesFrame: &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame)
10012
10013#ifdef __DML_RQ_DLG_CALC_DEBUG__
10014 dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
10015 dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
10016#endif
10017}
10018
10019static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(
10020 const struct soc_states_st *states,
10021 dml_uint_t state_idx)
10022{
10023 dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);
10024
10025 if (state_idx >= (dml_uint_t)states->num_states) {
10026 dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
10027 ASSERT(0);
10028 }
10029 return (states->state_array[state_idx]);
10030}
10031
10032/// @brief Copy the parameters to a calculation struct, it actually only need when the DML needs to have
10033/// the intelligence to re-calculate when any of display cfg, bbox, or policy changes since last calculated.
10034///
10035static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
10036 dml_uint_t state_idx)
10037{
10038 mode_lib->ms.state_idx = state_idx;
10039 mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1;
10040 mode_lib->ms.soc = mode_lib->soc;
10041 mode_lib->ms.ip = mode_lib->ip;
10042 mode_lib->ms.policy = mode_lib->policy;
10043 mode_lib->ms.state = dml_get_soc_state_bounding_box(states: &mode_lib->states, state_idx);
10044 mode_lib->ms.max_state = dml_get_soc_state_bounding_box(states: &mode_lib->states, state_idx: mode_lib->states.num_states - 1);
10045}
10046
10047static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
10048 const struct dml_display_cfg_st *display_cfg)
10049{
10050 mode_lib->ms.cache_display_cfg = *display_cfg;
10051}
10052
10053static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
10054{
10055 struct soc_state_bounding_box_st *state = &mode_lib->ms.state;
10056
10057 // Default values, SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in mode_check step
10058 // If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcflk for the mode support
10059 mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz;
10060 mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts;
10061 mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz;
10062 mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz;
10063}
10064
10065/// @brief Use display_cfg directly for mode_support calculation
10066/// Calculated values and informational output are stored in mode_lib.vba data struct
10067/// The display configuration is described with pipes struct and num_pipes
10068/// This function is used when physical resource mapping is not finalized (for example,
10069/// don't know how many pipes to represent a surface)
10070/// @param mode_lib Contains the bounding box and policy setting.
10071/// @param state_idx Power state index
10072/// @param display_cfg Display configurations. A display
10073dml_bool_t dml_mode_support(
10074 struct display_mode_lib_st *mode_lib,
10075 dml_uint_t state_idx,
10076 const struct dml_display_cfg_st *display_cfg)
10077{
10078 dml_bool_t is_mode_support;
10079
10080 dml_print("DML::%s: ------------- START ----------\n", __func__);
10081 cache_ip_soc_cfg(mode_lib, state_idx);
10082 cache_display_cfg(mode_lib, display_cfg);
10083
10084 fetch_socbb_params(mode_lib);
10085
10086 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10087
10088 is_mode_support = dml_core_mode_support(mode_lib);
10089
10090 dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support);
10091 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10092 return is_mode_support;
10093}
10094
10095/// @Brief A function to calculate the programming values for DCN DCHUB (Assume mode is supported)
10096/// The output will be stored in the mode_lib.mp (mode_program_st) data struct and those can be accessed via the getter functions
10097/// Calculated values include: watermarks, dlg, rq reg, different clock frequency
10098/// This function returns 1 when there is no error.
10099/// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK
10100/// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values.
10101/// @param state_idx Power state idx chosen
10102/// @param display_cfg Display Congiuration
10103/// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
10104/// TODO: Add clk_cfg input, could be useful for standalone mode
10105dml_bool_t dml_mode_programming(
10106 struct display_mode_lib_st *mode_lib,
10107 dml_uint_t state_idx,
10108 const struct dml_display_cfg_st *display_cfg,
10109 bool call_standalone)
10110{
10111 struct dml_clk_cfg_st clk_cfg;
10112 memset(&clk_cfg, 0, sizeof(clk_cfg));
10113
10114 clk_cfg.dcfclk_option = dml_use_required_freq;
10115 clk_cfg.dispclk_option = dml_use_required_freq;
10116 for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
10117 clk_cfg.dppclk_option[k] = dml_use_required_freq;
10118
10119 dml_print("DML::%s: ------------- START ----------\n", __func__);
10120 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10121 dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
10122
10123 cache_ip_soc_cfg(mode_lib, state_idx);
10124 cache_display_cfg(mode_lib, display_cfg);
10125
10126 fetch_socbb_params(mode_lib);
10127 if (call_standalone) {
10128 mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support say immediate flip ok at max state/combine
10129 dml_core_mode_support_partial(mode_lib);
10130 }
10131
10132 dml_core_mode_programming(mode_lib, clk_cfg: &clk_cfg);
10133
10134 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10135 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
10136 return mode_lib->mp.PrefetchAndImmediateFlipSupported;
10137}
10138
10139static dml_uint_t mode_support_pwr_states(
10140 dml_uint_t *lowest_state_idx,
10141 struct display_mode_lib_st *mode_lib,
10142 const struct dml_display_cfg_st *display_cfg,
10143 dml_uint_t start_state_idx,
10144 dml_uint_t end_state_idx)
10145{
10146 dml_uint_t state_idx = 0;
10147 dml_bool_t mode_is_supported = 0;
10148 *lowest_state_idx = end_state_idx;
10149
10150 if (end_state_idx < start_state_idx)
10151 ASSERT(0);
10152
10153 if (end_state_idx >= mode_lib->states.num_states) // idx is 0-based
10154 ASSERT(0);
10155
10156 for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) {
10157 if (dml_mode_support(mode_lib, state_idx, display_cfg)) {
10158 dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx);
10159 mode_is_supported = 1;
10160 *lowest_state_idx = state_idx;
10161 break;
10162 }
10163 }
10164
10165 return mode_is_supported;
10166}
10167
10168dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
10169{
10170 dml_uint_t result;
10171
10172 result = mode_support_pwr_states(lowest_state_idx: &in_out_params->out_lowest_state_idx,
10173 mode_lib: in_out_params->mode_lib,
10174 display_cfg: in_out_params->in_display_cfg,
10175 start_state_idx: 0,
10176 end_state_idx: in_out_params->mode_lib->states.num_states - 1);
10177
10178 if (result)
10179 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
10180
10181 return result;
10182}
10183
10184dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
10185{
10186 dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
10187 dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]);
10188 return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe);
10189}
10190
// Generates the getter `type dml_get_<variable>(mode_lib, surface_idx)`.
// The index passed by the caller is first translated through mode_lib->mp.pipe_plane[];
// the resulting plane index selects the element of internal_var that is returned, cast to type.
// internal_var is an array expression written in terms of the identifier mode_lib.
// The expansion is a complete function definition, so an invocation needs no trailing semicolon.
#define dml_get_per_surface_var_func(variable, type, internal_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
{ \
	const dml_uint_t plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
	return (type) (internal_var)[plane_idx]; \
}
10197
// Generates the getter `type dml_get_<var>(mode_lib)`, which returns internal_var cast to type.
// internal_var is an expression written in terms of the identifier mode_lib; it is
// parenthesized in the expansion so the cast always applies to the whole expression.
// The expansion is a complete function definition, so an invocation needs no trailing semicolon.
#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \
{ \
	return (type) (internal_var); \
}
10202
10203dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark);
10204dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
10205dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
10206dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency);
10207dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
10208dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
10209dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
10210dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
10211dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
10212dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
10213dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
10214dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
10215dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
10216dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
10217dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
10218dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
10219dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
10220dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
10221dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
10222dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
10223dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
10224dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
10225dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
10226dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
10227dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
10228dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW);
10229dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
10230dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
10231dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes);
10232dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
10233dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
10234dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
10235dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
10236dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
10237
10238dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency
10239dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated);
10240dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated);
10241dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank);
10242dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY);
10243dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC);
10244dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler);
10245dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler);
10246dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank);
10247dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank);
10248dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch);
10249dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip);
10250dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip);
10251dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L);
10252dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C);
10253dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L);
10254dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C);
10255dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank);
10256dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip);
10257dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank);
10258dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip);
10259dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm);
10260dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl);
10261dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma);
10262dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma);
10263dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch);
10264dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch);
10265dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma);
10266dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma);
10267dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch);
10268dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
10269dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTime);
10270dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch);
10271dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal);
10272dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal);
10273dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank);
10274dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank);
10275dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip);
10276dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip);
10277dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma);
10278dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma);
10279dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma);
10280dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma);
10281dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma);
10282dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma);
10283dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes);
10284dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes);
10285dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY);
10286dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC);
10287dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height);
10288dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma);
10289dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear);
10290dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma);
10291dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height);
10292dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma);
10293
10294dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup);
10295dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix);
10296dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix);
10297dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix);
10298dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC);
10299dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START);
10300dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY);
10301dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC);
10302dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen);
10303dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, mode_lib->mp.SurfaceSizeInTheMALL);
10304dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock);
10305dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock);
10306dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock);
10307dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock);
10308dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock);
10309dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock);
10310dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported);
10311dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE);
10312dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE);
10313dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow);
10314dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte);
10315dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte);
10316

source code of linux/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c