1// SPDX-License-Identifier: MIT
2/*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27#define UNIT_TEST 0
28#if !UNIT_TEST
29#include "dc.h"
30#endif
31#include "../display_mode_lib.h"
32#include "display_mode_vba_314.h"
33#include "../dml_inline_defs.h"
34
35/*
36 * NOTE:
37 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
38 *
39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
40 * ways. Unless there is something clearly wrong with it the code should
41 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 */
43
44#define BPP_INVALID 0
45#define BPP_BLENDED_PIPE 0xffffffff
46#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
47#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
48
// For DML-C changes that haven't been propagated to VBA yet
50//#define __DML_VBA_ALLOW_DELTA__
51
52// Move these to ip parameters/constant
53
// The vstartup value at which DML starts checking whether the mode can be supported
55#define __DML_VBA_MIN_VSTARTUP__ 9
56
57// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
58#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
59
// fudge factor for min dcfclk calculation
61#define __DML_MIN_DCFCLK_FACTOR__ 1.15
62
63typedef struct {
64 double DPPCLK;
65 double DISPCLK;
66 double PixelClock;
67 double DCFCLKDeepSleep;
68 unsigned int DPPPerPlane;
69 bool ScalerEnabled;
70 double VRatio;
71 double VRatioChroma;
72 enum scan_direction_class SourceScan;
73 unsigned int BlockWidth256BytesY;
74 unsigned int BlockHeight256BytesY;
75 unsigned int BlockWidth256BytesC;
76 unsigned int BlockHeight256BytesC;
77 unsigned int InterlaceEnable;
78 unsigned int NumberOfCursors;
79 unsigned int VBlank;
80 unsigned int HTotal;
81 unsigned int DCCEnable;
82 bool ODMCombineIsEnabled;
83 enum source_format_class SourcePixelFormat;
84 int BytePerPixelY;
85 int BytePerPixelC;
86 bool ProgressiveToInterlaceUnitInOPP;
87} Pipe;
88
89#define BPP_INVALID 0
90#define BPP_BLENDED_PIPE 0xffffffff
91
92static bool CalculateBytePerPixelAnd256BBlockSizes(
93 enum source_format_class SourcePixelFormat,
94 enum dm_swizzle_mode SurfaceTiling,
95 unsigned int *BytePerPixelY,
96 unsigned int *BytePerPixelC,
97 double *BytePerPixelDETY,
98 double *BytePerPixelDETC,
99 unsigned int *BlockHeight256BytesY,
100 unsigned int *BlockHeight256BytesC,
101 unsigned int *BlockWidth256BytesY,
102 unsigned int *BlockWidth256BytesC);
103static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
104static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
105static unsigned int dscceComputeDelay(
106 unsigned int bpc,
107 double BPP,
108 unsigned int sliceWidth,
109 unsigned int numSlices,
110 enum output_format_class pixelFormat,
111 enum output_encoder_class Output);
112static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
113static bool CalculatePrefetchSchedule(
114 struct display_mode_lib *mode_lib,
115 double HostVMInefficiencyFactor,
116 Pipe *myPipe,
117 unsigned int DSCDelay,
118 double DPPCLKDelaySubtotalPlusCNVCFormater,
119 double DPPCLKDelaySCL,
120 double DPPCLKDelaySCLLBOnly,
121 double DPPCLKDelayCNVCCursor,
122 double DISPCLKDelaySubtotal,
123 unsigned int DPP_RECOUT_WIDTH,
124 enum output_format_class OutputFormat,
125 unsigned int MaxInterDCNTileRepeaters,
126 unsigned int VStartup,
127 unsigned int MaxVStartup,
128 unsigned int GPUVMPageTableLevels,
129 bool GPUVMEnable,
130 bool HostVMEnable,
131 unsigned int HostVMMaxNonCachedPageTableLevels,
132 double HostVMMinPageSize,
133 bool DynamicMetadataEnable,
134 bool DynamicMetadataVMEnabled,
135 int DynamicMetadataLinesBeforeActiveRequired,
136 unsigned int DynamicMetadataTransmittedBytes,
137 double UrgentLatency,
138 double UrgentExtraLatency,
139 double TCalc,
140 unsigned int PDEAndMetaPTEBytesFrame,
141 unsigned int MetaRowByte,
142 unsigned int PixelPTEBytesPerRow,
143 double PrefetchSourceLinesY,
144 unsigned int SwathWidthY,
145 double VInitPreFillY,
146 unsigned int MaxNumSwathY,
147 double PrefetchSourceLinesC,
148 unsigned int SwathWidthC,
149 double VInitPreFillC,
150 unsigned int MaxNumSwathC,
151 int swath_width_luma_ub,
152 int swath_width_chroma_ub,
153 unsigned int SwathHeightY,
154 unsigned int SwathHeightC,
155 double TWait,
156 double *DSTXAfterScaler,
157 double *DSTYAfterScaler,
158 double *DestinationLinesForPrefetch,
159 double *PrefetchBandwidth,
160 double *DestinationLinesToRequestVMInVBlank,
161 double *DestinationLinesToRequestRowInVBlank,
162 double *VRatioPrefetchY,
163 double *VRatioPrefetchC,
164 double *RequiredPrefetchPixDataBWLuma,
165 double *RequiredPrefetchPixDataBWChroma,
166 bool *NotEnoughTimeForDynamicMetadata,
167 double *Tno_bw,
168 double *prefetch_vmrow_bw,
169 double *Tdmdl_vm,
170 double *Tdmdl,
171 double *TSetup,
172 int *VUpdateOffsetPix,
173 double *VUpdateWidthPix,
174 double *VReadyOffsetPix);
175static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
176static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
177static void CalculateDCCConfiguration(
178 bool DCCEnabled,
179 bool DCCProgrammingAssumesScanDirectionUnknown,
180 enum source_format_class SourcePixelFormat,
181 unsigned int SurfaceWidthLuma,
182 unsigned int SurfaceWidthChroma,
183 unsigned int SurfaceHeightLuma,
184 unsigned int SurfaceHeightChroma,
185 double DETBufferSize,
186 unsigned int RequestHeight256ByteLuma,
187 unsigned int RequestHeight256ByteChroma,
188 enum dm_swizzle_mode TilingFormat,
189 unsigned int BytePerPixelY,
190 unsigned int BytePerPixelC,
191 double BytePerPixelDETY,
192 double BytePerPixelDETC,
193 enum scan_direction_class ScanOrientation,
194 unsigned int *MaxUncompressedBlockLuma,
195 unsigned int *MaxUncompressedBlockChroma,
196 unsigned int *MaxCompressedBlockLuma,
197 unsigned int *MaxCompressedBlockChroma,
198 unsigned int *IndependentBlockLuma,
199 unsigned int *IndependentBlockChroma);
200static double CalculatePrefetchSourceLines(
201 struct display_mode_lib *mode_lib,
202 double VRatio,
203 double vtaps,
204 bool Interlace,
205 bool ProgressiveToInterlaceUnitInOPP,
206 unsigned int SwathHeight,
207 unsigned int ViewportYStart,
208 double *VInitPreFill,
209 unsigned int *MaxNumSwath);
210static unsigned int CalculateVMAndRowBytes(
211 struct display_mode_lib *mode_lib,
212 bool DCCEnable,
213 unsigned int BlockHeight256Bytes,
214 unsigned int BlockWidth256Bytes,
215 enum source_format_class SourcePixelFormat,
216 unsigned int SurfaceTiling,
217 unsigned int BytePerPixel,
218 enum scan_direction_class ScanDirection,
219 unsigned int SwathWidth,
220 unsigned int ViewportHeight,
221 bool GPUVMEnable,
222 bool HostVMEnable,
223 unsigned int HostVMMaxNonCachedPageTableLevels,
224 unsigned int GPUVMMinPageSize,
225 unsigned int HostVMMinPageSize,
226 unsigned int PTEBufferSizeInRequests,
227 unsigned int Pitch,
228 unsigned int DCCMetaPitch,
229 unsigned int *MacroTileWidth,
230 unsigned int *MetaRowByte,
231 unsigned int *PixelPTEBytesPerRow,
232 bool *PTEBufferSizeNotExceeded,
233 int *dpte_row_width_ub,
234 unsigned int *dpte_row_height,
235 unsigned int *MetaRequestWidth,
236 unsigned int *MetaRequestHeight,
237 unsigned int *meta_row_width,
238 unsigned int *meta_row_height,
239 int *vm_group_bytes,
240 unsigned int *dpte_group_bytes,
241 unsigned int *PixelPTEReqWidth,
242 unsigned int *PixelPTEReqHeight,
243 unsigned int *PTERequestSize,
244 int *DPDE0BytesFrame,
245 int *MetaPTEBytesFrame);
246static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
247static void CalculateRowBandwidth(
248 bool GPUVMEnable,
249 enum source_format_class SourcePixelFormat,
250 double VRatio,
251 double VRatioChroma,
252 bool DCCEnable,
253 double LineTime,
254 unsigned int MetaRowByteLuma,
255 unsigned int MetaRowByteChroma,
256 unsigned int meta_row_height_luma,
257 unsigned int meta_row_height_chroma,
258 unsigned int PixelPTEBytesPerRowLuma,
259 unsigned int PixelPTEBytesPerRowChroma,
260 unsigned int dpte_row_height_luma,
261 unsigned int dpte_row_height_chroma,
262 double *meta_row_bw,
263 double *dpte_row_bw);
264
265static void CalculateFlipSchedule(
266 struct display_mode_lib *mode_lib,
267 unsigned int k,
268 double HostVMInefficiencyFactor,
269 double UrgentExtraLatency,
270 double UrgentLatency,
271 double PDEAndMetaPTEBytesPerFrame,
272 double MetaRowBytes,
273 double DPTEBytesPerRow);
274static double CalculateWriteBackDelay(
275 enum source_format_class WritebackPixelFormat,
276 double WritebackHRatio,
277 double WritebackVRatio,
278 unsigned int WritebackVTaps,
279 int WritebackDestinationWidth,
280 int WritebackDestinationHeight,
281 int WritebackSourceHeight,
282 unsigned int HTotal);
283
284static void CalculateVupdateAndDynamicMetadataParameters(
285 int MaxInterDCNTileRepeaters,
286 double DPPCLK,
287 double DISPCLK,
288 double DCFClkDeepSleep,
289 double PixelClock,
290 int HTotal,
291 int VBlank,
292 int DynamicMetadataTransmittedBytes,
293 int DynamicMetadataLinesBeforeActiveRequired,
294 int InterlaceEnable,
295 bool ProgressiveToInterlaceUnitInOPP,
296 double *TSetup,
297 double *Tdmbf,
298 double *Tdmec,
299 double *Tdmsks,
300 int *VUpdateOffsetPix,
301 double *VUpdateWidthPix,
302 double *VReadyOffsetPix);
303
304static void CalculateWatermarksAndDRAMSpeedChangeSupport(
305 struct display_mode_lib *mode_lib,
306 unsigned int PrefetchMode,
307 double DCFCLK,
308 double ReturnBW,
309 double UrgentLatency,
310 double ExtraLatency,
311 double SOCCLK,
312 double DCFCLKDeepSleep,
313 unsigned int DETBufferSizeY[],
314 unsigned int DETBufferSizeC[],
315 unsigned int SwathHeightY[],
316 unsigned int SwathHeightC[],
317 double SwathWidthY[],
318 double SwathWidthC[],
319 unsigned int DPPPerPlane[],
320 double BytePerPixelDETY[],
321 double BytePerPixelDETC[],
322 bool UnboundedRequestEnabled,
323 unsigned int CompressedBufferSizeInkByte,
324 enum clock_change_support *DRAMClockChangeSupport,
325 double *StutterExitWatermark,
326 double *StutterEnterPlusExitWatermark,
327 double *Z8StutterExitWatermark,
328 double *Z8StutterEnterPlusExitWatermark);
329
330static void CalculateDCFCLKDeepSleep(
331 struct display_mode_lib *mode_lib,
332 unsigned int NumberOfActivePlanes,
333 int BytePerPixelY[],
334 int BytePerPixelC[],
335 double VRatio[],
336 double VRatioChroma[],
337 double SwathWidthY[],
338 double SwathWidthC[],
339 unsigned int DPPPerPlane[],
340 double HRatio[],
341 double HRatioChroma[],
342 double PixelClock[],
343 double PSCL_THROUGHPUT[],
344 double PSCL_THROUGHPUT_CHROMA[],
345 double DPPCLK[],
346 double ReadBandwidthLuma[],
347 double ReadBandwidthChroma[],
348 int ReturnBusWidth,
349 double *DCFCLKDeepSleep);
350
351static void CalculateUrgentBurstFactor(
352 int swath_width_luma_ub,
353 int swath_width_chroma_ub,
354 unsigned int SwathHeightY,
355 unsigned int SwathHeightC,
356 double LineTime,
357 double UrgentLatency,
358 double CursorBufferSize,
359 unsigned int CursorWidth,
360 unsigned int CursorBPP,
361 double VRatio,
362 double VRatioC,
363 double BytePerPixelInDETY,
364 double BytePerPixelInDETC,
365 double DETBufferSizeY,
366 double DETBufferSizeC,
367 double *UrgentBurstFactorCursor,
368 double *UrgentBurstFactorLuma,
369 double *UrgentBurstFactorChroma,
370 bool *NotEnoughUrgentLatencyHiding);
371
372static void UseMinimumDCFCLK(
373 struct display_mode_lib *mode_lib,
374 int MaxPrefetchMode,
375 int ReorderingBytes);
376
377static void CalculatePixelDeliveryTimes(
378 unsigned int NumberOfActivePlanes,
379 double VRatio[],
380 double VRatioChroma[],
381 double VRatioPrefetchY[],
382 double VRatioPrefetchC[],
383 unsigned int swath_width_luma_ub[],
384 unsigned int swath_width_chroma_ub[],
385 unsigned int DPPPerPlane[],
386 double HRatio[],
387 double HRatioChroma[],
388 double PixelClock[],
389 double PSCL_THROUGHPUT[],
390 double PSCL_THROUGHPUT_CHROMA[],
391 double DPPCLK[],
392 int BytePerPixelC[],
393 enum scan_direction_class SourceScan[],
394 unsigned int NumberOfCursors[],
395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
397 unsigned int BlockWidth256BytesY[],
398 unsigned int BlockHeight256BytesY[],
399 unsigned int BlockWidth256BytesC[],
400 unsigned int BlockHeight256BytesC[],
401 double DisplayPipeLineDeliveryTimeLuma[],
402 double DisplayPipeLineDeliveryTimeChroma[],
403 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
404 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeLuma[],
406 double DisplayPipeRequestDeliveryTimeChroma[],
407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
409 double CursorRequestDeliveryTime[],
410 double CursorRequestDeliveryTimePrefetch[]);
411
412static void CalculateMetaAndPTETimes(
413 int NumberOfActivePlanes,
414 bool GPUVMEnable,
415 int MetaChunkSize,
416 int MinMetaChunkSizeBytes,
417 int HTotal[],
418 double VRatio[],
419 double VRatioChroma[],
420 double DestinationLinesToRequestRowInVBlank[],
421 double DestinationLinesToRequestRowInImmediateFlip[],
422 bool DCCEnable[],
423 double PixelClock[],
424 int BytePerPixelY[],
425 int BytePerPixelC[],
426 enum scan_direction_class SourceScan[],
427 int dpte_row_height[],
428 int dpte_row_height_chroma[],
429 int meta_row_width[],
430 int meta_row_width_chroma[],
431 int meta_row_height[],
432 int meta_row_height_chroma[],
433 int meta_req_width[],
434 int meta_req_width_chroma[],
435 int meta_req_height[],
436 int meta_req_height_chroma[],
437 int dpte_group_bytes[],
438 int PTERequestSizeY[],
439 int PTERequestSizeC[],
440 int PixelPTEReqWidthY[],
441 int PixelPTEReqHeightY[],
442 int PixelPTEReqWidthC[],
443 int PixelPTEReqHeightC[],
444 int dpte_row_width_luma_ub[],
445 int dpte_row_width_chroma_ub[],
446 double DST_Y_PER_PTE_ROW_NOM_L[],
447 double DST_Y_PER_PTE_ROW_NOM_C[],
448 double DST_Y_PER_META_ROW_NOM_L[],
449 double DST_Y_PER_META_ROW_NOM_C[],
450 double TimePerMetaChunkNominal[],
451 double TimePerChromaMetaChunkNominal[],
452 double TimePerMetaChunkVBlank[],
453 double TimePerChromaMetaChunkVBlank[],
454 double TimePerMetaChunkFlip[],
455 double TimePerChromaMetaChunkFlip[],
456 double time_per_pte_group_nom_luma[],
457 double time_per_pte_group_vblank_luma[],
458 double time_per_pte_group_flip_luma[],
459 double time_per_pte_group_nom_chroma[],
460 double time_per_pte_group_vblank_chroma[],
461 double time_per_pte_group_flip_chroma[]);
462
463static void CalculateVMGroupAndRequestTimes(
464 unsigned int NumberOfActivePlanes,
465 bool GPUVMEnable,
466 unsigned int GPUVMMaxPageTableLevels,
467 unsigned int HTotal[],
468 int BytePerPixelC[],
469 double DestinationLinesToRequestVMInVBlank[],
470 double DestinationLinesToRequestVMInImmediateFlip[],
471 bool DCCEnable[],
472 double PixelClock[],
473 int dpte_row_width_luma_ub[],
474 int dpte_row_width_chroma_ub[],
475 int vm_group_bytes[],
476 unsigned int dpde0_bytes_per_frame_ub_l[],
477 unsigned int dpde0_bytes_per_frame_ub_c[],
478 int meta_pte_bytes_per_frame_ub_l[],
479 int meta_pte_bytes_per_frame_ub_c[],
480 double TimePerVMGroupVBlank[],
481 double TimePerVMGroupFlip[],
482 double TimePerVMRequestVBlank[],
483 double TimePerVMRequestFlip[]);
484
485static void CalculateStutterEfficiency(
486 struct display_mode_lib *mode_lib,
487 int CompressedBufferSizeInkByte,
488 bool UnboundedRequestEnabled,
489 int ConfigReturnBufferSizeInKByte,
490 int MetaFIFOSizeInKEntries,
491 int ZeroSizeBufferEntries,
492 int NumberOfActivePlanes,
493 int ROBBufferSizeInKByte,
494 double TotalDataReadBandwidth,
495 double DCFCLK,
496 double ReturnBW,
497 double COMPBUF_RESERVED_SPACE_64B,
498 double COMPBUF_RESERVED_SPACE_ZS,
499 double SRExitTime,
500 double SRExitZ8Time,
501 bool SynchronizedVBlank,
502 double Z8StutterEnterPlusExitWatermark,
503 double StutterEnterPlusExitWatermark,
504 bool ProgressiveToInterlaceUnitInOPP,
505 bool Interlace[],
506 double MinTTUVBlank[],
507 int DPPPerPlane[],
508 unsigned int DETBufferSizeY[],
509 int BytePerPixelY[],
510 double BytePerPixelDETY[],
511 double SwathWidthY[],
512 int SwathHeightY[],
513 int SwathHeightC[],
514 double NetDCCRateLuma[],
515 double NetDCCRateChroma[],
516 double DCCFractionOfZeroSizeRequestsLuma[],
517 double DCCFractionOfZeroSizeRequestsChroma[],
518 int HTotal[],
519 int VTotal[],
520 double PixelClock[],
521 double VRatio[],
522 enum scan_direction_class SourceScan[],
523 int BlockHeight256BytesY[],
524 int BlockWidth256BytesY[],
525 int BlockHeight256BytesC[],
526 int BlockWidth256BytesC[],
527 int DCCYMaxUncompressedBlock[],
528 int DCCCMaxUncompressedBlock[],
529 int VActive[],
530 bool DCCEnable[],
531 bool WritebackEnable[],
532 double ReadBandwidthPlaneLuma[],
533 double ReadBandwidthPlaneChroma[],
534 double meta_row_bw[],
535 double dpte_row_bw[],
536 double *StutterEfficiencyNotIncludingVBlank,
537 double *StutterEfficiency,
538 int *NumberOfStutterBurstsPerFrame,
539 double *Z8StutterEfficiencyNotIncludingVBlank,
540 double *Z8StutterEfficiency,
541 int *Z8NumberOfStutterBurstsPerFrame,
542 double *StutterPeriod);
543
544static void CalculateSwathAndDETConfiguration(
545 bool ForceSingleDPP,
546 int NumberOfActivePlanes,
547 unsigned int DETBufferSizeInKByte,
548 double MaximumSwathWidthLuma[],
549 double MaximumSwathWidthChroma[],
550 enum scan_direction_class SourceScan[],
551 enum source_format_class SourcePixelFormat[],
552 enum dm_swizzle_mode SurfaceTiling[],
553 int ViewportWidth[],
554 int ViewportHeight[],
555 int SurfaceWidthY[],
556 int SurfaceWidthC[],
557 int SurfaceHeightY[],
558 int SurfaceHeightC[],
559 int Read256BytesBlockHeightY[],
560 int Read256BytesBlockHeightC[],
561 int Read256BytesBlockWidthY[],
562 int Read256BytesBlockWidthC[],
563 enum odm_combine_mode ODMCombineEnabled[],
564 int BlendingAndTiming[],
565 int BytePerPixY[],
566 int BytePerPixC[],
567 double BytePerPixDETY[],
568 double BytePerPixDETC[],
569 int HActive[],
570 double HRatio[],
571 double HRatioChroma[],
572 int DPPPerPlane[],
573 int swath_width_luma_ub[],
574 int swath_width_chroma_ub[],
575 double SwathWidth[],
576 double SwathWidthChroma[],
577 int SwathHeightY[],
578 int SwathHeightC[],
579 unsigned int DETBufferSizeY[],
580 unsigned int DETBufferSizeC[],
581 bool ViewportSizeSupportPerPlane[],
582 bool *ViewportSizeSupport);
583static void CalculateSwathWidth(
584 bool ForceSingleDPP,
585 int NumberOfActivePlanes,
586 enum source_format_class SourcePixelFormat[],
587 enum scan_direction_class SourceScan[],
588 int ViewportWidth[],
589 int ViewportHeight[],
590 int SurfaceWidthY[],
591 int SurfaceWidthC[],
592 int SurfaceHeightY[],
593 int SurfaceHeightC[],
594 enum odm_combine_mode ODMCombineEnabled[],
595 int BytePerPixY[],
596 int BytePerPixC[],
597 int Read256BytesBlockHeightY[],
598 int Read256BytesBlockHeightC[],
599 int Read256BytesBlockWidthY[],
600 int Read256BytesBlockWidthC[],
601 int BlendingAndTiming[],
602 int HActive[],
603 double HRatio[],
604 int DPPPerPlane[],
605 double SwathWidthSingleDPPY[],
606 double SwathWidthSingleDPPC[],
607 double SwathWidthY[],
608 double SwathWidthC[],
609 int MaximumSwathHeightY[],
610 int MaximumSwathHeightC[],
611 int swath_width_luma_ub[],
612 int swath_width_chroma_ub[]);
613
614static double CalculateExtraLatency(
615 int RoundTripPingLatencyCycles,
616 int ReorderingBytes,
617 double DCFCLK,
618 int TotalNumberOfActiveDPP,
619 int PixelChunkSizeInKByte,
620 int TotalNumberOfDCCActiveDPP,
621 int MetaChunkSize,
622 double ReturnBW,
623 bool GPUVMEnable,
624 bool HostVMEnable,
625 int NumberOfActivePlanes,
626 int NumberOfDPP[],
627 int dpte_group_bytes[],
628 double HostVMInefficiencyFactor,
629 double HostVMMinPageSize,
630 int HostVMMaxNonCachedPageTableLevels);
631
632static double CalculateExtraLatencyBytes(
633 int ReorderingBytes,
634 int TotalNumberOfActiveDPP,
635 int PixelChunkSizeInKByte,
636 int TotalNumberOfDCCActiveDPP,
637 int MetaChunkSize,
638 bool GPUVMEnable,
639 bool HostVMEnable,
640 int NumberOfActivePlanes,
641 int NumberOfDPP[],
642 int dpte_group_bytes[],
643 double HostVMInefficiencyFactor,
644 double HostVMMinPageSize,
645 int HostVMMaxNonCachedPageTableLevels);
646
647static double CalculateUrgentLatency(
648 double UrgentLatencyPixelDataOnly,
649 double UrgentLatencyPixelMixedWithVMData,
650 double UrgentLatencyVMDataOnly,
651 bool DoUrgentLatencyAdjustment,
652 double UrgentLatencyAdjustmentFabricClockComponent,
653 double UrgentLatencyAdjustmentFabricClockReference,
654 double FabricClockSingle);
655
656static void CalculateUnboundedRequestAndCompressedBufferSize(
657 unsigned int DETBufferSizeInKByte,
658 int ConfigReturnBufferSizeInKByte,
659 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
660 int TotalActiveDPP,
661 bool NoChromaPlanes,
662 int MaxNumDPP,
663 int CompressedBufferSegmentSizeInkByteFinal,
664 enum output_encoder_class *Output,
665 bool *UnboundedRequestEnabled,
666 int *CompressedBufferSizeInkByte);
667
668static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
669static unsigned int CalculateMaxVStartup(
670 unsigned int VTotal,
671 unsigned int VActive,
672 unsigned int VBlankNom,
673 unsigned int HTotal,
674 double PixelClock,
675 bool ProgressiveTointerlaceUnitinOPP,
676 bool Interlace,
677 unsigned int VBlankNomDefaultUS,
678 double WritebackDelayTime);
679
/*
 * dml314_recalculate - run the full recalculation sequence on @mode_lib.
 *
 * Invokes, in this fixed order and each with the same @mode_lib:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Nothing is returned; any results are left in whatever state the callees
 * keep behind @mode_lib.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	// Configuration steps that precede the main calculation.
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	// Final step: the main calculation.
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
690
691static unsigned int dscceComputeDelay(
692 unsigned int bpc,
693 double BPP,
694 unsigned int sliceWidth,
695 unsigned int numSlices,
696 enum output_format_class pixelFormat,
697 enum output_encoder_class Output)
698{
699 // valid bpc = source bits per component in the set of {8, 10, 12}
700 // valid bpp = increments of 1/16 of a bit
701 // min = 6/7/8 in N420/N422/444, respectively
702 // max = such that compression is 1:1
703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
704 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
706
707 // fixed value
708 unsigned int rcModelSize = 8192;
709
710 // N422/N420 operate at 2 pixels per clock
711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
712
713 if (pixelFormat == dm_420)
714 pixelsPerClock = 2;
715 else if (pixelFormat == dm_444)
716 pixelsPerClock = 1;
717 else if (pixelFormat == dm_n422)
718 pixelsPerClock = 2;
719 // #all other modes operate at 1 pixel per clock
720 else
721 pixelsPerClock = 1;
722
723 //initial transmit delay as per PPS
724 initalXmitDelay = dml_round(a: rcModelSize / 2.0 / BPP / pixelsPerClock);
725
726 //compute ssm delay
727 if (bpc == 8)
728 D = 81;
729 else if (bpc == 10)
730 D = 89;
731 else
732 D = 113;
733
734 //divide by pixel per cycle to compute slice width as seen by DSC
735 w = sliceWidth / pixelsPerClock;
736
737 //422 mode has an additional cycle of delay
738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
739 s = 0;
740 else
741 s = 1;
742
743 //main calculation for the dscce
744 ix = initalXmitDelay + 45;
745 wx = (w + 2) / 3;
746 P = 3 * wx - w;
747 l0 = ix / w;
748 a = ix + P * l0;
749 ax = (a + 2) / 3 + D + 6 + 1;
750 L = (ax + wx - 1) / wx;
751 if ((ix % w) == 0 && P != 0)
752 lstall = 1;
753 else
754 lstall = 0;
755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
756
757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
758 pixels = Delay * 3 * pixelsPerClock;
759 return pixels;
760}
761
762static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
763{
764 unsigned int Delay = 0;
765
766 if (pixelFormat == dm_420) {
767 // sfr
768 Delay = Delay + 2;
769 // dsccif
770 Delay = Delay + 0;
771 // dscc - input deserializer
772 Delay = Delay + 3;
773 // dscc gets pixels every other cycle
774 Delay = Delay + 2;
775 // dscc - input cdc fifo
776 Delay = Delay + 12;
777 // dscc gets pixels every other cycle
778 Delay = Delay + 13;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 7;
783 // dscc gets pixels every other cycle
784 Delay = Delay + 3;
785 // dscc - cdc uncertainty
786 Delay = Delay + 2;
787 // dscc - output serializer
788 Delay = Delay + 1;
789 // sft
790 Delay = Delay + 1;
791 } else if (pixelFormat == dm_n422) {
792 // sfr
793 Delay = Delay + 2;
794 // dsccif
795 Delay = Delay + 1;
796 // dscc - input deserializer
797 Delay = Delay + 5;
798 // dscc - input cdc fifo
799 Delay = Delay + 25;
800 // dscc - cdc uncertainty
801 Delay = Delay + 2;
802 // dscc - output cdc fifo
803 Delay = Delay + 10;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // dscc - output serializer
807 Delay = Delay + 1;
808 // sft
809 Delay = Delay + 1;
810 } else {
811 // sfr
812 Delay = Delay + 2;
813 // dsccif
814 Delay = Delay + 0;
815 // dscc - input deserializer
816 Delay = Delay + 3;
817 // dscc - input cdc fifo
818 Delay = Delay + 12;
819 // dscc - cdc uncertainty
820 Delay = Delay + 2;
821 // dscc - output cdc fifo
822 Delay = Delay + 7;
823 // dscc - output serializer
824 Delay = Delay + 1;
825 // dscc - cdc uncertainty
826 Delay = Delay + 2;
827 // sft
828 Delay = Delay + 1;
829 }
830
831 return Delay;
832}
833
834static bool CalculatePrefetchSchedule(
835 struct display_mode_lib *mode_lib,
836 double HostVMInefficiencyFactor,
837 Pipe *myPipe,
838 unsigned int DSCDelay,
839 double DPPCLKDelaySubtotalPlusCNVCFormater,
840 double DPPCLKDelaySCL,
841 double DPPCLKDelaySCLLBOnly,
842 double DPPCLKDelayCNVCCursor,
843 double DISPCLKDelaySubtotal,
844 unsigned int DPP_RECOUT_WIDTH,
845 enum output_format_class OutputFormat,
846 unsigned int MaxInterDCNTileRepeaters,
847 unsigned int VStartup,
848 unsigned int MaxVStartup,
849 unsigned int GPUVMPageTableLevels,
850 bool GPUVMEnable,
851 bool HostVMEnable,
852 unsigned int HostVMMaxNonCachedPageTableLevels,
853 double HostVMMinPageSize,
854 bool DynamicMetadataEnable,
855 bool DynamicMetadataVMEnabled,
856 int DynamicMetadataLinesBeforeActiveRequired,
857 unsigned int DynamicMetadataTransmittedBytes,
858 double UrgentLatency,
859 double UrgentExtraLatency,
860 double TCalc,
861 unsigned int PDEAndMetaPTEBytesFrame,
862 unsigned int MetaRowByte,
863 unsigned int PixelPTEBytesPerRow,
864 double PrefetchSourceLinesY,
865 unsigned int SwathWidthY,
866 double VInitPreFillY,
867 unsigned int MaxNumSwathY,
868 double PrefetchSourceLinesC,
869 unsigned int SwathWidthC,
870 double VInitPreFillC,
871 unsigned int MaxNumSwathC,
872 int swath_width_luma_ub,
873 int swath_width_chroma_ub,
874 unsigned int SwathHeightY,
875 unsigned int SwathHeightC,
876 double TWait,
877 double *DSTXAfterScaler,
878 double *DSTYAfterScaler,
879 double *DestinationLinesForPrefetch,
880 double *PrefetchBandwidth,
881 double *DestinationLinesToRequestVMInVBlank,
882 double *DestinationLinesToRequestRowInVBlank,
883 double *VRatioPrefetchY,
884 double *VRatioPrefetchC,
885 double *RequiredPrefetchPixDataBWLuma,
886 double *RequiredPrefetchPixDataBWChroma,
887 bool *NotEnoughTimeForDynamicMetadata,
888 double *Tno_bw,
889 double *prefetch_vmrow_bw,
890 double *Tdmdl_vm,
891 double *Tdmdl,
892 double *TSetup,
893 int *VUpdateOffsetPix,
894 double *VUpdateWidthPix,
895 double *VReadyOffsetPix)
896{
897 bool MyError = false;
898 unsigned int DPPCycles, DISPCLKCycles;
899 double DSTTotalPixelsAfterScaler;
900 double LineTime;
901 double dst_y_prefetch_equ;
902#ifdef __DML_VBA_DEBUG__
903 double Tsw_oto;
904#endif
905 double prefetch_bw_oto;
906 double prefetch_bw_pr;
907 double Tvm_oto;
908 double Tr0_oto;
909 double Tvm_oto_lines;
910 double Tr0_oto_lines;
911 double dst_y_prefetch_oto;
912 double TimeForFetchingMetaPTE = 0;
913 double TimeForFetchingRowInVBlank = 0;
914 double LinesToRequestPrefetchPixelData = 0;
915 unsigned int HostVMDynamicLevelsTrips;
916 double trip_to_mem;
917 double Tvm_trips;
918 double Tr0_trips;
919 double Tvm_trips_rounded;
920 double Tr0_trips_rounded;
921 double Lsw_oto;
922 double Tpre_rounded;
923 double prefetch_bw_equ;
924 double Tvm_equ;
925 double Tr0_equ;
926 double Tdmbf;
927 double Tdmec;
928 double Tdmsks;
929 double prefetch_sw_bytes;
930 double bytes_pp;
931 double dep_bytes;
932 int max_vratio_pre = 4;
933 double min_Lsw;
934 double Tsw_est1 = 0;
935 double Tsw_est3 = 0;
936 double max_Tsw = 0;
937
938 if (GPUVMEnable == true && HostVMEnable == true) {
939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
940 } else {
941 HostVMDynamicLevelsTrips = 0;
942 }
943#ifdef __DML_VBA_DEBUG__
944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
945#endif
946 CalculateVupdateAndDynamicMetadataParameters(
947 MaxInterDCNTileRepeaters,
948 DPPCLK: myPipe->DPPCLK,
949 DISPCLK: myPipe->DISPCLK,
950 DCFClkDeepSleep: myPipe->DCFCLKDeepSleep,
951 PixelClock: myPipe->PixelClock,
952 HTotal: myPipe->HTotal,
953 VBlank: myPipe->VBlank,
954 DynamicMetadataTransmittedBytes,
955 DynamicMetadataLinesBeforeActiveRequired,
956 InterlaceEnable: myPipe->InterlaceEnable,
957 ProgressiveToInterlaceUnitInOPP: myPipe->ProgressiveToInterlaceUnitInOPP,
958 TSetup,
959 Tdmbf: &Tdmbf,
960 Tdmec: &Tdmec,
961 Tdmsks: &Tdmsks,
962 VUpdateOffsetPix,
963 VUpdateWidthPix,
964 VReadyOffsetPix);
965
966 LineTime = myPipe->HTotal / myPipe->PixelClock;
967 trip_to_mem = UrgentLatency;
968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
969
970#ifdef __DML_VBA_ALLOW_DELTA__
971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
972#else
973 if (DynamicMetadataVMEnabled == true) {
974#endif
975 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
976 } else {
977 *Tdmdl = TWait + UrgentExtraLatency;
978 }
979
980#ifdef __DML_VBA_ALLOW_DELTA__
981 if (DynamicMetadataEnable == false) {
982 *Tdmdl = 0.0;
983 }
984#endif
985
986 if (DynamicMetadataEnable == true) {
987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
988 *NotEnoughTimeForDynamicMetadata = true;
989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997 } else {
998 *NotEnoughTimeForDynamicMetadata = false;
999 }
1000
1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1002
1003 if (myPipe->ScalerEnabled)
1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1005 else
1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1007
1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1009
1010 DISPCLKCycles = DISPCLKDelaySubtotal;
1011
1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1013 return true;
1014
1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1016
1017#ifdef __DML_VBA_DEBUG__
1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1026#endif
1027
1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1029
1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1031 *DSTYAfterScaler = 1;
1032 else
1033 *DSTYAfterScaler = 0;
1034
1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1036 *DSTYAfterScaler = dml_floor(a: DSTTotalPixelsAfterScaler / myPipe->HTotal, granularity: 1);
1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1038
1039#ifdef __DML_VBA_DEBUG__
1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1041#endif
1042
1043 MyError = false;
1044
1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1046 Tvm_trips_rounded = dml_ceil(a: 4.0 * Tvm_trips / LineTime, granularity: 1) / 4 * LineTime;
1047 Tr0_trips_rounded = dml_ceil(a: 4.0 * Tr0_trips / LineTime, granularity: 1) / 4 * LineTime;
1048
1049#ifdef __DML_VBA_ALLOW_DELTA__
1050 if (!myPipe->DCCEnable) {
1051 Tr0_trips = 0.0;
1052 Tr0_trips_rounded = 0.0;
1053 }
1054#endif
1055
1056 if (!GPUVMEnable) {
1057 Tvm_trips = 0.0;
1058 Tvm_trips_rounded = 0.0;
1059 }
1060
1061 if (GPUVMEnable) {
1062 if (GPUVMPageTableLevels >= 3) {
1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1064 } else {
1065 *Tno_bw = 0;
1066 }
1067 } else if (!myPipe->DCCEnable) {
1068 *Tno_bw = LineTime;
1069 } else {
1070 *Tno_bw = LineTime / 4;
1071 }
1072
1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1075 else
1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1077 /*rev 99*/
1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1079 prefetch_bw_pr = dml_min(a: 1, b: myPipe->VRatio) * prefetch_bw_pr;
1080 max_Tsw = dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) * LineTime;
1081 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1082 prefetch_bw_oto = dml_max(a: prefetch_bw_pr, b: prefetch_sw_bytes / max_Tsw);
1083
1084 min_Lsw = dml_max(a: 1, b: dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) / max_vratio_pre);
1085 Lsw_oto = dml_ceil(a: 4 * dml_max(a: prefetch_sw_bytes / prefetch_bw_oto / LineTime, b: min_Lsw), granularity: 1) / 4;
1086#ifdef __DML_VBA_DEBUG__
1087 Tsw_oto = Lsw_oto * LineTime;
1088#endif
1089
1090
1091#ifdef __DML_VBA_DEBUG__
1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1098#endif
1099
1100 if (GPUVMEnable == true)
1101 Tvm_oto = dml_max3(a: *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, b: Tvm_trips, c: LineTime / 4.0);
1102 else
1103 Tvm_oto = LineTime / 4.0;
1104
1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1106 Tr0_oto = dml_max4(a: (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, b: Tr0_trips, // PREVIOUS_ERROR (missing this term)
1107 c: LineTime - Tvm_oto,
1108 d: LineTime / 4);
1109 } else {
1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1111 }
1112
1113#ifdef __DML_VBA_DEBUG__
1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1123#endif
1124
1125 Tvm_oto_lines = dml_ceil(a: 4.0 * Tvm_oto / LineTime, granularity: 1) / 4.0;
1126 Tr0_oto_lines = dml_ceil(a: 4.0 * Tr0_oto / LineTime, granularity: 1) / 4.0;
1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(a: TWait + TCalc, b: *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1129 dst_y_prefetch_equ = dml_floor(a: 4.0 * (dst_y_prefetch_equ + 0.125), granularity: 1) / 4.0;
1130 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1131
1132 dep_bytes = dml_max(a: PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, b: MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1133
1134 if (prefetch_sw_bytes < dep_bytes)
1135 prefetch_sw_bytes = 2 * dep_bytes;
1136
1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1143
1144 dml_print("DML: LineTime: %f\n", LineTime);
1145 dml_print("DML: VStartup: %d\n", VStartup);
1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1152 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1157
1158 *PrefetchBandwidth = 0;
1159 *DestinationLinesToRequestVMInVBlank = 0;
1160 *DestinationLinesToRequestRowInVBlank = 0;
1161 *VRatioPrefetchY = 0;
1162 *VRatioPrefetchC = 0;
1163 *RequiredPrefetchPixDataBWLuma = 0;
1164 if (dst_y_prefetch_equ > 1) {
1165 double PrefetchBandwidth1;
1166 double PrefetchBandwidth2;
1167 double PrefetchBandwidth3;
1168 double PrefetchBandwidth4;
1169
1170 if (Tpre_rounded - *Tno_bw > 0) {
1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1174 } else {
1175 PrefetchBandwidth1 = 0;
1176 }
1177
1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1181 }
1182
1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1185 else
1186 PrefetchBandwidth2 = 0;
1187
1188 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1192 } else {
1193 PrefetchBandwidth3 = 0;
1194 }
1195
1196#ifdef __DML_VBA_DEBUG__
1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1200#endif
1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1204 }
1205
1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1208 else
1209 PrefetchBandwidth4 = 0;
1210
1211 {
1212 bool Case1OK;
1213 bool Case2OK;
1214 bool Case3OK;
1215
1216 if (PrefetchBandwidth1 > 0) {
1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1219 Case1OK = true;
1220 } else {
1221 Case1OK = false;
1222 }
1223 } else {
1224 Case1OK = false;
1225 }
1226
1227 if (PrefetchBandwidth2 > 0) {
1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1230 Case2OK = true;
1231 } else {
1232 Case2OK = false;
1233 }
1234 } else {
1235 Case2OK = false;
1236 }
1237
1238 if (PrefetchBandwidth3 > 0) {
1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1241 Case3OK = true;
1242 } else {
1243 Case3OK = false;
1244 }
1245 } else {
1246 Case3OK = false;
1247 }
1248
1249 if (Case1OK) {
1250 prefetch_bw_equ = PrefetchBandwidth1;
1251 } else if (Case2OK) {
1252 prefetch_bw_equ = PrefetchBandwidth2;
1253 } else if (Case3OK) {
1254 prefetch_bw_equ = PrefetchBandwidth3;
1255 } else {
1256 prefetch_bw_equ = PrefetchBandwidth4;
1257 }
1258
1259#ifdef __DML_VBA_DEBUG__
1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1264#endif
1265
1266 if (prefetch_bw_equ > 0) {
1267 if (GPUVMEnable == true) {
1268 Tvm_equ = dml_max3(a: *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, b: Tvm_trips, c: LineTime / 4);
1269 } else {
1270 Tvm_equ = LineTime / 4;
1271 }
1272
1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1274 Tr0_equ = dml_max4(
1275 a: (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1276 b: Tr0_trips,
1277 c: (LineTime - Tvm_equ) / 2,
1278 d: LineTime / 4);
1279 } else {
1280 Tr0_equ = (LineTime - Tvm_equ) / 2;
1281 }
1282 } else {
1283 Tvm_equ = 0;
1284 Tr0_equ = 0;
1285 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1286 }
1287 }
1288
1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1291 TimeForFetchingMetaPTE = Tvm_oto;
1292 TimeForFetchingRowInVBlank = Tr0_oto;
1293 *PrefetchBandwidth = prefetch_bw_oto;
1294 } else {
1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1296 TimeForFetchingMetaPTE = Tvm_equ;
1297 TimeForFetchingRowInVBlank = Tr0_equ;
1298 *PrefetchBandwidth = prefetch_bw_equ;
1299 }
1300
1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(a: 4.0 * TimeForFetchingMetaPTE / LineTime, granularity: 1.0) / 4.0;
1302
1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(a: 4.0 * TimeForFetchingRowInVBlank / LineTime, granularity: 1.0) / 4.0;
1304
1305#ifdef __DML_VBA_ALLOW_DELTA__
1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1307 // See note above dated 5/30/2018
1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1310#else
1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1312#endif
1313
1314#ifdef __DML_VBA_DEBUG__
1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1322#endif
1323
1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1325
1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1327 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
1328#ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1332#endif
1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1335 *VRatioPrefetchY = dml_max(
1336 a: (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1337 b: (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1338 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
1339 } else {
1340 MyError = true;
1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1342 *VRatioPrefetchY = 0;
1343 }
1344#ifdef __DML_VBA_DEBUG__
1345 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1348#endif
1349 }
1350
1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1352 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
1353
1354#ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1358#endif
1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1361 *VRatioPrefetchC = dml_max(
1362 a: *VRatioPrefetchC,
1363 b: (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1364 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
1365 } else {
1366 MyError = true;
1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1368 *VRatioPrefetchC = 0;
1369 }
1370#ifdef __DML_VBA_DEBUG__
1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1374#endif
1375 }
1376
1377#ifdef __DML_VBA_DEBUG__
1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1381#endif
1382
1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1384
1385#ifdef __DML_VBA_DEBUG__
1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1387#endif
1388
1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1390 / LineTime;
1391 } else {
1392 MyError = true;
1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1395 *VRatioPrefetchY = 0;
1396 *VRatioPrefetchC = 0;
1397 *RequiredPrefetchPixDataBWLuma = 0;
1398 *RequiredPrefetchPixDataBWChroma = 0;
1399 }
1400
1401 dml_print(
1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1406 dml_print(
1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1408 (double) LinesToRequestPrefetchPixelData * LineTime);
1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1411 dml_print(
1412 "DML: Tslack(pre): %fus - time left over in schedule\n",
1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1416
1417 } else {
1418 MyError = true;
1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1420 }
1421
1422 {
1423 double prefetch_vm_bw;
1424 double prefetch_row_bw;
1425
1426 if (PDEAndMetaPTEBytesFrame == 0) {
1427 prefetch_vm_bw = 0;
1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1429#ifdef __DML_VBA_DEBUG__
1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1434#endif
1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1436#ifdef __DML_VBA_DEBUG__
1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1438#endif
1439 } else {
1440 prefetch_vm_bw = 0;
1441 MyError = true;
1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1443 }
1444
1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1446 prefetch_row_bw = 0;
1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1449
1450#ifdef __DML_VBA_DEBUG__
1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1455#endif
1456 } else {
1457 prefetch_row_bw = 0;
1458 MyError = true;
1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1460 }
1461
1462 *prefetch_vmrow_bw = dml_max(a: prefetch_vm_bw, b: prefetch_row_bw);
1463 }
1464
1465 if (MyError) {
1466 *PrefetchBandwidth = 0;
1467 TimeForFetchingMetaPTE = 0;
1468 TimeForFetchingRowInVBlank = 0;
1469 *DestinationLinesToRequestVMInVBlank = 0;
1470 *DestinationLinesToRequestRowInVBlank = 0;
1471 *DestinationLinesForPrefetch = 0;
1472 LinesToRequestPrefetchPixelData = 0;
1473 *VRatioPrefetchY = 0;
1474 *VRatioPrefetchC = 0;
1475 *RequiredPrefetchPixDataBWLuma = 0;
1476 *RequiredPrefetchPixDataBWChroma = 0;
1477 }
1478
1479 return MyError;
1480}
1481
/*
 * Snap Clock onto the grid of frequencies of the form (4 * VCOSpeed) / N.
 *
 * The divider N is taken as dml_floor((4 * VCOSpeed) / Clock, 1) and the
 * function returns (4 * VCOSpeed) / N.
 * NOTE(review): presumably dml_floor(x, 1) rounds x down to a whole number,
 * which makes the result >= Clock for positive inputs - confirm against the
 * helper's definition.  No guarding is done for a divider of zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1486
/*
 * Snap Clock onto the grid of frequencies of the form (4 * VCOSpeed) / N.
 *
 * The divider N is taken as dml_ceil((4 * VCOSpeed) / Clock, 1) and the
 * function returns (4 * VCOSpeed) / N.
 * NOTE(review): presumably dml_ceil(x, 1) rounds x up to a whole number,
 * which makes the result <= Clock for positive inputs - confirm against the
 * helper's definition.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ideal_divider = VCOSpeed * 4.0 / Clock;
	const double divider = dml_ceil(ideal_divider, 1);

	return VCOSpeed * 4 / divider;
}
1491
1492static void CalculateDCCConfiguration(
1493 bool DCCEnabled,
1494 bool DCCProgrammingAssumesScanDirectionUnknown,
1495 enum source_format_class SourcePixelFormat,
1496 unsigned int SurfaceWidthLuma,
1497 unsigned int SurfaceWidthChroma,
1498 unsigned int SurfaceHeightLuma,
1499 unsigned int SurfaceHeightChroma,
1500 double DETBufferSize,
1501 unsigned int RequestHeight256ByteLuma,
1502 unsigned int RequestHeight256ByteChroma,
1503 enum dm_swizzle_mode TilingFormat,
1504 unsigned int BytePerPixelY,
1505 unsigned int BytePerPixelC,
1506 double BytePerPixelDETY,
1507 double BytePerPixelDETC,
1508 enum scan_direction_class ScanOrientation,
1509 unsigned int *MaxUncompressedBlockLuma,
1510 unsigned int *MaxUncompressedBlockChroma,
1511 unsigned int *MaxCompressedBlockLuma,
1512 unsigned int *MaxCompressedBlockChroma,
1513 unsigned int *IndependentBlockLuma,
1514 unsigned int *IndependentBlockChroma)
1515{
1516 int yuv420;
1517 int horz_div_l;
1518 int horz_div_c;
1519 int vert_div_l;
1520 int vert_div_c;
1521
1522 int swath_buf_size;
1523 double detile_buf_vp_horz_limit;
1524 double detile_buf_vp_vert_limit;
1525
1526 int MAS_vp_horz_limit;
1527 int MAS_vp_vert_limit;
1528 int max_vp_horz_width;
1529 int max_vp_vert_height;
1530 int eff_surf_width_l;
1531 int eff_surf_width_c;
1532 int eff_surf_height_l;
1533 int eff_surf_height_c;
1534
1535 int full_swath_bytes_horz_wc_l;
1536 int full_swath_bytes_horz_wc_c;
1537 int full_swath_bytes_vert_wc_l;
1538 int full_swath_bytes_vert_wc_c;
1539 int req128_horz_wc_l;
1540 int req128_horz_wc_c;
1541 int req128_vert_wc_l;
1542 int req128_vert_wc_c;
1543 int segment_order_horz_contiguous_luma;
1544 int segment_order_horz_contiguous_chroma;
1545 int segment_order_vert_contiguous_luma;
1546 int segment_order_vert_contiguous_chroma;
1547
1548 typedef enum {
1549 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1550 } RequestType;
1551 RequestType RequestLuma;
1552 RequestType RequestChroma;
1553
1554 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1555 horz_div_l = 1;
1556 horz_div_c = 1;
1557 vert_div_l = 1;
1558 vert_div_c = 1;
1559
1560 if (BytePerPixelY == 1)
1561 vert_div_l = 0;
1562 if (BytePerPixelC == 1)
1563 vert_div_c = 0;
1564 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1565 horz_div_l = 0;
1566 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1567 horz_div_c = 0;
1568
1569 if (BytePerPixelC == 0) {
1570 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1571 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1572 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1573 } else {
1574 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1575 detile_buf_vp_horz_limit = (double) swath_buf_size
1576 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1577 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1578 detile_buf_vp_vert_limit = (double) swath_buf_size
1579 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1580 }
1581
1582 if (SourcePixelFormat == dm_420_10) {
1583 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1584 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1585 }
1586
1587 detile_buf_vp_horz_limit = dml_floor(a: detile_buf_vp_horz_limit - 1, granularity: 16);
1588 detile_buf_vp_vert_limit = dml_floor(a: detile_buf_vp_vert_limit - 1, granularity: 16);
1589
1590 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1591 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1592 max_vp_horz_width = dml_min(a: (double) MAS_vp_horz_limit, b: detile_buf_vp_horz_limit);
1593 max_vp_vert_height = dml_min(a: (double) MAS_vp_vert_limit, b: detile_buf_vp_vert_limit);
1594 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1595 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1596 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1597 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1598
1599 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1600 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1601 if (BytePerPixelC > 0) {
1602 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1603 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1604 } else {
1605 full_swath_bytes_horz_wc_c = 0;
1606 full_swath_bytes_vert_wc_c = 0;
1607 }
1608
1609 if (SourcePixelFormat == dm_420_10) {
1610 full_swath_bytes_horz_wc_l = dml_ceil(a: full_swath_bytes_horz_wc_l * 2 / 3, granularity: 256);
1611 full_swath_bytes_horz_wc_c = dml_ceil(a: full_swath_bytes_horz_wc_c * 2 / 3, granularity: 256);
1612 full_swath_bytes_vert_wc_l = dml_ceil(a: full_swath_bytes_vert_wc_l * 2 / 3, granularity: 256);
1613 full_swath_bytes_vert_wc_c = dml_ceil(a: full_swath_bytes_vert_wc_c * 2 / 3, granularity: 256);
1614 }
1615
1616 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 0;
1618 req128_horz_wc_c = 0;
1619 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1620 req128_horz_wc_l = 0;
1621 req128_horz_wc_c = 1;
1622 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1623 req128_horz_wc_l = 1;
1624 req128_horz_wc_c = 0;
1625 } else {
1626 req128_horz_wc_l = 1;
1627 req128_horz_wc_c = 1;
1628 }
1629
1630 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 0;
1632 req128_vert_wc_c = 0;
1633 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1634 req128_vert_wc_l = 0;
1635 req128_vert_wc_c = 1;
1636 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1637 req128_vert_wc_l = 1;
1638 req128_vert_wc_c = 0;
1639 } else {
1640 req128_vert_wc_l = 1;
1641 req128_vert_wc_c = 1;
1642 }
1643
1644 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1645 segment_order_horz_contiguous_luma = 0;
1646 } else {
1647 segment_order_horz_contiguous_luma = 1;
1648 }
1649 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1650 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1651 segment_order_vert_contiguous_luma = 0;
1652 } else {
1653 segment_order_vert_contiguous_luma = 1;
1654 }
1655 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1656 segment_order_horz_contiguous_chroma = 0;
1657 } else {
1658 segment_order_horz_contiguous_chroma = 1;
1659 }
1660 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1661 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1662 segment_order_vert_contiguous_chroma = 0;
1663 } else {
1664 segment_order_vert_contiguous_chroma = 1;
1665 }
1666
1667 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1668 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1669 RequestLuma = REQ_256Bytes;
1670 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1671 RequestLuma = REQ_128BytesNonContiguous;
1672 } else {
1673 RequestLuma = REQ_128BytesContiguous;
1674 }
1675 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1676 RequestChroma = REQ_256Bytes;
1677 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1678 RequestChroma = REQ_128BytesNonContiguous;
1679 } else {
1680 RequestChroma = REQ_128BytesContiguous;
1681 }
1682 } else if (ScanOrientation != dm_vert) {
1683 if (req128_horz_wc_l == 0) {
1684 RequestLuma = REQ_256Bytes;
1685 } else if (segment_order_horz_contiguous_luma == 0) {
1686 RequestLuma = REQ_128BytesNonContiguous;
1687 } else {
1688 RequestLuma = REQ_128BytesContiguous;
1689 }
1690 if (req128_horz_wc_c == 0) {
1691 RequestChroma = REQ_256Bytes;
1692 } else if (segment_order_horz_contiguous_chroma == 0) {
1693 RequestChroma = REQ_128BytesNonContiguous;
1694 } else {
1695 RequestChroma = REQ_128BytesContiguous;
1696 }
1697 } else {
1698 if (req128_vert_wc_l == 0) {
1699 RequestLuma = REQ_256Bytes;
1700 } else if (segment_order_vert_contiguous_luma == 0) {
1701 RequestLuma = REQ_128BytesNonContiguous;
1702 } else {
1703 RequestLuma = REQ_128BytesContiguous;
1704 }
1705 if (req128_vert_wc_c == 0) {
1706 RequestChroma = REQ_256Bytes;
1707 } else if (segment_order_vert_contiguous_chroma == 0) {
1708 RequestChroma = REQ_128BytesNonContiguous;
1709 } else {
1710 RequestChroma = REQ_128BytesContiguous;
1711 }
1712 }
1713
1714 if (RequestLuma == REQ_256Bytes) {
1715 *MaxUncompressedBlockLuma = 256;
1716 *MaxCompressedBlockLuma = 256;
1717 *IndependentBlockLuma = 0;
1718 } else if (RequestLuma == REQ_128BytesContiguous) {
1719 *MaxUncompressedBlockLuma = 256;
1720 *MaxCompressedBlockLuma = 128;
1721 *IndependentBlockLuma = 128;
1722 } else {
1723 *MaxUncompressedBlockLuma = 256;
1724 *MaxCompressedBlockLuma = 64;
1725 *IndependentBlockLuma = 64;
1726 }
1727
1728 if (RequestChroma == REQ_256Bytes) {
1729 *MaxUncompressedBlockChroma = 256;
1730 *MaxCompressedBlockChroma = 256;
1731 *IndependentBlockChroma = 0;
1732 } else if (RequestChroma == REQ_128BytesContiguous) {
1733 *MaxUncompressedBlockChroma = 256;
1734 *MaxCompressedBlockChroma = 128;
1735 *IndependentBlockChroma = 128;
1736 } else {
1737 *MaxUncompressedBlockChroma = 256;
1738 *MaxCompressedBlockChroma = 64;
1739 *IndependentBlockChroma = 64;
1740 }
1741
1742 if (DCCEnabled != true || BytePerPixelC == 0) {
1743 *MaxUncompressedBlockChroma = 0;
1744 *MaxCompressedBlockChroma = 0;
1745 *IndependentBlockChroma = 0;
1746 }
1747
1748 if (DCCEnabled != true) {
1749 *MaxUncompressedBlockLuma = 0;
1750 *MaxCompressedBlockLuma = 0;
1751 *IndependentBlockLuma = 0;
1752 }
1753}
1754
1755static double CalculatePrefetchSourceLines(
1756 struct display_mode_lib *mode_lib,
1757 double VRatio,
1758 double vtaps,
1759 bool Interlace,
1760 bool ProgressiveToInterlaceUnitInOPP,
1761 unsigned int SwathHeight,
1762 unsigned int ViewportYStart,
1763 double *VInitPreFill,
1764 unsigned int *MaxNumSwath)
1765{
1766 struct vba_vars_st *v = &mode_lib->vba;
1767 unsigned int MaxPartialSwath;
1768
1769 if (ProgressiveToInterlaceUnitInOPP)
1770 *VInitPreFill = dml_floor(a: (VRatio + vtaps + 1) / 2.0, granularity: 1);
1771 else
1772 *VInitPreFill = dml_floor(a: (VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, granularity: 1);
1773
1774 if (!v->IgnoreViewportPositioning) {
1775
1776 *MaxNumSwath = dml_ceil(a: (*VInitPreFill - 1.0) / SwathHeight, granularity: 1) + 1.0;
1777
1778 if (*VInitPreFill > 1.0)
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1780 else
1781 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1782 MaxPartialSwath = dml_max(a: 1U, b: MaxPartialSwath);
1783
1784 } else {
1785
1786 if (ViewportYStart != 0)
1787 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1788
1789 *MaxNumSwath = dml_ceil(a: *VInitPreFill / SwathHeight, granularity: 1);
1790
1791 if (*VInitPreFill > 1.0)
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1793 else
1794 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1795 }
1796
1797#ifdef __DML_VBA_DEBUG__
1798 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1799 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1800 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1801 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1802 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1803 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1804 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1805 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1806 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1807#endif
1808 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1809}
1810
1811static unsigned int CalculateVMAndRowBytes(
1812 struct display_mode_lib *mode_lib,
1813 bool DCCEnable,
1814 unsigned int BlockHeight256Bytes,
1815 unsigned int BlockWidth256Bytes,
1816 enum source_format_class SourcePixelFormat,
1817 unsigned int SurfaceTiling,
1818 unsigned int BytePerPixel,
1819 enum scan_direction_class ScanDirection,
1820 unsigned int SwathWidth,
1821 unsigned int ViewportHeight,
1822 bool GPUVMEnable,
1823 bool HostVMEnable,
1824 unsigned int HostVMMaxNonCachedPageTableLevels,
1825 unsigned int GPUVMMinPageSize,
1826 unsigned int HostVMMinPageSize,
1827 unsigned int PTEBufferSizeInRequests,
1828 unsigned int Pitch,
1829 unsigned int DCCMetaPitch,
1830 unsigned int *MacroTileWidth,
1831 unsigned int *MetaRowByte,
1832 unsigned int *PixelPTEBytesPerRow,
1833 bool *PTEBufferSizeNotExceeded,
1834 int *dpte_row_width_ub,
1835 unsigned int *dpte_row_height,
1836 unsigned int *MetaRequestWidth,
1837 unsigned int *MetaRequestHeight,
1838 unsigned int *meta_row_width,
1839 unsigned int *meta_row_height,
1840 int *vm_group_bytes,
1841 unsigned int *dpte_group_bytes,
1842 unsigned int *PixelPTEReqWidth,
1843 unsigned int *PixelPTEReqHeight,
1844 unsigned int *PTERequestSize,
1845 int *DPDE0BytesFrame,
1846 int *MetaPTEBytesFrame)
1847{
1848 struct vba_vars_st *v = &mode_lib->vba;
1849 unsigned int MPDEBytesFrame;
1850 unsigned int DCCMetaSurfaceBytes;
1851 unsigned int MacroTileSizeBytes;
1852 unsigned int MacroTileHeight;
1853 unsigned int ExtraDPDEBytesFrame;
1854 unsigned int PDEAndMetaPTEBytesFrame;
1855 unsigned int PixelPTEReqHeightPTEs = 0;
1856 unsigned int HostVMDynamicLevels = 0;
1857 double FractionOfPTEReturnDrop;
1858
1859 if (GPUVMEnable == true && HostVMEnable == true) {
1860 if (HostVMMinPageSize < 2048) {
1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1863 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 1);
1864 } else {
1865 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 2);
1866 }
1867 }
1868
1869 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1870 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1871 if (ScanDirection != dm_vert) {
1872 *meta_row_height = *MetaRequestHeight;
1873 *meta_row_width = dml_ceil(a: (double) SwathWidth - 1, granularity: *MetaRequestWidth) + *MetaRequestWidth;
1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1875 } else {
1876 *meta_row_height = *MetaRequestWidth;
1877 *meta_row_width = dml_ceil(a: (double) SwathWidth - 1, granularity: *MetaRequestHeight) + *MetaRequestHeight;
1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1879 }
1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(a: ViewportHeight - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1881 if (GPUVMEnable == true) {
1882 *MetaPTEBytesFrame = (dml_ceil(a: (double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), granularity: 1) + 1) * 64;
1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1884 } else {
1885 *MetaPTEBytesFrame = 0;
1886 MPDEBytesFrame = 0;
1887 }
1888
1889 if (DCCEnable != true) {
1890 *MetaPTEBytesFrame = 0;
1891 MPDEBytesFrame = 0;
1892 *MetaRowByte = 0;
1893 }
1894
1895 if (SurfaceTiling == dm_sw_linear) {
1896 MacroTileSizeBytes = 256;
1897 MacroTileHeight = BlockHeight256Bytes;
1898 } else {
1899 MacroTileSizeBytes = 65536;
1900 MacroTileHeight = 16 * BlockHeight256Bytes;
1901 }
1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1903
1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1905 if (ScanDirection != dm_vert) {
1906 *DPDE0BytesFrame = 64
1907 * (dml_ceil(
1908 a: ((Pitch * (dml_ceil(a: ViewportHeight - 1, granularity: MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1909 / (8 * 2097152),
1910 granularity: 1) + 1);
1911 } else {
1912 *DPDE0BytesFrame = 64
1913 * (dml_ceil(
1914 a: ((Pitch * (dml_ceil(a: (double) SwathWidth - 1, granularity: MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1915 / (8 * 2097152),
1916 granularity: 1) + 1);
1917 }
1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1919 } else {
1920 *DPDE0BytesFrame = 0;
1921 ExtraDPDEBytesFrame = 0;
1922 }
1923
1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1925
1926#ifdef __DML_VBA_DEBUG__
1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1932#endif
1933
1934 if (HostVMEnable == true) {
1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1936 }
1937#ifdef __DML_VBA_DEBUG__
1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1939#endif
1940
1941 if (SurfaceTiling == dm_sw_linear) {
1942 PixelPTEReqHeightPTEs = 1;
1943 *PixelPTEReqHeight = 1;
1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1945 *PTERequestSize = 64;
1946 FractionOfPTEReturnDrop = 0;
1947 } else if (MacroTileSizeBytes == 4096) {
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 if (ScanDirection != dm_vert)
1953 FractionOfPTEReturnDrop = 0;
1954 else
1955 FractionOfPTEReturnDrop = 7 / 8;
1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1957 PixelPTEReqHeightPTEs = 16;
1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1960 *PTERequestSize = 128;
1961 FractionOfPTEReturnDrop = 0;
1962 } else {
1963 PixelPTEReqHeightPTEs = 1;
1964 *PixelPTEReqHeight = MacroTileHeight;
1965 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1966 *PTERequestSize = 64;
1967 FractionOfPTEReturnDrop = 0;
1968 }
1969
1970 if (SurfaceTiling == dm_sw_linear) {
1971 *dpte_row_height = dml_min(a: 128, b: 1 << (unsigned int) dml_floor(a: dml_log2(x: PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), granularity: 1));
1972 *dpte_row_width_ub = (dml_ceil(a: (double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth;
1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1974 } else if (ScanDirection != dm_vert) {
1975 *dpte_row_height = *PixelPTEReqHeight;
1976 *dpte_row_width_ub = (dml_ceil(a: (double) (SwathWidth - 1) / *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth;
1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1978 } else {
1979 *dpte_row_height = dml_min(a: *PixelPTEReqWidth, b: *MacroTileWidth);
1980 *dpte_row_width_ub = (dml_ceil(a: (double) (SwathWidth - 1) / *PixelPTEReqHeight, granularity: 1) + 1) * *PixelPTEReqHeight;
1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1982 }
1983
1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1985 *PTEBufferSizeNotExceeded = true;
1986 } else {
1987 *PTEBufferSizeNotExceeded = false;
1988 }
1989
1990 if (GPUVMEnable != true) {
1991 *PixelPTEBytesPerRow = 0;
1992 *PTEBufferSizeNotExceeded = true;
1993 }
1994
1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1996
1997 if (HostVMEnable == true) {
1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1999 }
2000
2001 if (HostVMEnable == true) {
2002 *vm_group_bytes = 512;
2003 *dpte_group_bytes = 512;
2004 } else if (GPUVMEnable == true) {
2005 *vm_group_bytes = 2048;
2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2007 *dpte_group_bytes = 512;
2008 } else {
2009 *dpte_group_bytes = 2048;
2010 }
2011 } else {
2012 *vm_group_bytes = 0;
2013 *dpte_group_bytes = 0;
2014 }
2015 return PDEAndMetaPTEBytesFrame;
2016}
2017
2018static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2019{
2020 struct vba_vars_st *v = &mode_lib->vba;
2021 unsigned int j, k;
2022 double HostVMInefficiencyFactor = 1.0;
2023 bool NoChromaPlanes = true;
2024 int ReorderBytes;
2025 double VMDataOnlyReturnBW;
2026 double MaxTotalRDBandwidth = 0;
2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2028
2029 v->WritebackDISPCLK = 0.0;
2030 v->DISPCLKWithRamping = 0;
2031 v->DISPCLKWithoutRamping = 0;
2032 v->GlobalDPPCLK = 0.0;
2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2034 {
2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2036 a: v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2037 b: v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2039
2040 if (v->HostVMEnable != true) {
2041 v->ReturnBW = dml_min(
2042 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2043 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2044 } else {
2045 v->ReturnBW = dml_min(
2046 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2047 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2048 }
2049 }
2050 /* End DAL custom code */
2051
2052 // DISPCLK and DPPCLK Calculation
2053 //
2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2055 if (v->WritebackEnable[k]) {
2056 v->WritebackDISPCLK = dml_max(
2057 a: v->WritebackDISPCLK,
2058 b: dml314_CalculateWriteBackDISPCLK(
2059 WritebackPixelFormat: v->WritebackPixelFormat[k],
2060 PixelClock: v->PixelClock[k],
2061 WritebackHRatio: v->WritebackHRatio[k],
2062 WritebackVRatio: v->WritebackVRatio[k],
2063 WritebackHTaps: v->WritebackHTaps[k],
2064 WritebackVTaps: v->WritebackVTaps[k],
2065 WritebackSourceWidth: v->WritebackSourceWidth[k],
2066 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
2067 HTotal: v->HTotal[k],
2068 WritebackLineBufferSize: v->WritebackLineBufferSize));
2069 }
2070 }
2071
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 if (v->HRatio[k] > 1) {
2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2075 a: v->MaxDCHUBToPSCLThroughput,
2076 b: v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(a: v->htaps[k] / 6.0, granularity: 1));
2077 } else {
2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
2079 }
2080
2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2082 * dml_max(
2083 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
2084 b: dml_max(a: v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], b: 1.0));
2085
2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2088 }
2089
2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2094 } else {
2095 if (v->HRatioChroma[k] > 1) {
2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2097 a: v->MaxDCHUBToPSCLThroughput,
2098 b: v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(a: v->HTAPsChroma[k] / 6.0, granularity: 1.0));
2099 } else {
2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
2101 }
2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2103 * dml_max3(
2104 a: v->VTAPsChroma[k] / 6.0 * dml_min(a: 1.0, b: v->HRatioChroma[k]),
2105 b: v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2106 c: 1.0);
2107
2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2110 }
2111
2112 v->DPPCLKUsingSingleDPP[k] = dml_max(a: v->DPPCLKUsingSingleDPPLuma, b: v->DPPCLKUsingSingleDPPChroma);
2113 }
2114 }
2115
2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2117 if (v->BlendingAndTiming[k] != k)
2118 continue;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2120 v->DISPCLKWithRamping = dml_max(
2121 a: v->DISPCLKWithRamping,
2122 b: v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2123 * (1 + v->DISPCLKRampingMargin / 100));
2124 v->DISPCLKWithoutRamping = dml_max(
2125 a: v->DISPCLKWithoutRamping,
2126 b: v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2128 v->DISPCLKWithRamping = dml_max(
2129 a: v->DISPCLKWithRamping,
2130 b: v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2131 * (1 + v->DISPCLKRampingMargin / 100));
2132 v->DISPCLKWithoutRamping = dml_max(
2133 a: v->DISPCLKWithoutRamping,
2134 b: v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2135 } else {
2136 v->DISPCLKWithRamping = dml_max(
2137 a: v->DISPCLKWithRamping,
2138 b: v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2139 v->DISPCLKWithoutRamping = dml_max(
2140 a: v->DISPCLKWithoutRamping,
2141 b: v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2142 }
2143 }
2144
2145 v->DISPCLKWithRamping = dml_max(a: v->DISPCLKWithRamping, b: v->WritebackDISPCLK);
2146 v->DISPCLKWithoutRamping = dml_max(a: v->DISPCLKWithoutRamping, b: v->WritebackDISPCLK);
2147
2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(Clock: v->DISPCLKWithRamping, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(Clock: v->DISPCLKWithoutRamping, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2152 Clock: v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2153 VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2158 } else {
2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2160 }
2161 v->DISPCLK = v->DISPCLK_calculated;
2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2163
2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2166 v->GlobalDPPCLK = dml_max(a: v->GlobalDPPCLK, b: v->DPPCLK_calculated[k]);
2167 }
2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(Clock: v->GlobalDPPCLK, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(a: v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, granularity: 1);
2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2172 }
2173
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2176 }
2177
2178 // Urgent and B P-State/DRAM Clock Change Watermark
2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2180 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2181
2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2183 CalculateBytePerPixelAnd256BBlockSizes(
2184 SourcePixelFormat: v->SourcePixelFormat[k],
2185 SurfaceTiling: v->SurfaceTiling[k],
2186 BytePerPixelY: &v->BytePerPixelY[k],
2187 BytePerPixelC: &v->BytePerPixelC[k],
2188 BytePerPixelDETY: &v->BytePerPixelDETY[k],
2189 BytePerPixelDETC: &v->BytePerPixelDETC[k],
2190 BlockHeight256BytesY: &v->BlockHeight256BytesY[k],
2191 BlockHeight256BytesC: &v->BlockHeight256BytesC[k],
2192 BlockWidth256BytesY: &v->BlockWidth256BytesY[k],
2193 BlockWidth256BytesC: &v->BlockWidth256BytesC[k]);
2194 }
2195
2196 CalculateSwathWidth(
2197 ForceSingleDPP: false,
2198 NumberOfActivePlanes: v->NumberOfActivePlanes,
2199 SourcePixelFormat: v->SourcePixelFormat,
2200 SourceScan: v->SourceScan,
2201 ViewportWidth: v->ViewportWidth,
2202 ViewportHeight: v->ViewportHeight,
2203 SurfaceWidthY: v->SurfaceWidthY,
2204 SurfaceWidthC: v->SurfaceWidthC,
2205 SurfaceHeightY: v->SurfaceHeightY,
2206 SurfaceHeightC: v->SurfaceHeightC,
2207 ODMCombineEnabled: v->ODMCombineEnabled,
2208 BytePerPixY: v->BytePerPixelY,
2209 BytePerPixC: v->BytePerPixelC,
2210 Read256BytesBlockHeightY: v->BlockHeight256BytesY,
2211 Read256BytesBlockHeightC: v->BlockHeight256BytesC,
2212 Read256BytesBlockWidthY: v->BlockWidth256BytesY,
2213 Read256BytesBlockWidthC: v->BlockWidth256BytesC,
2214 BlendingAndTiming: v->BlendingAndTiming,
2215 HActive: v->HActive,
2216 HRatio: v->HRatio,
2217 DPPPerPlane: v->DPPPerPlane,
2218 SwathWidthSingleDPPY: v->SwathWidthSingleDPPY,
2219 SwathWidthSingleDPPC: v->SwathWidthSingleDPPC,
2220 SwathWidthY: v->SwathWidthY,
2221 SwathWidthC: v->SwathWidthC,
2222 MaximumSwathHeightY: v->dummyinteger3,
2223 MaximumSwathHeightC: v->dummyinteger4,
2224 swath_width_luma_ub: v->swath_width_luma_ub,
2225 swath_width_chroma_ub: v->swath_width_chroma_ub);
2226
2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2229 * v->VRatio[k];
2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2231 * v->VRatioChroma[k];
2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2233 }
2234
2235 // DCFCLK Deep Sleep
2236 CalculateDCFCLKDeepSleep(
2237 mode_lib,
2238 NumberOfActivePlanes: v->NumberOfActivePlanes,
2239 BytePerPixelY: v->BytePerPixelY,
2240 BytePerPixelC: v->BytePerPixelC,
2241 VRatio: v->VRatio,
2242 VRatioChroma: v->VRatioChroma,
2243 SwathWidthY: v->SwathWidthY,
2244 SwathWidthC: v->SwathWidthC,
2245 DPPPerPlane: v->DPPPerPlane,
2246 HRatio: v->HRatio,
2247 HRatioChroma: v->HRatioChroma,
2248 PixelClock: v->PixelClock,
2249 PSCL_THROUGHPUT: v->PSCL_THROUGHPUT_LUMA,
2250 PSCL_THROUGHPUT_CHROMA: v->PSCL_THROUGHPUT_CHROMA,
2251 DPPCLK: v->DPPCLK,
2252 ReadBandwidthLuma: v->ReadBandwidthPlaneLuma,
2253 ReadBandwidthChroma: v->ReadBandwidthPlaneChroma,
2254 ReturnBusWidth: v->ReturnBusWidth,
2255 DCFCLKDeepSleep: &v->DCFCLKDeepSleep);
2256
2257 // DSCCLK
2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2260 v->DSCCLK_calculated[k] = 0.0;
2261 } else {
2262 if (v->OutputFormat[k] == dm_420)
2263 v->DSCFormatFactor = 2;
2264 else if (v->OutputFormat[k] == dm_444)
2265 v->DSCFormatFactor = 1;
2266 else if (v->OutputFormat[k] == dm_n422)
2267 v->DSCFormatFactor = 2;
2268 else
2269 v->DSCFormatFactor = 1;
2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2276 else
2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2279 }
2280 }
2281
2282 // DSC Delay
2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2284 double BPP = v->OutputBpp[k];
2285
2286 if (v->DSCEnabled[k] && BPP != 0) {
2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2288 v->DSCDelay[k] = dscceComputeDelay(
2289 bpc: v->DSCInputBitPerComponent[k],
2290 BPP,
2291 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2292 numSlices: v->NumberOfDSCSlices[k],
2293 pixelFormat: v->OutputFormat[k],
2294 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]);
2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2296 v->DSCDelay[k] = 2
2297 * (dscceComputeDelay(
2298 bpc: v->DSCInputBitPerComponent[k],
2299 BPP,
2300 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2301 numSlices: v->NumberOfDSCSlices[k] / 2.0,
2302 pixelFormat: v->OutputFormat[k],
2303 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
2304 } else {
2305 v->DSCDelay[k] = 4
2306 * (dscceComputeDelay(
2307 bpc: v->DSCInputBitPerComponent[k],
2308 BPP,
2309 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2310 numSlices: v->NumberOfDSCSlices[k] / 4.0,
2311 pixelFormat: v->OutputFormat[k],
2312 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
2313 }
2314 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil(a: (double) v->DSCDelay[k] / v->HActive[k], granularity: 1);
2315 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2316 } else {
2317 v->DSCDelay[k] = 0;
2318 }
2319 }
2320
2321 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2322 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2323 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2324 v->DSCDelay[k] = v->DSCDelay[j];
2325
2326 // Prefetch
2327 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2328 unsigned int PDEAndMetaPTEBytesFrameY;
2329 unsigned int PixelPTEBytesPerRowY;
2330 unsigned int MetaRowByteY;
2331 unsigned int MetaRowByteC;
2332 unsigned int PDEAndMetaPTEBytesFrameC;
2333 unsigned int PixelPTEBytesPerRowC;
2334 bool PTEBufferSizeNotExceededY;
2335 bool PTEBufferSizeNotExceededC;
2336
2337 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2338 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2339 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2340 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2341 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2342 } else {
2343 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2344 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2345 }
2346
2347 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2348 mode_lib,
2349 DCCEnable: v->DCCEnable[k],
2350 BlockHeight256Bytes: v->BlockHeight256BytesC[k],
2351 BlockWidth256Bytes: v->BlockWidth256BytesC[k],
2352 SourcePixelFormat: v->SourcePixelFormat[k],
2353 SurfaceTiling: v->SurfaceTiling[k],
2354 BytePerPixel: v->BytePerPixelC[k],
2355 ScanDirection: v->SourceScan[k],
2356 SwathWidth: v->SwathWidthC[k],
2357 ViewportHeight: v->ViewportHeightChroma[k],
2358 GPUVMEnable: v->GPUVMEnable,
2359 HostVMEnable: v->HostVMEnable,
2360 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2361 GPUVMMinPageSize: v->GPUVMMinPageSize,
2362 HostVMMinPageSize: v->HostVMMinPageSize,
2363 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForChroma,
2364 Pitch: v->PitchC[k],
2365 DCCMetaPitch: v->DCCMetaPitchC[k],
2366 MacroTileWidth: &v->MacroTileWidthC[k],
2367 MetaRowByte: &MetaRowByteC,
2368 PixelPTEBytesPerRow: &PixelPTEBytesPerRowC,
2369 PTEBufferSizeNotExceeded: &PTEBufferSizeNotExceededC,
2370 dpte_row_width_ub: &v->dpte_row_width_chroma_ub[k],
2371 dpte_row_height: &v->dpte_row_height_chroma[k],
2372 MetaRequestWidth: &v->meta_req_width_chroma[k],
2373 MetaRequestHeight: &v->meta_req_height_chroma[k],
2374 meta_row_width: &v->meta_row_width_chroma[k],
2375 meta_row_height: &v->meta_row_height_chroma[k],
2376 vm_group_bytes: &v->dummyinteger1,
2377 dpte_group_bytes: &v->dummyinteger2,
2378 PixelPTEReqWidth: &v->PixelPTEReqWidthC[k],
2379 PixelPTEReqHeight: &v->PixelPTEReqHeightC[k],
2380 PTERequestSize: &v->PTERequestSizeC[k],
2381 DPDE0BytesFrame: &v->dpde0_bytes_per_frame_ub_c[k],
2382 MetaPTEBytesFrame: &v->meta_pte_bytes_per_frame_ub_c[k]);
2383
2384 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2385 mode_lib,
2386 VRatio: v->VRatioChroma[k],
2387 vtaps: v->VTAPsChroma[k],
2388 Interlace: v->Interlace[k],
2389 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
2390 SwathHeight: v->SwathHeightC[k],
2391 ViewportYStart: v->ViewportYStartC[k],
2392 VInitPreFill: &v->VInitPreFillC[k],
2393 MaxNumSwath: &v->MaxNumSwathC[k]);
2394 } else {
2395 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2396 v->PTEBufferSizeInRequestsForChroma = 0;
2397 PixelPTEBytesPerRowC = 0;
2398 PDEAndMetaPTEBytesFrameC = 0;
2399 MetaRowByteC = 0;
2400 v->MaxNumSwathC[k] = 0;
2401 v->PrefetchSourceLinesC[k] = 0;
2402 }
2403
2404 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2405 mode_lib,
2406 DCCEnable: v->DCCEnable[k],
2407 BlockHeight256Bytes: v->BlockHeight256BytesY[k],
2408 BlockWidth256Bytes: v->BlockWidth256BytesY[k],
2409 SourcePixelFormat: v->SourcePixelFormat[k],
2410 SurfaceTiling: v->SurfaceTiling[k],
2411 BytePerPixel: v->BytePerPixelY[k],
2412 ScanDirection: v->SourceScan[k],
2413 SwathWidth: v->SwathWidthY[k],
2414 ViewportHeight: v->ViewportHeight[k],
2415 GPUVMEnable: v->GPUVMEnable,
2416 HostVMEnable: v->HostVMEnable,
2417 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2418 GPUVMMinPageSize: v->GPUVMMinPageSize,
2419 HostVMMinPageSize: v->HostVMMinPageSize,
2420 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForLuma,
2421 Pitch: v->PitchY[k],
2422 DCCMetaPitch: v->DCCMetaPitchY[k],
2423 MacroTileWidth: &v->MacroTileWidthY[k],
2424 MetaRowByte: &MetaRowByteY,
2425 PixelPTEBytesPerRow: &PixelPTEBytesPerRowY,
2426 PTEBufferSizeNotExceeded: &PTEBufferSizeNotExceededY,
2427 dpte_row_width_ub: &v->dpte_row_width_luma_ub[k],
2428 dpte_row_height: &v->dpte_row_height[k],
2429 MetaRequestWidth: &v->meta_req_width[k],
2430 MetaRequestHeight: &v->meta_req_height[k],
2431 meta_row_width: &v->meta_row_width[k],
2432 meta_row_height: &v->meta_row_height[k],
2433 vm_group_bytes: &v->vm_group_bytes[k],
2434 dpte_group_bytes: &v->dpte_group_bytes[k],
2435 PixelPTEReqWidth: &v->PixelPTEReqWidthY[k],
2436 PixelPTEReqHeight: &v->PixelPTEReqHeightY[k],
2437 PTERequestSize: &v->PTERequestSizeY[k],
2438 DPDE0BytesFrame: &v->dpde0_bytes_per_frame_ub_l[k],
2439 MetaPTEBytesFrame: &v->meta_pte_bytes_per_frame_ub_l[k]);
2440
2441 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2442 mode_lib,
2443 VRatio: v->VRatio[k],
2444 vtaps: v->vtaps[k],
2445 Interlace: v->Interlace[k],
2446 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
2447 SwathHeight: v->SwathHeightY[k],
2448 ViewportYStart: v->ViewportYStartY[k],
2449 VInitPreFill: &v->VInitPreFillY[k],
2450 MaxNumSwath: &v->MaxNumSwathY[k]);
2451 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2452 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2453 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2454
2455 CalculateRowBandwidth(
2456 GPUVMEnable: v->GPUVMEnable,
2457 SourcePixelFormat: v->SourcePixelFormat[k],
2458 VRatio: v->VRatio[k],
2459 VRatioChroma: v->VRatioChroma[k],
2460 DCCEnable: v->DCCEnable[k],
2461 LineTime: v->HTotal[k] / v->PixelClock[k],
2462 MetaRowByteLuma: MetaRowByteY,
2463 MetaRowByteChroma: MetaRowByteC,
2464 meta_row_height_luma: v->meta_row_height[k],
2465 meta_row_height_chroma: v->meta_row_height_chroma[k],
2466 PixelPTEBytesPerRowLuma: PixelPTEBytesPerRowY,
2467 PixelPTEBytesPerRowChroma: PixelPTEBytesPerRowC,
2468 dpte_row_height_luma: v->dpte_row_height[k],
2469 dpte_row_height_chroma: v->dpte_row_height_chroma[k],
2470 meta_row_bw: &v->meta_row_bw[k],
2471 dpte_row_bw: &v->dpte_row_bw[k]);
2472 }
2473
2474 v->TotalDCCActiveDPP = 0;
2475 v->TotalActiveDPP = 0;
2476 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2477 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2478 if (v->DCCEnable[k])
2479 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2480 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2481 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2482 NoChromaPlanes = false;
2483 }
2484
2485 ReorderBytes = v->NumberOfChannels
2486 * dml_max3(
2487 a: v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2488 b: v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2489 c: v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2490
2491 VMDataOnlyReturnBW = dml_min(
2492 a: dml_min(a: v->ReturnBusWidth * v->DCFCLK, b: v->FabricClock * v->FabricDatapathToDCNDataReturn)
2493 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2494 b: v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2495 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2496
2497#ifdef __DML_VBA_DEBUG__
2498 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2499 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2500 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2501 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2502 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2503 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2504 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2505 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2506 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2507 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2508 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2509#endif
2510
2511 if (v->GPUVMEnable && v->HostVMEnable)
2512 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2513
2514 v->UrgentExtraLatency = CalculateExtraLatency(
2515 RoundTripPingLatencyCycles: v->RoundTripPingLatencyCycles,
2516 ReorderingBytes: ReorderBytes,
2517 DCFCLK: v->DCFCLK,
2518 TotalNumberOfActiveDPP: v->TotalActiveDPP,
2519 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
2520 TotalNumberOfDCCActiveDPP: v->TotalDCCActiveDPP,
2521 MetaChunkSize: v->MetaChunkSize,
2522 ReturnBW: v->ReturnBW,
2523 GPUVMEnable: v->GPUVMEnable,
2524 HostVMEnable: v->HostVMEnable,
2525 NumberOfActivePlanes: v->NumberOfActivePlanes,
2526 NumberOfDPP: v->DPPPerPlane,
2527 dpte_group_bytes: v->dpte_group_bytes,
2528 HostVMInefficiencyFactor,
2529 HostVMMinPageSize: v->HostVMMinPageSize,
2530 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
2531
2532 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2533
2534 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2535 if (v->BlendingAndTiming[k] == k) {
2536 if (v->WritebackEnable[k] == true) {
2537 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2538 + CalculateWriteBackDelay(
2539 WritebackPixelFormat: v->WritebackPixelFormat[k],
2540 WritebackHRatio: v->WritebackHRatio[k],
2541 WritebackVRatio: v->WritebackVRatio[k],
2542 WritebackVTaps: v->WritebackVTaps[k],
2543 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
2544 WritebackDestinationHeight: v->WritebackDestinationHeight[k],
2545 WritebackSourceHeight: v->WritebackSourceHeight[k],
2546 HTotal: v->HTotal[k]) / v->DISPCLK;
2547 } else
2548 v->WritebackDelay[v->VoltageLevel][k] = 0;
2549 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2550 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2551 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2552 a: v->WritebackDelay[v->VoltageLevel][k],
2553 b: v->WritebackLatency
2554 + CalculateWriteBackDelay(
2555 WritebackPixelFormat: v->WritebackPixelFormat[j],
2556 WritebackHRatio: v->WritebackHRatio[j],
2557 WritebackVRatio: v->WritebackVRatio[j],
2558 WritebackVTaps: v->WritebackVTaps[j],
2559 WritebackDestinationWidth: v->WritebackDestinationWidth[j],
2560 WritebackDestinationHeight: v->WritebackDestinationHeight[j],
2561 WritebackSourceHeight: v->WritebackSourceHeight[j],
2562 HTotal: v->HTotal[k]) / v->DISPCLK);
2563 }
2564 }
2565 }
2566 }
2567
2568 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2569 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2570 if (v->BlendingAndTiming[k] == j)
2571 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2572
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2574 v->MaxVStartupLines[k] =
2575 CalculateMaxVStartup(
2576 VTotal: v->VTotal[k],
2577 VActive: v->VActive[k],
2578 VBlankNom: v->VBlankNom[k],
2579 HTotal: v->HTotal[k],
2580 PixelClock: v->PixelClock[k],
2581 ProgressiveTointerlaceUnitinOPP: v->ProgressiveToInterlaceUnitInOPP,
2582 Interlace: v->Interlace[k],
2583 VBlankNomDefaultUS: v->ip.VBlankNomDefaultUS,
2584 WritebackDelayTime: v->WritebackDelay[v->VoltageLevel][k]);
2585
2586#ifdef __DML_VBA_DEBUG__
2587 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2588 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2589 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2590#endif
2591 }
2592
2593 v->MaximumMaxVStartupLines = 0;
2594 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2595 v->MaximumMaxVStartupLines = dml_max(a: v->MaximumMaxVStartupLines, b: v->MaxVStartupLines[k]);
2596
2597 // VBA_DELTA
2598 // We don't really care to iterate between the various prefetch modes
2599 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2600
2601 v->UrgentLatency = CalculateUrgentLatency(
2602 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
2603 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
2604 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
2605 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
2606 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
2607 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
2608 FabricClockSingle: v->FabricClock);
2609
2610 v->FractionOfUrgentBandwidth = 0.0;
2611 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2612
2613 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2614
2615 do {
2616 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2617 bool DestinationLineTimesForPrefetchLessThan2 = false;
2618 bool VRatioPrefetchMoreThan4 = false;
2619 double TWait = CalculateTWait(PrefetchMode, DRAMClockChangeLatency: v->DRAMClockChangeLatency, UrgentLatency: v->UrgentLatency, SREnterPlusExitTime: v->SREnterPlusExitTime);
2620
2621 MaxTotalRDBandwidth = 0;
2622
2623 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2624
2625 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2626 Pipe myPipe;
2627
2628 myPipe.DPPCLK = v->DPPCLK[k];
2629 myPipe.DISPCLK = v->DISPCLK;
2630 myPipe.PixelClock = v->PixelClock[k];
2631 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2632 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2633 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2634 myPipe.VRatio = v->VRatio[k];
2635 myPipe.VRatioChroma = v->VRatioChroma[k];
2636 myPipe.SourceScan = v->SourceScan[k];
2637 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2638 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2639 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2640 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2641 myPipe.InterlaceEnable = v->Interlace[k];
2642 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2643 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2644 myPipe.HTotal = v->HTotal[k];
2645 myPipe.DCCEnable = v->DCCEnable[k];
2646 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2647 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2648 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2649 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2650 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2651 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2652 v->ErrorResult[k] = CalculatePrefetchSchedule(
2653 mode_lib,
2654 HostVMInefficiencyFactor,
2655 myPipe: &myPipe,
2656 DSCDelay: v->DSCDelay[k],
2657 DPPCLKDelaySubtotalPlusCNVCFormater: v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2658 DPPCLKDelaySCL: v->DPPCLKDelaySCL,
2659 DPPCLKDelaySCLLBOnly: v->DPPCLKDelaySCLLBOnly,
2660 DPPCLKDelayCNVCCursor: v->DPPCLKDelayCNVCCursor,
2661 DISPCLKDelaySubtotal: v->DISPCLKDelaySubtotal,
2662 DPP_RECOUT_WIDTH: (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2663 OutputFormat: v->OutputFormat[k],
2664 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
2665 VStartup: dml_min(a: v->VStartupLines, b: v->MaxVStartupLines[k]),
2666 MaxVStartup: v->MaxVStartupLines[k],
2667 GPUVMPageTableLevels: v->GPUVMMaxPageTableLevels,
2668 GPUVMEnable: v->GPUVMEnable,
2669 HostVMEnable: v->HostVMEnable,
2670 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2671 HostVMMinPageSize: v->HostVMMinPageSize,
2672 DynamicMetadataEnable: v->DynamicMetadataEnable[k],
2673 DynamicMetadataVMEnabled: v->DynamicMetadataVMEnabled,
2674 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
2675 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
2676 UrgentLatency: v->UrgentLatency,
2677 UrgentExtraLatency: v->UrgentExtraLatency,
2678 TCalc: v->TCalc,
2679 PDEAndMetaPTEBytesFrame: v->PDEAndMetaPTEBytesFrame[k],
2680 MetaRowByte: v->MetaRowByte[k],
2681 PixelPTEBytesPerRow: v->PixelPTEBytesPerRow[k],
2682 PrefetchSourceLinesY: v->PrefetchSourceLinesY[k],
2683 SwathWidthY: v->SwathWidthY[k],
2684 VInitPreFillY: v->VInitPreFillY[k],
2685 MaxNumSwathY: v->MaxNumSwathY[k],
2686 PrefetchSourceLinesC: v->PrefetchSourceLinesC[k],
2687 SwathWidthC: v->SwathWidthC[k],
2688 VInitPreFillC: v->VInitPreFillC[k],
2689 MaxNumSwathC: v->MaxNumSwathC[k],
2690 swath_width_luma_ub: v->swath_width_luma_ub[k],
2691 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2692 SwathHeightY: v->SwathHeightY[k],
2693 SwathHeightC: v->SwathHeightC[k],
2694 TWait,
2695 DSTXAfterScaler: &v->DSTXAfterScaler[k],
2696 DSTYAfterScaler: &v->DSTYAfterScaler[k],
2697 DestinationLinesForPrefetch: &v->DestinationLinesForPrefetch[k],
2698 PrefetchBandwidth: &v->PrefetchBandwidth[k],
2699 DestinationLinesToRequestVMInVBlank: &v->DestinationLinesToRequestVMInVBlank[k],
2700 DestinationLinesToRequestRowInVBlank: &v->DestinationLinesToRequestRowInVBlank[k],
2701 VRatioPrefetchY: &v->VRatioPrefetchY[k],
2702 VRatioPrefetchC: &v->VRatioPrefetchC[k],
2703 RequiredPrefetchPixDataBWLuma: &v->RequiredPrefetchPixDataBWLuma[k],
2704 RequiredPrefetchPixDataBWChroma: &v->RequiredPrefetchPixDataBWChroma[k],
2705 NotEnoughTimeForDynamicMetadata: &v->NotEnoughTimeForDynamicMetadata[k],
2706 Tno_bw: &v->Tno_bw[k],
2707 prefetch_vmrow_bw: &v->prefetch_vmrow_bw[k],
2708 Tdmdl_vm: &v->Tdmdl_vm[k],
2709 Tdmdl: &v->Tdmdl[k],
2710 TSetup: &v->TSetup[k],
2711 VUpdateOffsetPix: &v->VUpdateOffsetPix[k],
2712 VUpdateWidthPix: &v->VUpdateWidthPix[k],
2713 VReadyOffsetPix: &v->VReadyOffsetPix[k]);
2714
2715#ifdef __DML_VBA_DEBUG__
2716 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2717#endif
2718 v->VStartup[k] = dml_min(a: v->VStartupLines, b: v->MaxVStartupLines[k]);
2719 }
2720
2721 v->NoEnoughUrgentLatencyHiding = false;
2722 v->NoEnoughUrgentLatencyHidingPre = false;
2723
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2726 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2727 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2728 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2729
2730 CalculateUrgentBurstFactor(
2731 swath_width_luma_ub: v->swath_width_luma_ub[k],
2732 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2733 SwathHeightY: v->SwathHeightY[k],
2734 SwathHeightC: v->SwathHeightC[k],
2735 LineTime: v->HTotal[k] / v->PixelClock[k],
2736 UrgentLatency: v->UrgentLatency,
2737 CursorBufferSize: v->CursorBufferSize,
2738 CursorWidth: v->CursorWidth[k][0],
2739 CursorBPP: v->CursorBPP[k][0],
2740 VRatio: v->VRatio[k],
2741 VRatioC: v->VRatioChroma[k],
2742 BytePerPixelInDETY: v->BytePerPixelDETY[k],
2743 BytePerPixelInDETC: v->BytePerPixelDETC[k],
2744 DETBufferSizeY: v->DETBufferSizeY[k],
2745 DETBufferSizeC: v->DETBufferSizeC[k],
2746 UrgentBurstFactorCursor: &v->UrgBurstFactorCursor[k],
2747 UrgentBurstFactorLuma: &v->UrgBurstFactorLuma[k],
2748 UrgentBurstFactorChroma: &v->UrgBurstFactorChroma[k],
2749 NotEnoughUrgentLatencyHiding: &v->NoUrgentLatencyHiding[k]);
2750
2751 CalculateUrgentBurstFactor(
2752 swath_width_luma_ub: v->swath_width_luma_ub[k],
2753 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2754 SwathHeightY: v->SwathHeightY[k],
2755 SwathHeightC: v->SwathHeightC[k],
2756 LineTime: v->HTotal[k] / v->PixelClock[k],
2757 UrgentLatency: v->UrgentLatency,
2758 CursorBufferSize: v->CursorBufferSize,
2759 CursorWidth: v->CursorWidth[k][0],
2760 CursorBPP: v->CursorBPP[k][0],
2761 VRatio: v->VRatioPrefetchY[k],
2762 VRatioC: v->VRatioPrefetchC[k],
2763 BytePerPixelInDETY: v->BytePerPixelDETY[k],
2764 BytePerPixelInDETC: v->BytePerPixelDETC[k],
2765 DETBufferSizeY: v->DETBufferSizeY[k],
2766 DETBufferSizeC: v->DETBufferSizeC[k],
2767 UrgentBurstFactorCursor: &v->UrgBurstFactorCursorPre[k],
2768 UrgentBurstFactorLuma: &v->UrgBurstFactorLumaPre[k],
2769 UrgentBurstFactorChroma: &v->UrgBurstFactorChromaPre[k],
2770 NotEnoughUrgentLatencyHiding: &v->NoUrgentLatencyHidingPre[k]);
2771
2772 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2773 + dml_max3(
2774 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2775 b: v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2776 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2777 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2778 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2779 c: v->DPPPerPlane[k]
2780 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2781 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2782 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2783
2784 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2785 + dml_max3(
2786 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2787 b: v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2788 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2789 c: v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2790 + v->cursor_bw_pre[k]);
2791
2792#ifdef __DML_VBA_DEBUG__
2793 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2795 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2796 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2797 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2798
2799 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2800 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2801
2802 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2804 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2805 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2806 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2807 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2809 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2810 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2811 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2812#endif
2813
2814 if (v->DestinationLinesForPrefetch[k] < 2)
2815 DestinationLineTimesForPrefetchLessThan2 = true;
2816
2817 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2818 VRatioPrefetchMoreThan4 = true;
2819
2820 if (v->NoUrgentLatencyHiding[k] == true)
2821 v->NoEnoughUrgentLatencyHiding = true;
2822
2823 if (v->NoUrgentLatencyHidingPre[k] == true)
2824 v->NoEnoughUrgentLatencyHidingPre = true;
2825 }
2826
2827 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2828
2829#ifdef __DML_VBA_DEBUG__
2830 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2831 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2832 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2833#endif
2834
2835 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2836 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2837 v->PrefetchModeSupported = true;
2838 else {
2839 v->PrefetchModeSupported = false;
2840 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2841 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2842 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2843 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2844 }
2845
2846 // PREVIOUS_ERROR
2847 // This error result check was done after the PrefetchModeSupported. So we will
2848 // still try to calculate flip schedule even prefetch mode not supported
2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2850 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2851 v->PrefetchModeSupported = false;
2852 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2853 }
2854 }
2855
2856 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2857 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2859 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2860 - dml_max(
2861 a: v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2862 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2863 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2864 b: v->DPPPerPlane[k]
2865 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2866 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2867 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2868 }
2869
2870 v->TotImmediateFlipBytes = 0;
2871 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2872 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2873 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2874 }
2875 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2876 CalculateFlipSchedule(
2877 mode_lib,
2878 k,
2879 HostVMInefficiencyFactor,
2880 UrgentExtraLatency: v->UrgentExtraLatency,
2881 UrgentLatency: v->UrgentLatency,
2882 PDEAndMetaPTEBytesPerFrame: v->PDEAndMetaPTEBytesFrame[k],
2883 MetaRowBytes: v->MetaRowByte[k],
2884 DPTEBytesPerRow: v->PixelPTEBytesPerRow[k]);
2885 }
2886
2887 v->total_dcn_read_bw_with_flip = 0.0;
2888 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2889 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2890 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2891 + dml_max3(
2892 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2893 b: v->DPPPerPlane[k] * v->final_flip_bw[k]
2894 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2895 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2896 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2897 c: v->DPPPerPlane[k]
2898 * (v->final_flip_bw[k]
2899 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2900 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2901 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2902 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2903 + dml_max3(
2904 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2905 b: v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2906 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2907 c: v->DPPPerPlane[k]
2908 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2909 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2910 }
2911 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2912
2913 v->ImmediateFlipSupported = true;
2914 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2915#ifdef __DML_VBA_DEBUG__
2916 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2917#endif
2918 v->ImmediateFlipSupported = false;
2919 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2920 }
2921 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2922 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2923#ifdef __DML_VBA_DEBUG__
2924 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2925#endif
2926 v->ImmediateFlipSupported = false;
2927 }
2928 }
2929 } else {
2930 v->ImmediateFlipSupported = false;
2931 }
2932
2933 v->PrefetchAndImmediateFlipSupported =
2934 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2935 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2936 v->ImmediateFlipSupported)) ? true : false;
2937#ifdef __DML_VBA_DEBUG__
2938 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2939 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2940 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2941 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2942 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2943 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2944#endif
2945 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2946
2947 v->VStartupLines = v->VStartupLines + 1;
2948 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2949 ASSERT(v->PrefetchAndImmediateFlipSupported);
2950
2951 // Unbounded Request Enabled
2952 CalculateUnboundedRequestAndCompressedBufferSize(
2953 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
2954 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
2955 UseUnboundedRequestingFinal: v->UseUnboundedRequesting,
2956 TotalActiveDPP: v->TotalActiveDPP,
2957 NoChromaPlanes,
2958 MaxNumDPP: v->MaxNumDPP,
2959 CompressedBufferSegmentSizeInkByteFinal: v->CompressedBufferSegmentSizeInkByte,
2960 Output: v->Output,
2961 UnboundedRequestEnabled: &v->UnboundedRequestEnabled,
2962 CompressedBufferSizeInkByte: &v->CompressedBufferSizeInkByte);
2963
2964 //Watermarks and NB P-State/DRAM Clock Change Support
2965 {
2966 enum clock_change_support DRAMClockChangeSupport; // dummy
2967
2968 CalculateWatermarksAndDRAMSpeedChangeSupport(
2969 mode_lib,
2970 PrefetchMode,
2971 DCFCLK: v->DCFCLK,
2972 ReturnBW: v->ReturnBW,
2973 UrgentLatency: v->UrgentLatency,
2974 ExtraLatency: v->UrgentExtraLatency,
2975 SOCCLK: v->SOCCLK,
2976 DCFCLKDeepSleep: v->DCFCLKDeepSleep,
2977 DETBufferSizeY: v->DETBufferSizeY,
2978 DETBufferSizeC: v->DETBufferSizeC,
2979 SwathHeightY: v->SwathHeightY,
2980 SwathHeightC: v->SwathHeightC,
2981 SwathWidthY: v->SwathWidthY,
2982 SwathWidthC: v->SwathWidthC,
2983 DPPPerPlane: v->DPPPerPlane,
2984 BytePerPixelDETY: v->BytePerPixelDETY,
2985 BytePerPixelDETC: v->BytePerPixelDETC,
2986 UnboundedRequestEnabled: v->UnboundedRequestEnabled,
2987 CompressedBufferSizeInkByte: v->CompressedBufferSizeInkByte,
2988 DRAMClockChangeSupport: &DRAMClockChangeSupport,
2989 StutterExitWatermark: &v->StutterExitWatermark,
2990 StutterEnterPlusExitWatermark: &v->StutterEnterPlusExitWatermark,
2991 Z8StutterExitWatermark: &v->Z8StutterExitWatermark,
2992 Z8StutterEnterPlusExitWatermark: &v->Z8StutterEnterPlusExitWatermark);
2993
2994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2995 if (v->WritebackEnable[k] == true) {
2996 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2997 a: 0,
2998 b: v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2999 } else {
3000 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3001 }
3002 }
3003 }
3004
3005 //Display Pipeline Delivery Time in Prefetch, Groups
3006 CalculatePixelDeliveryTimes(
3007 NumberOfActivePlanes: v->NumberOfActivePlanes,
3008 VRatio: v->VRatio,
3009 VRatioChroma: v->VRatioChroma,
3010 VRatioPrefetchY: v->VRatioPrefetchY,
3011 VRatioPrefetchC: v->VRatioPrefetchC,
3012 swath_width_luma_ub: v->swath_width_luma_ub,
3013 swath_width_chroma_ub: v->swath_width_chroma_ub,
3014 DPPPerPlane: v->DPPPerPlane,
3015 HRatio: v->HRatio,
3016 HRatioChroma: v->HRatioChroma,
3017 PixelClock: v->PixelClock,
3018 PSCL_THROUGHPUT: v->PSCL_THROUGHPUT_LUMA,
3019 PSCL_THROUGHPUT_CHROMA: v->PSCL_THROUGHPUT_CHROMA,
3020 DPPCLK: v->DPPCLK,
3021 BytePerPixelC: v->BytePerPixelC,
3022 SourceScan: v->SourceScan,
3023 NumberOfCursors: v->NumberOfCursors,
3024 CursorWidth: v->CursorWidth,
3025 CursorBPP: v->CursorBPP,
3026 BlockWidth256BytesY: v->BlockWidth256BytesY,
3027 BlockHeight256BytesY: v->BlockHeight256BytesY,
3028 BlockWidth256BytesC: v->BlockWidth256BytesC,
3029 BlockHeight256BytesC: v->BlockHeight256BytesC,
3030 DisplayPipeLineDeliveryTimeLuma: v->DisplayPipeLineDeliveryTimeLuma,
3031 DisplayPipeLineDeliveryTimeChroma: v->DisplayPipeLineDeliveryTimeChroma,
3032 DisplayPipeLineDeliveryTimeLumaPrefetch: v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3033 DisplayPipeLineDeliveryTimeChromaPrefetch: v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3034 DisplayPipeRequestDeliveryTimeLuma: v->DisplayPipeRequestDeliveryTimeLuma,
3035 DisplayPipeRequestDeliveryTimeChroma: v->DisplayPipeRequestDeliveryTimeChroma,
3036 DisplayPipeRequestDeliveryTimeLumaPrefetch: v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3037 DisplayPipeRequestDeliveryTimeChromaPrefetch: v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3038 CursorRequestDeliveryTime: v->CursorRequestDeliveryTime,
3039 CursorRequestDeliveryTimePrefetch: v->CursorRequestDeliveryTimePrefetch);
3040
3041 CalculateMetaAndPTETimes(
3042 NumberOfActivePlanes: v->NumberOfActivePlanes,
3043 GPUVMEnable: v->GPUVMEnable,
3044 MetaChunkSize: v->MetaChunkSize,
3045 MinMetaChunkSizeBytes: v->MinMetaChunkSizeBytes,
3046 HTotal: v->HTotal,
3047 VRatio: v->VRatio,
3048 VRatioChroma: v->VRatioChroma,
3049 DestinationLinesToRequestRowInVBlank: v->DestinationLinesToRequestRowInVBlank,
3050 DestinationLinesToRequestRowInImmediateFlip: v->DestinationLinesToRequestRowInImmediateFlip,
3051 DCCEnable: v->DCCEnable,
3052 PixelClock: v->PixelClock,
3053 BytePerPixelY: v->BytePerPixelY,
3054 BytePerPixelC: v->BytePerPixelC,
3055 SourceScan: v->SourceScan,
3056 dpte_row_height: v->dpte_row_height,
3057 dpte_row_height_chroma: v->dpte_row_height_chroma,
3058 meta_row_width: v->meta_row_width,
3059 meta_row_width_chroma: v->meta_row_width_chroma,
3060 meta_row_height: v->meta_row_height,
3061 meta_row_height_chroma: v->meta_row_height_chroma,
3062 meta_req_width: v->meta_req_width,
3063 meta_req_width_chroma: v->meta_req_width_chroma,
3064 meta_req_height: v->meta_req_height,
3065 meta_req_height_chroma: v->meta_req_height_chroma,
3066 dpte_group_bytes: v->dpte_group_bytes,
3067 PTERequestSizeY: v->PTERequestSizeY,
3068 PTERequestSizeC: v->PTERequestSizeC,
3069 PixelPTEReqWidthY: v->PixelPTEReqWidthY,
3070 PixelPTEReqHeightY: v->PixelPTEReqHeightY,
3071 PixelPTEReqWidthC: v->PixelPTEReqWidthC,
3072 PixelPTEReqHeightC: v->PixelPTEReqHeightC,
3073 dpte_row_width_luma_ub: v->dpte_row_width_luma_ub,
3074 dpte_row_width_chroma_ub: v->dpte_row_width_chroma_ub,
3075 DST_Y_PER_PTE_ROW_NOM_L: v->DST_Y_PER_PTE_ROW_NOM_L,
3076 DST_Y_PER_PTE_ROW_NOM_C: v->DST_Y_PER_PTE_ROW_NOM_C,
3077 DST_Y_PER_META_ROW_NOM_L: v->DST_Y_PER_META_ROW_NOM_L,
3078 DST_Y_PER_META_ROW_NOM_C: v->DST_Y_PER_META_ROW_NOM_C,
3079 TimePerMetaChunkNominal: v->TimePerMetaChunkNominal,
3080 TimePerChromaMetaChunkNominal: v->TimePerChromaMetaChunkNominal,
3081 TimePerMetaChunkVBlank: v->TimePerMetaChunkVBlank,
3082 TimePerChromaMetaChunkVBlank: v->TimePerChromaMetaChunkVBlank,
3083 TimePerMetaChunkFlip: v->TimePerMetaChunkFlip,
3084 TimePerChromaMetaChunkFlip: v->TimePerChromaMetaChunkFlip,
3085 time_per_pte_group_nom_luma: v->time_per_pte_group_nom_luma,
3086 time_per_pte_group_vblank_luma: v->time_per_pte_group_vblank_luma,
3087 time_per_pte_group_flip_luma: v->time_per_pte_group_flip_luma,
3088 time_per_pte_group_nom_chroma: v->time_per_pte_group_nom_chroma,
3089 time_per_pte_group_vblank_chroma: v->time_per_pte_group_vblank_chroma,
3090 time_per_pte_group_flip_chroma: v->time_per_pte_group_flip_chroma);
3091
3092 CalculateVMGroupAndRequestTimes(
3093 NumberOfActivePlanes: v->NumberOfActivePlanes,
3094 GPUVMEnable: v->GPUVMEnable,
3095 GPUVMMaxPageTableLevels: v->GPUVMMaxPageTableLevels,
3096 HTotal: v->HTotal,
3097 BytePerPixelC: v->BytePerPixelC,
3098 DestinationLinesToRequestVMInVBlank: v->DestinationLinesToRequestVMInVBlank,
3099 DestinationLinesToRequestVMInImmediateFlip: v->DestinationLinesToRequestVMInImmediateFlip,
3100 DCCEnable: v->DCCEnable,
3101 PixelClock: v->PixelClock,
3102 dpte_row_width_luma_ub: v->dpte_row_width_luma_ub,
3103 dpte_row_width_chroma_ub: v->dpte_row_width_chroma_ub,
3104 vm_group_bytes: v->vm_group_bytes,
3105 dpde0_bytes_per_frame_ub_l: v->dpde0_bytes_per_frame_ub_l,
3106 dpde0_bytes_per_frame_ub_c: v->dpde0_bytes_per_frame_ub_c,
3107 meta_pte_bytes_per_frame_ub_l: v->meta_pte_bytes_per_frame_ub_l,
3108 meta_pte_bytes_per_frame_ub_c: v->meta_pte_bytes_per_frame_ub_c,
3109 TimePerVMGroupVBlank: v->TimePerVMGroupVBlank,
3110 TimePerVMGroupFlip: v->TimePerVMGroupFlip,
3111 TimePerVMRequestVBlank: v->TimePerVMRequestVBlank,
3112 TimePerVMRequestFlip: v->TimePerVMRequestFlip);
3113
3114 // Min TTUVBlank
3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3116 if (PrefetchMode == 0) {
3117 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3118 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3119 v->MinTTUVBlank[k] = dml_max(
3120 a: v->DRAMClockChangeWatermark,
3121 b: dml_max(a: v->StutterEnterPlusExitWatermark, b: v->UrgentWatermark));
3122 } else if (PrefetchMode == 1) {
3123 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3124 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3125 v->MinTTUVBlank[k] = dml_max(a: v->StutterEnterPlusExitWatermark, b: v->UrgentWatermark);
3126 } else {
3127 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3128 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3129 v->MinTTUVBlank[k] = v->UrgentWatermark;
3130 }
3131 if (!v->DynamicMetadataEnable[k])
3132 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3133 }
3134
3135 // DCC Configuration
3136 v->ActiveDPPs = 0;
3137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3138 CalculateDCCConfiguration(DCCEnabled: v->DCCEnable[k], DCCProgrammingAssumesScanDirectionUnknown: false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3139 SourcePixelFormat: v->SourcePixelFormat[k],
3140 SurfaceWidthLuma: v->SurfaceWidthY[k],
3141 SurfaceWidthChroma: v->SurfaceWidthC[k],
3142 SurfaceHeightLuma: v->SurfaceHeightY[k],
3143 SurfaceHeightChroma: v->SurfaceHeightC[k],
3144 DETBufferSize: v->DETBufferSizeInKByte[0] * 1024,
3145 RequestHeight256ByteLuma: v->BlockHeight256BytesY[k],
3146 RequestHeight256ByteChroma: v->BlockHeight256BytesC[k],
3147 TilingFormat: v->SurfaceTiling[k],
3148 BytePerPixelY: v->BytePerPixelY[k],
3149 BytePerPixelC: v->BytePerPixelC[k],
3150 BytePerPixelDETY: v->BytePerPixelDETY[k],
3151 BytePerPixelDETC: v->BytePerPixelDETC[k],
3152 ScanOrientation: v->SourceScan[k],
3153 MaxUncompressedBlockLuma: &v->DCCYMaxUncompressedBlock[k],
3154 MaxUncompressedBlockChroma: &v->DCCCMaxUncompressedBlock[k],
3155 MaxCompressedBlockLuma: &v->DCCYMaxCompressedBlock[k],
3156 MaxCompressedBlockChroma: &v->DCCCMaxCompressedBlock[k],
3157 IndependentBlockLuma: &v->DCCYIndependentBlock[k],
3158 IndependentBlockChroma: &v->DCCCIndependentBlock[k]);
3159 }
3160
3161 // VStartup Adjustment
3162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3163 bool isInterlaceTiming;
3164 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3165#ifdef __DML_VBA_DEBUG__
3166 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3167#endif
3168
3169 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3170
3171#ifdef __DML_VBA_DEBUG__
3172 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3173 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3174 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3175 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3176#endif
3177
3178 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3179 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3180 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3181 }
3182
3183 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3184 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3185 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3186 v->MIN_DST_Y_NEXT_START[k] = dml_floor(a: (v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, granularity: 1.0);
3187 } else {
3188 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3189 }
3190 v->MIN_DST_Y_NEXT_START[k] += dml_floor(a: 4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), granularity: 1.0) / 4.0;
3191 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3192 <= (isInterlaceTiming ?
3193 dml_floor(a: (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, granularity: 1.0) :
3194 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3195 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3196 } else {
3197 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3198 }
3199#ifdef __DML_VBA_DEBUG__
3200 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3201 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3202 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3203 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3204 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3205 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3206 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3207 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3208 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3209 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3210 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3211#endif
3212 }
3213
3214 {
3215 //Maximum Bandwidth Used
3216 double TotalWRBandwidth = 0;
3217 double MaxPerPlaneVActiveWRBandwidth = 0;
3218 double WRBandwidth = 0;
3219
3220 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3221 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3222 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3223 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3224 } else if (v->WritebackEnable[k] == true) {
3225 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3226 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3227 }
3228 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3229 MaxPerPlaneVActiveWRBandwidth = dml_max(a: MaxPerPlaneVActiveWRBandwidth, b: WRBandwidth);
3230 }
3231
3232 v->TotalDataReadBandwidth = 0;
3233 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3234 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3235 }
3236 }
3237 // Stutter Efficiency
3238 CalculateStutterEfficiency(
3239 mode_lib,
3240 CompressedBufferSizeInkByte: v->CompressedBufferSizeInkByte,
3241 UnboundedRequestEnabled: v->UnboundedRequestEnabled,
3242 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
3243 MetaFIFOSizeInKEntries: v->MetaFIFOSizeInKEntries,
3244 ZeroSizeBufferEntries: v->ZeroSizeBufferEntries,
3245 NumberOfActivePlanes: v->NumberOfActivePlanes,
3246 ROBBufferSizeInKByte: v->ROBBufferSizeInKByte,
3247 TotalDataReadBandwidth: v->TotalDataReadBandwidth,
3248 DCFCLK: v->DCFCLK,
3249 ReturnBW: v->ReturnBW,
3250 COMPBUF_RESERVED_SPACE_64B: v->COMPBUF_RESERVED_SPACE_64B,
3251 COMPBUF_RESERVED_SPACE_ZS: v->COMPBUF_RESERVED_SPACE_ZS,
3252 SRExitTime: v->SRExitTime,
3253 SRExitZ8Time: v->SRExitZ8Time,
3254 SynchronizedVBlank: v->SynchronizedVBlank,
3255 Z8StutterEnterPlusExitWatermark: v->StutterEnterPlusExitWatermark,
3256 StutterEnterPlusExitWatermark: v->Z8StutterEnterPlusExitWatermark,
3257 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
3258 Interlace: v->Interlace,
3259 MinTTUVBlank: v->MinTTUVBlank,
3260 DPPPerPlane: v->DPPPerPlane,
3261 DETBufferSizeY: v->DETBufferSizeY,
3262 BytePerPixelY: v->BytePerPixelY,
3263 BytePerPixelDETY: v->BytePerPixelDETY,
3264 SwathWidthY: v->SwathWidthY,
3265 SwathHeightY: v->SwathHeightY,
3266 SwathHeightC: v->SwathHeightC,
3267 NetDCCRateLuma: v->DCCRateLuma,
3268 NetDCCRateChroma: v->DCCRateChroma,
3269 DCCFractionOfZeroSizeRequestsLuma: v->DCCFractionOfZeroSizeRequestsLuma,
3270 DCCFractionOfZeroSizeRequestsChroma: v->DCCFractionOfZeroSizeRequestsChroma,
3271 HTotal: v->HTotal,
3272 VTotal: v->VTotal,
3273 PixelClock: v->PixelClock,
3274 VRatio: v->VRatio,
3275 SourceScan: v->SourceScan,
3276 BlockHeight256BytesY: v->BlockHeight256BytesY,
3277 BlockWidth256BytesY: v->BlockWidth256BytesY,
3278 BlockHeight256BytesC: v->BlockHeight256BytesC,
3279 BlockWidth256BytesC: v->BlockWidth256BytesC,
3280 DCCYMaxUncompressedBlock: v->DCCYMaxUncompressedBlock,
3281 DCCCMaxUncompressedBlock: v->DCCCMaxUncompressedBlock,
3282 VActive: v->VActive,
3283 DCCEnable: v->DCCEnable,
3284 WritebackEnable: v->WritebackEnable,
3285 ReadBandwidthPlaneLuma: v->ReadBandwidthPlaneLuma,
3286 ReadBandwidthPlaneChroma: v->ReadBandwidthPlaneChroma,
3287 meta_row_bw: v->meta_row_bw,
3288 dpte_row_bw: v->dpte_row_bw,
3289 StutterEfficiencyNotIncludingVBlank: &v->StutterEfficiencyNotIncludingVBlank,
3290 StutterEfficiency: &v->StutterEfficiency,
3291 NumberOfStutterBurstsPerFrame: &v->NumberOfStutterBurstsPerFrame,
3292 Z8StutterEfficiencyNotIncludingVBlank: &v->Z8StutterEfficiencyNotIncludingVBlank,
3293 Z8StutterEfficiency: &v->Z8StutterEfficiency,
3294 Z8NumberOfStutterBurstsPerFrame: &v->Z8NumberOfStutterBurstsPerFrame,
3295 StutterPeriod: &v->StutterPeriod);
3296}
3297
3298static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3299{
3300 struct vba_vars_st *v = &mode_lib->vba;
3301 // Display Pipe Configuration
3302 double BytePerPixDETY[DC__NUM_DPP__MAX];
3303 double BytePerPixDETC[DC__NUM_DPP__MAX];
3304 int BytePerPixY[DC__NUM_DPP__MAX];
3305 int BytePerPixC[DC__NUM_DPP__MAX];
3306 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3307 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3308 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3309 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3310 double dummy1[DC__NUM_DPP__MAX];
3311 double dummy2[DC__NUM_DPP__MAX];
3312 double dummy3[DC__NUM_DPP__MAX];
3313 double dummy4[DC__NUM_DPP__MAX];
3314 int dummy5[DC__NUM_DPP__MAX];
3315 int dummy6[DC__NUM_DPP__MAX];
3316 bool dummy7[DC__NUM_DPP__MAX];
3317 bool dummysinglestring;
3318
3319 unsigned int k;
3320
3321 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3322
3323 CalculateBytePerPixelAnd256BBlockSizes(
3324 SourcePixelFormat: v->SourcePixelFormat[k],
3325 SurfaceTiling: v->SurfaceTiling[k],
3326 BytePerPixelY: &BytePerPixY[k],
3327 BytePerPixelC: &BytePerPixC[k],
3328 BytePerPixelDETY: &BytePerPixDETY[k],
3329 BytePerPixelDETC: &BytePerPixDETC[k],
3330 BlockHeight256BytesY: &Read256BytesBlockHeightY[k],
3331 BlockHeight256BytesC: &Read256BytesBlockHeightC[k],
3332 BlockWidth256BytesY: &Read256BytesBlockWidthY[k],
3333 BlockWidth256BytesC: &Read256BytesBlockWidthC[k]);
3334 }
3335
3336 CalculateSwathAndDETConfiguration(
3337 ForceSingleDPP: false,
3338 NumberOfActivePlanes: v->NumberOfActivePlanes,
3339 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
3340 MaximumSwathWidthLuma: dummy1,
3341 MaximumSwathWidthChroma: dummy2,
3342 SourceScan: v->SourceScan,
3343 SourcePixelFormat: v->SourcePixelFormat,
3344 SurfaceTiling: v->SurfaceTiling,
3345 ViewportWidth: v->ViewportWidth,
3346 ViewportHeight: v->ViewportHeight,
3347 SurfaceWidthY: v->SurfaceWidthY,
3348 SurfaceWidthC: v->SurfaceWidthC,
3349 SurfaceHeightY: v->SurfaceHeightY,
3350 SurfaceHeightC: v->SurfaceHeightC,
3351 Read256BytesBlockHeightY,
3352 Read256BytesBlockHeightC,
3353 Read256BytesBlockWidthY,
3354 Read256BytesBlockWidthC,
3355 ODMCombineEnabled: v->ODMCombineEnabled,
3356 BlendingAndTiming: v->BlendingAndTiming,
3357 BytePerPixY,
3358 BytePerPixC,
3359 BytePerPixDETY,
3360 BytePerPixDETC,
3361 HActive: v->HActive,
3362 HRatio: v->HRatio,
3363 HRatioChroma: v->HRatioChroma,
3364 DPPPerPlane: v->DPPPerPlane,
3365 swath_width_luma_ub: dummy5,
3366 swath_width_chroma_ub: dummy6,
3367 SwathWidth: dummy3,
3368 SwathWidthChroma: dummy4,
3369 SwathHeightY: v->SwathHeightY,
3370 SwathHeightC: v->SwathHeightC,
3371 DETBufferSizeY: v->DETBufferSizeY,
3372 DETBufferSizeC: v->DETBufferSizeC,
3373 ViewportSizeSupportPerPlane: dummy7,
3374 ViewportSizeSupport: &dummysinglestring);
3375}
3376
3377static bool CalculateBytePerPixelAnd256BBlockSizes(
3378 enum source_format_class SourcePixelFormat,
3379 enum dm_swizzle_mode SurfaceTiling,
3380 unsigned int *BytePerPixelY,
3381 unsigned int *BytePerPixelC,
3382 double *BytePerPixelDETY,
3383 double *BytePerPixelDETC,
3384 unsigned int *BlockHeight256BytesY,
3385 unsigned int *BlockHeight256BytesC,
3386 unsigned int *BlockWidth256BytesY,
3387 unsigned int *BlockWidth256BytesC)
3388{
3389 if (SourcePixelFormat == dm_444_64) {
3390 *BytePerPixelDETY = 8;
3391 *BytePerPixelDETC = 0;
3392 *BytePerPixelY = 8;
3393 *BytePerPixelC = 0;
3394 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3395 *BytePerPixelDETY = 4;
3396 *BytePerPixelDETC = 0;
3397 *BytePerPixelY = 4;
3398 *BytePerPixelC = 0;
3399 } else if (SourcePixelFormat == dm_444_16) {
3400 *BytePerPixelDETY = 2;
3401 *BytePerPixelDETC = 0;
3402 *BytePerPixelY = 2;
3403 *BytePerPixelC = 0;
3404 } else if (SourcePixelFormat == dm_444_8) {
3405 *BytePerPixelDETY = 1;
3406 *BytePerPixelDETC = 0;
3407 *BytePerPixelY = 1;
3408 *BytePerPixelC = 0;
3409 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3410 *BytePerPixelDETY = 4;
3411 *BytePerPixelDETC = 1;
3412 *BytePerPixelY = 4;
3413 *BytePerPixelC = 1;
3414 } else if (SourcePixelFormat == dm_420_8) {
3415 *BytePerPixelDETY = 1;
3416 *BytePerPixelDETC = 2;
3417 *BytePerPixelY = 1;
3418 *BytePerPixelC = 2;
3419 } else if (SourcePixelFormat == dm_420_12) {
3420 *BytePerPixelDETY = 2;
3421 *BytePerPixelDETC = 4;
3422 *BytePerPixelY = 2;
3423 *BytePerPixelC = 4;
3424 } else {
3425 *BytePerPixelDETY = 4.0 / 3;
3426 *BytePerPixelDETC = 8.0 / 3;
3427 *BytePerPixelY = 2;
3428 *BytePerPixelC = 4;
3429 }
3430
3431 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3432 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3433 if (SurfaceTiling == dm_sw_linear) {
3434 *BlockHeight256BytesY = 1;
3435 } else if (SourcePixelFormat == dm_444_64) {
3436 *BlockHeight256BytesY = 4;
3437 } else if (SourcePixelFormat == dm_444_8) {
3438 *BlockHeight256BytesY = 16;
3439 } else {
3440 *BlockHeight256BytesY = 8;
3441 }
3442 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3443 *BlockHeight256BytesC = 0;
3444 *BlockWidth256BytesC = 0;
3445 } else {
3446 if (SurfaceTiling == dm_sw_linear) {
3447 *BlockHeight256BytesY = 1;
3448 *BlockHeight256BytesC = 1;
3449 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3450 *BlockHeight256BytesY = 8;
3451 *BlockHeight256BytesC = 16;
3452 } else if (SourcePixelFormat == dm_420_8) {
3453 *BlockHeight256BytesY = 16;
3454 *BlockHeight256BytesC = 8;
3455 } else {
3456 *BlockHeight256BytesY = 8;
3457 *BlockHeight256BytesC = 8;
3458 }
3459 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3460 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3461 }
3462 return true;
3463}
3464
/*
 * Pick the wait time for the given prefetch mode.
 *
 * Mode 0 returns the largest of (DRAMClockChangeLatency + UrgentLatency),
 * SREnterPlusExitTime and UrgentLatency; mode 1 drops the DRAM clock change
 * term; any other mode returns UrgentLatency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3475
3476double dml314_CalculateWriteBackDISPCLK(
3477 enum source_format_class WritebackPixelFormat,
3478 double PixelClock,
3479 double WritebackHRatio,
3480 double WritebackVRatio,
3481 unsigned int WritebackHTaps,
3482 unsigned int WritebackVTaps,
3483 long WritebackSourceWidth,
3484 long WritebackDestinationWidth,
3485 unsigned int HTotal,
3486 unsigned int WritebackLineBufferSize)
3487{
3488 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3489
3490 DISPCLK_H = PixelClock * dml_ceil(a: WritebackHTaps / 8.0, granularity: 1) / WritebackHRatio;
3491 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(a: WritebackDestinationWidth / 6.0, granularity: 1) + 8.0) / HTotal;
3492 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3493 return dml_max3(a: DISPCLK_H, b: DISPCLK_V, c: DISPCLK_HB);
3494}
3495
3496static double CalculateWriteBackDelay(
3497 enum source_format_class WritebackPixelFormat,
3498 double WritebackHRatio,
3499 double WritebackVRatio,
3500 unsigned int WritebackVTaps,
3501 int WritebackDestinationWidth,
3502 int WritebackDestinationHeight,
3503 int WritebackSourceHeight,
3504 unsigned int HTotal)
3505{
3506 double CalculateWriteBackDelay;
3507 double Line_length;
3508 double Output_lines_last_notclamped;
3509 double WritebackVInit;
3510
3511 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3512 Line_length = dml_max(a: (double) WritebackDestinationWidth, b: dml_ceil(a: WritebackDestinationWidth / 6.0, granularity: 1) * WritebackVTaps);
3513 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(a: (WritebackSourceHeight - WritebackVInit) / WritebackVRatio, granularity: 1);
3514 if (Output_lines_last_notclamped < 0) {
3515 CalculateWriteBackDelay = 0;
3516 } else {
3517 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3518 }
3519 return CalculateWriteBackDelay;
3520}
3521
/*
 * Compute the VUPDATE/VREADY pixel offsets and the dynamic metadata timing
 * terms for one pipe.
 *
 * Outputs:
 *   VUpdateWidthPix   ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 *   VReadyOffsetPix   ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   VUpdateOffsetPix  ceil(HTotal / 4)
 *   TSetup            sum of the three pixel counts above divided by PixelClock
 *   Tdmbf             DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   Tdmec             HTotal / PixelClock (one line time)
 *   Tdmsks            half of VBlank lines when no "lines before active" is
 *                     required, otherwise that many lines; halved again for
 *                     interlaced timing not handled by the OPP
 *
 * where the repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles, so they need %f rather than %d. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3565
3566static void CalculateRowBandwidth(
3567 bool GPUVMEnable,
3568 enum source_format_class SourcePixelFormat,
3569 double VRatio,
3570 double VRatioChroma,
3571 bool DCCEnable,
3572 double LineTime,
3573 unsigned int MetaRowByteLuma,
3574 unsigned int MetaRowByteChroma,
3575 unsigned int meta_row_height_luma,
3576 unsigned int meta_row_height_chroma,
3577 unsigned int PixelPTEBytesPerRowLuma,
3578 unsigned int PixelPTEBytesPerRowChroma,
3579 unsigned int dpte_row_height_luma,
3580 unsigned int dpte_row_height_chroma,
3581 double *meta_row_bw,
3582 double *dpte_row_bw)
3583{
3584 if (DCCEnable != true) {
3585 *meta_row_bw = 0;
3586 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3587 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3588 } else {
3589 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3590 }
3591
3592 if (GPUVMEnable != true) {
3593 *dpte_row_bw = 0;
3594 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3596 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3597 } else {
3598 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3599 }
3600}
3601
3602static void CalculateFlipSchedule(
3603 struct display_mode_lib *mode_lib,
3604 unsigned int k,
3605 double HostVMInefficiencyFactor,
3606 double UrgentExtraLatency,
3607 double UrgentLatency,
3608 double PDEAndMetaPTEBytesPerFrame,
3609 double MetaRowBytes,
3610 double DPTEBytesPerRow)
3611{
3612 struct vba_vars_st *v = &mode_lib->vba;
3613 double min_row_time = 0.0;
3614 unsigned int HostVMDynamicLevelsTrips;
3615 double TimeForFetchingMetaPTEImmediateFlip;
3616 double TimeForFetchingRowInVBlankImmediateFlip;
3617 double ImmediateFlipBW = 1.0;
3618 double LineTime = v->HTotal[k] / v->PixelClock[k];
3619
3620 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3621 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3622 } else {
3623 HostVMDynamicLevelsTrips = 0;
3624 }
3625
3626 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3627 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3628 }
3629
3630 if (v->GPUVMEnable == true) {
3631 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3632 a: v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3633 b: UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3634 c: LineTime / 4.0);
3635 } else {
3636 TimeForFetchingMetaPTEImmediateFlip = 0;
3637 }
3638
3639 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(a: 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), granularity: 1) / 4.0;
3640 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3641 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3642 a: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3643 b: UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3644 c: LineTime / 4);
3645 } else {
3646 TimeForFetchingRowInVBlankImmediateFlip = 0;
3647 }
3648
3649 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(a: 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), granularity: 1) / 4.0;
3650
3651 if (v->GPUVMEnable == true) {
3652 v->final_flip_bw[k] = dml_max(
3653 a: PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3654 b: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3655 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3656 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3657 } else {
3658 v->final_flip_bw[k] = 0;
3659 }
3660
3661 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3662 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3663 min_row_time = dml_min(a: v->dpte_row_height[k] * LineTime / v->VRatio[k], b: v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3664 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3665 min_row_time = dml_min(a: v->meta_row_height[k] * LineTime / v->VRatio[k], b: v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3666 } else {
3667 min_row_time = dml_min4(
3668 a: v->dpte_row_height[k] * LineTime / v->VRatio[k],
3669 b: v->meta_row_height[k] * LineTime / v->VRatio[k],
3670 c: v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3671 d: v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3672 }
3673 } else {
3674 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3675 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3676 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3677 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3678 } else {
3679 min_row_time = dml_min(a: v->dpte_row_height[k] * LineTime / v->VRatio[k], b: v->meta_row_height[k] * LineTime / v->VRatio[k]);
3680 }
3681 }
3682
3683 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3684 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3685 v->ImmediateFlipSupportedForPipe[k] = false;
3686 } else {
3687 v->ImmediateFlipSupportedForPipe[k] = true;
3688 }
3689
3690#ifdef __DML_VBA_DEBUG__
3691 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3692 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3693 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3694 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3695 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3696 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3697#endif
3698
3699}
3700
3701static double TruncToValidBPP(
3702 double LinkBitRate,
3703 int Lanes,
3704 int HTotal,
3705 int HActive,
3706 double PixelClock,
3707 double DesiredBPP,
3708 bool DSCEnable,
3709 enum output_encoder_class Output,
3710 enum output_format_class Format,
3711 unsigned int DSCInputBitPerComponent,
3712 int DSCSlices,
3713 int AudioRate,
3714 int AudioLayout,
3715 enum odm_combine_mode ODMCombine)
3716{
3717 double MaxLinkBPP;
3718 int MinDSCBPP;
3719 double MaxDSCBPP;
3720 int NonDSCBPP0;
3721 int NonDSCBPP1;
3722 int NonDSCBPP2;
3723
3724 if (Format == dm_420) {
3725 NonDSCBPP0 = 12;
3726 NonDSCBPP1 = 15;
3727 NonDSCBPP2 = 18;
3728 MinDSCBPP = 6;
3729 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3730 } else if (Format == dm_444) {
3731 NonDSCBPP0 = 24;
3732 NonDSCBPP1 = 30;
3733 NonDSCBPP2 = 36;
3734 MinDSCBPP = 8;
3735 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3736 } else {
3737
3738 NonDSCBPP0 = 16;
3739 NonDSCBPP1 = 20;
3740 NonDSCBPP2 = 24;
3741
3742 if (Format == dm_n422) {
3743 MinDSCBPP = 7;
3744 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3745 } else {
3746 MinDSCBPP = 8;
3747 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3748 }
3749 }
3750
3751 if (DSCEnable && Output == dm_dp) {
3752 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3753 } else {
3754 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3755 }
3756
3757 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3758 MaxLinkBPP = 16;
3759 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3760 MaxLinkBPP = 32;
3761 }
3762
3763 if (DesiredBPP == 0) {
3764 if (DSCEnable) {
3765 if (MaxLinkBPP < MinDSCBPP) {
3766 return BPP_INVALID;
3767 } else if (MaxLinkBPP >= MaxDSCBPP) {
3768 return MaxDSCBPP;
3769 } else {
3770 return dml_floor(a: 16.0 * MaxLinkBPP, granularity: 1.0) / 16.0;
3771 }
3772 } else {
3773 if (MaxLinkBPP >= NonDSCBPP2) {
3774 return NonDSCBPP2;
3775 } else if (MaxLinkBPP >= NonDSCBPP1) {
3776 return NonDSCBPP1;
3777 } else if (MaxLinkBPP >= NonDSCBPP0) {
3778 return 16.0;
3779 } else {
3780 return BPP_INVALID;
3781 }
3782 }
3783 } else {
3784 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3785 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3786 return BPP_INVALID;
3787 } else {
3788 return DesiredBPP;
3789 }
3790 }
3791 return BPP_INVALID;
3792}
3793
3794static noinline void CalculatePrefetchSchedulePerPlane(
3795 struct display_mode_lib *mode_lib,
3796 double HostVMInefficiencyFactor,
3797 int i,
3798 unsigned int j,
3799 unsigned int k)
3800{
3801 struct vba_vars_st *v = &mode_lib->vba;
3802 Pipe myPipe;
3803
3804 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3805 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3806 myPipe.PixelClock = v->PixelClock[k];
3807 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3808 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3809 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3810 myPipe.VRatio = mode_lib->vba.VRatio[k];
3811 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3812
3813 myPipe.SourceScan = v->SourceScan[k];
3814 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3815 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3816 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3817 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3818 myPipe.InterlaceEnable = v->Interlace[k];
3819 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3820 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3821 myPipe.HTotal = v->HTotal[k];
3822 myPipe.DCCEnable = v->DCCEnable[k];
3823 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3824 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3825 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3826 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3827 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3828 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3829 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3830 mode_lib,
3831 HostVMInefficiencyFactor,
3832 myPipe: &myPipe,
3833 DSCDelay: v->DSCDelayPerState[i][k],
3834 DPPCLKDelaySubtotalPlusCNVCFormater: v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3835 DPPCLKDelaySCL: v->DPPCLKDelaySCL,
3836 DPPCLKDelaySCLLBOnly: v->DPPCLKDelaySCLLBOnly,
3837 DPPCLKDelayCNVCCursor: v->DPPCLKDelayCNVCCursor,
3838 DISPCLKDelaySubtotal: v->DISPCLKDelaySubtotal,
3839 DPP_RECOUT_WIDTH: v->SwathWidthYThisState[k] / v->HRatio[k],
3840 OutputFormat: v->OutputFormat[k],
3841 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
3842 VStartup: dml_min(a: v->MaxVStartup, b: v->MaximumVStartup[i][j][k]),
3843 MaxVStartup: v->MaximumVStartup[i][j][k],
3844 GPUVMPageTableLevels: v->GPUVMMaxPageTableLevels,
3845 GPUVMEnable: v->GPUVMEnable,
3846 HostVMEnable: v->HostVMEnable,
3847 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
3848 HostVMMinPageSize: v->HostVMMinPageSize,
3849 DynamicMetadataEnable: v->DynamicMetadataEnable[k],
3850 DynamicMetadataVMEnabled: v->DynamicMetadataVMEnabled,
3851 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
3852 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
3853 UrgentLatency: v->UrgLatency[i],
3854 UrgentExtraLatency: v->ExtraLatency,
3855 TCalc: v->TimeCalc,
3856 PDEAndMetaPTEBytesFrame: v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3857 MetaRowByte: v->MetaRowBytes[i][j][k],
3858 PixelPTEBytesPerRow: v->DPTEBytesPerRow[i][j][k],
3859 PrefetchSourceLinesY: v->PrefetchLinesY[i][j][k],
3860 SwathWidthY: v->SwathWidthYThisState[k],
3861 VInitPreFillY: v->PrefillY[k],
3862 MaxNumSwathY: v->MaxNumSwY[k],
3863 PrefetchSourceLinesC: v->PrefetchLinesC[i][j][k],
3864 SwathWidthC: v->SwathWidthCThisState[k],
3865 VInitPreFillC: v->PrefillC[k],
3866 MaxNumSwathC: v->MaxNumSwC[k],
3867 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
3868 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
3869 SwathHeightY: v->SwathHeightYThisState[k],
3870 SwathHeightC: v->SwathHeightCThisState[k],
3871 TWait: v->TWait,
3872 DSTXAfterScaler: &v->DSTXAfterScaler[k],
3873 DSTYAfterScaler: &v->DSTYAfterScaler[k],
3874 DestinationLinesForPrefetch: &v->LineTimesForPrefetch[k],
3875 PrefetchBandwidth: &v->PrefetchBW[k],
3876 DestinationLinesToRequestVMInVBlank: &v->LinesForMetaPTE[k],
3877 DestinationLinesToRequestRowInVBlank: &v->LinesForMetaAndDPTERow[k],
3878 VRatioPrefetchY: &v->VRatioPreY[i][j][k],
3879 VRatioPrefetchC: &v->VRatioPreC[i][j][k],
3880 RequiredPrefetchPixDataBWLuma: &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3881 RequiredPrefetchPixDataBWChroma: &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3882 NotEnoughTimeForDynamicMetadata: &v->NoTimeForDynamicMetadata[i][j][k],
3883 Tno_bw: &v->Tno_bw[k],
3884 prefetch_vmrow_bw: &v->prefetch_vmrow_bw[k],
3885 Tdmdl_vm: &v->dummy7[k],
3886 Tdmdl: &v->dummy8[k],
3887 TSetup: &v->dummy13[k],
3888 VUpdateOffsetPix: &v->VUpdateOffsetPix[k],
3889 VUpdateWidthPix: &v->VUpdateWidthPix[k],
3890 VReadyOffsetPix: &v->VReadyOffsetPix[k]);
3891}
3892
3893void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3894{
3895 struct vba_vars_st *v = &mode_lib->vba;
3896
3897 int i, j;
3898 unsigned int k, m;
3899 int ReorderingBytes;
3900 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3901 bool NoChroma = true;
3902 bool EnoughWritebackUnits = true;
3903 bool P2IWith420 = false;
3904 bool DSCOnlyIfNecessaryWithBPP = false;
3905 bool DSC422NativeNotSupported = false;
3906 double MaxTotalVActiveRDBandwidth;
3907 bool ViewportExceedsSurface = false;
3908 bool FMTBufferExceeded = false;
3909
3910 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3911
3912 CalculateMinAndMaxPrefetchMode(
3913 AllowDRAMSelfRefreshOrDRAMClockChangeInVblank: mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3914 MinPrefetchMode: &MinPrefetchMode, MaxPrefetchMode: &MaxPrefetchMode);
3915
3916 /*Scale Ratio, taps Support Check*/
3917
3918 v->ScaleRatioAndTapsSupport = true;
3919 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3920 if (v->ScalerEnabled[k] == false
3921 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3922 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3923 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3924 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3925 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3926 v->ScaleRatioAndTapsSupport = false;
3927 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3928 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3929 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3930 || v->VRatio[k] > v->vtaps[k]
3931 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3932 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3933 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3934 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3935 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3936 || v->HRatioChroma[k] > v->MaxHSCLRatio
3937 || v->VRatioChroma[k] > v->MaxVSCLRatio
3938 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3939 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3940 v->ScaleRatioAndTapsSupport = false;
3941 }
3942 }
3943 /*Source Format, Pixel Format and Scan Support Check*/
3944
3945 v->SourceFormatPixelAndScanSupport = true;
3946 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3947 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3948 v->SourceFormatPixelAndScanSupport = false;
3949 }
3950 }
3951 /*Bandwidth Support Check*/
3952
3953 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3954 CalculateBytePerPixelAnd256BBlockSizes(
3955 SourcePixelFormat: v->SourcePixelFormat[k],
3956 SurfaceTiling: v->SurfaceTiling[k],
3957 BytePerPixelY: &v->BytePerPixelY[k],
3958 BytePerPixelC: &v->BytePerPixelC[k],
3959 BytePerPixelDETY: &v->BytePerPixelInDETY[k],
3960 BytePerPixelDETC: &v->BytePerPixelInDETC[k],
3961 BlockHeight256BytesY: &v->Read256BlockHeightY[k],
3962 BlockHeight256BytesC: &v->Read256BlockHeightC[k],
3963 BlockWidth256BytesY: &v->Read256BlockWidthY[k],
3964 BlockWidth256BytesC: &v->Read256BlockWidthC[k]);
3965 }
3966 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3967 if (v->SourceScan[k] != dm_vert) {
3968 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3969 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3970 } else {
3971 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3972 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3973 }
3974 }
3975 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3976 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(a: v->BytePerPixelInDETY[k], granularity: 1.0)
3977 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3978 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(a: v->BytePerPixelInDETC[k], granularity: 2.0)
3979 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3980 }
3981 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3982 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3983 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3984 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3985 } else if (v->WritebackEnable[k] == true) {
3986 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3987 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3988 } else {
3989 v->WriteBandwidth[k] = 0.0;
3990 }
3991 }
3992
3993 /*Writeback Latency support check*/
3994
3995 v->WritebackLatencySupport = true;
3996 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3997 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3998 v->WritebackLatencySupport = false;
3999 }
4000 }
4001
4002 /*Writeback Mode Support Check*/
4003
4004 v->TotalNumberOfActiveWriteback = 0;
4005 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4006 if (v->WritebackEnable[k] == true) {
4007 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4008 }
4009 }
4010
4011 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4012 EnoughWritebackUnits = false;
4013 }
4014
4015 /*Writeback Scale Ratio and Taps Support Check*/
4016
4017 v->WritebackScaleRatioAndTapsSupport = true;
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 if (v->WritebackEnable[k] == true) {
4020 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4021 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4022 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4023 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4024 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4025 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4026 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4027 v->WritebackScaleRatioAndTapsSupport = false;
4028 }
4029 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4030 v->WritebackScaleRatioAndTapsSupport = false;
4031 }
4032 }
4033 }
4034 /*Maximum DISPCLK/DPPCLK Support check*/
4035
4036 v->WritebackRequiredDISPCLK = 0.0;
4037 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4038 if (v->WritebackEnable[k] == true) {
4039 v->WritebackRequiredDISPCLK = dml_max(
4040 a: v->WritebackRequiredDISPCLK,
4041 b: dml314_CalculateWriteBackDISPCLK(
4042 WritebackPixelFormat: v->WritebackPixelFormat[k],
4043 PixelClock: v->PixelClock[k],
4044 WritebackHRatio: v->WritebackHRatio[k],
4045 WritebackVRatio: v->WritebackVRatio[k],
4046 WritebackHTaps: v->WritebackHTaps[k],
4047 WritebackVTaps: v->WritebackVTaps[k],
4048 WritebackSourceWidth: v->WritebackSourceWidth[k],
4049 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
4050 HTotal: v->HTotal[k],
4051 WritebackLineBufferSize: v->WritebackLineBufferSize));
4052 }
4053 }
4054 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4055 if (v->HRatio[k] > 1.0) {
4056 v->PSCL_FACTOR[k] = dml_min(
4057 a: v->MaxDCHUBToPSCLThroughput,
4058 b: v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(a: v->htaps[k] / 6.0, granularity: 1.0));
4059 } else {
4060 v->PSCL_FACTOR[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
4061 }
4062 if (v->BytePerPixelC[k] == 0.0) {
4063 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4064 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4065 * dml_max3(
4066 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
4067 b: v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4068 c: 1.0);
4069 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4070 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4071 }
4072 } else {
4073 if (v->HRatioChroma[k] > 1.0) {
4074 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4075 a: v->MaxDCHUBToPSCLThroughput,
4076 b: v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(a: v->HTAPsChroma[k] / 6.0, granularity: 1.0));
4077 } else {
4078 v->PSCL_FACTOR_CHROMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
4079 }
4080 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4081 * dml_max5(
4082 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
4083 b: v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4084 c: v->VTAPsChroma[k] / 6.0 * dml_min(a: 1.0, b: v->HRatioChroma[k]),
4085 d: v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4086 e: 1.0);
4087 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4088 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4089 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4090 }
4091 }
4092 }
4093 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4094 int MaximumSwathWidthSupportLuma;
4095 int MaximumSwathWidthSupportChroma;
4096
4097 if (v->SurfaceTiling[k] == dm_sw_linear) {
4098 MaximumSwathWidthSupportLuma = 8192.0;
4099 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4100 MaximumSwathWidthSupportLuma = 2880.0;
4101 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4102 MaximumSwathWidthSupportLuma = 3840.0;
4103 } else {
4104 MaximumSwathWidthSupportLuma = 5760.0;
4105 }
4106
4107 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4108 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4109 } else {
4110 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4111 }
4112 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(a: v->HRatio[k], b: 1.0) / v->LBBitPerPixel[k]
4113 / (v->vtaps[k] + dml_max(a: dml_ceil(a: v->VRatio[k], granularity: 1.0) - 2, b: 0.0));
4114 if (v->BytePerPixelC[k] == 0.0) {
4115 v->MaximumSwathWidthInLineBufferChroma = 0;
4116 } else {
4117 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(a: v->HRatioChroma[k], b: 1.0) / v->LBBitPerPixel[k]
4118 / (v->VTAPsChroma[k] + dml_max(a: dml_ceil(a: v->VRatioChroma[k], granularity: 1.0) - 2, b: 0.0));
4119 }
4120 v->MaximumSwathWidthLuma[k] = dml_min(a: MaximumSwathWidthSupportLuma, b: v->MaximumSwathWidthInLineBufferLuma);
4121 v->MaximumSwathWidthChroma[k] = dml_min(a: MaximumSwathWidthSupportChroma, b: v->MaximumSwathWidthInLineBufferChroma);
4122 }
4123
4124 CalculateSwathAndDETConfiguration(
4125 ForceSingleDPP: true,
4126 NumberOfActivePlanes: v->NumberOfActivePlanes,
4127 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
4128 MaximumSwathWidthLuma: v->MaximumSwathWidthLuma,
4129 MaximumSwathWidthChroma: v->MaximumSwathWidthChroma,
4130 SourceScan: v->SourceScan,
4131 SourcePixelFormat: v->SourcePixelFormat,
4132 SurfaceTiling: v->SurfaceTiling,
4133 ViewportWidth: v->ViewportWidth,
4134 ViewportHeight: v->ViewportHeight,
4135 SurfaceWidthY: v->SurfaceWidthY,
4136 SurfaceWidthC: v->SurfaceWidthC,
4137 SurfaceHeightY: v->SurfaceHeightY,
4138 SurfaceHeightC: v->SurfaceHeightC,
4139 Read256BytesBlockHeightY: v->Read256BlockHeightY,
4140 Read256BytesBlockHeightC: v->Read256BlockHeightC,
4141 Read256BytesBlockWidthY: v->Read256BlockWidthY,
4142 Read256BytesBlockWidthC: v->Read256BlockWidthC,
4143 ODMCombineEnabled: v->odm_combine_dummy,
4144 BlendingAndTiming: v->BlendingAndTiming,
4145 BytePerPixY: v->BytePerPixelY,
4146 BytePerPixC: v->BytePerPixelC,
4147 BytePerPixDETY: v->BytePerPixelInDETY,
4148 BytePerPixDETC: v->BytePerPixelInDETC,
4149 HActive: v->HActive,
4150 HRatio: v->HRatio,
4151 HRatioChroma: v->HRatioChroma,
4152 DPPPerPlane: v->NoOfDPPThisState,
4153 swath_width_luma_ub: v->swath_width_luma_ub_this_state,
4154 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state,
4155 SwathWidth: v->SwathWidthYThisState,
4156 SwathWidthChroma: v->SwathWidthCThisState,
4157 SwathHeightY: v->SwathHeightYThisState,
4158 SwathHeightC: v->SwathHeightCThisState,
4159 DETBufferSizeY: v->DETBufferSizeYThisState,
4160 DETBufferSizeC: v->DETBufferSizeCThisState,
4161 ViewportSizeSupportPerPlane: v->SingleDPPViewportSizeSupportPerPlane,
4162 ViewportSizeSupport: &v->ViewportSizeSupport[0][0]);
4163
4164 for (i = 0; i < v->soc.num_states; i++) {
4165 for (j = 0; j < 2; j++) {
4166 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(Clock: v->MaxDispclk[i], VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
4167 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(Clock: v->MaxDppclk[i], VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
4168 v->RequiredDISPCLK[i][j] = 0.0;
4169 v->DISPCLK_DPPCLK_Support[i][j] = true;
4170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4171 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4172 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4173 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4174 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4175 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4176 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4177 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4178 }
4179 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4180 * (1 + v->DISPCLKRampingMargin / 100.0);
4181 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4182 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4183 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4184 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4185 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4186 }
4187 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4188 * (1 + v->DISPCLKRampingMargin / 100.0);
4189 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4190 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4191 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4192 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4193 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4194 }
4195
4196 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4197 || !(v->Output[k] == dm_dp ||
4198 v->Output[k] == dm_dp2p0 ||
4199 v->Output[k] == dm_edp)) {
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4202
4203 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4204 FMTBufferExceeded = true;
4205 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4208 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4209 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4212 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4213 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4214 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4215 } else {
4216 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4217 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4218 }
4219 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4220 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4221 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4224 } else {
4225 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4226 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4227 }
4228 }
4229 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4230 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4231 if (v->Output[k] == dm_hdmi) {
4232 FMTBufferExceeded = true;
4233 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4234 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4235 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4236
4237 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4238 FMTBufferExceeded = true;
4239 } else {
4240 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4241 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4242 }
4243 }
4244 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4245 v->MPCCombine[i][j][k] = false;
4246 v->NoOfDPP[i][j][k] = 4;
4247 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4248 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4249 v->MPCCombine[i][j][k] = false;
4250 v->NoOfDPP[i][j][k] = 2;
4251 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4252 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4253 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4254 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4255 v->MPCCombine[i][j][k] = false;
4256 v->NoOfDPP[i][j][k] = 1;
4257 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4258 } else {
4259 v->MPCCombine[i][j][k] = true;
4260 v->NoOfDPP[i][j][k] = 2;
4261 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4262 }
4263 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->PlaneRequiredDISPCLK);
4264 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4265 > v->MaxDppclkRoundedDownToDFSGranularity)
4266 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4267 v->DISPCLK_DPPCLK_Support[i][j] = false;
4268 }
4269 }
4270 v->TotalNumberOfActiveDPP[i][j] = 0;
4271 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4272 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4273 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4274 if (v->NoOfDPP[i][j][k] == 1)
4275 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4276 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4277 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4278 NoChroma = false;
4279 }
4280
4281 // UPTO
4282 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4283 && !UnboundedRequest(UseUnboundedRequestingFinal: v->UseUnboundedRequesting, TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j], NoChroma, Output: v->Output[0])) {
4284 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4285 double BWOfNonSplitPlaneOfMaximumBandwidth;
4286 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4287
4288 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4289 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4290 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4291 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4292 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4293 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4294 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4295 }
4296 }
4297 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4298 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4299 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4300 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4301 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4302 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4303 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4304 }
4305 }
4306 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4307 v->RequiredDISPCLK[i][j] = 0.0;
4308 v->DISPCLK_DPPCLK_Support[i][j] = true;
4309 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4310 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4311 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4312 v->MPCCombine[i][j][k] = true;
4313 v->NoOfDPP[i][j][k] = 2;
4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4315 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4316 } else {
4317 v->MPCCombine[i][j][k] = false;
4318 v->NoOfDPP[i][j][k] = 1;
4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4320 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4321 }
4322 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4323 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4325 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4326 } else {
4327 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4328 }
4329 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->PlaneRequiredDISPCLK);
4330 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4331 > v->MaxDppclkRoundedDownToDFSGranularity)
4332 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4333 v->DISPCLK_DPPCLK_Support[i][j] = false;
4334 }
4335 }
4336 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4337 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4338 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4339 }
4340 }
4341 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->WritebackRequiredDISPCLK);
4342 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4343 v->DISPCLK_DPPCLK_Support[i][j] = false;
4344 }
4345 }
4346 }
4347
4348 /*Total Available Pipes Support Check*/
4349
4350 for (i = 0; i < v->soc.num_states; i++) {
4351 for (j = 0; j < 2; j++) {
4352 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4353 v->TotalAvailablePipesSupport[i][j] = true;
4354 } else {
4355 v->TotalAvailablePipesSupport[i][j] = false;
4356 }
4357 }
4358 }
4359 /*Display IO and DSC Support Check*/
4360
4361 v->NonsupportedDSCInputBPC = false;
4362 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4363 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4364 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4365 v->NonsupportedDSCInputBPC = true;
4366 }
4367 }
4368
4369 /*Number Of DSC Slices*/
4370 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4371 if (v->BlendingAndTiming[k] == k) {
4372 if (v->PixelClockBackEnd[k] > 3200) {
4373 v->NumberOfDSCSlices[k] = dml_ceil(a: v->PixelClockBackEnd[k] / 400.0, granularity: 4.0);
4374 } else if (v->PixelClockBackEnd[k] > 1360) {
4375 v->NumberOfDSCSlices[k] = 8;
4376 } else if (v->PixelClockBackEnd[k] > 680) {
4377 v->NumberOfDSCSlices[k] = 4;
4378 } else if (v->PixelClockBackEnd[k] > 340) {
4379 v->NumberOfDSCSlices[k] = 2;
4380 } else {
4381 v->NumberOfDSCSlices[k] = 1;
4382 }
4383 } else {
4384 v->NumberOfDSCSlices[k] = 0;
4385 }
4386 }
4387
4388 for (i = 0; i < v->soc.num_states; i++) {
4389 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4390 v->RequiresDSC[i][k] = false;
4391 v->RequiresFEC[i][k] = false;
4392 if (v->BlendingAndTiming[k] == k) {
4393 if (v->Output[k] == dm_hdmi) {
4394 v->RequiresDSC[i][k] = false;
4395 v->RequiresFEC[i][k] = false;
4396 v->OutputBppPerState[i][k] = TruncToValidBPP(
4397 LinkBitRate: dml_min(a: 600.0, b: v->PHYCLKPerState[i]) * 10,
4398 Lanes: 3,
4399 HTotal: v->HTotal[k],
4400 HActive: v->HActive[k],
4401 PixelClock: v->PixelClockBackEnd[k],
4402 DesiredBPP: v->ForcedOutputLinkBPP[k],
4403 DSCEnable: false,
4404 Output: v->Output[k],
4405 Format: v->OutputFormat[k],
4406 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4407 DSCSlices: v->NumberOfDSCSlices[k],
4408 AudioRate: v->AudioSampleRate[k],
4409 AudioLayout: v->AudioSampleLayout[k],
4410 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4411 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4412 if (v->DSCEnable[k] == true) {
4413 v->RequiresDSC[i][k] = true;
4414 v->LinkDSCEnable = true;
4415 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4416 v->RequiresFEC[i][k] = true;
4417 } else {
4418 v->RequiresFEC[i][k] = false;
4419 }
4420 } else {
4421 v->RequiresDSC[i][k] = false;
4422 v->LinkDSCEnable = false;
4423 if (v->Output[k] == dm_dp2p0) {
4424 v->RequiresFEC[i][k] = true;
4425 } else {
4426 v->RequiresFEC[i][k] = false;
4427 }
4428 }
4429 if (v->Output[k] == dm_dp2p0) {
4430 v->Outbpp = BPP_INVALID;
4431 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4432 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4433 v->Outbpp = TruncToValidBPP(
4434 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 10000,
4435 Lanes: v->OutputLinkDPLanes[k],
4436 HTotal: v->HTotal[k],
4437 HActive: v->HActive[k],
4438 PixelClock: v->PixelClockBackEnd[k],
4439 DesiredBPP: v->ForcedOutputLinkBPP[k],
4440 DSCEnable: v->LinkDSCEnable,
4441 Output: v->Output[k],
4442 Format: v->OutputFormat[k],
4443 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4444 DSCSlices: v->NumberOfDSCSlices[k],
4445 AudioRate: v->AudioSampleRate[k],
4446 AudioLayout: v->AudioSampleLayout[k],
4447 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4448 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4449 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4450 v->RequiresDSC[i][k] = true;
4451 v->LinkDSCEnable = true;
4452 v->Outbpp = TruncToValidBPP(
4453 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 10000,
4454 Lanes: v->OutputLinkDPLanes[k],
4455 HTotal: v->HTotal[k],
4456 HActive: v->HActive[k],
4457 PixelClock: v->PixelClockBackEnd[k],
4458 DesiredBPP: v->ForcedOutputLinkBPP[k],
4459 DSCEnable: v->LinkDSCEnable,
4460 Output: v->Output[k],
4461 Format: v->OutputFormat[k],
4462 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4463 DSCSlices: v->NumberOfDSCSlices[k],
4464 AudioRate: v->AudioSampleRate[k],
4465 AudioLayout: v->AudioSampleLayout[k],
4466 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4467 }
4468 v->OutputBppPerState[i][k] = v->Outbpp;
4469 // TODO: Need some other way to handle this nonsense
4470 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4471 }
4472 if (v->Outbpp == BPP_INVALID &&
4473 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4474 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4475 v->Outbpp = TruncToValidBPP(
4476 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 13500,
4477 Lanes: v->OutputLinkDPLanes[k],
4478 HTotal: v->HTotal[k],
4479 HActive: v->HActive[k],
4480 PixelClock: v->PixelClockBackEnd[k],
4481 DesiredBPP: v->ForcedOutputLinkBPP[k],
4482 DSCEnable: v->LinkDSCEnable,
4483 Output: v->Output[k],
4484 Format: v->OutputFormat[k],
4485 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4486 DSCSlices: v->NumberOfDSCSlices[k],
4487 AudioRate: v->AudioSampleRate[k],
4488 AudioLayout: v->AudioSampleLayout[k],
4489 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4490 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4491 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4492 v->RequiresDSC[i][k] = true;
4493 v->LinkDSCEnable = true;
4494 v->Outbpp = TruncToValidBPP(
4495 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 13500,
4496 Lanes: v->OutputLinkDPLanes[k],
4497 HTotal: v->HTotal[k],
4498 HActive: v->HActive[k],
4499 PixelClock: v->PixelClockBackEnd[k],
4500 DesiredBPP: v->ForcedOutputLinkBPP[k],
4501 DSCEnable: v->LinkDSCEnable,
4502 Output: v->Output[k],
4503 Format: v->OutputFormat[k],
4504 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4505 DSCSlices: v->NumberOfDSCSlices[k],
4506 AudioRate: v->AudioSampleRate[k],
4507 AudioLayout: v->AudioSampleLayout[k],
4508 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4509 }
4510 v->OutputBppPerState[i][k] = v->Outbpp;
4511 // TODO: Need some other way to handle this nonsense
4512 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4513 }
4514 if (v->Outbpp == BPP_INVALID &&
4515 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4516 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4517 v->Outbpp = TruncToValidBPP(
4518 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 20000,
4519 Lanes: v->OutputLinkDPLanes[k],
4520 HTotal: v->HTotal[k],
4521 HActive: v->HActive[k],
4522 PixelClock: v->PixelClockBackEnd[k],
4523 DesiredBPP: v->ForcedOutputLinkBPP[k],
4524 DSCEnable: v->LinkDSCEnable,
4525 Output: v->Output[k],
4526 Format: v->OutputFormat[k],
4527 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4528 DSCSlices: v->NumberOfDSCSlices[k],
4529 AudioRate: v->AudioSampleRate[k],
4530 AudioLayout: v->AudioSampleLayout[k],
4531 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4532 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4533 v->ForcedOutputLinkBPP[k] == 0) {
4534 v->RequiresDSC[i][k] = true;
4535 v->LinkDSCEnable = true;
4536 v->Outbpp = TruncToValidBPP(
4537 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 20000,
4538 Lanes: v->OutputLinkDPLanes[k],
4539 HTotal: v->HTotal[k],
4540 HActive: v->HActive[k],
4541 PixelClock: v->PixelClockBackEnd[k],
4542 DesiredBPP: v->ForcedOutputLinkBPP[k],
4543 DSCEnable: v->LinkDSCEnable,
4544 Output: v->Output[k],
4545 Format: v->OutputFormat[k],
4546 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4547 DSCSlices: v->NumberOfDSCSlices[k],
4548 AudioRate: v->AudioSampleRate[k],
4549 AudioLayout: v->AudioSampleLayout[k],
4550 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4551 }
4552 v->OutputBppPerState[i][k] = v->Outbpp;
4553 // TODO: Need some other way to handle this nonsense
4554 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4555 }
4556 } else {
4557 v->Outbpp = BPP_INVALID;
4558 if (v->PHYCLKPerState[i] >= 270.0) {
4559 v->Outbpp = TruncToValidBPP(
4560 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 2700,
4561 Lanes: v->OutputLinkDPLanes[k],
4562 HTotal: v->HTotal[k],
4563 HActive: v->HActive[k],
4564 PixelClock: v->PixelClockBackEnd[k],
4565 DesiredBPP: v->ForcedOutputLinkBPP[k],
4566 DSCEnable: v->LinkDSCEnable,
4567 Output: v->Output[k],
4568 Format: v->OutputFormat[k],
4569 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4570 DSCSlices: v->NumberOfDSCSlices[k],
4571 AudioRate: v->AudioSampleRate[k],
4572 AudioLayout: v->AudioSampleLayout[k],
4573 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4574 v->OutputBppPerState[i][k] = v->Outbpp;
4575 // TODO: Need some other way to handle this nonsense
4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4577 }
4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4579 v->Outbpp = TruncToValidBPP(
4580 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 5400,
4581 Lanes: v->OutputLinkDPLanes[k],
4582 HTotal: v->HTotal[k],
4583 HActive: v->HActive[k],
4584 PixelClock: v->PixelClockBackEnd[k],
4585 DesiredBPP: v->ForcedOutputLinkBPP[k],
4586 DSCEnable: v->LinkDSCEnable,
4587 Output: v->Output[k],
4588 Format: v->OutputFormat[k],
4589 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4590 DSCSlices: v->NumberOfDSCSlices[k],
4591 AudioRate: v->AudioSampleRate[k],
4592 AudioLayout: v->AudioSampleLayout[k],
4593 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4594 v->OutputBppPerState[i][k] = v->Outbpp;
4595 // TODO: Need some other way to handle this nonsense
4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4597 }
4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4599 v->Outbpp = TruncToValidBPP(
4600 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 8100,
4601 Lanes: v->OutputLinkDPLanes[k],
4602 HTotal: v->HTotal[k],
4603 HActive: v->HActive[k],
4604 PixelClock: v->PixelClockBackEnd[k],
4605 DesiredBPP: v->ForcedOutputLinkBPP[k],
4606 DSCEnable: v->LinkDSCEnable,
4607 Output: v->Output[k],
4608 Format: v->OutputFormat[k],
4609 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4610 DSCSlices: v->NumberOfDSCSlices[k],
4611 AudioRate: v->AudioSampleRate[k],
4612 AudioLayout: v->AudioSampleLayout[k],
4613 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4614 v->OutputBppPerState[i][k] = v->Outbpp;
4615 // TODO: Need some other way to handle this nonsense
4616 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4617 }
4618 }
4619 }
4620 } else {
4621 v->OutputBppPerState[i][k] = 0;
4622 }
4623 }
4624 }
4625
4626 for (i = 0; i < v->soc.num_states; i++) {
4627 v->LinkCapacitySupport[i] = true;
4628 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4629 if (v->BlendingAndTiming[k] == k
4630 && (v->Output[k] == dm_dp ||
4631 v->Output[k] == dm_edp ||
4632 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4633 v->LinkCapacitySupport[i] = false;
4634 }
4635 }
4636 }
4637
4638 // UPTO 2172
4639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4640 if (v->BlendingAndTiming[k] == k
4641 && (v->Output[k] == dm_dp ||
4642 v->Output[k] == dm_edp ||
4643 v->Output[k] == dm_hdmi)) {
4644 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4645 P2IWith420 = true;
4646 }
4647 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4648 && !v->DSC422NativeSupport) {
4649 DSC422NativeNotSupported = true;
4650 }
4651 }
4652 }
4653
4654
4655 for (i = 0; i < v->soc.num_states; ++i) {
4656 v->ODMCombine4To1SupportCheckOK[i] = true;
4657 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4658 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4659 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4660 || v->Output[k] == dm_hdmi)) {
4661 v->ODMCombine4To1SupportCheckOK[i] = false;
4662 }
4663 }
4664 }
4665
4666 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4667
4668 for (i = 0; i < v->soc.num_states; i++) {
4669 v->NotEnoughDSCUnits[i] = false;
4670 v->TotalDSCUnitsRequired = 0.0;
4671 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4672 if (v->RequiresDSC[i][k] == true) {
4673 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4674 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4675 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4676 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4677 } else {
4678 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4679 }
4680 }
4681 }
4682 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4683 v->NotEnoughDSCUnits[i] = true;
4684 }
4685 }
4686 /*DSC Delay per state*/
4687
4688 for (i = 0; i < v->soc.num_states; i++) {
4689 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4690 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4691 v->BPP = 0.0;
4692 } else {
4693 v->BPP = v->OutputBppPerState[i][k];
4694 }
4695 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4696 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4697 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4698 bpc: v->DSCInputBitPerComponent[k],
4699 BPP: v->BPP,
4700 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4701 numSlices: v->NumberOfDSCSlices[k],
4702 pixelFormat: v->OutputFormat[k],
4703 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]);
4704 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4705 v->DSCDelayPerState[i][k] = 2.0
4706 * (dscceComputeDelay(
4707 bpc: v->DSCInputBitPerComponent[k],
4708 BPP: v->BPP,
4709 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4710 numSlices: v->NumberOfDSCSlices[k] / 2,
4711 pixelFormat: v->OutputFormat[k],
4712 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
4713 } else {
4714 v->DSCDelayPerState[i][k] = 4.0
4715 * (dscceComputeDelay(
4716 bpc: v->DSCInputBitPerComponent[k],
4717 BPP: v->BPP,
4718 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4719 numSlices: v->NumberOfDSCSlices[k] / 4,
4720 pixelFormat: v->OutputFormat[k],
4721 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
4722 }
4723 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil(a: (double) v->DSCDelayPerState[i][k] / v->HActive[k], granularity: 1.0);
4724 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4725 } else {
4726 v->DSCDelayPerState[i][k] = 0.0;
4727 }
4728 }
4729 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4730 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4731 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4732 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4733 }
4734 }
4735 }
4736 }
4737
4738 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4739 //
4740 for (i = 0; i < v->soc.num_states; ++i) {
4741 for (j = 0; j <= 1; ++j) {
4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4743 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4744 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4745 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4746 }
4747
4748 CalculateSwathAndDETConfiguration(
4749 ForceSingleDPP: false,
4750 NumberOfActivePlanes: v->NumberOfActivePlanes,
4751 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
4752 MaximumSwathWidthLuma: v->MaximumSwathWidthLuma,
4753 MaximumSwathWidthChroma: v->MaximumSwathWidthChroma,
4754 SourceScan: v->SourceScan,
4755 SourcePixelFormat: v->SourcePixelFormat,
4756 SurfaceTiling: v->SurfaceTiling,
4757 ViewportWidth: v->ViewportWidth,
4758 ViewportHeight: v->ViewportHeight,
4759 SurfaceWidthY: v->SurfaceWidthY,
4760 SurfaceWidthC: v->SurfaceWidthC,
4761 SurfaceHeightY: v->SurfaceHeightY,
4762 SurfaceHeightC: v->SurfaceHeightC,
4763 Read256BytesBlockHeightY: v->Read256BlockHeightY,
4764 Read256BytesBlockHeightC: v->Read256BlockHeightC,
4765 Read256BytesBlockWidthY: v->Read256BlockWidthY,
4766 Read256BytesBlockWidthC: v->Read256BlockWidthC,
4767 ODMCombineEnabled: v->ODMCombineEnableThisState,
4768 BlendingAndTiming: v->BlendingAndTiming,
4769 BytePerPixY: v->BytePerPixelY,
4770 BytePerPixC: v->BytePerPixelC,
4771 BytePerPixDETY: v->BytePerPixelInDETY,
4772 BytePerPixDETC: v->BytePerPixelInDETC,
4773 HActive: v->HActive,
4774 HRatio: v->HRatio,
4775 HRatioChroma: v->HRatioChroma,
4776 DPPPerPlane: v->NoOfDPPThisState,
4777 swath_width_luma_ub: v->swath_width_luma_ub_this_state,
4778 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state,
4779 SwathWidth: v->SwathWidthYThisState,
4780 SwathWidthChroma: v->SwathWidthCThisState,
4781 SwathHeightY: v->SwathHeightYThisState,
4782 SwathHeightC: v->SwathHeightCThisState,
4783 DETBufferSizeY: v->DETBufferSizeYThisState,
4784 DETBufferSizeC: v->DETBufferSizeCThisState,
4785 ViewportSizeSupportPerPlane: v->dummystring,
4786 ViewportSizeSupport: &v->ViewportSizeSupport[i][j]);
4787
4788 CalculateDCFCLKDeepSleep(
4789 mode_lib,
4790 NumberOfActivePlanes: v->NumberOfActivePlanes,
4791 BytePerPixelY: v->BytePerPixelY,
4792 BytePerPixelC: v->BytePerPixelC,
4793 VRatio: v->VRatio,
4794 VRatioChroma: v->VRatioChroma,
4795 SwathWidthY: v->SwathWidthYThisState,
4796 SwathWidthC: v->SwathWidthCThisState,
4797 DPPPerPlane: v->NoOfDPPThisState,
4798 HRatio: v->HRatio,
4799 HRatioChroma: v->HRatioChroma,
4800 PixelClock: v->PixelClock,
4801 PSCL_THROUGHPUT: v->PSCL_FACTOR,
4802 PSCL_THROUGHPUT_CHROMA: v->PSCL_FACTOR_CHROMA,
4803 DPPCLK: v->RequiredDPPCLKThisState,
4804 ReadBandwidthLuma: v->ReadBandwidthLuma,
4805 ReadBandwidthChroma: v->ReadBandwidthChroma,
4806 ReturnBusWidth: v->ReturnBusWidth,
4807 DCFCLKDeepSleep: &v->ProjectedDCFCLKDeepSleep[i][j]);
4808
4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4810 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4811 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4812 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4813 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4814 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4815 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4816 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4817 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4818 }
4819 }
4820 }
4821
4822 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4823 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4824 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4825 }
4826
4827 for (i = 0; i < v->soc.num_states; i++) {
4828 for (j = 0; j < 2; j++) {
4829 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4830
4831 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4832 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4833 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4834 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4835 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4836 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4837 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4838 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4839 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4840 }
4841
4842 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4843 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4844 if (v->DCCEnable[k] == true) {
4845 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4846 }
4847 }
4848
4849 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4850 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4851 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4852
4853 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4854 && v->SourceScan[k] != dm_vert) {
4855 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4856 / 2;
4857 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4858 } else {
4859 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4860 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4861 }
4862
4863 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4864 mode_lib,
4865 DCCEnable: v->DCCEnable[k],
4866 BlockHeight256Bytes: v->Read256BlockHeightC[k],
4867 BlockWidth256Bytes: v->Read256BlockWidthC[k],
4868 SourcePixelFormat: v->SourcePixelFormat[k],
4869 SurfaceTiling: v->SurfaceTiling[k],
4870 BytePerPixel: v->BytePerPixelC[k],
4871 ScanDirection: v->SourceScan[k],
4872 SwathWidth: v->SwathWidthCThisState[k],
4873 ViewportHeight: v->ViewportHeightChroma[k],
4874 GPUVMEnable: v->GPUVMEnable,
4875 HostVMEnable: v->HostVMEnable,
4876 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
4877 GPUVMMinPageSize: v->GPUVMMinPageSize,
4878 HostVMMinPageSize: v->HostVMMinPageSize,
4879 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForChroma,
4880 Pitch: v->PitchC[k],
4881 DCCMetaPitch: 0.0,
4882 MacroTileWidth: &v->MacroTileWidthC[k],
4883 MetaRowByte: &v->MetaRowBytesC,
4884 PixelPTEBytesPerRow: &v->DPTEBytesPerRowC,
4885 PTEBufferSizeNotExceeded: &v->PTEBufferSizeNotExceededC[i][j][k],
4886 dpte_row_width_ub: &v->dummyinteger7,
4887 dpte_row_height: &v->dpte_row_height_chroma[k],
4888 MetaRequestWidth: &v->dummyinteger28,
4889 MetaRequestHeight: &v->dummyinteger26,
4890 meta_row_width: &v->dummyinteger23,
4891 meta_row_height: &v->meta_row_height_chroma[k],
4892 vm_group_bytes: &v->dummyinteger8,
4893 dpte_group_bytes: &v->dummyinteger9,
4894 PixelPTEReqWidth: &v->dummyinteger19,
4895 PixelPTEReqHeight: &v->dummyinteger20,
4896 PTERequestSize: &v->dummyinteger17,
4897 DPDE0BytesFrame: &v->dummyinteger10,
4898 MetaPTEBytesFrame: &v->dummyinteger11);
4899
4900 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4901 mode_lib,
4902 VRatio: v->VRatioChroma[k],
4903 vtaps: v->VTAPsChroma[k],
4904 Interlace: v->Interlace[k],
4905 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
4906 SwathHeight: v->SwathHeightCThisState[k],
4907 ViewportYStart: v->ViewportYStartC[k],
4908 VInitPreFill: &v->PrefillC[k],
4909 MaxNumSwath: &v->MaxNumSwC[k]);
4910 } else {
4911 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4912 v->PTEBufferSizeInRequestsForChroma = 0;
4913 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4914 v->MetaRowBytesC = 0.0;
4915 v->DPTEBytesPerRowC = 0.0;
4916 v->PrefetchLinesC[i][j][k] = 0.0;
4917 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4918 }
4919 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4920 mode_lib,
4921 DCCEnable: v->DCCEnable[k],
4922 BlockHeight256Bytes: v->Read256BlockHeightY[k],
4923 BlockWidth256Bytes: v->Read256BlockWidthY[k],
4924 SourcePixelFormat: v->SourcePixelFormat[k],
4925 SurfaceTiling: v->SurfaceTiling[k],
4926 BytePerPixel: v->BytePerPixelY[k],
4927 ScanDirection: v->SourceScan[k],
4928 SwathWidth: v->SwathWidthYThisState[k],
4929 ViewportHeight: v->ViewportHeight[k],
4930 GPUVMEnable: v->GPUVMEnable,
4931 HostVMEnable: v->HostVMEnable,
4932 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
4933 GPUVMMinPageSize: v->GPUVMMinPageSize,
4934 HostVMMinPageSize: v->HostVMMinPageSize,
4935 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForLuma,
4936 Pitch: v->PitchY[k],
4937 DCCMetaPitch: v->DCCMetaPitchY[k],
4938 MacroTileWidth: &v->MacroTileWidthY[k],
4939 MetaRowByte: &v->MetaRowBytesY,
4940 PixelPTEBytesPerRow: &v->DPTEBytesPerRowY,
4941 PTEBufferSizeNotExceeded: &v->PTEBufferSizeNotExceededY[i][j][k],
4942 dpte_row_width_ub: &v->dummyinteger7,
4943 dpte_row_height: &v->dpte_row_height[k],
4944 MetaRequestWidth: &v->dummyinteger29,
4945 MetaRequestHeight: &v->dummyinteger27,
4946 meta_row_width: &v->dummyinteger24,
4947 meta_row_height: &v->meta_row_height[k],
4948 vm_group_bytes: &v->dummyinteger25,
4949 dpte_group_bytes: &v->dpte_group_bytes[k],
4950 PixelPTEReqWidth: &v->dummyinteger21,
4951 PixelPTEReqHeight: &v->dummyinteger22,
4952 PTERequestSize: &v->dummyinteger18,
4953 DPDE0BytesFrame: &v->dummyinteger5,
4954 MetaPTEBytesFrame: &v->dummyinteger6);
4955 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4956 mode_lib,
4957 VRatio: v->VRatio[k],
4958 vtaps: v->vtaps[k],
4959 Interlace: v->Interlace[k],
4960 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
4961 SwathHeight: v->SwathHeightYThisState[k],
4962 ViewportYStart: v->ViewportYStartY[k],
4963 VInitPreFill: &v->PrefillY[k],
4964 MaxNumSwath: &v->MaxNumSwY[k]);
4965 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4966 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4967 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4968
4969 CalculateRowBandwidth(
4970 GPUVMEnable: v->GPUVMEnable,
4971 SourcePixelFormat: v->SourcePixelFormat[k],
4972 VRatio: v->VRatio[k],
4973 VRatioChroma: v->VRatioChroma[k],
4974 DCCEnable: v->DCCEnable[k],
4975 LineTime: v->HTotal[k] / v->PixelClock[k],
4976 MetaRowByteLuma: v->MetaRowBytesY,
4977 MetaRowByteChroma: v->MetaRowBytesC,
4978 meta_row_height_luma: v->meta_row_height[k],
4979 meta_row_height_chroma: v->meta_row_height_chroma[k],
4980 PixelPTEBytesPerRowLuma: v->DPTEBytesPerRowY,
4981 PixelPTEBytesPerRowChroma: v->DPTEBytesPerRowC,
4982 dpte_row_height_luma: v->dpte_row_height[k],
4983 dpte_row_height_chroma: v->dpte_row_height_chroma[k],
4984 meta_row_bw: &v->meta_row_bandwidth[i][j][k],
4985 dpte_row_bw: &v->dpte_row_bandwidth[i][j][k]);
4986 }
4987 /*
4988 * DCCMetaBufferSizeSupport(i, j) = True
4989 * For k = 0 To NumberOfActivePlanes - 1
4990 * If MetaRowBytes(i, j, k) > 24064 Then
4991 * DCCMetaBufferSizeSupport(i, j) = False
4992 * End If
4993 * Next k
4994 */
4995 v->DCCMetaBufferSizeSupport[i][j] = true;
4996 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4997 if (v->MetaRowBytes[i][j][k] > 24064)
4998 v->DCCMetaBufferSizeSupport[i][j] = false;
4999 }
5000 v->UrgLatency[i] = CalculateUrgentLatency(
5001 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
5002 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
5003 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
5004 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
5005 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
5006 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
5007 FabricClockSingle: v->FabricClockPerState[i]);
5008
5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5010 CalculateUrgentBurstFactor(
5011 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
5012 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
5013 SwathHeightY: v->SwathHeightYThisState[k],
5014 SwathHeightC: v->SwathHeightCThisState[k],
5015 LineTime: v->HTotal[k] / v->PixelClock[k],
5016 UrgentLatency: v->UrgLatency[i],
5017 CursorBufferSize: v->CursorBufferSize,
5018 CursorWidth: v->CursorWidth[k][0],
5019 CursorBPP: v->CursorBPP[k][0],
5020 VRatio: v->VRatio[k],
5021 VRatioC: v->VRatioChroma[k],
5022 BytePerPixelInDETY: v->BytePerPixelInDETY[k],
5023 BytePerPixelInDETC: v->BytePerPixelInDETC[k],
5024 DETBufferSizeY: v->DETBufferSizeYThisState[k],
5025 DETBufferSizeC: v->DETBufferSizeCThisState[k],
5026 UrgentBurstFactorCursor: &v->UrgentBurstFactorCursor[k],
5027 UrgentBurstFactorLuma: &v->UrgentBurstFactorLuma[k],
5028 UrgentBurstFactorChroma: &v->UrgentBurstFactorChroma[k],
5029 NotEnoughUrgentLatencyHiding: &NotUrgentLatencyHiding[k]);
5030 }
5031
5032 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5033 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5034 if (NotUrgentLatencyHiding[k]) {
5035 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5036 }
5037 }
5038
5039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5040 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5041 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5042 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5043 }
5044
5045 v->TotalVActivePixelBandwidth[i][j] = 0;
5046 v->TotalVActiveCursorBandwidth[i][j] = 0;
5047 v->TotalMetaRowBandwidth[i][j] = 0;
5048 v->TotalDPTERowBandwidth[i][j] = 0;
5049 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5050 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5051 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5052 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5053 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5054 }
5055 }
5056 }
5057
5058 //Calculate Return BW
5059 for (i = 0; i < v->soc.num_states; ++i) {
5060 for (j = 0; j <= 1; ++j) {
5061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5062 if (v->BlendingAndTiming[k] == k) {
5063 if (v->WritebackEnable[k] == true) {
5064 v->WritebackDelayTime[k] = v->WritebackLatency
5065 + CalculateWriteBackDelay(
5066 WritebackPixelFormat: v->WritebackPixelFormat[k],
5067 WritebackHRatio: v->WritebackHRatio[k],
5068 WritebackVRatio: v->WritebackVRatio[k],
5069 WritebackVTaps: v->WritebackVTaps[k],
5070 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
5071 WritebackDestinationHeight: v->WritebackDestinationHeight[k],
5072 WritebackSourceHeight: v->WritebackSourceHeight[k],
5073 HTotal: v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5074 } else {
5075 v->WritebackDelayTime[k] = 0.0;
5076 }
5077 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5078 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5079 v->WritebackDelayTime[k] = dml_max(
5080 a: v->WritebackDelayTime[k],
5081 b: v->WritebackLatency
5082 + CalculateWriteBackDelay(
5083 WritebackPixelFormat: v->WritebackPixelFormat[m],
5084 WritebackHRatio: v->WritebackHRatio[m],
5085 WritebackVRatio: v->WritebackVRatio[m],
5086 WritebackVTaps: v->WritebackVTaps[m],
5087 WritebackDestinationWidth: v->WritebackDestinationWidth[m],
5088 WritebackDestinationHeight: v->WritebackDestinationHeight[m],
5089 WritebackSourceHeight: v->WritebackSourceHeight[m],
5090 HTotal: v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5091 }
5092 }
5093 }
5094 }
5095 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5096 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5097 if (v->BlendingAndTiming[k] == m) {
5098 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5099 }
5100 }
5101 }
5102 v->MaxMaxVStartup[i][j] = 0;
5103 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5104 v->MaximumVStartup[i][j][k] =
5105 CalculateMaxVStartup(
5106 VTotal: v->VTotal[k],
5107 VActive: v->VActive[k],
5108 VBlankNom: v->VBlankNom[k],
5109 HTotal: v->HTotal[k],
5110 PixelClock: v->PixelClock[k],
5111 ProgressiveTointerlaceUnitinOPP: v->ProgressiveToInterlaceUnitInOPP,
5112 Interlace: v->Interlace[k],
5113 VBlankNomDefaultUS: v->ip.VBlankNomDefaultUS,
5114 WritebackDelayTime: v->WritebackDelayTime[k]);
5115 v->MaxMaxVStartup[i][j] = dml_max(a: v->MaxMaxVStartup[i][j], b: v->MaximumVStartup[i][j][k]);
5116 }
5117 }
5118 }
5119
5120 ReorderingBytes = v->NumberOfChannels
5121 * dml_max3(
5122 a: v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5123 b: v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5124 c: v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5125
5126 for (i = 0; i < v->soc.num_states; ++i) {
5127 for (j = 0; j <= 1; ++j) {
5128 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5129 }
5130 }
5131
5132 if (v->UseMinimumRequiredDCFCLK == true)
5133 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5134
5135 for (i = 0; i < v->soc.num_states; ++i) {
5136 for (j = 0; j <= 1; ++j) {
5137 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5138 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5139 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5140 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5141 double PixelDataOnlyReturnBWPerState = dml_min(
5142 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5143 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5144 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5145 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5146 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5147
5148 if (v->HostVMEnable != true) {
5149 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5150 } else {
5151 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5152 }
5153 }
5154 }
5155
5156 //Re-ordering Buffer Support Check
5157 for (i = 0; i < v->soc.num_states; ++i) {
5158 for (j = 0; j <= 1; ++j) {
5159 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5160 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5161 v->ROBSupport[i][j] = true;
5162 } else {
5163 v->ROBSupport[i][j] = false;
5164 }
5165 }
5166 }
5167
5168 //Vertical Active BW support check
5169
5170 MaxTotalVActiveRDBandwidth = 0;
5171 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5172 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5173 }
5174
5175 for (i = 0; i < v->soc.num_states; ++i) {
5176 for (j = 0; j <= 1; ++j) {
5177 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5178 a: dml_min(
5179 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5180 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5181 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5182 b: v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5183 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5184
5185 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5186 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5187 } else {
5188 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5189 }
5190 }
5191 }
5192
5193 v->UrgentLatency = CalculateUrgentLatency(
5194 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
5195 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
5196 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
5197 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
5198 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
5199 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
5200 FabricClockSingle: v->FabricClock);
5201 //Prefetch Check
5202 for (i = 0; i < v->soc.num_states; ++i) {
5203 for (j = 0; j <= 1; ++j) {
5204 double VMDataOnlyReturnBWPerState;
5205 double HostVMInefficiencyFactor = 1;
5206 int NextPrefetchModeState = MinPrefetchMode;
5207 bool UnboundedRequestEnabledThisState = false;
5208 int CompressedBufferSizeInkByteThisState = 0;
5209 double dummy;
5210
5211 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5212
5213 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5214 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5215 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5216 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5217 }
5218
5219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5220 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5221 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5222 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5223 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5224 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5225 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5226 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5227 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5228 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5229 }
5230
5231 VMDataOnlyReturnBWPerState = dml_min(
5232 a: dml_min(
5233 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5234 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5235 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5236 b: v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5237 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5238 if (v->GPUVMEnable && v->HostVMEnable)
5239 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5240
5241 v->ExtraLatency = CalculateExtraLatency(
5242 RoundTripPingLatencyCycles: v->RoundTripPingLatencyCycles,
5243 ReorderingBytes,
5244 DCFCLK: v->DCFCLKState[i][j],
5245 TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j],
5246 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
5247 TotalNumberOfDCCActiveDPP: v->TotalNumberOfDCCActiveDPP[i][j],
5248 MetaChunkSize: v->MetaChunkSize,
5249 ReturnBW: v->ReturnBWPerState[i][j],
5250 GPUVMEnable: v->GPUVMEnable,
5251 HostVMEnable: v->HostVMEnable,
5252 NumberOfActivePlanes: v->NumberOfActivePlanes,
5253 NumberOfDPP: v->NoOfDPPThisState,
5254 dpte_group_bytes: v->dpte_group_bytes,
5255 HostVMInefficiencyFactor,
5256 HostVMMinPageSize: v->HostVMMinPageSize,
5257 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
5258
5259 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5260 do {
5261 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5262 v->MaxVStartup = v->NextMaxVStartup;
5263
5264 v->TWait = CalculateTWait(
5265 PrefetchMode: v->PrefetchModePerState[i][j],
5266 DRAMClockChangeLatency: v->DRAMClockChangeLatency,
5267 UrgentLatency: v->UrgLatency[i],
5268 SREnterPlusExitTime: v->SREnterPlusExitTime);
5269
5270 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5271 CalculatePrefetchSchedulePerPlane(mode_lib,
5272 HostVMInefficiencyFactor,
5273 i, j, k);
5274 }
5275
5276 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5277 CalculateUrgentBurstFactor(
5278 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
5279 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
5280 SwathHeightY: v->SwathHeightYThisState[k],
5281 SwathHeightC: v->SwathHeightCThisState[k],
5282 LineTime: v->HTotal[k] / v->PixelClock[k],
5283 UrgentLatency: v->UrgLatency[i],
5284 CursorBufferSize: v->CursorBufferSize,
5285 CursorWidth: v->CursorWidth[k][0],
5286 CursorBPP: v->CursorBPP[k][0],
5287 VRatio: v->VRatioPreY[i][j][k],
5288 VRatioC: v->VRatioPreC[i][j][k],
5289 BytePerPixelInDETY: v->BytePerPixelInDETY[k],
5290 BytePerPixelInDETC: v->BytePerPixelInDETC[k],
5291 DETBufferSizeY: v->DETBufferSizeYThisState[k],
5292 DETBufferSizeC: v->DETBufferSizeCThisState[k],
5293 UrgentBurstFactorCursor: &v->UrgentBurstFactorCursorPre[k],
5294 UrgentBurstFactorLuma: &v->UrgentBurstFactorLumaPre[k],
5295 UrgentBurstFactorChroma: &v->UrgentBurstFactorChromaPre[k],
5296 NotEnoughUrgentLatencyHiding: &v->NotUrgentLatencyHidingPre[k]);
5297 }
5298
5299 v->MaximumReadBandwidthWithPrefetch = 0.0;
5300 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5301 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5302 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5303
5304 v->MaximumReadBandwidthWithPrefetch =
5305 v->MaximumReadBandwidthWithPrefetch
5306 + dml_max3(
5307 a: v->VActivePixelBandwidth[i][j][k]
5308 + v->VActiveCursorBandwidth[i][j][k]
5309 + v->NoOfDPP[i][j][k]
5310 * (v->meta_row_bandwidth[i][j][k]
5311 + v->dpte_row_bandwidth[i][j][k]),
5312 b: v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5313 c: v->NoOfDPP[i][j][k]
5314 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5315 * v->UrgentBurstFactorLumaPre[k]
5316 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5317 * v->UrgentBurstFactorChromaPre[k])
5318 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5319 }
5320
5321 v->NotEnoughUrgentLatencyHidingPre = false;
5322 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5323 if (v->NotUrgentLatencyHidingPre[k] == true) {
5324 v->NotEnoughUrgentLatencyHidingPre = true;
5325 }
5326 }
5327
5328 v->PrefetchSupported[i][j] = true;
5329 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5330 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5331 v->PrefetchSupported[i][j] = false;
5332 }
5333 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5334 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5335 || v->NoTimeForPrefetch[i][j][k] == true) {
5336 v->PrefetchSupported[i][j] = false;
5337 }
5338 }
5339
5340 v->DynamicMetadataSupported[i][j] = true;
5341 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5342 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5343 v->DynamicMetadataSupported[i][j] = false;
5344 }
5345 }
5346
5347 v->VRatioInPrefetchSupported[i][j] = true;
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5350 v->VRatioInPrefetchSupported[i][j] = false;
5351 }
5352 }
5353 v->AnyLinesForVMOrRowTooLarge = false;
5354 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5355 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5356 v->AnyLinesForVMOrRowTooLarge = true;
5357 }
5358 }
5359
5360 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5361
5362 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5363 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5364 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5365 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5366 - dml_max(
5367 a: v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5368 b: v->NoOfDPP[i][j][k]
5369 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5370 * v->UrgentBurstFactorLumaPre[k]
5371 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5372 * v->UrgentBurstFactorChromaPre[k])
5373 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5374 }
5375 v->TotImmediateFlipBytes = 0.0;
5376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5377 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5378 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5379 + v->DPTEBytesPerRow[i][j][k]);
5380 }
5381
5382 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5383 CalculateFlipSchedule(
5384 mode_lib,
5385 k,
5386 HostVMInefficiencyFactor,
5387 UrgentExtraLatency: v->ExtraLatency,
5388 UrgentLatency: v->UrgLatency[i],
5389 PDEAndMetaPTEBytesPerFrame: v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5390 MetaRowBytes: v->MetaRowBytes[i][j][k],
5391 DPTEBytesPerRow: v->DPTEBytesPerRow[i][j][k]);
5392 }
5393 v->total_dcn_read_bw_with_flip = 0.0;
5394 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5395 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5396 + dml_max3(
5397 a: v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5398 b: v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5399 + v->VActiveCursorBandwidth[i][j][k],
5400 c: v->NoOfDPP[i][j][k]
5401 * (v->final_flip_bw[k]
5402 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5403 * v->UrgentBurstFactorLumaPre[k]
5404 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5405 * v->UrgentBurstFactorChromaPre[k])
5406 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5407 }
5408 v->ImmediateFlipSupportedForState[i][j] = true;
5409 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5410 v->ImmediateFlipSupportedForState[i][j] = false;
5411 }
5412 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5413 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5414 v->ImmediateFlipSupportedForState[i][j] = false;
5415 }
5416 }
5417 } else {
5418 v->ImmediateFlipSupportedForState[i][j] = false;
5419 }
5420
5421 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5422 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5423 NextPrefetchModeState = NextPrefetchModeState + 1;
5424 } else {
5425 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5426 }
5427 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5428 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5429 && ((v->HostVMEnable == false &&
5430 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5431 || v->ImmediateFlipSupportedForState[i][j] == true))
5432 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5433
5434 CalculateUnboundedRequestAndCompressedBufferSize(
5435 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
5436 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
5437 UseUnboundedRequestingFinal: v->UseUnboundedRequesting,
5438 TotalActiveDPP: v->TotalNumberOfActiveDPP[i][j],
5439 NoChromaPlanes: NoChroma,
5440 MaxNumDPP: v->MaxNumDPP,
5441 CompressedBufferSegmentSizeInkByteFinal: v->CompressedBufferSegmentSizeInkByte,
5442 Output: v->Output,
5443 UnboundedRequestEnabled: &UnboundedRequestEnabledThisState,
5444 CompressedBufferSizeInkByte: &CompressedBufferSizeInkByteThisState);
5445
5446 CalculateWatermarksAndDRAMSpeedChangeSupport(
5447 mode_lib,
5448 PrefetchMode: v->PrefetchModePerState[i][j],
5449 DCFCLK: v->DCFCLKState[i][j],
5450 ReturnBW: v->ReturnBWPerState[i][j],
5451 UrgentLatency: v->UrgLatency[i],
5452 ExtraLatency: v->ExtraLatency,
5453 SOCCLK: v->SOCCLKPerState[i],
5454 DCFCLKDeepSleep: v->ProjectedDCFCLKDeepSleep[i][j],
5455 DETBufferSizeY: v->DETBufferSizeYThisState,
5456 DETBufferSizeC: v->DETBufferSizeCThisState,
5457 SwathHeightY: v->SwathHeightYThisState,
5458 SwathHeightC: v->SwathHeightCThisState,
5459 SwathWidthY: v->SwathWidthYThisState,
5460 SwathWidthC: v->SwathWidthCThisState,
5461 DPPPerPlane: v->NoOfDPPThisState,
5462 BytePerPixelDETY: v->BytePerPixelInDETY,
5463 BytePerPixelDETC: v->BytePerPixelInDETC,
5464 UnboundedRequestEnabled: UnboundedRequestEnabledThisState,
5465 CompressedBufferSizeInkByte: CompressedBufferSizeInkByteThisState,
5466 DRAMClockChangeSupport: &v->DRAMClockChangeSupport[i][j],
5467 StutterExitWatermark: &dummy,
5468 StutterEnterPlusExitWatermark: &dummy,
5469 Z8StutterExitWatermark: &dummy,
5470 Z8StutterEnterPlusExitWatermark: &dummy);
5471 }
5472 }
5473
5474 /*PTE Buffer Size Check*/
5475 for (i = 0; i < v->soc.num_states; i++) {
5476 for (j = 0; j < 2; j++) {
5477 v->PTEBufferSizeNotExceeded[i][j] = true;
5478 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5479 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5480 v->PTEBufferSizeNotExceeded[i][j] = false;
5481 }
5482 }
5483 }
5484 }
5485
5486 /*Cursor Support Check*/
5487 v->CursorSupport = true;
5488 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5489 if (v->CursorWidth[k][0] > 0.0) {
5490 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5491 v->CursorSupport = false;
5492 }
5493 }
5494 }
5495
5496 /*Valid Pitch Check*/
5497 v->PitchSupport = true;
5498 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5499 v->AlignedYPitch[k] = dml_ceil(a: dml_max(a: v->PitchY[k], b: v->SurfaceWidthY[k]), granularity: v->MacroTileWidthY[k]);
5500 if (v->DCCEnable[k] == true) {
5501 v->AlignedDCCMetaPitchY[k] = dml_ceil(a: dml_max(a: v->DCCMetaPitchY[k], b: v->SurfaceWidthY[k]), granularity: 64.0 * v->Read256BlockWidthY[k]);
5502 } else {
5503 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5504 }
5505 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5506 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5507 && v->SourcePixelFormat[k] != dm_mono_8) {
5508 v->AlignedCPitch[k] = dml_ceil(a: dml_max(a: v->PitchC[k], b: v->SurfaceWidthC[k]), granularity: v->MacroTileWidthC[k]);
5509 if (v->DCCEnable[k] == true) {
5510 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5511 a: dml_max(a: v->DCCMetaPitchC[k], b: v->SurfaceWidthC[k]),
5512 granularity: 64.0 * v->Read256BlockWidthC[k]);
5513 } else {
5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5515 }
5516 } else {
5517 v->AlignedCPitch[k] = v->PitchC[k];
5518 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5519 }
5520 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5521 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5522 v->PitchSupport = false;
5523 }
5524 }
5525
5526 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5527 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5528 ViewportExceedsSurface = true;
5529 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5530 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5531 && v->SourcePixelFormat[k] != dm_rgbe) {
5532 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5533 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5534 ViewportExceedsSurface = true;
5535 }
5536 }
5537 }
5538 }
5539
5540 /*Mode Support, Voltage State and SOC Configuration*/
5541 for (i = v->soc.num_states - 1; i >= 0; i--) {
5542 for (j = 0; j < 2; j++) {
5543 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5544 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5545 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5546 && v->DTBCLKRequiredMoreThanSupported[i] == false
5547 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5548 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5549 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5550 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5551 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5552 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5553 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5554 && ((v->HostVMEnable == false
5555 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5556 || v->ImmediateFlipSupportedForState[i][j] == true)
5557 && FMTBufferExceeded == false) {
5558 v->ModeSupport[i][j] = true;
5559 } else {
5560 v->ModeSupport[i][j] = false;
5561 }
5562 }
5563 }
5564 for (i = v->soc.num_states; i >= 0; i--) {
5565 for (j = 0; j < 2; j++) {
5566 enum dm_validation_status status = DML_VALIDATION_OK;
5567
5568 if (!v->ScaleRatioAndTapsSupport) {
5569 status = DML_FAIL_SCALE_RATIO_TAP;
5570 } else if (!v->SourceFormatPixelAndScanSupport) {
5571 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5572 } else if (!v->ViewportSizeSupport[i][j]) {
5573 status = DML_FAIL_VIEWPORT_SIZE;
5574 } else if (P2IWith420) {
5575 status = DML_FAIL_P2I_WITH_420;
5576 } else if (DSCOnlyIfNecessaryWithBPP) {
5577 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5578 } else if (DSC422NativeNotSupported) {
5579 status = DML_FAIL_NOT_DSC422_NATIVE;
5580 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5581 status = DML_FAIL_ODM_COMBINE4TO1;
5582 } else if (v->NotEnoughDSCUnits[i]) {
5583 status = DML_FAIL_NOT_ENOUGH_DSC;
5584 } else if (!v->ROBSupport[i][j]) {
5585 status = DML_FAIL_REORDERING_BUFFER;
5586 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5587 status = DML_FAIL_DISPCLK_DPPCLK;
5588 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5589 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5590 } else if (!EnoughWritebackUnits) {
5591 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5592 } else if (!v->WritebackLatencySupport) {
5593 status = DML_FAIL_WRITEBACK_LATENCY;
5594 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5595 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5596 } else if (!v->CursorSupport) {
5597 status = DML_FAIL_CURSOR_SUPPORT;
5598 } else if (!v->PitchSupport) {
5599 status = DML_FAIL_PITCH_SUPPORT;
5600 } else if (ViewportExceedsSurface) {
5601 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5602 } else if (!v->PrefetchSupported[i][j]) {
5603 status = DML_FAIL_PREFETCH_SUPPORT;
5604 } else if (!v->DynamicMetadataSupported[i][j]) {
5605 status = DML_FAIL_DYNAMIC_METADATA;
5606 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5607 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5608 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5609 status = DML_FAIL_V_RATIO_PREFETCH;
5610 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5611 status = DML_FAIL_PTE_BUFFER_SIZE;
5612 } else if (v->NonsupportedDSCInputBPC) {
5613 status = DML_FAIL_DSC_INPUT_BPC;
5614 } else if ((v->HostVMEnable
5615 && !v->ImmediateFlipSupportedForState[i][j])) {
5616 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5617 } else if (FMTBufferExceeded) {
5618 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5619 }
5620 mode_lib->vba.ValidationStatus[i] = status;
5621 }
5622 }
5623
5624 {
5625 unsigned int MaximumMPCCombine = 0;
5626
5627 for (i = v->soc.num_states; i >= 0; i--) {
5628 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5629 v->VoltageLevel = i;
5630 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5631 if (v->ModeSupport[i][0] == true) {
5632 MaximumMPCCombine = 0;
5633 } else {
5634 MaximumMPCCombine = 1;
5635 }
5636 }
5637 }
5638 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5639 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5640 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5641 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5642 }
5643 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5644 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5645 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5646 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5647 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5648 v->maxMpcComb = MaximumMPCCombine;
5649 }
5650}
5651
5652static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5653 struct display_mode_lib *mode_lib,
5654 unsigned int PrefetchMode,
5655 double DCFCLK,
5656 double ReturnBW,
5657 double UrgentLatency,
5658 double ExtraLatency,
5659 double SOCCLK,
5660 double DCFCLKDeepSleep,
5661 unsigned int DETBufferSizeY[],
5662 unsigned int DETBufferSizeC[],
5663 unsigned int SwathHeightY[],
5664 unsigned int SwathHeightC[],
5665 double SwathWidthY[],
5666 double SwathWidthC[],
5667 unsigned int DPPPerPlane[],
5668 double BytePerPixelDETY[],
5669 double BytePerPixelDETC[],
5670 bool UnboundedRequestEnabled,
5671 unsigned int CompressedBufferSizeInkByte,
5672 enum clock_change_support *DRAMClockChangeSupport,
5673 double *StutterExitWatermark,
5674 double *StutterEnterPlusExitWatermark,
5675 double *Z8StutterExitWatermark,
5676 double *Z8StutterEnterPlusExitWatermark)
5677{
5678 struct vba_vars_st *v = &mode_lib->vba;
5679 double EffectiveLBLatencyHidingY;
5680 double EffectiveLBLatencyHidingC;
5681 double LinesInDETY[DC__NUM_DPP__MAX];
5682 double LinesInDETC;
5683 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5684 unsigned int LinesInDETCRoundedDownToSwath;
5685 double FullDETBufferingTimeY;
5686 double FullDETBufferingTimeC;
5687 double ActiveDRAMClockChangeLatencyMarginY;
5688 double ActiveDRAMClockChangeLatencyMarginC;
5689 double WritebackDRAMClockChangeLatencyMargin;
5690 double PlaneWithMinActiveDRAMClockChangeMargin;
5691 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5692 double WritebackDRAMClockChangeLatencyHiding;
5693 double TotalPixelBW = 0.0;
5694 int k, j;
5695
5696 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5697
5698#ifdef __DML_VBA_DEBUG__
5699 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5700 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5701 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5702#endif
5703
5704 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5705
5706#ifdef __DML_VBA_DEBUG__
5707 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5708 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5709#endif
5710
5711 v->TotalActiveWriteback = 0;
5712 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5713 if (v->WritebackEnable[k] == true) {
5714 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5715 }
5716 }
5717
5718 if (v->TotalActiveWriteback <= 1) {
5719 v->WritebackUrgentWatermark = v->WritebackLatency;
5720 } else {
5721 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5722 }
5723
5724 if (v->TotalActiveWriteback <= 1) {
5725 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5726 } else {
5727 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5728 }
5729
5730 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5731 TotalPixelBW = TotalPixelBW
5732 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5733 / (v->HTotal[k] / v->PixelClock[k]);
5734 }
5735
5736 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5737 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5738
5739 v->LBLatencyHidingSourceLinesY = dml_min(
5740 a: (double) v->MaxLineBufferLines,
5741 b: dml_floor(a: v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(a: v->HRatio[k], b: 1.0)), granularity: 1)) - (v->vtaps[k] - 1);
5742
5743 v->LBLatencyHidingSourceLinesC = dml_min(
5744 a: (double) v->MaxLineBufferLines,
5745 b: dml_floor(a: v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(a: v->HRatioChroma[k], b: 1.0)), granularity: 1)) - (v->VTAPsChroma[k] - 1);
5746
5747 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5748
5749 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5750
5751 if (UnboundedRequestEnabled) {
5752 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5753 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5754 }
5755
5756 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5757 LinesInDETYRoundedDownToSwath[k] = dml_floor(a: LinesInDETY[k], granularity: SwathHeightY[k]);
5758 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5759 if (BytePerPixelDETC[k] > 0) {
5760 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5761 LinesInDETCRoundedDownToSwath = dml_floor(a: LinesInDETC, granularity: SwathHeightC[k]);
5762 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5763 } else {
5764 LinesInDETC = 0;
5765 FullDETBufferingTimeC = 999999;
5766 }
5767
5768 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5769 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5770
5771 if (v->NumberOfActivePlanes > 1) {
5772 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5773 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5774 }
5775
5776 if (BytePerPixelDETC[k] > 0) {
5777 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5778 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5779
5780 if (v->NumberOfActivePlanes > 1) {
5781 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5782 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5783 }
5784 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(a: ActiveDRAMClockChangeLatencyMarginY, b: ActiveDRAMClockChangeLatencyMarginC);
5785 } else {
5786 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5787 }
5788
5789 if (v->WritebackEnable[k] == true) {
5790 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5791 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5792 if (v->WritebackPixelFormat[k] == dm_444_64) {
5793 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5794 }
5795 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5796 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(a: v->ActiveDRAMClockChangeLatencyMargin[k], b: WritebackDRAMClockChangeLatencyMargin);
5797 }
5798 }
5799
5800 v->MinActiveDRAMClockChangeMargin = 999999;
5801 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5802 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5803 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5804 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5805 if (v->BlendingAndTiming[k] == k) {
5806 PlaneWithMinActiveDRAMClockChangeMargin = k;
5807 } else {
5808 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5809 if (v->BlendingAndTiming[k] == j) {
5810 PlaneWithMinActiveDRAMClockChangeMargin = j;
5811 }
5812 }
5813 }
5814 }
5815 }
5816
5817 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5818
5819 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5820 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5821 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5822 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5823 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5824 }
5825 }
5826
5827 v->TotalNumberOfActiveOTG = 0;
5828
5829 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5830 if (v->BlendingAndTiming[k] == k) {
5831 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5832 }
5833 }
5834
5835 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5836 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5837 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5838 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5839 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5840 } else {
5841 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5842 }
5843
5844 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5845 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5846 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5847 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5848
5849#ifdef __DML_VBA_DEBUG__
5850 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5851 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5852 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5853 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5854#endif
5855}
5856
5857static void CalculateDCFCLKDeepSleep(
5858 struct display_mode_lib *mode_lib,
5859 unsigned int NumberOfActivePlanes,
5860 int BytePerPixelY[],
5861 int BytePerPixelC[],
5862 double VRatio[],
5863 double VRatioChroma[],
5864 double SwathWidthY[],
5865 double SwathWidthC[],
5866 unsigned int DPPPerPlane[],
5867 double HRatio[],
5868 double HRatioChroma[],
5869 double PixelClock[],
5870 double PSCL_THROUGHPUT[],
5871 double PSCL_THROUGHPUT_CHROMA[],
5872 double DPPCLK[],
5873 double ReadBandwidthLuma[],
5874 double ReadBandwidthChroma[],
5875 int ReturnBusWidth,
5876 double *DCFCLKDeepSleep)
5877{
5878 struct vba_vars_st *v = &mode_lib->vba;
5879 double DisplayPipeLineDeliveryTimeLuma;
5880 double DisplayPipeLineDeliveryTimeChroma;
5881 double ReadBandwidth = 0.0;
5882 int k;
5883
5884 for (k = 0; k < NumberOfActivePlanes; ++k) {
5885
5886 if (VRatio[k] <= 1) {
5887 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5888 } else {
5889 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5890 }
5891 if (BytePerPixelC[k] == 0) {
5892 DisplayPipeLineDeliveryTimeChroma = 0;
5893 } else {
5894 if (VRatioChroma[k] <= 1) {
5895 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5896 } else {
5897 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5898 }
5899 }
5900
5901 if (BytePerPixelC[k] > 0) {
5902 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5903 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5904 } else {
5905 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5906 }
5907 v->DCFCLKDeepSleepPerPlane[k] = dml_max(a: v->DCFCLKDeepSleepPerPlane[k], b: PixelClock[k] / 16);
5908
5909 }
5910
5911 for (k = 0; k < NumberOfActivePlanes; ++k) {
5912 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5913 }
5914
5915 *DCFCLKDeepSleep = dml_max(a: 8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5916
5917 for (k = 0; k < NumberOfActivePlanes; ++k) {
5918 *DCFCLKDeepSleep = dml_max(a: *DCFCLKDeepSleep, b: v->DCFCLKDeepSleepPerPlane[k]);
5919 }
5920}
5921
/*
 * CalculateUrgentBurstFactor() - compute the urgent burst factors for the
 * cursor, luma and chroma of one plane.
 *
 * For each of the three, the buffer size is converted into a time; the
 * factor is then time / (time - UrgentLatency). If the time does not exceed
 * UrgentLatency, the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set to 1. If the relevant vertical ratio is not positive, the factor is 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - always written (0 unless a check fails).
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
 * Outputs that are not written keep whatever value the caller stored there.
 *
 * The cursor and luma calculations use VRatio; the chroma calculation uses
 * VRatioC.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so the
		 * chroma buffering time is scaled by VRatioC rather than VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5995
5996static void CalculatePixelDeliveryTimes(
5997 unsigned int NumberOfActivePlanes,
5998 double VRatio[],
5999 double VRatioChroma[],
6000 double VRatioPrefetchY[],
6001 double VRatioPrefetchC[],
6002 unsigned int swath_width_luma_ub[],
6003 unsigned int swath_width_chroma_ub[],
6004 unsigned int DPPPerPlane[],
6005 double HRatio[],
6006 double HRatioChroma[],
6007 double PixelClock[],
6008 double PSCL_THROUGHPUT[],
6009 double PSCL_THROUGHPUT_CHROMA[],
6010 double DPPCLK[],
6011 int BytePerPixelC[],
6012 enum scan_direction_class SourceScan[],
6013 unsigned int NumberOfCursors[],
6014 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6015 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6016 unsigned int BlockWidth256BytesY[],
6017 unsigned int BlockHeight256BytesY[],
6018 unsigned int BlockWidth256BytesC[],
6019 unsigned int BlockHeight256BytesC[],
6020 double DisplayPipeLineDeliveryTimeLuma[],
6021 double DisplayPipeLineDeliveryTimeChroma[],
6022 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6023 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6024 double DisplayPipeRequestDeliveryTimeLuma[],
6025 double DisplayPipeRequestDeliveryTimeChroma[],
6026 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6027 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6028 double CursorRequestDeliveryTime[],
6029 double CursorRequestDeliveryTimePrefetch[])
6030{
6031 double req_per_swath_ub;
6032 int k;
6033
6034 for (k = 0; k < NumberOfActivePlanes; ++k) {
6035 if (VRatio[k] <= 1) {
6036 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6037 } else {
6038 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6039 }
6040
6041 if (BytePerPixelC[k] == 0) {
6042 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6043 } else {
6044 if (VRatioChroma[k] <= 1) {
6045 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6046 } else {
6047 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6048 }
6049 }
6050
6051 if (VRatioPrefetchY[k] <= 1) {
6052 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6053 } else {
6054 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6055 }
6056
6057 if (BytePerPixelC[k] == 0) {
6058 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6059 } else {
6060 if (VRatioPrefetchC[k] <= 1) {
6061 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6062 } else {
6063 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6064 }
6065 }
6066 }
6067
6068 for (k = 0; k < NumberOfActivePlanes; ++k) {
6069 if (SourceScan[k] != dm_vert) {
6070 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6071 } else {
6072 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6073 }
6074 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6075 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6076 if (BytePerPixelC[k] == 0) {
6077 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6078 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6079 } else {
6080 if (SourceScan[k] != dm_vert) {
6081 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6082 } else {
6083 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6084 }
6085 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6086 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6087 }
6088#ifdef __DML_VBA_DEBUG__
6089 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6090 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6091 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6092 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6093 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6094 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6095 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6096 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6097 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6098 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6099 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6100 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6101#endif
6102 }
6103
6104 for (k = 0; k < NumberOfActivePlanes; ++k) {
6105 int cursor_req_per_width;
6106
6107 cursor_req_per_width = dml_ceil(a: CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, granularity: 1);
6108 if (NumberOfCursors[k] > 0) {
6109 if (VRatio[k] <= 1) {
6110 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6111 } else {
6112 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6113 }
6114 if (VRatioPrefetchY[k] <= 1) {
6115 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6116 } else {
6117 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6118 }
6119 } else {
6120 CursorRequestDeliveryTime[k] = 0;
6121 CursorRequestDeliveryTimePrefetch[k] = 0;
6122 }
6123#ifdef __DML_VBA_DEBUG__
6124 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6125 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6126 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6127#endif
6128 }
6129}
6130
6131static void CalculateMetaAndPTETimes(
6132 int NumberOfActivePlanes,
6133 bool GPUVMEnable,
6134 int MetaChunkSize,
6135 int MinMetaChunkSizeBytes,
6136 int HTotal[],
6137 double VRatio[],
6138 double VRatioChroma[],
6139 double DestinationLinesToRequestRowInVBlank[],
6140 double DestinationLinesToRequestRowInImmediateFlip[],
6141 bool DCCEnable[],
6142 double PixelClock[],
6143 int BytePerPixelY[],
6144 int BytePerPixelC[],
6145 enum scan_direction_class SourceScan[],
6146 int dpte_row_height[],
6147 int dpte_row_height_chroma[],
6148 int meta_row_width[],
6149 int meta_row_width_chroma[],
6150 int meta_row_height[],
6151 int meta_row_height_chroma[],
6152 int meta_req_width[],
6153 int meta_req_width_chroma[],
6154 int meta_req_height[],
6155 int meta_req_height_chroma[],
6156 int dpte_group_bytes[],
6157 int PTERequestSizeY[],
6158 int PTERequestSizeC[],
6159 int PixelPTEReqWidthY[],
6160 int PixelPTEReqHeightY[],
6161 int PixelPTEReqWidthC[],
6162 int PixelPTEReqHeightC[],
6163 int dpte_row_width_luma_ub[],
6164 int dpte_row_width_chroma_ub[],
6165 double DST_Y_PER_PTE_ROW_NOM_L[],
6166 double DST_Y_PER_PTE_ROW_NOM_C[],
6167 double DST_Y_PER_META_ROW_NOM_L[],
6168 double DST_Y_PER_META_ROW_NOM_C[],
6169 double TimePerMetaChunkNominal[],
6170 double TimePerChromaMetaChunkNominal[],
6171 double TimePerMetaChunkVBlank[],
6172 double TimePerChromaMetaChunkVBlank[],
6173 double TimePerMetaChunkFlip[],
6174 double TimePerChromaMetaChunkFlip[],
6175 double time_per_pte_group_nom_luma[],
6176 double time_per_pte_group_vblank_luma[],
6177 double time_per_pte_group_flip_luma[],
6178 double time_per_pte_group_nom_chroma[],
6179 double time_per_pte_group_vblank_chroma[],
6180 double time_per_pte_group_flip_chroma[])
6181{
6182 unsigned int meta_chunk_width;
6183 unsigned int min_meta_chunk_width;
6184 unsigned int meta_chunk_per_row_int;
6185 unsigned int meta_row_remainder;
6186 unsigned int meta_chunk_threshold;
6187 unsigned int meta_chunks_per_row_ub;
6188 unsigned int meta_chunk_width_chroma;
6189 unsigned int min_meta_chunk_width_chroma;
6190 unsigned int meta_chunk_per_row_int_chroma;
6191 unsigned int meta_row_remainder_chroma;
6192 unsigned int meta_chunk_threshold_chroma;
6193 unsigned int meta_chunks_per_row_ub_chroma;
6194 unsigned int dpte_group_width_luma;
6195 unsigned int dpte_groups_per_row_luma_ub;
6196 unsigned int dpte_group_width_chroma;
6197 unsigned int dpte_groups_per_row_chroma_ub;
6198 int k;
6199
6200 for (k = 0; k < NumberOfActivePlanes; ++k) {
6201 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6202 if (BytePerPixelC[k] == 0) {
6203 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6204 } else {
6205 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6206 }
6207 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6208 if (BytePerPixelC[k] == 0) {
6209 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6210 } else {
6211 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6212 }
6213 }
6214
6215 for (k = 0; k < NumberOfActivePlanes; ++k) {
6216 if (DCCEnable[k] == true) {
6217 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6218 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6219 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6220 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6221 if (SourceScan[k] != dm_vert) {
6222 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6223 } else {
6224 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6225 }
6226 if (meta_row_remainder <= meta_chunk_threshold) {
6227 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6228 } else {
6229 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6230 }
6231 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6232 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6233 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6234 if (BytePerPixelC[k] == 0) {
6235 TimePerChromaMetaChunkNominal[k] = 0;
6236 TimePerChromaMetaChunkVBlank[k] = 0;
6237 TimePerChromaMetaChunkFlip[k] = 0;
6238 } else {
6239 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6240 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6241 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6242 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6243 if (SourceScan[k] != dm_vert) {
6244 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6245 } else {
6246 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6247 }
6248 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6249 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6250 } else {
6251 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6252 }
6253 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6254 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6255 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6256 }
6257 } else {
6258 TimePerMetaChunkNominal[k] = 0;
6259 TimePerMetaChunkVBlank[k] = 0;
6260 TimePerMetaChunkFlip[k] = 0;
6261 TimePerChromaMetaChunkNominal[k] = 0;
6262 TimePerChromaMetaChunkVBlank[k] = 0;
6263 TimePerChromaMetaChunkFlip[k] = 0;
6264 }
6265 }
6266
6267 for (k = 0; k < NumberOfActivePlanes; ++k) {
6268 if (GPUVMEnable == true) {
6269 if (SourceScan[k] != dm_vert) {
6270 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6271 } else {
6272 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6273 }
6274 dpte_groups_per_row_luma_ub = dml_ceil(a: 1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, granularity: 1);
6275 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6276 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6277 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6278 if (BytePerPixelC[k] == 0) {
6279 time_per_pte_group_nom_chroma[k] = 0;
6280 time_per_pte_group_vblank_chroma[k] = 0;
6281 time_per_pte_group_flip_chroma[k] = 0;
6282 } else {
6283 if (SourceScan[k] != dm_vert) {
6284 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6285 } else {
6286 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6287 }
6288 dpte_groups_per_row_chroma_ub = dml_ceil(a: 1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, granularity: 1);
6289 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6290 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6291 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6292 }
6293 } else {
6294 time_per_pte_group_nom_luma[k] = 0;
6295 time_per_pte_group_vblank_luma[k] = 0;
6296 time_per_pte_group_flip_luma[k] = 0;
6297 time_per_pte_group_nom_chroma[k] = 0;
6298 time_per_pte_group_vblank_chroma[k] = 0;
6299 time_per_pte_group_flip_chroma[k] = 0;
6300 }
6301 }
6302}
6303
/*
 * CalculateVMGroupAndRequestTimes - per-plane time per VM group / VM request.
 *
 * For each plane k in [0, NumberOfActivePlanes):
 *
 *  - If GPUVM is disabled, or the plane has neither DCC nor more than one
 *    page-table level, all four outputs are 0.
 *  - Otherwise the group count and the 64-byte request count are built from
 *    the dpde0 byte counts (DCC off), the meta PTE byte counts (DCC on, one
 *    page-table level) or both (DCC on, several levels).  Chroma
 *    contributions are included only when BytePerPixelC[k] > 0.  Each
 *    output is DestinationLinesToRequestVMIn{VBlank,ImmediateFlip} * HTotal /
 *    PixelClock divided by the matching count, and is halved when
 *    GPUVMMaxPageTableLevels > 2.
 *
 * Results are written to TimePerVMGroup{VBlank,Flip} and
 * TimePerVMRequest{VBlank,Flip}; nothing is returned.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int groups_in_stage;
	int reqs_in_stage;
	int p;

	for (p = 0; p < NumberOfActivePlanes; ++p) {
		const bool has_chroma = BytePerPixelC[p] > 0;

		/* Nothing to time for this plane. */
		if (!GPUVMEnable || (!DCCEnable[p] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[p] = 0;
			TimePerVMGroupFlip[p] = 0;
			TimePerVMRequestVBlank[p] = 0;
			TimePerVMRequestFlip[p] = 0;
			continue;
		}

		if (!DCCEnable[p]) {
			/* DCC off: dpde0 bytes only. */
			groups_in_stage = has_chroma
				? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[p]) / (double) (vm_group_bytes[p]), 1)
				: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1);
			reqs_in_stage = has_chroma
				? dpde0_bytes_per_frame_ub_l[p] / 64 + dpde0_bytes_per_frame_ub_c[p] / 64
				: dpde0_bytes_per_frame_ub_l[p] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* DCC on, single page-table level: meta PTE bytes only. */
			groups_in_stage = has_chroma
				? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[p]) / (double) (vm_group_bytes[p]), 1)
				: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1);
			reqs_in_stage = has_chroma
				? meta_pte_bytes_per_frame_ub_l[p] / 64 + meta_pte_bytes_per_frame_ub_c[p] / 64
				: meta_pte_bytes_per_frame_ub_l[p] / 64;
		} else {
			/*
			 * DCC on, several levels: dpde0 and meta PTE bytes, plus
			 * a constant 2 groups (1 without chroma).
			 */
			groups_in_stage = has_chroma
				? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[p]) / (double) (vm_group_bytes[p]), 1)
				: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[p]) / (double) (vm_group_bytes[p]), 1);
			reqs_in_stage = has_chroma
				? dpde0_bytes_per_frame_ub_l[p] / 64 + dpde0_bytes_per_frame_ub_c[p] / 64
					+ meta_pte_bytes_per_frame_ub_l[p] / 64 + meta_pte_bytes_per_frame_ub_c[p] / 64
				: dpde0_bytes_per_frame_ub_l[p] / 64 + meta_pte_bytes_per_frame_ub_l[p] / 64;
		}

		TimePerVMGroupVBlank[p] = DestinationLinesToRequestVMInVBlank[p] * HTotal[p] / PixelClock[p] / groups_in_stage;
		TimePerVMGroupFlip[p] = DestinationLinesToRequestVMInImmediateFlip[p] * HTotal[p] / PixelClock[p] / groups_in_stage;
		TimePerVMRequestVBlank[p] = DestinationLinesToRequestVMInVBlank[p] * HTotal[p] / PixelClock[p] / reqs_in_stage;
		TimePerVMRequestFlip[p] = DestinationLinesToRequestVMInImmediateFlip[p] * HTotal[p] / PixelClock[p] / reqs_in_stage;

		/* More than two page-table levels: every time is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[p] /= 2;
			TimePerVMGroupFlip[p] /= 2;
			TimePerVMRequestVBlank[p] /= 2;
			TimePerVMRequestFlip[p] /= 2;
		}
	}
}
6403
6404static void CalculateStutterEfficiency(
6405 struct display_mode_lib *mode_lib,
6406 int CompressedBufferSizeInkByte,
6407 bool UnboundedRequestEnabled,
6408 int ConfigReturnBufferSizeInKByte,
6409 int MetaFIFOSizeInKEntries,
6410 int ZeroSizeBufferEntries,
6411 int NumberOfActivePlanes,
6412 int ROBBufferSizeInKByte,
6413 double TotalDataReadBandwidth,
6414 double DCFCLK,
6415 double ReturnBW,
6416 double COMPBUF_RESERVED_SPACE_64B,
6417 double COMPBUF_RESERVED_SPACE_ZS,
6418 double SRExitTime,
6419 double SRExitZ8Time,
6420 bool SynchronizedVBlank,
6421 double Z8StutterEnterPlusExitWatermark,
6422 double StutterEnterPlusExitWatermark,
6423 bool ProgressiveToInterlaceUnitInOPP,
6424 bool Interlace[],
6425 double MinTTUVBlank[],
6426 int DPPPerPlane[],
6427 unsigned int DETBufferSizeY[],
6428 int BytePerPixelY[],
6429 double BytePerPixelDETY[],
6430 double SwathWidthY[],
6431 int SwathHeightY[],
6432 int SwathHeightC[],
6433 double NetDCCRateLuma[],
6434 double NetDCCRateChroma[],
6435 double DCCFractionOfZeroSizeRequestsLuma[],
6436 double DCCFractionOfZeroSizeRequestsChroma[],
6437 int HTotal[],
6438 int VTotal[],
6439 double PixelClock[],
6440 double VRatio[],
6441 enum scan_direction_class SourceScan[],
6442 int BlockHeight256BytesY[],
6443 int BlockWidth256BytesY[],
6444 int BlockHeight256BytesC[],
6445 int BlockWidth256BytesC[],
6446 int DCCYMaxUncompressedBlock[],
6447 int DCCCMaxUncompressedBlock[],
6448 int VActive[],
6449 bool DCCEnable[],
6450 bool WritebackEnable[],
6451 double ReadBandwidthPlaneLuma[],
6452 double ReadBandwidthPlaneChroma[],
6453 double meta_row_bw[],
6454 double dpte_row_bw[],
6455 double *StutterEfficiencyNotIncludingVBlank,
6456 double *StutterEfficiency,
6457 int *NumberOfStutterBurstsPerFrame,
6458 double *Z8StutterEfficiencyNotIncludingVBlank,
6459 double *Z8StutterEfficiency,
6460 int *Z8NumberOfStutterBurstsPerFrame,
6461 double *StutterPeriod)
6462{
6463 struct vba_vars_st *v = &mode_lib->vba;
6464
6465 double DETBufferingTimeY;
6466 double SwathWidthYCriticalPlane = 0;
6467 double VActiveTimeCriticalPlane = 0;
6468 double FrameTimeCriticalPlane = 0;
6469 int BytePerPixelYCriticalPlane = 0;
6470 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6471 double MinTTUVBlankCriticalPlane = 0;
6472 double TotalCompressedReadBandwidth;
6473 double TotalRowReadBandwidth;
6474 double AverageDCCCompressionRate;
6475 double EffectiveCompressedBufferSize;
6476 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6477 double StutterBurstTime;
6478 int TotalActiveWriteback;
6479 double LinesInDETY;
6480 double LinesInDETYRoundedDownToSwath;
6481 double MaximumEffectiveCompressionLuma;
6482 double MaximumEffectiveCompressionChroma;
6483 double TotalZeroSizeRequestReadBandwidth;
6484 double TotalZeroSizeCompressedReadBandwidth;
6485 double AverageDCCZeroSizeFraction;
6486 double AverageZeroSizeCompressionRate;
6487 int TotalNumberOfActiveOTG = 0;
6488 double LastStutterPeriod = 0.0;
6489 double LastZ8StutterPeriod = 0.0;
6490 int k;
6491
6492 TotalZeroSizeRequestReadBandwidth = 0;
6493 TotalZeroSizeCompressedReadBandwidth = 0;
6494 TotalRowReadBandwidth = 0;
6495 TotalCompressedReadBandwidth = 0;
6496
6497 for (k = 0; k < NumberOfActivePlanes; ++k) {
6498 if (DCCEnable[k] == true) {
6499 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6500 || DCCYMaxUncompressedBlock[k] < 256) {
6501 MaximumEffectiveCompressionLuma = 2;
6502 } else {
6503 MaximumEffectiveCompressionLuma = 4;
6504 }
6505 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(a: NetDCCRateLuma[k], b: MaximumEffectiveCompressionLuma);
6506 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6507 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6508 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6509 if (ReadBandwidthPlaneChroma[k] > 0) {
6510 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6511 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6512 MaximumEffectiveCompressionChroma = 2;
6513 } else {
6514 MaximumEffectiveCompressionChroma = 4;
6515 }
6516 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6517 + ReadBandwidthPlaneChroma[k] / dml_min(a: NetDCCRateChroma[k], b: MaximumEffectiveCompressionChroma);
6518 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6519 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6520 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6521 }
6522 } else {
6523 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6524 }
6525 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6526 }
6527
6528 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6529 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6530
6531#ifdef __DML_VBA_DEBUG__
6532 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6533 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6534 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6535 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6536 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6537 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6538 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6539 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6540#endif
6541
6542 if (AverageDCCZeroSizeFraction == 1) {
6543 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6544 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6545 } else if (AverageDCCZeroSizeFraction > 0) {
6546 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6547 EffectiveCompressedBufferSize = dml_min(
6548 a: CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6549 b: MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6550 + dml_min(a: (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6551 b: (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6552 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6553 dml_print(
6554 "DML::%s: min 2 = %f\n",
6555 __func__,
6556 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6557 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6558 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6559 } else {
6560 EffectiveCompressedBufferSize = dml_min(
6561 a: CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6562 b: MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6563 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6564 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6565 }
6566
6567#ifdef __DML_VBA_DEBUG__
6568 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6569 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6570 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6571#endif
6572
6573 *StutterPeriod = 0;
6574 for (k = 0; k < NumberOfActivePlanes; ++k) {
6575 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6576 / BytePerPixelDETY[k] / SwathWidthY[k];
6577 LinesInDETYRoundedDownToSwath = dml_floor(a: LinesInDETY, granularity: SwathHeightY[k]);
6578 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6579#ifdef __DML_VBA_DEBUG__
6580 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6581 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6582 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6583 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6584 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6585 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6586 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6587 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6588 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6589 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6590 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6591 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6592#endif
6593
6594 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6595 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6596
6597 *StutterPeriod = DETBufferingTimeY;
6598 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(a: VTotal[k] / 2.0, granularity: 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6599 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(a: VActive[k] / 2.0, granularity: 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6600 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6601 SwathWidthYCriticalPlane = SwathWidthY[k];
6602 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6603 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6604
6605#ifdef __DML_VBA_DEBUG__
6606 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6607 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6608 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6609 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6610 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6611 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6612 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6613#endif
6614 }
6615 }
6616
6617 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(a: *StutterPeriod * TotalDataReadBandwidth, b: EffectiveCompressedBufferSize);
6618#ifdef __DML_VBA_DEBUG__
6619 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6620 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6621 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6622 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6623 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6624 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6625 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6626 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6627 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6628 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6629#endif
6630
6631 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6632 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6633 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6634#ifdef __DML_VBA_DEBUG__
6635 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6636 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6637 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6638 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6639 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6640#endif
6641 StutterBurstTime = dml_max(a: StutterBurstTime, b: LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6642
6643 dml_print(
6644 "DML::%s: Time to finish residue swath=%f\n",
6645 __func__,
6646 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6647
6648 TotalActiveWriteback = 0;
6649 for (k = 0; k < NumberOfActivePlanes; ++k) {
6650 if (WritebackEnable[k]) {
6651 TotalActiveWriteback = TotalActiveWriteback + 1;
6652 }
6653 }
6654
6655 if (TotalActiveWriteback == 0) {
6656#ifdef __DML_VBA_DEBUG__
6657 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6658 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6659 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6660 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6661#endif
6662 *StutterEfficiencyNotIncludingVBlank = dml_max(a: 0., b: 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6663 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(a: 0., b: 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6664 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(a: VActiveTimeCriticalPlane / *StutterPeriod, granularity: 1) : 0);
6665 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(a: VActiveTimeCriticalPlane / *StutterPeriod, granularity: 1) : 0);
6666 } else {
6667 *StutterEfficiencyNotIncludingVBlank = 0.;
6668 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6669 *NumberOfStutterBurstsPerFrame = 0;
6670 *Z8NumberOfStutterBurstsPerFrame = 0;
6671 }
6672#ifdef __DML_VBA_DEBUG__
6673 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6674 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6675 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6676 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6677 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6678#endif
6679
6680 for (k = 0; k < NumberOfActivePlanes; ++k) {
6681 if (v->BlendingAndTiming[k] == k) {
6682 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6683 }
6684 }
6685
6686 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6687 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6688
6689 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6690 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6691 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6692 } else {
6693 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6694 }
6695 } else {
6696 *StutterEfficiency = 0;
6697 }
6698
6699 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6700 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6701 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6702 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6703 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6704 } else {
6705 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6706 }
6707 } else {
6708 *Z8StutterEfficiency = 0.;
6709 }
6710
6711 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6712 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6713 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6714 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6715 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6716 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6717 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6718 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6719}
6720
6721static void CalculateSwathAndDETConfiguration(
6722 bool ForceSingleDPP,
6723 int NumberOfActivePlanes,
6724 unsigned int DETBufferSizeInKByte,
6725 double MaximumSwathWidthLuma[],
6726 double MaximumSwathWidthChroma[],
6727 enum scan_direction_class SourceScan[],
6728 enum source_format_class SourcePixelFormat[],
6729 enum dm_swizzle_mode SurfaceTiling[],
6730 int ViewportWidth[],
6731 int ViewportHeight[],
6732 int SurfaceWidthY[],
6733 int SurfaceWidthC[],
6734 int SurfaceHeightY[],
6735 int SurfaceHeightC[],
6736 int Read256BytesBlockHeightY[],
6737 int Read256BytesBlockHeightC[],
6738 int Read256BytesBlockWidthY[],
6739 int Read256BytesBlockWidthC[],
6740 enum odm_combine_mode ODMCombineEnabled[],
6741 int BlendingAndTiming[],
6742 int BytePerPixY[],
6743 int BytePerPixC[],
6744 double BytePerPixDETY[],
6745 double BytePerPixDETC[],
6746 int HActive[],
6747 double HRatio[],
6748 double HRatioChroma[],
6749 int DPPPerPlane[],
6750 int swath_width_luma_ub[],
6751 int swath_width_chroma_ub[],
6752 double SwathWidth[],
6753 double SwathWidthChroma[],
6754 int SwathHeightY[],
6755 int SwathHeightC[],
6756 unsigned int DETBufferSizeY[],
6757 unsigned int DETBufferSizeC[],
6758 bool ViewportSizeSupportPerPlane[],
6759 bool *ViewportSizeSupport)
6760{
6761 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6762 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6763 int MinimumSwathHeightY;
6764 int MinimumSwathHeightC;
6765 int RoundedUpMaxSwathSizeBytesY;
6766 int RoundedUpMaxSwathSizeBytesC;
6767 int RoundedUpMinSwathSizeBytesY;
6768 int RoundedUpMinSwathSizeBytesC;
6769 int RoundedUpSwathSizeBytesY;
6770 int RoundedUpSwathSizeBytesC;
6771 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6772 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6773 int k;
6774
6775 CalculateSwathWidth(
6776 ForceSingleDPP,
6777 NumberOfActivePlanes,
6778 SourcePixelFormat,
6779 SourceScan,
6780 ViewportWidth,
6781 ViewportHeight,
6782 SurfaceWidthY,
6783 SurfaceWidthC,
6784 SurfaceHeightY,
6785 SurfaceHeightC,
6786 ODMCombineEnabled,
6787 BytePerPixY,
6788 BytePerPixC,
6789 Read256BytesBlockHeightY,
6790 Read256BytesBlockHeightC,
6791 Read256BytesBlockWidthY,
6792 Read256BytesBlockWidthC,
6793 BlendingAndTiming,
6794 HActive,
6795 HRatio,
6796 DPPPerPlane,
6797 SwathWidthSingleDPPY: SwathWidthSingleDPP,
6798 SwathWidthSingleDPPC: SwathWidthSingleDPPChroma,
6799 SwathWidthY: SwathWidth,
6800 SwathWidthC: SwathWidthChroma,
6801 MaximumSwathHeightY,
6802 MaximumSwathHeightC,
6803 swath_width_luma_ub,
6804 swath_width_chroma_ub);
6805
6806 *ViewportSizeSupport = true;
6807 for (k = 0; k < NumberOfActivePlanes; ++k) {
6808 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6809 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6810 if (SurfaceTiling[k] == dm_sw_linear
6811 || (SourcePixelFormat[k] == dm_444_64
6812 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6813 && SourceScan[k] != dm_vert)) {
6814 MinimumSwathHeightY = MaximumSwathHeightY[k];
6815 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k];
6817 } else {
6818 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6819 }
6820 MinimumSwathHeightC = MaximumSwathHeightC[k];
6821 } else {
6822 if (SurfaceTiling[k] == dm_sw_linear) {
6823 MinimumSwathHeightY = MaximumSwathHeightY[k];
6824 MinimumSwathHeightC = MaximumSwathHeightC[k];
6825 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6826 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6827 MinimumSwathHeightC = MaximumSwathHeightC[k];
6828 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6829 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6830 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6831 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6832 MinimumSwathHeightY = MaximumSwathHeightY[k];
6833 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6834 } else {
6835 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6836 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6837 }
6838 }
6839
6840 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6841 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6842 if (SourcePixelFormat[k] == dm_420_10) {
6843 RoundedUpMaxSwathSizeBytesY = dml_ceil(a: (double) RoundedUpMaxSwathSizeBytesY, granularity: 256);
6844 RoundedUpMinSwathSizeBytesY = dml_ceil(a: (double) RoundedUpMinSwathSizeBytesY, granularity: 256);
6845 }
6846 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6847 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6848 if (SourcePixelFormat[k] == dm_420_10) {
6849 RoundedUpMaxSwathSizeBytesC = dml_ceil(a: RoundedUpMaxSwathSizeBytesC, granularity: 256);
6850 RoundedUpMinSwathSizeBytesC = dml_ceil(a: RoundedUpMinSwathSizeBytesC, granularity: 256);
6851 }
6852
6853 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6854 SwathHeightY[k] = MaximumSwathHeightY[k];
6855 SwathHeightC[k] = MaximumSwathHeightC[k];
6856 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6857 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6858 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6859 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6860 SwathHeightY[k] = MinimumSwathHeightY;
6861 SwathHeightC[k] = MaximumSwathHeightC[k];
6862 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6863 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6864 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6865 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6866 SwathHeightY[k] = MaximumSwathHeightY[k];
6867 SwathHeightC[k] = MinimumSwathHeightC;
6868 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6869 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6870 } else {
6871 SwathHeightY[k] = MinimumSwathHeightY;
6872 SwathHeightC[k] = MinimumSwathHeightC;
6873 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6874 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6875 }
6876 {
6877 double actDETBufferSizeInKByte = dml_ceil(a: DETBufferSizeInKByte, granularity: 64);
6878
6879 if (SwathHeightC[k] == 0) {
6880 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6881 DETBufferSizeC[k] = 0;
6882 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6883 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6884 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6885 } else {
6886 DETBufferSizeY[k] = dml_floor(a: actDETBufferSizeInKByte * 1024 * 2 / 3, granularity: 1024);
6887 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6888 }
6889
6890 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6891 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6892 *ViewportSizeSupport = false;
6893 ViewportSizeSupportPerPlane[k] = false;
6894 } else {
6895 ViewportSizeSupportPerPlane[k] = true;
6896 }
6897 }
6898 }
6899}
6900
6901static void CalculateSwathWidth(
6902 bool ForceSingleDPP,
6903 int NumberOfActivePlanes,
6904 enum source_format_class SourcePixelFormat[],
6905 enum scan_direction_class SourceScan[],
6906 int ViewportWidth[],
6907 int ViewportHeight[],
6908 int SurfaceWidthY[],
6909 int SurfaceWidthC[],
6910 int SurfaceHeightY[],
6911 int SurfaceHeightC[],
6912 enum odm_combine_mode ODMCombineEnabled[],
6913 int BytePerPixY[],
6914 int BytePerPixC[],
6915 int Read256BytesBlockHeightY[],
6916 int Read256BytesBlockHeightC[],
6917 int Read256BytesBlockWidthY[],
6918 int Read256BytesBlockWidthC[],
6919 int BlendingAndTiming[],
6920 int HActive[],
6921 double HRatio[],
6922 int DPPPerPlane[],
6923 double SwathWidthSingleDPPY[],
6924 double SwathWidthSingleDPPC[],
6925 double SwathWidthY[],
6926 double SwathWidthC[],
6927 int MaximumSwathHeightY[],
6928 int MaximumSwathHeightC[],
6929 int swath_width_luma_ub[],
6930 int swath_width_chroma_ub[])
6931{
6932 enum odm_combine_mode MainPlaneODMCombine;
6933 int j, k;
6934
6935#ifdef __DML_VBA_DEBUG__
6936 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6937#endif
6938
6939 for (k = 0; k < NumberOfActivePlanes; ++k) {
6940 if (SourceScan[k] != dm_vert) {
6941 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6942 } else {
6943 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6944 }
6945
6946#ifdef __DML_VBA_DEBUG__
6947 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6948 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6949#endif
6950
6951 MainPlaneODMCombine = ODMCombineEnabled[k];
6952 for (j = 0; j < NumberOfActivePlanes; ++j) {
6953 if (BlendingAndTiming[k] == j) {
6954 MainPlaneODMCombine = ODMCombineEnabled[j];
6955 }
6956 }
6957
6958 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6959 SwathWidthY[k] = dml_min(a: SwathWidthSingleDPPY[k], b: dml_round(a: HActive[k] / 4.0 * HRatio[k]));
6960 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6961 SwathWidthY[k] = dml_min(a: SwathWidthSingleDPPY[k], b: dml_round(a: HActive[k] / 2.0 * HRatio[k]));
6962 else if (DPPPerPlane[k] == 2)
6963 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6964 else
6965 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6966
6967#ifdef __DML_VBA_DEBUG__
6968 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6969 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6970#endif
6971
6972 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6973 SwathWidthC[k] = SwathWidthY[k] / 2;
6974 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6975 } else {
6976 SwathWidthC[k] = SwathWidthY[k];
6977 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6978 }
6979
6980 if (ForceSingleDPP == true) {
6981 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6982 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6983 }
6984 {
6985 int surface_width_ub_l = dml_ceil(a: SurfaceWidthY[k], granularity: Read256BytesBlockWidthY[k]);
6986 int surface_height_ub_l = dml_ceil(a: SurfaceHeightY[k], granularity: Read256BytesBlockHeightY[k]);
6987
6988#ifdef __DML_VBA_DEBUG__
6989 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6990#endif
6991
6992 if (SourceScan[k] != dm_vert) {
6993 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6994 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6995 swath_width_luma_ub[k] = dml_min(a: surface_width_ub_l, b: (int) dml_ceil(a: SwathWidthY[k] - 1, granularity: Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6996 if (BytePerPixC[k] > 0) {
6997 int surface_width_ub_c = dml_ceil(a: SurfaceWidthC[k], granularity: Read256BytesBlockWidthC[k]);
6998
6999 swath_width_chroma_ub[k] = dml_min(
7000 a: surface_width_ub_c,
7001 b: (int) dml_ceil(a: SwathWidthC[k] - 1, granularity: Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7002 } else {
7003 swath_width_chroma_ub[k] = 0;
7004 }
7005 } else {
7006 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7007 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7008 swath_width_luma_ub[k] = dml_min(a: surface_height_ub_l, b: (int) dml_ceil(a: SwathWidthY[k] - 1, granularity: Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7009 if (BytePerPixC[k] > 0) {
7010 int surface_height_ub_c = dml_ceil(a: SurfaceHeightC[k], granularity: Read256BytesBlockHeightC[k]);
7011
7012 swath_width_chroma_ub[k] = dml_min(
7013 a: surface_height_ub_c,
7014 b: (int) dml_ceil(a: SwathWidthC[k] - 1, granularity: Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7015 } else {
7016 swath_width_chroma_ub[k] = 0;
7017 }
7018 }
7019 }
7020 }
7021}
7022
7023static double CalculateExtraLatency(
7024 int RoundTripPingLatencyCycles,
7025 int ReorderingBytes,
7026 double DCFCLK,
7027 int TotalNumberOfActiveDPP,
7028 int PixelChunkSizeInKByte,
7029 int TotalNumberOfDCCActiveDPP,
7030 int MetaChunkSize,
7031 double ReturnBW,
7032 bool GPUVMEnable,
7033 bool HostVMEnable,
7034 int NumberOfActivePlanes,
7035 int NumberOfDPP[],
7036 int dpte_group_bytes[],
7037 double HostVMInefficiencyFactor,
7038 double HostVMMinPageSize,
7039 int HostVMMaxNonCachedPageTableLevels)
7040{
7041 double ExtraLatencyBytes;
7042 double ExtraLatency;
7043
7044 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7045 ReorderingBytes,
7046 TotalNumberOfActiveDPP,
7047 PixelChunkSizeInKByte,
7048 TotalNumberOfDCCActiveDPP,
7049 MetaChunkSize,
7050 GPUVMEnable,
7051 HostVMEnable,
7052 NumberOfActivePlanes,
7053 NumberOfDPP,
7054 dpte_group_bytes,
7055 HostVMInefficiencyFactor,
7056 HostVMMinPageSize,
7057 HostVMMaxNonCachedPageTableLevels);
7058
7059 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7060
7061#ifdef __DML_VBA_DEBUG__
7062 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7063 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7064 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7065 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7066 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7067#endif
7068
7069 return ExtraLatency;
7070}
7071
/*
 * CalculateExtraLatencyBytes - byte count behind the extra latency term.
 *
 * Base amount:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *
 * When GPUVMEnable is set, each plane k additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 *   * HostVMInefficiencyFactor
 * where "levels" is 0 unless HostVMEnable is also set, in which case it is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (floored at 0 via
 * dml_max) depending on whether HostVMMinPageSize is below 2048, below
 * 1048576, or larger.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int dynamic_levels = 0;
	double total;
	int plane;

	total = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no page-table traffic to account for. */
	if (!GPUVMEnable)
		return total;

	if (HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		total += NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * dynamic_levels) * HostVMInefficiencyFactor;

	return total;
}
7109
/*
 * CalculateUrgentLatency - largest of the three urgent latency inputs, with
 * an optional fabric-clock based adjustment.
 *
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst;

	return worst + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7126
7127static noinline_for_stack void UseMinimumDCFCLK(
7128 struct display_mode_lib *mode_lib,
7129 int MaxPrefetchMode,
7130 int ReorderingBytes)
7131{
7132 struct vba_vars_st *v = &mode_lib->vba;
7133 int dummy1, i, j, k;
7134 double NormalEfficiency, dummy2, dummy3;
7135 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7136
7137 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7138 for (i = 0; i < v->soc.num_states; ++i) {
7139 for (j = 0; j <= 1; ++j) {
7140 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7141 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7142 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7143 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7144 double MinimumTWait;
7145 double NonDPTEBandwidth;
7146 double DPTEBandwidth;
7147 double DCFCLKRequiredForAverageBandwidth;
7148 double ExtraLatencyBytes;
7149 double ExtraLatencyCycles;
7150 double DCFCLKRequiredForPeakBandwidth;
7151 int NoOfDPPState[DC__NUM_DPP__MAX];
7152 double MinimumTvmPlus2Tr0;
7153
7154 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7155 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7156 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7157 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7158 }
7159
7160 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7161 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7162
7163 MinimumTWait = CalculateTWait(PrefetchMode: MaxPrefetchMode, DRAMClockChangeLatency: v->FinalDRAMClockChangeLatency, UrgentLatency: v->UrgLatency[i], SREnterPlusExitTime: v->SREnterPlusExitTime);
7164 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7165 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7166 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7167 DCFCLKRequiredForAverageBandwidth = dml_max3(
7168 a: v->ProjectedDCFCLKDeepSleep[i][j],
7169 b: (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7170 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7171 c: (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7172
7173 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7174 ReorderingBytes,
7175 TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j],
7176 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
7177 TotalNumberOfDCCActiveDPP: v->TotalNumberOfDCCActiveDPP[i][j],
7178 MetaChunkSize: v->MetaChunkSize,
7179 GPUVMEnable: v->GPUVMEnable,
7180 HostVMEnable: v->HostVMEnable,
7181 NumberOfActivePlanes: v->NumberOfActivePlanes,
7182 NumberOfDPP: NoOfDPPState,
7183 dpte_group_bytes: v->dpte_group_bytes,
7184 HostVMInefficiencyFactor: 1,
7185 HostVMMinPageSize: v->HostVMMinPageSize,
7186 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
7187 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7188 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7189 double DCFCLKCyclesRequiredInPrefetch;
7190 double ExpectedPrefetchBWAcceleration;
7191 double PrefetchTime;
7192
7193 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7194 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7195 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7196 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7197 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7198 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7199 PrefetchPixelLinesTime[k] = dml_max(a: v->PrefetchLinesY[i][j][k], b: v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7200 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7201 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7202 DynamicMetadataVMExtraLatency[k] =
7203 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7204 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7205 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7206 - v->UrgLatency[i]
7207 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7208 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7209 - DynamicMetadataVMExtraLatency[k];
7210
7211 if (PrefetchTime > 0) {
7212 double ExpectedVRatioPrefetch;
7213
7214 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7215 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7216 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7217 * dml_max(a: 1.0, b: ExpectedVRatioPrefetch) * dml_max(a: 1.0, b: ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7218 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7219 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7220 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7221 }
7222 } else {
7223 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7224 }
7225 if (v->DynamicMetadataEnable[k] == true) {
7226 double TSetupPipe;
7227 double TdmbfPipe;
7228 double TdmsksPipe;
7229 double TdmecPipe;
7230 double AllowedTimeForUrgentExtraLatency;
7231
7232 CalculateVupdateAndDynamicMetadataParameters(
7233 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
7234 DPPCLK: v->RequiredDPPCLK[i][j][k],
7235 DISPCLK: v->RequiredDISPCLK[i][j],
7236 DCFClkDeepSleep: v->ProjectedDCFCLKDeepSleep[i][j],
7237 PixelClock: v->PixelClock[k],
7238 HTotal: v->HTotal[k],
7239 VBlank: v->VTotal[k] - v->VActive[k],
7240 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
7241 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
7242 InterlaceEnable: v->Interlace[k],
7243 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
7244 TSetup: &TSetupPipe,
7245 Tdmbf: &TdmbfPipe,
7246 Tdmec: &TdmecPipe,
7247 Tdmsks: &TdmsksPipe,
7248 VUpdateOffsetPix: &dummy1,
7249 VUpdateWidthPix: &dummy2,
7250 VReadyOffsetPix: &dummy3);
7251 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7252 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7253 if (AllowedTimeForUrgentExtraLatency > 0) {
7254 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7255 a: DCFCLKRequiredForPeakBandwidthPerPlane[k],
7256 b: ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7257 } else {
7258 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7259 }
7260 }
7261 }
7262 DCFCLKRequiredForPeakBandwidth = 0;
7263 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7264 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7265
7266 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7267 * (v->GPUVMEnable == true ?
7268 (v->HostVMEnable == true ?
7269 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7270 0);
7271 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7272 double MaximumTvmPlus2Tr0PlusTsw;
7273
7274 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7275 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7276 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7277 } else {
7278 DCFCLKRequiredForPeakBandwidth = dml_max3(
7279 a: DCFCLKRequiredForPeakBandwidth,
7280 b: 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7281 c: (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7282 }
7283 }
7284 v->DCFCLKState[i][j] = dml_min(a: v->DCFCLKPerState[i], b: 1.05 * dml_max(a: DCFCLKRequiredForAverageBandwidth, b: DCFCLKRequiredForPeakBandwidth));
7285 }
7286 }
7287}
7288
7289static void CalculateUnboundedRequestAndCompressedBufferSize(
7290 unsigned int DETBufferSizeInKByte,
7291 int ConfigReturnBufferSizeInKByte,
7292 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7293 int TotalActiveDPP,
7294 bool NoChromaPlanes,
7295 int MaxNumDPP,
7296 int CompressedBufferSegmentSizeInkByteFinal,
7297 enum output_encoder_class *Output,
7298 bool *UnboundedRequestEnabled,
7299 int *CompressedBufferSizeInkByte)
7300{
7301 double actDETBufferSizeInKByte = dml_ceil(a: DETBufferSizeInKByte, granularity: 64);
7302
7303 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalNumberOfActiveDPP: TotalActiveDPP, NoChroma: NoChromaPlanes, Output: Output[0]);
7304 *CompressedBufferSizeInkByte = (
7305 *UnboundedRequestEnabled == true ?
7306 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7307 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7308 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7309
7310#ifdef __DML_VBA_DEBUG__
7311 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7312 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7313 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7314 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7315 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7316 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7317 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7318#endif
7319}
7320
7321static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7322{
7323 bool ret_val = false;
7324
7325 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7326 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7327 ret_val = false;
7328 return ret_val;
7329}
7330
/*
 * CalculateMaxVStartup - upper bound for VStartup, capped at 1023.
 *
 * The usable vblank is the smaller of (VTotal - VActive) and VBlankNom; when
 * VBlankNom is 0, VBlankNomDefaultUS converted to lines
 * (dml_floor(VBlankNomDefaultUS / line time, 1.0)) is used instead.
 * VBlankNom itself is not limited by that default.
 *
 * Interlaced timing without ProgressiveTointerlaceUnitinOPP yields half of
 * the usable vblank (floored).  Otherwise the usable vblank is reduced by
 * the writeback delay expressed in lines (rounded up, at least 1).
 *
 * NOTE(review): in the second case the subtraction is evaluated in double
 * and converted to unsigned int; if the writeback lines exceed the usable
 * vblank the intermediate value is negative - confirm callers never hit
 * that.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	const double line_time_us = HTotal / PixelClock;
	const unsigned int vblank_actual = VTotal - VActive;
	const unsigned int default_vblank_lines = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	unsigned int vblank_avail = VBlankNom;
	unsigned int vblank_size;
	unsigned int MaxVStartup;

	if (vblank_avail == 0)
		vblank_avail = default_vblank_lines;

	vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);

	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
	else
		MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));

	return MaxVStartup > 1023 ? 1023 : MaxVStartup;
}
7359

/* source code of linux/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c */