/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */
25#include "display_mode_vba_util_32.h"
26#include "../dml_inline_defs.h"
27#include "display_mode_vba_32.h"
28#include "../display_mode_lib.h"
29
/*
 * NOTE(review): going by the name, this is the widest line the DCN32 FMT
 * 4:2:0 buffer accepts; nothing in this part of the file references it, so
 * confirm the unit and meaning against its users.
 */
#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
32unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39{
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(a: rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101#ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 dml_print("DML::%s: Output: %d\n", __func__, Output);
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109#endif
110
111 return pixels;
112}
113
114unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115{
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184}
185
186
187bool IsVertical(enum dm_rotation_angle Scan)
188{
189 bool is_vert = false;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true;
193 else
194 is_vert = false;
195 return is_vert;
196}
197
198void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216{
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(a: MaxDCHUBToPSCLThroughput, b: MaxPSCLToLBThroughput * HRatio /
222 dml_ceil(a: (double) HTaps / 6.0, granularity: 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(a: MaxDCHUBToPSCLThroughput, b: MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(a: VTaps / 6 * dml_min(a: 1, b: HRatio), b: HRatio * VRatio /
228 *PSCL_THROUGHPUT, c: 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(a: MaxDCHUBToPSCLThroughput, b: MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil(a: (double) HTapsChroma / 6.0, granularity: 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(a: MaxDCHUBToPSCLThroughput, b: MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(a: VTapsChroma / 6 * dml_min(a: 1, b: HRatioChroma),
245 b: HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, c: 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(a: DPPCLKUsingSingleDPPLuma, b: DPPCLKUsingSingleDPPChroma);
249 }
250}
251
252void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269{
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311#ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
317#endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353#ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358#endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387#ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392#endif
393} // CalculateBytePerPixelAndBlockSizes
394
395void dml32_CalculateSwathAndDETConfiguration(
396 unsigned int DETSizeOverride[],
397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 unsigned int ConfigReturnBufferSizeInKByte,
399 unsigned int MaxTotalDETInKByte,
400 unsigned int MinCompressedBufferSizeInKByte,
401 double ForceSingleDPP,
402 unsigned int NumberOfActiveSurfaces,
403 unsigned int nomDETInKByte,
404 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 unsigned int PixelChunkSizeKBytes,
407 unsigned int ROBSizeKBytes,
408 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 enum output_encoder_class Output[],
410 double ReadBandwidthLuma[],
411 double ReadBandwidthChroma[],
412 double MaximumSwathWidthLuma[],
413 double MaximumSwathWidthChroma[],
414 enum dm_rotation_angle SourceRotation[],
415 bool ViewportStationary[],
416 enum source_format_class SourcePixelFormat[],
417 enum dm_swizzle_mode SurfaceTiling[],
418 unsigned int ViewportWidth[],
419 unsigned int ViewportHeight[],
420 unsigned int ViewportXStart[],
421 unsigned int ViewportYStart[],
422 unsigned int ViewportXStartC[],
423 unsigned int ViewportYStartC[],
424 unsigned int SurfaceWidthY[],
425 unsigned int SurfaceWidthC[],
426 unsigned int SurfaceHeightY[],
427 unsigned int SurfaceHeightC[],
428 unsigned int Read256BytesBlockHeightY[],
429 unsigned int Read256BytesBlockHeightC[],
430 unsigned int Read256BytesBlockWidthY[],
431 unsigned int Read256BytesBlockWidthC[],
432 enum odm_combine_mode ODMMode[],
433 unsigned int BlendingAndTiming[],
434 unsigned int BytePerPixY[],
435 unsigned int BytePerPixC[],
436 double BytePerPixDETY[],
437 double BytePerPixDETC[],
438 unsigned int HActive[],
439 double HRatio[],
440 double HRatioChroma[],
441 unsigned int DPPPerSurface[],
442
443 /* Output */
444 unsigned int swath_width_luma_ub[],
445 unsigned int swath_width_chroma_ub[],
446 double SwathWidth[],
447 double SwathWidthChroma[],
448 unsigned int SwathHeightY[],
449 unsigned int SwathHeightC[],
450 unsigned int DETBufferSizeInKByte[],
451 unsigned int DETBufferSizeY[],
452 unsigned int DETBufferSizeC[],
453 bool *UnboundedRequestEnabled,
454 unsigned int *CompressedBufferSizeInkByte,
455 unsigned int *CompBufReservedSpaceKBytes,
456 bool *CompBufReservedSpaceNeedAdjustment,
457 bool ViewportSizeSupportPerSurface[],
458 bool *ViewportSizeSupport)
459{
460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 unsigned int RoundedUpSwathSizeBytesY;
465 unsigned int RoundedUpSwathSizeBytesC;
466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 unsigned int k;
469 unsigned int TotalActiveDPP = 0;
470 bool NoChromaSurfaces = true;
471 unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473#ifdef __DML_VBA_DEBUG__
474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477#endif
478 dml32_CalculateSwathWidth(ForceSingleDPP,
479 NumberOfActiveSurfaces,
480 SourcePixelFormat,
481 SourceScan: SourceRotation,
482 ViewportStationary,
483 ViewportWidth,
484 ViewportHeight,
485 ViewportXStart,
486 ViewportYStart,
487 ViewportXStartC,
488 ViewportYStartC,
489 SurfaceWidthY,
490 SurfaceWidthC,
491 SurfaceHeightY,
492 SurfaceHeightC,
493 ODMMode,
494 BytePerPixY,
495 BytePerPixC,
496 Read256BytesBlockHeightY,
497 Read256BytesBlockHeightC,
498 Read256BytesBlockWidthY,
499 Read256BytesBlockWidthC,
500 BlendingAndTiming,
501 HActive,
502 HRatio,
503 DPPPerSurface,
504
505 /* Output */
506 SwathWidthdoubleDPPY: SwathWidthdoubleDPP,
507 SwathWidthdoubleDPPC: SwathWidthdoubleDPPChroma,
508 SwathWidthY: SwathWidth,
509 SwathWidthC: SwathWidthChroma,
510 MaximumSwathHeightY,
511 MaximumSwathHeightC,
512 swath_width_luma_ub,
513 swath_width_chroma_ub);
514
515 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518#ifdef __DML_VBA_DEBUG__
519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 RoundedUpMaxSwathSizeBytesY[k]);
525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 RoundedUpMaxSwathSizeBytesC[k]);
530#endif
531
532 if (SourcePixelFormat[k] == dm_420_10) {
533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil(a: (unsigned int) RoundedUpMaxSwathSizeBytesY[k], granularity: 256);
534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil(a: (unsigned int) RoundedUpMaxSwathSizeBytesC[k], granularity: 256);
535 }
536 }
537
538 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 NoChromaSurfaces = false;
543 }
544 }
545
546 // By default, just set the reserved space to 2 pixel chunks size
547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554 if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 }
557
558 #ifdef __DML_VBA_DEBUG__
559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
561 #endif
562
563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalNumberOfActiveDPP: TotalActiveDPP, NoChroma: NoChromaSurfaces, Output: Output[0], SurfaceTiling: SurfaceTiling[0], CompBufReservedSpaceNeedAdjustment: *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565 dml32_CalculateDETBufferSize(DETSizeOverride,
566 UseMALLForPStateChange,
567 ForceSingleDPP,
568 NumberOfActiveSurfaces,
569 UnboundedRequestEnabled: *UnboundedRequestEnabled,
570 nomDETInKByte,
571 MaxTotalDETInKByte,
572 ConfigReturnBufferSizeInKByte,
573 MinCompressedBufferSizeInKByte,
574 CompressedBufferSegmentSizeInkByteFinal,
575 SourcePixelFormat,
576 ReadBandwidthLuma,
577 ReadBandwidthChroma,
578 RoundedUpMaxSwathSizeBytesY,
579 RoundedUpMaxSwathSizeBytesC,
580 DPPPerSurface,
581
582 /* Output */
583 DETBufferSizeInKByte, // per hubp pipe
584 CompressedBufferSizeInkByte);
585
586#ifdef __DML_VBA_DEBUG__
587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593#endif
594
595 *ViewportSizeSupport = true;
596 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600#ifdef __DML_VBA_DEBUG__
601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 DETBufferSizeInKByteForSwathCalculation);
603#endif
604
605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 SwathHeightY[k] = MaximumSwathHeightY[k];
608 SwathHeightC[k] = MaximumSwathHeightC[k];
609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 SwathHeightC[k] = MaximumSwathHeightC[k];
616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 SwathHeightY[k] = MaximumSwathHeightY[k];
622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 } else {
626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 }
631
632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 *ViewportSizeSupport = false;
637 ViewportSizeSupportPerSurface[k] = false;
638 } else {
639 ViewportSizeSupportPerSurface[k] = true;
640 }
641
642 if (SwathHeightC[k] == 0) {
643#ifdef __DML_VBA_DEBUG__
644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645#endif
646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 DETBufferSizeC[k] = 0;
648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649#ifdef __DML_VBA_DEBUG__
650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651#endif
652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 } else {
655#ifdef __DML_VBA_DEBUG__
656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657#endif
658 DETBufferSizeY[k] = dml_floor(a: DETBufferSizeInKByte[k] * 1024 * 2 / 3, granularity: 1024);
659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 }
661
662#ifdef __DML_VBA_DEBUG__
663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 k, RoundedUpMaxSwathSizeBytesY[k]);
667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 k, RoundedUpMaxSwathSizeBytesC[k]);
669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 ViewportSizeSupportPerSurface[k]);
676#endif
677
678 }
679} // CalculateSwathAndDETConfiguration
680
681void dml32_CalculateSwathWidth(
682 bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718{
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727#ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730#endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(Scan: SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738#ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741#endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(a: SwathWidthdoubleDPPY[k],
754 b: dml_round(a: HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(a: SwathWidthdoubleDPPY[k],
757 b: dml_round(a: HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765#ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771#endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(a: SurfaceWidthY[k], granularity: Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(a: SurfaceHeightY[k], granularity: Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(Scan: SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(a: surface_width_ub_l,
795 b: dml_floor(a: ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 granularity: Read256BytesBlockWidthY[k]) -
799 dml_floor(a: ViewportXStart[k],
800 granularity: Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(a: surface_width_ub_l,
803 b: dml_ceil(a: SwathWidthY[k] - 1,
804 granularity: Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(a: SurfaceWidthC[k], granularity: Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(a: surface_width_ub_c,
811 b: dml_floor(a: ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 granularity: Read256BytesBlockWidthC[k]) -
814 dml_floor(a: ViewportXStartC[k],
815 granularity: Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(a: surface_width_ub_c,
818 b: dml_ceil(a: SwathWidthC[k] - 1,
819 granularity: Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(a: surface_height_ub_l, b: dml_floor(a: ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 granularity: Read256BytesBlockHeightY[k]) -
833 dml_floor(a: ViewportYStart[k], granularity: Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(a: surface_height_ub_l, b: dml_ceil(a: SwathWidthY[k] - 1,
836 granularity: Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(a: SurfaceHeightC[k], granularity: Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(a: surface_height_ub_c,
842 b: dml_floor(a: ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 granularity: Read256BytesBlockHeightC[k]) -
845 dml_floor(a: ViewportYStartC[k],
846 granularity: Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(a: surface_height_ub_c,
849 b: dml_ceil(a: SwathWidthC[k] - 1, granularity: Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857#ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872#endif
873
874 }
875} // CalculateSwathWidth
876
877bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool CompBufReservedSpaceNeedAdjustment,
883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884{
885 bool ret_val = false;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false;
897
898#ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
902#endif
903
904 return (ret_val);
905}
906
907void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927{
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940#ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 CompressedBufferSegmentSizeInkByteFinal);
950#endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(a: nomDETInKByte, b: dml_ceil(a: 2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, granularity: 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983#ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 RoundedUpMaxSwathSizeBytesY[k]);
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 RoundedUpMaxSwathSizeBytesC[k]);
991#endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(a: 128, b: dml_ceil(a: ((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, granularity: 64));
996#ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 __func__, k, minDET_pipe);
999#endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014#ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019#endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027#ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033#endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 }
1048#ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 DETPieceAssignedToThisSurfaceAlready[k]);
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 BandwidthOfSurfacesNotAssignedDETPiece);
1053#endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061#ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 ReadBandwidthLuma[k]);
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 ReadBandwidthChroma[k]);
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 NextSurfaceToAssignDETPiece);
1072#endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true;
1080 }
1081#ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086#endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 a: dml_round(a: (double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 b: dml_floor(a: (double) DETBufferSizePoolInKByte,
1117 granularity: (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134#ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 DETBufferSizePoolInKByte);
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 NextSurfaceToAssignDETPiece);
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 NextDETBufferPieceInKByte);
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 __func__, j, NextSurfaceToAssignDETPiece,
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150#endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156#ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158#endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171#ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 }
1178#endif
1179} // CalculateDETBufferSize
1180
1181void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203{
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(ODMMode: dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(ODMMode: dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(ODMMode: dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false;
1281 }
1282}
1283
1284double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291{
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 Clock: PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 Clock: PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(Clock: MaxDispclk, round_up: 0, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323}
1324
/*
 * Snap Clock to a value of the form (4 * VCOSpeed) / divider.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_floor when
 * round_up is set, or through dml_ceil otherwise, both with a granularity
 * of 1.0. A Clock of zero or less yields 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4.0;
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	/* The smaller divider gives the higher clock, hence floor for round_up. */
	if (round_up)
		Divider = dml_floor(ScaledVCO / Clock, 1.0);
	else
		Divider = dml_ceil(ScaledVCO / Clock, 1.0);

	return ScaledVCO / Divider;
}
1335
1336void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365{
1366 bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false;
1369 *RequiresFEC = false;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false;
1377 *RequiresFEC = false;
1378 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: dml_min(a: 600, b: PHYCLKPerState) * 10, Lanes: 3, HTotal, HActive,
1379 PixelClock: PixelClockBackEnd, DesiredBPP: ForcedOutputLinkBPP, DSCEnable: false, Output, Format: OutputFormat,
1380 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, RequiredSlots: &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true) {
1387 *RequiresDSC = true;
1388 LinkDSCEnable = true;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true;
1391 else
1392 *RequiresFEC = false;
1393 } else {
1394 *RequiresDSC = false;
1395 LinkDSCEnable = false;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true;
1398 else
1399 *RequiresFEC = false;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 10000,
1406 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1407 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output, Format: OutputFormat,
1408 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate,
1409 AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 10000,
1415 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1416 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1417 Format: OutputFormat, DSCInputBitPerComponent,
1418 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 13500,
1428 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1429 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output, Format: OutputFormat,
1430 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate,
1431 AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true;
1436 LinkDSCEnable = true;
1437 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 13500,
1438 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1439 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1440 Format: OutputFormat, DSCInputBitPerComponent,
1441 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 20000,
1451 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1452 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output, Format: OutputFormat,
1453 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate,
1454 AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true;
1457 LinkDSCEnable = true;
1458 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 20000,
1459 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1460 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1461 Format: OutputFormat, DSCInputBitPerComponent,
1462 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 2700,
1474 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1475 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output, Format: OutputFormat,
1476 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate,
1477 AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true;
1481 LinkDSCEnable = true;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true;
1484 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 2700,
1485 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1486 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1487 Format: OutputFormat, DSCInputBitPerComponent,
1488 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 5400,
1498 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1499 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output, Format: OutputFormat,
1500 DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate,
1501 AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true;
1506 LinkDSCEnable = true;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true;
1509
1510 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 5400,
1511 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1512 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1513 Format: OutputFormat, DSCInputBitPerComponent,
1514 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 8100,
1523 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1524 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1525 Format: OutputFormat, DSCInputBitPerComponent, DSCSlices: NumberOfDSCSlices,
1526 AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true;
1531 LinkDSCEnable = true;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true;
1534
1535 *OutBpp = dml32_TruncToValidBPP(LinkBitRate: (1 - Downspreading / 100) * 8100,
1536 Lanes: OutputLinkDPLanes, HTotal, HActive, PixelClock: PixelClockBackEnd,
1537 DesiredBPP: ForcedOutputLinkBPP, DSCEnable: LinkDSCEnable, Output,
1538 Format: OutputFormat, DSCInputBitPerComponent,
1539 DSCSlices: NumberOfDSCSlices, AudioRate: AudioSampleRate, AudioLayout: AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549}
1550
/*
 * dml32_CalculateDPPCLK - derive per-surface DPPCLK values and the global DPPCLK.
 *
 * For each active surface the single-DPP clock is divided by the number of
 * DPPs on that surface and scaled up by the down-spread percentage. The
 * largest of these is rounded (round_up = 1) through
 * dml32_RoundToDFSGranularity() and stored in *GlobalDPPCLK. Every surface
 * clock is then re-expressed as a whole number of 1/255 steps of the global
 * clock, rounding the step count up.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0..NumberOfActiveSurfaces-1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double spread_factor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double peak_dppclk = 0;
	double rounded_dppclk;
	double dppclk_step;
	unsigned int surf;

	/* Pass 1: raw per-surface clock and running maximum. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * spread_factor;
		peak_dppclk = dml_max(peak_dppclk, Dppclk[surf]);
	}

	rounded_dppclk = dml32_RoundToDFSGranularity(peak_dppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = rounded_dppclk;

	/* Pass 2: snap each surface clock up to a multiple of global / 255. */
	dppclk_step = rounded_dppclk / 255;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		Dppclk[surf] = dppclk_step * dml_ceil(Dppclk[surf] * 255.0 / rounded_dppclk, 1.0);
}
1572
1573double dml32_TruncToValidBPP(
1574 double LinkBitRate,
1575 unsigned int Lanes,
1576 unsigned int HTotal,
1577 unsigned int HActive,
1578 double PixelClock,
1579 double DesiredBPP,
1580 bool DSCEnable,
1581 enum output_encoder_class Output,
1582 enum output_format_class Format,
1583 unsigned int DSCInputBitPerComponent,
1584 unsigned int DSCSlices,
1585 unsigned int AudioRate,
1586 unsigned int AudioLayout,
1587 enum odm_combine_mode ODMModeNoDSC,
1588 enum odm_combine_mode ODMModeDSC,
1589 /* Output */
1590 unsigned int *RequiredSlots)
1591{
1592 double MaxLinkBPP;
1593 unsigned int MinDSCBPP;
1594 double MaxDSCBPP;
1595 unsigned int NonDSCBPP0;
1596 unsigned int NonDSCBPP1;
1597 unsigned int NonDSCBPP2;
1598
1599 if (Format == dm_420) {
1600 NonDSCBPP0 = 12;
1601 NonDSCBPP1 = 15;
1602 NonDSCBPP2 = 18;
1603 MinDSCBPP = 6;
1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1605 } else if (Format == dm_444) {
1606 NonDSCBPP0 = 24;
1607 NonDSCBPP1 = 30;
1608 NonDSCBPP2 = 36;
1609 MinDSCBPP = 8;
1610 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611 } else {
1612 if (Output == dm_hdmi) {
1613 NonDSCBPP0 = 24;
1614 NonDSCBPP1 = 24;
1615 NonDSCBPP2 = 24;
1616 } else {
1617 NonDSCBPP0 = 16;
1618 NonDSCBPP1 = 20;
1619 NonDSCBPP2 = 24;
1620 }
1621 if (Format == dm_n422) {
1622 MinDSCBPP = 7;
1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624 } else {
1625 MinDSCBPP = 8;
1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627 }
1628 }
1629 if (Output == dm_dp2p0) {
1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631 } else if (DSCEnable && Output == dm_dp) {
1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633 } else {
1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635 }
1636
1637 if (DSCEnable) {
1638 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639 MaxLinkBPP = dml_min(a: MaxLinkBPP, b: 16);
1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641 MaxLinkBPP = dml_min(a: MaxLinkBPP, b: 32);
1642 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643 MaxLinkBPP = 2 * MaxLinkBPP;
1644 } else {
1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646 MaxLinkBPP = dml_min(a: MaxLinkBPP, b: 16);
1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648 MaxLinkBPP = dml_min(a: MaxLinkBPP, b: 32);
1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650 MaxLinkBPP = 2 * MaxLinkBPP;
1651 }
1652
1653 if (DesiredBPP == 0) {
1654 if (DSCEnable) {
1655 if (MaxLinkBPP < MinDSCBPP)
1656 return BPP_INVALID;
1657 else if (MaxLinkBPP >= MaxDSCBPP)
1658 return MaxDSCBPP;
1659 else
1660 return dml_floor(a: 16.0 * MaxLinkBPP, granularity: 1.0) / 16.0;
1661 } else {
1662 if (MaxLinkBPP >= NonDSCBPP2)
1663 return NonDSCBPP2;
1664 else if (MaxLinkBPP >= NonDSCBPP1)
1665 return NonDSCBPP1;
1666 else if (MaxLinkBPP >= NonDSCBPP0)
1667 return 16.0;
1668 else
1669 return BPP_INVALID;
1670 }
1671 } else {
1672 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1673 DesiredBPP <= NonDSCBPP0)) ||
1674 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1675 return BPP_INVALID;
1676 else
1677 return DesiredBPP;
1678 }
1679
1680 *RequiredSlots = dml_ceil(a: DesiredBPP / MaxLinkBPP * 64, granularity: 1);
1681
1682 return BPP_INVALID;
1683} // TruncToValidBPP
1684
1685double dml32_RequiredDTBCLK(
1686 bool DSCEnable,
1687 double PixelClock,
1688 enum output_format_class OutputFormat,
1689 double OutputBpp,
1690 unsigned int DSCSlices,
1691 unsigned int HTotal,
1692 unsigned int HActive,
1693 unsigned int AudioRate,
1694 unsigned int AudioLayout)
1695{
1696 double PixelWordRate;
1697 double HCActive;
1698 double HCBlank;
1699 double AverageTribyteRate;
1700 double HActiveTribyteRate;
1701
1702 if (DSCEnable != true)
1703 return dml_max(a: PixelClock / 4.0 * OutputBpp / 24.0, b: 25.0);
1704
1705 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1706 HCActive = dml_ceil(a: DSCSlices * dml_ceil(a: OutputBpp *
1707 dml_ceil(a: HActive / DSCSlices, granularity: 1) / 8.0, granularity: 1) / 3.0, granularity: 1);
1708 HCBlank = 64 + 32 *
1709 dml_ceil(a: AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), granularity: 1);
1710 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1711 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1712 return dml_max4(a: PixelWordRate / 4.0, b: AverageTribyteRate / 4.0, c: HActiveTribyteRate / 4.0, d: 25.0) * 1.002;
1713}
1714
1715unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1716 enum odm_combine_mode ODMMode,
1717 unsigned int DSCInputBitPerComponent,
1718 double OutputBpp,
1719 unsigned int HActive,
1720 unsigned int HTotal,
1721 unsigned int NumberOfDSCSlices,
1722 enum output_format_class OutputFormat,
1723 enum output_encoder_class Output,
1724 double PixelClock,
1725 double PixelClockBackEnd,
1726 double dsc_delay_factor_wa)
1727{
1728 unsigned int DSCDelayRequirement_val;
1729
1730 if (DSCEnabled == true && OutputBpp != 0) {
1731 if (ODMMode == dm_odm_combine_mode_4to1) {
1732 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp,
1733 sliceWidth: dml_ceil(a: HActive / NumberOfDSCSlices, granularity: 1), numSlices: NumberOfDSCSlices / 4,
1734 pixelFormat: OutputFormat, Output) + dml32_dscComputeDelay(pixelFormat: OutputFormat, Output));
1735 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1736 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp,
1737 sliceWidth: dml_ceil(a: HActive / NumberOfDSCSlices, granularity: 1), numSlices: NumberOfDSCSlices / 2,
1738 pixelFormat: OutputFormat, Output) + dml32_dscComputeDelay(pixelFormat: OutputFormat, Output));
1739 } else {
1740 DSCDelayRequirement_val = dml32_dscceComputeDelay(bpc: DSCInputBitPerComponent, BPP: OutputBpp,
1741 sliceWidth: dml_ceil(a: HActive / NumberOfDSCSlices, granularity: 1), numSlices: NumberOfDSCSlices,
1742 pixelFormat: OutputFormat, Output) + dml32_dscComputeDelay(pixelFormat: OutputFormat, Output);
1743 }
1744
1745 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1746 dml_ceil(a: (double)DSCDelayRequirement_val / HActive, granularity: 1);
1747
1748 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1749
1750 } else {
1751 DSCDelayRequirement_val = 0;
1752 }
1753
1754#ifdef __DML_VBA_DEBUG__
1755 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1756 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1757 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1758 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1759 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1760 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1761 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1762#endif
1763
1764 return dml_ceil(a: DSCDelayRequirement_val * dsc_delay_factor_wa, granularity: 1);
1765}
1766
1767void dml32_CalculateSurfaceSizeInMall(
1768 unsigned int NumberOfActiveSurfaces,
1769 unsigned int MALLAllocatedForDCN,
1770 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1771 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1772 bool DCCEnable[],
1773 bool ViewportStationary[],
1774 unsigned int ViewportXStartY[],
1775 unsigned int ViewportYStartY[],
1776 unsigned int ViewportXStartC[],
1777 unsigned int ViewportYStartC[],
1778 unsigned int ViewportWidthY[],
1779 unsigned int ViewportHeightY[],
1780 unsigned int BytesPerPixelY[],
1781 unsigned int ViewportWidthC[],
1782 unsigned int ViewportHeightC[],
1783 unsigned int BytesPerPixelC[],
1784 unsigned int SurfaceWidthY[],
1785 unsigned int SurfaceWidthC[],
1786 unsigned int SurfaceHeightY[],
1787 unsigned int SurfaceHeightC[],
1788 unsigned int Read256BytesBlockWidthY[],
1789 unsigned int Read256BytesBlockWidthC[],
1790 unsigned int Read256BytesBlockHeightY[],
1791 unsigned int Read256BytesBlockHeightC[],
1792 unsigned int ReadBlockWidthY[],
1793 unsigned int ReadBlockWidthC[],
1794 unsigned int ReadBlockHeightY[],
1795 unsigned int ReadBlockHeightC[],
1796 unsigned int DCCMetaPitchY[],
1797 unsigned int DCCMetaPitchC[],
1798
1799 /* Output */
1800 unsigned int SurfaceSizeInMALL[],
1801 bool *ExceededMALLSize)
1802{
1803 unsigned int k;
1804 unsigned int TotalSurfaceSizeInMALLForSS = 0;
1805 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1806 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1807
1808 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1809 if (ViewportStationary[k]) {
1810 SurfaceSizeInMALL[k] = dml_min(a: dml_ceil(a: SurfaceWidthY[k], granularity: ReadBlockWidthY[k]),
1811 b: dml_floor(a: ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1812 granularity: ReadBlockWidthY[k]) - dml_floor(a: ViewportXStartY[k],
1813 granularity: ReadBlockWidthY[k])) * dml_min(a: dml_ceil(a: SurfaceHeightY[k],
1814 granularity: ReadBlockHeightY[k]), b: dml_floor(a: ViewportYStartY[k] +
1815 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, granularity: ReadBlockHeightY[k]) -
1816 dml_floor(a: ViewportYStartY[k], granularity: ReadBlockHeightY[k])) * BytesPerPixelY[k];
1817
1818 if (ReadBlockWidthC[k] > 0) {
1819 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1820 dml_min(a: dml_ceil(a: SurfaceWidthC[k], granularity: ReadBlockWidthC[k]),
1821 b: dml_floor(a: ViewportXStartC[k] + ViewportWidthC[k] +
1822 ReadBlockWidthC[k] - 1, granularity: ReadBlockWidthC[k]) -
1823 dml_floor(a: ViewportXStartC[k], granularity: ReadBlockWidthC[k])) *
1824 dml_min(a: dml_ceil(a: SurfaceHeightC[k], granularity: ReadBlockHeightC[k]),
1825 b: dml_floor(a: ViewportYStartC[k] + ViewportHeightC[k] +
1826 ReadBlockHeightC[k] - 1, granularity: ReadBlockHeightC[k]) -
1827 dml_floor(a: ViewportYStartC[k], granularity: ReadBlockHeightC[k])) *
1828 BytesPerPixelC[k];
1829 }
1830 if (DCCEnable[k] == true) {
1831 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1832 (dml_min(a: dml_ceil(a: DCCMetaPitchY[k], granularity: 8 * Read256BytesBlockWidthY[k]),
1833 b: dml_floor(a: ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1834 Read256BytesBlockWidthY[k] - 1, granularity: 8 * Read256BytesBlockWidthY[k])
1835 - dml_floor(a: ViewportXStartY[k], granularity: 8 * Read256BytesBlockWidthY[k]))
1836 * dml_min(a: dml_ceil(a: SurfaceHeightY[k], granularity: 8 *
1837 Read256BytesBlockHeightY[k]), b: dml_floor(a: ViewportYStartY[k] +
1838 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, granularity: 8 *
1839 Read256BytesBlockHeightY[k]) - dml_floor(a: ViewportYStartY[k], granularity: 8 *
1840 Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1841 if (Read256BytesBlockWidthC[k] > 0) {
1842 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1843 dml_min(a: dml_ceil(a: DCCMetaPitchC[k], granularity: 8 *
1844 Read256BytesBlockWidthC[k]),
1845 b: dml_floor(a: ViewportXStartC[k] + ViewportWidthC[k] + 8
1846 * Read256BytesBlockWidthC[k] - 1, granularity: 8 *
1847 Read256BytesBlockWidthC[k]) -
1848 dml_floor(a: ViewportXStartC[k], granularity: 8 *
1849 Read256BytesBlockWidthC[k])) *
1850 dml_min(a: dml_ceil(a: SurfaceHeightC[k], granularity: 8 *
1851 Read256BytesBlockHeightC[k]),
1852 b: dml_floor(a: ViewportYStartC[k] + ViewportHeightC[k] +
1853 8 * Read256BytesBlockHeightC[k] - 1, granularity: 8 *
1854 Read256BytesBlockHeightC[k]) -
1855 dml_floor(a: ViewportYStartC[k], granularity: 8 *
1856 Read256BytesBlockHeightC[k])) *
1857 BytesPerPixelC[k] / 256;
1858 }
1859 }
1860 } else {
1861 SurfaceSizeInMALL[k] = dml_ceil(a: dml_min(a: SurfaceWidthY[k], b: ViewportWidthY[k] +
1862 ReadBlockWidthY[k] - 1), granularity: ReadBlockWidthY[k]) *
1863 dml_ceil(a: dml_min(a: SurfaceHeightY[k], b: ViewportHeightY[k] +
1864 ReadBlockHeightY[k] - 1), granularity: ReadBlockHeightY[k]) *
1865 BytesPerPixelY[k];
1866 if (ReadBlockWidthC[k] > 0) {
1867 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1868 dml_ceil(a: dml_min(a: SurfaceWidthC[k], b: ViewportWidthC[k] +
1869 ReadBlockWidthC[k] - 1), granularity: ReadBlockWidthC[k]) *
1870 dml_ceil(a: dml_min(a: SurfaceHeightC[k], b: ViewportHeightC[k] +
1871 ReadBlockHeightC[k] - 1), granularity: ReadBlockHeightC[k]) *
1872 BytesPerPixelC[k];
1873 }
1874 if (DCCEnable[k] == true) {
1875 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1876 (dml_ceil(a: dml_min(a: DCCMetaPitchY[k], b: ViewportWidthY[k] + 8 *
1877 Read256BytesBlockWidthY[k] - 1), granularity: 8 *
1878 Read256BytesBlockWidthY[k]) *
1879 dml_ceil(a: dml_min(a: SurfaceHeightY[k], b: ViewportHeightY[k] + 8 *
1880 Read256BytesBlockHeightY[k] - 1), granularity: 8 *
1881 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1882
1883 if (Read256BytesBlockWidthC[k] > 0) {
1884 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1885 dml_ceil(a: dml_min(a: DCCMetaPitchC[k], b: ViewportWidthC[k] + 8 *
1886 Read256BytesBlockWidthC[k] - 1), granularity: 8 *
1887 Read256BytesBlockWidthC[k]) *
1888 dml_ceil(a: dml_min(a: SurfaceHeightC[k], b: ViewportHeightC[k] + 8 *
1889 Read256BytesBlockHeightC[k] - 1), granularity: 8 *
1890 Read256BytesBlockHeightC[k]) *
1891 BytesPerPixelC[k] / 256;
1892 }
1893 }
1894 }
1895 }
1896
1897 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1898 /* SS and Subvp counted separate as they are never used at the same time */
1899 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1900 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1901 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1902 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1903 }
1904 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1905 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1906} // CalculateSurfaceSizeInMall
1907
1908void dml32_CalculateVMRowAndSwath(
1909 unsigned int NumberOfActiveSurfaces,
1910 DmlPipe myPipe[],
1911 unsigned int SurfaceSizeInMALL[],
1912 unsigned int PTEBufferSizeInRequestsLuma,
1913 unsigned int PTEBufferSizeInRequestsChroma,
1914 unsigned int DCCMetaBufferSizeBytes,
1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1917 unsigned int MALLAllocatedForDCN,
1918 double SwathWidthY[],
1919 double SwathWidthC[],
1920 bool GPUVMEnable,
1921 bool HostVMEnable,
1922 unsigned int HostVMMaxNonCachedPageTableLevels,
1923 unsigned int GPUVMMaxPageTableLevels,
1924 unsigned int GPUVMMinPageSizeKBytes[],
1925 unsigned int HostVMMinPageSize,
1926
1927 /* Output */
1928 bool PTEBufferSizeNotExceeded[],
1929 bool DCCMetaBufferSizeNotExceeded[],
1930 unsigned int dpte_row_width_luma_ub[],
1931 unsigned int dpte_row_width_chroma_ub[],
1932 unsigned int dpte_row_height_luma[],
1933 unsigned int dpte_row_height_chroma[],
1934 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1936 unsigned int meta_req_width[],
1937 unsigned int meta_req_width_chroma[],
1938 unsigned int meta_req_height[],
1939 unsigned int meta_req_height_chroma[],
1940 unsigned int meta_row_width[],
1941 unsigned int meta_row_width_chroma[],
1942 unsigned int meta_row_height[],
1943 unsigned int meta_row_height_chroma[],
1944 unsigned int vm_group_bytes[],
1945 unsigned int dpte_group_bytes[],
1946 unsigned int PixelPTEReqWidthY[],
1947 unsigned int PixelPTEReqHeightY[],
1948 unsigned int PTERequestSizeY[],
1949 unsigned int PixelPTEReqWidthC[],
1950 unsigned int PixelPTEReqHeightC[],
1951 unsigned int PTERequestSizeC[],
1952 unsigned int dpde0_bytes_per_frame_ub_l[],
1953 unsigned int meta_pte_bytes_per_frame_ub_l[],
1954 unsigned int dpde0_bytes_per_frame_ub_c[],
1955 unsigned int meta_pte_bytes_per_frame_ub_c[],
1956 double PrefetchSourceLinesY[],
1957 double PrefetchSourceLinesC[],
1958 double VInitPreFillY[],
1959 double VInitPreFillC[],
1960 unsigned int MaxNumSwathY[],
1961 unsigned int MaxNumSwathC[],
1962 double meta_row_bw[],
1963 double dpte_row_bw[],
1964 double PixelPTEBytesPerRow[],
1965 double PDEAndMetaPTEBytesFrame[],
1966 double MetaRowByte[],
1967 bool use_one_row_for_frame[],
1968 bool use_one_row_for_frame_flip[],
1969 bool UsesMALLForStaticScreen[],
1970 bool PTE_BUFFER_MODE[],
1971 unsigned int BIGK_FRAGMENT_SIZE[])
1972{
1973 unsigned int k;
1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1976 unsigned int PDEAndMetaPTEBytesFrameY;
1977 unsigned int PDEAndMetaPTEBytesFrameC;
1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1982 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1989
1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1991 if (HostVMEnable == true) {
1992 vm_group_bytes[k] = 512;
1993 dpte_group_bytes[k] = 512;
1994 } else if (GPUVMEnable == true) {
1995 vm_group_bytes[k] = 2048;
1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(Scan: myPipe[k].SourceRotation))
1997 dpte_group_bytes[k] = 512;
1998 else
1999 dpte_group_bytes[k] = 2048;
2000 } else {
2001 vm_group_bytes[k] = 0;
2002 dpte_group_bytes[k] = 0;
2003 }
2004
2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2006 myPipe[k].SourcePixelFormat == dm_420_12 ||
2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2009 !IsVertical(Scan: myPipe[k].SourceRotation)) {
2010 PTEBufferSizeInRequestsForLuma[k] =
2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2013 } else {
2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2016 }
2017
2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2019 ViewportStationary: myPipe[k].ViewportStationary,
2020 DCCEnable: myPipe[k].DCCEnable,
2021 NumberOfDPPs: myPipe[k].DPPPerSurface,
2022 BlockHeight256Bytes: myPipe[k].BlockHeight256BytesC,
2023 BlockWidth256Bytes: myPipe[k].BlockWidth256BytesC,
2024 SourcePixelFormat: myPipe[k].SourcePixelFormat,
2025 SurfaceTiling: myPipe[k].SurfaceTiling,
2026 BytePerPixel: myPipe[k].BytePerPixelC,
2027 SourceScan: myPipe[k].SourceRotation,
2028 SwathWidth: SwathWidthC[k],
2029 ViewportHeight: myPipe[k].ViewportHeightChroma,
2030 ViewportXStart: myPipe[k].ViewportXStartC,
2031 ViewportYStart: myPipe[k].ViewportYStartC,
2032 GPUVMEnable,
2033 HostVMEnable,
2034 HostVMMaxNonCachedPageTableLevels,
2035 GPUVMMaxPageTableLevels,
2036 GPUVMMinPageSizeKBytes: GPUVMMinPageSizeKBytes[k],
2037 HostVMMinPageSize,
2038 PTEBufferSizeInRequests: PTEBufferSizeInRequestsForChroma[k],
2039 Pitch: myPipe[k].PitchC,
2040 DCCMetaPitch: myPipe[k].DCCMetaPitchC,
2041 MacroTileWidth: myPipe[k].BlockWidthC,
2042 MacroTileHeight: myPipe[k].BlockHeightC,
2043
2044 /* Output */
2045 MetaRowByte: &MetaRowByteC[k],
2046 PixelPTEBytesPerRow: &PixelPTEBytesPerRowC[k],
2047 dpte_row_width_ub: &dpte_row_width_chroma_ub[k],
2048 dpte_row_height: &dpte_row_height_chroma[k],
2049 dpte_row_height_linear: &dpte_row_height_linear_chroma[k],
2050 PixelPTEBytesPerRow_one_row_per_frame: &PixelPTEBytesPerRowC_one_row_per_frame[k],
2051 dpte_row_width_ub_one_row_per_frame: &dpte_row_width_chroma_ub_one_row_per_frame[k],
2052 dpte_row_height_one_row_per_frame: &dpte_row_height_chroma_one_row_per_frame[k],
2053 MetaRequestWidth: &meta_req_width_chroma[k],
2054 MetaRequestHeight: &meta_req_height_chroma[k],
2055 meta_row_width: &meta_row_width_chroma[k],
2056 meta_row_height: &meta_row_height_chroma[k],
2057 PixelPTEReqWidth: &PixelPTEReqWidthC[k],
2058 PixelPTEReqHeight: &PixelPTEReqHeightC[k],
2059 PTERequestSize: &PTERequestSizeC[k],
2060 DPDE0BytesFrame: &dpde0_bytes_per_frame_ub_c[k],
2061 MetaPTEBytesFrame: &meta_pte_bytes_per_frame_ub_c[k]);
2062
2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2064 VRatio: myPipe[k].VRatioChroma,
2065 VTaps: myPipe[k].VTapsChroma,
2066 Interlace: myPipe[k].InterlaceEnable,
2067 ProgressiveToInterlaceUnitInOPP: myPipe[k].ProgressiveToInterlaceUnitInOPP,
2068 SwathHeight: myPipe[k].SwathHeightC,
2069 SourceRotation: myPipe[k].SourceRotation,
2070 ViewportStationary: myPipe[k].ViewportStationary,
2071 SwathWidth: SwathWidthC[k],
2072 ViewportHeight: myPipe[k].ViewportHeightChroma,
2073 ViewportXStart: myPipe[k].ViewportXStartC,
2074 ViewportYStart: myPipe[k].ViewportYStartC,
2075
2076 /* Output */
2077 VInitPreFill: &VInitPreFillC[k],
2078 MaxNumSwath: &MaxNumSwathC[k]);
2079 } else {
2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2081 PTEBufferSizeInRequestsForChroma[k] = 0;
2082 PixelPTEBytesPerRowC[k] = 0;
2083 PDEAndMetaPTEBytesFrameC = 0;
2084 MetaRowByteC[k] = 0;
2085 MaxNumSwathC[k] = 0;
2086 PrefetchSourceLinesC[k] = 0;
2087 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2088 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2090 }
2091
2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2093 ViewportStationary: myPipe[k].ViewportStationary,
2094 DCCEnable: myPipe[k].DCCEnable,
2095 NumberOfDPPs: myPipe[k].DPPPerSurface,
2096 BlockHeight256Bytes: myPipe[k].BlockHeight256BytesY,
2097 BlockWidth256Bytes: myPipe[k].BlockWidth256BytesY,
2098 SourcePixelFormat: myPipe[k].SourcePixelFormat,
2099 SurfaceTiling: myPipe[k].SurfaceTiling,
2100 BytePerPixel: myPipe[k].BytePerPixelY,
2101 SourceScan: myPipe[k].SourceRotation,
2102 SwathWidth: SwathWidthY[k],
2103 ViewportHeight: myPipe[k].ViewportHeight,
2104 ViewportXStart: myPipe[k].ViewportXStart,
2105 ViewportYStart: myPipe[k].ViewportYStart,
2106 GPUVMEnable,
2107 HostVMEnable,
2108 HostVMMaxNonCachedPageTableLevels,
2109 GPUVMMaxPageTableLevels,
2110 GPUVMMinPageSizeKBytes: GPUVMMinPageSizeKBytes[k],
2111 HostVMMinPageSize,
2112 PTEBufferSizeInRequests: PTEBufferSizeInRequestsForLuma[k],
2113 Pitch: myPipe[k].PitchY,
2114 DCCMetaPitch: myPipe[k].DCCMetaPitchY,
2115 MacroTileWidth: myPipe[k].BlockWidthY,
2116 MacroTileHeight: myPipe[k].BlockHeightY,
2117
2118 /* Output */
2119 MetaRowByte: &MetaRowByteY[k],
2120 PixelPTEBytesPerRow: &PixelPTEBytesPerRowY[k],
2121 dpte_row_width_ub: &dpte_row_width_luma_ub[k],
2122 dpte_row_height: &dpte_row_height_luma[k],
2123 dpte_row_height_linear: &dpte_row_height_linear_luma[k],
2124 PixelPTEBytesPerRow_one_row_per_frame: &PixelPTEBytesPerRowY_one_row_per_frame[k],
2125 dpte_row_width_ub_one_row_per_frame: &dpte_row_width_luma_ub_one_row_per_frame[k],
2126 dpte_row_height_one_row_per_frame: &dpte_row_height_luma_one_row_per_frame[k],
2127 MetaRequestWidth: &meta_req_width[k],
2128 MetaRequestHeight: &meta_req_height[k],
2129 meta_row_width: &meta_row_width[k],
2130 meta_row_height: &meta_row_height[k],
2131 PixelPTEReqWidth: &PixelPTEReqWidthY[k],
2132 PixelPTEReqHeight: &PixelPTEReqHeightY[k],
2133 PTERequestSize: &PTERequestSizeY[k],
2134 DPDE0BytesFrame: &dpde0_bytes_per_frame_ub_l[k],
2135 MetaPTEBytesFrame: &meta_pte_bytes_per_frame_ub_l[k]);
2136
2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2138 VRatio: myPipe[k].VRatio,
2139 VTaps: myPipe[k].VTaps,
2140 Interlace: myPipe[k].InterlaceEnable,
2141 ProgressiveToInterlaceUnitInOPP: myPipe[k].ProgressiveToInterlaceUnitInOPP,
2142 SwathHeight: myPipe[k].SwathHeightY,
2143 SourceRotation: myPipe[k].SourceRotation,
2144 ViewportStationary: myPipe[k].ViewportStationary,
2145 SwathWidth: SwathWidthY[k],
2146 ViewportHeight: myPipe[k].ViewportHeight,
2147 ViewportXStart: myPipe[k].ViewportXStart,
2148 ViewportYStart: myPipe[k].ViewportYStart,
2149
2150 /* Output */
2151 VInitPreFill: &VInitPreFillY[k],
2152 MaxNumSwath: &MaxNumSwathY[k]);
2153
2154 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2156
2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2159 PTEBufferSizeNotExceeded[k] = true;
2160 } else {
2161 PTEBufferSizeNotExceeded[k] = false;
2162 }
2163
2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2165 PTEBufferSizeInRequestsForLuma[k] &&
2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2167 }
2168
2169 dml32_CalculateMALLUseForStaticScreen(
2170 NumberOfActiveSurfaces,
2171 MALLAllocatedForDCNFinal: MALLAllocatedForDCN,
2172 UseMALLForStaticScreen, // mode
2173 SurfaceSizeInMALL,
2174 one_row_per_frame_fits_in_buffer,
2175 /* Output */
2176 UsesMALLForStaticScreen); // boolen
2177
2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2182 (GPUVMMinPageSizeKBytes[k] > 64);
2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(x: GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2184 }
2185
2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2187#ifdef __DML_VBA_DEBUG__
2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2190#endif
2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(Scan: myPipe[k].SourceRotation));
2195
2196 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2198
2199 if (use_one_row_for_frame[k]) {
2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2207 }
2208
2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2210 DCCMetaBufferSizeNotExceeded[k] = true;
2211 else
2212 DCCMetaBufferSizeNotExceeded[k] = false;
2213
2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2215 if (use_one_row_for_frame[k])
2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2217
2218 dml32_CalculateRowBandwidth(
2219 GPUVMEnable,
2220 SourcePixelFormat: myPipe[k].SourcePixelFormat,
2221 VRatio: myPipe[k].VRatio,
2222 VRatioChroma: myPipe[k].VRatioChroma,
2223 DCCEnable: myPipe[k].DCCEnable,
2224 LineTime: myPipe[k].HTotal / myPipe[k].PixelClock,
2225 MetaRowByteLuma: MetaRowByteY[k], MetaRowByteChroma: MetaRowByteC[k],
2226 meta_row_height_luma: meta_row_height[k],
2227 meta_row_height_chroma: meta_row_height_chroma[k],
2228 PixelPTEBytesPerRowLuma: PixelPTEBytesPerRowY[k],
2229 PixelPTEBytesPerRowChroma: PixelPTEBytesPerRowC[k],
2230 dpte_row_height_luma: dpte_row_height_luma[k],
2231 dpte_row_height_chroma: dpte_row_height_chroma[k],
2232
2233 /* Output */
2234 meta_row_bw: &meta_row_bw[k],
2235 dpte_row_bw: &dpte_row_bw[k]);
2236#ifdef __DML_VBA_DEBUG__
2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2239 __func__, k, use_one_row_for_frame_flip[k]);
2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2241 __func__, k, UseMALLForPStateChange[k]);
2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2243 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2244 __func__, k, dpte_row_width_luma_ub[k]);
2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2247 __func__, k, dpte_row_height_chroma[k]);
2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2249 __func__, k, dpte_row_width_chroma_ub[k]);
2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2253 __func__, k, PTEBufferSizeNotExceeded[k]);
2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2256#endif
2257 }
2258} // CalculateVMRowAndSwath
2259
2260unsigned int dml32_CalculateVMAndRowBytes(
2261 bool ViewportStationary,
2262 bool DCCEnable,
2263 unsigned int NumberOfDPPs,
2264 unsigned int BlockHeight256Bytes,
2265 unsigned int BlockWidth256Bytes,
2266 enum source_format_class SourcePixelFormat,
2267 unsigned int SurfaceTiling,
2268 unsigned int BytePerPixel,
2269 enum dm_rotation_angle SourceRotation,
2270 double SwathWidth,
2271 unsigned int ViewportHeight,
2272 unsigned int ViewportXStart,
2273 unsigned int ViewportYStart,
2274 bool GPUVMEnable,
2275 bool HostVMEnable,
2276 unsigned int HostVMMaxNonCachedPageTableLevels,
2277 unsigned int GPUVMMaxPageTableLevels,
2278 unsigned int GPUVMMinPageSizeKBytes,
2279 unsigned int HostVMMinPageSize,
2280 unsigned int PTEBufferSizeInRequests,
2281 unsigned int Pitch,
2282 unsigned int DCCMetaPitch,
2283 unsigned int MacroTileWidth,
2284 unsigned int MacroTileHeight,
2285
2286 /* Output */
2287 unsigned int *MetaRowByte,
2288 unsigned int *PixelPTEBytesPerRow,
2289 unsigned int *dpte_row_width_ub,
2290 unsigned int *dpte_row_height,
2291 unsigned int *dpte_row_height_linear,
2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2293 unsigned int *dpte_row_width_ub_one_row_per_frame,
2294 unsigned int *dpte_row_height_one_row_per_frame,
2295 unsigned int *MetaRequestWidth,
2296 unsigned int *MetaRequestHeight,
2297 unsigned int *meta_row_width,
2298 unsigned int *meta_row_height,
2299 unsigned int *PixelPTEReqWidth,
2300 unsigned int *PixelPTEReqHeight,
2301 unsigned int *PTERequestSize,
2302 unsigned int *DPDE0BytesFrame,
2303 unsigned int *MetaPTEBytesFrame)
2304{
2305 unsigned int MPDEBytesFrame;
2306 unsigned int DCCMetaSurfaceBytes;
2307 unsigned int ExtraDPDEBytesFrame;
2308 unsigned int PDEAndMetaPTEBytesFrame;
2309 unsigned int HostVMDynamicLevels = 0;
2310 unsigned int MacroTileSizeBytes;
2311 unsigned int vp_height_meta_ub;
2312 unsigned int vp_height_dpte_ub;
2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2314
2315 if (GPUVMEnable == true && HostVMEnable == true) {
2316 if (HostVMMinPageSize < 2048)
2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2319 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 1);
2320 else
2321 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 2);
2322 }
2323
2324 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2325 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2326 if (SurfaceTiling == dm_sw_linear) {
2327 *meta_row_height = 32;
2328 *meta_row_width = dml_floor(a: ViewportXStart + SwathWidth + *MetaRequestWidth - 1, granularity: *MetaRequestWidth)
2329 - dml_floor(a: ViewportXStart, granularity: *MetaRequestWidth);
2330 } else if (!IsVertical(Scan: SourceRotation)) {
2331 *meta_row_height = *MetaRequestHeight;
2332 if (ViewportStationary && NumberOfDPPs == 1) {
2333 *meta_row_width = dml_floor(a: ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2334 granularity: *MetaRequestWidth) - dml_floor(a: ViewportXStart, granularity: *MetaRequestWidth);
2335 } else {
2336 *meta_row_width = dml_ceil(a: SwathWidth - 1, granularity: *MetaRequestWidth) + *MetaRequestWidth;
2337 }
2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2339 } else {
2340 *meta_row_height = *MetaRequestWidth;
2341 if (ViewportStationary && NumberOfDPPs == 1) {
2342 *meta_row_width = dml_floor(a: ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2343 granularity: *MetaRequestHeight) - dml_floor(a: ViewportYStart, granularity: *MetaRequestHeight);
2344 } else {
2345 *meta_row_width = dml_ceil(a: SwathWidth - 1, granularity: *MetaRequestHeight) + *MetaRequestHeight;
2346 }
2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2348 }
2349
2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(Scan: SourceRotation))) {
2351 vp_height_meta_ub = dml_floor(a: ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2352 granularity: 64 * BlockHeight256Bytes) - dml_floor(a: ViewportYStart, granularity: 64 * BlockHeight256Bytes);
2353 } else if (!IsVertical(Scan: SourceRotation)) {
2354 vp_height_meta_ub = dml_ceil(a: ViewportHeight - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 } else {
2356 vp_height_meta_ub = dml_ceil(a: SwathWidth - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 }
2358
2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2360
2361 if (GPUVMEnable == true) {
2362 *MetaPTEBytesFrame = (dml_ceil(a: (double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2363 (8 * 4.0 * 1024), granularity: 1) + 1) * 64;
2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2365 } else {
2366 *MetaPTEBytesFrame = 0;
2367 MPDEBytesFrame = 0;
2368 }
2369
2370 if (DCCEnable != true) {
2371 *MetaPTEBytesFrame = 0;
2372 MPDEBytesFrame = 0;
2373 *MetaRowByte = 0;
2374 }
2375
2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2377
2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(Scan: SourceRotation))) {
2380 vp_height_dpte_ub = dml_floor(a: ViewportYStart + ViewportHeight +
2381 MacroTileHeight - 1, granularity: MacroTileHeight) -
2382 dml_floor(a: ViewportYStart, granularity: MacroTileHeight);
2383 } else if (!IsVertical(Scan: SourceRotation)) {
2384 vp_height_dpte_ub = dml_ceil(a: ViewportHeight - 1, granularity: MacroTileHeight) + MacroTileHeight;
2385 } else {
2386 vp_height_dpte_ub = dml_ceil(a: SwathWidth - 1, granularity: MacroTileHeight) + MacroTileHeight;
2387 }
2388 *DPDE0BytesFrame = 64 * (dml_ceil(a: (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2389 (8 * 2097152), granularity: 1) + 1);
2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2391 } else {
2392 *DPDE0BytesFrame = 0;
2393 ExtraDPDEBytesFrame = 0;
2394 vp_height_dpte_ub = 0;
2395 }
2396
2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2398
2399#ifdef __DML_VBA_DEBUG__
2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2414 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2417#endif
2418
2419 if (HostVMEnable == true)
2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2421
2422 if (SurfaceTiling == dm_sw_linear) {
2423 *PixelPTEReqHeight = 1;
2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2426 *PTERequestSize = 64;
2427 } else if (GPUVMMinPageSizeKBytes == 4) {
2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2430 *PTERequestSize = 128;
2431 } else {
2432 *PixelPTEReqHeight = MacroTileHeight;
2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2434 *PTERequestSize = 64;
2435 }
2436#ifdef __DML_VBA_DEBUG__
2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2444#endif
2445
2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(a: ((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, granularity: 1) + 1) *
2449 (double) *PixelPTEReqWidth;
2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2451 *PTERequestSize;
2452
2453 if (SurfaceTiling == dm_sw_linear) {
2454 *dpte_row_height = dml_min(a: 128, b: 1 << (unsigned int) dml_floor(a: dml_log2(x: PTEBufferSizeInRequests *
2455 *PixelPTEReqWidth / Pitch), granularity: 1));
2456#ifdef __DML_VBA_DEBUG__
2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2465 *PixelPTEReqWidth / Pitch), 1));
2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2467#endif
2468 *dpte_row_width_ub = dml_ceil(a: ((double) Pitch * (double) *dpte_row_height - 1),
2469 granularity: (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2471
2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(a: dml_log2(x: PTEBufferSizeInRequests *
2474 PixelPTEReqWidth_linear / Pitch), granularity: 1);
2475 if (*dpte_row_height_linear > 128)
2476 *dpte_row_height_linear = 128;
2477
2478 } else if (!IsVertical(Scan: SourceRotation)) {
2479 *dpte_row_height = *PixelPTEReqHeight;
2480
2481 if (GPUVMMinPageSizeKBytes > 64) {
2482 *dpte_row_width_ub = (dml_ceil(a: (Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2483 *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth;
2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2485 *dpte_row_width_ub = dml_floor(a: ViewportXStart + SwathWidth +
2486 *PixelPTEReqWidth - 1, granularity: *PixelPTEReqWidth) -
2487 dml_floor(a: ViewportXStart, granularity: *PixelPTEReqWidth);
2488 } else {
2489 *dpte_row_width_ub = (dml_ceil(a: (SwathWidth - 1) / *PixelPTEReqWidth, granularity: 1) + 1) *
2490 *PixelPTEReqWidth;
2491 }
2492
2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2494 } else {
2495 *dpte_row_height = dml_min(a: *PixelPTEReqWidth, b: MacroTileWidth);
2496
2497 if (ViewportStationary && (NumberOfDPPs == 1)) {
2498 *dpte_row_width_ub = dml_floor(a: ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2499 granularity: *PixelPTEReqHeight) - dml_floor(a: ViewportYStart, granularity: *PixelPTEReqHeight);
2500 } else {
2501 *dpte_row_width_ub = (dml_ceil(a: (SwathWidth - 1) / *PixelPTEReqHeight, granularity: 1) + 1)
2502 * *PixelPTEReqHeight;
2503 }
2504
2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2506 }
2507
2508 if (GPUVMEnable != true)
2509 *PixelPTEBytesPerRow = 0;
2510 if (HostVMEnable == true)
2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2512
2513#ifdef __DML_VBA_DEBUG__
2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2517 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2522 __func__, *dpte_row_width_ub_one_row_per_frame);
2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2526 *MetaPTEBytesFrame);
2527#endif
2528
2529 return PDEAndMetaPTEBytesFrame;
2530} // CalculateVMAndRowBytes
2531
2532double dml32_CalculatePrefetchSourceLines(
2533 double VRatio,
2534 unsigned int VTaps,
2535 bool Interlace,
2536 bool ProgressiveToInterlaceUnitInOPP,
2537 unsigned int SwathHeight,
2538 enum dm_rotation_angle SourceRotation,
2539 bool ViewportStationary,
2540 double SwathWidth,
2541 unsigned int ViewportHeight,
2542 unsigned int ViewportXStart,
2543 unsigned int ViewportYStart,
2544
2545 /* Output */
2546 double *VInitPreFill,
2547 unsigned int *MaxNumSwath)
2548{
2549
2550 unsigned int vp_start_rot;
2551 unsigned int sw0_tmp;
2552 unsigned int MaxPartialSwath;
2553 double numLines;
2554
2555#ifdef __DML_VBA_DEBUG__
2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2562#endif
2563 if (ProgressiveToInterlaceUnitInOPP)
2564 *VInitPreFill = dml_floor(a: (VRatio + (double) VTaps + 1) / 2.0, granularity: 1);
2565 else
2566 *VInitPreFill = dml_floor(a: (VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, granularity: 1);
2567
2568 if (ViewportStationary) {
2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2570 vp_start_rot = SwathHeight -
2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2573 vp_start_rot = ViewportXStart;
2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2575 vp_start_rot = SwathHeight -
2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2577 } else {
2578 vp_start_rot = ViewportYStart;
2579 }
2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2581 if (sw0_tmp < *VInitPreFill)
2582 *MaxNumSwath = dml_ceil(a: (*VInitPreFill - sw0_tmp) / SwathHeight, granularity: 1) + 1;
2583 else
2584 *MaxNumSwath = 1;
2585 MaxPartialSwath = dml_max(a: 1, b: (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2586 } else {
2587 *MaxNumSwath = dml_ceil(a: (*VInitPreFill - 1.0) / SwathHeight, granularity: 1) + 1;
2588 if (*VInitPreFill > 1)
2589 MaxPartialSwath = dml_max(a: 1, b: (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2590 else
2591 MaxPartialSwath = dml_max(a: 1, b: (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2592 }
2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2594
2595#ifdef __DML_VBA_DEBUG__
2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2601#endif
2602 return numLines;
2603
2604} // CalculatePrefetchSourceLines
2605
2606void dml32_CalculateMALLUseForStaticScreen(
2607 unsigned int NumberOfActiveSurfaces,
2608 unsigned int MALLAllocatedForDCNFinal,
2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2610 unsigned int SurfaceSizeInMALL[],
2611 bool one_row_per_frame_fits_in_buffer[],
2612
2613 /* output */
2614 bool UsesMALLForStaticScreen[])
2615{
2616 unsigned int k;
2617 unsigned int SurfaceToAddToMALL;
2618 bool CanAddAnotherSurfaceToMALL;
2619 unsigned int TotalSurfaceSizeInMALL;
2620
2621 TotalSurfaceSizeInMALL = 0;
2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2624 if (UsesMALLForStaticScreen[k])
2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2626#ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2629#endif
2630 }
2631
2632 SurfaceToAddToMALL = 0;
2633 CanAddAnotherSurfaceToMALL = true;
2634 while (CanAddAnotherSurfaceToMALL) {
2635 CanAddAnotherSurfaceToMALL = false;
2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2638 !UsesMALLForStaticScreen[k] &&
2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2640 one_row_per_frame_fits_in_buffer[k] &&
2641 (!CanAddAnotherSurfaceToMALL ||
2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2643 CanAddAnotherSurfaceToMALL = true;
2644 SurfaceToAddToMALL = k;
2645#ifdef __DML_VBA_DEBUG__
2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2647 __func__, k, UseMALLForStaticScreen[k]);
2648#endif
2649 }
2650 }
2651 if (CanAddAnotherSurfaceToMALL) {
2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2654
2655#ifdef __DML_VBA_DEBUG__
2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2658#endif
2659
2660 }
2661 }
2662}
2663
2664void dml32_CalculateRowBandwidth(
2665 bool GPUVMEnable,
2666 enum source_format_class SourcePixelFormat,
2667 double VRatio,
2668 double VRatioChroma,
2669 bool DCCEnable,
2670 double LineTime,
2671 unsigned int MetaRowByteLuma,
2672 unsigned int MetaRowByteChroma,
2673 unsigned int meta_row_height_luma,
2674 unsigned int meta_row_height_chroma,
2675 unsigned int PixelPTEBytesPerRowLuma,
2676 unsigned int PixelPTEBytesPerRowChroma,
2677 unsigned int dpte_row_height_luma,
2678 unsigned int dpte_row_height_chroma,
2679 /* Output */
2680 double *meta_row_bw,
2681 double *dpte_row_bw)
2682{
2683 if (DCCEnable != true) {
2684 *meta_row_bw = 0;
2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2686 SourcePixelFormat == dm_rgbe_alpha) {
2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2689 } else {
2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2691 }
2692
2693 if (GPUVMEnable != true) {
2694 *dpte_row_bw = 0;
2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2696 SourcePixelFormat == dm_rgbe_alpha) {
2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2699 } else {
2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2701 }
2702}
2703
/*
 * dml32_CalculateUrgentLatency - largest of the three urgent latencies,
 * optionally adjusted for the fabric clock.
 *
 * Returns dml_max3() of the three latency inputs. When
 * DoUrgentLatencyAdjustment is true,
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * is added to that maximum.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst;

	return worst + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2722
2723void dml32_CalculateUrgentBurstFactor(
2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2725 unsigned int swath_width_luma_ub,
2726 unsigned int swath_width_chroma_ub,
2727 unsigned int SwathHeightY,
2728 unsigned int SwathHeightC,
2729 double LineTime,
2730 double UrgentLatency,
2731 double CursorBufferSize,
2732 unsigned int CursorWidth,
2733 unsigned int CursorBPP,
2734 double VRatio,
2735 double VRatioC,
2736 double BytePerPixelInDETY,
2737 double BytePerPixelInDETC,
2738 unsigned int DETBufferSizeY,
2739 unsigned int DETBufferSizeC,
2740 /* Output */
2741 double *UrgentBurstFactorCursor,
2742 double *UrgentBurstFactorLuma,
2743 double *UrgentBurstFactorChroma,
2744 bool *NotEnoughUrgentLatencyHiding)
2745{
2746 double LinesInDETLuma;
2747 double LinesInDETChroma;
2748 unsigned int LinesInCursorBuffer;
2749 double CursorBufferSizeInTime;
2750 double DETBufferSizeInTimeLuma;
2751 double DETBufferSizeInTimeChroma;
2752
2753 *NotEnoughUrgentLatencyHiding = 0;
2754
2755 if (CursorWidth > 0) {
2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(a: dml_log2(x: CursorBufferSize * 1024.0 /
2757 (CursorWidth * CursorBPP / 8.0)), granularity: 1.0);
2758 if (VRatio > 0) {
2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2761 *NotEnoughUrgentLatencyHiding = 1;
2762 *UrgentBurstFactorCursor = 0;
2763 } else {
2764 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2765 (CursorBufferSizeInTime - UrgentLatency);
2766 }
2767 } else {
2768 *UrgentBurstFactorCursor = 1;
2769 }
2770 }
2771
2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2774
2775 if (VRatio > 0) {
2776 DETBufferSizeInTimeLuma = dml_floor(a: LinesInDETLuma, granularity: SwathHeightY) * LineTime / VRatio;
2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2778 *NotEnoughUrgentLatencyHiding = 1;
2779 *UrgentBurstFactorLuma = 0;
2780 } else {
2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2782 }
2783 } else {
2784 *UrgentBurstFactorLuma = 1;
2785 }
2786
2787 if (BytePerPixelInDETC > 0) {
2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2790 / swath_width_chroma_ub;
2791
2792 if (VRatio > 0) {
2793 DETBufferSizeInTimeChroma = dml_floor(a: LinesInDETChroma, granularity: SwathHeightC) * LineTime / VRatio;
2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2795 *NotEnoughUrgentLatencyHiding = 1;
2796 *UrgentBurstFactorChroma = 0;
2797 } else {
2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2799 / (DETBufferSizeInTimeChroma - UrgentLatency);
2800 }
2801 } else {
2802 *UrgentBurstFactorChroma = 1;
2803 }
2804 }
2805} // CalculateUrgentBurstFactor
2806
2807void dml32_CalculateDCFCLKDeepSleep(
2808 unsigned int NumberOfActiveSurfaces,
2809 unsigned int BytePerPixelY[],
2810 unsigned int BytePerPixelC[],
2811 double VRatio[],
2812 double VRatioChroma[],
2813 double SwathWidthY[],
2814 double SwathWidthC[],
2815 unsigned int DPPPerSurface[],
2816 double HRatio[],
2817 double HRatioChroma[],
2818 double PixelClock[],
2819 double PSCL_THROUGHPUT[],
2820 double PSCL_THROUGHPUT_CHROMA[],
2821 double Dppclk[],
2822 double ReadBandwidthLuma[],
2823 double ReadBandwidthChroma[],
2824 unsigned int ReturnBusWidth,
2825
2826 /* Output */
2827 double *DCFClkDeepSleep)
2828{
2829 unsigned int k;
2830 double DisplayPipeLineDeliveryTimeLuma;
2831 double DisplayPipeLineDeliveryTimeChroma;
2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2833 double ReadBandwidth = 0.0;
2834
2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2836
2837 if (VRatio[k] <= 1) {
2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2839 / PixelClock[k];
2840 } else {
2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2842 }
2843 if (BytePerPixelC[k] == 0) {
2844 DisplayPipeLineDeliveryTimeChroma = 0;
2845 } else {
2846 if (VRatioChroma[k] <= 1) {
2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2849 } else {
2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2851 / Dppclk[k];
2852 }
2853 }
2854
2855 if (BytePerPixelC[k] > 0) {
2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2859 32.0 / DisplayPipeLineDeliveryTimeChroma);
2860 } else {
2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2862 64.0 / DisplayPipeLineDeliveryTimeLuma;
2863 }
2864 DCFClkDeepSleepPerSurface[k] = dml_max(a: DCFClkDeepSleepPerSurface[k], b: PixelClock[k] / 16);
2865
2866#ifdef __DML_VBA_DEBUG__
2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2869#endif
2870 }
2871
2872 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2874
2875 *DCFClkDeepSleep = dml_max(a: 8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2876
2877#ifdef __DML_VBA_DEBUG__
2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2882#endif
2883
2884 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2885 *DCFClkDeepSleep = dml_max(a: *DCFClkDeepSleep, b: DCFClkDeepSleepPerSurface[k]);
2886#ifdef __DML_VBA_DEBUG__
2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2888#endif
2889} // CalculateDCFCLKDeepSleep
2890
2891double dml32_CalculateWriteBackDelay(
2892 enum source_format_class WritebackPixelFormat,
2893 double WritebackHRatio,
2894 double WritebackVRatio,
2895 unsigned int WritebackVTaps,
2896 unsigned int WritebackDestinationWidth,
2897 unsigned int WritebackDestinationHeight,
2898 unsigned int WritebackSourceHeight,
2899 unsigned int HTotal)
2900{
2901 double CalculateWriteBackDelay;
2902 double Line_length;
2903 double Output_lines_last_notclamped;
2904 double WritebackVInit;
2905
2906 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2907 Line_length = dml_max(a: (double) WritebackDestinationWidth,
2908 b: dml_ceil(a: (double)WritebackDestinationWidth / 6.0, granularity: 1.0) * WritebackVTaps);
2909 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2910 dml_ceil(a: ((double)WritebackSourceHeight -
2911 (double) WritebackVInit) / (double)WritebackVRatio, granularity: 1.0);
2912 if (Output_lines_last_notclamped < 0) {
2913 CalculateWriteBackDelay = 0;
2914 } else {
2915 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2916 (HTotal - WritebackDestinationWidth) + 80;
2917 }
2918 return CalculateWriteBackDelay;
2919}
2920
2921void dml32_UseMinimumDCFCLK(
2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2923 bool DRRDisplay[],
2924 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2925 unsigned int MaxInterDCNTileRepeaters,
2926 unsigned int MaxPrefetchMode,
2927 double DRAMClockChangeLatencyFinal,
2928 double FCLKChangeLatency,
2929 double SREnterPlusExitTime,
2930 unsigned int ReturnBusWidth,
2931 unsigned int RoundTripPingLatencyCycles,
2932 unsigned int ReorderingBytes,
2933 unsigned int PixelChunkSizeInKByte,
2934 unsigned int MetaChunkSize,
2935 bool GPUVMEnable,
2936 unsigned int GPUVMMaxPageTableLevels,
2937 bool HostVMEnable,
2938 unsigned int NumberOfActiveSurfaces,
2939 double HostVMMinPageSize,
2940 unsigned int HostVMMaxNonCachedPageTableLevels,
2941 bool DynamicMetadataVMEnabled,
2942 bool ImmediateFlipRequirement,
2943 bool ProgressiveToInterlaceUnitInOPP,
2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2946 unsigned int VTotal[],
2947 unsigned int VActive[],
2948 unsigned int DynamicMetadataTransmittedBytes[],
2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2950 bool Interlace[],
2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2952 double RequiredDISPCLK[][2],
2953 double UrgLatency[],
2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2955 double ProjectedDCFClkDeepSleep[][2],
2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2957 unsigned int TotalNumberOfActiveDPP[][2],
2958 unsigned int TotalNumberOfDCCActiveDPP[][2],
2959 unsigned int dpte_group_bytes[],
2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2964 unsigned int BytePerPixelY[],
2965 unsigned int BytePerPixelC[],
2966 unsigned int HTotal[],
2967 double PixelClock[],
2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2970 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2971 bool DynamicMetadataEnable[],
2972 double ReadBandwidthLuma[],
2973 double ReadBandwidthChroma[],
2974 double DCFCLKPerState[],
2975 /* Output */
2976 double DCFCLKState[][2])
2977{
2978 unsigned int i, j, k;
2979 unsigned int dummy1;
2980 double dummy2, dummy3;
2981 double NormalEfficiency;
2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2983
2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2986 for (j = 0; j <= 1; ++j) {
2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2991 double MinimumTWait = 0.0;
2992 double DPTEBandwidth;
2993 double DCFCLKRequiredForAverageBandwidth;
2994 unsigned int ExtraLatencyBytes;
2995 double ExtraLatencyCycles;
2996 double DCFCLKRequiredForPeakBandwidth;
2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2998 double MinimumTvmPlus2Tr0;
2999
3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3004 / (15.75 * HTotal[k] / PixelClock[k]);
3005 }
3006
3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3008 NoOfDPPState[k] = NoOfDPP[i][j][k];
3009
3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3011 DCFCLKRequiredForAverageBandwidth = dml_max(a: ProjectedDCFClkDeepSleep[i][j], b: DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3012
3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3014 TotalNumberOfActiveDPP: TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3015 TotalNumberOfDCCActiveDPP: TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3016 NumberOfActiveSurfaces, NumberOfDPP: NoOfDPPState, dpte_group_bytes, HostVMInefficiencyFactor: 1, HostVMMinPageSize,
3017 HostVMMaxNonCachedPageTableLevels);
3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3021 double DCFCLKCyclesRequiredInPrefetch;
3022 double PrefetchTime;
3023
3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3027 * BytePerPixelC[k]) / NormalEfficiency
3028 / ReturnBusWidth;
3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3031 / NormalEfficiency / ReturnBusWidth
3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3034 / ReturnBusWidth
3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3036 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3037 PrefetchPixelLinesTime[k] = dml_max(a: PrefetchLinesY[i][j][k], b: PrefetchLinesC[i][j][k])
3038 * HTotal[k] / PixelClock[k];
3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3041 UrgLatency[i] * GPUVMMaxPageTableLevels *
3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3043
3044 MinimumTWait = dml32_CalculateTWait(PrefetchMode: MaxPrefetchMode,
3045 UseMALLForPStateChange: UseMALLForPStateChange[k],
3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3047 DRRDisplay: DRRDisplay[k],
3048 DRAMClockChangeLatency: DRAMClockChangeLatencyFinal,
3049 FCLKChangeLatency,
3050 UrgentLatency: UrgLatency[i],
3051 SREnterPlusExitTime);
3052
3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3054 MinimumTWait - UrgLatency[i] *
3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3058 DynamicMetadataVMExtraLatency[k];
3059
3060 if (PrefetchTime > 0) {
3061 double ExpectedVRatioPrefetch;
3062
3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3064 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3065 DCFCLKCyclesRequiredInPrefetch);
3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3067 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3068 PrefetchPixelLinesTime[k] *
3069 dml_max(a: 1.0, b: ExpectedVRatioPrefetch) *
3070 dml_max(a: 1.0, b: ExpectedVRatioPrefetch / 4);
3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3075 NormalEfficiency / ReturnBusWidth;
3076 }
3077 } else {
3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3079 }
3080 if (DynamicMetadataEnable[k] == true) {
3081 double TSetupPipe;
3082 double TdmbfPipe;
3083 double TdmsksPipe;
3084 double TdmecPipe;
3085 double AllowedTimeForUrgentExtraLatency;
3086
3087 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3088 MaxInterDCNTileRepeaters,
3089 Dppclk: RequiredDPPCLKPerSurface[i][j][k],
3090 Dispclk: RequiredDISPCLK[i][j],
3091 DCFClkDeepSleep: ProjectedDCFClkDeepSleep[i][j],
3092 PixelClock: PixelClock[k],
3093 HTotal: HTotal[k],
3094 VBlank: VTotal[k] - VActive[k],
3095 DynamicMetadataTransmittedBytes: DynamicMetadataTransmittedBytes[k],
3096 DynamicMetadataLinesBeforeActiveRequired: DynamicMetadataLinesBeforeActiveRequired[k],
3097 InterlaceEnable: Interlace[k],
3098 ProgressiveToInterlaceUnitInOPP,
3099
3100 /* output */
3101 TSetup: &TSetupPipe,
3102 Tdmbf: &TdmbfPipe,
3103 Tdmec: &TdmecPipe,
3104 Tdmsks: &TdmsksPipe,
3105 VUpdateOffsetPix: &dummy1,
3106 VUpdateWidthPix: &dummy2,
3107 VReadyOffsetPix: &dummy3);
3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3111 if (AllowedTimeForUrgentExtraLatency > 0)
3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3113 dml_max(a: DCFCLKRequiredForPeakBandwidthPerSurface[k],
3114 b: ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3115 else
3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3117 }
3118 }
3119 DCFCLKRequiredForPeakBandwidth = 0;
3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3122 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3123 }
3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3128 double MaximumTvmPlus2Tr0PlusTsw;
3129
3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3134 } else {
3135 DCFCLKRequiredForPeakBandwidth = dml_max3(a: DCFCLKRequiredForPeakBandwidth,
3136 b: 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3137 MinimumTvmPlus2Tr0 -
3138 PrefetchPixelLinesTime[k] / 4),
3139 c: (2 * ExtraLatencyCycles +
3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3142 }
3143 }
3144 DCFCLKState[i][j] = dml_min(a: DCFCLKPerState[i], b: 1.05 *
3145 dml_max(a: DCFCLKRequiredForAverageBandwidth, b: DCFCLKRequiredForPeakBandwidth));
3146 }
3147 }
3148}
3149
/*
 * dml32_CalculateExtraLatencyBytes - total the bytes that add to the extra
 * return latency.
 *
 * The total is the sum of:
 *   - ReorderingBytes;
 *   - TotalNumberOfActiveDPP pixel chunks plus TotalNumberOfDCCActiveDPP meta
 *     chunks (both chunk sizes are multiplied by 1024 here);
 *   - only when GPUVMEnable is set: NumberOfDPP[k] * dpte_group_bytes[k] for
 *     each of the NumberOfActiveSurfaces surfaces, scaled by
 *     (1 + 8 * HostVMDynamicLevels) and by HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is non-zero only when GPUVM and HostVM are both
 * enabled. It is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (never below 0) for HostVMMinPageSize < 2048, < 1048576, or anything else.
 *
 * The sum is accumulated in a double and converted to unsigned int by the
 * return statement.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int HostVMDynamicLevels = 0;
	unsigned int surface;
	double total_bytes;

	if (GPUVMEnable && HostVMEnable) {
		/* Each larger page-size band drops one more dynamic level. */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	/* Chunk counts are summed in integer math, then scaled to bytes. */
	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no page-table contribution. */
	if (!GPUVMEnable)
		return total_bytes;

	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface) {
		total_bytes += NumberOfDPP[surface] * dpte_group_bytes[surface] *
				(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
	}

	return total_bytes;
}
3190
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUPDATE /
 * VREADY pixel values and the dynamic metadata timing terms.
 *
 * With
 *   repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk)
 * the outputs, all written unconditionally, are:
 *   VUpdateWidthPix  - dml_ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                      repeater delay) * PixelClock, 1.0)
 *   VReadyOffsetPix  - dml_ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock, 1.0)
 *   VUpdateOffsetPix - dml_ceil(HTotal / 4, 1.0)
 *   TSetup           - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                      / PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - VBlank * HTotal / PixelClock / 2 when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0, else
 *                      DynamicMetadataLinesBeforeActiveRequired * HTotal /
 *                      PixelClock; halved again when InterlaceEnable is 1 and
 *                      ProgressiveToInterlaceUnitInOPP is false.
 *
 * NOTE(review): Dppclk, Dispclk, DCFClkDeepSleep and PixelClock are used as
 * divisors without any zero check in this function.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	/* TSetup reads back the three pixel values just stored above. */
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they are printed
	 * with %f; the unsigned int values use %u.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3245
3246double dml32_CalculateTWait(
3247 unsigned int PrefetchMode,
3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3250 bool DRRDisplay,
3251 double DRAMClockChangeLatency,
3252 double FCLKChangeLatency,
3253 double UrgentLatency,
3254 double SREnterPlusExitTime)
3255{
3256 double TWait = 0.0;
3257
3258 if (PrefetchMode == 0 &&
3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3262 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3263 TWait = dml_max3(a: DRAMClockChangeLatency + UrgentLatency, b: SREnterPlusExitTime, c: UrgentLatency);
3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 TWait = dml_max3(a: FCLKChangeLatency + UrgentLatency, b: SREnterPlusExitTime, c: UrgentLatency);
3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 TWait = dml_max(a: SREnterPlusExitTime, b: UrgentLatency);
3268 } else {
3269 TWait = UrgentLatency;
3270 }
3271
3272#ifdef __DML_VBA_DEBUG__
3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3275#endif
3276 return TWait;
3277} // CalculateTWait
3278
3279// Function: get_return_bw_mbps
3280// Megabyte per second
3281double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3282 const int VoltageLevel,
3283 const bool HostVMEnable,
3284 const double DCFCLK,
3285 const double FabricClock,
3286 const double DRAMSpeed)
3287{
3288 double ReturnBW = 0.;
3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3292 double PixelDataOnlyReturnBW = dml_min3(a: IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3293 b: IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3294 c: IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3296 double PixelMixedWithVMDataReturnBW = dml_min3(a: IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297 b: IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298 c: IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300
3301 if (HostVMEnable != true)
3302 ReturnBW = PixelDataOnlyReturnBW;
3303 else
3304 ReturnBW = PixelMixedWithVMDataReturnBW;
3305
3306#ifdef __DML_VBA_DEBUG__
3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3318#endif
3319 return ReturnBW;
3320}
3321
3322// Function: get_return_bw_mbps_vm_only
3323// Megabyte per second
3324double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3325 const int VoltageLevel,
3326 const double DCFCLK,
3327 const double FabricClock,
3328 const double DRAMSpeed)
3329{
3330 double VMDataOnlyReturnBW = dml_min3(
3331 a: soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 b: FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 c: DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3335 * (VoltageLevel < 2 ?
3336 soc->pct_ideal_dram_bw_after_urgent_strobe :
3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3338#ifdef __DML_VBA_DEBUG__
3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3344#endif
3345 return VMDataOnlyReturnBW;
3346}
3347
3348double dml32_CalculateExtraLatency(
3349 unsigned int RoundTripPingLatencyCycles,
3350 unsigned int ReorderingBytes,
3351 double DCFCLK,
3352 unsigned int TotalNumberOfActiveDPP,
3353 unsigned int PixelChunkSizeInKByte,
3354 unsigned int TotalNumberOfDCCActiveDPP,
3355 unsigned int MetaChunkSize,
3356 double ReturnBW,
3357 bool GPUVMEnable,
3358 bool HostVMEnable,
3359 unsigned int NumberOfActiveSurfaces,
3360 unsigned int NumberOfDPP[],
3361 unsigned int dpte_group_bytes[],
3362 double HostVMInefficiencyFactor,
3363 double HostVMMinPageSize,
3364 unsigned int HostVMMaxNonCachedPageTableLevels)
3365{
3366 double ExtraLatencyBytes;
3367 double ExtraLatency;
3368
3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3370 ReorderingBytes,
3371 TotalNumberOfActiveDPP,
3372 PixelChunkSizeInKByte,
3373 TotalNumberOfDCCActiveDPP,
3374 MetaChunkSize,
3375 GPUVMEnable,
3376 HostVMEnable,
3377 NumberOfActiveSurfaces,
3378 NumberOfDPP,
3379 dpte_group_bytes,
3380 HostVMInefficiencyFactor,
3381 HostVMMinPageSize,
3382 HostVMMaxNonCachedPageTableLevels);
3383
3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3385
3386#ifdef __DML_VBA_DEBUG__
3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3392#endif
3393
3394 return ExtraLatency;
3395} // CalculateExtraLatency
3396
3397bool dml32_CalculatePrefetchSchedule(
3398 struct vba_vars_st *v,
3399 unsigned int k,
3400 double HostVMInefficiencyFactor,
3401 DmlPipe *myPipe,
3402 unsigned int DSCDelay,
3403 unsigned int DPP_RECOUT_WIDTH,
3404 unsigned int VStartup,
3405 unsigned int MaxVStartup,
3406 double UrgentLatency,
3407 double UrgentExtraLatency,
3408 double TCalc,
3409 unsigned int PDEAndMetaPTEBytesFrame,
3410 unsigned int MetaRowByte,
3411 unsigned int PixelPTEBytesPerRow,
3412 double PrefetchSourceLinesY,
3413 unsigned int SwathWidthY,
3414 unsigned int VInitPreFillY,
3415 unsigned int MaxNumSwathY,
3416 double PrefetchSourceLinesC,
3417 unsigned int SwathWidthC,
3418 unsigned int VInitPreFillC,
3419 unsigned int MaxNumSwathC,
3420 unsigned int swath_width_luma_ub,
3421 unsigned int swath_width_chroma_ub,
3422 unsigned int SwathHeightY,
3423 unsigned int SwathHeightC,
3424 double TWait,
3425 double TPreReq,
3426 bool ExtendPrefetchIfPossible,
3427 /* Output */
3428 double *DSTXAfterScaler,
3429 double *DSTYAfterScaler,
3430 double *DestinationLinesForPrefetch,
3431 double *PrefetchBandwidth,
3432 double *DestinationLinesToRequestVMInVBlank,
3433 double *DestinationLinesToRequestRowInVBlank,
3434 double *VRatioPrefetchY,
3435 double *VRatioPrefetchC,
3436 double *RequiredPrefetchPixDataBWLuma,
3437 double *RequiredPrefetchPixDataBWChroma,
3438 bool *NotEnoughTimeForDynamicMetadata,
3439 double *Tno_bw,
3440 double *prefetch_vmrow_bw,
3441 double *Tdmdl_vm,
3442 double *Tdmdl,
3443 double *TSetup,
3444 unsigned int *VUpdateOffsetPix,
3445 double *VUpdateWidthPix,
3446 double *VReadyOffsetPix)
3447{
3448 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3449 bool MyError = false;
3450 unsigned int DPPCycles, DISPCLKCycles;
3451 double DSTTotalPixelsAfterScaler;
3452 double LineTime;
3453 double dst_y_prefetch_equ;
3454 double prefetch_bw_oto;
3455 double Tvm_oto;
3456 double Tr0_oto;
3457 double Tvm_oto_lines;
3458 double Tr0_oto_lines;
3459 double dst_y_prefetch_oto;
3460 double TimeForFetchingMetaPTE = 0;
3461 double TimeForFetchingRowInVBlank = 0;
3462 double LinesToRequestPrefetchPixelData = 0;
3463 double LinesForPrefetchBandwidth = 0;
3464 unsigned int HostVMDynamicLevelsTrips;
3465 double trip_to_mem;
3466 double Tvm_trips;
3467 double Tr0_trips;
3468 double Tvm_trips_rounded;
3469 double Tr0_trips_rounded;
3470 double Lsw_oto;
3471 double Tpre_rounded;
3472 double prefetch_bw_equ;
3473 double Tvm_equ;
3474 double Tr0_equ;
3475 double Tdmbf;
3476 double Tdmec;
3477 double Tdmsks;
3478 double prefetch_sw_bytes;
3479 double bytes_pp;
3480 double dep_bytes;
3481 unsigned int max_vratio_pre = v->MaxVRatioPre;
3482 double min_Lsw;
3483 double Tsw_est1 = 0;
3484 double Tsw_est3 = 0;
3485
3486 if (v->GPUVMEnable == true && v->HostVMEnable == true)
3487 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3488 else
3489 HostVMDynamicLevelsTrips = 0;
3490#ifdef __DML_VBA_DEBUG__
3491 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3492 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3493 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3494 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3495 __func__, v->HostVMEnable, HostVMInefficiencyFactor);
3496#endif
3497 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3498 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
3499 Dppclk: myPipe->Dppclk,
3500 Dispclk: myPipe->Dispclk,
3501 DCFClkDeepSleep: myPipe->DCFClkDeepSleep,
3502 PixelClock: myPipe->PixelClock,
3503 HTotal: myPipe->HTotal,
3504 VBlank: myPipe->VBlank,
3505 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
3506 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
3507 InterlaceEnable: myPipe->InterlaceEnable,
3508 ProgressiveToInterlaceUnitInOPP: myPipe->ProgressiveToInterlaceUnitInOPP,
3509 TSetup,
3510
3511 /* output */
3512 Tdmbf: &Tdmbf,
3513 Tdmec: &Tdmec,
3514 Tdmsks: &Tdmsks,
3515 VUpdateOffsetPix,
3516 VUpdateWidthPix,
3517 VReadyOffsetPix);
3518
3519 LineTime = myPipe->HTotal / myPipe->PixelClock;
3520 trip_to_mem = UrgentLatency;
3521 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3522
3523 if (v->DynamicMetadataVMEnabled == true)
3524 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3525 else
3526 *Tdmdl = TWait + UrgentExtraLatency;
3527
3528#ifdef __DML_VBA_ALLOW_DELTA__
3529 if (v->DynamicMetadataEnable[k] == false)
3530 *Tdmdl = 0.0;
3531#endif
3532
3533 if (v->DynamicMetadataEnable[k] == true) {
3534 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3535 *NotEnoughTimeForDynamicMetadata = true;
3536#ifdef __DML_VBA_DEBUG__
3537 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3538 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3539 __func__, Tdmbf);
3540 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3541 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3542 __func__, Tdmsks);
3543 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3544 __func__, *Tdmdl);
3545#endif
3546 } else {
3547 *NotEnoughTimeForDynamicMetadata = false;
3548 }
3549 } else {
3550 *NotEnoughTimeForDynamicMetadata = false;
3551 }
3552
3553 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3554 v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3555
3556 if (myPipe->ScalerEnabled)
3557 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3558 else
3559 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3560
3561 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3562
3563 DISPCLKCycles = v->DISPCLKDelaySubtotal;
3564
3565 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3566 return true;
3567
3568 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3569 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3570
3571 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3572 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3573 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3574 myPipe->HActive / 2 : 0)
3575 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3576
3577#ifdef __DML_VBA_DEBUG__
3578 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3579 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3580 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3581 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3582 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
3583 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
3584 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
3585 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3586 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
3587#endif
3588
3589 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3590 *DSTYAfterScaler = 1;
3591 else
3592 *DSTYAfterScaler = 0;
3593
3594 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3595 *DSTYAfterScaler = dml_floor(a: DSTTotalPixelsAfterScaler / myPipe->HTotal, granularity: 1);
3596 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3597#ifdef __DML_VBA_DEBUG__
3598 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
3599 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3600#endif
3601
3602 MyError = false;
3603
3604 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3605
3606 if (v->GPUVMEnable == true) {
3607 Tvm_trips_rounded = dml_ceil(a: 4.0 * Tvm_trips / LineTime, granularity: 1.0) / 4.0 * LineTime;
3608 Tr0_trips_rounded = dml_ceil(a: 4.0 * Tr0_trips / LineTime, granularity: 1.0) / 4.0 * LineTime;
3609 if (v->GPUVMMaxPageTableLevels >= 3) {
3610 *Tno_bw = UrgentExtraLatency + trip_to_mem *
3611 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3612 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3613 Tr0_trips_rounded = dml_ceil(a: 4.0 * UrgentExtraLatency / LineTime, granularity: 1.0) /
3614 4.0 * LineTime; // VBA_ERROR
3615 *Tno_bw = UrgentExtraLatency;
3616 } else {
3617 *Tno_bw = 0;
3618 }
3619 } else if (myPipe->DCCEnable == true) {
3620 Tvm_trips_rounded = LineTime / 4.0;
3621 Tr0_trips_rounded = dml_ceil(a: 4.0 * Tr0_trips / LineTime, granularity: 1.0) / 4.0 * LineTime;
3622 *Tno_bw = 0;
3623 } else {
3624 Tvm_trips_rounded = LineTime / 4.0;
3625 Tr0_trips_rounded = LineTime / 2.0;
3626 *Tno_bw = 0;
3627 }
3628 Tvm_trips_rounded = dml_max(a: Tvm_trips_rounded, b: LineTime / 4.0);
3629 Tr0_trips_rounded = dml_max(a: Tr0_trips_rounded, b: LineTime / 4.0);
3630
3631 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3632 || myPipe->SourcePixelFormat == dm_420_12) {
3633 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3634 } else {
3635 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3636 }
3637
3638 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3639 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3640 prefetch_bw_oto = dml_max(a: bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3641 b: prefetch_sw_bytes / (dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) * LineTime));
3642
3643 min_Lsw = dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) / max_vratio_pre;
3644 min_Lsw = dml_max(a: min_Lsw, b: 1.0);
3645 Lsw_oto = dml_ceil(a: 4.0 * dml_max(a: prefetch_sw_bytes / prefetch_bw_oto / LineTime, b: min_Lsw), granularity: 1.0) / 4.0;
3646
3647 if (v->GPUVMEnable == true) {
3648 Tvm_oto = dml_max3(
3649 a: Tvm_trips,
3650 b: *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3651 c: LineTime / 4.0);
3652 } else
3653 Tvm_oto = LineTime / 4.0;
3654
3655 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3656 Tr0_oto = dml_max4(
3657 a: Tr0_trips,
3658 b: (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3659 c: (LineTime - Tvm_oto)/2.0,
3660 d: LineTime / 4.0);
3661#ifdef __DML_VBA_DEBUG__
3662 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3663 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3664 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3665 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3666 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3667#endif
3668 } else
3669 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3670
3671 Tvm_oto_lines = dml_ceil(a: 4.0 * Tvm_oto / LineTime, granularity: 1) / 4.0;
3672 Tr0_oto_lines = dml_ceil(a: 4.0 * Tr0_oto / LineTime, granularity: 1) / 4.0;
3673 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3674
3675 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(a: TWait + TCalc, b: *Tdmdl)) / LineTime -
3676 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3677
3678 dst_y_prefetch_equ = dml_min(a: dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3679#ifdef __DML_VBA_DEBUG__
3680 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3681 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3682 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3683 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3684 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3685 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3686 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3687 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3688 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3689 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3690 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3691 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3692 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3693 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3694 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3695 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3696 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3697 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3698 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3699 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3700 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3701 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3702 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3703 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3704 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3705 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3706 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3707#endif
3708
3709 dst_y_prefetch_equ = dml_floor(a: 4.0 * (dst_y_prefetch_equ + 0.125), granularity: 1) / 4.0;
3710 Tpre_rounded = dst_y_prefetch_equ * LineTime;
3711#ifdef __DML_VBA_DEBUG__
3712 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3713 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3714 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3715 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3716 __func__, VStartup * LineTime);
3717 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3718 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3719 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3720 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3721 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3722 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3723 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3724 __func__, *DSTYAfterScaler);
3725#endif
3726 dep_bytes = dml_max(a: PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3727 b: MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3728
3729 if (prefetch_sw_bytes < dep_bytes)
3730 prefetch_sw_bytes = 2 * dep_bytes;
3731
3732 *PrefetchBandwidth = 0;
3733 *DestinationLinesToRequestVMInVBlank = 0;
3734 *DestinationLinesToRequestRowInVBlank = 0;
3735 *VRatioPrefetchY = 0;
3736 *VRatioPrefetchC = 0;
3737 *RequiredPrefetchPixDataBWLuma = 0;
3738 if (dst_y_prefetch_equ > 1 &&
3739 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3740 double PrefetchBandwidth1;
3741 double PrefetchBandwidth2;
3742 double PrefetchBandwidth3;
3743 double PrefetchBandwidth4;
3744
3745 if (Tpre_rounded - *Tno_bw > 0) {
3746 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3747 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3748 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3749 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3750 } else
3751 PrefetchBandwidth1 = 0;
3752
3753 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3754 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3755 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3756 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3757 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3758 }
3759
3760 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3761 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3762 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3763 else
3764 PrefetchBandwidth2 = 0;
3765
3766 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3767 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3768 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3769 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3770 } else
3771 PrefetchBandwidth3 = 0;
3772
3773
3774 if (VStartup == MaxVStartup &&
3775 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3776 LineTime - Tvm_trips_rounded > 0) {
3777 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3778 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3779 }
3780
3781 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3782 PrefetchBandwidth4 = prefetch_sw_bytes /
3783 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3784 } else {
3785 PrefetchBandwidth4 = 0;
3786 }
3787
3788#ifdef __DML_VBA_DEBUG__
3789 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3790 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3791 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3792 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3793 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3794 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3795 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3796 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3797 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3798#endif
3799 {
3800 bool Case1OK;
3801 bool Case2OK;
3802 bool Case3OK;
3803
3804 if (PrefetchBandwidth1 > 0) {
3805 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3806 >= Tvm_trips_rounded
3807 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3808 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3809 Case1OK = true;
3810 } else {
3811 Case1OK = false;
3812 }
3813 } else {
3814 Case1OK = false;
3815 }
3816
3817 if (PrefetchBandwidth2 > 0) {
3818 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3819 >= Tvm_trips_rounded
3820 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3821 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3822 Case2OK = true;
3823 } else {
3824 Case2OK = false;
3825 }
3826 } else {
3827 Case2OK = false;
3828 }
3829
3830 if (PrefetchBandwidth3 > 0) {
3831 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3832 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3833 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3834 Tr0_trips_rounded) {
3835 Case3OK = true;
3836 } else {
3837 Case3OK = false;
3838 }
3839 } else {
3840 Case3OK = false;
3841 }
3842
3843 if (Case1OK)
3844 prefetch_bw_equ = PrefetchBandwidth1;
3845 else if (Case2OK)
3846 prefetch_bw_equ = PrefetchBandwidth2;
3847 else if (Case3OK)
3848 prefetch_bw_equ = PrefetchBandwidth3;
3849 else
3850 prefetch_bw_equ = PrefetchBandwidth4;
3851
3852#ifdef __DML_VBA_DEBUG__
3853 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3854 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3855 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3856 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3857#endif
3858
3859 if (prefetch_bw_equ > 0) {
3860 if (v->GPUVMEnable == true) {
3861 Tvm_equ = dml_max3(a: *Tno_bw + PDEAndMetaPTEBytesFrame *
3862 HostVMInefficiencyFactor / prefetch_bw_equ,
3863 b: Tvm_trips, c: LineTime / 4);
3864 } else {
3865 Tvm_equ = LineTime / 4;
3866 }
3867
3868 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3869 Tr0_equ = dml_max4(a: (MetaRowByte + PixelPTEBytesPerRow *
3870 HostVMInefficiencyFactor) / prefetch_bw_equ, b: Tr0_trips,
3871 c: (LineTime - Tvm_equ) / 2, d: LineTime / 4);
3872 } else {
3873 Tr0_equ = (LineTime - Tvm_equ) / 2;
3874 }
3875 } else {
3876 Tvm_equ = 0;
3877 Tr0_equ = 0;
3878#ifdef __DML_VBA_DEBUG__
3879 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3880#endif
3881 }
3882 }
3883
3884 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3885 if (dst_y_prefetch_oto * LineTime < TPreReq) {
3886 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3887 } else {
3888 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3889 }
3890 TimeForFetchingMetaPTE = Tvm_oto;
3891 TimeForFetchingRowInVBlank = Tr0_oto;
3892 *PrefetchBandwidth = prefetch_bw_oto;
3893 /* Clamp to oto for bandwidth calculation */
3894 LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3895 } else {
3896 /* For mode programming we want to extend the prefetch as much as possible
3897 * (up to oto, or as long as we can for equ) if we're not already applying
3898 * the 60us prefetch requirement. This is to avoid intermittent underflow
3899 * issues during prefetch.
3900 *
3901 * The prefetch extension is applied under the following scenarios:
3902 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
3903 * 2. We're using subvp or drr methods of p-state switch, in which case we
3904 * we don't care if prefetch takes up more of the blanking time
3905 *
3906 * Mode programming typically chooses the smallest prefetch time possible
3907 * (i.e. highest bandwidth during prefetch) presumably to create margin between
3908 * p-states / c-states that happen in vblank and prefetch. Therefore we only
3909 * apply this prefetch extension when p-state in vblank is not required (UCLK
3910 * p-states take up the most vblank time).
3911 */
3912 if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
3913 MyError = true;
3914 } else {
3915 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3916 TimeForFetchingMetaPTE = Tvm_equ;
3917 TimeForFetchingRowInVBlank = Tr0_equ;
3918 *PrefetchBandwidth = prefetch_bw_equ;
3919 /* Clamp to equ for bandwidth calculation */
3920 LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3921 }
3922 }
3923
3924 *DestinationLinesToRequestVMInVBlank = dml_ceil(a: 4.0 * TimeForFetchingMetaPTE / LineTime, granularity: 1.0) / 4.0;
3925
3926 *DestinationLinesToRequestRowInVBlank =
3927 dml_ceil(a: 4.0 * TimeForFetchingRowInVBlank / LineTime, granularity: 1.0) / 4.0;
3928
3929 LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3930 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3931
3932#ifdef __DML_VBA_DEBUG__
3933 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3934 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3935 __func__, *DestinationLinesToRequestVMInVBlank);
3936 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3937 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3938 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3939 __func__, *DestinationLinesToRequestRowInVBlank);
3940 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3941 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3942#endif
3943
3944 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3945 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3946 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
3947#ifdef __DML_VBA_DEBUG__
3948 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3949 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3950 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3951#endif
3952 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3953 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3954 *VRatioPrefetchY =
3955 dml_max(a: (double) PrefetchSourceLinesY /
3956 LinesToRequestPrefetchPixelData,
3957 b: (double) MaxNumSwathY * SwathHeightY /
3958 (LinesToRequestPrefetchPixelData -
3959 (VInitPreFillY - 3.0) / 2.0));
3960 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
3961 } else {
3962 MyError = true;
3963 *VRatioPrefetchY = 0;
3964 }
3965#ifdef __DML_VBA_DEBUG__
3966 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3967 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3968 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3969#endif
3970 }
3971
3972 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3973 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
3974
3975#ifdef __DML_VBA_DEBUG__
3976 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3977 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3978 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3979#endif
3980 if ((SwathHeightC > 4)) {
3981 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3982 *VRatioPrefetchC =
3983 dml_max(a: *VRatioPrefetchC,
3984 b: (double) MaxNumSwathC * SwathHeightC /
3985 (LinesToRequestPrefetchPixelData -
3986 (VInitPreFillC - 3.0) / 2.0));
3987 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
3988 } else {
3989 MyError = true;
3990 *VRatioPrefetchC = 0;
3991 }
3992#ifdef __DML_VBA_DEBUG__
3993 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3994 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3995 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3996#endif
3997 }
3998
3999 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
4000 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
4001 / LineTime;
4002
4003#ifdef __DML_VBA_DEBUG__
4004 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
4005 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
4006 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4007 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
4008 __func__, *RequiredPrefetchPixDataBWLuma);
4009#endif
4010 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
4011 LinesToRequestPrefetchPixelData
4012 * myPipe->BytePerPixelC
4013 * swath_width_chroma_ub / LineTime;
4014 } else {
4015 MyError = true;
4016#ifdef __DML_VBA_DEBUG__
4017 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
4018 __func__, LinesToRequestPrefetchPixelData);
4019#endif
4020 *VRatioPrefetchY = 0;
4021 *VRatioPrefetchC = 0;
4022 *RequiredPrefetchPixDataBWLuma = 0;
4023 *RequiredPrefetchPixDataBWChroma = 0;
4024 }
4025#ifdef __DML_VBA_DEBUG__
4026 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4027 (double)LinesToRequestPrefetchPixelData * LineTime +
4028 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4029 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4030 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4031 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4032 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4033 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4034 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4035 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4036 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4037 PixelPTEBytesPerRow);
4038#endif
4039 } else {
4040 MyError = true;
4041#ifdef __DML_VBA_DEBUG__
4042 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4043 __func__, dst_y_prefetch_equ);
4044#endif
4045 }
4046
4047 {
4048 double prefetch_vm_bw;
4049 double prefetch_row_bw;
4050
4051 if (PDEAndMetaPTEBytesFrame == 0) {
4052 prefetch_vm_bw = 0;
4053 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
4054#ifdef __DML_VBA_DEBUG__
4055 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4056 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4057 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4058 __func__, *DestinationLinesToRequestVMInVBlank);
4059 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4060#endif
4061 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4062 (*DestinationLinesToRequestVMInVBlank * LineTime);
4063#ifdef __DML_VBA_DEBUG__
4064 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4065#endif
4066 } else {
4067 prefetch_vm_bw = 0;
4068 MyError = true;
4069#ifdef __DML_VBA_DEBUG__
4070 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4071 __func__, *DestinationLinesToRequestVMInVBlank);
4072#endif
4073 }
4074
4075 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4076 prefetch_row_bw = 0;
4077 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4078 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4079 (*DestinationLinesToRequestRowInVBlank * LineTime);
4080
4081#ifdef __DML_VBA_DEBUG__
4082 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4083 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4084 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4085 __func__, *DestinationLinesToRequestRowInVBlank);
4086 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4087#endif
4088 } else {
4089 prefetch_row_bw = 0;
4090 MyError = true;
4091#ifdef __DML_VBA_DEBUG__
4092 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4093 __func__, *DestinationLinesToRequestRowInVBlank);
4094#endif
4095 }
4096
4097 *prefetch_vmrow_bw = dml_max(a: prefetch_vm_bw, b: prefetch_row_bw);
4098 }
4099
4100 if (MyError) {
4101 *PrefetchBandwidth = 0;
4102 TimeForFetchingMetaPTE = 0;
4103 TimeForFetchingRowInVBlank = 0;
4104 *DestinationLinesToRequestVMInVBlank = 0;
4105 *DestinationLinesToRequestRowInVBlank = 0;
4106 *DestinationLinesForPrefetch = 0;
4107 LinesToRequestPrefetchPixelData = 0;
4108 *VRatioPrefetchY = 0;
4109 *VRatioPrefetchC = 0;
4110 *RequiredPrefetchPixDataBWLuma = 0;
4111 *RequiredPrefetchPixDataBWChroma = 0;
4112 }
4113
4114 return MyError;
4115} // CalculatePrefetchSchedule
4116
4117void dml32_CalculateFlipSchedule(
4118 double HostVMInefficiencyFactor,
4119 double UrgentExtraLatency,
4120 double UrgentLatency,
4121 unsigned int GPUVMMaxPageTableLevels,
4122 bool HostVMEnable,
4123 unsigned int HostVMMaxNonCachedPageTableLevels,
4124 bool GPUVMEnable,
4125 double HostVMMinPageSize,
4126 double PDEAndMetaPTEBytesPerFrame,
4127 double MetaRowBytes,
4128 double DPTEBytesPerRow,
4129 double BandwidthAvailableForImmediateFlip,
4130 unsigned int TotImmediateFlipBytes,
4131 enum source_format_class SourcePixelFormat,
4132 double LineTime,
4133 double VRatio,
4134 double VRatioChroma,
4135 double Tno_bw,
4136 bool DCCEnable,
4137 unsigned int dpte_row_height,
4138 unsigned int meta_row_height,
4139 unsigned int dpte_row_height_chroma,
4140 unsigned int meta_row_height_chroma,
4141 bool use_one_row_for_frame_flip,
4142
4143 /* Output */
4144 double *DestinationLinesToRequestVMInImmediateFlip,
4145 double *DestinationLinesToRequestRowInImmediateFlip,
4146 double *final_flip_bw,
4147 bool *ImmediateFlipSupportedForPipe)
4148{
4149 double min_row_time = 0.0;
4150 unsigned int HostVMDynamicLevelsTrips;
4151 double TimeForFetchingMetaPTEImmediateFlip;
4152 double TimeForFetchingRowInVBlankImmediateFlip;
4153 double ImmediateFlipBW = 1.0;
4154
4155 if (GPUVMEnable == true && HostVMEnable == true)
4156 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4157 else
4158 HostVMDynamicLevelsTrips = 0;
4159
4160#ifdef __DML_VBA_DEBUG__
4161 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4162 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4163#endif
4164
4165 if (TotImmediateFlipBytes > 0) {
4166 if (use_one_row_for_frame_flip) {
4167 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4168 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4169 } else {
4170 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4171 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4172 }
4173 if (GPUVMEnable == true) {
4174 TimeForFetchingMetaPTEImmediateFlip = dml_max3(a: Tno_bw + PDEAndMetaPTEBytesPerFrame *
4175 HostVMInefficiencyFactor / ImmediateFlipBW,
4176 b: UrgentExtraLatency + UrgentLatency *
4177 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4178 c: LineTime / 4.0);
4179 } else {
4180 TimeForFetchingMetaPTEImmediateFlip = 0;
4181 }
4182 if ((GPUVMEnable == true || DCCEnable == true)) {
4183 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4184 a: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4185 b: UrgentLatency * (HostVMDynamicLevelsTrips + 1), c: LineTime / 4.0);
4186 } else {
4187 TimeForFetchingRowInVBlankImmediateFlip = 0;
4188 }
4189
4190 *DestinationLinesToRequestVMInImmediateFlip =
4191 dml_ceil(a: 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), granularity: 1.0) / 4.0;
4192 *DestinationLinesToRequestRowInImmediateFlip =
4193 dml_ceil(a: 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), granularity: 1.0) / 4.0;
4194
4195 if (GPUVMEnable == true) {
4196 *final_flip_bw = dml_max(a: PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4197 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4198 b: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4199 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4200 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4201 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4202 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4203 } else {
4204 *final_flip_bw = 0;
4205 }
4206 } else {
4207 TimeForFetchingMetaPTEImmediateFlip = 0;
4208 TimeForFetchingRowInVBlankImmediateFlip = 0;
4209 *DestinationLinesToRequestVMInImmediateFlip = 0;
4210 *DestinationLinesToRequestRowInImmediateFlip = 0;
4211 *final_flip_bw = 0;
4212 }
4213
4214 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4215 if (GPUVMEnable == true && DCCEnable != true) {
4216 min_row_time = dml_min(a: dpte_row_height *
4217 LineTime / VRatio, b: dpte_row_height_chroma * LineTime / VRatioChroma);
4218 } else if (GPUVMEnable != true && DCCEnable == true) {
4219 min_row_time = dml_min(a: meta_row_height *
4220 LineTime / VRatio, b: meta_row_height_chroma * LineTime / VRatioChroma);
4221 } else {
4222 min_row_time = dml_min4(a: dpte_row_height * LineTime / VRatio, b: meta_row_height *
4223 LineTime / VRatio, c: dpte_row_height_chroma * LineTime /
4224 VRatioChroma, d: meta_row_height_chroma * LineTime / VRatioChroma);
4225 }
4226 } else {
4227 if (GPUVMEnable == true && DCCEnable != true) {
4228 min_row_time = dpte_row_height * LineTime / VRatio;
4229 } else if (GPUVMEnable != true && DCCEnable == true) {
4230 min_row_time = meta_row_height * LineTime / VRatio;
4231 } else {
4232 min_row_time =
4233 dml_min(a: dpte_row_height * LineTime / VRatio, b: meta_row_height * LineTime / VRatio);
4234 }
4235 }
4236
4237 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4238 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4239 > min_row_time) {
4240 *ImmediateFlipSupportedForPipe = false;
4241 } else {
4242 *ImmediateFlipSupportedForPipe = true;
4243 }
4244
4245#ifdef __DML_VBA_DEBUG__
4246 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4247 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4248 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4249 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4250 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4251 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4252 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4253 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4254 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4255 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4256 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4257#endif
4258} // CalculateFlipSchedule
4259
4260void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4261 struct vba_vars_st *v,
4262 unsigned int PrefetchMode,
4263 double DCFCLK,
4264 double ReturnBW,
4265 SOCParametersList mmSOCParameters,
4266 double SOCCLK,
4267 double DCFClkDeepSleep,
4268 unsigned int DETBufferSizeY[],
4269 unsigned int DETBufferSizeC[],
4270 unsigned int SwathHeightY[],
4271 unsigned int SwathHeightC[],
4272 double SwathWidthY[],
4273 double SwathWidthC[],
4274 unsigned int DPPPerSurface[],
4275 double BytePerPixelDETY[],
4276 double BytePerPixelDETC[],
4277 double DSTXAfterScaler[],
4278 double DSTYAfterScaler[],
4279 bool UnboundedRequestEnabled,
4280 unsigned int CompressedBufferSizeInkByte,
4281
4282 /* Output */
4283 enum clock_change_support *DRAMClockChangeSupport,
4284 double MaxActiveDRAMClockChangeLatencySupported[],
4285 unsigned int SubViewportLinesNeededInMALL[],
4286 enum dm_fclock_change_support *FCLKChangeSupport,
4287 double *MinActiveFCLKChangeLatencySupported,
4288 bool *USRRetrainingSupport,
4289 double ActiveDRAMClockChangeLatencyMargin[])
4290{
4291 unsigned int i, j, k;
4292 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4293 unsigned int DRAMClockChangeSupportNumber = 0;
4294 unsigned int LastSurfaceWithoutMargin;
4295 unsigned int DRAMClockChangeMethod = 0;
4296 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4297 double MinActiveFCLKChangeMargin = 0.;
4298 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4299 double ActiveClockChangeLatencyHidingY;
4300 double ActiveClockChangeLatencyHidingC;
4301 double ActiveClockChangeLatencyHiding;
4302 double EffectiveDETBufferSizeY;
4303 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4304 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4305 double TotalPixelBW = 0.0;
4306 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4307 double EffectiveLBLatencyHidingY;
4308 double EffectiveLBLatencyHidingC;
4309 double LinesInDETY[DC__NUM_DPP__MAX];
4310 double LinesInDETC[DC__NUM_DPP__MAX];
4311 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4312 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4313 double FullDETBufferingTimeY;
4314 double FullDETBufferingTimeC;
4315 double WritebackDRAMClockChangeLatencyMargin;
4316 double WritebackFCLKChangeLatencyMargin;
4317 double WritebackLatencyHiding;
4318 bool SameTimingForFCLKChange;
4319
4320 unsigned int TotalActiveWriteback = 0;
4321 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4322 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4323
4324 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4325 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4326 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4327 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4328 v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4329 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4330 + 10 / DCFClkDeepSleep;
4331 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4332 + 10 / DCFClkDeepSleep;
4333 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4334 + 10 / DCFClkDeepSleep;
4335 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4336 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4337
4338#ifdef __DML_VBA_DEBUG__
4339 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4340 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4341 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4342 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4343 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4344 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4345 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4346 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4347 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4348 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4349 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4350 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4351#endif
4352
4353
4354 TotalActiveWriteback = 0;
4355 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4356 if (v->WritebackEnable[k] == true)
4357 TotalActiveWriteback = TotalActiveWriteback + 1;
4358 }
4359
4360 if (TotalActiveWriteback <= 1) {
4361 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4362 } else {
4363 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4364 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4365 }
4366 if (v->USRRetrainingRequiredFinal)
4367 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4368 + mmSOCParameters.USRRetrainingLatency;
4369
4370 if (TotalActiveWriteback <= 1) {
4371 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4372 + mmSOCParameters.WritebackLatency;
4373 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4374 + mmSOCParameters.WritebackLatency;
4375 } else {
4376 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4377 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4378 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4379 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4380 }
4381
4382 if (v->USRRetrainingRequiredFinal)
4383 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4384 + mmSOCParameters.USRRetrainingLatency;
4385
4386 if (v->USRRetrainingRequiredFinal)
4387 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4388 + mmSOCParameters.USRRetrainingLatency;
4389
4390#ifdef __DML_VBA_DEBUG__
4391 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4392 __func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4393 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4394 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4395 dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4396 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4397#endif
4398
4399 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4400 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4401 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4402 }
4403
4404 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4405
4406 LBLatencyHidingSourceLinesY[k] = dml_min(a: (double) v->MaxLineBufferLines, b: dml_floor(a: v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(a: v->HRatio[k], b: 1.0)), granularity: 1)) - (v->vtaps[k] - 1);
4407 LBLatencyHidingSourceLinesC[k] = dml_min(a: (double) v->MaxLineBufferLines, b: dml_floor(a: v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(a: v->HRatioChroma[k], b: 1.0)), granularity: 1)) - (v->VTAPsChroma[k] - 1);
4408
4409
4410#ifdef __DML_VBA_DEBUG__
4411 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4412 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
4413 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
4414 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
4415 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]);
4416#endif
4417
4418 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4419 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4420 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4421
4422 if (UnboundedRequestEnabled) {
4423 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4424 + CompressedBufferSizeInkByte * 1024
4425 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4426 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4427 }
4428
4429 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4430 LinesInDETYRoundedDownToSwath[k] = dml_floor(a: LinesInDETY[k], granularity: SwathHeightY[k]);
4431 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4432
4433 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4434 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4435
4436 if (v->NumberOfActiveSurfaces > 1) {
4437 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4438 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4439 / v->PixelClock[k] / v->VRatio[k];
4440 }
4441
4442 if (BytePerPixelDETC[k] > 0) {
4443 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4444 LinesInDETCRoundedDownToSwath[k] = dml_floor(a: LinesInDETC[k], granularity: SwathHeightC[k]);
4445 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4446 / v->VRatioChroma[k];
4447 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4448 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4449 / v->PixelClock[k];
4450 if (v->NumberOfActiveSurfaces > 1) {
4451 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4452 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4453 / v->PixelClock[k] / v->VRatioChroma[k];
4454 }
4455 ActiveClockChangeLatencyHiding = dml_min(a: ActiveClockChangeLatencyHidingY,
4456 b: ActiveClockChangeLatencyHidingC);
4457 } else {
4458 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4459 }
4460
4461 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4462 - v->Watermark.DRAMClockChangeWatermark;
4463 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4464 - v->Watermark.FCLKChangeWatermark;
4465 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4466
4467 if (v->WritebackEnable[k]) {
4468 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4469 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4470 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4471 if (v->WritebackPixelFormat[k] == dm_444_64)
4472 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4473
4474 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4475 - v->Watermark.WritebackDRAMClockChangeWatermark;
4476
4477 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4478 - v->Watermark.WritebackFCLKChangeWatermark;
4479
4480 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(a: ActiveDRAMClockChangeLatencyMargin[k],
4481 b: WritebackFCLKChangeLatencyMargin);
4482 ActiveFCLKChangeLatencyMargin[k] = dml_min(a: ActiveFCLKChangeLatencyMargin[k],
4483 b: WritebackDRAMClockChangeLatencyMargin);
4484 }
4485 MaxActiveDRAMClockChangeLatencySupported[k] =
4486 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4487 0 :
4488 (ActiveDRAMClockChangeLatencyMargin[k]
4489 + mmSOCParameters.DRAMClockChangeLatency);
4490 }
4491
4492 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4493 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4494 if (i == j ||
4495 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4496 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4497 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4498 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4499 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4500 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4501 (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4502 SynchronizedSurfaces[i][j] = true;
4503 } else {
4504 SynchronizedSurfaces[i][j] = false;
4505 }
4506 }
4507 }
4508
4509 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4510 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4511 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4512 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4513 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4514 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4515 SurfaceWithMinActiveFCLKChangeMargin = k;
4516 }
4517 }
4518
4519 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4520
4521 SameTimingForFCLKChange = true;
4522 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4523 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4524 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4525 (SameTimingForFCLKChange ||
4526 ActiveFCLKChangeLatencyMargin[k] <
4527 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4528 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4529 }
4530 SameTimingForFCLKChange = false;
4531 }
4532 }
4533
4534 if (MinActiveFCLKChangeMargin > 0) {
4535 *FCLKChangeSupport = dm_fclock_change_vactive;
4536 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4537 (PrefetchMode <= 1)) {
4538 *FCLKChangeSupport = dm_fclock_change_vblank;
4539 } else {
4540 *FCLKChangeSupport = dm_fclock_change_unsupported;
4541 }
4542
4543 *USRRetrainingSupport = true;
4544 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4545 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4546 (USRRetrainingLatencyMargin[k] < 0)) {
4547 *USRRetrainingSupport = false;
4548 }
4549 }
4550
4551 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4552 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4553 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4554 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4555 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4556 if (PrefetchMode > 0) {
4557 DRAMClockChangeSupportNumber = 2;
4558 } else if (DRAMClockChangeSupportNumber == 0) {
4559 DRAMClockChangeSupportNumber = 1;
4560 LastSurfaceWithoutMargin = k;
4561 } else if (DRAMClockChangeSupportNumber == 1 &&
4562 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4563 DRAMClockChangeSupportNumber = 2;
4564 }
4565 }
4566 }
4567
4568 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4569 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4570 DRAMClockChangeMethod = 1;
4571 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4572 DRAMClockChangeMethod = 2;
4573 }
4574
4575 if (DRAMClockChangeMethod == 0) {
4576 if (DRAMClockChangeSupportNumber == 0)
4577 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4578 else if (DRAMClockChangeSupportNumber == 1)
4579 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4580 else
4581 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4582 } else if (DRAMClockChangeMethod == 1) {
4583 if (DRAMClockChangeSupportNumber == 0)
4584 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4585 else if (DRAMClockChangeSupportNumber == 1)
4586 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4587 else
4588 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4589 } else {
4590 if (DRAMClockChangeSupportNumber == 0)
4591 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4592 else if (DRAMClockChangeSupportNumber == 1)
4593 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4594 else
4595 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4596 }
4597
4598 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4599 unsigned int dst_y_pstate;
4600 unsigned int src_y_pstate_l;
4601 unsigned int src_y_pstate_c;
4602 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4603
4604 dst_y_pstate = dml_ceil(a: (mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), granularity: 1);
4605 src_y_pstate_l = dml_ceil(a: dst_y_pstate * v->VRatio[k], granularity: SwathHeightY[k]);
4606 src_y_ahead_l = dml_floor(a: DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], granularity: SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4607 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4608
4609#ifdef __DML_VBA_DEBUG__
4610dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
4611dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
4612dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
4613dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
4614dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4615dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate);
4616dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l);
4617dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l);
4618dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]);
4619dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l);
4620#endif
4621 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4622
4623 if (BytePerPixelDETC[k] > 0) {
4624 src_y_pstate_c = dml_ceil(a: dst_y_pstate * v->VRatioChroma[k], granularity: SwathHeightC[k]);
4625 src_y_ahead_c = dml_floor(a: DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], granularity: SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4626 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4627 SubViewportLinesNeededInMALL[k] = dml_max(a: sub_vp_lines_l, b: sub_vp_lines_c);
4628
4629#ifdef __DML_VBA_DEBUG__
4630dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c);
4631dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c);
4632dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4633dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c);
4634#endif
4635 }
4636 }
4637#ifdef __DML_VBA_DEBUG__
4638 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4639 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4640 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4641 __func__, *MinActiveFCLKChangeLatencySupported);
4642 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4643#endif
4644} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4645
4646double dml32_CalculateWriteBackDISPCLK(
4647 enum source_format_class WritebackPixelFormat,
4648 double PixelClock,
4649 double WritebackHRatio,
4650 double WritebackVRatio,
4651 unsigned int WritebackHTaps,
4652 unsigned int WritebackVTaps,
4653 unsigned int WritebackSourceWidth,
4654 unsigned int WritebackDestinationWidth,
4655 unsigned int HTotal,
4656 unsigned int WritebackLineBufferSize,
4657 double DISPCLKDPPCLKVCOSpeed)
4658{
4659 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4660
4661 DISPCLK_H = PixelClock * dml_ceil(a: WritebackHTaps / 8.0, granularity: 1) / WritebackHRatio;
4662 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(a: WritebackDestinationWidth / 6.0, granularity: 1) + 8.0) / HTotal;
4663 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4664 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4665 return dml32_RoundToDFSGranularity(Clock: dml_max3(a: DISPCLK_H, b: DISPCLK_V, c: DISPCLK_HB), round_up: 1, VCOSpeed: DISPCLKDPPCLKVCOSpeed);
4666}
4667
4668void dml32_CalculateMinAndMaxPrefetchMode(
4669 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4670 unsigned int *MinPrefetchMode,
4671 unsigned int *MaxPrefetchMode)
4672{
4673 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4674 *MinPrefetchMode = 3;
4675 *MaxPrefetchMode = 3;
4676 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4677 *MinPrefetchMode = 2;
4678 *MaxPrefetchMode = 2;
4679 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4680 *MinPrefetchMode = 1;
4681 *MaxPrefetchMode = 1;
4682 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4683 *MinPrefetchMode = 0;
4684 *MaxPrefetchMode = 0;
4685 } else {
4686 *MinPrefetchMode = 0;
4687 *MaxPrefetchMode = 3;
4688 }
4689} // CalculateMinAndMaxPrefetchMode
4690
4691void dml32_CalculatePixelDeliveryTimes(
4692 unsigned int NumberOfActiveSurfaces,
4693 double VRatio[],
4694 double VRatioChroma[],
4695 double VRatioPrefetchY[],
4696 double VRatioPrefetchC[],
4697 unsigned int swath_width_luma_ub[],
4698 unsigned int swath_width_chroma_ub[],
4699 unsigned int DPPPerSurface[],
4700 double HRatio[],
4701 double HRatioChroma[],
4702 double PixelClock[],
4703 double PSCL_THROUGHPUT[],
4704 double PSCL_THROUGHPUT_CHROMA[],
4705 double Dppclk[],
4706 unsigned int BytePerPixelC[],
4707 enum dm_rotation_angle SourceRotation[],
4708 unsigned int NumberOfCursors[],
4709 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
4710 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
4711 unsigned int BlockWidth256BytesY[],
4712 unsigned int BlockHeight256BytesY[],
4713 unsigned int BlockWidth256BytesC[],
4714 unsigned int BlockHeight256BytesC[],
4715
4716 /* Output */
4717 double DisplayPipeLineDeliveryTimeLuma[],
4718 double DisplayPipeLineDeliveryTimeChroma[],
4719 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4720 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4721 double DisplayPipeRequestDeliveryTimeLuma[],
4722 double DisplayPipeRequestDeliveryTimeChroma[],
4723 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4724 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4725 double CursorRequestDeliveryTime[],
4726 double CursorRequestDeliveryTimePrefetch[])
4727{
4728 double req_per_swath_ub;
4729 unsigned int k;
4730
4731 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4732
4733#ifdef __DML_VBA_DEBUG__
4734 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4735 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4736 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4737 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4738 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4739 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4740 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4741 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4742 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4743 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4744 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4745#endif
4746
4747 if (VRatio[k] <= 1) {
4748 DisplayPipeLineDeliveryTimeLuma[k] =
4749 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4750 } else {
4751 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4752 }
4753
4754 if (BytePerPixelC[k] == 0) {
4755 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4756 } else {
4757 if (VRatioChroma[k] <= 1) {
4758 DisplayPipeLineDeliveryTimeChroma[k] =
4759 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4760 } else {
4761 DisplayPipeLineDeliveryTimeChroma[k] =
4762 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4763 }
4764 }
4765
4766 if (VRatioPrefetchY[k] <= 1) {
4767 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4768 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4769 } else {
4770 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4771 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4772 }
4773
4774 if (BytePerPixelC[k] == 0) {
4775 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4776 } else {
4777 if (VRatioPrefetchC[k] <= 1) {
4778 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4779 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4780 } else {
4781 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4782 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4783 }
4784 }
4785#ifdef __DML_VBA_DEBUG__
4786 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4787 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4788 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4789 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4790 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4791 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4792 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4793 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4794#endif
4795 }
4796
4797 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4798 if (!IsVertical(Scan: SourceRotation[k]))
4799 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4800 else
4801 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4802#ifdef __DML_VBA_DEBUG__
4803 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4804#endif
4805
4806 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4807 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4808 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4809 if (BytePerPixelC[k] == 0) {
4810 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4811 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4812 } else {
4813 if (!IsVertical(Scan: SourceRotation[k]))
4814 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4815 else
4816 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4817#ifdef __DML_VBA_DEBUG__
4818 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4819#endif
4820 DisplayPipeRequestDeliveryTimeChroma[k] =
4821 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4822 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4823 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4824 }
4825#ifdef __DML_VBA_DEBUG__
4826 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4827 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4828 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4829 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4830 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4831 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4832 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4833 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4834#endif
4835 }
4836
4837 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4838 unsigned int cursor_req_per_width;
4839
4840 cursor_req_per_width = dml_ceil(a: (double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4841 256.0 / 8.0, granularity: 1.0);
4842 if (NumberOfCursors[k] > 0) {
4843 if (VRatio[k] <= 1) {
4844 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4845 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4846 } else {
4847 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4848 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4849 }
4850 if (VRatioPrefetchY[k] <= 1) {
4851 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4852 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4853 } else {
4854 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4855 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4856 }
4857 } else {
4858 CursorRequestDeliveryTime[k] = 0;
4859 CursorRequestDeliveryTimePrefetch[k] = 0;
4860 }
4861#ifdef __DML_VBA_DEBUG__
4862 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4863 __func__, k, NumberOfCursors[k]);
4864 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4865 __func__, k, CursorRequestDeliveryTime[k]);
4866 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4867 __func__, k, CursorRequestDeliveryTimePrefetch[k]);
4868#endif
4869 }
4870} // CalculatePixelDeliveryTimes
4871
4872void dml32_CalculateMetaAndPTETimes(
4873 bool use_one_row_for_frame[],
4874 unsigned int NumberOfActiveSurfaces,
4875 bool GPUVMEnable,
4876 unsigned int MetaChunkSize,
4877 unsigned int MinMetaChunkSizeBytes,
4878 unsigned int HTotal[],
4879 double VRatio[],
4880 double VRatioChroma[],
4881 double DestinationLinesToRequestRowInVBlank[],
4882 double DestinationLinesToRequestRowInImmediateFlip[],
4883 bool DCCEnable[],
4884 double PixelClock[],
4885 unsigned int BytePerPixelY[],
4886 unsigned int BytePerPixelC[],
4887 enum dm_rotation_angle SourceRotation[],
4888 unsigned int dpte_row_height[],
4889 unsigned int dpte_row_height_chroma[],
4890 unsigned int meta_row_width[],
4891 unsigned int meta_row_width_chroma[],
4892 unsigned int meta_row_height[],
4893 unsigned int meta_row_height_chroma[],
4894 unsigned int meta_req_width[],
4895 unsigned int meta_req_width_chroma[],
4896 unsigned int meta_req_height[],
4897 unsigned int meta_req_height_chroma[],
4898 unsigned int dpte_group_bytes[],
4899 unsigned int PTERequestSizeY[],
4900 unsigned int PTERequestSizeC[],
4901 unsigned int PixelPTEReqWidthY[],
4902 unsigned int PixelPTEReqHeightY[],
4903 unsigned int PixelPTEReqWidthC[],
4904 unsigned int PixelPTEReqHeightC[],
4905 unsigned int dpte_row_width_luma_ub[],
4906 unsigned int dpte_row_width_chroma_ub[],
4907
4908 /* Output */
4909 double DST_Y_PER_PTE_ROW_NOM_L[],
4910 double DST_Y_PER_PTE_ROW_NOM_C[],
4911 double DST_Y_PER_META_ROW_NOM_L[],
4912 double DST_Y_PER_META_ROW_NOM_C[],
4913 double TimePerMetaChunkNominal[],
4914 double TimePerChromaMetaChunkNominal[],
4915 double TimePerMetaChunkVBlank[],
4916 double TimePerChromaMetaChunkVBlank[],
4917 double TimePerMetaChunkFlip[],
4918 double TimePerChromaMetaChunkFlip[],
4919 double time_per_pte_group_nom_luma[],
4920 double time_per_pte_group_vblank_luma[],
4921 double time_per_pte_group_flip_luma[],
4922 double time_per_pte_group_nom_chroma[],
4923 double time_per_pte_group_vblank_chroma[],
4924 double time_per_pte_group_flip_chroma[])
4925{
4926 unsigned int meta_chunk_width;
4927 unsigned int min_meta_chunk_width;
4928 unsigned int meta_chunk_per_row_int;
4929 unsigned int meta_row_remainder;
4930 unsigned int meta_chunk_threshold;
4931 unsigned int meta_chunks_per_row_ub;
4932 unsigned int meta_chunk_width_chroma;
4933 unsigned int min_meta_chunk_width_chroma;
4934 unsigned int meta_chunk_per_row_int_chroma;
4935 unsigned int meta_row_remainder_chroma;
4936 unsigned int meta_chunk_threshold_chroma;
4937 unsigned int meta_chunks_per_row_ub_chroma;
4938 unsigned int dpte_group_width_luma;
4939 unsigned int dpte_groups_per_row_luma_ub;
4940 unsigned int dpte_group_width_chroma;
4941 unsigned int dpte_groups_per_row_chroma_ub;
4942 unsigned int k;
4943
4944 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4945 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4946 if (BytePerPixelC[k] == 0)
4947 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4948 else
4949 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4950 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4951 if (BytePerPixelC[k] == 0)
4952 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4953 else
4954 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4955 }
4956
4957 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4958 if (DCCEnable[k] == true) {
4959 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4960 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4961 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4962 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4963 if (!IsVertical(Scan: SourceRotation[k]))
4964 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4965 else
4966 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4967
4968 if (meta_row_remainder <= meta_chunk_threshold)
4969 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4970 else
4971 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4972
4973 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4974 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4975 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4976 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4977 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4978 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4979 if (BytePerPixelC[k] == 0) {
4980 TimePerChromaMetaChunkNominal[k] = 0;
4981 TimePerChromaMetaChunkVBlank[k] = 0;
4982 TimePerChromaMetaChunkFlip[k] = 0;
4983 } else {
4984 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4985 meta_row_height_chroma[k];
4986 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4987 meta_row_height_chroma[k];
4988 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4989 meta_chunk_width_chroma;
4990 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4991 if (!IsVertical(Scan: SourceRotation[k])) {
4992 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4993 meta_req_width_chroma[k];
4994 } else {
4995 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4996 meta_req_height_chroma[k];
4997 }
4998 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4999 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5000 else
5001 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5002
5003 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
5004 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5005 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
5006 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5007 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5008 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5009 }
5010 } else {
5011 TimePerMetaChunkNominal[k] = 0;
5012 TimePerMetaChunkVBlank[k] = 0;
5013 TimePerMetaChunkFlip[k] = 0;
5014 TimePerChromaMetaChunkNominal[k] = 0;
5015 TimePerChromaMetaChunkVBlank[k] = 0;
5016 TimePerChromaMetaChunkFlip[k] = 0;
5017 }
5018 }
5019
5020 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5021 if (GPUVMEnable == true) {
5022 if (!IsVertical(Scan: SourceRotation[k])) {
5023 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5024 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5025 } else {
5026 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5027 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5028 }
5029
5030 if (use_one_row_for_frame[k]) {
5031 dpte_groups_per_row_luma_ub = dml_ceil(a: (double) dpte_row_width_luma_ub[k] /
5032 (double) dpte_group_width_luma / 2.0, granularity: 1.0);
5033 } else {
5034 dpte_groups_per_row_luma_ub = dml_ceil(a: (double) dpte_row_width_luma_ub[k] /
5035 (double) dpte_group_width_luma, granularity: 1.0);
5036 }
5037#ifdef __DML_VBA_DEBUG__
5038 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
5039 __func__, k, use_one_row_for_frame[k]);
5040 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
5041 __func__, k, dpte_group_bytes[k]);
5042 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
5043 __func__, k, PTERequestSizeY[k]);
5044 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
5045 __func__, k, PixelPTEReqWidthY[k]);
5046 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
5047 __func__, k, PixelPTEReqHeightY[k]);
5048 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
5049 __func__, k, dpte_row_width_luma_ub[k]);
5050 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
5051 __func__, k, dpte_group_width_luma);
5052 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
5053 __func__, k, dpte_groups_per_row_luma_ub);
5054#endif
5055
5056 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5057 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5058 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5059 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5060 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5061 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5062 if (BytePerPixelC[k] == 0) {
5063 time_per_pte_group_nom_chroma[k] = 0;
5064 time_per_pte_group_vblank_chroma[k] = 0;
5065 time_per_pte_group_flip_chroma[k] = 0;
5066 } else {
5067 if (!IsVertical(Scan: SourceRotation[k])) {
5068 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5069 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5070 } else {
5071 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5072 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5073 }
5074
5075 if (use_one_row_for_frame[k]) {
5076 dpte_groups_per_row_chroma_ub = dml_ceil(a: (double) dpte_row_width_chroma_ub[k] /
5077 (double) dpte_group_width_chroma / 2.0, granularity: 1.0);
5078 } else {
5079 dpte_groups_per_row_chroma_ub = dml_ceil(a: (double) dpte_row_width_chroma_ub[k] /
5080 (double) dpte_group_width_chroma, granularity: 1.0);
5081 }
5082#ifdef __DML_VBA_DEBUG__
5083 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
5084 __func__, k, dpte_row_width_chroma_ub[k]);
5085 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
5086 __func__, k, dpte_group_width_chroma);
5087 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
5088 __func__, k, dpte_groups_per_row_chroma_ub);
5089#endif
5090 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5091 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5092 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5093 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5094 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5095 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5096 }
5097 } else {
5098 time_per_pte_group_nom_luma[k] = 0;
5099 time_per_pte_group_vblank_luma[k] = 0;
5100 time_per_pte_group_flip_luma[k] = 0;
5101 time_per_pte_group_nom_chroma[k] = 0;
5102 time_per_pte_group_vblank_chroma[k] = 0;
5103 time_per_pte_group_flip_chroma[k] = 0;
5104 }
5105#ifdef __DML_VBA_DEBUG__
5106 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
5107 __func__, k, DestinationLinesToRequestRowInVBlank[k]);
5108 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
5109 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5110 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
5111 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5112 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
5113 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5114 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
5115 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5116 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
5117 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5118 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
5119 __func__, k, TimePerMetaChunkNominal[k]);
5120 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
5121 __func__, k, TimePerMetaChunkVBlank[k]);
5122 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
5123 __func__, k, TimePerMetaChunkFlip[k]);
5124 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
5125 __func__, k, TimePerChromaMetaChunkNominal[k]);
5126 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
5127 __func__, k, TimePerChromaMetaChunkVBlank[k]);
5128 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
5129 __func__, k, TimePerChromaMetaChunkFlip[k]);
5130 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
5131 __func__, k, time_per_pte_group_nom_luma[k]);
5132 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
5133 __func__, k, time_per_pte_group_vblank_luma[k]);
5134 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
5135 __func__, k, time_per_pte_group_flip_luma[k]);
5136 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
5137 __func__, k, time_per_pte_group_nom_chroma[k]);
5138 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5139 __func__, k, time_per_pte_group_vblank_chroma[k]);
5140 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
5141 __func__, k, time_per_pte_group_flip_chroma[k]);
5142#endif
5143 }
5144} // CalculateMetaAndPTETimes
5145
/*
 * dml32_CalculateVMGroupAndRequestTimes - time budget per VM group and per
 * VM request, for each active surface.
 *
 * For surface k the function counts:
 *   - "groups":   dml_ceil(bytes / vm_group_bytes[k]) summed over the page
 *                 table data that has to be walked, and
 *   - "requests": bytes / 64 (integer division) summed over the same data.
 *
 * Which data is counted:
 *   - DCC disabled:                       dpde0 bytes only
 *   - DCC enabled, one page table level:  meta PTE bytes only
 *   - DCC enabled, other level counts:    dpde0 + meta PTE bytes, plus a
 *                                         constant 1 group (2 when the
 *                                         surface has a chroma plane)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 *
 * Each output is then
 *   DestinationLinesToRequestVM...[k] * HTotal[k] / PixelClock[k] / count
 * and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * All four outputs are written as 0 for a surface when GPUVM is disabled,
 * or when DCC is off and GPUVMMaxPageTableLevels <= 1.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read here.
 *
 * NOTE(review): a count of zero is not guarded against; the division then
 * happens in floating point.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to walk for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			const bool walk_meta = DCCEnable[k];
			const bool walk_dpde = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
			double group_total = 0.0;
			unsigned int group_count;
			unsigned int request_count = 0;

			if (walk_dpde) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (walk_meta) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* Both kinds walked: one extra group per plane. */
			if (walk_dpde && walk_meta)
				group_total += has_chroma ? 2 : 1;

			/* The group count is kept as an unsigned integer, as before. */
			group_count = group_total;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / request_count;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / request_count;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5299
5300void dml32_CalculateDCCConfiguration(
5301 bool DCCEnabled,
5302 bool DCCProgrammingAssumesScanDirectionUnknown,
5303 enum source_format_class SourcePixelFormat,
5304 unsigned int SurfaceWidthLuma,
5305 unsigned int SurfaceWidthChroma,
5306 unsigned int SurfaceHeightLuma,
5307 unsigned int SurfaceHeightChroma,
5308 unsigned int nomDETInKByte,
5309 unsigned int RequestHeight256ByteLuma,
5310 unsigned int RequestHeight256ByteChroma,
5311 enum dm_swizzle_mode TilingFormat,
5312 unsigned int BytePerPixelY,
5313 unsigned int BytePerPixelC,
5314 double BytePerPixelDETY,
5315 double BytePerPixelDETC,
5316 enum dm_rotation_angle SourceRotation,
5317 /* Output */
5318 unsigned int *MaxUncompressedBlockLuma,
5319 unsigned int *MaxUncompressedBlockChroma,
5320 unsigned int *MaxCompressedBlockLuma,
5321 unsigned int *MaxCompressedBlockChroma,
5322 unsigned int *IndependentBlockLuma,
5323 unsigned int *IndependentBlockChroma)
5324{
5325 typedef enum {
5326 REQ_256Bytes,
5327 REQ_128BytesNonContiguous,
5328 REQ_128BytesContiguous,
5329 REQ_NA
5330 } RequestType;
5331
5332 RequestType RequestLuma;
5333 RequestType RequestChroma;
5334
5335 unsigned int segment_order_horz_contiguous_luma;
5336 unsigned int segment_order_horz_contiguous_chroma;
5337 unsigned int segment_order_vert_contiguous_luma;
5338 unsigned int segment_order_vert_contiguous_chroma;
5339 unsigned int req128_horz_wc_l;
5340 unsigned int req128_horz_wc_c;
5341 unsigned int req128_vert_wc_l;
5342 unsigned int req128_vert_wc_c;
5343 unsigned int MAS_vp_horz_limit;
5344 unsigned int MAS_vp_vert_limit;
5345 unsigned int max_vp_horz_width;
5346 unsigned int max_vp_vert_height;
5347 unsigned int eff_surf_width_l;
5348 unsigned int eff_surf_width_c;
5349 unsigned int eff_surf_height_l;
5350 unsigned int eff_surf_height_c;
5351 unsigned int full_swath_bytes_horz_wc_l;
5352 unsigned int full_swath_bytes_horz_wc_c;
5353 unsigned int full_swath_bytes_vert_wc_l;
5354 unsigned int full_swath_bytes_vert_wc_c;
5355 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5356
5357 unsigned int yuv420;
5358 unsigned int horz_div_l;
5359 unsigned int horz_div_c;
5360 unsigned int vert_div_l;
5361 unsigned int vert_div_c;
5362
5363 unsigned int swath_buf_size;
5364 double detile_buf_vp_horz_limit;
5365 double detile_buf_vp_vert_limit;
5366
5367 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5368 SourcePixelFormat == dm_420_12) ? 1 : 0);
5369 horz_div_l = 1;
5370 horz_div_c = 1;
5371 vert_div_l = 1;
5372 vert_div_c = 1;
5373
5374 if (BytePerPixelY == 1)
5375 vert_div_l = 0;
5376 if (BytePerPixelC == 1)
5377 vert_div_c = 0;
5378
5379 if (BytePerPixelC == 0) {
5380 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5381 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5382 BytePerPixelY / (1 + horz_div_l));
5383 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5384 (1 + vert_div_l));
5385 } else {
5386 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5387 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5388 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5389 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5390 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5391 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5392 (1 + vert_div_c) / (1 + yuv420));
5393 }
5394
5395 if (SourcePixelFormat == dm_420_10) {
5396 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5397 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5398 }
5399
5400 detile_buf_vp_horz_limit = dml_floor(a: detile_buf_vp_horz_limit - 1, granularity: 16);
5401 detile_buf_vp_vert_limit = dml_floor(a: detile_buf_vp_vert_limit - 1, granularity: 16);
5402
5403 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5404 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5405 max_vp_horz_width = dml_min(a: (double) MAS_vp_horz_limit, b: detile_buf_vp_horz_limit);
5406 max_vp_vert_height = dml_min(a: (double) MAS_vp_vert_limit, b: detile_buf_vp_vert_limit);
5407 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5408 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5409 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5410 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5411
5412 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5413 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5414 if (BytePerPixelC > 0) {
5415 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5416 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5417 } else {
5418 full_swath_bytes_horz_wc_c = 0;
5419 full_swath_bytes_vert_wc_c = 0;
5420 }
5421
5422 if (SourcePixelFormat == dm_420_10) {
5423 full_swath_bytes_horz_wc_l = dml_ceil(a: (double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, granularity: 256.0);
5424 full_swath_bytes_horz_wc_c = dml_ceil(a: (double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, granularity: 256.0);
5425 full_swath_bytes_vert_wc_l = dml_ceil(a: (double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, granularity: 256.0);
5426 full_swath_bytes_vert_wc_c = dml_ceil(a: (double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, granularity: 256.0);
5427 }
5428
5429 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5430 req128_horz_wc_l = 0;
5431 req128_horz_wc_c = 0;
5432 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5433 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5434 req128_horz_wc_l = 0;
5435 req128_horz_wc_c = 1;
5436 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5437 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5438 req128_horz_wc_l = 1;
5439 req128_horz_wc_c = 0;
5440 } else {
5441 req128_horz_wc_l = 1;
5442 req128_horz_wc_c = 1;
5443 }
5444
5445 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5446 req128_vert_wc_l = 0;
5447 req128_vert_wc_c = 0;
5448 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5449 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5450 req128_vert_wc_l = 0;
5451 req128_vert_wc_c = 1;
5452 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5453 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5454 req128_vert_wc_l = 1;
5455 req128_vert_wc_c = 0;
5456 } else {
5457 req128_vert_wc_l = 1;
5458 req128_vert_wc_c = 1;
5459 }
5460
5461 if (BytePerPixelY == 2) {
5462 segment_order_horz_contiguous_luma = 0;
5463 segment_order_vert_contiguous_luma = 1;
5464 } else {
5465 segment_order_horz_contiguous_luma = 1;
5466 segment_order_vert_contiguous_luma = 0;
5467 }
5468
5469 if (BytePerPixelC == 2) {
5470 segment_order_horz_contiguous_chroma = 0;
5471 segment_order_vert_contiguous_chroma = 1;
5472 } else {
5473 segment_order_horz_contiguous_chroma = 1;
5474 segment_order_vert_contiguous_chroma = 0;
5475 }
5476#ifdef __DML_VBA_DEBUG__
5477 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5478 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5479 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5480 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5481 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5482 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5483 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5484 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5485 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5486 __func__, segment_order_horz_contiguous_chroma);
5487#endif
5488
5489 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5490 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5491 RequestLuma = REQ_256Bytes;
5492 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5493 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5494 RequestLuma = REQ_128BytesNonContiguous;
5495 else
5496 RequestLuma = REQ_128BytesContiguous;
5497
5498 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5499 RequestChroma = REQ_256Bytes;
5500 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5501 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5502 RequestChroma = REQ_128BytesNonContiguous;
5503 else
5504 RequestChroma = REQ_128BytesContiguous;
5505
5506 } else if (!IsVertical(Scan: SourceRotation)) {
5507 if (req128_horz_wc_l == 0)
5508 RequestLuma = REQ_256Bytes;
5509 else if (segment_order_horz_contiguous_luma == 0)
5510 RequestLuma = REQ_128BytesNonContiguous;
5511 else
5512 RequestLuma = REQ_128BytesContiguous;
5513
5514 if (req128_horz_wc_c == 0)
5515 RequestChroma = REQ_256Bytes;
5516 else if (segment_order_horz_contiguous_chroma == 0)
5517 RequestChroma = REQ_128BytesNonContiguous;
5518 else
5519 RequestChroma = REQ_128BytesContiguous;
5520
5521 } else {
5522 if (req128_vert_wc_l == 0)
5523 RequestLuma = REQ_256Bytes;
5524 else if (segment_order_vert_contiguous_luma == 0)
5525 RequestLuma = REQ_128BytesNonContiguous;
5526 else
5527 RequestLuma = REQ_128BytesContiguous;
5528
5529 if (req128_vert_wc_c == 0)
5530 RequestChroma = REQ_256Bytes;
5531 else if (segment_order_vert_contiguous_chroma == 0)
5532 RequestChroma = REQ_128BytesNonContiguous;
5533 else
5534 RequestChroma = REQ_128BytesContiguous;
5535 }
5536
5537 if (RequestLuma == REQ_256Bytes) {
5538 *MaxUncompressedBlockLuma = 256;
5539 *MaxCompressedBlockLuma = 256;
5540 *IndependentBlockLuma = 0;
5541 } else if (RequestLuma == REQ_128BytesContiguous) {
5542 *MaxUncompressedBlockLuma = 256;
5543 *MaxCompressedBlockLuma = 128;
5544 *IndependentBlockLuma = 128;
5545 } else {
5546 *MaxUncompressedBlockLuma = 256;
5547 *MaxCompressedBlockLuma = 64;
5548 *IndependentBlockLuma = 64;
5549 }
5550
5551 if (RequestChroma == REQ_256Bytes) {
5552 *MaxUncompressedBlockChroma = 256;
5553 *MaxCompressedBlockChroma = 256;
5554 *IndependentBlockChroma = 0;
5555 } else if (RequestChroma == REQ_128BytesContiguous) {
5556 *MaxUncompressedBlockChroma = 256;
5557 *MaxCompressedBlockChroma = 128;
5558 *IndependentBlockChroma = 128;
5559 } else {
5560 *MaxUncompressedBlockChroma = 256;
5561 *MaxCompressedBlockChroma = 64;
5562 *IndependentBlockChroma = 64;
5563 }
5564
5565 if (DCCEnabled != true || BytePerPixelC == 0) {
5566 *MaxUncompressedBlockChroma = 0;
5567 *MaxCompressedBlockChroma = 0;
5568 *IndependentBlockChroma = 0;
5569 }
5570
5571 if (DCCEnabled != true) {
5572 *MaxUncompressedBlockLuma = 0;
5573 *MaxCompressedBlockLuma = 0;
5574 *IndependentBlockLuma = 0;
5575 }
5576
5577#ifdef __DML_VBA_DEBUG__
5578 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5579 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5580 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5581 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5582 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5583 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5584#endif
5585
5586} // CalculateDCCConfiguration
5587
5588void dml32_CalculateStutterEfficiency(
5589 unsigned int CompressedBufferSizeInkByte,
5590 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5591 bool UnboundedRequestEnabled,
5592 unsigned int MetaFIFOSizeInKEntries,
5593 unsigned int ZeroSizeBufferEntries,
5594 unsigned int PixelChunkSizeInKByte,
5595 unsigned int NumberOfActiveSurfaces,
5596 unsigned int ROBBufferSizeInKByte,
5597 double TotalDataReadBandwidth,
5598 double DCFCLK,
5599 double ReturnBW,
5600 unsigned int CompbufReservedSpace64B,
5601 unsigned int CompbufReservedSpaceZs,
5602 double SRExitTime,
5603 double SRExitZ8Time,
5604 bool SynchronizeTimingsFinal,
5605 unsigned int BlendingAndTiming[],
5606 double StutterEnterPlusExitWatermark,
5607 double Z8StutterEnterPlusExitWatermark,
5608 bool ProgressiveToInterlaceUnitInOPP,
5609 bool Interlace[],
5610 double MinTTUVBlank[],
5611 unsigned int DPPPerSurface[],
5612 unsigned int DETBufferSizeY[],
5613 unsigned int BytePerPixelY[],
5614 double BytePerPixelDETY[],
5615 double SwathWidthY[],
5616 unsigned int SwathHeightY[],
5617 unsigned int SwathHeightC[],
5618 double NetDCCRateLuma[],
5619 double NetDCCRateChroma[],
5620 double DCCFractionOfZeroSizeRequestsLuma[],
5621 double DCCFractionOfZeroSizeRequestsChroma[],
5622 unsigned int HTotal[],
5623 unsigned int VTotal[],
5624 double PixelClock[],
5625 double VRatio[],
5626 enum dm_rotation_angle SourceRotation[],
5627 unsigned int BlockHeight256BytesY[],
5628 unsigned int BlockWidth256BytesY[],
5629 unsigned int BlockHeight256BytesC[],
5630 unsigned int BlockWidth256BytesC[],
5631 unsigned int DCCYMaxUncompressedBlock[],
5632 unsigned int DCCCMaxUncompressedBlock[],
5633 unsigned int VActive[],
5634 bool DCCEnable[],
5635 bool WritebackEnable[],
5636 double ReadBandwidthSurfaceLuma[],
5637 double ReadBandwidthSurfaceChroma[],
5638 double meta_row_bw[],
5639 double dpte_row_bw[],
5640
5641 /* Output */
5642 double *StutterEfficiencyNotIncludingVBlank,
5643 double *StutterEfficiency,
5644 unsigned int *NumberOfStutterBurstsPerFrame,
5645 double *Z8StutterEfficiencyNotIncludingVBlank,
5646 double *Z8StutterEfficiency,
5647 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5648 double *StutterPeriod,
5649 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5650{
5651
5652 bool FoundCriticalSurface = false;
5653 unsigned int SwathSizeCriticalSurface = 0;
5654 unsigned int LastChunkOfSwathSize;
5655 unsigned int MissingPartOfLastSwathOfDETSize;
5656 double LastZ8StutterPeriod = 0.0;
5657 double LastStutterPeriod = 0.0;
5658 unsigned int TotalNumberOfActiveOTG = 0;
5659 double doublePixelClock;
5660 unsigned int doubleHTotal;
5661 unsigned int doubleVTotal;
5662 bool SameTiming = true;
5663 double DETBufferingTimeY;
5664 double SwathWidthYCriticalSurface = 0.0;
5665 double SwathHeightYCriticalSurface = 0.0;
5666 double VActiveTimeCriticalSurface = 0.0;
5667 double FrameTimeCriticalSurface = 0.0;
5668 unsigned int BytePerPixelYCriticalSurface = 0;
5669 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5670 unsigned int DETBufferSizeYCriticalSurface = 0;
5671 double MinTTUVBlankCriticalSurface = 0.0;
5672 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5673 bool doublePlaneCriticalSurface = 0;
5674 bool doublePipeCriticalSurface = 0;
5675 double TotalCompressedReadBandwidth;
5676 double TotalRowReadBandwidth;
5677 double AverageDCCCompressionRate;
5678 double EffectiveCompressedBufferSize;
5679 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5680 double StutterBurstTime;
5681 unsigned int TotalActiveWriteback;
5682 double LinesInDETY;
5683 double LinesInDETYRoundedDownToSwath;
5684 double MaximumEffectiveCompressionLuma;
5685 double MaximumEffectiveCompressionChroma;
5686 double TotalZeroSizeRequestReadBandwidth;
5687 double TotalZeroSizeCompressedReadBandwidth;
5688 double AverageDCCZeroSizeFraction;
5689 double AverageZeroSizeCompressionRate;
5690 unsigned int k;
5691
5692 TotalZeroSizeRequestReadBandwidth = 0;
5693 TotalZeroSizeCompressedReadBandwidth = 0;
5694 TotalRowReadBandwidth = 0;
5695 TotalCompressedReadBandwidth = 0;
5696
5697 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5698 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5699 if (DCCEnable[k] == true) {
5700 if ((IsVertical(Scan: SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5701 || (!IsVertical(Scan: SourceRotation[k])
5702 && BlockHeight256BytesY[k] > SwathHeightY[k])
5703 || DCCYMaxUncompressedBlock[k] < 256) {
5704 MaximumEffectiveCompressionLuma = 2;
5705 } else {
5706 MaximumEffectiveCompressionLuma = 4;
5707 }
5708 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5709 + ReadBandwidthSurfaceLuma[k]
5710 / dml_min(a: NetDCCRateLuma[k],
5711 b: MaximumEffectiveCompressionLuma);
5712#ifdef __DML_VBA_DEBUG__
5713 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5714 __func__, k, ReadBandwidthSurfaceLuma[k]);
5715 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5716 __func__, k, NetDCCRateLuma[k]);
5717 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5718 __func__, k, MaximumEffectiveCompressionLuma);
5719#endif
5720 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5721 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5722 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5723 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5724 / MaximumEffectiveCompressionLuma;
5725
5726 if (ReadBandwidthSurfaceChroma[k] > 0) {
5727 if ((IsVertical(Scan: SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5728 || (!IsVertical(Scan: SourceRotation[k])
5729 && BlockHeight256BytesC[k] > SwathHeightC[k])
5730 || DCCCMaxUncompressedBlock[k] < 256) {
5731 MaximumEffectiveCompressionChroma = 2;
5732 } else {
5733 MaximumEffectiveCompressionChroma = 4;
5734 }
5735 TotalCompressedReadBandwidth =
5736 TotalCompressedReadBandwidth
5737 + ReadBandwidthSurfaceChroma[k]
5738 / dml_min(a: NetDCCRateChroma[k],
5739 b: MaximumEffectiveCompressionChroma);
5740#ifdef __DML_VBA_DEBUG__
5741 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5742 __func__, k, ReadBandwidthSurfaceChroma[k]);
5743 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5744 __func__, k, NetDCCRateChroma[k]);
5745 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5746 __func__, k, MaximumEffectiveCompressionChroma);
5747#endif
5748 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5749 + ReadBandwidthSurfaceChroma[k]
5750 * DCCFractionOfZeroSizeRequestsChroma[k];
5751 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5752 + ReadBandwidthSurfaceChroma[k]
5753 * DCCFractionOfZeroSizeRequestsChroma[k]
5754 / MaximumEffectiveCompressionChroma;
5755 }
5756 } else {
5757 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5758 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5759 }
5760 TotalRowReadBandwidth = TotalRowReadBandwidth
5761 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5762 }
5763 }
5764
5765 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5766 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5767
5768#ifdef __DML_VBA_DEBUG__
5769 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5770 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5771 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5772 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5773 __func__, TotalZeroSizeCompressedReadBandwidth);
5774 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5775 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5776 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5777 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5778 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5779 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5780 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5781#endif
5782 if (AverageDCCZeroSizeFraction == 1) {
5783 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5784 / TotalZeroSizeCompressedReadBandwidth;
5785 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5786 * AverageZeroSizeCompressionRate
5787 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5788 * AverageZeroSizeCompressionRate;
5789 } else if (AverageDCCZeroSizeFraction > 0) {
5790 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5791 / TotalZeroSizeCompressedReadBandwidth;
5792 EffectiveCompressedBufferSize = dml_min(
5793 a: (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5794 b: (double) MetaFIFOSizeInKEntries * 1024 * 64
5795 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5796 + 1 / AverageDCCCompressionRate))
5797 + dml_min(a: ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5798 * AverageDCCCompressionRate,
5799 b: ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5800 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5801
5802#ifdef __DML_VBA_DEBUG__
5803 dml_print("DML::%s: min 1 = %f\n", __func__,
5804 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5805 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5806 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5807 AverageDCCCompressionRate));
5808 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5809 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5810 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5811 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5812#endif
5813 } else {
5814 EffectiveCompressedBufferSize = dml_min(
5815 a: (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5816 b: (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5817 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5818 * AverageDCCCompressionRate;
5819
5820#ifdef __DML_VBA_DEBUG__
5821 dml_print("DML::%s: min 1 = %f\n", __func__,
5822 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5823 dml_print("DML::%s: min 2 = %f\n", __func__,
5824 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5825#endif
5826 }
5827
5828#ifdef __DML_VBA_DEBUG__
5829 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5830 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5831 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5832#endif
5833
5834 *StutterPeriod = 0;
5835
5836 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5837 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5838 LinesInDETY = ((double) DETBufferSizeY[k]
5839 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5840 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5841 / BytePerPixelDETY[k] / SwathWidthY[k];
5842 LinesInDETYRoundedDownToSwath = dml_floor(a: LinesInDETY, granularity: SwathHeightY[k]);
5843 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5844 / VRatio[k];
5845#ifdef __DML_VBA_DEBUG__
5846 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5847 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5848 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5849 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5850 __func__, k, ReadBandwidthSurfaceLuma[k]);
5851 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5852 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5853 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5854 __func__, k, LinesInDETYRoundedDownToSwath);
5855 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5856 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5857 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5858 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5859 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5860#endif
5861
5862 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5863 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5864
5865 FoundCriticalSurface = true;
5866 *StutterPeriod = DETBufferingTimeY;
5867 FrameTimeCriticalSurface = (
5868 isInterlaceTiming ?
5869 dml_floor(a: (double) VTotal[k] / 2.0, granularity: 1.0) : VTotal[k])
5870 * (double) HTotal[k] / PixelClock[k];
5871 VActiveTimeCriticalSurface = (
5872 isInterlaceTiming ?
5873 dml_floor(a: (double) VActive[k] / 2.0, granularity: 1.0) : VActive[k])
5874 * (double) HTotal[k] / PixelClock[k];
5875 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5876 SwathWidthYCriticalSurface = SwathWidthY[k];
5877 SwathHeightYCriticalSurface = SwathHeightY[k];
5878 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5879 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5880 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5881 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5882 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5883 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5884 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5885
5886#ifdef __DML_VBA_DEBUG__
5887 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5888 __func__, k, FoundCriticalSurface);
5889 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5890 __func__, k, *StutterPeriod);
5891 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5892 __func__, k, MinTTUVBlankCriticalSurface);
5893 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5894 __func__, k, FrameTimeCriticalSurface);
5895 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5896 __func__, k, VActiveTimeCriticalSurface);
5897 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5898 __func__, k, BytePerPixelYCriticalSurface);
5899 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5900 __func__, k, SwathWidthYCriticalSurface);
5901 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5902 __func__, k, SwathHeightYCriticalSurface);
5903 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5904 __func__, k, BlockWidth256BytesYCriticalSurface);
5905 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5906 __func__, k, doublePlaneCriticalSurface);
5907 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5908 __func__, k, doublePipeCriticalSurface);
5909 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5910 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5911#endif
5912 }
5913 }
5914 }
5915
5916 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(a: *StutterPeriod * TotalDataReadBandwidth,
5917 b: EffectiveCompressedBufferSize);
5918#ifdef __DML_VBA_DEBUG__
5919 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5920 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5921 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5922 __func__, *StutterPeriod * TotalDataReadBandwidth);
5923 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5924 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5925 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5926 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5927 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5928 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5929 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5930#endif
5931
5932 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5933 / ReturnBW
5934 + (*StutterPeriod * TotalDataReadBandwidth
5935 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5936 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5937#ifdef __DML_VBA_DEBUG__
5938 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5939 AverageDCCCompressionRate / ReturnBW);
5940 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5941 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5942 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5943 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5944 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5945 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5946#endif
5947 StutterBurstTime = dml_max(a: StutterBurstTime,
5948 b: LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5949 * SwathWidthYCriticalSurface / ReturnBW);
5950
5951#ifdef __DML_VBA_DEBUG__
5952 dml_print("DML::%s: Time to finish residue swath=%f\n",
5953 __func__,
5954 LinesToFinishSwathTransferStutterCriticalSurface *
5955 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5956#endif
5957
5958 TotalActiveWriteback = 0;
5959 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5960 if (WritebackEnable[k])
5961 TotalActiveWriteback = TotalActiveWriteback + 1;
5962 }
5963
5964 if (TotalActiveWriteback == 0) {
5965#ifdef __DML_VBA_DEBUG__
5966 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5967 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5968 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5969 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5970#endif
5971 *StutterEfficiencyNotIncludingVBlank = dml_max(a: 0.,
5972 b: 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5973 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(a: 0.,
5974 b: 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5975 *NumberOfStutterBurstsPerFrame = (
5976 *StutterEfficiencyNotIncludingVBlank > 0 ?
5977 dml_ceil(a: VActiveTimeCriticalSurface / *StutterPeriod, granularity: 1) : 0);
5978 *Z8NumberOfStutterBurstsPerFrame = (
5979 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5980 dml_ceil(a: VActiveTimeCriticalSurface / *StutterPeriod, granularity: 1) : 0);
5981 } else {
5982 *StutterEfficiencyNotIncludingVBlank = 0.;
5983 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5984 *NumberOfStutterBurstsPerFrame = 0;
5985 *Z8NumberOfStutterBurstsPerFrame = 0;
5986 }
5987#ifdef __DML_VBA_DEBUG__
5988 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5989 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5990 __func__, *StutterEfficiencyNotIncludingVBlank);
5991 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5992 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5993 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5994 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5995#endif
5996
5997 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5998 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5999 if (BlendingAndTiming[k] == k) {
6000 if (TotalNumberOfActiveOTG == 0) {
6001 doublePixelClock = PixelClock[k];
6002 doubleHTotal = HTotal[k];
6003 doubleVTotal = VTotal[k];
6004 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6005 || doubleVTotal != VTotal[k]) {
6006 SameTiming = false;
6007 }
6008 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6009 }
6010 }
6011 }
6012
6013 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6014 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6015
6016 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6017 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6018 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6019 + StutterBurstTime * VActiveTimeCriticalSurface
6020 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6021 } else {
6022 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6023 }
6024 } else {
6025 *StutterEfficiency = 0;
6026 }
6027
6028 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6029 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6030 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6031 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6032 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6033 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6034 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6035 } else {
6036 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6037 }
6038 } else {
6039 *Z8StutterEfficiency = 0.;
6040 }
6041
6042#ifdef __DML_VBA_DEBUG__
6043 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6044 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6045 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6046 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6047 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6048 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6049 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6050 __func__, *StutterEfficiencyNotIncludingVBlank);
6051 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6052#endif
6053
6054 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6055 * dml_ceil(a: SwathWidthYCriticalSurface, granularity: BlockWidth256BytesYCriticalSurface);
6056 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6057 MissingPartOfLastSwathOfDETSize = dml_ceil(a: DETBufferSizeYCriticalSurface, granularity: SwathSizeCriticalSurface)
6058 - DETBufferSizeYCriticalSurface;
6059
6060 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6061 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6062 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6063 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6064
6065#ifdef __DML_VBA_DEBUG__
6066 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6067 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6068 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6069 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6070#endif
6071} // CalculateStutterEfficiency
6072
/*
 * Derive the DET sizing figures from the return-buffer and ROB sizes.
 *
 * Outputs:
 *   *MaxTotalDETInKByte: 4/5 of (ConfigReturnBufferSizeInKByte +
 *       ROBBufferSizeInKByte), passed through dml_ceil() with a
 *       granularity of 64.
 *   *nomDETInKByte: *MaxTotalDETInKByte / MaxNumDPP, passed through
 *       dml_floor() with a granularity of 64; replaced by
 *       nomDETInKByteOverrideValue when nomDETInKByteOverrideEnable is set.
 *   *MinCompressedBufferSizeInKByte: ConfigReturnBufferSizeInKByte minus
 *       *MaxTotalDETInKByte (unsigned arithmetic).
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller forces a nominal DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6109
/*
 * Decide whether the summed active bandwidth demand of all surfaces fits
 * within ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces entries the demand is the
 * luma, chroma and cursor read bandwidths, each scaled by its urgent burst
 * factor, plus NumberOfDPP[k] times the meta-row and dpte-row bandwidths.
 *
 * Returns true only when the total demand is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_ok = true;
	bool supported;
	double demand = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_ok = false;

		/* Terms are accumulated strictly left to right, one after another. */
		demand = demand
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = (demand <= ReturnBW) && latency_hiding_ok;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, !latency_hiding_ok);
	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, demand);
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6148
6149void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6150 double ReturnBW,
6151 bool NotUrgentLatencyHiding[],
6152 double ReadBandwidthLuma[],
6153 double ReadBandwidthChroma[],
6154 double PrefetchBandwidthLuma[],
6155 double PrefetchBandwidthChroma[],
6156 double cursor_bw[],
6157 double meta_row_bandwidth[],
6158 double dpte_row_bandwidth[],
6159 double cursor_bw_pre[],
6160 double prefetch_vmrow_bw[],
6161 unsigned int NumberOfDPP[],
6162 double UrgentBurstFactorLuma[],
6163 double UrgentBurstFactorChroma[],
6164 double UrgentBurstFactorCursor[],
6165 double UrgentBurstFactorLumaPre[],
6166 double UrgentBurstFactorChromaPre[],
6167 double UrgentBurstFactorCursorPre[],
6168 double PrefetchBW[],
6169 double VRatio[],
6170 double MaxVRatioPre,
6171
6172 /* output */
6173 double *MaxPrefetchBandwidth,
6174 double *FractionOfUrgentBandwidth,
6175 bool *PrefetchBandwidthSupport)
6176{
6177 unsigned int k;
6178 double ActiveBandwidthPerSurface;
6179 bool NotEnoughUrgentLatencyHiding = false;
6180 double TotalActiveBandwidth = 0;
6181 double TotalPrefetchBandwidth = 0;
6182
6183 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6184 if (NotUrgentLatencyHiding[k]) {
6185 NotEnoughUrgentLatencyHiding = true;
6186 }
6187 }
6188
6189 *MaxPrefetchBandwidth = 0;
6190 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6191 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6192
6193 TotalActiveBandwidth += ActiveBandwidthPerSurface;
6194
6195 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6196
6197 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(a: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6198 b: ActiveBandwidthPerSurface,
6199 c: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6200 }
6201
6202 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6203 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6204 else
6205 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6206
6207 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6208}
6209
/*
 * Return the bandwidth left over from ReturnBW once every surface's demand
 * has been subtracted.
 *
 * The per-surface demand is the larger of:
 *   - the active demand: luma, chroma and cursor read bandwidths, each
 *     scaled by its urgent burst factor
 *   - the prefetch demand: NumberOfDPP[k] * (scaled luma + chroma prefetch
 *     bandwidth) plus the scaled prefetch cursor bandwidth
 *
 * The result is not clamped, so it can be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;
	double active_bw;
	double prefetch_bw;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6236
6237void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6238 double ReturnBW,
6239 enum immediate_flip_requirement ImmediateFlipRequirement[],
6240 double final_flip_bw[],
6241 double ReadBandwidthLuma[],
6242 double ReadBandwidthChroma[],
6243 double PrefetchBandwidthLuma[],
6244 double PrefetchBandwidthChroma[],
6245 double cursor_bw[],
6246 double meta_row_bandwidth[],
6247 double dpte_row_bandwidth[],
6248 double cursor_bw_pre[],
6249 double prefetch_vmrow_bw[],
6250 unsigned int NumberOfDPP[],
6251 double UrgentBurstFactorLuma[],
6252 double UrgentBurstFactorChroma[],
6253 double UrgentBurstFactorCursor[],
6254 double UrgentBurstFactorLumaPre[],
6255 double UrgentBurstFactorChromaPre[],
6256 double UrgentBurstFactorCursorPre[],
6257
6258 /* output */
6259 double *TotalBandwidth,
6260 double *FractionOfUrgentBandwidth,
6261 bool *ImmediateFlipBandwidthSupport)
6262{
6263 unsigned int k;
6264 *TotalBandwidth = 0;
6265 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6266 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6267 *TotalBandwidth = *TotalBandwidth + dml_max3(a: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6268 b: NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6269 c: NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6270 } else {
6271 *TotalBandwidth = *TotalBandwidth + dml_max3(a: NumberOfDPP[k] * prefetch_vmrow_bw[k],
6272 b: NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6273 c: NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6274 }
6275 }
6276 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6277 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6278}
6279
6280bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6281 double ReturnBW,
6282 double UrgentLatency,
6283 unsigned int SwathHeightY[],
6284 unsigned int SwathHeightC[],
6285 unsigned int SwathWidthY[],
6286 unsigned int SwathWidthC[],
6287 double BytePerPixelInDETY[],
6288 double BytePerPixelInDETC[],
6289 unsigned int DETBufferSizeY[],
6290 unsigned int DETBufferSizeC[],
6291 unsigned int NumOfDPP[],
6292 unsigned int HTotal[],
6293 double PixelClock[],
6294 double VRatioY[],
6295 double VRatioC[],
6296 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6297 enum unbounded_requesting_policy UseUnboundedRequesting)
6298{
6299 int k;
6300 double SwathSizeAllSurfaces = 0;
6301 double SwathSizeAllSurfacesInFetchTimeUs;
6302 double DETSwathLatencyHidingUs;
6303 double DETSwathLatencyHidingYUs;
6304 double DETSwathLatencyHidingCUs;
6305 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6306 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6307 bool NotEnoughDETSwathFillLatencyHiding = false;
6308
6309 if (UseUnboundedRequesting == dm_unbounded_requesting)
6310 return false;
6311
6312 /* calculate sum of single swath size for all pipes in bytes */
6313 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6314 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6315
6316 if (SwathHeightC[k] != 0)
6317 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6318 else
6319 SwathSizePerSurfaceC[k] = 0;
6320
6321 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6322 }
6323
6324 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6325
6326 /* ensure all DET - 1 swath can hide a fetch for all surfaces */
6327 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6328 double LineTime = HTotal[k] / PixelClock[k];
6329
6330 /* only care if surface is not phantom */
6331 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6332 DETSwathLatencyHidingYUs = (dml_floor(a: DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], granularity: 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6333
6334 if (SwathHeightC[k] != 0) {
6335 DETSwathLatencyHidingCUs = (dml_floor(a: DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], granularity: 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6336
6337 DETSwathLatencyHidingUs = dml_min(a: DETSwathLatencyHidingYUs, b: DETSwathLatencyHidingCUs);
6338 } else {
6339 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6340 }
6341
6342 /* DET must be able to hide time to fetch 1 swath for each surface */
6343 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6344 NotEnoughDETSwathFillLatencyHiding = true;
6345 break;
6346 }
6347 }
6348 }
6349
6350 return NotEnoughDETSwathFillLatencyHiding;
6351}
6352

/* source code of linux/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c */