1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | |
3 | #include <linux/crc32.h> |
4 | |
5 | #include <drm/drm_atomic.h> |
6 | #include <drm/drm_atomic_helper.h> |
7 | #include <drm/drm_blend.h> |
8 | #include <drm/drm_fourcc.h> |
9 | #include <drm/drm_fixed.h> |
10 | #include <drm/drm_gem_framebuffer_helper.h> |
11 | #include <drm/drm_vblank.h> |
12 | #include <linux/minmax.h> |
13 | |
14 | #include "vkms_drv.h" |
15 | |
16 | static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha) |
17 | { |
18 | u32 new_color; |
19 | |
20 | new_color = (src * 0xffff + dst * (0xffff - alpha)); |
21 | |
22 | return DIV_ROUND_CLOSEST(new_color, 0xffff); |
23 | } |
24 | |
25 | /** |
26 | * pre_mul_alpha_blend - alpha blending equation |
27 | * @frame_info: Source framebuffer's metadata |
28 | * @stage_buffer: The line with the pixels from src_plane |
29 | * @output_buffer: A line buffer that receives all the blends output |
30 | * |
31 | * Using the information from the `frame_info`, this blends only the |
32 | * necessary pixels from the `stage_buffer` to the `output_buffer` |
33 | * using premultiplied blend formula. |
34 | * |
35 | * The current DRM assumption is that pixel color values have been already |
36 | * pre-multiplied with the alpha channel values. See more |
37 | * drm_plane_create_blend_mode_property(). Also, this formula assumes a |
38 | * completely opaque background. |
39 | */ |
40 | static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info, |
41 | struct line_buffer *stage_buffer, |
42 | struct line_buffer *output_buffer) |
43 | { |
44 | int x_dst = frame_info->dst.x1; |
45 | struct pixel_argb_u16 *out = output_buffer->pixels + x_dst; |
46 | struct pixel_argb_u16 *in = stage_buffer->pixels; |
47 | int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), |
48 | stage_buffer->n_pixels); |
49 | |
50 | for (int x = 0; x < x_limit; x++) { |
51 | out[x].a = (u16)0xffff; |
52 | out[x].r = pre_mul_blend_channel(src: in[x].r, dst: out[x].r, alpha: in[x].a); |
53 | out[x].g = pre_mul_blend_channel(src: in[x].g, dst: out[x].g, alpha: in[x].a); |
54 | out[x].b = pre_mul_blend_channel(src: in[x].b, dst: out[x].b, alpha: in[x].a); |
55 | } |
56 | } |
57 | |
58 | static int get_y_pos(struct vkms_frame_info *frame_info, int y) |
59 | { |
60 | if (frame_info->rotation & DRM_MODE_REFLECT_Y) |
61 | return drm_rect_height(r: &frame_info->rotated) - y - 1; |
62 | |
63 | switch (frame_info->rotation & DRM_MODE_ROTATE_MASK) { |
64 | case DRM_MODE_ROTATE_90: |
65 | return frame_info->rotated.x2 - y - 1; |
66 | case DRM_MODE_ROTATE_270: |
67 | return y + frame_info->rotated.x1; |
68 | default: |
69 | return y; |
70 | } |
71 | } |
72 | |
73 | static bool check_limit(struct vkms_frame_info *frame_info, int pos) |
74 | { |
75 | if (drm_rotation_90_or_270(rotation: frame_info->rotation)) { |
76 | if (pos >= 0 && pos < drm_rect_width(r: &frame_info->rotated)) |
77 | return true; |
78 | } else { |
79 | if (pos >= frame_info->rotated.y1 && pos < frame_info->rotated.y2) |
80 | return true; |
81 | } |
82 | |
83 | return false; |
84 | } |
85 | |
86 | static void fill_background(const struct pixel_argb_u16 *background_color, |
87 | struct line_buffer *output_buffer) |
88 | { |
89 | for (size_t i = 0; i < output_buffer->n_pixels; i++) |
90 | output_buffer->pixels[i] = *background_color; |
91 | } |
92 | |
93 | // lerp(a, b, t) = a + (b - a) * t |
94 | static u16 lerp_u16(u16 a, u16 b, s64 t) |
95 | { |
96 | s64 a_fp = drm_int2fixp(a); |
97 | s64 b_fp = drm_int2fixp(a: b); |
98 | |
99 | s64 delta = drm_fixp_mul(a: b_fp - a_fp, b: t); |
100 | |
101 | return drm_fixp2int(a: a_fp + delta); |
102 | } |
103 | |
104 | static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) |
105 | { |
106 | s64 color_channel_fp = drm_int2fixp(a: channel_value); |
107 | |
108 | return drm_fixp_mul(a: color_channel_fp, b: lut->channel_value2index_ratio); |
109 | } |
110 | |
/*
 * Index of each channel inside one LUT entry viewed as an array of 16-bit
 * values. The enumerators mirror the positions of the fields inside
 * `struct drm_color_lut`, so both must be kept in the same order.
 */
enum lut_channel {
	LUT_RED = 0,
	LUT_GREEN = 1,
	LUT_BLUE = 2,
	LUT_RESERVED = 3
};
121 | |
122 | static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 channel_value, |
123 | enum lut_channel channel) |
124 | { |
125 | s64 lut_index = get_lut_index(lut, channel_value); |
126 | u16 *floor_lut_value, *ceil_lut_value; |
127 | u16 floor_channel_value, ceil_channel_value; |
128 | |
129 | /* |
130 | * This checks if `struct drm_color_lut` has any gap added by the compiler |
131 | * between the struct fields. |
132 | */ |
133 | static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4); |
134 | |
135 | floor_lut_value = (__u16 *)&lut->base[drm_fixp2int(a: lut_index)]; |
136 | if (drm_fixp2int(a: lut_index) == (lut->lut_length - 1)) |
137 | /* We're at the end of the LUT array, use same value for ceil and floor */ |
138 | ceil_lut_value = floor_lut_value; |
139 | else |
140 | ceil_lut_value = (__u16 *)&lut->base[drm_fixp2int_ceil(a: lut_index)]; |
141 | |
142 | floor_channel_value = floor_lut_value[channel]; |
143 | ceil_channel_value = ceil_lut_value[channel]; |
144 | |
145 | return lerp_u16(a: floor_channel_value, b: ceil_channel_value, |
146 | t: lut_index & DRM_FIXED_DECIMAL_MASK); |
147 | } |
148 | |
149 | static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buffer *output_buffer) |
150 | { |
151 | if (!crtc_state->gamma_lut.base) |
152 | return; |
153 | |
154 | if (!crtc_state->gamma_lut.lut_length) |
155 | return; |
156 | |
157 | for (size_t x = 0; x < output_buffer->n_pixels; x++) { |
158 | struct pixel_argb_u16 *pixel = &output_buffer->pixels[x]; |
159 | |
160 | pixel->r = apply_lut_to_channel_value(lut: &crtc_state->gamma_lut, channel_value: pixel->r, channel: LUT_RED); |
161 | pixel->g = apply_lut_to_channel_value(lut: &crtc_state->gamma_lut, channel_value: pixel->g, channel: LUT_GREEN); |
162 | pixel->b = apply_lut_to_channel_value(lut: &crtc_state->gamma_lut, channel_value: pixel->b, channel: LUT_BLUE); |
163 | } |
164 | } |
165 | |
166 | /** |
167 | * blend - blend the pixels from all planes and compute crc |
168 | * @wb: The writeback frame buffer metadata |
169 | * @crtc_state: The crtc state |
170 | * @crc32: The crc output of the final frame |
171 | * @output_buffer: A buffer of a row that will receive the result of the blend(s) |
172 | * @stage_buffer: The line with the pixels from plane being blend to the output |
173 | * @row_size: The size, in bytes, of a single row |
174 | * |
175 | * This function blends the pixels (Using the `pre_mul_alpha_blend`) |
176 | * from all planes, calculates the crc32 of the output from the former step, |
177 | * and, if necessary, convert and store the output to the writeback buffer. |
178 | */ |
179 | static void blend(struct vkms_writeback_job *wb, |
180 | struct vkms_crtc_state *crtc_state, |
181 | u32 *crc32, struct line_buffer *stage_buffer, |
182 | struct line_buffer *output_buffer, size_t row_size) |
183 | { |
184 | struct vkms_plane_state **plane = crtc_state->active_planes; |
185 | u32 n_active_planes = crtc_state->num_active_planes; |
186 | int y_pos; |
187 | |
188 | const struct pixel_argb_u16 background_color = { .a = 0xffff }; |
189 | |
190 | size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; |
191 | |
192 | for (size_t y = 0; y < crtc_y_limit; y++) { |
193 | fill_background(background_color: &background_color, output_buffer); |
194 | |
195 | /* The active planes are composed associatively in z-order. */ |
196 | for (size_t i = 0; i < n_active_planes; i++) { |
197 | y_pos = get_y_pos(frame_info: plane[i]->frame_info, y); |
198 | |
199 | if (!check_limit(frame_info: plane[i]->frame_info, pos: y_pos)) |
200 | continue; |
201 | |
202 | vkms_compose_row(stage_buffer, plane: plane[i], y: y_pos); |
203 | pre_mul_alpha_blend(frame_info: plane[i]->frame_info, stage_buffer, |
204 | output_buffer); |
205 | } |
206 | |
207 | apply_lut(crtc_state, output_buffer); |
208 | |
209 | *crc32 = crc32_le(crc: *crc32, p: (void *)output_buffer->pixels, len: row_size); |
210 | |
211 | if (wb) |
212 | vkms_writeback_row(wb, src_buffer: output_buffer, y: y_pos); |
213 | } |
214 | } |
215 | |
216 | static int check_format_funcs(struct vkms_crtc_state *crtc_state, |
217 | struct vkms_writeback_job *active_wb) |
218 | { |
219 | struct vkms_plane_state **planes = crtc_state->active_planes; |
220 | u32 n_active_planes = crtc_state->num_active_planes; |
221 | |
222 | for (size_t i = 0; i < n_active_planes; i++) |
223 | if (!planes[i]->pixel_read) |
224 | return -1; |
225 | |
226 | if (active_wb && !active_wb->pixel_write) |
227 | return -1; |
228 | |
229 | return 0; |
230 | } |
231 | |
232 | static int check_iosys_map(struct vkms_crtc_state *crtc_state) |
233 | { |
234 | struct vkms_plane_state **plane_state = crtc_state->active_planes; |
235 | u32 n_active_planes = crtc_state->num_active_planes; |
236 | |
237 | for (size_t i = 0; i < n_active_planes; i++) |
238 | if (iosys_map_is_null(map: &plane_state[i]->frame_info->map[0])) |
239 | return -1; |
240 | |
241 | return 0; |
242 | } |
243 | |
244 | static int compose_active_planes(struct vkms_writeback_job *active_wb, |
245 | struct vkms_crtc_state *crtc_state, |
246 | u32 *crc32) |
247 | { |
248 | size_t line_width, pixel_size = sizeof(struct pixel_argb_u16); |
249 | struct line_buffer output_buffer, stage_buffer; |
250 | int ret = 0; |
251 | |
252 | /* |
253 | * This check exists so we can call `crc32_le` for the entire line |
254 | * instead doing it for each channel of each pixel in case |
255 | * `struct `pixel_argb_u16` had any gap added by the compiler |
256 | * between the struct fields. |
257 | */ |
258 | static_assert(sizeof(struct pixel_argb_u16) == 8); |
259 | |
260 | if (WARN_ON(check_iosys_map(crtc_state))) |
261 | return -EINVAL; |
262 | |
263 | if (WARN_ON(check_format_funcs(crtc_state, active_wb))) |
264 | return -EINVAL; |
265 | |
266 | line_width = crtc_state->base.crtc->mode.hdisplay; |
267 | stage_buffer.n_pixels = line_width; |
268 | output_buffer.n_pixels = line_width; |
269 | |
270 | stage_buffer.pixels = kvmalloc(size: line_width * pixel_size, GFP_KERNEL); |
271 | if (!stage_buffer.pixels) { |
272 | DRM_ERROR("Cannot allocate memory for the output line buffer" ); |
273 | return -ENOMEM; |
274 | } |
275 | |
276 | output_buffer.pixels = kvmalloc(size: line_width * pixel_size, GFP_KERNEL); |
277 | if (!output_buffer.pixels) { |
278 | DRM_ERROR("Cannot allocate memory for intermediate line buffer" ); |
279 | ret = -ENOMEM; |
280 | goto free_stage_buffer; |
281 | } |
282 | |
283 | blend(wb: active_wb, crtc_state, crc32, stage_buffer: &stage_buffer, |
284 | output_buffer: &output_buffer, row_size: line_width * pixel_size); |
285 | |
286 | kvfree(addr: output_buffer.pixels); |
287 | free_stage_buffer: |
288 | kvfree(addr: stage_buffer.pixels); |
289 | |
290 | return ret; |
291 | } |
292 | |
293 | /** |
294 | * vkms_composer_worker - ordered work_struct to compute CRC |
295 | * |
296 | * @work: work_struct |
297 | * |
298 | * Work handler for composing and computing CRCs. work_struct scheduled in |
299 | * an ordered workqueue that's periodically scheduled to run by |
300 | * vkms_vblank_simulate() and flushed at vkms_atomic_commit_tail(). |
301 | */ |
302 | void vkms_composer_worker(struct work_struct *work) |
303 | { |
304 | struct vkms_crtc_state *crtc_state = container_of(work, |
305 | struct vkms_crtc_state, |
306 | composer_work); |
307 | struct drm_crtc *crtc = crtc_state->base.crtc; |
308 | struct vkms_writeback_job *active_wb = crtc_state->active_writeback; |
309 | struct vkms_output *out = drm_crtc_to_vkms_output(crtc); |
310 | bool crc_pending, wb_pending; |
311 | u64 frame_start, frame_end; |
312 | u32 crc32 = 0; |
313 | int ret; |
314 | |
315 | spin_lock_irq(lock: &out->composer_lock); |
316 | frame_start = crtc_state->frame_start; |
317 | frame_end = crtc_state->frame_end; |
318 | crc_pending = crtc_state->crc_pending; |
319 | wb_pending = crtc_state->wb_pending; |
320 | crtc_state->frame_start = 0; |
321 | crtc_state->frame_end = 0; |
322 | crtc_state->crc_pending = false; |
323 | |
324 | if (crtc->state->gamma_lut) { |
325 | s64 max_lut_index_fp; |
326 | s64 u16_max_fp = drm_int2fixp(a: 0xffff); |
327 | |
328 | crtc_state->gamma_lut.base = (struct drm_color_lut *)crtc->state->gamma_lut->data; |
329 | crtc_state->gamma_lut.lut_length = |
330 | crtc->state->gamma_lut->length / sizeof(struct drm_color_lut); |
331 | max_lut_index_fp = drm_int2fixp(a: crtc_state->gamma_lut.lut_length - 1); |
332 | crtc_state->gamma_lut.channel_value2index_ratio = drm_fixp_div(a: max_lut_index_fp, |
333 | b: u16_max_fp); |
334 | |
335 | } else { |
336 | crtc_state->gamma_lut.base = NULL; |
337 | } |
338 | |
339 | spin_unlock_irq(lock: &out->composer_lock); |
340 | |
341 | /* |
342 | * We raced with the vblank hrtimer and previous work already computed |
343 | * the crc, nothing to do. |
344 | */ |
345 | if (!crc_pending) |
346 | return; |
347 | |
348 | if (wb_pending) |
349 | ret = compose_active_planes(active_wb, crtc_state, crc32: &crc32); |
350 | else |
351 | ret = compose_active_planes(NULL, crtc_state, crc32: &crc32); |
352 | |
353 | if (ret) |
354 | return; |
355 | |
356 | if (wb_pending) { |
357 | drm_writeback_signal_completion(wb_connector: &out->wb_connector, status: 0); |
358 | spin_lock_irq(lock: &out->composer_lock); |
359 | crtc_state->wb_pending = false; |
360 | spin_unlock_irq(lock: &out->composer_lock); |
361 | } |
362 | |
363 | /* |
364 | * The worker can fall behind the vblank hrtimer, make sure we catch up. |
365 | */ |
366 | while (frame_start <= frame_end) |
367 | drm_crtc_add_crc_entry(crtc, has_frame: true, frame: frame_start++, crcs: &crc32); |
368 | } |
369 | |
/* Names of the CRC sources accepted by this driver. */
static const char * const pipe_crc_sources[] = {"auto"};

/*
 * Return the table of supported CRC source names and store the number of
 * entries in @count. @crtc is not used.
 */
const char *const *vkms_get_crc_sources(struct drm_crtc *crtc,
					size_t *count)
{
	const char *const *sources = pipe_crc_sources;

	*count = ARRAY_SIZE(pipe_crc_sources);

	return sources;
}
378 | |
/*
 * Parse a CRC source name.
 *
 * @src_name: source name, may be NULL
 * @enabled:  set to true only when @src_name is "auto", false otherwise
 *
 * A NULL name is a valid request (CRC disabled) and returns 0, as does
 * "auto". Any other name returns -EINVAL.
 */
static int vkms_crc_parse_source(const char *src_name, bool *enabled)
{
	/* Only a non-NULL name equal to "auto" turns the source on. */
	*enabled = src_name && strcmp(src_name, "auto") == 0;

	/* A name was given but it was not recognized. */
	if (src_name && !*enabled)
		return -EINVAL;

	return 0;
}
394 | |
395 | int vkms_verify_crc_source(struct drm_crtc *crtc, const char *src_name, |
396 | size_t *values_cnt) |
397 | { |
398 | bool enabled; |
399 | |
400 | if (vkms_crc_parse_source(src_name, enabled: &enabled) < 0) { |
401 | DRM_DEBUG_DRIVER("unknown source %s\n" , src_name); |
402 | return -EINVAL; |
403 | } |
404 | |
405 | *values_cnt = 1; |
406 | |
407 | return 0; |
408 | } |
409 | |
410 | void vkms_set_composer(struct vkms_output *out, bool enabled) |
411 | { |
412 | bool old_enabled; |
413 | |
414 | if (enabled) |
415 | drm_crtc_vblank_get(crtc: &out->crtc); |
416 | |
417 | spin_lock_irq(lock: &out->lock); |
418 | old_enabled = out->composer_enabled; |
419 | out->composer_enabled = enabled; |
420 | spin_unlock_irq(lock: &out->lock); |
421 | |
422 | if (old_enabled) |
423 | drm_crtc_vblank_put(crtc: &out->crtc); |
424 | } |
425 | |
426 | int vkms_set_crc_source(struct drm_crtc *crtc, const char *src_name) |
427 | { |
428 | struct vkms_output *out = drm_crtc_to_vkms_output(crtc); |
429 | bool enabled = false; |
430 | int ret = 0; |
431 | |
432 | ret = vkms_crc_parse_source(src_name, enabled: &enabled); |
433 | |
434 | vkms_set_composer(out, enabled); |
435 | |
436 | return ret; |
437 | } |
438 | |