alpha_blending.rs source code [crates/image_webp/src/alpha_blending.rs]

1	//! Optimized alpha blending routines based on libwebp
2	//!
3	//! <https://github.com/webmproject/libwebp/blob/e4f7a9f0c7c9fbfae1568bc7fa5c94b989b50872/src/demux/anim_decode.c#L215-L267>
4
/// Returns the bit offset of channel `i` inside a packed `u32` pixel.
///
/// Every channel is one byte wide, so channel `i` starts at bit `i * 8`.
const fn channel_shift(i: u32) -> u32 {
    i * 8
}
8
9	/// Blend a single channel of `src` over `dst`, given their alpha channel values.
10	/// `src` and `dst` are assumed to be NOT pre-multiplied by alpha.
11	fn blend_channel_nonpremult(
12	src: u32,
13	src_a: u8,
14	dst: u32,
15	dst_a: u8,
16	scale: u32,
17	shift: u32,
18	) -> u8 {
19	let src_channel: u8 = ((src >> shift) & `0xff`) as u8;
20	let dst_channel: u8 = ((dst >> shift) & `0xff`) as u8;
21	let blend_unscaled: u32 =
22	(u32::from(src_channel) * u32::from(src_a)) + (u32::from(dst_channel) * u32::from(dst_a));
23	debug_assert!(u64::from(blend_unscaled) < (`1u64` << `32`) / u64::from(scale));
24	((blend_unscaled * scale) >> channel_shift(`3`)) as u8
25	}
26
27	/// Blend `src` over `dst` assuming they are NOT pre-multiplied by alpha.
28	fn blend_pixel_nonpremult(src: u32, dst: u32) -> u32 {
29	let src_a = ((src >> channel_shift(`3`)) & `0xff`) as u8;
30
31	if src_a == `0` {
32	dst
33	} else {
34	let dst_a = ((dst >> channel_shift(`3`)) & `0xff`) as u8;
35	// Approximate integer arithmetic for: dst_factor_a = (dst_a (255 - src_a)) / 255*
36	// libwebp used the following formula here:
37	//let dst_factor_a = (dst_a as u32 (256 - src_a as u32)) >> 8;*
38	// however, we've found that we can use a more precise approximation without losing performance:
39	let dst_factor_a = div_by_255(u32::from(dst_a) * (`255` - u32::from(src_a)));
40	let blend_a = u32::from(src_a) + dst_factor_a;
41	let scale = (`1u32` << `24`) / blend_a;
42
43	let blend_r =
44	blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(`0`));
45	let blend_g =
46	blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(`1`));
47	let blend_b =
48	blend_channel_nonpremult(src, src_a, dst, dst_factor_a as u8, scale, channel_shift(`2`));
49	debug_assert!(u32::from(src_a) + dst_factor_a < `256`);
50
51	(u32::from(blend_r) << channel_shift(`0`))
52	\| (u32::from(blend_g) << channel_shift(`1`))
53	\| (u32::from(blend_b) << channel_shift(`2`))
54	\| (blend_a << channel_shift(`3`))
55	}
56	}
57
58	pub(crate) fn do_alpha_blending(buffer: [u8; `4`], canvas: [u8; `4`]) -> [u8; `4`] {
59	// The original C code contained different shift functions for different endianness,
60	// but they didn't work when ported to Rust directly (and probably didn't work in C either).
61	// So instead we reverse the order of bytes on big-endian here, at the interface.
62	// `from_le_bytes` is a no-op on little endian (most systems) and a cheap shuffle on big endian.
63	blend_pixel_nonpremult(src:u32::from_le_bytes(buffer), dst:u32::from_le_bytes(canvas)).to_le_bytes()
64	}
65
/// Divides by 255, rounding to nearest (as opposed to down, like regular integer division does).
///
/// The result equals `(v + 127) / 255` for every product of two bytes (`v <= 255 * 255`),
/// which is the only range this file feeds in.
///
/// TODO: cannot output 256, so the output is effectively u8. Plumb that through the code.
//
// Sources:
// https://arxiv.org/pdf/2202.02864
// https://github.com/image-rs/image-webp/issues/119#issuecomment-2544007820
#[inline]
const fn div_by_255(v: u32) -> u32 {
    (((v + 0x80) >> 8) + v + 0x80) >> 8
}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Straightforward floating-point "source over" blend of non-premultiplied pixels,
    /// used as the ground truth for the optimized integer implementation.
    fn do_alpha_blending_reference(buffer: [u8; 4], canvas: [u8; 4]) -> [u8; 4] {
        let canvas_alpha = f64::from(canvas[3]);
        let buffer_alpha = f64::from(buffer[3]);
        let blend_alpha_f64 = buffer_alpha + canvas_alpha * (1.0 - buffer_alpha / 255.0);
        // value should be between 0 and 255, this truncates the fractional part
        let blend_alpha: u8 = blend_alpha_f64 as u8;

        let blend_rgb: [u8; 3] = if blend_alpha == 0 {
            // Nothing visible: avoid dividing by a (near) zero alpha.
            [0, 0, 0]
        } else {
            let mut rgb = [0u8; 3];
            for i in 0..3 {
                let canvas_f64 = f64::from(canvas[i]);
                let buffer_f64 = f64::from(buffer[i]);

                let val = (buffer_f64 * buffer_alpha
                    + canvas_f64 * canvas_alpha * (1.0 - buffer_alpha / 255.0))
                    / blend_alpha_f64;
                // value should be between 0 and 255, this truncates the fractional part
                rgb[i] = val as u8;
            }

            rgb
        };

        [blend_rgb[0], blend_rgb[1], blend_rgb[2], blend_alpha]
    }

    /// Compares the optimized blend against the reference over a grid of inputs.
    ///
    /// Only the first colour channel is varied. The ranges are half-open, so the value 255 is
    /// never exercised, and alphas below 11 are skipped.
    #[test]
    #[ignore] // takes too long to run on CI. Run this locally when changing the function.
    fn alpha_blending_optimization() {
        for r1 in 0..u8::MAX {
            for a1 in 11..u8::MAX {
                for r2 in 0..u8::MAX {
                    for a2 in 11..u8::MAX {
                        let opt = do_alpha_blending([r1, 0, 0, a1], [r2, 0, 0, a2]);
                        let slow = do_alpha_blending_reference([r1, 0, 0, a1], [r2, 0, 0, a2]);
                        // libwebp doesn't do exact blending and so we don't either
                        for (o, s) in opt.iter().zip(slow.iter()) {
                            assert!(
                                o.abs_diff(*s) <= 3,
                                "Mismatch in results! opt: {opt:?}, slow: {slow:?}, blended values: [{r1}, 0, 0, {a1}], [{r2}, 0, 0, {a2}]"
                            );
                        }
                    }
                }
            }
        }
    }
}
131