mod.rs source code [crates/rav1e/src/scenechange/mod.rs]

1	// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2	//
3	// This source code is subject to the terms of the BSD 2 Clause License and
4	// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5	// was not distributed with this source code in the LICENSE file, you can
6	// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7	// Media Patent License 1.0 was not distributed with this source code in the
8	// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10	mod fast;
11	mod standard;
12
13	use crate::api::{EncoderConfig, SceneDetectionSpeed};
14	use crate::cpu_features::CpuFeatureLevel;
15	use crate::encoder::Sequence;
16	use crate::frame::*;
17	use crate::me::RefMEStats;
18	use crate::util::Pixel;
19	use std::collections::BTreeMap;
20	use std::num::NonZeroUsize;
21	use std::sync::Arc;
22	use std::{cmp, u64};
23
24	use self::fast::{detect_scale_factor, FAST_THRESHOLD};
25
26	/// Experiments have determined this to be an optimal threshold
27	const IMP_BLOCK_DIFF_THRESHOLD: f64 = `7.0`;
28
29	/// Fast integer division where divisor is a nonzero power of 2
30	#[inline(always)]
31	pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize {
32	debug_assert!(d.is_power_of_two());
33
34	n >> d.trailing_zeros()
35	}
36
37	struct ScaleFunction<T: Pixel> {
38	downscale_in_place:
39	fn(/ &self: / &Plane<T>, / in_plane: / &mut Plane<T>),
40	downscale: fn(/ &self: / &Plane<T>) -> Plane<T>,
41	factor: NonZeroUsize,
42	}
43
44	impl<T: Pixel> ScaleFunction<T> {
45	fn from_scale<const SCALE: usize>() -> Self {
46	assert!(
47	SCALE.is_power_of_two(),
48	"Scaling factor needs to be a nonzero power of two"
49	);
50
51	Self {
52	downscale: Plane::downscale::<SCALE>,
53	downscale_in_place: Plane::downscale_in_place::<SCALE>,
54	factor: NonZeroUsize::new(SCALE).unwrap(),
55	}
56	}
57	}
58	/// Runs keyframe detection on frames from the lookahead queue.
59	pub struct SceneChangeDetector<T: Pixel> {
60	/// Minimum average difference between YUV deltas that will trigger a scene change.
61	threshold: f64,
62	/// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates.
63	speed_mode: SceneDetectionSpeed,
64	/// Downscaling function for fast scene detection
65	scale_func: Option<ScaleFunction<T>>,
66	/// Frame buffer for scaled frames
67	downscaled_frame_buffer: Option<[Plane<T>; `2`]>,
68	/// Buffer for FrameMEStats for cost scenecut
69	frame_me_stats_buffer: Option<RefMEStats>,
70	/// Deque offset for current
71	lookahead_offset: usize,
72	/// Start deque offset based on lookahead
73	deque_offset: usize,
74	/// Scenechange results for adaptive threshold
75	score_deque: Vec<ScenecutResult>,
76	/// Number of pixels in scaled frame for fast mode
77	pixels: usize,
78	/// The bit depth of the video.
79	bit_depth: usize,
80	/// The CPU feature level to be used.
81	cpu_feature_level: CpuFeatureLevel,
82	encoder_config: EncoderConfig,
83	sequence: Arc<Sequence>,
84	/// Calculated intra costs for each input frame.
85	/// These are cached for reuse later in rav1e.
86	pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>,
87	/// Temporary buffer used by estimate_intra_costs.
88	pub(crate) temp_plane: Option<Plane<T>>,
89	}
90
91	impl<T: Pixel> SceneChangeDetector<T> {
92	pub fn new(
93	encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
94	lookahead_distance: usize, sequence: Arc<Sequence>,
95	) -> Self {
96	let bit_depth = encoder_config.bit_depth;
97	let speed_mode = if encoder_config.low_latency {
98	SceneDetectionSpeed::Fast
99	} else {
100	encoder_config.speed_settings.scene_detection_mode
101	};
102
103	// Downscaling function for fast scene detection
104	let scale_func = detect_scale_factor(&sequence, speed_mode);
105
106	// Set lookahead offset to 5 if normal lookahead available
107	let lookahead_offset = if lookahead_distance >= `5` { `5` } else { `0` };
108	let deque_offset = lookahead_offset;
109
110	let score_deque = Vec::with_capacity(`5` + lookahead_distance);
111
112	// Downscaling factor for fast scenedetect (is currently always a power of 2)
113	let factor =
114	scale_func.as_ref().map_or(NonZeroUsize::new(`1`).unwrap(), \|x\| x.factor);
115
116	let pixels = if speed_mode == SceneDetectionSpeed::Fast {
117	fast_idiv(sequence.max_frame_height as usize, factor)
118	* fast_idiv(sequence.max_frame_width as usize, factor)
119	} else {
120	`1`
121	};
122
123	let threshold = FAST_THRESHOLD * (bit_depth as f64) / `8.0`;
124
125	Self {
126	threshold,
127	speed_mode,
128	scale_func,
129	downscaled_frame_buffer: None,
130	frame_me_stats_buffer: None,
131	lookahead_offset,
132	deque_offset,
133	score_deque,
134	pixels,
135	bit_depth,
136	cpu_feature_level,
137	encoder_config,
138	sequence,
139	intra_costs: BTreeMap::new(),
140	temp_plane: None,
141	}
142	}
143
144	/// Runs keyframe detection on the next frame in the lookahead queue.
145	///
146	/// This function requires that a subset of input frames
147	/// is passed to it in order, and that `keyframes` is only
148	/// updated from this method. `input_frameno` should correspond
149	/// to the second frame in `frame_set`.
150	///
151	/// This will gracefully handle the first frame in the video as well.
152	#[profiling::function]
153	pub fn analyze_next_frame(
154	&mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
155	previous_keyframe: u64,
156	) -> bool {
157	// Use score deque for adaptive threshold for scene cut
158	// Declare score_deque offset based on lookahead for scene change scores
159
160	// Find the distance to the previous keyframe.
161	let distance = input_frameno - previous_keyframe;
162
163	if frame_set.len() <= self.lookahead_offset {
164	// Don't insert keyframes in the last few frames of the video
165	// This is basically a scene flash and a waste of bits
166	return `false`;
167	}
168
169	if self.encoder_config.speed_settings.scene_detection_mode
170	== SceneDetectionSpeed::None
171	{
172	if let Some(`true`) = self.handle_min_max_intervals(distance) {
173	return `true`;
174	};
175	return `false`;
176	}
177
178	// Initialization of score deque
179	// based on frame set length
180	if self.deque_offset > `0`
181	&& frame_set.len() > self.deque_offset + `1`
182	&& self.score_deque.is_empty()
183	{
184	self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
185	} else if self.score_deque.is_empty() {
186	self.initialize_score_deque(
187	frame_set,
188	input_frameno,
189	frame_set.len() - `1`,
190	);
191
192	self.deque_offset = frame_set.len() - `2`;
193	}
194	// Running single frame comparison and adding it to deque
195	// Decrease deque offset if there is no new frames
196	if frame_set.len() > self.deque_offset + `1` {
197	self.run_comparison(
198	frame_set[self.deque_offset].clone(),
199	frame_set[self.deque_offset + `1`].clone(),
200	input_frameno + self.deque_offset as u64,
201	);
202	} else {
203	self.deque_offset -= `1`;
204	}
205
206	// Adaptive scenecut check
207	let (scenecut, score) = self.adaptive_scenecut();
208	let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
209	debug!(
210	"[SC-Detect] Frame {}: Raw={:`5`.`1`} ImpBl={:`5`.`1`} Bwd={:`5`.`1`} Fwd={:`5`.`1`} Th={:.`1`} {}",
211	input_frameno,
212	score.inter_cost,
213	score.imp_block_cost,
214	score.backward_adjusted_cost,
215	score.forward_adjusted_cost,
216	score.threshold,
217	if scenecut { "Scenecut" } else { "No cut" }
218	);
219
220	// Keep score deque of 5 backward frames
221	// and forward frames of length of lookahead offset
222	if self.score_deque.len() > `5` + self.lookahead_offset {
223	self.score_deque.pop();
224	}
225
226	scenecut
227	}
228
229	fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> {
230	// Handle minimum and maximum keyframe intervals.
231	if distance < self.encoder_config.min_key_frame_interval {
232	return Some(`false`);
233	}
234	if distance >= self.encoder_config.max_key_frame_interval {
235	return Some(`true`);
236	}
237	None
238	}
239
240	// Initially fill score deque with frame scores
241	fn initialize_score_deque(
242	&mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
243	init_len: usize,
244	) {
245	for x in `0`..init_len {
246	self.run_comparison(
247	frame_set[x].clone(),
248	frame_set[x + `1`].clone(),
249	input_frameno + x as u64,
250	);
251	}
252	}
253
254	/// Runs scene change comparison beetween 2 given frames
255	/// Insert result to start of score deque
256	fn run_comparison(
257	&mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
258	input_frameno: u64,
259	) {
260	let mut result = if self.speed_mode == SceneDetectionSpeed::Fast {
261	self.fast_scenecut(frame1, frame2)
262	} else {
263	self.cost_scenecut(frame1, frame2, input_frameno)
264	};
265
266	// Subtract the highest metric value of surrounding frames from the current one
267	// It makes the peaks in the metric more distinct
268	if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > `0` {
269	if input_frameno == `1` {
270	// Accounts for the second frame not having a score to adjust against.
271	// It should always be 0 because the first frame of the video is always a keyframe.
272	result.backward_adjusted_cost = `0.0`;
273	} else {
274	let mut adjusted_cost = f64::MAX;
275	for other_cost in
276	self.score_deque.iter().take(self.deque_offset).map(\|i\| i.inter_cost)
277	{
278	let this_cost = result.inter_cost - other_cost;
279	if this_cost < adjusted_cost {
280	adjusted_cost = this_cost;
281	}
282	if adjusted_cost < `0.0` {
283	adjusted_cost = `0.0`;
284	break;
285	}
286	}
287	result.backward_adjusted_cost = adjusted_cost;
288	}
289	if !self.score_deque.is_empty() {
290	for i in `0`..(cmp::min(self.deque_offset, self.score_deque.len())) {
291	let adjusted_cost =
292	self.score_deque[i].inter_cost - result.inter_cost;
293	if i == `0`
294	\|\| adjusted_cost < self.score_deque[i].forward_adjusted_cost
295	{
296	self.score_deque[i].forward_adjusted_cost = adjusted_cost;
297	}
298	if self.score_deque[i].forward_adjusted_cost < `0.0` {
299	self.score_deque[i].forward_adjusted_cost = `0.0`;
300	}
301	}
302	}
303	}
304	self.score_deque.insert(`0`, result);
305	}
306
307	/// Compares current scene score to adapted threshold based on previous scores
308	/// Value of current frame is offset by lookahead, if lookahead >=5
309	/// Returns true if current scene score is higher than adapted threshold
310	fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
311	let score = self.score_deque[self.deque_offset];
312
313	// We use the importance block algorithm's cost metrics as a secondary algorithm
314	// because, although it struggles in certain scenarios such as
315	// finding the end of a pan, it is very good at detecting hard scenecuts
316	// or detecting if a pan exists.
317	// Because of this, we only consider a frame for a scenechange if
318	// the importance block algorithm is over the threshold either on this frame (hard scenecut)
319	// or within the past few frames (pan). This helps filter out a few false positives
320	// produced by the cost-based algorithm.
321	let imp_block_threshold =
322	IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / `8.0`;
323	if !&self.score_deque[self.deque_offset..]
324	.iter()
325	.any(\|result\| result.imp_block_cost >= imp_block_threshold)
326	{
327	return (`false`, score);
328	}
329
330	let cost = score.forward_adjusted_cost;
331	if cost >= score.threshold {
332	let back_deque = &self.score_deque[self.deque_offset + `1`..];
333	let forward_deque = &self.score_deque[..self.deque_offset];
334	let back_over_tr_count = back_deque
335	.iter()
336	.filter(\|result\| result.backward_adjusted_cost >= result.threshold)
337	.count();
338	let forward_over_tr_count = forward_deque
339	.iter()
340	.filter(\|result\| result.forward_adjusted_cost >= result.threshold)
341	.count();
342
343	// Check for scenecut after the flashes
344	// No frames over threshold forward
345	// and some frames over threshold backward
346	let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast {
347	// Fast scenecut is more sensitive to false flash detection,
348	// so we want more "evidence" of there being a flash before creating a keyframe.
349	`2`
350	} else {
351	`1`
352	};
353	if forward_over_tr_count == `0` && back_over_tr_count >= back_count_req {
354	return (`true`, score);
355	}
356
357	// Check for scenecut before flash
358	// If distance longer than max flash length
359	if back_over_tr_count == `0`
360	&& forward_over_tr_count == `1`
361	&& forward_deque[`0`].forward_adjusted_cost >= forward_deque[`0`].threshold
362	{
363	return (`true`, score);
364	}
365
366	if back_over_tr_count != `0` \|\| forward_over_tr_count != `0` {
367	return (`false`, score);
368	}
369	}
370
371	(cost >= score.threshold, score)
372	}
373	}
374
375	#[derive(Debug, Clone, Copy)]
376	struct ScenecutResult {
377	inter_cost: f64,
378	imp_block_cost: f64,
379	backward_adjusted_cost: f64,
380	forward_adjusted_cost: f64,
381	threshold: f64,
382	}
383