1// Copyright (c) 2018-2022, The rav1e contributors. All rights reserved
2//
3// This source code is subject to the terms of the BSD 2 Clause License and
4// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5// was not distributed with this source code in the LICENSE file, you can
6// obtain it at www.aomedia.org/license/software. If the Alliance for Open
7// Media Patent License 1.0 was not distributed with this source code in the
8// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10mod fast;
11mod standard;
12
13use crate::api::{EncoderConfig, SceneDetectionSpeed};
14use crate::cpu_features::CpuFeatureLevel;
15use crate::encoder::Sequence;
16use crate::frame::*;
17use crate::me::RefMEStats;
18use crate::util::Pixel;
19use std::collections::BTreeMap;
20use std::num::NonZeroUsize;
21use std::sync::Arc;
22use std::{cmp, u64};
23
24use self::fast::{detect_scale_factor, FAST_THRESHOLD};
25
26/// Experiments have determined this to be an optimal threshold
27const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0;
28
29/// Fast integer division where divisor is a nonzero power of 2
30#[inline(always)]
31pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize {
32 debug_assert!(d.is_power_of_two());
33
34 n >> d.trailing_zeros()
35}
36
37struct ScaleFunction<T: Pixel> {
38 downscale_in_place:
39 fn(/* &self: */ &Plane<T>, /* in_plane: */ &mut Plane<T>),
40 downscale: fn(/* &self: */ &Plane<T>) -> Plane<T>,
41 factor: NonZeroUsize,
42}
43
44impl<T: Pixel> ScaleFunction<T> {
45 fn from_scale<const SCALE: usize>() -> Self {
46 assert!(
47 SCALE.is_power_of_two(),
48 "Scaling factor needs to be a nonzero power of two"
49 );
50
51 Self {
52 downscale: Plane::downscale::<SCALE>,
53 downscale_in_place: Plane::downscale_in_place::<SCALE>,
54 factor: NonZeroUsize::new(SCALE).unwrap(),
55 }
56 }
57}
58/// Runs keyframe detection on frames from the lookahead queue.
59pub struct SceneChangeDetector<T: Pixel> {
60 /// Minimum average difference between YUV deltas that will trigger a scene change.
61 threshold: f64,
62 /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates.
63 speed_mode: SceneDetectionSpeed,
64 /// Downscaling function for fast scene detection
65 scale_func: Option<ScaleFunction<T>>,
66 /// Frame buffer for scaled frames
67 downscaled_frame_buffer: Option<[Plane<T>; 2]>,
68 /// Buffer for FrameMEStats for cost scenecut
69 frame_me_stats_buffer: Option<RefMEStats>,
70 /// Deque offset for current
71 lookahead_offset: usize,
72 /// Start deque offset based on lookahead
73 deque_offset: usize,
74 /// Scenechange results for adaptive threshold
75 score_deque: Vec<ScenecutResult>,
76 /// Number of pixels in scaled frame for fast mode
77 pixels: usize,
78 /// The bit depth of the video.
79 bit_depth: usize,
80 /// The CPU feature level to be used.
81 cpu_feature_level: CpuFeatureLevel,
82 encoder_config: EncoderConfig,
83 sequence: Arc<Sequence>,
84 /// Calculated intra costs for each input frame.
85 /// These are cached for reuse later in rav1e.
86 pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>,
87 /// Temporary buffer used by estimate_intra_costs.
88 pub(crate) temp_plane: Option<Plane<T>>,
89}
90
91impl<T: Pixel> SceneChangeDetector<T> {
92 pub fn new(
93 encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
94 lookahead_distance: usize, sequence: Arc<Sequence>,
95 ) -> Self {
96 let bit_depth = encoder_config.bit_depth;
97 let speed_mode = if encoder_config.low_latency {
98 SceneDetectionSpeed::Fast
99 } else {
100 encoder_config.speed_settings.scene_detection_mode
101 };
102
103 // Downscaling function for fast scene detection
104 let scale_func = detect_scale_factor(&sequence, speed_mode);
105
106 // Set lookahead offset to 5 if normal lookahead available
107 let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
108 let deque_offset = lookahead_offset;
109
110 let score_deque = Vec::with_capacity(5 + lookahead_distance);
111
112 // Downscaling factor for fast scenedetect (is currently always a power of 2)
113 let factor =
114 scale_func.as_ref().map_or(NonZeroUsize::new(1).unwrap(), |x| x.factor);
115
116 let pixels = if speed_mode == SceneDetectionSpeed::Fast {
117 fast_idiv(sequence.max_frame_height as usize, factor)
118 * fast_idiv(sequence.max_frame_width as usize, factor)
119 } else {
120 1
121 };
122
123 let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0;
124
125 Self {
126 threshold,
127 speed_mode,
128 scale_func,
129 downscaled_frame_buffer: None,
130 frame_me_stats_buffer: None,
131 lookahead_offset,
132 deque_offset,
133 score_deque,
134 pixels,
135 bit_depth,
136 cpu_feature_level,
137 encoder_config,
138 sequence,
139 intra_costs: BTreeMap::new(),
140 temp_plane: None,
141 }
142 }
143
144 /// Runs keyframe detection on the next frame in the lookahead queue.
145 ///
146 /// This function requires that a subset of input frames
147 /// is passed to it in order, and that `keyframes` is only
148 /// updated from this method. `input_frameno` should correspond
149 /// to the second frame in `frame_set`.
150 ///
151 /// This will gracefully handle the first frame in the video as well.
152 #[profiling::function]
153 pub fn analyze_next_frame(
154 &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
155 previous_keyframe: u64,
156 ) -> bool {
157 // Use score deque for adaptive threshold for scene cut
158 // Declare score_deque offset based on lookahead for scene change scores
159
160 // Find the distance to the previous keyframe.
161 let distance = input_frameno - previous_keyframe;
162
163 if frame_set.len() <= self.lookahead_offset {
164 // Don't insert keyframes in the last few frames of the video
165 // This is basically a scene flash and a waste of bits
166 return false;
167 }
168
169 if self.encoder_config.speed_settings.scene_detection_mode
170 == SceneDetectionSpeed::None
171 {
172 if let Some(true) = self.handle_min_max_intervals(distance) {
173 return true;
174 };
175 return false;
176 }
177
178 // Initialization of score deque
179 // based on frame set length
180 if self.deque_offset > 0
181 && frame_set.len() > self.deque_offset + 1
182 && self.score_deque.is_empty()
183 {
184 self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
185 } else if self.score_deque.is_empty() {
186 self.initialize_score_deque(
187 frame_set,
188 input_frameno,
189 frame_set.len() - 1,
190 );
191
192 self.deque_offset = frame_set.len() - 2;
193 }
194 // Running single frame comparison and adding it to deque
195 // Decrease deque offset if there is no new frames
196 if frame_set.len() > self.deque_offset + 1 {
197 self.run_comparison(
198 frame_set[self.deque_offset].clone(),
199 frame_set[self.deque_offset + 1].clone(),
200 input_frameno + self.deque_offset as u64,
201 );
202 } else {
203 self.deque_offset -= 1;
204 }
205
206 // Adaptive scenecut check
207 let (scenecut, score) = self.adaptive_scenecut();
208 let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
209 debug!(
210 "[SC-Detect] Frame {}: Raw={:5.1} ImpBl={:5.1} Bwd={:5.1} Fwd={:5.1} Th={:.1} {}",
211 input_frameno,
212 score.inter_cost,
213 score.imp_block_cost,
214 score.backward_adjusted_cost,
215 score.forward_adjusted_cost,
216 score.threshold,
217 if scenecut { "Scenecut" } else { "No cut" }
218 );
219
220 // Keep score deque of 5 backward frames
221 // and forward frames of length of lookahead offset
222 if self.score_deque.len() > 5 + self.lookahead_offset {
223 self.score_deque.pop();
224 }
225
226 scenecut
227 }
228
229 fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> {
230 // Handle minimum and maximum keyframe intervals.
231 if distance < self.encoder_config.min_key_frame_interval {
232 return Some(false);
233 }
234 if distance >= self.encoder_config.max_key_frame_interval {
235 return Some(true);
236 }
237 None
238 }
239
240 // Initially fill score deque with frame scores
241 fn initialize_score_deque(
242 &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64,
243 init_len: usize,
244 ) {
245 for x in 0..init_len {
246 self.run_comparison(
247 frame_set[x].clone(),
248 frame_set[x + 1].clone(),
249 input_frameno + x as u64,
250 );
251 }
252 }
253
254 /// Runs scene change comparison beetween 2 given frames
255 /// Insert result to start of score deque
256 fn run_comparison(
257 &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
258 input_frameno: u64,
259 ) {
260 let mut result = if self.speed_mode == SceneDetectionSpeed::Fast {
261 self.fast_scenecut(frame1, frame2)
262 } else {
263 self.cost_scenecut(frame1, frame2, input_frameno)
264 };
265
266 // Subtract the highest metric value of surrounding frames from the current one
267 // It makes the peaks in the metric more distinct
268 if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 {
269 if input_frameno == 1 {
270 // Accounts for the second frame not having a score to adjust against.
271 // It should always be 0 because the first frame of the video is always a keyframe.
272 result.backward_adjusted_cost = 0.0;
273 } else {
274 let mut adjusted_cost = f64::MAX;
275 for other_cost in
276 self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost)
277 {
278 let this_cost = result.inter_cost - other_cost;
279 if this_cost < adjusted_cost {
280 adjusted_cost = this_cost;
281 }
282 if adjusted_cost < 0.0 {
283 adjusted_cost = 0.0;
284 break;
285 }
286 }
287 result.backward_adjusted_cost = adjusted_cost;
288 }
289 if !self.score_deque.is_empty() {
290 for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) {
291 let adjusted_cost =
292 self.score_deque[i].inter_cost - result.inter_cost;
293 if i == 0
294 || adjusted_cost < self.score_deque[i].forward_adjusted_cost
295 {
296 self.score_deque[i].forward_adjusted_cost = adjusted_cost;
297 }
298 if self.score_deque[i].forward_adjusted_cost < 0.0 {
299 self.score_deque[i].forward_adjusted_cost = 0.0;
300 }
301 }
302 }
303 }
304 self.score_deque.insert(0, result);
305 }
306
307 /// Compares current scene score to adapted threshold based on previous scores
308 /// Value of current frame is offset by lookahead, if lookahead >=5
309 /// Returns true if current scene score is higher than adapted threshold
310 fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
311 let score = self.score_deque[self.deque_offset];
312
313 // We use the importance block algorithm's cost metrics as a secondary algorithm
314 // because, although it struggles in certain scenarios such as
315 // finding the end of a pan, it is very good at detecting hard scenecuts
316 // or detecting if a pan exists.
317 // Because of this, we only consider a frame for a scenechange if
318 // the importance block algorithm is over the threshold either on this frame (hard scenecut)
319 // or within the past few frames (pan). This helps filter out a few false positives
320 // produced by the cost-based algorithm.
321 let imp_block_threshold =
322 IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0;
323 if !&self.score_deque[self.deque_offset..]
324 .iter()
325 .any(|result| result.imp_block_cost >= imp_block_threshold)
326 {
327 return (false, score);
328 }
329
330 let cost = score.forward_adjusted_cost;
331 if cost >= score.threshold {
332 let back_deque = &self.score_deque[self.deque_offset + 1..];
333 let forward_deque = &self.score_deque[..self.deque_offset];
334 let back_over_tr_count = back_deque
335 .iter()
336 .filter(|result| result.backward_adjusted_cost >= result.threshold)
337 .count();
338 let forward_over_tr_count = forward_deque
339 .iter()
340 .filter(|result| result.forward_adjusted_cost >= result.threshold)
341 .count();
342
343 // Check for scenecut after the flashes
344 // No frames over threshold forward
345 // and some frames over threshold backward
346 let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast {
347 // Fast scenecut is more sensitive to false flash detection,
348 // so we want more "evidence" of there being a flash before creating a keyframe.
349 2
350 } else {
351 1
352 };
353 if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req {
354 return (true, score);
355 }
356
357 // Check for scenecut before flash
358 // If distance longer than max flash length
359 if back_over_tr_count == 0
360 && forward_over_tr_count == 1
361 && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold
362 {
363 return (true, score);
364 }
365
366 if back_over_tr_count != 0 || forward_over_tr_count != 0 {
367 return (false, score);
368 }
369 }
370
371 (cost >= score.threshold, score)
372 }
373}
374
375#[derive(Debug, Clone, Copy)]
376struct ScenecutResult {
377 inter_cost: f64,
378 imp_block_cost: f64,
379 backward_adjusted_cost: f64,
380 forward_adjusted_cost: f64,
381 threshold: f64,
382}
383