1 | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
2 | // |
3 | // This source code is subject to the terms of the BSD 2 Clause License and |
4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
5 | // was not distributed with this source code in the LICENSE file, you can |
6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
7 | // Media Patent License 1.0 was not distributed with this source code in the |
8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
9 | |
10 | mod fast; |
11 | mod standard; |
12 | |
13 | use crate::api::{EncoderConfig, SceneDetectionSpeed}; |
14 | use crate::cpu_features::CpuFeatureLevel; |
15 | use crate::encoder::Sequence; |
16 | use crate::frame::*; |
17 | use crate::me::RefMEStats; |
18 | use crate::util::Pixel; |
19 | use std::collections::BTreeMap; |
20 | use std::num::NonZeroUsize; |
21 | use std::sync::Arc; |
22 | use std::{cmp, u64}; |
23 | |
24 | use self::fast::{detect_scale_factor, FAST_THRESHOLD}; |
25 | |
26 | /// Experiments have determined this to be an optimal threshold |
27 | const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0; |
28 | |
29 | /// Fast integer division where divisor is a nonzero power of 2 |
30 | #[inline (always)] |
31 | pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize { |
32 | debug_assert!(d.is_power_of_two()); |
33 | |
34 | n >> d.trailing_zeros() |
35 | } |
36 | |
37 | struct ScaleFunction<T: Pixel> { |
38 | downscale_in_place: |
39 | fn(/* &self: */ &Plane<T>, /* in_plane: */ &mut Plane<T>), |
40 | downscale: fn(/* &self: */ &Plane<T>) -> Plane<T>, |
41 | factor: NonZeroUsize, |
42 | } |
43 | |
44 | impl<T: Pixel> ScaleFunction<T> { |
45 | fn from_scale<const SCALE: usize>() -> Self { |
46 | assert!( |
47 | SCALE.is_power_of_two(), |
48 | "Scaling factor needs to be a nonzero power of two" |
49 | ); |
50 | |
51 | Self { |
52 | downscale: Plane::downscale::<SCALE>, |
53 | downscale_in_place: Plane::downscale_in_place::<SCALE>, |
54 | factor: NonZeroUsize::new(SCALE).unwrap(), |
55 | } |
56 | } |
57 | } |
58 | /// Runs keyframe detection on frames from the lookahead queue. |
59 | pub struct SceneChangeDetector<T: Pixel> { |
60 | /// Minimum average difference between YUV deltas that will trigger a scene change. |
61 | threshold: f64, |
62 | /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates. |
63 | speed_mode: SceneDetectionSpeed, |
64 | /// Downscaling function for fast scene detection |
65 | scale_func: Option<ScaleFunction<T>>, |
66 | /// Frame buffer for scaled frames |
67 | downscaled_frame_buffer: Option<[Plane<T>; 2]>, |
68 | /// Buffer for FrameMEStats for cost scenecut |
69 | frame_me_stats_buffer: Option<RefMEStats>, |
70 | /// Deque offset for current |
71 | lookahead_offset: usize, |
72 | /// Start deque offset based on lookahead |
73 | deque_offset: usize, |
74 | /// Scenechange results for adaptive threshold |
75 | score_deque: Vec<ScenecutResult>, |
76 | /// Number of pixels in scaled frame for fast mode |
77 | pixels: usize, |
78 | /// The bit depth of the video. |
79 | bit_depth: usize, |
80 | /// The CPU feature level to be used. |
81 | cpu_feature_level: CpuFeatureLevel, |
82 | encoder_config: EncoderConfig, |
83 | sequence: Arc<Sequence>, |
84 | /// Calculated intra costs for each input frame. |
85 | /// These are cached for reuse later in rav1e. |
86 | pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>, |
87 | /// Temporary buffer used by estimate_intra_costs. |
88 | pub(crate) temp_plane: Option<Plane<T>>, |
89 | } |
90 | |
91 | impl<T: Pixel> SceneChangeDetector<T> { |
92 | pub fn new( |
93 | encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel, |
94 | lookahead_distance: usize, sequence: Arc<Sequence>, |
95 | ) -> Self { |
96 | let bit_depth = encoder_config.bit_depth; |
97 | let speed_mode = if encoder_config.low_latency { |
98 | SceneDetectionSpeed::Fast |
99 | } else { |
100 | encoder_config.speed_settings.scene_detection_mode |
101 | }; |
102 | |
103 | // Downscaling function for fast scene detection |
104 | let scale_func = detect_scale_factor(&sequence, speed_mode); |
105 | |
106 | // Set lookahead offset to 5 if normal lookahead available |
107 | let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 }; |
108 | let deque_offset = lookahead_offset; |
109 | |
110 | let score_deque = Vec::with_capacity(5 + lookahead_distance); |
111 | |
112 | // Downscaling factor for fast scenedetect (is currently always a power of 2) |
113 | let factor = |
114 | scale_func.as_ref().map_or(NonZeroUsize::new(1).unwrap(), |x| x.factor); |
115 | |
116 | let pixels = if speed_mode == SceneDetectionSpeed::Fast { |
117 | fast_idiv(sequence.max_frame_height as usize, factor) |
118 | * fast_idiv(sequence.max_frame_width as usize, factor) |
119 | } else { |
120 | 1 |
121 | }; |
122 | |
123 | let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0; |
124 | |
125 | Self { |
126 | threshold, |
127 | speed_mode, |
128 | scale_func, |
129 | downscaled_frame_buffer: None, |
130 | frame_me_stats_buffer: None, |
131 | lookahead_offset, |
132 | deque_offset, |
133 | score_deque, |
134 | pixels, |
135 | bit_depth, |
136 | cpu_feature_level, |
137 | encoder_config, |
138 | sequence, |
139 | intra_costs: BTreeMap::new(), |
140 | temp_plane: None, |
141 | } |
142 | } |
143 | |
144 | /// Runs keyframe detection on the next frame in the lookahead queue. |
145 | /// |
146 | /// This function requires that a subset of input frames |
147 | /// is passed to it in order, and that `keyframes` is only |
148 | /// updated from this method. `input_frameno` should correspond |
149 | /// to the second frame in `frame_set`. |
150 | /// |
151 | /// This will gracefully handle the first frame in the video as well. |
152 | #[profiling::function ] |
153 | pub fn analyze_next_frame( |
154 | &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64, |
155 | previous_keyframe: u64, |
156 | ) -> bool { |
157 | // Use score deque for adaptive threshold for scene cut |
158 | // Declare score_deque offset based on lookahead for scene change scores |
159 | |
160 | // Find the distance to the previous keyframe. |
161 | let distance = input_frameno - previous_keyframe; |
162 | |
163 | if frame_set.len() <= self.lookahead_offset { |
164 | // Don't insert keyframes in the last few frames of the video |
165 | // This is basically a scene flash and a waste of bits |
166 | return false; |
167 | } |
168 | |
169 | if self.encoder_config.speed_settings.scene_detection_mode |
170 | == SceneDetectionSpeed::None |
171 | { |
172 | if let Some(true) = self.handle_min_max_intervals(distance) { |
173 | return true; |
174 | }; |
175 | return false; |
176 | } |
177 | |
178 | // Initialization of score deque |
179 | // based on frame set length |
180 | if self.deque_offset > 0 |
181 | && frame_set.len() > self.deque_offset + 1 |
182 | && self.score_deque.is_empty() |
183 | { |
184 | self.initialize_score_deque(frame_set, input_frameno, self.deque_offset); |
185 | } else if self.score_deque.is_empty() { |
186 | self.initialize_score_deque( |
187 | frame_set, |
188 | input_frameno, |
189 | frame_set.len() - 1, |
190 | ); |
191 | |
192 | self.deque_offset = frame_set.len() - 2; |
193 | } |
194 | // Running single frame comparison and adding it to deque |
195 | // Decrease deque offset if there is no new frames |
196 | if frame_set.len() > self.deque_offset + 1 { |
197 | self.run_comparison( |
198 | frame_set[self.deque_offset].clone(), |
199 | frame_set[self.deque_offset + 1].clone(), |
200 | input_frameno + self.deque_offset as u64, |
201 | ); |
202 | } else { |
203 | self.deque_offset -= 1; |
204 | } |
205 | |
206 | // Adaptive scenecut check |
207 | let (scenecut, score) = self.adaptive_scenecut(); |
208 | let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut); |
209 | debug!( |
210 | "[SC-Detect] Frame {}: Raw= {:5.1} ImpBl= {:5.1} Bwd= {:5.1} Fwd= {:5.1} Th= {:.1} {}" , |
211 | input_frameno, |
212 | score.inter_cost, |
213 | score.imp_block_cost, |
214 | score.backward_adjusted_cost, |
215 | score.forward_adjusted_cost, |
216 | score.threshold, |
217 | if scenecut { "Scenecut" } else { "No cut" } |
218 | ); |
219 | |
220 | // Keep score deque of 5 backward frames |
221 | // and forward frames of length of lookahead offset |
222 | if self.score_deque.len() > 5 + self.lookahead_offset { |
223 | self.score_deque.pop(); |
224 | } |
225 | |
226 | scenecut |
227 | } |
228 | |
229 | fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> { |
230 | // Handle minimum and maximum keyframe intervals. |
231 | if distance < self.encoder_config.min_key_frame_interval { |
232 | return Some(false); |
233 | } |
234 | if distance >= self.encoder_config.max_key_frame_interval { |
235 | return Some(true); |
236 | } |
237 | None |
238 | } |
239 | |
240 | // Initially fill score deque with frame scores |
241 | fn initialize_score_deque( |
242 | &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64, |
243 | init_len: usize, |
244 | ) { |
245 | for x in 0..init_len { |
246 | self.run_comparison( |
247 | frame_set[x].clone(), |
248 | frame_set[x + 1].clone(), |
249 | input_frameno + x as u64, |
250 | ); |
251 | } |
252 | } |
253 | |
254 | /// Runs scene change comparison beetween 2 given frames |
255 | /// Insert result to start of score deque |
256 | fn run_comparison( |
257 | &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, |
258 | input_frameno: u64, |
259 | ) { |
260 | let mut result = if self.speed_mode == SceneDetectionSpeed::Fast { |
261 | self.fast_scenecut(frame1, frame2) |
262 | } else { |
263 | self.cost_scenecut(frame1, frame2, input_frameno) |
264 | }; |
265 | |
266 | // Subtract the highest metric value of surrounding frames from the current one |
267 | // It makes the peaks in the metric more distinct |
268 | if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 { |
269 | if input_frameno == 1 { |
270 | // Accounts for the second frame not having a score to adjust against. |
271 | // It should always be 0 because the first frame of the video is always a keyframe. |
272 | result.backward_adjusted_cost = 0.0; |
273 | } else { |
274 | let mut adjusted_cost = f64::MAX; |
275 | for other_cost in |
276 | self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost) |
277 | { |
278 | let this_cost = result.inter_cost - other_cost; |
279 | if this_cost < adjusted_cost { |
280 | adjusted_cost = this_cost; |
281 | } |
282 | if adjusted_cost < 0.0 { |
283 | adjusted_cost = 0.0; |
284 | break; |
285 | } |
286 | } |
287 | result.backward_adjusted_cost = adjusted_cost; |
288 | } |
289 | if !self.score_deque.is_empty() { |
290 | for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) { |
291 | let adjusted_cost = |
292 | self.score_deque[i].inter_cost - result.inter_cost; |
293 | if i == 0 |
294 | || adjusted_cost < self.score_deque[i].forward_adjusted_cost |
295 | { |
296 | self.score_deque[i].forward_adjusted_cost = adjusted_cost; |
297 | } |
298 | if self.score_deque[i].forward_adjusted_cost < 0.0 { |
299 | self.score_deque[i].forward_adjusted_cost = 0.0; |
300 | } |
301 | } |
302 | } |
303 | } |
304 | self.score_deque.insert(0, result); |
305 | } |
306 | |
307 | /// Compares current scene score to adapted threshold based on previous scores |
308 | /// Value of current frame is offset by lookahead, if lookahead >=5 |
309 | /// Returns true if current scene score is higher than adapted threshold |
310 | fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) { |
311 | let score = self.score_deque[self.deque_offset]; |
312 | |
313 | // We use the importance block algorithm's cost metrics as a secondary algorithm |
314 | // because, although it struggles in certain scenarios such as |
315 | // finding the end of a pan, it is very good at detecting hard scenecuts |
316 | // or detecting if a pan exists. |
317 | // Because of this, we only consider a frame for a scenechange if |
318 | // the importance block algorithm is over the threshold either on this frame (hard scenecut) |
319 | // or within the past few frames (pan). This helps filter out a few false positives |
320 | // produced by the cost-based algorithm. |
321 | let imp_block_threshold = |
322 | IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0; |
323 | if !&self.score_deque[self.deque_offset..] |
324 | .iter() |
325 | .any(|result| result.imp_block_cost >= imp_block_threshold) |
326 | { |
327 | return (false, score); |
328 | } |
329 | |
330 | let cost = score.forward_adjusted_cost; |
331 | if cost >= score.threshold { |
332 | let back_deque = &self.score_deque[self.deque_offset + 1..]; |
333 | let forward_deque = &self.score_deque[..self.deque_offset]; |
334 | let back_over_tr_count = back_deque |
335 | .iter() |
336 | .filter(|result| result.backward_adjusted_cost >= result.threshold) |
337 | .count(); |
338 | let forward_over_tr_count = forward_deque |
339 | .iter() |
340 | .filter(|result| result.forward_adjusted_cost >= result.threshold) |
341 | .count(); |
342 | |
343 | // Check for scenecut after the flashes |
344 | // No frames over threshold forward |
345 | // and some frames over threshold backward |
346 | let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast { |
347 | // Fast scenecut is more sensitive to false flash detection, |
348 | // so we want more "evidence" of there being a flash before creating a keyframe. |
349 | 2 |
350 | } else { |
351 | 1 |
352 | }; |
353 | if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req { |
354 | return (true, score); |
355 | } |
356 | |
357 | // Check for scenecut before flash |
358 | // If distance longer than max flash length |
359 | if back_over_tr_count == 0 |
360 | && forward_over_tr_count == 1 |
361 | && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold |
362 | { |
363 | return (true, score); |
364 | } |
365 | |
366 | if back_over_tr_count != 0 || forward_over_tr_count != 0 { |
367 | return (false, score); |
368 | } |
369 | } |
370 | |
371 | (cost >= score.threshold, score) |
372 | } |
373 | } |
374 | |
375 | #[derive (Debug, Clone, Copy)] |
376 | struct ScenecutResult { |
377 | inter_cost: f64, |
378 | imp_block_cost: f64, |
379 | backward_adjusted_cost: f64, |
380 | forward_adjusted_cost: f64, |
381 | threshold: f64, |
382 | } |
383 | |