| 1 | // Copyright (c) 2018-2022, The rav1e contributors. All rights reserved |
| 2 | // |
| 3 | // This source code is subject to the terms of the BSD 2 Clause License and |
| 4 | // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 5 | // was not distributed with this source code in the LICENSE file, you can |
| 6 | // obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 7 | // Media Patent License 1.0 was not distributed with this source code in the |
| 8 | // PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 9 | |
| 10 | mod fast; |
| 11 | mod standard; |
| 12 | |
| 13 | use crate::api::{EncoderConfig, SceneDetectionSpeed}; |
| 14 | use crate::cpu_features::CpuFeatureLevel; |
| 15 | use crate::encoder::Sequence; |
| 16 | use crate::frame::*; |
| 17 | use crate::me::RefMEStats; |
| 18 | use crate::util::Pixel; |
| 19 | use std::collections::BTreeMap; |
| 20 | use std::num::NonZeroUsize; |
| 21 | use std::sync::Arc; |
| 22 | use std::{cmp, u64}; |
| 23 | |
| 24 | use self::fast::{detect_scale_factor, FAST_THRESHOLD}; |
| 25 | |
| 26 | /// Experiments have determined this to be an optimal threshold |
| 27 | const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0; |
| 28 | |
| 29 | /// Fast integer division where divisor is a nonzero power of 2 |
| 30 | #[inline (always)] |
| 31 | pub(crate) fn fast_idiv(n: usize, d: NonZeroUsize) -> usize { |
| 32 | debug_assert!(d.is_power_of_two()); |
| 33 | |
| 34 | n >> d.trailing_zeros() |
| 35 | } |
| 36 | |
| 37 | struct ScaleFunction<T: Pixel> { |
| 38 | downscale_in_place: |
| 39 | fn(/* &self: */ &Plane<T>, /* in_plane: */ &mut Plane<T>), |
| 40 | downscale: fn(/* &self: */ &Plane<T>) -> Plane<T>, |
| 41 | factor: NonZeroUsize, |
| 42 | } |
| 43 | |
| 44 | impl<T: Pixel> ScaleFunction<T> { |
| 45 | fn from_scale<const SCALE: usize>() -> Self { |
| 46 | assert!( |
| 47 | SCALE.is_power_of_two(), |
| 48 | "Scaling factor needs to be a nonzero power of two" |
| 49 | ); |
| 50 | |
| 51 | Self { |
| 52 | downscale: Plane::downscale::<SCALE>, |
| 53 | downscale_in_place: Plane::downscale_in_place::<SCALE>, |
| 54 | factor: NonZeroUsize::new(SCALE).unwrap(), |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | /// Runs keyframe detection on frames from the lookahead queue. |
| 59 | pub struct SceneChangeDetector<T: Pixel> { |
| 60 | /// Minimum average difference between YUV deltas that will trigger a scene change. |
| 61 | threshold: f64, |
| 62 | /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates. |
| 63 | speed_mode: SceneDetectionSpeed, |
| 64 | /// Downscaling function for fast scene detection |
| 65 | scale_func: Option<ScaleFunction<T>>, |
| 66 | /// Frame buffer for scaled frames |
| 67 | downscaled_frame_buffer: Option<[Plane<T>; 2]>, |
| 68 | /// Buffer for FrameMEStats for cost scenecut |
| 69 | frame_me_stats_buffer: Option<RefMEStats>, |
| 70 | /// Deque offset for current |
| 71 | lookahead_offset: usize, |
| 72 | /// Start deque offset based on lookahead |
| 73 | deque_offset: usize, |
| 74 | /// Scenechange results for adaptive threshold |
| 75 | score_deque: Vec<ScenecutResult>, |
| 76 | /// Number of pixels in scaled frame for fast mode |
| 77 | pixels: usize, |
| 78 | /// The bit depth of the video. |
| 79 | bit_depth: usize, |
| 80 | /// The CPU feature level to be used. |
| 81 | cpu_feature_level: CpuFeatureLevel, |
| 82 | encoder_config: EncoderConfig, |
| 83 | sequence: Arc<Sequence>, |
| 84 | /// Calculated intra costs for each input frame. |
| 85 | /// These are cached for reuse later in rav1e. |
| 86 | pub(crate) intra_costs: BTreeMap<u64, Box<[u32]>>, |
| 87 | /// Temporary buffer used by estimate_intra_costs. |
| 88 | pub(crate) temp_plane: Option<Plane<T>>, |
| 89 | } |
| 90 | |
| 91 | impl<T: Pixel> SceneChangeDetector<T> { |
| 92 | pub fn new( |
| 93 | encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel, |
| 94 | lookahead_distance: usize, sequence: Arc<Sequence>, |
| 95 | ) -> Self { |
| 96 | let bit_depth = encoder_config.bit_depth; |
| 97 | let speed_mode = if encoder_config.low_latency { |
| 98 | SceneDetectionSpeed::Fast |
| 99 | } else { |
| 100 | encoder_config.speed_settings.scene_detection_mode |
| 101 | }; |
| 102 | |
| 103 | // Downscaling function for fast scene detection |
| 104 | let scale_func = detect_scale_factor(&sequence, speed_mode); |
| 105 | |
| 106 | // Set lookahead offset to 5 if normal lookahead available |
| 107 | let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 }; |
| 108 | let deque_offset = lookahead_offset; |
| 109 | |
| 110 | let score_deque = Vec::with_capacity(5 + lookahead_distance); |
| 111 | |
| 112 | // Downscaling factor for fast scenedetect (is currently always a power of 2) |
| 113 | let factor = |
| 114 | scale_func.as_ref().map_or(NonZeroUsize::new(1).unwrap(), |x| x.factor); |
| 115 | |
| 116 | let pixels = if speed_mode == SceneDetectionSpeed::Fast { |
| 117 | fast_idiv(sequence.max_frame_height as usize, factor) |
| 118 | * fast_idiv(sequence.max_frame_width as usize, factor) |
| 119 | } else { |
| 120 | 1 |
| 121 | }; |
| 122 | |
| 123 | let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0; |
| 124 | |
| 125 | Self { |
| 126 | threshold, |
| 127 | speed_mode, |
| 128 | scale_func, |
| 129 | downscaled_frame_buffer: None, |
| 130 | frame_me_stats_buffer: None, |
| 131 | lookahead_offset, |
| 132 | deque_offset, |
| 133 | score_deque, |
| 134 | pixels, |
| 135 | bit_depth, |
| 136 | cpu_feature_level, |
| 137 | encoder_config, |
| 138 | sequence, |
| 139 | intra_costs: BTreeMap::new(), |
| 140 | temp_plane: None, |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | /// Runs keyframe detection on the next frame in the lookahead queue. |
| 145 | /// |
| 146 | /// This function requires that a subset of input frames |
| 147 | /// is passed to it in order, and that `keyframes` is only |
| 148 | /// updated from this method. `input_frameno` should correspond |
| 149 | /// to the second frame in `frame_set`. |
| 150 | /// |
| 151 | /// This will gracefully handle the first frame in the video as well. |
| 152 | #[profiling::function ] |
| 153 | pub fn analyze_next_frame( |
| 154 | &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64, |
| 155 | previous_keyframe: u64, |
| 156 | ) -> bool { |
| 157 | // Use score deque for adaptive threshold for scene cut |
| 158 | // Declare score_deque offset based on lookahead for scene change scores |
| 159 | |
| 160 | // Find the distance to the previous keyframe. |
| 161 | let distance = input_frameno - previous_keyframe; |
| 162 | |
| 163 | if frame_set.len() <= self.lookahead_offset { |
| 164 | // Don't insert keyframes in the last few frames of the video |
| 165 | // This is basically a scene flash and a waste of bits |
| 166 | return false; |
| 167 | } |
| 168 | |
| 169 | if self.encoder_config.speed_settings.scene_detection_mode |
| 170 | == SceneDetectionSpeed::None |
| 171 | { |
| 172 | if let Some(true) = self.handle_min_max_intervals(distance) { |
| 173 | return true; |
| 174 | }; |
| 175 | return false; |
| 176 | } |
| 177 | |
| 178 | // Initialization of score deque |
| 179 | // based on frame set length |
| 180 | if self.deque_offset > 0 |
| 181 | && frame_set.len() > self.deque_offset + 1 |
| 182 | && self.score_deque.is_empty() |
| 183 | { |
| 184 | self.initialize_score_deque(frame_set, input_frameno, self.deque_offset); |
| 185 | } else if self.score_deque.is_empty() { |
| 186 | self.initialize_score_deque( |
| 187 | frame_set, |
| 188 | input_frameno, |
| 189 | frame_set.len() - 1, |
| 190 | ); |
| 191 | |
| 192 | self.deque_offset = frame_set.len() - 2; |
| 193 | } |
| 194 | // Running single frame comparison and adding it to deque |
| 195 | // Decrease deque offset if there is no new frames |
| 196 | if frame_set.len() > self.deque_offset + 1 { |
| 197 | self.run_comparison( |
| 198 | frame_set[self.deque_offset].clone(), |
| 199 | frame_set[self.deque_offset + 1].clone(), |
| 200 | input_frameno + self.deque_offset as u64, |
| 201 | ); |
| 202 | } else { |
| 203 | self.deque_offset -= 1; |
| 204 | } |
| 205 | |
| 206 | // Adaptive scenecut check |
| 207 | let (scenecut, score) = self.adaptive_scenecut(); |
| 208 | let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut); |
| 209 | debug!( |
| 210 | "[SC-Detect] Frame {}: Raw= {:5.1} ImpBl= {:5.1} Bwd= {:5.1} Fwd= {:5.1} Th= {:.1} {}" , |
| 211 | input_frameno, |
| 212 | score.inter_cost, |
| 213 | score.imp_block_cost, |
| 214 | score.backward_adjusted_cost, |
| 215 | score.forward_adjusted_cost, |
| 216 | score.threshold, |
| 217 | if scenecut { "Scenecut" } else { "No cut" } |
| 218 | ); |
| 219 | |
| 220 | // Keep score deque of 5 backward frames |
| 221 | // and forward frames of length of lookahead offset |
| 222 | if self.score_deque.len() > 5 + self.lookahead_offset { |
| 223 | self.score_deque.pop(); |
| 224 | } |
| 225 | |
| 226 | scenecut |
| 227 | } |
| 228 | |
| 229 | fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> { |
| 230 | // Handle minimum and maximum keyframe intervals. |
| 231 | if distance < self.encoder_config.min_key_frame_interval { |
| 232 | return Some(false); |
| 233 | } |
| 234 | if distance >= self.encoder_config.max_key_frame_interval { |
| 235 | return Some(true); |
| 236 | } |
| 237 | None |
| 238 | } |
| 239 | |
| 240 | // Initially fill score deque with frame scores |
| 241 | fn initialize_score_deque( |
| 242 | &mut self, frame_set: &[&Arc<Frame<T>>], input_frameno: u64, |
| 243 | init_len: usize, |
| 244 | ) { |
| 245 | for x in 0..init_len { |
| 246 | self.run_comparison( |
| 247 | frame_set[x].clone(), |
| 248 | frame_set[x + 1].clone(), |
| 249 | input_frameno + x as u64, |
| 250 | ); |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | /// Runs scene change comparison beetween 2 given frames |
| 255 | /// Insert result to start of score deque |
| 256 | fn run_comparison( |
| 257 | &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, |
| 258 | input_frameno: u64, |
| 259 | ) { |
| 260 | let mut result = if self.speed_mode == SceneDetectionSpeed::Fast { |
| 261 | self.fast_scenecut(frame1, frame2) |
| 262 | } else { |
| 263 | self.cost_scenecut(frame1, frame2, input_frameno) |
| 264 | }; |
| 265 | |
| 266 | // Subtract the highest metric value of surrounding frames from the current one |
| 267 | // It makes the peaks in the metric more distinct |
| 268 | if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 { |
| 269 | if input_frameno == 1 { |
| 270 | // Accounts for the second frame not having a score to adjust against. |
| 271 | // It should always be 0 because the first frame of the video is always a keyframe. |
| 272 | result.backward_adjusted_cost = 0.0; |
| 273 | } else { |
| 274 | let mut adjusted_cost = f64::MAX; |
| 275 | for other_cost in |
| 276 | self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost) |
| 277 | { |
| 278 | let this_cost = result.inter_cost - other_cost; |
| 279 | if this_cost < adjusted_cost { |
| 280 | adjusted_cost = this_cost; |
| 281 | } |
| 282 | if adjusted_cost < 0.0 { |
| 283 | adjusted_cost = 0.0; |
| 284 | break; |
| 285 | } |
| 286 | } |
| 287 | result.backward_adjusted_cost = adjusted_cost; |
| 288 | } |
| 289 | if !self.score_deque.is_empty() { |
| 290 | for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) { |
| 291 | let adjusted_cost = |
| 292 | self.score_deque[i].inter_cost - result.inter_cost; |
| 293 | if i == 0 |
| 294 | || adjusted_cost < self.score_deque[i].forward_adjusted_cost |
| 295 | { |
| 296 | self.score_deque[i].forward_adjusted_cost = adjusted_cost; |
| 297 | } |
| 298 | if self.score_deque[i].forward_adjusted_cost < 0.0 { |
| 299 | self.score_deque[i].forward_adjusted_cost = 0.0; |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | } |
| 304 | self.score_deque.insert(0, result); |
| 305 | } |
| 306 | |
| 307 | /// Compares current scene score to adapted threshold based on previous scores |
| 308 | /// Value of current frame is offset by lookahead, if lookahead >=5 |
| 309 | /// Returns true if current scene score is higher than adapted threshold |
| 310 | fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) { |
| 311 | let score = self.score_deque[self.deque_offset]; |
| 312 | |
| 313 | // We use the importance block algorithm's cost metrics as a secondary algorithm |
| 314 | // because, although it struggles in certain scenarios such as |
| 315 | // finding the end of a pan, it is very good at detecting hard scenecuts |
| 316 | // or detecting if a pan exists. |
| 317 | // Because of this, we only consider a frame for a scenechange if |
| 318 | // the importance block algorithm is over the threshold either on this frame (hard scenecut) |
| 319 | // or within the past few frames (pan). This helps filter out a few false positives |
| 320 | // produced by the cost-based algorithm. |
| 321 | let imp_block_threshold = |
| 322 | IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0; |
| 323 | if !&self.score_deque[self.deque_offset..] |
| 324 | .iter() |
| 325 | .any(|result| result.imp_block_cost >= imp_block_threshold) |
| 326 | { |
| 327 | return (false, score); |
| 328 | } |
| 329 | |
| 330 | let cost = score.forward_adjusted_cost; |
| 331 | if cost >= score.threshold { |
| 332 | let back_deque = &self.score_deque[self.deque_offset + 1..]; |
| 333 | let forward_deque = &self.score_deque[..self.deque_offset]; |
| 334 | let back_over_tr_count = back_deque |
| 335 | .iter() |
| 336 | .filter(|result| result.backward_adjusted_cost >= result.threshold) |
| 337 | .count(); |
| 338 | let forward_over_tr_count = forward_deque |
| 339 | .iter() |
| 340 | .filter(|result| result.forward_adjusted_cost >= result.threshold) |
| 341 | .count(); |
| 342 | |
| 343 | // Check for scenecut after the flashes |
| 344 | // No frames over threshold forward |
| 345 | // and some frames over threshold backward |
| 346 | let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast { |
| 347 | // Fast scenecut is more sensitive to false flash detection, |
| 348 | // so we want more "evidence" of there being a flash before creating a keyframe. |
| 349 | 2 |
| 350 | } else { |
| 351 | 1 |
| 352 | }; |
| 353 | if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req { |
| 354 | return (true, score); |
| 355 | } |
| 356 | |
| 357 | // Check for scenecut before flash |
| 358 | // If distance longer than max flash length |
| 359 | if back_over_tr_count == 0 |
| 360 | && forward_over_tr_count == 1 |
| 361 | && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold |
| 362 | { |
| 363 | return (true, score); |
| 364 | } |
| 365 | |
| 366 | if back_over_tr_count != 0 || forward_over_tr_count != 0 { |
| 367 | return (false, score); |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | (cost >= score.threshold, score) |
| 372 | } |
| 373 | } |
| 374 | |
| 375 | #[derive (Debug, Clone, Copy)] |
| 376 | struct ScenecutResult { |
| 377 | inter_cost: f64, |
| 378 | imp_block_cost: f64, |
| 379 | backward_adjusted_cost: f64, |
| 380 | forward_adjusted_cost: f64, |
| 381 | threshold: f64, |
| 382 | } |
| 383 | |