| 1 | // Copyright (C) 2024 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | #include "qffmpegaudioencoder_p.h" |
| 4 | #include "qffmpegrecordingengineutils_p.h" |
| 5 | #include "qffmpegaudioencoderutils_p.h" |
| 6 | #include "qffmpegaudioinput_p.h" |
| 7 | #include "qffmpegencoderoptions_p.h" |
| 8 | #include "qffmpegmuxer_p.h" |
| 9 | #include "qffmpegrecordingengine_p.h" |
| 10 | #include "qffmpegmediaformatinfo_p.h" |
| 11 | #include "qffmpegcodecstorage_p.h" |
| 12 | #include <QtCore/qloggingcategory.h> |
| 13 | |
| 14 | QT_BEGIN_NAMESPACE |
| 15 | |
| 16 | namespace QFFmpeg { |
| 17 | |
| 18 | static Q_LOGGING_CATEGORY(qLcFFmpegAudioEncoder, "qt.multimedia.ffmpeg.audioencoder" ); |
| 19 | |
| 20 | namespace { |
| 21 | void setupStreamParameters(AVStream *stream, const Codec &codec, |
| 22 | const AVAudioFormat &requestedAudioFormat) |
| 23 | { |
| 24 | const auto channelLayouts = codec.channelLayouts(); |
| 25 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
| 26 | stream->codecpar->ch_layout = |
| 27 | adjustChannelLayout(channelLayouts, requestedAudioFormat.channelLayout); |
| 28 | #else |
| 29 | stream->codecpar->channel_layout = |
| 30 | adjustChannelLayout(supportedLayouts: channelLayouts, requested: requestedAudioFormat.channelLayoutMask); |
| 31 | stream->codecpar->channels = qPopulationCount(v: stream->codecpar->channel_layout); |
| 32 | #endif |
| 33 | const auto sampleRates = codec.sampleRates(); |
| 34 | const auto sampleRate = adjustSampleRate(supportedRates: sampleRates, requested: requestedAudioFormat.sampleRate); |
| 35 | |
| 36 | stream->codecpar->sample_rate = sampleRate; |
| 37 | stream->codecpar->frame_size = 1024; |
| 38 | const auto sampleFormats = codec.sampleFormats(); |
| 39 | stream->codecpar->format = adjustSampleFormat(supportedFormats: sampleFormats, requested: requestedAudioFormat.sampleFormat); |
| 40 | |
| 41 | stream->time_base = AVRational{ .num: 1, .den: sampleRate }; |
| 42 | |
| 43 | qCDebug(qLcFFmpegAudioEncoder) |
| 44 | << "set stream time_base" << stream->time_base.num << "/" << stream->time_base.den; |
| 45 | } |
| 46 | |
| 47 | bool openCodecContext(AVCodecContext *codecContext, AVStream *stream, |
| 48 | const QMediaEncoderSettings &settings) |
| 49 | { |
| 50 | Q_ASSERT(codecContext); |
| 51 | codecContext->time_base = stream->time_base; |
| 52 | |
| 53 | avcodec_parameters_to_context(codec: codecContext, par: stream->codecpar); |
| 54 | |
| 55 | // if avcodec_open2 fails, it may clean codecContext->codec |
| 56 | Codec codec{ codecContext->codec }; |
| 57 | |
| 58 | AVDictionaryHolder opts; |
| 59 | applyAudioEncoderOptions(settings, codecName: QByteArray{ codec.name() }, codec: codecContext, opts); |
| 60 | applyExperimentalCodecOptions(codec, opts); |
| 61 | |
| 62 | const int res = avcodec_open2(avctx: codecContext, codec: codec.get(), options: opts); |
| 63 | |
| 64 | if (res != 0) { |
| 65 | qCWarning(qLcFFmpegAudioEncoder) |
| 66 | << "Cannot open audio codec" << codec.name() << "; result:" << err2str(errnum: res); |
| 67 | return false; |
| 68 | } |
| 69 | |
| 70 | qCDebug(qLcFFmpegAudioEncoder) << "audio codec params: fmt=" << codecContext->sample_fmt |
| 71 | << "rate=" << codecContext->sample_rate; |
| 72 | |
| 73 | avcodec_parameters_from_context(par: stream->codecpar, codec: codecContext); |
| 74 | |
| 75 | return true; |
| 76 | } |
| 77 | |
| 78 | } // namespace |
| 79 | |
| 80 | AudioEncoder::AudioEncoder(RecordingEngine &recordingEngine, const QAudioFormat &sourceFormat, |
| 81 | const QMediaEncoderSettings &settings) |
| 82 | : EncoderThread(recordingEngine), m_sourceFormat(sourceFormat), m_settings(settings) |
| 83 | { |
| 84 | setObjectName(QLatin1String("AudioEncoder" )); |
| 85 | qCDebug(qLcFFmpegAudioEncoder) << "AudioEncoder" << settings.audioCodec(); |
| 86 | |
| 87 | const AVCodecID codecID = QFFmpegMediaFormatInfo::codecIdForAudioCodec(codec: settings.audioCodec()); |
| 88 | Q_ASSERT(avformat_query_codec(recordingEngine.avFormatContext()->oformat, codecID, |
| 89 | FF_COMPLIANCE_NORMAL)); |
| 90 | |
| 91 | Q_ASSERT(QFFmpeg::findAVEncoder(codecID)); |
| 92 | |
| 93 | m_stream = avformat_new_stream(s: recordingEngine.avFormatContext(), c: nullptr); |
| 94 | m_stream->id = recordingEngine.avFormatContext()->nb_streams - 1; |
| 95 | m_stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
| 96 | m_stream->codecpar->codec_id = codecID; |
| 97 | } |
| 98 | |
| 99 | void AudioEncoder::addBuffer(const QAudioBuffer &buffer) |
| 100 | { |
| 101 | if (!buffer.isValid()) { |
| 102 | setEndOfSourceStream(); |
| 103 | return; |
| 104 | } |
| 105 | |
| 106 | { |
| 107 | const std::chrono::microseconds bufferDuration(buffer.duration()); |
| 108 | auto guard = lockLoopData(); |
| 109 | |
| 110 | resetEndOfSourceStream(); |
| 111 | |
| 112 | if (m_paused) |
| 113 | return; |
| 114 | |
| 115 | // TODO: apply logic with canPushFrame |
| 116 | |
| 117 | m_audioBufferQueue.push(x: buffer); |
| 118 | m_queueDuration += bufferDuration; |
| 119 | } |
| 120 | |
| 121 | dataReady(); |
| 122 | } |
| 123 | |
| 124 | QAudioBuffer AudioEncoder::takeBuffer() |
| 125 | { |
| 126 | auto locker = lockLoopData(); |
| 127 | QAudioBuffer result = dequeueIfPossible(queue&: m_audioBufferQueue); |
| 128 | m_queueDuration -= std::chrono::microseconds(result.duration()); |
| 129 | return result; |
| 130 | } |
| 131 | |
| 132 | bool AudioEncoder::init() |
| 133 | { |
| 134 | const AVAudioFormat requestedAudioFormat(m_sourceFormat); |
| 135 | |
| 136 | QFFmpeg::findAndOpenAVEncoder( |
| 137 | codecId: m_stream->codecpar->codec_id, |
| 138 | scoresGetter: [&](const Codec &codec) { |
| 139 | AVScore result = DefaultAVScore; |
| 140 | |
| 141 | // Attempt to find no-conversion format |
| 142 | if (auto fmts = codec.sampleFormats(); !fmts.empty()) |
| 143 | result += hasValue(range: fmts, value: requestedAudioFormat.sampleFormat) ? 1 : -1; |
| 144 | |
| 145 | if (auto rates = codec.sampleRates(); !rates.empty()) |
| 146 | result += hasValue(range: rates, value: requestedAudioFormat.sampleRate) ? 1 : -1; |
| 147 | |
| 148 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
| 149 | if (auto layouts = codec.channelLayouts(); !layouts.empty()) |
| 150 | result += hasValue(layouts, requestedAudioFormat.channelLayout) ? 1 : -1; |
| 151 | #else |
| 152 | if (auto layouts = codec.channelLayouts(); !layouts.empty()) |
| 153 | result += hasValue(range: layouts, value: requestedAudioFormat.channelLayoutMask) ? 1 : -1; |
| 154 | #endif |
| 155 | |
| 156 | return result; |
| 157 | }, |
| 158 | codecOpener: [&](const Codec &codec) { |
| 159 | AVCodecContextUPtr codecContext(avcodec_alloc_context3(codec: codec.get())); |
| 160 | if (!codecContext) |
| 161 | return false; |
| 162 | |
| 163 | setupStreamParameters(stream: m_stream, codec: Codec{ codecContext->codec }, requestedAudioFormat); |
| 164 | if (!openCodecContext(codecContext: codecContext.get(), stream: m_stream, settings: m_settings)) |
| 165 | return false; |
| 166 | |
| 167 | m_codecContext = std::move(codecContext); |
| 168 | return true; |
| 169 | }); |
| 170 | |
| 171 | if (!m_codecContext) { |
| 172 | qCWarning(qLcFFmpegAudioEncoder) << "Unable to open any audio codec" ; |
| 173 | emit m_recordingEngine.sessionError(code: QMediaRecorder::FormatError, |
| 174 | QStringLiteral("Cannot open any audio codec" )); |
| 175 | return false; |
| 176 | } |
| 177 | |
| 178 | qCDebug(qLcFFmpegAudioEncoder) << "found audio codec" << m_codecContext->codec->name; |
| 179 | |
| 180 | updateResampler(sourceFormat: m_sourceFormat); |
| 181 | |
| 182 | // TODO: try to address this dependency here. |
| 183 | if (auto input = qobject_cast<QFFmpegAudioInput *>(object: source())) |
| 184 | input->setBufferSize(m_codecContext->frame_size); |
| 185 | |
| 186 | return EncoderThread::init(); |
| 187 | } |
| 188 | |
| 189 | void AudioEncoder::cleanup() |
| 190 | { |
| 191 | while (!m_audioBufferQueue.empty()) |
| 192 | processOne(); |
| 193 | |
| 194 | if (m_avFrameSamplesOffset) { |
| 195 | // the size of the last frame can be less than m_codecContext->frame_size |
| 196 | |
| 197 | retrievePackets(); |
| 198 | sendPendingFrameToAVCodec(); |
| 199 | } |
| 200 | |
| 201 | while (avcodec_send_frame(avctx: m_codecContext.get(), frame: nullptr) == AVERROR(EAGAIN)) |
| 202 | retrievePackets(); |
| 203 | retrievePackets(); |
| 204 | } |
| 205 | |
| 206 | bool AudioEncoder::hasData() const |
| 207 | { |
| 208 | return !m_audioBufferQueue.empty(); |
| 209 | } |
| 210 | |
| 211 | void AudioEncoder::retrievePackets() |
| 212 | { |
| 213 | while (true) { |
| 214 | AVPacketUPtr packet(av_packet_alloc()); |
| 215 | int ret = avcodec_receive_packet(avctx: m_codecContext.get(), avpkt: packet.get()); |
| 216 | if (ret < 0) { |
| 217 | if (ret != AVERROR(EOF)) |
| 218 | break; |
| 219 | if (ret != AVERROR(EAGAIN)) { |
| 220 | char errStr[1024]; |
| 221 | av_strerror(errnum: ret, errbuf: errStr, errbuf_size: 1024); |
| 222 | qCDebug(qLcFFmpegAudioEncoder) << "receive packet" << ret << errStr; |
| 223 | } |
| 224 | break; |
| 225 | } |
| 226 | |
| 227 | // qCDebug(qLcFFmpegEncoder) << "writing audio packet" << packet->size << packet->pts << |
| 228 | // packet->dts; |
| 229 | packet->stream_index = m_stream->id; |
| 230 | m_recordingEngine.getMuxer()->addPacket(packet: std::move(packet)); |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | void AudioEncoder::processOne() |
| 235 | { |
| 236 | QAudioBuffer buffer = takeBuffer(); |
| 237 | Q_ASSERT(buffer.isValid()); |
| 238 | |
| 239 | // qCDebug(qLcFFmpegEncoder) << "new audio buffer" << buffer.byteCount() << buffer.format() |
| 240 | // << buffer.frameCount() << codec->frame_size; |
| 241 | |
| 242 | if (buffer.format() != m_sourceFormat && !updateResampler(sourceFormat: buffer.format())) |
| 243 | return; |
| 244 | |
| 245 | int samplesOffset = 0; |
| 246 | const int bufferSamplesCount = static_cast<int>(buffer.frameCount()); |
| 247 | |
| 248 | while (samplesOffset < bufferSamplesCount) |
| 249 | handleAudioData(data: buffer.constData<uint8_t>(), samplesOffset, samplesCount: bufferSamplesCount); |
| 250 | |
| 251 | Q_ASSERT(samplesOffset == bufferSamplesCount); |
| 252 | } |
| 253 | |
| 254 | bool AudioEncoder::checkIfCanPushFrame() const |
| 255 | { |
| 256 | if (m_encodingStarted) |
| 257 | return m_audioBufferQueue.size() <= 1 || m_queueDuration < m_maxQueueDuration; |
| 258 | if (!isFinished()) |
| 259 | return m_audioBufferQueue.empty(); |
| 260 | |
| 261 | return false; |
| 262 | } |
| 263 | |
| 264 | bool AudioEncoder::updateResampler(const QAudioFormat &sourceFormat) |
| 265 | { |
| 266 | m_resampler.reset(); |
| 267 | |
| 268 | const AVAudioFormat requestedAudioFormat(sourceFormat); |
| 269 | const AVAudioFormat codecAudioFormat(m_codecContext.get()); |
| 270 | |
| 271 | if (requestedAudioFormat != codecAudioFormat) { |
| 272 | m_resampler = createResampleContext(inputFormat: requestedAudioFormat, outputFormat: codecAudioFormat); |
| 273 | if (!swr_is_initialized(s: m_resampler.get())) { |
| 274 | m_sourceFormat = {}; |
| 275 | qCWarning(qLcFFmpegAudioEncoder) << "Cannot initialize resampler for audio encoder" ; |
| 276 | emit m_recordingEngine.sessionError( |
| 277 | code: QMediaRecorder::FormatError, |
| 278 | QStringLiteral("Cannot initialize resampler for audio encoder" )); |
| 279 | return false; |
| 280 | } |
| 281 | qCDebug(qLcFFmpegAudioEncoder) << "Created resampler with audio formats conversion\n" |
| 282 | << requestedAudioFormat << "->" << codecAudioFormat; |
| 283 | } else { |
| 284 | qCDebug(qLcFFmpegAudioEncoder) << "Resampler is not needed due to no-conversion format\n" |
| 285 | << requestedAudioFormat; |
| 286 | } |
| 287 | |
| 288 | m_sourceFormat = sourceFormat; |
| 289 | |
| 290 | return true; |
| 291 | } |
| 292 | |
| 293 | void AudioEncoder::ensurePendingFrame(int availableSamplesCount) |
| 294 | { |
| 295 | Q_ASSERT(availableSamplesCount >= 0); |
| 296 | |
| 297 | if (m_avFrame) |
| 298 | return; |
| 299 | |
| 300 | m_avFrame = makeAVFrame(); |
| 301 | |
| 302 | m_avFrame->format = m_codecContext->sample_fmt; |
| 303 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
| 304 | m_avFrame->ch_layout = m_codecContext->ch_layout; |
| 305 | #else |
| 306 | m_avFrame->channel_layout = m_codecContext->channel_layout; |
| 307 | m_avFrame->channels = m_codecContext->channels; |
| 308 | #endif |
| 309 | m_avFrame->sample_rate = m_codecContext->sample_rate; |
| 310 | |
| 311 | const bool isFixedFrameSize = |
| 312 | !(m_codecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) |
| 313 | && m_codecContext->frame_size; |
| 314 | m_avFrame->nb_samples = isFixedFrameSize ? m_codecContext->frame_size : availableSamplesCount; |
| 315 | if (m_avFrame->nb_samples) |
| 316 | av_frame_get_buffer(frame: m_avFrame.get(), align: 0); |
| 317 | |
| 318 | const auto &timeBase = m_stream->time_base; |
| 319 | const auto pts = timeBase.den && timeBase.num |
| 320 | ? timeBase.den * m_samplesWritten / (m_codecContext->sample_rate * timeBase.num) |
| 321 | : m_samplesWritten; |
| 322 | setAVFrameTime(frame&: *m_avFrame, pts, timeBase); |
| 323 | } |
| 324 | |
| 325 | void AudioEncoder::writeDataToPendingFrame(const uchar *data, int &samplesOffset, int samplesCount) |
| 326 | { |
| 327 | Q_ASSERT(m_avFrame); |
| 328 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
| 329 | |
| 330 | const int bytesPerSample = av_get_bytes_per_sample(sample_fmt: m_codecContext->sample_fmt); |
| 331 | const bool isPlanar = av_sample_fmt_is_planar(sample_fmt: m_codecContext->sample_fmt); |
| 332 | |
| 333 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
| 334 | const int channelsCount = m_codecContext->ch_layout.nb_channels; |
| 335 | #else |
| 336 | const int channelsCount = m_codecContext->channels; |
| 337 | #endif |
| 338 | |
| 339 | const int audioDataOffset = isPlanar ? bytesPerSample * m_avFrameSamplesOffset |
| 340 | : bytesPerSample * m_avFrameSamplesOffset * channelsCount; |
| 341 | |
| 342 | const int planesCount = isPlanar ? channelsCount : 1; |
| 343 | m_avFramePlanesData.resize(new_size: planesCount); |
| 344 | for (int plane = 0; plane < planesCount; ++plane) |
| 345 | m_avFramePlanesData[plane] = m_avFrame->extended_data[plane] + audioDataOffset; |
| 346 | |
| 347 | const int samplesToWrite = m_avFrame->nb_samples - m_avFrameSamplesOffset; |
| 348 | int samplesToRead = |
| 349 | (samplesToWrite * m_sourceFormat.sampleRate() + m_codecContext->sample_rate / 2) |
| 350 | / m_codecContext->sample_rate; |
| 351 | // the lower bound is need to get round infinite loops in corner cases |
| 352 | samplesToRead = qBound(min: 1, val: samplesToRead, max: samplesCount - samplesOffset); |
| 353 | |
| 354 | data += m_sourceFormat.bytesForFrames(frameCount: samplesOffset); |
| 355 | |
| 356 | if (m_resampler) { |
| 357 | m_avFrameSamplesOffset += swr_convert(s: m_resampler.get(), out: m_avFramePlanesData.data(), |
| 358 | out_count: samplesToWrite, in: &data, in_count: samplesToRead); |
| 359 | } else { |
| 360 | Q_ASSERT(planesCount == 1); |
| 361 | m_avFrameSamplesOffset += samplesToRead; |
| 362 | memcpy(dest: m_avFramePlanesData[0], src: data, n: m_sourceFormat.bytesForFrames(frameCount: samplesToRead)); |
| 363 | } |
| 364 | |
| 365 | samplesOffset += samplesToRead; |
| 366 | } |
| 367 | |
| 368 | void AudioEncoder::sendPendingFrameToAVCodec() |
| 369 | { |
| 370 | Q_ASSERT(m_avFrame); |
| 371 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
| 372 | |
| 373 | m_avFrame->nb_samples = m_avFrameSamplesOffset; |
| 374 | |
| 375 | m_samplesWritten += m_avFrameSamplesOffset; |
| 376 | |
| 377 | const qint64 time = m_sourceFormat.durationForFrames( |
| 378 | frameCount: m_samplesWritten * m_sourceFormat.sampleRate() / m_codecContext->sample_rate); |
| 379 | m_recordingEngine.newTimeStamp(time: time / 1000); |
| 380 | |
| 381 | // qCDebug(qLcFFmpegEncoder) << "sending audio frame" << buffer.byteCount() << frame->pts << |
| 382 | // ((double)buffer.frameCount()/frame->sample_rate); |
| 383 | |
| 384 | int ret = avcodec_send_frame(avctx: m_codecContext.get(), frame: m_avFrame.get()); |
| 385 | if (ret < 0) { |
| 386 | char errStr[AV_ERROR_MAX_STRING_SIZE]; |
| 387 | av_strerror(errnum: ret, errbuf: errStr, AV_ERROR_MAX_STRING_SIZE); |
| 388 | qCDebug(qLcFFmpegAudioEncoder) << "error sending frame" << ret << errStr; |
| 389 | } |
| 390 | |
| 391 | m_avFrame = nullptr; |
| 392 | m_avFrameSamplesOffset = 0; |
| 393 | std::fill(first: m_avFramePlanesData.begin(), last: m_avFramePlanesData.end(), value: nullptr); |
| 394 | } |
| 395 | |
| 396 | void AudioEncoder::handleAudioData(const uchar *data, int &samplesOffset, int samplesCount) |
| 397 | { |
| 398 | ensurePendingFrame(availableSamplesCount: samplesCount - samplesOffset); |
| 399 | |
| 400 | writeDataToPendingFrame(data, samplesOffset, samplesCount); |
| 401 | |
| 402 | // The frame is not ready yet |
| 403 | if (m_avFrameSamplesOffset < m_avFrame->nb_samples) |
| 404 | return; |
| 405 | |
| 406 | retrievePackets(); |
| 407 | |
| 408 | sendPendingFrameToAVCodec(); |
| 409 | } |
| 410 | |
| 411 | } // namespace QFFmpeg |
| 412 | |
| 413 | QT_END_NAMESPACE |
| 414 | |