1 | // Copyright (C) 2024 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | #include "qffmpegaudioencoder_p.h" |
4 | #include "qffmpegrecordingengineutils_p.h" |
5 | #include "qffmpegaudioencoderutils_p.h" |
6 | #include "qffmpegaudioinput_p.h" |
7 | #include "qffmpegencoderoptions_p.h" |
8 | #include "qffmpegmuxer_p.h" |
9 | #include "qffmpegrecordingengine_p.h" |
10 | #include "qffmpegmediaformatinfo_p.h" |
11 | #include "qffmpegcodecstorage_p.h" |
12 | #include <QtCore/qloggingcategory.h> |
13 | |
14 | QT_BEGIN_NAMESPACE |
15 | |
16 | namespace QFFmpeg { |
17 | |
18 | static Q_LOGGING_CATEGORY(qLcFFmpegAudioEncoder, "qt.multimedia.ffmpeg.audioencoder" ); |
19 | |
20 | namespace { |
21 | void setupStreamParameters(AVStream *stream, const AVCodec *codec, |
22 | const AVAudioFormat &requestedAudioFormat) |
23 | { |
24 | const auto channelLayouts = getCodecChannelLayouts(codec); |
25 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
26 | stream->codecpar->ch_layout = |
27 | adjustChannelLayout(channelLayouts, requestedAudioFormat.channelLayout); |
28 | #else |
29 | stream->codecpar->channel_layout = |
30 | adjustChannelLayout(supportedLayouts: channelLayouts, requested: requestedAudioFormat.channelLayoutMask); |
31 | stream->codecpar->channels = qPopulationCount(v: stream->codecpar->channel_layout); |
32 | #endif |
33 | const auto sampleRates = getCodecSampleRates(codec); |
34 | const auto sampleRate = adjustSampleRate(supportedRates: sampleRates, requested: requestedAudioFormat.sampleRate); |
35 | |
36 | stream->codecpar->sample_rate = sampleRate; |
37 | stream->codecpar->frame_size = 1024; |
38 | const auto sampleFormats = getCodecSampleFormats(codec); |
39 | stream->codecpar->format = adjustSampleFormat(supportedFormats: sampleFormats, requested: requestedAudioFormat.sampleFormat); |
40 | |
41 | stream->time_base = AVRational{ .num: 1, .den: sampleRate }; |
42 | |
43 | qCDebug(qLcFFmpegAudioEncoder) |
44 | << "set stream time_base" << stream->time_base.num << "/" << stream->time_base.den; |
45 | } |
46 | |
47 | bool openCodecContext(AVCodecContext *codecContext, AVStream *stream, |
48 | const QMediaEncoderSettings &settings) |
49 | { |
50 | Q_ASSERT(codecContext); |
51 | codecContext->time_base = stream->time_base; |
52 | |
53 | avcodec_parameters_to_context(codec: codecContext, par: stream->codecpar); |
54 | |
55 | // if avcodec_open2 fails, it may clean codecContext->codec |
56 | const AVCodec *codec = codecContext->codec; |
57 | |
58 | AVDictionaryHolder opts; |
59 | applyAudioEncoderOptions(settings, codecName: codec->name, codec: codecContext, opts); |
60 | applyExperimentalCodecOptions(codec, opts); |
61 | |
62 | const int res = avcodec_open2(avctx: codecContext, codec, options: opts); |
63 | |
64 | if (res != 0) { |
65 | qCWarning(qLcFFmpegAudioEncoder) << "Cannot open audio codec" << codec->name |
66 | << "; result:" << err2str(errnum: res); |
67 | return false; |
68 | } |
69 | |
70 | qCDebug(qLcFFmpegAudioEncoder) << "audio codec params: fmt=" << codecContext->sample_fmt |
71 | << "rate=" << codecContext->sample_rate; |
72 | |
73 | return true; |
74 | } |
75 | |
76 | } // namespace |
77 | |
78 | AudioEncoder::AudioEncoder(RecordingEngine &recordingEngine, const QAudioFormat &sourceFormat, |
79 | const QMediaEncoderSettings &settings) |
80 | : EncoderThread(recordingEngine), m_sourceFormat(sourceFormat), m_settings(settings) |
81 | { |
82 | setObjectName(QLatin1String("AudioEncoder" )); |
83 | qCDebug(qLcFFmpegAudioEncoder) << "AudioEncoder" << settings.audioCodec(); |
84 | |
85 | const AVCodecID codecID = QFFmpegMediaFormatInfo::codecIdForAudioCodec(codec: settings.audioCodec()); |
86 | Q_ASSERT(avformat_query_codec(recordingEngine.avFormatContext()->oformat, codecID, |
87 | FF_COMPLIANCE_NORMAL)); |
88 | |
89 | Q_ASSERT(QFFmpeg::findAVEncoder(codecID)); |
90 | |
91 | m_stream = avformat_new_stream(s: recordingEngine.avFormatContext(), c: nullptr); |
92 | m_stream->id = recordingEngine.avFormatContext()->nb_streams - 1; |
93 | m_stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
94 | m_stream->codecpar->codec_id = codecID; |
95 | } |
96 | |
97 | void AudioEncoder::addBuffer(const QAudioBuffer &buffer) |
98 | { |
99 | if (!buffer.isValid()) { |
100 | setEndOfSourceStream(); |
101 | return; |
102 | } |
103 | |
104 | { |
105 | const std::chrono::microseconds bufferDuration(buffer.duration()); |
106 | auto guard = lockLoopData(); |
107 | |
108 | resetEndOfSourceStream(); |
109 | |
110 | if (m_paused) |
111 | return; |
112 | |
113 | // TODO: apply logic with canPushFrame |
114 | |
115 | m_audioBufferQueue.push(x: buffer); |
116 | m_queueDuration += bufferDuration; |
117 | } |
118 | |
119 | dataReady(); |
120 | } |
121 | |
122 | QAudioBuffer AudioEncoder::takeBuffer() |
123 | { |
124 | auto locker = lockLoopData(); |
125 | QAudioBuffer result = dequeueIfPossible(queue&: m_audioBufferQueue); |
126 | m_queueDuration -= std::chrono::microseconds(result.duration()); |
127 | return result; |
128 | } |
129 | |
130 | bool AudioEncoder::init() |
131 | { |
132 | const AVAudioFormat requestedAudioFormat(m_sourceFormat); |
133 | |
134 | QFFmpeg::findAndOpenAVEncoder( |
135 | codecId: m_stream->codecpar->codec_id, |
136 | scoresGetter: [&](const AVCodec *codec) { |
137 | AVScore result = DefaultAVScore; |
138 | |
139 | // Attempt to find no-conversion format |
140 | if (auto fmts = getCodecSampleFormats(codec)) |
141 | result += hasAVValue(fmts, format: requestedAudioFormat.sampleFormat) ? 1 : -1; |
142 | |
143 | if (auto rates = getCodecSampleRates(codec)) |
144 | result += hasAVValue(fmts: rates, format: requestedAudioFormat.sampleRate) ? 1 : -1; |
145 | |
146 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
147 | if (auto layouts = getCodecChannelLayouts(codec)) |
148 | result += hasAVValue(layouts, requestedAudioFormat.channelLayout) ? 1 : -1; |
149 | #else |
150 | if (auto layouts = getCodecChannelLayouts(codec)) |
151 | result += hasAVValue(fmts: layouts, format: requestedAudioFormat.channelLayoutMask) ? 1 : -1; |
152 | #endif |
153 | |
154 | return result; |
155 | }, |
156 | codecOpener: [&](const AVCodec *codec) { |
157 | AVCodecContextUPtr codecContext(avcodec_alloc_context3(codec)); |
158 | if (!codecContext) |
159 | return false; |
160 | |
161 | setupStreamParameters(stream: m_stream, codec: codecContext->codec, requestedAudioFormat); |
162 | if (!openCodecContext(codecContext: codecContext.get(), stream: m_stream, settings: m_settings)) |
163 | return false; |
164 | |
165 | m_codecContext = std::move(codecContext); |
166 | return true; |
167 | }); |
168 | |
169 | if (!m_codecContext) { |
170 | qCWarning(qLcFFmpegAudioEncoder) << "Unable to open any audio codec" ; |
171 | emit m_recordingEngine.sessionError(code: QMediaRecorder::FormatError, |
172 | QStringLiteral("Cannot open any audio codec" )); |
173 | return false; |
174 | } |
175 | |
176 | qCDebug(qLcFFmpegAudioEncoder) << "found audio codec" << m_codecContext->codec->name; |
177 | |
178 | updateResampler(sourceFormat: m_sourceFormat); |
179 | |
180 | // TODO: try to address this dependency here. |
181 | if (auto input = qobject_cast<QFFmpegAudioInput *>(object: source())) |
182 | input->setFrameSize(m_codecContext->frame_size); |
183 | |
184 | return EncoderThread::init(); |
185 | } |
186 | |
187 | void AudioEncoder::cleanup() |
188 | { |
189 | while (!m_audioBufferQueue.empty()) |
190 | processOne(); |
191 | |
192 | if (m_avFrameSamplesOffset) { |
193 | // the size of the last frame can be less than m_codecContext->frame_size |
194 | |
195 | retrievePackets(); |
196 | sendPendingFrameToAVCodec(); |
197 | } |
198 | |
199 | while (avcodec_send_frame(avctx: m_codecContext.get(), frame: nullptr) == AVERROR(EAGAIN)) |
200 | retrievePackets(); |
201 | retrievePackets(); |
202 | } |
203 | |
204 | bool AudioEncoder::hasData() const |
205 | { |
206 | return !m_audioBufferQueue.empty(); |
207 | } |
208 | |
209 | void AudioEncoder::retrievePackets() |
210 | { |
211 | while (1) { |
212 | AVPacketUPtr packet(av_packet_alloc()); |
213 | int ret = avcodec_receive_packet(avctx: m_codecContext.get(), avpkt: packet.get()); |
214 | if (ret < 0) { |
215 | if (ret != AVERROR(EOF)) |
216 | break; |
217 | if (ret != AVERROR(EAGAIN)) { |
218 | char errStr[1024]; |
219 | av_strerror(errnum: ret, errbuf: errStr, errbuf_size: 1024); |
220 | qCDebug(qLcFFmpegAudioEncoder) << "receive packet" << ret << errStr; |
221 | } |
222 | break; |
223 | } |
224 | |
225 | // qCDebug(qLcFFmpegEncoder) << "writing audio packet" << packet->size << packet->pts << |
226 | // packet->dts; |
227 | packet->stream_index = m_stream->id; |
228 | m_recordingEngine.getMuxer()->addPacket(packet: std::move(packet)); |
229 | } |
230 | } |
231 | |
232 | void AudioEncoder::processOne() |
233 | { |
234 | QAudioBuffer buffer = takeBuffer(); |
235 | Q_ASSERT(buffer.isValid()); |
236 | |
237 | // qCDebug(qLcFFmpegEncoder) << "new audio buffer" << buffer.byteCount() << buffer.format() |
238 | // << buffer.frameCount() << codec->frame_size; |
239 | |
240 | if (buffer.format() != m_sourceFormat && !updateResampler(sourceFormat: buffer.format())) |
241 | return; |
242 | |
243 | int samplesOffset = 0; |
244 | const int bufferSamplesCount = static_cast<int>(buffer.frameCount()); |
245 | |
246 | while (samplesOffset < bufferSamplesCount) |
247 | handleAudioData(data: buffer.constData<uint8_t>(), samplesOffset, samplesCount: bufferSamplesCount); |
248 | |
249 | Q_ASSERT(samplesOffset == bufferSamplesCount); |
250 | } |
251 | |
252 | bool AudioEncoder::checkIfCanPushFrame() const |
253 | { |
254 | if (m_encodingStarted) |
255 | return m_audioBufferQueue.size() <= 1 || m_queueDuration < m_maxQueueDuration; |
256 | if (!isFinished()) |
257 | return m_audioBufferQueue.empty(); |
258 | |
259 | return false; |
260 | } |
261 | |
262 | bool AudioEncoder::updateResampler(const QAudioFormat &sourceFormat) |
263 | { |
264 | m_resampler.reset(); |
265 | |
266 | const AVAudioFormat requestedAudioFormat(sourceFormat); |
267 | const AVAudioFormat codecAudioFormat(m_codecContext.get()); |
268 | |
269 | if (requestedAudioFormat != codecAudioFormat) { |
270 | m_resampler = createResampleContext(inputFormat: requestedAudioFormat, outputFormat: codecAudioFormat); |
271 | if (!swr_is_initialized(s: m_resampler.get())) { |
272 | m_sourceFormat = {}; |
273 | qCWarning(qLcFFmpegAudioEncoder) << "Cannot initialize resampler for audio encoder" ; |
274 | emit m_recordingEngine.sessionError( |
275 | code: QMediaRecorder::FormatError, |
276 | QStringLiteral("Cannot initialize resampler for audio encoder" )); |
277 | return false; |
278 | } |
279 | qCDebug(qLcFFmpegAudioEncoder) << "Created resampler with audio formats conversion\n" |
280 | << requestedAudioFormat << "->" << codecAudioFormat; |
281 | } else { |
282 | qCDebug(qLcFFmpegAudioEncoder) << "Resampler is not needed due to no-conversion format\n" |
283 | << requestedAudioFormat; |
284 | } |
285 | |
286 | m_sourceFormat = sourceFormat; |
287 | |
288 | return true; |
289 | } |
290 | |
291 | void AudioEncoder::ensurePendingFrame(int availableSamplesCount) |
292 | { |
293 | Q_ASSERT(availableSamplesCount >= 0); |
294 | |
295 | if (m_avFrame) |
296 | return; |
297 | |
298 | m_avFrame = makeAVFrame(); |
299 | |
300 | m_avFrame->format = m_codecContext->sample_fmt; |
301 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
302 | m_avFrame->ch_layout = m_codecContext->ch_layout; |
303 | #else |
304 | m_avFrame->channel_layout = m_codecContext->channel_layout; |
305 | m_avFrame->channels = m_codecContext->channels; |
306 | #endif |
307 | m_avFrame->sample_rate = m_codecContext->sample_rate; |
308 | |
309 | const bool isFixedFrameSize = |
310 | !(m_codecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) |
311 | && m_codecContext->frame_size; |
312 | m_avFrame->nb_samples = isFixedFrameSize ? m_codecContext->frame_size : availableSamplesCount; |
313 | if (m_avFrame->nb_samples) |
314 | av_frame_get_buffer(frame: m_avFrame.get(), align: 0); |
315 | |
316 | const auto &timeBase = m_stream->time_base; |
317 | const auto pts = timeBase.den && timeBase.num |
318 | ? timeBase.den * m_samplesWritten / (m_codecContext->sample_rate * timeBase.num) |
319 | : m_samplesWritten; |
320 | setAVFrameTime(frame&: *m_avFrame, pts, timeBase); |
321 | } |
322 | |
323 | void AudioEncoder::writeDataToPendingFrame(const uchar *data, int &samplesOffset, int samplesCount) |
324 | { |
325 | Q_ASSERT(m_avFrame); |
326 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
327 | |
328 | const int bytesPerSample = av_get_bytes_per_sample(sample_fmt: m_codecContext->sample_fmt); |
329 | const bool isPlanar = av_sample_fmt_is_planar(sample_fmt: m_codecContext->sample_fmt); |
330 | |
331 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
332 | const int channelsCount = m_codecContext->ch_layout.nb_channels; |
333 | #else |
334 | const int channelsCount = m_codecContext->channels; |
335 | #endif |
336 | |
337 | const int audioDataOffset = isPlanar ? bytesPerSample * m_avFrameSamplesOffset |
338 | : bytesPerSample * m_avFrameSamplesOffset * channelsCount; |
339 | |
340 | const int planesCount = isPlanar ? channelsCount : 1; |
341 | m_avFramePlanesData.resize(new_size: planesCount); |
342 | for (int plane = 0; plane < planesCount; ++plane) |
343 | m_avFramePlanesData[plane] = m_avFrame->extended_data[plane] + audioDataOffset; |
344 | |
345 | const int samplesToWrite = m_avFrame->nb_samples - m_avFrameSamplesOffset; |
346 | int samplesToRead = |
347 | (samplesToWrite * m_sourceFormat.sampleRate() + m_codecContext->sample_rate / 2) |
348 | / m_codecContext->sample_rate; |
349 | // the lower bound is need to get round infinite loops in corner cases |
350 | samplesToRead = qBound(min: 1, val: samplesToRead, max: samplesCount - samplesOffset); |
351 | |
352 | data += m_sourceFormat.bytesForFrames(frameCount: samplesOffset); |
353 | |
354 | if (m_resampler) { |
355 | m_avFrameSamplesOffset += swr_convert(s: m_resampler.get(), out: m_avFramePlanesData.data(), |
356 | out_count: samplesToWrite, in: &data, in_count: samplesToRead); |
357 | } else { |
358 | Q_ASSERT(planesCount == 1); |
359 | m_avFrameSamplesOffset += samplesToRead; |
360 | memcpy(dest: m_avFramePlanesData[0], src: data, n: m_sourceFormat.bytesForFrames(frameCount: samplesToRead)); |
361 | } |
362 | |
363 | samplesOffset += samplesToRead; |
364 | } |
365 | |
366 | void AudioEncoder::sendPendingFrameToAVCodec() |
367 | { |
368 | Q_ASSERT(m_avFrame); |
369 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
370 | |
371 | m_avFrame->nb_samples = m_avFrameSamplesOffset; |
372 | |
373 | m_samplesWritten += m_avFrameSamplesOffset; |
374 | |
375 | const qint64 time = m_sourceFormat.durationForFrames( |
376 | frameCount: m_samplesWritten * m_sourceFormat.sampleRate() / m_codecContext->sample_rate); |
377 | m_recordingEngine.newTimeStamp(time: time / 1000); |
378 | |
379 | // qCDebug(qLcFFmpegEncoder) << "sending audio frame" << buffer.byteCount() << frame->pts << |
380 | // ((double)buffer.frameCount()/frame->sample_rate); |
381 | |
382 | int ret = avcodec_send_frame(avctx: m_codecContext.get(), frame: m_avFrame.get()); |
383 | if (ret < 0) { |
384 | char errStr[AV_ERROR_MAX_STRING_SIZE]; |
385 | av_strerror(errnum: ret, errbuf: errStr, AV_ERROR_MAX_STRING_SIZE); |
386 | qCDebug(qLcFFmpegAudioEncoder) << "error sending frame" << ret << errStr; |
387 | } |
388 | |
389 | m_avFrame = nullptr; |
390 | m_avFrameSamplesOffset = 0; |
391 | std::fill(first: m_avFramePlanesData.begin(), last: m_avFramePlanesData.end(), value: nullptr); |
392 | } |
393 | |
394 | void AudioEncoder::handleAudioData(const uchar *data, int &samplesOffset, int samplesCount) |
395 | { |
396 | ensurePendingFrame(availableSamplesCount: samplesCount - samplesOffset); |
397 | |
398 | writeDataToPendingFrame(data, samplesOffset, samplesCount); |
399 | |
400 | // The frame is not ready yet |
401 | if (m_avFrameSamplesOffset < m_avFrame->nb_samples) |
402 | return; |
403 | |
404 | retrievePackets(); |
405 | |
406 | sendPendingFrameToAVCodec(); |
407 | } |
408 | |
409 | } // namespace QFFmpeg |
410 | |
411 | QT_END_NAMESPACE |
412 | |