1 | // Copyright (C) 2024 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | #include "qffmpegaudioencoder_p.h" |
4 | #include "qffmpegrecordingengineutils_p.h" |
5 | #include "qffmpegaudioencoderutils_p.h" |
6 | #include "qffmpegaudioinput_p.h" |
7 | #include "qffmpegencoderoptions_p.h" |
8 | #include "qffmpegmuxer_p.h" |
9 | #include "qffmpegrecordingengine_p.h" |
10 | #include "qffmpegmediaformatinfo_p.h" |
11 | #include "qffmpegcodecstorage_p.h" |
12 | #include <QtCore/qloggingcategory.h> |
13 | |
14 | QT_BEGIN_NAMESPACE |
15 | |
16 | namespace QFFmpeg { |
17 | |
18 | static Q_LOGGING_CATEGORY(qLcFFmpegAudioEncoder, "qt.multimedia.ffmpeg.audioencoder" ); |
19 | |
20 | namespace { |
21 | void setupStreamParameters(AVStream *stream, const Codec &codec, |
22 | const AVAudioFormat &requestedAudioFormat) |
23 | { |
24 | const auto channelLayouts = codec.channelLayouts(); |
25 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
26 | stream->codecpar->ch_layout = |
27 | adjustChannelLayout(channelLayouts, requestedAudioFormat.channelLayout); |
28 | #else |
29 | stream->codecpar->channel_layout = |
30 | adjustChannelLayout(supportedLayouts: channelLayouts, requested: requestedAudioFormat.channelLayoutMask); |
31 | stream->codecpar->channels = qPopulationCount(v: stream->codecpar->channel_layout); |
32 | #endif |
33 | const auto sampleRates = codec.sampleRates(); |
34 | const auto sampleRate = adjustSampleRate(supportedRates: sampleRates, requested: requestedAudioFormat.sampleRate); |
35 | |
36 | stream->codecpar->sample_rate = sampleRate; |
37 | stream->codecpar->frame_size = 1024; |
38 | const auto sampleFormats = codec.sampleFormats(); |
39 | stream->codecpar->format = adjustSampleFormat(supportedFormats: sampleFormats, requested: requestedAudioFormat.sampleFormat); |
40 | |
41 | stream->time_base = AVRational{ .num: 1, .den: sampleRate }; |
42 | |
43 | qCDebug(qLcFFmpegAudioEncoder) |
44 | << "set stream time_base" << stream->time_base.num << "/" << stream->time_base.den; |
45 | } |
46 | |
47 | bool openCodecContext(AVCodecContext *codecContext, AVStream *stream, |
48 | const QMediaEncoderSettings &settings) |
49 | { |
50 | Q_ASSERT(codecContext); |
51 | codecContext->time_base = stream->time_base; |
52 | |
53 | avcodec_parameters_to_context(codec: codecContext, par: stream->codecpar); |
54 | |
55 | // if avcodec_open2 fails, it may clean codecContext->codec |
56 | Codec codec{ codecContext->codec }; |
57 | |
58 | AVDictionaryHolder opts; |
59 | applyAudioEncoderOptions(settings, codecName: QByteArray{ codec.name() }, codec: codecContext, opts); |
60 | applyExperimentalCodecOptions(codec, opts); |
61 | |
62 | const int res = avcodec_open2(avctx: codecContext, codec: codec.get(), options: opts); |
63 | |
64 | if (res != 0) { |
65 | qCWarning(qLcFFmpegAudioEncoder) |
66 | << "Cannot open audio codec" << codec.name() << "; result:" << err2str(errnum: res); |
67 | return false; |
68 | } |
69 | |
70 | qCDebug(qLcFFmpegAudioEncoder) << "audio codec params: fmt=" << codecContext->sample_fmt |
71 | << "rate=" << codecContext->sample_rate; |
72 | |
73 | avcodec_parameters_from_context(par: stream->codecpar, codec: codecContext); |
74 | |
75 | return true; |
76 | } |
77 | |
78 | } // namespace |
79 | |
80 | AudioEncoder::AudioEncoder(RecordingEngine &recordingEngine, const QAudioFormat &sourceFormat, |
81 | const QMediaEncoderSettings &settings) |
82 | : EncoderThread(recordingEngine), m_sourceFormat(sourceFormat), m_settings(settings) |
83 | { |
84 | setObjectName(QLatin1String("AudioEncoder" )); |
85 | qCDebug(qLcFFmpegAudioEncoder) << "AudioEncoder" << settings.audioCodec(); |
86 | |
87 | const AVCodecID codecID = QFFmpegMediaFormatInfo::codecIdForAudioCodec(codec: settings.audioCodec()); |
88 | Q_ASSERT(avformat_query_codec(recordingEngine.avFormatContext()->oformat, codecID, |
89 | FF_COMPLIANCE_NORMAL)); |
90 | |
91 | Q_ASSERT(QFFmpeg::findAVEncoder(codecID)); |
92 | |
93 | m_stream = avformat_new_stream(s: recordingEngine.avFormatContext(), c: nullptr); |
94 | m_stream->id = recordingEngine.avFormatContext()->nb_streams - 1; |
95 | m_stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
96 | m_stream->codecpar->codec_id = codecID; |
97 | } |
98 | |
99 | void AudioEncoder::addBuffer(const QAudioBuffer &buffer) |
100 | { |
101 | if (!buffer.isValid()) { |
102 | setEndOfSourceStream(); |
103 | return; |
104 | } |
105 | |
106 | { |
107 | const std::chrono::microseconds bufferDuration(buffer.duration()); |
108 | auto guard = lockLoopData(); |
109 | |
110 | resetEndOfSourceStream(); |
111 | |
112 | if (m_paused) |
113 | return; |
114 | |
115 | // TODO: apply logic with canPushFrame |
116 | |
117 | m_audioBufferQueue.push(x: buffer); |
118 | m_queueDuration += bufferDuration; |
119 | } |
120 | |
121 | dataReady(); |
122 | } |
123 | |
124 | QAudioBuffer AudioEncoder::takeBuffer() |
125 | { |
126 | auto locker = lockLoopData(); |
127 | QAudioBuffer result = dequeueIfPossible(queue&: m_audioBufferQueue); |
128 | m_queueDuration -= std::chrono::microseconds(result.duration()); |
129 | return result; |
130 | } |
131 | |
132 | bool AudioEncoder::init() |
133 | { |
134 | const AVAudioFormat requestedAudioFormat(m_sourceFormat); |
135 | |
136 | QFFmpeg::findAndOpenAVEncoder( |
137 | codecId: m_stream->codecpar->codec_id, |
138 | scoresGetter: [&](const Codec &codec) { |
139 | AVScore result = DefaultAVScore; |
140 | |
141 | // Attempt to find no-conversion format |
142 | if (auto fmts = codec.sampleFormats(); !fmts.empty()) |
143 | result += hasValue(range: fmts, value: requestedAudioFormat.sampleFormat) ? 1 : -1; |
144 | |
145 | if (auto rates = codec.sampleRates(); !rates.empty()) |
146 | result += hasValue(range: rates, value: requestedAudioFormat.sampleRate) ? 1 : -1; |
147 | |
148 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
149 | if (auto layouts = codec.channelLayouts(); !layouts.empty()) |
150 | result += hasValue(layouts, requestedAudioFormat.channelLayout) ? 1 : -1; |
151 | #else |
152 | if (auto layouts = codec.channelLayouts(); !layouts.empty()) |
153 | result += hasValue(range: layouts, value: requestedAudioFormat.channelLayoutMask) ? 1 : -1; |
154 | #endif |
155 | |
156 | return result; |
157 | }, |
158 | codecOpener: [&](const Codec &codec) { |
159 | AVCodecContextUPtr codecContext(avcodec_alloc_context3(codec: codec.get())); |
160 | if (!codecContext) |
161 | return false; |
162 | |
163 | setupStreamParameters(stream: m_stream, codec: Codec{ codecContext->codec }, requestedAudioFormat); |
164 | if (!openCodecContext(codecContext: codecContext.get(), stream: m_stream, settings: m_settings)) |
165 | return false; |
166 | |
167 | m_codecContext = std::move(codecContext); |
168 | return true; |
169 | }); |
170 | |
171 | if (!m_codecContext) { |
172 | qCWarning(qLcFFmpegAudioEncoder) << "Unable to open any audio codec" ; |
173 | emit m_recordingEngine.sessionError(code: QMediaRecorder::FormatError, |
174 | QStringLiteral("Cannot open any audio codec" )); |
175 | return false; |
176 | } |
177 | |
178 | qCDebug(qLcFFmpegAudioEncoder) << "found audio codec" << m_codecContext->codec->name; |
179 | |
180 | updateResampler(sourceFormat: m_sourceFormat); |
181 | |
182 | // TODO: try to address this dependency here. |
183 | if (auto input = qobject_cast<QFFmpegAudioInput *>(object: source())) |
184 | input->setBufferSize(m_codecContext->frame_size); |
185 | |
186 | return EncoderThread::init(); |
187 | } |
188 | |
189 | void AudioEncoder::cleanup() |
190 | { |
191 | while (!m_audioBufferQueue.empty()) |
192 | processOne(); |
193 | |
194 | if (m_avFrameSamplesOffset) { |
195 | // the size of the last frame can be less than m_codecContext->frame_size |
196 | |
197 | retrievePackets(); |
198 | sendPendingFrameToAVCodec(); |
199 | } |
200 | |
201 | while (avcodec_send_frame(avctx: m_codecContext.get(), frame: nullptr) == AVERROR(EAGAIN)) |
202 | retrievePackets(); |
203 | retrievePackets(); |
204 | } |
205 | |
206 | bool AudioEncoder::hasData() const |
207 | { |
208 | return !m_audioBufferQueue.empty(); |
209 | } |
210 | |
211 | void AudioEncoder::retrievePackets() |
212 | { |
213 | while (true) { |
214 | AVPacketUPtr packet(av_packet_alloc()); |
215 | int ret = avcodec_receive_packet(avctx: m_codecContext.get(), avpkt: packet.get()); |
216 | if (ret < 0) { |
217 | if (ret != AVERROR(EOF)) |
218 | break; |
219 | if (ret != AVERROR(EAGAIN)) { |
220 | char errStr[1024]; |
221 | av_strerror(errnum: ret, errbuf: errStr, errbuf_size: 1024); |
222 | qCDebug(qLcFFmpegAudioEncoder) << "receive packet" << ret << errStr; |
223 | } |
224 | break; |
225 | } |
226 | |
227 | // qCDebug(qLcFFmpegEncoder) << "writing audio packet" << packet->size << packet->pts << |
228 | // packet->dts; |
229 | packet->stream_index = m_stream->id; |
230 | m_recordingEngine.getMuxer()->addPacket(packet: std::move(packet)); |
231 | } |
232 | } |
233 | |
234 | void AudioEncoder::processOne() |
235 | { |
236 | QAudioBuffer buffer = takeBuffer(); |
237 | Q_ASSERT(buffer.isValid()); |
238 | |
239 | // qCDebug(qLcFFmpegEncoder) << "new audio buffer" << buffer.byteCount() << buffer.format() |
240 | // << buffer.frameCount() << codec->frame_size; |
241 | |
242 | if (buffer.format() != m_sourceFormat && !updateResampler(sourceFormat: buffer.format())) |
243 | return; |
244 | |
245 | int samplesOffset = 0; |
246 | const int bufferSamplesCount = static_cast<int>(buffer.frameCount()); |
247 | |
248 | while (samplesOffset < bufferSamplesCount) |
249 | handleAudioData(data: buffer.constData<uint8_t>(), samplesOffset, samplesCount: bufferSamplesCount); |
250 | |
251 | Q_ASSERT(samplesOffset == bufferSamplesCount); |
252 | } |
253 | |
254 | bool AudioEncoder::checkIfCanPushFrame() const |
255 | { |
256 | if (m_encodingStarted) |
257 | return m_audioBufferQueue.size() <= 1 || m_queueDuration < m_maxQueueDuration; |
258 | if (!isFinished()) |
259 | return m_audioBufferQueue.empty(); |
260 | |
261 | return false; |
262 | } |
263 | |
264 | bool AudioEncoder::updateResampler(const QAudioFormat &sourceFormat) |
265 | { |
266 | m_resampler.reset(); |
267 | |
268 | const AVAudioFormat requestedAudioFormat(sourceFormat); |
269 | const AVAudioFormat codecAudioFormat(m_codecContext.get()); |
270 | |
271 | if (requestedAudioFormat != codecAudioFormat) { |
272 | m_resampler = createResampleContext(inputFormat: requestedAudioFormat, outputFormat: codecAudioFormat); |
273 | if (!swr_is_initialized(s: m_resampler.get())) { |
274 | m_sourceFormat = {}; |
275 | qCWarning(qLcFFmpegAudioEncoder) << "Cannot initialize resampler for audio encoder" ; |
276 | emit m_recordingEngine.sessionError( |
277 | code: QMediaRecorder::FormatError, |
278 | QStringLiteral("Cannot initialize resampler for audio encoder" )); |
279 | return false; |
280 | } |
281 | qCDebug(qLcFFmpegAudioEncoder) << "Created resampler with audio formats conversion\n" |
282 | << requestedAudioFormat << "->" << codecAudioFormat; |
283 | } else { |
284 | qCDebug(qLcFFmpegAudioEncoder) << "Resampler is not needed due to no-conversion format\n" |
285 | << requestedAudioFormat; |
286 | } |
287 | |
288 | m_sourceFormat = sourceFormat; |
289 | |
290 | return true; |
291 | } |
292 | |
293 | void AudioEncoder::ensurePendingFrame(int availableSamplesCount) |
294 | { |
295 | Q_ASSERT(availableSamplesCount >= 0); |
296 | |
297 | if (m_avFrame) |
298 | return; |
299 | |
300 | m_avFrame = makeAVFrame(); |
301 | |
302 | m_avFrame->format = m_codecContext->sample_fmt; |
303 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
304 | m_avFrame->ch_layout = m_codecContext->ch_layout; |
305 | #else |
306 | m_avFrame->channel_layout = m_codecContext->channel_layout; |
307 | m_avFrame->channels = m_codecContext->channels; |
308 | #endif |
309 | m_avFrame->sample_rate = m_codecContext->sample_rate; |
310 | |
311 | const bool isFixedFrameSize = |
312 | !(m_codecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) |
313 | && m_codecContext->frame_size; |
314 | m_avFrame->nb_samples = isFixedFrameSize ? m_codecContext->frame_size : availableSamplesCount; |
315 | if (m_avFrame->nb_samples) |
316 | av_frame_get_buffer(frame: m_avFrame.get(), align: 0); |
317 | |
318 | const auto &timeBase = m_stream->time_base; |
319 | const auto pts = timeBase.den && timeBase.num |
320 | ? timeBase.den * m_samplesWritten / (m_codecContext->sample_rate * timeBase.num) |
321 | : m_samplesWritten; |
322 | setAVFrameTime(frame&: *m_avFrame, pts, timeBase); |
323 | } |
324 | |
325 | void AudioEncoder::writeDataToPendingFrame(const uchar *data, int &samplesOffset, int samplesCount) |
326 | { |
327 | Q_ASSERT(m_avFrame); |
328 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
329 | |
330 | const int bytesPerSample = av_get_bytes_per_sample(sample_fmt: m_codecContext->sample_fmt); |
331 | const bool isPlanar = av_sample_fmt_is_planar(sample_fmt: m_codecContext->sample_fmt); |
332 | |
333 | #if QT_FFMPEG_HAS_AV_CHANNEL_LAYOUT |
334 | const int channelsCount = m_codecContext->ch_layout.nb_channels; |
335 | #else |
336 | const int channelsCount = m_codecContext->channels; |
337 | #endif |
338 | |
339 | const int audioDataOffset = isPlanar ? bytesPerSample * m_avFrameSamplesOffset |
340 | : bytesPerSample * m_avFrameSamplesOffset * channelsCount; |
341 | |
342 | const int planesCount = isPlanar ? channelsCount : 1; |
343 | m_avFramePlanesData.resize(new_size: planesCount); |
344 | for (int plane = 0; plane < planesCount; ++plane) |
345 | m_avFramePlanesData[plane] = m_avFrame->extended_data[plane] + audioDataOffset; |
346 | |
347 | const int samplesToWrite = m_avFrame->nb_samples - m_avFrameSamplesOffset; |
348 | int samplesToRead = |
349 | (samplesToWrite * m_sourceFormat.sampleRate() + m_codecContext->sample_rate / 2) |
350 | / m_codecContext->sample_rate; |
351 | // the lower bound is need to get round infinite loops in corner cases |
352 | samplesToRead = qBound(min: 1, val: samplesToRead, max: samplesCount - samplesOffset); |
353 | |
354 | data += m_sourceFormat.bytesForFrames(frameCount: samplesOffset); |
355 | |
356 | if (m_resampler) { |
357 | m_avFrameSamplesOffset += swr_convert(s: m_resampler.get(), out: m_avFramePlanesData.data(), |
358 | out_count: samplesToWrite, in: &data, in_count: samplesToRead); |
359 | } else { |
360 | Q_ASSERT(planesCount == 1); |
361 | m_avFrameSamplesOffset += samplesToRead; |
362 | memcpy(dest: m_avFramePlanesData[0], src: data, n: m_sourceFormat.bytesForFrames(frameCount: samplesToRead)); |
363 | } |
364 | |
365 | samplesOffset += samplesToRead; |
366 | } |
367 | |
368 | void AudioEncoder::sendPendingFrameToAVCodec() |
369 | { |
370 | Q_ASSERT(m_avFrame); |
371 | Q_ASSERT(m_avFrameSamplesOffset <= m_avFrame->nb_samples); |
372 | |
373 | m_avFrame->nb_samples = m_avFrameSamplesOffset; |
374 | |
375 | m_samplesWritten += m_avFrameSamplesOffset; |
376 | |
377 | const qint64 time = m_sourceFormat.durationForFrames( |
378 | frameCount: m_samplesWritten * m_sourceFormat.sampleRate() / m_codecContext->sample_rate); |
379 | m_recordingEngine.newTimeStamp(time: time / 1000); |
380 | |
381 | // qCDebug(qLcFFmpegEncoder) << "sending audio frame" << buffer.byteCount() << frame->pts << |
382 | // ((double)buffer.frameCount()/frame->sample_rate); |
383 | |
384 | int ret = avcodec_send_frame(avctx: m_codecContext.get(), frame: m_avFrame.get()); |
385 | if (ret < 0) { |
386 | char errStr[AV_ERROR_MAX_STRING_SIZE]; |
387 | av_strerror(errnum: ret, errbuf: errStr, AV_ERROR_MAX_STRING_SIZE); |
388 | qCDebug(qLcFFmpegAudioEncoder) << "error sending frame" << ret << errStr; |
389 | } |
390 | |
391 | m_avFrame = nullptr; |
392 | m_avFrameSamplesOffset = 0; |
393 | std::fill(first: m_avFramePlanesData.begin(), last: m_avFramePlanesData.end(), value: nullptr); |
394 | } |
395 | |
396 | void AudioEncoder::handleAudioData(const uchar *data, int &samplesOffset, int samplesCount) |
397 | { |
398 | ensurePendingFrame(availableSamplesCount: samplesCount - samplesOffset); |
399 | |
400 | writeDataToPendingFrame(data, samplesOffset, samplesCount); |
401 | |
402 | // The frame is not ready yet |
403 | if (m_avFrameSamplesOffset < m_avFrame->nb_samples) |
404 | return; |
405 | |
406 | retrievePackets(); |
407 | |
408 | sendPendingFrameToAVCodec(); |
409 | } |
410 | |
411 | } // namespace QFFmpeg |
412 | |
413 | QT_END_NAMESPACE |
414 | |