1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#ifndef QTEXTTOSPEECHPROCESSOR_FLITE_H
5#define QTEXTTOSPEECHPROCESSOR_FLITE_H
6
7#include "qtexttospeechengine.h"
8#include "qvoice.h"
9
10#include <QtCore/qabstracteventdispatcher.h>
11#include <QtCore/qbasictimer.h>
12#include <QtCore/qcoreevent.h>
13#include <QtCore/qdatetime.h>
14#include <QtCore/qlibrary.h>
15#include <QtCore/qlist.h>
16#include <QtCore/qmutex.h>
17#include <QtCore/qprocess.h>
18#include <QtCore/qstring.h>
19#include <QtCore/qthread.h>
20#include <QtMultimedia/qaudiosink.h>
21#include <QtMultimedia/qmediadevices.h>
22
23#include <flite/flite.h>
24
25QT_BEGIN_NAMESPACE
26
27class QTextToSpeechProcessorFlite : public QObject
28{
29 Q_OBJECT
30
31public:
32 QTextToSpeechProcessorFlite(const QAudioDevice &audioDevice);
33 ~QTextToSpeechProcessorFlite();
34
35 struct VoiceInfo
36 {
37 int id;
38 cst_voice *vox;
39 void (*unregister_func)(cst_voice *vox);
40 QString name;
41 QString locale;
42 QVoice::Gender gender;
43 QVoice::Age age;
44 };
45
46 Q_INVOKABLE void say(const QString &text, int voiceId, double pitch, double rate, double volume);
47 Q_INVOKABLE void synthesize(const QString &text, int voiceId, double pitch, double rate, double volume);
48 Q_INVOKABLE void pause();
49 Q_INVOKABLE void resume();
50 Q_INVOKABLE void stop();
51
52 const QList<QTextToSpeechProcessorFlite::VoiceInfo> &voices() const;
53 static constexpr QTextToSpeech::State audioStateToTts(QAudio::State audioState);
54
55private:
56 // Flite callbacks
57 static int audioOutputCb(const cst_wave *w, int start, int size,
58 int last, cst_audio_streaming_info *asi);
59 static int dataOutputCb(const cst_wave *w, int start, int size,
60 int last, cst_audio_streaming_info *asi);
61
62 using OutputHandler = decltype(QTextToSpeechProcessorFlite::audioOutputCb);
63 // Process a single text
64 void processText(const QString &text, int voiceId, float pitch, float rate,
65 OutputHandler outputHandler);
66 int audioOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
67 void audioHandleNewToken(std::chrono::milliseconds tokenStartTime,
68 cst_audio_streaming_info *asi);
69 int dataOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
70
71 bool init();
72 bool initAudio(const cst_wave *w);
73 void deinitAudio();
74 bool checkFormat(const QAudioFormat &format);
75 bool checkVoice(int voiceId);
76 void deleteSink();
77 void createSink();
78 QAudio::State audioSinkState() const;
79 void setError(QTextToSpeech::ErrorReason err, const QString &errorString = QString());
80
81private slots:
82 void changeState(QAudio::State newState);
83
84Q_SIGNALS:
85 void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString);
86 void stateChanged(QTextToSpeech::State);
87 void sayingWord(const QString &word, qsizetype begin, qsizetype length);
88 void synthesized(const QAudioFormat &format, const QByteArray &array);
89
90private:
91 QString m_text;
92 qsizetype m_index = -1;
93
94 QAudioSink *m_audioSink = nullptr;
95 QAudio::State m_state = QAudio::IdleState;
96 QIODevice *m_audioIODevice = nullptr;
97
98 QAudioDevice m_audioDevice;
99 QAudioFormat m_format;
100 double m_volume = 1;
101 std::optional<QAudioFormat> m_synthesisFormat;
102
103 QList<VoiceInfo> m_voices;
104
105 // Statistics for debugging
106 qint64 numberChunks = 0;
107 qint64 totalBytes = 0;
108};
109
110QT_END_NAMESPACE
111
112#endif
113

source code of qtspeech/src/plugins/tts/flite/qtexttospeech_flite_processor.h