1 | // Copyright (C) 2022 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only |
3 | |
4 | #ifndef QTEXTTOSPEECHPROCESSOR_FLITE_H |
5 | #define QTEXTTOSPEECHPROCESSOR_FLITE_H |
6 | |
7 | #include "qtexttospeechengine.h" |
8 | #include "qvoice.h" |
9 | |
10 | #include <QtCore/QList> |
11 | #include <QtCore/QMutex> |
12 | #include <QtCore/QThread> |
13 | #include <QtCore/QLibrary> |
14 | #include <QtCore/QString> |
15 | #include <QtCore/QBasicTimer> |
16 | #include <QtCore/QTimerEvent> |
17 | #include <QtCore/QAbstractEventDispatcher> |
18 | #include <QtCore/QProcessEnvironment> |
19 | #include <QtCore/QDateTime> |
20 | #include <QtMultimedia/QAudioSink> |
21 | #include <QtMultimedia/QMediaDevices> |
22 | |
23 | #include <flite/flite.h> |
24 | |
25 | QT_BEGIN_NAMESPACE |
26 | |
27 | class QTextToSpeechProcessorFlite : public QObject |
28 | { |
29 | Q_OBJECT |
30 | |
31 | public: |
32 | QTextToSpeechProcessorFlite(const QAudioDevice &audioDevice); |
33 | ~QTextToSpeechProcessorFlite(); |
34 | |
35 | struct VoiceInfo |
36 | { |
37 | int id; |
38 | cst_voice *vox; |
39 | void (*unregister_func)(cst_voice *vox); |
40 | QString name; |
41 | QString locale; |
42 | QVoice::Gender gender; |
43 | QVoice::Age age; |
44 | }; |
45 | |
46 | Q_INVOKABLE void say(const QString &text, int voiceId, double pitch, double rate, double volume); |
47 | Q_INVOKABLE void synthesize(const QString &text, int voiceId, double pitch, double rate, double volume); |
48 | Q_INVOKABLE void pause(); |
49 | Q_INVOKABLE void resume(); |
50 | Q_INVOKABLE void stop(); |
51 | |
52 | const QList<QTextToSpeechProcessorFlite::VoiceInfo> &voices() const; |
53 | static constexpr QTextToSpeech::State audioStateToTts(QAudio::State audioState); |
54 | |
55 | private: |
56 | // Flite callbacks |
57 | static int audioOutputCb(const cst_wave *w, int start, int size, |
58 | int last, cst_audio_streaming_info *asi); |
59 | static int dataOutputCb(const cst_wave *w, int start, int size, |
60 | int last, cst_audio_streaming_info *asi); |
61 | |
62 | using OutputHandler = decltype(QTextToSpeechProcessorFlite::audioOutputCb); |
63 | // Process a single text |
64 | void processText(const QString &text, int voiceId, double pitch, double rate, OutputHandler outputHandler); |
65 | int audioOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi); |
66 | int dataOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi); |
67 | |
68 | void setRateForVoice(cst_voice *voice, float rate); |
69 | void setPitchForVoice(cst_voice *voice, float pitch); |
70 | |
71 | bool init(); |
72 | bool initAudio(double rate, int channelCount); |
73 | void deinitAudio(); |
74 | bool checkFormat(const QAudioFormat &format); |
75 | bool checkVoice(int voiceId); |
76 | void deleteSink(); |
77 | void createSink(); |
78 | QAudio::State audioSinkState() const; |
79 | void setError(QTextToSpeech::ErrorReason err, const QString &errorString = QString()); |
80 | |
81 | // Read available flite voices |
82 | QStringList fliteAvailableVoices(const QString &libPrefix, const QString &langCode) const; |
83 | |
84 | private slots: |
85 | void changeState(QAudio::State newState); |
86 | |
87 | Q_SIGNALS: |
88 | void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString); |
89 | void stateChanged(QTextToSpeech::State); |
90 | void sayingWord(const QString &word, qsizetype begin, qsizetype length); |
91 | void synthesized(const QAudioFormat &format, const QByteArray &array); |
92 | |
93 | protected: |
94 | void timerEvent(QTimerEvent *event) override; |
95 | |
96 | private: |
97 | struct TokenData { |
98 | qint64 startTime; |
99 | QString text; |
100 | }; |
101 | QString m_text; |
102 | qsizetype m_index = -1; |
103 | QList<TokenData> m_tokens; |
104 | qsizetype m_currentToken = -1; |
105 | QBasicTimer m_tokenTimer; |
106 | void startTokenTimer(); |
107 | |
108 | QAudioSink *m_audioSink = nullptr; |
109 | QAudio::State m_state = QAudio::IdleState; |
110 | QIODevice *m_audioBuffer = nullptr; |
111 | |
112 | QAudioDevice m_audioDevice; |
113 | QAudioFormat m_format; |
114 | double m_volume = 1; |
115 | |
116 | QList<VoiceInfo> m_voices; |
117 | |
118 | // Statistics for debugging |
119 | qint64 numberChunks = 0; |
120 | qint64 totalBytes = 0; |
121 | }; |
122 | |
123 | QT_END_NAMESPACE |
124 | |
125 | #endif |
126 | |