1// Copyright (C) 2022 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only
3
4#ifndef QTEXTTOSPEECHPROCESSOR_FLITE_H
5#define QTEXTTOSPEECHPROCESSOR_FLITE_H
6
7#include "qtexttospeechengine.h"
8#include "qvoice.h"
9
10#include <QtCore/QList>
11#include <QtCore/QMutex>
12#include <QtCore/QThread>
13#include <QtCore/QLibrary>
14#include <QtCore/QString>
15#include <QtCore/QBasicTimer>
16#include <QtCore/QTimerEvent>
17#include <QtCore/QAbstractEventDispatcher>
18#include <QtCore/QProcessEnvironment>
19#include <QtCore/QDateTime>
20#include <QtMultimedia/QAudioSink>
21#include <QtMultimedia/QMediaDevices>
22
23#include <flite/flite.h>
24
25QT_BEGIN_NAMESPACE
26
27class QTextToSpeechProcessorFlite : public QObject
28{
29 Q_OBJECT
30
31public:
32 QTextToSpeechProcessorFlite(const QAudioDevice &audioDevice);
33 ~QTextToSpeechProcessorFlite();
34
35 struct VoiceInfo
36 {
37 int id;
38 cst_voice *vox;
39 void (*unregister_func)(cst_voice *vox);
40 QString name;
41 QString locale;
42 QVoice::Gender gender;
43 QVoice::Age age;
44 };
45
46 Q_INVOKABLE void say(const QString &text, int voiceId, double pitch, double rate, double volume);
47 Q_INVOKABLE void synthesize(const QString &text, int voiceId, double pitch, double rate, double volume);
48 Q_INVOKABLE void pause();
49 Q_INVOKABLE void resume();
50 Q_INVOKABLE void stop();
51
52 const QList<QTextToSpeechProcessorFlite::VoiceInfo> &voices() const;
53 static constexpr QTextToSpeech::State audioStateToTts(QAudio::State audioState);
54
55private:
56 // Flite callbacks
57 static int audioOutputCb(const cst_wave *w, int start, int size,
58 int last, cst_audio_streaming_info *asi);
59 static int dataOutputCb(const cst_wave *w, int start, int size,
60 int last, cst_audio_streaming_info *asi);
61
62 using OutputHandler = decltype(QTextToSpeechProcessorFlite::audioOutputCb);
63 // Process a single text
64 void processText(const QString &text, int voiceId, double pitch, double rate, OutputHandler outputHandler);
65 int audioOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
66 int dataOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
67
68 void setRateForVoice(cst_voice *voice, float rate);
69 void setPitchForVoice(cst_voice *voice, float pitch);
70
71 bool init();
72 bool initAudio(double rate, int channelCount);
73 void deinitAudio();
74 bool checkFormat(const QAudioFormat &format);
75 bool checkVoice(int voiceId);
76 void deleteSink();
77 void createSink();
78 QAudio::State audioSinkState() const;
79 void setError(QTextToSpeech::ErrorReason err, const QString &errorString = QString());
80
81 // Read available flite voices
82 QStringList fliteAvailableVoices(const QString &libPrefix, const QString &langCode) const;
83
84private slots:
85 void changeState(QAudio::State newState);
86
87Q_SIGNALS:
88 void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString);
89 void stateChanged(QTextToSpeech::State);
90 void sayingWord(const QString &word, qsizetype begin, qsizetype length);
91 void synthesized(const QAudioFormat &format, const QByteArray &array);
92
93protected:
94 void timerEvent(QTimerEvent *event) override;
95
96private:
97 struct TokenData {
98 qint64 startTime;
99 QString text;
100 };
101 QString m_text;
102 qsizetype m_index = -1;
103 QList<TokenData> m_tokens;
104 qsizetype m_currentToken = -1;
105 QBasicTimer m_tokenTimer;
106 void startTokenTimer();
107
108 QAudioSink *m_audioSink = nullptr;
109 QAudio::State m_state = QAudio::IdleState;
110 QIODevice *m_audioBuffer = nullptr;
111
112 QAudioDevice m_audioDevice;
113 QAudioFormat m_format;
114 double m_volume = 1;
115
116 QList<VoiceInfo> m_voices;
117
118 // Statistics for debugging
119 qint64 numberChunks = 0;
120 qint64 totalBytes = 0;
121};
122
123QT_END_NAMESPACE
124
125#endif
126

// Source code of qtspeech/src/plugins/tts/flite/qtexttospeech_flite_processor.h