35 <<
"isVoice:" << obj.
isVoice <<
", "
109 os <<
"SpeechSummary["
110 <<
"speechEvents.size():" << obj.
speechEvents.size() <<
", "
129 using Ptr = std::shared_ptr<SpeechSummaryStream>;
182 const uint8_t *bytes,
183 size_t bytesNum) = 0;
192 const int16_t *pcm16Samples,
193 size_t samplesNum) = 0;
202 const float *floatSamples,
203 size_t samplesNum) = 0;
220 using Ptr = std::shared_ptr<SpeechSummaryStreamOpus>;
284 const uint8_t* bytes,
285 size_t bytesNum) = 0;
301 using Ptr = std::shared_ptr<SpeechSummaryEngine>;
323 const uint8_t *bytes,
325 size_t sampleRate) = 0;
336 const int16_t *pcm16Samples,
338 size_t sampleRate) = 0;
349 const float *floatSamples,
351 size_t sampleRate) = 0;
360 const std::string& audioFile) = 0;
Speech summary engine class (interface), intended to calculate SpeechSummary with given audio samples...
Definition: speech_summary.h:298
virtual SpeechSummary getSpeechSummary(const float *floatSamples, size_t samplesNum, size_t sampleRate)=0
Calculates speech summary with given float audio samples (in [-1; 1] range)
virtual ~SpeechSummaryEngine()=default
std::shared_ptr< SpeechSummaryEngine > Ptr
Definition: speech_summary.h:301
static SpeechSummaryEngine::Ptr create(const std::string &initPath="")
Creates SpeechSummaryEngine instance.
virtual SpeechSummary getSpeechSummary(const int16_t *pcm16Samples, size_t samplesNum, size_t sampleRate)=0
Calculates speech summary with given PCM16 audio samples.
virtual SpeechSummary getSpeechSummary(const std::string &audioFile)=0
Calculates speech summary with given audio file.
virtual SpeechSummaryStream::Ptr createStream(int sampleRate)=0
Factory method for creating SpeechSummaryStream.
virtual SpeechSummary getSpeechSummary(const uint8_t *bytes, size_t bytesNum, size_t sampleRate)=0
Calculates speech summary with given PCM16 audio samples passed as a raw byte buffer.
Class for online Opus audio stream processing (voice activity detection and speech statistics computa...
Definition: speech_summary.h:217
static SpeechSummaryStreamOpus::Ptr create(const std::string &initPath, size_t sampleRate)
Creates SpeechSummaryStreamOpus instance.
std::shared_ptr< SpeechSummaryStreamOpus > Ptr
Definition: speech_summary.h:220
virtual SpeechEvent getSpeechEvent()=0
Retrieves a single speech event from output FIFO queue. Use hasSpeechEvents() to check if there is av...
virtual void reset()=0
Resets stream state: clears buffer, resets speech summary.
virtual SpeechInfo getTotalSpeechInfo() const =0
Retrieves total accumulated speech info.
virtual float getCurrentBackgroundLength() const =0
Method for retrieving current background length (length of a continuous non-speech segment starting o...
virtual bool hasSpeechEvents() const =0
Checks if there are available speech events in output queue.
virtual SpeechSummary getTotalSpeechSummary() const =0
Retrieves total accumulated speech summary.
virtual bool compare(const SpeechSummaryStreamOpus::Ptr &other) const =0
Checks whether the resulting states of this stream and another stream are equal.
virtual ~SpeechSummaryStreamOpus()=default
virtual void addPacket(const uint8_t *bytes, size_t bytesNum)=0
Adds Opus packet to process.
virtual void finalize()=0
Finalizes the input audio stream: processes the remaining audio samples and produces a result if possible.
Class for online audio stream processing (voice activity detection and speech statistics computation)
Definition: speech_summary.h:126
virtual ~SpeechSummaryStream()=default
virtual SpeechSummary getTotalSpeechSummary() const =0
Retrieves total accumulated speech summary.
virtual bool hasSpeechEvents() const =0
Checks if there are available speech events in output queue.
virtual void reset()=0
Resets stream state: clears buffer, resets speech summary.
virtual bool compare(const SpeechSummaryStream::Ptr &other) const =0
Checks whether the resulting states of this stream and another stream are equal.
std::shared_ptr< SpeechSummaryStream > Ptr
Definition: speech_summary.h:129
virtual float getCurrentBackgroundLength() const =0
Method for retrieving current background length (length of a continuous non-speech segment starting o...
virtual SpeechInfo getTotalSpeechInfo() const =0
Retrieves total accumulated speech info.
virtual void addSamples(const uint8_t *bytes, size_t bytesNum)=0
Adds PCM16 audio samples, passed as a raw byte buffer, to process.
virtual void addSamples(const int16_t *pcm16Samples, size_t samplesNum)=0
Adds PCM16 audio samples to process.
virtual SpeechEvent getSpeechEvent()=0
Retrieves a single speech event from output FIFO queue. Use hasSpeechEvents() to check if there is av...
virtual void finalize()=0
Finalizes the input audio stream: processes the remaining audio samples and produces a result if possible.
virtual void addSamples(const float *floatSamples, size_t samplesNum)=0
Adds float audio samples (in [-1; 1] range) to process.
#define VOICE_SDK_API
Definition: config.h:21
Definition: intervals.h:8
Structure representing interval of audio data.
Definition: intervals.h:57
Definition: speech_summary.h:17
friend std::ostream & operator<<(std::ostream &os, const SpeechEvent &obj)
Definition: speech_summary.h:33
SpeechEvent(bool isVoice, const AudioInterval &audioInterval)
Definition: speech_summary.h:30
bool isVoice
Whether the interval contains speech or not.
Definition: speech_summary.h:21
bool operator==(const SpeechEvent &other) const
Definition: speech_summary.h:40
AudioInterval audioInterval
Speech event audio interval.
Definition: speech_summary.h:26
Definition: speech_summary.h:46
friend std::ostream & operator<<(std::ostream &os, const SpeechInfo &obj)
Definition: speech_summary.h:70
SpeechInfo(float speechLengthMs, float backgroundLengthMs, float totalLengthMs)
Definition: speech_summary.h:64
bool operator==(const SpeechInfo &other) const
Definition: speech_summary.h:78
float speechLengthMs
Speech signal length in milliseconds.
Definition: speech_summary.h:50
float backgroundLengthMs
Non-speech signal length in milliseconds.
Definition: speech_summary.h:55
float totalLengthMs
Total length of the processed audio in milliseconds (totalLengthMs = speechLengthMs + backgroundLengthMs).
Definition: speech_summary.h:60
Structure containing speech statistics and audio intervals marked as speech or non-speech.
Definition: speech_summary.h:90
bool operator==(const SpeechSummary &other) const
Definition: speech_summary.h:115
friend std::ostream & operator<<(std::ostream &os, const SpeechSummary &obj)
Definition: speech_summary.h:108
SpeechInfo speechInfo
Contains speech statistics.
Definition: speech_summary.h:99
std::vector< SpeechEvent > speechEvents
Contains audio intervals marked as speech or non-speech.
Definition: speech_summary.h:94
SpeechSummary(const std::vector< SpeechEvent > &speechEvents, const SpeechInfo &speechInfo)
Definition: speech_summary.h:103