8 #include <voicesdk/core/core_c_api.h>
/**
 * Opaque handle types of the media API.
 *
 * Only forward declarations are given, so callers can hold and pass pointers
 * to these types but cannot see or depend on their layout.
 */
typedef struct VoiceSdkSpeechSummaryEngine VoiceSdkSpeechSummaryEngine;
typedef struct VoiceSdkSpeechSummaryStream VoiceSdkSpeechSummaryStream;
typedef struct VoiceSdkSpeechSummaryStreamOpus VoiceSdkSpeechSummaryStreamOpus;
typedef struct VoiceSdkSnrComputer VoiceSdkSnrComputer;
typedef struct VoiceSdkSpeechEndpointDetector VoiceSdkSpeechEndpointDetector;
typedef struct VoiceSdkSpeechEndpointDetectorOpus VoiceSdkSpeechEndpointDetectorOpus;
typedef struct VoiceSdkQualityCheckEngine VoiceSdkQualityCheckEngine;
/**
 * Short description of the quality check results.
 *
 * Values are assigned explicitly. Note that Ok is 2 rather than 0, so a
 * zero-initialized variable of this type reads as TooNoisy.
 */
typedef enum VoiceSdkQualityCheckShortDescription {
    /** Presumably: the measured SNR is below the required minimum -- confirm. */
    kVoiceSdkQualityCheckShortDescriptionTooNoisy = 0,
    /** Presumably: the speech length is below the required minimum -- confirm. */
    kVoiceSdkQualityCheckShortDescriptionTooSmallSpeechLength = 1,
    /** Presumably: the audio passed every check -- confirm. */
    kVoiceSdkQualityCheckShortDescriptionOk = 2,
    /** Presumably: speech length relative to total audio length is too small -- confirm. */
    kVoiceSdkQualityCheckShortDescriptionTooSmallSpeechRelativeLength = 3,
    /** Presumably: more than one speaker was detected -- confirm. */
    kVoiceSdkQualityCheckShortDescriptionMultipleSpeakersDetected = 4
} VoiceSdkQualityCheckShortDescription;
/**
 * Scenario selector taken by VoiceSdkQualityCheckEngineGetRecommendedThresholds.
 *
 * "Ti" / "Td" presumably stand for text-independent / text-dependent
 * verification -- confirm against the SDK documentation.
 */
typedef enum VoiceSdkQualityCheckScenario {
    kVoiceSdkQualityCheckScenarioVerifyTiEnrollment = 0,
    kVoiceSdkQualityCheckScenarioVerifyTiVerification = 1,
    kVoiceSdkQualityCheckScenarioVerifyTdEnrollment = 2,
    kVoiceSdkQualityCheckScenarioVerifyTdVerification = 3,
    kVoiceSdkQualityCheckScenarioLiveness = 4
} VoiceSdkQualityCheckScenario;
193 VoiceSdkSpeechSummaryEngine* VoiceSdkSpeechSummaryEngineCreate(
const char* init_data_path,
char** error_msg);
209 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromBytes(
const VoiceSdkSpeechSummaryEngine* engine,
210 const uint8_t* bytes,
size_t bytes_num,
211 size_t sample_rate,
char** error_msg);
/* NOTE(review): truncated listing fragment. The line carrying the return type
 * and function name, and the tail of the parameter list, are absent here and
 * the leading number is listing residue. Presumably the PCM16-sample variant
 * of the speech-summary query -- restore from the original header. */
228 const VoiceSdkSpeechSummaryEngine* engine,
const int16_t* pcm16_samples,
size_t samples_num,
size_t sample_rate,
/* NOTE(review): truncated in the same way; presumably the float-sample
 * variant of the speech-summary query -- restore from the original header. */
247 const VoiceSdkSpeechSummaryEngine* engine,
const float* float_samples,
size_t samples_num,
size_t sample_rate,
262 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromFile(
const VoiceSdkSpeechSummaryEngine* engine,
263 const char* audio_path,
char** error_msg);
271 void VoiceSdkSpeechSummaryEngineRelease(VoiceSdkSpeechSummaryEngine* engine);
310 VoiceSdkSpeechSummaryStream* VoiceSdkSpeechSummaryStreamCreate(
const VoiceSdkSpeechSummaryEngine* engine,
311 size_t sample_rate,
char** error_msg);
325 bool VoiceSdkSpeechSummaryStreamAddByteSamples(
const VoiceSdkSpeechSummaryStream* stream,
const uint8_t* bytes,
326 size_t bytes_num,
char** error_msg);
340 bool VoiceSdkSpeechSummaryStreamAddPcm16Samples(
const VoiceSdkSpeechSummaryStream* stream,
const int16_t* pcm16_samples,
341 size_t samples_num,
char** error_msg);
355 bool VoiceSdkSpeechSummaryStreamAddFloatSamples(
const VoiceSdkSpeechSummaryStream* stream,
const float* float_samples,
356 size_t samples_num,
char** error_msg);
/* NOTE(review): the five declarations below are truncated in this listing:
 * each parameter list stops before its closing `);` and the leading numbers
 * are listing residue. The missing tail is presumably `char** error_msg);`
 * (plus a result out-parameter where none is shown) -- restore from the
 * original header rather than guessing. */
371 bool VoiceSdkSpeechSummaryStreamGetCurrentBackgroundLength(
const VoiceSdkSpeechSummaryStream* stream,
float* result,
/* Query writing a bool through `result`. */
385 bool VoiceSdkSpeechSummaryStreamHasSpeechEvents(
const VoiceSdkSpeechSummaryStream* stream,
bool* result,
/* Query writing a VoiceSdkSpeechEvent through `result`. */
400 bool VoiceSdkSpeechSummaryStreamGetSpeechEvent(
const VoiceSdkSpeechSummaryStream* stream,
VoiceSdkSpeechEvent* result,
/* Query returning a VoiceSdkSpeechSummary pointer. */
414 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryStreamGetTotalSpeechSummary(
const VoiceSdkSpeechSummaryStream* stream,
/* Remaining parameters are not visible here. */
428 bool VoiceSdkSpeechSummaryStreamGetTotalSpeechInfo(
const VoiceSdkSpeechSummaryStream* stream,
442 bool VoiceSdkSpeechSummaryStreamReset(
const VoiceSdkSpeechSummaryStream* stream,
char** error_msg);
455 bool VoiceSdkSpeechSummaryStreamFinalize(
const VoiceSdkSpeechSummaryStream* stream,
char** error_msg);
463 void VoiceSdkSpeechSummaryStreamRelease(VoiceSdkSpeechSummaryStream* stream);
/* NOTE(review): truncated in this listing. The parameter list stops before
 * its closing `);` (presumably a trailing `char** error_msg` is missing) and
 * the leading number is listing residue -- restore from the original header. */
489 VoiceSdkSpeechSummaryStreamOpus* VoiceSdkSpeechSummaryStreamOpusCreate(
const char* init_data_path,
size_t sample_rate,
505 bool VoiceSdkSpeechSummaryStreamOpusAddPacket(
const VoiceSdkSpeechSummaryStreamOpus* stream,
const uint8_t* bytes,
506 size_t bytes_num,
char** error_msg);
521 bool VoiceSdkSpeechSummaryStreamOpusGetCurrentBackgroundLength(
const VoiceSdkSpeechSummaryStreamOpus* stream,
522 float* result,
char** error_msg);
/* NOTE(review): the declarations below are truncated in this listing and the
 * leading numbers are listing residue. Missing pieces (closing `);`,
 * presumably `char** error_msg`, and in one case the function name itself)
 * must be restored from the original header rather than guessed. */
536 bool VoiceSdkSpeechSummaryStreamOpusHasSpeechEvents(
const VoiceSdkSpeechSummaryStreamOpus* stream,
bool* result,
/* Parameter list cut after the stream handle. */
552 bool VoiceSdkSpeechSummaryStreamOpusGetSpeechEvent(
const VoiceSdkSpeechSummaryStreamOpus* stream,
/* Return type and name are absent; presumably the total-speech-summary query
 * of the Opus stream -- confirm. */
568 const VoiceSdkSpeechSummaryStreamOpus* stream,
char** error_msg);
/* Parameter list cut after the stream handle. */
581 bool VoiceSdkSpeechSummaryStreamOpusGetTotalSpeechInfo(
const VoiceSdkSpeechSummaryStreamOpus* stream,
595 bool VoiceSdkSpeechSummaryStreamOpusReset(
const VoiceSdkSpeechSummaryStreamOpus* stream,
char** error_msg);
608 bool VoiceSdkSpeechSummaryStreamOpusFinalize(
const VoiceSdkSpeechSummaryStreamOpus* stream,
char** error_msg);
616 void VoiceSdkSpeechSummaryStreamOpusRelease(VoiceSdkSpeechSummaryStreamOpus* stream);
638 VoiceSdkSnrComputer* VoiceSdkSnrComputerCreate(
const char* init_data_path,
char** error_msg);
654 bool VoiceSdkSnrComputerComputeFromBytes(
const VoiceSdkSnrComputer* engine,
const uint8_t* bytes,
size_t bytes_num,
655 size_t sample_rate,
float* result,
char** error_msg);
/* NOTE(review): the three declarations below are truncated in this listing:
 * each stops after `float* result,` without a closing `);` and the leading
 * numbers are listing residue. The missing tail is presumably
 * `char** error_msg);` as in VoiceSdkSnrComputerComputeFromBytes -- restore
 * from the original header. */
671 bool VoiceSdkSnrComputerComputeFromPcm16Samples(
const VoiceSdkSnrComputer* engine,
const int16_t* pcm16_samples,
672 size_t samples_num,
size_t sample_rate,
float* result,
/* Float-sample variant. */
690 bool VoiceSdkSnrComputerComputeFromFloatSamples(
const VoiceSdkSnrComputer* engine,
const float* float_samples,
691 size_t samples_num,
size_t sample_rate,
float* result,
/* File-path variant. */
705 bool VoiceSdkSnrComputerComputeFromFile(
const VoiceSdkSnrComputer* engine,
const char* audio_path,
float* result,
714 void VoiceSdkSnrComputerRelease(VoiceSdkSnrComputer* engine);
/* NOTE(review): truncated in this listing. The parameter list stops before
 * its closing `);` and the leading numbers are listing residue. The Opus
 * counterpart ends with `char** error_msg);`, so the same tail is presumably
 * missing here -- restore from the original header. */
737 VoiceSdkSpeechEndpointDetector* VoiceSdkSpeechEndpointDetectorCreate(
size_t min_speech_length_ms,
738 size_t max_silence_length_ms,
size_t sample_rate,
753 bool VoiceSdkSpeechEndpointDetectorAddByteSamples(
const VoiceSdkSpeechEndpointDetector* detector,
const uint8_t* bytes,
754 size_t bytes_num,
char** error_msg);
769 bool VoiceSdkSpeechEndpointDetectorAddPcm16Samples(
const VoiceSdkSpeechEndpointDetector* detector,
770 const int16_t* pcm16_samples,
size_t samples_num,
char** error_msg);
785 bool VoiceSdkSpeechEndpointDetectorAddFloatSamples(
const VoiceSdkSpeechEndpointDetector* detector,
786 const float* float_samples,
size_t samples_num,
char** error_msg);
/* NOTE(review): truncated in this listing. The parameter list stops after
 * `bool* result,` without a closing `);` (presumably `char** error_msg);` is
 * missing) and the leading number is listing residue -- restore from the
 * original header. */
799 bool VoiceSdkSpeechEndpointDetectorIsSpeechEnded(
const VoiceSdkSpeechEndpointDetector* detector,
bool* result,
813 bool VoiceSdkSpeechEndpointDetectorReset(
const VoiceSdkSpeechEndpointDetector* detector,
char** error_msg);
821 void VoiceSdkSpeechEndpointDetectorRelease(VoiceSdkSpeechEndpointDetector* detector);
844 VoiceSdkSpeechEndpointDetectorOpus* VoiceSdkSpeechEndpointDetectorOpusCreate(
size_t min_speech_length_ms,
845 size_t max_silence_length_ms,
846 size_t sample_rate,
char** error_msg);
861 bool VoiceSdkSpeechEndpointDetectorOpusAddPacket(
const VoiceSdkSpeechEndpointDetectorOpus* detector,
862 const uint8_t* bytes,
size_t bytes_num,
char** error_msg);
/* NOTE(review): truncated in this listing. The parameter list stops after
 * `bool* result,` without a closing `);` (presumably `char** error_msg);` is
 * missing) and the leading number is listing residue -- restore from the
 * original header. */
875 bool VoiceSdkSpeechEndpointDetectorOpusIsSpeechEnded(
const VoiceSdkSpeechEndpointDetectorOpus* detector,
bool* result,
890 bool VoiceSdkSpeechEndpointDetectorOpusReset(
const VoiceSdkSpeechEndpointDetectorOpus* detector,
char** error_msg);
898 void VoiceSdkSpeechEndpointDetectorOpusRelease(VoiceSdkSpeechEndpointDetectorOpus* detector);
/* NOTE(review): every quality-check declaration below is damaged in this
 * listing and the leading numbers are listing residue. Restore the missing
 * lines from the original header rather than guessing.
 *
 * Create: the parameter list stops after `error_code,` with no closing `);`. */
921 VoiceSdkQualityCheckEngine* VoiceSdkQualityCheckEngineCreate(
const char* init_data_path, VoiceSdkErrorCode* error_code,
/* GetRecommendedThresholds: listing line 941 is absent between 940 and 942,
 * so one parameter (presumably the thresholds out-parameter) is missing even
 * though the declaration looks closed. */
939 bool VoiceSdkQualityCheckEngineGetRecommendedThresholds(
const VoiceSdkQualityCheckEngine* engine,
940 VoiceSdkQualityCheckScenario scenario,
942 VoiceSdkErrorCode* error_code,
char** error_msg);
/* Return type, name and tail are absent; presumably the file-path check. */
956 const VoiceSdkQualityCheckEngine* engine,
const char* audio_path,
/* Return type, name and tail are absent; presumably the float-sample check. */
973 const VoiceSdkQualityCheckEngine* engine,
const float* float_samples,
size_t num_samples,
size_t sample_rate,
/* Return type, name and tail are absent; presumably the int16-sample check. */
990 const VoiceSdkQualityCheckEngine* engine,
const int16_t* samples,
size_t num_samples,
size_t sample_rate,
/* Return type, name and tail are absent; presumably the raw-byte check. */
1007 const VoiceSdkQualityCheckEngine* engine,
const uint8_t* bytes,
size_t num_bytes,
size_t sample_rate,
1022 void VoiceSdkQualityCheckEngineRelease(VoiceSdkQualityCheckEngine* engine);
Definition: core_c_api.h:73
Definition: media_c_api.h:144
VoiceSdkQualityCheckShortDescription quality_check_short_description
Short description of the quality check results.
Definition: media_c_api.h:148
float speech_length_ms
Speech length metric value obtained on quality check in milliseconds.
Definition: media_c_api.h:158
float speech_relative_length
Speech relative length (speech length relative to the total audio length) metric value obtained on quality check.
Definition: media_c_api.h:163
float multiple_speakers_detector_score
Multiple speakers detector score value obtained on quality check.
Definition: media_c_api.h:168
float snr_db
SNR metric value obtained on quality check in dB.
Definition: media_c_api.h:153
Definition: media_c_api.h:122
float minimum_snr_db
Minimum signal-to-noise ratio required to pass quality check in dB.
Definition: media_c_api.h:126
float minimum_speech_length_ms
Minimum speech length required to pass quality check in milliseconds.
Definition: media_c_api.h:131
float minimum_speech_relative_length
Minimum speech relative length (speech length relative to the total audio length) required to pass quality check.
Definition: media_c_api.h:136
float maximum_multiple_speakers_detector_score
Maximum multiple speakers detector score allowed to pass quality check.
Definition: media_c_api.h:141
Definition: media_c_api.h:24
bool is_voice
Whether the interval contains speech or not.
Definition: media_c_api.h:28
VoiceSdkAudioInterval audio_interval
Speech event audio interval.
Definition: media_c_api.h:33
Definition: media_c_api.h:36
float total_length_ms
Processed audio total length in milliseconds (total_length_ms = speech_length_ms + background_length_ms).
Definition: media_c_api.h:50
float background_length_ms
Non-speech signal length in milliseconds.
Definition: media_c_api.h:45
float speech_length_ms
Speech signal length in milliseconds.
Definition: media_c_api.h:40
Definition: media_c_api.h:56
VoiceSdkSpeechEventArray speech_events
Contains audio intervals marked as speech or non-speech.
Definition: media_c_api.h:60
VoiceSdkSpeechInfo speech_info
Contains speech statistics.
Definition: media_c_api.h:65