Call Center SDK  1.11.3
media_c_api.h
1 /* Copyright 2021 ID R&D Inc. All Rights Reserved. */
2 
3 #pragma once
4 
5 #include <stdint.h>
6 #include <stddef.h>
7 
8 #include <voicesdk/core/core_c_api.h>
9 
10 #ifdef __cplusplus
11 extern "C" {
12 #endif
13 
14 // forward declarations
// Opaque handle types. Each typedef names a struct that is declared but not
// defined in the visible part of this header, so the functions declared
// further down pass these objects around strictly by pointer.
15 typedef struct VoiceSdkSpeechSummaryEngine VoiceSdkSpeechSummaryEngine;
16 typedef struct VoiceSdkSpeechSummaryStream VoiceSdkSpeechSummaryStream;
17 typedef struct VoiceSdkSpeechSummaryStreamOpus VoiceSdkSpeechSummaryStreamOpus;
18 typedef struct VoiceSdkSnrComputer VoiceSdkSnrComputer;
19 typedef struct VoiceSdkSpeechEndpointDetector VoiceSdkSpeechEndpointDetector;
20 typedef struct VoiceSdkSpeechEndpointDetectorOpus VoiceSdkSpeechEndpointDetectorOpus;
21 typedef struct VoiceSdkQualityCheckEngine VoiceSdkQualityCheckEngine;
22 
23 // api objects
24 typedef struct VoiceSdkSpeechEvent {
28  bool is_voice;
29 
35 
36 typedef struct VoiceSdkSpeechInfo {
41 
46 
52 
53 DECL_TYPED_ARRAY(VoiceSdkSpeechEventArray, VoiceSdkSpeechEvent)
54 DECL_TYPED_ARRAY(VoiceSdkSpeechInfoArray, VoiceSdkSpeechInfo)
55 
56 typedef struct VoiceSdkSpeechSummary {
60  VoiceSdkSpeechEventArray speech_events;
61 
67 
/**
 * Short description of a quality check result (type of the
 * quality_check_short_description field).
 *
 * The success value kVoiceSdkQualityCheckShortDescriptionOk is 2, not 0, so
 * compare against the enumerator explicitly instead of testing for zero.
 * NOTE(review): the per-enumerator doc comments are missing from this
 * listing; the trailing notes below are inferred from the names only.
 */
68 typedef enum VoiceSdkQualityCheckShortDescription {
72  kVoiceSdkQualityCheckShortDescriptionTooNoisy = 0,  // presumably SNR below the threshold - confirm
73 
77  kVoiceSdkQualityCheckShortDescriptionTooSmallSpeechLength = 1,  // presumably too little speech - confirm
78 
82  kVoiceSdkQualityCheckShortDescriptionOk = 2,  // non-zero "OK" value
83 
87  kVoiceSdkQualityCheckShortDescriptionTooSmallSpeechRelativeLength = 3,  // presumably speech/total ratio too low - confirm
88 
92  kVoiceSdkQualityCheckShortDescriptionMultipleSpeakersDetected = 4  // presumably detector score above the maximum - confirm
93 } VoiceSdkQualityCheckShortDescription;
94 
/**
 * Scenario selector passed as the `scenario` argument of
 * VoiceSdkQualityCheckEngineGetRecommendedThresholds.
 *
 * Values are explicitly numbered 0..4.
 * NOTE(review): "Ti"/"Td" presumably stand for text-independent and
 * text-dependent verification - the original doc comments are missing from
 * this listing, confirm against the full header.
 */
95 typedef enum VoiceSdkQualityCheckScenario {
99  kVoiceSdkQualityCheckScenarioVerifyTiEnrollment = 0,
100 
104  kVoiceSdkQualityCheckScenarioVerifyTiVerification = 1,
105 
109  kVoiceSdkQualityCheckScenarioVerifyTdEnrollment = 2,
110 
114  kVoiceSdkQualityCheckScenarioVerifyTdVerification = 3,
115 
119  kVoiceSdkQualityCheckScenarioLiveness = 4
120 } VoiceSdkQualityCheckScenario;
121 
127 
132 
137 
143 
148  VoiceSdkQualityCheckShortDescription quality_check_short_description;
149 
153  float snr_db;
154 
159 
164 
170 
171 /************************
172  * Speech Summary calls *
173  ************************/
174 
/**
 * Factory declaration returning an opaque VoiceSdkSpeechSummaryEngine handle.
 * @param init_data_path C string; presumably the location of the engine's init data - confirm.
 * @param error_msg char** out-parameter; ownership of any message written is not visible here - TODO confirm.
 * @return engine pointer; the failure value is not shown in this listing (likely NULL - confirm).
 */
192 VOICE_SDK_API
193 VoiceSdkSpeechSummaryEngine* VoiceSdkSpeechSummaryEngineCreate(const char* init_data_path, char** error_msg);
194 
/**
 * Produces a VoiceSdkSpeechSummary from a raw byte buffer.
 * @param engine engine handle (pointer-to-const).
 * @param bytes buffer of `bytes_num` bytes; NOTE(review): the expected sample encoding is not visible here - confirm.
 * @param sample_rate passed as size_t; presumably in Hz - confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
208 VOICE_SDK_API
209 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromBytes(const VoiceSdkSpeechSummaryEngine* engine,
210  const uint8_t* bytes, size_t bytes_num,
211  size_t sample_rate, char** error_msg);
212 
/**
 * Produces a VoiceSdkSpeechSummary from 16-bit integer samples.
 * @param engine engine handle (pointer-to-const).
 * @param pcm16_samples array of `samples_num` int16_t samples.
 * @param sample_rate passed as size_t; presumably in Hz - confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
226 VOICE_SDK_API
227 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromPcm16Samples(
228  const VoiceSdkSpeechSummaryEngine* engine, const int16_t* pcm16_samples, size_t samples_num, size_t sample_rate,
229  char** error_msg);
230 
/**
 * Produces a VoiceSdkSpeechSummary from floating-point samples.
 * @param engine engine handle (pointer-to-const).
 * @param float_samples array of `samples_num` floats; NOTE(review): the expected value range is not visible here - confirm.
 * @param sample_rate passed as size_t; presumably in Hz - confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
245 VOICE_SDK_API
246 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromFloatSamples(
247  const VoiceSdkSpeechSummaryEngine* engine, const float* float_samples, size_t samples_num, size_t sample_rate,
248  char** error_msg);
249 
/**
 * Produces a VoiceSdkSpeechSummary for an audio file identified by path.
 * Unlike the buffer-based variants, no sample rate argument is taken.
 * @param engine engine handle (pointer-to-const).
 * @param audio_path C string path; supported file formats are not visible here - TODO confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
261 VOICE_SDK_API
262 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryEngineGetSpeechSummaryFromFile(const VoiceSdkSpeechSummaryEngine* engine,
263  const char* audio_path, char** error_msg);
264 
/**
 * Release counterpart for VoiceSdkSpeechSummaryEngine handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the engine; NULL handling is not visible here - confirm.
 */
270 VOICE_SDK_API
271 void VoiceSdkSpeechSummaryEngineRelease(VoiceSdkSpeechSummaryEngine* engine);
272 
273 
/**
 * Release counterpart for VoiceSdkSpeechSummary objects returned by pointer
 * from the GetSpeechSummary / GetTotalSpeechSummary functions in this header.
 * NOTE(review): presumably also frees the contained speech_events array - confirm.
 */
283 VOICE_SDK_API
284 void VoiceSdkSpeechSummaryRelease(VoiceSdkSpeechSummary* summary);
285 
286 
287 /*******************************
288  * SpeechSummaryStream calls *
289  *******************************/
290 
/**
 * Factory declaration for a VoiceSdkSpeechSummaryStream built from an existing engine.
 * @param engine engine handle (pointer-to-const); NOTE(review): whether the engine must outlive the stream is not visible here.
 * @param sample_rate fixed at creation; the Add*Samples functions take no rate argument. Presumably Hz - confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return stream pointer; the failure value is not shown in this listing.
 */
309 VOICE_SDK_API
310 VoiceSdkSpeechSummaryStream* VoiceSdkSpeechSummaryStreamCreate(const VoiceSdkSpeechSummaryEngine* engine,
311  size_t sample_rate, char** error_msg);
312 
/**
 * Feeds `bytes_num` raw bytes to the stream.
 * NOTE(review): `stream` is pointer-to-const although the name implies the
 * stream state changes - confirm this is intentional.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
324 VOICE_SDK_API
325 bool VoiceSdkSpeechSummaryStreamAddByteSamples(const VoiceSdkSpeechSummaryStream* stream, const uint8_t* bytes,
326  size_t bytes_num, char** error_msg);
327 
/**
 * Feeds `samples_num` int16_t samples to the stream.
 * NOTE(review): `stream` is pointer-to-const although the name implies the
 * stream state changes - confirm this is intentional.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
339 VOICE_SDK_API
340 bool VoiceSdkSpeechSummaryStreamAddPcm16Samples(const VoiceSdkSpeechSummaryStream* stream, const int16_t* pcm16_samples,
341  size_t samples_num, char** error_msg);
342 
/**
 * Feeds `samples_num` float samples to the stream.
 * NOTE(review): `stream` is pointer-to-const although the name implies the
 * stream state changes; the expected float range is also not visible here.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
354 VOICE_SDK_API
355 bool VoiceSdkSpeechSummaryStreamAddFloatSamples(const VoiceSdkSpeechSummaryStream* stream, const float* float_samples,
356  size_t samples_num, char** error_msg);
357 
358 
/**
 * Reads the stream's current background length into *result.
 * @param result float out-parameter; presumably milliseconds, like background_length_ms - confirm.
 * @param error_msg char** out-parameter for an error message.
 * @return bool; the value itself is delivered through `result`, so the return presumably signals success - confirm.
 */
370 VOICE_SDK_API
371 bool VoiceSdkSpeechSummaryStreamGetCurrentBackgroundLength(const VoiceSdkSpeechSummaryStream* stream, float* result,
372  char** error_msg);
/**
 * Writes a yes/no answer about pending speech events into *result.
 * The answer is in `result`, not in the return value: the bool return is a
 * separate channel (presumably success/failure - confirm).
 */
384 VOICE_SDK_API
385 bool VoiceSdkSpeechSummaryStreamHasSpeechEvents(const VoiceSdkSpeechSummaryStream* stream, bool* result,
386  char** error_msg);
387 
/**
 * Copies one VoiceSdkSpeechEvent into the caller-provided *result.
 * NOTE(review): presumably pops the oldest queued event, to be called after
 * HasSpeechEvents reports true; `stream` is nevertheless pointer-to-const - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
399 VOICE_SDK_API
400 bool VoiceSdkSpeechSummaryStreamGetSpeechEvent(const VoiceSdkSpeechSummaryStream* stream, VoiceSdkSpeechEvent* result,
401  char** error_msg);
402 
/**
 * Returns a VoiceSdkSpeechSummary for the stream by pointer.
 * Unlike the bool-returning getters, the result is the return value itself.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
413 VOICE_SDK_API
414 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryStreamGetTotalSpeechSummary(const VoiceSdkSpeechSummaryStream* stream,
415  char** error_msg);
416 
/**
 * Fills the caller-provided VoiceSdkSpeechInfo (*result) for the stream.
 * @param error_msg char** out-parameter for an error message.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
427 VOICE_SDK_API
428 bool VoiceSdkSpeechSummaryStreamGetTotalSpeechInfo(const VoiceSdkSpeechSummaryStream* stream,
429  VoiceSdkSpeechInfo* result, char** error_msg);
430 
/**
 * Reset operation on a stream.
 * NOTE(review): takes `stream` as pointer-to-const although a reset
 * presumably discards accumulated state - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
441 VOICE_SDK_API
442 bool VoiceSdkSpeechSummaryStreamReset(const VoiceSdkSpeechSummaryStream* stream, char** error_msg);
443 
/**
 * Finalize operation on a stream.
 * NOTE(review): presumably flushes buffered audio so the remaining events
 * become available; exact semantics are not visible in this listing - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
454 VOICE_SDK_API
455 bool VoiceSdkSpeechSummaryStreamFinalize(const VoiceSdkSpeechSummaryStream* stream, char** error_msg);
456 
/**
 * Release counterpart for VoiceSdkSpeechSummaryStream handles.
 * The only stream function here that takes a non-const pointer.
 * NOTE(review): presumably destroys the stream - confirm.
 */
462 VOICE_SDK_API
463 void VoiceSdkSpeechSummaryStreamRelease(VoiceSdkSpeechSummaryStream* stream);
464 
465 /***********************************
466  * SpeechSummaryStreamOpus calls *
467  ***********************************/
468 
/**
 * Factory declaration for a VoiceSdkSpeechSummaryStreamOpus.
 * Unlike VoiceSdkSpeechSummaryStreamCreate, it takes an init data path
 * rather than an existing engine handle.
 * @param init_data_path C string; presumably the location of init data - confirm.
 * @param sample_rate passed as size_t; presumably Hz - confirm which rates are accepted.
 * @param error_msg char** out-parameter for an error message.
 */
488 VOICE_SDK_API
489 VoiceSdkSpeechSummaryStreamOpus* VoiceSdkSpeechSummaryStreamOpusCreate(const char* init_data_path, size_t sample_rate,
490  char** error_msg);
491 
/**
 * Feeds one buffer of `bytes_num` bytes to the Opus stream.
 * NOTE(review): presumably exactly one encoded Opus packet per call - confirm;
 * `stream` is pointer-to-const although the name implies a state change.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
504 VOICE_SDK_API
505 bool VoiceSdkSpeechSummaryStreamOpusAddPacket(const VoiceSdkSpeechSummaryStreamOpus* stream, const uint8_t* bytes,
506  size_t bytes_num, char** error_msg);
507 
/**
 * Reads the Opus stream's current background length into *result.
 * @param result float out-parameter; presumably milliseconds - confirm.
 * @return bool; the value is delivered through `result`, so the return presumably signals success - confirm.
 */
520 VOICE_SDK_API
521 bool VoiceSdkSpeechSummaryStreamOpusGetCurrentBackgroundLength(const VoiceSdkSpeechSummaryStreamOpus* stream,
522  float* result, char** error_msg);
/**
 * Writes a yes/no answer about pending speech events into *result.
 * The answer is in `result`; the bool return is a separate channel
 * (presumably success/failure - confirm).
 */
535 VOICE_SDK_API
536 bool VoiceSdkSpeechSummaryStreamOpusHasSpeechEvents(const VoiceSdkSpeechSummaryStreamOpus* stream, bool* result,
537  char** error_msg);
538 
/**
 * Copies one VoiceSdkSpeechEvent into the caller-provided *result.
 * NOTE(review): presumably pops the oldest queued event - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
551 VOICE_SDK_API
552 bool VoiceSdkSpeechSummaryStreamOpusGetSpeechEvent(const VoiceSdkSpeechSummaryStreamOpus* stream,
553  VoiceSdkSpeechEvent* result, char** error_msg);
554 
/**
 * Returns a VoiceSdkSpeechSummary for the Opus stream by pointer.
 * @param error_msg char** out-parameter for an error message.
 * @return summary pointer; a matching VoiceSdkSpeechSummaryRelease is declared in this header.
 */
566 VOICE_SDK_API
567 VoiceSdkSpeechSummary* VoiceSdkSpeechSummaryStreamOpusGetTotalSpeechSummary(
568  const VoiceSdkSpeechSummaryStreamOpus* stream, char** error_msg);
569 
/**
 * Fills the caller-provided VoiceSdkSpeechInfo (*result) for the Opus stream.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
580 VOICE_SDK_API
581 bool VoiceSdkSpeechSummaryStreamOpusGetTotalSpeechInfo(const VoiceSdkSpeechSummaryStreamOpus* stream,
582  VoiceSdkSpeechInfo* result, char** error_msg);
583 
/**
 * Reset operation on an Opus stream.
 * NOTE(review): `stream` is pointer-to-const although a reset presumably
 * discards accumulated state - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
594 VOICE_SDK_API
595 bool VoiceSdkSpeechSummaryStreamOpusReset(const VoiceSdkSpeechSummaryStreamOpus* stream, char** error_msg);
596 
/**
 * Finalize operation on an Opus stream.
 * NOTE(review): presumably flushes buffered audio; exact semantics are not
 * visible in this listing - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
607 VOICE_SDK_API
608 bool VoiceSdkSpeechSummaryStreamOpusFinalize(const VoiceSdkSpeechSummaryStreamOpus* stream, char** error_msg);
609 
/**
 * Release counterpart for VoiceSdkSpeechSummaryStreamOpus handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the stream - confirm.
 */
615 VOICE_SDK_API
616 void VoiceSdkSpeechSummaryStreamOpusRelease(VoiceSdkSpeechSummaryStreamOpus* stream);
617 
618 /************************
619  * SNR Computer calls *
620  ************************/
621 
/**
 * Factory declaration returning an opaque VoiceSdkSnrComputer handle.
 * @param init_data_path C string; presumably the location of init data - confirm.
 * @param error_msg char** out-parameter; ownership of any message is not visible here - TODO confirm.
 * @return computer pointer; the failure value is not shown in this listing.
 */
637 VOICE_SDK_API
638 VoiceSdkSnrComputer* VoiceSdkSnrComputerCreate(const char* init_data_path, char** error_msg);
639 
/**
 * Computes a float value from `bytes_num` raw bytes and stores it in *result.
 * Note the handle parameter is named `engine` although its type is VoiceSdkSnrComputer.
 * @param sample_rate passed as size_t; presumably Hz - confirm.
 * @param result float out-parameter; presumably SNR in dB - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
653 VOICE_SDK_API
654 bool VoiceSdkSnrComputerComputeFromBytes(const VoiceSdkSnrComputer* engine, const uint8_t* bytes, size_t bytes_num,
655  size_t sample_rate, float* result, char** error_msg);
656 
/**
 * Computes a float value from `samples_num` int16_t samples and stores it in *result.
 * @param sample_rate passed as size_t; presumably Hz - confirm.
 * @param result float out-parameter; presumably SNR in dB - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
670 VOICE_SDK_API
671 bool VoiceSdkSnrComputerComputeFromPcm16Samples(const VoiceSdkSnrComputer* engine, const int16_t* pcm16_samples,
672  size_t samples_num, size_t sample_rate, float* result,
673  char** error_msg);
674 
/**
 * Computes a float value from `samples_num` float samples and stores it in *result.
 * @param float_samples input samples; the expected value range is not visible here - TODO confirm.
 * @param sample_rate passed as size_t; presumably Hz - confirm.
 * @param result float out-parameter; presumably SNR in dB - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
689 VOICE_SDK_API
690 bool VoiceSdkSnrComputerComputeFromFloatSamples(const VoiceSdkSnrComputer* engine, const float* float_samples,
691  size_t samples_num, size_t sample_rate, float* result,
692  char** error_msg);
693 
/**
 * Computes a float value for the audio file at `audio_path` and stores it in *result.
 * No sample rate argument is taken, unlike the buffer-based variants.
 * @param result float out-parameter; presumably SNR in dB - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
704 VOICE_SDK_API
705 bool VoiceSdkSnrComputerComputeFromFile(const VoiceSdkSnrComputer* engine, const char* audio_path, float* result,
706  char** error_msg);
707 
/**
 * Release counterpart for VoiceSdkSnrComputer handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the computer - confirm.
 */
713 VOICE_SDK_API
714 void VoiceSdkSnrComputerRelease(VoiceSdkSnrComputer* engine);
715 
716 /********************************
717  * SpeechEndpointDetector calls *
718  ********************************/
719 
/**
 * Factory declaration for a VoiceSdkSpeechEndpointDetector.
 * Takes no init data path or engine handle, only numeric settings.
 * @param min_speech_length_ms size_t; named as milliseconds.
 * @param max_silence_length_ms size_t; named as milliseconds.
 * @param sample_rate size_t; presumably Hz - confirm.
 * @param error_msg char** out-parameter for an error message.
 * NOTE(review): how the two lengths drive end-of-speech detection is not visible here.
 */
736 VOICE_SDK_API
737 VoiceSdkSpeechEndpointDetector* VoiceSdkSpeechEndpointDetectorCreate(size_t min_speech_length_ms,
738  size_t max_silence_length_ms, size_t sample_rate,
739  char** error_msg);
740 
/**
 * Feeds `bytes_num` raw bytes to the detector.
 * NOTE(review): `detector` is pointer-to-const although the name implies
 * the detector state changes - confirm this is intentional.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
752 VOICE_SDK_API
753 bool VoiceSdkSpeechEndpointDetectorAddByteSamples(const VoiceSdkSpeechEndpointDetector* detector, const uint8_t* bytes,
754  size_t bytes_num, char** error_msg);
755 
/**
 * Feeds `samples_num` int16_t samples to the detector.
 * NOTE(review): `detector` is pointer-to-const although the name implies
 * the detector state changes - confirm this is intentional.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
768 VOICE_SDK_API
769 bool VoiceSdkSpeechEndpointDetectorAddPcm16Samples(const VoiceSdkSpeechEndpointDetector* detector,
770  const int16_t* pcm16_samples, size_t samples_num, char** error_msg);
771 
/**
 * Feeds `samples_num` float samples to the detector.
 * NOTE(review): `detector` is pointer-to-const although the name implies the
 * detector state changes; the expected float range is also not visible here.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
784 VOICE_SDK_API
785 bool VoiceSdkSpeechEndpointDetectorAddFloatSamples(const VoiceSdkSpeechEndpointDetector* detector,
786  const float* float_samples, size_t samples_num, char** error_msg);
787 
/**
 * Writes the end-of-speech answer into *result.
 * The answer is in `result`, not in the return value: the bool return is a
 * separate channel (presumably success/failure - confirm).
 */
798 VOICE_SDK_API
799 bool VoiceSdkSpeechEndpointDetectorIsSpeechEnded(const VoiceSdkSpeechEndpointDetector* detector, bool* result,
800  char** error_msg);
801 
/**
 * Reset operation on a detector.
 * NOTE(review): `detector` is pointer-to-const although a reset presumably
 * discards accumulated state - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
812 VOICE_SDK_API
813 bool VoiceSdkSpeechEndpointDetectorReset(const VoiceSdkSpeechEndpointDetector* detector, char** error_msg);
814 
/**
 * Release counterpart for VoiceSdkSpeechEndpointDetector handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the detector - confirm.
 */
820 VOICE_SDK_API
821 void VoiceSdkSpeechEndpointDetectorRelease(VoiceSdkSpeechEndpointDetector* detector);
822 
823 /************************************
824  * SpeechEndpointDetectorOpus calls *
825  ************************************/
826 
/**
 * Factory declaration for a VoiceSdkSpeechEndpointDetectorOpus.
 * Same parameter list as VoiceSdkSpeechEndpointDetectorCreate.
 * @param min_speech_length_ms size_t; named as milliseconds.
 * @param max_silence_length_ms size_t; named as milliseconds.
 * @param sample_rate size_t; presumably Hz - confirm which rates are accepted.
 * @param error_msg char** out-parameter for an error message.
 */
843 VOICE_SDK_API
844 VoiceSdkSpeechEndpointDetectorOpus* VoiceSdkSpeechEndpointDetectorOpusCreate(size_t min_speech_length_ms,
845  size_t max_silence_length_ms,
846  size_t sample_rate, char** error_msg);
847 
/**
 * Feeds one buffer of `bytes_num` bytes to the Opus detector.
 * NOTE(review): presumably exactly one encoded Opus packet per call - confirm;
 * `detector` is pointer-to-const although the name implies a state change.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
860 VOICE_SDK_API
861 bool VoiceSdkSpeechEndpointDetectorOpusAddPacket(const VoiceSdkSpeechEndpointDetectorOpus* detector,
862  const uint8_t* bytes, size_t bytes_num, char** error_msg);
/**
 * Writes the end-of-speech answer into *result.
 * The answer is in `result`; the bool return is a separate channel
 * (presumably success/failure - confirm).
 */
874 VOICE_SDK_API
875 bool VoiceSdkSpeechEndpointDetectorOpusIsSpeechEnded(const VoiceSdkSpeechEndpointDetectorOpus* detector, bool* result,
876  char** error_msg);
877 
/**
 * Reset operation on an Opus detector.
 * NOTE(review): `detector` is pointer-to-const although a reset presumably
 * discards accumulated state - confirm.
 * @return bool; presumably true on success with *error_msg set on failure - confirm.
 */
889 VOICE_SDK_API
890 bool VoiceSdkSpeechEndpointDetectorOpusReset(const VoiceSdkSpeechEndpointDetectorOpus* detector, char** error_msg);
891 
/**
 * Release counterpart for VoiceSdkSpeechEndpointDetectorOpus handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the detector - confirm.
 */
897 VOICE_SDK_API
898 void VoiceSdkSpeechEndpointDetectorOpusRelease(VoiceSdkSpeechEndpointDetectorOpus* detector);
899 
900 /************************************
901  * VoiceSdkQualityCheckEngine calls *
902  ************************************/
903 
/**
 * Factory declaration returning an opaque VoiceSdkQualityCheckEngine handle.
 * Unlike the other Create functions in this header, it reports errors through
 * two out-parameters: a VoiceSdkErrorCode and a message.
 * @param init_data_path C string; presumably the location of init data - confirm.
 * @param error_code VoiceSdkErrorCode out-parameter; the type is not defined in this listing.
 * @param error_msg char** out-parameter for an error message.
 */
920 VOICE_SDK_API
921 VoiceSdkQualityCheckEngine* VoiceSdkQualityCheckEngineCreate(const char* init_data_path, VoiceSdkErrorCode* error_code,
922  char** error_msg);
923 
/**
 * Retrieves recommended thresholds for the given scenario.
 * NOTE(review): this listing jumps from line 940 to 942, so one parameter
 * line is missing from the declaration as shown. It is presumably the output
 * parameter (a VoiceSdkQualityCheckMetricsThresholds*) - restore it from the
 * full header before relying on this prototype.
 * @param scenario one of the VoiceSdkQualityCheckScenario values.
 * @param error_code VoiceSdkErrorCode out-parameter.
 * @param error_msg char** out-parameter for an error message.
 * @return bool; presumably true on success - confirm.
 */
938 VOICE_SDK_API
939 bool VoiceSdkQualityCheckEngineGetRecommendedThresholds(const VoiceSdkQualityCheckEngine* engine,
940  VoiceSdkQualityCheckScenario scenario,
942  VoiceSdkErrorCode* error_code, char** error_msg);
943 
/**
 * Runs a quality check on the audio file at `audio_path`.
 * @param thresholds VoiceSdkQualityCheckMetricsThresholds passed by value, not by pointer.
 * @param error_code VoiceSdkErrorCode out-parameter.
 * @param error_msg char** out-parameter for an error message.
 * @return result pointer; a matching VoiceSdkQualityCheckEngineResultRelease is declared in this header.
 */
954 VOICE_SDK_API
955 VoiceSdkQualityCheckEngineResult* VoiceSdkQualityCheckEngineCheckQualityAudioFile(
956  const VoiceSdkQualityCheckEngine* engine, const char* audio_path,
957  VoiceSdkQualityCheckMetricsThresholds thresholds, VoiceSdkErrorCode* error_code, char** error_msg);
958 
/**
 * Runs a quality check on `num_samples` float samples.
 * The count parameter is `num_samples` here; other sections of this header use `samples_num`.
 * @param sample_rate size_t; presumably Hz - confirm.
 * @param thresholds VoiceSdkQualityCheckMetricsThresholds passed by value.
 * @param error_code VoiceSdkErrorCode out-parameter.
 * @param error_msg char** out-parameter for an error message.
 * @return result pointer; a matching VoiceSdkQualityCheckEngineResultRelease is declared in this header.
 */
971 VOICE_SDK_API
972 VoiceSdkQualityCheckEngineResult* VoiceSdkQualityCheckEngineCheckQualityFloatSamples(
973  const VoiceSdkQualityCheckEngine* engine, const float* float_samples, size_t num_samples, size_t sample_rate,
974  VoiceSdkQualityCheckMetricsThresholds thresholds, VoiceSdkErrorCode* error_code, char** error_msg);
975 
/**
 * Runs a quality check on `num_samples` int16_t samples.
 * The sample pointer is named `samples` here; other sections of this header use `pcm16_samples`.
 * @param sample_rate size_t; presumably Hz - confirm.
 * @param thresholds VoiceSdkQualityCheckMetricsThresholds passed by value.
 * @param error_code VoiceSdkErrorCode out-parameter.
 * @param error_msg char** out-parameter for an error message.
 * @return result pointer; a matching VoiceSdkQualityCheckEngineResultRelease is declared in this header.
 */
988 VOICE_SDK_API
989 VoiceSdkQualityCheckEngineResult* VoiceSdkQualityCheckEngineCheckQualityPcm16Samples(
990  const VoiceSdkQualityCheckEngine* engine, const int16_t* samples, size_t num_samples, size_t sample_rate,
991  VoiceSdkQualityCheckMetricsThresholds thresholds, VoiceSdkErrorCode* error_code, char** error_msg);
992 
/**
 * Runs a quality check on a raw buffer of `num_bytes` bytes.
 * @param bytes input buffer; the expected sample encoding is not visible here - TODO confirm.
 * @param sample_rate size_t; presumably Hz - confirm.
 * @param thresholds VoiceSdkQualityCheckMetricsThresholds passed by value.
 * @param error_code VoiceSdkErrorCode out-parameter.
 * @param error_msg char** out-parameter for an error message.
 * @return result pointer; a matching VoiceSdkQualityCheckEngineResultRelease is declared in this header.
 */
1005 VOICE_SDK_API
1006 VoiceSdkQualityCheckEngineResult* VoiceSdkQualityCheckEngineCheckQualityBytes(
1007  const VoiceSdkQualityCheckEngine* engine, const uint8_t* bytes, size_t num_bytes, size_t sample_rate,
1008  VoiceSdkQualityCheckMetricsThresholds thresholds, VoiceSdkErrorCode* error_code, char** error_msg);
1009 
/**
 * Release counterpart for VoiceSdkQualityCheckEngineResult objects returned
 * by pointer from the CheckQuality* functions in this header.
 * NOTE(review): presumably frees the result - confirm.
 */
1014 VOICE_SDK_API
1015 void VoiceSdkQualityCheckEngineResultRelease(VoiceSdkQualityCheckEngineResult* check_result);
1016 
/**
 * Release counterpart for VoiceSdkQualityCheckEngine handles.
 * Takes the handle as a non-const pointer and reports nothing back.
 * NOTE(review): presumably destroys the engine - confirm.
 */
1021 VOICE_SDK_API
1022 void VoiceSdkQualityCheckEngineRelease(VoiceSdkQualityCheckEngine* engine);
1023 
1024 #ifdef __cplusplus
1025 }
1026 #endif
Definition: core_c_api.h:73
Definition: media_c_api.h:144
VoiceSdkQualityCheckShortDescription quality_check_short_description
Short description of the quality check results.
Definition: media_c_api.h:148
float speech_length_ms
Speech length metric value obtained on quality check in milliseconds.
Definition: media_c_api.h:158
float speech_relative_length
Speech relative length (speech length relative to the total audio length) metric value obtained on quality check.
Definition: media_c_api.h:163
float multiple_speakers_detector_score
Multiple speakers detector score value obtained on quality check.
Definition: media_c_api.h:168
float snr_db
SNR metric value obtained on quality check in dB.
Definition: media_c_api.h:153
Definition: media_c_api.h:122
float minimum_snr_db
Minimum signal-to-noise ratio required to pass quality check in dB.
Definition: media_c_api.h:126
float minimum_speech_length_ms
Minimum speech length required to pass quality check in milliseconds.
Definition: media_c_api.h:131
float minimum_speech_relative_length
Minimum speech relative length (speech length relative to the total audio length) required to pass quality check.
Definition: media_c_api.h:136
float maximum_multiple_speakers_detector_score
Maximum multiple speakers detector score allowed to pass quality check.
Definition: media_c_api.h:141
Definition: media_c_api.h:24
bool is_voice
Whether the interval contains speech or not.
Definition: media_c_api.h:28
VoiceSdkAudioInterval audio_interval
Speech event audio interval.
Definition: media_c_api.h:33
Definition: media_c_api.h:36
float total_length_ms
Processed audio total length (total_length_ms = speech_length_ms + background_length_ms) in milliseconds.
Definition: media_c_api.h:50
float background_length_ms
Non-speech signal length in milliseconds.
Definition: media_c_api.h:45
float speech_length_ms
Speech signal length in milliseconds.
Definition: media_c_api.h:40
Definition: media_c_api.h:56
VoiceSdkSpeechEventArray speech_events
Contains audio intervals marked as speech or non-speech.
Definition: media_c_api.h:60
VoiceSdkSpeechInfo speech_info
Contains speech statistics.
Definition: media_c_api.h:65