Call Center SDK  1.11.3
diarization.h
1 /* Copyright 2021 ID R&D Inc. All Rights Reserved. */
2 
3 #pragma once
4 
5 #include <string>
6 #include <vector>
7 #include <memory>
8 #include <map>
9 #include <voicesdk/core/common/intervals.h>
10 
11 namespace voicesdk {
12 namespace diar {
13 
/**
 * Result type returned by every DiarizationEngine::GetSegmentation overload:
 * an ordered map from a size_t key to a list of audio intervals.
 * NOTE(review): the key is presumably a speaker index and the intervals that
 * speaker's segments - confirm against the SDK documentation.
 * AudioInterval is not declared in this header; presumably it comes from
 * <voicesdk/core/common/intervals.h> - confirm.
 */
18 using TimeStamps = std::map<size_t, std::vector<AudioInterval>>;
19 
/**
 * Diarization engine class (interface), intended to perform speaker diarization.
 *
 * Every GetSegmentation overload is pure virtual, so instances are obtained
 * through the static Create() factory rather than constructed directly.
 *
 * NOTE(review): size_t, int16_t and uint8_t are used below, but neither
 * <cstddef> nor <cstdint> is included by this header directly; it presumably
 * relies on a transitive include - confirm.
 * NOTE(review): the GetSegmentation overloads declare a default argument on a
 * virtual function. Default arguments are selected from the static type of the
 * call expression, so any overrider should repeat the same default.
 */
23 class VOICE_SDK_API DiarizationEngine {
24 public:
    /** Shared-ownership handle to an engine instance; the type returned by Create(). */
25  using Ptr = std::shared_ptr<DiarizationEngine>;
26 
    /**
     * Creates DiarizationEngine instance.
     *
     * @param init_dir presumably the path to a directory holding the engine's
     *                 initialization data - confirm against the SDK documentation
     * @return shared pointer to the created engine
     */
34  static DiarizationEngine::Ptr Create(const std::string& init_dir);
35 
    /** Virtual destructor so that an engine can be destroyed through a base-class pointer. */
36  virtual ~DiarizationEngine() {}
37 
38 
    /**
     * Performs speaker diarization from the given float (in [-1; 1] range) audio samples.
     *
     * @param float_samples pointer to the float audio samples
     * @param samples_num   number of samples, going by the parameter name - confirm
     * @param sample_rate   audio sample rate (presumably in Hz - confirm)
     * @param num_speakers  defaults to 0; presumably 0 lets the engine determine
     *                      the number of speakers itself - confirm
     * @return diarization result as TimeStamps
     */
51  virtual TimeStamps GetSegmentation(const float* float_samples, const size_t samples_num, const size_t sample_rate,
52  const size_t num_speakers = 0) = 0;
53 
    /**
     * Performs speaker diarization from the given PCM16 audio samples.
     *
     * @param pcm16_samples pointer to the 16-bit PCM audio samples
     * @param samples_num   number of samples, going by the parameter name - confirm
     * @param sample_rate   audio sample rate (presumably in Hz - confirm)
     * @param num_speakers  defaults to 0; presumably 0 lets the engine determine
     *                      the number of speakers itself - confirm
     * @return diarization result as TimeStamps
     */
66  virtual TimeStamps GetSegmentation(const int16_t* pcm16_samples, const size_t samples_num, const size_t sample_rate,
67  const size_t num_speakers = 0) = 0;
68 
    /**
     * Performs speaker diarization using the byte representation of the given PCM16 samples.
     *
     * @param pcm16_bytes  pointer to the raw bytes of the PCM16 samples
     *                     (byte order is not visible from this header - confirm)
     * @param bytes_num    length of the buffer in bytes (not in samples), going by
     *                     the parameter name - confirm
     * @param sample_rate  audio sample rate (presumably in Hz - confirm)
     * @param num_speakers defaults to 0; presumably 0 lets the engine determine
     *                     the number of speakers itself - confirm
     * @return diarization result as TimeStamps
     */
81  virtual TimeStamps GetSegmentation(const uint8_t* pcm16_bytes, const size_t bytes_num, const size_t sample_rate,
82  const size_t num_speakers = 0) = 0;
83 
    /**
     * Performs speaker diarization from the given audio file.
     *
     * @param audio_path   path to the audio file (supported formats are not
     *                     visible from this header - confirm)
     * @param num_speakers defaults to 0; presumably 0 lets the engine determine
     *                     the number of speakers itself - confirm
     * @return diarization result as TimeStamps
     */
94  virtual TimeStamps GetSegmentation(const std::string& audio_path, const size_t num_speakers = 0) = 0;
95 };
96 
97 } // namespace diar
98 } // namespace voicesdk
Diarization engine class (interface), intended to perform speaker diarization.
Definition: diarization.h:23
virtual TimeStamps GetSegmentation(const std::string &audio_path, const size_t num_speakers=0)=0
Performs speaker diarization from the given audio file.
virtual TimeStamps GetSegmentation(const float *float_samples, const size_t samples_num, const size_t sample_rate, const size_t num_speakers=0)=0
Performs speaker diarization from the given float (in [-1; 1] range) audio samples.
virtual TimeStamps GetSegmentation(const uint8_t *pcm16_bytes, const size_t bytes_num, const size_t sample_rate, const size_t num_speakers=0)=0
Performs speaker diarization using the byte representation of the given PCM16 samples.
static DiarizationEngine::Ptr Create(const std::string &init_dir)
Creates DiarizationEngine instance.
virtual TimeStamps GetSegmentation(const int16_t *pcm16_samples, const size_t samples_num, const size_t sample_rate, const size_t num_speakers=0)=0
Performs speaker diarization from the given PCM16 audio samples.