// Copyright 2017 Mobvoi Inc. All Rights Reserved. // Author: shtxie@mobvoi.com (Shoutao Xie) #include "wav_utils.h" #include //#include #include #include #include //#include "sds/utils/log.h" using namespace std; namespace mobvoi { namespace sds { double GetWavDurationMs(const std::string& wav_file) { std::ifstream ifs(wav_file.c_str(), std::ios::binary | std::ios::ate); if (!ifs.good()) { std::cout << "Failed to read wav from path:" << wav_file; return 0; } ifs.seekg(0, std::ios::end); int64 file_size = ifs.tellg(); ifs.seekg(0, std::ios::beg); WavHeader header; ifs.read(reinterpret_cast(&header), sizeof(header)); return static_cast( file_size - sizeof(header)) / (header.samples_per_sec * header.channel_num * header.bits_per_sample / 8) * 1000; } bool ReadWavPcmData(const std::string& wav_file, std::vector* out) { out->clear(); std::ifstream ifs(wav_file.c_str(), std::ios::binary | std::ios::ate); if (!ifs.good()) { std::cout << "Failed to read wav from path: " << wav_file; return false; } int64 file_size = ifs.tellg(); int data_size = file_size; ifs.seekg(0, std::ios::beg); WavHeader header; ifs.read(reinterpret_cast(&header), sizeof(header)); if (header.riff[0] != 'R' || header.riff[1] != 'I' || header.riff[2] != 'F' || header.riff[3] != 'F') { // not wav, seek to begin. ifs.seekg(0, std::ios::beg); } else { data_size = file_size - sizeof(header); } out->resize(data_size); ifs.read(&(out->front()), data_size); return true; } void ConvertPCMToWAV(const std::string& pcm, std::string* wav, const int channel, const int rate, const int bit_depth) { int bytes_per_second = rate * bit_depth * channel / 8; int total = pcm.length(); WavHeader header; memcpy(header.riff, "RIFF", 4); header.chunk_size = total + 36; memcpy(header.wave, "WAVE", 4); memcpy(header.fmt, "fmt ", 4); // 'fmt ' chunk header.subchunk1_size = 16; // 4 bytes: size of 'fmt ' chunk header.audio_format = 1; header.channel_num = channel; header.samples_per_sec = rate; header.bytes_per_sec = bytes_per_second; header.block_align = bytes_per_second / rate; header.bits_per_sample = 16; memcpy(header.subchunk2_id, "data", 4); header.subchunk2_size = total; wav->append(reinterpret_cast(&header), sizeof(header)); wav->append(pcm); } AudioSender::AudioSender(int interval_ms, const std::string& audio_file, audio_send_func audio_handler) : interval_ms_(interval_ms), audio_handler_(audio_handler) { if (!ReadWavPcmData(audio_file.c_str(), &audio_bytes_)) { std::cout << "ERROR: failed to parse audio file:" << audio_file << std::endl; } } void AudioSender::StreamingSend() { // assume 16000 sample rate. const int kBatchSize = interval_ms_ * 32; const int kSleepTime = interval_ms_ * 1000; int pos = 0; int length = audio_bytes_.size(); const char* start = &audio_bytes_[0]; while (pos < length) { int stride = ((pos + kBatchSize) < length) ? kBatchSize : (length - pos); audio_handler_(start + pos, stride); if ((pos + kBatchSize) >= length) break; //usleep(kSleepTime); Sleep(kSleepTime);//////////winsdk pos += stride; } } void AudioSender::DirectlySend() { audio_handler_(&audio_bytes_[0], audio_bytes_.size()); } } // namespace sds } // namespace mobvoi