几个关键点:
1、声卡采播,是采集声卡环回声音,也就是本机电脑播放音乐的声音,同时要能播放到广播;
来源: 使用Core Audio实现对声卡输出的捕捉。涉及的接口有: IMMDeviceEnumerator、IMMDevice、IAudioClient、IAudioCaptureClient。主要过程: (1) 创建多媒体设备枚举器(IMMDeviceEnumerator);(2) 通过多媒体设备枚举器获取声卡接口(IMMDevice);(3) 通过声卡接口获取声卡客户端接口(IAudioClient);(4) 通过声卡客户端接口(IAudioClient)可获取声卡输出的音频参数、初始化声卡、获取声卡输出缓冲区的大小、开启/停止对声卡输出的采集;(5) 通过声卡采集客户端接口(IAudioCaptureClient)可获取采集的声卡输出数据,并对内部缓冲区进行控制。
deepseek给的代码:
#include <windows.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <functiondiscoverykeys_devpkey.h>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <vector>
// Helper for COM-style interface pointers: if the pointer stored at ppT is
// non-null, call Release() on it and reset the stored pointer to null.
// Calling it again on an already-cleared pointer does nothing.
template <class T>
void SafeRelease(T **ppT)
{
    T *&held = *ppT;
    if (held == nullptr) {
        return;
    }
    held->Release();
    held = nullptr;
}
class AudioCapture {
private:
IMMDeviceEnumerator *pEnumerator;
IMMDevice *pDevice;
IAudioClient *pAudioClient;
IAudioCaptureClient *pCaptureClient;
WAVEFORMATEX *pwfx;
HANDLE hEvent;
bool isCapturing;
public:
AudioCapture() : pEnumerator(NULL), pDevice(NULL), pAudioClient(NULL),
pCaptureClient(NULL), pwfx(NULL), isCapturing(false)
{
hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
}
~AudioCapture() {
StopCapture();
SafeRelease(&pCaptureClient);
SafeRelease(&pAudioClient);
SafeRelease(&pDevice);
SafeRelease(&pEnumerator);
if (pwfx) CoTaskMemFree(pwfx);
if (hEvent) CloseHandle(hEvent);
}
HRESULT Initialize() {
HRESULT hr = S_OK;
// 初始化COM
hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
if (FAILED(hr)) return hr;
// 创建设备枚举器
hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
CLSCTX_ALL, __uuidof(IMMDeviceEnumerator),
(void**)&pEnumerator);
if (FAILED(hr)) return hr;
// 获取默认音频渲染设备(扬声器)
hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
if (FAILED(hr)) return hr;
// 激活音频客户端
hr = pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
NULL, (void**)&pAudioClient);
if (FAILED(hr)) return hr;
// 获取混合格式
hr = pAudioClient->GetMixFormat(&pwfx);
if (FAILED(hr)) return hr;
// 打印音频格式信息
PrintFormatInfo();
return hr;
}
HRESULT StartCapture() {
HRESULT hr = S_OK;
// 初始化音频客户端进行循环录制
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_LOOPBACK,
0, 0, pwfx, NULL);
if (FAILED(hr)) return hr;
// 获取采集客户端
hr = pAudioClient->GetService(__uuidof(IAudioCaptureClient),
(void**)&pCaptureClient);
if (FAILED(hr)) return hr;
// 开始采集
hr = pAudioClient->Start();
if (FAILED(hr)) return hr;
isCapturing = true;
std::cout << "开始采集声卡输出..." << std::endl;
return hr;
}
void StopCapture() {
if (pAudioClient && isCapturing) {
pAudioClient->Stop();
isCapturing = false;
std::cout << "停止采集声卡输出" << std::endl;
}
}
HRESULT CaptureDataToFile(const char* filename, DWORD captureDurationMs = 5000) {
std::ofstream file(filename, std::ios::binary);
if (!file.is_open()) {
std::cerr << "无法创建文件: " << filename << std::endl;
return E_FAIL;
}
// 写入WAV文件头
WriteWavHeader(file, captureDurationMs);
DWORD flags;
UINT32 packetLength = 0;
BYTE *pData;
UINT32 numFramesAvailable;
DWORD captureCount = 0;
const DWORD targetCount = captureDurationMs / 10; // 假设每10ms检查一次
std::cout << "正在录制音频到文件: " << filename << std::endl;
while (isCapturing && captureCount < targetCount) {
// 等待数据就绪
Sleep(10);
// 获取采集到的数据
HRESULT hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable,
&flags, NULL, NULL);
if (SUCCEEDED(hr) && numFramesAvailable > 0) {
// 计算数据大小
UINT32 dataSize = numFramesAvailable * pwfx->nBlockAlign;
// 写入音频数据
file.write((const char*)pData, dataSize);
// 释放缓冲区
pCaptureClient->ReleaseBuffer(numFramesAvailable);
std::cout << "采集到 " << dataSize << " 字节数据" << std::endl;
}
captureCount++;
}
file.close();
std::cout << "音频录制完成" << std::endl;
return S_OK;
}
private:
void PrintFormatInfo() {
if (pwfx) {
std::cout << "音频格式信息:" << std::endl;
std::cout << " 格式标签: 0x" << std::hex << pwfx->wFormatTag << std::dec << std::endl;
std::cout << " 声道数: " << pwfx->nChannels << std::endl;
std::cout << " 采样率: " << pwfx->nSamplesPerSec << " Hz" << std::endl;
std::cout << " 每秒字节数: " << pwfx->nAvgBytesPerSec << std::endl;
std::cout << " 块对齐: " << pwfx->nBlockAlign << std::endl;
std::cout << " 位深度: " << pwfx->wBitsPerSample << std::endl;
}
}
void WriteWavHeader(std::ofstream &file, DWORD durationMs) {
if (!pwfx) return;
// 计算数据大小
DWORD dataSize = pwfx->nAvgBytesPerSec * durationMs / 1000;
dataSize = (dataSize / pwfx->nBlockAlign) * pwfx->nBlockAlign; // 对齐到块边界
// RIFF块
file.write("RIFF", 4);
DWORD chunkSize = 36 + dataSize;
file.write((const char*)&chunkSize, 4);
file.write("WAVE", 4);
// fmt块
file.write("fmt ", 4);
DWORD fmtChunkSize = 16;
file.write((const char*)&fmtChunkSize, 4);
file.write((const char*)&pwfx->wFormatTag, 2);
file.write((const char*)&pwfx->nChannels, 2);
file.write((const char*)&pwfx->nSamplesPerSec, 4);
file.write((const char*)&pwfx->nAvgBytesPerSec, 4);
file.write((const char*)&pwfx->nBlockAlign, 2);
file.write((const char*)&pwfx->wBitsPerSample, 2);
// data块
file.write("data", 4);
file.write((const char*)&dataSize, 4);
}
};
// Usage example: record five seconds of the sound card's output to output.wav.
int main() {
    std::cout << "声卡输出捕捉示例" << std::endl;

    int exitCode = 0;
    {
        // Inner scope: the AudioCapture destructor releases COM interfaces, so
        // it has to run before CoUninitialize() below.
        AudioCapture capture;

        // Set up the audio device.
        HRESULT hr = capture.Initialize();
        if (FAILED(hr)) {
            std::cerr << "初始化失败: 0x" << std::hex
                      << static_cast<unsigned long>(hr) << std::dec << std::endl;
            return -1;
        }

        // Start capturing.
        hr = capture.StartCapture();
        if (FAILED(hr)) {
            std::cerr << "启动采集失败: 0x" << std::hex
                      << static_cast<unsigned long>(hr) << std::dec << std::endl;
            return -1;
        }

        // Record five seconds of audio to a file.
        hr = capture.CaptureDataToFile("output.wav", 5000);
        if (FAILED(hr)) {
            std::cerr << "录制失败: 0x" << std::hex
                      << static_cast<unsigned long>(hr) << std::dec << std::endl;
            exitCode = -1;
        }

        // Stop capturing.
        capture.StopCapture();
    }

    // Balance the CoInitializeEx call made inside AudioCapture::Initialize().
    CoUninitialize();
    std::cout << "程序执行完成" << std::endl;
    return exitCode;
}
有几个问题,录制的wav不能播放,修改之后,录制的wav还是不能播放,主要是声卡输出的PCM不是普通的PCM格式,增加检测和转换函数:
// Converts interleaved 32-bit float samples to 16-bit signed PCM.
//
// input      - frameCount * pwfx->nChannels float samples
// output     - receives the same number of int16_t samples; the caller
//              provides the storage
// frameCount - number of frames (one sample per channel) to convert
//
// Returns E_POINTER for a null buffer, E_UNEXPECTED when no source format is
// set, S_OK otherwise.
HRESULT ConvertFloatToPCM16(const BYTE* input, BYTE* output, UINT32 frameCount) {
    if (!input || !output) return E_POINTER;
    if (!pwfx) return E_UNEXPECTED;

    const float* floatData = reinterpret_cast<const float*>(input);
    int16_t* pcmData = reinterpret_cast<int16_t*>(output);

    // Widen before multiplying so a large frame count cannot wrap in 32 bits.
    const size_t sampleCount = static_cast<size_t>(frameCount) * pwfx->nChannels;
    for (size_t i = 0; i < sampleCount; ++i) {
        float sample = floatData[i];
        // NaN fails every comparison, so it would slip past the clamp below
        // and reach an undefined float-to-int cast; treat it as silence.
        if (!(sample == sample)) sample = 0.0f;
        // Clamp to the legal range [-1.0, 1.0].
        if (sample > 1.0f) sample = 1.0f;
        if (sample < -1.0f) sample = -1.0f;
        // Scale to [-32767, 32767] and round to nearest. The cast truncates
        // toward zero, so the half must be added away from zero; a plain
        // +0.5f would bias every negative sample upward.
        const float scaled = sample * 32767.0f;
        pcmData[i] = static_cast<int16_t>(scaled >= 0.0f ? scaled + 0.5f
                                                         : scaled - 0.5f);
    }
    return S_OK;
}
// Generic format conversion entry point: converts inputSize bytes in the
// source format (pwfx) into output. Only 32-bit float -> 16-bit PCM is
// implemented.
//
// Returns E_POINTER for a null input, E_UNEXPECTED when the source format is
// missing or has a zero block alignment, E_NOTIMPL for any unsupported
// conversion, otherwise the result of ConvertFloatToPCM16.
HRESULT ConvertAudioFormat(const BYTE* input, UINT32 inputSize,
                           std::vector<BYTE>& output) {
    if (!input) return E_POINTER;
    // A zero block alignment would divide by zero below.
    if (!pwfx || pwfx->nBlockAlign == 0) return E_UNEXPECTED;

    const UINT32 frameCount = inputSize / pwfx->nBlockAlign;
    const UINT32 outputSize = frameCount * targetFormat.nBlockAlign;
    output.resize(outputSize);

    // Pick the conversion from the source format.
    const bool sourceIsFloat =
        pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
        (pwfex && IsEqualGUID(pwfex->SubFormat, KSDATAFORMAT_SUBTYPE_IEEE_FLOAT));

    if (sourceIsFloat) {
        // The converter reads 4-byte floats; other float widths would be misread.
        if (pwfx->wBitsPerSample != 32) {
            std::cerr << "不支持的浮点位深度: " << pwfx->wBitsPerSample << std::endl;
            return E_NOTIMPL;
        }
        // The converter writes frameCount * nChannels 16-bit samples and does
        // no channel mapping. Refuse a smaller target buffer instead of
        // writing past its end.
        const unsigned long long requiredSize =
            static_cast<unsigned long long>(frameCount) * pwfx->nChannels * sizeof(int16_t);
        if (outputSize < requiredSize) {
            return E_NOTIMPL;
        }
        // 32-bit float -> 16-bit PCM
        return ConvertFloatToPCM16(input, output.data(), frameCount);
    } else if (pwfx->wFormatTag == WAVE_FORMAT_PCM) {
        // Conversions from other PCM bit depths could be added here;
        // for now only 32-bit float input is handled.
        std::cerr << "不支持的PCM位深度: " << pwfx->wBitsPerSample << std::endl;
        return E_NOTIMPL;
    }
    return E_NOTIMPL;
}
将采集的声卡数据重采样为广播板能播放支持的samplerate和channel。
重采样的初始化:注意channel_layout 与channel的区别。
swr_alloc_set_opts(swrCtx, AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16, SAMPLE_RATE, av_get_default_channel_layout(pwfx->nChannels), convert_win_format_to_av_format(pwfx->wFormatTag, pwfx->wBitsPerSample), pwfx->nSamplesPerSec, 0, NULL);
播放卡顿优化:
不压缩PCM数据;
修复给声卡喂数据不及时的问题;
2、声卡录播,则是实时采集麦克风声音;
本地验证声卡采播功能正常,换了个电脑,跑起来效果就不一样了,分析主要还是电脑配置差异,需要提高声卡采集线程的优先级,减少延时时间:
// Create the capture thread suspended so its priority is raised before it
// runs. The original passed 1 as dwCreationFlags, which is not
// CREATE_SUSPENDED, so the thread was not created suspended.
HANDLE hThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)ThreadCapturePostToUser,
                              NULL, CREATE_SUSPENDED, NULL);
if (hThread) {
    SetThreadPriority(hThread, THREAD_PRIORITY_HIGHEST); // raise the capture thread's priority
    ResumeThread(hThread);                               // let the thread start running
    // NOTE(review): hThread is not closed in this snippet; confirm the
    // surrounding code calls CloseHandle on it once it is no longer needed.
}
参考:
https://blog.csdn.net/qq_41915225/article/details/86004805
https://github.com/quanwstone/Wave_Audio
-------------------广告线---------------
项目、合作,欢迎勾搭,邮箱:promall@qq.com
本文为呱牛笔记原创文章,转载无需和我联系,但请注明来自呱牛笔记 ,it3q.com
