博客：

PortAudio:【PortAudio】PortAudio 音频处理库Demo-CSDN博客

PortAudio:PortAudio —— 跨平台音频采集API_lyyanziyu的技术博客_51CTO博客

中文模型：[CMU Sphinx - Browse /Acoustic and Language Models/Mandarin at SourceForge.net](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/Mandarin/)

Pocketsphinx源码：https://github.com/cmusphinx/pocketsphinx/tree/master/

VS2015中编译试用pocketsphinx：https://www.jianshu.com/p/b0a3690777fc

window下基于VS，CMUSphinx的中文小词汇量语音识别实例：https://blog.csdn.net/Anadahoji/article/details/88607261

pocketsphinx实现连续大词汇量语音识别

Sphinx应用程序编写 - lovemu - 博客园 (cnblogs.com)

可以简单识别单词，需要设计关键词库

代码：


// SpeechRecognition_PocketSphinxDlg.h: 头文件
//

#pragma once

// Required headers
// Third-party audio / speech-recognition libraries and ATL
#include <portaudio.h>       // PortAudio: audio input/output
#include <pocketsphinx.h>    // PocketSphinx: speech recognition
#include <sphinxbase/ad.h>   // SphinxBase: audio recording
#include <sphinxbase/err.h>  // SphinxBase: error handling
#include <atlstr.h>          // ATL string classes

// C++ standard library
#include <algorithm>         // algorithms
#include <atomic>            // std::atomic for the cross-thread listening flag
#include <codecvt>           // encoding conversion
#include <fstream>           // file I/O
#include <iostream>          // stream I/O
#include <locale>            // localization
#include <set>               // set container
#include <string>            // string handling
#include <thread>            // threads
#include <vector>            // vector container


#define TIMER_ID 999 // 定义定时器 ID

// CSpeechRecognitionPocketSphinxDlg dialog
//
// Main dialog of the demo: records audio through PortAudio on a worker
// thread, collects it in m_audioBuffer and decodes it with PocketSphinx,
// showing the recognised text in m_edtText.
class CSpeechRecognitionPocketSphinxDlg : public CDialogEx
{
// Construction
public:
	CSpeechRecognitionPocketSphinxDlg(CWnd* pParent = nullptr);	// standard constructor

// Dialog data
#ifdef AFX_DESIGN_TIME
	enum { IDD = IDD_SPEECHRECOGNITION_POCKETSPHINX_DIALOG };
#endif

	protected:
	virtual void DoDataExchange(CDataExchange* pDX);	// DDX/DDV support



// Implementation
protected:
	HICON m_hIcon;

	// Generated message map functions
	virtual BOOL OnInitDialog();
	afx_msg void OnSysCommand(UINT nID, LPARAM lParam);
	afx_msg void OnPaint();
	afx_msg HCURSOR OnQueryDragIcon();
	afx_msg void OnDestroy();
	DECLARE_MESSAGE_MAP()

public:
	CEdit m_edtText;					// edit control that shows the recognised text

	std::vector<int16> m_audioBuffer;   // recorded mono samples
	// Written by the UI thread (start/stop/timer) and polled by the recording
	// thread, so it must be atomic rather than a plain bool.
	std::atomic<bool> m_bListening{ false };
	cmd_ln_t* m_config = nullptr;		// PocketSphinx configuration
	ps_decoder_t* m_ps = nullptr;		// PocketSphinx decoder
	ad_rec_t* m_ad = nullptr;			// SphinxBase audio recording device
	uint8 utt_started = FALSE;			// flag: utterance started

	PaStream* m_stream = nullptr;		// PortAudio input stream


	void RecordAudio();
	void ProcessRecordedAudio();
	void ProcessAudioFile(const std::string& filePath);
	CString ExtractTextFromHyp(const char* hyp);

	afx_msg void OnBnClickedButtonStart();
	afx_msg void OnBnClickedButtonStop();
	afx_msg void OnTimer(UINT_PTR nIDEvent); // timer handler

private:
	std::thread m_thread; // thread object reserved for recording/processing

};


// SpeechRecognition_PocketSphinxDlg.cpp: 实现文件
//

#include "pch.h"
#include "framework.h"
#include "SpeechRecognition_PocketSphinx.h"
#include "SpeechRecognition_PocketSphinxDlg.h"
#include "afxdialogex.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif


// PortAudio 参数
#define SAMPLE_RATE 16000
#define FRAMES_PER_BUFFER (2048) // 调整缓冲区大小


// PortAudio stream callback that down-mixes interleaved stereo input to mono.
//
// inputBuffer     - interleaved samples, read as float pairs (left, right).
// framesPerBuffer - number of frames available in inputBuffer.
// userData        - destination buffer for the mono samples.
// Returns paContinue in every case.
//
// NOTE(review): assumes the stream was opened with a 2-channel float sample
// format and that userData points to at least framesPerBuffer floats - confirm
// before registering this callback with Pa_OpenStream.
static int paCallback(const void* inputBuffer, void* outputBuffer,
	unsigned long framesPerBuffer,
	const PaStreamCallbackTimeInfo* timeInfo,
	PaStreamCallbackFlags statusFlags,
	void* userData) 
{
	(void)outputBuffer;
	(void)timeInfo;
	(void)statusFlags;

	const float* in = static_cast<const float*>(inputBuffer);
	float* monoBuffer = static_cast<float*>(userData);

	// Nothing to read from or nowhere to write to: keep the stream running.
	if (in == nullptr || monoBuffer == nullptr) {
		return paContinue;
	}

	for (unsigned long i = 0; i < framesPerBuffer; ++i) {
		monoBuffer[i] = (in[i * 2] + in[i * 2 + 1]) * 0.5f; // average left and right
	}

	return paContinue;
}

// CAboutDlg dialog used for the application's "About" menu item

class CAboutDlg : public CDialogEx
{
public:
	CAboutDlg();

// Dialog data
#ifdef AFX_DESIGN_TIME
	enum { IDD = IDD_ABOUTBOX };
#endif

	protected:
	virtual void DoDataExchange(CDataExchange* pDX);    // DDX/DDV support

// Implementation
protected:
	DECLARE_MESSAGE_MAP()
};

// Constructs the About dialog from the IDD_ABOUTBOX dialog resource.
CAboutDlg::CAboutDlg()
	: CDialogEx(IDD_ABOUTBOX)
{
}

// The About dialog binds no controls of its own; the exchange is handed
// straight to the base class.
void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialogEx::DoDataExchange(pDX);
}

// Message map for CAboutDlg: no handlers are registered.
BEGIN_MESSAGE_MAP(CAboutDlg, CDialogEx)
END_MESSAGE_MAP()


// CSpeechRecognitionPocketSphinxDlg 对话框



// Builds the dialog and initialises the speech-recognition resources in three
// steps, reporting the outcome of each step in a message box:
//   1. PocketSphinx configuration (Mandarin acoustic model, language model, dictionary)
//   2. PocketSphinx decoder
//   3. SphinxBase audio device
// On failure the corresponding member stays null; OnDestroy releases whatever
// was created.
CSpeechRecognitionPocketSphinxDlg::CSpeechRecognitionPocketSphinxDlg(CWnd* pParent /*=nullptr*/)
	: CDialogEx(IDD_SPEECHRECOGNITION_POCKETSPHINX_DIALOG, pParent)
{
	m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);

	// The argument list is a C varargs list and must end with NULL.
	m_config = cmd_ln_init(NULL, ps_args(), TRUE,
		"-hmm", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn",
		"-lm", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn.lm.bin",
		"-dict", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn.dic",
		/* English model alternative:
		       "-hmm", "D:\\ASR\\pocketsphinx\\model\\en-us\\en-us",
			   "-lm", "D:\\ASR\\pocketsphinx\\model\\en-us\\en-us.lm.bin",
			   "-dict", "D:\\ASR\\pocketsphinx\\model\\en-us\\cmudict-en-us.dict",*/
		NULL);
	if (m_config == NULL)
	{
		// Nothing was allocated, so there is nothing to free here.
		AfxMessageBox(_T("1、m_config fail"));
	}
	else
	{
		AfxMessageBox(_T("1、m_config success"));
	}

	// Only create the decoder when a configuration exists.
	m_ps = (m_config != NULL) ? ps_init(m_config) : NULL;
	if (m_ps == NULL)
	{
		AfxMessageBox(_T("2、m_ps fail"));
	}
	else
	{
		AfxMessageBox(_T("2、m_ps success"));
	}

	m_ad = ad_open_dev("sysdefault", 48000);
	if (m_ad == NULL)
	{
		AfxMessageBox(_T("3、Failed to open audio device"));
	}
	else
	{
		AfxMessageBox(_T("3、Success to open audio device"));
	}
}

void CSpeechRecognitionPocketSphinxDlg::OnDestroy()
{
	// 音频设备
	if (m_ad)
	{
		ad_close(m_ad);
		m_ad = nullptr;
	}

	// 释放解码器
	if (m_ps)
	{
		ps_free(m_ps);
		m_ps = nullptr;
	}

	// 释放配置参数
	if (m_config)
	{
		cmd_ln_free_r(m_config);
		m_config = nullptr;
	}
}

// Runs the base-class exchange first, then binds the IDC_EDIT_TEXT control
// to the m_edtText member.
void CSpeechRecognitionPocketSphinxDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialogEx::DoDataExchange(pDX);

	DDX_Control(pDX, IDC_EDIT_TEXT, m_edtText);
}

// Message map: routes system-command, paint, drag-icon, destroy and timer
// messages plus the Start/Stop button clicks to the dialog's handlers.
BEGIN_MESSAGE_MAP(CSpeechRecognitionPocketSphinxDlg, CDialogEx)
	ON_WM_SYSCOMMAND()
	ON_WM_PAINT()
	ON_WM_QUERYDRAGICON()
	ON_BN_CLICKED(IDC_BUTTON_START, &CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStart)
	ON_BN_CLICKED(IDC_BUTTON_STOP, &CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStop)
	ON_WM_DESTROY()
	ON_WM_TIMER()
END_MESSAGE_MAP()


// CSpeechRecognitionPocketSphinxDlg 消息处理程序

// Dialog initialisation: adds the "About..." entry to the system menu, sets
// the dialog icons and clears the listening flag.
// Returns TRUE so the framework assigns the default focus.
BOOL CSpeechRecognitionPocketSphinxDlg::OnInitDialog()
{
	CDialogEx::OnInitDialog();

	// IDM_ABOUTBOX must be in the system command range.
	ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
	ASSERT(IDM_ABOUTBOX < 0xF000);

	// Append the "About..." item to the system menu when one is available.
	if (CMenu* sysMenu = GetSystemMenu(FALSE))
	{
		CString aboutLabel;
		BOOL labelLoaded;
		labelLoaded = aboutLabel.LoadString(IDS_ABOUTBOX);
		ASSERT(labelLoaded);
		if (!aboutLabel.IsEmpty())
		{
			sysMenu->AppendMenu(MF_SEPARATOR);
			sysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, aboutLabel);
		}
	}

	// The framework only sets the icon automatically when the main window is
	// not a dialog, so set both sizes here.
	SetIcon(m_hIcon, TRUE);   // large icon
	SetIcon(m_hIcon, FALSE);  // small icon

	// Extra initialisation: not recording yet.
	m_bListening = false;

	return TRUE;  // TRUE unless the focus was set to a control
}

// System-menu handler: shows the About box for IDM_ABOUTBOX and forwards
// every other command to the base class.
void CSpeechRecognitionPocketSphinxDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
	const bool isAboutCommand = ((nID & 0xFFF0) == IDM_ABOUTBOX);
	if (!isAboutCommand)
	{
		CDialogEx::OnSysCommand(nID, lParam);
		return;
	}

	CAboutDlg aboutBox;
	aboutBox.DoModal();
}

// 如果向对话框添加最小化按钮，则需要下面的代码
//  来绘制该图标。  对于使用文档/视图模型的 MFC 应用程序，
//  这将由框架自动完成。

void CSpeechRecognitionPocketSphinxDlg::OnPaint()
{
	if (IsIconic())
	{
		CPaintDC dc(this); // 用于绘制的设备上下文

		SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()), 0);

		// 使图标在工作区矩形中居中
		int cxIcon = GetSystemMetrics(SM_CXICON);
		int cyIcon = GetSystemMetrics(SM_CYICON);
		CRect rect;
		GetClientRect(&rect);
		int x = (rect.Width() - cxIcon + 1) / 2;
		int y = (rect.Height() - cyIcon + 1) / 2;

		// 绘制图标
		dc.DrawIcon(x, y, m_hIcon);
	}
	else
	{
		CDialogEx::OnPaint();
	}
}

// Called by the system to obtain the cursor to display while the user drags
// the minimized window; the dialog icon is reused for it.
HCURSOR CSpeechRecognitionPocketSphinxDlg::OnQueryDragIcon()
{
	HCURSOR dragCursor = static_cast<HCURSOR>(m_hIcon);
	return dragCursor;
}

void CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStart()
{
	// 清空音频缓冲区
	m_audioBuffer.clear();

	// 初始化 PortAudio
	PaError err = Pa_Initialize();
	if (err != paNoError) {
		AfxMessageBox(_T("Failed to initialize PortAudio."));
		return;
	}

	// 设置音频流参数
	PaStreamParameters inputParameters;
	inputParameters.device = Pa_GetDefaultInputDevice(); // 获取默认输入设备
	if (inputParameters.device == paNoDevice) {
		AfxMessageBox(_T("No default input device."));
		Pa_Terminate();
		return;
	}
	inputParameters.channelCount = 2; // 双声道
	inputParameters.sampleFormat = paInt16; // 16 位整数
	inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
	inputParameters.hostApiSpecificStreamInfo = nullptr;

	// 打开音频流
	err = Pa_OpenStream(
		&m_stream,
		&inputParameters,
		nullptr, // 没有输出
		48000, // 采样率
		2048, // 缓冲区大小
		paClipOff, // 禁用剪切
		nullptr, // 没有回调
		nullptr  // 没有用户数据
	);

	if (err != paNoError) {
		CString errorMsg;
		errorMsg.Format(_T("Failed to open PortAudio stream: %s"), Pa_GetErrorText(err));
		AfxMessageBox(errorMsg);
		Pa_Terminate();
		return;
	}

	// 启动音频流
	if (Pa_StartStream(m_stream) != paNoError) {
		AfxMessageBox(_T("Failed to start PortAudio stream."));
		Pa_CloseStream(m_stream);
		Pa_Terminate();
		return;
	}

	// 设置标志以启动录音
	m_bListening = true;

	AfxMessageBox(_T("Recording started. Please speak now..."));

	// 启动录音线程
	std::thread t(&CSpeechRecognitionPocketSphinxDlg::RecordAudio, this);
	t.detach();  // 分离线程

	// 设置定时器，在15秒后停止录音
	SetTimer(TIMER_ID, 15000, NULL);
}

void CSpeechRecognitionPocketSphinxDlg::RecordAudio()
{
	ps_start_utt(m_ps);
	int16_t stereoBuffer[2048 * 2];

	while (m_bListening) {
		PaError err = Pa_ReadStream(m_stream, stereoBuffer, 2048);
		if (err != paNoError) {
			CString errorMsg;
			int len = MultiByteToWideChar(CP_ACP, 0, Pa_GetErrorText(err), -1, NULL, 0);
			if (len > 0) {
				WCHAR* wcsErrorText = new WCHAR[len];
				MultiByteToWideChar(CP_ACP, 0, Pa_GetErrorText(err), -1, wcsErrorText, len);
				errorMsg.Format(_T("Failed to read audio: %s"), wcsErrorText);
				delete[] wcsErrorText;
			}
			else {
				errorMsg = _T("Failed to read audio: Unknown error");
			}
			AfxMessageBox(errorMsg);
			break;
		}

		// 将双声道数据转换为单声道数据，并添加到缓冲区
		std::vector<int16_t> monoBuffer(2048);
		for (unsigned long i = 0; i < 2048; ++i) {
			monoBuffer[i] = (stereoBuffer[i * 2] + stereoBuffer[i * 2 + 1]) / 2; // 简单平均以获得单声道数据
		}

		// 将单声道数据添加到缓冲区
		m_audioBuffer.insert(m_audioBuffer.end(), monoBuffer.begin(), monoBuffer.end());
		ps_process_raw(m_ps, monoBuffer.data(), monoBuffer.size(), FALSE, FALSE);
	}

	ps_end_utt(m_ps);

	// 停止音频流
	Pa_StopStream(m_stream);
	Pa_CloseStream(m_stream);
	Pa_Terminate();
}

// Stop button handler: ends the recording, cancels the auto-stop timer and
// decodes what was recorded.
void CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStop()
{
	// Ask the recording loop to exit.
	m_bListening = false;

	// Cancel the auto-stop timer.
	KillTimer(TIMER_ID);

	// The stream is not closed here: RecordAudio stops and closes it (and
	// terminates PortAudio) when its loop exits. Closing it from this thread
	// as well would close it twice and could pull it out from under a
	// Pa_ReadStream call still running on the worker thread.

	// Tell the user recording has stopped.
	AfxMessageBox(_T("Recording stopped. Processing data..."));

	// Decode the recorded audio.
	ProcessRecordedAudio();
}


// WM_TIMER handler: when the auto-stop timer fires, ends the recording and
// decodes what was recorded.
void CSpeechRecognitionPocketSphinxDlg::OnTimer(UINT_PTR nIDEvent)
{
	if (nIDEvent == TIMER_ID)
	{
		// SetTimer timers repeat, so cancel this one before the potentially
		// long decode rather than after it.
		KillTimer(TIMER_ID);

		// Ask the recording loop to exit.
		m_bListening = false;

		// Decode the recorded audio.
		ProcessRecordedAudio();
	}

	CDialogEx::OnTimer(nIDEvent);
}

// Decodes everything in m_audioBuffer as a single utterance and shows the
// hypothesis, converted from UTF-8, in the edit control. Every failure is
// reported in the edit control as well.
// NOTE(review): runs on the UI thread while the detached recording thread may
// still be appending to m_audioBuffer; nothing here synchronises with it.
void CSpeechRecognitionPocketSphinxDlg::ProcessRecordedAudio()
{
	if (m_audioBuffer.empty())
	{
		m_edtText.SetWindowText(_T("No audio data to process."));
		return;
	}

	// The decoder must have been created successfully.
	if (!m_ps)
	{
		m_edtText.SetWindowText(_T("PocketSphinx not initialized."));
		return;
	}

	// Begin the utterance; this fails when one is already in progress.
	if (ps_start_utt(m_ps) < 0)
	{
		m_edtText.SetWindowText(_T("Failed to start recognition."));
		return;
	}

	// Feed the samples, then always close the utterance that was opened.
	const int processed = ps_process_raw(m_ps, m_audioBuffer.data(), m_audioBuffer.size(), FALSE, FALSE);
	const int ended = ps_end_utt(m_ps);
	if (processed < 0 || ended < 0)
	{
		m_edtText.SetWindowText(_T("Failed to process audio data."));
		return;
	}

	const char* hyp = ps_get_hyp(m_ps, NULL);
	if (hyp == NULL)
	{
		m_edtText.SetWindowText(_T("No result detected"));
		return;
	}

	// The hypothesis is UTF-8; convert it to wide characters for display.
	const int len = MultiByteToWideChar(CP_UTF8, 0, hyp, -1, NULL, 0);
	if (len <= 0)
	{
		m_edtText.SetWindowText(_T("Failed to convert recognition result."));
		return;
	}

	std::wstring wstr(len, L'\0');
	MultiByteToWideChar(CP_UTF8, 0, hyp, -1, &wstr[0], len);
	CString result(wstr.c_str());

	m_edtText.SetWindowText(result);
}

// Converts a decoder hypothesis to a CString.
// hyp is treated as UTF-8, matching the conversion ProcessRecordedAudio
// applies to hypotheses; converting with the ANSI code page would garble
// Chinese text. Returns an empty string for a null or empty hypothesis.
CString CSpeechRecognitionPocketSphinxDlg::ExtractTextFromHyp(const char* hyp)
{
	if (hyp == nullptr || *hyp == '\0')
	{
		return CString();
	}

	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, hyp, -1, nullptr, 0);
	if (wideLen <= 0)
	{
		// Conversion failed: fall back to the plain narrow-string constructor.
		return CString(hyp);
	}

	std::wstring wide(static_cast<size_t>(wideLen), L'\0');
	MultiByteToWideChar(CP_UTF8, 0, hyp, -1, &wide[0], wideLen);
	return CString(wide.c_str());
}



void CSpeechRecognitionPocketSphinxDlg::ProcessAudioFile(const std::string& filePath)
{
	std::ifstream file(filePath, std::ios::binary);
	if (!file.is_open())
	{
		AfxMessageBox(_T("Failed to open audio file."));
		return;
	}

	// 读取文件内容到缓冲区
	std::vector<int16> audioData((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
	file.close();

	if (audioData.empty())
	{
		AfxMessageBox(_T("No audio data read from file."));
		return;
	}

	// 确保 PocketSphinx 正确初始化
	if (!m_ps)
	{
		AfxMessageBox(_T("PocketSphinx not initialized."));
		return;
	}

	// 开始语音识别
	ps_start_utt(m_ps);

	// 处理音频数据
	ps_process_raw(m_ps, audioData.data(), audioData.size() * sizeof(int16), FALSE, FALSE);

	// 结束语音识别
	ps_end_utt(m_ps);

	const char* hyp = ps_get_hyp(m_ps, NULL);
	if (hyp != NULL)
	{
		CString result = CString(hyp);
		m_edtText.SetWindowText(result);
	}
	else
	{
		m_edtText.SetWindowText(_T("No result detected"));
	}
}


//void CMFCApplication1Dlg::OnBnClickedButton1()
//{
//	std::string filePath = "D:\\ASR\\pocketsphinx\\test\\data\\cards\\001.wav"; // ten of clubs
//	//std::string filePath = "D:\\ASR\\pocketsphinx\\test\\data\\cards\\test.wav"; // ten of clubs
//	ProcessAudioFile(filePath);
//}

难点：

1、模型初始化

如果使用中文模型时 m_config 初始化失败，可以直接修改 hmm 模型文件夹中的参数文件。

Pocketsphinx 默认处理单声道音频，可能与自己的录音设备（例如双声道）不一致，需要对采集到的数据做转换。

// Builds the dialog and initialises the speech-recognition resources in three
// steps, reporting the outcome of each step in a message box:
//   1. PocketSphinx configuration (Mandarin acoustic model, language model, dictionary)
//   2. PocketSphinx decoder
//   3. SphinxBase audio device
// On failure the corresponding member stays null; OnDestroy releases whatever
// was created.
CSpeechRecognitionPocketSphinxDlg::CSpeechRecognitionPocketSphinxDlg(CWnd* pParent /*=nullptr*/)
	: CDialogEx(IDD_SPEECHRECOGNITION_POCKETSPHINX_DIALOG, pParent)
{
	m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);

	// The argument list is a C varargs list and must end with NULL.
	m_config = cmd_ln_init(NULL, ps_args(), TRUE,
		"-hmm", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn",
		"-lm", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn.lm.bin",
		"-dict", "D:\\ASR\\pocketsphinx\\model\\zh-cn\\zh-cn.dic",
		/* English model alternative:
		       "-hmm", "D:\\ASR\\pocketsphinx\\model\\en-us\\en-us",
			   "-lm", "D:\\ASR\\pocketsphinx\\model\\en-us\\en-us.lm.bin",
			   "-dict", "D:\\ASR\\pocketsphinx\\model\\en-us\\cmudict-en-us.dict",*/
		NULL);
	if (m_config == NULL)
	{
		// Nothing was allocated, so there is nothing to free here.
		AfxMessageBox(_T("1、m_config fail"));
	}
	else
	{
		AfxMessageBox(_T("1、m_config success"));
	}

	// Only create the decoder when a configuration exists.
	m_ps = (m_config != NULL) ? ps_init(m_config) : NULL;
	if (m_ps == NULL)
	{
		AfxMessageBox(_T("2、m_ps fail"));
	}
	else
	{
		AfxMessageBox(_T("2、m_ps success"));
	}

	m_ad = ad_open_dev("sysdefault", 48000);
	if (m_ad == NULL)
	{
		AfxMessageBox(_T("3、Failed to open audio device"));
	}
	else
	{
		AfxMessageBox(_T("3、Success to open audio device"));
	}
}

void CSpeechRecognitionPocketSphinxDlg::OnDestroy()
{
	// 音频设备
	if (m_ad)
	{
		ad_close(m_ad);
		m_ad = nullptr;
	}

	// 释放解码器
	if (m_ps)
	{
		ps_free(m_ps);
		m_ps = nullptr;
	}

	// 释放配置参数
	if (m_config)
	{
		cmd_ln_free_r(m_config);
		m_config = nullptr;
	}
}

2、PortAudio的使用

Pocketsphinx直接使用电脑的设备可能造成乱码，如果乱码，可以使用portaudio来录制音频，转换为pocketsphinx适配的音频格式

需要注意：打开音频流之前，一定要先设置好音频流的参数，否则会报错。

// PortAudio 参数
#define SAMPLE_RATE 16000                 // 采样率定义为16000Hz
#define FRAMES_PER_BUFFER (2048)          // 每个缓冲区包含2048帧

// PortAudio stream callback that down-mixes interleaved stereo input to mono.
//
// inputBuffer     - interleaved samples, read as float pairs (left, right).
// framesPerBuffer - number of frames available in inputBuffer.
// userData        - destination buffer for the mono samples.
// Returns paContinue in every case.
//
// NOTE(review): assumes the stream was opened with a 2-channel float sample
// format and that userData points to at least framesPerBuffer floats - confirm
// before registering this callback with Pa_OpenStream.
static int paCallback(const void* inputBuffer, void* outputBuffer,
    unsigned long framesPerBuffer,
    const PaStreamCallbackTimeInfo* timeInfo,
    PaStreamCallbackFlags statusFlags,
    void* userData) 
{
    (void)outputBuffer;
    (void)timeInfo;
    (void)statusFlags;

    const float* in = static_cast<const float*>(inputBuffer);   // input samples
    float* monoBuffer = static_cast<float*>(userData);          // mono destination

    // Nothing to read from or nowhere to write to: keep the stream running.
    if (in == nullptr || monoBuffer == nullptr)
    {
        return paContinue;
    }

    // Average left and right to get one mono sample per frame.
    for (unsigned long i = 0; i < framesPerBuffer; ++i) {
        monoBuffer[i] = (in[i * 2] + in[i * 2 + 1]) * 0.5f;
    }

    return paContinue;
}

// 按钮点击事件处理函数，开始录音
void CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStart()
{
    // 清空音频缓冲区
    m_audioBuffer.clear();

    // 初始化 PortAudio
    PaError err = Pa_Initialize();
    if (err != paNoError) {
        AfxMessageBox(_T("Failed to initialize PortAudio.")); // 初始化失败，显示错误消息
        return;
    }

    // 设置音频流参数
    PaStreamParameters inputParameters;
    inputParameters.device = Pa_GetDefaultInputDevice(); // 获取默认输入设备
    if (inputParameters.device == paNoDevice) {
        AfxMessageBox(_T("No default input device.")); // 没有默认输入设备，显示错误消息
        Pa_Terminate(); // 终止 PortAudio
        return;
    }
    inputParameters.channelCount = 2; // 设置为双声道
    inputParameters.sampleFormat = paInt16; // 采样格式为16位整数
    inputParameters.suggestedLatency = Pa_GetDeviceInfo(inputParameters.device)->defaultLowInputLatency;
    inputParameters.hostApiSpecificStreamInfo = nullptr;

    // 打开音频流
    err = Pa_OpenStream(
        &m_stream,
        &inputParameters,
        nullptr, // 没有输出
        48000, // 采样率为48000Hz
        2048, // 每个缓冲区2048帧
        paClipOff, // 禁用剪切
        nullptr, // 没有回调函数
        nullptr  // 没有用户数据
    );

    if (err != paNoError) {
        CString errorMsg;
        errorMsg.Format(_T("Failed to open PortAudio stream: %s"), Pa_GetErrorText(err)); // 打开音频流失败，显示错误消息
        AfxMessageBox(errorMsg);
        Pa_Terminate(); // 终止 PortAudio
        return;
    }

    // 启动音频流
    if (Pa_StartStream(m_stream) != paNoError) {
        AfxMessageBox(_T("Failed to start PortAudio stream.")); // 启动音频流失败，显示错误消息
        Pa_CloseStream(m_stream); // 关闭音频流
        Pa_Terminate(); // 终止 PortAudio
        return;
    }

    // 设置标志，表示正在录音
    m_bListening = true;

    // 提示用户录音已经开始
    AfxMessageBox(_T("Recording started. Please speak now..."));

    // 启动录音线程
    std::thread t(&CSpeechRecognitionPocketSphinxDlg::RecordAudio, this);
    t.detach(); // 分离线程，使其在后台运行

    // 设置定时器，在15秒后停止录音
    SetTimer(TIMER_ID, 15000, NULL);
}

3、录音

注意：读取音频数据失败时，Pa_GetErrorText 返回的是多字节（char*）字符串，而工程使用 Unicode 字符集，需要先用 MultiByteToWideChar 转换为宽字符再显示，否则报错信息会乱码，无法阅读。

void CSpeechRecognitionPocketSphinxDlg::RecordAudio()
{
    // 开始新的识别单元
    ps_start_utt(m_ps);

    // 缓冲区用于存储双声道音频数据
    int16_t stereoBuffer[2048 * 2];

    // 当正在录音时不断读取音频数据
    while (m_bListening) 
    {
        // 从音频流中读取数据到缓冲区
        PaError err = Pa_ReadStream(m_stream, stereoBuffer, 2048);
        if (err != paNoError)
        {
            // 读取音频数据失败，显示错误消息
            CString errorMsg;
            int len = MultiByteToWideChar(CP_ACP, 0, Pa_GetErrorText(err), -1, NULL, 0);
            if (len > 0)
            {
                WCHAR* wcsErrorText = new WCHAR[len];
                MultiByteToWideChar(CP_ACP, 0, Pa_GetErrorText(err), -1, wcsErrorText, len);
                errorMsg.Format(_T("Failed to read audio: %s"), wcsErrorText);
                delete[] wcsErrorText;
            } else 
            {
                errorMsg = _T("Failed to read audio: Unknown error");
            }
            AfxMessageBox(errorMsg);
            break; // 退出循环
        }

        // 将双声道数据转换为单声道数据，并添加到缓冲区
        std::vector<int16_t> monoBuffer(2048);
        for (unsigned long i = 0; i < 2048; ++i) 
        {
            monoBuffer[i] = (stereoBuffer[i * 2] + stereoBuffer[i * 2 + 1]) / 2; // 简单平均以获得单声道数据
        }

        // 将单声道数据添加到音频缓冲区
        m_audioBuffer.insert(m_audioBuffer.end(), monoBuffer.begin(), monoBuffer.end());

        // 将单声道数据传递给 PocketSphinx 进行处理
        ps_process_raw(m_ps, monoBuffer.data(), monoBuffer.size(), FALSE, FALSE);
    }

    // 结束识别单元
    ps_end_utt(m_ps);

    // 停止音频流
    Pa_StopStream(m_stream);
    Pa_CloseStream(m_stream);
    Pa_Terminate(); // 终止 PortAudio
}

4、识别

注意转换音频格式

// Decodes everything in m_audioBuffer as a single utterance and shows the
// hypothesis, converted from UTF-8, in the edit control. Every failure is
// reported in the edit control as well.
// NOTE(review): runs on the UI thread while the detached recording thread may
// still be appending to m_audioBuffer; nothing here synchronises with it.
void CSpeechRecognitionPocketSphinxDlg::ProcessRecordedAudio()
{
    // Nothing recorded: tell the user and stop.
    if (m_audioBuffer.empty())
    {
        m_edtText.SetWindowText(_T("No audio data to process."));
        return;
    }

    // The decoder must have been created successfully.
    if (!m_ps)
    {
        m_edtText.SetWindowText(_T("PocketSphinx not initialized."));
        return;
    }

    // Begin the utterance; this fails when one is already in progress.
    if (ps_start_utt(m_ps) < 0)
    {
        m_edtText.SetWindowText(_T("Failed to start recognition."));
        return;
    }

    // Feed the samples, then always close the utterance that was opened.
    const int processed = ps_process_raw(m_ps, m_audioBuffer.data(), m_audioBuffer.size(), FALSE, FALSE);
    const int ended = ps_end_utt(m_ps);
    if (processed < 0 || ended < 0)
    {
        m_edtText.SetWindowText(_T("Failed to process audio data."));
        return;
    }

    // Fetch the recognition result.
    const char* hyp = ps_get_hyp(m_ps, NULL);
    if (hyp == NULL)
    {
        m_edtText.SetWindowText(_T("No result detected"));
        return;
    }

    // The hypothesis is UTF-8; convert it to wide characters for display.
    const int len = MultiByteToWideChar(CP_UTF8, 0, hyp, -1, NULL, 0);
    if (len <= 0)
    {
        m_edtText.SetWindowText(_T("Failed to convert recognition result."));
        return;
    }

    std::wstring wstr(len, L'\0');
    MultiByteToWideChar(CP_UTF8, 0, hyp, -1, &wstr[0], len);
    CString result(wstr.c_str());

    // Show the recognised text.
    m_edtText.SetWindowText(result);
}

// Converts a decoder hypothesis to a CString.
// hyp is treated as UTF-8, matching the conversion ProcessRecordedAudio
// applies to hypotheses; converting with the ANSI code page would garble
// Chinese text. Returns an empty string for a null or empty hypothesis.
CString CSpeechRecognitionPocketSphinxDlg::ExtractTextFromHyp(const char* hyp)
{
    if (hyp == nullptr || *hyp == '\0')
    {
        return CString();
    }

    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, hyp, -1, nullptr, 0);
    if (wideLen <= 0)
    {
        // Conversion failed: fall back to the plain narrow-string constructor.
        return CString(hyp);
    }

    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    MultiByteToWideChar(CP_UTF8, 0, hyp, -1, &wide[0], wideLen);
    return CString(wide.c_str());
}

5、定时器

// WM_TIMER handler: when the auto-stop timer fires, ends the recording and
// decodes what was recorded.
void CSpeechRecognitionPocketSphinxDlg::OnTimer(UINT_PTR nIDEvent)
{
	if (nIDEvent == TIMER_ID)
	{
		// SetTimer timers repeat, so cancel this one before the potentially
		// long decode rather than after it.
		KillTimer(TIMER_ID);

		// Ask the recording loop to exit.
		m_bListening = false;

		// Decode the recorded audio.
		ProcessRecordedAudio();
	}

	CDialogEx::OnTimer(nIDEvent);
}

6、结束按钮

不要循环地反复启动录音，否则识别结果会很乱。可以用一个“开始”按钮和一个“结束”按钮来控制；或者固定 15 秒的录音时间，录音结束后再读取录音数据进行识别。

// Stop button handler: ends the recording, cancels the auto-stop timer and
// decodes what was recorded.
void CSpeechRecognitionPocketSphinxDlg::OnBnClickedButtonStop()
{
	// Ask the recording loop to exit.
	m_bListening = false;

	// Cancel the auto-stop timer.
	KillTimer(TIMER_ID);

	// The stream is not closed here: RecordAudio stops and closes it (and
	// terminates PortAudio) when its loop exits. Closing it from this thread
	// as well would close it twice and could pull it out from under a
	// Pa_ReadStream call still running on the worker thread.

	// Tell the user recording has stopped.
	AfxMessageBox(_T("Recording stopped. Processing data..."));

	// Decode the recorded audio.
	ProcessRecordedAudio();
}

李新乾的个人博客

语音识别_Pocketsphinx(二)

博客：