在辨識聲音上有一個重點是,體感裝置一定要離人很近以及室內較不易吵雜,否則會讓Kinect辨識不清楚
我這次的示範直接將感測器放置在電腦旁而已,如果想知道能測多遠的朋友可以自行量測
開始開發前,你必須先安裝『Microsoft Speech Platform SDK』,目前的版本是出到第11版
如果你的電腦跟我一樣是安裝64位元的作業系統,建議你32和64位元的版本都要安裝,以免到時候出錯
如果想瞭解『Microsoft Speech Platform』,就直接點擊進去看吧!
安裝完『Microsoft Speech Platform SDK』後,初始語言包只有英文,如果想要新增其他語言的朋友可以點擊『Microsoft Speech Platform - Runtime Languages』下載你需要的語言包
在這邊提醒一下,Windows 8是不支援語言包的,不過仍可以使用『Microsoft Speech Platform SDK』,目前打文章的電腦就是Windows 8 XDD
程式載入時會觸發Window_Loaded事件,該事件在之前的幾篇文章也講解過了。差異只是呼叫的方法不一樣,如果不懂的可以回去看我之前寫的『透過Kinect取得聲音來源方位』及『透過Kinect取聲音並繪製成音波圖』
在這邊只是多了SignalProcess方法,SignalProcess內主要配置SpeechRecognitionEngine和音效以及詞彙
先看到ConfigSpeechEngine方法,var變數去取得所有安裝的辨識引擎
逐一走訪每個辨識引擎,檢查它是否支援Kinect,並判斷其語系是否為美式英文(en-US);符合的話,就將辨識引擎的資訊設定給recognizerInfo,再透過辨識引擎的建構子帶入要辨識的語系。沒找到的話,則結束程式
/// <summary>
/// Selects the installed recognizer that is flagged for Kinect and uses the en-US culture,
/// then creates <c>speechEngine</c> from it.
/// </summary>
/// <remarks>
/// When no matching recognizer is installed, a message box is shown, application shutdown
/// is requested, and the method returns without creating <c>speechEngine</c>.
/// </remarks>
private void ConfigSpeechEngine()
{
    var recognizers = SpeechRecognitionEngine.InstalledRecognizers();
    foreach (RecognizerInfo info in recognizers)
    {
        // Single dictionary lookup instead of ContainsKey followed by the indexer.
        string details;
        if (info.AdditionalInfo.TryGetValue("Kinect", out details)
            && details == "True"
            && info.Culture.Name == "en-US")
        {
            recognizerInfo = info;
            break; // A match was found; no need to inspect the remaining recognizers.
        }
    }

    if (recognizerInfo == null)
    {
        MessageBox.Show("沒有找到");
        Application.Current.Shutdown();

        // Shutdown() returns to the caller, so execution would otherwise continue and
        // hand a null RecognizerInfo to the SpeechRecognitionEngine constructor.
        return;
    }

    speechEngine = new SpeechRecognitionEngine(recognizerInfo);
}
BuildGrammars方法主要在建構詞彙並導入文法
詞彙的設定是由Choices這邊去新增的,新增完成後再透過GrammarBuilder去建立一個語法,並且將該詞彙新增到GrammarBuilder內
再由Grammar去將其建構出來,並載入到speechEngine內
/// <summary>
/// Builds the grammar of spoken commands and loads it into <c>speechEngine</c>.
/// </summary>
private void BuildGrammars()
{
    // The phrases the engine should listen for.
    Choices phrases = new Choices();
    phrases.Add("right", "top", "Hi, i Miss you", "bottom");

    // The grammar uses the culture of the recognizer that was selected earlier.
    GrammarBuilder grammarBuilder = new GrammarBuilder { Culture = recognizerInfo.Culture };
    grammarBuilder.Append(phrases);

    speechEngine.LoadGrammar(new Grammar(grammarBuilder));
}
接著就是設定雜七雜八的了
辨識引擎主要用非同步的方式去辨識
// Start recognition asynchronously, using RecognizeMode.Multiple.
speechEngine.RecognizeAsync(RecognizeMode.Multiple);
有將以下幾個事件加入到辨識引擎
// Raised while recognition is still in progress.
speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized;
// Raised when recognition fails.
speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected;
// Raised when recognition succeeds.
speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;
SpeechHypothesized:辨識中會觸發事件
SpeechRecognitionRejected:辨識失敗觸發事件
SpeechRecognized:辨識成功觸發事件
/// <summary>
/// Configures the speech engine, its grammar and the Kinect audio source, then starts
/// asynchronous recognition on the Kinect audio stream.
/// </summary>
private void SignalProcess()
{
    ConfigSpeechEngine();
    if (speechEngine == null)
    {
        // Defensive: without an engine there is nothing to load grammars into or to start.
        return;
    }

    BuildGrammars();
    ConfigAudio();

    // Attach the handlers before recognition starts so that no early event is missed.
    speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized;
    speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected;
    speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;

    audioStream = audioSource.Start();

    // PCM, 16000 samples/s, 16 bits per sample, 1 channel, 32000 bytes/s, block align 2.
    speechEngine.SetInputToAudioStream(
        audioStream,
        new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

    speechEngine.RecognizeAsync(RecognizeMode.Multiple);
}
辨識成功時,會觸發『SpeechRecognized』,可以透過SpeechRecognizedEventArgs變數去取得目前的辨識率『e.Result.Confidence』,也可以透過變數去取得目前的文字『e.Result.Text』
/// <summary>
/// Handles a successful recognition: marks the status label as succeeded and shows the
/// confidence as a percentage, green above 0.6 and red otherwise.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Event data carrying the recognition result.</param>
private void SpeechEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    labelStatus.Content = "辨識成功";
    labelStatus.Background = Brushes.Green;

    // The text is the same in both cases; only the background colour depends on the threshold.
    bool isConfident = e.Result.Confidence > 0.6f;
    textBlockRate.Text = string.Format("命中率{0:F2}%",
        e.Result.Confidence * 100);
    textBlockRate.Background = isConfident ? Brushes.Green : Brushes.Red;
}
大致上思路就這麼簡單而已,以下部份包含程式碼及實際操作圖片
XAML:
<!-- Main window of the sample; Window_Loaded in the code-behind runs when it is loaded. -->
<Window x:Class="WpfApplication3.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
Title="MainWindow" Height="350" Width="300" Loaded="Window_Loaded">
<Grid>
<!-- Static caption for the confidence value. -->
<Label x:Name="labelRate" Content="命中率" HorizontalAlignment="Left" Margin="19,15,0,0" VerticalAlignment="Top" RenderTransformOrigin="-7.206,-3.438"/>
<!-- Confidence percentage; text and background are set by the SpeechRecognized handler. -->
<TextBlock x:Name="textBlockRate" HorizontalAlignment="Left" Margin="70,20,0,0" TextWrapping="Wrap" Text="命中率為" VerticalAlignment="Top"/>
<!-- Recognition status; content and background are set by the speech event handlers. -->
<Label x:Name="labelStatus" Content="成功" HorizontalAlignment="Left" Margin="19,45,0,0" VerticalAlignment="Top" RenderTransformOrigin="-7.206,-3.438"/>
<!-- Hypothesized text; set by the SpeechHypothesized handler. -->
<TextBlock x:Name="textBlock" HorizontalAlignment="Left" Margin="29,88,0,0" TextWrapping="Wrap" Text="文字" VerticalAlignment="Top"/>
</Grid>
</Window>
程式碼:
using Microsoft.Kinect;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
using System.IO;
using System.Windows;
using System.Windows.Media;
namespace WpfApplication3
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml: listens to the Kinect audio stream and shows
    /// the speech recognition status, the hypothesized text and the confidence on screen.
    /// </summary>
    public partial class MainWindow : Window
    {
        private KinectSensor sensor;
        private RecognizerInfo recognizerInfo;
        private SpeechRecognitionEngine speechEngine;
        private KinectAudioSource audioSource;
        private Stream audioStream;

        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts the first connected Kinect sensor and begins speech recognition, or shows
        /// a message when no sensor is connected.
        /// </summary>
        private void Window_Loaded(object sender, RoutedEventArgs e)
        {
            if (KinectSensor.KinectSensors.Count == 0)
            {
                MessageBox.Show("Oh! no");
                return;
            }

            this.sensor = KinectSensor.KinectSensors[0];
            this.sensor.Start();
            SignalProcess();
        }

        /// <summary>
        /// Releases the recognition engine, the audio source and the sensor when the window
        /// closes, so none of them is left running.
        /// </summary>
        protected override void OnClosed(System.EventArgs e)
        {
            if (speechEngine != null)
            {
                speechEngine.SpeechHypothesized -= SpeechEngine_SpeechHypothesized;
                speechEngine.SpeechRecognitionRejected -= SpeechEngine_SpeechRecognitionRejected;
                speechEngine.SpeechRecognized -= SpeechEngine_SpeechRecognized;
                speechEngine.RecognizeAsyncCancel();
                speechEngine.Dispose();
                speechEngine = null;
            }

            if (audioSource != null)
            {
                audioSource.Stop();
                audioSource = null;
            }

            if (sensor != null)
            {
                sensor.Stop();
                sensor = null;
            }

            base.OnClosed(e);
        }

        /// <summary>
        /// Configures the speech engine, its grammar and the Kinect audio source, then
        /// starts asynchronous recognition on the Kinect audio stream.
        /// </summary>
        private void SignalProcess()
        {
            ConfigSpeechEngine();
            if (speechEngine == null)
            {
                // No recognizer was found; ConfigSpeechEngine already requested shutdown.
                return;
            }

            BuildGrammars();
            ConfigAudio();

            // Attach the handlers before recognition starts so that no early event is missed.
            speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized;
            speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected;
            speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;

            audioStream = audioSource.Start();

            // PCM, 16000 samples/s, 16 bits per sample, 1 channel, 32000 bytes/s, block align 2.
            speechEngine.SetInputToAudioStream(
                audioStream,
                new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

            speechEngine.RecognizeAsync(RecognizeMode.Multiple);
        }

        /// <summary>
        /// Shows the text recognized so far while recognition is still in progress.
        /// </summary>
        private void SpeechEngine_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            labelStatus.Content = "辨識中";
            textBlock.Text = e.Result.Text;
        }

        /// <summary>
        /// Marks the status label as failed when the engine rejects the input.
        /// </summary>
        private void SpeechEngine_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            labelStatus.Content = "辨識失敗";
            labelStatus.Background = Brushes.Red;
        }

        /// <summary>
        /// Marks the status label as succeeded and shows the confidence as a percentage,
        /// green above 0.6 and red otherwise.
        /// </summary>
        private void SpeechEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            labelStatus.Content = "辨識成功";
            labelStatus.Background = Brushes.Green;

            // The text is the same in both cases; only the background colour differs.
            bool isConfident = e.Result.Confidence > 0.6f;
            textBlockRate.Text = string.Format("命中率{0:F2}%",
                e.Result.Confidence * 100);
            textBlockRate.Background = isConfident ? Brushes.Green : Brushes.Red;
        }

        /// <summary>
        /// Selects the installed recognizer that is flagged for Kinect and uses the en-US
        /// culture, then creates <c>speechEngine</c> from it. When none is installed, a
        /// message is shown, shutdown is requested and <c>speechEngine</c> stays null.
        /// </summary>
        private void ConfigSpeechEngine()
        {
            var recognizers = SpeechRecognitionEngine.InstalledRecognizers();
            foreach (RecognizerInfo info in recognizers)
            {
                // Single dictionary lookup instead of ContainsKey followed by the indexer.
                string details;
                if (info.AdditionalInfo.TryGetValue("Kinect", out details)
                    && details == "True"
                    && info.Culture.Name == "en-US")
                {
                    recognizerInfo = info;
                    break; // A match was found; no need to inspect the remaining recognizers.
                }
            }

            if (recognizerInfo == null)
            {
                MessageBox.Show("沒有找到");
                Application.Current.Shutdown();

                // Shutdown() returns to the caller, so execution would otherwise continue
                // and hand a null RecognizerInfo to the SpeechRecognitionEngine constructor.
                return;
            }

            speechEngine = new SpeechRecognitionEngine(recognizerInfo);
        }

        /// <summary>
        /// Takes the audio source from the sensor and sets its beam angle mode to Adaptive.
        /// </summary>
        private void ConfigAudio()
        {
            audioSource = sensor.AudioSource;
            audioSource.BeamAngleMode = BeamAngleMode.Adaptive;
        }

        /// <summary>
        /// Builds the grammar of spoken commands and loads it into <c>speechEngine</c>.
        /// </summary>
        private void BuildGrammars()
        {
            // The phrases the engine should listen for.
            Choices commands = new Choices();
            commands.Add("right", "top", "Hi, i Miss you", "bottom");

            // The grammar uses the culture of the recognizer that was selected earlier.
            GrammarBuilder builder = new GrammarBuilder { Culture = recognizerInfo.Culture };
            builder.Append(commands);

            speechEngine.LoadGrammar(new Grammar(builder));
        }
    }
}
參考資料:
Kinect體感程式探索-使用C#
http://msdn.microsoft.com/en-us/library/hh361572(v=office.14).aspx
http://msdn.microsoft.com/en-us/library/hh362873(v=office.14).aspx

