辨識聲音時有一個重點:體感裝置一定要離人夠近,而且室內環境不能太吵雜,否則Kinect會辨識不清楚
我這次的示範直接將感測器放置在電腦旁而已,如果想知道能測多遠的朋友可以自行量測
開始開發前,你必須先安裝『Microsoft Speech Platform SDK』,目前的版本是出到第11版
如果你的電腦跟我一樣是安裝64位元的朋友,建議你32和64的版本都要安裝,以免到時候出錯
如果想瞭解『Microsoft Speech Platform』,就直接點擊進去看吧!
安裝完『Microsoft Speech Platform SDK』後,初始語言包僅只有英文,如果想要新增的朋友可以點擊『Microsoft Speech Platform - Runtime Languages』下載你需要的語言包
在這邊提醒一下,Windows 8是不支援語言包的,不過仍可以使用『Microsoft Speech Platform SDK』,目前打文章的電腦就是Windows 8 XDD
程式載入時會觸發Window_Loaded事件,該事件在之前的幾篇文章也講解過了。差異只是呼叫的方法不一樣,如果不懂的話可以回去看我之前寫的『透過Kinect取得聲音來源方位』及『透過Kinect取聲音並繪製成音波圖』
在這邊只是多了SignalProcess方法,SignalProcess內主要配置SpeechRecognitionEngine和音效以及詞彙
先看到ConfigSpeechEngine方法,var變數去取得所有安裝的辨識引擎
逐一走訪所有辨識引擎,檢查是否支援Kinect,並判斷引擎的語系是否為美式英語(en-US);符合的話就將該辨識引擎的資訊設定給recognizerInfo,再透過SpeechRecognitionEngine的建構子帶入,建立對應語系的辨識引擎。沒找到的話,則結束程式
/// <summary>
/// Finds an installed speech recognizer that is flagged for Kinect use and
/// whose culture is en-US, stores it in <c>recognizerInfo</c>, and creates
/// <c>speechEngine</c> from it.
/// </summary>
/// <remarks>
/// When no matching recognizer is installed, the user is notified and
/// application shutdown is requested; <c>speechEngine</c> is then left
/// unassigned, so callers should check it before using the engine.
/// </remarks>
private void ConfigSpeechEngine()
{
    foreach (RecognizerInfo info in SpeechRecognitionEngine.InstalledRecognizers())
    {
        string details;

        // One dictionary lookup instead of ContainsKey followed by the indexer.
        if (info.AdditionalInfo.TryGetValue("Kinect", out details)
            && details == "True"
            && info.Culture.Name == "en-US")
        {
            recognizerInfo = info;
            break; // The first matching recognizer is enough.
        }
    }

    if (recognizerInfo == null)
    {
        MessageBox.Show("沒有找到");
        Application.Current.Shutdown();

        // Shutdown() returns to the caller, so leave explicitly instead of
        // passing a null RecognizerInfo to the engine constructor below.
        return;
    }

    speechEngine = new SpeechRecognitionEngine(recognizerInfo);
}
BuildGrammars方法主要在建構詞彙並導入文法
詞彙的設定是由Choices這邊去新增的,新增完成後再透過GrammarBuilder去建立一個語法,並且將該詞彙新增到GrammarBuilder內
再由Grammar去將其建構出來,並載入到speechEngine內
/// <summary>
/// Builds the command vocabulary, wraps it in a grammar that uses the
/// culture of the selected recognizer, and loads it into <c>speechEngine</c>.
/// </summary>
private void BuildGrammars()
{
    // The phrases the engine is asked to recognize.
    Choices vocabulary = new Choices("right", "top", "Hi, i Miss you", "bottom");

    // The grammar culture is taken from the recognizer chosen earlier.
    GrammarBuilder grammarBuilder = new GrammarBuilder
    {
        Culture = recognizerInfo.Culture
    };
    grammarBuilder.Append(vocabulary);

    speechEngine.LoadGrammar(new Grammar(grammarBuilder));
}
接著就是設定雜七雜八的了
辨識引擎主要用非同步的方式去辨識
// Start recognition asynchronously, passing RecognizeMode.Multiple as the mode.
speechEngine.RecognizeAsync(RecognizeMode.Multiple);
有將以下幾個事件加入到辨識引擎
// Register the three recognition callbacks on the engine:
//   SpeechHypothesized        - raised while recognition is in progress
//   SpeechRecognitionRejected - raised when recognition fails
//   SpeechRecognized          - raised when recognition succeeds
speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized; speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected; speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;
SpeechHypothesized:辨識中會觸發事件
SpeechRecognitionRejected:辨識失敗觸發事件
SpeechRecognized:辨識成功觸發事件
/// <summary>
/// Configures the speech engine, its grammar and the Kinect audio source,
/// then starts asynchronous recognition on the Kinect audio stream.
/// </summary>
/// <remarks>
/// Does nothing further when <c>ConfigSpeechEngine</c> leaves
/// <c>speechEngine</c> unassigned.
/// </remarks>
private void SignalProcess()
{
    ConfigSpeechEngine();
    if (speechEngine == null)
    {
        // No engine was created; there is nothing to configure or start.
        return;
    }

    BuildGrammars();
    ConfigAudio();

    audioStream = audioSource.Start();

    // PCM, 16000 samples/s, 16 bits per sample, 1 channel,
    // 32000 average bytes/s, block align 2, no format-specific data.
    speechEngine.SetInputToAudioStream(
        audioStream,
        new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

    // Attach the handlers before recognition starts so that every event
    // raised by the engine already has a listener.
    speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized;
    speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected;
    speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;

    speechEngine.RecognizeAsync(RecognizeMode.Multiple);
}
辨識成功時,會觸發『SpeechRecognized』,可以透過SpeechRecognizedEventArgs變數去取得目前的辨識率『e.Result.Confidence』,也可以透過變數去取得目前的文字『e.Result.Text』
/// <summary>
/// Handles a successful recognition: marks the status label green and shows
/// the confidence as a percentage, on a green background when it is above
/// 0.6 and on a red background otherwise.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Event data carrying the recognition result.</param>
private void SpeechEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    labelStatus.Content = "辨識成功";
    labelStatus.Background = Brushes.Green;

    float confidence = e.Result.Confidence;

    // The text is the same in both cases; only the background differs.
    textBlockRate.Text = string.Format("命中率{0:F2}%", confidence * 100);
    textBlockRate.Background = confidence > 0.6f ? Brushes.Green : Brushes.Red;
}
大致上思路就這麼簡單而已,以下部份包含程式碼及實際操作圖片
XAML:
<Window x:Class="WpfApplication3.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="MainWindow"
        Height="350"
        Width="300"
        Loaded="Window_Loaded">
    <Grid>
        <!-- Caption for the confidence read-out. -->
        <Label x:Name="labelRate"
               Content="命中率"
               HorizontalAlignment="Left"
               Margin="19,15,0,0"
               VerticalAlignment="Top"
               RenderTransformOrigin="-7.206,-3.438"/>

        <!-- Confidence read-out. -->
        <TextBlock x:Name="textBlockRate"
                   HorizontalAlignment="Left"
                   Margin="70,20,0,0"
                   TextWrapping="Wrap"
                   Text="命中率為"
                   VerticalAlignment="Top"/>

        <!-- Recognition status. -->
        <Label x:Name="labelStatus"
               Content="成功"
               HorizontalAlignment="Left"
               Margin="19,45,0,0"
               VerticalAlignment="Top"
               RenderTransformOrigin="-7.206,-3.438"/>

        <!-- Recognized text. -->
        <TextBlock x:Name="textBlock"
                   HorizontalAlignment="Left"
                   Margin="29,88,0,0"
                   TextWrapping="Wrap"
                   Text="文字"
                   VerticalAlignment="Top"/>
    </Grid>
</Window>
程式碼:
using Microsoft.Kinect;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
using System;
using System.IO;
using System.Windows;
using System.Windows.Media;

namespace WpfApplication3
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml: feeds the Kinect audio stream
    /// into a speech recognition engine and shows the recognition status,
    /// text and confidence in the window.
    /// </summary>
    public partial class MainWindow : Window
    {
        private KinectSensor sensor;
        private RecognizerInfo recognizerInfo;
        private SpeechRecognitionEngine speechEngine;
        private KinectAudioSource audioSource;
        private Stream audioStream;

        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts the first Kinect sensor and the speech pipeline, or shows a
        /// message when no sensor is listed.
        /// </summary>
        private void Window_Loaded(object sender, RoutedEventArgs e)
        {
            if (KinectSensor.KinectSensors.Count == 0)
            {
                MessageBox.Show("Oh! no");
                return;
            }

            this.sensor = KinectSensor.KinectSensors[0];
            this.sensor.Start();
            SignalProcess();
        }

        /// <summary>
        /// Releases the speech engine, the audio stream and the sensor when
        /// the window closes; the original listing never released them.
        /// </summary>
        protected override void OnClosed(EventArgs e)
        {
            if (speechEngine != null)
            {
                speechEngine.SpeechHypothesized -= SpeechEngine_SpeechHypothesized;
                speechEngine.SpeechRecognitionRejected -= SpeechEngine_SpeechRecognitionRejected;
                speechEngine.SpeechRecognized -= SpeechEngine_SpeechRecognized;
                speechEngine.RecognizeAsyncCancel();
                speechEngine.Dispose();
                speechEngine = null;
            }

            // Only stop the audio source when it was actually started.
            if (audioSource != null && audioStream != null)
            {
                audioSource.Stop();
                audioStream = null;
            }

            if (sensor != null)
            {
                sensor.Stop();
                sensor = null;
            }

            base.OnClosed(e);
        }

        /// <summary>
        /// Configures the speech engine, its grammar and the Kinect audio
        /// source, then starts asynchronous recognition on the audio stream.
        /// Returns early when no speech engine could be created.
        /// </summary>
        private void SignalProcess()
        {
            ConfigSpeechEngine();
            if (speechEngine == null)
            {
                // No matching recognizer; shutdown has already been requested.
                return;
            }

            BuildGrammars();
            ConfigAudio();

            audioStream = audioSource.Start();

            // PCM, 16000 samples/s, 16 bits per sample, 1 channel,
            // 32000 average bytes/s, block align 2, no format-specific data.
            speechEngine.SetInputToAudioStream(
                audioStream,
                new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));

            // Attach the handlers before recognition starts so that every
            // event raised by the engine already has a listener.
            speechEngine.SpeechHypothesized += SpeechEngine_SpeechHypothesized;
            speechEngine.SpeechRecognitionRejected += SpeechEngine_SpeechRecognitionRejected;
            speechEngine.SpeechRecognized += SpeechEngine_SpeechRecognized;

            speechEngine.RecognizeAsync(RecognizeMode.Multiple);
        }

        /// <summary>
        /// Shows the in-progress status and the text hypothesized so far.
        /// </summary>
        private void SpeechEngine_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            labelStatus.Content = "辨識中";
            textBlock.Text = e.Result.Text;
        }

        /// <summary>
        /// Shows the failure status on a red background.
        /// </summary>
        private void SpeechEngine_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            labelStatus.Content = "辨識失敗";
            labelStatus.Background = Brushes.Red;
        }

        /// <summary>
        /// Shows the success status and the confidence as a percentage, on a
        /// green background when it is above 0.6 and on red otherwise.
        /// </summary>
        private void SpeechEngine_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            labelStatus.Content = "辨識成功";
            labelStatus.Background = Brushes.Green;

            float confidence = e.Result.Confidence;
            textBlockRate.Text = string.Format("命中率{0:F2}%", confidence * 100);
            textBlockRate.Background = confidence > 0.6f ? Brushes.Green : Brushes.Red;
        }

        /// <summary>
        /// Finds an installed recognizer flagged for Kinect use with the en-US
        /// culture and creates <c>speechEngine</c> from it. When none is found
        /// the user is notified, shutdown is requested, and
        /// <c>speechEngine</c> stays unassigned.
        /// </summary>
        private void ConfigSpeechEngine()
        {
            foreach (RecognizerInfo info in SpeechRecognitionEngine.InstalledRecognizers())
            {
                string details;

                // One dictionary lookup instead of ContainsKey plus indexer.
                if (info.AdditionalInfo.TryGetValue("Kinect", out details)
                    && details == "True"
                    && info.Culture.Name == "en-US")
                {
                    recognizerInfo = info;
                    break; // The first matching recognizer is enough.
                }
            }

            if (recognizerInfo == null)
            {
                MessageBox.Show("沒有找到");
                Application.Current.Shutdown();

                // Shutdown() returns to the caller, so leave explicitly
                // instead of constructing the engine with a null argument.
                return;
            }

            speechEngine = new SpeechRecognitionEngine(recognizerInfo);
        }

        /// <summary>
        /// Takes the audio source from the sensor and sets its beam angle
        /// mode to <c>BeamAngleMode.Adaptive</c>.
        /// </summary>
        private void ConfigAudio()
        {
            audioSource = sensor.AudioSource;
            audioSource.BeamAngleMode = BeamAngleMode.Adaptive;
        }

        /// <summary>
        /// Builds the command vocabulary, wraps it in a grammar that uses the
        /// recognizer's culture, and loads it into <c>speechEngine</c>.
        /// </summary>
        private void BuildGrammars()
        {
            Choices commands = new Choices();
            commands.Add("right");
            commands.Add("top");
            commands.Add("Hi, i Miss you");
            commands.Add("bottom");

            GrammarBuilder builder = new GrammarBuilder();
            builder.Culture = recognizerInfo.Culture;
            builder.Append(commands);

            Grammar grammar = new Grammar(builder);
            speechEngine.LoadGrammar(grammar);
        }
    }
}
參考資料:
Kinect體感程式探索-使用C#
http://msdn.microsoft.com/en-us/library/hh361572(v=office.14).aspx
http://msdn.microsoft.com/en-us/library/hh362873(v=office.14).aspx