2025/03/01

Go + Microsoft Speech SDK:在 macOS 上的環境配置與編譯指南

將以下環境變數新增至 ~/.zshrc
# Microsoft Speech SDK: install root and the macOS directory inside the xcframework.
export SPEECHSDK_ROOT="$HOME/speechsdk"
export SPEECHSDK_FRAMEWORK_PATH="$SPEECHSDK_ROOT/MicrosoftCognitiveServicesSpeech.xcframework/macos-arm64_x86_64"

# C compiler flags for cgo: header search path of the Speech SDK framework.
export CGO_CFLAGS="-I${SPEECHSDK_FRAMEWORK_PATH}/MicrosoftCognitiveServicesSpeech.framework/Headers"
# Append the previous C_INCLUDE_PATH only when it is non-empty, so no trailing
# ':' (an empty search-path element) is left behind when it was unset.
export C_INCLUDE_PATH="${SPEECHSDK_FRAMEWORK_PATH}/MicrosoftCognitiveServicesSpeech.framework/Headers${C_INCLUDE_PATH:+:$C_INCLUDE_PATH}"

# Linker flags for cgo so the Go build links against the Speech SDK framework.
export CGO_LDFLAGS="-F${SPEECHSDK_FRAMEWORK_PATH} -framework MicrosoftCognitiveServicesSpeech"

# Runtime library search path read by the macOS dynamic loader (dyld, not ld)
# so the built program can locate the Speech SDK binary when it starts.
export DYLD_LIBRARY_PATH="$SPEECHSDK_FRAMEWORK_PATH/MicrosoftCognitiveServicesSpeech.framework"

套用環境變數
source ~/.zshrc

下載 Speech SDK
# Create the SDK install directory.
mkdir -p "$SPEECHSDK_ROOT"
# Download the Speech SDK archive. curl ships with macOS (wget does not);
# -L follows the aka.ms redirect and -f fails on an HTTP error instead of
# saving an error page as the zip file.
curl -fL -o SpeechSDK-macOS.zip https://aka.ms/csspeech/macosbinary
unzip SpeechSDK-macOS.zip -d "$SPEECHSDK_ROOT"
# Remove the downloaded archive.
rm -f SpeechSDK-macOS.zip
測試代碼如下:
package main

import (
	"C"
	"fmt"
	"time"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/audio"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/common"
	"github.com/Microsoft/cognitive-services-speech-sdk-go/speech"
)

// synthesizeStartedHandler is the callback registered for the
// synthesis-started event. It prints a notice and closes the event
// arguments when it returns.
func synthesizeStartedHandler(event speech.SpeechSynthesisEventArgs) {
	defer event.Close()
	fmt.Print("Synthesis started.\n")
}

// synthesizingHandler is the callback registered for the synthesizing
// event. It prints the byte length of the event result's audio data and
// closes the event arguments when it returns.
func synthesizingHandler(event speech.SpeechSynthesisEventArgs) {
	defer event.Close()
	chunkSize := len(event.Result.AudioData)
	fmt.Printf("Synthesizing, audio chunk size %d.\n", chunkSize)
}

// synthesizedHandler is the callback registered for the
// synthesis-completed event. It prints the byte length of the event
// result's audio data and closes the event arguments when it returns.
func synthesizedHandler(event speech.SpeechSynthesisEventArgs) {
	defer event.Close()
	audioLen := len(event.Result.AudioData)
	fmt.Printf("Synthesized, audio length %d.\n", audioLen)
}

// cancelledHandler is the callback registered for the synthesis-canceled
// event. It prints a notice and closes the event arguments when it
// returns.
func cancelledHandler(event speech.SpeechSynthesisEventArgs) {
	defer event.Close()
	fmt.Print("Received a cancellation.\n")
}

func main() {
	// This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
	speechKey := "Your Speech Key"
	speechRegion := "Your Speech Region"

	audioConfig, err := audio.NewAudioConfigFromDefaultSpeakerOutput()
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer audioConfig.Close()
	speechConfig, err := speech.NewSpeechConfigFromSubscription(speechKey, speechRegion)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer speechConfig.Close()

	speechConfig.SetSpeechSynthesisVoiceName("en-US-AvaMultilingualNeural")

	speechSynthesizer, err := speech.NewSpeechSynthesizerFromConfig(speechConfig, audioConfig)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer speechSynthesizer.Close()

	speechSynthesizer.SynthesisStarted(synthesizeStartedHandler)
	speechSynthesizer.Synthesizing(synthesizingHandler)
	speechSynthesizer.SynthesisCompleted(synthesizedHandler)
	speechSynthesizer.SynthesisCanceled(cancelledHandler)

	for {
		text := "你好嗎,我是人"

		task := speechSynthesizer.SpeakTextAsync(text)
		var outcome speech.SpeechSynthesisOutcome
		select {
		case outcome = <-task: :="speech.NewCancellationDetailsFromSpeechSynthesisResult(outcome.Result)" _="" an="" and="" cancellation.errorcode="" cancellation.errordetails="" cancellation.reason="=" cancellation="" case="" code="" common.error="" common.synthesizingaudiocompleted="" defer="" did="" else="" error:="" errorcode="%d\nCANCELED:" errordetails="[%s]\nCANCELED:" fmt.printf="" fmt.println="" for="" if="" imed="" key="" n="" nil="" ot="" out="" outcome.close="" outcome.error="" outcome.result.reason="=" peech="" reason="%d.\n" region="" resource="" return="" s="" set="" speaker="" speech="" synthesized="" text="" the="" time.after="" time.second="" to="" values="" you="">

參考資料: