/**
 * Spark AI Voice Chat - Unity SDK
 *
 * Drop this MonoBehaviour onto any GameObject to add voice chat.
 *
 * Usage:
 *   var voice = gameObject.AddComponent<SparkVoiceChat>();
 *   voice.apiKey = "sk_your_key";
 *   voice.endpoint = "https://your-app.vercel.app/api/voice-chat";
 *
 *   voice.StartRecording();
 *   // ... user speaks ...
 *   voice.StopRecordingAndSend();
 *
 * Events:
 *   OnTranscriptReceived(string transcript)
 *   OnResponseReceived(string aiResponse)
 *   OnAudioPlayed()
 *   OnError(string error)
 */
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Networking;

public class SparkVoiceChat : MonoBehaviour
{
    [Header("Configuration")]
    public string apiKey;
    public string endpoint = "https://your-app.vercel.app/api/voice-chat";
    public string sessionId;

    [Header("Recording Settings")]
    public int sampleRate = 16000;
    public int maxRecordSeconds = 30;

    // Events

    /// <summary>Raised with the URL-decoded value of the "X-Transcript" response header.</summary>
    public event Action<string> OnTranscriptReceived;

    /// <summary>Raised with the URL-decoded value of the "X-AI-Response" response header.</summary>
    public event Action<string> OnResponseReceived;

    /// <summary>Raised after the audio reply has finished playing.</summary>
    public event Action OnAudioPlayed;

    /// <summary>Raised with a human-readable message when recording, the request or decoding fails.</summary>
    public event Action<string> OnError;

    private AudioClip _recordingClip;
    private string _micDevice;
    private bool _isRecording;
    private AudioSource _audioSource;

    private void Awake()
    {
        // A dedicated AudioSource is always added so that playback never
        // takes over a source that other components on this object rely on.
        _audioSource = gameObject.AddComponent<AudioSource>();

        if (string.IsNullOrEmpty(sessionId))
        {
            sessionId = Guid.NewGuid().ToString();
        }
    }

    private void OnDestroy()
    {
        // Release the microphone if the component disappears mid-recording.
        if (_isRecording)
        {
            Microphone.End(_micDevice);
            _isRecording = false;
        }

        if (_recordingClip != null)
        {
            Destroy(_recordingClip);
            _recordingClip = null;
        }
    }

    /// <summary>
    /// Start recording from the default microphone (the first entry of
    /// <c>Microphone.devices</c>). Does nothing if a recording is already in
    /// progress. Raises <see cref="OnError"/> when no microphone exists or the
    /// recording could not be started.
    /// </summary>
    public void StartRecording()
    {
        if (_isRecording)
        {
            return;
        }

        if (Microphone.devices.Length == 0)
        {
            OnError?.Invoke("No microphone found");
            return;
        }

        // Drop a clip left over from a recording that was never sent.
        if (_recordingClip != null)
        {
            Destroy(_recordingClip);
            _recordingClip = null;
        }

        _micDevice = Microphone.devices[0];
        _recordingClip = Microphone.Start(_micDevice, false, maxRecordSeconds, sampleRate);

        // Microphone.Start yields null when the recording fails to start;
        // flagging ourselves as recording would crash StopRecordingAndSend.
        if (_recordingClip == null)
        {
            OnError?.Invoke("Failed to start microphone recording");
            return;
        }

        _isRecording = true;
    }

    /// <summary>
    /// Stop recording and send the audio to the Spark AI voice chat API.
    /// Does nothing if no recording is in progress. Raises
    /// <see cref="OnError"/> with "No audio recorded" when nothing was captured.
    /// </summary>
    public void StopRecordingAndSend()
    {
        if (!_isRecording)
        {
            return;
        }

        // Both values must be read before Microphone.End.
        bool stillRecording = Microphone.IsRecording(_micDevice);
        int position = Microphone.GetPosition(_micDevice);
        Microphone.End(_micDevice);
        _isRecording = false;

        if (_recordingClip == null)
        {
            OnError?.Invoke("No audio recorded");
            return;
        }

        // NOTE(review): a non-looping recording stops by itself once the
        // buffer is full, after which GetPosition is expected to report 0.
        // In that case the whole clip holds valid audio - confirm on the
        // target platforms.
        if (!stillRecording && position == 0)
        {
            position = _recordingClip.samples;
        }

        if (position <= 0)
        {
            Destroy(_recordingClip);
            _recordingClip = null;
            OnError?.Invoke("No audio recorded");
            return;
        }

        // Trim to actual recorded length
        int channels = _recordingClip.channels;
        float[] samples = new float[position * channels];
        _recordingClip.GetData(samples, 0);

        // Use the clip's own frequency for the header: it is the rate the
        // samples were captured at, whereas the sampleRate field may have been
        // edited since StartRecording.
        byte[] wavBytes = EncodeToWav(samples, channels, _recordingClip.frequency);

        // The samples are copied out, so the clip is no longer needed.
        Destroy(_recordingClip);
        _recordingClip = null;

        StartCoroutine(SendVoiceChat(wavBytes));
    }

    /// <summary>
    /// Coroutine that posts the WAV bytes as multipart form data
    /// ("audio" file section plus "sessionId"), raises the transcript and
    /// response events from the reply headers, then decodes and plays the
    /// reply body as WAV audio.
    /// </summary>
    /// <param name="wavBytes">Complete WAV file to upload.</param>
    private IEnumerator SendVoiceChat(byte[] wavBytes)
    {
        if (string.IsNullOrEmpty(endpoint))
        {
            OnError?.Invoke("Endpoint is not set");
            yield break;
        }

        // A form section cannot be built from empty data, so make sure a
        // session id exists even if it was cleared after Awake.
        if (string.IsNullOrEmpty(sessionId))
        {
            sessionId = Guid.NewGuid().ToString();
        }

        List<IMultipartFormSection> form = new List<IMultipartFormSection>
        {
            new MultipartFormFileSection("audio", wavBytes, "audio.wav", "audio/wav"),
            new MultipartFormDataSection("sessionId", sessionId)
        };

        string transcript = null;
        string aiResponse = null;
        byte[] audioData = null;

        // UnityWebRequest.Post already attaches a DownloadHandlerBuffer, so the
        // binary audio reply is available without installing another handler.
        using (UnityWebRequest request = UnityWebRequest.Post(endpoint, form))
        {
            // Only send the header when a key is configured; a null value
            // would make SetRequestHeader throw inside the coroutine.
            if (!string.IsNullOrEmpty(apiKey))
            {
                request.SetRequestHeader("X-Api-Key", apiKey);
            }

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                // Prefer the server's error body, but fall back to the
                // transport error when the body is missing or empty.
                string body = request.downloadHandler != null ? request.downloadHandler.text : null;
                string errorMsg = string.IsNullOrEmpty(body) ? request.error : body;
                OnError?.Invoke(errorMsg);
                yield break;
            }

            // Parse text headers
            transcript = request.GetResponseHeader("X-Transcript");
            aiResponse = request.GetResponseHeader("X-AI-Response");
            audioData = request.downloadHandler != null ? request.downloadHandler.data : null;
        }

        // The request is disposed at this point, so its native buffers are not
        // held for the duration of the playback below.
        if (!string.IsNullOrEmpty(transcript))
        {
            OnTranscriptReceived?.Invoke(Uri.UnescapeDataString(transcript));
        }

        if (!string.IsNullOrEmpty(aiResponse))
        {
            OnResponseReceived?.Invoke(Uri.UnescapeDataString(aiResponse));
        }

        // Play audio response
        AudioClip clip = WavToAudioClip(audioData);
        if (clip == null)
        {
            // An empty body is treated as "no audio reply"; a non-empty body
            // that cannot be decoded is reported instead of silently dropped.
            if (audioData != null && audioData.Length > 0)
            {
                OnError?.Invoke("Unsupported or invalid audio response");
            }

            yield break;
        }

        _audioSource.clip = clip;
        _audioSource.Play();
        yield return new WaitWhile(() => _audioSource.isPlaying);
        OnAudioPlayed?.Invoke();

        // Clips made with AudioClip.Create are not released automatically.
        if (_audioSource.clip == clip)
        {
            _audioSource.clip = null;
        }

        Destroy(clip);
    }

    // --- WAV Encoding ---

    /// <summary>
    /// Encodes interleaved float samples as a 16-bit PCM WAV file with a
    /// 44-byte header.
    /// </summary>
    /// <param name="samples">Interleaved samples; values are clamped to [-1, 1].</param>
    /// <param name="channels">Number of interleaved channels.</param>
    /// <param name="sampleRate">Sample rate written to the header, in Hz.</param>
    /// <returns>The complete WAV file contents.</returns>
    private static byte[] EncodeToWav(float[] samples, int channels, int sampleRate)
    {
        const int HeaderSize = 44;
        const int BytesPerSample = 2; // 16-bit

        int sampleCount = samples.Length;
        int byteRate = sampleRate * channels * BytesPerSample;
        int dataSize = sampleCount * BytesPerSample;
        int fileSize = HeaderSize + dataSize;

        byte[] wav = new byte[fileSize];
        int p = 0;

        // RIFF header
        WriteString(wav, ref p, "RIFF");
        WriteInt32(wav, ref p, fileSize - 8); // size of everything after this field
        WriteString(wav, ref p, "WAVE");

        // fmt chunk
        WriteString(wav, ref p, "fmt ");
        WriteInt32(wav, ref p, 16);
        WriteInt16(wav, ref p, 1); // PCM
        WriteInt16(wav, ref p, (short)channels);
        WriteInt32(wav, ref p, sampleRate);
        WriteInt32(wav, ref p, byteRate);
        WriteInt16(wav, ref p, (short)(channels * BytesPerSample)); // block align
        WriteInt16(wav, ref p, 16); // bits per sample

        // data chunk
        WriteString(wav, ref p, "data");
        WriteInt32(wav, ref p, dataSize);

        // PCM samples
        for (int i = 0; i < sampleCount; i++)
        {
            short val = (short)(Mathf.Clamp(samples[i], -1f, 1f) * 32767f);
            WriteInt16(wav, ref p, val);
        }

        return wav;
    }

    // --- WAV Decoding ---

    /// <summary>
    /// Decodes a RIFF/WAVE file into an <see cref="AudioClip"/> named
    /// "VoiceResponse". The "fmt " and "data" chunks are located by walking the
    /// chunk list, so files with extra chunks are handled. Supported encodings
    /// are integer PCM (8, 16, 24 and 32 bit) and 32-bit IEEE float.
    /// </summary>
    /// <param name="wavData">Raw file bytes; may be null.</param>
    /// <returns>
    /// The decoded clip, or null when the data is missing, is not a WAVE file,
    /// uses an unsupported encoding or contains no complete sample frame.
    /// </returns>
    private static AudioClip WavToAudioClip(byte[] wavData)
    {
        const int RiffHeaderSize = 12;
        const int ChunkHeaderSize = 8;
        const int FormatPcm = 1;
        const int FormatIeeeFloat = 3;
        const int FormatExtensible = 0xFFFE;

        if (wavData == null || wavData.Length < RiffHeaderSize)
        {
            return null;
        }

        if (!HasTag(wavData, 0, "RIFF") || !HasTag(wavData, 8, "WAVE"))
        {
            return null;
        }

        // NOTE(review): BitConverter reads in host byte order; WAV is
        // little-endian, so this assumes a little-endian platform.
        int audioFormat = 0;
        int channels = 0;
        int rate = 0;
        int bitsPerSample = 0;
        bool haveFormat = false;
        int dataOffset = -1;
        int dataSize = 0;

        int pos = RiffHeaderSize;
        while (pos + ChunkHeaderSize <= wavData.Length)
        {
            uint chunkSize = BitConverter.ToUInt32(wavData, pos + 4);
            int bodyStart = pos + ChunkHeaderSize;
            long available = wavData.Length - bodyStart;

            if (HasTag(wavData, pos, "fmt "))
            {
                if (chunkSize < 16 || available < 16)
                {
                    return null;
                }

                audioFormat = BitConverter.ToUInt16(wavData, bodyStart);
                channels = BitConverter.ToUInt16(wavData, bodyStart + 2);
                rate = BitConverter.ToInt32(wavData, bodyStart + 4);
                bitsPerSample = BitConverter.ToUInt16(wavData, bodyStart + 14);

                // WAVE_FORMAT_EXTENSIBLE keeps the real format code in the
                // first two bytes of the SubFormat GUID.
                if (audioFormat == FormatExtensible && chunkSize >= 26 && available >= 26)
                {
                    audioFormat = BitConverter.ToUInt16(wavData, bodyStart + 24);
                }

                haveFormat = true;
            }
            else if (HasTag(wavData, pos, "data"))
            {
                dataOffset = bodyStart;

                // Streamed files may declare a placeholder size such as
                // 0xFFFFFFFF; never trust more than is actually present.
                dataSize = (int)Math.Min((long)chunkSize, available);
                break;
            }

            // Chunk bodies are padded to an even number of bytes.
            long next = (long)bodyStart + chunkSize + (chunkSize & 1);
            if (next > wavData.Length)
            {
                break;
            }

            pos = (int)next;
        }

        if (!haveFormat || dataOffset < 0 || channels <= 0 || rate <= 0)
        {
            return null;
        }

        bool isPcm = audioFormat == FormatPcm;
        bool isFloat = audioFormat == FormatIeeeFloat;
        bool supported =
            (isPcm && (bitsPerSample == 8 || bitsPerSample == 16 || bitsPerSample == 24 || bitsPerSample == 32)) ||
            (isFloat && bitsPerSample == 32);
        if (!supported)
        {
            return null;
        }

        int bytesPerSample = bitsPerSample / 8;
        int frameCount = dataSize / (bytesPerSample * channels);

        // AudioClip.Create rejects zero-length clips.
        if (frameCount <= 0)
        {
            return null;
        }

        // Only whole frames are decoded, so every read below stays inside
        // [dataOffset, dataOffset + dataSize).
        int sampleCount = frameCount * channels;
        float[] samples = new float[sampleCount];

        for (int i = 0; i < sampleCount; i++)
        {
            int byteIndex = dataOffset + i * bytesPerSample;

            if (isFloat)
            {
                samples[i] = BitConverter.ToSingle(wavData, byteIndex);
            }
            else if (bitsPerSample == 8)
            {
                // 8-bit PCM is unsigned with the midpoint at 128.
                samples[i] = (wavData[byteIndex] - 128) / 128f;
            }
            else if (bitsPerSample == 16)
            {
                samples[i] = BitConverter.ToInt16(wavData, byteIndex) / 32768f;
            }
            else if (bitsPerSample == 24)
            {
                // Place the three bytes in the top of an int, then shift back
                // down arithmetically to sign-extend.
                int packed = (wavData[byteIndex] << 8)
                    | (wavData[byteIndex + 1] << 16)
                    | (wavData[byteIndex + 2] << 24);
                samples[i] = (packed >> 8) / 8388608f;
            }
            else
            {
                samples[i] = BitConverter.ToInt32(wavData, byteIndex) / 2147483648f;
            }
        }

        AudioClip clip = AudioClip.Create("VoiceResponse", frameCount, channels, rate, false);
        clip.SetData(samples, 0);
        return clip;
    }

    /// <summary>
    /// Returns true when the ASCII characters of <paramref name="tag"/> appear
    /// in <paramref name="buf"/> starting at <paramref name="offset"/>.
    /// </summary>
    private static bool HasTag(byte[] buf, int offset, string tag)
    {
        if (offset < 0 || offset + tag.Length > buf.Length)
        {
            return false;
        }

        for (int i = 0; i < tag.Length; i++)
        {
            if (buf[offset + i] != (byte)tag[i])
            {
                return false;
            }
        }

        return true;
    }

    // --- Byte writing helpers ---

    /// <summary>Writes each character of <paramref name="val"/> as one byte and advances <paramref name="pos"/>.</summary>
    private static void WriteString(byte[] buf, ref int pos, string val)
    {
        foreach (char c in val)
        {
            buf[pos++] = (byte)c;
        }
    }

    /// <summary>Writes <paramref name="val"/> as four little-endian bytes and advances <paramref name="pos"/>.</summary>
    private static void WriteInt32(byte[] buf, ref int pos, int val)
    {
        buf[pos++] = (byte)(val & 0xFF);
        buf[pos++] = (byte)((val >> 8) & 0xFF);
        buf[pos++] = (byte)((val >> 16) & 0xFF);
        buf[pos++] = (byte)((val >> 24) & 0xFF);
    }

    /// <summary>Writes <paramref name="val"/> as two little-endian bytes and advances <paramref name="pos"/>.</summary>
    private static void WriteInt16(byte[] buf, ref int pos, short val)
    {
        buf[pos++] = (byte)(val & 0xFF);
        buf[pos++] = (byte)((val >> 8) & 0xFF);
    }
}