Generate Audio with Gemini
Generate audio from text using the Gemini Live API.
- Python
from google import genai
import asyncio
import contextlib
import wave
# Shared Gemini client, pinned to the v1alpha API version.
client = genai.Client(http_options={"api_version": "v1alpha"})

# Model requested when opening the live session.
model = "gemini-2.0-flash-exp"

# Instruction prefix that generate_audio_live prepends to every text segment;
# it tells the model to read the text aloud without adding its own
# introduction, acknowledgement, or closing remarks.
audio_negative_prompt = (
    "dont say OK , I will do this or that, just only read this story "
    "using voice expressions without introductions or ending ,"
    "more segments are comming ,dont say OK , I will do this or that:\n"
)
@contextlib.contextmanager
def wave_file(filename, channels=1, rate=24000, sample_width=2):
    """Open ``filename`` as a WAV file for writing and yield the configured writer.

    The writer has its channel count, sample width and frame rate set before
    it is handed to the caller, and it is closed when the ``with`` block
    exits, whether normally or through an exception.

    Args:
        filename: Path of the WAV file to create (an existing file is
            overwritten, since it is opened in ``"wb"`` mode).
        channels: Number of audio channels.
        rate: Frame rate in Hz.
        sample_width: Sample width in bytes.

    Yields:
        The ``wave`` writer object, ready for ``writeframes`` calls.

    NOTE(review): the defaults (mono, 24000 Hz, 2-byte samples) presumably
    match the audio format returned by the Live API -- confirm.
    """
    with wave.open(filename, "wb") as writer:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        yield writer
def generate_audio_live(text, output_filename):
    """Read ``text`` aloud via the Gemini Live API and save the audio as a WAV file.

    The module-level ``audio_negative_prompt`` is prepended to ``text`` and
    the result is sent to a live session as one complete turn. Every audio
    chunk the session returns is concatenated and written to
    ``output_filename`` through ``wave_file`` with its default parameters.

    Args:
        text: The text to be spoken.
        output_filename: Path of the WAV file to create or overwrite.

    Returns:
        ``output_filename``, unchanged.

    Note:
        The function runs its own event loop with ``asyncio.run``, so it
        cannot be called from code that is already executing inside a
        running event loop. If the session yields no audio data, the file is
        still written, with zero frames.
    """

    async def _generate():
        # The buffer is local to the coroutine; the collected bytes are
        # handed back through the return value instead of via closure state.
        audio = bytearray()
        config = {
            "generation_config": {"response_modalities": ["AUDIO"]}
        }
        async with client.aio.live.connect(model=model, config=config) as session:
            await session.send(input=audio_negative_prompt + text, end_of_turn=True)
            async for response in session.receive():
                # Messages without an audio payload are skipped.
                if response.data:
                    audio.extend(response.data)
        return bytes(audio)

    audio_bytes = asyncio.run(_generate())
    with wave_file(output_filename) as wf:
        wf.writeframes(audio_bytes)
    return output_filename