Skip to main content

High-quality text-to-speech for free

import os
import tempfile

import soundfile as sf
from kokoro import KPipeline
from pydub import AudioSegment

# Synthesize speech for `text` with Kokoro, stitch the generated audio chunks
# together, and export the result as a single MP3 at `output_file`.
#
# NOTE(review): `text` and `output_file` are not defined in the visible
# snippet; they are expected to be set before this point -- confirm.

# Sample rate (Hz) passed to soundfile when writing each chunk to WAV.
SAMPLE_RATE = 24000

pipeline = KPipeline(lang_code='a')

generator = pipeline(
    [text],
    voice='af_heart',
    speed=1,
)

combined = AudioSegment.empty()

# Each chunk is round-tripped through a WAV file so pydub can load it. The
# files are written into a temporary directory that is removed when the
# `with` block exits (even on error), rather than leaving temp_<i>.wav files
# behind in the current working directory.
with tempfile.TemporaryDirectory() as tmp_dir:
    for i, (_, _, audio) in enumerate(generator):
        chunk_path = os.path.join(tmp_dir, f'temp_{i}.wav')
        sf.write(chunk_path, audio, SAMPLE_RATE)
        combined += AudioSegment.from_wav(chunk_path)

combined.export(output_file, format="mp3")
print(f"Audio saved to {output_file}")