Speech & Sound
Text-to-speech and sound effect generation with the Kolbo API.
Convert text to speech or generate sound effects using ElevenLabs and other providers.
Model identifiers are Kolbo-specific — always fetch available models from GET /api/v1/models?type=speech (or type=sound) before specifying a model. Omitting model uses the server default, which is recommended for most use cases.
List Voices
Discover available voices before generating speech. Returns both platform preset voices and your custom cloned/designed voices.
Endpoint
GET /api/v1/voices
Query Parameters
| Parameter | Type | Description |
|---|---|---|
| provider | string | Filter by provider (e.g., "elevenLabs", "google") |
| language | string | Filter by language name or code (e.g., "English", "en-US") |
| gender | string | Filter by gender (e.g., "Female", "Male") |
Example
curl "https://api.kolbo.ai/api/v1/voices?gender=Female" \
-H "X-API-Key: kolbo_live_..."
Response
{
"success": true,
"voices": [
{
"voice_id": "EXAVITQu4vr4xnSDxMaL",
"name": "Rachel",
"provider": "elevenLabs",
"language": "English",
"language_code": "en-US",
"gender": "Female",
"accent": "American",
"preview_url": "https://...",
"styles": ["conversational", "calm"],
"custom": false
},
{
"voice_id": "custom_abc123",
"name": "My Cloned Voice",
"provider": "elevenlabs",
"language": "auto",
"language_code": null,
"gender": null,
"accent": null,
"preview_url": null,
"styles": [],
"custom": true
}
],
"count": 152
}
Use the voice_id from this response as the voice parameter in the speech endpoint. You can also pass a voice name (e.g., "Rachel") and the API will resolve it automatically.
Text to Speech
Endpoint
POST /api/v1/generate/speech
Request Body
| Field | Type | Required | Description |
|---|---|---|---|
| text | string | Yes | Text to convert to speech |
| voice | string | No | Voice ID or name from GET /api/v1/voices (default: "Rachel") |
| model | string | No | TTS model from GET /api/v1/models?type=speech (default: server-selected) |
| language | string | No | Language code, e.g. "en-US", "he-IL" (default: "en-US") |
Examples
Simple (Recommended)
Omit model to use the server default. You can use a voice name directly:
curl -X POST https://api.kolbo.ai/api/v1/generate/speech \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{
"text": "Welcome to Kolbo AI, the all-in-one creative platform.",
"voice": "Rachel"
}'
With Specific Model
To choose a specific model, first fetch identifiers from GET /api/v1/models?type=speech, then pass the identifier value (e.g., eleven_v3, google_tts):
curl -X POST https://api.kolbo.ai/api/v1/generate/speech \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{
"text": "Welcome to Kolbo AI, the all-in-one creative platform.",
"voice": "Rachel",
"model": "eleven_v3"
}'
With Language
curl -X POST https://api.kolbo.ai/api/v1/generate/speech \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{
"text": "Welcome to Kolbo AI, the all-in-one creative platform.",
"voice": "Rachel",
"language": "en-US"
}'
Response
Generation Started
{
"success": true,
"generation_id": "tts_abc123",
"type": "speech",
"model": "eleven_v3",
"credits_charged": 5,
"poll_url": "/api/v1/generate/tts_abc123/status",
"poll_interval_hint": 3
}
Completed Status
{
"success": true,
"generation_id": "tts_abc123",
"state": "completed",
"progress": 100,
"result": {
"urls": ["https://cdn.kolbo.ai/audio/..."],
"model": "eleven_v3",
"voice": "Rachel",
"duration": 4.5
}
}
JavaScript Example
const KOLBO_API_KEY = "kolbo_live_..."; // Replace with your API key
const BASE_URL = "https://api.kolbo.ai/api";

/**
 * Submits a text-to-speech request and polls the status endpoint until the
 * audio is ready.
 *
 * @returns {Promise<object>} The `result` object of the completed status
 *   response (its `urls` array holds the audio URL).
 * @throws {Error} If the request is rejected, the generation fails, or it
 *   does not finish within the wait limit.
 */
async function generateSpeech() {
  // Upper bound on total polling time so a stuck job cannot loop forever.
  const maxWaitMs = 5 * 60 * 1000;

  const response = await fetch(`${BASE_URL}/v1/generate/speech`, {
    method: "POST",
    headers: {
      "X-API-Key": KOLBO_API_KEY,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      text: "Welcome to Kolbo AI, the all-in-one creative platform.",
      voice: "Rachel",
    }),
  });
  const data = await response.json();
  if (!data.success) throw new Error(data.error || "Request failed");

  // poll_url is a root-relative path (e.g. "/api/v1/generate/<id>/status"),
  // so resolve it against the origin; appending it to BASE_URL would repeat
  // the "/api" prefix.
  const statusUrl = new URL(data.poll_url, BASE_URL);
  // Use the server's suggested interval (assumed to be in seconds, matching
  // the 3 s wait this example used before); fall back to 3 s when absent.
  const pollIntervalMs = (data.poll_interval_hint ?? 3) * 1000;
  const deadline = Date.now() + maxWaitMs;

  // Poll for result
  while (Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    const statusRes = await fetch(statusUrl, {
      headers: { "X-API-Key": KOLBO_API_KEY },
    });
    const status = await statusRes.json();
    if (status.state === "completed") {
      console.log("Audio URL:", status.result.urls[0]);
      return status.result;
    }
    if (status.state === "failed") {
      throw new Error(status.error || "Generation failed");
    }
  }
  throw new Error("Timed out waiting for generation to finish");
}
generateSpeech().catch(console.error);
Python Example
import time
from urllib.parse import urljoin

import requests

KOLBO_API_KEY = "kolbo_live_..."  # Replace with your API key
BASE_URL = "https://api.kolbo.ai/api"
MAX_WAIT_SECONDS = 300  # Stop polling eventually instead of looping forever

# Submit the text-to-speech request.
response = requests.post(
    f"{BASE_URL}/v1/generate/speech",
    headers={"X-API-Key": KOLBO_API_KEY},
    json={
        "text": "Welcome to Kolbo AI, the all-in-one creative platform.",
        "voice": "Rachel",
    },
    timeout=30,
)
data = response.json()
if not data.get("success"):
    raise Exception(data.get("error", "Request failed"))

# poll_url is a root-relative path (e.g. "/api/v1/generate/<id>/status"), so
# resolve it against the origin; appending it to BASE_URL would repeat "/api".
status_url = urljoin(BASE_URL, data["poll_url"])
# Use the server's suggested interval (assumed to be in seconds); default to 3.
poll_interval = data.get("poll_interval_hint", 3)
deadline = time.monotonic() + MAX_WAIT_SECONDS

# Poll until the generation completes, fails, or the deadline passes.
while time.monotonic() < deadline:
    time.sleep(poll_interval)
    status = requests.get(
        status_url,
        headers={"X-API-Key": KOLBO_API_KEY},
        timeout=30,
    ).json()
    if status["state"] == "completed":
        print("Audio URL:", status["result"]["urls"][0])
        break
    if status["state"] == "failed":
        raise Exception(status.get("error", "Generation failed"))
else:
    raise TimeoutError("Timed out waiting for generation to finish")
Credits
Speech credits are character-based: ceil(text.length / 100) x model.credit
For example, a 250-character text with a model that costs 1 credit per 100 chars: ceil(250 / 100) x 1 = 3 credits.
Sound Effects
Endpoint
POST /api/v1/generate/sound
Request Body
| Field | Type | Required | Description |
|---|---|---|---|
| prompt | string | Yes | Description of the sound effect |
| model | string | No | Model from GET /api/v1/models?type=sound (default: server-selected) |
| duration | number | No | Duration in seconds (omit for auto) |
Examples
Simple (Recommended)
curl -X POST https://api.kolbo.ai/api/v1/generate/sound \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{"prompt": "Thunder clap followed by heavy rain"}'
With Specific Model
To choose a specific model, first fetch identifiers from GET /api/v1/models?type=sound, then pass the identifier value (e.g., elevenlabs-sound-effects-v1, fal-ai/mmaudio-v2/text-to-audio):
curl -X POST https://api.kolbo.ai/api/v1/generate/sound \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{
"prompt": "Thunder clap followed by heavy rain",
"model": "elevenlabs-sound-effects-v1"
}'
With Duration
curl -X POST https://api.kolbo.ai/api/v1/generate/sound \
-H "X-API-Key: kolbo_live_..." \
-H "Content-Type: application/json" \
-d '{
"prompt": "Gentle ocean waves on a sandy beach",
"duration": 10
}'
Response
Generation Started
{
"success": true,
"generation_id": "snd_abc123",
"type": "sound",
"model": "auto",
"credits_charged": 7,
"poll_url": "/api/v1/generate/snd_abc123/status",
"poll_interval_hint": 5
}
Completed Status
{
"success": true,
"generation_id": "snd_abc123",
"state": "completed",
"progress": 100,
"result": {
"urls": ["https://cdn.kolbo.ai/audio/..."],
"model": "elevenlabs-sound-effects-v1",
"duration": 8
}
}
JavaScript Example
const KOLBO_API_KEY = "kolbo_live_..."; // Replace with your API key
const BASE_URL = "https://api.kolbo.ai/api";

/**
 * Submits a sound-effect generation request and polls the status endpoint
 * until the audio is ready.
 *
 * @returns {Promise<object>} The `result` object of the completed status
 *   response (its `urls` array holds the audio URL).
 * @throws {Error} If the request is rejected, the generation fails, or it
 *   does not finish within the wait limit.
 */
async function generateSound() {
  // Upper bound on total polling time so a stuck job cannot loop forever.
  const maxWaitMs = 5 * 60 * 1000;

  const response = await fetch(`${BASE_URL}/v1/generate/sound`, {
    method: "POST",
    headers: {
      "X-API-Key": KOLBO_API_KEY,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      prompt: "Thunder clap followed by heavy rain",
    }),
  });
  const data = await response.json();
  if (!data.success) throw new Error(data.error || "Request failed");

  // poll_url is a root-relative path (e.g. "/api/v1/generate/<id>/status"),
  // so resolve it against the origin; appending it to BASE_URL would repeat
  // the "/api" prefix.
  const statusUrl = new URL(data.poll_url, BASE_URL);
  // Use the server's suggested interval (assumed to be in seconds, matching
  // the 5 s wait this example used before); fall back to 5 s when absent.
  const pollIntervalMs = (data.poll_interval_hint ?? 5) * 1000;
  const deadline = Date.now() + maxWaitMs;

  // Poll for result
  while (Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    const statusRes = await fetch(statusUrl, {
      headers: { "X-API-Key": KOLBO_API_KEY },
    });
    const status = await statusRes.json();
    if (status.state === "completed") {
      console.log("Sound URL:", status.result.urls[0]);
      return status.result;
    }
    if (status.state === "failed") {
      throw new Error(status.error || "Generation failed");
    }
  }
  throw new Error("Timed out waiting for generation to finish");
}
generateSound().catch(console.error);
Python Example
import time
from urllib.parse import urljoin

import requests

KOLBO_API_KEY = "kolbo_live_..."  # Replace with your API key
BASE_URL = "https://api.kolbo.ai/api"
MAX_WAIT_SECONDS = 300  # Stop polling eventually instead of looping forever

# Submit the sound-effect generation request.
response = requests.post(
    f"{BASE_URL}/v1/generate/sound",
    headers={"X-API-Key": KOLBO_API_KEY},
    json={
        "prompt": "Thunder clap followed by heavy rain",
    },
    timeout=30,
)
data = response.json()
if not data.get("success"):
    raise Exception(data.get("error", "Request failed"))

# poll_url is a root-relative path (e.g. "/api/v1/generate/<id>/status"), so
# resolve it against the origin; appending it to BASE_URL would repeat "/api".
status_url = urljoin(BASE_URL, data["poll_url"])
# Use the server's suggested interval (assumed to be in seconds); default to 5.
poll_interval = data.get("poll_interval_hint", 5)
deadline = time.monotonic() + MAX_WAIT_SECONDS

# Poll until the generation completes, fails, or the deadline passes.
while time.monotonic() < deadline:
    time.sleep(poll_interval)
    status = requests.get(
        status_url,
        headers={"X-API-Key": KOLBO_API_KEY},
        timeout=30,
    ).json()
    if status["state"] == "completed":
        print("Sound URL:", status["result"]["urls"][0])
        break
    if status["state"] == "failed":
        raise Exception(status.get("error", "Generation failed"))
else:
    raise TimeoutError("Timed out waiting for generation to finish")
Tips
- Speech generation is fast (5-30 seconds)
- Sound effects typically take 5-30 seconds
- Both return audio URLs that can be downloaded or streamed
- Use GET /api/v1/voices to discover available voices before generating speech