Kolbo.AI Docs
Developer API

Transcription

Transcribe audio and video files to text with SRT subtitles using the Kolbo API.

Transcribe audio and video files to text with word-level SRT subtitles. Supports both URL input and direct file upload.

No model selection — transcription always uses the built-in STT engine. There is no model parameter.

Endpoint

POST /api/v1/transcribe

Accepts both application/json (for URL-based input) and multipart/form-data (for file uploads).

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| audio_url | string | No* | URL of an audio file to transcribe |
| video_url | string | No* | URL of a video file (audio is extracted automatically) |
| file_url | string | No* | URL of any audio or video file to transcribe |
| file | file | No* | Audio or video file upload (multipart). Max 500 MB. |

*At least one of audio_url, video_url, file_url, or file must be provided.

Examples

From Audio URL

curl -X POST https://api.kolbo.ai/api/v1/transcribe \
  -H "X-API-Key: kolbo_live_..." \
  -H "Content-Type: application/json" \
  -d '{"audio_url": "https://example.com/podcast-episode.mp3"}'

From Video URL

curl -X POST https://api.kolbo.ai/api/v1/transcribe \
  -H "X-API-Key: kolbo_live_..." \
  -H "Content-Type: application/json" \
  -d '{"video_url": "https://example.com/interview.mp4"}'

File Upload

curl -X POST https://api.kolbo.ai/api/v1/transcribe \
  -H "X-API-Key: kolbo_live_..." \
  -F "file=@recording.mp3"

Response

Generation Started

{
  "success": true,
  "generation_id": "txn_abc123",
  "type": "transcription",
  "model": "auto",
  "credits_charged": 5,
  "poll_url": "/api/v1/generate/txn_abc123/status",
  "poll_interval_hint": 8
}

Completed Status

{
  "success": true,
  "generation_id": "txn_abc123",
  "type": "transcription",
  "state": "completed",
  "progress": 100,
  "result": {
    "text": "Hello and welcome to today's episode...",
    "srt_url": "https://cdn.kolbo.ai/transcriptions/subtitles.srt",
    "txt_url": "https://cdn.kolbo.ai/transcriptions/transcript.txt",
    "word_by_word_srt_url": "https://cdn.kolbo.ai/transcriptions/words.srt",
    "srt_content": "1\n00:00:00,000 --> 00:00:03,500\nHello and welcome to today's episode\n\n",
    "duration": 245.8,
    "audio_url": "https://cdn.kolbo.ai/transcriptions/audio.mp3",
    "model": "whisper",
    "created_at": "2026-04-10T14:20:00Z"
  }
}

JavaScript Example — URL Input

const KOLBO_API_KEY = "kolbo_live_..."; // Replace with your API key
const BASE_URL = "https://api.kolbo.ai";

/**
 * Submit a JSON transcription request and poll its status until it finishes.
 *
 * @param {object} body - JSON request body, e.g. `{ audio_url: "https://..." }`.
 * @returns {Promise<object>} The `result` object of the completed status response.
 * @throws {Error} When the submit response is not successful, or when the job
 *   reports the `failed` state.
 */
async function transcribe(body) {
  const response = await fetch(`${BASE_URL}/api/v1/transcribe`, {
    method: "POST",
    headers: {
      "X-API-Key": KOLBO_API_KEY,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(body),
  });

  const data = await response.json();
  // Fall back to a readable message instead of producing Error("undefined").
  if (!data.success) throw new Error(data.error || "Request failed");

  const pollUrl = data.poll_url;

  // Honor the server's suggested polling cadence when it is usable.
  // NOTE(review): poll_interval_hint is assumed to be in seconds — confirm.
  const hintSeconds = Number(data.poll_interval_hint);
  const pollDelayMs =
    Number.isFinite(hintSeconds) && hintSeconds > 0 ? hintSeconds * 1000 : 5000;

  // Poll for result; the delay comes first so the job has time to start.
  while (true) {
    await new Promise((resolve) => setTimeout(resolve, pollDelayMs));
    const statusRes = await fetch(`${BASE_URL}${pollUrl}`, {
      headers: { "X-API-Key": KOLBO_API_KEY },
    });
    const status = await statusRes.json();

    if (status.state === "completed") {
      return status.result;
    }
    if (status.state === "failed") {
      throw new Error(status.error || "Transcription failed");
    }
    // Avoid logging "undefined%" when the status has no progress yet.
    console.log(`Progress: ${status.progress ?? 0}%`);
  }
}

/**
 * Example entry point: transcribes a hosted audio file and logs the
 * transcript text, duration, and subtitle URLs from the result.
 */
async function main() {
  // Request body for a URL-based transcription
  const request = { audio_url: "https://example.com/podcast.mp3" };
  const transcript = await transcribe(request);

  console.log("Full text:", transcript.text);
  console.log("Duration:", transcript.duration, "seconds");
  console.log("SRT file:", transcript.srt_url);
  console.log("Word-level SRT:", transcript.word_by_word_srt_url);
}

// Report any rejection from the example run on stderr.
main().catch((err) => console.error(err));

JavaScript Example — File Upload

const KOLBO_API_KEY = "kolbo_live_..."; // Replace with your API key
const BASE_URL = "https://api.kolbo.ai";

/**
 * Upload a local audio/video file for transcription and poll until it finishes.
 *
 * @param {string} filePath - Path of the local file to upload.
 * @returns {Promise<object>} The `result` object of the completed status response.
 * @throws {Error} When the file cannot be read, the submit response is not
 *   successful, or the job reports the `failed` state.
 */
async function transcribeFile(filePath) {
  // Node.js: read the file asynchronously so the event loop is not blocked.
  const fs = await import("node:fs/promises");
  const path = await import("node:path");

  const fileBytes = await fs.readFile(filePath);
  const formData = new FormData();
  formData.append("file", new Blob([fileBytes]), path.basename(filePath));

  // No explicit Content-Type: fetch sets the multipart boundary itself.
  const response = await fetch(`${BASE_URL}/api/v1/transcribe`, {
    method: "POST",
    headers: { "X-API-Key": KOLBO_API_KEY },
    body: formData,
  });

  const data = await response.json();
  // Fall back to a readable message instead of producing Error("undefined").
  if (!data.success) throw new Error(data.error || "Request failed");

  const pollUrl = data.poll_url;

  // Honor the server's suggested polling cadence when it is usable.
  // NOTE(review): poll_interval_hint is assumed to be in seconds — confirm.
  const hintSeconds = Number(data.poll_interval_hint);
  const pollDelayMs =
    Number.isFinite(hintSeconds) && hintSeconds > 0 ? hintSeconds * 1000 : 5000;

  // Poll for result; the delay comes first so the job has time to start.
  while (true) {
    await new Promise((resolve) => setTimeout(resolve, pollDelayMs));
    const statusRes = await fetch(`${BASE_URL}${pollUrl}`, {
      headers: { "X-API-Key": KOLBO_API_KEY },
    });
    const status = await statusRes.json();

    if (status.state === "completed") {
      return status.result;
    }
    if (status.state === "failed") {
      throw new Error(status.error || "Transcription failed");
    }
    // Avoid logging "undefined%" when the status has no progress yet.
    console.log(`Progress: ${status.progress ?? 0}%`);
  }
}

/**
 * Example entry point: uploads a local recording and logs the transcript
 * text and the SRT download URL from the result.
 */
async function main() {
  const localFile = "./recording.mp3";
  const transcript = await transcribeFile(localFile);

  console.log("Transcript:", transcript.text);
  console.log("SRT download:", transcript.srt_url);
}

// Report any rejection from the example run on stderr.
main().catch((err) => console.error(err));

Python Example — URL Input

import requests
import time

KOLBO_API_KEY = "kolbo_live_..."  # Replace with your API key
BASE_URL = "https://api.kolbo.ai"
HEADERS = {"X-API-Key": KOLBO_API_KEY}
# Network timeout (seconds) for each HTTP call; without one, requests can
# wait indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30
# Polling delay (seconds) used when the server does not suggest one.
DEFAULT_POLL_INTERVAL = 5

# Transcribe from audio URL
response = requests.post(
    f"{BASE_URL}/api/v1/transcribe",
    headers=HEADERS,
    json={"audio_url": "https://example.com/podcast.mp3"},
    timeout=REQUEST_TIMEOUT,
)
data = response.json()
if not data.get("success"):
    raise Exception(data.get("error", "Request failed"))

poll_url = data["poll_url"]

# Honor the server's suggested polling cadence when it is usable.
# NOTE(review): poll_interval_hint is assumed to be in seconds -- confirm.
poll_interval = data.get("poll_interval_hint")
if not isinstance(poll_interval, (int, float)) or poll_interval <= 0:
    poll_interval = DEFAULT_POLL_INTERVAL

# Poll for result; sleep first so the job has time to start.
while True:
    time.sleep(poll_interval)
    status = requests.get(
        f"{BASE_URL}{poll_url}", headers=HEADERS, timeout=REQUEST_TIMEOUT
    ).json()

    if status["state"] == "completed":
        result = status["result"]
        print("Full text:", result["text"])
        print("Duration:", result["duration"], "seconds")
        print("SRT file:", result["srt_url"])
        print("Word-level SRT:", result["word_by_word_srt_url"])
        break
    if status["state"] == "failed":
        raise Exception(status.get("error", "Transcription failed"))
    print(f"Progress: {status.get('progress', 0)}%")

Python Example — File Upload

import requests
import time

KOLBO_API_KEY = "kolbo_live_..."  # Replace with your API key
BASE_URL = "https://api.kolbo.ai"
HEADERS = {"X-API-Key": KOLBO_API_KEY}
# Network timeout (seconds) for each HTTP call; without one, requests can
# wait indefinitely on an unresponsive server. This bounds individual
# socket waits, not the total upload time.
# NOTE(review): raise this if large uploads time out while the server responds.
REQUEST_TIMEOUT = 60
# Polling delay (seconds) used when the server does not suggest one.
DEFAULT_POLL_INTERVAL = 5

# Upload a local file; the handle stays open only for the duration of the POST.
with open("recording.mp3", "rb") as f:
    response = requests.post(
        f"{BASE_URL}/api/v1/transcribe",
        headers=HEADERS,
        files={"file": ("recording.mp3", f)},
        timeout=REQUEST_TIMEOUT,
    )

data = response.json()
if not data.get("success"):
    raise Exception(data.get("error", "Request failed"))

poll_url = data["poll_url"]

# Honor the server's suggested polling cadence when it is usable.
# NOTE(review): poll_interval_hint is assumed to be in seconds -- confirm.
poll_interval = data.get("poll_interval_hint")
if not isinstance(poll_interval, (int, float)) or poll_interval <= 0:
    poll_interval = DEFAULT_POLL_INTERVAL

# Poll for result; sleep first so the job has time to start.
while True:
    time.sleep(poll_interval)
    status = requests.get(
        f"{BASE_URL}{poll_url}", headers=HEADERS, timeout=REQUEST_TIMEOUT
    ).json()

    if status["state"] == "completed":
        result = status["result"]
        print("Transcript:", result["text"])
        print("SRT download:", result["srt_url"])
        print("Text download:", result["txt_url"])
        break
    if status["state"] == "failed":
        raise Exception(status.get("error", "Transcription failed"))
    print(f"Progress: {status.get('progress', 0)}%")

Tips

  • Transcription typically takes 10-30 seconds for short clips, longer for full-length videos
  • Audio is automatically extracted from video files -- no preprocessing needed
  • The srt_content field contains the raw SRT text inline, useful if you want to process subtitles without downloading the file
  • The word_by_word_srt_url provides word-level timing, ideal for karaoke-style subtitles or precise editing
  • Maximum file size for uploads is 500 MB
  • Supported formats include MP3, WAV, MP4, MOV, WebM, and most common audio/video formats