Lunos supports two audio workflows:
POST /v1/chat/completions
Authorization: Bearer YOUR_SECRET_KEY
Content-Type: application/json
Use input_audio in messages[].content[].
Audio data must be base64. Direct audio URLs are not supported in this format.
Content shape:
{
"type": "input_audio",
"input_audio": {
"data": "<BASE64_AUDIO_DATA>",
"format": "wav"
}
}
# Send a chat completion whose user message pairs a text instruction with an
# inline base64-encoded WAV audio part. Replace YOUR_SECRET_KEY and
# <BASE64_AUDIO_DATA> with real values before running.
curl -X POST "https://api.lunos.tech/v1/chat/completions" \
  -H "Authorization: Bearer YOUR_SECRET_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "google/gemini-2.5-flash",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Please transcribe this audio file." },
          {
            "type": "input_audio",
            "input_audio": {
              "data": "<BASE64_AUDIO_DATA>",
              "format": "wav"
            }
          }
        ]
      }
    ]
  }'
import base64
import requests

# Read the local audio file and base64-encode it so it can travel inline
# in the JSON request body (direct audio URLs are not supported here).
with open("audio.wav", "rb") as audio_file:
    encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")

# Multimodal user message: a text instruction followed by the audio part.
request_body = {
    "model": "google/gemini-2.5-flash",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please transcribe this audio file."},
                {
                    "type": "input_audio",
                    "input_audio": {"data": encoded_audio, "format": "wav"},
                },
            ],
        }
    ],
}

resp = requests.post(
    "https://api.lunos.tech/v1/chat/completions",
    headers={
        "Authorization": "Bearer YOUR_SECRET_KEY",
        "Content-Type": "application/json",
    },
    json=request_body,
)
print(resp.json())
import fs from "node:fs/promises";

// Load the audio file and convert it to base64 for inline transport.
const fileBuffer = await fs.readFile("audio.wav");
const encodedAudio = fileBuffer.toString("base64");

// Multimodal user message: a text instruction followed by the audio part.
const apiResponse = await fetch("https://api.lunos.tech/v1/chat/completions", {
  method: "POST",
  headers: {
    Authorization: "Bearer YOUR_SECRET_KEY",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    model: "google/gemini-2.5-flash",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "Please transcribe this audio file." },
          {
            type: "input_audio",
            input_audio: { data: encodedAudio, format: "wav" },
          },
        ],
      },
    ],
  }),
});
console.log(await apiResponse.json());
Supported formats depend on provider/model. Common values: wav, mp3, aiff, aac, ogg, flac, m4a, pcm16, pcm24.
To receive spoken output, set:
- modalities: ["text", "audio"]
- audio config (voice, format)
- stream: true

curl -N -X POST "https://api.lunos.tech/v1/chat/completions" \
-H "Authorization: Bearer YOUR_SECRET_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "openai/gpt-4o-audio-preview",
"messages": [
{ "role": "user", "content": "Say hello in a friendly tone." }
],
"modalities": ["text", "audio"],
"audio": {
"voice": "alloy",
"format": "wav"
},
"stream": true
}'
import base64
import json
import requests

# Request streamed audio output: the server returns SSE chunks whose deltas
# carry base64 audio fragments plus incremental transcript text.
payload = {
    "model": "openai/gpt-4o-audio-preview",
    "messages": [{"role": "user", "content": "Say hello in a friendly tone."}],
    "modalities": ["text", "audio"],
    "audio": {"voice": "alloy", "format": "wav"},
    "stream": True,
}

response = requests.post(
    "https://api.lunos.tech/v1/chat/completions",
    headers={
        "Authorization": "Bearer YOUR_SECRET_KEY",
        "Content-Type": "application/json",
    },
    json=payload,
    stream=True,
)
# Fail fast on auth/model errors: an error response is a JSON body, not an
# SSE stream, and would otherwise be silently skipped below.
response.raise_for_status()

audio_chunks = []
transcript_chunks = []
for line in response.iter_lines():
    if not line:
        continue  # skip SSE keep-alive blank lines
    decoded = line.decode("utf-8")
    if not decoded.startswith("data: "):
        continue
    data = decoded[len("data: "):]
    if data.strip() == "[DONE]":
        break  # end-of-stream sentinel
    chunk = json.loads(data)
    # Some chunks (e.g. a final usage chunk) carry an empty "choices" list;
    # chunk.get("choices", [{}])[0] would raise IndexError there because the
    # .get() default only applies when the key is absent entirely.
    choices = chunk.get("choices") or [{}]
    audio = choices[0].get("delta", {}).get("audio", {})
    if audio.get("data"):
        audio_chunks.append(audio["data"])
    if audio.get("transcript"):
        transcript_chunks.append(audio["transcript"])

# Reassemble the transcript and decode the concatenated base64 audio.
transcript = "".join(transcript_chunks)
audio_bytes = base64.b64decode("".join(audio_chunks))
with open("output.wav", "wb") as f:
    f.write(audio_bytes)
print(transcript)
const response = await fetch("https://api.lunos.tech/v1/chat/completions", {
  method: "POST",
  headers: {
    Authorization: "Bearer YOUR_SECRET_KEY",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    model: "openai/gpt-4o-audio-preview",
    messages: [{ role: "user", content: "Say hello in a friendly tone." }],
    modalities: ["text", "audio"],
    audio: {
      voice: "alloy",
      format: "wav",
    },
    stream: true,
  }),
});
// Fail fast on HTTP errors: an error response is a JSON body, not an SSE
// stream, and would otherwise be silently dropped by the parser below.
if (!response.ok) {
  throw new Error(`request failed: ${response.status} ${await response.text()}`);
}

const audioChunks: string[] = [];
const transcriptChunks: string[] = [];

// Parse one SSE line: collect base64 audio data and transcript deltas.
const handleLine = (line: string): void => {
  if (!line.startsWith("data: ")) return;
  const data = line.slice(6).trim();
  if (data === "[DONE]") return;
  try {
    const chunk = JSON.parse(data);
    const audio = chunk?.choices?.[0]?.delta?.audio;
    if (audio?.data) audioChunks.push(audio.data);
    if (audio?.transcript) transcriptChunks.push(audio.transcript);
  } catch {
    // Best-effort: ignore keep-alive or partial lines that are not valid JSON.
  }
};

const reader = response.body?.getReader();
const decoder = new TextDecoder();
if (reader) {
  let buffer = "";
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    // Split on newlines; keep the last (possibly partial) line in the buffer.
    const lines = buffer.split("\n");
    buffer = lines.pop() ?? "";
    for (const line of lines) handleLine(line);
  }
  // Flush the decoder and process a final line that lacked a trailing
  // newline — the original dropped this leftover buffer entirely.
  buffer += decoder.decode();
  if (buffer) handleLine(buffer);
}

const transcript = transcriptChunks.join("");
const fullAudioB64 = audioChunks.join("");
console.log(transcript, fullAudioB64.slice(0, 80));
When requesting audio output, chunks usually contain:
{
"choices": [
{
"delta": {
"audio": {
"data": "<base64-audio-chunk>",
"transcript": "Hello"
}
}
}
]
}
| Option | Meaning |
|---|---|
| voice | Voice preset for output speech (model-dependent) |
| format | Output audio format (for example wav, mp3, flac, opus, pcm16) |
Related capability fields: inputModalities / outputModalities.
