API
Speech-to-Text
Transcribe audio to text.
Parameters
| Parameter | Type | Required | Description |
|---|---|---|---|
| `model` | string | Yes | Transcription model ID, e.g. `whisper-1`. |
| `file` | file | Yes | Audio file (mp3/mp4/mpeg/mpga/m4a/wav/webm, max 25MB). |
| `language` | string | No | ISO-639-1 language code, e.g. `en`, `zh`. Auto-detected if omitted. |
| `prompt` | string | No | Optional text to guide the model's style or continue a previous segment. |
| `response_format` | string | No | `json` (default), `text`, `srt`, `verbose_json`, `vtt`. |
| `temperature` | number | No | Sampling temperature 0–1. Default: 0. |
Endpoint: POST /v1/audio/transcriptions (multipart/form-data)
Examples
curl -X POST https://silkdock.ai/v1/audio/transcriptions \-H "Authorization: Bearer $SILKDOCK_API_KEY" \-F "model=whisper-1" \-F "[email protected]" \-F "language=en"curl -X POST https://silkdock.ai/v1/audio/transcriptions ^-H "Authorization: Bearer %SILKDOCK_API_KEY%" ^-F "model=whisper-1" ^-F "[email protected]" ^-F "language=en"http -f POST https://silkdock.ai/v1/audio/transcriptions \Authorization:"Bearer $SILKDOCK_API_KEY" \model=whisper-1 \language=en \[email protected]# wget does not natively support multipart/form-data file uploads.# Use curl instead:curl -X POST https://silkdock.ai/v1/audio/transcriptions \-H "Authorization: Bearer $SILKDOCK_API_KEY" \-F "model=whisper-1" \-F "[email protected]" \-F "language=en"$response = Invoke-RestMethod -Uri "https://silkdock.ai/v1/audio/transcriptions" `-Method Post `-Headers @{ Authorization = "Bearer $env:SILKDOCK_API_KEY" } `-Form @{ model = "whisper-1" language = "en" file = Get-Item -Path "audio.mp3"}Write-Output $response.textconst { OpenAI } = require("openai");const fs = require("fs");const client = new OpenAI({apiKey: process.env.SILKDOCK_API_KEY,baseURL: "https://silkdock.ai/v1",});const transcript = await client.audio.transcriptions.create({model: "whisper-1",file: fs.createReadStream("audio.mp3"),language: "en",});console.log(transcript.text);import { readFileSync } from "fs";const form = new FormData();form.append("model", "whisper-1");form.append("file", new Blob([readFileSync("audio.mp3")], { type: "audio/mpeg" }), "audio.mp3");form.append("language", "en");const res = await fetch("https://silkdock.ai/v1/audio/transcriptions", {method: "POST",headers: { "Authorization": `Bearer ${process.env.SILKDOCK_API_KEY}` },body: form,});const { text } = await res.json();console.log(text);import axios from "axios";import { readFileSync } from "fs";import FormData from "form-data";const form = new FormData();form.append("model", "whisper-1");form.append("language", "en");form.append("file", 
readFileSync("audio.mp3"), {filename: "audio.mp3",contentType: "audio/mpeg",});const { data } = await axios.post("https://silkdock.ai/v1/audio/transcriptions",form,{ headers: { Authorization: `Bearer ${process.env.SILKDOCK_API_KEY}`, ...form.getHeaders(), },});console.log(data.text);const form = new FormData();form.append("model", "whisper-1");form.append("language", "en");form.append("file", fileInput.files[0]); // fileInput is an <input type="file"> element$.ajax({url: "https://silkdock.ai/v1/audio/transcriptions",type: "POST",headers: { Authorization: `Bearer ${SILKDOCK_API_KEY}` },data: form,processData: false,contentType: false,success(data) { console.log(data.text);},});const form = new FormData();form.append("model", "whisper-1");form.append("language", "en");form.append("file", fileInput.files[0]); // fileInput is an <input type="file"> elementconst xhr = new XMLHttpRequest();xhr.open("POST", "https://silkdock.ai/v1/audio/transcriptions");xhr.setRequestHeader("Authorization", `Bearer ${SILKDOCK_API_KEY}`);xhr.onload = () => {const data = JSON.parse(xhr.responseText);console.log(data.text);};xhr.send(form);const request = require("request");const fs = require("fs");request.post({ url: "https://silkdock.ai/v1/audio/transcriptions", headers: { Authorization: `Bearer ${process.env.SILKDOCK_API_KEY}` }, formData: { model: "whisper-1", language: "en", file: { value: fs.createReadStream("audio.mp3"), options: { filename: "audio.mp3", contentType: "audio/mpeg" }, }, },},(err, _res, body) => { if (err) throw err; console.log(JSON.parse(body).text);});const unirest = require("unirest");const fs = require("fs");unirest.post("https://silkdock.ai/v1/audio/transcriptions").headers({ Authorization: `Bearer ${process.env.SILKDOCK_API_KEY}` }).field("model", "whisper-1").field("language", "en").attach("file", fs.createReadStream("audio.mp3")).then((res) => console.log(res.body.text));import OpenAI from "openai";import fs from "fs";const client = new OpenAI({apiKey: 
process.env.SILKDOCK_API_KEY,baseURL: "https://silkdock.ai/v1",});const transcript = await client.audio.transcriptions.create({model: "whisper-1",file: fs.createReadStream("audio.mp3"),language: "en",});console.log(transcript.text);import { readFileSync } from "fs";const form = new FormData();form.append("model", "whisper-1");form.append("file", new Blob([readFileSync("audio.mp3")], { type: "audio/mpeg" }), "audio.mp3");form.append("language", "en");const res = await fetch("https://silkdock.ai/v1/audio/transcriptions", {method: "POST",headers: { "Authorization": `Bearer ${process.env.SILKDOCK_API_KEY}`,},body: form,});console.log((await res.json()).text);import requests, oswith open("audio.mp3", "rb") as f: res = requests.post( "https://silkdock.ai/v1/audio/transcriptions", headers={"Authorization": f"Bearer {os.getenv('SILKDOCK_API_KEY')}"}, files={"file": ("audio.mp3", f, "audio/mpeg")}, data={"model": "whisper-1", "language": "en"}, )print(res.json()["text"])import osfrom openai import OpenAIclient = OpenAI( api_key=os.getenv("SILKDOCK_API_KEY"), base_url="https://silkdock.ai/v1")with open("audio.mp3", "rb") as f: transcript = client.audio.transcriptions.create( model="whisper-1", file=f, language="en" )print(transcript.text)#include <stdio.h>#include <stdlib.h>#include <curl/curl.h>int main(void) { CURL *curl = curl_easy_init(); if (!curl) return 1; const char *api_key = getenv("SILKDOCK_API_KEY"); char auth_header[256]; snprintf(auth_header, sizeof(auth_header), "Authorization: Bearer %s", api_key); struct curl_slist *headers = NULL; headers = curl_slist_append(headers, auth_header); curl_mime *mime = curl_mime_init(curl); curl_mimepart *part = curl_mime_addpart(mime); curl_mime_name(part, "model"); curl_mime_data(part, "whisper-1", CURL_ZERO_TERMINATED); part = curl_mime_addpart(mime); curl_mime_name(part, "language"); curl_mime_data(part, "en", CURL_ZERO_TERMINATED); part = curl_mime_addpart(mime); curl_mime_name(part, "file"); curl_mime_filedata(part, 
"audio.mp3"); curl_mime_type(part, "audio/mpeg"); curl_easy_setopt(curl, CURLOPT_URL, "https://silkdock.ai/v1/audio/transcriptions"); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); curl_easy_setopt(curl, CURLOPT_MIMEPOST, mime); CURLcode res = curl_easy_perform(curl); if (res != CURLE_OK) fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res)); curl_mime_free(mime); curl_slist_free_all(headers); curl_easy_cleanup(curl); return 0;}#import <Foundation/Foundation.h>int main(int argc, const char *argv[]) { @autoreleasepool { NSString *apiKey = [NSProcessInfo processInfo].environment[@"SILKDOCK_API_KEY"]; NSString *boundary = [[NSUUID UUID] UUIDString]; NSURL *url = [NSURL URLWithString:@"https://silkdock.ai/v1/audio/transcriptions"]; NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url]; request.HTTPMethod = @"POST"; [request setValue:[NSString stringWithFormat:@"Bearer %@", apiKey] forHTTPHeaderField:@"Authorization"]; [request setValue:[NSString stringWithFormat:@"multipart/form-data; boundary=%@", boundary] forHTTPHeaderField:@"Content-Type"]; NSMutableData *body = [NSMutableData data]; // model field [body appendData:[[NSString stringWithFormat: @"--%@
Content-Disposition: form-data; name="model"
whisper-1
", boundary] dataUsingEncoding:NSUTF8StringEncoding]]; // language field [body appendData:[[NSString stringWithFormat: @"--%@
Content-Disposition: form-data; name="language"
en
", boundary] dataUsingEncoding:NSUTF8StringEncoding]]; // file field NSData *audioData = [NSData dataWithContentsOfFile:@"audio.mp3"]; [body appendData:[[NSString stringWithFormat: @"--%@
Content-Disposition: form-data; name="file"; filename="audio.mp3"
Content-Type: audio/mpeg
", boundary] dataUsingEncoding:NSUTF8StringEncoding]]; [body appendData:audioData]; [body appendData:[@"
" dataUsingEncoding:NSUTF8StringEncoding]]; // closing boundary [body appendData:[[NSString stringWithFormat:@"--%@--
", boundary] dataUsingEncoding:NSUTF8StringEncoding]]; request.HTTPBody = body; dispatch_semaphore_t sema = dispatch_semaphore_create(0); [[[NSURLSession sharedSession] dataTaskWithRequest:request completionHandler:^(NSData *data, NSURLResponse *response, NSError *error) { if (data) { NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil]; NSLog(@"%@", json[@"text"]); } dispatch_semaphore_signal(sema); }] resume]; dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER); } return 0;}import com.openai.client.OpenAIClient;import com.openai.client.okhttp.OpenAIOkHttpClient;import com.openai.models.*;import java.nio.file.Path;OpenAIClient client = OpenAIOkHttpClient.builder() .apiKey(System.getenv("SILKDOCK_API_KEY")) .baseURL("https://silkdock.ai/v1") .build();Transcription transcription = client.audio().transcriptions().create( TranscriptionCreateParams.builder() .model(AudioModel.WHISPER_1) .file(Path.of("audio.mp3")) .build());System.out.println(transcription.text());import java.net.http.*;import java.net.URI;import java.nio.file.*;import java.util.UUID;String boundary = UUID.randomUUID().toString();byte[] fileBytes = Files.readAllBytes(Path.of("audio.mp3"));String partHeader = "--" + boundary + "
" + "Content-Disposition: form-data; name="file"; filename="audio.mp3"
" + "Content-Type: audio/mpeg
";String modelPart = "--" + boundary + "
" + "Content-Disposition: form-data; name="model"
whisper-1
";String closing = "--" + boundary + "--
";var body = java.io.ByteArrayOutputStream();body.write(modelPart.getBytes());body.write(partHeader.getBytes());body.write(fileBytes);body.write(("
" + closing).getBytes());var req = HttpRequest.newBuilder() .uri(URI.create("https://silkdock.ai/v1/audio/transcriptions")) .header("Authorization", "Bearer " + System.getenv("SILKDOCK_API_KEY")) .header("Content-Type", "multipart/form-data; boundary=" + boundary) .POST(HttpRequest.BodyPublishers.ofByteArray(body.toByteArray())) .build();System.out.println(HttpClient.newHttpClient().send(req, HttpResponse.BodyHandlers.ofString()).body());import okhttp3.*;import java.io.File;import java.io.IOException;OkHttpClient client = new OkHttpClient();RequestBody requestBody = new MultipartBody.Builder() .setType(MultipartBody.FORM) .addFormDataPart("model", "whisper-1") .addFormDataPart("language", "en") .addFormDataPart("file", "audio.mp3", RequestBody.create(new File("audio.mp3"), MediaType.parse("audio/mpeg"))) .build();Request request = new Request.Builder() .url("https://silkdock.ai/v1/audio/transcriptions") .header("Authorization", "Bearer " + System.getenv("SILKDOCK_API_KEY")) .post(requestBody) .build();try (Response response = client.newCall(request).execute()) { System.out.println(response.body().string());}import kong.unirest.Unirest;import java.io.File;var response = Unirest.post("https://silkdock.ai/v1/audio/transcriptions") .header("Authorization", "Bearer " + System.getenv("SILKDOCK_API_KEY")) .field("model", "whisper-1") .field("language", "en") .field("file", new File("audio.mp3"), "audio/mpeg") .asJson();System.out.println(response.getBody().getObject().getString("text"));package mainimport ( "context" "fmt" "os" "github.com/openai/openai-go" "github.com/openai/openai-go/option")func main() { client := openai.NewClient( option.WithAPIKey(os.Getenv("SILKDOCK_API_KEY")), option.WithBaseURL("https://silkdock.ai/v1"), ) f, _ := os.Open("audio.mp3") defer f.Close() resp, _ := client.Audio.Transcriptions.New(context.Background(), openai.AudioTranscriptionNewParams{ Model: openai.F(openai.AudioModelWhisper1), File: openai.F(f), }, ) fmt.Println(resp.Text)}package 
mainimport ( "bytes" "fmt" "io" "mime/multipart" "net/http" "os" "path/filepath")func main() { var buf bytes.Buffer w := multipart.NewWriter(&buf) w.WriteField("model", "whisper-1") w.WriteField("language", "en") f, _ := os.Open("audio.mp3") defer f.Close() part, _ := w.CreateFormFile("file", filepath.Base("audio.mp3")) io.Copy(part, f) w.Close() req, _ := http.NewRequest("POST", "https://silkdock.ai/v1/audio/transcriptions", &buf) req.Header.Set("Authorization", "Bearer "+os.Getenv("SILKDOCK_API_KEY")) req.Header.Set("Content-Type", w.FormDataContentType()) resp, _ := http.DefaultClient.Do(req) defer resp.Body.Close() data, _ := io.ReadAll(resp.Body) fmt.Println(string(data))}<?php$ch = curl_init("https://silkdock.ai/v1/audio/transcriptions");curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_RETURNTRANSFER => true, CURLOPT_HTTPHEADER => [ "Authorization: Bearer " . getenv("SILKDOCK_API_KEY"), ], CURLOPT_POSTFIELDS => [ "model" => "whisper-1", "language" => "en", "file" => new CURLFile("audio.mp3", "audio/mpeg", "audio.mp3"), ],]);$res = json_decode(curl_exec($ch), true);echo $res["text"];<?phprequire_once "HTTP/Request2.php";$request = new HTTP_Request2("https://silkdock.ai/v1/audio/transcriptions", HTTP_Request2::METHOD_POST);$request->setHeader("Authorization", "Bearer " . getenv("SILKDOCK_API_KEY"));$request->addPostParameter("model", "whisper-1");$request->addPostParameter("language", "en");$request->addUpload("file", "audio.mp3", "audio.mp3", "audio/mpeg");$response = $request->send();$data = json_decode($response->getBody(), true);echo $data["text"];<?phprequire "vendor/autoload.php";use GuzzleHttpClient;$client = new Client();$response = $client->post("https://silkdock.ai/v1/audio/transcriptions", [ "headers" => [ "Authorization" => "Bearer " . 
getenv("SILKDOCK_API_KEY"), ], "multipart" => [ ["name" => "model", "contents" => "whisper-1"], ["name" => "language", "contents" => "en"], [ "name" => "file", "contents" => fopen("audio.mp3", "r"), "filename" => "audio.mp3", "headers" => ["Content-Type" => "audio/mpeg"], ], ],]);$data = json_decode($response->getBody(), true);echo $data["text"];<?php$client = new httpClient();$request = new httpClientRequest("POST", "https://silkdock.ai/v1/audio/transcriptions");$request->setHeaders([ "Authorization" => "Bearer " . getenv("SILKDOCK_API_KEY"),]);$body = new httpMessageBody();$body->addForm( ["model" => "whisper-1", "language" => "en"], [ [ "name" => "file", "type" => "audio/mpeg", "file" => "audio.mp3", "data" => null, ], ]);$request->setBody($body);$client->enqueue($request)->send();$response = $client->getResponse();$data = json_decode($response->getBody(), true);echo $data["text"];import OpenAIlet client = OpenAI(configuration: .init( token: ProcessInfo.processInfo.environment["SILKDOCK_API_KEY"]!, host: "silkdock.ai", scheme: "https"))let query = AudioTranscriptionQuery( file: try! Data(contentsOf: URL(fileURLWithPath: "audio.mp3")), fileType: .mp3, model: "whisper-1")let result = try await client.audioTranscriptions(query: query)print(result.text)import Foundationlet boundary = UUID().uuidStringvar req = URLRequest(url: URL(string: "https://silkdock.ai/v1/audio/transcriptions")!)req.httpMethod = "POST"req.setValue("Bearer \(ProcessInfo.processInfo.environment["SILKDOCK_API_KEY"]!)", forHTTPHeaderField: "Authorization")req.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")var body = Data()func addField(_ name: String, _ value: String) { body.append("--\(boundary)\r\nContent-Disposition: form-data; name=\"\(name)\"\r\n\r\n\(value)\r\n".data(using: .utf8)!)}addField("model", "whisper-1")addField("language", "en")let audioData = try! 
Data(contentsOf: URL(fileURLWithPath: "audio.mp3"))body.append("--\(boundary)\r\nContent-Disposition: form-data; name=\"file\"; filename=\"audio.mp3\"\r\nContent-Type: audio/mpeg\r\n\r\n".data(using: .utf8)!)body.append(audioData)body.append("\r\n--\(boundary)--\r\n".data(using: .utf8)!)req.httpBody = bodylet (data, _) = try! await URLSession.shared.data(for: req)print(String(data: data, encoding: .utf8)!)using OpenAI;using OpenAI.Audio;var client = new AudioClient( model: "whisper-1", credential: new System.ClientModel.ApiKeyCredential( Environment.GetEnvironmentVariable("SILKDOCK_API_KEY")!), options: new OpenAIClientOptions { Endpoint = new Uri("https://silkdock.ai/v1") });var result = await client.TranscribeAudioAsync("audio.mp3");Console.WriteLine(result.Value.Text);using System.Net.Http;var client = new HttpClient();client.DefaultRequestHeaders.Add("Authorization", $"Bearer {Environment.GetEnvironmentVariable("SILKDOCK_API_KEY")}");using var form = new MultipartFormDataContent();form.Add(new StringContent("whisper-1"), "model");form.Add(new StringContent("en"), "language");form.Add(new ByteArrayContent(await File.ReadAllBytesAsync("audio.mp3")), "file", "audio.mp3");var res = await client.PostAsync("https://silkdock.ai/v1/audio/transcriptions", form);Console.WriteLine(await res.Content.ReadAsStringAsync());require "openai"client = OpenAI::Client.new(access_token: ENV["SILKDOCK_API_KEY"],uri_base: "https://silkdock.ai/v1")response = client.audio.transcribe(parameters: { model: "whisper-1", file: File.open("audio.mp3", "rb")})puts response["text"]require "net/http"uri = URI("https://silkdock.ai/v1/audio/transcriptions")req = Net::HTTP::Post.new(uri)req["Authorization"] = "Bearer #{ENV['SILKDOCK_API_KEY']}"form = Net::HTTP::Post::Multipart.new(uri.path,"model" => "whisper-1","language" => "en","file" => UploadIO.new("audio.mp3", "audio/mpeg"))form["Authorization"] = req["Authorization"]res = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |h| 
h.request(form) }puts JSON.parse(res.body)["text"]import com.openai.client.OpenAIClientimport com.openai.client.okhttp.OpenAIOkHttpClientimport com.openai.models.*import java.nio.file.Pathval client: OpenAIClient = OpenAIOkHttpClient.builder() .apiKey(System.getenv("SILKDOCK_API_KEY")) .baseURL("https://silkdock.ai/v1") .build()val transcription = client.audio().transcriptions().create( TranscriptionCreateParams.builder() .model(AudioModel.WHISPER_1) .file(Path.of("audio.mp3")) .build())println(transcription.text())import java.net.http.*import java.net.URIimport java.nio.file.*import java.util.UUIDval boundary = UUID.randomUUID().toString()val fileBytes = Files.readAllBytes(Path.of("audio.mp3"))val body = buildString { append("--$boundary
") append("Content-Disposition: form-data; name="model"
whisper-1
") append("--$boundary
") append("Content-Disposition: form-data; name="language"
en
") append("--$boundary
") append("Content-Disposition: form-data; name="file"; filename="audio.mp3"
") append("Content-Type: audio/mpeg
")}.toByteArray() + fileBytes + "
--$boundary--
".toByteArray()val req = HttpRequest.newBuilder() .uri(URI.create("https://silkdock.ai/v1/audio/transcriptions")) .header("Authorization", "Bearer ${System.getenv("SILKDOCK_API_KEY")}") .header("Content-Type", "multipart/form-data; boundary=$boundary") .POST(HttpRequest.BodyPublishers.ofByteArray(body)) .build()println(HttpClient.newHttpClient().send(req, HttpResponse.BodyHandlers.ofString()).body())use reqwest::blocking::{Client, multipart};fn main() -> Result<(), Box<dyn std::error::Error>> { let form = multipart::Form::new() .text("model", "whisper-1") .text("language", "en") .file("file", "audio.mp3")?; let res = Client::new() .post("https://silkdock.ai/v1/audio/transcriptions") .header("Authorization", format!("Bearer {}", std::env::var("SILKDOCK_API_KEY")?)) .multipart(form) .send()?; println!("{}", res.text()?); Ok(())}POST /v1/audio/transcriptions HTTP/1.1Host: silkdock.aiAuthorization: Bearer <YOUR_API_KEY>Content-Type: multipart/form-data; boundary=----boundary------boundaryContent-Disposition: form-data; name="model"whisper-1------boundaryContent-Disposition: form-data; name="language"en------boundaryContent-Disposition: form-data; name="file"; filename="audio.mp3"Content-Type: audio/mpeg<binary audio data>------boundary--import 'dart:io';import 'package:http/http.dart' as http;void main() async {final req = http.MultipartRequest( 'POST', Uri.parse('https://silkdock.ai/v1/audio/transcriptions'),) ..headers['Authorization'] = 'Bearer ${const String.fromEnvironment("SILKDOCK_API_KEY")}' ..fields['model'] = 'whisper-1' ..fields['language'] = 'en' ..files.add(await http.MultipartFile.fromPath('file', 'audio.mp3'));final res = await req.send();print(await res.stream.bytesToString());}library(httr2)req <- request("https://silkdock.ai/v1/audio/transcriptions") |>req_headers(Authorization = paste("Bearer", Sys.getenv("SILKDOCK_API_KEY"))) |>req_body_multipart( model = "whisper-1", language = "en", file = curl::form_file("audio.mp3", type = "audio/mpeg"))resp <- 
req_perform(req)
cat(resp_body_json(resp)$text)

(* requires cohttp-lwt-unix and multipart_form *)
(* Simple approach using curl via shell *)
let () =
  let key = Sys.getenv "SILKDOCK_API_KEY" in
  let cmd = Printf.sprintf {|curl -s -X POST https://silkdock.ai/v1/audio/transcriptions -H "Authorization: Bearer %s" -F "model=whisper-1" -F "language=en" -F "file=@audio.mp3"|} key
  in
  print_string (Unix.open_process_in cmd |> (fun ch -> let buf = Buffer.create 256 in (try while true do Buffer.add_channel buf ch 1 done with End_of_file -> ()); Buffer.contents buf))

Response
{
"text": "你好,欢迎使用语音识别服务。"
}

For `response_format: "verbose_json"`:
{
"task": "transcribe",
"language": "english",
"duration": 3.14,
"text": "你好,欢迎使用语音识别服务。",
"segments": [
{
"id": 0,
"start": 0.0,
"end": 3.14,
"text": "你好,欢迎使用语音识别服务。",
"tokens": [50364, 2425, 11, 3250, 281, 264, 2348, 12, 1353, 12, 6123, 2643, 13, 50521]
}
]
}

Last updated on