Skip to content

Chat Completions API

Endpoint to generate conversational responses based on user-provided input messages.

POST https://api.relax.ai/v1/chat/completions

# Minimal example: create a single-turn chat completion with the relaxAI
# Python SDK and print the returned choices.
from relaxai import Relaxai
# NOTE(review): RELAX_API_KEY is a placeholder name — define it (e.g. read it
# from the environment) before running this example.
client = Relaxai(
api_key = RELAX_API_KEY,
)
chat_completion_response = client.chat.create_completion(
messages=[{
"role": "user",
"content": "What is the capital of the UK?",
}],
model="Llama-4-Maverick-17B-128E",
temperature = 0.7,
max_tokens = 100
)
# Each choice holds an assistant message with the generated content.
print(chat_completion_response.choices)
// Minimal example: create a single-turn chat completion with the relaxAI
// Node SDK and log the returned choices.
import Relaxai from 'relaxai';
// NOTE(review): RELAX_API_KEY is a placeholder — supply a real key
// (e.g. process.env.RELAX_API_KEY) before running.
const client = new Relaxai({
apiKey: RELAX_API_KEY,
});
// Top-level await: this snippet assumes an ES-module context.
const chatCompletionResponse = await client.chat.createCompletion({
messages: [{ role: 'user', content: 'What is the capital of the UK?' }],
model: 'Llama-4-Maverick-17B-128E',
temperature: 0.7,
max_tokens: 100,
});
console.log(chatCompletionResponse.choices);
package main
import (
"context"
"fmt"
"github.com/relax-ai/go-sdk"
"github.com/relax-ai/go-sdk/option"
)
func main() {
client := relaxai.NewClient(
option.WithAPIKey("RELAX_API_KEY"),
)
chatCompletionResponse, err := client.Chat.NewCompletion(context.TODO(), relaxai.ChatNewCompletionParams{
ChatCompletionRequest: relaxai.ChatCompletionRequestParam{
Messages: []relaxai.ChatCompletionMessageParam{relaxai.ChatCompletionMessageParam{
Content: "What is the capital of the UK?",
Role: "user",
}},
Model: "Llama-4-Maverick-17B-128E",
Temperature: 0.7,
},
})
if err != nil {
panic(err.Error())
}
fmt.Printf("%+v
", chatCompletionResponse.ID)
}
Terminal window
# Call the Chat Completions endpoint directly with curl.
# Requires RELAX_API_KEY to be exported in the shell environment.
curl https://api.relax.ai/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $RELAX_API_KEY" \
-d '{
"model": "Llama-4-Maverick-17B-128E",
"messages": [{"role": "user", "content": "What is the capital of the UK?"}],
"temperature": 0.7
}'

Returns a Chat Completions object.


Chat Completions Response
{
"id": "chatcmpl-1c05717f8d68454f9375d6706cc275e6",
"object": "chat.completion",
"created": 1744034860,
"model": "Llama-4-Maverick-17B-128E",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "The capital of the United Kingdom is London."
},
"finish_reason": "stop",
"content_filter_results": {
"hate": {
"filtered": false
},
"self_harm": {
"filtered": false
},
"sexual": {
"filtered": false
},
"violence": {
"filtered": false
},
"jailbreak": {
"filtered": false,
"detected": false
},
"profanity": {
"filtered": false,
"detected": false
}
}
}
],
"usage": {
"prompt_tokens": 76,
"completion_tokens": 10,
"total_tokens": 86,
"prompt_tokens_details": null,
"completion_tokens_details": null
},
"system_fingerprint": ""
}

The following parameters can be included in the request body:


| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| model | string | Yes | The model name to use for generating the completion. |
| messages | array | Yes | A list of message objects representing the conversation history. |
| stream | bool | No | If set to true, partial message deltas will be sent as data-only server-sent events. |
| temperature | float | No | Controls randomness. Lower values make output more focused and deterministic. |
| top_p | float | No | An alternative to temperature that controls diversity via nucleus sampling. |
| max_tokens | integer | No | The maximum number of tokens to generate for the completion. |
| presence_penalty | float | No | Penalizes new tokens based on whether they appear in the text so far. |
| frequency_penalty | float | No | Penalizes new tokens based on their cumulative frequency in the generated text. |
| response_format | object | No | Specifies the format of the response. |
| logprobs | boolean | No | If set to true, returns the log probabilities of each token in the output. |

The relaxAI API supports response streaming, enabling clients to receive partial results for specific requests in real-time.

Streaming is available for the relaxAI Chat Completions endpoint. This section covers streaming in Chat Completions, with examples of how to use it.

# Stream a chat completion through the OpenAI-compatible relaxAI endpoint
# using the official OpenAI Python SDK.
from openai import OpenAI
client = OpenAI(
api_key = RELAX_API_KEY,
base_url = 'https://api.relax.ai/v1/',
)
# stream=True makes the call return an iterator of server-sent-event chunks
# instead of a single response object.
stream = client.chat.completions.create(
model="Llama-4-Maverick-17B-128E",
messages=[
{"role": "user", "content": "What is the capital of the UK?"}
],
stream=True
)
# Each chunk carries an incremental delta; the final chunk's delta content
# is None, so guard before printing.
for chunk in stream:
if chunk.choices[0].delta.content is not None:
print(chunk.choices[0].delta.content, end="")
// Stream a chat completion through the OpenAI-compatible relaxAI endpoint
// using the official OpenAI Node SDK.
import { OpenAI } from "openai";

const openai = new OpenAI({
  apiKey: RELAX_API_KEY,
  // FIX: the openai Node SDK option is `baseURL` (capital URL); the original
  // `baseUrl` is not a recognized option, so requests would silently go to
  // the default api.openai.com host instead of relaxAI.
  baseURL: 'https://api.relax.ai/v1/'
});

async function main() {
  const stream = await openai.chat.completions.create({
    model: "Llama-4-Maverick-17B-128E",
    messages: [{ role: "user", content: "What is the capital of the UK?" }],
    store: true,
    stream: true,
  });
  // Each chunk carries an incremental delta; write tokens as they arrive.
  for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
  }
}
main();

relaxAI supports tool calling to enable seamless integration of custom tools into your AI workflows, allowing you to define and pass bespoke functionality to the Chat Completions endpoint. This feature allows developers to extend the capabilities of our AI models with tools tailored to their specific use cases, enhancing the overall flexibility and effectiveness of their applications.

To use custom tools, you will need to define a list of functions the model may generate JSON inputs for. Currently a maximum of 128 functions may be defined.

# Tool-calling example: declare one function tool and let the model decide
# whether to call it (tool_choice="auto").
from relaxai import Relaxai
client = Relaxai(
api_key = RELAX_API_KEY,
)
# JSON Schema description of the callable function; the model generates
# arguments matching "parameters" when it decides to call the tool.
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
}
}
]
messages = [{"role": "user", "content": "What's the weather like in London today?"}]
# NOTE(review): model name "GLM-46" differs in style from
# "Llama-4-Maverick-17B-128E" used elsewhere on this page — confirm it is
# not meant to be "GLM-4.6".
completion = client.chat.create_completion(
model="GLM-46",
messages=messages,
tools=tools,
tool_choice="auto"
)
print(completion)
// Tool-calling example: declare one function tool and let the model decide
// whether to call it (tool_choice: "auto").
import Relaxai from 'relaxai';
const client = new Relaxai({
apiKey: RELAX_API_KEY,
});
// JSON Schema description of the callable function; the model generates
// arguments matching `parameters` when it decides to call the tool.
const tools = [
{
type: "function",
function: {
name: "get_current_weather",
description: "Get the current weather in a given location",
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
unit: { type: "string", enum: ["celsius", "fahrenheit"] },
},
required: ["location"],
},
},
},
];
const messages = [
{ role: "user", content: "What's the weather like in London today?" }
];
// Top-level await: this snippet assumes an ES-module context.
const completion = await client.chat.createCompletion({
model: "Llama-4-Maverick-17B-128E",
messages,
tools,
tool_choice: "auto",
});
console.log(completion);
// Tool-calling example: declare one function tool and let the model decide
// whether to call it (ToolChoice "auto"), then pretty-print the response.
package main
import (
"context"
"encoding/json"
"fmt"
"github.com/relax-ai/go-sdk"
"github.com/relax-ai/go-sdk/option"
)
func main() {
// NOTE(review): replace "RELAX_API_KEY" with a real key, e.g. read from
// the environment.
client := relaxai.NewClient(
option.WithAPIKey("RELAX_API_KEY"),
)
// JSON Schema description of the callable function; the model generates
// arguments matching "Parameters" when it decides to call the tool.
tools := []relaxai.ChatCompletionToolParam{
{
Type: "function",
Function: &relaxai.ChatCompletionFunctionParam{
Name: "get_current_weather",
Description: "Get the current weather in a given location",
Parameters: map[string]interface{}{
"type": "object",
"properties": map[string]interface{}{
"location": map[string]interface{}{
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": map[string]interface{}{
"type": "string",
"enum": []string{"celsius", "fahrenheit"},
},
},
"required": []string{"location"},
},
},
},
}
resp, err := client.Chat.NewCompletion(context.TODO(), relaxai.ChatNewCompletionParams{
ChatCompletionRequest: relaxai.ChatCompletionRequestParam{
Messages: []relaxai.ChatCompletionMessageParam{
{
Role: "user",
Content: "What's the weather like in London today?",
},
},
// NOTE(review): "GLM-46" differs in style from the model names used
// elsewhere on this page — confirm it is not meant to be "GLM-4.6".
Model: "GLM-46",
Tools: tools,
ToolChoice: relaxai.F("auto"),
// NOTE(review): Temperature is wrapped in relaxai.F here but passed as
// a plain 0.7 in the first Go example above — only one form can match
// the SDK's field type; verify against the SDK definition.
Temperature: relaxai.F(0.7),
},
})
if err != nil {
panic(err.Error())
}
// Pretty-print the full response, including any tool_calls emitted by
// the model.
out, _ := json.MarshalIndent(resp, "", " ")
fmt.Println(string(out))
}