Best Practices
Optimize your Halfred integration for performance, cost-efficiency, and reliability with proven strategies and patterns.
Profile Selection
Choose the Right Profile for Each Task
// ✅ Good: Match profile to task complexity
async function handleUserRequest(task) {
  // Simple UI text
  if (task.type === 'autocomplete') {
    return await client.chat.completions.create({
      model: "lite",
      messages: [...]
    });
  }

  // General chat
  if (task.type === 'conversation') {
    return await client.chat.completions.create({
      model: "standard",
      messages: [...]
    });
  }

  // Complex analysis
  if (task.type === 'analysis') {
    return await client.chat.completions.create({
      model: "deepthink",
      messages: [...]
    });
  }
}

Start Simple, Upgrade When Needed
// Try lite first for cost efficiency
let completion;
try {
  completion = await client.chat.completions.create({
    model: "lite",
    messages: [...]
  });

  // Check quality (implement your own logic)
  if (!isQualitySufficient(completion)) {
    // Upgrade to standard if needed
    completion = await client.chat.completions.create({
      model: "standard",
      messages: [...]
    });
  }
} catch (error) {
  console.error(error);
}
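
The quality check is application-specific. As a starting point, here is a minimal illustrative heuristic (isQualitySufficient is your own function, not part of the Halfred SDK) that rejects empty or very short answers:

// Hypothetical heuristic: treat empty or very short responses as insufficient.
// Tune the threshold, or swap in your own checks (formatting, keywords, etc.).
function isQualitySufficient(completion) {
  const content = completion.choices[0]?.message?.content ?? "";
  return content.trim().length >= 50; // illustrative minimum length
}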
Cost Optimization

1. Reduce Token Usage
// ❌ Verbose prompts
const prompt = "I would like you to please provide me with a detailed and comprehensive explanation of...";
// ✅ Concise prompts
const prompt = "Explain how...";2. Limit Output Length
await client.chat.completions.create({
  model: "lite",
  messages: [{ role: "user", content: "Summarize this article" }],
  max_tokens: 150, // Prevent overly long responses
});

Tip: Be careful when setting max_tokens too low, as it can cut off responses mid-sentence or leave them incomplete. This is especially important for the STANDARD and DEEPTHINK profiles, which tend to generate longer, more detailed responses. If you notice truncated outputs, try increasing the limit or removing it altogether for these profiles.
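
To detect truncation programmatically rather than by eye, you can inspect the finish reason on the response. This sketch assumes Halfred returns an OpenAI-style finish_reason field on each choice:

const completion = await client.chat.completions.create({
  model: "standard",
  messages: [{ role: "user", content: "Summarize this article" }],
  max_tokens: 150,
});

// "length" indicates the model stopped because it hit max_tokens
if (completion.choices[0].finish_reason === "length") {
  console.warn("Response truncated; consider raising max_tokens");
}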
3. Cache Common Responses
const cache = new Map();

async function getCachedCompletion(prompt, model = "standard") {
  const key = `${model}:${prompt}`;
  if (cache.has(key)) {
    return cache.get(key);
  }

  const completion = await client.chat.completions.create({
    model,
    messages: [{ role: "user", content: prompt }],
  });
  cache.set(key, completion);
  return completion;
}
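
Note that a bare Map grows without bound in a long-running process. Below is a hedged sketch of simple TTL and size-cap eviction layered on the cache above (the constants are illustrative, not Halfred defaults):

const CACHE_TTL_MS = 10 * 60 * 1000; // evict entries after 10 minutes
const CACHE_MAX_ENTRIES = 1000;      // cap memory usage

// getCachedCompletion above would call getCached/setCached
// instead of touching the Map directly.
function setCached(key, completion) {
  if (cache.size >= CACHE_MAX_ENTRIES) {
    // Maps iterate in insertion order, so this evicts the oldest entry
    cache.delete(cache.keys().next().value);
  }
  cache.set(key, { completion, expiresAt: Date.now() + CACHE_TTL_MS });
}

function getCached(key) {
  const entry = cache.get(key);
  if (!entry || Date.now() > entry.expiresAt) {
    cache.delete(key);
    return undefined;
  }
  return entry.completion;
}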
Security Best Practices

1. Never Expose API Keys
// ❌ Bad: Hardcoded key
const client = new Halfred({
  apiKey: "halfred_xxxxxxxxxxxxxxxxxxxxxxxxxxxx",
});

// ✅ Good: Environment variable
const client = new Halfred({
  apiKey: process.env.HALFRED_API_KEY,
});

2. Use Server-Side Only
// ✅ Backend API route
app.post("/api/chat", async (req, res) => {
  const completion = await client.chat.completions.create({
    model: "standard",
    messages: req.body.messages,
  });
  res.json(completion);
});

// ❌ Never in frontend JavaScript
// const client = new Halfred({ apiKey: "..." }); // WRONG!
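
Instead, have the browser call your own route. A minimal sketch against the /api/chat endpoint defined above:

// ✅ Frontend: talk to your backend, which holds the API key
const res = await fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    messages: [{ role: "user", content: "Hello!" }],
  }),
});
const completion = await res.json();
console.log(completion.choices[0].message.content);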
3. Validate User Input

function validateMessages(messages) {
  if (!Array.isArray(messages) || messages.length === 0) {
    throw new Error("Invalid messages");
  }

  // Limit message length
  for (const msg of messages) {
    if (msg.content.length > 10000) {
      throw new Error("Message too long");
    }
  }
  return messages;
}

const validated = validateMessages(userInput);
const completion = await client.chat.completions.create({
  model: "standard",
  messages: validated,
});

Conversation Management
1. Manage Context Window
function trimConversation(messages, maxMessages = 20) {
  if (messages.length <= maxMessages) {
    return messages;
  }

  // Keep system message + recent messages
  const systemMsg = messages.find((m) => m.role === "system");
  const recent = messages.slice(-maxMessages);
  return systemMsg ? [systemMsg, ...recent] : recent;
}

2. Summarize Long Histories
async function summarizeHistory(oldMessages) {
  const summary = await client.chat.completions.create({
    model: "lite",
    messages: [
      {
        role: "user",
        content: `Summarize this conversation: ${JSON.stringify(oldMessages)}`,
      },
    ],
    max_tokens: 200,
  });

  return {
    role: "system",
    content: `Previous conversation: ${summary.choices[0].message.content}`,
  };
}
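
The two helpers compose: once a conversation outgrows your window, summarize the older turns and keep only the recent ones. A sketch using summarizeHistory from above (compactConversation and the threshold of 20 are illustrative):

async function compactConversation(messages, maxMessages = 20) {
  if (messages.length <= maxMessages) return messages;

  // Summarize the turns that would otherwise be trimmed away
  const older = messages.slice(0, -maxMessages);
  const summaryMsg = await summarizeHistory(older);

  // Keep the summary as a system message, then the recent turns
  return [summaryMsg, ...messages.slice(-maxMessages)];
}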
Monitoring & Logging

The Halfred dashboard provides real-time visibility into your API usage during development: recent logs help you debug errors and understand request patterns, and cost monitoring keeps track of your spending as you build and test. These tools are essential for catching issues early and optimizing your usage before moving to production.
1. Track Token Usage
let totalTokens = 0;
let totalCost = 0;

async function trackCompletion(params) {
  const completion = await client.chat.completions.create(params);
  totalTokens += completion.usage.total_tokens;
  // Multiply by your per-token rate for the profile in use to accumulate totalCost
  console.log(`Tokens: ${completion.usage.total_tokens} (running total: ${totalTokens})`);
  return completion;
}

2. Log Errors
try {
  const completion = await client.chat.completions.create({...});
} catch (error) {
  console.error({
    timestamp: new Date().toISOString(),
    status: error.status,
    code: error.code,
    message: error.message,
    model: "standard"
  });

  // Send to monitoring service
  // Sentry.captureException(error);
}

Testing
1. Use DEV Profile for Development
const model = process.env.NODE_ENV === 'production'
  ? 'standard'
  : 'dev';

await client.chat.completions.create({
  model,
  messages: [...]
});

2. Mock API Calls in Tests
// Jest example
const Halfred = require("halfred.ai"); // adjust to the package's actual export style
jest.mock("halfred.ai");

test("handles completion correctly", async () => {
  const mockCreate = jest.fn().mockResolvedValue({
    choices: [{ message: { content: "Test response" } }],
  });

  Halfred.mockImplementation(() => ({
    chat: {
      completions: { create: mockCreate },
    },
  }));

  // Test your code
});

Production Checklist

Before going live, run through the practices covered above:

- API keys loaded from environment variables, never hardcoded or shipped to the browser
- All Halfred calls made server-side, behind your own API routes
- User input validated before it is forwarded to the API
- Errors logged (timestamp, status, code) and sent to a monitoring service
- Token usage tracked and costs monitored in the dashboard
- Profiles matched to task complexity, with max_tokens limits where appropriate
Support
Need optimization help?
Email: [email protected]
Discord: Join our community