LLM Rust Anti-Patterns
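Anti-patterns to avoid when building LLM applications in Rust: synchronous (blocking) API calls in an async context, creating a new HTTP client per request, over-prompting, missing timeouts, ignoring rate-limit headers, and keeping conversation history only in process memory.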
Topic: LLM Rust
Search intent: High-intent search: "rust llm anti-patterns mistakes avoid"
LLM Rust Anti-Patterns
Anti-pattern 1: Synchronous HTTP call in async context
// ❌ WRONG: Using reqwest blocking client inside async function
// This blocks the Tokio runtime thread
/// Deliberately flawed example: an `async fn` that would perform a blocking HTTP request.
///
/// The blocking call itself is left commented out, so this stub ignores `prompt`
/// and always yields the same placeholder text.
async fn bad_llm_call(prompt: &str) -> String {
    // A `reqwest::blocking` request blocks the OS thread it runs on, which inside an
    // async fn is a Tokio runtime thread:
    // let resp = reqwest::blocking::Client::new().post(url).json(&body).send().unwrap();
    let _ = prompt; // unused while the request above stays commented out
    String::from("pretend response")
}
// ✅ CORRECT: Use async reqwest
/// Async-friendly counterpart of the blocking example.
///
/// The real request is left commented out; the stub echoes a short preview of
/// `prompt` (at most its first 20 bytes, cut back to a character boundary).
///
/// # Errors
///
/// The stub itself never fails; the `Result` mirrors the shape of the commented-out
/// request, which maps transport errors to `String`.
async fn good_llm_call(prompt: &str) -> Result<String, String> {
    // Use reqwest async client
    // let resp = reqwest::Client::new()
    // .post("https://api.openai.com/v1/chat/completions")
    // .bearer_auth(api_key)
    // .json(&request)
    // .timeout(Duration::from_secs(60))
    // .send().await
    // .map_err(|e| e.to_string())?;

    // Slicing a `&str` at an arbitrary byte offset panics when the offset lands inside
    // a multi-byte UTF-8 character, so step back to the nearest character boundary.
    let mut end = prompt.len().min(20);
    while !prompt.is_char_boundary(end) {
        end -= 1; // terminates: offset 0 is always a boundary
    }
    Ok(format!("async response to: {}", &prompt[..end]))
}
/// Runs the async example and prints either the reply or the error.
#[tokio::main]
async fn main() {
    // Report a failure instead of panicking via `unwrap()`.
    match good_llm_call("Why is Rust good for AI?").await {
        Ok(reply) => println!("{}", reply),
        Err(err) => eprintln!("LLM call failed: {}", err),
    }
}
---
Anti-pattern 2: Creating a new HTTP client per request
use std::sync::Arc;
// ❌ WRONG: New client per request = no connection reuse, no HTTP/2 multiplexing
/// Deliberately flawed example: would construct a fresh HTTP client on every call.
///
/// With the client code commented out, the stub simply hands back an owned copy
/// of `prompt`.
async fn bad_pattern(prompt: &str) -> String {
    // let client = reqwest::Client::new(); // New TCP connection every time!
    // client.post(url).send().await...
    prompt.to_owned()
}
// ✅ CORRECT: Single shared client for connection pooling
/// Service object meant to be built once and shared by every request.
///
/// In this stub the HTTP client field is commented out; only the API key is stored.
struct LlmService {
    // client: reqwest::Client, // Shared across all requests
    api_key: String,
}

impl LlmService {
    /// Builds the service and wraps it in an [`Arc`] so callers can share one instance.
    fn new(api_key: String) -> Arc<Self> {
        // let client = reqwest::Client::builder()
        // .timeout(Duration::from_secs(60))
        // .pool_max_idle_per_host(10)
        // .http2_prior_knowledge()
        // .build().unwrap();
        let service = Self { api_key };
        Arc::new(service)
    }

    /// Stub completion: returns `prompt` prefixed with `"response for: "`.
    async fn complete(&self, prompt: &str) -> String {
        let mut reply = String::from("response for: ");
        reply.push_str(prompt);
        reply
    }
}
/// Shares one `LlmService` between two concurrent completions and prints both replies.
#[tokio::main]
async fn main() {
    let service = LlmService::new("sk-key".to_string());
    // `Arc::clone` creates a second handle to the same service; no new service is built.
    let second_handle = Arc::clone(&service);
    let replies = tokio::join!(
        service.complete("prompt 1"),
        second_handle.complete("prompt 2"),
    );
    println!("{:?}", replies);
}
---
Anti-pattern 3: Over-prompting (wasting tokens)
// ❌ WRONG: Verbose prompts waste tokens and increase cost
/// Deliberately verbose prompt builder, used to show how filler text inflates a prompt.
///
/// Wraps `user_question` between a long greeting and a long sign-off.
fn bad_prompt(user_question: &str) -> String {
    const PREAMBLE: &str = "Hello! I am writing to you today to ask for your assistance with the following question \
        that I have been thinking about. I would greatly appreciate it if you could please provide \
        me with a comprehensive and detailed response that thoroughly addresses all aspects of my \
        inquiry. The question that I would like to ask you is as follows: ";
    const SIGN_OFF: &str =
        ". Thank you very much for taking the time to answer my question. I look forward to your response.";

    [PREAMBLE, user_question, SIGN_OFF].concat()
}
// ✅ CORRECT: Concise, direct prompts
/// Concise prompt builder: prefixes `user_question` with a short instruction.
fn good_prompt(user_question: &str) -> String {
    ["Answer concisely: ", user_question].concat()
}
/// Prints the size of both prompt styles and the approximate number of tokens saved.
fn main() {
    let question = "How do I use async/await in Rust?";
    let verbose = bad_prompt(question);
    let concise = good_prompt(question);
    // Token counts are estimated as length / 4.
    println!("Bad prompt: {} chars (~{} tokens)", verbose.len(), verbose.len() / 4);
    println!("Good prompt: {} chars (~{} tokens)", concise.len(), concise.len() / 4);
    // `saturating_sub` keeps the estimate at 0 instead of overflowing `usize`
    // should the concise prompt ever be the longer one.
    println!(
        "Token savings: ~{}",
        verbose.len().saturating_sub(concise.len()) / 4
    );
}
---
Anti-pattern 4: No timeout on LLM calls
use std::time::Duration;
use tokio::time::timeout;
// ❌ WRONG: No timeout — can hang indefinitely on provider issues
/// Deliberately flawed example: awaits the LLM future with no upper bound on the wait.
///
/// Always returns `Ok` with whatever `some_llm_future` produces.
async fn bad_no_timeout() -> Result<String, String> {
    // Nothing bounds this await, so a future that never completes would stall the caller.
    Ok(some_llm_future().await)
}
// ✅ CORRECT: Always set a timeout
/// Awaits the LLM future under a 30-second deadline.
///
/// # Errors
///
/// Returns `Err("LLM call timed out after 30s")` when the deadline elapses first.
async fn good_with_timeout() -> Result<String, String> {
    let deadline = Duration::from_secs(30);
    match timeout(deadline, some_llm_future()).await {
        Ok(reply) => Ok(reply),
        Err(_elapsed) => Err(String::from("LLM call timed out after 30s")),
    }
}
/// Stand-in for a real LLM request: sleeps for 100 ms, then returns `"response"`.
async fn some_llm_future() -> String {
    let simulated_latency = Duration::from_millis(100);
    tokio::time::sleep(simulated_latency).await;
    String::from("response")
}
/// Runs the timeout-guarded call and prints its `Result` in debug form.
#[tokio::main]
async fn main() {
    let outcome = good_with_timeout().await;
    println!("{:?}", outcome);
}
---
Anti-pattern 5: Ignoring rate limit headers
use std::time::Duration;
// ❌ WRONG: Retry immediately on 429 — hammers the API, wastes quota
/// Deliberately flawed example: a retry loop with no delay between attempts.
///
/// The loop body is an empty placeholder, so the function always ends in
/// `Err("failed")`.
async fn bad_retry_429() -> Result<String, String> {
    const MAX_ATTEMPTS: u32 = 5;
    for _attempt in 0..MAX_ATTEMPTS {
        // Immediately retry on 429 — always fails, burns retry budget
    }
    Err(String::from("failed"))
}
// ✅ CORRECT: Parse Retry-After header and wait
/// Waits out a rate limit before continuing.
///
/// Sleeps for `retry_after_secs` seconds (5 seconds when `None`), logging the wait
/// first, then returns a fixed `Ok` response.
async fn good_retry_429(retry_after_secs: Option<u64>) -> Result<String, String> {
    let backoff = match retry_after_secs {
        Some(secs) => Duration::from_secs(secs),
        None => Duration::from_secs(5),
    };
    println!("Rate limited. Waiting {:?} before retry...", backoff);
    tokio::time::sleep(backoff).await;
    Ok(String::from("response after backoff"))
}
/// Demonstrates the backoff helper with a hard-coded `Retry-After` value of 10 seconds.
#[tokio::main]
async fn main() {
    // In production: parse `Retry-After` header from 429 response
    let retry_after: Option<u64> = Some(10); // From response header
    let outcome = good_retry_429(retry_after).await;
    println!("{:?}", outcome);
}
---
Anti-pattern 6: Storing conversation history in-process only
use std::collections::HashMap;
// ❌ WRONG: In-process HashMap loses all history on restart
/// Anti-pattern example: conversation history held only in an in-memory map.
///
/// Nothing in this type writes the map anywhere else, so its contents do not
/// outlive the process.
struct BadConversationStore {
    // Message lists keyed by a string — presumably the session ID, as in
    // `GoodConversationStore`; confirm against callers.
    history: HashMap<String, Vec<String>>,
}
// ✅ CORRECT: Persist to database; use session ID from client
// In production, use Redis for short-lived conversations:
// redis.hset(session_id, "messages", serde_json::to_string(&messages)?)
// redis.expire(session_id, 3600) // 1 hour TTL
/// Conversation store keyed by session ID.
///
/// The Redis connection is commented out; this version keeps messages in a
/// `HashMap` as a development fallback.
#[derive(Debug, Default)]
struct GoodConversationStore {
    // redis: redis::Connection, // Real implementation
    history: HashMap<String, Vec<String>>, // Dev fallback
}

impl GoodConversationStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self::default()
    }

    /// Appends `message` to the end of the history for `session_id`,
    /// creating the session entry on first use.
    fn append(&mut self, session_id: &str, message: String) {
        self.history
            .entry(session_id.to_string())
            .or_default()
            .push(message);
    }

    /// Returns up to `max_messages` of the most recent messages for `session_id`,
    /// oldest first (the order in which they were appended).
    ///
    /// Unknown sessions and `max_messages == 0` both yield an empty vector.
    fn get(&self, session_id: &str, max_messages: usize) -> Vec<String> {
        self.history
            .get(session_id)
            .map(|msgs| {
                // Take the tail of the list rather than `.rev().take(n)`, which would
                // hand the conversation back newest-first.
                let start = msgs.len().saturating_sub(max_messages);
                msgs[start..].to_vec()
            })
            .unwrap_or_default()
    }
}
/// Stores two messages under one session and prints up to ten of them.
fn main() {
    let session = "sess-1";
    let mut store = GoodConversationStore::new();
    for message in ["Hello!", "What is Rust?"] {
        store.append(session, message.to_string());
    }
    let recent = store.get(session, 10);
    println!("History: {:?}", recent);
}