LLM Rust Anti-Patterns

Anti-pattern 1: Synchronous HTTP call in async context

rust

// ❌ WRONG: Using reqwest blocking client inside async function
// This blocks the Tokio runtime thread
/// Stand-in for an LLM call that would use the blocking `reqwest` client from async code.
///
/// The blocking request is left commented out, so this stub ignores `prompt` and
/// always yields the same placeholder text.
async fn bad_llm_call(prompt: &str) -> String {
    // The blocking client stalls the OS thread it runs on, which is exactly what a
    // Tokio runtime thread must not do:
    // let resp = reqwest::blocking::Client::new().post(url).json(&body).send().unwrap();
    String::from("pretend response")
}

// ✅ CORRECT: Use async reqwest
/// Async stand-in for an LLM completion call.
///
/// The real `reqwest` request is left commented out as a template; the stub echoes a
/// short preview of `prompt` instead.
///
/// The preview holds at most 20 bytes of `prompt` and is cut back to the nearest
/// UTF-8 character boundary, so multi-byte input cannot trigger a slicing panic.
///
/// # Errors
///
/// The stub itself always returns `Ok`. The `Result` return type matches the
/// commented-out request, which maps its error to a `String`.
async fn good_llm_call(prompt: &str) -> Result<String, String> {
    // Use reqwest async client
    // let resp = reqwest::Client::new()
    //     .post("https://api.openai.com/v1/chat/completions")
    //     .bearer_auth(api_key)
    //     .json(&request)
    //     .timeout(Duration::from_secs(60))
    //     .send().await
    //     .map_err(|e| e.to_string())?;
    const PREVIEW_BYTES: usize = 20;

    // Slicing a `str` at a byte offset inside a multi-byte character panics, so step
    // back until the cut lands on a boundary. Offset 0 is always a boundary, which
    // guarantees the loop terminates.
    let mut end = prompt.len().min(PREVIEW_BYTES);
    while !prompt.is_char_boundary(end) {
        end -= 1;
    }
    Ok(format!("async response to: {}", &prompt[..end]))
}

/// Runs the async example once and prints the reply.
#[tokio::main]
async fn main() {
    let outcome = good_llm_call("Why is Rust good for AI?").await;
    // Panics if the call reports an error.
    let reply = outcome.unwrap();
    println!("{}", reply);
}

---

Anti-pattern 2: Creating a new HTTP client per request

rust

use std::sync::Arc;

// ❌ WRONG: New client per request = no connection reuse, no HTTP/2 multiplexing
/// Placeholder for the client-per-request anti-pattern.
///
/// The client construction is left commented out; the stub returns an owned copy
/// of `prompt`.
async fn bad_pattern(prompt: &str) -> String {
    // let client = reqwest::Client::new(); // New TCP connection every time!
    // client.post(url).send().await...
    String::from(prompt)
}

// ✅ CORRECT: Single shared client for connection pooling
/// Service object intended to own one HTTP client that every request reuses.
///
/// The client field is left commented out in this sketch; only the API key is stored.
struct LlmService {
    // client: reqwest::Client,  // Shared across all requests
    api_key: String,
}

impl LlmService {
    /// Builds the service and hands it back already wrapped in an `Arc`.
    fn new(api_key: String) -> Arc<Self> {
        // let client = reqwest::Client::builder()
        //     .timeout(Duration::from_secs(60))
        //     .pool_max_idle_per_host(10)
        //     .http2_prior_knowledge()
        //     .build().unwrap();
        let service = Self { api_key };
        Arc::new(service)
    }

    /// Stubbed completion: returns `prompt` prefixed with `"response for: "`.
    async fn complete(&self, prompt: &str) -> String {
        ["response for: ", prompt].concat()
    }
}

#[tokio::main]
async fn main() {
    let service = LlmService::new("sk-key".to_string());
    // Clone Arc to share across tasks — still uses same connection pool
    let s2 = service.clone();
    let r = tokio::join!(
        service.complete("prompt 1"),
        s2.complete("prompt 2"),
    );
    println!("{:?}", r);
}

---

Anti-pattern 3: Over-prompting (wasting tokens)

rust

// ❌ WRONG: Verbose prompts waste tokens and increase cost
/// Wraps `user_question` in a deliberately long-winded greeting and sign-off.
///
/// Serves as the wasteful counter-example: everything around the question is filler.
fn bad_prompt(user_question: &str) -> String {
    // Filler placed before the question.
    const OPENING: &str = concat!(
        "Hello! I am writing to you today to ask for your assistance with the following question ",
        "that I have been thinking about. I would greatly appreciate it if you could please provide ",
        "me with a comprehensive and detailed response that thoroughly addresses all aspects of my ",
        "inquiry. The question that I would like to ask you is as follows: ",
    );
    // Filler placed after the question.
    const CLOSING: &str = concat!(
        ". Thank you very much for taking the time to answer my question. ",
        "I look forward to your response.",
    );

    [OPENING, user_question, CLOSING].concat()
}

// ✅ CORRECT: Concise, direct prompts
/// Builds the short prompt: the fixed instruction `"Answer concisely: "` followed
/// by `user_question`.
fn good_prompt(user_question: &str) -> String {
    let mut prompt = String::from("Answer concisely: ");
    prompt.push_str(user_question);
    prompt
}

/// Prints the size of both prompt variants and the rough token difference.
fn main() {
    let question = "How do I use async/await in Rust?";
    let verbose_len = bad_prompt(question).len();
    let concise_len = good_prompt(question).len();

    // Token counts are estimated as byte length divided by four.
    println!("Bad prompt:  {} chars (~{} tokens)", verbose_len, verbose_len / 4);
    println!("Good prompt: {} chars (~{} tokens)", concise_len, concise_len / 4);
    println!("Token savings: ~{}", (verbose_len - concise_len) / 4);
}

---

Anti-pattern 4: No timeout on LLM calls

rust

use std::time::Duration;
use tokio::time::timeout;

// ❌ WRONG: No timeout — can hang indefinitely on provider issues
/// Awaits the LLM future directly, with nothing bounding how long the wait may last.
async fn bad_no_timeout() -> Result<String, String> {
    // No deadline wraps this await, so a future that never completes stalls the caller.
    Ok(some_llm_future().await)
}

// ✅ CORRECT: Always set a timeout
/// Awaits the LLM future under a 30-second deadline.
///
/// # Errors
///
/// Returns a fixed message when the deadline passes before the future completes.
async fn good_with_timeout() -> Result<String, String> {
    let deadline = Duration::from_secs(30);
    match timeout(deadline, some_llm_future()).await {
        Ok(reply) => Ok(reply),
        Err(_elapsed) => Err("LLM call timed out after 30s".to_string()),
    }
}

/// Simulated LLM call: sleeps for 100 ms, then yields a fixed reply.
async fn some_llm_future() -> String {
    let simulated_latency = Duration::from_millis(100);
    tokio::time::sleep(simulated_latency).await;
    String::from("response")
}

/// Runs the deadline-guarded call and prints its `Result` in debug form.
#[tokio::main]
async fn main() {
    let outcome = good_with_timeout().await;
    println!("{:?}", outcome);
}

---

Anti-pattern 5: Ignoring rate limit headers

rust

use std::time::Duration;

// ❌ WRONG: Retry immediately on 429 — hammers the API, wastes quota
/// Sketch of a retry loop that never waits between attempts.
///
/// The loop body is empty in this stub, so the function always ends with the
/// `"failed"` error.
async fn bad_retry_429() -> Result<String, String> {
    let mut attempt = 0;
    while attempt < 5 {
        // Immediately retry on 429 — always fails, burns retry budget
        attempt += 1;
    }
    Err(String::from("failed"))
}

// ✅ CORRECT: Parse Retry-After header and wait
/// Waits before retrying after a rate-limit response.
///
/// `retry_after_secs` is the delay in seconds; when it is `None` the wait defaults
/// to five seconds. The stub then returns a fixed success value.
async fn good_retry_429(retry_after_secs: Option<u64>) -> Result<String, String> {
    let delay_secs = match retry_after_secs {
        Some(secs) => secs,
        None => 5,
    };
    let wait = Duration::from_secs(delay_secs);
    println!("Rate limited. Waiting {:?} before retry...", wait);
    tokio::time::sleep(wait).await;
    Ok(String::from("response after backoff"))
}

/// Demonstrates the backoff helper with a hard-coded ten-second delay.
#[tokio::main]
async fn main() {
    // In production: parse `Retry-After` header from 429 response
    let header_delay: Option<u64> = Some(10); // From response header
    let outcome = good_retry_429(header_delay).await;
    println!("{:?}", outcome);
}

---

Anti-pattern 6: Storing conversation history in-process only

rust

use std::collections::HashMap;

// ❌ WRONG: In-process HashMap loses all history on restart
/// Conversation store backed only by an in-memory map.
///
/// Nothing in this type writes the map anywhere else, so its contents last only
/// as long as the value itself.
struct BadConversationStore {
    // Message lists keyed by a string — presumably the session ID, as in
    // `GoodConversationStore`; confirm against callers.
    history: HashMap<String, Vec<String>>,
}

// ✅ CORRECT: Persist to database; use session ID from client
// In production, use Redis for short-lived conversations:
// redis.hset(session_id, "messages", serde_json::to_string(&messages)?)
// redis.expire(session_id, 3600)  // 1 hour TTL

/// Session-keyed conversation store.
///
/// The Redis connection is left commented out; this sketch keeps messages in an
/// in-memory map as a development fallback.
#[derive(Debug, Default)]
struct GoodConversationStore {
    // redis: redis::Connection,  // Real implementation
    history: HashMap<String, Vec<String>>, // Dev fallback
}

impl GoodConversationStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self::default()
    }

    /// Appends `message` to the end of the history for `session_id`, creating the
    /// session entry on first use.
    fn append(&mut self, session_id: &str, message: String) {
        self.history
            .entry(session_id.to_string())
            .or_default()
            .push(message);
    }

    /// Returns up to `max_messages` of the most recent messages for `session_id`,
    /// in the order they were appended (oldest first).
    ///
    /// An unknown session, or `max_messages == 0`, yields an empty vector.
    fn get(&self, session_id: &str, max_messages: usize) -> Vec<String> {
        self.history
            .get(session_id)
            .map(|msgs| {
                // Keep only the tail of the history; `saturating_sub` clamps the start
                // to 0 when fewer than `max_messages` are stored.
                let start = msgs.len().saturating_sub(max_messages);
                msgs[start..].to_vec()
            })
            .unwrap_or_default()
    }
}

/// Records two messages for one session and prints what the store returns for it.
fn main() {
    let mut store = GoodConversationStore::new();
    for text in ["Hello!", "What is Rust?"] {
        store.append("sess-1", String::from(text));
    }
    let recent = store.get("sess-1", 10);
    println!("History: {:?}", recent);
}

LLM Rust Anti-Patterns

LLM Rust Anti-Patterns

Anti-pattern 1: Synchronous HTTP call in async context

Anti-pattern 2: Creating a new HTTP client per request

Anti-pattern 3: Over-prompting (wasting tokens)

Anti-pattern 4: No timeout on LLM calls

Anti-pattern 5: Ignoring rate limit headers

Anti-pattern 6: Storing conversation history in-process only

Related reading

Related Guides

LLM Rust Pitfalls

Building LLM Applications with Rust

Continue in This Topic

LLM API Gateway in Rust

LLM Rust Benchmarking

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A

LLM Rust Maintainability