Rust By Example

LLM Rust Anti-Patterns

Anti-patterns to avoid when building LLM applications in Rust: blocking on streaming, over-prompting, synchronous API calls in async context, and missing error boundaries.

Topic: LLM Rust

Search intent: High-intent search: "rust llm anti-patterns mistakes avoid"

LLM Rust Anti-Patterns

Anti-pattern 1: Synchronous HTTP call in async context

rust
// ❌ WRONG: Using reqwest blocking client inside async function
// This blocks the Tokio runtime thread
/// Anti-pattern placeholder: shows where a blocking HTTP call would sit inside
/// an `async fn`.
///
/// The real request is left commented out, so this stub ignores `prompt` and
/// always resolves to the fixed string `"pretend response"`.
async fn bad_llm_call(prompt: &str) -> String {
    // reqwest::blocking::get blocks the OS thread — Tokio hates this!
    // let resp = reqwest::blocking::Client::new().post(url).json(&body).send().unwrap();
    String::from("pretend response")
}

// ✅ CORRECT: Use async reqwest
/// Stubbed async LLM call that echoes a short preview of `prompt`.
///
/// The real request (shown commented out below) would go through the async
/// `reqwest` client so the runtime thread is never blocked.
///
/// The preview holds at most 20 bytes of `prompt`. The cut is moved back to
/// the nearest UTF-8 character boundary, so prompts containing multi-byte
/// characters (accents, CJK, emoji) cannot trigger a slicing panic.
///
/// # Errors
///
/// This stub always returns `Ok`; the `Err(String)` arm is where the
/// commented-out request would report transport failures.
async fn good_llm_call(prompt: &str) -> Result<String, String> {
    // Use reqwest async client
    // let resp = reqwest::Client::new()
    //     .post("https://api.openai.com/v1/chat/completions")
    //     .bearer_auth(api_key)
    //     .json(&request)
    //     .timeout(Duration::from_secs(60))
    //     .send().await
    //     .map_err(|e| e.to_string())?;

    /// Upper bound, in bytes, on the echoed prompt preview.
    const PREVIEW_MAX_BYTES: usize = 20;

    // Byte offsets are not always character boundaries; walk back until the
    // cut is valid. Offset 0 is always a boundary, so the loop terminates.
    let mut end = prompt.len().min(PREVIEW_MAX_BYTES);
    while !prompt.is_char_boundary(end) {
        end -= 1;
    }

    Ok(format!("async response to: {}", &prompt[..end]))
}

/// Runs the async example once on the Tokio runtime and prints its reply.
#[tokio::main]
async fn main() {
    let reply = good_llm_call("Why is Rust good for AI?").await;
    // Demo only: a failed call aborts the program via `unwrap`.
    println!("{}", reply.unwrap());
}

---

Anti-pattern 2: Creating a new HTTP client per request

rust
use std::sync::Arc;

// ❌ WRONG: New client per request = no connection reuse, no HTTP/2 multiplexing
/// Anti-pattern placeholder: marks where a fresh HTTP client would be built on
/// every call.
///
/// The request itself is commented out, so this stub simply returns an owned
/// copy of `prompt`.
async fn bad_pattern(prompt: &str) -> String {
    // let client = reqwest::Client::new(); // New TCP connection every time!
    // client.post(url).send().await...
    prompt.to_owned()
}

// ✅ CORRECT: Single shared client for connection pooling
/// Service handle intended to own one long-lived HTTP client that every
/// request reuses.
///
/// In this stub the client field is commented out; only the API key is stored.
struct LlmService {
    // client: reqwest::Client,  // Shared across all requests
    api_key: String,
}

impl LlmService {
    /// Builds the service and wraps it in an `Arc` so that callers can share
    /// one instance by cloning the handle.
    fn new(api_key: String) -> Arc<Self> {
        // NOTE(review): confirm `http2_prior_knowledge()` suits the target
        // endpoint before enabling the builder below.
        // let client = reqwest::Client::builder()
        //     .timeout(Duration::from_secs(60))
        //     .pool_max_idle_per_host(10)
        //     .http2_prior_knowledge()
        //     .build().unwrap();
        let service = Self { api_key };
        Arc::new(service)
    }

    /// Stubbed completion: returns `"response for: "` followed by `prompt`.
    async fn complete(&self, prompt: &str) -> String {
        let mut reply = String::from("response for: ");
        reply.push_str(prompt);
        reply
    }
}

#[tokio::main]
async fn main() {
    let service = LlmService::new("sk-key".to_string());
    // Clone Arc to share across tasks — still uses same connection pool
    let s2 = service.clone();
    let r = tokio::join!(
        service.complete("prompt 1"),
        s2.complete("prompt 2"),
    );
    println!("{:?}", r);
}

---

Anti-pattern 3: Over-prompting (wasting tokens)

rust
// ❌ WRONG: Verbose prompts waste tokens and increase cost
/// Anti-pattern: buries `user_question` in a long, polite template.
///
/// Returns the filler text before the question, the question itself, and the
/// closing pleasantries, concatenated into one prompt string.
fn bad_prompt(user_question: &str) -> String {
    // Filler that precedes the question.
    const PREAMBLE: &str = "Hello! I am writing to you today to ask for your assistance with the following question \
        that I have been thinking about. I would greatly appreciate it if you could please provide \
        me with a comprehensive and detailed response that thoroughly addresses all aspects of my \
        inquiry. The question that I would like to ask you is as follows: ";
    // Filler that follows the question.
    const CLOSING: &str = ". \
        Thank you very much for taking the time to answer my question. I look forward to your response.";

    [PREAMBLE, user_question, CLOSING].concat()
}

// ✅ CORRECT: Concise, direct prompts
/// Builds a short prompt: the instruction `"Answer concisely: "` followed
/// directly by `user_question`.
fn good_prompt(user_question: &str) -> String {
    let mut prompt = String::from("Answer concisely: ");
    prompt.push_str(user_question);
    prompt
}

/// Compares the verbose and concise prompts for one question and prints a
/// rough size estimate for each.
fn main() {
    let question = "How do I use async/await in Rust?";
    let (verbose, concise) = (bad_prompt(question), good_prompt(question));

    // `len()` counts bytes; dividing by 4 is the example's rough token estimate.
    let (verbose_len, concise_len) = (verbose.len(), concise.len());
    println!("Bad prompt:  {} chars (~{} tokens)", verbose_len, verbose_len / 4);
    println!("Good prompt: {} chars (~{} tokens)", concise_len, concise_len / 4);
    println!("Token savings: ~{}", (verbose_len - concise_len) / 4);
}

---

Anti-pattern 4: No timeout on LLM calls

rust
use std::time::Duration;
use tokio::time::timeout;

// ❌ WRONG: No timeout — can hang indefinitely on provider issues
/// Anti-pattern: awaits the LLM future with no deadline, so the caller waits
/// for as long as that future takes to resolve.
async fn bad_no_timeout() -> Result<String, String> {
    // Some future that might hang forever
    Ok(some_llm_future().await)
}

// ✅ CORRECT: Always set a timeout
/// Awaits the LLM future under a 30-second deadline.
///
/// # Errors
///
/// Returns `Err("LLM call timed out after 30s")` when the deadline elapses
/// before the future resolves.
async fn good_with_timeout() -> Result<String, String> {
    let deadline = Duration::from_secs(30);
    match timeout(deadline, some_llm_future()).await {
        Ok(reply) => Ok(reply),
        Err(_elapsed) => Err("LLM call timed out after 30s".to_string()),
    }
}

/// Stand-in for a real LLM request: sleeps for 100 ms without blocking the
/// thread, then yields the fixed string `"response"`.
async fn some_llm_future() -> String {
    let simulated_latency = Duration::from_millis(100);
    tokio::time::sleep(simulated_latency).await;
    String::from("response")
}

/// Runs the timeout-guarded call once and prints the resulting `Result`.
#[tokio::main]
async fn main() {
    let outcome = good_with_timeout().await;
    println!("{:?}", outcome);
}

---

Anti-pattern 5: Ignoring rate limit headers

rust
use std::time::Duration;

// ❌ WRONG: Retry immediately on 429 — hammers the API, wastes quota
/// Anti-pattern placeholder: spins through five back-to-back attempts with no
/// wait in between, then gives up with `Err("failed")`.
async fn bad_retry_429() -> Result<String, String> {
    let mut attempts_left = 5;
    while attempts_left > 0 {
        // Immediately retry on 429 — always fails, burns retry budget
        attempts_left -= 1;
    }
    Err(String::from("failed"))
}

// ✅ CORRECT: Parse Retry-After header and wait
/// Waits before retrying after a rate-limit response.
///
/// `retry_after_secs` is the number of seconds to wait; when it is `None` the
/// function falls back to 5 seconds. The wait is logged to stdout and performed
/// with a non-blocking sleep, after which the stub returns
/// `Ok("response after backoff")`.
async fn good_retry_429(retry_after_secs: Option<u64>) -> Result<String, String> {
    let wait = retry_after_secs
        .map(Duration::from_secs)
        .unwrap_or(Duration::from_secs(5));
    println!("Rate limited. Waiting {:?} before retry...", wait);
    tokio::time::sleep(wait).await;
    Ok(String::from("response after backoff"))
}

/// Runs the backoff example with a hard-coded 10-second hint and prints the
/// result.
#[tokio::main]
async fn main() {
    // In production: parse `Retry-After` header from 429 response
    let header_hint = Some(10_u64); // From response header
    println!("{:?}", good_retry_429(header_hint).await);
}

---

Anti-pattern 6: Storing conversation history in-process only

rust
use std::collections::HashMap;

// ❌ WRONG: In-process HashMap loses all history on restart
/// Anti-pattern: conversation history held only in an in-memory `HashMap`,
/// with no persistence layer behind it.
struct BadConversationStore {
    // Message lists per string key; presumably keyed by session ID — confirm.
    history: HashMap<String, Vec<String>>,
}

// ✅ CORRECT: Persist to database; use session ID from client
// In production, use Redis for short-lived conversations:
// redis.hset(session_id, "messages", serde_json::to_string(&messages)?)
// redis.expire(session_id, 3600)  // 1 hour TTL

/// Conversation store keyed by session ID.
///
/// The in-memory map is only a development fallback; the commented-out field
/// marks where a persistent backend connection would live.
#[derive(Debug, Default)]
struct GoodConversationStore {
    // redis: redis::Connection,  // Real implementation
    history: HashMap<String, Vec<String>>, // Dev fallback
}

impl GoodConversationStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self::default()
    }

    /// Appends `message` to the history of `session_id`, creating the session
    /// entry on first use.
    fn append(&mut self, session_id: &str, message: String) {
        self.history
            .entry(session_id.to_string())
            .or_default()
            .push(message);
    }

    /// Returns up to `max_messages` of the most recent messages for
    /// `session_id`, oldest first.
    ///
    /// The window is taken from the tail of the history but keeps insertion
    /// order, so the result can be replayed as a conversation. An unknown
    /// session or `max_messages == 0` yields an empty vector.
    fn get(&self, session_id: &str, max_messages: usize) -> Vec<String> {
        self.history
            .get(session_id)
            .map(|msgs| {
                // `saturating_sub` clamps the start to 0 when fewer than
                // `max_messages` entries exist.
                let start = msgs.len().saturating_sub(max_messages);
                msgs[start..].to_vec()
            })
            .unwrap_or_default()
    }
}

/// Records two messages for one session and prints what the store returns for
/// that session.
fn main() {
    let mut store = GoodConversationStore::new();
    for message in ["Hello!", "What is Rust?"] {
        store.append("sess-1", message.to_string());
    }
    let recent = store.get("sess-1", 10);
    println!("History: {:?}", recent);
}

Related reading

Related Guides

Continue in This Topic

More Rust Guides