LLM Rust Troubleshooting

Quick diagnosis

rust

LLM call failing?
├── HTTP 401 → Missing, wrong, or expired API key
├── HTTP 429 → Rate limited — add backoff
├── HTTP 400 → Bad request (invalid model, malformed JSON, token limit exceeded)
├── HTTP 500/503 → Provider outage — use fallback
├── Timeout → Increase timeout or reduce max_tokens
├── Wrong JSON format → Print raw response before parsing
└── Streaming broken → Check SSE line parsing

Issue 1: Authentication failures

rust

use std::env;

/// Reasons an API key can be rejected before any request is sent.
///
/// Implements `Display` and `std::error::Error` so values can be printed
/// directly and propagated with `?` into `Box<dyn std::error::Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum AuthError {
    /// No usable key was found; the payload is a human-readable explanation.
    MissingKey(String),
    /// The key does not have the shape expected for the provider; the payload
    /// describes which rule was violated.
    InvalidFormat(String),
    /// The key is no longer valid because it expired.
    Expired,
}

impl std::fmt::Display for AuthError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            AuthError::MissingKey(msg) => write!(f, "missing API key: {}", msg),
            AuthError::InvalidFormat(msg) => write!(f, "invalid API key format: {}", msg),
            AuthError::Expired => f.write_str("API key expired"),
        }
    }
}

impl std::error::Error for AuthError {}

/// Checks that `key` has the shape expected for `provider`.
///
/// Rules applied, in order:
/// - `"openai"`: the key must start with `sk-` and be at least 20 bytes long.
/// - `"anthropic"`: the key must start with `sk-ant-`.
/// - any other provider: no checks are performed.
///
/// # Errors
/// Returns `AuthError::InvalidFormat` describing the first rule that failed.
fn validate_api_key(key: &str, provider: &str) -> Result<(), AuthError> {
    // Match arms are tried top to bottom, so the prefix rule for OpenAI is
    // reported before the length rule.
    let violation = match provider {
        "openai" if !key.starts_with("sk-") => Some("OpenAI keys must start with 'sk-'"),
        "openai" if key.len() < 20 => Some("Key too short"),
        "anthropic" if !key.starts_with("sk-ant-") => {
            Some("Anthropic keys must start with 'sk-ant-'")
        }
        _ => None,
    };

    match violation {
        Some(reason) => Err(AuthError::InvalidFormat(reason.to_string())),
        None => Ok(()),
    }
}

/// Reads an API key from the environment variable `env_var` and validates it
/// for `provider`.
///
/// Leading and trailing whitespace (for example a newline picked up when the
/// key was copied from a file) is stripped before validation, and the trimmed
/// key is what gets returned.
///
/// # Errors
/// - `AuthError::MissingKey` when the variable is unset, holds bytes that are
///   not valid Unicode, or is empty / whitespace-only. The message says which.
/// - Any error returned by `validate_api_key` for the trimmed key.
fn load_and_validate_key(env_var: &str, provider: &str) -> Result<String, AuthError> {
    // Distinguish "unset" from "set but unreadable" so the message points at
    // the real problem.
    let raw = env::var(env_var).map_err(|err| match err {
        env::VarError::NotPresent => AuthError::MissingKey(format!("{} not set", env_var)),
        env::VarError::NotUnicode(_) => {
            AuthError::MissingKey(format!("{} is not valid Unicode", env_var))
        }
    })?;

    let key = raw.trim();
    if key.is_empty() {
        return Err(AuthError::MissingKey(format!("{} is empty", env_var)));
    }

    validate_api_key(key, provider)?;
    Ok(key.to_string())
}

/// Demo: load the OpenAI key from the environment and report the outcome.
fn main() {
    // Seed the variable with a placeholder so the example runs standalone.
    env::set_var("OPENAI_API_KEY", "sk-test-key-12345678901234567890");

    let key = match load_and_validate_key("OPENAI_API_KEY", "openai") {
        Ok(key) => key,
        Err(failure) => {
            match failure {
                AuthError::MissingKey(msg) => eprintln!("❌ Missing key: {}", msg),
                AuthError::InvalidFormat(msg) => eprintln!("❌ Invalid format: {}", msg),
                AuthError::Expired => eprintln!("❌ Key expired"),
            }
            return;
        }
    };

    // Show only the first 7 and last 4 bytes so the full secret never hits the log.
    let head = &key[..7];
    let tail = &key[key.len() - 4..];
    println!("✅ Key loaded: {}...{}", head, tail);
}

---

Issue 2: Rate limiting diagnosis and handling

rust

use std::time::Duration;

/// Rate-limit state extracted from the headers of an HTTP response.
#[derive(Debug)]
struct RateLimitInfo {
    /// Parsed `x-ratelimit-limit-requests`; 0 when absent or not an integer.
    limit: u32,
    /// Parsed `x-ratelimit-remaining-requests`; 0 when absent or not an integer.
    remaining: u32,
    /// Parsed `x-ratelimit-reset-requests`; 0 when absent or not an integer.
    // NOTE(review): some providers appear to send this header as a duration
    // string (e.g. `1s`) rather than an integer; such values end up as 0 here.
    // Confirm against the provider's documentation.
    reset_at: u64, // Unix timestamp
    /// Parsed `retry-after`; `None` when absent or not an integer.
    retry_after: Option<u64>, // seconds
}

impl RateLimitInfo {
    /// Builds a `RateLimitInfo` from `(name, value)` header pairs.
    ///
    /// Header names are compared ASCII-case-insensitively and values are
    /// trimmed before parsing. Missing or non-numeric headers fall back to
    /// 0 (`None` for `retry-after`). Counts larger than `u32::MAX` saturate
    /// at `u32::MAX` instead of wrapping.
    fn from_headers(headers: &[(&str, &str)]) -> Self {
        let get = |name: &str| -> Option<u64> {
            headers
                .iter()
                .find(|(k, _)| k.eq_ignore_ascii_case(name))
                .and_then(|(_, v)| v.trim().parse().ok())
        };
        // Clamp first so the narrowing cast below can never truncate.
        let get_u32 = |name: &str| -> u32 {
            get(name).unwrap_or(0).min(u64::from(u32::MAX)) as u32
        };

        Self {
            limit: get_u32("x-ratelimit-limit-requests"),
            remaining: get_u32("x-ratelimit-remaining-requests"),
            reset_at: get("x-ratelimit-reset-requests").unwrap_or(0),
            retry_after: get("retry-after"),
        }
    }

    /// How long to wait before retrying: the `retry-after` value when present,
    /// otherwise 5 seconds, and never less than 1 second.
    fn wait_duration(&self) -> Duration {
        let secs = self.retry_after.unwrap_or(5);
        Duration::from_secs(secs.max(1))
    }

    /// True when a limit is known and fewer than 10% of requests remain.
    fn is_nearly_exhausted(&self) -> bool {
        // Integer form of `remaining / limit < 0.1`; widened to u64 so the
        // multiplication cannot overflow.
        self.limit > 0 && u64::from(self.remaining) * 10 < u64::from(self.limit)
    }
}

/// Demo: parse a canned set of rate-limit headers and print the derived values.
fn main() {
    // Headers as they might arrive on a throttled response.
    let simulated: [(&str, &str); 3] = [
        ("x-ratelimit-limit-requests", "1000"),
        ("x-ratelimit-remaining-requests", "5"),
        ("retry-after", "30"),
    ];

    let info = RateLimitInfo::from_headers(&simulated);
    let exhausted = info.is_nearly_exhausted();
    let wait = info.wait_duration();

    println!("Rate limit: {}/{}", info.remaining, info.limit);
    println!("Nearly exhausted: {}", exhausted);
    println!("Wait duration: {:?}", wait);
}

---

Issue 3: JSON parsing failures — debug raw response

rust

use serde::Deserialize;

/// Top-level shape of a successful completion response, as far as this
/// snippet needs it: only the `choices` array is deserialized.
#[derive(Debug, Deserialize)]
struct LlmResponse {
    /// Candidate completions; `parse_llm_response` reads the first one.
    choices: Vec<Choice>,
}

/// One entry of the response's `choices` array.
#[derive(Debug, Deserialize)]
struct Choice {
    /// The message object carried by this choice.
    message: ChoiceMessage,
}

/// The `message` object inside a choice.
#[derive(Debug, Deserialize)]
struct ChoiceMessage {
    /// Generated text. Declared as a plain `String`, so a response whose
    /// `content` is missing or `null` fails to deserialize.
    content: String,
}

/// Extracts the text of the first choice from a raw JSON completion response.
///
/// On any failure the reason and the first 500 characters of `raw` are written
/// to stderr, so the actual provider payload (often an error object) is visible.
///
/// # Errors
/// Returns a message prefixed with `Failed to parse LLM response:` when `raw`
/// is not valid JSON of the expected shape, or when `choices` is empty.
fn parse_llm_response(raw: &str) -> Result<String, String> {
    let dump_raw = || {
        // Cut on a character boundary: slicing at a fixed byte offset would
        // panic if that offset falls inside a multi-byte character.
        let end = raw.char_indices().nth(500).map_or(raw.len(), |(idx, _)| idx);
        eprintln!("❌ Raw response (first 500 chars):\n{}", &raw[..end]);
    };

    let parsed = serde_json::from_str::<LlmResponse>(raw).map_err(|e| {
        eprintln!("❌ Parse error: {}", e);
        dump_raw();
        format!("Failed to parse LLM response: {}", e)
    })?;

    // An empty `choices` array is valid JSON for this shape, so it must be
    // reported as an error rather than indexed.
    match parsed.choices.into_iter().next() {
        Some(first) => Ok(first.message.content),
        None => {
            eprintln!("❌ Parse error: response contains no choices");
            dump_raw();
            Err("Failed to parse LLM response: response contains no choices".to_string())
        }
    }
}

/// Demo: run the parser on a well-formed response and on a provider error object.
fn main() {
    // A response with the expected `choices[0].message.content` path.
    let well_formed = r#"{"choices":[{"message":{"content":"Rust is great!"}}]}"#;
    let ok_outcome = parse_llm_response(well_formed);
    println!("Valid: {:?}", ok_outcome);

    // What a provider typically sends instead when the request is rejected.
    let provider_error =
        r#"{"error":{"message":"Rate limit exceeded","type":"rate_limit_error"}}"#;
    let err_outcome = parse_llm_response(provider_error);
    println!("Error: {:?}", err_outcome);
}

---

Issue 4: Streaming SSE debugging

rust

/// Parses one SSE line and extracts the streamed token text, if any.
///
/// Returns `None` for lines that are not `data` fields (comments such as
/// `: ping`, other fields, blank lines), for empty data, for the `[DONE]`
/// sentinel, for JSON without a string at `choices[0].delta.content`, and for
/// data that is not valid JSON (which is also logged to stderr).
///
/// Both `data: {...}` and `data:{...}` are accepted, since the space after the
/// colon is optional in the event-stream format.
fn parse_sse_line(line: &str) -> Option<String> {
    // `strip_prefix` removes the field name exactly once; surrounding
    // whitespace (including a trailing `\r`) is trimmed from the payload.
    let data = line.strip_prefix("data:")?.trim();
    if data.is_empty() || data == "[DONE]" {
        return None;
    }

    match serde_json::from_str::<serde_json::Value>(data) {
        Ok(v) => v["choices"][0]["delta"]["content"]
            .as_str()
            .map(str::to_owned),
        Err(e) => {
            // Log a short, char-boundary-safe prefix of the offending payload.
            let end = data.char_indices().nth(50).map_or(data.len(), |(idx, _)| idx);
            eprintln!("SSE parse error '{}': {}", &data[..end], e);
            None
        }
    }
}

fn main() {
    let sse_lines = vec![
        "data: {\"choices\":[{\"delta\":{\"content\":\"Hello\"}}]}",
        "data: {\"choices\":[{\"delta\":{\"content\":\" world\"}}]}",
        "data: {\"choices\":[{\"delta\":{}}]}",   // Empty delta — normal at end
        "data: [DONE]",
        ": ping",                                  // SSE keepalive — ignore
    ];

    let mut output = String::new();
    for line in &sse_lines {
        if let Some(token) = parse_sse_line(line) {
            output.push_str(&token);
        }
    }
    println!("Assembled output: '{}'", output);
}

LLM Rust Troubleshooting

LLM Rust Troubleshooting

Quick diagnosis

Issue 1: Authentication failures

Issue 2: Rate limiting diagnosis and handling

Issue 3: JSON parsing failures — debug raw response

Issue 4: Streaming SSE debugging

Related reading

Related Guides

LLM Rust Pitfalls

LLM Rust Anti-Patterns

Continue in This Topic

LLM Rust Testing Strategy

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Anti-Patterns

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A