LLM Rust Pitfalls

Pitfall 1: Naïve token counting breaks context window

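Character count ≠ token count. Using text.len() / 4 is only a rough estimate for English prose, and note that len() counts UTF-8 bytes, not characters. CJK text (Chinese, Japanese) runs at roughly 1-2 tokens per character, while the token density of code varies widely with its symbols.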

rust

/// ❌ PITFALL: Naïve estimate can be off by 2-5x for non-English or code
///
/// Divides the UTF-8 byte length of `text` by four (integer division) and
/// returns the quotient as the estimated token count.
fn bad_token_estimate(text: &str) -> usize {
    // `len()` counts bytes, not characters: "你好" is 2 chars / 6 bytes, so this
    // yields 1 even though the text is typically ~2 tokens.
    const BYTES_PER_TOKEN: usize = 4;
    let byte_len = text.len();
    byte_len / BYTES_PER_TOKEN
}

/// ✅ Better: use tiktoken-rs or account for language density
///
/// Heuristic token estimate that picks a formula by content class:
///
/// * `is_code == true`: one token per 3 bytes.
/// * text containing at least one CJK character: 2 tokens per 3 characters.
/// * anything else: 4 tokens per 3 whitespace-separated words.
///
/// All divisions are integer divisions, so very short inputs can estimate to 0.
/// This is still only an approximation; use a real tokenizer when the exact
/// count matters.
fn better_token_estimate(text: &str, is_code: bool) -> usize {
    /// True for characters in the CJK-related Unicode blocks.
    ///
    /// A plain `> U+2000` test would also match typographic punctuation
    /// (’ “ — …), arrows and math symbols, which would push ordinary English
    /// prose onto the much denser per-character formula.
    fn is_cjk(c: char) -> bool {
        matches!(
            c,
            '\u{2E80}'..='\u{9FFF}'        // radicals, kana, CJK punctuation, unified ideographs
                | '\u{AC00}'..='\u{D7AF}'  // Hangul syllables
                | '\u{F900}'..='\u{FAFF}'  // CJK compatibility ideographs
                | '\u{FF00}'..='\u{FFEF}'  // halfwidth / fullwidth forms
                | '\u{20000}'..='\u{2FA1F}' // supplementary ideographs
        )
    }

    if is_code {
        // Code has more tokens per character (special chars, keywords)
        text.len() / 3
    } else if text.chars().any(is_cjk) {
        // CJK scripts: count characters, not bytes or words
        text.chars().count() * 2 / 3
    } else {
        // English-like prose: count words
        text.split_whitespace().count() * 4 / 3
    }
}

/// Prints the heuristic estimate for one English, one Chinese and one code sample.
fn main() {
    // Compute all three estimates first, then report them in a fixed order.
    let english_tokens =
        better_token_estimate("Rust is great for building AI inference systems.", false);
    let chinese_tokens = better_token_estimate("Rust非常适合构建AI推理系统。", false);
    let code_tokens = better_token_estimate(
        "fn infer(x: &[f32]) -> Vec<f32> { x.iter().map(|v| v*2.0).collect() }",
        true,
    );

    println!("English: ~{} tokens", english_tokens);
    println!("Chinese: ~{} tokens", chinese_tokens);
    println!("Code:    ~{} tokens", code_tokens);
}

---

Pitfall 2: Parsing streamed JSON chunks incorrectly

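SSE streams can deliver partial JSON: a network chunk may end in the middle of a `data:` line. Parsing each chunk on its own fails; buffer the incoming text and parse only once a complete line has arrived.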

rust

/// ❌ PITFALL: Trying to parse each SSE chunk as complete JSON
///
/// Returns the `choices[0].delta.content` string when `chunk` happens to be a
/// complete JSON document containing it, and `None` otherwise.
fn bad_stream_parse(chunk: &str) -> Option<String> {
    // A partial chunk such as `{"choices":[{"delta":{"cont` is not valid JSON,
    // so parsing fails here and the text is lost.
    let parsed: serde_json::Value = serde_json::from_str(chunk).ok()?;
    let content = parsed["choices"][0]["delta"]["content"].as_str()?;
    Some(content.to_string())
}

/// ✅ CORRECT: Buffer until a complete SSE line has arrived, then parse
///
/// Incremental parser for a stream of `data: <json>` lines. Text passed to
/// [`StreamParser::feed`] is accumulated, so a line that is split across two
/// network chunks is parsed once its terminating newline shows up instead of
/// being dropped.
struct StreamParser {
    /// Bytes received so far that do not yet form a complete line.
    buffer: String,
}

impl StreamParser {
    /// Creates a parser with an empty buffer.
    fn new() -> Self { Self { buffer: String::new() } }

    /// Appends `chunk` to the internal buffer and returns the content tokens of
    /// every complete `data: ` line now available, in order.
    ///
    /// * Only newline-terminated lines are parsed; a trailing partial line is
    ///   kept for the next call.
    /// * Lines without the `data: ` prefix (blank separators, comments, other
    ///   fields) are skipped.
    /// * `data: [DONE]` ends processing of the buffered input; anything after
    ///   it is discarded.
    /// * Lines whose payload is not valid JSON, or that carry no
    ///   `choices[0].delta.content` string, contribute no token.
    fn feed(&mut self, chunk: &str) -> Vec<String> {
        self.buffer.push_str(chunk);
        let mut tokens = Vec::new();

        // Consume one complete line at a time; whatever follows the last
        // newline stays in `self.buffer` until more data arrives.
        while let Some(newline) = self.buffer.find('\n') {
            let raw: String = self.buffer.drain(..=newline).collect();
            let line = raw.trim_end_matches(|c| c == '\n' || c == '\r');

            let data = match line.strip_prefix("data: ") {
                Some(data) => data,
                None => continue,
            };
            if data == "[DONE]" {
                self.buffer.clear();
                break;
            }

            if let Ok(v) = serde_json::from_str::<serde_json::Value>(data) {
                if let Some(content) = v["choices"][0]["delta"]["content"].as_str() {
                    tokens.push(content.to_string());
                }
            }
        }
        tokens
    }
}

/// Feeds a simulated SSE stream through the parser and prints the tokens as
/// they are produced, followed by a newline.
fn main() {
    let mut parser = StreamParser::new();

    // Simulated SSE stream: two content deltas, then the end-of-stream marker.
    let stream = [
        "data: {\"choices\":[{\"delta\":{\"content\":\"Hello\"}}]}\n\n",
        "data: {\"choices\":[{\"delta\":{\"content\":\" world\"}}]}\n\n",
        "data: [DONE]\n\n",
    ];

    stream
        .iter()
        .copied()
        .flat_map(|piece| parser.feed(piece))
        .for_each(|token| print!("{}", token));
    println!();
}

---

Pitfall 3: Not handling `finish_reason: "length"`

When the model hits max_tokens, it stops mid-sentence. Without checking finish_reason, you return a truncated response as if it were complete.

rust

/// One completion choice, deserializable with serde.
#[derive(Debug, serde::Deserialize)]
struct Choice {
    /// The message generated for this choice.
    message: Message,
    /// Why generation stopped (for example "stop" or "length"), if reported.
    finish_reason: Option<String>,
}

/// Message payload of a [`Choice`]; holds the generated text in `content`.
#[derive(Debug, serde::Deserialize)]
struct Message { content: String }

/// Returns the message text of `choice` only when generation finished normally.
///
/// # Errors
///
/// * `finish_reason == "length"`: the response was cut off at `max_tokens`;
///   the error carries a preview of at most the first 50 characters.
/// * `finish_reason == "content_filter"`: the response was filtered.
/// * any other non-empty reason: reported as unexpected.
///
/// `"stop"`, `"end_turn"` and a missing `finish_reason` all yield `Ok` with a
/// copy of the content.
fn extract_response(choice: &Choice) -> Result<String, String> {
    /// Maximum number of characters of truncated output echoed in the error.
    const PREVIEW_CHARS: usize = 50;

    let content = &choice.message.content;
    match choice.finish_reason.as_deref() {
        Some("stop") | Some("end_turn") => Ok(content.clone()),
        Some("length") => {
            // Cut on character boundaries: slicing by byte offset would panic
            // when the cut lands inside a multi-byte UTF-8 character.
            let preview: String = content.chars().take(PREVIEW_CHARS).collect();
            Err(format!(
                "Response truncated (hit max_tokens). Got: '{}'",
                preview
            ))
        }
        Some("content_filter") => Err("Response filtered by content policy".to_string()),
        Some(other) => Err(format!("Unexpected finish_reason: {}", other)),
        None => Ok(content.clone()), // Streaming: None is normal
    }
}

/// Runs `extract_response` on a truncated and a complete sample and prints
/// each result with `Debug` formatting.
fn main() {
    // (content, finish_reason) pairs: first a truncated reply, then a complete one.
    let samples = [
        ("Rust is a systems programming language that", "length"),
        ("Rust is great for AI inference.", "stop"),
    ];

    for &(content, reason) in samples.iter() {
        let choice = Choice {
            message: Message { content: content.to_string() },
            finish_reason: Some(reason.to_string()),
        };
        println!("{:?}", extract_response(&choice));
    }
}

---

Pitfall 4: Retrying non-retryable errors

rust

/// LLM HTTP errors and their retry semantics
///
/// Returns `true` only for the status codes treated as transient:
///
/// | code | meaning                                              |
/// |------|------------------------------------------------------|
/// | 429  | Rate limited — retry after the `Retry-After` header  |
/// | 500  | Internal server error                                |
/// | 502  | Bad gateway                                          |
/// | 503  | Service unavailable                                  |
/// | 504  | Gateway timeout                                      |
///
/// Every other code returns `false`. That covers the client errors 400 (bad
/// request — prompt issue), 401 (unauthorized — wrong API key), 402 (payment
/// required — billing issue), 403 (forbidden) and 404 (model not found), for
/// which repeating the same request cannot help.
fn should_retry(status_code: u16) -> bool {
    matches!(status_code, 429 | 500 | 502 | 503 | 504)
}

/// Prints the retry decision for a handful of representative status codes.
fn main() {
    let sample_codes = [200, 400, 401, 429, 500, 503];
    sample_codes
        .iter()
        .for_each(|&code| println!("HTTP {}: retry={}", code, should_retry(code)));
}

---

Pitfall 5: Leaking API keys in error messages

rust

/// ❌ PITFALL: Error message contains the API key!
///
/// Builds an error string that embeds `api_key` verbatim next to `status`, so
/// anything that logs or displays the message exposes the full secret.
fn bad_error_handling(api_key: &str, status: u16) -> String {
    format_args!("API call failed with key {} and status {}", api_key, status).to_string()
}

/// ✅ CORRECT: Never include secrets in error messages or logs
///
/// Builds the error string with a masked form of `api_key`:
///
/// * keys of at least 16 bytes are shown as `<first 4>...<last 4>`, enough to
///   tell keys apart without exposing most of the secret;
/// * shorter keys, and keys that cannot be cut cleanly at those offsets, are
///   replaced entirely by `***`.
fn good_error_handling(api_key: &str, status: u16) -> String {
    /// Bytes shown at each end of a sufficiently long key.
    const VISIBLE: usize = 4;
    /// Below this length a head/tail hint would reveal half the key or more.
    const MIN_LEN_FOR_HINT: usize = 16;

    let len = api_key.len();
    let hint = if len >= MIN_LEN_FOR_HINT {
        // `get` returns `None` instead of panicking when an offset falls
        // inside a multi-byte UTF-8 character.
        api_key.get(..VISIBLE).zip(api_key.get(len - VISIBLE..))
    } else {
        None
    };

    let masked = match hint {
        Some((head, tail)) => format!("{}...{}", head, tail),
        None => "***".to_string(),
    };
    format!("API call failed with key {} and status {}", masked, status)
}

/// Prints the masked error message produced for a sample key and a 401 status.
fn main() {
    let message = good_error_handling("sk-proj-abc123secretkey789xyz", 401);
    println!("{}", message);
}

LLM Rust Pitfalls

LLM Rust Pitfalls

Pitfall 1: Naïve token counting breaks context window

Pitfall 2: Parsing streamed JSON chunks incorrectly

Pitfall 3: Not handling `finish_reason: "length"`

Pitfall 4: Retrying non-retryable errors

Pitfall 5: Leaking API keys in error messages

Related reading

Related Guides

Building LLM Applications with Rust

LLM Rust Troubleshooting

Continue in This Topic

LLM Rust Performance Tuning

LLM Rust Production Guide

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Anti-Patterns

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A