Rust By Example

LLM Rust Pitfalls

Common pitfalls in Rust LLM applications: token counting errors, streaming buffer issues, context window overflow, JSON parsing fragility, and rate limit handling mistakes.

Topic: Llm Rust

Search intent: High-intent search: "rust llm pitfalls common mistakes"

LLM Rust Pitfalls

Pitfall 1: Naïve token counting breaks context window

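Character count ≠ token count. text.len() / 4 is only a rough estimate for English prose, and in Rust len() counts UTF-8 bytes rather than characters. In Chinese or Japanese, one character is roughly 1-2 tokens, and code varies widely because of symbols and identifiers.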

rust
/// ❌ PITFALL: Dividing the UTF-8 byte length by four is only a ballpark for
/// English prose; it can be off by 2-5x for non-English text or code.
fn bad_token_estimate(text: &str) -> usize {
    // `len()` counts bytes, not characters: "你好" is 6 bytes, so this yields 1
    // even though the text is typically around 2 tokens.
    let byte_len = text.len();
    byte_len / 4
}

/// ✅ Better: use tiktoken-rs or account for language density
///
/// Heuristic token estimate that picks one of three ratios:
/// - `is_code`: byte length divided by 3 (code is denser in tokens).
/// - any character above U+2000 present: two tokens per three characters.
///   This branch is meant for CJK and similar scripts, but any code point
///   above U+2000 (typographic dashes and quotes included) selects it.
/// - otherwise: four tokens per three whitespace-separated words.
fn better_token_estimate(text: &str, is_code: bool) -> usize {
    if is_code {
        return text.len() / 3;
    }

    let has_wide_char = text.chars().map(u32::from).any(|code_point| code_point > 0x2000);
    if has_wide_char {
        let char_count = text.chars().count();
        return char_count * 2 / 3;
    }

    // English prose: estimate from the word count.
    let word_count = text.split_whitespace().count();
    word_count * 4 / 3
}

/// Prints the heuristic estimate for an English, a Chinese, and a code sample.
fn main() {
    let english_tokens =
        better_token_estimate("Rust is great for building AI inference systems.", false);
    let chinese_tokens = better_token_estimate("Rust非常适合构建AI推理系统。", false);
    let code_tokens = better_token_estimate(
        "fn infer(x: &[f32]) -> Vec<f32> { x.iter().map(|v| v*2.0).collect() }",
        true,
    );

    println!("English: ~{} tokens", english_tokens);
    println!("Chinese: ~{} tokens", chinese_tokens);
    println!("Code:    ~{} tokens", code_tokens);
}

---

Pitfall 2: Parsing streamed JSON chunks incorrectly

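SSE streams deliver JSON in arbitrary network chunks, so a single chunk may contain only part of a JSON payload. Parsing each chunk on its own fails; buffer the input and parse only complete data lines.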

rust
/// ❌ PITFALL: Treats every SSE chunk as if it were one complete JSON document.
///
/// Returns the string at `choices[0].delta.content` when `chunk` parses as
/// JSON and that path holds a string; returns `None` otherwise.
fn bad_stream_parse(chunk: &str) -> Option<String> {
    // A partial chunk such as `{"choices":[{"delta":{"cont` is not valid JSON,
    // so the parse error is swallowed here and the text is silently lost.
    let parsed: serde_json::Value = serde_json::from_str(chunk).ok()?;
    let content = parsed["choices"][0]["delta"]["content"].as_str()?;
    Some(content.to_string())
}

/// ✅ CORRECT: Buffer until a complete SSE line has arrived, then parse it.
///
/// Network chunks can end anywhere, including in the middle of a `data:`
/// line, so unfinished input is kept in `buffer` between calls to `feed`.
struct StreamParser {
    /// Input received so far that is not yet terminated by a newline.
    buffer: String,
}

impl StreamParser {
    /// Creates a parser with an empty buffer.
    fn new() -> Self {
        Self { buffer: String::new() }
    }

    /// Appends `chunk` to the buffered input and returns the content deltas
    /// from every `data:` line that is now complete.
    ///
    /// Only newline-terminated lines are parsed; a trailing partial line stays
    /// buffered until a later chunk completes it. Lines that are not `data:`
    /// fields, that fail to parse as JSON, or that have no string at
    /// `choices[0].delta.content` are skipped. A `[DONE]` payload stops
    /// processing and discards whatever else is buffered.
    fn feed(&mut self, chunk: &str) -> Vec<String> {
        let mut tokens = Vec::new();
        self.buffer.push_str(chunk);

        // Split at the last newline: everything up to it is complete lines,
        // everything after it is a partial line to keep for the next call.
        let complete_len = match self.buffer.rfind('\n') {
            Some(last_newline) => last_newline + 1,
            None => return tokens,
        };
        let remainder = self.buffer.split_off(complete_len);
        let complete = std::mem::replace(&mut self.buffer, remainder);

        for line in complete.lines() {
            let data = match line.strip_prefix("data:") {
                // The SSE format allows one optional space after the colon.
                Some(rest) => rest.strip_prefix(' ').unwrap_or(rest),
                None => continue,
            };
            if data == "[DONE]" {
                self.buffer.clear();
                break;
            }

            // Each complete data line is expected to carry one JSON object.
            if let Ok(v) = serde_json::from_str::<serde_json::Value>(data) {
                if let Some(content) = v["choices"][0]["delta"]["content"].as_str() {
                    tokens.push(content.to_string());
                }
            }
        }
        tokens
    }
}

/// Feeds three simulated SSE chunks through the parser and prints the tokens.
fn main() {
    // Simulated SSE stream: two content deltas followed by the terminator.
    let simulated_stream = [
        "data: {\"choices\":[{\"delta\":{\"content\":\"Hello\"}}]}\n\n",
        "data: {\"choices\":[{\"delta\":{\"content\":\" world\"}}]}\n\n",
        "data: [DONE]\n\n",
    ];

    let mut parser = StreamParser::new();
    for raw_chunk in simulated_stream.iter().copied() {
        let tokens = parser.feed(raw_chunk);
        tokens.iter().for_each(|token| print!("{}", token));
    }
    println!();
}

---

Pitfall 3: Not handling `finish_reason: "length"`

When the model hits max_tokens, it stops mid-sentence. Without checking finish_reason, you return a truncated response as if it were complete.

rust
/// One completion choice; deserializable with serde.
#[derive(Debug, serde::Deserialize)]
struct Choice {
    /// The message carried by this choice.
    message: Message,
    /// Why generation stopped, if reported (for example `"stop"` or `"length"`).
    finish_reason: Option<String>,
}

/// Message payload holding the text in `content`.
#[derive(Debug, serde::Deserialize)]
struct Message { content: String }

/// Returns the message text only when the model finished normally.
///
/// - `"stop"` / `"end_turn"`: `Ok` with the full content.
/// - `"length"`: `Err` carrying a preview of at most 50 characters, because
///   the response was cut off at `max_tokens`.
/// - `"content_filter"`: `Err` describing the policy filter.
/// - any other reason: `Err` naming the unexpected value.
/// - `None`: `Ok` with the content (normal for streaming deltas).
fn extract_response(choice: &Choice) -> Result<String, String> {
    // Maximum number of characters of truncated output echoed in the error.
    const PREVIEW_CHARS: usize = 50;

    let content = &choice.message.content;
    match choice.finish_reason.as_deref() {
        Some("stop") | Some("end_turn") => Ok(content.clone()),
        Some("length") => {
            // Truncate by characters, not bytes: slicing `&content[..50]`
            // panics when byte 50 falls inside a multi-byte character, which
            // is common for non-English output.
            let preview: String = content.chars().take(PREVIEW_CHARS).collect();
            Err(format!(
                "Response truncated (hit max_tokens). Got: '{}'",
                preview
            ))
        }
        Some("content_filter") => Err("Response filtered by content policy".to_string()),
        Some(other) => Err(format!("Unexpected finish_reason: {}", other)),
        None => Ok(content.clone()), // Streaming: None is normal
    }
}

/// Shows the result for one truncated and one complete response.
fn main() {
    // Builds a choice with the given text and finish reason.
    let make_choice = |text: &str, reason: &str| Choice {
        message: Message { content: text.to_string() },
        finish_reason: Some(reason.to_string()),
    };

    let truncated = make_choice("Rust is a systems programming language that", "length");
    println!("{:?}", extract_response(&truncated));

    let complete = make_choice("Rust is great for AI inference.", "stop");
    println!("{:?}", extract_response(&complete));
}

---

Pitfall 4: Retrying non-retryable errors

rust
/// LLM HTTP errors and their retry semantics
///
/// ✅ Retryable (transient): 429 rate limited (retry after the `Retry-After`
/// header), 500 internal server error, 502 bad gateway, 503 service
/// unavailable, 504 gateway timeout.
///
/// ❌ Not retryable (client errors): 400 bad request (prompt issue),
/// 401 unauthorized (wrong API key), 402 payment required (billing issue),
/// 403 forbidden, 404 model not found. Every other status is also treated as
/// not retryable.
fn should_retry(status_code: u16) -> bool {
    matches!(status_code, 429 | 500 | 502 | 503 | 504)
}

/// Prints the retry decision for a handful of sample status codes.
fn main() {
    let sample_statuses: [u16; 6] = [200, 400, 401, 429, 500, 503];
    for status in sample_statuses.iter().copied() {
        let retry = should_retry(status);
        println!("HTTP {}: retry={}", status, retry);
    }
}

---

Pitfall 5: Leaking API keys in error messages

rust
/// ❌ PITFALL: Error message contains the API key!
///
/// Kept as a counter-example: the returned string embeds `api_key` verbatim.
fn bad_error_handling(api_key: &str, status: u16) -> String {
    // The secret is interpolated unmasked, so it ends up wherever the message
    // is shown.
    let leaked_secret = api_key;
    format!("API call failed with key {} and status {}", leaked_secret, status)
}

/// ✅ CORRECT: Never include secrets in error messages or logs
///
/// Builds the failure message with the key masked. A long key is shown as its
/// first and last four characters joined by `...`; a key too short to leave at
/// least twelve characters hidden is replaced entirely by `***`, so a short
/// key is never mostly revealed.
fn good_error_handling(api_key: &str, status: u16) -> String {
    // Characters shown at each end of a partially revealed key.
    const VISIBLE_PER_SIDE: usize = 4;
    // Minimum number of characters that must stay hidden for a partial reveal.
    const MIN_HIDDEN: usize = 12;

    // Work in characters, not bytes: slicing a `&str` at a byte offset that
    // falls inside a multi-byte character panics.
    let chars: Vec<char> = api_key.chars().collect();
    let masked = if chars.len() >= 2 * VISIBLE_PER_SIDE + MIN_HIDDEN {
        let head: String = chars[..VISIBLE_PER_SIDE].iter().collect();
        let tail: String = chars[chars.len() - VISIBLE_PER_SIDE..].iter().collect();
        format!("{}...{}", head, tail)
    } else {
        "***".to_string()
    };
    format!("API call failed with key {} and status {}", masked, status)
}

/// Prints the masked error message for a sample key and a 401 status.
fn main() {
    let message = good_error_handling("sk-proj-abc123secretkey789xyz", 401);
    println!("{}", message);
}

Related reading

Related Guides

Continue in This Topic

More Rust Guides