Rust By Example

LLM Rust Team Workflow

Team practices for building and maintaining LLM applications in Rust: prompt engineering workflow, model evaluation, deployment pipelines, incident response, and cost governance.

Topic: Llm Rust

Search intent: High-intent search: "rust llm team practices mlops"

LLM Rust Team Workflow

Prompt development lifecycle

rust
1. Draft prompt in dev environment
   ↓
2. Test against golden eval dataset (offline)
   ↓
3. A/B test in staging (5% traffic)
   ↓
4. Review metrics: quality score, latency, cost
   ↓
5. Promote to production (gradual rollout)
   ↓
6. Monitor production metrics
   ↓
7. Version and tag in git

Runnable example — evaluation harness

rust
use std::time::Instant;

/// One test case for prompt evaluation.
///
/// The harness sends `input` to the model and then applies substring checks
/// to the returned text. Substring checks are case-insensitive.
#[derive(Debug, Clone)]
struct EvalCase {
    // Identifier copied into the matching result so failures can be traced back.
    id: String,
    // Prompt text handed to the model function.
    input: String,
    // Substrings that must appear in the model output; one check per entry.
    expected_output_contains: Vec<String>,
    // Substrings that must NOT appear in the model output; one check per entry.
    expected_output_not_contains: Vec<String>,
    // NOTE(review): not read by the harness code visible here — presumably
    // intended for a human or model-graded scoring pass; confirm.
    quality_rubric: Vec<String>, // Instructions for scoring
}

/// Outcome of running one evaluation case through the model.
#[derive(Debug)]
struct EvalResult {
    // Identifier of the case this result belongs to.
    case_id: String,
    // Leading portion of the model output (at most 100 bytes), kept for reports.
    output: String,
    // Wall-clock time spent awaiting the model call, in milliseconds.
    latency_ms: f64,
    // Rough token estimate for the prompt: byte length divided by 4.
    input_tokens: u32,
    // Rough token estimate for the full model output: byte length divided by 4.
    output_tokens: u32,
    // Number of substring checks that succeeded.
    passed_checks: usize,
    // Number of substring checks that were run (contains + not-contains).
    total_checks: usize,
    // Fraction of checks that passed; 1.0 when the case defines no checks.
    score: f64, // 0.0 to 1.0
}

impl EvalResult {
    /// Minimum score (inclusive) at which a case counts as passed.
    const PASS_THRESHOLD: f64 = 0.8;

    /// Returns `true` when this result's score reaches the pass threshold.
    ///
    /// A NaN score never passes, because every comparison with NaN is false.
    fn passed(&self) -> bool {
        self.score >= Self::PASS_THRESHOLD
    }
}

/// Runs a fixed list of evaluation cases against a model function.
struct EvalHarness {
    // Cases to evaluate; they are run one after another in this order.
    cases: Vec<EvalCase>,
}

impl EvalHarness {
    /// Upper bound, in bytes, on the output preview stored in each result.
    const PREVIEW_MAX_BYTES: usize = 100;

    /// Creates a harness that will evaluate `cases` in order.
    fn new(cases: Vec<EvalCase>) -> Self {
        Self { cases }
    }

    /// Runs every case through `model_fn` sequentially and scores the output.
    ///
    /// For each case the model output is checked, case-insensitively, against
    /// the required and forbidden substrings. The score is the fraction of
    /// checks that passed, or `1.0` when the case defines no checks. Token
    /// counts are rough estimates (byte length / 4).
    ///
    /// `model_fn` must return a future that does not borrow the prompt: the
    /// bounds require one future type for every `&str` lifetime.
    async fn run<F, Fut>(&self, model_fn: F) -> Vec<EvalResult>
    where
        F: Fn(&str) -> Fut,
        Fut: std::future::Future<Output = String>,
    {
        let mut results = Vec::with_capacity(self.cases.len());

        for case in &self.cases {
            let started = Instant::now();
            let output = model_fn(&case.input).await;
            let latency_ms = started.elapsed().as_secs_f64() * 1000.0;

            // Lowercase the output once instead of once per check.
            let haystack = output.to_lowercase();
            let found = case
                .expected_output_contains
                .iter()
                .filter(|needle| haystack.contains(&needle.to_lowercase()))
                .count();
            let absent = case
                .expected_output_not_contains
                .iter()
                .filter(|needle| !haystack.contains(&needle.to_lowercase()))
                .count();

            let passed = found + absent;
            let total =
                case.expected_output_contains.len() + case.expected_output_not_contains.len();
            let score = if total == 0 {
                1.0
            } else {
                passed as f64 / total as f64
            };

            results.push(EvalResult {
                case_id: case.id.clone(),
                output: Self::preview(&output).to_string(),
                latency_ms,
                input_tokens: Self::estimate_tokens(&case.input),
                output_tokens: Self::estimate_tokens(&output),
                passed_checks: passed,
                total_checks: total,
                score,
            });
        }

        results
    }

    /// Prints a summary of `results` to stdout: pass rate, average score,
    /// average latency, and one line per failed case.
    ///
    /// An empty slice prints a short notice instead of NaN averages.
    fn print_report(results: &[EvalResult]) {
        println!("=== Eval Report ===");

        let n = results.len();
        if n == 0 {
            // Averaging over zero results would divide by zero and print NaN.
            println!("No results to report.");
            return;
        }

        let passed = results.iter().filter(|r| r.passed()).count();
        let avg_score = results.iter().map(|r| r.score).sum::<f64>() / n as f64;
        let avg_latency = results.iter().map(|r| r.latency_ms).sum::<f64>() / n as f64;

        println!("Passed: {}/{} ({:.0}%)", passed, n, passed as f64 / n as f64 * 100.0);
        println!("Avg score: {:.2} | Avg latency: {:.0}ms", avg_score, avg_latency);
        println!("\nFailed cases:");
        for result in results.iter().filter(|r| !r.passed()) {
            println!("  ❌ [{}] score={:.2} output='{}'", result.case_id, result.score, result.output);
        }
    }

    /// Returns the longest prefix of `output` that is at most
    /// `PREVIEW_MAX_BYTES` long and ends on a UTF-8 character boundary.
    fn preview(output: &str) -> &str {
        let mut end = output.len().min(Self::PREVIEW_MAX_BYTES);
        // Slicing inside a multi-byte character panics, so back up to the
        // nearest boundary. Index 0 is always a boundary, so this terminates.
        while !output.is_char_boundary(end) {
            end -= 1;
        }
        &output[..end]
    }

    /// Rough token estimate: one token per four bytes, saturating at `u32::MAX`.
    fn estimate_tokens(text: &str) -> u32 {
        u32::try_from(text.len() / 4).unwrap_or(u32::MAX)
    }
}

/// Builds a two-case harness, runs it against a canned stand-in for a model,
/// and prints the evaluation report.
#[tokio::main]
async fn main() {
    let harness = EvalHarness::new(vec![
        EvalCase {
            id: "ownership_1".to_string(),
            input: "What is Rust ownership?".to_string(),
            expected_output_contains: vec!["owner".to_string(), "memory".to_string()],
            expected_output_not_contains: vec!["garbage collector".to_string()],
            quality_rubric: vec!["Accurate".to_string(), "Concise".to_string()],
        },
        EvalCase {
            id: "async_1".to_string(),
            input: "How does async work in Rust?".to_string(),
            expected_output_contains: vec!["future".to_string(), "await".to_string()],
            expected_output_not_contains: vec!["threading".to_string()],
            quality_rubric: vec!["Technically correct".to_string()],
        },
    ]);

    // `run` needs a single future type for every `&str` lifetime, so the
    // returned future must not borrow `prompt`. Choose the canned reply
    // synchronously and hand back a future that owns it.
    let results = harness
        .run(|prompt| {
            // Simulate model output
            let canned: &'static str = if prompt.contains("ownership") {
                "Rust ownership: each value has one owner; memory is freed when owner is dropped."
            } else {
                "Async in Rust uses Future trait and await syntax for non-blocking I/O."
            };
            let reply = canned.to_string();
            async move { reply }
        })
        .await;

    EvalHarness::print_report(&results);
}

Cost governance

rust
use std::sync::atomic::{AtomicU64, Ordering};

/// Track and enforce token budgets per team/feature.
///
/// Spend is accumulated in integer micro-cents (1 cent = 1,000,000
/// micro-cents) so that requests costing less than one cent still count
/// against the budget and no floating-point drift accumulates.
struct CostGovernor {
    // Daily spending limit, in whole cents.
    daily_budget_cents: u64,
    // Amount spent so far today, in micro-cents (avoid floating point).
    spent_today: AtomicU64,
}

impl CostGovernor {
    /// Number of micro-cents in one cent.
    const MICRO_CENTS_PER_CENT: u64 = 1_000_000;
    /// Number of micro-cents in one US dollar, as a float for display math.
    const MICRO_CENTS_PER_USD: f64 = 100_000_000.0;

    /// Creates a governor with the given daily budget in US dollars.
    ///
    /// The budget is rounded to the nearest cent. Negative or NaN budgets
    /// become a zero budget, because float-to-integer casts saturate.
    fn new(daily_budget_usd: f64) -> Self {
        Self {
            // Round before casting: 0.29 * 100.0 is 28.999…, which a bare
            // cast would truncate to 28 cents.
            daily_budget_cents: (daily_budget_usd * 100.0).round() as u64,
            spent_today: AtomicU64::new(0),
        }
    }

    /// Records the cost of one request if it fits in the remaining budget.
    ///
    /// Returns `Ok(())` and adds the cost to today's spend when the new total
    /// stays within the daily budget. Otherwise returns `Err` with a message
    /// and leaves the recorded spend unchanged.
    fn record_usage(&self, model: &str, input_tokens: u32, output_tokens: u32) -> Result<(), String> {
        let cost = Self::cost_micro_cents(model, input_tokens, output_tokens);
        let budget = self.budget_micro_cents();

        // Check and add in one atomic read-modify-write so concurrent callers
        // never observe a total that is later rolled back. Relaxed ordering
        // suffices: the counter does not guard any other memory.
        self.spent_today
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |spent| {
                spent.checked_add(cost).filter(|&total| total <= budget)
            })
            .map(|_| ())
            .map_err(|spent| {
                format!(
                    "Daily budget exceeded: spent ${:.2} of ${:.2}",
                    spent as f64 / Self::MICRO_CENTS_PER_USD,
                    self.daily_budget_cents as f64 / 100.0
                )
            })
    }

    /// Returns the cost of a request in whole cents, truncated toward zero.
    ///
    /// Budget enforcement does not use this value; it works in micro-cents so
    /// that sub-cent requests are not rounded down to free.
    fn calculate_cost_cents(&self, model: &str, input: u32, output: u32) -> u64 {
        Self::cost_micro_cents(model, input, output) / Self::MICRO_CENTS_PER_CENT
    }

    /// Returns the unspent part of today's budget in US dollars (never negative).
    fn remaining_budget_usd(&self) -> f64 {
        let spent = self.spent_today.load(Ordering::Relaxed);
        self.budget_micro_cents().saturating_sub(spent) as f64 / Self::MICRO_CENTS_PER_USD
    }

    /// Daily budget converted to micro-cents, saturating on overflow.
    fn budget_micro_cents(&self) -> u64 {
        self.daily_budget_cents.saturating_mul(Self::MICRO_CENTS_PER_CENT)
    }

    /// Exact cost of a request in micro-cents.
    ///
    /// Rates are cents per 1M tokens, which is the same number as micro-cents
    /// per token, so no division is needed.
    fn cost_micro_cents(model: &str, input: u32, output: u32) -> u64 {
        let (input_rate, output_rate): (u64, u64) = match model {
            "gpt-4o-mini" => (15, 60), // $0.15/$0.60 per 1M tokens
            // "gpt-4o" at $5/$15 per 1M tokens; unknown models use the same rate.
            _ => (500, 1500),
        };
        // u32::MAX * 1500 per term is far below u64::MAX, so this cannot overflow.
        u64::from(input) * input_rate + u64::from(output) * output_rate
    }
}

/// Demo: issues five identical requests against a $50/day budget and prints
/// whether each was accepted or blocked.
fn main() {
    // $50/day budget
    let governor = CostGovernor::new(50.0);

    (0..5).for_each(|i| {
        let outcome = governor.record_usage("gpt-4o", 5000, 2000);
        if let Err(e) = outcome {
            println!("Request {}: BLOCKED — {}", i, e);
        } else {
            let remaining = governor.remaining_budget_usd();
            println!("Request {}: OK | Remaining: ${:.2}", i, remaining);
        }
    });
}

Related reading

Related Guides

Continue in This Topic

More Rust Guides