LLM Rust Team Workflow

Prompt development lifecycle

A prompt change moves through the following stages, in order:

1. Draft prompt in dev environment
   ↓
2. Test against golden eval dataset (offline)
   ↓
3. A/B test in staging (5% traffic)
   ↓
4. Review metrics: quality score, latency, cost
   ↓
5. Promote to production (gradual rollout)
   ↓
6. Monitor production metrics
   ↓
7. Version and tag in git

Runnable example — evaluation harness

rust

use std::time::Instant;

/// One test case for prompt evaluation
///
/// The harness sends `input` to the model and then checks the returned text
/// against the two substring lists below.
#[derive(Debug, Clone)]
struct EvalCase {
    /// Identifier copied into the matching `EvalResult::case_id`.
    id: String,
    /// Prompt text passed to the model function.
    input: String,
    /// Substrings the output must contain; each one counts as a separate check
    /// and is compared case-insensitively by the harness.
    expected_output_contains: Vec<String>,
    /// Substrings the output must NOT contain; each one counts as a separate
    /// check and is compared case-insensitively by the harness.
    expected_output_not_contains: Vec<String>,
    /// Free-form scoring instructions. NOTE(review): the harness code visible
    /// in this file never reads this field — presumably meant for a human or
    /// model-based grader; confirm.
    quality_rubric: Vec<String>, // Instructions for scoring
}

/// Outcome of running a single evaluation case through the model.
#[derive(Debug)]
struct EvalResult {
    /// Identifier of the case this result belongs to.
    case_id: String,
    /// Model output kept for reporting.
    output: String,
    /// Time spent waiting for the model, in milliseconds.
    latency_ms: f64,
    /// Token count attributed to the prompt.
    input_tokens: u32,
    /// Token count attributed to the model output.
    output_tokens: u32,
    /// Number of checks that succeeded.
    passed_checks: usize,
    /// Number of checks that were evaluated.
    total_checks: usize,
    /// Fraction of checks that succeeded, from 0.0 to 1.0.
    score: f64,
}

impl EvalResult {
    /// Lowest score that still counts as a pass.
    const PASS_THRESHOLD: f64 = 0.8;

    /// Returns `true` when the score reaches the pass threshold (inclusive).
    fn passed(&self) -> bool {
        self.score >= Self::PASS_THRESHOLD
    }
}

/// Runs a fixed list of evaluation cases against a model function and reports
/// how many of the substring checks each output satisfied.
struct EvalHarness {
    /// Cases executed, in order, by [`EvalHarness::run`].
    cases: Vec<EvalCase>,
}

impl EvalHarness {
    /// Maximum number of bytes of model output kept in an `EvalResult`.
    const OUTPUT_PREVIEW_BYTES: usize = 100;

    /// Creates a harness that will evaluate `cases` in the given order.
    fn new(cases: Vec<EvalCase>) -> Self {
        Self { cases }
    }

    /// Evaluates every case sequentially and returns one result per case.
    ///
    /// `model_fn` receives the case input and yields the model output. The
    /// returned future must own its data: it cannot borrow the `&str` it was
    /// called with, because `Fut` is a single type for all prompt lifetimes.
    ///
    /// Each entry of `expected_output_contains` / `expected_output_not_contains`
    /// is one case-insensitive substring check. The score is the fraction of
    /// checks passed, or `1.0` when a case defines no checks. Token counts are
    /// a rough estimate of one token per four bytes of text.
    async fn run<F, Fut>(&self, model_fn: F) -> Vec<EvalResult>
    where
        F: Fn(&str) -> Fut,
        Fut: std::future::Future<Output = String>,
    {
        let mut results = Vec::with_capacity(self.cases.len());

        for case in &self.cases {
            let started = Instant::now();
            let output = model_fn(&case.input).await;
            let latency_ms = started.elapsed().as_secs_f64() * 1000.0;

            // Lowercase the output once instead of once per check.
            let haystack = output.to_lowercase();
            let mentions = |needle: &str| haystack.contains(&needle.to_lowercase());

            let required_hits = case
                .expected_output_contains
                .iter()
                .filter(|needle| mentions(needle.as_str()))
                .count();
            let forbidden_absent = case
                .expected_output_not_contains
                .iter()
                .filter(|needle| !mentions(needle.as_str()))
                .count();

            let passed = required_hits + forbidden_absent;
            let total =
                case.expected_output_contains.len() + case.expected_output_not_contains.len();
            let score = if total == 0 { 1.0 } else { passed as f64 / total as f64 };

            results.push(EvalResult {
                case_id: case.id.clone(),
                output: Self::preview(&output).to_string(),
                latency_ms,
                input_tokens: Self::estimate_tokens(&case.input),
                output_tokens: Self::estimate_tokens(&output),
                passed_checks: passed,
                total_checks: total,
                score,
            });
        }

        results
    }

    /// Returns at most `OUTPUT_PREVIEW_BYTES` bytes of `text`, backing up to the
    /// nearest character boundary so slicing never panics on multi-byte UTF-8.
    fn preview(text: &str) -> &str {
        let mut end = text.len().min(Self::OUTPUT_PREVIEW_BYTES);
        // Index 0 is always a boundary, so this loop terminates.
        while !text.is_char_boundary(end) {
            end -= 1;
        }
        &text[..end]
    }

    /// Estimates tokens as one per four bytes, saturating at `u32::MAX` rather
    /// than silently wrapping for extremely large strings.
    fn estimate_tokens(text: &str) -> u32 {
        u32::try_from(text.len() / 4).unwrap_or(u32::MAX)
    }

    /// Builds the report text printed by [`EvalHarness::print_report`]
    /// (without a trailing newline).
    ///
    /// An empty slice produces a short notice instead of dividing by zero and
    /// reporting `NaN` averages.
    fn format_report(results: &[EvalResult]) -> String {
        let mut lines = vec!["=== Eval Report ===".to_string()];
        if results.is_empty() {
            lines.push("No results to report.".to_string());
            return lines.join("\n");
        }

        let n = results.len();
        let passed = results.iter().filter(|r| r.passed()).count();
        let avg_score = results.iter().map(|r| r.score).sum::<f64>() / n as f64;
        let avg_latency = results.iter().map(|r| r.latency_ms).sum::<f64>() / n as f64;

        lines.push(format!(
            "Passed: {}/{} ({:.0}%)",
            passed,
            n,
            passed as f64 / n as f64 * 100.0
        ));
        lines.push(format!(
            "Avg score: {:.2} | Avg latency: {:.0}ms",
            avg_score, avg_latency
        ));
        lines.push("\nFailed cases:".to_string());
        lines.extend(results.iter().filter(|r| !r.passed()).map(|r| {
            format!("  ❌ [{}] score={:.2} output='{}'", r.case_id, r.score, r.output)
        }));

        lines.join("\n")
    }

    /// Prints a summary (pass rate, average score and latency) followed by the
    /// list of failed cases to stdout.
    fn print_report(results: &[EvalResult]) {
        println!("{}", Self::format_report(results));
    }
}

/// Demo entry point: evaluates two sample cases against a canned "model" and
/// prints the resulting report.
#[tokio::main]
async fn main() {
    let harness = EvalHarness::new(vec![
        EvalCase {
            id: "ownership_1".to_string(),
            input: "What is Rust ownership?".to_string(),
            expected_output_contains: vec!["owner".to_string(), "memory".to_string()],
            expected_output_not_contains: vec!["garbage collector".to_string()],
            quality_rubric: vec!["Accurate".to_string(), "Concise".to_string()],
        },
        EvalCase {
            id: "async_1".to_string(),
            input: "How does async work in Rust?".to_string(),
            expected_output_contains: vec!["future".to_string(), "await".to_string()],
            expected_output_not_contains: vec!["threading".to_string()],
            quality_rubric: vec!["Technically correct".to_string()],
        },
    ]);

    let results = harness
        .run(|prompt| {
            // Inspect the prompt *before* building the future. `run` is generic
            // over a single future type for every prompt lifetime, so the
            // returned future must not borrow `prompt`; capturing it inside the
            // `async move` block is rejected by the borrow checker.
            let about_ownership = prompt.contains("ownership");
            async move {
                // Simulate model output
                if about_ownership {
                    "Rust ownership: each value has one owner; memory is freed when owner is dropped.".to_string()
                } else {
                    "Async in Rust uses Future trait and await syntax for non-blocking I/O.".to_string()
                }
            }
        })
        .await;

    EvalHarness::print_report(&results);
}

Cost governance

rust

use std::sync::atomic::{AtomicU64, Ordering};

/// Track and enforce token budgets per team/feature
///
/// Spend is accumulated as an integer number of micro-cents (millionths of a
/// cent), so requests costing a fraction of a cent still count against the
/// budget instead of being truncated to zero.
struct CostGovernor {
    /// Daily budget in whole cents.
    daily_budget_cents: u64,
    /// Spend recorded so far, in micro-cents (avoid floating point).
    spent_today: AtomicU64,
}

impl CostGovernor {
    /// Micro-cents in one cent. Rates are quoted in cents per one million
    /// tokens, so `tokens * rate` is already a micro-cent amount.
    const MICRO_CENTS_PER_CENT: u64 = 1_000_000;
    /// Micro-cents in one US dollar, as a float for display conversions.
    const MICRO_CENTS_PER_USD: f64 = 100_000_000.0;

    /// Creates a governor with the given daily budget in US dollars.
    ///
    /// The budget is rounded to the nearest cent (plain truncation would turn
    /// `0.29` into 28 cents because `0.29 * 100.0` is slightly below 29).
    /// Negative or NaN budgets become a zero budget via the saturating cast.
    fn new(daily_budget_usd: f64) -> Self {
        Self {
            daily_budget_cents: (daily_budget_usd * 100.0).round() as u64,
            spent_today: AtomicU64::new(0),
        }
    }

    /// Records the cost of one request against today's budget.
    ///
    /// # Errors
    ///
    /// Returns a message with the spend so far and the budget when adding this
    /// request would exceed the daily budget; the recorded spend is left
    /// unchanged in that case.
    fn record_usage(&self, model: &str, input_tokens: u32, output_tokens: u32) -> Result<(), String> {
        let cost = Self::cost_micro_cents(model, input_tokens, output_tokens);
        let budget = self.budget_micro_cents();

        // Check-and-add as a single atomic update: the counter never holds an
        // over-budget value, so a rejected request cannot make a concurrent,
        // affordable request fail. Relaxed ordering suffices because the
        // counter does not guard any other memory.
        self.spent_today
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |spent| {
                spent.checked_add(cost).filter(|&total| total <= budget)
            })
            .map(|_| ())
            .map_err(|spent| {
                format!(
                    "Daily budget exceeded: spent ${:.2} of ${:.2}",
                    spent as f64 / Self::MICRO_CENTS_PER_USD,
                    self.daily_budget_cents as f64 / 100.0
                )
            })
    }

    /// Returns the request cost in whole cents, truncated toward zero.
    ///
    /// Budget enforcement uses the exact micro-cent amount instead, because
    /// truncating each request to whole cents makes small requests free.
    fn calculate_cost_cents(&self, model: &str, input: u32, output: u32) -> u64 {
        Self::cost_micro_cents(model, input, output) / Self::MICRO_CENTS_PER_CENT
    }

    /// Returns the exact request cost in micro-cents.
    fn cost_micro_cents(model: &str, input: u32, output: u32) -> u64 {
        // (input, output) rates in cents per one million tokens.
        let (input_rate, output_rate): (u64, u64) = match model {
            "gpt-4o" => (500, 1500),    // $5 / $15 per 1M tokens
            "gpt-4o-mini" => (15, 60),  // $0.15 / $0.60 per 1M tokens
            _ => (500, 1500),           // unknown models are billed at the gpt-4o rate
        };
        // u32::MAX * 1500 is far below u64::MAX, so this cannot overflow.
        u64::from(input) * input_rate + u64::from(output) * output_rate
    }

    /// Daily budget converted to micro-cents, saturating on absurdly large budgets.
    fn budget_micro_cents(&self) -> u64 {
        self.daily_budget_cents.saturating_mul(Self::MICRO_CENTS_PER_CENT)
    }

    /// Returns the unspent part of today's budget in US dollars (never negative).
    fn remaining_budget_usd(&self) -> f64 {
        let spent = self.spent_today.load(Ordering::Relaxed);
        self.budget_micro_cents().saturating_sub(spent) as f64 / Self::MICRO_CENTS_PER_USD
    }
}

/// Demo entry point: records five identical requests against a $50/day budget
/// and prints whether each one was accepted or blocked.
fn main() {
    // $50/day budget
    let governor = CostGovernor::new(50.0);

    (0..5).for_each(|i| {
        let outcome = governor.record_usage("gpt-4o", 5000, 2000);
        if let Err(e) = outcome {
            println!("Request {}: BLOCKED — {}", i, e);
        } else {
            println!("Request {}: OK | Remaining: ${:.2}", i, governor.remaining_budget_usd());
        }
    });
}

LLM Rust Team Workflow

LLM Rust Team Workflow

Prompt development lifecycle

Runnable example — evaluation harness

Cost governance

Related reading

Related Guides

LLM Rust Production Guide

LLM Rust Maintainability

Continue in This Topic

LLM Rust Security

LLM Rust Testing Strategy

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Anti-Patterns

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A