LLM Rust Testing Strategy
Testing strategies for LLM applications in Rust: mocking LLM APIs, golden-output tests, prompt regression testing, integration testing of streaming responses, and evaluating model outputs.
Topic: LLM Rust
Search intent: High-intent search: "rust llm testing mock api"
LLM Rust Testing Strategy
Testing pyramid for LLM apps
┌─────────────────────┐
│ Eval tests (few)    │ — Real LLM calls, measure output quality
├─────────────────────┤
│ Integration tests   │ — Mock LLM API, test full pipeline
├─────────────────────┤
│ Unit tests (many)   │ — Prompt builders, parsers, validators
└─────────────────────┘
Unit tests: prompt builders and parsers
/// Builds the chat messages for a question-answering prompt.
///
/// Returns two `(role, content)` pairs in order: a `"system"` message that
/// carries `system` verbatim, then a `"user"` message that embeds `context`
/// and `question` under `Context:` and `Question:` labels separated by a
/// blank line.
fn build_qa_prompt(system: &str, context: &str, question: &str) -> Vec<(String, String)> {
    let system_message = (String::from("system"), String::from(system));

    // Assemble the user content piece by piece instead of via `format!`.
    let mut user_content = String::from("Context: ");
    user_content.push_str(context);
    user_content.push_str("\n\nQuestion: ");
    user_content.push_str(question);
    let user_message = (String::from("user"), user_content);

    Vec::from([system_message, user_message])
}
/// Parses a JSON value out of raw LLM output.
///
/// LLMs sometimes wrap JSON in Markdown code blocks, and often add prose
/// before or after the block. Two strategies are tried in order:
///
/// 1. Strip a triple-backtick fence (optionally tagged `json`) from the very
///    start and end of the trimmed text and parse what remains.
/// 2. If that fails, parse the contents of the first fenced block found
///    anywhere in the text, ignoring a `json` tag in any letter case.
///
/// # Errors
///
/// Returns `"Parse error: <details>"` when neither strategy yields valid
/// JSON. The details always come from the first strategy.
fn parse_json_from_llm(raw: &str) -> Result<serde_json::Value, String> {
    // Strategy 1: bare JSON, or a fence that spans the whole response.
    let clean = raw
        .trim()
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim();
    let first_err = match serde_json::from_str::<serde_json::Value>(clean) {
        Ok(value) => return Ok(value),
        Err(e) => e,
    };

    // Strategy 2: a fenced block embedded in surrounding prose, or tagged
    // with something like `JSON` that the exact-match strip above misses.
    if let Some(candidate) = fenced_payload(raw) {
        if let Ok(value) = serde_json::from_str::<serde_json::Value>(candidate) {
            return Ok(value);
        }
    }

    Err(format!("Parse error: {}", first_err))
}

/// Returns the trimmed text inside the first triple-backtick fence in `raw`,
/// or `None` when `raw` contains no fence. A missing closing fence is treated
/// as "runs to the end of the text".
fn fenced_payload(raw: &str) -> Option<&str> {
    const FENCE: &str = "```";
    const TAG_LEN: usize = "json".len();

    let open = raw.find(FENCE)? + FENCE.len();
    let after_open = &raw[open..];

    // `get` yields `None` for a too-short or non-char-boundary range, so this
    // never panics on multi-byte text right after the fence.
    let body = match after_open.get(..TAG_LEN) {
        Some(tag) if tag.eq_ignore_ascii_case("json") => &after_open[TAG_LEN..],
        _ => after_open,
    };

    let close = body.find(FENCE).unwrap_or(body.len());
    Some(body[..close].trim())
}
/// Unit tests for the prompt builder and the LLM-output JSON parser.
#[cfg(test)]
mod unit_tests {
    use super::*;

    /// The prompt is a system message followed by a labelled user message.
    #[test]
    fn test_prompt_structure() {
        let messages = build_qa_prompt("You are helpful.", "Rust is fast.", "Why is Rust fast?");
        assert_eq!(messages.len(), 2);

        let (system_role, _) = &messages[0];
        let (user_role, user_content) = &messages[1];
        assert_eq!(system_role.as_str(), "system");
        assert_eq!(user_role.as_str(), "user");
        for label in ["Context:", "Question:"] {
            assert!(user_content.contains(label));
        }
    }

    /// Bare JSON parses without any unwrapping.
    #[test]
    fn test_parse_clean_json() {
        let raw = r#"{"answer": "Rust is fast due to zero-cost abstractions."}"#;
        let parsed = parse_json_from_llm(raw).unwrap();
        assert_eq!(
            parsed["answer"].as_str(),
            Some("Rust is fast due to zero-cost abstractions.")
        );
    }

    /// JSON wrapped in a Markdown code fence is unwrapped before parsing.
    #[test]
    fn test_parse_markdown_wrapped_json() {
        let fenced = "```json\n{\"score\": 95}\n```";
        let parsed = parse_json_from_llm(fenced).unwrap();
        assert_eq!(parsed["score"].as_i64(), Some(95));
    }

    /// Plain prose is reported as an error rather than a value.
    #[test]
    fn test_parse_invalid_json_returns_error() {
        let outcome = parse_json_from_llm("This is not JSON at all");
        assert!(outcome.is_err());
    }
}
/// Demo entry point: builds a sample QA prompt and prints how many messages
/// it contains.
fn main() {
    // Run: cargo test
    let messages =
        build_qa_prompt("Be concise.", "Rust has a borrow checker.", "What does Rust have?");
    let message_count = messages.len();
    println!("Prompt messages: {}", message_count);
}
Mock LLM client for integration tests
use std::collections::HashMap;
use async_trait::async_trait;
/// Abstraction over an LLM completion backend.
///
/// Implementors must be `Send + Sync`. The method is declared `async` through
/// the `async_trait` attribute macro.
#[async_trait]
trait LlmClient: Send + Sync {
    /// Produces a completion for `prompt`: `Ok` carries the response text,
    /// `Err` carries an error message.
    async fn complete(&self, prompt: &str) -> Result<String, String>;
}
/// Production client (uses real API)
///
/// In this example the HTTP call is stubbed out: `complete` only echoes a
/// short preview of the prompt.
struct OpenAiClient {
    /// API key for the real service; the stubbed `complete` does not read it.
    api_key: String,
}

#[async_trait]
impl LlmClient for OpenAiClient {
    /// Returns a placeholder response that echoes at most the first 20 bytes
    /// of `prompt`, shortened further if needed so the cut never lands inside
    /// a multi-byte character. Always returns `Ok`.
    async fn complete(&self, prompt: &str) -> Result<String, String> {
        // Real HTTP call in production
        const PREVIEW_BYTES: usize = 20;

        // Slicing a `str` at a byte index inside a multi-byte character
        // panics, so back off to the nearest char boundary. Index 0 is always
        // a boundary, which guarantees the loop terminates.
        let mut end = prompt.len().min(PREVIEW_BYTES);
        while !prompt.is_char_boundary(end) {
            end -= 1;
        }

        Ok(format!("OpenAI response to: {}", &prompt[..end]))
    }
}
/// Mock client for tests — returns pre-programmed responses
struct MockLlmClient {
    /// Canned responses, keyed by the substring a prompt must contain.
    responses: HashMap<String, String>,
    /// Response returned when no key matches the prompt.
    default_response: String,
}

impl MockLlmClient {
    /// Creates a mock with no canned responses that answers every prompt
    /// with `default`.
    fn new(default: &str) -> Self {
        let default_response = String::from(default);
        let responses = HashMap::new();
        Self {
            responses,
            default_response,
        }
    }

    /// Builder-style registration: prompts containing `prompt_contains` get
    /// `response`. Registering the same substring again replaces the earlier
    /// response.
    fn with_response(mut self, prompt_contains: &str, response: &str) -> Self {
        let (pattern, canned) = (String::from(prompt_contains), String::from(response));
        self.responses.insert(pattern, canned);
        self
    }
}
#[async_trait]
impl LlmClient for MockLlmClient {
    /// Returns the canned response whose pattern occurs in `prompt`, or the
    /// default response when none does. Always returns `Ok`.
    ///
    /// When several patterns match, the longest (most specific) one wins and
    /// ties are broken by taking the lexicographically smallest pattern.
    /// `HashMap` iteration order is arbitrary, so returning the first hit
    /// would make the mock's answer vary between runs.
    async fn complete(&self, prompt: &str) -> Result<String, String> {
        let best_match = self
            .responses
            .iter()
            .filter(|(pattern, _)| prompt.contains(pattern.as_str()))
            // `max_by` keeps the "greatest" entry: longer patterns rank
            // higher, and for equal lengths the smaller string ranks higher
            // (hence the reversed `b.cmp(a)`).
            .max_by(|(a, _), (b, _)| a.len().cmp(&b.len()).then_with(|| b.cmp(a)));

        Ok(match best_match {
            Some((_, response)) => response.clone(),
            None => self.default_response.clone(),
        })
    }
}
/// Application logic that uses LlmClient — testable via trait object
struct QaBot {
    /// Backend that produces the answers; any `LlmClient` implementation.
    client: Box<dyn LlmClient>,
}

impl QaBot {
    /// Creates a bot that delegates to `client`.
    fn new(client: Box<dyn LlmClient>) -> Self {
        QaBot { client }
    }

    /// Prefixes `question` with the `Answer concisely: ` instruction, sends
    /// the resulting prompt to the client, and returns the client's result
    /// unchanged.
    async fn answer(&self, question: &str) -> Result<String, String> {
        let mut prompt = String::from("Answer concisely: ");
        prompt.push_str(question);
        self.client.complete(prompt.as_str()).await
    }
}
/// Demo: answers a few sample questions through `QaBot` backed by the mock
/// client and prints each question/answer pair.
#[tokio::main]
async fn main() {
    // Test with mock client
    let canned = [
        ("ownership", "Ownership ensures each value has one owner in Rust."),
        ("async", "Rust async uses Futures and the .await syntax."),
    ];
    let mock = canned
        .iter()
        .fold(MockLlmClient::new("I don't know"), |client, (pattern, response)| {
            client.with_response(pattern, response)
        });
    let bot = QaBot::new(Box::new(mock));

    let questions = [
        "What is ownership in Rust?",
        "How does async work?",
        "What is 42?",
    ];
    for question in questions.iter() {
        let answer = bot.answer(question).await.unwrap();
        println!("Q: {}\nA: {}\n", question, answer);
    }
}
Golden output regression tests
/// Record expected outputs and alert on changes
struct GoldenTests {
    /// Cases evaluated, in order, by [`GoldenTests::run`].
    tests: Vec<GoldenTestCase>,
}

/// One golden case: a prompt plus the phrases its output must and must not
/// contain.
struct GoldenTestCase {
    /// Label used in the printed pass/fail lines.
    name: String,
    /// Prompt handed to the generator.
    prompt: String,
    /// Phrases that must all appear in the output (case-insensitive).
    expected_contains: Vec<String>,
    /// Phrases that must not appear in the output (case-insensitive).
    expected_not_contains: Vec<String>,
}

impl GoldenTests {
    /// Runs every case through `generate` and checks its output.
    ///
    /// A case passes when the output contains every `expected_contains`
    /// phrase and none of the `expected_not_contains` phrases, compared
    /// case-insensitively. One line is printed per violated expectation and
    /// one per passing case.
    ///
    /// Returns `(passed, failed)` counts.
    fn run(&self, generate: impl Fn(&str) -> String) -> (usize, usize) {
        let mut passed = 0;
        let mut failed = 0;
        for test in &self.tests {
            // Lowercase the output once per case rather than once per
            // expectation.
            let output = generate(&test.prompt).to_lowercase();
            let mut ok = true;
            for must_contain in &test.expected_contains {
                if !output.contains(&must_contain.to_lowercase()) {
                    println!("❌ [{}] Missing: '{}'", test.name, must_contain);
                    ok = false;
                }
            }
            for must_not in &test.expected_not_contains {
                if output.contains(&must_not.to_lowercase()) {
                    println!("❌ [{}] Should not contain: '{}'", test.name, must_not);
                    ok = false;
                }
            }
            if ok {
                println!("✅ [{}] passed", test.name);
                passed += 1;
            } else {
                failed += 1;
            }
        }
        (passed, failed)
    }
}
/// Demo: runs one golden case against a canned response and prints the
/// pass/fail tally.
fn main() {
    let ownership_case = GoldenTestCase {
        name: String::from("ownership_explanation"),
        prompt: String::from("Explain Rust ownership"),
        expected_contains: ["owner", "memory"].iter().map(|s| s.to_string()).collect(),
        expected_not_contains: vec![String::from("garbage collector")],
    };
    let golden = GoldenTests {
        tests: vec![ownership_case],
    };

    let (passed, failed) = golden.run(|_prompt| {
        // Simulate LLM output
        "Rust ownership means each value has exactly one owner and memory is freed when owner drops."
            .to_string()
    });

    println!("\nResults: {} passed, {} failed", passed, failed);
}