LLM Rust Real-World Cases
Real-world LLM application patterns in Rust: RAG pipelines, code generation APIs, document summarization, chatbot backends, and AI-powered search.
Topic: LLM Rust
Search intent: High-intent search: "rust llm real world application examples"
LLM Rust Real-World Cases
Case 1: RAG (Retrieval-Augmented Generation) pipeline
use std::collections::BinaryHeap;
/// Document chunk for RAG indexing: one piece of a source document together
/// with its embedding.
#[derive(Debug, Clone)]
struct DocumentChunk {
    /// Identifier of the form `"{source}-{index}"`, where `index` is the
    /// position of this piece in the document's `". "` split.
    id: String,
    /// Chunk text exactly as it was split out of the document.
    text: String,
    /// Embedding produced for `text` by the caller-supplied embedding function.
    embedding: Vec<f32>,
    /// Name of the document this chunk came from.
    source: String,
}

/// Vector store for RAG retrieval.
///
/// Chunks are kept in insertion order in a plain `Vec` and searched by
/// scoring every chunk against the query (brute force).
#[derive(Debug, Default)]
struct RagVectorStore {
    chunks: Vec<DocumentChunk>,
}

impl RagVectorStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self::default()
    }

    /// Splits `text` into chunks, embeds each one with `embed_fn`, and stores
    /// the results tagged with `source`.
    ///
    /// Chunking is deliberately naive: the text is split on `". "`, so each
    /// chunk is roughly one sentence. Whitespace-only pieces are skipped, but
    /// they still consume an index, so chunk ids may have gaps.
    fn add_document(&mut self, source: &str, text: &str, embed_fn: impl Fn(&str) -> Vec<f32>) {
        for (i, chunk) in text.split(". ").enumerate() {
            if chunk.trim().is_empty() {
                continue;
            }
            self.chunks.push(DocumentChunk {
                id: format!("{}-{}", source, i),
                text: chunk.to_string(),
                embedding: embed_fn(chunk),
                source: source.to_string(),
            });
        }
    }

    /// Returns up to `top_k` chunks ordered by descending cosine similarity
    /// to `query_embedding`.
    ///
    /// Chunks with equal scores keep their insertion order (the sort is
    /// stable). A chunk whose score is NaN (for example because its embedding
    /// contains NaN) is ranked last instead of aborting the search.
    fn retrieve(&self, query_embedding: &[f32], top_k: usize) -> Vec<&DocumentChunk> {
        let mut scored: Vec<(usize, f32)> = self
            .chunks
            .iter()
            .enumerate()
            .map(|(i, c)| {
                let score = Self::cosine_similarity(query_embedding, &c.embedding);
                // NaN has no defined order; push such chunks to the end so
                // the comparison below is always well defined.
                (i, if score.is_nan() { f32::NEG_INFINITY } else { score })
            })
            .collect();
        // NaN was replaced above, so `partial_cmp` cannot return `None` here;
        // the fallback only exists to keep this path panic-free.
        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        scored
            .into_iter()
            .take(top_k)
            .map(|(i, _)| &self.chunks[i])
            .collect()
    }

    /// Cosine similarity of `a` and `b`; `0.0` when either vector is
    /// (near-)zero length.
    ///
    /// If the slices differ in length, the dot product covers only the
    /// shared prefix while each norm is taken over the full slice.
    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if na < 1e-8 || nb < 1e-8 {
            0.0
        } else {
            dot / (na * nb)
        }
    }
}
/// Assembles the prompt sent to the LLM for a RAG query.
///
/// Each chunk is rendered as `[source] text`, chunks are separated by a blank
/// line, and the result is placed under a fixed instruction that tells the
/// model to answer only from the supplied context.
fn build_rag_prompt(query: &str, context_chunks: &[&DocumentChunk]) -> String {
    // Render the context section: one "[source] text" entry per chunk.
    let mut context = String::new();
    for (pos, chunk) in context_chunks.iter().enumerate() {
        if pos > 0 {
            context.push_str("\n\n");
        }
        context.push_str(&format!("[{}] {}", chunk.source, chunk.text));
    }

    let mut prompt = String::from(
        "Answer the question based on the context below. If the answer is not in the context, \
         say 'I don't know based on the provided context.'\n\n",
    );
    prompt.push_str("Context:\n");
    prompt.push_str(&context);
    prompt.push_str("\n\nQuestion: ");
    prompt.push_str(query);
    prompt.push_str("\n\nAnswer:");
    prompt
}
/// Deterministic mock embedding for demos: maps `text` to an 8-dimensional
/// unit vector (or the zero vector for empty text).
///
/// Each character's Unicode scalar value, divided by 1000, is added to the
/// slot at `char_index % 8`; the vector is then scaled to unit length.
/// This is not a real embedding and carries no semantic meaning.
fn mock_embed(text: &str) -> Vec<f32> {
    const DIMS: usize = 8;
    let mut v = vec![0.0f32; DIMS];
    for (i, c) in text.chars().enumerate() {
        // A `char` cannot be cast straight to a float; go through `u32`.
        // Every scalar value is below 2^24, so the conversion is exact.
        v[i % DIMS] += c as u32 as f32 / 1000.0;
    }
    // Clamp the norm so an all-zero vector does not divide by zero.
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-8);
    for x in v.iter_mut() {
        *x /= norm;
    }
    v
}
/// Demo driver for the RAG case: indexes two small documents, retrieves the
/// two chunks closest to a query, and prints them with an excerpt of the
/// resulting prompt.
fn main() {
    let mut store = RagVectorStore::new();
    store.add_document(
        "rust-book",
        "Rust uses ownership to manage memory. Each value has one owner. \
         When the owner goes out of scope, the value is dropped. \
         Borrowing allows references without taking ownership.",
        mock_embed,
    );
    store.add_document(
        "tokio-docs",
        "Tokio is an async runtime for Rust. It uses a work-stealing thread pool. \
         The async/await syntax makes concurrent code readable. \
         Tasks are lightweight and can number in the millions.",
        mock_embed,
    );

    let query = "How does Rust manage memory?";
    let query_emb = mock_embed(query);
    let chunks = store.retrieve(&query_emb, 2);

    println!("Query: {}", query);
    println!("\nRetrieved {} chunks:", chunks.len());
    for chunk in &chunks {
        // Truncate by characters, not bytes: slicing a `str` at a byte offset
        // panics if the offset falls inside a multi-byte character.
        let preview: String = chunk.text.chars().take(50).collect();
        println!(" [{}] {}...", chunk.source, preview);
    }

    let prompt = build_rag_prompt(query, &chunks);
    // Same reasoning as above; this also copes with prompts shorter than the
    // excerpt length.
    let excerpt: String = prompt.chars().take(200).collect();
    println!("\nRAG prompt ({} chars):\n{}", prompt.chars().count(), excerpt);
}
---
Case 2: Code generation and review API
use serde::{Deserialize, Serialize};
/// Input for a code review: the code to inspect and what to look for.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewRequest {
    /// Source code to be reviewed.
    code: String,
    /// Language name; the prompt builder inserts it into the instruction
    /// text and uses it as the code-fence tag.
    language: String,
    /// Review focus areas, e.g. ["security", "performance", "correctness"].
    focus: Vec<String>,
}
/// Outcome of a code review.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewResult {
    /// Individual findings.
    issues: Vec<ReviewIssue>,
    /// Quality score in the range 0-100.
    score: u8,
    /// Short overview of the review.
    summary: String,
}
/// A single finding reported by a code review.
#[derive(Debug, Serialize, Deserialize)]
struct ReviewIssue {
    /// One of "critical", "warning", "info".
    severity: String,
    /// Lines the issue applies to, if known.
    /// NOTE(review): presumably (start, end) line numbers; the simulated
    /// reviewer in this example always leaves it as `None` — confirm.
    line_range: Option<(u32, u32)>,
    /// Description of the problem.
    message: String,
    /// Recommended fix.
    suggestion: String,
}
/// Builds the LLM prompt for a code review request.
///
/// The prompt names the language and the comma-separated focus areas,
/// describes the JSON shape expected in the reply, and ends with the code
/// inside a fenced block tagged with the language.
fn build_code_review_prompt(req: &CodeReviewRequest) -> String {
    let mut prompt = String::new();

    // Instruction line.
    prompt.push_str("Review the following ");
    prompt.push_str(&req.language);
    prompt.push_str(" code for: ");
    prompt.push_str(&req.focus.join(", "));
    prompt.push_str(".\n\n");

    // Expected response format.
    prompt.push_str("Return a JSON response with:\n");
    prompt.push_str("- issues: array of {severity, line_range, message, suggestion}\n");
    prompt.push_str("- score: 0-100 quality score\n");
    prompt.push_str("- summary: one-sentence overview\n\n");

    // The code under review, fenced and tagged with its language.
    prompt.push_str("Code:\n```");
    prompt.push_str(&req.language);
    prompt.push('\n');
    prompt.push_str(&req.code);
    prompt.push_str("\n```");

    prompt
}
fn parse_simulated_review(code: &str) -> CodeReviewResult {
// Simulate LLM analysis
let has_unwrap = code.contains("unwrap()");
let has_clone = code.contains(".clone()");
let mut issues = Vec::new();
if has_unwrap {
issues.push(ReviewIssue {
severity: "warning".to_string(),
line_range: None,
message: "unwrap() will panic on None/Err".to_string(),
suggestion: "Use ? operator or match for proper error handling".to_string(),
});
}
if has_clone {
issues.push(ReviewIssue {
severity: "info".to_string(),
line_range: None,
message: "Unnecessary clone detected".to_string(),
suggestion: "Consider borrowing instead of cloning if ownership not needed".to_string(),
});
}
let score = 100u8.saturating_sub((issues.len() * 10) as u8);
CodeReviewResult {
score,
summary: format!("Found {} issues. Code quality: {}/100", issues.len(), score),
issues,
}
}
/// Demo driver for the code-review case: builds a request for a small Rust
/// snippet, prints an excerpt of the prompt, then prints the simulated
/// review result.
fn main() {
    let req = CodeReviewRequest {
        code: r#"
fn get_value(map: &HashMap<String, i32>, key: &str) -> i32 {
*map.get(key).unwrap() // Will panic if key missing!
}
"#.to_string(),
        language: "rust".to_string(),
        focus: vec!["correctness".to_string(), "error-handling".to_string()],
    };

    println!("Review prompt (excerpt):");
    let prompt = build_code_review_prompt(&req);
    // Truncate by characters, not bytes: slicing a `str` at a byte offset
    // panics when the prompt is shorter than the offset or the offset falls
    // inside a multi-byte character.
    let excerpt: String = prompt.chars().take(200).collect();
    println!("{}", excerpt);

    let result = parse_simulated_review(&req.code);
    println!("\nReview result:");
    println!("Score: {}/100", result.score);
    println!("Summary: {}", result.summary);
    for issue in &result.issues {
        println!(" [{}] {}", issue.severity, issue.message);
        println!(" → {}", issue.suggestion);
    }
}
---
Case 3: Document summarization pipeline
/// Multi-level document summarization (map-reduce approach).
///
/// The document is split into chunks, each chunk is summarized on its own
/// (map), and the chunk summaries are combined into one overview (reduce).
/// The LLM calls are simulated by simple truncation.
#[derive(Debug, Clone)]
struct SummarizationPipeline {
    /// Soft upper bound on the size of one chunk, measured in bytes of UTF-8
    /// text. A single sentence longer than this still becomes its own chunk.
    chunk_size: usize,
    /// Max tokens for each chunk summary. Not read by the simulated
    /// summarizer; a real implementation would pass it to the LLM call.
    summary_tokens: usize,
}

impl SummarizationPipeline {
    /// Creates a pipeline with 2000-byte chunks and 150-token summaries.
    fn new() -> Self {
        Self { chunk_size: 2000, summary_tokens: 150 }
    }

    /// Splits `text` into chunks on sentence boundaries (`". "`), starting a
    /// new chunk whenever adding the next sentence would exceed `chunk_size`.
    ///
    /// Chunks keep the original punctuation and are trimmed of surrounding
    /// whitespace. Empty or whitespace-only input yields no chunks.
    fn chunk_document(&self, text: &str) -> Vec<String> {
        let mut chunks = Vec::new();
        let mut current = String::new();
        // `split_inclusive` keeps each separator attached to its sentence, so
        // nothing has to be re-added and the final sentence is not given an
        // extra period.
        for piece in text.split_inclusive(". ") {
            let sentence_len = piece.strip_suffix(". ").unwrap_or(piece).len();
            if !current.is_empty() && current.len() + sentence_len > self.chunk_size {
                chunks.push(current.trim().to_string());
                current.clear();
            }
            current.push_str(piece);
        }
        let tail = current.trim();
        if !tail.is_empty() {
            chunks.push(tail.to_string());
        }
        chunks
    }

    /// Map step: summarizes one chunk.
    ///
    /// In production this would call the LLM with `max_tokens = summary_tokens`.
    /// Simulated here by taking the first 200 characters of the chunk.
    fn map_summarize(&self, chunk: &str) -> String {
        format!("[Summary] {}...", Self::truncate_chars(chunk, 200))
    }

    /// Reduce step: combines the chunk summaries into the final summary.
    ///
    /// In production this would call the LLM with all chunk summaries as
    /// context. Simulated here by quoting the first 100 characters of the
    /// first summary; with no summaries the quoted part is empty.
    fn reduce_summarize(&self, summaries: &[String]) -> String {
        let lead = summaries
            .first()
            .map_or("", |first| Self::truncate_chars(first, 100));
        format!("Document overview ({} sections): {}", summaries.len(), lead)
    }

    /// Runs the full pipeline on `document` and returns the final summary.
    /// Prints the number of chunks to stdout as a progress note.
    fn summarize(&self, document: &str) -> String {
        let chunks = self.chunk_document(document);
        println!("Document split into {} chunks", chunks.len());
        let chunk_summaries: Vec<String> = chunks
            .iter()
            .map(|c| self.map_summarize(c))
            .collect();
        self.reduce_summarize(&chunk_summaries)
    }

    /// Returns the prefix of `text` holding at most `max_chars` characters.
    ///
    /// The cut always lands on a character boundary, so this never panics on
    /// multi-byte text the way a raw byte slice can.
    fn truncate_chars(text: &str, max_chars: usize) -> &str {
        match text.char_indices().nth(max_chars) {
            Some((byte_idx, _)) => &text[..byte_idx],
            None => text,
        }
    }
}
/// Demo driver for the summarization case: runs the pipeline over a short
/// sample text and prints the resulting summary.
fn main() {
    let document = "Rust is a systems programming language focused on safety and performance. \
                    It achieves memory safety without a garbage collector through its ownership system. \
                    Rust's async/await syntax enables high-performance network services. \
                    The language is used by major companies including Microsoft, Amazon, and Google. \
                    Rust has been voted the most loved programming language for nine consecutive years.";

    let summary = SummarizationPipeline::new().summarize(document);
    println!("Summary: {}", summary);
}