LLM Rust Real-World Cases

Case 1: RAG (Retrieval-Augmented Generation) pipeline

rust

use std::collections::BinaryHeap;

/// One retrievable piece of a source document, stored together with its embedding.
///
/// `Debug` and `PartialEq` are derived so chunks can be printed and compared in
/// diagnostics and assertions.
#[derive(Debug, Clone, PartialEq)]
struct DocumentChunk {
    /// Identifier of the chunk; the vector store builds it as `"{source}-{index}"`.
    id: String,
    /// Raw text of the chunk.
    text: String,
    /// Embedding vector computed for `text`.
    embedding: Vec<f32>,
    /// Name of the document this chunk was taken from.
    source: String,
}

/// In-memory vector store for RAG retrieval.
///
/// Chunks are kept in insertion order and searched with a linear
/// cosine-similarity scan.
struct RagVectorStore {
    /// Every indexed chunk, in the order it was added.
    chunks: Vec<DocumentChunk>,
}

impl RagVectorStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self { chunks: Vec::new() }
    }

    /// Splits `text` into sentence-like pieces and indexes each one under `source`.
    ///
    /// The text is split on the literal separator `". "` (this is a simple
    /// sentence split, not a token-based chunker). Pieces that are empty after
    /// trimming are skipped. Every remaining piece is embedded with `embed_fn`
    /// and stored with the id `"{source}-{i}"`, where `i` is the piece's position
    /// in the split, so ids can have gaps where blank pieces were skipped.
    fn add_document(&mut self, source: &str, text: &str, embed_fn: impl Fn(&str) -> Vec<f32>) {
        let pieces = text
            .split(". ")
            .enumerate()
            .filter(|(_, piece)| !piece.trim().is_empty());

        self.chunks.extend(pieces.map(|(i, piece)| DocumentChunk {
            id: format!("{}-{}", source, i),
            text: piece.to_string(),
            embedding: embed_fn(piece),
            source: source.to_string(),
        }));
    }

    /// Returns up to `top_k` chunks ordered by descending cosine similarity to
    /// `query_embedding`.
    ///
    /// Chunks with equal scores keep their insertion order (the sort is stable).
    /// A chunk whose score cannot be computed (NaN, e.g. because its embedding
    /// contains NaN or infinite values) is ranked last instead of causing a panic.
    fn retrieve(&self, query_embedding: &[f32], top_k: usize) -> Vec<&DocumentChunk> {
        let mut scored: Vec<(f32, &DocumentChunk)> = self
            .chunks
            .iter()
            .map(|chunk| {
                let score = Self::cosine_similarity(query_embedding, &chunk.embedding);
                // NaN has no meaningful rank; demote it below every real score so
                // the total ordering used by the sort puts it at the end.
                let score = if score.is_nan() { f32::NEG_INFINITY } else { score };
                (score, chunk)
            })
            .collect();

        // `total_cmp` is a total order on f32, so the comparator can never fail.
        scored.sort_by(|a, b| b.0.total_cmp(&a.0));
        scored.into_iter().take(top_k).map(|(_, chunk)| chunk).collect()
    }

    /// Cosine similarity of `a` and `b`.
    ///
    /// Returns `0.0` when the vectors have different lengths (a partial dot
    /// product over mismatched dimensions would not be a similarity) or when
    /// either vector has a near-zero norm.
    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return 0.0;
        }
        let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a < 1e-8 || norm_b < 1e-8 {
            0.0
        } else {
            dot / (norm_a * norm_b)
        }
    }
}

/// Assembles the final RAG prompt for `query`.
///
/// Each context chunk is rendered as `[source] text`; the rendered chunks are
/// separated by a blank line and placed between the fixed instruction header
/// and the question.
fn build_rag_prompt(query: &str, context_chunks: &[&DocumentChunk]) -> String {
    let mut context = String::new();
    for (position, chunk) in context_chunks.iter().enumerate() {
        if position > 0 {
            context.push_str("\n\n");
        }
        context.push_str(&format!("[{}] {}", chunk.source, chunk.text));
    }

    format!(
        "Answer the question based on the context below. \
         If the answer is not in the context, \
         say 'I don't know based on the provided context.'\n\n\
         Context:\n{}\n\n\
         Question: {}\n\n\
         Answer:",
        context, query
    )
}

/// Deterministic mock embedding for demos.
///
/// Each character's code point (divided by 1000) is accumulated into one of
/// eight buckets chosen by the character's position modulo 8, and the result is
/// L2-normalized. Empty input yields an all-zero vector, because the norm is
/// clamped to a small positive value before dividing.
fn mock_embed(text: &str) -> Vec<f32> {
    const DIMS: usize = 8;

    let mut v = vec![0.0f32; DIMS];
    for (i, c) in text.chars().enumerate() {
        // A `char` cannot be cast straight to a float; go through its code point.
        v[i % DIMS] += u32::from(c) as f32 / 1000.0;
    }
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-8);
    v.iter().map(|x| x / norm).collect()
}

/// Demo driver: indexes two small documents, retrieves the two chunks closest
/// to a query, and prints the retrieved chunks plus an excerpt of the RAG prompt.
fn main() {
    let mut store = RagVectorStore::new();

    store.add_document("rust-book", 
        "Rust uses ownership to manage memory. Each value has one owner. \
        When the owner goes out of scope, the value is dropped. \
        Borrowing allows references without taking ownership.",
        mock_embed
    );
    store.add_document("tokio-docs",
        "Tokio is an async runtime for Rust. It uses a work-stealing thread pool. \
        The async/await syntax makes concurrent code readable. \
        Tasks are lightweight and can number in the millions.",
        mock_embed
    );

    let query = "How does Rust manage memory?";
    let query_emb = mock_embed(query);
    let chunks = store.retrieve(&query_emb, 2);

    println!("Query: {}", query);
    println!("\nRetrieved {} chunks:", chunks.len());
    for chunk in &chunks {
        // Truncate by characters: slicing by byte offset panics when the offset
        // falls inside a multi-byte UTF-8 character.
        let preview: String = chunk.text.chars().take(50).collect();
        println!("  [{}] {}...", chunk.source, preview);
    }

    let prompt = build_rag_prompt(query, &chunks);
    // Same reasoning as above; this also stays safe if the prompt is shorter
    // than the excerpt length.
    let excerpt: String = prompt.chars().take(200).collect();
    println!("\nRAG prompt ({} chars):\n{}", prompt.chars().count(), excerpt);
}

---

Case 2: Code generation and review API

rust

use serde::{Deserialize, Serialize};

/// Input for a code review: the code itself, its language, and what to look for.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewRequest {
    /// Source code to be reviewed; embedded verbatim in the review prompt.
    code: String,
    /// Language name; used both in the prompt sentence and as the code-fence tag.
    language: String,
    /// Review focus areas; joined with ", " when the prompt is built.
    focus: Vec<String>, // ["security", "performance", "correctness"]
}

/// Outcome of a code review: the individual findings plus an overall score.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewResult {
    /// Findings reported by the review, in the order they were detected.
    issues: Vec<ReviewIssue>,
    /// Overall quality score; the simulated reviewer starts at 100 and
    /// subtracts 10 per issue.
    score: u8, // 0-100
    /// Human-readable summary of the review.
    summary: String,
}

/// A single finding produced by a code review.
#[derive(Debug, Serialize, Deserialize)]
struct ReviewIssue {
    /// Severity label of the finding.
    severity: String, // "critical", "warning", "info"
    /// Affected lines, if known — presumably (start, end) line numbers; confirm
    /// against the producer. The simulated reviewer always leaves this `None`.
    line_range: Option<(u32, u32)>,
    /// Description of the problem.
    message: String,
    /// Recommended fix.
    suggestion: String,
}

/// Renders the LLM prompt for a code review request.
///
/// The prompt names the language and the comma-separated focus areas, describes
/// the JSON shape expected in the reply, and ends with the code wrapped in a
/// fenced block tagged with the language.
fn build_code_review_prompt(req: &CodeReviewRequest) -> String {
    let focus_areas = req.focus.join(", ");

    // Header sentence, then the fixed response-format instructions, then the code.
    let mut prompt = format!(
        "Review the following {} code for: {}.\n\n",
        req.language, focus_areas
    );
    prompt.push_str("Return a JSON response with:\n");
    prompt.push_str("- issues: array of {severity, line_range, message, suggestion}\n");
    prompt.push_str("- score: 0-100 quality score\n");
    prompt.push_str("- summary: one-sentence overview\n\n");
    prompt.push_str(&format!("Code:\n```{}\n{}\n```", req.language, req.code));
    prompt
}

fn parse_simulated_review(code: &str) -> CodeReviewResult {
    // Simulate LLM analysis
    let has_unwrap = code.contains("unwrap()");
    let has_clone = code.contains(".clone()");

    let mut issues = Vec::new();
    if has_unwrap {
        issues.push(ReviewIssue {
            severity: "warning".to_string(),
            line_range: None,
            message: "unwrap() will panic on None/Err".to_string(),
            suggestion: "Use ? operator or match for proper error handling".to_string(),
        });
    }
    if has_clone {
        issues.push(ReviewIssue {
            severity: "info".to_string(),
            line_range: None,
            message: "Unnecessary clone detected".to_string(),
            suggestion: "Consider borrowing instead of cloning if ownership not needed".to_string(),
        });
    }

    let score = 100u8.saturating_sub((issues.len() * 10) as u8);
    CodeReviewResult {
        score,
        summary: format!("Found {} issues. Code quality: {}/100", issues.len(), score),
        issues,
    }
}

/// Demo driver: builds a review request for a small snippet, prints an excerpt
/// of the generated prompt, then prints the simulated review result.
fn main() {
    let req = CodeReviewRequest {
        code: r#"
fn get_value(map: &HashMap<String, i32>, key: &str) -> i32 {
    *map.get(key).unwrap()  // Will panic if key missing!
}
"#.to_string(),
        language: "rust".to_string(),
        focus: vec!["correctness".to_string(), "error-handling".to_string()],
    };

    println!("Review prompt (excerpt):");
    // Truncate by characters rather than by byte offset: a byte slice panics if
    // the prompt is shorter than the cut-off or the cut lands inside a
    // multi-byte UTF-8 character.
    let prompt = build_code_review_prompt(&req);
    let excerpt: String = prompt.chars().take(200).collect();
    println!("{}", excerpt);

    let result = parse_simulated_review(&req.code);
    println!("\nReview result:");
    println!("Score: {}/100", result.score);
    println!("Summary: {}", result.summary);
    for issue in &result.issues {
        println!("  [{}] {}", issue.severity, issue.message);
        println!("       → {}", issue.suggestion);
    }
}

---

Case 3: Document summarization pipeline

rust

/// Multi-level document summarization (map-reduce approach).
///
/// The document is split into chunks, each chunk is summarized on its own
/// ("map"), and the chunk summaries are combined into one overview ("reduce").
/// Both summarization steps are simulated by truncation; no LLM is called.
struct SummarizationPipeline {
    /// Soft upper bound for a chunk, measured in bytes of UTF-8 text (equal to
    /// the character count for ASCII input).
    chunk_size: usize,
    /// Max tokens for each chunk summary. Intended for the real LLM call; the
    /// simulated implementation does not read it.
    summary_tokens: usize,
}

impl SummarizationPipeline {
    /// Creates a pipeline with 2000-byte chunks and 150-token chunk summaries.
    fn new() -> Self {
        Self { chunk_size: 2000, summary_tokens: 150 }
    }

    /// Splits `text` into chunks on sentence boundaries near `chunk_size`.
    ///
    /// Sentences are delimited by `". "`. A chunk is closed as soon as appending
    /// the next sentence (including its trailing separator) would exceed
    /// `chunk_size`; a single sentence longer than `chunk_size` is kept whole.
    /// Chunks are trimmed, and empty or whitespace-only input yields no chunks.
    fn chunk_document(&self, text: &str) -> Vec<String> {
        let mut chunks = Vec::new();
        let mut current = String::new();

        // `split_inclusive` keeps each sentence's own ". " terminator, so the
        // text is reassembled exactly as written and no extra period is
        // appended after the final sentence.
        for sentence in text.split_inclusive(". ") {
            if !current.is_empty() && current.len() + sentence.len() > self.chunk_size {
                chunks.push(current.trim().to_string());
                current.clear();
            }
            current.push_str(sentence);
        }

        let tail = current.trim();
        if !tail.is_empty() {
            chunks.push(tail.to_string());
        }
        chunks
    }

    /// Summarizes one chunk ("map" step).
    ///
    /// In production this would call an LLM with `max_tokens = summary_tokens`.
    /// The simulation keeps the first 200 characters of the chunk.
    fn map_summarize(&self, chunk: &str) -> String {
        format!("[Summary] {}...", Self::prefix_chars(chunk, 200))
    }

    /// Combines the chunk summaries into the final summary ("reduce" step).
    ///
    /// In production this would call an LLM with all chunk summaries as context.
    /// The simulation reports the number of sections followed by the first 100
    /// characters of the first summary (nothing when `summaries` is empty).
    fn reduce_summarize(&self, summaries: &[String]) -> String {
        let lead = summaries
            .first()
            .map_or("", |first| Self::prefix_chars(first, 100));
        format!("Document overview ({} sections): {}", summaries.len(), lead)
    }

    /// Runs the whole pipeline on `document` and returns the final summary.
    ///
    /// Prints the number of chunks to stdout as a progress message.
    fn summarize(&self, document: &str) -> String {
        let chunks = self.chunk_document(document);
        println!("Document split into {} chunks", chunks.len());

        let chunk_summaries: Vec<String> = chunks.iter()
            .map(|c| self.map_summarize(c))
            .collect();

        self.reduce_summarize(&chunk_summaries)
    }

    /// Returns the longest prefix of `text` holding at most `max_chars` characters.
    ///
    /// The cut always lands on a character boundary, so it cannot panic on
    /// multi-byte UTF-8 text the way a raw byte-offset slice can.
    fn prefix_chars(text: &str, max_chars: usize) -> &str {
        match text.char_indices().nth(max_chars) {
            Some((end, _)) => &text[..end],
            None => text,
        }
    }
}

/// Demo driver: summarizes a short sample text with the default pipeline and
/// prints the resulting overview.
fn main() {
    // Sample input; short enough to fit in a single chunk with default settings.
    const DOCUMENT: &str = "Rust is a systems programming language focused on safety and performance. \
        It achieves memory safety without a garbage collector through its ownership system. \
        Rust's async/await syntax enables high-performance network services. \
        The language is used by major companies including Microsoft, Amazon, and Google. \
        Rust has been voted the most loved programming language for nine consecutive years.";

    let overview = SummarizationPipeline::new().summarize(DOCUMENT);
    println!("Summary: {}", overview);
}

LLM Rust Real-World Cases

LLM Rust Real-World Cases

Case 1: RAG (Retrieval-Augmented Generation) pipeline

Case 2: Code generation and review API

Case 3: Document summarization pipeline

Related reading

Related Guides

LLM API Gateway in Rust

LLM Rust Production Guide

Continue in This Topic

LLM Rust Production Guide

LLM Rust Review Checklist

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Anti-Patterns

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A