Rust By Example

LLM Rust Real-World Cases

Real-world LLM application patterns in Rust: RAG pipelines, code generation APIs, document summarization, chatbot backends, and AI-powered search.

Topic: LLM Rust

Search intent: High-intent search: "rust llm real world application examples"

LLM Rust Real-World Cases

Case 1: RAG (Retrieval-Augmented Generation) pipeline

rust
use std::collections::BinaryHeap;

/// A single retrievable piece of a source document, stored with its embedding.
#[derive(Debug, Clone)]
struct DocumentChunk {
    /// Identifier of the form `"{source}-{index}"`, where `index` is the chunk's
    /// position in the sentence split of its document (blank pieces still
    /// consume an index, so ids may have gaps).
    id: String,
    /// Chunk text exactly as produced by the split (not trimmed).
    text: String,
    /// Vector returned by the caller-supplied embedding function for `text`.
    embedding: Vec<f32>,
    /// Name of the document this chunk was cut from.
    source: String,
}

/// In-memory vector store for RAG retrieval, searched by brute-force cosine similarity.
#[derive(Debug, Default)]
struct RagVectorStore {
    /// All indexed chunks, in insertion order.
    chunks: Vec<DocumentChunk>,
}

impl RagVectorStore {
    /// Creates an empty store.
    fn new() -> Self {
        Self::default()
    }

    /// Splits `text` into sentence-level chunks and indexes every non-blank one.
    ///
    /// `embed_fn` is called once per stored chunk to compute its embedding.
    fn add_document(&mut self, source: &str, text: &str, embed_fn: impl Fn(&str) -> Vec<f32>) {
        // Naive sentence splitting on ". " stands in for a real token-based chunker.
        let new_chunks = text
            .split(". ")
            .enumerate()
            .filter(|(_, chunk)| !chunk.trim().is_empty())
            .map(|(i, chunk)| DocumentChunk {
                id: format!("{}-{}", source, i),
                text: chunk.to_string(),
                embedding: embed_fn(chunk),
                source: source.to_string(),
            });
        self.chunks.extend(new_chunks);
    }

    /// Cosine similarity of `a` and `b`; `0.0` when either vector has (near-)zero norm.
    ///
    /// NOTE(review): if the lengths differ, the dot product only covers the shared
    /// prefix while the norms cover each full vector — callers should pass
    /// embeddings of equal dimension.
    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a < 1e-8 || norm_b < 1e-8 {
            0.0
        } else {
            dot / (norm_a * norm_b)
        }
    }

    /// Returns up to `top_k` chunks, most similar to `query_embedding` first.
    ///
    /// Chunks with equal scores keep their insertion order. A chunk whose score is
    /// NaN (for example because an embedding contains NaN) is ranked last instead
    /// of aborting the whole query.
    fn retrieve(&self, query_embedding: &[f32], top_k: usize) -> Vec<&DocumentChunk> {
        let mut scored: Vec<(f32, &DocumentChunk)> = self
            .chunks
            .iter()
            .map(|chunk| {
                let score = Self::cosine_similarity(query_embedding, &chunk.embedding);
                // NaN has no ordering; demote it so the comparison below is total.
                let score = if score.is_nan() { f32::NEG_INFINITY } else { score };
                (score, chunk)
            })
            .collect();

        // Stable sort, descending by score. No NaN remains, so `partial_cmp`
        // always yields an ordering; the fallback is purely defensive.
        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
        scored.into_iter().take(top_k).map(|(_, chunk)| chunk).collect()
    }
}

/// Assembles the LLM prompt: an instruction, the retrieved context, then the question.
///
/// Each chunk is rendered as `[source] text`; rendered chunks are separated by a
/// blank line and appear in the order given in `context_chunks`.
fn build_rag_prompt(query: &str, context_chunks: &[&DocumentChunk]) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(context_chunks.len());
    for chunk in context_chunks {
        rendered.push(format!("[{}] {}", chunk.source, chunk.text));
    }
    let context_block = rendered.join("\n\n");

    format!(
        "Answer the question based on the context below. If the answer is not in the context, \
        say 'I don't know based on the provided context.'\n\n\
        Context:\n{}\n\nQuestion: {}\n\nAnswer:",
        context_block, query
    )
}

/// Produces a deterministic 8-dimensional stand-in embedding for demos.
///
/// Each character's code point (scaled by 1/1000) is added to the bucket selected
/// by its position modulo 8, and the result is scaled to unit length. Empty input
/// yields the all-zero vector.
fn mock_embed(text: &str) -> Vec<f32> {
    const DIMS: usize = 8;

    let mut buckets = vec![0.0f32; DIMS];
    for (pos, ch) in text.chars().enumerate() {
        // `char` cannot be cast to a float directly; go through its code point.
        buckets[pos % DIMS] += u32::from(ch) as f32 / 1000.0;
    }

    // Clamp the norm so the all-zero vector does not divide by zero.
    let norm = buckets.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-8);
    buckets.into_iter().map(|x| x / norm).collect()
}

/// Demo: index two small documents, retrieve the two chunks closest to a query,
/// and print the chunk previews plus the first part of the resulting RAG prompt.
fn main() {
    let mut store = RagVectorStore::new();

    store.add_document(
        "rust-book",
        "Rust uses ownership to manage memory. Each value has one owner. \
        When the owner goes out of scope, the value is dropped. \
        Borrowing allows references without taking ownership.",
        mock_embed,
    );
    store.add_document(
        "tokio-docs",
        "Tokio is an async runtime for Rust. It uses a work-stealing thread pool. \
        The async/await syntax makes concurrent code readable. \
        Tasks are lightweight and can number in the millions.",
        mock_embed,
    );

    let query = "How does Rust manage memory?";
    let query_emb = mock_embed(query);
    let chunks = store.retrieve(&query_emb, 2);

    println!("Query: {}", query);
    println!("\nRetrieved {} chunks:", chunks.len());
    for chunk in &chunks {
        // Truncate by characters: a byte-range slice panics when the cut falls
        // inside a multi-byte character.
        let preview: String = chunk.text.chars().take(50).collect();
        println!("  [{}] {}...", chunk.source, preview);
    }

    let prompt = build_rag_prompt(query, &chunks);
    // Character-based excerpt also copes with prompts shorter than 200 characters.
    let excerpt: String = prompt.chars().take(200).collect();
    println!("\nRAG prompt ({} chars):\n{}", prompt.chars().count(), excerpt);
}

---

Case 2: Code generation and review API

rust
use serde::{Deserialize, Serialize};

/// Input to a code review: the code, its language, and the aspects to focus on.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewRequest {
    /// Source code to be reviewed.
    code: String,
    /// Name of the language the code is written in (e.g. "rust").
    language: String,
    /// Review aspects to concentrate on; free-form strings.
    focus: Vec<String>, // ["security", "performance", "correctness"]
}

/// Outcome of a code review: the individual findings plus an overall assessment.
#[derive(Debug, Serialize, Deserialize)]
struct CodeReviewResult {
    /// Findings reported by the review.
    issues: Vec<ReviewIssue>,
    /// Overall quality score.
    score: u8, // 0-100
    /// Short human-readable overview of the review.
    summary: String,
}

/// One finding produced by a code review.
#[derive(Debug, Serialize, Deserialize)]
struct ReviewIssue {
    /// Severity label of the finding.
    severity: String, // "critical", "warning", "info"
    /// Location of the finding, if known.
    /// NOTE(review): presumably (start_line, end_line) — confirm whether the end is inclusive.
    line_range: Option<(u32, u32)>,
    /// Description of the problem.
    message: String,
    /// Recommended way to address the problem.
    suggestion: String,
}

fn build_code_review_prompt(req: &CodeReviewRequest) -> String {
    let focus_str = req.focus.join(", ");
    format!(
        "Review the following {} code for: {}.\n\n\
        Return a JSON response with:\n\
        - issues: array of {{severity, line_range, message, suggestion}}\n\
        - score: 0-100 quality score\n\
        - summary: one-sentence overview\n\n\
        Code:\n```{}\n{}\n```",
        req.language, focus_str, req.language, req.code
    )
}

fn parse_simulated_review(code: &str) -> CodeReviewResult {
    // Simulate LLM analysis
    let has_unwrap = code.contains("unwrap()");
    let has_clone = code.contains(".clone()");

    let mut issues = Vec::new();
    if has_unwrap {
        issues.push(ReviewIssue {
            severity: "warning".to_string(),
            line_range: None,
            message: "unwrap() will panic on None/Err".to_string(),
            suggestion: "Use ? operator or match for proper error handling".to_string(),
        });
    }
    if has_clone {
        issues.push(ReviewIssue {
            severity: "info".to_string(),
            line_range: None,
            message: "Unnecessary clone detected".to_string(),
            suggestion: "Consider borrowing instead of cloning if ownership not needed".to_string(),
        });
    }

    let score = 100u8.saturating_sub((issues.len() * 10) as u8);
    CodeReviewResult {
        score,
        summary: format!("Found {} issues. Code quality: {}/100", issues.len(), score),
        issues,
    }
}

/// Demo: build a review request for a small snippet, print the start of the
/// generated prompt, then print the simulated review result.
fn main() {
    let req = CodeReviewRequest {
        code: r#"
fn get_value(map: &HashMap<String, i32>, key: &str) -> i32 {
    *map.get(key).unwrap()  // Will panic if key missing!
}
"#.to_string(),
        language: "rust".to_string(),
        focus: vec!["correctness".to_string(), "error-handling".to_string()],
    };

    println!("Review prompt (excerpt):");
    let prompt = build_code_review_prompt(&req);
    // Take the excerpt by characters: a fixed byte-range slice panics when the
    // prompt is shorter than the range or the cut lands inside a multi-byte character.
    let excerpt: String = prompt.chars().take(200).collect();
    println!("{}", excerpt);

    let result = parse_simulated_review(&req.code);
    println!("\nReview result:");
    println!("Score: {}/100", result.score);
    println!("Summary: {}", result.summary);
    for issue in &result.issues {
        println!("  [{}] {}", issue.severity, issue.message);
        println!("       → {}", issue.suggestion);
    }
}

---

Case 3: Document summarization pipeline

rust
/// Multi-level document summarization (map-reduce approach): split the document
/// into chunks, summarize each chunk, then combine the chunk summaries.
#[derive(Debug)]
struct SummarizationPipeline {
    /// Soft upper bound on chunk length, measured with `str::len` (bytes).
    chunk_size: usize,
    /// Max tokens for each chunk summary; not used by the simulated summarizer.
    summary_tokens: usize,
}

impl SummarizationPipeline {
    /// Creates a pipeline with 2000-byte chunks and 150-token chunk summaries.
    fn new() -> Self {
        Self { chunk_size: 2000, summary_tokens: 150 }
    }

    /// Returns the prefix of `text` holding at most `max_chars` characters.
    ///
    /// The cut always lands on a character boundary, so this never panics on
    /// multi-byte text the way a fixed byte-range slice does.
    fn truncate_chars(text: &str, max_chars: usize) -> &str {
        match text.char_indices().nth(max_chars) {
            Some((end, _)) => &text[..end],
            None => text,
        }
    }

    /// Splits `text` into chunks that break on sentence boundaries near `chunk_size`.
    ///
    /// Sentences are delimited by `". "`. Concatenating the sentences of a chunk
    /// reproduces the original text (modulo trimming at chunk edges). A single
    /// sentence longer than `chunk_size` becomes its own oversized chunk. Empty or
    /// whitespace-only input yields no chunks.
    fn chunk_document(&self, text: &str) -> Vec<String> {
        let mut chunks = Vec::new();
        let mut current = String::new();

        // `split_inclusive` keeps each delimiter attached to its sentence, so
        // nothing is re-appended and the final sentence keeps exactly its own
        // punctuation.
        for piece in text.split_inclusive(". ") {
            // Measure the sentence without its trailing delimiter.
            let sentence_len = piece.strip_suffix(". ").unwrap_or(piece).len();
            if !current.is_empty() && current.len() + sentence_len > self.chunk_size {
                chunks.push(current.trim().to_string());
                current.clear();
            }
            current.push_str(piece);
        }

        let tail = current.trim();
        if !tail.is_empty() {
            chunks.push(tail.to_string());
        }
        chunks
    }

    /// Map step: summarizes one chunk.
    ///
    /// In production this would call an LLM with `max_tokens = summary_tokens`;
    /// the simulation keeps the first 200 characters of the chunk.
    fn map_summarize(&self, chunk: &str) -> String {
        format!("[Summary] {}...", Self::truncate_chars(chunk, 200))
    }

    /// Reduce step: combines the chunk summaries into the final summary.
    ///
    /// In production this would call an LLM with all chunk summaries as context;
    /// the simulation reports the section count followed by the first 100
    /// characters of the first summary (nothing when `summaries` is empty).
    fn reduce_summarize(&self, summaries: &[String]) -> String {
        let lead = summaries
            .first()
            .map_or("", |first| Self::truncate_chars(first, 100));
        format!("Document overview ({} sections): {}", summaries.len(), lead)
    }

    /// Runs the full pipeline on `document` and returns the final summary.
    ///
    /// Prints the number of chunks to stdout as a progress message.
    fn summarize(&self, document: &str) -> String {
        let chunks = self.chunk_document(document);
        println!("Document split into {} chunks", chunks.len());

        let chunk_summaries: Vec<String> = chunks
            .iter()
            .map(|chunk| self.map_summarize(chunk))
            .collect();

        self.reduce_summarize(&chunk_summaries)
    }
}

/// Demo: summarize a short sample text with the default pipeline and print the result.
fn main() {
    let sample_text = "Rust is a systems programming language focused on safety and performance. \
        It achieves memory safety without a garbage collector through its ownership system. \
        Rust's async/await syntax enables high-performance network services. \
        The language is used by major companies including Microsoft, Amazon, and Google. \
        Rust has been voted the most loved programming language for nine consecutive years.";

    let overview = SummarizationPipeline::new().summarize(sample_text);
    println!("Summary: {}", overview);
}

Related reading

Related Guides

Continue in This Topic

More Rust Guides