LLM Rust Migration Guide

Why migrate from Python LangChain to Rust?

| Aspect | Python LangChain | Rust Custom |
|---|---|---|
| Memory per instance | 200–500MB | 20–50MB |
| Cold start | 3–8s | 0.1–0.5s |
| Concurrent requests (single machine) | 50–200 | 5,000–50,000 |
| Type safety | Runtime (Pydantic) | Compile-time |
| Deployment complexity | Python env + deps | Single binary |
| LangChain abstraction | High (magic) | Low (explicit) |

Best candidates for migration: high-throughput LLM gateways, latency-sensitive features, and services where the LLM call is just one step in a larger pipeline.

Keep Python for: research prototyping, complex chain orchestration, and rapid experimentation.

Step 1: Map LangChain concepts to Rust

rust

// LangChain Python:                  Rust equivalent:
// ChatOpenAI(model="gpt-4o")        → LlmClient struct with reqwest
// PromptTemplate(...)                → String formatting functions
// LLMChain(prompt, llm)             → Function composition
// ConversationBufferMemory()         → Vec<Message> + context window management
// VectorStore(embeddings)            → Custom Vec<Embedding> + cosine search
// RetrievalQA                        → retrieve() + build_prompt() + complete()

use serde::{Deserialize, Serialize};

/// A single chat message passed to the LLM.
///
/// `role` is the speaker label (the snippets in this guide use `"system"` and
/// `"user"`), and `content` is the message text. The type derives `Clone` plus
/// serde's `Serialize`/`Deserialize`.
#[derive(Clone, Serialize, Deserialize)]
struct Message { role: String, content: String }

/// Rust equivalent of LangChain's PromptTemplate.
///
/// Placeholders are written as `{{name}}` inside the template text and are
/// filled in by [`PromptTemplate::format`].
struct PromptTemplate {
    /// Raw template text containing `{{name}}` placeholders.
    template: String,
}

impl PromptTemplate {
    /// Creates a template by copying `template` into an owned `String`.
    fn new(template: &str) -> Self {
        Self { template: template.to_string() }
    }

    /// Fills template variables and returns the rendered text.
    ///
    /// `vars` is a list of `(key, value)` pairs; every occurrence of `{{key}}`
    /// in the template is replaced by `value`. When the same key appears more
    /// than once in `vars`, the first pair wins. Placeholders with no matching
    /// key are left in the output unchanged.
    ///
    /// The template is scanned once from left to right and substituted values
    /// are never rescanned, so a value that itself contains `{{other}}` (for
    /// example user-supplied text) is inserted verbatim instead of being
    /// expanded by a later variable.
    fn format(&self, vars: &[(&str, &str)]) -> String {
        // Pre-render each key as its literal `{{key}}` placeholder.
        let placeholders: Vec<(String, &str)> = vars
            .iter()
            .map(|(key, value)| (format!("{{{{{}}}}}", key), *value))
            .collect();

        let mut out = String::with_capacity(self.template.len());
        let mut rest = self.template.as_str();

        while let Some(start) = rest.find("{{") {
            out.push_str(&rest[..start]);
            rest = &rest[start..];

            match placeholders.iter().find(|(p, _)| rest.starts_with(p.as_str())) {
                Some((placeholder, value)) => {
                    out.push_str(value);
                    rest = &rest[placeholder.len()..];
                }
                None => {
                    // Not a known placeholder at this position: emit one brace
                    // and keep scanning so that `{{{key}}}` still matches the
                    // inner `{{key}}`.
                    out.push('{');
                    rest = &rest[1..];
                }
            }
        }

        out.push_str(rest);
        out
    }
}

/// Demo: renders a two-variable prompt template and prints the result.
fn main() {
    // Values to substitute for the `{{role}}` and `{{question}}` placeholders.
    let bindings = [
        ("role", "Rust expert"),
        ("question", "How do I use async/await?"),
    ];

    let rendered =
        PromptTemplate::new("You are a {{role}}. Answer the following: {{question}}")
            .format(&bindings);

    println!("{}", rendered);
}

Step 2: Port RAG pipeline

rust

/// A stored document together with its precomputed embedding vector.
struct Document { id: String, content: String, embedding: Vec<f32> }

/// Rust equivalent of LangChain's RetrievalQA chain: retrieve the most similar
/// documents for a query embedding, then build the chat messages for the LLM.
struct RagChain {
    /// All indexed documents, in insertion order.
    documents: Vec<Document>,
    /// Maximum number of documents returned by `retrieve`.
    top_k: usize,
}

impl RagChain {
    /// Creates an empty chain that retrieves at most `top_k` documents per query.
    fn new(top_k: usize) -> Self { Self { documents: Vec::new(), top_k } }

    /// Stores a document with its embedding; `id` and `content` are copied.
    fn add_document(&mut self, id: &str, content: &str, embed: Vec<f32>) {
        self.documents.push(Document {
            id: id.to_string(), content: content.to_string(), embedding: embed,
        });
    }

    /// Cosine similarity between `a` and `b`.
    ///
    /// Returns `0.0` when either vector has a (near-)zero norm. A NaN result
    /// (NaN or overflowing components) is mapped to negative infinity so such
    /// vectors rank last instead of poisoning the sort. If the lengths differ,
    /// the dot product only covers the common prefix (`zip` stops at the
    /// shorter input) while each norm still uses its full vector.
    fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if na < 1e-8 || nb < 1e-8 {
            return 0.0;
        }
        let similarity = dot / (na * nb);
        if similarity.is_nan() { f32::NEG_INFINITY } else { similarity }
    }

    /// Returns up to `top_k` documents ordered by descending cosine similarity
    /// to `query_embed`. Ties keep insertion order (the sort is stable).
    fn retrieve(&self, query_embed: &[f32]) -> Vec<&Document> {
        let mut scored: Vec<(&Document, f32)> = self.documents.iter()
            .map(|doc| (doc, Self::cosine_similarity(query_embed, &doc.embedding)))
            .collect();

        // Scores are never NaN here, so `partial_cmp` always yields an ordering;
        // the fallback only guarantees the comparison cannot panic.
        scored.sort_by(|a, b| {
            b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
        });

        scored.into_iter().take(self.top_k).map(|(doc, _)| doc).collect()
    }

    /// Builds the system + user messages for the LLM.
    ///
    /// Each document is rendered as `[id]: content`, entries are separated by a
    /// blank line, and the question is appended after the context.
    fn build_prompt(&self, question: &str, docs: &[&Document]) -> Vec<Message> {
        let context = docs.iter()
            .map(|d| format!("[{}]: {}", d.id, d.content))
            .collect::<Vec<_>>()
            .join("\n\n");

        vec![
            Message {
                role: "system".to_string(),
                content: "Answer based on context only. If not in context, say you don't know.".to_string(),
            },
            Message {
                role: "user".to_string(),
                content: format!("Context:\n{}\n\nQuestion: {}", context, question),
            },
        ]
    }

    /// Runs retrieval for `query_embed` and returns the prompt messages for `question`.
    fn run(&self, question: &str, query_embed: &[f32]) -> Vec<Message> {
        let docs = self.retrieve(query_embed);
        self.build_prompt(question, &docs)
    }
}

/// Deterministic stand-in for a real embedding model.
///
/// Each character's Unicode scalar value (divided by 1000) is added to one of
/// four buckets chosen by the character's position modulo 4, and the resulting
/// vector is scaled to unit length. Empty input yields the all-zero vector
/// because the norm is clamped to `1e-8` before dividing.
fn mock_embed(text: &str) -> Vec<f32> {
    let mut v = vec![0.0f32; 4];
    for (i, c) in text.chars().enumerate() {
        // `char` cannot be cast directly to a float; go through `u32` first.
        v[i % 4] += c as u32 as f32 / 1000.0;
    }
    let n = v.iter().map(|x| x * x).sum::<f32>().sqrt().max(1e-8);
    v.iter().map(|x| x / n).collect()
}

/// Demo: indexes three documents, runs a query through the RAG chain, and
/// prints the message count plus a short preview of the user message.
fn main() {
    let mut rag = RagChain::new(2);
    rag.add_document("doc1", "Rust ownership ensures memory safety without GC.", mock_embed("ownership memory safety"));
    rag.add_document("doc2", "Tokio enables async I/O in Rust applications.", mock_embed("tokio async io"));
    rag.add_document("doc3", "Candle is a Rust ML framework from Hugging Face.", mock_embed("candle ml framework"));

    let query = "How does Rust handle memory?";
    let query_emb = mock_embed("memory management rust");
    let messages = rag.run(query, &query_emb);

    println!("Generated {} messages for LLM", messages.len());

    // Take the first 100 characters rather than slicing `[..100]` by bytes:
    // a byte slice panics when the content is shorter than 100 bytes or when
    // byte 100 falls inside a multi-byte character.
    let preview: String = messages[1].content.chars().take(100).collect();
    println!("Context preview: {}...", preview);
}

Step 3: Incremental migration approach

rust

/// Backend choices for a gradual, feature-flagged migration.
#[derive(Debug, Clone, Copy)]
enum LlmBackend {
    /// The existing Python service.
    Python,
    /// The new Rust implementation.
    Rust,
}

/// Splits requests between the Python and Rust backends by percentage.
struct FeatureRouter {
    /// Share of traffic routed to Rust, as a percentage (0-100).
    rust_percentage: u8,
    /// Count of routing decisions made so far.
    counter: std::sync::atomic::AtomicU64,
}

impl FeatureRouter {
    /// Creates a router that sends `rust_percentage` percent of requests to
    /// Rust, with the request counter starting at zero.
    fn new(rust_percentage: u8) -> Self {
        let counter = std::sync::atomic::AtomicU64::new(0);
        Self { rust_percentage, counter }
    }

    /// Picks the backend for the next request.
    ///
    /// Every call takes the next counter value; within each run of 100
    /// consecutive values, those whose remainder modulo 100 is below
    /// `rust_percentage` go to Rust and the rest go to Python.
    fn select_backend(&self) -> LlmBackend {
        use std::sync::atomic::Ordering;

        let ticket = self.counter.fetch_add(1, Ordering::Relaxed);
        let slot = ticket % 100;
        if slot >= u64::from(self.rust_percentage) {
            return LlmBackend::Python;
        }
        LlmBackend::Rust
    }
}

/// Demo: simulates 100 requests for each rollout phase and prints how many of
/// them were routed to the Rust backend.
fn main() {
    // Rollout plan as (label, percentage of traffic sent to Rust).
    let rollout = [
        ("Phase 1: Shadow mode", 0),      // no Rust traffic; log discrepancies
        ("Phase 2: Canary", 5),           // small slice on Rust
        ("Phase 3: Ramp up", 50),         // half of traffic on Rust
        ("Phase 4: Full migration", 100), // everything on Rust
    ];

    for (label, pct) in rollout {
        let router = FeatureRouter::new(pct);

        let mut rust_hits = 0usize;
        for _ in 0..100 {
            if let LlmBackend::Rust = router.select_backend() {
                rust_hits += 1;
            }
        }

        println!("{}: {}% Rust ({}/100 requests)", label, pct, rust_hits);
    }
}

LLM Rust Migration Guide

LLM Rust Migration Guide

Why migrate from Python LangChain to Rust?

Step 1: Map LangChain concepts to Rust

Step 2: Port RAG pipeline

Step 3: Incremental migration approach

Related reading

Related Guides

LLM API Gateway in Rust

Building LLM Applications with Rust

Continue in This Topic

LLM Rust Maintainability

LLM Rust Performance Tuning

More Rust Guides

Building LLM Applications with Rust

LLM API Gateway in Rust

LLM Rust Anti-Patterns

LLM Rust Benchmarking

LLM Rust Decision Matrix

LLM Rust Interview Q&A