NEW: Managed multimodal retrieval. Explore platform →
    Cross-MediaSimilar

    RAG with MVS Standalone

    Complete RAG pipeline using MVS for retrieval and OpenAI for generation. Chunk your documents, embed them with any provider, store in MVS, retrieve relevant context, and generate answers -- no managed feature extractors needed.

    text
    Multi-Tier
    12.4K runs
    Run in Builder

    "What is the recommended database architecture for high availability?"

    Why This Matters

    Full control over your RAG pipeline without vendor lock-in. Choose your own chunking strategy, embedding model, and LLM while MVS handles the vector storage and retrieval at scale.

    from openai import OpenAI
    from mixpeek import Mixpeek
    # Namespace that every upsert and search in this example targets.
    NAMESPACE = "rag-docs"

    # MVS client: used below for document upserts and searches.
    mvs = Mixpeek(api_key="your-mvs-key")

    # OpenAI client: used below for embeddings and chat completions.
    # Replace both placeholder keys with real credentials before running.
    openai = OpenAI(api_key="your-openai-key")
    def embed(text: str) -> list[float]:
        """Return the embedding of *text* from the ``text-embedding-3-small`` model.

        Sends a single input to the module-level ``openai`` client and returns
        the ``embedding`` attribute of the first entry in the response data.
        """
        embedding_response = openai.embeddings.create(
            input=text,
            model="text-embedding-3-small",
        )
        first_item = embedding_response.data[0]
        return first_item.embedding
    # Step 1: Chunk documents
    def chunk_text(text: str, chunk_size: int = 512, overlap: int = 64) -> list[str]:
        """Split *text* into overlapping chunks of whitespace-separated words.

        Consecutive chunks start ``chunk_size - overlap`` words apart, so each
        chunk shares its first ``overlap`` words with the end of the previous one.

        Args:
            text: The text to split; words are delimited by any whitespace.
            chunk_size: Maximum number of words per chunk. Must be positive.
            overlap: Number of words shared between consecutive chunks. Must be
                non-negative and strictly smaller than ``chunk_size``.

        Returns:
            The chunks in document order, each re-joined with single spaces.
            Empty or whitespace-only text yields an empty list.

        Raises:
            ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
                negative or not smaller than ``chunk_size``.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive number of words")
        if not 0 <= overlap < chunk_size:
            # A non-positive step would either crash range() or silently
            # produce no chunks at all; a negative overlap would skip words.
            raise ValueError(
                "overlap must be non-negative and smaller than chunk_size"
            )

        words = text.split()
        step = chunk_size - overlap
        chunks = []
        for start in range(0, len(words), step):
            chunks.append(" ".join(words[start:start + chunk_size]))
            # Once a window reaches the last word, any further window would be
            # fully contained in this one -- stop to avoid redundant chunks.
            if start + chunk_size >= len(words):
                break
        return chunks
    # Step 2: Embed and upsert chunks into MVS
    # Read the document inside a context manager so the file handle is closed
    # promptly, and decode explicitly as UTF-8 instead of the platform default.
    with open("docs/architecture-guide.md", encoding="utf-8") as source_file:
        document = source_file.read()

    chunks = chunk_text(document)
    total_chunks = len(chunks)  # loop-invariant, computed once

    for i, chunk in enumerate(chunks):
        # Each chunk is embedded and upserted as its own single-document
        # request; the metadata records where the chunk came from so answers
        # can cite it later.
        mvs.namespaces.documents.upsert(
            namespace=NAMESPACE,
            documents=[{
                "dense_embedding": embed(chunk),
                "content": chunk,
                "metadata": {
                    "source": "architecture-guide.md",
                    "chunk_index": i,
                    "total_chunks": total_chunks,
                },
            }],
        )
    # Step 3: Retrieve relevant chunks
    # Embed the question with the same embed() helper used for the stored
    # chunks, then ask MVS for the five best matches in the namespace.
    query = "What is the recommended database architecture for high availability?"
    query_embedding = embed(query)
    results = mvs.namespaces.documents.search(
        namespace=NAMESPACE,
        query={"dense_embedding": query_embedding},
        top_k=5,
    )
    # Step 4: Generate answer with LLM
    # Label every retrieved chunk with its chunk_index so the model can cite it,
    # and separate the chunks with blank lines.
    context = "\n\n".join(
        f"[Chunk {doc['metadata']['chunk_index']}] {doc['content']}"
        for doc in results
    )

    # The retrieved context goes in the system message; the user message
    # carries the original question unchanged.
    system_message = {
        "role": "system",
        "content": f"Answer based on this context. Cite chunk numbers.\n\n{context}",
    }
    user_message = {"role": "user", "content": query}

    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    answer = response.choices[0].message.content
    print(answer)

    Feature Extractors

    Retriever Stages

    limit

    Truncate results to a maximum count with optional offset for pagination

    reduce