Execute Adhoc Retriever

# Execute an ad-hoc retriever: POST a JSON body describing the stages,
# collections, inputs and budget limits, authenticated with a bearer token.
curl --request POST \
  --url https://api.mixpeek.com/v1/retrievers/execute \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "stages": [
    {
      "stage_name": "<string>",
      "config": {},
      "batch_size": "<string>",
      "description": "<string>",
      "on_error": "<string>",
      "output_alias": "<string>"
    }
  ],
  "collection_identifiers": [
    "<string>"
  ],
  "input_schema": {},
  "inputs": {},
  "budget_limits": {
    "max_credits": 100,
    "max_time_ms": 60000
  },
  "stream": false
}
'

import requests

# Execute an ad-hoc retriever and print the raw response body.
url = "https://api.mixpeek.com/v1/retrievers/execute"

# Request body: stage definitions, target collections, inputs and budget limits.
payload = {
    "stages": [
        {
            "stage_name": "<string>",
            "config": {},
            "batch_size": "<string>",
            "description": "<string>",
            "on_error": "<string>",
            "output_alias": "<string>"
        }
    ],
    "collection_identifiers": ["<string>"],
    "input_schema": {},
    "inputs": {},
    "budget_limits": {
        "max_credits": 100,
        "max_time_ms": 60000
    },
    "stream": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests waits indefinitely unless a timeout is given; bound the wait so a
# stalled connection cannot hang the script.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

// Request options for executing an ad-hoc retriever: a POST carrying a bearer
// token and a JSON-serialized body.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    stages: [
      {
        stage_name: '<string>',
        config: {},
        batch_size: '<string>',
        description: '<string>',
        on_error: '<string>',
        output_alias: '<string>'
      }
    ],
    collection_identifiers: ['<string>'],
    input_schema: {},
    inputs: {},
    budget_limits: {max_credits: 100, max_time_ms: 60000},
    stream: false
  })
};

// Send the request, parse the response body as JSON and log it; a rejection
// anywhere in the chain is logged with console.error.
fetch('https://api.mixpeek.com/v1/retrievers/execute', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Execute an ad-hoc retriever with the cURL extension and echo the result.
$curl = curl_init();

// Configure a POST to the execute endpoint with a JSON-encoded body and a
// 30-second timeout.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.mixpeek.com/v1/retrievers/execute",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'stages' => [
        [
                'stage_name' => '<string>',
                'config' => [
                                
                ],
                'batch_size' => '<string>',
                'description' => '<string>',
                'on_error' => '<string>',
                'output_alias' => '<string>'
        ]
    ],
    'collection_identifiers' => [
        '<string>'
    ],
    'input_schema' => [
        
    ],
    'inputs' => [
        
    ],
    'budget_limits' => [
        'max_credits' => 100,
        'max_time_ms' => 60000
    ],
    'stream' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// Run the request and capture any cURL error message before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends an ad-hoc retriever execution request to the Mixpeek API and
// prints the raw response body to standard output.
//
// A failure while building the request, sending it, or reading the response
// is reported and ends the program early instead of being ignored.
func main() {

	url := "https://api.mixpeek.com/v1/retrievers/execute"

	payload := strings.NewReader("{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Execute an ad-hoc retriever with Unirest: POST the JSON body with a bearer
// token and receive the response body as a String.
HttpResponse<String> response = Unirest.post("https://api.mixpeek.com/v1/retrievers/execute")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint for executing an ad-hoc retriever.
url = URI("https://api.mixpeek.com/v1/retrievers/execute")

# Connection to the endpoint's host and port with TLS enabled.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# POST request carrying the bearer token and the JSON body.
request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}"

# Send the request and print the response body.
response = http.request(request)
puts response.read_body

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

Adhoc

Execute Adhoc Retriever

Execute a retriever ad-hoc without persisting the configuration.

This endpoint allows you to execute a retriever without saving it to the database. Useful for one-time queries, testing configurations, or temporary searches.

Streaming Execution (stream=True): Response uses Server-Sent Events (SSE) format with Content-Type: text/event-stream. Each stage emits events as it executes, formatted as: data: {json}\n\n

Event Types (StreamEventType):

stage_start: Emitted when a stage begins (includes stage_name, stage_index, total_stages)
stage_complete: Emitted when a stage finishes (includes documents, statistics, budget_used)
stage_error: Emitted if a stage fails (includes error message)
execution_complete: Final event with complete results and pagination
execution_error: Emitted if entire execution fails

StreamStageEvent Fields:

event_type: Type of event
execution_id: Unique execution identifier
stage_name/stage_index/total_stages: Stage progress info
documents: Intermediate results (stage_complete only)
statistics: Stage metrics (duration_ms, input_count, output_count, efficiency)
budget_used: Cumulative consumption (credits_used, time_elapsed_ms, tokens_used)

Response Headers:

Content-Type: text/event-stream
Cache-Control: no-cache
Connection: keep-alive
X-Execution-Mode: adhoc

Standard Execution (stream=False, default):

Returns ExecuteRetrieverResponse after all stages complete
Includes X-Execution-Mode: adhoc header
execution_metadata.retriever_persisted = False

Use Cases:

One-time queries without saving retriever configuration
Testing stage configurations before persisting
Dynamic retrieval with varying parameters
Real-time progress tracking with streaming

POST

retrievers

execute

Execute Adhoc Retriever

# Execute an ad-hoc retriever: POST a JSON body describing the stages,
# collections, inputs and budget limits, authenticated with a bearer token.
curl --request POST \
  --url https://api.mixpeek.com/v1/retrievers/execute \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "stages": [
    {
      "stage_name": "<string>",
      "config": {},
      "batch_size": "<string>",
      "description": "<string>",
      "on_error": "<string>",
      "output_alias": "<string>"
    }
  ],
  "collection_identifiers": [
    "<string>"
  ],
  "input_schema": {},
  "inputs": {},
  "budget_limits": {
    "max_credits": 100,
    "max_time_ms": 60000
  },
  "stream": false
}
'

import requests

# Execute an ad-hoc retriever and print the raw response body.
url = "https://api.mixpeek.com/v1/retrievers/execute"

# Request body: stage definitions, target collections, inputs and budget limits.
payload = {
    "stages": [
        {
            "stage_name": "<string>",
            "config": {},
            "batch_size": "<string>",
            "description": "<string>",
            "on_error": "<string>",
            "output_alias": "<string>"
        }
    ],
    "collection_identifiers": ["<string>"],
    "input_schema": {},
    "inputs": {},
    "budget_limits": {
        "max_credits": 100,
        "max_time_ms": 60000
    },
    "stream": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests waits indefinitely unless a timeout is given; bound the wait so a
# stalled connection cannot hang the script.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

// Request options for executing an ad-hoc retriever: a POST carrying a bearer
// token and a JSON-serialized body.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    stages: [
      {
        stage_name: '<string>',
        config: {},
        batch_size: '<string>',
        description: '<string>',
        on_error: '<string>',
        output_alias: '<string>'
      }
    ],
    collection_identifiers: ['<string>'],
    input_schema: {},
    inputs: {},
    budget_limits: {max_credits: 100, max_time_ms: 60000},
    stream: false
  })
};

// Send the request, parse the response body as JSON and log it; a rejection
// anywhere in the chain is logged with console.error.
fetch('https://api.mixpeek.com/v1/retrievers/execute', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Execute an ad-hoc retriever with the cURL extension and echo the result.
$curl = curl_init();

// Configure a POST to the execute endpoint with a JSON-encoded body and a
// 30-second timeout.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.mixpeek.com/v1/retrievers/execute",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'stages' => [
        [
                'stage_name' => '<string>',
                'config' => [
                                
                ],
                'batch_size' => '<string>',
                'description' => '<string>',
                'on_error' => '<string>',
                'output_alias' => '<string>'
        ]
    ],
    'collection_identifiers' => [
        '<string>'
    ],
    'input_schema' => [
        
    ],
    'inputs' => [
        
    ],
    'budget_limits' => [
        'max_credits' => 100,
        'max_time_ms' => 60000
    ],
    'stream' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// Run the request and capture any cURL error message before closing the handle.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error if there was one, otherwise the response body.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends an ad-hoc retriever execution request to the Mixpeek API and
// prints the raw response body to standard output.
//
// A failure while building the request, sending it, or reading the response
// is reported and ends the program early instead of being ignored.
func main() {

	url := "https://api.mixpeek.com/v1/retrievers/execute"

	payload := strings.NewReader("{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so it must not be dereferenced.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))

}

// Execute an ad-hoc retriever with Unirest: POST the JSON body with a bearer
// token and receive the response body as a String.
HttpResponse<String> response = Unirest.post("https://api.mixpeek.com/v1/retrievers/execute")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint for executing an ad-hoc retriever.
url = URI("https://api.mixpeek.com/v1/retrievers/execute")

# Connection to the endpoint's host and port with TLS enabled.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# POST request carrying the bearer token and the JSON body.
request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"stages\": [\n    {\n      \"stage_name\": \"<string>\",\n      \"config\": {},\n      \"batch_size\": \"<string>\",\n      \"description\": \"<string>\",\n      \"on_error\": \"<string>\",\n      \"output_alias\": \"<string>\"\n    }\n  ],\n  \"collection_identifiers\": [\n    \"<string>\"\n  ],\n  \"input_schema\": {},\n  \"inputs\": {},\n  \"budget_limits\": {\n    \"max_credits\": 100,\n    \"max_time_ms\": 60000\n  },\n  \"stream\": false\n}"

# Send the request and print the response body.
response = http.request(request)
puts response.read_body

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Query Parameters

return_presigned_urls

boolean

default:false

return_vectors

boolean

default:false

Body

application/json

Request to execute a retriever ad-hoc without persistence.

This combines retriever creation parameters with execution inputs to allow one-time retrieval without saving the retriever configuration.

Use Cases:
- One-time queries without polluting retriever registry
- Testing retriever configurations before persisting
- Dynamic retrieval with varying stage configurations
- Temporary search operations

Behavior:
- Retriever is NOT saved to database
- Execution history is logged but marked as ad-hoc
- Response includes X-Execution-Mode: adhoc header
- execution_metadata.retriever_persisted = False

Streaming Execution (stream=True): When streaming is enabled, the response uses Server-Sent Events (SSE) format with Content-Type: text/event-stream. Each stage emits events as it executes:

Event Types:
- stage_start: Emitted when a stage begins execution
- stage_complete: Emitted when a stage finishes with results
- stage_error: Emitted if a stage encounters an error
- execution_complete: Emitted after all stages finish successfully
- execution_error: Emitted if the entire execution fails

Each event is a StreamStageEvent containing:
- event_type: The type of event
- execution_id: Unique execution identifier
- stage_name: Human-readable stage name
- stage_index: Zero-based stage position
- total_stages: Total number of stages
- documents: Intermediate results (for stage_complete)
- statistics: Stage metrics (duration_ms, input_count, output_count, etc.)
- budget_used: Cumulative resource consumption (credits, time, tokens)

Response Headers (streaming):
- Content-Type: text/event-stream
- Cache-Control: no-cache
- Connection: keep-alive
- X-Execution-Mode: adhoc

Example streaming request:
```python
import json

import requests

# requests requires an absolute URL (a bare path is rejected), and the
# endpoint expects a bearer token in the Authorization header.
response = requests.post(
    'https://api.mixpeek.com/v1/retrievers/execute',
    headers={'Authorization': 'Bearer <token>'},
    json={
        'collection_identifiers': ['my_collection'],
        'input_schema': {'query': {'type': 'text', 'required': True}},
        'stages': [...],
        'inputs': {'query': 'machine learning'},
        'stream': True
    },
    stream=True
)
# Each SSE event arrives as a line prefixed with "data: "; strip the prefix
# and decode the JSON payload.
for line in response.iter_lines():
    if line.startswith(b'data: '):
        event = json.loads(line[6:])
        print(f"{event['event_type']}: {event.get('stage_name')}")
```

Standard Execution (stream=False, default): Returns a single ExecuteRetrieverResponse with final documents, pagination, and aggregate statistics after all stages complete.

Examples: Simple ad-hoc search: { "collection_identifiers": ["col_123"], "input_schema": {"query": {"type": "text", "required": true}}, "stages": [{ "stage_name": "search", "stage_type": "filter", "config": { "stage_id": "feature_search", "parameters": { "searches": [{ "feature_uri": "mixpeek://text_extractor@v1/embedding", "query": { "input_mode": "text", "text": "{{INPUT.query}}" }, "top_k": 100 }], "final_top_k": 10 } } }], "inputs": {"query": "machine learning"}, "stream": false }

stages

StageConfig · object[]

required

REQUIRED. Ordered list of stage configurations. At least one stage is required for execution.

Minimum array length: 1

Show child attributes

collection_identifiers

string[]

Collection identifiers (names or IDs) to query. Can be collection names or IDs. Names are automatically resolved. Can be empty for query-only inference mode (e.g., LLM query analysis without documents). Also accepts 'collection_ids' as an alias for backward compatibility.

Examples:

["my_collection"]

["col_abc123", "products"]

[]

input_schema

Input Schema · object

OPTIONAL. Input schema defining expected inputs. Each key is an input name, value is a RetrieverInputSchemaField. Omit it (or pass {}) for a stages-only execute whose stages carry hardcoded query values — no dynamic inputs needed.

Show child attributes

Examples:

{
  "query": {
    "description": "Search query",
    "required": true,
    "type": "text"
  }
}

{
  "query": { "required": true, "type": "text" },
  "top_k": { "required": false, "type": "integer" }
}

inputs

Inputs · object

OPTIONAL. Input values matching the input_schema. These values are passed to stages for parameterization. Omit it (or pass {}) when the stages carry hardcoded query values.

Examples:

{ "query": "machine learning" }

{ "query": "AI trends", "top_k": 50 }

budget_limits

BudgetLimits · object | null

OPTIONAL. Budget limits for execution.

Show child attributes

Example:

{ "max_credits": 100, "max_time_ms": 60000 }

stream

boolean

default:false

Enable streaming execution to receive real-time stage updates via Server-Sent Events (SSE). NOT REQUIRED - defaults to False for standard execution.

When stream=True:

Response Content-Type: text/event-stream
Events emitted: stage_start, stage_complete, stage_error, execution_complete, execution_error
Each event is formatted as: data: {json}\n\n
StreamStageEvent contains: event_type, execution_id, stage_name, stage_index, total_stages, documents (intermediate), statistics, budget_used

When to use streaming:

Progress tracking for multi-stage pipelines
Displaying intermediate results as stages complete
Real-time budget and performance monitoring
Debugging pipeline execution

When to skip streaming:

Single-stage or fast pipelines (<100ms)
No need for intermediate results
Minimizing overhead is critical

Examples:

false

true

Response

Successful Response

Revoke Retriever API Key List Adhoc Executions

Organization

Namespaces

Buckets

Feature Extractors

Batch Queue

Collections

Documents

Retrievers

Taxonomies

Clusters

Triggers

Alerts

Webhooks

Apps

Agent Sessions

Annotations

Templates

Manifest

Discovery

Analytics

Notifications

Tasks

Inference

Resource Search

Pricing

Execute Adhoc Retriever

Authorizations

Query Parameters

Body

Response