Create Cluster

# Create Cluster: POST the JSON document below to the clusters endpoint.
# Replace <token> and the "<string>" placeholders before running.
curl -X POST 'https://api.mixpeek.com/v1/clusters' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}
'

import requests

# Endpoint receiving the "Create Cluster" POST request.
url = "https://api.mixpeek.com/v1/clusters"

# Example request body. The "<string>" values are placeholders that must be
# replaced with real values before the request is sent.
payload = {
    "collection_ids": ["<string>"],
    "cluster_name": "<string>",
    "cluster_type": "vector",
    "vector_config": {
        "algorithm_params": {"min_cluster_size": 10, "min_samples": 5},
        "clustering_method": "hdbscan",
        "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
        "sample_size": 1000,
    },
    "attribute_config": {
        "attributes": ["category"],
        "hierarchical_grouping": False,
    },
    "filters": {
        "AND": [
            {"field": "name", "operator": "eq", "value": "John"},
            {"field": "age", "operator": "gte", "value": 30},
        ],
        "OR": [
            {"field": "status", "operator": "eq", "value": "active"},
            {"field": "role", "operator": "eq", "value": "admin"},
        ],
        "NOT": [
            {"field": "department", "operator": "eq", "value": "HR"},
            {"field": "location", "operator": "eq", "value": "remote"},
        ],
        "case_sensitive": True,
    },
    "llm_labeling": {
        "enabled": True,
        "include_keywords": True,
        "include_summary": True,
        "labeling_inputs": {
            "input_mappings": [
                {"input_key": "title", "path": "title", "source_type": "payload"},
                {"input_key": "description", "path": "description", "source_type": "payload"},
                {"input_key": "text", "path": "text", "source_type": "payload"},
            ]
        },
        "model_name": "gpt-4o-mini-2024-07-18",
        "provider": "openai",
    },
    "enrich_source_collection": False,
    "source_enrichment_config": {
        "field_mappings": [
            {"source_field": "cluster_id", "target_field": "category_id"},
            {"source_field": "cluster_label", "target_field": "category_name"},
            {"source_field": "distance_to_centroid", "target_field": "category_confidence"},
        ]
    },
    "auto_execute_on_batch": False,
    "auto_execute_min_documents": 123,
    "auto_execute_cooldown_seconds": 3600,
}

# "<token>" is a placeholder for the caller's bearer token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so without this argument a
# stalled connection would block the script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw body whatever the status code, so error payloads stay visible.
print(response.text)

// Cluster definition sent as the request body; '<string>' values are
// placeholders to fill in before sending.
const requestBody = {
  collection_ids: ['<string>'],
  cluster_name: '<string>',
  cluster_type: 'vector',
  vector_config: {
    algorithm_params: {
      min_cluster_size: 10,
      min_samples: 5
    },
    clustering_method: 'hdbscan',
    feature_uri: 'mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding',
    sample_size: 1000
  },
  attribute_config: {
    attributes: ['category'],
    hierarchical_grouping: false
  },
  filters: {
    AND: [
      {field: 'name', operator: 'eq', value: 'John'},
      {field: 'age', operator: 'gte', value: 30}
    ],
    OR: [
      {field: 'status', operator: 'eq', value: 'active'},
      {field: 'role', operator: 'eq', value: 'admin'}
    ],
    NOT: [
      {field: 'department', operator: 'eq', value: 'HR'},
      {field: 'location', operator: 'eq', value: 'remote'}
    ],
    case_sensitive: true
  },
  llm_labeling: {
    enabled: true,
    include_keywords: true,
    include_summary: true,
    labeling_inputs: {
      input_mappings: [
        {input_key: 'title', path: 'title', source_type: 'payload'},
        {input_key: 'description', path: 'description', source_type: 'payload'},
        {input_key: 'text', path: 'text', source_type: 'payload'}
      ]
    },
    model_name: 'gpt-4o-mini-2024-07-18',
    provider: 'openai'
  },
  enrich_source_collection: false,
  source_enrichment_config: {
    field_mappings: [
      {source_field: 'cluster_id', target_field: 'category_id'},
      {source_field: 'cluster_label', target_field: 'category_name'},
      {source_field: 'distance_to_centroid', target_field: 'category_confidence'}
    ]
  },
  auto_execute_on_batch: false,
  auto_execute_min_documents: 123,
  auto_execute_cooldown_seconds: 3600
};

// fetch() settings: HTTP method, auth and content-type headers, serialized body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(requestBody)
};

// Send the request and log the parsed JSON reply; any rejection along the way
// (network failure, unparsable body) is reported through console.error.
(async () => {
  try {
    const res = await fetch('https://api.mixpeek.com/v1/clusters', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Cluster definition sent as the JSON request body; '<string>' values are
// placeholders to replace before sending.
$requestBody = [
  'collection_ids' => ['<string>'],
  'cluster_name' => '<string>',
  'cluster_type' => 'vector',
  'vector_config' => [
    'algorithm_params' => ['min_cluster_size' => 10, 'min_samples' => 5],
    'clustering_method' => 'hdbscan',
    'feature_uri' => 'mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding',
    'sample_size' => 1000
  ],
  'attribute_config' => [
    'attributes' => ['category'],
    'hierarchical_grouping' => false
  ],
  'filters' => [
    'AND' => [
      ['field' => 'name', 'operator' => 'eq', 'value' => 'John'],
      ['field' => 'age', 'operator' => 'gte', 'value' => 30]
    ],
    'OR' => [
      ['field' => 'status', 'operator' => 'eq', 'value' => 'active'],
      ['field' => 'role', 'operator' => 'eq', 'value' => 'admin']
    ],
    'NOT' => [
      ['field' => 'department', 'operator' => 'eq', 'value' => 'HR'],
      ['field' => 'location', 'operator' => 'eq', 'value' => 'remote']
    ],
    'case_sensitive' => true
  ],
  'llm_labeling' => [
    'enabled' => true,
    'include_keywords' => true,
    'include_summary' => true,
    'labeling_inputs' => [
      'input_mappings' => [
        ['input_key' => 'title', 'path' => 'title', 'source_type' => 'payload'],
        ['input_key' => 'description', 'path' => 'description', 'source_type' => 'payload'],
        ['input_key' => 'text', 'path' => 'text', 'source_type' => 'payload']
      ]
    ],
    'model_name' => 'gpt-4o-mini-2024-07-18',
    'provider' => 'openai'
  ],
  'enrich_source_collection' => false,
  'source_enrichment_config' => [
    'field_mappings' => [
      ['source_field' => 'cluster_id', 'target_field' => 'category_id'],
      ['source_field' => 'cluster_label', 'target_field' => 'category_name'],
      ['source_field' => 'distance_to_centroid', 'target_field' => 'category_confidence']
    ]
  ],
  'auto_execute_on_batch' => false,
  'auto_execute_min_documents' => 123,
  'auto_execute_cooldown_seconds' => 3600
];

$curl = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.mixpeek.com/v1/clusters",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($requestBody),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error text when one was recorded, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the example cluster definition to the Mixpeek clusters endpoint
// and prints the response body. A failure while building the request, sending
// it, or reading the reply is printed and ends the program early instead of
// being ignored.
func main() {

	url := "https://api.mixpeek.com/v1/clusters"

	// A raw string literal keeps the JSON readable and avoids the escaped,
	// single-line form, which cannot legally span source lines in Go.
	// "<string>" values are placeholders to replace before sending.
	payload := strings.NewReader(`{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	// On error res is nil, so it must not be touched before this check.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.mixpeek.com/v1/clusters")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"collection_ids\": [\n    \"<string>\"\n  ],\n  \"cluster_name\": \"<string>\",\n  \"cluster_type\": \"vector\",\n  \"vector_config\": {\n    \"algorithm_params\": {\n      \"min_cluster_size\": 10,\n      \"min_samples\": 5\n    },\n    \"clustering_method\": \"hdbscan\",\n    \"feature_uri\": \"mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding\",\n    \"sample_size\": 1000\n  },\n  \"attribute_config\": {\n    \"attributes\": [\n      \"category\"\n    ],\n    \"hierarchical_grouping\": false\n  },\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"llm_labeling\": {\n    \"enabled\": true,\n    \"include_keywords\": true,\n    \"include_summary\": true,\n    \"labeling_inputs\": {\n      \"input_mappings\": [\n        {\n          \"input_key\": \"title\",\n          \"path\": \"title\",\n          \"source_type\": \"payload\"\n        },\n        {\n          \"input_key\": \"description\",\n          \"path\": \"description\",\n          \"source_type\": \"payload\"\n        },\n        {\n          \"input_key\": \"text\",\n          \"path\": \"text\",\n          \"source_type\": \"payload\"\n        }\n      ]\n    },\n    \"model_name\": \"gpt-4o-mini-2024-07-18\",\n    \"provider\": \"openai\"\n  },\n  
\"enrich_source_collection\": false,\n  \"source_enrichment_config\": {\n    \"field_mappings\": [\n      {\n        \"source_field\": \"cluster_id\",\n        \"target_field\": \"category_id\"\n      },\n      {\n        \"source_field\": \"cluster_label\",\n        \"target_field\": \"category_name\"\n      },\n      {\n        \"source_field\": \"distance_to_centroid\",\n        \"target_field\": \"category_confidence\"\n      }\n    ]\n  },\n  \"auto_execute_on_batch\": false,\n  \"auto_execute_min_documents\": 123,\n  \"auto_execute_cooldown_seconds\": 3600\n}")
  .asString();

require 'uri'
require 'net/http'

# Create Cluster: POST the JSON document below over HTTPS and print the reply.
url = URI("https://api.mixpeek.com/v1/clusters")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'

# A single-quoted heredoc keeps the JSON literal (no interpolation or escape
# processing); chomp drops the heredoc's trailing newline so the body ends at
# the closing brace. "<string>" values are placeholders to replace.
request.body = <<'JSON'.chomp
{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}
JSON

response = http.request(request)
puts response.read_body

{
  "cluster_name": "<string>",
  "namespace_id": "<string>",
  "input_collections": [
    "<string>"
  ],
  "cluster_id": "<string>",
  "source_bucket_ids": [
    "<string>"
  ],
  "filters": {},
  "feature_uris": [
    "<string>"
  ],
  "multi_feature_strategy": "<string>",
  "learned_weights": {},
  "learning_quality_score": 123,
  "effective_feature_method": "<string>",
  "face_cluster_merge": {
    "enabled": true,
    "centroid_cosine_threshold": 0.55,
    "bbox_iou_threshold": 0.4,
    "scene_jaccard_threshold": 0.3,
    "bbox_field": "bbox",
    "frame_field": "frame_number",
    "scene_field": "scene_id"
  },
  "sample_size": 123,
  "preprocessing_steps": [
    {}
  ],
  "hierarchical_vector": true,
  "max_hierarchy_depth": 123,
  "clustered_attributes": [
    "<string>"
  ],
  "hierarchical_grouping": true,
  "aggregation_method": "<string>",
  "output_collection_ids": [
    "<string>"
  ],
  "output_collection_names": [
    "<string>"
  ],
  "algorithm": "<string>",
  "algorithm_params": {},
  "enrich_source": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "num_clusters": 123,
  "num_documents_clustered": 123,
  "execution_time_seconds": 123,
  "quality_metrics": {},
  "hierarchy_detected": false,
  "parent_cluster_id": "<string>",
  "child_cluster_ids": [
    "<string>"
  ],
  "hierarchy_relationships": [
    {}
  ],
  "status": "PENDING",
  "error": "<string>",
  "last_execution_task_id": "<string>",
  "last_run_id": "<string>",
  "created_at": "2023-11-07T05:31:56Z",
  "updated_at": "2023-11-07T05:31:56Z",
  "last_executed_at": "2023-11-07T05:31:56Z",
  "completed_at": "2023-11-07T05:31:56Z",
  "llm_labeling_errors": [
    "<string>"
  ],
  "metadata": {}
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

POST

clusters

Create Cluster

# Create Cluster: POST the JSON document below to the clusters endpoint.
# Replace <token> and the "<string>" placeholders before running.
curl -X POST 'https://api.mixpeek.com/v1/clusters' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}
'

import requests

# Endpoint receiving the "Create Cluster" POST request.
url = "https://api.mixpeek.com/v1/clusters"

# Example request body. The "<string>" values are placeholders that must be
# replaced with real values before the request is sent.
payload = {
    "collection_ids": ["<string>"],
    "cluster_name": "<string>",
    "cluster_type": "vector",
    "vector_config": {
        "algorithm_params": {"min_cluster_size": 10, "min_samples": 5},
        "clustering_method": "hdbscan",
        "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
        "sample_size": 1000,
    },
    "attribute_config": {
        "attributes": ["category"],
        "hierarchical_grouping": False,
    },
    "filters": {
        "AND": [
            {"field": "name", "operator": "eq", "value": "John"},
            {"field": "age", "operator": "gte", "value": 30},
        ],
        "OR": [
            {"field": "status", "operator": "eq", "value": "active"},
            {"field": "role", "operator": "eq", "value": "admin"},
        ],
        "NOT": [
            {"field": "department", "operator": "eq", "value": "HR"},
            {"field": "location", "operator": "eq", "value": "remote"},
        ],
        "case_sensitive": True,
    },
    "llm_labeling": {
        "enabled": True,
        "include_keywords": True,
        "include_summary": True,
        "labeling_inputs": {
            "input_mappings": [
                {"input_key": "title", "path": "title", "source_type": "payload"},
                {"input_key": "description", "path": "description", "source_type": "payload"},
                {"input_key": "text", "path": "text", "source_type": "payload"},
            ]
        },
        "model_name": "gpt-4o-mini-2024-07-18",
        "provider": "openai",
    },
    "enrich_source_collection": False,
    "source_enrichment_config": {
        "field_mappings": [
            {"source_field": "cluster_id", "target_field": "category_id"},
            {"source_field": "cluster_label", "target_field": "category_name"},
            {"source_field": "distance_to_centroid", "target_field": "category_confidence"},
        ]
    },
    "auto_execute_on_batch": False,
    "auto_execute_min_documents": 123,
    "auto_execute_cooldown_seconds": 3600,
}

# "<token>" is a placeholder for the caller's bearer token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so without this argument a
# stalled connection would block the script indefinitely.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw body whatever the status code, so error payloads stay visible.
print(response.text)

// Body of the Create Cluster request. Key order is kept as-is because
// JSON.stringify emits properties in insertion order.
const clusterRequest = {
  collection_ids: ['<string>'],
  cluster_name: '<string>',
  cluster_type: 'vector',
  vector_config: {
    algorithm_params: {
      min_cluster_size: 10,
      min_samples: 5
    },
    clustering_method: 'hdbscan',
    feature_uri: 'mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding',
    sample_size: 1000
  },
  attribute_config: {
    attributes: ['category'],
    hierarchical_grouping: false
  },
  filters: {
    AND: [
      {field: 'name', operator: 'eq', value: 'John'},
      {field: 'age', operator: 'gte', value: 30}
    ],
    OR: [
      {field: 'status', operator: 'eq', value: 'active'},
      {field: 'role', operator: 'eq', value: 'admin'}
    ],
    NOT: [
      {field: 'department', operator: 'eq', value: 'HR'},
      {field: 'location', operator: 'eq', value: 'remote'}
    ],
    case_sensitive: true
  },
  llm_labeling: {
    enabled: true,
    include_keywords: true,
    include_summary: true,
    labeling_inputs: {
      input_mappings: [
        {input_key: 'title', path: 'title', source_type: 'payload'},
        {input_key: 'description', path: 'description', source_type: 'payload'},
        {input_key: 'text', path: 'text', source_type: 'payload'}
      ]
    },
    model_name: 'gpt-4o-mini-2024-07-18',
    provider: 'openai'
  },
  enrich_source_collection: false,
  source_enrichment_config: {
    field_mappings: [
      {source_field: 'cluster_id', target_field: 'category_id'},
      {source_field: 'cluster_label', target_field: 'category_name'},
      {source_field: 'distance_to_centroid', target_field: 'category_confidence'}
    ]
  },
  auto_execute_on_batch: false,
  auto_execute_min_documents: 123,
  auto_execute_cooldown_seconds: 3600
};

// fetch() settings: POST with bearer auth and the serialized JSON body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(clusterRequest)
};

// Send the request, log the parsed JSON reply, and report any failure
// (network error or unparsable body) on stderr.
(async () => {
  try {
    const reply = await fetch('https://api.mixpeek.com/v1/clusters', options);
    const parsed = await reply.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Body of the Create Cluster request as a nested PHP array; it is serialized
// with json_encode() when the cURL options are assembled below.
$payload = [
    'collection_ids' => ['<string>'],
    'cluster_name' => '<string>',
    'cluster_type' => 'vector',
    'vector_config' => [
        'algorithm_params' => [
            'min_cluster_size' => 10,
            'min_samples' => 5,
        ],
        'clustering_method' => 'hdbscan',
        'feature_uri' => 'mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding',
        'sample_size' => 1000,
    ],
    'attribute_config' => [
        'attributes' => ['category'],
        'hierarchical_grouping' => false,
    ],
    'filters' => [
        'AND' => [
            ['field' => 'name', 'operator' => 'eq', 'value' => 'John'],
            ['field' => 'age', 'operator' => 'gte', 'value' => 30],
        ],
        'OR' => [
            ['field' => 'status', 'operator' => 'eq', 'value' => 'active'],
            ['field' => 'role', 'operator' => 'eq', 'value' => 'admin'],
        ],
        'NOT' => [
            ['field' => 'department', 'operator' => 'eq', 'value' => 'HR'],
            ['field' => 'location', 'operator' => 'eq', 'value' => 'remote'],
        ],
        'case_sensitive' => true,
    ],
    'llm_labeling' => [
        'enabled' => true,
        'include_keywords' => true,
        'include_summary' => true,
        'labeling_inputs' => [
            'input_mappings' => [
                ['input_key' => 'title', 'path' => 'title', 'source_type' => 'payload'],
                ['input_key' => 'description', 'path' => 'description', 'source_type' => 'payload'],
                ['input_key' => 'text', 'path' => 'text', 'source_type' => 'payload'],
            ],
        ],
        'model_name' => 'gpt-4o-mini-2024-07-18',
        'provider' => 'openai',
    ],
    'enrich_source_collection' => false,
    'source_enrichment_config' => [
        'field_mappings' => [
            ['source_field' => 'cluster_id', 'target_field' => 'category_id'],
            ['source_field' => 'cluster_label', 'target_field' => 'category_name'],
            ['source_field' => 'distance_to_centroid', 'target_field' => 'category_confidence'],
        ],
    ],
    'auto_execute_on_batch' => false,
    'auto_execute_min_documents' => 123,
    'auto_execute_cooldown_seconds' => 3600,
];

$handle = curl_init();

curl_setopt_array($handle, [
    CURLOPT_URL => "https://api.mixpeek.com/v1/clusters",
    // Have curl_exec() return the response body instead of printing it.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_POSTFIELDS => json_encode($payload),
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer <token>",
        "Content-Type: application/json",
    ],
]);

$response = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// Print the transport error when cURL reported one, otherwise the response body.
echo $failure ? "cURL Error #:" . $failure : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the Create Cluster request and prints the raw response body.
// Any failure while building the request, sending it, or reading the reply
// is printed and ends the program early.
func main() {

	url := "https://api.mixpeek.com/v1/clusters"

	// Raw string literal: the JSON is sent exactly as written here, and the
	// literal may span lines (an interpreted "..." literal may not).
	payload := strings.NewReader(`{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.mixpeek.com/v1/clusters")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"collection_ids\": [\n    \"<string>\"\n  ],\n  \"cluster_name\": \"<string>\",\n  \"cluster_type\": \"vector\",\n  \"vector_config\": {\n    \"algorithm_params\": {\n      \"min_cluster_size\": 10,\n      \"min_samples\": 5\n    },\n    \"clustering_method\": \"hdbscan\",\n    \"feature_uri\": \"mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding\",\n    \"sample_size\": 1000\n  },\n  \"attribute_config\": {\n    \"attributes\": [\n      \"category\"\n    ],\n    \"hierarchical_grouping\": false\n  },\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"llm_labeling\": {\n    \"enabled\": true,\n    \"include_keywords\": true,\n    \"include_summary\": true,\n    \"labeling_inputs\": {\n      \"input_mappings\": [\n        {\n          \"input_key\": \"title\",\n          \"path\": \"title\",\n          \"source_type\": \"payload\"\n        },\n        {\n          \"input_key\": \"description\",\n          \"path\": \"description\",\n          \"source_type\": \"payload\"\n        },\n        {\n          \"input_key\": \"text\",\n          \"path\": \"text\",\n          \"source_type\": \"payload\"\n        }\n      ]\n    },\n    \"model_name\": \"gpt-4o-mini-2024-07-18\",\n    \"provider\": \"openai\"\n  },\n  
\"enrich_source_collection\": false,\n  \"source_enrichment_config\": {\n    \"field_mappings\": [\n      {\n        \"source_field\": \"cluster_id\",\n        \"target_field\": \"category_id\"\n      },\n      {\n        \"source_field\": \"cluster_label\",\n        \"target_field\": \"category_name\"\n      },\n      {\n        \"source_field\": \"distance_to_centroid\",\n        \"target_field\": \"category_confidence\"\n      }\n    ]\n  },\n  \"auto_execute_on_batch\": false,\n  \"auto_execute_min_documents\": 123,\n  \"auto_execute_cooldown_seconds\": 3600\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.mixpeek.com/v1/clusters")

# Open an HTTPS connection to the API host.
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

# Build the POST request with bearer authentication and a JSON content type.
request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
# Non-interpolating heredoc: the JSON is sent exactly as written, so a long
# escaped one-line literal (and any accidental wrap inside it) is avoided.
# `chomp` removes the newline a heredoc appends after the closing brace.
request.body = <<'JSON'.chomp
{
  "collection_ids": [
    "<string>"
  ],
  "cluster_name": "<string>",
  "cluster_type": "vector",
  "vector_config": {
    "algorithm_params": {
      "min_cluster_size": 10,
      "min_samples": 5
    },
    "clustering_method": "hdbscan",
    "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
    "sample_size": 1000
  },
  "attribute_config": {
    "attributes": [
      "category"
    ],
    "hierarchical_grouping": false
  },
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "enrich_source_collection": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "auto_execute_on_batch": false,
  "auto_execute_min_documents": 123,
  "auto_execute_cooldown_seconds": 3600
}
JSON

# Send the request and print the raw response body.
response = http.request(request)
puts response.read_body

{
  "cluster_name": "<string>",
  "namespace_id": "<string>",
  "input_collections": [
    "<string>"
  ],
  "cluster_type": "vector",
  "cluster_id": "<string>",
  "source_bucket_ids": [
    "<string>"
  ],
  "filters": {},
  "feature_uris": [
    "<string>"
  ],
  "multi_feature_strategy": "<string>",
  "learned_weights": {},
  "learning_quality_score": 123,
  "effective_feature_method": "<string>",
  "face_cluster_merge": {
    "enabled": true,
    "centroid_cosine_threshold": 0.55,
    "bbox_iou_threshold": 0.4,
    "scene_jaccard_threshold": 0.3,
    "bbox_field": "bbox",
    "frame_field": "frame_number",
    "scene_field": "scene_id"
  },
  "sample_size": 123,
  "preprocessing_steps": [
    {}
  ],
  "hierarchical_vector": true,
  "max_hierarchy_depth": 123,
  "clustered_attributes": [
    "<string>"
  ],
  "hierarchical_grouping": true,
  "aggregation_method": "<string>",
  "output_collection_ids": [
    "<string>"
  ],
  "output_collection_names": [
    "<string>"
  ],
  "algorithm": "<string>",
  "algorithm_params": {},
  "enrich_source": false,
  "source_enrichment_config": {
    "field_mappings": [
      {
        "source_field": "cluster_id",
        "target_field": "category_id"
      },
      {
        "source_field": "cluster_label",
        "target_field": "category_name"
      },
      {
        "source_field": "distance_to_centroid",
        "target_field": "category_confidence"
      }
    ]
  },
  "llm_labeling": {
    "enabled": true,
    "include_keywords": true,
    "include_summary": true,
    "labeling_inputs": {
      "input_mappings": [
        {
          "input_key": "title",
          "path": "title",
          "source_type": "payload"
        },
        {
          "input_key": "description",
          "path": "description",
          "source_type": "payload"
        },
        {
          "input_key": "text",
          "path": "text",
          "source_type": "payload"
        }
      ]
    },
    "model_name": "gpt-4o-mini-2024-07-18",
    "provider": "openai"
  },
  "num_clusters": 123,
  "num_documents_clustered": 123,
  "execution_time_seconds": 123,
  "quality_metrics": {},
  "hierarchy_detected": false,
  "parent_cluster_id": "<string>",
  "child_cluster_ids": [
    "<string>"
  ],
  "hierarchy_relationships": [
    {}
  ],
  "status": "PENDING",
  "error": "<string>",
  "last_execution_task_id": "<string>",
  "last_run_id": "<string>",
  "created_at": "2023-11-07T05:31:56Z",
  "updated_at": "2023-11-07T05:31:56Z",
  "last_executed_at": "2023-11-07T05:31:56Z",
  "completed_at": "2023-11-07T05:31:56Z",
  "llm_labeling_errors": [
    "<string>"
  ],
  "metadata": {}
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Body

application/json

Create a clustering job for one or more collections.

collection_ids

string[]

required

Collections to cluster together

Minimum array length: 1

cluster_name

string | null

Optional human-friendly name for the clustering job

cluster_type

enum<string>

default:vector

Vector or attribute clustering

Available options:

vector,

attribute

vector_config

VectorBasedConfig · object | null

Required when cluster_type is 'vector'

Show child attributes

Example:

{
  "algorithm_params": { "min_cluster_size": 10, "min_samples": 5 },
  "clustering_method": "hdbscan",
  "feature_uri": "mixpeek://multimodal_extractor@v1/vertex_multimodal_embedding",
  "sample_size": 1000
}

attribute_config

AttributeBasedConfig · object | null

Required when cluster_type is 'attribute'

Show child attributes

Example:

{
  "attributes": ["category"],
  "hierarchical_grouping": false
}

filters

LogicalOperator · object | null

Optional filters to pre-filter documents before clustering (same format as list documents). Applied during Qdrant scroll before parquet export. Useful for clustering subsets like: status='active', category='electronics', etc.

Show child attributes

llm_labeling

LLMLabeling · object | null

Optional configuration for LLM-based cluster labeling. When provided with enabled=True, clusters will have semantic labels generated by LLM instead of generic labels like 'Cluster 0'. When not provided or enabled=False, uses fallback labels.

Show child attributes

Example:

{
  "enabled": true,
  "include_keywords": true,
  "include_summary": true,
  "labeling_inputs": {
    "input_mappings": [
      {
        "input_key": "title",
        "path": "title",
        "source_type": "payload"
      },
      {
        "input_key": "description",
        "path": "description",
        "source_type": "payload"
      },
      {
        "input_key": "text",
        "path": "text",
        "source_type": "payload"
      }
    ]
  },
  "model_name": "gpt-4o-mini-2024-07-18",
  "provider": "openai"
}

enrich_source_collection

boolean

default:false

If True, cluster results are written back to source collection(s) in-place instead of creating new output collections. Documents will be enriched with cluster_id, cluster_label, distance_to_centroid, and optionally other metadata. Similar to taxonomy enrichment pattern.

source_enrichment_config

SourceEnrichmentConfig · object | null

Configuration for source collection enrichment (only used if enrich_source_collection=True). Controls which fields are added to source documents and field naming conventions.

Show child attributes

Example:

{
  "field_mappings": [
    {
      "source_field": "cluster_id",
      "target_field": "category_id"
    },
    {
      "source_field": "cluster_label",
      "target_field": "category_name"
    },
    {
      "source_field": "distance_to_centroid",
      "target_field": "category_confidence"
    }
  ]
}

auto_execute_on_batch

boolean

default:false

Automatically execute this cluster whenever a batch completes on any of its input collections. When True, a ClusterApplicationConfig entry is added to each input collection's cluster_applications field at creation time. The cluster will then auto-trigger after each batch completion (subject to cooldown and document threshold). When False (default), the cluster must be executed manually via the API.

auto_execute_min_documents

integer | null

Minimum number of documents required before auto-executing cluster. Only used when auto_execute_on_batch=True. If the collection has fewer documents than this threshold, clustering is skipped.

auto_execute_cooldown_seconds

integer

default:3600

Minimum time (in seconds) between automatic cluster executions. Only used when auto_execute_on_batch=True. Default: 3600 (1 hour).

Response

Successful Response

Cluster job metadata stored in MongoDB clusters collection.

This is separate from cluster documents themselves. Tracks job-level configuration, status, and summary statistics.

Supports both vector and attribute clustering with appropriate metadata.

cluster_name

string

required

Human-readable cluster name

namespace_id

string

required

Namespace this cluster belongs to

input_collections

string[]

required

Source collection IDs that were clustered

cluster_type

enum<string>

required

Type of clustering: vector (embedding-based) or attribute (metadata-based)

Available options:

vector,

attribute

cluster_id

string

Unique cluster job identifier

source_bucket_ids

string[] | null

Source bucket IDs that the input collections originated from. Enables bucket lineage tracking.

filters

Filters · object | null

Optional filters that were applied to pre-filter documents before clustering

feature_uris

string[] | null

Feature URIs that were clustered (mixpeek://{extractor}@{version}/{output}). Only for vector clustering.

multi_feature_strategy

string | null

Strategy used if multiple features (concatenate/independent/weighted). Only for vector clustering.

learned_weights

Learned Weights · object | null

Automatically learned feature weights (when multi_feature_strategy='weighted'). Keys are feature URIs, values are learned weights. Only populated after clustering execution completes.

Show child attributes

learning_quality_score

number | null

Clustering quality score from weight learning (e.g., silhouette score). Only populated when multi_feature_strategy='weighted' and weights were learned.

effective_feature_method

string | null

Method for calculating cluster centroids (mean/median/medoid). Only for vector clustering.

face_cluster_merge

FaceClusterMergeConfig · object | null

Stored post-HDBSCAN face-identity merge config. Populated from vector_config.face_cluster_merge at cluster creation and replayed into ClusteringConfig on every execute. Only applies to vector clustering.

Show child attributes

sample_size

integer | null

Stored per-execution document cap. Populated from vector_config.sample_size at cluster creation and replayed into ClusteringConfig on every POST /v1/clusters/{id}/execute so re-runs stay consistent with the original config. When None, the export is uncapped (bounded by the engine's 100,000 safety limit). Only applies to vector clustering.

preprocessing_steps

Preprocessing Steps · object[] | null

Stored preprocessing steps from vector_config. Replayed into ClusteringConfig on every execute.

hierarchical_vector

boolean | null

Whether recursive sub-clustering is enabled for vector clustering.

max_hierarchy_depth

integer | null

Maximum recursion depth for hierarchical sub-clustering.

vis_n_components

enum<integer> | null

Stored visualization dimensionality (2D or 3D). Replayed into ClusteringConfig on every execute as the default.

Available options:

2,

3

layout_stability

enum<string> | null

Stored layout-stability mode (LS-5) from vector_config.layout_stability. Replayed into ClusteringConfig on every execute. When unset, executions default to 'align' (keep the map stable across runs via post-hoc registration).

Available options:

none,

transform,

align

clustered_attributes

string[] | null

Attribute field names that were clustered. Only for attribute clustering.

hierarchical_grouping

boolean | null

Whether hierarchical clustering was used. Only for attribute clustering.

aggregation_method

string | null

Method for aggregating attributes (most_frequent/first/last). Only for attribute clustering.

output_collection_ids

string[]

Collection IDs where cluster documents are stored. For single output: list with one collection ID. For per-feature output: list with one collection ID per feature.

output_collection_names

string[]

Names of output collections. Corresponds to output_collection_ids.

algorithm

string | null

Clustering algorithm used (hdbscan, kmeans, attribute_based, etc.)

algorithm_params

Algorithm Params · object | null

Algorithm-specific parameters (not used for attribute_based)

enrich_source

boolean

default:false

Whether source documents were enriched with cluster_id

source_enrichment_config

SourceEnrichmentConfig · object | null

Configuration for source enrichment (if enrich_source=True)

Show child attributes

Example:

{
  "field_mappings": [
    {
      "source_field": "cluster_id",
      "target_field": "category_id"
    },
    {
      "source_field": "cluster_label",
      "target_field": "category_name"
    },
    {
      "source_field": "distance_to_centroid",
      "target_field": "category_confidence"
    }
  ]
}

llm_labeling

LLMLabeling · object | null

Configuration for LLM-based cluster labeling (applies to all cluster types)

Show child attributes

Example:

{
  "enabled": true,
  "include_keywords": true,
  "include_summary": true,
  "labeling_inputs": {
    "input_mappings": [
      {
        "input_key": "title",
        "path": "title",
        "source_type": "payload"
      },
      {
        "input_key": "description",
        "path": "description",
        "source_type": "payload"
      },
      {
        "input_key": "text",
        "path": "text",
        "source_type": "payload"
      }
    ]
  },
  "model_name": "gpt-4o-mini-2024-07-18",
  "provider": "openai"
}

num_clusters

integer | null

Number of clusters found (excludes noise/outliers, populated after execution)

num_documents_clustered

integer | null

Total documents processed

execution_time_seconds

number | null

Time taken to complete clustering

quality_metrics

Quality Metrics · object | null

Clustering quality metrics (silhouette_score, davies_bouldin_score, calinski_harabasz_score, etc.)

Show child attributes

hierarchy_detected

boolean

default:false

Whether implicit hierarchy was detected (multi-feature independent) or created (hierarchical attributes)

parent_cluster_id

string | null

ID of the parent cluster. Set only for child clusters in a hierarchy.

child_cluster_ids

string[] | null

IDs of the child clusters. Set only for parent clusters in a hierarchy.

hierarchy_relationships

Hierarchy Relationships · object[] | null

Parent-child relationships detected from cluster membership overlap

status

enum<string>

default:PENDING

Cluster job status (propagated from TaskService)

Available options:

PENDING,

QUEUED,

IN_PROGRESS,

PROCESSING,

COMPLETED,

COMPLETED_WITH_ERRORS,

FAILED,

CANCELED,

INTERRUPTED,

UNKNOWN,

SKIPPED,

DRAFT,

ACTIVE,

ARCHIVED,

SUSPENDED

error

string | null

Error message if cluster execution failed. Propagated from TaskService.

failure_category

enum<string> | null

Machine-readable failure classification. Same enum as batch failure_category.

Available options:

timeout,

infrastructure,

orphaned,

pipeline,

validation,

unknown

last_execution_task_id

string | null

Most recent task ID for this cluster

last_run_id

string | null

Most recent execution run ID

created_at

string<date-time>

When cluster was created

updated_at

string<date-time>

When cluster was last updated

last_executed_at

string<date-time> | null

Last execution timestamp

completed_at

string<date-time> | null

When clustering completed successfully

llm_labeling_errors

string[] | null

List of errors encountered during LLM labeling (if any). Stored in MongoDB cluster metadata only, NOT in Qdrant cluster documents. Used to track LLM failures while allowing fallback labels to work.

metadata

Metadata · object

Additional user-defined metadata

Analyze multi-step transition paths Get Cluster

Organization

Namespaces

Buckets

Feature Extractors

Batch Queue

Collections

Documents

Retrievers

Taxonomies

Clusters

Triggers

Alerts

Webhooks

Apps

Agent Sessions

Annotations

Templates

Manifest

Discovery

Analytics

Notifications

Tasks

Inference

Resource Search

Pricing

Create Cluster

Authorizations

Body

Response