Get extractor details

# Fetch the details of a single extractor in a namespace.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  'https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}'

import requests

# Endpoint for a single extractor; {namespace_id} and {extractor_id} are the
# path parameters of this route.
url = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}"

# Bearer authentication header, where <token> is your auth token.
headers = {"Authorization": "Bearer <token>"}

# requests applies no timeout unless one is given, so without this argument a
# stalled connection would block the script indefinitely.
response = requests.get(url, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: a GET carrying the bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Fetch the extractor, parse the body as JSON and log it; any failure along
// the way is reported through console.error.
(async () => {
  try {
    const reply = await fetch('https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}', options);
    const payload = await reply.json();
    console.log(payload);
  } catch (failure) {
    console.error(failure);
  }
})();

<?php

// Fetch the details of one extractor and echo either the response body or
// the cURL error message.
$endpoint = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}";

// Transfer settings applied to the handle in one call below.
$settings = [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => ["Authorization: Bearer <token>"],
];

$handle = curl_init();
curl_setopt_array($handle, $settings);

// Capture both the result and the error text before releasing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main requests the details of a single extractor and prints the raw
// response body. If building the request, sending it, or reading the body
// fails, it prints the error and returns instead of continuing.
func main() {
	url := "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do returns an error, so res.Body must not be
		// touched on this path.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Call the "Get extractor details" endpoint and keep the body as a String.
String url = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}";
HttpResponse<String> response = Unirest.get(url)
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

# Look up one extractor in a namespace and print the response body.
endpoint = URI("https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}")

# GET request carrying the bearer token.
lookup = Net::HTTP::Get.new(endpoint)
lookup["Authorization"] = 'Bearer <token>'

# Open a TLS connection, send the request, and close the connection again.
reply = Net::HTTP.start(endpoint.host, endpoint.port, use_ssl: true) do |conn|
  conn.request(lookup)
end
puts reply.read_body

{
  "feature_extractor_name": "<string>",
  "version": "<string>",
  "feature_extractor_id": "<string>",
  "source": "builtin",
  "description": "<string>",
  "input_schema": {},
  "output_schema": {},
  "icon": "box",
  "parameter_schema": {},
  "type_mode": "<string>",
  "expected_input_types": {},
  "inference_type": "<string>",
  "supported_input_types": [
    "<string>"
  ],
  "max_inputs": {},
  "default_parameters": {},
  "costs": {
    "tier": 2,
    "tier_label": "<string>",
    "rates": [
      {
        "credits_per_unit": 2,
        "description": "<string>"
      }
    ]
  },
  "required_vector_indexes": [
    {
      "name": "<string>",
      "description": "<string>",
      "index": {
        "datatype": "float32",
        "description": "Dense vector embedding for text content using E5-Large multilingual model. Optimized for semantic search across 100+ languages.",
        "dimensions": 1024,
        "distance": "cosine",
        "inference_name": "multilingual_e5_large_instruct_v1",
        "name": "text_extractor_v1_embedding",
        "supported_inputs": [
          "text",
          "string"
        ],
        "type": "dense"
      },
      "feature_uri": "mixpeek://text_extractor@v1/multilingual_e5_large_instruct_v1",
      "display_name": "<string>"
    }
  ],
  "required_payload_indexes": [
    {
      "field_name": "<string>",
      "field_schema": {
        "type": "text",
        "tokenizer": "word",
        "min_token_len": 2,
        "max_token_len": 15,
        "lowercase": true
      },
      "is_protected": false
    }
  ],
  "position_fields": [
    "<string>"
  ],
  "feature_uri": "<string>",
  "capabilities": [
    "<string>"
  ],
  "example_usage": {},
  "plugin_id": "<string>",
  "deployed": true,
  "created_at": "2023-11-07T05:31:56Z",
  "updated_at": "2023-11-07T05:31:56Z"
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": "Extractor 'unknown_extractor_v1' not found"
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

GET

namespaces

{namespace_id}

extractors

{extractor_id}

Get extractor details

# Fetch the details of a single extractor in a namespace.
curl -X GET \
  -H 'Authorization: Bearer <token>' \
  'https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}'

import requests

# Endpoint for a single extractor; {namespace_id} and {extractor_id} are the
# path parameters of this route.
url = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}"

# Bearer authentication header, where <token> is your auth token.
headers = {"Authorization": "Bearer <token>"}

# requests applies no timeout unless one is given, so without this argument a
# stalled connection would block the script indefinitely.
response = requests.get(url, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Request settings: a GET carrying the bearer token.
const options = {
  method: 'GET',
  headers: {Authorization: 'Bearer <token>'},
};

// Fetch the extractor, parse the body as JSON and log it; any failure along
// the way is reported through console.error.
(async () => {
  try {
    const reply = await fetch('https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}', options);
    const payload = await reply.json();
    console.log(payload);
  } catch (failure) {
    console.error(failure);
  }
})();

<?php

// Fetch the details of one extractor and echo either the response body or
// the cURL error message.
$endpoint = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}";

// Transfer settings applied to the handle in one call below.
$settings = [
  CURLOPT_URL => $endpoint,
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => ["Authorization: Bearer <token>"],
];

$handle = curl_init();
curl_setopt_array($handle, $settings);

// Capture both the result and the error text before releasing the handle.
$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

echo $failure ? "cURL Error #:" . $failure : $body;

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main requests the details of a single extractor and prints the raw
// response body. If building the request, sending it, or reading the body
// fails, it prints the error and returns instead of continuing.
func main() {
	url := "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do returns an error, so res.Body must not be
		// touched on this path.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

// Call the "Get extractor details" endpoint and keep the body as a String.
String url = "https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}";
HttpResponse<String> response = Unirest.get(url)
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

# Look up one extractor in a namespace and print the response body.
endpoint = URI("https://api.mixpeek.com/v1/namespaces/{namespace_id}/extractors/{extractor_id}")

# GET request carrying the bearer token.
lookup = Net::HTTP::Get.new(endpoint)
lookup["Authorization"] = 'Bearer <token>'

# Open a TLS connection, send the request, and close the connection again.
reply = Net::HTTP.start(endpoint.host, endpoint.port, use_ssl: true) do |conn|
  conn.request(lookup)
end
puts reply.read_body

{
  "feature_extractor_name": "<string>",
  "version": "<string>",
  "feature_extractor_id": "<string>",
  "source": "builtin",
  "description": "<string>",
  "input_schema": {},
  "output_schema": {},
  "icon": "box",
  "parameter_schema": {},
  "type_mode": "<string>",
  "expected_input_types": {},
  "inference_type": "<string>",
  "supported_input_types": [
    "<string>"
  ],
  "max_inputs": {},
  "default_parameters": {},
  "costs": {
    "tier": 2,
    "tier_label": "<string>",
    "rates": [
      {
        "credits_per_unit": 2,
        "description": "<string>"
      }
    ]
  },
  "required_vector_indexes": [
    {
      "name": "<string>",
      "description": "<string>",
      "index": {
        "datatype": "float32",
        "description": "Dense vector embedding for text content using E5-Large multilingual model. Optimized for semantic search across 100+ languages.",
        "dimensions": 1024,
        "distance": "cosine",
        "inference_name": "multilingual_e5_large_instruct_v1",
        "name": "text_extractor_v1_embedding",
        "supported_inputs": [
          "text",
          "string"
        ],
        "type": "dense"
      },
      "feature_uri": "mixpeek://text_extractor@v1/multilingual_e5_large_instruct_v1",
      "display_name": "<string>"
    }
  ],
  "required_payload_indexes": [
    {
      "field_name": "<string>",
      "field_schema": {
        "type": "text",
        "tokenizer": "word",
        "min_token_len": 2,
        "max_token_len": 15,
        "lowercase": true
      },
      "is_protected": false
    }
  ],
  "position_fields": [
    "<string>"
  ],
  "feature_uri": "<string>",
  "capabilities": [
    "<string>"
  ],
  "example_usage": {},
  "plugin_id": "<string>",
  "deployed": true,
  "created_at": "2023-11-07T05:31:56Z",
  "updated_at": "2023-11-07T05:31:56Z"
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": "Extractor 'unknown_extractor_v1' not found"
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

namespace_id

string

required

extractor_id

string

required

Response

Extractor details

Unified extractor response combining builtin and custom plugins.

This model provides a consistent view of all extractors available to a namespace, regardless of whether they are builtin or custom.

feature_extractor_name

string

required

Name of the feature extractor

version

string

required

Version of the feature extractor

feature_extractor_id

string

required

Unique identifier (name_version)

source

enum<string>

required

Origin of this extractor: 'builtin' (shipped with Mixpeek), 'custom' (user-uploaded plugin), or 'community' (marketplace)

Available options: builtin, custom, community

description

string

required

Human-readable description

input_schema

Input Schema · object

required

JSON schema for input data

output_schema

Output Schema · object

required

JSON schema for output data

icon

string

default:box

Lucide-react icon name for frontend rendering

parameter_schema

Parameter Schema · object | null

JSON schema for parameters

type_mode

string | null

What input types this extractor can handle: 'type_specific' (only one type, e.g. video-only) or 'multimodal' (handles multiple types with conditional processing). Type-specific extractors cannot use automatic-typed bucket properties.

expected_input_types

Expected Input Types · object | null

For type-specific extractors: maps input keys to required types (e.g., {'video': 'video', 'thumbnail': 'image'}). For multimodal extractors: null.

Show child attributes

inference_type

string | null

Kind of real-time inference this extractor provides: 'embedding', 'rerank', 'classify', 'generate', or 'general'. Determines which retriever stages are compatible. Null if the extractor is batch-only.

supported_input_types

string[]

Supported input types (video, image, text, etc.)

max_inputs

Max Inputs · object

Maximum number of inputs per type

Show child attributes

default_parameters

Default Parameters · object

Default parameter values

costs

CostsInfo · object | null

Credit cost information (builtin extractors only)

Show child attributes

required_vector_indexes

VectorIndexDefinition · object[] | null

Vector indexes this extractor produces

Show child attributes

required_payload_indexes

PayloadIndexConfig · object[] | null

Payload indexes required by this extractor

Show child attributes

position_fields

string[]

Fields that identify unique positions within output documents. Used for deterministic document ID generation.

feature_uri

string | null

Primary feature URI (e.g., mixpeek://text_extractor@v1/embedding)

capabilities

string[]

What this extractor can do: 'batch' (feature extraction during ingestion), 'realtime' (query-time inference for retriever stages)

example_usage

Example Usage · object | null

Minimal working configuration for namespace + collection + input_mappings + parameters

plugin_id

string | null

Plugin ID (custom plugins only)

deployed

boolean | null

Whether the plugin is deployed (custom plugins only)

validation_status

enum<string> | null

Validation status (custom plugins only)

Available options: passed, failed, pending

created_at

string<date-time> | null

Creation timestamp (custom plugins only)

updated_at

string<date-time> | null

Last update timestamp (custom plugins only)

List all extractors available to namespace | List models in namespace

Organization

Namespaces

Buckets

Feature Extractors

Batch Queue

Collections

Documents

Retrievers

Taxonomies

Clusters

Triggers

Alerts

Webhooks

Apps

Agent Sessions

Annotations

Templates

Manifest

Discovery

Analytics

Notifications

Tasks

Inference

Resource Search

Pricing

Get extractor details

Authorizations

Path Parameters

Response