List documents.

# List documents in a collection: POST a JSON body carrying the filter, sort,
# and pagination options to the documents/list endpoint.
curl -X POST \
  https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "sort": {
    "field": "created_at",
    "direction": "desc"
  },
  "search": "<string>",
  "cursor": "<string>",
  "include_total": true,
  "return_presigned_urls": false,
  "return_vectors": false,
  "return_vector_names": false,
  "group_by": "source_object_id",
  "select": [
    "metadata.title",
    "content"
  ],
  "expand": [
    "customer_id"
  ],
  "limit": 10,
  "offset": 0
}
'

import requests

# Endpoint for listing documents; substitute the ID of the collection to list
# from in place of {collection_identifier}.
url = "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list"

# Example request body: logical filters, sort order, search term, pagination
# controls, and the response-shaping flags accepted by the endpoint.
payload = {
    "filters": {
        "AND": [
            {
                "field": "name",
                "operator": "eq",
                "value": "John"
            },
            {
                "field": "age",
                "operator": "gte",
                "value": 30
            }
        ],
        "OR": [
            {
                "field": "status",
                "operator": "eq",
                "value": "active"
            },
            {
                "field": "role",
                "operator": "eq",
                "value": "admin"
            }
        ],
        "NOT": [
            {
                "field": "department",
                "operator": "eq",
                "value": "HR"
            },
            {
                "field": "location",
                "operator": "eq",
                "value": "remote"
            }
        ],
        "case_sensitive": True
    },
    "sort": {
        "field": "created_at",
        "direction": "desc"
    },
    "search": "<string>",
    "cursor": "<string>",
    "include_total": True,
    "return_presigned_urls": False,
    "return_vectors": False,
    "return_vector_names": False,
    "group_by": "source_object_id",
    "select": ["metadata.title", "content"],
    "expand": ["customer_id"],
    "limit": 10,
    "offset": 0
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is given, so a stalled connection
# would block forever. 30 seconds matches the CURLOPT_TIMEOUT used by the PHP
# sample for this endpoint.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON text on both success and error responses).
print(response.text)

// Request body for the list-documents call: filters, sort, search, pagination,
// and response-shaping flags.
const requestBody = {
  filters: {
    AND: [
      {field: 'name', operator: 'eq', value: 'John'},
      {field: 'age', operator: 'gte', value: 30}
    ],
    OR: [
      {field: 'status', operator: 'eq', value: 'active'},
      {field: 'role', operator: 'eq', value: 'admin'}
    ],
    NOT: [
      {field: 'department', operator: 'eq', value: 'HR'},
      {field: 'location', operator: 'eq', value: 'remote'}
    ],
    case_sensitive: true
  },
  sort: {field: 'created_at', direction: 'desc'},
  search: '<string>',
  cursor: '<string>',
  include_total: true,
  return_presigned_urls: false,
  return_vectors: false,
  return_vector_names: false,
  group_by: 'source_object_id',
  select: ['metadata.title', 'content'],
  expand: ['customer_id'],
  limit: 10,
  offset: 0
};

// fetch() options: POST with bearer auth and a JSON-encoded body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(requestBody)
};

// Send the request, log the parsed JSON response, and log any failure
// (network error or unparsable body) to stderr.
(async () => {
  try {
    const res = await fetch(
      'https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list',
      options
    );
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Example request body for the list-documents endpoint: logical filters,
// sort order, search term, pagination controls, and response-shaping flags.
$payload = [
  'filters' => [
    'AND' => [
      ['field' => 'name', 'operator' => 'eq', 'value' => 'John'],
      ['field' => 'age', 'operator' => 'gte', 'value' => 30],
    ],
    'OR' => [
      ['field' => 'status', 'operator' => 'eq', 'value' => 'active'],
      ['field' => 'role', 'operator' => 'eq', 'value' => 'admin'],
    ],
    'NOT' => [
      ['field' => 'department', 'operator' => 'eq', 'value' => 'HR'],
      ['field' => 'location', 'operator' => 'eq', 'value' => 'remote'],
    ],
    'case_sensitive' => true,
  ],
  'sort' => ['field' => 'created_at', 'direction' => 'desc'],
  'search' => '<string>',
  'cursor' => '<string>',
  'include_total' => true,
  'return_presigned_urls' => false,
  'return_vectors' => false,
  'return_vector_names' => false,
  'group_by' => 'source_object_id',
  'select' => ['metadata.title', 'content'],
  'expand' => ['customer_id'],
  'limit' => 10,
  'offset' => 0,
];

// Bearer auth plus JSON content type.
$requestHeaders = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json",
];

$curl = curl_init();

// Configure a POST that returns the response as a string, with a 30-second
// timeout and at most 10 redirects.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a POST request with an example filter/sort/pagination body to the
// list-documents endpoint and prints the raw response body. Any failure while
// building the request, sending it, or reading the response is printed and
// ends the program early instead of being ignored.
func main() {

	url := "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list"

	payload := strings.NewReader("{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On error the response may be nil, so stop before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body: filters, sort, search, pagination, and response-shaping flags.
String requestBody = "{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}";

// POST the body to the list-documents endpoint and read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for the list-documents call.
endpoint = URI("https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list")

# JSON request body: filters, sort, search, pagination, and response-shaping flags.
json_body = "{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}"

# Build the POST request with the auth and content-type headers set up front.
request = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
request.body = json_body

# Connect to the API host over TLS and send the request.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

response = client.request(request)
puts response.read_body

{
  "pagination": {
    "total": 123,
    "page": 123,
    "page_size": 123,
    "total_pages": 123,
    "next_page": "<string>",
    "previous_page": "<string>",
    "next_cursor": "<string>"
  },
  "results": [
    {
      "document_id": "<string>",
      "collection_id": "<string>",
      "document_blobs": [
        {
          "field": "<string>",
          "url": "<string>",
          "role": "source",
          "type": "other",
          "filename": "segment_0.mp4",
          "size_bytes": 1048576,
          "content_type": "video/mp4",
          "checksum": "sha256:a1b2c3d4e5f6...",
          "created_at": "2023-11-07T05:31:56Z",
          "source_blob_id": "blob_abc123",
          "presigned_url": "<string>"
        }
      ],
      "_internal": {
        "collection_id": "col_articles",
        "created_at": "2025-10-31T10:00:00Z",
        "document_id": "doc_f8966ff29c",
        "internal_id": "org_abc123",
        "lineage": {
          "path": "bkt_content/col_articles",
          "root_bucket_id": "bkt_content",
          "root_object_id": "obj_article_001",
          "source_object_id": "obj_article_001",
          "source_type": "bucket"
        },
        "metadata": {
          "ingestion_status": "COMPLETED"
        },
        "modality": "text",
        "namespace_id": "ns_xyz789",
        "updated_at": "2025-10-31T10:00:00Z"
      }
    }
  ],
  "groups": [
    {
      "group_key": "<unknown>",
      "documents": [
        {
          "document_id": "<string>",
          "collection_id": "<string>",
          "document_blobs": [
            {
              "field": "<string>",
              "url": "<string>",
              "role": "source",
              "type": "other",
              "filename": "segment_0.mp4",
              "size_bytes": 1048576,
              "content_type": "video/mp4",
              "checksum": "sha256:a1b2c3d4e5f6...",
              "created_at": "2023-11-07T05:31:56Z",
              "source_blob_id": "blob_abc123",
              "presigned_url": "<string>"
            }
          ],
          "_internal": {
            "collection_id": "col_articles",
            "created_at": "2025-10-31T10:00:00Z",
            "document_id": "doc_f8966ff29c",
            "internal_id": "org_abc123",
            "lineage": {
              "path": "bkt_content/col_articles",
              "root_bucket_id": "bkt_content",
              "root_object_id": "obj_article_001",
              "source_object_id": "obj_article_001",
              "source_type": "bucket"
            },
            "metadata": {
              "ingestion_status": "COMPLETED"
            },
            "modality": "text",
            "namespace_id": "ns_xyz789",
            "updated_at": "2025-10-31T10:00:00Z"
          }
        }
      ],
      "count": 123
    }
  ],
  "unknown_collection_ids": [
    "<string>"
  ],
  "total_documents": 123,
  "stats": {
    "total_documents": 123,
    "avg_blobs_per_document": 0,
    "total_groups": 123,
    "avg_documents_per_group": 123
  },
  "group_by_field": "source_object_id"
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

POST

collections

{collection_identifier}

documents

list

List documents.

# List documents in a collection: POST a JSON body carrying the filter, sort,
# and pagination options to the documents/list endpoint.
curl -X POST \
  https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "filters": {
    "AND": [
      {
        "field": "name",
        "operator": "eq",
        "value": "John"
      },
      {
        "field": "age",
        "operator": "gte",
        "value": 30
      }
    ],
    "OR": [
      {
        "field": "status",
        "operator": "eq",
        "value": "active"
      },
      {
        "field": "role",
        "operator": "eq",
        "value": "admin"
      }
    ],
    "NOT": [
      {
        "field": "department",
        "operator": "eq",
        "value": "HR"
      },
      {
        "field": "location",
        "operator": "eq",
        "value": "remote"
      }
    ],
    "case_sensitive": true
  },
  "sort": {
    "field": "created_at",
    "direction": "desc"
  },
  "search": "<string>",
  "cursor": "<string>",
  "include_total": true,
  "return_presigned_urls": false,
  "return_vectors": false,
  "return_vector_names": false,
  "group_by": "source_object_id",
  "select": [
    "metadata.title",
    "content"
  ],
  "expand": [
    "customer_id"
  ],
  "limit": 10,
  "offset": 0
}
'

import requests

# Endpoint for listing documents; substitute the ID of the collection to list
# from in place of {collection_identifier}.
url = "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list"

# Example request body: logical filters, sort order, search term, pagination
# controls, and the response-shaping flags accepted by the endpoint.
payload = {
    "filters": {
        "AND": [
            {
                "field": "name",
                "operator": "eq",
                "value": "John"
            },
            {
                "field": "age",
                "operator": "gte",
                "value": 30
            }
        ],
        "OR": [
            {
                "field": "status",
                "operator": "eq",
                "value": "active"
            },
            {
                "field": "role",
                "operator": "eq",
                "value": "admin"
            }
        ],
        "NOT": [
            {
                "field": "department",
                "operator": "eq",
                "value": "HR"
            },
            {
                "field": "location",
                "operator": "eq",
                "value": "remote"
            }
        ],
        "case_sensitive": True
    },
    "sort": {
        "field": "created_at",
        "direction": "desc"
    },
    "search": "<string>",
    "cursor": "<string>",
    "include_total": True,
    "return_presigned_urls": False,
    "return_vectors": False,
    "return_vector_names": False,
    "group_by": "source_object_id",
    "select": ["metadata.title", "content"],
    "expand": ["customer_id"],
    "limit": 10,
    "offset": 0
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is given, so a stalled connection
# would block forever. 30 seconds matches the CURLOPT_TIMEOUT used by the PHP
# sample for this endpoint.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (JSON text on both success and error responses).
print(response.text)

// Request body for the list-documents call: filters, sort, search, pagination,
// and response-shaping flags.
const requestBody = {
  filters: {
    AND: [
      {field: 'name', operator: 'eq', value: 'John'},
      {field: 'age', operator: 'gte', value: 30}
    ],
    OR: [
      {field: 'status', operator: 'eq', value: 'active'},
      {field: 'role', operator: 'eq', value: 'admin'}
    ],
    NOT: [
      {field: 'department', operator: 'eq', value: 'HR'},
      {field: 'location', operator: 'eq', value: 'remote'}
    ],
    case_sensitive: true
  },
  sort: {field: 'created_at', direction: 'desc'},
  search: '<string>',
  cursor: '<string>',
  include_total: true,
  return_presigned_urls: false,
  return_vectors: false,
  return_vector_names: false,
  group_by: 'source_object_id',
  select: ['metadata.title', 'content'],
  expand: ['customer_id'],
  limit: 10,
  offset: 0
};

// fetch() options: POST with bearer auth and a JSON-encoded body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(requestBody)
};

// Send the request, log the parsed JSON response, and log any failure
// (network error or unparsable body) to stderr.
(async () => {
  try {
    const res = await fetch(
      'https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list',
      options
    );
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Example request body for the list-documents endpoint: logical filters,
// sort order, search term, pagination controls, and response-shaping flags.
$payload = [
  'filters' => [
    'AND' => [
      ['field' => 'name', 'operator' => 'eq', 'value' => 'John'],
      ['field' => 'age', 'operator' => 'gte', 'value' => 30],
    ],
    'OR' => [
      ['field' => 'status', 'operator' => 'eq', 'value' => 'active'],
      ['field' => 'role', 'operator' => 'eq', 'value' => 'admin'],
    ],
    'NOT' => [
      ['field' => 'department', 'operator' => 'eq', 'value' => 'HR'],
      ['field' => 'location', 'operator' => 'eq', 'value' => 'remote'],
    ],
    'case_sensitive' => true,
  ],
  'sort' => ['field' => 'created_at', 'direction' => 'desc'],
  'search' => '<string>',
  'cursor' => '<string>',
  'include_total' => true,
  'return_presigned_urls' => false,
  'return_vectors' => false,
  'return_vector_names' => false,
  'group_by' => 'source_object_id',
  'select' => ['metadata.title', 'content'],
  'expand' => ['customer_id'],
  'limit' => 10,
  'offset' => 0,
];

// Bearer auth plus JSON content type.
$requestHeaders = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json",
];

$curl = curl_init();

// Configure a POST that returns the response as a string, with a 30-second
// timeout and at most 10 redirects.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a POST request with an example filter/sort/pagination body to the
// list-documents endpoint and prints the raw response body. Any failure while
// building the request, sending it, or reading the response is printed and
// ends the program early instead of being ignored.
func main() {

	url := "https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list"

	payload := strings.NewReader("{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On error the response may be nil, so stop before touching res.Body.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body: filters, sort, search, pagination, and response-shaping flags.
String requestBody = "{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}";

// POST the body to the list-documents endpoint and read the response as a string.
HttpResponse<String> response = Unirest
  .post("https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Target endpoint for the list-documents call.
endpoint = URI("https://api.mixpeek.com/v1/collections/{collection_identifier}/documents/list")

# JSON request body: filters, sort, search, pagination, and response-shaping flags.
json_body = "{\n  \"filters\": {\n    \"AND\": [\n      {\n        \"field\": \"name\",\n        \"operator\": \"eq\",\n        \"value\": \"John\"\n      },\n      {\n        \"field\": \"age\",\n        \"operator\": \"gte\",\n        \"value\": 30\n      }\n    ],\n    \"OR\": [\n      {\n        \"field\": \"status\",\n        \"operator\": \"eq\",\n        \"value\": \"active\"\n      },\n      {\n        \"field\": \"role\",\n        \"operator\": \"eq\",\n        \"value\": \"admin\"\n      }\n    ],\n    \"NOT\": [\n      {\n        \"field\": \"department\",\n        \"operator\": \"eq\",\n        \"value\": \"HR\"\n      },\n      {\n        \"field\": \"location\",\n        \"operator\": \"eq\",\n        \"value\": \"remote\"\n      }\n    ],\n    \"case_sensitive\": true\n  },\n  \"sort\": {\n    \"field\": \"created_at\",\n    \"direction\": \"desc\"\n  },\n  \"search\": \"<string>\",\n  \"cursor\": \"<string>\",\n  \"include_total\": true,\n  \"return_presigned_urls\": false,\n  \"return_vectors\": false,\n  \"return_vector_names\": false,\n  \"group_by\": \"source_object_id\",\n  \"select\": [\n    \"metadata.title\",\n    \"content\"\n  ],\n  \"expand\": [\n    \"customer_id\"\n  ],\n  \"limit\": 10,\n  \"offset\": 0\n}"

# Build the POST request with the auth and content-type headers set up front.
request = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
request.body = json_body

# Connect to the API host over TLS and send the request.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

response = client.request(request)
puts response.read_body

{
  "pagination": {
    "total": 123,
    "page": 123,
    "page_size": 123,
    "total_pages": 123,
    "next_page": "<string>",
    "previous_page": "<string>",
    "next_cursor": "<string>"
  },
  "results": [
    {
      "document_id": "<string>",
      "collection_id": "<string>",
      "document_blobs": [
        {
          "field": "<string>",
          "url": "<string>",
          "role": "source",
          "type": "other",
          "filename": "segment_0.mp4",
          "size_bytes": 1048576,
          "content_type": "video/mp4",
          "checksum": "sha256:a1b2c3d4e5f6...",
          "created_at": "2023-11-07T05:31:56Z",
          "source_blob_id": "blob_abc123",
          "presigned_url": "<string>"
        }
      ],
      "_internal": {
        "collection_id": "col_articles",
        "created_at": "2025-10-31T10:00:00Z",
        "document_id": "doc_f8966ff29c",
        "internal_id": "org_abc123",
        "lineage": {
          "path": "bkt_content/col_articles",
          "root_bucket_id": "bkt_content",
          "root_object_id": "obj_article_001",
          "source_object_id": "obj_article_001",
          "source_type": "bucket"
        },
        "metadata": {
          "ingestion_status": "COMPLETED"
        },
        "modality": "text",
        "namespace_id": "ns_xyz789",
        "updated_at": "2025-10-31T10:00:00Z"
      }
    }
  ],
  "groups": [
    {
      "group_key": "<unknown>",
      "documents": [
        {
          "document_id": "<string>",
          "collection_id": "<string>",
          "document_blobs": [
            {
              "field": "<string>",
              "url": "<string>",
              "role": "source",
              "type": "other",
              "filename": "segment_0.mp4",
              "size_bytes": 1048576,
              "content_type": "video/mp4",
              "checksum": "sha256:a1b2c3d4e5f6...",
              "created_at": "2023-11-07T05:31:56Z",
              "source_blob_id": "blob_abc123",
              "presigned_url": "<string>"
            }
          ],
          "_internal": {
            "collection_id": "col_articles",
            "created_at": "2025-10-31T10:00:00Z",
            "document_id": "doc_f8966ff29c",
            "internal_id": "org_abc123",
            "lineage": {
              "path": "bkt_content/col_articles",
              "root_bucket_id": "bkt_content",
              "root_object_id": "obj_article_001",
              "source_object_id": "obj_article_001",
              "source_type": "bucket"
            },
            "metadata": {
              "ingestion_status": "COMPLETED"
            },
            "modality": "text",
            "namespace_id": "ns_xyz789",
            "updated_at": "2025-10-31T10:00:00Z"
          }
        }
      ],
      "count": 123
    }
  ],
  "unknown_collection_ids": [
    "<string>"
  ],
  "total_documents": 123,
  "stats": {
    "total_documents": 123,
    "avg_blobs_per_document": 0,
    "total_groups": 123,
    "avg_documents_per_group": 123
  },
  "group_by_field": "source_object_id"
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

{
  "detail": [
    {
      "loc": [
        "<string>"
      ],
      "msg": "<string>",
      "type": "<string>",
      "input": "<unknown>",
      "ctx": {}
    }
  ]
}

{
  "status": 123,
  "error": {
    "message": "<string>",
    "type": "<string>",
    "code": "<string>",
    "details": {}
  },
  "success": false
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

collection_identifier

string

required

The ID of the collection to list documents from.

Query Parameters

return_presigned_urls

boolean

default:false

Generate presigned URLs for S3-backed blobs and url-shaped fields. Also accepted as a body field — if either is true, presigning is enabled.

return_vectors

boolean

default:false

Include vector embeddings in results. Also accepted as a body field — if either is true, vectors are returned.

filters

string | null

URL-encoded JSON filter (LogicalOperator shape: {"AND":[{"field":"metadata.status","operator":"eq","value":"active"}]}; OR/NOT also supported). Applies to the GET listing. POST /documents/list callers should send filters in the JSON body instead — the body wins when both are present. Invalid JSON returns 422 (never silently ignored).

search

string | null

Free-text search across common document fields. Body search wins when both are set.

sort

string | null

Sort spec as JSON ({"field":"created_at","direction":"desc"}) or the compact "field:direction" form. Body sort wins when both are set.

limit

integer | null

Required range: 1 <= x <= 1000

page_size

integer | null

Required range: 1 <= x <= 1000

offset

integer | null

Required range: 0 <= x <= 10000

page

integer | null

Required range: x >= 1

cursor

string | null

include_total

boolean

default:false

Body

application/json

Request model for listing documents.

Supports two pagination strategies:

Offset-based (default): Use query params ?page=2&page_size=10

Simple and familiar
Works well for shallow pagination (first ~100 pages)
Less efficient for deep pagination with sorting

Cursor-based (optional): Pass cursor from previous response's next_cursor

More efficient for deep pagination (page 100+)
Required for consistent results when sorting large datasets
When cursor is provided, offset is ignored

filters

LogicalOperator · object | null

Filters to apply.

Show child attributes

sort

SortOption · object | null

Sort options.

Show child attributes

search

string | null

Search term.

cursor

string | null

OPTIONAL cursor for efficient deep pagination. Pass the 'pagination.next_cursor' value from a previous response to fetch the next page. When cursor is provided, the page/offset query params are ignored. Use cursor-based pagination when: (1) paginating beyond page ~100, (2) sorting large datasets, or (3) you need consistent iteration. Use offset-based pagination (default) for: simple use cases, random page access, or when page numbers are needed in the UI.

include_total

boolean | null

Populate pagination.total/total_pages (runs a COUNT query, adds ~50-200ms). Accepted in the request body as well as via the ?include_total=true query parameter. (Previously only the query-parameter form worked, and an include_total sent in the body was silently ignored.) The body value wins when both are set.

return_presigned_urls

boolean | null

default:false

Whether to return presigned URLs for object keys.

return_vectors

boolean | null

default:false

Whether to return vector embeddings in the document results.

return_vector_names

boolean | string[]

default:false

Controls vector data in the response. Pass true to get a _vectors field listing available vector names (no embedding data). Pass a list of vector names (e.g. ["fashionsiglip_v1_embedding"]) to return the actual float arrays for those specific vectors, keyed by name.

group_by

string | null

OPTIONAL. Field to group documents by. Supports dot notation for nested fields (e.g., 'metadata.category', 'source_type'). Accepts either a bare string ('metadata.category') or an object form ({'field': 'metadata.category'}) for consistency with other API parameters. When specified, documents are grouped by the field value and returned as grouped results. Requires a payload index on the field in Qdrant; if no index exists, the operation fails with a validation error. Common groupable fields: 'source_object_id', 'root_object_id', 'collection_id', 'metadata.category'.

Example:

"source_object_id"

select

string[] | null

OPTIONAL. List of fields to include in the response. Supports dot notation for nested fields (e.g., 'metadata.title', 'content'). When specified, only the selected fields will be returned in the document results, reducing response size. System fields like '_id' and 'document_id' are always included. Use this to optimize response size when working with large documents.

Example:

["metadata.title", "content"]

expand

string[] | null

OPTIONAL. List of fields containing document IDs to resolve inline. Referenced documents are fetched and attached under an '_expanded' key. Supports dot-notation for nested fields (e.g., 'items.product_id'). Max 50 unique references per request. Depth is limited to 1 (no recursive expansion).

Example:

["customer_id"]

limit

integer

default:10

Number of documents to return per page. Capped at 1000. Accepts page_size as an alias (POST body only).

Required range: 1 <= x <= 1000

offset

integer

default:0

Number of documents to skip (offset-based pagination).

Required range: x >= 0

Response

Successful Response

Response model for listing documents.

Supports both regular document lists and grouped results based on the group_by parameter. When group_by is specified, results are returned as groups instead of a flat list.

Pagination strategies:

Offset-based (default): Use pagination.page and pagination.page_size
Cursor-based (optional): Use pagination.next_cursor for efficient deep pagination

pagination

PaginationResponse · object

required

Pagination information. Includes next_cursor for cursor-based pagination. When group_by is used, pagination applies to groups (not individual documents). total_count reflects total number of groups, not total documents.

Show child attributes

results

DocumentResponse · object[] | null

List of documents when group_by is NOT specified. Contains flat list of documents with pagination applied. Mutually exclusive with 'groups' field.

Show child attributes

groups

DocumentGroup · object[] | null

List of document groups when group_by IS specified. Each group contains documents sharing the same field value. Pagination applies to groups, not individual documents. Mutually exclusive with 'results' field.

Show child attributes

unknown_collection_ids

string[] | null

Requested collection_ids that do NOT exist in this namespace. Present only on the namespace-scoped list when SOME requested ids resolved and others did not — so a typo'd or deleted collection id is distinguishable from a real-but-empty collection instead of silently contributing zero results forever. When NONE of the requested ids resolve, the endpoint returns 404 instead.

total_documents

integer | null

Total number of documents matching the query (across all pages). Alias for stats.total_documents — included at the top level for convenience.

stats

DocumentListStats · object | null

Aggregate statistics across all documents in the result

Show child attributes

group_by_field

string | null

The field that was used for grouping when group_by was specified. null for non-grouped results. Useful for clients to understand the grouping structure.

Example:

"source_object_id"

Update a document's access control list (ACL).

Aggregate documents

Organization

Namespaces

Buckets

Feature Extractors

Batch Queue

Collections

Documents

Retrievers

Taxonomies

Clusters

Triggers

Alerts

Webhooks

Apps

Agent Sessions

Annotations

Templates

Manifest

Discovery

Analytics

Notifications

Tasks

Inference

Resource Search

Pricing

List documents.

Authorizations

Path Parameters

Query Parameters

Body

Response