> ## Documentation Index
> Fetch the complete documentation index at: https://docs.mixpeek.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# List Buckets

> This endpoint lists buckets with pagination, sorting, and filtering options.



## OpenAPI

````yaml post /v1/buckets/list
openapi: 3.1.0
info:
  title: Mixpeek API
  description: >-
    This is the Mixpeek API, providing access to various endpoints for data
    processing and retrieval.
  termsOfService: https://mixpeek.com/terms
  contact:
    name: Mixpeek Support
    url: https://mixpeek.com/contact
    email: info@mixpeek.com
  version: '0.82'
servers:
  - url: https://api.mixpeek.com
    description: Production
security: []
paths:
  /v1/buckets/list:
    post:
      tags:
        - Buckets
      summary: List Buckets
      description: >-
        This endpoint lists buckets with pagination, sorting, and filtering
        options.
      operationId: list_buckets_v1_buckets_list_post
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListBucketsRequest'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBucketsResponse'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
components:
  schemas:
    ListBucketsRequest:
      properties:
        search:
          anyOf:
            - type: string
            - type: 'null'
          title: Search
          description: >-
            Search term for wildcard search across bucket_id, bucket_name,
            description, and other text fields
        filters:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Filters
          description: >-
            Filters to apply to the bucket list. Supports filtering by bucket_id
            or bucket_name.
        sort:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Sort
          description: Sort options for the bucket list
        case_sensitive:
          type: boolean
          title: Case Sensitive
          description: If true, filters and search will be case-sensitive
          default: false
        limit:
          type: integer
          maximum: 1000
          minimum: 1
          title: Limit
          description: Number of results to return
          default: 10
        offset:
          type: integer
          minimum: 0
          title: Offset
          description: Number of results to skip
          default: 0
      type: object
      title: ListBucketsRequest
      description: Request model for listing buckets.
    ListBucketsResponse:
      properties:
        results:
          items:
            $ref: '#/components/schemas/BucketResponse'
          type: array
          title: Results
        total_count:
          type: integer
          title: Total Count
          description: Total number of buckets matching the query
        pagination:
          $ref: '#/components/schemas/PaginationResponse'
        stats:
          anyOf:
            - $ref: '#/components/schemas/BucketListStats'
            - type: 'null'
          description: Aggregate statistics across all buckets in the result
      type: object
      required:
        - results
        - total_count
        - pagination
      title: ListBucketsResponse
      description: Response model for listing buckets.
    ErrorResponse:
      properties:
        success:
          type: boolean
          title: Success
          description: Always false for error responses
          default: false
        status:
          type: integer
          title: Status
          description: HTTP status code for this error
        error:
          $ref: '#/components/schemas/ErrorDetail'
          description: Error details payload
      type: object
      required:
        - status
        - error
      title: ErrorResponse
      description: Error response model.
      examples:
        - error:
            details:
              id: ns_123
              resource: namespace
            message: Namespace not found
            type: NotFoundError
          status: 404
          success: false
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    BucketResponse:
      properties:
        bucket_id:
          type: string
          title: Bucket Id
          description: Unique identifier for the bucket
        bucket_name:
          type: string
          title: Bucket Name
          description: Human-readable name for the bucket
        description:
          anyOf:
            - type: string
            - type: 'null'
          title: Description
          description: Description of the bucket
        bucket_schema:
          anyOf:
            - $ref: '#/components/schemas/BucketSchema-Output'
            - type: 'null'
          description: Schema definition for objects in this bucket
        unique_key:
          anyOf:
            - $ref: '#/components/schemas/UniqueKeyConfig'
            - type: 'null'
          description: Unique key configuration for this bucket (if configured)
        metadata:
          additionalProperties: true
          type: object
          title: Metadata
          description: Additional metadata for the bucket
        storage_class:
          anyOf:
            - $ref: '#/components/schemas/StorageClass'
            - type: 'null'
          description: >-
            Object-storage tier for this bucket's objects: standard | nearline |
            coldline | archive. Provider-agnostic (GCS
            STANDARD/NEARLINE/COLDLINE/ARCHIVE; S3/MinIO
            STANDARD/STANDARD_IA/GLACIER_IR/GLACIER). NOTE: applied on write for
            sync-based ingestion (the primary media path); tiering for
            direct/presigned uploads and retroactive re-tiering of existing
            objects are in progress (TG-2837). null = provider default.
        object_count:
          type: integer
          title: Object Count
          description: Number of objects in the bucket
        total_size_bytes:
          type: integer
          title: Total Size Bytes
          description: Total size of all objects in the bucket in bytes
        created_at:
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
          title: Created At
          description: When the bucket was created
        updated_at:
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
          title: Updated At
          description: Last modification time of bucket metadata
        last_upload_at:
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
          title: Last Upload At
          description: When the last object was uploaded to this bucket
        stats_updated_at:
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
          title: Stats Updated At
          description: When bucket stats were last successfully recalculated
        status:
          $ref: '#/components/schemas/TaskStatusEnum'
          description: >-
            Bucket lifecycle status: ACTIVE, ARCHIVED, SUSPENDED, or IN_PROGRESS
            (while the bucket is being deleted)
          default: ACTIVE
        is_locked:
          type: boolean
          title: Is Locked
          description: Whether the bucket is locked (read-only)
          default: false
        batch_stats:
          anyOf:
            - $ref: '#/components/schemas/BatchStatistics'
            - type: 'null'
          description: >-
            Batch statistics for this bucket (calculated asynchronously, stored
            in DB)
        storage_stats:
          anyOf:
            - $ref: '#/components/schemas/StorageStatistics'
            - type: 'null'
          description: >-
            Storage statistics for this bucket (calculated asynchronously,
            stored in DB)
        source_adapter:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Source Adapter
          description: Source adapter configuration for inbound webhook-driven ingestion
      type: object
      required:
        - bucket_name
        - object_count
        - total_size_bytes
      title: BucketResponse
      description: Response model for bucket operations.
    PaginationResponse:
      properties:
        total:
          anyOf:
            - type: integer
            - type: 'null'
          title: Total
        page:
          anyOf:
            - type: integer
            - type: 'null'
          title: Page
        page_size:
          anyOf:
            - type: integer
            - type: 'null'
          title: Page Size
        total_pages:
          anyOf:
            - type: integer
            - type: 'null'
          title: Total Pages
        next_page:
          anyOf:
            - type: string
            - type: 'null'
          title: Next Page
        previous_page:
          anyOf:
            - type: string
            - type: 'null'
          title: Previous Page
        next_cursor:
          anyOf:
            - type: string
            - type: 'null'
          title: Next Cursor
      type: object
      title: PaginationResponse
      description: |-
        PaginationResponse.

        Cursor-based pagination response:
        - Use next_cursor for navigation
        - Total count fields are only populated when include_total=true
    BucketListStats:
      properties:
        total_objects:
          type: integer
          title: Total Objects
          description: Total number of objects across all buckets
          default: 0
        total_size_bytes:
          type: integer
          title: Total Size Bytes
          description: Total size in bytes across all buckets
          default: 0
        avg_objects_per_bucket:
          type: number
          title: Avg Objects Per Bucket
          description: Average number of objects per bucket
          default: 0
        avg_size_per_bucket:
          type: number
          title: Avg Size Per Bucket
          description: Average size in bytes per bucket
          default: 0
      type: object
      title: BucketListStats
      description: Aggregate statistics for a list of buckets.
    ErrorDetail:
      properties:
        message:
          type: string
          title: Message
          description: Human-readable error message
        type:
          type: string
          title: Type
          description: Stable error type identifier (machine-readable)
        code:
          anyOf:
            - type: string
            - type: 'null'
          title: Code
          description: >-
            Fine-grained error code for programmatic handling (e.g.,
            namespace_name_taken, feature_extractor_not_found). Present only
            when consumers may need to branch on a specific error condition.
        details:
          anyOf:
            - additionalProperties: true
              type: object
            - type: 'null'
          title: Details
          description: >-
            Optional structured details to help debugging (validation errors,
            IDs, etc.)
      type: object
      required:
        - message
        - type
      title: ErrorDetail
      description: Error detail model.
    ValidationError:
      properties:
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          type: array
          title: Location
        msg:
          type: string
          title: Message
        type:
          type: string
          title: Error Type
      type: object
      required:
        - loc
        - msg
        - type
      title: ValidationError
    BucketSchema-Output:
      properties:
        properties:
          additionalProperties:
            $ref: '#/components/schemas/BucketSchemaField-Output'
          type: object
          title: Properties
          description: >-
            REQUIRED. Map of field names to their type definitions. Each field
            must have a 'type' from the supported types: metadata types (string,
            number, integer, float, boolean, object, array, date, datetime) or
            file/blob types (text, image, audio, video, pdf, excel). NOTE: Use
            only the Mixpeek types listed here, not type names from other
            systems — e.g. use 'string' not 'keyword', 'text' for text blobs,
            'image' for image blobs. Example: {"title": {"type": "string"},
            "photo": {"type": "image"}}
      additionalProperties: true
      type: object
      required:
        - properties
      title: BucketSchema
      description: >-
        Schema definition for bucket objects.


        IMPORTANT: The bucket schema defines what fields your bucket objects
        will have.

        This schema is REQUIRED if you want to:

        1. Create collections that use input_mappings to process your bucket
        data

        2. Validate object structure before ingestion

        3. Enable type-safe data pipelines


        The schema defines the custom fields that will be used in:

        - Blob properties (e.g., "content", "thumbnail", "transcript")

        - Object metadata structure

        - Blob data structures


        Example workflow:

        1. Create bucket WITH schema defining your data structure

        2. Upload objects that conform to that schema

        3. Create collections that map schema fields to feature extractors


        Without a bucket_schema, collections cannot use input_mappings.
    UniqueKeyConfig:
      properties:
        fields:
          items:
            type: string
          type: array
          minItems: 1
          title: Fields
          description: >-
            Field name(s) from bucket schema to use as unique constraint.
            REQUIRED - must provide at least one field name.


            Single field example: ['video_id'] - Enforces uniqueness on video_id
            alone. Compound example: ['sensor_id', 'timestamp'] - Uniqueness
            requires BOTH fields to match.


            All specified fields must:
              - Exist in the bucket schema
              - Be scalar types (string, integer, float, uuid - NOT objects or arrays)
              - Have non-null, non-empty values in all uploaded objects
              - Be 255 characters or less per string field value


            Field order doesn't matter (sorted internally for consistency).
            ['timestamp', 'sensor_id'] is equivalent to ['sensor_id',
            'timestamp'].
          examples:
            - - video_id
            - - product_sku
            - - user_id
              - session_id
            - - sensor_id
              - timestamp
            - - product_id
              - size
              - color
        default_policy:
          anyOf:
            - type: string
              enum:
                - insert
                - update
                - upsert
            - type: 'null'
          title: Default Policy
          description: >-
            Default insertion policy for this bucket when not specified per
            request. OPTIONAL - if omitted, you must provide ?policy= parameter
            on each upload request.


            Policies:
              - 'insert': Create new object only. Fail with 409 Conflict if unique key already exists.
                          Use when: You want to prevent accidental overwrites (safest option).

              - 'update': Update existing object only. Fail with 404 Not Found if unique key doesn't exist.
                          Use when: You only want to update existing records, never create new ones.

              - 'upsert': Update if exists, create if not (idempotent operation).
                          Use when: You want idempotent ingestion (re-running is safe).

            Policy Resolution:
              1. Request-level ?policy= parameter takes precedence (if provided)
              2. Falls back to this default_policy (if configured)
              3. Returns 400 Bad Request if neither is specified

            Recommendation: Omit default_policy if you want explicit control on
            each upload. Set default_policy='upsert' for idempotent pipelines.
          examples:
            - insert
            - update
            - upsert
      type: object
      required:
        - fields
      title: UniqueKeyConfig
      description: >-
        Configuration for bucket unique key enforcement.


        Enables automatic uniqueness enforcement on one or more fields from the
        bucket schema.

        Supports both single field and compound (multi-field) uniqueness
        constraints.


        When configured, the bucket will maintain a lookup table mapping unique
        key values

        to document IDs, enabling efficient upsert operations and preventing
        duplicates.


        **Impact on Collection Trigger/Re-processing:**

        When a collection is triggered (POST /collections/{id}/trigger), the
        unique_key

        determines whether documents are overwritten or duplicated:

        - WITH unique_key: Documents get deterministic IDs → re-triggering
        OVERWRITES existing docs

        - WITHOUT unique_key: Documents get random IDs → re-triggering CREATES
        DUPLICATES


        For idempotent pipelines where re-triggering is safe, configure a
        unique_key.


        **Relationship to Extractor position_fields:**

        The `unique_key` (bucket-level) and `position_fields` (extractor-level)
        work together

        to generate deterministic document IDs:


        - `unique_key`: Identifies unique SOURCE OBJECTS in the bucket (e.g.,
        video_id)

        - `position_fields`: Identifies unique OUTPUT DOCUMENTS from a single
        object (e.g., start_time, end_time)


        Document ID Formula:
            document_id = hash(source_object_key + extractor_id + collection_id + position_field_values)

        Example - Processing a 60-second video with 10-second segments:
            - Bucket unique_key: ["video_id"] → Identifies the source video
            - Extractor position_fields: ["start_time", "end_time"] → Identifies each segment
            - Result: 6 unique document IDs (one per segment), all deterministic

        Without position_fields, all segments would get the SAME document_id and
        overwrite each other.

        Without unique_key, reprocessing would create DUPLICATE documents
        instead of updating.


        Requirements:
            - fields: REQUIRED - Array of field names from bucket schema to use as unique constraint
            - default_policy: OPTIONAL - Bucket-level default insertion policy (can be overridden per request)
            - All specified fields must exist in the bucket schema
            - All fields must be scalar types (string, integer, float, uuid)
            - Field values cannot be null or empty in uploaded objects
            - Cannot be changed after bucket creation (v1 limitation)

        Use Cases:
            - Single field uniqueness: ["video_id"], ["product_sku"], ["user_email"]
            - Compound uniqueness: ["sensor_id", "timestamp"], ["product_id", "size", "color"]
            - With default policy: Enables idempotent ingestion without per-request policy
            - Without default: Requires explicit policy on each upload (safer, more intentional)

        Insertion Policies:
            - 'insert': Fail with 409 Conflict if key exists (prevents accidental overwrites)
            - 'update': Fail with 404 Not Found if key doesn't exist (updates only)
            - 'upsert': Update if exists, insert if not (idempotent ingestion)

        Policy Resolution (when uploading objects):
            1. Use request-level ?policy= parameter if provided (highest priority)
            2. Fall back to bucket-level default_policy if configured
            3. Return 400 Bad Request if neither is specified (prevents accidental operations)

        Examples:
            Single field with upsert default (idempotent video ingestion):
                {
                    "fields": ["video_id"],
                    "default_policy": "upsert"
                }

            Single field with insert default (prevent duplicate products):
                {
                    "fields": ["product_sku"],
                    "default_policy": "insert"
                }

            Compound fields with upsert (time-series sensor data):
                {
                    "fields": ["sensor_id", "timestamp"],
                    "default_policy": "upsert"
                }

            Compound fields without default (explicit policy required):
                {
                    "fields": ["user_id", "session_id"]
                }
      examples:
        - default_policy: upsert
          description: Single field, upsert by default (idempotent video ingestion)
          fields:
            - video_id
        - default_policy: insert
          description: Single field, insert by default (prevent duplicate products)
          fields:
            - product_sku
        - description: Single field, no default policy (explicit policy required per upload)
          fields:
            - user_id
        - default_policy: insert
          description: >-
            Compound fields, insert by default (time-series with duplicate
            prevention)
          fields:
            - sensor_id
            - timestamp
        - default_policy: upsert
          description: Compound fields, upsert by default (product variants, idempotent)
          fields:
            - product_id
            - size
            - color
        - description: Compound fields, no default (explicit policy required)
          fields:
            - user_id
            - session_id
        - default_policy: update
          description: Three-field compound key, update only (profile updates)
          fields:
            - tenant_id
            - user_id
            - profile_version
    StorageClass:
      type: string
      enum:
        - standard
        - nearline
        - coldline
        - archive
      title: StorageClass
      description: >-
        Provider-agnostic object-storage tier for a bucket (BACKE-2299).


        The mixpeek API stays provider-agnostic; the object-storage factory maps
        each

        value to the underlying provider's equivalent on write (and, where
        supported,

        retroactively via lifecycle/rewrite):


        | mixpeek   | GCS       | S3 / MinIO     |

        |-----------|-----------|----------------|

        | standard  | STANDARD  | STANDARD       |

        | nearline  | NEARLINE  | STANDARD_IA    |

        | coldline  | COLDLINE  | GLACIER_IR     |

        | archive   | ARCHIVE   | GLACIER        |


        Set per-bucket so hot retriever-source buckets stay `standard` while
        large

        write-once/read-occasionally media buckets (footage, creatives) opt into
        a

        cheaper tier (e.g. ~50% on Nearline for the TS iconik ~13TB footage
        sync).
    TaskStatusEnum:
      type: string
      enum:
        - PENDING
        - QUEUED
        - IN_PROGRESS
        - PROCESSING
        - COMPLETED
        - COMPLETED_WITH_ERRORS
        - FAILED
        - CANCELED
        - INTERRUPTED
        - UNKNOWN
        - SKIPPED
        - DRAFT
        - ACTIVE
        - ARCHIVED
        - SUSPENDED
      title: TaskStatusEnum
      description: |-
        Enumeration of task statuses for tracking asynchronous operations.

        Task statuses indicate the current state of asynchronous operations like
        batch processing, object ingestion, clustering, and taxonomy execution.

        Status Categories:
            Operation Statuses: Track progress of async operations
            Lifecycle Statuses: Track entity state (buckets, collections, namespaces)

        Values:
            PENDING: Task is queued but has not started processing yet
            QUEUED: Task is waiting in the queue to be picked up for execution
            IN_PROGRESS: Task is currently being executed
            PROCESSING: Task is actively processing data (similar to IN_PROGRESS)
            COMPLETED: Task finished successfully with no errors
            COMPLETED_WITH_ERRORS: Task finished but some items failed (partial success)
            FAILED: Task encountered an error and could not complete
            CANCELED: Task was manually canceled by a user or system
            INTERRUPTED: Task was interrupted before it could complete
            UNKNOWN: Task status could not be determined
            SKIPPED: Task was intentionally skipped
            DRAFT: Task is in draft state and not yet submitted

            ACTIVE: Entity is active and operational (for buckets, collections, etc.)
            ARCHIVED: Entity has been archived
            SUSPENDED: Entity has been temporarily suspended

        Terminal Statuses:
            COMPLETED, COMPLETED_WITH_ERRORS, FAILED, CANCELED are terminal statuses.
            Once a task reaches these states, it will not transition to another state.

        Partial Success Handling:
            COMPLETED_WITH_ERRORS indicates that the operation completed but some
            documents/items failed. The task result includes:
            - List of successful items
            - List of failed items with error details
            - Success rate percentage
            This allows clients to handle partial success scenarios appropriately.

        Polling Guidance:
            - Poll tasks in PENDING, QUEUED, IN_PROGRESS, or PROCESSING states
            - Stop polling when task reaches COMPLETED, COMPLETED_WITH_ERRORS, FAILED, or CANCELED
            - Use exponential backoff (1s → 30s) when polling
    BatchStatistics:
      properties:
        total:
          type: integer
          title: Total
          description: Total number of batches in this bucket
          default: 0
        active:
          type: integer
          title: Active
          description: >-
            Number of batches that are not completed (DRAFT, PENDING,
            IN_PROGRESS, PROCESSING)
          default: 0
        completed:
          type: integer
          title: Completed
          description: Number of completed batches
          default: 0
        failed:
          type: integer
          title: Failed
          description: Number of failed batches
          default: 0
      type: object
      title: BatchStatistics
      description: Statistics about batches in a bucket.
    StorageStatistics:
      properties:
        total_size_bytes:
          type: integer
          title: Total Size Bytes
          description: Total size of all objects/blobs in the bucket in bytes
          default: 0
        avg_size_bytes:
          type: integer
          title: Avg Size Bytes
          description: Average object size in bytes
          default: 0
        max_size_bytes:
          type: integer
          title: Max Size Bytes
          description: Size of the largest object in bytes
          default: 0
        min_size_bytes:
          type: integer
          title: Min Size Bytes
          description: Size of the smallest object in bytes
          default: 0
      type: object
      title: StorageStatistics
      description: Statistics about object storage in a bucket.
    BucketSchemaField-Output:
      properties:
        type:
          $ref: '#/components/schemas/BucketSchemaFieldType'
        default:
          anyOf:
            - {}
            - type: 'null'
          title: Default
        items:
          anyOf:
            - $ref: '#/components/schemas/BucketSchemaField-Output'
            - type: 'null'
        properties:
          anyOf:
            - additionalProperties:
                $ref: '#/components/schemas/BucketSchemaField-Output'
              type: object
            - type: 'null'
          title: Properties
        examples:
          anyOf:
            - items: {}
              type: array
            - type: 'null'
          title: Examples
          description: >-
            OPTIONAL. List of example values for this field. Used by Apps to
            show example inputs in the UI. Provide multiple diverse examples
            when possible.
        description:
          anyOf:
            - type: string
            - type: 'null'
          title: Description
        enum:
          anyOf:
            - items: {}
              type: array
            - type: 'null'
          title: Enum
        required:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Required
          default: false
      additionalProperties: true
      type: object
      required:
        - type
      title: BucketSchemaField
      description: Schema field definition for bucket objects.
    BucketSchemaFieldType:
      type: string
      enum:
        - string
        - number
        - integer
        - float
        - boolean
        - object
        - array
        - date
        - datetime
        - text
        - image
        - audio
        - video
        - pdf
        - excel
      title: BucketSchemaFieldType
      description: >-
        Supported data types for bucket schema fields.


        Types fall into two categories:


        1. **Metadata Types** (JSON types):
           - Stored as object metadata
           - Standard JSON-compatible types
           - Not processed by extractors (unless explicitly mapped)
           - Examples: string, number, boolean, date

        2. **File Types** (blobs):
           - Stored as files/blobs
           - Processed by extractors
           - Require file content (URL or base64)
           - Examples: text, image, video, pdf

        **GIF Special Handling**:
            GIF files can be declared as either IMAGE or VIDEO type:

            - As IMAGE: GIF is embedded as a single static image (first frame)
            - As VIDEO: GIF is decomposed frame-by-frame with embeddings per frame

            The multimodal extractor detects GIFs via MIME type (image/gif) and routes
            them based on your schema declaration. Use VIDEO for animated GIFs where
            frame-level search is needed, IMAGE for static/thumbnail use cases.

        NOTE: For retriever input schemas that need to accept document
        references

        (e.g., "find similar documents"), use RetrieverInputSchemaFieldType
        instead,

        which includes all bucket types plus document_reference.

````