> ## Documentation Index
> Fetch the complete documentation index at: https://docs.mixpeek.com/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Get benchmark

> Get benchmark status and results by ID


## OpenAPI

````yaml get /v1/retrievers/benchmarks/{benchmark_id}
# Document header: OpenAPI version, API metadata, and the server base URL.
openapi: 3.1.0
info:
  title: Mixpeek API
  # Quoted so the version is read as a string rather than a float.
  version: '0.82'
  description: >-
    This is the Mixpeek API, providing access to various endpoints
    for data processing and retrieval.
  termsOfService: https://mixpeek.com/terms
  contact:
    name: Mixpeek Support
    email: info@mixpeek.com
    url: https://mixpeek.com/contact
servers:
  - description: Production
    url: https://api.mixpeek.com
# Empty list: this document declares no document-wide security requirement.
# NOTE(review): the operation under `paths` still documents 401/403
# responses - confirm whether an auth scheme should be declared here.
security: []
# Single read-only operation: look up one benchmark run by its identifier.
paths:
  /v1/retrievers/benchmarks/{benchmark_id}:
    get:
      tags:
        - Retriever Benchmarks
      summary: Get benchmark
      description: Get benchmark status and results by ID
      operationId: get_benchmark_v1_retrievers_benchmarks__benchmark_id__get
      parameters:
        # Fills the {benchmark_id} placeholder in the path template above.
        - name: benchmark_id
          in: path
          required: true
          description: Unique benchmark identifier.
          schema:
            type: string
            title: Benchmark Id
      responses:
        # Success: the full benchmark record.
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BenchmarkResponse'
        # Every error status except 422 shares the ErrorResponse envelope.
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: Not Found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        # Validation failures use their own schema, not ErrorResponse.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
components:
  schemas:
    # Body of the 200 response: benchmark identity, the retrievers involved,
    # run status, timestamps, and result fields. Every property that is not
    # listed under `required` also accepts null.
    BenchmarkResponse:
      title: BenchmarkResponse
      type: object
      description: Response containing benchmark details and results.
      required:
        - benchmark_id
        - benchmark_name
        - baseline_retriever_id
        - candidate_retriever_ids
        - session_count
        - status
        - created_at
      properties:
        benchmark_id:
          title: Benchmark Id
          description: Unique benchmark identifier.
          type: string
        benchmark_name:
          title: Benchmark Name
          description: Human-readable name.
          type: string
        baseline_retriever_id:
          title: Baseline Retriever Id
          description: Baseline retriever ID.
          type: string
        candidate_retriever_ids:
          title: Candidate Retriever Ids
          description: Candidate retriever IDs.
          type: array
          items:
            type: string
        session_filter:
          description: Filter criteria used.
          anyOf:
            - $ref: '#/components/schemas/SessionFilter-Output'
            - type: 'null'
        session_count:
          title: Session Count
          description: Number of sessions in benchmark.
          type: integer
        status:
          description: Current benchmark status.
          $ref: '#/components/schemas/BenchmarkStatus'
        results:
          title: Results
          description: Results per pipeline (available when completed).
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/BenchmarkResult'
            - type: 'null'
        comparison:
          description: Statistical comparison (available when completed).
          anyOf:
            - $ref: '#/components/schemas/BenchmarkComparison'
            - type: 'null'
        created_at:
          title: Created At
          description: Creation timestamp.
          type: string
          format: date-time
        started_at:
          title: Started At
          description: Execution start time.
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
        completed_at:
          title: Completed At
          description: Completion time.
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
        error_message:
          title: Error Message
          description: Error message if failed.
          anyOf:
            - type: string
            - type: 'null'
    # Error envelope referenced by the 400/401/403/404/500 responses.
    ErrorResponse:
      title: ErrorResponse
      type: object
      description: Error response model.
      required:
        - status
        - error
      properties:
        success:
          title: Success
          description: Always false for error responses
          type: boolean
          default: false
        status:
          title: Status
          description: HTTP status code for this error
          type: integer
        error:
          description: Error details payload
          $ref: '#/components/schemas/ErrorDetail'
      examples:
        # Sample payload for a 404.
        - success: false
          status: 404
          error:
            type: NotFoundError
            message: Namespace not found
            details:
              resource: namespace
              id: ns_123
    # Body of the 422 response: a list of individual ValidationError entries.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # Session-selection criteria referenced by BenchmarkResponse.session_filter.
    # The component key carries an "-Output" suffix while the schema title is
    # plain "SessionFilter". No property is required.
    SessionFilter-Output:
      title: SessionFilter
      type: object
      description: Criteria for selecting historical sessions to replay.
      properties:
        retriever_ids:
          title: Retriever Ids
          description: Filter to sessions from these retrievers.
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        taxonomy_node_ids:
          title: Taxonomy Node Ids
          description: Filter to sessions with these taxonomy classifications.
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        time_range:
          description: Filter to sessions within this time window.
          anyOf:
            - $ref: '#/components/schemas/shared__retrievers__benchmarks__models__TimeRange'
            - type: 'null'
        min_interactions:
          title: Min Interactions
          description: Minimum number of user interactions required.
          type: integer
          minimum: 1
          default: 1
        interaction_types:
          title: Interaction Types
          description: >-
            Filter to sessions with these interaction types
            (e.g., ['click', 'purchase']).
          anyOf:
            - type: array
              items:
                type: string
            - type: 'null'
        sample_strategy:
          title: Sample Strategy
          # Free-form string in this schema; the allowed values are only named
          # in the description, not enforced with an enum.
          description: "How to sample sessions: 'random', 'recent', or 'stratified'."
          type: string
          default: random
        interaction_weights:
          description: >-
            Custom weights for interaction types when computing
            relevance scores.
          $ref: '#/components/schemas/InteractionWeights'
    # Closed set of status strings a benchmark can report.
    BenchmarkStatus:
      title: BenchmarkStatus
      description: Status of a benchmark run.
      type: string
      enum:
        - pending
        - building_sessions
        - replaying
        - computing_metrics
        - completed
        - failed
    # One entry of BenchmarkResponse.results. `taxonomy_deltas` and
    # `error_summary` are the only optional properties.
    BenchmarkResult:
      title: BenchmarkResult
      type: object
      description: Results for a single pipeline in a benchmark run.
      required:
        - retriever_id
        - retriever_name
        - pipeline_hash
        - metrics
        - latency
        - failed_sessions
      properties:
        retriever_id:
          title: Retriever Id
          description: ID of the retriever/pipeline tested.
          type: string
        retriever_name:
          title: Retriever Name
          description: Human-readable name of the retriever.
          type: string
        pipeline_hash:
          title: Pipeline Hash
          description: Hash of the pipeline configuration.
          type: string
        metrics:
          description: Alignment metrics for this pipeline.
          $ref: '#/components/schemas/AlignmentMetrics'
        taxonomy_deltas:
          title: Taxonomy Deltas
          description: >-
            Metrics broken down by taxonomy node
            (for understanding category-level performance).
          anyOf:
            # Map whose values are each a full AlignmentMetrics object.
            - type: object
              additionalProperties:
                $ref: '#/components/schemas/AlignmentMetrics'
            - type: 'null'
        latency:
          description: Performance timing statistics.
          $ref: '#/components/schemas/LatencyMetrics'
        failed_sessions:
          title: Failed Sessions
          description: Number of sessions that failed during replay.
          type: integer
          minimum: 0
        error_summary:
          title: Error Summary
          description: Count of errors by type (error_type -> count).
          type: object
          additionalProperties:
            type: integer
    # Referenced by BenchmarkResponse.comparison: the baseline ID plus one
    # PipelineComparison per entry in `comparisons`.
    BenchmarkComparison:
      title: BenchmarkComparison
      type: object
      description: Comparison of all candidates against the baseline.
      required:
        - baseline_retriever_id
        - comparisons
      properties:
        baseline_retriever_id:
          title: Baseline Retriever Id
          description: ID of the baseline pipeline.
          type: string
        comparisons:
          title: Comparisons
          description: Comparison results for each candidate.
          type: array
          items:
            $ref: '#/components/schemas/PipelineComparison'
        recommendation:
          title: Recommendation
          description: System-generated recommendation based on results.
          anyOf:
            - type: string
            - type: 'null'
    # Payload of ErrorResponse.error. `message` and `type` are always present;
    # `code` and `details` are optional and nullable.
    ErrorDetail:
      title: ErrorDetail
      type: object
      description: Error detail model.
      required:
        - message
        - type
      properties:
        message:
          title: Message
          description: Human-readable error message
          type: string
        type:
          title: Type
          description: Stable error type identifier (machine-readable)
          type: string
        code:
          title: Code
          description: >-
            Fine-grained error code for programmatic handling
            (e.g., namespace_name_taken, feature_extractor_not_found).
            Present only when consumers may need to branch on a specific
            error condition.
          anyOf:
            - type: string
            - type: 'null'
        details:
          title: Details
          description: >-
            Optional structured details to help debugging
            (validation errors, IDs, etc.)
          anyOf:
            # Open-ended object: any keys and value types are accepted.
            - type: object
              additionalProperties: true
            - type: 'null'
    # One entry of HTTPValidationError.detail. All three properties are
    # required.
    ValidationError:
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        loc:
          title: Location
          type: array
          # Each element may be either a string or an integer.
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
    # Time window referenced by SessionFilter-Output.time_range. Neither bound
    # is required, and each may be null.
    shared__retrievers__benchmarks__models__TimeRange:
      title: TimeRange
      type: object
      description: Time range filter for session queries.
      properties:
        start:
          title: Start
          description: Start of time range (inclusive).
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
        end:
          title: End
          description: End of time range (inclusive).
          anyOf:
            - type: string
              format: date-time
            - type: 'null'
    # Wrapper object around a single interaction_type -> numeric weight map.
    InteractionWeights:
      title: InteractionWeights
      type: object
      # Literal block scalar: line breaks below are part of the string value.
      description: |-
        Custom weights for different interaction types when computing metrics.

        Higher weights indicate more importance. Purchases typically have higher weight
        than clicks because they're a stronger signal of user intent.

        Example: {"click": 1.0, "purchase": 5.0, "add_to_cart": 2.0, "bookmark": 1.5}
      properties:
        weights:
          title: Weights
          description: Mapping of interaction_type -> weight (higher = more important).
          type: object
          additionalProperties:
            type: number
    # Ranking-quality metrics for one pipeline. Only `mean_rank_purchased` is
    # optional; all count fields are integers with a minimum of 0.
    AlignmentMetrics:
      title: AlignmentMetrics
      type: object
      # Literal block scalar: line breaks below are part of the string value.
      description: |-
        Metrics measuring how well a ranking aligns with observed user behavior.

        These metrics compare a candidate pipeline's ranking against ground truth
        derived from actual user interactions (clicks, purchases, etc.).
      required:
        - ndcg_at_k
        - mean_rank_clicked
        - recall_at_k
        - avg_position_delta
        - items_promoted
        - items_demoted
        - sessions_improved
        - sessions_degraded
        - sessions_neutral
      properties:
        ndcg_at_k:
          title: Ndcg At K
          description: Normalized Discounted Cumulative Gain at various K values.
          type: object
          additionalProperties:
            type: number
        mean_rank_clicked:
          title: Mean Rank Clicked
          description: Average position of clicked items in the new ranking.
          type: number
        mean_rank_purchased:
          title: Mean Rank Purchased
          description: Average position of purchased items (if any purchases observed).
          anyOf:
            - type: number
            - type: 'null'
        recall_at_k:
          title: Recall At K
          description: Fraction of interacted items found in top K results.
          type: object
          additionalProperties:
            type: number
        avg_position_delta:
          title: Avg Position Delta
          description: Average change in position for interacted items (negative = promoted).
          type: number
        items_promoted:
          title: Items Promoted
          description: Number of interacted items moved to higher positions.
          type: integer
          minimum: 0
        items_demoted:
          title: Items Demoted
          description: Number of interacted items moved to lower positions.
          type: integer
          minimum: 0
        sessions_improved:
          title: Sessions Improved
          description: Sessions where candidate outperformed baseline.
          type: integer
          minimum: 0
        sessions_degraded:
          title: Sessions Degraded
          description: Sessions where candidate underperformed baseline.
          type: integer
          minimum: 0
        sessions_neutral:
          title: Sessions Neutral
          description: Sessions with no significant difference.
          type: integer
          minimum: 0
      examples:
        # The K keys are quoted so they stay strings (object keys), not ints.
        - ndcg_at_k:
            '5': 0.78
            '10': 0.82
            '20': 0.85
          mean_rank_clicked: 4.2
          mean_rank_purchased: 2.8
          recall_at_k:
            '5': 0.65
            '10': 0.8
            '20': 0.92
          avg_position_delta: -1.5
          items_promoted: 120
          items_demoted: 45
          sessions_improved: 580
          sessions_degraded: 210
          sessions_neutral: 210
    # Latency summary for one pipeline. The percentile and mean values are
    # required and non-negative; the per-stage breakdown is optional.
    LatencyMetrics:
      title: LatencyMetrics
      type: object
      description: Performance timing statistics for a pipeline.
      required:
        - p50_ms
        - p90_ms
        - p99_ms
        - mean_ms
      properties:
        p50_ms:
          title: P50 Ms
          description: 50th percentile latency in milliseconds.
          type: number
          minimum: 0
        p90_ms:
          title: P90 Ms
          description: 90th percentile latency in milliseconds.
          type: number
          minimum: 0
        p99_ms:
          title: P99 Ms
          description: 99th percentile latency in milliseconds.
          type: number
          minimum: 0
        mean_ms:
          title: Mean Ms
          description: Mean latency in milliseconds.
          type: number
          minimum: 0
        stage_latencies:
          title: Stage Latencies
          description: Per-stage latency breakdown (stage_name -> avg_ms).
          type: object
          additionalProperties:
            type: number
    # One entry of BenchmarkComparison.comparisons. The candidate ID and the
    # three delta fields are required; significance data and taxonomy
    # win/loss lists are optional.
    PipelineComparison:
      title: PipelineComparison
      type: object
      description: Statistical comparison between a candidate and baseline pipeline.
      required:
        - candidate_retriever_id
        - ndcg_delta
        - recall_delta
        - latency_delta_ms
      properties:
        candidate_retriever_id:
          title: Candidate Retriever Id
          description: ID of the candidate pipeline.
          type: string
        ndcg_delta:
          title: Ndcg Delta
          description: Change in NDCG at each K (positive = candidate better).
          type: object
          additionalProperties:
            type: number
        recall_delta:
          title: Recall Delta
          description: Change in recall at each K (positive = candidate better).
          type: object
          additionalProperties:
            type: number
        latency_delta_ms:
          title: Latency Delta Ms
          description: Change in mean latency (positive = candidate slower).
          type: number
        p_value:
          title: P Value
          description: Statistical significance of the difference (paired t-test).
          anyOf:
            - type: number
            - type: 'null'
        confidence_interval:
          title: Confidence Interval
          description: 95% confidence interval for NDCG@10 delta.
          anyOf:
            # Exactly two numbers, expressed as a fixed-length tuple.
            - type: array
              minItems: 2
              maxItems: 2
              prefixItems:
                - type: number
                - type: number
            - type: 'null'
        taxonomy_wins:
          title: Taxonomy Wins
          description: Taxonomy nodes where candidate significantly outperforms.
          type: array
          items:
            type: string
        taxonomy_losses:
          title: Taxonomy Losses
          description: Taxonomy nodes where candidate significantly underperforms.
          type: array
          items:
            type: string

````