QualitySimilar

Sports Highlights Pipeline

Automatically identify highlight-worthy moments in sports broadcasts using multimodal analysis: visual action detection, audio spike recognition (crowd noise, commentator excitement), and on-screen graphic parsing. Returns timestamped event manifests ready for clip assembly.

video

audio

text

Production

0.8K runs

Run in Builder

Why This Matters

Reduces highlight turnaround from 4-8 hours of manual editing to 15-20 minutes of automated processing, and captures 95%+ of key moments versus ~65% for human editors working under time pressure.

from mixpeek import Mixpeek
import requests

# Single source of truth for the credential, so the SDK client and the raw
# HTTP headers cannot drift apart when the placeholder is replaced.
API_KEY = "YOUR_API_KEY"

# SDK client; the steps below call the REST API directly through `requests`.
client = Mixpeek(api_key=API_KEY)
API_URL = "https://api.mixpeek.com"
# Sent with every request: bearer credential plus the namespace header.
headers = {"Authorization": f"Bearer {API_KEY}", "X-Namespace": "your-namespace"}

# 1. Create a bucket for game footage
# The schema declares one required video URL plus three free-text fields
# that are supplied as metadata at ingest time.
bucket_response = requests.post(
    f"{API_URL}/v1/buckets",
    headers=headers,
    json={
        "bucket_name": "sports-footage",
        "schema": {
            "properties": {
                "video_url": {"type": "url", "required": True},
                "sport": {"type": "text"},
                "game_id": {"type": "text"},
                "broadcast_date": {"type": "text"},
            }
        },
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Fail here with the HTTP error instead of a later KeyError on "bucket_id".
bucket_response.raise_for_status()
bucket = bucket_response.json()

# 2. Create a collection with multi-modal extraction
# The collection reads from the bucket created above and runs the video
# extractor on each object's `video_url`; the three metadata fields are
# passed through so they remain available on the resulting documents.
collection_response = requests.post(
    f"{API_URL}/v1/collections",
    headers=headers,
    json={
        "collection_name": "game-scenes",
        "source": {"type": "bucket", "bucket_id": bucket["bucket_id"]},
        "feature_extractor": {
            "feature_extractor_name": "video_extractor",
            "version": "v1",
            "input_mappings": {"video_url": "video_url"},
            "parameters": {
                "scene_detection_threshold": 0.3,
                "keyframe_interval": 2,
                "max_scenes": 500,
                "extract_audio": True,
                "transcription_model": "whisper-large-v3",
            },
            "field_passthrough": [
                {"source_path": "sport"},
                {"source_path": "game_id"},
                {"source_path": "broadcast_date"},
            ],
        },
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Fail here with the HTTP error instead of a later KeyError on "collection_id".
collection_response.raise_for_status()
collection = collection_response.json()

# 3. Ingest a match recording
# NOTE(review): the script queries for highlights right after this call. If
# object processing is asynchronous, results may be empty until extraction
# finishes -- confirm whether a batch submit / wait step is required.
ingest_response = requests.post(
    f"{API_URL}/v1/buckets/{bucket['bucket_id']}/objects",
    headers=headers,
    json={
        "key_prefix": "/soccer/champions-league",
        "metadata": {
            "sport": "soccer",
            "game_id": "cl-2026-final",
            "broadcast_date": "2026-05-25",
        },
        "blobs": [
            {
                "property": "video_url",
                "type": "video",
                "url": "s3://my-bucket/games/cl-final.mp4",
            }
        ],
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Surface a rejected upload instead of silently continuing with nothing ingested.
ingest_response.raise_for_status()

# 4. Create a sport-specific highlight taxonomy
# Each node points at a collection of reference examples for that event type.
# NOTE(review): the `col_*_examples` ids are placeholders; presumably those
# collections must already exist in the namespace -- confirm before running.
taxonomy_response = requests.post(
    f"{API_URL}/v1/taxonomies",
    headers=headers,
    json={
        "taxonomy_name": "soccer_events",
        "taxonomy_type": "flat",
        "nodes": [
            {"node_id": "goal", "collection_id": "col_goal_examples", "description": "Ball crosses goal line"},
            {"node_id": "save", "collection_id": "col_save_examples", "description": "Goalkeeper saves shot"},
            {"node_id": "foul", "collection_id": "col_foul_examples", "description": "Referee stops play for foul"},
            {"node_id": "celebration", "collection_id": "col_celebration_examples", "description": "Player celebration"},
            {"node_id": "var_review", "collection_id": "col_var_examples", "description": "VAR check sequence"},
        ],
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Fail here with the HTTP error instead of a later KeyError on "taxonomy_id".
taxonomy_response.raise_for_status()
taxonomy = taxonomy_response.json()

# 5. Create a retriever that ranks moments by highlight score
# Pipeline: fused vector search (visual + transcript) -> taxonomy
# classification -> metadata/confidence filtering -> final truncation.
retriever_response = requests.post(
    f"{API_URL}/v1/retrievers",
    headers=headers,
    json={
        "retriever_name": "soccer-highlights",
        "collection_ids": [collection["collection_id"]],
        "input_schema": {
            "properties": {
                "game_id": {"type": "text"},
                "min_confidence": {"type": "number"},
                # NOTE(review): `event_types` is declared here (and supplied at
                # execution time) but no stage below references it, so it has
                # no effect on the results as written.
                "event_types": {"type": "array"},
            }
        },
        "stages": [
            {
                "stage_name": "feature_search",
                "version": "v1",
                "parameters": {
                    "queries": [
                        {
                            "feature_address": "mixpeek://video_extractor@v1/scene_embedding",
                            "input_mapping": {"text": "highlight action moment goal celebration"},
                            "weight": 0.6,
                        },
                        {
                            # NOTE(review): the collection above configures only
                            # `video_extractor`; confirm this audio feature
                            # address actually exists on its documents.
                            "feature_address": "mixpeek://audio_extractor@v1/transcript_embedding",
                            "input_mapping": {"text": "goal score amazing incredible crowd roars"},
                            "weight": 0.4,
                        },
                    ],
                    "fusion_method": "rrf",
                    "limit": 100,
                },
            },
            {
                "stage_name": "taxonomy_enrich",
                "version": "v1",
                "parameters": {"taxonomy_id": taxonomy["taxonomy_id"]},
            },
            {
                # Runs after the search stage, so it narrows the 100 candidates
                # rather than restricting the search itself.
                "stage_name": "attribute_filter",
                "version": "v1",
                "parameters": {
                    "filters": [
                        {"field": "metadata.game_id", "operator": "eq", "value": "{{inputs.game_id}}"},
                        {"field": "taxonomy_enrichment.confidence", "operator": "gte", "value": "{{inputs.min_confidence}}"},
                    ]
                },
            },
            {"stage_name": "limit", "version": "v1", "parameters": {"limit": 50}},
        ],
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Fail here with the HTTP error instead of a later KeyError on "retriever_id".
retriever_response.raise_for_status()
retriever = retriever_response.json()

# 6. Retrieve highlights for a specific match
# `inputs` fills the `{{inputs.*}}` templates in the retriever's stages.
highlights_response = requests.post(
    f"{API_URL}/v1/retrievers/{retriever['retriever_id']}/execute",
    headers=headers,
    json={
        "inputs": {
            "game_id": "cl-2026-final",
            "min_confidence": 0.7,
            "event_types": ["goal", "save", "celebration"],
        },
        "limit": 20,
    },
    timeout=30,  # requests has no default timeout; never hang indefinitely
)
# Fail here with the HTTP error instead of a later KeyError on "documents".
highlights_response.raise_for_status()
highlights = highlights_response.json()

# 7. Build highlight manifest
# Prints one ranked entry per returned document: event label, time span,
# relevance score and keyframe URL.
print("\n🎬 Top Highlights:")
# Every lookup tolerates a missing or null field (`or` fallback) so one
# incomplete document cannot abort the whole listing.
for i, doc in enumerate(highlights.get("documents") or [], 1):
    metadata = doc.get("metadata") or {}
    start = metadata.get("start_time") or 0
    end = metadata.get("end_time") or 0
    # Fall back to a generic label when the document was not classified.
    event = (doc.get("taxonomy_enrichment") or {}).get("node_id") or "highlight"
    score = doc.get("score") or 0.0
    print(f"  {i}. [{event.upper()}] {start:.1f}s - {end:.1f}s (score: {score:.3f})")
    print(f"     Keyframe: {metadata.get('keyframe_url', '')}")

Feature Extractors

Video Embedding

Generate vector embeddings for video content

610K runs

Audio Event Detection

Detect specific audio events like gunshots, glass breaking, alarms, etc.

0K runs

Scene Classification

Categorize images based on scene type (indoor, outdoor, etc.)

610K runs

Action Recognition

Identify and classify human actions in video

412K runs

Retriever Stages

feature search

Search and filter documents by vector similarity using feature embeddings

filter

taxonomy enrich

Classify documents against taxonomy nodes via vector similarity

apply

attribute filter

Filter documents by metadata attribute values using boolean logic

filter

sort attribute

Sort documents by any metadata field value

sort

limit

Truncate results to a maximum count with optional offset for pagination

reduce

Documentation

Sports Media Solution · Sports Highlights Use Case

Sports Highlights Pipeline

Why This Matters

Feature Extractors

Retriever Stages

Documentation

Use Cases Using This Recipe