Mixpeek Logo
    QualitySimilar

    Sports Highlights Pipeline

    Automatically identify highlight-worthy moments in sports broadcasts using multimodal analysis — visual action detection, audio spike recognition (crowd noise, commentator excitement), and on-screen graphic parsing. Returns timestamped event manifests ready for clip assembly.

    video
    audio
    text
    Production

    Why This Matters

    Reduces highlight turnaround from 4-8 hours of manual editing to 15-20 minutes of automated processing. Captures 95%+ of key moments vs ~65% with human editors working under time pressure.

    from mixpeek import Mixpeek
    import requests
    # Shared configuration for the recipe.
    # The key lives in one constant so the SDK client and the raw REST headers
    # cannot drift apart when the placeholder is replaced with a real key.
    API_KEY = "YOUR_API_KEY"
    API_URL = "https://api.mixpeek.com"
    # SDK client; the REST calls in this snippet use `requests` + `headers`
    # instead, so `client` is only needed if you prefer the SDK.
    client = Mixpeek(api_key=API_KEY)
    # Sent with every REST call: bearer auth plus the target namespace.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "X-Namespace": "your-namespace",
    }
    # 1. Create a bucket for game footage
    # The schema makes the video URL the only required property; the other
    # text fields describe each recording.
    bucket_response = requests.post(
        f"{API_URL}/v1/buckets",
        headers=headers,
        json={
            "bucket_name": "sports-footage",
            "schema": {
                "properties": {
                    "video_url": {"type": "url", "required": True},
                    "sport": {"type": "text"},
                    "game_id": {"type": "text"},
                    "broadcast_date": {"type": "text"},
                }
            },
        },
    )
    # Surface HTTP errors here rather than as a KeyError on "bucket_id" later.
    bucket_response.raise_for_status()
    bucket = bucket_response.json()
    # 2. Create a collection with multi-modal extraction
    # The collection reads from the bucket created in step 1 and runs the
    # video extractor over each object's `video_url`.
    collection_response = requests.post(
        f"{API_URL}/v1/collections",
        headers=headers,
        json={
            "collection_name": "game-scenes",
            "source": {"type": "bucket", "bucket_id": bucket["bucket_id"]},
            "feature_extractor": {
                "feature_extractor_name": "video_extractor",
                "version": "v1",
                "input_mappings": {"video_url": "video_url"},
                "parameters": {
                    "scene_detection_threshold": 0.3,
                    "keyframe_interval": 2,
                    "max_scenes": 500,
                    "extract_audio": True,
                    "transcription_model": "whisper-large-v3",
                },
                # Presumably copies these bucket fields onto each extracted
                # document so they can be filtered on later — confirm.
                "field_passthrough": [
                    {"source_path": "sport"},
                    {"source_path": "game_id"},
                    {"source_path": "broadcast_date"},
                ],
            },
        },
    )
    # Surface HTTP errors here rather than as a KeyError on "collection_id" later.
    collection_response.raise_for_status()
    collection = collection_response.json()
    # 3. Ingest a match recording
    # Registers one object in the bucket: the metadata fields plus a single
    # video blob bound to the schema's `video_url` property.
    ingest_response = requests.post(
        f"{API_URL}/v1/buckets/{bucket['bucket_id']}/objects",
        headers=headers,
        json={
            "key_prefix": "/soccer/champions-league",
            "metadata": {
                "sport": "soccer",
                "game_id": "cl-2026-final",
                "broadcast_date": "2026-05-25",
            },
            "blobs": [
                {
                    "property": "video_url",
                    "type": "video",
                    "url": "s3://my-bucket/games/cl-final.mp4",
                }
            ],
        },
    )
    # The response body is not used, but a rejected ingest must not pass silently.
    ingest_response.raise_for_status()
    # 4. Create a sport-specific highlight taxonomy
    # A flat taxonomy with one node per event type.
    # NOTE(review): the `col_*_examples` collection IDs are not created anywhere
    # in this snippet; they presumably must already exist — confirm.
    taxonomy_response = requests.post(
        f"{API_URL}/v1/taxonomies",
        headers=headers,
        json={
            "taxonomy_name": "soccer_events",
            "taxonomy_type": "flat",
            "nodes": [
                {"node_id": "goal", "collection_id": "col_goal_examples", "description": "Ball crosses goal line"},
                {"node_id": "save", "collection_id": "col_save_examples", "description": "Goalkeeper saves shot"},
                {"node_id": "foul", "collection_id": "col_foul_examples", "description": "Referee stops play for foul"},
                {"node_id": "celebration", "collection_id": "col_celebration_examples", "description": "Player celebration"},
                {"node_id": "var_review", "collection_id": "col_var_examples", "description": "VAR check sequence"},
            ],
        },
    )
    # Surface HTTP errors here rather than as a KeyError on "taxonomy_id" later.
    taxonomy_response.raise_for_status()
    taxonomy = taxonomy_response.json()
    # 5. Create a retriever that ranks moments by highlight score
    # Pipeline: feature_search (two weighted text queries fused with RRF)
    # -> taxonomy_enrich -> attribute_filter -> limit.
    retriever_response = requests.post(
        f"{API_URL}/v1/retrievers",
        headers=headers,
        json={
            "retriever_name": "soccer-highlights",
            "collection_ids": [collection["collection_id"]],
            # NOTE(review): `event_types` is declared here and supplied at
            # execution time, but no stage below references
            # `{{inputs.event_types}}`, so it currently has no effect on the
            # results — add a filter on it or drop the input.
            "input_schema": {
                "properties": {
                    "game_id": {"type": "text"},
                    "min_confidence": {"type": "number"},
                    "event_types": {"type": "array"},
                }
            },
            "stages": [
                {
                    "stage_name": "feature_search",
                    "version": "v1",
                    "parameters": {
                        "queries": [
                            {
                                "feature_address": "mixpeek://video_extractor@v1/scene_embedding",
                                "input_mapping": {"text": "highlight action moment goal celebration"},
                                "weight": 0.6,
                            },
                            {
                                # NOTE(review): this addresses `audio_extractor@v1`,
                                # while the collection in step 2 configures only
                                # `video_extractor` — confirm the feature exists.
                                "feature_address": "mixpeek://audio_extractor@v1/transcript_embedding",
                                "input_mapping": {"text": "goal score amazing incredible crowd roars"},
                                "weight": 0.4,
                            },
                        ],
                        "fusion_method": "rrf",
                        "limit": 100,
                    },
                },
                {
                    "stage_name": "taxonomy_enrich",
                    "version": "v1",
                    "parameters": {"taxonomy_id": taxonomy["taxonomy_id"]},
                },
                {
                    "stage_name": "attribute_filter",
                    "version": "v1",
                    "parameters": {
                        # Both values are templated from the execution inputs.
                        "filters": [
                            {"field": "metadata.game_id", "operator": "eq", "value": "{{inputs.game_id}}"},
                            {"field": "taxonomy_enrichment.confidence", "operator": "gte", "value": "{{inputs.min_confidence}}"},
                        ]
                    },
                },
                {"stage_name": "limit", "version": "v1", "parameters": {"limit": 50}},
            ],
        },
    )
    # Surface HTTP errors here rather than as a KeyError on "retriever_id" later.
    retriever_response.raise_for_status()
    retriever = retriever_response.json()
    # 6. Retrieve highlights for a specific match
    # Runs the retriever from step 5 with concrete values for its inputs.
    highlights_response = requests.post(
        f"{API_URL}/v1/retrievers/{retriever['retriever_id']}/execute",
        headers=headers,
        json={
            "inputs": {
                "game_id": "cl-2026-final",
                "min_confidence": 0.7,
                "event_types": ["goal", "save", "celebration"],
            },
            "limit": 20,
        },
    )
    # Surface HTTP errors here rather than as a KeyError on "documents" later.
    highlights_response.raise_for_status()
    highlights = highlights_response.json()
    # 7. Build highlight manifest
    # Prints one numbered entry per returned document: event label, time span,
    # score, and keyframe URL.
    print("\n🎬 Top Highlights:")
    for i, doc in enumerate(highlights["documents"], 1):
        metadata = doc["metadata"]
        # Missing timestamps fall back to 0 so the line still formats.
        start = metadata.get("start_time", 0)
        end = metadata.get("end_time", 0)
        # Documents without a taxonomy_enrichment.node_id are labelled "highlight".
        event = doc.get("taxonomy_enrichment", {}).get("node_id", "highlight")
        score = doc["score"]
        print(f" {i}. [{event.upper()}] {start:.1f}s - {end:.1f}s (score: {score:.3f})")
        print(f" Keyframe: {metadata.get('keyframe_url', '')}")

    Feature Extractors

    Video Embedding

    Generate vector embeddings for video content

    610K runs

    Audio Event Detection

    Detect specific audio events such as gunshots, breaking glass, and alarms.

    0K runs

    Scene Classification

    Categorize images based on scene type (indoor, outdoor, etc.)

    610K runs

    Action Recognition

    Identify and classify human actions in video

    412K runs

    Retriever Stages

    feature search

    Search and filter documents by vector similarity using feature embeddings

    filter

    taxonomy enrich

    Classify documents against taxonomy nodes via vector similarity

    apply

    attribute filter

    Filter documents by metadata attribute values using boolean logic

    filter

    sort attribute

    Sort documents by any metadata field value

    sort

    Use Cases Using This Recipe

    Advanced
    7 min

    Sports Highlights

    Auto-generate highlight reels from full-length sports footage

    24x faster

    Highlight generation time

    Who It's For

    Sports broadcasters, media companies, and content teams processing 100+ hours of live footage weekly