Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Python SDK

The Lattice Python SDK provides an async client for interacting with the Lattice REST API from notebooks, scripts, and autonomous agents.

Installation

pip install lattice-sdk

Quick Start

import asyncio
from lattice_sdk import LatticeClient, AllocationSpec

async def main():
    async with LatticeClient("lattice-api.example.com", 8080) as client:
        # Submit an allocation
        alloc = await client.submit(AllocationSpec(
            entrypoint="python train.py",
            nodes=4,
            walltime="24h",
            tenant="ml-team",
        ))
        print(f"Submitted: {alloc.id}")

        # Check status
        status = await client.status(alloc.id)
        print(f"State: {status.state}")

        # Wait for completion
        async for event in client.watch(alloc.id):
            print(f"State changed: {event.state}")
            if event.state in ("Completed", "Failed", "Cancelled"):
                break

asyncio.run(main())

Core Methods

Submission

# Basic submission
alloc = await client.submit(AllocationSpec(
    entrypoint="torchrun train.py",
    nodes=64,
    walltime="72h",
    uenv="prgenv-gnu/24.11:v1",
    constraints={"gpu_type": "GH200"},
))

# Submit DAG
dag = await client.submit_dag("workflow.yaml")

Status & Listing

# Get allocation
alloc = await client.status(alloc_id)

# List allocations
allocs = await client.list_allocations(state="running")

# List nodes
nodes = await client.list_nodes(state="ready")

Monitoring

# Stream logs
async for line in client.stream_logs(alloc_id):
    print(line.message)

# Query metrics
metrics = await client.query_metrics(alloc_id)
print(f"GPU util: {metrics.gpu_utilization}%")

# Stream metrics
async for snapshot in client.stream_metrics(alloc_id):
    print(f"GPU: {snapshot.gpu_utilization}%")

# Watch state changes
async for event in client.watch(alloc_id):
    print(f"State: {event.state}")

Management

# Cancel
await client.cancel(alloc_id)

# Checkpoint
await client.checkpoint(alloc_id)

Tenants & vClusters

tenants = await client.list_tenants()
vclusters = await client.list_vclusters()

Error Handling

from lattice_sdk import LatticeError, LatticeNotFoundError, LatticeAuthError

try:
    alloc = await client.status("nonexistent-id")
except LatticeNotFoundError:
    print("Allocation not found")
except LatticeAuthError:
    print("Authentication failed")
except LatticeError as e:
    print(f"API error ({e.status_code}): {e}")

Authentication

# Token-based (OIDC)
client = LatticeClient("api.example.com", 8080, token="eyJ...")

# Headers
client = LatticeClient("api.example.com", 8080, headers={"X-Tenant": "my-team"})