# Python SDK
Official Python client for the IC GPU Service API. It supports synchronous and asynchronous usage, automatic pagination, retries with exponential backoff, and typed errors.
## Prerequisites
- Python 3.9 or later
- An IC GPU Service account with an API key (`sk-ic-...`)
## Getting Started
### Install the SDK

```bash
pip install ic-gpu
```

### Set your API key
Set the `IC_GPU_API_KEY` environment variable, or pass it directly to the client constructor.
```bash
export IC_GPU_API_KEY="sk-ic-your-api-key-here"
```

### Make your first API call

```python
from ic_gpu import Client
client = Client(api_key="sk-ic-your-api-key")
# List all GPU instances
for instance in client.instances.list():
print(f"{instance['name']} {instance['status']} {instance['tier']}")
client.close()
```

## Client Initialisation
The client can be used as a context manager, which automatically closes the HTTP connection when done.
```python
from ic_gpu import Client
# Using environment variable (recommended)
client = Client()
# Or pass the key directly
client = Client(api_key="sk-ic-your-api-key")
# With custom settings
client = Client(
    api_key="sk-ic-your-api-key",
    base_url="https://api.gpu.local",
    timeout=30.0,
    max_retries=3,
)
# As a context manager (recommended)
with Client() as client:
    instances = client.instances.list().to_list()
    print(f"Found {len(instances)} instances")
```

## GPU Instances

```python
from ic_gpu import Client
with Client() as client:
    # Create a GPU instance
    instance = client.instances.create(
        name="my-workspace",
        tier="timesliced",  # "timesliced" (no memory isolation), "dedicated", or "mig"
        inference_engine="vllm",  # optional: "vllm", "ollama", "sglang"
        tags={"project": "research", "team": "ml"},
    )
    print(f"Created: {instance['id']} Status: {instance['status']}")

    # Get instance details
    details = client.instances.get(instance["id"])
    print(f"SSH: ssh gpuuser@{details['ssh_host']} -p {details['ssh_port']}")

    # Stop and start
    client.instances.stop(instance["id"])
    client.instances.start(instance["id"])

    # View logs
    logs = client.instances.logs(instance["id"])
    print(logs)

    # Terminate when done
    client.instances.delete(instance["id"])
```
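A freshly created instance is not necessarily reachable the moment `create` returns; the complete example at the end of this page polls until the status is `running`. A small reusable helper along those lines, with a timeout added (the 5-second poll interval is just a sensible default, not an SDK constant):

```python
import time

from ic_gpu import Client


def wait_until_running(client: Client, instance_id: str, timeout: float = 600.0) -> dict:
    """Poll an instance until its status is "running", or raise TimeoutError."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        details = client.instances.get(instance_id)
        if details["status"] == "running":
            return details
        time.sleep(5)
    raise TimeoutError(f"Instance {instance_id} not running after {timeout:.0f}s")
```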
## LLM Inference

The LLM resource provides an OpenAI-compatible interface for chat completions, text completions, and embeddings.

```python
from ic_gpu import Client
with Client() as client:
    # Chat completion
    response = client.llm.chat(
        model="llama-3-8b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Explain GPU computing in one paragraph."},
        ],
        temperature=0.7,
        max_tokens=256,
    )
    print(response["choices"][0]["message"]["content"])

    # Text completion
    response = client.llm.complete(
        model="llama-3-8b",
        prompt="The benefits of GPU computing are:",
        max_tokens=256,
    )
    print(response["choices"][0]["text"])

    # Embeddings
    response = client.llm.embed(
        model="bge-large",
        input="GPU computing enables parallel processing",
    )
    print(f"Embedding dimension: {len(response['data'][0]['embedding'])}")

    # List available models
    models = client.llm.models()
    for m in models["data"]:
        print(f" {m['id']}")
```
## Model Deployments

```python
from ic_gpu import Client
with Client() as client:
    # Deploy a model from HuggingFace
    deployment = client.models.deploy(
        model_name="my-llama",
        huggingface_repo="meta-llama/Meta-Llama-3-8B-Instruct",
        engine="vllm",
        gpu_count=1,
        min_replicas=1,
        max_replicas=3,
    )
    print(f"Deploying: {deployment['model_name']} Status: {deployment['status']}")

    # Check deployment status
    status = client.models.get("my-llama")
    print(f"Status: {status['status']}")

    # Scale replicas
    client.models.update_scaling("my-llama", min_replicas=2, max_replicas=5)

    # List all deployments
    for model in client.models.list():
        print(f" {model['model_name']} {model['status']}")

    # Stop a deployment
    client.models.stop("my-llama")
```
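Once the deployment reports a healthy status, it should be reachable through the LLM resource. The sketch below assumes the deployment's `model_name` is also the `model` identifier used for inference, which the examples above suggest but do not state outright:

```python
from ic_gpu import Client

with Client() as client:
    # Assumption: a deployment named "my-llama" is served under that same name.
    response = client.llm.chat(
        model="my-llama",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(response["choices"][0]["message"]["content"])
```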
## Virtual Machines

```python
from ic_gpu import Client
with Client() as client:
    # Create a VM
    vm = client.vms.create(
        name="dev-server",
        template="tpl-ubuntu-24",
        cpu_cores=4,
        memory_gb=16,
    )
    print(f"Created VM: {vm['id']} Status: {vm['status']}")

    # Get VM details
    details = client.vms.get(vm["id"])

    # Stop, start, reboot
    client.vms.stop(vm["id"])
    client.vms.start(vm["id"])
    client.vms.reboot(vm["id"])

    # List all VMs
    for v in client.vms.list():
        print(f" {v['name']} {v['status']}")

    # Delete (must be stopped first)
    client.vms.stop(vm["id"])
    client.vms.delete(vm["id"])
```

## Kubernetes Clusters

```python
from ic_gpu import Client
with Client() as client:
    # Create a cluster
    cluster = client.clusters.create(name="ml-cluster")
    print(f"Created: {cluster['id']} Status: {cluster['status']}")

    # Download kubeconfig
    kubeconfig_yaml = client.clusters.kubeconfig(cluster["id"])
    with open("kubeconfig.yaml", "w") as f:
        f.write(kubeconfig_yaml)
    print("Kubeconfig saved to kubeconfig.yaml")

    # List clusters
    for c in client.clusters.list():
        print(f" {c['name']} {c['status']}")

    # Delete
    client.clusters.delete(cluster["id"])
```
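The downloaded kubeconfig works with standard Kubernetes tooling. For example, with the official `kubernetes` Python client (installed separately via `pip install kubernetes`, and assuming the cluster's API server is reachable from your machine):

```python
from kubernetes import client as k8s, config

# Point the Kubernetes client at the kubeconfig saved above,
# then list the cluster's nodes as a connectivity check.
config.load_kube_config(config_file="kubeconfig.yaml")
for node in k8s.CoreV1Api().list_node().items:
    print(node.metadata.name)
```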
## API Keys and SSH Keys

```python
from ic_gpu import Client
with Client() as client:
    # Create an API key with scoped permissions
    key = client.api_keys.create(
        name="ci-pipeline",
        scopes=["instances", "models"],
        permissions=["read", "write"],
    )
    print(f"Key: {key['prefix']}...")

    # List API keys
    for k in client.api_keys.list():
        print(f" {k['name']} scopes={k['scopes']}")

    # Upload an SSH public key
    ssh_key = client.ssh_keys.create(
        name="laptop",
        public_key="ssh-ed25519 AAAA... user@laptop",
    )

    # Generate a new SSH key pair (private key returned once)
    generated = client.ssh_keys.generate(name="auto-key")
    print(f"Private key:\n{generated['private_key']}")

    # List SSH keys
    for sk in client.ssh_keys.list():
        print(f" {sk['name']} {sk['fingerprint']}")
```
## Webhooks

```python
from ic_gpu import Client
with Client() as client:
    # Create a webhook
    webhook = client.webhooks.create(
        url="https://example.com/hooks/gpu",
        events=["instance.created", "instance.terminated", "balance.low"],
    )
    print(f"Webhook: {webhook['id']}")
    print(f"Signing secret: {webhook['secret']}")

    # Send a test delivery
    client.webhooks.test(webhook["id"])

    # List available event types
    event_types = client.webhooks.events()
    for evt in event_types["events"]:
        print(f" {evt}")

    # View delivery history
    deliveries = client.webhooks.deliveries(webhook["id"])
    for d in deliveries["deliveries"]:
        print(f" {d['status']} {d['created_at']}")

    # Delete
    client.webhooks.delete(webhook["id"])
```
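On the receiving side, each delivery should be verified against the signing secret returned at creation time. The service's exact signing scheme is not documented here, so the sketch below assumes an HMAC-SHA256 hex digest of the raw request body carried in an `X-Signature` header; both the header name and the algorithm are assumptions to confirm against the webhook docs:

```python
import hashlib
import hmac


def verify_delivery(secret: str, raw_body: bytes, signature_header: str) -> bool:
    """Compare the received signature against our own digest in constant time."""
    expected = hmac.new(secret.encode(), raw_body, hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, signature_header)
```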
## Tags and Spending Alerts

```python
from ic_gpu import Client
with Client() as client:
    # Tag a resource (AWS-style key/value tags)
    client.tags.create(
        resource_type="instance",
        resource_id="inst-abc123",
        tags={"environment": "production", "team": "ml-infra"},
    )

    # List tags on a resource
    tags = client.tags.list(
        resource_type="instance",
        resource_id="inst-abc123",
    )
    for t in tags["tags"]:
        print(f" {t['key']}={t['value']}")

    # Remove specific tags
    client.tags.delete(
        resource_type="instance",
        resource_id="inst-abc123",
        keys=["team"],
    )

    # Create a spending alert
    alert = client.alerts.create(
        threshold=10000,
        notification_type="dashboard",
    )
    print(f"Alert: {alert['id']} triggers at {alert['threshold']} tokens")

    # List alerts
    for a in client.alerts.list():
        print(f" {a['id']} threshold={a['threshold']}")
```

## Token Balance and Usage

```python
from ic_gpu import Client
with Client() as client:
    # Check balance
    balance = client.tokens.balance()
    print(f"Balance: {balance['balance']} tokens")

    # View available packages
    packages = client.tokens.packages()
    for pkg in packages["packages"]:
        print(f" {pkg['name']}: {pkg['tokens']} tokens for {pkg['price']} {pkg['currency']}")

    # Purchase tokens
    client.tokens.purchase(package_id="pkg-starter")

    # View usage history (last 30 days)
    daily = client.tokens.daily(days=30)
    for day in daily["daily"]:
        print(f" {day['date']}: {day['tokens_used']} tokens")

    # Usage breakdown by API key
    by_key = client.tokens.by_key()
    for entry in by_key["keys"]:
        print(f" {entry['key_name']}: {entry['tokens_used']}")
```
## Async Client

For async applications, use `AsyncClient`. All resource methods become awaitable.

```python
import asyncio
from ic_gpu import AsyncClient
async def main():
    async with AsyncClient() as client:
        # List instances
        instances = await client.instances.list()
        for inst in instances:
            print(f"{inst['name']} {inst['status']}")

        # Create an instance
        instance = await client.instances.create(
            name="async-workspace",
            tier="timesliced",
        )
        print(f"Created: {instance['id']}")

        # Chat completion
        response = await client.llm.chat(
            model="llama-3-8b",
            messages=[{"role": "user", "content": "Hello!"}],
        )
        print(response["choices"][0]["message"]["content"])

asyncio.run(main())
```
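The main payoff of the async client is issuing requests concurrently. A sketch fanning several chat completions out with `asyncio.gather`:

```python
import asyncio

from ic_gpu import AsyncClient


async def main():
    prompts = ["What is CUDA?", "What is MIG?", "What is GPU time-slicing?"]
    async with AsyncClient() as client:
        # Start all three chat calls at once and wait for every result.
        responses = await asyncio.gather(
            *(
                client.llm.chat(
                    model="llama-3-8b",
                    messages=[{"role": "user", "content": p}],
                )
                for p in prompts
            )
        )
    for prompt, resp in zip(prompts, responses):
        print(f"{prompt} -> {resp['choices'][0]['message']['content'][:60]}")

asyncio.run(main())
```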
## Error Handling

The SDK raises typed exceptions for API errors. All errors extend `ICGPUError`.

```python
from ic_gpu import Client
from ic_gpu._errors import (
    ICGPUError,
    ValidationError,
    NotFoundError,
    RateLimitError,
    AccessDeniedError,
    ConflictError,
    InternalError,
    ServiceUnavailableError,
)

with Client() as client:
    try:
        instance = client.instances.get("nonexistent-id")
    except NotFoundError as e:
        print(f"Not found: {e.message}")
        print(f" Status: {e.status}")  # 404
        print(f" Code: {e.code}")  # "ResourceNotFoundException"
        print(f" Hint: {e.hint}")
        print(f" Request ID: {e.request_id}")
    except ValidationError as e:
        print(f"Bad request: {e.message}")  # 400
    except RateLimitError as e:
        print(f"Rate limited: {e.message}")  # 429
    except AccessDeniedError as e:
        print(f"Forbidden: {e.message}")  # 403
    except ConflictError as e:
        print(f"Conflict: {e.message}")  # 409
    except ICGPUError as e:
        # Catch any other API error
        print(f"API error {e.status}: {e.message}")
```
## Error Classes

| Class | HTTP Status | When |
|---|---|---|
| `ValidationError` | 400 | Invalid request parameters |
| `AccessDeniedError` | 403 | Insufficient permissions or feature disabled |
| `NotFoundError` | 404 | Resource does not exist |
| `ConflictError` | 409 | Resource already exists or state conflict |
| `RateLimitError` | 429 | Too many requests |
| `InternalError` | 500 | Server error |
| `ServiceUnavailableError` | 502/503 | Upstream service down |
## Pagination
List methods return a `PageIterator` that lazily fetches pages as you iterate, so there is no need to manage cursors manually.

```python
from ic_gpu import Client
with Client() as client:
    # Lazy iteration — fetches pages automatically
    for instance in client.instances.list():
        print(instance["name"])

    # Collect all results at once
    all_instances = client.instances.list().to_list()
    print(f"Total: {len(all_instances)}")

    # Control page size
    for instance in client.instances.list(max_results=10):
        print(instance["name"])

    # Filter with query parameters
    for instance in client.instances.list(status="running", tier="dedicated"):
        print(instance["name"])
```
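Because iteration is lazy, breaking out early avoids fetching the remaining pages. For instance, `itertools.islice` takes just the first few results:

```python
from itertools import islice

from ic_gpu import Client

with Client() as client:
    # Fetches only as many pages as the first 5 results require.
    for instance in islice(client.instances.list(), 5):
        print(instance["name"])
```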
## Advanced Configuration

```python
from ic_gpu import Client
# Automatic retries with exponential backoff
# Retries on: 429, 500, 502, 503
# Backoff: 0.5s, 1s, 2s (factor=0.5, doubles each attempt)
client = Client(
    max_retries=3,  # default
    timeout=30.0,  # seconds, default
)
# Environment variables
# IC_GPU_API_KEY — API key (sk-ic-...)
# These are read automatically by Client() if no args are passed
# The client uses httpx under the hood
# Connection pooling is handled automatically
```
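For reference, that schedule falls out of `delay = factor * 2 ** attempt`; the formula is inferred from the numbers in the comment above, not read from the SDK source:

```python
factor = 0.5
for attempt in range(3):
    # attempt 0 -> 0.5s, attempt 1 -> 1.0s, attempt 2 -> 2.0s
    print(f"retry {attempt + 1}: sleep {factor * 2 ** attempt}s")
```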
## Complete Example

A full workflow: check the token balance, create an SSH key and a GPU instance, wait for the instance to come up, run inference, set a spending alert, and clean up.

```python
import time

from ic_gpu import Client
from ic_gpu._errors import ICGPUError

def main():
    with Client() as client:
        # 1. Check token balance
        balance = client.tokens.balance()
        print(f"Token balance: {balance['balance']}")

        # 2. Create an SSH key
        ssh_key = client.ssh_keys.generate(name="demo-key")
        print(f"SSH key created: {ssh_key['id']}")

        # 3. Create a GPU instance
        instance = client.instances.create(
            name="demo-workspace",
            tier="timesliced",
            tags={"purpose": "demo"},
        )
        print(f"Instance created: {instance['id']}")

        # 4. Wait for it to be ready (consider bounding this loop with a timeout)
        while True:
            status = client.instances.get(instance["id"])
            if status["status"] == "running":
                break
            print(f" Status: {status['status']}...")
            time.sleep(5)
        print(f"Instance running at {status['ssh_host']}:{status['ssh_port']}")

        # 5. Run a chat completion
        response = client.llm.chat(
            model="llama-3-8b",
            messages=[{"role": "user", "content": "What is CUDA?"}],
            max_tokens=128,
        )
        print(f"\nLLM response:\n{response['choices'][0]['message']['content']}")

        # 6. Set up a spending alert
        client.alerts.create(threshold=5000, notification_type="dashboard")

        # 7. Clean up
        client.instances.delete(instance["id"])
        client.ssh_keys.delete(ssh_key["id"])
        print("\nCleaned up. Done!")


if __name__ == "__main__":
    try:
        main()
    except ICGPUError as e:
        print(f"API error: {e.message} (status={e.status}, code={e.code})")
    except Exception as e:
        print(f"Error: {e}")
```