IC GPU

Python SDK

Official Python client for the IC GPU Service API. Supports sync and async, automatic pagination, retries, and typed errors.

Prerequisites

  • Python 3.9 or later
  • An IC GPU Service account with an API key (sk-ic-...)

Getting Started

Step 1 — Install the SDK

pip install ic-gpu
Step 2 — Set your API key

Set the IC_GPU_API_KEY environment variable, or pass it directly to the client constructor.

Terminal
export IC_GPU_API_KEY="sk-ic-your-api-key-here"
Step 3 — Make your first API call

list_instances.py
from ic_gpu import Client

client = Client(api_key="sk-ic-your-api-key")

# Walk every GPU instance on the account; the returned iterator
# pages through results lazily.
for inst in client.instances.list():
    print(f"{inst['name']}  {inst['status']}  {inst['tier']}")

# Release the underlying HTTP connection when finished.
client.close()

Client Initialisation

The client can be used as a context manager, which automatically closes the HTTP connection when done.

client_setup.py
from ic_gpu import Client

# Using environment variable (recommended) — reads IC_GPU_API_KEY
client = Client()

# Or pass the key directly
client = Client(api_key="sk-ic-your-api-key")

# With custom settings
client = Client(
    api_key="sk-ic-your-api-key",
    base_url="https://api.gpu.local",  # point at a non-default deployment
    timeout=30.0,                      # request timeout, seconds
    max_retries=3,                     # retry budget for retryable responses
)

# As a context manager (recommended): automatically closes the HTTP
# connection on exit (the clients constructed above are not closed here).
with Client() as client:
    instances = client.instances.list().to_list()
    print(f"Found {len(instances)} instances")

GPU Instances

instances.py
from ic_gpu import Client

with Client() as client:
    # Create a GPU instance
    instance = client.instances.create(
        name="my-workspace",
        tier="timesliced",           # "timesliced" (no memory isolation), "dedicated", or "mig"
        inference_engine="vllm",     # optional: "vllm", "ollama", "sglang"
        tags={"project": "research", "team": "ml"},
    )
    print(f"Created: {instance['id']}  Status: {instance['status']}")

    # Get instance details — includes SSH connection info
    details = client.instances.get(instance["id"])
    print(f"SSH: ssh gpuuser@{details['ssh_host']} -p {details['ssh_port']}")

    # Stop and start — lifecycle controls addressed by the same instance id
    client.instances.stop(instance["id"])
    client.instances.start(instance["id"])

    # View logs
    logs = client.instances.logs(instance["id"])
    print(logs)

    # Terminate when done
    client.instances.delete(instance["id"])

LLM Inference

The LLM resource provides an OpenAI-compatible interface for chat completions, text completions, and embeddings.

llm_chat.py
from ic_gpu import Client

with Client() as client:
    # Chat completion — OpenAI-compatible response shape
    response = client.llm.chat(
        model="llama-3-8b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Explain GPU computing in one paragraph."},
        ],
        temperature=0.7,   # sampling temperature
        max_tokens=256,    # cap on generated tokens
    )
    print(response["choices"][0]["message"]["content"])

    # Text completion — raw prompt in, text out
    response = client.llm.complete(
        model="llama-3-8b",
        prompt="The benefits of GPU computing are:",
        max_tokens=256,
    )
    print(response["choices"][0]["text"])

    # Embeddings — returns a vector per input under response["data"]
    response = client.llm.embed(
        model="bge-large",
        input="GPU computing enables parallel processing",
    )
    print(f"Embedding dimension: {len(response['data'][0]['embedding'])}")

    # List available models
    models = client.llm.models()
    for m in models["data"]:
        print(f"  {m['id']}")

Model Deployments

models.py
from ic_gpu import Client

with Client() as client:
    # Deploy a model from HuggingFace; deployments are addressed by
    # model_name in the calls below
    deployment = client.models.deploy(
        model_name="my-llama",
        huggingface_repo="meta-llama/Meta-Llama-3-8B-Instruct",
        engine="vllm",
        gpu_count=1,        # GPUs per replica
        min_replicas=1,     # autoscaling floor
        max_replicas=3,     # autoscaling ceiling
    )
    print(f"Deploying: {deployment['model_name']}  Status: {deployment['status']}")

    # Check deployment status
    status = client.models.get("my-llama")
    print(f"Status: {status['status']}")

    # Scale replicas — adjusts the autoscaling bounds after deploy
    client.models.update_scaling("my-llama", min_replicas=2, max_replicas=5)

    # List all deployments
    for model in client.models.list():
        print(f"  {model['model_name']}  {model['status']}")

    # Stop a deployment
    client.models.stop("my-llama")

Virtual Machines

vms.py
from ic_gpu import Client

with Client() as client:
    # Create a VM from a template
    vm = client.vms.create(
        name="dev-server",
        template="tpl-ubuntu-24",
        cpu_cores=4,
        memory_gb=16,
    )
    print(f"Created VM: {vm['id']}  Status: {vm['status']}")

    # Get VM details
    details = client.vms.get(vm["id"])

    # Stop, start, reboot — lifecycle controls; the VM keeps its id
    client.vms.stop(vm["id"])
    client.vms.start(vm["id"])
    client.vms.reboot(vm["id"])

    # List all VMs
    for v in client.vms.list():
        print(f"  {v['name']}  {v['status']}")

    # Delete (must be stopped first — the reboot above left it running)
    client.vms.stop(vm["id"])
    client.vms.delete(vm["id"])

Kubernetes Clusters

clusters.py
from ic_gpu import Client

with Client() as client:
    # Provision a new Kubernetes cluster.
    cluster = client.clusters.create(name="ml-cluster")
    print(f"Created: {cluster['id']}  Status: {cluster['status']}")
    cluster_id = cluster["id"]

    # Fetch the kubeconfig and persist it for use with kubectl.
    kubeconfig_yaml = client.clusters.kubeconfig(cluster_id)
    with open("kubeconfig.yaml", "w") as outfile:
        outfile.write(kubeconfig_yaml)
    print("Kubeconfig saved to kubeconfig.yaml")

    # Enumerate every cluster on the account.
    for entry in client.clusters.list():
        print(f"  {entry['name']}  {entry['status']}")

    # Tear the cluster back down.
    client.clusters.delete(cluster_id)

API Keys and SSH Keys

keys.py
from ic_gpu import Client

with Client() as client:
    # Create an API key with scoped permissions
    key = client.api_keys.create(
        name="ci-pipeline",
        scopes=["instances", "models"],      # resources the key may touch
        permissions=["read", "write"],       # allowed operations within scope
    )
    # Only the prefix is printed here; presumably the full secret is
    # returned once at creation — verify against the API reference.
    print(f"Key: {key['prefix']}...")

    # List API keys
    for k in client.api_keys.list():
        print(f"  {k['name']}  scopes={k['scopes']}")

    # Upload an SSH public key
    ssh_key = client.ssh_keys.create(
        name="laptop",
        public_key="ssh-ed25519 AAAA... user@laptop",
    )

    # Generate a new SSH key pair (private key returned once)
    generated = client.ssh_keys.generate(name="auto-key")
    print(f"Private key:\n{generated['private_key']}")

    # List SSH keys
    for sk in client.ssh_keys.list():
        print(f"  {sk['name']}  {sk['fingerprint']}")

Webhooks

webhooks.py
from ic_gpu import Client

with Client() as client:
    # Create a webhook subscribed to specific event types
    webhook = client.webhooks.create(
        url="https://example.com/hooks/gpu",
        events=["instance.created", "instance.terminated", "balance.low"],
    )
    print(f"Webhook: {webhook['id']}")
    # The signing secret is shown at creation; use it to verify deliveries.
    print(f"Signing secret: {webhook['secret']}")

    # Send a test delivery
    client.webhooks.test(webhook["id"])

    # List available event types
    event_types = client.webhooks.events()
    for evt in event_types["events"]:
        print(f"  {evt}")

    # View delivery history
    deliveries = client.webhooks.deliveries(webhook["id"])
    for d in deliveries["deliveries"]:
        print(f"  {d['status']}  {d['created_at']}")

    # Delete
    client.webhooks.delete(webhook["id"])

Tags and Spending Alerts

tags_alerts.py
from ic_gpu import Client

with Client() as client:
    # Tag a resource (AWS-style key/value tags)
    client.tags.create(
        resource_type="instance",
        resource_id="inst-abc123",
        tags={"environment": "production", "team": "ml-infra"},
    )

    # List tags on a resource
    tags = client.tags.list(
        resource_type="instance",
        resource_id="inst-abc123",
    )
    for t in tags["tags"]:
        print(f"  {t['key']}={t['value']}")

    # Remove specific tags — only the named keys are deleted
    client.tags.delete(
        resource_type="instance",
        resource_id="inst-abc123",
        keys=["team"],
    )

    # Create a spending alert — threshold is measured in tokens
    alert = client.alerts.create(
        threshold=10000,
        notification_type="dashboard",
    )
    print(f"Alert: {alert['id']}  triggers at {alert['threshold']} tokens")

    # List alerts
    for a in client.alerts.list():
        print(f"  {a['id']}  threshold={a['threshold']}")

Token Balance and Usage

tokens.py
from ic_gpu import Client

with Client() as client:
    # Check balance
    balance = client.tokens.balance()
    print(f"Balance: {balance['balance']} tokens")

    # View available packages
    packages = client.tokens.packages()
    for pkg in packages["packages"]:
        print(f"  {pkg['name']}: {pkg['tokens']} tokens for {pkg['price']} {pkg['currency']}")

    # Purchase tokens by package id (see the listing above)
    client.tokens.purchase(package_id="pkg-starter")

    # View usage history (last 30 days), one entry per day
    daily = client.tokens.daily(days=30)
    for day in daily["daily"]:
        print(f"  {day['date']}: {day['tokens_used']} tokens")

    # Usage breakdown by API key
    by_key = client.tokens.by_key()
    for entry in by_key["keys"]:
        print(f"  {entry['key_name']}: {entry['tokens_used']}")

Async Client

For async applications, use AsyncClient. All resource methods become awaitable.

async_example.py
import asyncio
from ic_gpu import AsyncClient

async def main():
    # AsyncClient mirrors Client; every resource method is awaitable.
    async with AsyncClient() as client:
        # List instances
        # NOTE(review): the sync docs show list() returning a lazy
        # PageIterator iterated directly; here the awaited result is
        # iterated with a plain for — confirm the async pagination contract.
        instances = await client.instances.list()
        for inst in instances:
            print(f"{inst['name']}  {inst['status']}")

        # Create an instance
        instance = await client.instances.create(
            name="async-workspace",
            tier="timesliced",
        )
        print(f"Created: {instance['id']}")

        # Chat completion
        response = await client.llm.chat(
            model="llama-3-8b",
            messages=[{"role": "user", "content": "Hello!"}],
        )
        print(response["choices"][0]["message"]["content"])

asyncio.run(main())

Error Handling

The SDK raises typed exceptions for API errors. All errors extend ICGPUError.

error_handling.py
from ic_gpu import Client
from ic_gpu._errors import (
    ICGPUError,
    ValidationError,
    NotFoundError,
    RateLimitError,
    AccessDeniedError,
    ConflictError,
    InternalError,
    ServiceUnavailableError,
)

with Client() as client:
    try:
        instance = client.instances.get("nonexistent-id")
    except NotFoundError as e:
        # Every error carries message/status/code/hint/request_id.
        print(f"Not found: {e.message}")
        print(f"  Status: {e.status}")       # 404
        print(f"  Code: {e.code}")           # "ResourceNotFoundException"
        print(f"  Hint: {e.hint}")
        print(f"  Request ID: {e.request_id}")
    except ValidationError as e:
        print(f"Bad request: {e.message}")   # 400
    except RateLimitError as e:
        print(f"Rate limited: {e.message}")  # 429
    except AccessDeniedError as e:
        print(f"Forbidden: {e.message}")     # 403
    except ConflictError as e:
        print(f"Conflict: {e.message}")      # 409
    except ICGPUError as e:
        # Catch any other API error — InternalError and
        # ServiceUnavailableError (imported above) land here too.
        print(f"API error {e.status}: {e.message}")

Error Classes

Class                   | HTTP Status | When
------------------------|-------------|---------------------------------------------
ValidationError         | 400         | Invalid request parameters
AccessDeniedError       | 403         | Insufficient permissions or feature disabled
NotFoundError           | 404         | Resource does not exist
ConflictError           | 409         | Resource already exists or state conflict
RateLimitError          | 429         | Too many requests
InternalError           | 500         | Server error
ServiceUnavailableError | 502/503     | Upstream service down

Pagination

List methods return a PageIterator that lazily fetches pages as you iterate. No need to manage cursors manually.

pagination.py
from ic_gpu import Client

with Client() as client:
    # Lazy iteration: each page is fetched on demand as the loop advances.
    for item in client.instances.list():
        print(item["name"])

    # Drain every page eagerly into one list.
    all_instances = client.instances.list().to_list()
    print(f"Total: {len(all_instances)}")

    # Cap the number of results fetched per page.
    for item in client.instances.list(max_results=10):
        print(item["name"])

    # Server-side filtering via query parameters.
    for item in client.instances.list(status="running", tier="dedicated"):
        print(item["name"])

Advanced Configuration

advanced.py
from ic_gpu import Client

# Automatic retries with exponential backoff
# Retries on: 429, 500, 502, 503
# Backoff: 0.5s, 1s, 2s (factor=0.5, doubles each attempt)
# NOTE(review): this snippet never closes the client — in real code use
# `with Client(...) as client:` or call client.close().
client = Client(
    max_retries=3,    # default
    timeout=30.0,     # seconds, default
)

# Environment variables
# IC_GPU_API_KEY — API key (sk-ic-...)
# These are read automatically by Client() if no args are passed

# The client uses httpx under the hood
# Connection pooling is handled automatically

Complete Example

A full workflow: create an instance, deploy a model, run inference, and clean up.

full_example.py
from ic_gpu import Client
from ic_gpu._errors import ICGPUError
import time

# Bound the readiness poll so a stuck provision cannot hang the script forever.
READY_TIMEOUT_SECONDS = 300.0
POLL_INTERVAL_SECONDS = 5

def main():
    """Full workflow: create an instance, run inference, and clean up.

    Resources created along the way (SSH key, GPU instance) are released in
    the ``finally`` block, so a mid-run failure does not leak billable
    resources.
    """
    with Client() as client:
        # 1. Check token balance
        balance = client.tokens.balance()
        print(f"Token balance: {balance['balance']}")

        ssh_key = None
        instance = None
        try:
            # 2. Create an SSH key
            ssh_key = client.ssh_keys.generate(name="demo-key")
            print(f"SSH key created: {ssh_key['id']}")

            # 3. Create a GPU instance
            instance = client.instances.create(
                name="demo-workspace",
                tier="timesliced",
                tags={"purpose": "demo"},
            )
            print(f"Instance created: {instance['id']}")

            # 4. Wait for it to be ready — bounded, so we raise instead of
            # polling indefinitely if the instance never reaches "running".
            deadline = time.monotonic() + READY_TIMEOUT_SECONDS
            while True:
                status = client.instances.get(instance["id"])
                if status["status"] == "running":
                    break
                if time.monotonic() >= deadline:
                    raise TimeoutError(
                        f"Instance {instance['id']} not running after "
                        f"{READY_TIMEOUT_SECONDS:.0f}s "
                        f"(last status: {status['status']})"
                    )
                print(f"  Status: {status['status']}...")
                time.sleep(POLL_INTERVAL_SECONDS)
            print(f"Instance running at {status['ssh_host']}:{status['ssh_port']}")

            # 5. Run a chat completion
            response = client.llm.chat(
                model="llama-3-8b",
                messages=[{"role": "user", "content": "What is CUDA?"}],
                max_tokens=128,
            )
            print(f"\nLLM response:\n{response['choices'][0]['message']['content']}")

            # 6. Set up a spending alert
            client.alerts.create(threshold=5000, notification_type="dashboard")
        finally:
            # 7. Clean up whatever was actually created, even on failure.
            if instance is not None:
                client.instances.delete(instance["id"])
            if ssh_key is not None:
                client.ssh_keys.delete(ssh_key["id"])
        print("\nCleaned up. Done!")

if __name__ == "__main__":
    try:
        main()
    except ICGPUError as e:
        print(f"API error: {e.message} (status={e.status}, code={e.code})")
    except Exception as e:
        print(f"Error: {e}")