# Python SDK
Official Python client for the IC GPU Service API. It supports synchronous and asynchronous usage, automatic pagination, retries with exponential backoff, and typed errors.
## Prerequisites
- Python 3.9 or later
- An IC GPU Service account with an API key (`sk-ic-...`)
## Getting Started
### Install the SDK

```bash
pip install ic-gpu
```

### Set your API key
Set the `IC_GPU_API_KEY` environment variable, or pass it directly to the client constructor.
```bash
export IC_GPU_API_KEY="sk-ic-your-api-key-here"
```

### Make your first API call

```python
from ic_gpu import Client
client = Client(api_key="sk-ic-your-api-key")
# List all GPU instances
for instance in client.instances.list():
print(f"{instance['name']} {instance['status']} {instance['tier']}")
client.close()
```

## Client Initialisation
The client can be used as a context manager, which automatically closes the HTTP connection when done.
```python
from ic_gpu import Client
# Using environment variable (recommended)
client = Client()
# Or pass the key directly
client = Client(api_key="sk-ic-your-api-key")
# With custom settings
client = Client(
    api_key="sk-ic-your-api-key",
    base_url="https://api.gpu.local",
    timeout=30.0,
    max_retries=3,
)
# As a context manager (recommended)
with Client() as client:
    instances = client.instances.list().to_list()
    print(f"Found {len(instances)} instances")
```

## GPU Instances

```python
from ic_gpu import Client
with Client() as client:
    # Create a GPU instance
    instance = client.instances.create(
        name="my-workspace",
        tier="timesliced",  # "timesliced" (no memory isolation), "dedicated", or "mig"
        inference_engine="vllm",  # optional: "vllm", "ollama", "sglang"
        tags={"project": "research", "team": "ml"},
    )
    print(f"Created: {instance['id']} Status: {instance['status']}")

    # Get instance details
    details = client.instances.get(instance["id"])
    print(f"SSH: ssh gpuuser@{details['ssh_host']} -p {details['ssh_port']}")

    # Stop and start
    client.instances.stop(instance["id"])
    client.instances.start(instance["id"])

    # View logs
    logs = client.instances.logs(instance["id"])
    print(logs)

    # Terminate when done
    client.instances.delete(instance["id"])
```
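A freshly created instance is not necessarily reachable the moment `create` returns; the complete example at the end of this page polls until the status is `running`. A small reusable helper along those lines, with a timeout added (the 5-second poll interval is just a sensible default, not an SDK constant):

```python
import time

from ic_gpu import Client


def wait_until_running(client: Client, instance_id: str, timeout: float = 600.0) -> dict:
    """Poll an instance until its status is "running", or raise TimeoutError."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        details = client.instances.get(instance_id)
        if details["status"] == "running":
            return details
        time.sleep(5)
    raise TimeoutError(f"Instance {instance_id} not running after {timeout:.0f}s")
```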
## LLM Inference

The LLM resource provides an OpenAI-compatible interface for chat completions, text completions, and embeddings.

```python
from ic_gpu import Client
with Client() as client:
    # Chat completion
    response = client.llm.chat(
        model="llama-3-8b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Explain GPU computing in one paragraph."},
        ],
        temperature=0.7,
        max_tokens=256,
    )
    print(response["choices"][0]["message"]["content"])

    # Text completion
    response = client.llm.complete(
        model="llama-3-8b",
        prompt="The benefits of GPU computing are:",
        max_tokens=256,
    )
    print(response["choices"][0]["text"])

    # Embeddings
    response = client.llm.embed(
        model="bge-large",
        input="GPU computing enables parallel processing",
    )
    print(f"Embedding dimension: {len(response['data'][0]['embedding'])}")

    # List available models
    models = client.llm.models()
    for m in models["data"]:
        print(f" {m['id']}")
```
## Model Deployments

```python
from ic_gpu import Client
with Client() as client:
    # Deploy a model from HuggingFace
    deployment = client.models.deploy(
        model_name="my-llama",
        huggingface_repo="meta-llama/Meta-Llama-3-8B-Instruct",
        engine="vllm",
        gpu_count=1,
        min_replicas=1,
        max_replicas=3,
    )
    print(f"Deploying: {deployment['model_name']} Status: {deployment['status']}")

    # Check deployment status
    status = client.models.get("my-llama")
    print(f"Status: {status['status']}")

    # Scale replicas
    client.models.update_scaling("my-llama", min_replicas=2, max_replicas=5)

    # List all deployments
    for model in client.models.list():
        print(f" {model['model_name']} {model['status']}")

    # Stop a deployment
    client.models.stop("my-llama")
```
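Once the deployment reports a healthy status, it should be reachable through the LLM resource. The sketch below assumes the deployment's `model_name` is also the `model` identifier used for inference, which the examples above suggest but do not state outright:

```python
from ic_gpu import Client

with Client() as client:
    # Assumption: a deployment named "my-llama" is served under that same name.
    response = client.llm.chat(
        model="my-llama",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(response["choices"][0]["message"]["content"])
```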
## Virtual Machines

```python
from ic_gpu import Client
with Client() as client:
    # Create a VM
    vm = client.vms.create(
        name="dev-server",
        template="tpl-ubuntu-24",
        cpu_cores=4,
        memory_gb=16,
    )
    print(f"Created VM: {vm['id']} Status: {vm['status']}")

    # Get VM details
    details = client.vms.get(vm["id"])

    # Stop, start, reboot
    client.vms.stop(vm["id"])
    client.vms.start(vm["id"])
    client.vms.reboot(vm["id"])

    # List all VMs
    for v in client.vms.list():
        print(f" {v['name']} {v['status']}")

    # Delete (must be stopped first)
    client.vms.stop(vm["id"])
    client.vms.delete(vm["id"])
```

## Kubernetes Clusters

```python
from ic_gpu import Client
with Client() as client:
    # Create a cluster
    cluster = client.clusters.create(name="ml-cluster")
    print(f"Created: {cluster['id']} Status: {cluster['status']}")

    # Download kubeconfig
    kubeconfig_yaml = client.clusters.kubeconfig(cluster["id"])
    with open("kubeconfig.yaml", "w") as f:
        f.write(kubeconfig_yaml)
    print("Kubeconfig saved to kubeconfig.yaml")

    # List clusters
    for c in client.clusters.list():
        print(f" {c['name']} {c['status']}")

    # Delete
    client.clusters.delete(cluster["id"])
```
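The downloaded kubeconfig works with standard Kubernetes tooling. For example, with the official `kubernetes` Python client (installed separately via `pip install kubernetes`, and assuming the cluster's API server is reachable from your machine):

```python
from kubernetes import client as k8s, config

# Point the Kubernetes client at the kubeconfig saved above,
# then list the cluster's nodes as a connectivity check.
config.load_kube_config(config_file="kubeconfig.yaml")
for node in k8s.CoreV1Api().list_node().items:
    print(node.metadata.name)
```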
## API Keys and SSH Keys

```python
from ic_gpu import Client
with Client() as client:
    # Create an API key with scoped permissions
    key = client.api_keys.create(
        name="ci-pipeline",
        scopes=["instances", "models"],
        permissions=["read", "write"],
    )
    print(f"Key: {key['prefix']}...")

    # List API keys
    for k in client.api_keys.list():
        print(f" {k['name']} scopes={k['scopes']}")

    # Upload an SSH public key
    ssh_key = client.ssh_keys.create(
        name="laptop",
        public_key="ssh-ed25519 AAAA... user@laptop",
    )

    # Generate a new SSH key pair (private key returned once)
    generated = client.ssh_keys.generate(name="auto-key")
    print(f"Private key:\n{generated['private_key']}")

    # List SSH keys
    for sk in client.ssh_keys.list():
        print(f" {sk['name']} {sk['fingerprint']}")
```
## Webhooks

```python
from ic_gpu import Client
with Client() as client:
    # Create a webhook
    webhook = client.webhooks.create(
        url="https://example.com/hooks/gpu",
        events=["instance.created", "instance.terminated", "balance.low"],
    )
    print(f"Webhook: {webhook['id']}")
    print(f"Signing secret: {webhook['secret']}")

    # Send a test delivery
    client.webhooks.test(webhook["id"])

    # List available event types
    event_types = client.webhooks.events()
    for evt in event_types["events"]:
        print(f" {evt}")

    # View delivery history
    deliveries = client.webhooks.deliveries(webhook["id"])
    for d in deliveries["deliveries"]:
        print(f" {d['status']} {d['created_at']}")

    # Delete
    client.webhooks.delete(webhook["id"])
```
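On the receiving side, each delivery should be verified against the signing secret returned at creation time. The service's exact signing scheme is not documented here, so the sketch below assumes an HMAC-SHA256 hex digest of the raw request body carried in an `X-Signature` header; both the header name and the algorithm are assumptions to confirm against the webhook docs:

```python
import hashlib
import hmac


def verify_delivery(secret: str, raw_body: bytes, signature_header: str) -> bool:
    """Compare the received signature against our own digest in constant time."""
    expected = hmac.new(secret.encode(), raw_body, hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, signature_header)
```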
## Tags and Spending Alerts

```python
from ic_gpu import Client
with Client() as client:
    # Tag a resource (AWS-style key/value tags)
    client.tags.create(
        resource_type="instance",
        resource_id="inst-abc123",
        tags={"environment": "production", "team": "ml-infra"},
    )

    # List tags on a resource
    tags = client.tags.list(
        resource_type="instance",
        resource_id="inst-abc123",
    )
    for t in tags["tags"]:
        print(f" {t['key']}={t['value']}")

    # Remove specific tags
    client.tags.delete(
        resource_type="instance",
        resource_id="inst-abc123",
        keys=["team"],
    )

    # Create a spending alert
    alert = client.alerts.create(
        threshold=10000,
        notification_type="dashboard",
    )
    print(f"Alert: {alert['id']} triggers at {alert['threshold']} tokens")

    # List alerts
    for a in client.alerts.list():
        print(f" {a['id']} threshold={a['threshold']}")
```

## Token Balance and Usage

```python
from ic_gpu import Client
with Client() as client:
    # Check balance
    balance = client.tokens.balance()
    print(f"Balance: {balance['balance']} tokens")

    # View available packages
    packages = client.tokens.packages()
    for pkg in packages["packages"]:
        print(f" {pkg['name']}: {pkg['tokens']} tokens for {pkg['price']} {pkg['currency']}")

    # Purchase tokens
    client.tokens.purchase(package_id="pkg-starter")

    # View usage history (last 30 days)
    daily = client.tokens.daily(days=30)
    for day in daily["daily"]:
        print(f" {day['date']}: {day['tokens_used']} tokens")

    # Usage breakdown by API key
    by_key = client.tokens.by_key()
    for entry in by_key["keys"]:
        print(f" {entry['key_name']}: {entry['tokens_used']}")
```
## Async Client

For async applications, use `AsyncClient`. All resource methods become awaitable.

```python
import asyncio
from ic_gpu import AsyncClient
async def main():
    async with AsyncClient() as client:
        # List instances
        instances = await client.instances.list()
        for inst in instances:
            print(f"{inst['name']} {inst['status']}")

        # Create an instance
        instance = await client.instances.create(
            name="async-workspace",
            tier="timesliced",
        )
        print(f"Created: {instance['id']}")

        # Chat completion
        response = await client.llm.chat(
            model="llama-3-8b",
            messages=[{"role": "user", "content": "Hello!"}],
        )
        print(response["choices"][0]["message"]["content"])

asyncio.run(main())
```
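The main payoff of the async client is issuing requests concurrently. A sketch fanning several chat completions out with `asyncio.gather`:

```python
import asyncio

from ic_gpu import AsyncClient


async def main():
    prompts = ["What is CUDA?", "What is MIG?", "What is GPU time-slicing?"]
    async with AsyncClient() as client:
        # Start all three chat calls at once and wait for every result.
        responses = await asyncio.gather(
            *(
                client.llm.chat(
                    model="llama-3-8b",
                    messages=[{"role": "user", "content": p}],
                )
                for p in prompts
            )
        )
    for prompt, resp in zip(prompts, responses):
        print(f"{prompt} -> {resp['choices'][0]['message']['content'][:60]}")

asyncio.run(main())
```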
## Error Handling

The SDK raises typed exceptions for API errors. All errors extend `ICGPUError`.

```python
from ic_gpu import Client
from ic_gpu._errors import (
    ICGPUError,
    ValidationError,
    NotFoundError,
    RateLimitError,
    AccessDeniedError,
    ConflictError,
    InternalError,
    ServiceUnavailableError,
)

with Client() as client:
    try:
        instance = client.instances.get("nonexistent-id")
    except NotFoundError as e:
        print(f"Not found: {e.message}")
        print(f" Status: {e.status}")  # 404
        print(f" Code: {e.code}")  # "ResourceNotFoundException"
        print(f" Hint: {e.hint}")
        print(f" Request ID: {e.request_id}")
    except ValidationError as e:
        print(f"Bad request: {e.message}")  # 400
    except RateLimitError as e:
        print(f"Rate limited: {e.message}")  # 429
    except AccessDeniedError as e:
        print(f"Forbidden: {e.message}")  # 403
    except ConflictError as e:
        print(f"Conflict: {e.message}")  # 409
    except ICGPUError as e:
        # Catch any other API error
        print(f"API error {e.status}: {e.message}")
```
## Error Classes

| Class | HTTP Status | When |
|---|---|---|
| `ValidationError` | 400 | Invalid request parameters |
| `AccessDeniedError` | 403 | Insufficient permissions or feature disabled |
| `NotFoundError` | 404 | Resource does not exist |
| `ConflictError` | 409 | Resource already exists or state conflict |
| `RateLimitError` | 429 | Too many requests |
| `InternalError` | 500 | Server error |
| `ServiceUnavailableError` | 502/503 | Upstream service down |
## Pagination
List methods return a `PageIterator` that lazily fetches pages as you iterate, so there is no need to manage cursors manually.

```python
from ic_gpu import Client
with Client() as client:
    # Lazy iteration — fetches pages automatically
    for instance in client.instances.list():
        print(instance["name"])

    # Collect all results at once
    all_instances = client.instances.list().to_list()
    print(f"Total: {len(all_instances)}")

    # Control page size
    for instance in client.instances.list(max_results=10):
        print(instance["name"])

    # Filter with query parameters
    for instance in client.instances.list(status="running", tier="dedicated"):
        print(instance["name"])
```
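Because iteration is lazy, breaking out early avoids fetching the remaining pages. For instance, `itertools.islice` takes just the first few results:

```python
from itertools import islice

from ic_gpu import Client

with Client() as client:
    # Fetches only as many pages as the first 5 results require.
    for instance in islice(client.instances.list(), 5):
        print(instance["name"])
```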
## Advanced Configuration

```python
from ic_gpu import Client
# Automatic retries with exponential backoff
# Retries on: 429, 500, 502, 503
# Backoff: 0.5s, 1s, 2s (factor=0.5, doubles each attempt)
client = Client(
    max_retries=3,  # default
    timeout=30.0,  # seconds, default
)
# Environment variables
# IC_GPU_API_KEY — API key (sk-ic-...)
# These are read automatically by Client() if no args are passed
# The client uses httpx under the hood
# Connection pooling is handled automatically
```
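For reference, that schedule falls out of `delay = factor * 2 ** attempt`; the formula is inferred from the numbers in the comment above, not read from the SDK source:

```python
factor = 0.5
for attempt in range(3):
    # attempt 0 -> 0.5s, attempt 1 -> 1.0s, attempt 2 -> 2.0s
    print(f"retry {attempt + 1}: sleep {factor * 2 ** attempt}s")
```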
## Complete Example

A full workflow: check the token balance, create an SSH key and a GPU instance, wait for the instance to come up, run inference, set a spending alert, and clean up.

```python
import time

from ic_gpu import Client
from ic_gpu._errors import ICGPUError

def main():
    with Client() as client:
        # 1. Check token balance
        balance = client.tokens.balance()
        print(f"Token balance: {balance['balance']}")

        # 2. Create an SSH key
        ssh_key = client.ssh_keys.generate(name="demo-key")
        print(f"SSH key created: {ssh_key['id']}")

        # 3. Create a GPU instance
        instance = client.instances.create(
            name="demo-workspace",
            tier="timesliced",
            tags={"purpose": "demo"},
        )
        print(f"Instance created: {instance['id']}")

        # 4. Wait for it to be ready (consider bounding this loop with a timeout)
        while True:
            status = client.instances.get(instance["id"])
            if status["status"] == "running":
                break
            print(f" Status: {status['status']}...")
            time.sleep(5)
        print(f"Instance running at {status['ssh_host']}:{status['ssh_port']}")

        # 5. Run a chat completion
        response = client.llm.chat(
            model="llama-3-8b",
            messages=[{"role": "user", "content": "What is CUDA?"}],
            max_tokens=128,
        )
        print(f"\nLLM response:\n{response['choices'][0]['message']['content']}")

        # 6. Set up a spending alert
        client.alerts.create(threshold=5000, notification_type="dashboard")

        # 7. Clean up
        client.instances.delete(instance["id"])
        client.ssh_keys.delete(ssh_key["id"])
        print("\nCleaned up. Done!")


if __name__ == "__main__":
    try:
        main()
    except ICGPUError as e:
        print(f"API error: {e.message} (status={e.status}, code={e.code})")
    except Exception as e:
        print(f"Error: {e}")
```