IC GPU

Go HTTP Guide

Use the IC GPU Service API from Go with the standard library. No SDK needed — just net/http and encoding/json.

Prerequisites

  • Go 1.21 or later
  • An IC GPU Service account with an API key (sk-ic-...)
Tip: If you prefer a CLI tool, the IC CLI is written in Go and available as a pre-built binary.

Getting Started

Step 1

Create a reusable HTTP client

Build a small helper that adds authentication and JSON handling to every request.

client.go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"time"
)

// Client wraps net/http with authentication and JSON helpers.
type Client struct {
	BaseURL    string       // API root, no trailing slash
	APIKey     string       // bearer token sent with every request
	HTTPClient *http.Client // underlying transport; replace to customize
}

// NewClient creates a client with sensible defaults: the production
// base URL and a 30-second overall request timeout.
func NewClient(apiKey string) *Client {
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &Client{
		BaseURL:    "https://api.gpu.local",
		APIKey:     apiKey,
		HTTPClient: httpClient,
	}
}

// Do sends an authenticated JSON request and decodes the JSON response
// into a generic map.
//
// method and path identify the endpoint; path is appended to c.BaseURL
// as-is. When body is non-nil it is JSON-encoded and sent with a
// Content-Type of application/json. Responses with status >= 400 are
// returned as errors containing the raw body. A 204 No Content — or any
// success response with an empty body — yields an empty map.
func (c *Client) Do(method, path string, body any) (map[string]any, error) {
	var bodyReader io.Reader
	if body != nil {
		b, err := json.Marshal(body)
		if err != nil {
			return nil, fmt.Errorf("marshal body: %w", err)
		}
		bodyReader = bytes.NewReader(b)
	}

	req, err := http.NewRequest(method, c.BaseURL+path, bodyReader)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Authorization", "Bearer "+c.APIKey)
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, data)
	}

	// 204 No Content carries no body by definition, and some endpoints
	// return 200 with an empty body; decoding either would fail with
	// "unexpected end of JSON input", so short-circuit both.
	if resp.StatusCode == http.StatusNoContent || len(data) == 0 {
		return map[string]any{}, nil
	}

	var result map[string]any
	if err := json.Unmarshal(data, &result); err != nil {
		return nil, fmt.Errorf("decode response: %w", err)
	}
	return result, nil
}

// Get sends a GET request to path, appending params (when present) as an
// URL-encoded query string.
func (c *Client) Get(path string, params map[string]string) (map[string]any, error) {
	if len(params) == 0 {
		return c.Do("GET", path, nil)
	}
	values := url.Values{}
	for key, val := range params {
		values.Set(key, val)
	}
	return c.Do("GET", path+"?"+values.Encode(), nil)
}
Step 2

List GPU instances

main.go
package main

import (
	"fmt"
	"log"
	"os"
)

// main lists all GPU instances and prints name, status, and tier.
func main() {
	client := NewClient(os.Getenv("IC_GPU_API_KEY"))

	result, err := client.Get("/api/v1/instances", nil)
	if err != nil {
		log.Fatal(err)
	}

	// Guard the type assertion: a missing or non-list "instances" field
	// would otherwise panic at runtime instead of reporting an error.
	instances, ok := result["instances"].([]any)
	if !ok {
		log.Fatalf("unexpected response shape: %v", result)
	}
	for _, raw := range instances {
		inst, ok := raw.(map[string]any)
		if !ok {
			continue // skip malformed entries rather than panicking
		}
		fmt.Printf("%s  %s  %s\n", inst["name"], inst["status"], inst["tier"])
	}
}

GPU Instances

instances.go
// Create a GPU instance
result, err := client.Do("POST", "/api/v1/instances", map[string]any{
	"name": "my-workspace",
	"tier": "timesliced",  // "timesliced" (no memory isolation), "dedicated", or "mig"
	"tags": map[string]string{
		"project": "research",
	},
})
if err != nil {
	log.Fatal(err)
}
fmt.Printf("Created: %s\n", result["id"])

// Get instance details (error ignored for brevity — check it in real code)
inst, _ := client.Get("/api/v1/instances/"+instanceID, nil)
fmt.Printf("SSH: ssh gpuuser@%s -p %v\n", inst["ssh_host"], inst["ssh_port"])

// Stop an instance (lifecycle actions below ignore return values for brevity)
client.Do("POST", "/api/v1/instances/"+instanceID+"/stop", nil)

// Start an instance
client.Do("POST", "/api/v1/instances/"+instanceID+"/start", nil)

// Terminate (permanently deletes the instance)
client.Do("DELETE", "/api/v1/instances/"+instanceID, nil)

LLM Chat Completion

llm.go
// Chat completion (OpenAI-compatible endpoint)
result, err := client.Do("POST", "/v1/chat/completions", map[string]any{
	"model": "llama-3-8b",
	"messages": []map[string]string{
		{"role": "system", "content": "You are a helpful assistant."},
		{"role": "user", "content": "Explain GPU computing in one paragraph."},
	},
	"temperature": 0.7,
	"max_tokens": 256,
})
if err != nil {
	log.Fatal(err)
}

// These assertions assume the OpenAI-style response shape and panic on a
// malformed response — guard with the ", ok" form in production code.
choices := result["choices"].([]any)
msg := choices[0].(map[string]any)["message"].(map[string]any)
fmt.Println(msg["content"])

// Text completion (errors ignored for brevity)
result, _ = client.Do("POST", "/v1/completions", map[string]any{
	"model":      "llama-3-8b",
	"prompt":     "The benefits of GPU computing are:",
	"max_tokens": 256,
})
choices = result["choices"].([]any)
fmt.Println(choices[0].(map[string]any)["text"])

// Embeddings (single string input; the response carries one data entry)
result, _ = client.Do("POST", "/v1/embeddings", map[string]any{
	"model": "bge-large",
	"input": "GPU computing enables parallel processing",
})
data := result["data"].([]any)
embedding := data[0].(map[string]any)["embedding"].([]any)
fmt.Printf("Dimensions: %d\n", len(embedding))

Virtual Machines

vms.go
// Create a VM from a template with 4 vCPUs and 16 GiB (16384 MiB) of memory
vm, err := client.Do("POST", "/api/v1/vms", map[string]any{
	"name":        "dev-server",
	"template_id": "tpl-ubuntu-24",
	"cpu":         4,
	"memory_mb":   16384,
})
if err != nil {
	log.Fatal(err)
}
fmt.Printf("Created VM: %s\n", vm["id"])

// Stop / Start / Reboot (lifecycle actions; errors ignored for brevity)
client.Do("POST", "/api/v1/vms/"+vmID+"/stop", nil)
client.Do("POST", "/api/v1/vms/"+vmID+"/start", nil)
client.Do("POST", "/api/v1/vms/"+vmID+"/reboot", nil)

// Delete (must be stopped first)
client.Do("DELETE", "/api/v1/vms/"+vmID, nil)

Kubernetes Clusters

clusters.go
// Create a cluster (error ignored for brevity — check it in real code)
cluster, _ := client.Do("POST", "/api/v1/clusters", map[string]any{
	"name": "ml-cluster",
})
fmt.Printf("Cluster: %s  Status: %s\n", cluster["id"], cluster["status"])

// Download kubeconfig (returns YAML string)
result, _ := client.Get("/api/v1/clusters/"+clusterID+"/kubeconfig", nil)
kubeconfig := result["kubeconfig"].(string)
// 0600: the kubeconfig grants cluster access — keep it owner-readable only.
// WriteFile's error is ignored here for brevity.
os.WriteFile("kubeconfig.yaml", []byte(kubeconfig), 0600)

// Delete
client.Do("DELETE", "/api/v1/clusters/"+clusterID, nil)

Model Deployments

models.go
// Deploy a Hugging Face model on the vLLM engine, autoscaling between
// 1 and 3 replicas (error ignored for brevity)
deployment, _ := client.Do("POST", "/api/v1/models", map[string]any{
	"model_name":       "my-llama",
	"huggingface_repo": "meta-llama/Meta-Llama-3-8B-Instruct",
	"engine":           "vllm",
	"gpu_count":        1,
	"min_replicas":     1,
	"max_replicas":     3,
})
fmt.Printf("Deploying: %s\n", deployment["model_name"])

// Check status (deployments are addressed by model_name)
status, _ := client.Get("/api/v1/models/my-llama", nil)
fmt.Printf("Status: %s\n", status["status"])

// Scale replicas (PATCH updates only the fields provided)
client.Do("PATCH", "/api/v1/models/my-llama", map[string]any{
	"min_replicas": 2,
	"max_replicas": 5,
})

// Stop (delete the deployment)
client.Do("DELETE", "/api/v1/models/my-llama", nil)

Pagination

pagination.go
// Iterate through all pages of instances using token-based pagination:
// pass each response's nextToken back until the server stops returning one.
var allInstances []map[string]any
nextToken := ""

for {
	params := map[string]string{"maxResults": "50"}
	if nextToken != "" {
		params["nextToken"] = nextToken
	}

	result, err := client.Get("/api/v1/instances", params)
	if err != nil {
		log.Fatal(err)
	}

	items := result["instances"].([]any) // panics on a malformed response
	for _, raw := range items {
		allInstances = append(allInstances, raw.(map[string]any))
	}

	// Check for next page; an absent or empty token marks the last page
	if token, ok := result["nextToken"].(string); ok && token != "" {
		nextToken = token
	} else {
		break
	}
}

fmt.Printf("Total instances: %d\n", len(allInstances))

Error Handling

errors.go
// APIError represents a structured error from the API.
type APIError struct {
	StatusCode int    // HTTP status of the failed response
	Message    string // human-readable description
	Type       string // broad error category
	Code       string // machine-readable code, e.g. "ResourceNotFoundException"
	Hint       string // optional remediation advice
	RequestID  string // server-side request ID, useful in support tickets
}

// Error implements the error interface.
func (e *APIError) Error() string {
	return fmt.Sprintf("API error %d (%s): %s", e.StatusCode, e.Code, e.Message)
}

// parseError extracts a structured error from a response body. When the
// body is not JSON — or is JSON without the expected {"error": {...}}
// envelope — the raw body text becomes the message so no information is
// silently dropped.
func parseError(statusCode int, body []byte) *APIError {
	var resp struct {
		Error struct {
			Message   string `json:"message"`
			Type      string `json:"type"`
			Code      string `json:"code"`
			Hint      string `json:"hint"`
			RequestID string `json:"requestId"`
		} `json:"error"`
	}
	// Fall back to the raw body on invalid JSON, and also when the JSON
	// decoded but carried no usable error fields (e.g. an HTML error page
	// that happens to parse, or a different envelope).
	if err := json.Unmarshal(body, &resp); err != nil ||
		(resp.Error.Message == "" && resp.Error.Code == "") {
		return &APIError{StatusCode: statusCode, Message: string(body)}
	}
	return &APIError{
		StatusCode: statusCode,
		Message:    resp.Error.Message,
		Type:       resp.Error.Type,
		Code:       resp.Error.Code,
		Hint:       resp.Error.Hint,
		RequestID:  resp.Error.RequestID,
	}
}

// Usage: branch on the machine-readable error code.
// NOTE(review): errors.As matches only when the client actually returns a
// *APIError (e.g. by calling parseError from Do); the basic client earlier
// in this guide returns a plain fmt.Errorf error, which this would not
// match — confirm which client variant is in use.
result, err := client.Get("/api/v1/instances/nonexistent", nil)
if err != nil {
	var apiErr *APIError
	if errors.As(err, &apiErr) {
		switch apiErr.Code {
		case "ResourceNotFoundException":
			fmt.Println("Instance not found")
		case "ValidationException":
			fmt.Printf("Bad request: %s\n", apiErr.Hint)
		case "ThrottlingException":
			fmt.Println("Rate limited, try again later")
		default:
			fmt.Printf("Error: %s\n", apiErr.Message)
		}
	}
}

Advanced: Context and Timeouts

advanced.go
import (
	"context"
	"time"
)

// Per-request timeout using context: the request is canceled (including
// connection setup and body read) once the 10-second deadline passes.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

req, _ := http.NewRequestWithContext(ctx, "GET", baseURL+"/api/v1/instances", nil)
req.Header.Set("Authorization", "Bearer "+apiKey)

resp, err := http.DefaultClient.Do(req)
if err != nil {
	// Distinguish a context deadline from other transport errors.
	if ctx.Err() == context.DeadlineExceeded {
		fmt.Println("Request timed out")
	}
	log.Fatal(err)
}
defer resp.Body.Close()

// withRetry runs fn, retrying with exponential backoff (500ms, 1s, 2s, ...)
// when it fails with a retryable API status: 429 or a transient 5xx.
// Any other error is returned immediately. After exhausting maxRetries the
// last error is returned wrapped, so callers can still inspect it with
// errors.As / errors.Is instead of receiving an opaque sentinel.
func withRetry(fn func() error, maxRetries int) error {
	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		err := fn()
		if err == nil {
			return nil
		}
		lastErr = err

		var apiErr *APIError
		if !errors.As(err, &apiErr) {
			return err // not an API error: do not retry
		}
		switch apiErr.StatusCode {
		case 429, 500, 502, 503:
			// Retryable. Skip the sleep after the final attempt — the
			// original slept once more before giving up, wasting time.
			if attempt < maxRetries {
				delay := time.Duration(1<<uint(attempt)) * 500 * time.Millisecond
				time.Sleep(delay)
			}
		default:
			return err // non-retryable API status
		}
	}
	return fmt.Errorf("max retries exceeded: %w", lastErr)
}

Complete Example

main.go
package main

import (
	"fmt"
	"log"
	"os"
	"time"
)

// main demonstrates a full workflow: check balance, create an instance,
// wait for it to run, query the LLM endpoint, then clean up.
func main() {
	apiKey := os.Getenv("IC_GPU_API_KEY")
	if apiKey == "" {
		log.Fatal("IC_GPU_API_KEY not set")
	}

	client := NewClient(apiKey)

	// 1. Check balance
	balance, err := client.Get("/api/v1/tokens/balance", nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Balance: %v tokens\n", balance["balance"])

	// 2. Create a GPU instance
	inst, err := client.Do("POST", "/api/v1/instances", map[string]any{
		"name": "go-demo",
		"tier": "timesliced",
	})
	if err != nil {
		log.Fatal(err)
	}
	// Guard the assertion: a malformed response would otherwise panic.
	instID, ok := inst["id"].(string)
	if !ok {
		log.Fatalf("unexpected create response: %v", inst)
	}
	fmt.Printf("Instance: %s\n", instID)

	// 3. Wait for running. Check the poll error: ignoring it would make a
	// failing endpoint spin forever on a nil status map.
	for {
		status, err := client.Get("/api/v1/instances/"+instID, nil)
		if err != nil {
			log.Fatal(err)
		}
		if status["status"] == "running" {
			fmt.Printf("Running at %s:%v\n", status["ssh_host"], status["ssh_port"])
			break
		}
		fmt.Printf("  Status: %s...\n", status["status"])
		time.Sleep(5 * time.Second)
	}

	// 4. Chat with LLM
	resp, err := client.Do("POST", "/v1/chat/completions", map[string]any{
		"model": "llama-3-8b",
		"messages": []map[string]string{
			{"role": "user", "content": "What is CUDA?"},
		},
		"max_tokens": 128,
	})
	if err != nil {
		log.Fatal(err)
	}
	choices, ok := resp["choices"].([]any)
	if !ok || len(choices) == 0 {
		log.Fatalf("unexpected completion response: %v", resp)
	}
	msg := choices[0].(map[string]any)["message"].(map[string]any)
	fmt.Printf("\nLLM: %s\n", msg["content"])

	// 5. Clean up — best-effort; report but do not abort on failure.
	if _, err := client.Do("DELETE", "/api/v1/instances/"+instID, nil); err != nil {
		log.Printf("cleanup failed: %v", err)
	}
	fmt.Println("\nDone!")
}