Sovra

GCP Deployment Guide

Overview

Deploy Sovra on Google Cloud Platform using GKE (Google Kubernetes Engine).

Architecture

GCP Region (europe-west1)
├── VPC Network
│   ├── Subnets (GKE, Cloud SQL)
│   └── Firewall Rules
├── GKE Cluster (regional, 3 nodes per zone)
├── Cloud SQL for PostgreSQL
├── Cloud Load Balancer
└── Cloud DNS

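Prerequisites

- A GCP project with billing enabled and permission to create GKE, Cloud SQL, VPC, IAM, and Cloud DNS resources
- Google Cloud CLI (gcloud), authenticated against that project
- Terraform 1.7 or later
- kubectl
- A Cloud DNS managed zone for your domain (used in step 6)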

Quick Deploy

# Work from the GCP Terraform directory
cd infrastructure/terraform/gcp

# Log in so Terraform can use Application Default Credentials
gcloud auth application-default login

# Create your variables file from the example, then edit it
cp terraform.tfvars.example terraform.tfvars
nano terraform.tfvars

# Initialize the working directory and provision everything
terraform init
terraform apply

# Fetch kubeconfig credentials for the new cluster
gcloud container clusters get-credentials sovra-production --region=europe-west1

Detailed Steps

1. Configure Variables

# terraform.tfvars
project_id       = "sovra-production-123456"
region           = "europe-west1"
cluster_name     = "sovra-production"
# Nodes per zone: the cluster and node pool are regional (location = region).
node_count       = 3
machine_type     = "n2-standard-4"

# Cloud SQL
db_tier          = "db-custom-2-7680"  # 2 vCPU, 7680 MB (7.5 GB) RAM
db_disk_size     = 100
db_ha_enabled    = true

# Networking
network_name     = "sovra-network"
subnet_cidr      = "10.0.0.0/24"
pods_cidr        = "10.1.0.0/16"
services_cidr    = "10.2.0.0/16"

2. Provision Infrastructure

# Initialize Terraform
terraform init

# Review plan
terraform plan

# Apply
terraform apply

# Note the outputs:
# - cluster_endpoint
# - cluster_ca_certificate
# - cloudsql_connection_name
#
# cluster_endpoint and cluster_ca_certificate are declared sensitive, so the
# apply summary masks them; read them with `terraform output -raw <name>`.

3. Configure kubectl

# Write a kubeconfig entry for the cluster
gcloud container clusters get-credentials sovra-production --region=europe-west1 --project=sovra-production-123456

# Confirm the API server is reachable and the nodes have registered
kubectl cluster-info
kubectl get nodes

4. Deploy Control Plane

# Apply the GCP kustomize overlay
kubectl apply --kustomize ../../kubernetes/overlays/gcp

# Block for up to 5 minutes until the Sovra pods report Ready
kubectl wait pod --namespace=sovra --selector=app.kubernetes.io/name=sovra --for=condition=ready --timeout=300s

5. Configure Cloud SQL Proxy

For secure Cloud SQL connection:

# Deploy Cloud SQL Proxy
#
# The Deployment runs under the "cloudsql-proxy" Kubernetes service account;
# create and annotate it first (see "Workload Identity Setup"), otherwise the
# pods cannot be created.
#
# Two proxy flags are required for this topology:
#   --address=0.0.0.0  the v2 proxy listens on 127.0.0.1 by default, which is
#                      unreachable through the Service from other pods
#   --private-ip       the Cloud SQL instance has no public IPv4 address, so
#                      the proxy must dial its private IP
kubectl apply -f - <<EOF
apiVersion: v1
kind: Service
metadata:
  name: cloudsql-proxy
  namespace: sovra
spec:
  selector:
    app: cloudsql-proxy
  ports:
  - port: 5432
    targetPort: 5432
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cloudsql-proxy
  namespace: sovra
spec:
  selector:
    matchLabels:
      app: cloudsql-proxy
  template:
    metadata:
      labels:
        app: cloudsql-proxy
    spec:
      serviceAccountName: cloudsql-proxy
      containers:
      - name: cloud-sql-proxy
        image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.8.0
        args:
          - "--structured-logs"
          - "--address=0.0.0.0"
          - "--port=5432"
          - "--private-ip"
          - "sovra-production-123456:europe-west1:sovra-postgres"
        ports:
        - containerPort: 5432
        securityContext:
          runAsNonRoot: true
        resources:
          requests:
            memory: "256Mi"
            cpu: "100m"
          limits:
            memory: "512Mi"
            cpu: "500m"
EOF

6. Configure DNS

# Get load balancer IP (empty until the load balancer has been provisioned)
LOAD_BALANCER_IP=$(kubectl get svc api-gateway -n sovra -o jsonpath='{.status.loadBalancer.ingress[0].ip}')

# Create Cloud DNS record
# ${VAR:?msg} aborts the command with a clear message instead of creating a
# record with empty rrdatas when no IP has been assigned yet.
gcloud dns record-sets create sovra.example.com. \
  --zone=example-zone \
  --type=A \
  --ttl=300 \
  --rrdatas="${LOAD_BALANCER_IP:?load balancer IP not assigned yet - retry in a minute}"

Terraform Resources

Complete infrastructure configuration:

# main.tf
terraform {
  required_version = ">= 1.7"
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
    # Declared explicitly because random_password.db_password uses it;
    # without a constraint Terraform would pull whatever version is latest.
    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
}

# Default project and region for every google_* resource below.
provider "google" {
  project = var.project_id
  region  = var.region
}

# Custom-mode VPC: subnets are declared explicitly rather than auto-created.
resource "google_compute_network" "sovra" {
  auto_create_subnetworks = false
  name                    = var.network_name
}

# Single regional subnet. The primary range is used by the nodes; the two
# named secondary ranges are referenced by the GKE cluster's IP allocation
# policy for pods and services.
resource "google_compute_subnetwork" "sovra" {
  network       = google_compute_network.sovra.id
  region        = var.region
  name          = "${var.network_name}-subnet"
  ip_cidr_range = var.subnet_cidr

  secondary_ip_range {
    ip_cidr_range = var.pods_cidr
    range_name    = "pods"
  }

  secondary_ip_range {
    ip_cidr_range = var.services_cidr
    range_name    = "services"
  }
}

# Regional GKE cluster (location is a region, not a zone).
resource "google_container_cluster" "sovra" {
  name     = var.cluster_name
  location = var.region

  # Attach to the custom VPC and subnet.
  network    = google_compute_network.sovra.name
  subnetwork = google_compute_subnetwork.sovra.name

  # A cluster cannot be created without a node pool, but all nodes here live
  # in the separately managed pool. Start with the smallest default pool and
  # have it removed right after creation.
  initial_node_count       = 1
  remove_default_node_pool = true

  # VPC-native networking: pods and services draw addresses from the subnet's
  # named secondary ranges.
  ip_allocation_policy {
    services_secondary_range_name = "services"
    cluster_secondary_range_name  = "pods"
  }

  # Lets Kubernetes service accounts act as GCP service accounts.
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  # Enforce the project's Binary Authorization policy at admission time.
  binary_authorization {
    evaluation_mode = "PROJECT_SINGLETON_POLICY_ENFORCE"
  }

  # NetworkPolicy enforcement via Calico; the matching addon is enabled below.
  network_policy {
    provider = "CALICO"
    enabled  = true
  }

  addons_config {
    network_policy_config {
      disabled = false
    }
    horizontal_pod_autoscaling {
      disabled = false
    }
    http_load_balancing {
      disabled = false
    }
  }

  # Ship system and workload logs to Cloud Logging.
  logging_config {
    enable_components = ["SYSTEM_COMPONENTS", "WORKLOADS"]
  }

  # System metrics plus Managed Service for Prometheus.
  monitoring_config {
    managed_prometheus {
      enabled = true
    }
    enable_components = ["SYSTEM_COMPONENTS"]
  }

  release_channel {
    channel = "REGULAR"
  }

  # Daily maintenance window starting at 03:00.
  maintenance_policy {
    daily_maintenance_window {
      start_time = "03:00"
    }
  }
}

# Node Pool
# The pool is regional (location = var.region), so every node count below is
# per zone, not a cluster-wide total.
resource "google_container_node_pool" "sovra" {
  name     = "${var.cluster_name}-node-pool"
  location = var.region
  cluster  = google_container_cluster.sovra.name

  # Only the starting size is set. `node_count` must not be combined with
  # `autoscaling`: Terraform would keep resetting the size the cluster
  # autoscaler has chosen.
  initial_node_count = var.node_count

  autoscaling {
    min_node_count = 3
    max_node_count = 10
  }

  management {
    auto_repair  = true
    auto_upgrade = true
  }

  node_config {
    preemptible  = false
    machine_type = var.machine_type
    disk_size_gb = 100
    disk_type    = "pd-standard"

    # Google recommends custom service accounts that have cloud-platform scope and permissions granted via IAM Roles.
    service_account = google_service_account.gke_nodes.email
    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform"
    ]

    # Expose the GKE metadata server so pods use Workload Identity instead of
    # the node's credentials.
    workload_metadata_config {
      mode = "GKE_METADATA"
    }

    # Shielded GKE Nodes: Secure Boot is off unless requested explicitly.
    shielded_instance_config {
      enable_secure_boot          = true
      enable_integrity_monitoring = true
    }

    metadata = {
      disable-legacy-endpoints = "true"
    }

    labels = {
      environment = "production"
      application = "sovra"
    }

    # "gke-node" is the target tag of the public API firewall rule.
    tags = ["sovra", "gke-node"]
  }

  # initial_node_count is create-only; changing it would force replacement of
  # the pool, so later drift caused by autoscaling or resizing is ignored.
  lifecycle {
    ignore_changes = [initial_node_count]
  }
}

# Dedicated identity for the GKE nodes, used in place of the default Compute
# Engine service account.
resource "google_service_account" "gke_nodes" {
  display_name = "GKE Nodes Service Account for ${var.cluster_name}"
  account_id   = "${var.cluster_name}-gke-sa"
}

# Nodes may write log entries.
resource "google_project_iam_member" "gke_nodes_logging" {
  member  = "serviceAccount:${google_service_account.gke_nodes.email}"
  role    = "roles/logging.logWriter"
  project = var.project_id
}

# Nodes may write metrics.
resource "google_project_iam_member" "gke_nodes_monitoring" {
  member  = "serviceAccount:${google_service_account.gke_nodes.email}"
  role    = "roles/monitoring.metricWriter"
  project = var.project_id
}

# Nodes may read monitoring data.
resource "google_project_iam_member" "gke_nodes_monitoring_viewer" {
  member  = "serviceAccount:${google_service_account.gke_nodes.email}"
  role    = "roles/monitoring.viewer"
  project = var.project_id
}

# Cloud SQL Instance
# Private-IP-only PostgreSQL 15 instance, reachable through the VPC peering
# created by google_service_networking_connection.private_vpc_connection.
resource "google_sql_database_instance" "sovra" {
  name             = "sovra-postgres"
  database_version = "POSTGRES_15"
  region           = var.region

  settings {
    tier              = var.db_tier
    availability_type = var.db_ha_enabled ? "REGIONAL" : "ZONAL"
    disk_size         = var.db_disk_size
    disk_type         = "PD_SSD"
    disk_autoresize   = true

    backup_configuration {
      enabled                        = true
      start_time                     = "03:00"
      point_in_time_recovery_enabled = true
      transaction_log_retention_days = 7
      backup_retention_settings {
        retained_backups = 30
      }
    }

    maintenance_window {
      day  = 7  # Sunday
      hour = 3
    }

    ip_configuration {
      ipv4_enabled    = false
      private_network = google_compute_network.sovra.id
      require_ssl     = true
    }

    database_flags {
      name  = "max_connections"
      value = "100"
    }

    # Cloud SQL takes shared_buffers as an integer number of 8 kB pages; unit
    # suffixes such as "256MB" are rejected. 245760 pages = 1920 MB, i.e. 25%
    # of the 7680 MB in the documented db-custom-2-7680 tier.
    # NOTE(review): Cloud SQL bounds this flag relative to instance memory -
    # re-check the value if db_tier changes.
    database_flags {
      name  = "shared_buffers"
      value = "245760"
    }

    insights_config {
      query_insights_enabled  = true
      query_string_length     = 1024
      record_application_tags = true
    }
  }

  # Blocks `terraform destroy`; set to false and apply before tearing down.
  deletion_protection = true

  # The private services connection must exist before a private-IP instance
  # can be created.
  depends_on = [google_service_networking_connection.private_vpc_connection]
}

# Private services access for Cloud SQL: reserve an internal /16 inside the
# VPC for peering ...
resource "google_compute_global_address" "private_ip_address" {
  network       = google_compute_network.sovra.id
  name          = "sovra-private-ip"
  address_type  = "INTERNAL"
  purpose       = "VPC_PEERING"
  prefix_length = 16
}

# ... and peer the VPC with the service networking API using that range.
resource "google_service_networking_connection" "private_vpc_connection" {
  service                 = "servicenetworking.googleapis.com"
  network                 = google_compute_network.sovra.id
  reserved_peering_ranges = [google_compute_global_address.private_ip_address.name]
}

# Application database on the Cloud SQL instance.
resource "google_sql_database" "sovra" {
  instance = google_sql_database_instance.sovra.name
  name     = "sovra"
}

# Generated 32-character password (special characters allowed) for the
# application user.
resource "random_password" "db_password" {
  special = true
  length  = 32
}

# Application database user, authenticated with the generated password.
resource "google_sql_user" "sovra" {
  instance = google_sql_database_instance.sovra.name
  name     = "sovra"
  password = random_password.db_password.result
}

# Service Account for Cloud SQL Proxy
#
# account_id is limited to 30 characters. With the documented cluster_name
# ("sovra-production") the previous "-cloudsql-proxy" suffix produced a
# 31-character ID that the API rejects, so a shorter suffix is used. The
# resulting email is <account_id>@<project_id>.iam.gserviceaccount.com; use it
# wherever the proxy's GCP service account is referenced.
resource "google_service_account" "cloudsql_proxy" {
  account_id   = "${var.cluster_name}-sql-proxy"
  display_name = "Cloud SQL Proxy Service Account"
}

# Allow the proxy identity to connect to Cloud SQL instances in the project.
resource "google_project_iam_member" "cloudsql_proxy" {
  project = var.project_id
  role    = "roles/cloudsql.client"
  member  = "serviceAccount:${google_service_account.cloudsql_proxy.email}"
}

# Workload Identity: let the Kubernetes service account sovra/cloudsql-proxy
# impersonate the GCP service account above.
resource "google_service_account_iam_member" "cloudsql_proxy_workload_identity" {
  service_account_id = google_service_account.cloudsql_proxy.name
  role               = "roles/iam.workloadIdentityUser"
  member             = "serviceAccount:${var.project_id}.svc.id.goog[sovra/cloudsql-proxy]"
}

# Allow all TCP, UDP and ICMP traffic that originates from the node, pod and
# service ranges.
resource "google_compute_firewall" "sovra_allow_internal" {
  network       = google_compute_network.sovra.name
  name          = "${var.network_name}-allow-internal"
  source_ranges = [var.subnet_cidr, var.pods_cidr, var.services_cidr]

  allow {
    protocol = "tcp"
  }

  allow {
    protocol = "udp"
  }

  allow {
    protocol = "icmp"
  }
}

# Allow TCP 443 and 8443 from any address to instances tagged "gke-node".
resource "google_compute_firewall" "sovra_allow_sovra_api" {
  network       = google_compute_network.sovra.name
  name          = "${var.network_name}-allow-sovra-api"
  target_tags   = ["gke-node"]
  source_ranges = ["0.0.0.0/0"]

  allow {
    ports    = ["443", "8443"]
    protocol = "tcp"
  }
}

# Outputs
# Values marked sensitive are masked in CLI output; read them with
# `terraform output -raw <name>`.
output "cluster_endpoint" {
  description = "GKE cluster endpoint"
  sensitive   = true
  value       = google_container_cluster.sovra.endpoint
}

output "cluster_ca_certificate" {
  description = "GKE cluster CA certificate"
  sensitive   = true
  value       = google_container_cluster.sovra.master_auth[0].cluster_ca_certificate
}

output "cloudsql_connection_name" {
  description = "Cloud SQL connection name"
  value       = google_sql_database_instance.sovra.connection_name
}

output "cloudsql_private_ip" {
  description = "Cloud SQL private IP address"
  value       = google_sql_database_instance.sovra.private_ip_address
}

output "db_password" {
  description = "Database password"
  sensitive   = true
  value       = random_password.db_password.result
}

Workload Identity Setup

# Create Kubernetes Service Account
kubectl create serviceaccount cloudsql-proxy -n sovra

# Look up the proxy's GCP service account by the display name Terraform gives
# it, rather than hard-coding an email that depends on cluster_name.
GSA_EMAIL=$(gcloud iam service-accounts list \
  --project=sovra-production-123456 \
  --filter='displayName="Cloud SQL Proxy Service Account"' \
  --format='value(email)')

# Annotate with GCP Service Account
kubectl annotate serviceaccount cloudsql-proxy -n sovra \
  iam.gke.io/gcp-service-account="${GSA_EMAIL:?Cloud SQL Proxy service account not found}"

# Grant Workload Identity binding
# (Terraform already creates this binding; run it only if the binding is
# managed outside Terraform.)
gcloud iam service-accounts add-iam-policy-binding \
  "${GSA_EMAIL:?Cloud SQL Proxy service account not found}" \
  --role roles/iam.workloadIdentityUser \
  --member "serviceAccount:sovra-production-123456.svc.id.goog[sovra/cloudsql-proxy]"

Monitoring

Cloud Monitoring

# Enable GKE logging and monitoring (same components as the Terraform config).
# The legacy --enable-cloud-monitoring flag is deprecated and must not be mixed
# with --monitoring; the WORKLOAD monitoring component is deprecated as well -
# workload metrics are collected by Managed Prometheus instead.
gcloud container clusters update sovra-production \
  --region europe-west1 \
  --logging=SYSTEM,WORKLOAD \
  --monitoring=SYSTEM

# View logs
gcloud logging read "resource.type=k8s_cluster AND resource.labels.cluster_name=sovra-production"

Managed Prometheus

Prometheus metrics are automatically collected to Cloud Monitoring.

# Query metrics
# gcloud has no time-series command; call the Cloud Monitoring API
# (projects.timeSeries.list) directly with an access token.
curl -sS -G \
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \
  "https://monitoring.googleapis.com/v3/projects/sovra-production-123456/timeSeries" \
  --data-urlencode 'filter=metric.type="kubernetes.io/container/cpu/core_usage_time"' \
  --data-urlencode 'interval.startTime=2026-01-30T00:00:00Z' \
  --data-urlencode 'interval.endTime=2026-01-30T23:59:59Z'

Security Hardening

Enable Binary Authorization

# Create policy
# Write policy.yaml first, then import it once.
#
# The Cloud SQL Proxy image is allowlisted explicitly: it is pulled from
# gcr.io/cloud-sql-connectors and carries no attestation from sovra-attestor,
# so the default rule would otherwise block the proxy Deployment.
cat > policy.yaml << 'EOF'
admissionWhitelistPatterns:
- namePattern: gcr.io/sovra-production-123456/*
- namePattern: gcr.io/cloud-sql-connectors/*
defaultAdmissionRule:
  requireAttestationsBy:
  - projects/sovra-production-123456/attestors/sovra-attestor
  evaluationMode: REQUIRE_ATTESTATION
  enforcementMode: ENFORCED_BLOCK_AND_AUDIT_LOG
globalPolicyEvaluationMode: ENABLE
EOF

gcloud container binauthz policy import policy.yaml

Enable Shielded GKE Nodes

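# Shielded GKE Nodes are enabled by default on new clusters, which provides:
# - vTPM
# - Integrity Monitoring
# Secure Boot is off by default; it is enabled per node pool in Terraform with
# node_config { shielded_instance_config { enable_secure_boot = true } }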

Backup

Cloud SQL Backups

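Automated backups are configured in Terraform (backup_configuration): daily at 03:00 UTC, point-in-time recovery enabled, 7 days of transaction logs retained, and the 30 most recent backups kept.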

Manual Backup

# Create on-demand backup
gcloud sql backups create \
  --instance=sovra-postgres \
  --description="Pre-upgrade backup"

# List backups (the ID column is the <BACKUP_ID> used below)
gcloud sql backups list --instance=sovra-postgres

# Restore from backup - this overwrites the current data on the target.
# --restore-instance is the instance to restore INTO (required);
# --backup-instance is the instance the backup was taken FROM.
gcloud sql backups restore <BACKUP_ID> \
  --restore-instance=sovra-postgres \
  --backup-instance=sovra-postgres

Application Backup

# Backup Kubernetes resources
# Note: `kubectl get all` covers only a subset of resource kinds (for example
# it omits ConfigMaps, Secrets, Ingresses and CRDs).
kubectl get all --all-namespaces -o yaml > k8s-backup.yaml

# Backup secrets
# The dump holds secret values base64-encoded, not encrypted; store it
# somewhere access-controlled.
kubectl get secrets -n sovra -o yaml > secrets-backup.yaml

Disaster Recovery

Multi-Region Setup

For HA across regions:

# Deploy to multiple regions
module "primary" {
  source = "./modules/sovra"
  region = "europe-west1"
}

module "secondary" {
  source = "./modules/sovra"
  region = "europe-west3"
}

# Cross-region Cloud SQL read replica.
# replica_configuration.failover_target applies to legacy MySQL failover
# replicas and is not supported for PostgreSQL, so it is omitted. For disaster
# recovery, promote this replica to a standalone primary instead, e.g.
#   gcloud sql instances promote-replica sovra-postgres-replica
# NOTE(review): the primary is private-IP only; give the replica a matching
# ip_configuration for the secondary region's network.
resource "google_sql_database_instance" "replica" {
  name                 = "sovra-postgres-replica"
  master_instance_name = module.primary.sql_instance_name
  region               = "europe-west3"
  database_version     = "POSTGRES_15"

  settings {
    tier = var.db_tier
  }
}

Cleanup

# Destroy infrastructure
# The Cloud SQL instance is created with deletion_protection = true, so
# `terraform destroy` fails until protection is lifted:
#   1. set deletion_protection = false on google_sql_database_instance.sovra
#      (with google provider 5.x the GKE cluster also defaults to
#      deletion_protection = true and needs the same change)
#   2. terraform apply
#   3. run the destroy below
terraform destroy

# Verify deletion
gcloud container clusters list
gcloud sql instances list

Troubleshooting

Cloud SQL Connection Issues

# Open a psql session through the proxy Service from a throwaway pod
kubectl run debug --stdin --tty --rm --restart=Never --image=postgres:15 -- \
  psql --host=cloudsql-proxy.sovra.svc.cluster.local --username=sovra --dbname=sovra

# Show the proxy's own logs
kubectl logs --namespace=sovra --selector=app=cloudsql-proxy

GKE Node Issues

# Check node status
# `describe` shows the node's conditions and recent events.
kubectl get nodes
kubectl describe node <node-name>

# Check node logs
# Serial console output of the underlying Compute Engine VM (boot and kernel
# messages).
gcloud compute instances get-serial-port-output <instance-name>

Workload Identity Issues

# Verify service account binding
# Look the service account up by display name so the command does not depend
# on a hard-coded email.
GSA_EMAIL=$(gcloud iam service-accounts list \
  --project=sovra-production-123456 \
  --filter='displayName="Cloud SQL Proxy Service Account"' \
  --format='value(email)')
gcloud iam service-accounts get-iam-policy \
  "${GSA_EMAIL:?Cloud SQL Proxy service account not found}"

# Test from pod
# `kubectl run --serviceaccount` was removed from kubectl; set the service
# account through --overrides instead. The output should list the GCP service
# account, not the node's identity.
kubectl run -it --rm debug \
  --image=google/cloud-sdk:slim \
  --namespace=sovra \
  --restart=Never \
  --overrides='{"spec":{"serviceAccountName":"cloudsql-proxy"}}' \
  -- gcloud auth list