Skip to content

Commit

Permalink
feat: added support for stackdriver and otel metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
amanpruthi committed Jun 4, 2024
1 parent 49ddc33 commit 94a594a
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 13 deletions.
78 changes: 66 additions & 12 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@ locals {
}

module "service_accounts" {
source = "./modules/service_accounts"
namespace = var.namespace
bucket_name = var.bucket_name
depends_on = [module.project_factory_project_services]
source = "./modules/service_accounts"
namespace = var.namespace
bucket_name = var.bucket_name
account_id = var.workload_account_id
service_account_name = var.service_account_name
enable_stackdriver = var.enable_stackdriver
depends_on = [module.project_factory_project_services]
}

module "kms" {
Expand Down Expand Up @@ -77,14 +80,15 @@ locals {
}

module "app_gke" {
source = "./modules/app_gke"
namespace = var.namespace
machine_type = coalesce(try(local.deployment_size[var.size].node_instance, null), var.gke_machine_type)
node_count = coalesce(try(local.deployment_size[var.size].node_count, null), var.gke_node_count)
network = local.network
subnetwork = local.subnetwork
service_account = module.service_accounts.service_account
depends_on = [module.project_factory_project_services]
source = "./modules/app_gke"
namespace = var.namespace
machine_type = coalesce(try(local.deployment_size[var.size].node_instance, null), var.gke_machine_type)
node_count = coalesce(try(local.deployment_size[var.size].node_count, null), var.gke_node_count)
network = local.network
subnetwork = local.subnetwork
service_account = module.service_accounts.service_account
create_workload_identity = var.enable_stackdriver
depends_on = [module.project_factory_project_services]
}

module "app_lb" {
Expand Down Expand Up @@ -186,6 +190,8 @@ locals {
} : {}
}

# Client configuration of the active Google provider; its `project` attribute
# is passed to the stackdriver chart values as `projectId`.
data "google_client_config" "current" {}

module "wandb" {
source = "wandb/wandb/helm"
version = "1.2.0"
Expand Down Expand Up @@ -241,6 +247,54 @@ module "wandb" {
"ingress.gcp.kubernetes.io/pre-shared-cert" = module.app_lb.certificate
}
}
# OTel RDS and Redis metrics require operator-wandb chart version 0.13.8 or later (the stackdriver subchart).
stackdriver = var.enable_stackdriver ? {
install = true
stackdriver = {
projectId = data.google_client_config.current.project
}
serviceAccount = { annotations = { "iam.gke.io/gcp-service-account" = module.service_accounts.monitoring_role } }
} : {
install = false
stackdriver = {}
serviceAccount = {}
}

otel = {
daemonset = var.enable_stackdriver ? {
config = {
receivers = {
prometheus = {
config = {
scrape_configs = [
{ job_name = "stackdriver"
scheme = "http"
metrics_path = "/metrics"
dns_sd_configs = [
{ names = ["stackdriver"]
type = "A"
port = 9255
}
]
}
]
}
}
}
service = {
pipelines = {
metrics = {
receivers = ["hostmetrics", "k8s_cluster", "kubeletstats", "prometheus"]
}
}
}
}
} : { config = {
receivers = {}
service = {}
}
}
}

redis = { install = false }
mysql = { install = false }
Expand Down
15 changes: 14 additions & 1 deletion modules/app_gke/main.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Look up the configuration of the active Google provider.
data "google_client_config" "current" {}

locals {
  # Project ID taken from the provider configuration; used to build the
  # workload identity pool name for the cluster.
  project_id = data.google_client_config.current.project
}

resource "google_container_cluster" "default" {
name = "${var.namespace}-cluster"

Expand All @@ -11,7 +17,14 @@ resource "google_container_cluster" "default" {
evaluation_mode = "PROJECT_SINGLETON_POLICY_ENFORCE"
}


# Workload identity is opt-in: a one-element list emits the block once,
# an empty list omits it from the cluster entirely.
dynamic "workload_identity_config" {
  for_each = var.create_workload_identity == true ? [1] : []

  content {
    workload_pool = "${local.project_id}.svc.id.goog"
  }
}

ip_allocation_policy {
cluster_ipv4_cidr_block = "/14"
services_ipv4_cidr_block = "/19"
Expand Down
5 changes: 5 additions & 0 deletions modules/app_gke/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ variable "parquet_wandb_env" {

variable "node_count" {
type = number
}

variable "create_workload_identity" {
  description = "Whether to enable Workload Identity on the GKE cluster. When true, a workload_identity_config block is added using the `<project>.svc.id.goog` workload pool."
  type        = bool
  # Off by default so callers that do not set the flag keep a cluster without Workload Identity.
  default     = false
}
24 changes: 24 additions & 0 deletions modules/service_accounts/main.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
data "google_client_config" "current" {}
data "google_project" "project" {}

resource "random_id" "main" {
# 30 bytes ensures that enough characters are generated to satisfy the service account ID requirements, regardless of
Expand Down Expand Up @@ -60,3 +61,26 @@ resource "google_project_iam_member" "secretmanager_admin" {
member = local.sa_member
role = "roles/secretmanager.admin"
}


# Google service account used for Stackdriver metrics collection through
# Workload Identity. Created only when Stackdriver support is enabled.
resource "google_service_account" "workload-identity-user-sa" {
  count = var.enable_stackdriver == true ? 1 : 0

  # Use the caller-supplied ID instead of a hard-coded literal so the
  # `account_id` input is actually honoured and several deployments can
  # coexist in one project.
  account_id   = var.account_id
  display_name = "Service Account For Workload Identity"
}

# Project-level grant of the Monitoring Viewer role to the Stackdriver
# service account. Exists only when Stackdriver support is enabled.
resource "google_project_iam_member" "monitoring-role" {
  count = var.enable_stackdriver == true ? 1 : 0

  project = local.project_id
  role    = "roles/monitoring.viewer"
  member  = "serviceAccount:${google_service_account.workload-identity-user-sa[count.index].email}"
}


# Allows the Kubernetes service account `default/<service_account_name>` to
# impersonate the Stackdriver Google service account via Workload Identity.
#
# The binding is attached to that single service account rather than to the
# project: a project-level roles/iam.workloadIdentityUser grant would let the
# Kubernetes service account impersonate every service account in the project.
# NOTE(review): the Kubernetes namespace is fixed to `default` here; confirm
# that matches where the chart creates the service account.
resource "google_service_account_iam_member" "workload_identity-role" {
  count = var.enable_stackdriver == true ? 1 : 0

  service_account_id = google_service_account.workload-identity-user-sa[count.index].name
  role               = "roles/iam.workloadIdentityUser"
  member             = "serviceAccount:${local.project_id}.svc.id.goog[default/${var.service_account_name}]"
}
4 changes: 4 additions & 0 deletions modules/service_accounts/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@ output "service_account" {
value = google_service_account.main

description = "The service account."
}

output "monitoring_role" {
  value = var.enable_stackdriver == true ? google_service_account.workload-identity-user-sa[0].email : null

  description = "Email of the Stackdriver workload-identity service account, or null when Stackdriver is disabled."
}
15 changes: 15 additions & 0 deletions modules/service_accounts/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,19 @@ variable "bucket_name" {
type = string
description = "Existing bucket the service account will access"
default = ""
}

variable "account_id" {
  description = "Account ID (the part of the email before the `@`) intended for the Stackdriver workload-identity Google service account."
  type        = string
}

variable "service_account_name" {
  description = "Name of the Kubernetes service account (in the `default` namespace) that is allowed to act as the Stackdriver Google service account through Workload Identity."
  type        = string
}

variable "enable_stackdriver" {
  description = "Whether to create the Stackdriver service account and its IAM bindings (monitoring viewer and workload identity user)."
  type        = bool
}
15 changes: 15 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,18 @@ variable "parquet_wandb_env" {
description = "Extra environment variables for W&B"
default = {}
}

variable "enable_stackdriver" {
  type        = bool
  description = "Enable Stackdriver metrics: creates the monitoring service account, turns on GKE Workload Identity, installs the stackdriver subchart and adds an OTel Prometheus scrape job for it."
  default     = false
}

variable "workload_account_id" {
  type        = string
  description = "Account ID passed to the service_accounts module for the Stackdriver workload-identity Google service account."
  default     = "stackdriver"
}

variable "service_account_name" {
  type        = string
  description = "Name of the Kubernetes service account that is bound to the Stackdriver Google service account via Workload Identity."
  default     = "stackdriver"
}

0 comments on commit 94a594a

Please sign in to comment.