networks:
  monitoring-net:
    driver: bridge

services:

  # ---------------------------------------------------------
  # Alertmanager
  # ---------------------------------------------------------
  alertmanager:
    image: quay.io/prometheus/alertmanager:v0.31.1@sha256:88b605de9aba0410775c1eb3438f951115054e0d307f23f274a4c705f51630c1
    container_name: alertmanager
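    # The inline 'http_proxy= HTTP_PROXY=' prefix (used in all healthchecks
    # below) keeps the probe from being routed through an inherited proxy.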
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://127.0.0.1:9093/-/healthy || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
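    # Blank the proxy variables so in-cluster traffic is never routed
    # through a corporate proxy (repeated for each internal-only service).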
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    # Alertmanager runs as non-root (uid 65534) by default; we set it
    # explicitly here for clarity. Running as non-root limits the impact
    # of a compromised container.
    user: "65534:65534"
    networks:
      - monitoring-net
    restart: unless-stopped
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:Z
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.alertmanager.tls=true"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"

  # ---------------------------------------------------------
  # Alloy
  # ---------------------------------------------------------
  alloy:
    image: docker.io/grafana/alloy:v1.16.1@sha256:51aeb9d829239345070619dad3edd6873186f913c84f45b365b74574fcb38ec0
    container_name: alloy
    # Alloy needs root to read the host's journal logs. In production,
    # prefer a narrower grant, such as membership of the host's
    # systemd-journal group (see the sketch below) or a custom security
    # profile.
    user: "0:0"
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    command:
      - run
      - --server.http.listen-addr=0.0.0.0:12345
      - --storage.path=/var/lib/alloy/data
      - /etc/alloy/config.alloy
    restart: unless-stopped
    # label=disable turns off SELinux labeling for this container. Alloy
    # reads host journal files whose SELinux contexts differ from the
    # container's, and labeling would otherwise block that access. This
    # weakens isolation, so understand the risks; a tailored policy (see
    # the udica sketch below) is the safer option.
    security_opt:
      - label=disable
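    # Tighter alternative (sketch): generate a container-specific SELinux
    # policy with udica instead of disabling labeling, roughly:
    #   podman inspect alloy | udica alloy_policy
    #   semodule -i alloy_policy.cil /usr/share/udica/templates/*.cil
    #   # then replace label=disable with:
    #   # security_opt: ["label=type:alloy_policy.process"]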
    volumes:
      - ./alloy/config.alloy:/etc/alloy/config.alloy:ro
      - /var/log/journal:/var/log/journal:ro
      - /run/log/journal:/run/log/journal:ro
      - /etc/machine-id:/etc/machine-id:ro
      - /run/user/${UID:-1000}/podman/podman.sock:/var/run/docker.sock:ro
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alloy.rule=Host(`alloy.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.alloy.tls=true"
      - "traefik.http.services.alloy.loadbalancer.server.port=12345"

  # ---------------------------------------------------------
  # Blackbox Exporter
  # ---------------------------------------------------------
  blackbox-exporter:
    image: quay.io/prometheus/blackbox-exporter:v0.28.0@sha256:e753ff9f3fc458d02cca5eddab5a77e1c175eee484a8925ac7d524f04366c2fc
    container_name: blackbox-exporter
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    restart: unless-stopped
    volumes:
      - ./blackbox/blackbox.yml:/config/blackbox.yml:Z
    command:
      - '--config.file=/config/blackbox.yml'
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.blackbox.rule=Host(`blackbox.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.blackbox.tls=true"
      - "traefik.http.services.blackbox.loadbalancer.server.port=9115"

  # ---------------------------------------------------------
  # Grafana
  # ---------------------------------------------------------
  grafana:
    image: docker.io/grafana/grafana:13.0.1@sha256:0f86bada30d65ef9d0183b90c1e2682ac92d53d95da8bed322b984ea78a4a73a
    container_name: grafana
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://grafana:3000/api/health || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    # Grafana runs as non-root (uid 472) by default; we set it explicitly
    # here for clarity. Running as non-root limits the impact of a
    # compromised container.
    user: "472:472"
    networks:
      - monitoring-net
    expose:
      - 3000
    restart: unless-stopped
    depends_on:
      - prometheus
      - loki
      - tempo
    environment:
      - HTTP_PROXY=${HTTP_PROXY}
      - HTTPS_PROXY=${HTTPS_PROXY}
      - NO_PROXY=${NO_PROXY}
      - http_proxy=${http_proxy}
      - https_proxy=${https_proxy}
      - no_proxy=${no_proxy}
      - GF_SERVER_ROOT_URL=https://grafana.${DOMAIN:-localhost}
      - GF_SERVER_DOMAIN=grafana.${DOMAIN:-localhost}
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_TRACING_ENABLED=true
      - GF_TRACING_PROVIDER=opentelemetry
      - GF_TRACING_OPENTELEMETRY_OTLP_ADDRESS=otel-collector:4317
      - GF_TRACING_OPENTELEMETRY_OTLP_PROTOCOL=grpc
      - GF_TRACING_OPENTELEMETRY_PROPAGATION=w3c
      - GF_TRACING_OPENTELEMETRY_ENABLED=true
      - GF_DIAGNOSTICS_PROFILING_ENABLED=true
      - GF_DIAGNOSTICS_PROFILING_PORT=6060
      - GF_DIAGNOSTICS_PROFILING_ADDR=0.0.0.0
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls=true"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
      - "traefik.http.middlewares.grafana-headers.headers.customrequestheaders.traceparent="
    volumes:
      - grafana-data:/var/lib/grafana:Z
      - ./grafana-provisioning:/etc/grafana/provisioning:Z

  # ---------------------------------------------------------
  # Karma
  # ---------------------------------------------------------
  karma:
    image: ghcr.io/prymitive/karma:v0.128@sha256:cae0afb8d083756a7a44413480847fa59c072659d909734924a10640e1de600d
    container_name: karma
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - CONFIG_FILE=/etc/karma.yaml
    volumes:
      - ./karma/karma.yaml:/etc/karma.yaml:ro,Z
    restart: unless-stopped
    depends_on:
      - alertmanager
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.karma.rule=Host(`karma.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.karma.tls=true"
      - "traefik.http.services.karma.loadbalancer.server.port=8080"

  # ---------------------------------------------------------
  # Keep Database (PostgreSQL)
  # ---------------------------------------------------------
  keep-db:
    image: docker.io/library/postgres:18-alpine@sha256:54451ecb8ab38c24c3ec123f2fd501303a3a1856a5c66e98cecf2460d5e1e9d7
    container_name: keep-db
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${KEEP_DB_USER} -d ${KEEP_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - POSTGRES_USER=${KEEP_DB_USER}
      - POSTGRES_PASSWORD=${KEEP_DB_PASSWORD}
      - POSTGRES_DB=${KEEP_DB_NAME}
    volumes:
      - keep-db-data:/var/lib/postgresql:Z
    restart: unless-stopped

  # ---------------------------------------------------------
  # Keep Backend (API & AIOps Engine)
  # ---------------------------------------------------------
  keep-backend:
    image: us-central1-docker.pkg.dev/keephq/keep/keep-api:0.51.0@sha256:0e95b90210f2caeaf6a654daec274cfe43101cf1c4cdbc9cd1fec1a99e791af6
    container_name: keep-backend
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://127.0.0.1:8080/healthcheck || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    user: "0:0"
    networks:
      - monitoring-net
    depends_on:
      - keep-db
    environment:
      - HTTP_PROXY=${HTTP_PROXY}
      - HTTPS_PROXY=${HTTPS_PROXY}
      - NO_PROXY=${NO_PROXY}
      - http_proxy=${http_proxy}
      - https_proxy=${https_proxy}
      - no_proxy=${no_proxy}
      # Database connection
      - DATABASE_CONNECTION_STRING=postgresql://${KEEP_DB_USER}:${KEEP_DB_PASSWORD}@keep-db:5432/${KEEP_DB_NAME}
      - DISABLE_POSTHOG=true
      - ENABLE_AI=true
      - LOG_LEVEL=DEBUG
      - KEEP_APP_LOG_LEVEL=DEBUG
      - KEEP_API_URL=https://keep-api.${DOMAIN:-localhost}
      - KEEP_API_KEY=${KEEP_API_KEY}
      - KEEP_PROVIDERS_DIRECTORY=/config/providers
      - KEEP_ENABLE_AI=true
      - KEEP_WORKFLOWS_DIRECTORY=/config/workflows
      - PORT=8080
      - SECRET_MANAGER_TYPE=FILE
      - SECRET_MANAGER_DIRECTORY=/state
      # --- AI Configuration ---
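      # host.containers.internal resolves to the host, where an
      # OpenAI-compatible server is assumed to be listening on port 8111.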
      - OPENAI_BASE_URL=http://host.containers.internal:8111/v1
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - OPENAI_MODEL_NAME=gpt-5.1
      - PROMETHEUS_MULTIPROC_DIR=/prometheus_multiproc_dir
    volumes:
      - keep-state:/state:Z
      - ./keep/providers:/config/providers:ro,Z
      - ./keep/workflows:/config/workflows:ro,Z
    tmpfs:
      - /prometheus_multiproc_dir
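    # The Prometheus client's multiprocess mode writes per-worker metric
    # files into PROMETHEUS_MULTIPROC_DIR; a tmpfs keeps them writable and
    # ephemeral.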
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.keep-api.rule=Host(`keep-api.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.keep-api.tls=true"
      - "traefik.http.services.keep-api.loadbalancer.server.port=8080"

  # ---------------------------------------------------------
  # Keep Frontend (UI)
  # ---------------------------------------------------------
  keep-frontend:
    image: us-central1-docker.pkg.dev/keephq/keep/keep-ui:0.51.0@sha256:2041f65c7bbd64c2a800a4d11eedf0e99b89debfd6b88f0bbb109443eb6bcc23
    container_name: keep-frontend
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --spider http://keep-frontend:3000/api/healthcheck || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    networks:
      - monitoring-net
    depends_on:
      - keep-backend
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - API_URL=http://keep-backend:8080
      - ENABLE_AI=true
      - KEEP_ENABLE_AI=true
      - NEXTAUTH_URL=https://keep.${DOMAIN:-localhost}
      - NEXTAUTH_SECRET=${NEXTAUTH_SECRET}
      # --- AI Configuration ---
      - OPENAI_BASE_URL=http://host.containers.internal:8111/v1
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - OPENAI_MODEL_NAME=gpt-5.1
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.keep-ui.rule=Host(`keep.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.keep-ui.tls=true"
      - "traefik.http.services.keep-ui.loadbalancer.server.port=3000"

  # ---------------------------------------------------------
  # Loki
  # ---------------------------------------------------------
  loki:
    image: docker.io/grafana/loki:3.7.1@sha256:73e905b51a7f917f7a1075e4be68759df30226e03dcb3cd2213b989cc0dc8eb4
    container_name: loki
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    command:
      - "-config.file=/etc/loki/loki-config.yaml"
    restart: unless-stopped
    depends_on:
      minio:
        condition: service_healthy
      minio-init:
        condition: service_completed_successfully
    volumes:
      - ./loki/loki-config.yaml:/etc/loki/loki-config.yaml:Z
      - ./loki/rules:/loki/rules:Z
      - loki-wal:/loki/wal:Z,U
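    # The U option is Podman-specific: it chowns the volume's contents to
    # the container's uid/gid on start, so Loki can write its WAL.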
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.loki.tls=true"
      - "traefik.http.services.loki.loadbalancer.server.port=3100"

  # ---------------------------------------------------------
  # MinIO (S3 Compatible Storage, fork by pgsty)
  # ---------------------------------------------------------
  minio:
    image: docker.io/pgsty/minio:RELEASE.2026-04-17T00-00-00Z
    container_name: minio
    user: "0:0"
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - MINIO_ROOT_USER=${MINIO_ROOT_USER}
      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
      - MINIO_PROMETHEUS_AUTH_TYPE=public
      - MINIO_PROMETHEUS_URL=http://prometheus:9090
      - MINIO_PROMETHEUS_JOB_ID=minio-job
    command: server /data --console-address ":9001"
    volumes:
      - minio-data:/data:Z
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:9000/minio/health/live"]
      interval: 10s
      timeout: 5s
      retries: 5
    labels:
      - "traefik.enable=true"
      # MinIO Console (https://minio.${DOMAIN:-localhost})
      - "traefik.http.routers.minio.rule=Host(`minio.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.minio.service=minio-console"
      - "traefik.http.routers.minio.tls=true"
      - "traefik.http.services.minio-console.loadbalancer.server.port=9001"
      # S3 API (Internal use, but nice to have accessible)
      - "traefik.http.routers.minio-api.rule=Host(`s3.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.minio-api.service=minio-api"
      - "traefik.http.routers.minio-api.tls=true"
      - "traefik.http.services.minio-api.loadbalancer.server.port=9000"
      # 1. Link a new middleware to the existing minio-api router
      - "traefik.http.routers.minio-api.middlewares=minio-scraper-trick"
      # 2. Create the middleware: override the User-Agent header
      - "traefik.http.middlewares.minio-scraper-trick.headers.customrequestheaders.User-Agent=Prometheus/1.0"

  # ---------------------------------------------------------
  # MinIO Init (fork by pgsty)
  # ---------------------------------------------------------
  minio-init:
    image: docker.io/pgsty/mc:RELEASE.2026-04-17T00-00-00Z
    container_name: minio-init
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    # Initializes the MinIO buckets for Loki, Tempo and Pyroscope: the
    # script below waits until MinIO accepts connections, creates the
    # buckets, then exits. Dependent services use the condition
    # service_completed_successfully so they only start once the buckets
    # exist.
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: |
      /bin/sh -c "
      until mc alias set myminio http://minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD}; do echo '...waiting for minio...'; sleep 1; done;
      mc mb myminio/loki-data --ignore-existing;
      mc mb myminio/tempo-data --ignore-existing;
      mc mb myminio/pyroscope-data --ignore-existing;
      echo 'Buckets created';
      exit 0;
      "

  # ---------------------------------------------------------
  # NGINX (Static HTML)
  # ---------------------------------------------------------
  nginx:
    image: docker.io/library/nginx:alpine@sha256:5616878291a2eed594aee8db4dade5878cf7edcb475e59193904b198d9b830de
    container_name: nginx
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://127.0.0.1:80/ || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    volumes:
      - ./landing-page/index.html:/usr/share/nginx/html/index.html:Z
    restart: unless-stopped
    labels:
      - "traefik.enable=true"
      # Route to https://${DOMAIN:-localhost} (or http which is upgraded by traefik)
      - "traefik.http.routers.nginx.rule=Host(`${DOMAIN:-localhost}`)"
      - "traefik.http.routers.nginx.tls=true"
      # Nginx listens internally on port 80
      - "traefik.http.services.nginx.loadbalancer.server.port=80"

  # ---------------------------------------------------------
  # Node Exporter (EXCEPTION!)
  # ---------------------------------------------------------
  node-exporter:
    image: quay.io/prometheus/node-exporter:v1.10.2@sha256:337ff1d356b68d39cef853e8c6345de11ce7556bb34cda8bd205bcf2ed30b565
    container_name: node-exporter
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://host.containers.internal:9100/ || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    command:
      - '--path.rootfs=/host'
      - '--collector.systemd'
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    # MUST STAY ON HOST NETWORK TO ACCESS HOST METRICS!
    pid: host
    network_mode: host
    restart: unless-stopped
    volumes:
      - '/:/host:ro,rslave'
      - '/run/udev/data:/run/udev/data:ro'
      - '/run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro'
    # Note: Traefik config for this is in traefik/dynamic/services.yaml
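    # That file (sketch; see the repo for the actual contents) will look
    # roughly like this Traefik file-provider fragment:
    #   http:
    #     routers:
    #       node-exporter:
    #         rule: Host(`node-exporter.example.com`)
    #         tls: {}
    #         service: node-exporter
    #     services:
    #       node-exporter:
    #         loadBalancer:
    #           servers:
    #             - url: http://host.containers.internal:9100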

  # ---------------------------------------------------------
  # OpenTelemetry Collector
  # ---------------------------------------------------------
  otel-collector:
    image: docker.io/otel/opentelemetry-collector-contrib:0.151.0@sha256:d57bfe8eee2378f31cb1193239fbcac521d54a5a071fca2bfc106916a32b892d
    container_name: otel-collector
    command: ["--config=/etc/otel-config.yaml"]
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    volumes:
      - ./otel/otel-config.yaml:/etc/otel-config.yaml:Z
    restart: unless-stopped
    depends_on:
      - tempo
    labels:
      - "traefik.enable=true"
      # TCP router for OTLP gRPC (port 4317).
      # NOTE: clients must send SNI 'otel-collector.${DOMAIN:-localhost}'
      # (see the client sketch after these labels).
      - "traefik.tcp.routers.otel.rule=HostSNI(`otel-collector.${DOMAIN:-localhost}`)"
      - "traefik.tcp.routers.otel.entrypoints=otlp"
      - "traefik.tcp.routers.otel.tls=true"
      - "traefik.tcp.services.otel.loadbalancer.server.port=4317"
      # HTTP Router for OTel Collector Metrics (port 8888)
      - "traefik.http.routers.otel-collector.rule=Host(`otel-collector.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.otel-collector.tls=true"
      - "traefik.http.services.otel-collector.loadbalancer.server.port=8888"

  # ---------------------------------------------------------
  # Podman Exporter
  # ---------------------------------------------------------
  podman-exporter:
    image: quay.io/navidys/prometheus-podman-exporter:v1.21.0@sha256:2ebb9e09101d8cc1e28e3f306b56a722450918e628208435201ed39bd62403cb
    container_name: podman-exporter
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q -O /dev/null http://127.0.0.1:9882/metrics || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    # The exporter needs root to read the Podman socket and gather metrics
    # for all containers. In production, prefer a narrower grant such as
    # capabilities or a custom security profile.
    user: "0:0"
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - CONTAINER_HOST=unix:///run/podman/podman.sock
    restart: unless-stopped
    # label=disable turns off SELinux labeling so the container can reach
    # the host's Podman socket, whose SELinux context differs from the
    # container's. Use with care; see the udica sketch under the alloy
    # service for a tighter alternative.
    security_opt:
      - label=disable
    volumes:
      - /run/user/${UID:-1000}/podman/podman.sock:/run/podman/podman.sock:ro
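    # The rootless socket mounted above only exists once it is enabled on
    # the host (sketch, assuming uid 1000):
    #   systemctl --user enable --now podman.socket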
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.podman-exporter.rule=Host(`podman-exporter.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.podman-exporter.tls=true"
      - "traefik.http.services.podman-exporter.loadbalancer.server.port=9882"

  # ---------------------------------------------------------
  # Prometheus
  # ---------------------------------------------------------
  prometheus:
    image: quay.io/prometheus/prometheus:v3.10.0@sha256:7571a304e67fbd794be02422b13627dc7de822152f74e99e2bef95d29eceecde
    container_name: prometheus
    user: "65534:65534"
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://127.0.0.1:9090/-/healthy || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 15s
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=15d'
      - '--web.enable-lifecycle'
      - '--web.enable-remote-write-receiver'
    restart: unless-stopped
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:Z
      - ./prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:Z
      - prometheus-data:/prometheus:Z
    extra_hosts:
      - "host.containers.internal:host-gateway" 
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.prometheus.tls=true"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  # ---------------------------------------------------------
  # Pyroscope
  # ---------------------------------------------------------
  pyroscope:
    image: docker.io/grafana/pyroscope:2.0.2@sha256:644515bfa06cb3779b8666514b1bab2295bd11f1e1a53834bd9584a43f77c4f5
    container_name: pyroscope
    command:
      - "-config.file=/etc/pyroscope/pyroscope.yaml"
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    restart: unless-stopped
    depends_on:
      minio:
        condition: service_healthy
      minio-init:
        condition: service_completed_successfully
    volumes:
      - ./pyroscope/pyroscope.yaml:/etc/pyroscope/pyroscope.yaml:Z
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.pyroscope.rule=Host(`pyroscope.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.pyroscope.tls=true"
      - "traefik.http.services.pyroscope.loadbalancer.server.port=4040"

  # ---------------------------------------------------------
  # Tempo
  # ---------------------------------------------------------
  tempo:
    image: docker.io/grafana/tempo:2.10.5@sha256:ee21727732c7a7199cb71c3eee9153bbf23f9b0b87619f0555a0cf21a67f1a33
    container_name: tempo
    command: 
      - "-config.file=/etc/tempo.yaml"
    user: "0:0"
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    networks:
      - monitoring-net
    # OTLP traffic reaches Tempo via the Collector or Traefik
    restart: unless-stopped
    depends_on:
      minio:
        condition: service_healthy
      minio-init:
        condition: service_completed_successfully
    volumes:
      - ./tempo/tempo.yaml:/etc/tempo.yaml:Z
      - tempo-wal:/var/tempo/wal:Z
    # Tempo is normally not exposed publicly, but on request:
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.tempo.rule=Host(`tempo.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.tempo.tls=true"
      - "traefik.http.services.tempo.loadbalancer.server.port=3200"

  # ---------------------------------------------------------
  # Traefik (Reverse Proxy & Ingress)
  # ---------------------------------------------------------
  traefik:
    image: docker.io/library/traefik:v3.7.0@sha256:eb328e2c806c53aafbbace6c451fa54d268961261a85452fcf0fb752a30c17be
    container_name: traefik
    healthcheck:
      test: ["CMD-SHELL", "http_proxy= HTTP_PROXY= wget -q --tries=1 --spider http://127.0.0.1:8082/ping || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3
    # Traefik needs to run as root to bind to low ports (80 and 443) and
    # to access the Podman socket for dynamic configuration.
    security_opt:
      - label=disable
    ports:
      - "80:80"     # HTTP -> HTTPS
      - "443:443"   # HTTPS Services
      - "4317:4317" # OTLP gRPC (TCP/TLS)
    volumes:
      - /run/user/${UID:-1000}/podman/podman.sock:/var/run/docker.sock:ro,Z
      - ./traefik/traefik.yaml:/etc/traefik/traefik.yaml:ro,Z
      # Mount the directory for dynamic config (node-exporter)
      - ./traefik/dynamic:/etc/traefik/dynamic:ro,Z
      - ./traefik/certs:/etc/traefik/certs:ro,Z
    networks:
      - monitoring-net
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
    # Ensure that Traefik can resolve 'host.containers.internal' to the host IP
    # This is needed for the node-exporter proxy
    extra_hosts:
      - "host.containers.internal:host-gateway"
    labels:
      - "traefik.enable=true"
      # Dashboard routing (https://traefik.${DOMAIN:-localhost})
      - "traefik.http.routers.api.rule=Host(`traefik.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.api.service=api@internal"
      - "traefik.http.routers.api.tls=true"
      - "traefik.http.routers.api.entrypoints=websecure"
      # Traefik Metrics (Publicly viewable via browser)
      - "traefik.http.routers.traefik-metrics.rule=Host(`traefik-metrics.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.traefik-metrics.service=prometheus@internal"
      - "traefik.http.routers.traefik-metrics.tls=true"

  # ---------------------------------------------------------
  # Webhook Tester
  # ---------------------------------------------------------
  webhook-tester:
    image: docker.io/tarampampam/webhook-tester:2.3.0@sha256:85818267b450d3d386cad6510c561e09b974183ed2832c373bc83b125fc1b221
    container_name: webhook-tester
    networks:
      - monitoring-net
    restart: unless-stopped
    environment:
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - http_proxy=
      - https_proxy=
      - AUTO_CREATE_SESSIONS=true
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.webhook.rule=Host(`webhook-tester.${DOMAIN:-localhost}`)"
      - "traefik.http.routers.webhook.tls=true"
      - "traefik.http.services.webhook.loadbalancer.server.port=8080"

volumes:
  # Volumes for persistent data storage
  prometheus-data:
  grafana-data:
  loki-wal:
  tempo-wal:
  minio-data:
  keep-db-data:
  keep-state:
