first commit

This commit is contained in:
2026-03-16 00:02:58 +06:00
commit c11f0bd5bc
36 changed files with 11938 additions and 0 deletions

466
docker-compose.yml Normal file
View File

@@ -0,0 +1,466 @@
# =============================================================================
# BD FHIR National — Production docker-compose.yml
#
# USAGE:
# # First deploy:
# docker-compose --env-file .env up -d
#
# # Scale HAPI replicas (pilot: 1, production: 3):
# docker-compose --env-file .env up -d --scale hapi=3
#
# # Pull updated image and redeploy zero-downtime:
# docker-compose --env-file .env pull hapi
# docker-compose --env-file .env up -d --no-deps --scale hapi=3 hapi
#
# # View logs:
# docker-compose logs -f hapi
#
# REQUIRED: .env file in same directory as this file.
# Copy .env.example to .env and fill in all values before first deploy.
# NEVER commit .env to version control.
#
# =============================================================================
# SCALING ROADMAP
# =============================================================================
#
# PHASE 1 — Pilot (<50 vendors, <10,000 resources/day)
# hapi replicas: 1
# postgres-fhir: 1 instance, no replication
# postgres-audit: 1 instance, no replication
# pgbouncer: 1 instance
# Expected load: ~0.1 req/s average, ~5 req/s burst
# This docker-compose file as written.
#
# PHASE 2 — Regional rollout (<500 vendors, <100,000 resources/day)
# hapi replicas: 3 (--scale hapi=3, no other changes needed)
# postgres-fhir: Add streaming replication replica for read queries.
# Change: add postgres-fhir-replica service,
# configure HAPI read datasource to replica.
# postgres-audit: Add streaming replication replica.
# pgbouncer: Scale to 2 instances behind a VIP.
# nginx: Already stateless. Add second nginx instance.
# Session storage: Add Redis for distributed JWKS cache
# (currently per-replica in-memory — acceptable at Phase 1).
# Changes needed: Add postgres-fhir-replica, postgres-audit-replica,
# redis services. Update HAPI datasource config.
# Add pgBouncer VIP (HAProxy or keepalived).
#
# PHASE 3 — National rollout (>500 vendors, >1,000,000 resources/day)
# Move to Kubernetes (K8s) or Docker Swarm.
# docker-compose is not the right orchestrator at this scale.
# Kubernetes equivalents:
# hapi → Deployment with HPA (autoscale on CPU/RPS)
# postgres-fhir → Patroni cluster (HA PostgreSQL)
# postgres-audit → Patroni cluster or managed RDS equivalent
# pgbouncer → PgBouncer in K8s sidecar or pgBouncer-as-a-service
# nginx → Ingress controller (nginx-ingress or Traefik)
# At this phase, partition HAPI JPA tables (see V1 migration comments).
# Estimated trigger: 5M total resources in HFJ_RESOURCE.
#
# =============================================================================
# NOTE(review): the top-level "version" key is obsolete in the Compose
# Specification — Docker Compose v2 ignores it and prints a warning. Kept so
# the file still parses under the legacy docker-compose 1.x binary that the
# usage notes above invoke.
version: "3.9"
# =============================================================================
# NETWORKS
# Isolate services: only nginx is reachable from outside.
# hapi is not directly reachable — only via nginx.
# postgres services are not reachable from nginx — only from hapi/pgbouncer.
# =============================================================================
networks:
  # Frontend: nginx ↔ hapi
  frontend:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.1.0/24
  # Backend-fhir: hapi ↔ pgbouncer-fhir ↔ postgres-fhir
  backend-fhir:
    driver: bridge
    internal: true  # no external internet access
    ipam:
      config:
        - subnet: 172.20.2.0/24
  # Backend-audit: hapi ↔ pgbouncer-audit ↔ postgres-audit
  backend-audit:
    driver: bridge
    internal: true  # no external internet access
    ipam:
      config:
        - subnet: 172.20.3.0/24
# =============================================================================
# VOLUMES
# Named volumes survive container restarts and image upgrades.
# Never use bind mounts for database data in production.
# =============================================================================
volumes:
  postgres-fhir-data:
    driver: local
  postgres-audit-data:
    driver: local
  # NOTE(review): this single named volume is mounted by EVERY hapi replica
  # when scaling (--scale hapi=N). Log file names must be replica-unique
  # (e.g. include hostname) or replicas will clobber each other — confirm
  # the logging config inside ${HAPI_IMAGE}.
  hapi-logs:
    driver: local
# =============================================================================
# SERVICES
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # postgres-fhir
  # HAPI JPA store. Contains all FHIR resources.
  # Read-write datasource for HAPI.
  # ---------------------------------------------------------------------------
  postgres-fhir:
    image: postgres:15-alpine
    container_name: bd-postgres-fhir
    restart: unless-stopped
    networks:
      - backend-fhir
    volumes:
      - postgres-fhir-data:/var/lib/postgresql/data
      # Custom postgresql.conf tuned for HAPI workload
      - ./postgres/fhir/postgresql.conf:/etc/postgresql/postgresql.conf:ro
      # Init script: create application user with limited privileges.
      # NOTE: the official postgres image runs docker-entrypoint-initdb.d
      # scripts ONLY when the data volume is empty — later edits to init.sql
      # are not applied on redeploy; run them manually via psql.
      - ./postgres/fhir/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    environment:
      POSTGRES_DB: ${FHIR_DB_NAME}
      POSTGRES_USER: ${FHIR_DB_SUPERUSER}
      POSTGRES_PASSWORD: ${FHIR_DB_SUPERUSER_PASSWORD}
    # Point postgres at the mounted config instead of the default in PGDATA.
    command: postgres -c config_file=/etc/postgresql/postgresql.conf
    healthcheck:
      # ${...} below is substituted by docker-compose from .env when the file
      # is parsed, not by the container shell at runtime.
      test: ["CMD-SHELL", "pg_isready -U ${FHIR_DB_SUPERUSER} -d ${FHIR_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    # Resource limits — PostgreSQL should not starve HAPI of memory
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 512M
    # Do NOT expose port 5432 to host — only accessible via backend-fhir network
    # If you need psql access for maintenance, use:
    #   docker exec -it bd-postgres-fhir psql -U ${FHIR_DB_SUPERUSER} -d ${FHIR_DB_NAME}
  # ---------------------------------------------------------------------------
  # postgres-audit
  # Audit store. Contains audit_events and fhir_rejected_submissions.
  # INSERT-only datasource for HAPI (audit_writer role).
  # Completely separate from FHIR store — different container, different volume.
  # ---------------------------------------------------------------------------
  postgres-audit:
    image: postgres:15-alpine
    container_name: bd-postgres-audit
    restart: unless-stopped
    networks:
      - backend-audit
    volumes:
      - postgres-audit-data:/var/lib/postgresql/data
      - ./postgres/audit/postgresql.conf:/etc/postgresql/postgresql.conf:ro
      # NOTE: init.sql runs only on first initialisation of an empty data
      # volume (postgres image behaviour) — not on subsequent redeploys.
      - ./postgres/audit/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    environment:
      POSTGRES_DB: ${AUDIT_DB_NAME}
      POSTGRES_USER: ${AUDIT_DB_SUPERUSER}
      POSTGRES_PASSWORD: ${AUDIT_DB_SUPERUSER_PASSWORD}
    # Use the mounted, audit-tuned config rather than the default in PGDATA.
    command: postgres -c config_file=/etc/postgresql/postgresql.conf
    healthcheck:
      # ${...} is expanded by docker-compose from .env at parse time.
      test: ["CMD-SHELL", "pg_isready -U ${AUDIT_DB_SUPERUSER} -d ${AUDIT_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    deploy:
      resources:
        limits:
          memory: 1G
        reservations:
          memory: 256M
# ---------------------------------------------------------------------------
# pgbouncer-fhir
# Connection pool between HAPI and postgres-fhir.
# Session mode — required for Hibernate prepared statements.
# pool_size=20: at 3 HAPI replicas with HikariCP maxPool=5,
# max PostgreSQL connections = 15. pool_size=20 gives 5 headroom.
# ---------------------------------------------------------------------------
pgbouncer-fhir:
image: bitnami/pgbouncer:1.22.1
container_name: bd-pgbouncer-fhir
restart: unless-stopped
networks:
- backend-fhir
environment:
POSTGRESQL_HOST: postgres-fhir
POSTGRESQL_PORT: "5432"
POSTGRESQL_DATABASE: ${FHIR_DB_NAME}
POSTGRESQL_USERNAME: ${FHIR_DB_APP_USER}
POSTGRESQL_PASSWORD: ${FHIR_DB_APP_PASSWORD}
PGBOUNCER_DATABASE: ${FHIR_DB_NAME}
PGBOUNCER_POOL_MODE: session
PGBOUNCER_MAX_CLIENT_CONN: "100"
PGBOUNCER_DEFAULT_POOL_SIZE: "20"
PGBOUNCER_MIN_POOL_SIZE: "5"
PGBOUNCER_RESERVE_POOL_SIZE: "5"
PGBOUNCER_RESERVE_POOL_TIMEOUT: "5"
PGBOUNCER_SERVER_IDLE_TIMEOUT: "600"
PGBOUNCER_CLIENT_IDLE_TIMEOUT: "60"
# Logging — errors and connections only, not queries (query logging
# would log patient data to container stdout)
PGBOUNCER_LOG_CONNECTIONS: "1"
PGBOUNCER_LOG_DISCONNECTIONS: "1"
PGBOUNCER_LOG_POOLER_ERRORS: "1"
PGBOUNCER_VERBOSE: "0"
depends_on:
postgres-fhir:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "pg_isready -h localhost -p 5432 -U ${FHIR_DB_APP_USER}"]
interval: 10s
timeout: 5s
retries: 3
# ---------------------------------------------------------------------------
# pgbouncer-audit
# Connection pool between HAPI and postgres-audit.
# Smaller pool — audit writes are async and lower volume than FHIR writes.
# ---------------------------------------------------------------------------
pgbouncer-audit:
image: bitnami/pgbouncer:1.22.1
container_name: bd-pgbouncer-audit
restart: unless-stopped
networks:
- backend-audit
environment:
POSTGRESQL_HOST: postgres-audit
POSTGRESQL_PORT: "5432"
POSTGRESQL_DATABASE: ${AUDIT_DB_NAME}
POSTGRESQL_USERNAME: ${AUDIT_DB_WRITER_USER}
POSTGRESQL_PASSWORD: ${AUDIT_DB_WRITER_PASSWORD}
PGBOUNCER_DATABASE: ${AUDIT_DB_NAME}
PGBOUNCER_POOL_MODE: session
PGBOUNCER_MAX_CLIENT_CONN: "50"
PGBOUNCER_DEFAULT_POOL_SIZE: "10"
PGBOUNCER_MIN_POOL_SIZE: "2"
PGBOUNCER_RESERVE_POOL_SIZE: "2"
PGBOUNCER_SERVER_IDLE_TIMEOUT: "600"
PGBOUNCER_LOG_CONNECTIONS: "1"
PGBOUNCER_LOG_DISCONNECTIONS: "1"
PGBOUNCER_LOG_POOLER_ERRORS: "1"
PGBOUNCER_VERBOSE: "0"
depends_on:
postgres-audit:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "pg_isready -h localhost -p 5432 -U ${AUDIT_DB_WRITER_USER}"]
interval: 10s
timeout: 5s
retries: 3
  # ---------------------------------------------------------------------------
  # hapi
  # BD FHIR National HAPI overlay.
  # Stateless — no local state, all state in PostgreSQL.
  # Scale with: docker-compose up -d --scale hapi=3
  #
  # REPLICA SCALING NOTE:
  # When scaling to N replicas, ensure:
  #   1. pgbouncer-fhir pool_size >= N * HAPI_DB_POOL_SIZE (default: N*5)
  #   2. pgbouncer-audit pool_size >= N * HAPI_AUDIT_POOL_SIZE (default: N*2)
  #   3. nginx upstream hapi has all N replica IPs or uses DNS round-robin
  #      (see nginx.conf — uses Docker DNS service name which auto-discovers
  #      all replicas when using --scale)
  # ---------------------------------------------------------------------------
  hapi:
    image: ${HAPI_IMAGE}
    # container_name intentionally omitted — docker-compose appends _1, _2, _3
    # when scaling. A fixed container_name breaks --scale.
    restart: unless-stopped
    networks:
      - frontend
      - backend-fhir
      - backend-audit
    volumes:
      - hapi-logs:/app/logs
    environment:
      # Spring
      SPRING_PROFILES_ACTIVE: prod
      # FHIR datasource — routes through pgBouncer
      SPRING_DATASOURCE_URL: jdbc:postgresql://pgbouncer-fhir:5432/${FHIR_DB_NAME}
      SPRING_DATASOURCE_USERNAME: ${FHIR_DB_APP_USER}
      SPRING_DATASOURCE_PASSWORD: ${FHIR_DB_APP_PASSWORD}
      SPRING_DATASOURCE_DRIVER_CLASS_NAME: org.postgresql.Driver
      # HikariCP — FHIR datasource pool
      # 5 connections per replica × N replicas = N*5 total PostgreSQL connections
      # At 3 replicas: 15 connections → fits in pgBouncer pool_size=20
      SPRING_DATASOURCE_HIKARI_MAXIMUM_POOL_SIZE: "5"
      SPRING_DATASOURCE_HIKARI_MINIMUM_IDLE: "2"
      SPRING_DATASOURCE_HIKARI_CONNECTION_TIMEOUT: "30000"
      SPRING_DATASOURCE_HIKARI_IDLE_TIMEOUT: "600000"
      SPRING_DATASOURCE_HIKARI_MAX_LIFETIME: "1800000"
      SPRING_DATASOURCE_HIKARI_POOL_NAME: fhir-pool
      # pgBouncer session mode: prepared statements work.
      # Keep this false for compatibility — pgBouncer manages statement lifecycle.
      # NOTE(review): "...PREPARESTATEMENT" does not map to any standard
      # HikariCP or pgjdbc data-source property (pgjdbc controls server-side
      # prepared statements via prepareThreshold). Verify the HAPI overlay
      # actually reads this key — otherwise it is a silent no-op.
      SPRING_DATASOURCE_HIKARI_DATA_SOURCE_PROPERTIES_PREPARESTATEMENT: "false"
      # Audit datasource — INSERT-only, routes through pgBouncer
      AUDIT_DATASOURCE_URL: jdbc:postgresql://pgbouncer-audit:5432/${AUDIT_DB_NAME}
      AUDIT_DATASOURCE_USERNAME: ${AUDIT_DB_WRITER_USER}
      AUDIT_DATASOURCE_PASSWORD: ${AUDIT_DB_WRITER_PASSWORD}
      # HikariCP — audit datasource pool
      # Smaller pool — audit writes are async
      AUDIT_DATASOURCE_HIKARI_MAXIMUM_POOL_SIZE: "2"
      AUDIT_DATASOURCE_HIKARI_MINIMUM_IDLE: "1"
      AUDIT_DATASOURCE_HIKARI_POOL_NAME: audit-pool
      # Flyway — FHIR schema migrations
      SPRING_FLYWAY_URL: jdbc:postgresql://postgres-fhir:5432/${FHIR_DB_NAME}
      SPRING_FLYWAY_USER: ${FHIR_DB_SUPERUSER}
      SPRING_FLYWAY_PASSWORD: ${FHIR_DB_SUPERUSER_PASSWORD}
      # Flyway connects directly to PostgreSQL (bypassing pgBouncer) for
      # migrations — pgBouncer session mode is incompatible with DDL in
      # some edge cases. Direct connection is safer for schema changes.
      # Flyway — Audit schema migrations (separate datasource)
      AUDIT_FLYWAY_URL: jdbc:postgresql://postgres-audit:5432/${AUDIT_DB_NAME}
      AUDIT_FLYWAY_USER: ${AUDIT_DB_SUPERUSER}
      AUDIT_FLYWAY_PASSWORD: ${AUDIT_DB_SUPERUSER_PASSWORD}
      # HAPI FHIR
      HAPI_FHIR_SERVER_ADDRESS: https://fhir.dghs.gov.bd/fhir
      HAPI_FHIR_FHIR_VERSION: R4
      # OCL terminology service
      HAPI_OCL_BASE_URL: https://tr.ocl.dghs.gov.bd/api/fhir
      HAPI_OCL_TIMEOUT_SECONDS: "10"
      HAPI_OCL_RETRY_ATTEMPTS: "2"
      # Cluster validator
      HAPI_CLUSTER_VALIDATOR_URL: https://icd11.dghs.gov.bd/cluster/validate
      HAPI_CLUSTER_VALIDATOR_TIMEOUT_SECONDS: "10"
      # Keycloak
      KEYCLOAK_ISSUER: https://auth.dghs.gov.bd/realms/hris
      KEYCLOAK_JWKS_URL: https://auth.dghs.gov.bd/realms/hris/protocol/openid-connect/certs
      KEYCLOAK_REQUIRED_ROLE: mci-api
      KEYCLOAK_ADMIN_ROLE: fhir-admin
      # JWKS cache: 1 hour TTL, re-fetch on unknown kid
      KEYCLOAK_JWKS_CACHE_TTL_SECONDS: "3600"
      # BD Core IG
      HAPI_IG_PACKAGE_CLASSPATH: classpath:packages/bd.gov.dghs.core-0.2.1.tgz
      HAPI_IG_VERSION: 0.2.1
      # Terminology cache
      HAPI_TERMINOLOGY_CACHE_TTL_SECONDS: "86400"
      # JVM options — override defaults from Dockerfile.
      # ">-" folds the lines into one space-separated string, no trailing \n.
      JAVA_OPTS: >-
        -XX:+UseContainerSupport
        -XX:MaxRAMPercentage=75.0
        -XX:+ExitOnOutOfMemoryError
        -XX:+HeapDumpOnOutOfMemoryError
        -XX:HeapDumpPath=/tmp/heapdump.hprof
        -Djava.security.egd=file:/dev/urandom
        -Dfile.encoding=UTF-8
        -Duser.timezone=UTC
        -Dspring.profiles.active=prod
      # Logging
      LOGGING_LEVEL_ROOT: WARN
      LOGGING_LEVEL_BD_GOV_DGHS: INFO
      LOGGING_LEVEL_CA_UHN_HAPI: WARN
      LOGGING_LEVEL_ORG_SPRINGFRAMEWORK: WARN
      # Set to DEBUG temporarily during initial deployment verification,
      # then revert to WARN. DEBUG logs contain full resource payloads.
      LOGGING_LEVEL_BD_GOV_DGHS_FHIR_INTERCEPTOR: INFO
      LOGGING_LEVEL_BD_GOV_DGHS_FHIR_TERMINOLOGY: INFO
      LOGGING_LEVEL_BD_GOV_DGHS_FHIR_VALIDATOR: INFO
    depends_on:
      pgbouncer-fhir:
        condition: service_healthy
      pgbouncer-audit:
        condition: service_healthy
    healthcheck:
      # NOTE(review): assumes curl is installed inside ${HAPI_IMAGE} — confirm
      # (minimal / distroless JRE images often lack it; wget or a JVM probe
      # would be the fallback).
      test: ["CMD-SHELL",
        "curl --fail --silent --show-error http://localhost:8080/actuator/health/liveness || exit 1"]
      interval: 30s
      timeout: 10s
      start_period: 120s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G
      # PHASE 1: replicas=1
      # PHASE 2: replicas=3 (update here or use --scale flag)
      replicas: 1
      # NOTE(review): deploy.restart_policy is honored under Docker Swarm;
      # plain docker-compose applies the service-level "restart: unless-stopped"
      # above and ignores this stanza. The two policies also disagree
      # (unless-stopped vs on-failure) — confirm which behaviour is intended.
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 120s
  # ---------------------------------------------------------------------------
  # nginx
  # Reverse proxy with TLS termination.
  # Certificates managed by centralised nginx proxy — see Challenge E resolution.
  # This nginx handles: upstream routing, rate limiting, request ID injection.
  # ---------------------------------------------------------------------------
  nginx:
    image: nginx:1.25-alpine
    container_name: bd-nginx
    restart: unless-stopped
    networks:
      - frontend
    # Only externally reachable service in this stack.
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/conf.d:/etc/nginx/conf.d:ro
      # TLS certificates — provisioned by centralised nginx proxy / government CA
      # Mount path must match ssl_certificate directives in nginx.conf
      # (host paths come from .env via ${TLS_CERT_PATH}/${TLS_KEY_PATH})
      - ${TLS_CERT_PATH}:/etc/nginx/certs/server.crt:ro
      - ${TLS_KEY_PATH}:/etc/nginx/certs/server.key:ro
    # Starts only after hapi reports healthy via its actuator probe.
    depends_on:
      hapi:
        condition: service_healthy
    healthcheck:
      # nginx -t validates the mounted config; the curl hits a local /health
      # endpoint. NOTE(review): confirm nginx.conf defines a /health location
      # — otherwise this probe fails even when nginx itself is up.
      test: ["CMD-SHELL", "nginx -t && curl --fail --silent http://localhost/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
# =============================================================================
# NOTES ON WHAT IS NOT IN THIS FILE
# =============================================================================
#
# ELK STACK (Elasticsearch, Logstash, Kibana):
# Not included. At pilot phase, structured JSON logs written to
# hapi-logs volume are sufficient. Ship logs to ELK via Filebeat
# agent running on the host (outside Docker) to avoid coupling
# the FHIR server uptime to the ELK stack uptime.
# Add Filebeat config in ops/ when ELK is provisioned.
#
# KEYCLOAK:
# Not included. Keycloak is an existing national service at
# https://auth.dghs.gov.bd — not deployed here.
#
# OCL TERMINOLOGY SERVER:
# Not included. External service at https://tr.ocl.dghs.gov.bd — not deployed here.
#
# CLUSTER VALIDATOR:
# Not included. External service at https://icd11.dghs.gov.bd — not deployed here.