services:
  # ============================================================
  # PostgreSQL — Firecrawl's NUQ queue store
  # Uses the firecrawl-published image, which extends postgres:17
  # with pg_cron preloaded and the nuq schema bootstrapped via
  # /docker-entrypoint-initdb.d/010-nuq.sql.
  #
  # NOTE: pg_cron is pinned to database 'postgres' in the image
  # (cron.database_name = 'postgres'), so POSTGRES_DB MUST be
  # 'postgres'. The init script creates the nuq schema in that
  # database. Do not change POSTGRES_DB / POSTGRES_USER here
  # unless you also rebuild the nuq-postgres image to match.
  # ============================================================
  postgres:
    image: ghcr.io/firecrawl/nuq-postgres:latest
    networks:
      - backend
    environment:
      POSTGRES_USER: postgres
      # Must resolve to the same value as POSTGRES_PASSWORD on the api service.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
      POSTGRES_DB: postgres
    volumes:
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      # User and database here mirror POSTGRES_USER / POSTGRES_DB above.
      test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"
    restart: unless-stopped

  # ============================================================
  # RabbitMQ — required by Firecrawl's NUQ workers
  # ============================================================
  rabbitmq:
    image: rabbitmq:3-management
    networks:
      - backend
    command: rabbitmq-server
    volumes:
      - rabbitmq-data:/var/lib/rabbitmq
    healthcheck:
      test: ["CMD", "rabbitmq-diagnostics", "-q", "check_running"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"
        compress: "true"
    restart: unless-stopped

  # ============================================================
  # SearXNG — metasearch engine (powers Firecrawl's /search API)
  # ============================================================
  searxng:
    image: docker.io/searxng/searxng:latest
    networks:
      - backend
    volumes:
      - ./searxng/settings.yml:/etc/searxng/settings.yml
      - searxng-cache:/var/cache/searxng:rw
    environment:
      # NOTE(review): defaults to an empty string when SEARXNG_SECRET is not
      # set in the environment — confirm a real secret is always provided.
      - SEARXNG_SECRET=${SEARXNG_SECRET:-}
      - SEARXNG_BASE_URL=https://${SEARXNG_DOMAIN:-searxng.localhost}/
    # Drop every capability, then add back only the ones listed below.
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
      - DAC_OVERRIDE
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
    restart: unless-stopped
    # Uncomment labels below if you want SearXNG accessible via its own domain.
    # Otherwise it's only reachable internally by Firecrawl.
    # NOTE(review): this service is attached only to `backend`. If Traefik
    # reaches containers over dokploy-network (as the api service's setup
    # suggests), also add dokploy-network to `networks` above — confirm.
    # labels:
    #   - "traefik.enable=true"
    #   - "traefik.http.routers.searxng.rule=Host(`${SEARXNG_DOMAIN:-searxng.localhost}`)"
    #   - "traefik.http.routers.searxng.entrypoints=websecure"
    #   - "traefik.http.routers.searxng.tls.certResolver=letsencrypt"
    #   - "traefik.http.services.searxng.loadbalancer.server.port=8080"

  # ============================================================
  # Playwright — headless browser for JS-rendered pages
  # ============================================================
  playwright-service:
    image: ghcr.io/firecrawl/playwright-service:latest
    networks:
      - backend
    environment:
      # Must match the port in PLAYWRIGHT_MICROSERVICE_URL on the api service.
      PORT: "3000"
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
      BLOCK_MEDIA: ${BLOCK_MEDIA:-}
      ALLOW_LOCAL_WEBHOOKS: ${ALLOW_LOCAL_WEBHOOKS:-}
      MAX_CONCURRENT_PAGES: ${MAX_CONCURRENT_PAGES:-10}
    cpus: 2.0
    # memswap_limit equal to mem_limit means the container gets no swap.
    mem_limit: 4G
    memswap_limit: 4G
    tmpfs:
      - /tmp/.cache:noexec,nosuid,size=1g
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        compress: "true"
    restart: unless-stopped

  # ============================================================
  # Firecrawl API — scrape, crawl, search, map
  # ============================================================
  api:
    image: ghcr.io/firecrawl/firecrawl:latest
    # Attached to `backend` for the internal services and to the external
    # dokploy-network, which the Traefik labels below route over.
    networks:
      - backend
      - dokploy-network
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      # === Server ===
      HOST: 0.0.0.0
      PORT: ${INTERNAL_PORT:-31329}
      INTERNAL_PORT: ${INTERNAL_PORT:-31329}
      ENV: local
      NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE:-8}
      USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false}

      # === Redis ===
      REDIS_URL: redis://redis:6379
      REDIS_RATE_LIMIT_URL: redis://redis:6379

      # === Playwright ===
      PLAYWRIGHT_MICROSERVICE_URL: http://playwright-service:3000/scrape

      # === SearXNG (internal) ===
      SEARXNG_ENDPOINT: http://searxng:8080
      SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
      SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}

      # === NUQ Postgres ===
      # POSTGRES_HOST != "localhost" puts the harness into docker-compose
      # mode and prevents it from trying to spawn its own container.
      POSTGRES_HOST: postgres
      POSTGRES_PORT: "5432"
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
      POSTGRES_DB: postgres

      # === NUQ RabbitMQ ===
      # Must be set explicitly — the harness's docker-compose branch
      # does NOT auto-populate this, it just skips container management.
      NUQ_RABBITMQ_URL: amqp://rabbitmq:5672

      # === Auth / Secrets ===
      BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
      TEST_API_KEY: ${TEST_API_KEY:-}

      # === Optional: AI Features ===
      OPENAI_API_KEY: ${OPENAI_API_KEY:-}
      OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
      OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
      MODEL_NAME: ${MODEL_NAME:-}
      MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}

      # === Optional: Proxy ===
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
    # NOTE(review): this publishes the API port on every host interface in
    # addition to the Traefik route defined in `labels` — confirm that direct
    # host exposure is intended.
    ports:
      - "0.0.0.0:${INTERNAL_PORT:-31329}:${INTERNAL_PORT:-31329}"
    ulimits:
      nofile:
        soft: 65535
        hard: 65535
    cpus: 4.0
    # memswap_limit equal to mem_limit means the container gets no swap.
    mem_limit: 8G
    memswap_limit: 8G
    # --start-docker = run pre-built dist/ directly, skip pnpm install
    # and skip the container-management code paths in harness.ts.
    command: node dist/src/harness.js --start-docker
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      # playwright-service and searxng define no healthcheck, so only
      # "started" can be awaited for them.
      playwright-service:
        condition: service_started
      searxng:
        condition: service_started
    labels:
      - "traefik.enable=true"
      # This container sits on two networks; tell Traefik explicitly which one
      # to use, otherwise it may pick `backend` and fail to reach the API.
      - "traefik.docker.network=dokploy-network"
      # FIRECRAWL_DOMAIN has no default and must be set in the environment.
      - "traefik.http.routers.firecrawl-api.rule=Host(`${FIRECRAWL_DOMAIN}`)"
      - "traefik.http.routers.firecrawl-api.entrypoints=websecure"
      - "traefik.http.routers.firecrawl-api.tls.certResolver=letsencrypt"
      - "traefik.http.services.firecrawl-api.loadbalancer.server.port=${INTERNAL_PORT:-31329}"
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        compress: "true"
    restart: unless-stopped

  # ============================================================
  # Redis — rate limiting, cache
  # ============================================================
  redis:
    image: redis:alpine
    networks:
      - backend
    command: redis-server --bind 0.0.0.0
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "5m"
        max-file: "2"
        compress: "true"
    restart: unless-stopped

networks:
  # Private network shared by all services in this stack.
  backend:
    driver: bridge
  # Pre-existing network created outside this file; only the api service
  # joins it.
  dokploy-network:
    external: true

volumes:
  postgres-data:
  rabbitmq-data:
  redis-data:
  searxng-cache: