diff options
| author | Adam Malczewski <[email protected]> | 2026-04-27 23:54:19 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-04-27 23:54:19 +0900 |
| commit | 60680e0419f96a628f9eccaf9c53d6749d0a20ca (patch) | |
| tree | 55114197ec14bc419f88e1383d29294952960261 /docker-compose.yml | |
| parent | c7d5395ddc4f818d1faf0c59bd7c87d4ffd67a12 (diff) | |
| download | firecrawl-dokploy-60680e0419f96a628f9eccaf9c53d6749d0a20ca.tar.gz firecrawl-dokploy-60680e0419f96a628f9eccaf9c53d6749d0a20ca.zip | |
working local deploy
Diffstat (limited to 'docker-compose.yml')
| -rw-r--r-- | docker-compose.yml | 110 |
1 file changed, 97 insertions, 13 deletions
diff --git a/docker-compose.yml b/docker-compose.yml index a59d779..44c0f05 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,65 @@ name: firecrawl services: # ============================================================ + # PostgreSQL — Firecrawl's NUQ queue store + # Uses the firecrawl-published image, which extends postgres:17 + # with pg_cron preloaded and the nuq schema bootstrapped via + # /docker-entrypoint-initdb.d/010-nuq.sql. + # + # NOTE: pg_cron is pinned to database 'postgres' in the image + # (cron.database_name = 'postgres'), so POSTGRES_DB MUST be + # 'postgres'. The init script creates the nuq schema in that + # database. Do not change POSTGRES_DB / POSTGRES_USER here + # unless you also rebuild the nuq-postgres image to match. + # ============================================================ + postgres: + image: ghcr.io/firecrawl/nuq-postgres:latest + networks: + - backend + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: postgres + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + restart: unless-stopped + + # ============================================================ + # RabbitMQ — required by Firecrawl's NUQ workers + # ============================================================ + rabbitmq: + image: rabbitmq:3-management + networks: + - backend + command: rabbitmq-server + volumes: + - rabbitmq-data:/var/lib/rabbitmq + healthcheck: + test: ["CMD", "rabbitmq-diagnostics", "-q", "check_running"] + interval: 10s + timeout: 5s + retries: 10 + start_period: 30s + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + compress: "true" + restart: unless-stopped + + # ============================================================ # SearXNG 
— metasearch engine (powers Firecrawl's /search API) # ============================================================ searxng: @@ -10,7 +69,7 @@ services: - backend - dokploy-network volumes: - - ./searxng:/etc/searxng:rw + - ./searxng/settings.yml:/etc/searxng/settings.yml:ro - searxng-cache:/var/cache/searxng:rw environment: - SEARXNG_BASE_URL=https://${SEARXNG_DOMAIN:-searxng.localhost}/ @@ -31,7 +90,7 @@ services: # Otherwise it's only reachable internally by Firecrawl. # labels: # - "traefik.enable=true" - # - "traefik.http.routers.searxng.rule=Host(`${SEARXNG_DOMAIN}`)" + # - "traefik.http.routers.searxng.rule=Host(`${SEARXNG_DOMAIN:-searxng.localhost}`)" # - "traefik.http.routers.searxng.entrypoints=websecure" # - "traefik.http.routers.searxng.tls.certResolver=letsencrypt" # - "traefik.http.services.searxng.loadbalancer.server.port=8080" @@ -69,39 +128,56 @@ services: # Firecrawl API — scrape, crawl, search, map # ============================================================ api: - image: ghcr.io/firecrawl/firecrawl + image: ghcr.io/firecrawl/firecrawl:latest networks: - backend - dokploy-network extra_hosts: - "host.docker.internal:host-gateway" environment: - # === Required === - PORT: ${PORT:-3002} - INTERNAL_PORT: ${INTERNAL_PORT:-3002} + # === Server === HOST: 0.0.0.0 + PORT: ${INTERNAL_PORT:-3002} + INTERNAL_PORT: ${INTERNAL_PORT:-3002} + ENV: local NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE:-8} + USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false} + # === Redis === REDIS_URL: redis://redis:6379 REDIS_RATE_LIMIT_URL: redis://redis:6379 + # === Playwright === PLAYWRIGHT_MICROSERVICE_URL: http://playwright-service:3000/scrape - USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false} - # === SearXNG (internal, same compose network) === + # === SearXNG (internal) === SEARXNG_ENDPOINT: http://searxng:8080 SEARXNG_ENGINES: ${SEARXNG_ENGINES:-} SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-} - # === Optional: Auth === + # === NUQ Postgres === + # POSTGRES_HOST 
!= "localhost" puts the harness into docker-compose + # mode and prevents it from trying to spawn its own container. + POSTGRES_HOST: postgres + POSTGRES_PORT: "5432" + POSTGRES_USER: postgres + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: postgres + # === NUQ RabbitMQ === + # Must be set explicitly — the harness's docker-compose branch + # does NOT auto-populate this, it just skips container management. + NUQ_RABBITMQ_URL: amqp://rabbitmq:5672 + # === Auth / Secrets === + BULL_AUTH_KEY: ${BULL_AUTH_KEY:-} TEST_API_KEY: ${TEST_API_KEY:-} - BULL_AUTH_KEY: ${BULL_AUTH_KEY:-CHANGEME} # === Optional: AI Features === OPENAI_API_KEY: ${OPENAI_API_KEY:-} + OPENAI_BASE_URL: ${OPENAI_BASE_URL:-} OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-} MODEL_NAME: ${MODEL_NAME:-} + MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-} # === Optional: Proxy === PROXY_SERVER: ${PROXY_SERVER:-} PROXY_USERNAME: ${PROXY_USERNAME:-} PROXY_PASSWORD: ${PROXY_PASSWORD:-} ports: - - ${PORT:-3002} + - "127.0.0.1:${INTERNAL_PORT:-3002}:${INTERNAL_PORT:-3002}" ulimits: nofile: soft: 65535 @@ -109,10 +185,16 @@ services: cpus: 4.0 mem_limit: 8G memswap_limit: 8G + # --start-docker = run pre-built dist/ directly, skip pnpm install + # and skip the container-management code paths in harness.ts. 
command: node dist/src/harness.js --start-docker depends_on: redis: condition: service_healthy + postgres: + condition: service_healthy + rabbitmq: + condition: service_healthy playwright-service: condition: service_started searxng: @@ -122,7 +204,7 @@ services: - "traefik.http.routers.firecrawl-api.rule=Host(`${FIRECRAWL_DOMAIN}`)" - "traefik.http.routers.firecrawl-api.entrypoints=websecure" - "traefik.http.routers.firecrawl-api.tls.certResolver=letsencrypt" - - "traefik.http.services.firecrawl-api.loadbalancer.server.port=${PORT:-3002}" + - "traefik.http.services.firecrawl-api.loadbalancer.server.port=${INTERNAL_PORT:-3002}" logging: driver: "json-file" options: @@ -132,7 +214,7 @@ services: restart: unless-stopped # ============================================================ - # Redis — queues, rate limiting, caching + # Redis — rate limiting, cache # ============================================================ redis: image: redis:alpine @@ -161,5 +243,7 @@ networks: external: true volumes: + postgres-data: + rabbitmq-data: redis-data: searxng-cache: |
