summary refs log tree commit diff homepage
path: root/docker-compose.yml
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-04-27 23:54:19 +0900
committer: Adam Malczewski <[email protected]> 2026-04-27 23:54:19 +0900
commit: 60680e0419f96a628f9eccaf9c53d6749d0a20ca (patch)
tree: 55114197ec14bc419f88e1383d29294952960261 /docker-compose.yml
parent: c7d5395ddc4f818d1faf0c59bd7c87d4ffd67a12 (diff)
download: firecrawl-dokploy-60680e0419f96a628f9eccaf9c53d6749d0a20ca.tar.gz
firecrawl-dokploy-60680e0419f96a628f9eccaf9c53d6749d0a20ca.zip
working local deploy
Diffstat (limited to 'docker-compose.yml')
-rw-r--r-- docker-compose.yml 110
1 files changed, 97 insertions, 13 deletions
diff --git a/docker-compose.yml b/docker-compose.yml
index a59d779..44c0f05 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,65 @@ name: firecrawl
services:
# ============================================================
+ # PostgreSQL — Firecrawl's NUQ queue store
+ # Uses the firecrawl-published image, which extends postgres:17
+ # with pg_cron preloaded and the nuq schema bootstrapped via
+ # /docker-entrypoint-initdb.d/010-nuq.sql.
+ #
+ # NOTE: pg_cron is pinned to database 'postgres' in the image
+ # (cron.database_name = 'postgres'), so POSTGRES_DB MUST be
+ # 'postgres'. The init script creates the nuq schema in that
+ # database. Do not change POSTGRES_DB / POSTGRES_USER here
+ # unless you also rebuild the nuq-postgres image to match.
+ # ============================================================
+ postgres:
+ image: ghcr.io/firecrawl/nuq-postgres:latest  # NOTE(review): floating tag; consider pinning for reproducible deploys
+ networks:
+ - backend  # internal network only; this service publishes no ports
+ environment:
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}  # falls back to "postgres" when unset/empty; the api service reads the same variable
+ POSTGRES_DB: postgres
+ volumes:
+ - postgres-data:/var/lib/postgresql/data  # named volume so the data directory survives container re-creation
+ healthcheck:
+ test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"]  # user/db are hard-coded to match POSTGRES_USER / POSTGRES_DB above
+ interval: 10s
+ timeout: 5s
+ retries: 10  # marked unhealthy after 10 consecutive failed probes
+ start_period: 30s  # probe failures during the first 30s do not count toward retries
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "5m"  # rotate each log file at 5 MB
+ max-file: "2"  # keep at most two log files per container
+ restart: unless-stopped
+
+ # ============================================================
+ # RabbitMQ — required by Firecrawl's NUQ workers
+ # ============================================================
+ rabbitmq:
+ image: rabbitmq:3-management  # NOTE(review): no RABBITMQ_DEFAULT_USER/PASS is set, so the image's default credentials apply — confirm acceptable
+ networks:
+ - backend  # internal network only; this service publishes no ports
+ command: rabbitmq-server
+ volumes:
+ - rabbitmq-data:/var/lib/rabbitmq  # named volume so broker state survives container re-creation
+ healthcheck:
+ test: ["CMD", "rabbitmq-diagnostics", "-q", "check_running"]  # exec form: runs the binary directly, without a shell
+ interval: 10s
+ timeout: 5s
+ retries: 10  # marked unhealthy after 10 consecutive failed probes
+ start_period: 30s  # probe failures during the first 30s do not count toward retries
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "5m"  # rotate each log file at 5 MB
+ max-file: "2"  # keep at most two log files per container
+ compress: "true"  # compress rotated log files
+ restart: unless-stopped
+
+ # ============================================================
# SearXNG — metasearch engine (powers Firecrawl's /search API)
# ============================================================
searxng:
@@ -10,7 +69,7 @@ services:
- backend
- dokploy-network
volumes:
- - ./searxng:/etc/searxng:rw
+ - ./searxng/settings.yml:/etc/searxng/settings.yml:ro
- searxng-cache:/var/cache/searxng:rw
environment:
- SEARXNG_BASE_URL=https://${SEARXNG_DOMAIN:-searxng.localhost}/
@@ -31,7 +90,7 @@ services:
# Otherwise it's only reachable internally by Firecrawl.
# labels:
# - "traefik.enable=true"
- # - "traefik.http.routers.searxng.rule=Host(`${SEARXNG_DOMAIN}`)"
+ # - "traefik.http.routers.searxng.rule=Host(`${SEARXNG_DOMAIN:-searxng.localhost}`)"
# - "traefik.http.routers.searxng.entrypoints=websecure"
# - "traefik.http.routers.searxng.tls.certResolver=letsencrypt"
# - "traefik.http.services.searxng.loadbalancer.server.port=8080"
@@ -69,39 +128,56 @@ services:
# Firecrawl API — scrape, crawl, search, map
# ============================================================
api:
- image: ghcr.io/firecrawl/firecrawl
+ image: ghcr.io/firecrawl/firecrawl:latest
networks:
- backend
- dokploy-network
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
- # === Required ===
- PORT: ${PORT:-3002}
- INTERNAL_PORT: ${INTERNAL_PORT:-3002}
+ # === Server ===
HOST: 0.0.0.0
+ PORT: ${INTERNAL_PORT:-3002}
+ INTERNAL_PORT: ${INTERNAL_PORT:-3002}
+ ENV: local
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE:-8}
+ USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false}
+ # === Redis ===
REDIS_URL: redis://redis:6379
REDIS_RATE_LIMIT_URL: redis://redis:6379
+ # === Playwright ===
PLAYWRIGHT_MICROSERVICE_URL: http://playwright-service:3000/scrape
- USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-false}
- # === SearXNG (internal, same compose network) ===
+ # === SearXNG (internal) ===
SEARXNG_ENDPOINT: http://searxng:8080
SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}
- # === Optional: Auth ===
+ # === NUQ Postgres ===
+ # Any POSTGRES_HOST other than "localhost" puts the harness into
+ # docker-compose mode, so it does not try to spawn its own container.
+ POSTGRES_HOST: postgres
+ POSTGRES_PORT: "5432"
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
+ POSTGRES_DB: postgres
+ # === NUQ RabbitMQ ===
+ # Must be set explicitly: the harness's docker-compose branch does NOT
+ # auto-populate this; it only skips container management.
+ NUQ_RABBITMQ_URL: amqp://rabbitmq:5672
+ # === Auth / Secrets ===
+ BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
TEST_API_KEY: ${TEST_API_KEY:-}
- BULL_AUTH_KEY: ${BULL_AUTH_KEY:-CHANGEME}
# === Optional: AI Features ===
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
+ OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
MODEL_NAME: ${MODEL_NAME:-}
+ MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
# === Optional: Proxy ===
PROXY_SERVER: ${PROXY_SERVER:-}
PROXY_USERNAME: ${PROXY_USERNAME:-}
PROXY_PASSWORD: ${PROXY_PASSWORD:-}
ports:
- - ${PORT:-3002}
+ - "127.0.0.1:${INTERNAL_PORT:-3002}:${INTERNAL_PORT:-3002}"
ulimits:
nofile:
soft: 65535
@@ -109,10 +185,16 @@ services:
cpus: 4.0
mem_limit: 8G
memswap_limit: 8G
+ # --start-docker: run the pre-built dist/ directly, skipping pnpm install
+ # and the container-management code paths in harness.ts.
command: node dist/src/harness.js --start-docker
depends_on:
redis:
condition: service_healthy
+ postgres:
+ condition: service_healthy
+ rabbitmq:
+ condition: service_healthy
playwright-service:
condition: service_started
searxng:
@@ -122,7 +204,7 @@ services:
- "traefik.http.routers.firecrawl-api.rule=Host(`${FIRECRAWL_DOMAIN}`)"
- "traefik.http.routers.firecrawl-api.entrypoints=websecure"
- "traefik.http.routers.firecrawl-api.tls.certResolver=letsencrypt"
- - "traefik.http.services.firecrawl-api.loadbalancer.server.port=${PORT:-3002}"
+ - "traefik.http.services.firecrawl-api.loadbalancer.server.port=${INTERNAL_PORT:-3002}"
logging:
driver: "json-file"
options:
@@ -132,7 +214,7 @@ services:
restart: unless-stopped
# ============================================================
- # Redis — queues, rate limiting, caching
+ # Redis — rate limiting, cache
# ============================================================
redis:
image: redis:alpine
@@ -161,5 +243,7 @@ networks:
external: true
volumes:
+ postgres-data:  # mounted by the postgres service at /var/lib/postgresql/data
+ rabbitmq-data:  # mounted by the rabbitmq service at /var/lib/rabbitmq
redis-data:
searxng-cache: