Firecrawl
Firecrawl is an API service that takes a URL, crawls it, and converts it into clean markdown or structured data. It can crawl all accessible subpages and provide clean data for each.
Configuration
# Docker Compose project for a self-hosted Firecrawl stack:
# API, two workers, Playwright scraper, Redis and Postgres.
name: firecrawl

# Settings shared by the api, worker and extract-worker services
# (merged into each of them with `<<: *common-service`).
x-common-service: &common-service
  image: ghcr.io/firecrawl/firecrawl:latest
  ulimits:
    # Raise the open-file limit for the Firecrawl containers.
    nofile:
      soft: 65535
      hard: 65535
  extra_hosts:
    # Lets the containers resolve host.docker.internal to the Docker host gateway.
    - "host.docker.internal:host-gateway"

# Environment shared by the api, worker and extract-worker services.
# Every value uses `${NAME:-default}` interpolation: the deployment's own
# environment wins, otherwise the default after `:-` (often empty) is used.
x-common-env: &common-env
  # In-stack service endpoints (defaults point at the services defined below).
  REDIS_URL: ${REDIS_URL:-redis://redis:6379}
  REDIS_RATE_LIMIT_URL: ${REDIS_RATE_LIMIT_URL:-redis://redis:6379}
  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
  # Default credentials match POSTGRES_USER / POSTGRES_PASSWORD of nuq-postgres.
  NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}
  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-}
  # LLM provider settings.
  OPENAI_API_KEY: ${OPENAI_API_KEY:-}
  OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}
  MODEL_NAME: ${MODEL_NAME:-}
  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}
  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
  # Optional integrations and keys.
  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}
  BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}
  TEST_API_KEY: ${TEST_API_KEY:-}
  POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}
  POSTHOG_HOST: ${POSTHOG_HOST:-}
  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}
  SUPABASE_URL: ${SUPABASE_URL:-}
  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}
  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}
  SERPER_API_KEY: ${SERPER_API_KEY:-}
  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}
  LOGGING_LEVEL: ${LOGGING_LEVEL:-INFO}
  # Outbound proxy; the NO_PROXY default lists the in-stack hostnames.
  PROXY_SERVER: ${PROXY_SERVER:-}
  PROXY_USERNAME: ${PROXY_USERNAME:-}
  PROXY_PASSWORD: ${PROXY_PASSWORD:-}
  NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}
  # SearXNG search backend.
  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}
  SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}
  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}

services:
  # Browser scraping service; the default PLAYWRIGHT_MICROSERVICE_URL above
  # targets port 3000 of this service.
  playwright-service:
    image: ghcr.io/firecrawl/playwright-service:latest
    shm_size: "1g"
    restart: unless-stopped
    environment:
      PORT: 3000
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
      BLOCK_MEDIA: ${BLOCK_MEDIA:-}
      NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}

  # HTTP API entry point (dist/src/index.js), listening on port 3002.
  api:
    <<: *common-service
    restart: unless-stopped
    ports:
      # Container port only: the host port is chosen by Docker.
      - "3002"
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: 3002
      # Same value as the worker service's PORT below.
      WORKER_PORT: 3005
      ENV: local
    depends_on:
      redis:
        condition: service_started
      playwright-service:
        condition: service_started
      # Wait for the Postgres healthcheck to pass before starting.
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/index.js

  # Queue worker (dist/src/services/queue-worker.js).
  worker:
    <<: *common-service
    restart: unless-stopped
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: 3005
      ENV: local
    depends_on:
      redis:
        condition: service_started
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/services/queue-worker.js

  # Extract worker (dist/src/services/extract-worker.js).
  extract-worker:
    <<: *common-service
    restart: unless-stopped
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: 3004
      ENV: local
    depends_on:
      redis:
        condition: service_started
      nuq-postgres:
        condition: service_healthy
    command: node --import ./dist/src/otel.js dist/src/services/extract-worker.js

  # Redis instance behind the default REDIS_URL / REDIS_RATE_LIMIT_URL.
  # NOTE(review): unlike the other services, no restart policy is set here —
  # confirm whether that is intentional.
  redis:
    image: redis:alpine
    command: redis-server --bind 0.0.0.0

  # Postgres instance behind the default NUQ_DATABASE_URL. The image is built
  # from the apps/nuq-postgres directory on the main branch of the Firecrawl
  # repository instead of being pulled.
  nuq-postgres:
    build:
      context: "https://github.com/firecrawl/firecrawl.git#main:apps/nuq-postgres"
      dockerfile: Dockerfile
    restart: unless-stopped
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      POSTGRES_DB: postgres
    volumes:
      - nuq_pg_data:/var/lib/postgresql/data
    healthcheck:
      # `$$` escapes Compose interpolation so the variables are expanded by
      # the shell inside the container.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
      start_period: 30s
      interval: 10s
      timeout: 5s
      retries: 10

volumes:
  # Named volume that persists the Postgres data directory.
  nuq_pg_data:

[variables]
# Entries of the [variables] table: each name defined here can be referenced
# as ${name} by the [config] entries further down.
# `${domain}` and `${password:32}` look like Dokploy template helpers (a
# generated domain and a 32-character random password) — confirm against the
# Dokploy template documentation.
# NOTE(review): the upper-case placeholders (${OPENAI_API_KEY}, ...) are not
# defined anywhere in this file; verify how Dokploy resolves them on import.
main_domain = "${domain}"
openai_api_key = "${OPENAI_API_KEY}"
openai_base_url = "${OPENAI_BASE_URL}"
ollama_base_url = "${OLLAMA_BASE_URL}"
model_name = "${MODEL_NAME}"
model_embedding_name = "${MODEL_EMBEDDING_NAME}"
proxy_server = "${PROXY_SERVER}"
proxy_username = "${PROXY_USERNAME}"
proxy_password = "${PROXY_PASSWORD}"
searxng_endpoint = "${SEARXNG_ENDPOINT}"
searxng_engines = "${SEARXNG_ENGINES}"
searxng_categories = "${SEARXNG_CATEGORIES}"
supabase_anon_token = "${SUPABASE_ANON_TOKEN}"
supabase_url = "${SUPABASE_URL}"
supabase_service_token = "${SUPABASE_SERVICE_TOKEN}"
test_api_key = "${TEST_API_KEY}"
bull_auth_key = "${password:32}"
llamaparse_api_key = "${LLAMAPARSE_API_KEY}"
slack_webhook_url = "${SLACK_WEBHOOK_URL}"
posthog_api_key = "${POSTHOG_API_KEY}"
posthog_host = "${POSTHOG_HOST}"
# NOTE(review): max_cpu and max_ram are declared here, but the env list below
# hard-codes MAX_CPU=0.8 / MAX_RAM=0.8 instead of referencing them.
max_cpu = "${MAX_CPU}"
max_ram = "${MAX_RAM}"
# Deployment configuration: KEY=value environment entries plus file mounts.
# Most keys are read by the compose file through ${NAME:-default}
# interpolation; values written as ${lower_case} come from [variables].
[config]
env = [
  # Fixed values for this deployment.
  "PORT=3002",
  "HOST=0.0.0.0",
  "USE_DB_AUTHENTICATION=false",
  "BULL_AUTH_KEY=${bull_auth_key}",
  "PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape",
  "REDIS_URL=redis://redis:6379",
  "REDIS_RATE_LIMIT_URL=redis://redis:6379",

  # LLM provider settings.
  "OPENAI_API_KEY=${openai_api_key}",
  "OPENAI_BASE_URL=${openai_base_url}",
  "OLLAMA_BASE_URL=${ollama_base_url}",
  "MODEL_NAME=${model_name}",
  "MODEL_EMBEDDING_NAME=${model_embedding_name}",

  # Outbound proxy.
  "PROXY_SERVER=${proxy_server}",
  "PROXY_USERNAME=${proxy_username}",
  "PROXY_PASSWORD=${proxy_password}",

  # SearXNG search backend.
  "SEARXNG_ENDPOINT=${searxng_endpoint}",
  "SEARXNG_ENGINES=${searxng_engines}",
  "SEARXNG_CATEGORIES=${searxng_categories}",

  # Optional integrations and keys.
  "SUPABASE_ANON_TOKEN=${supabase_anon_token}",
  "SUPABASE_URL=${supabase_url}",
  "SUPABASE_SERVICE_TOKEN=${supabase_service_token}",
  "TEST_API_KEY=${test_api_key}",
  "LLAMAPARSE_API_KEY=${llamaparse_api_key}",
  "SLACK_WEBHOOK_URL=${slack_webhook_url}",
  "POSTHOG_API_KEY=${posthog_api_key}",
  "POSTHOG_HOST=${posthog_host}",

  # NOTE(review): LLAMAPARSE_API_KEY, MAX_CPU and MAX_RAM are not referenced
  # by any environment block of the compose file in this document — confirm
  # that they actually reach the containers.
  "MAX_CPU=0.8",
  "MAX_RAM=0.8"
]
# No file mounts are defined for this template.
mounts = []
# Domain routing: requests for ${main_domain} under path "/" are sent to
# port 3002 of the "api" service (the PORT the api service is configured with).
[[config.domains]]
serviceName = "api"
port = 3002
host = "${main_domain}"
path = "/"

Base64
To import this template in Dokploy: create a Compose service → Advanced → Base64 import and paste the content below:
{
  "compose": "name: firecrawl\n \nx-common-service: &common-service\n  image: ghcr.io/firecrawl/firecrawl:latest\n  ulimits:\n    nofile:\n      soft: 65535\n      hard: 65535\n  extra_hosts:\n    - \"host.docker.internal:host-gateway\"\n \nx-common-env: &common-env\n  REDIS_URL: ${REDIS_URL:-redis://redis:6379}\n  REDIS_RATE_LIMIT_URL: ${REDIS_RATE_LIMIT_URL:-redis://redis:6379}\n  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}\n  NUQ_DATABASE_URL: ${NUQ_DATABASE_URL:-postgres://postgres:postgres@nuq-postgres:5432/postgres}\n  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION:-}\n  OPENAI_API_KEY: ${OPENAI_API_KEY:-}\n  OPENAI_BASE_URL: ${OPENAI_BASE_URL:-}\n  MODEL_NAME: ${MODEL_NAME:-}\n  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME:-}\n  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}\n  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL:-}\n  BULL_AUTH_KEY: ${BULL_AUTH_KEY:-}\n  TEST_API_KEY: ${TEST_API_KEY:-}\n  POSTHOG_API_KEY: ${POSTHOG_API_KEY:-}\n  POSTHOG_HOST: ${POSTHOG_HOST:-}\n  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN:-}\n  SUPABASE_URL: ${SUPABASE_URL:-}\n  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN:-}\n  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL:-}\n  SERPER_API_KEY: ${SERPER_API_KEY:-}\n  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY:-}\n  LOGGING_LEVEL: ${LOGGING_LEVEL:-INFO}\n  PROXY_SERVER: ${PROXY_SERVER:-}\n  PROXY_USERNAME: ${PROXY_USERNAME:-}\n  PROXY_PASSWORD: ${PROXY_PASSWORD:-}\n  NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}\n  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT:-}\n  SEARXNG_ENGINES: ${SEARXNG_ENGINES:-}\n  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES:-}\n \nservices:\n  playwright-service:\n    image: ghcr.io/firecrawl/playwright-service:latest\n    shm_size: \"1g\"\n    restart: unless-stopped\n    environment:\n      PORT: 3000\n      PROXY_SERVER: ${PROXY_SERVER:-}\n      PROXY_USERNAME: ${PROXY_USERNAME:-}\n      PROXY_PASSWORD: ${PROXY_PASSWORD:-}\n      BLOCK_MEDIA: ${BLOCK_MEDIA:-}\n      NO_PROXY: ${NO_PROXY:-localhost,127.0.0.1,redis,nuq-postgres,playwright-service,host.docker.internal}\n \n  api:\n    <<: *common-service\n    restart: unless-stopped\n    ports:\n      - \"3002\"\n    environment:\n      <<: *common-env\n      HOST: \"0.0.0.0\"\n      PORT: 3002\n      WORKER_PORT: 3005\n      ENV: local\n    depends_on:\n      redis:\n        condition: service_started\n      playwright-service:\n        condition: service_started\n      nuq-postgres:\n        condition: service_healthy\n    command: node --import ./dist/src/otel.js dist/src/index.js\n \n  worker:\n    <<: *common-service\n    restart: unless-stopped\n    environment:\n      <<: *common-env\n      HOST: \"0.0.0.0\"\n      PORT: 3005\n      ENV: local\n    depends_on:\n      redis:\n        condition: service_started\n      nuq-postgres:\n        condition: service_healthy\n    command: node --import ./dist/src/otel.js dist/src/services/queue-worker.js\n \n  extract-worker:\n    <<: *common-service\n    restart: unless-stopped\n    environment:\n      <<: *common-env\n      HOST: \"0.0.0.0\"\n      PORT: 3004\n      ENV: local\n    depends_on:\n      redis:\n        condition: service_started\n      nuq-postgres:\n        condition: service_healthy\n    command: node --import ./dist/src/otel.js dist/src/services/extract-worker.js\n \n  redis:\n    image: redis:alpine\n    command: redis-server --bind 0.0.0.0\n \n  nuq-postgres:\n    build:\n      context: \"https://github.com/firecrawl/firecrawl.git#main:apps/nuq-postgres\"\n      dockerfile: Dockerfile\n    restart: unless-stopped\n    environment:\n      POSTGRES_USER: postgres\n      POSTGRES_PASSWORD: postgres\n      POSTGRES_DB: postgres\n    volumes:\n      - nuq_pg_data:/var/lib/postgresql/data\n    healthcheck:\n      test: [\"CMD-SHELL\", \"pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}\"]\n      start_period: 30s\n      interval: 10s\n      timeout: 5s\n      retries: 10\n \nvolumes:\n  nuq_pg_data:",
  "config": "[variables]\nmain_domain = \"${domain}\"\nopenai_api_key = \"${OPENAI_API_KEY}\"\nopenai_base_url = \"${OPENAI_BASE_URL}\"\nollama_base_url = \"${OLLAMA_BASE_URL}\"\nmodel_name = \"${MODEL_NAME}\"\nmodel_embedding_name = \"${MODEL_EMBEDDING_NAME}\"\nproxy_server = \"${PROXY_SERVER}\"\nproxy_username = \"${PROXY_USERNAME}\"\nproxy_password = \"${PROXY_PASSWORD}\"\nsearxng_endpoint = \"${SEARXNG_ENDPOINT}\"\nsearxng_engines = \"${SEARXNG_ENGINES}\"\nsearxng_categories = \"${SEARXNG_CATEGORIES}\"\nsupabase_anon_token = \"${SUPABASE_ANON_TOKEN}\"\nsupabase_url = \"${SUPABASE_URL}\"\nsupabase_service_token = \"${SUPABASE_SERVICE_TOKEN}\"\ntest_api_key = \"${TEST_API_KEY}\"\nbull_auth_key = \"${password:32}\"\nllamaparse_api_key = \"${LLAMAPARSE_API_KEY}\"\nslack_webhook_url = \"${SLACK_WEBHOOK_URL}\"\nposthog_api_key = \"${POSTHOG_API_KEY}\"\nposthog_host = \"${POSTHOG_HOST}\"\nmax_cpu = \"${MAX_CPU}\"\nmax_ram = \"${MAX_RAM}\"\n\n[config]\nenv = [\n  \"PORT=3002\",\n  \"HOST=0.0.0.0\",\n  \"USE_DB_AUTHENTICATION=false\",\n  \"BULL_AUTH_KEY=${bull_auth_key}\",\n  \"PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape\",\n  \"REDIS_URL=redis://redis:6379\",\n  \"REDIS_RATE_LIMIT_URL=redis://redis:6379\",\n  \"OPENAI_API_KEY=${openai_api_key}\",\n  \"OPENAI_BASE_URL=${openai_base_url}\",\n  \"OLLAMA_BASE_URL=${ollama_base_url}\",\n  \"MODEL_NAME=${model_name}\",\n  \"MODEL_EMBEDDING_NAME=${model_embedding_name}\",\n  \"PROXY_SERVER=${proxy_server}\",\n  \"PROXY_USERNAME=${proxy_username}\",\n  \"PROXY_PASSWORD=${proxy_password}\",\n  \"SEARXNG_ENDPOINT=${searxng_endpoint}\",\n  \"SEARXNG_ENGINES=${searxng_engines}\",\n  \"SEARXNG_CATEGORIES=${searxng_categories}\",\n  \"SUPABASE_ANON_TOKEN=${supabase_anon_token}\",\n  \"SUPABASE_URL=${supabase_url}\",\n  \"SUPABASE_SERVICE_TOKEN=${supabase_service_token}\",\n  \"TEST_API_KEY=${test_api_key}\",\n  \"LLAMAPARSE_API_KEY=${llamaparse_api_key}\",\n  \"SLACK_WEBHOOK_URL=${slack_webhook_url}\",\n  \"POSTHOG_API_KEY=${posthog_api_key}\",\n  \"POSTHOG_HOST=${posthog_host}\",\n  \"MAX_CPU=0.8\",\n  \"MAX_RAM=0.8\"\n]\nmounts = []\n\n[[config.domains]]\nserviceName = \"api\"\nport = 3002\nhost = \"${main_domain}\"\npath = \"/\"\n\n "
}
Links
Tags
api, crawler, scraping, data-extraction, llm
Version: latest