summaryrefslogtreecommitdiffhomepage
path: root/bin/test
blob: a7ce5253d1d35fdf5ad71d11ad6bee97f52754f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env bash
set -euo pipefail

# Point GPG at the current terminal so a terminal-based pinentry can prompt
# there (required for SSH sessions). `tty` prints "not a tty" and exits
# non-zero when stdin is not a terminal, so export the value only when a real
# device path came back; otherwise leave any inherited GPG_TTY untouched.
if gpg_tty="$(tty 2>/dev/null)"; then
  export GPG_TTY="$gpg_tty"
fi
unset gpg_tty

# ----------------------------------------------------------------------------
# bin/test — verify a running Firecrawl deployment by hitting it from the host
#            (i.e. outside the Docker network), simulating an external client
#            such as an AI agent.
#
# Usage:
#   bin/test                     # local dev — hits http://127.0.0.1:31329
#   bin/test https://my.host     # remote   — hits the given base URL
#
# The API key is read from gopass at projects/firecrawl-dokploy/dev/api_key
# (override by exporting TEST_API_KEY before running).
# ----------------------------------------------------------------------------

# Target base URL: first positional argument when given and non-empty,
# otherwise the local dev endpoint. One trailing "/" is dropped so request
# paths can be appended directly.
if [ -n "${1:-}" ]; then
  BASE_URL=$1
else
  BASE_URL='http://127.0.0.1:31329'
fi
BASE_URL=${BASE_URL%/}

# Resolve the API key: an exported TEST_API_KEY wins, otherwise ask gopass.
if [ -z "${TEST_API_KEY:-}" ]; then
  # Guard the lookup: under `set -e` a failing gopass (missing binary, locked
  # store, unknown secret) would otherwise abort the script right here, before
  # the explanatory message below could be printed.
  TEST_API_KEY="$(gopass show -o projects/firecrawl-dokploy/dev/api_key)" || TEST_API_KEY=""
fi

# Refuse to continue without a key; every authenticated request needs it.
if [ -z "${TEST_API_KEY:-}" ]; then
  echo >&2 "ERROR: TEST_API_KEY is empty (gopass returned nothing and env was unset)"
  exit 1
fi

# ----------------------------------------------------------------------------
# Output capture: tee everything to tmp/test-<timestamp>.log AND the most recent
# run to tmp/test-latest.log, while still printing to the terminal.
# ----------------------------------------------------------------------------
# Resolve <repo>/tmp relative to this script's own location, not the caller's
# working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$REPO_ROOT/tmp"
mkdir -p "$TMP_DIR"
LOG_FILE="$TMP_DIR/test-$(date +%Y%m%d-%H%M%S).log"
LATEST_LOG="$TMP_DIR/test-latest.log"

# Keep the original stdout/stderr on spare descriptors so they can be restored
# at exit; restoring them closes this shell's write end of the pipe to tee.
exec 3>&1 4>&2

# Redirect all stdout+stderr through tee. Use a process substitution so the
# script keeps its normal exit code, and remember tee's PID so the exit
# handler can wait for it.
exec > >(tee "$LOG_FILE") 2>&1
TEE_PID=$!

# Flush the log and mirror it to test-latest.log (a copy, not a symlink, so it
# survives `rm tmp/test-2*.log`-style cleanups of the timestamped files).
# tee runs asynchronously, so copying without waiting for it could capture a
# truncated log. Safe to call more than once. Does not call `exit`, so the
# script's own exit status is preserved.
finalize_log() {
  if [ -n "${TEE_PID:-}" ]; then
    exec 1>&3 2>&4 3>&- 4>&-
    # Best effort: shells that cannot wait on a process substitution just
    # fall through to the copy.
    wait "$TEE_PID" 2>/dev/null || true
    TEE_PID=""
  fi
  cp -f "$LOG_FILE" "$LATEST_LOG" 2>/dev/null || true
}
trap finalize_log EXIT

# ----------------------------------------------------------------------------
# Pretty output helpers
# ----------------------------------------------------------------------------
# ANSI escape sequences for coloured/bold terminal output.
printf -v RED    '\033[0;31m'
printf -v GREEN  '\033[0;32m'
printf -v YELLOW '\033[0;33m'
printf -v BLUE   '\033[0;34m'
printf -v BOLD   '\033[1m'
printf -v RESET  '\033[0m'

# Running tallies of passed / failed checks.
PASS=0 FAIL=0

# Print a section heading ("== title ==") in bold blue, preceded by a blank
# line.
# Args: $1 - heading text
section() {
  local title=$1
  printf '\n%s%s== %s ==%s\n' "$BOLD" "$BLUE" "$title" "$RESET"
}

# Report a passing check (green tick) and increment the PASS counter.
# Args: $1 - message to print
ok() {
  local msg=$1
  printf '  %s%s%s %s\n' "$GREEN" '✓' "$RESET" "$msg"
  : "$((PASS += 1))"
}

# Report a failing check (red cross) and increment the FAIL counter.
# Args: $1 - message to print
bad() {
  local msg=$1
  printf '  %s%s%s %s\n' "$RED" '✗' "$RESET" "$msg"
  : "$((FAIL += 1))"
}

# Print an informational line (yellow ellipsis); touches neither counter.
# Args: $1 - message to print
info() {
  local note=$1
  printf '  %s%s%s %s\n' "$YELLOW" '…' "$RESET" "$note"
}

# ----------------------------------------------------------------------------
# Issue a JSON request with the bearer token, capture body + status
# Args: METHOD PATH [JSON_BODY]
# Reads: BASE_URL, TEST_API_KEY
# Sets: HTTP_STATUS (HTTP status code, or exactly "000" when curl fails),
#       HTTP_BODY   (response body; empty when nothing was received)
# ----------------------------------------------------------------------------
http_call() {
  local method=$1
  local path=$2
  local body=${3:-}

  # The body goes to a temp file so stdout carries only the -w status code.
  local tmp
  tmp=$(mktemp)

  local args=(
    -sS
    -o "$tmp"
    -w '%{http_code}'
    -X "$method"
    -H "Authorization: Bearer ${TEST_API_KEY}"
    -H "Content-Type: application/json"
    --connect-timeout 10
    --max-time 120
  )

  if [ -n "$body" ]; then
    args+=(-d "$body")
  fi

  # curl still prints the -w write-out (e.g. "000") when the transfer fails,
  # so a `|| echo "000"` fallback inside the substitution would append to it
  # and yield "000000". Replace the value instead of appending.
  HTTP_STATUS=$(curl "${args[@]}" "${BASE_URL}${path}") || HTTP_STATUS="000"
  HTTP_BODY=$(<"$tmp")
  rm -f "$tmp"
}

# ----------------------------------------------------------------------------
# Test runner — starts by echoing what is being tested
# ----------------------------------------------------------------------------
section "Target"
info "BASE_URL = $BASE_URL"
# Only the first 8 characters of the key are shown.
info "TEST_API_KEY = ${TEST_API_KEY::8}…"

# 1. Reachability ------------------------------------------------------------
# GET / and classify the status: 200/404/401 all count as reachable, a status
# of 000 is treated as "could not connect" and aborts the run, anything else
# is recorded as a failure and the run continues.
section "1. Reachability"
http_call GET "/"
if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "404" || "$HTTP_STATUS" == "401" ]]; then
  ok "API is reachable (HTTP $HTTP_STATUS at /)"
elif [[ "$HTTP_STATUS" == "000" ]]; then
  bad "Could not connect to ${BASE_URL} — is the stack running?"
  printf '\n%s\n' "Hint: run 'bin/up' first, or pass a different base URL."
  exit 1
else
  bad "Unexpected HTTP $HTTP_STATUS at /"
fi

# 2. Auth posture (informational) ------------------------------------------
# Self-hosted Firecrawl with USE_DB_AUTHENTICATION=false has no built-in
# bearer-token gate — any token (including a bogus one) is accepted by the
# API. We probe with a bogus token just to surface this fact in the log.
section "2. Auth posture (informational)"
# Only the status code matters here, so the response body is discarded.
# curl prints the -w write-out ("000") even when the transfer fails, so the
# fallback must replace the captured value; appending via `|| echo "000"`
# inside the substitution would report "000000".
status=$(curl -sS -o /dev/null -w '%{http_code}' \
  -X POST "${BASE_URL}/v1/scrape" \
  -H "Authorization: Bearer fc-definitely-not-a-real-key" \
  -H "Content-Type: application/json" \
  -d '{"url":"https://example.com"}' \
  --connect-timeout 10 --max-time 60) || status="000"
if [ "$status" = "401" ] || [ "$status" = "403" ]; then
  ok "bogus key rejected with HTTP $status (USE_DB_AUTHENTICATION on?)"
else
  # Not counted as a failure: an open API is the expected self-hosted default.
  info "bogus key returned HTTP $status — self-hosted Firecrawl is open by design; restrict access at Traefik/firewall level"
fi

# 3. /v1/scrape --------------------------------------------------------------
# Scrape example.com as markdown and confirm the page title text came back.
section "3. Scrape — POST /v1/scrape https://example.com"
http_call POST "/v1/scrape" '{"url":"https://example.com","formats":["markdown"]}'
if [ "$HTTP_STATUS" = "200" ]; then
  # Here-string rather than `printf | grep -q`: with pipefail, grep -q exiting
  # at the first match can SIGPIPE the writer on a large body and turn a real
  # match into a reported failure.
  if grep -qi "example domain" <<<"$HTTP_BODY"; then
    ok "scrape returned 200 and markdown contains 'Example Domain'"
  else
    bad "scrape returned 200 but markdown did not contain 'Example Domain'"
    # Truncate in the shell: `echo | head -c` can die of SIGPIPE on a large
    # body and, under `set -e` + pipefail, abort the run before the summary.
    printf '%s\n' "${HTTP_BODY:0:400}"
    echo
  fi
else
  bad "scrape failed with HTTP $HTTP_STATUS"
  printf '%s\n' "${HTTP_BODY:0:400}"
  echo
fi

# 4. /v1/search (covers SearXNG + Firecrawl scrape pipeline) -----------------
section "4. Search — POST /v1/search 'firecrawl github'"
http_call POST "/v1/search" '{"query":"firecrawl github","limit":3}'
if [ "$HTTP_STATUS" = "200" ]; then
  # Here-string rather than `printf | grep -q` (grep -q exiting early can
  # SIGPIPE the writer under pipefail and flip a match into a failure).
  # Whitespace around the colon is tolerated so pretty-printed JSON matches.
  if grep -Eq '"success"[[:space:]]*:[[:space:]]*true' <<<"$HTTP_BODY"; then
    ok "search returned 200 with success:true (SearXNG + scrape pipeline OK)"
  else
    bad "search returned 200 but body lacks success:true"
    # Truncate in the shell: `echo | head -c` can die of SIGPIPE on a large
    # body and, under `set -e` + pipefail, abort the run before the summary.
    printf '%s\n' "${HTTP_BODY:0:400}"
    echo
  fi
else
  bad "search failed with HTTP $HTTP_STATUS"
  printf '%s\n' "${HTTP_BODY:0:400}"
  echo
fi

# 5. /v1/map -----------------------------------------------------------------
# Only the status code is checked; the response body is not inspected.
section "5. Map — POST /v1/map https://example.com"
http_call POST "/v1/map" '{"url":"https://example.com"}'
if [ "$HTTP_STATUS" = "200" ]; then
  ok "map returned 200"
else
  bad "map failed with HTTP $HTTP_STATUS"
  # Truncate in the shell: `echo | head -c` can die of SIGPIPE on a large
  # body and, under `set -e` + pipefail, abort the run before the summary.
  printf '%s\n' "${HTTP_BODY:0:400}"
  echo
fi

# ----------------------------------------------------------------------------
# Summary: print the pass/fail tallies and the log path; exit 1 if any check
# failed, otherwise fall off the end with status 0.
# ----------------------------------------------------------------------------
echo
section "Summary"
printf '  %sPassed:%s %d\n  %sFailed:%s %d\n' \
  "$GREEN" "$RESET" "$PASS" \
  "$RED" "$RESET" "$FAIL"
echo

# The log location is reported on both outcomes, so print it once up front.
echo "Log: $LOG_FILE"
if (( FAIL > 0 )); then
  exit 1
fi