blob: a7ce5253d1d35fdf5ad71d11ad6bee97f52754f3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
|
#!/usr/bin/env bash
set -euo pipefail
# Point GPG at the current terminal so a terminal-based pinentry can prompt
# there (required for SSH sessions).
#
# `tty` prints "not a tty" and exits non-zero when stdin is not a terminal
# (CI, cron, piped input). `export VAR=$(cmd)` discards that exit status, which
# would export the literal message as GPG_TTY. Only export when a terminal
# really exists; otherwise leave any inherited GPG_TTY untouched.
if tty_path=$(tty 2>/dev/null); then
  export GPG_TTY="$tty_path"
fi
unset tty_path
# ----------------------------------------------------------------------------
# bin/test — verify a running Firecrawl deployment by hitting it from the host
# (i.e. outside the Docker network), simulating an external client
# such as an AI agent.
#
# Usage:
# bin/test # local dev — hits http://127.0.0.1:31329
# bin/test https://my.host # remote — hits the given base URL
#
# The API key is read from gopass at projects/firecrawl-dokploy/dev/api_key
# (override by exporting TEST_API_KEY before running).
# ----------------------------------------------------------------------------
# Base URL of the deployment under test: first CLI argument, or the local dev
# default. One trailing slash is stripped because every request path used
# below already starts with "/".
BASE_URL="${1:-http://127.0.0.1:31329}"
BASE_URL="${BASE_URL%/}"

# Resolve the API key: an exported TEST_API_KEY wins, otherwise ask gopass.
# The lookup is guarded with `||` because, under `set -e`, a failing command
# substitution in a plain assignment aborts the script on the spot — before
# the explanatory error below could ever be printed. gopass's own stderr is
# still shown.
if [ -z "${TEST_API_KEY:-}" ]; then
  TEST_API_KEY="$(gopass show -o projects/firecrawl-dokploy/dev/api_key)" || TEST_API_KEY=""
fi
if [ -z "${TEST_API_KEY:-}" ]; then
  echo >&2 "ERROR: TEST_API_KEY is empty (gopass returned nothing and env was unset)"
  exit 1
fi
# ----------------------------------------------------------------------------
# Output capture: tee everything to tmp/test-<timestamp>.log AND the most recent
# run to tmp/test-latest.log, while still printing to the terminal.
# ----------------------------------------------------------------------------
# Resolve the repository root from this script's own location so the log
# directory does not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TMP_DIR="$REPO_ROOT/tmp"
mkdir -p "$TMP_DIR"
LOG_FILE="$TMP_DIR/test-$(date +%Y%m%d-%H%M%S).log"
LATEST_LOG="$TMP_DIR/test-latest.log"

# Keep duplicates of the original stdout/stderr so the EXIT handler can detach
# from the tee pipe again.
exec 3>&1 4>&2
# Redirect all stdout+stderr through tee. Use a process substitution so the
# script keeps its normal exit code.
exec > >(tee "$LOG_FILE") 2>&1
TEE_PID=$!

# EXIT handler: mirror this run's log to test-latest.log (a plain copy, not a
# symlink, so it survives `rm tmp/test-2*.log`-style cleanups of dated logs).
# tee runs asynchronously, so the pipe is closed first and tee is awaited;
# copying earlier could capture a log that is still missing its last lines.
# Bash versions that cannot wait on a process substitution report an error
# from `wait`; that is ignored and the copy proceeds on a best-effort basis.
finish_log() {
  exec 1>&3 2>&4 3>&- 4>&-
  wait "$TEE_PID" 2>/dev/null || true
  cp -f "$LOG_FILE" "$LATEST_LOG" 2>/dev/null || true
}
trap finish_log EXIT
# ----------------------------------------------------------------------------
# Pretty output helpers
# ----------------------------------------------------------------------------
# ANSI escape sequences used by the output helpers, followed by the running
# pass/fail tallies that ok() and bad() increment.
RESET=$'\e[0m'
BOLD=$'\e[1m'
RED=$'\e[0;31m'
GREEN=$'\e[0;32m'
YELLOW=$'\e[0;33m'
BLUE=$'\e[0;34m'
FAIL=0
PASS=0
# Print a bold blue "== TITLE ==" heading preceded by a blank line.
# Globals:   BOLD, BLUE, RESET (read)
# Arguments: $1 - heading text
section() {
  local title=$1
  printf '\n%s%s== %s ==%s\n' "$BOLD" "$BLUE" "$title" "$RESET"
}
# Report a passing check with a green tick and count it.
# Globals:   GREEN, RESET (read); PASS (incremented)
# Arguments: $1 - message to print
ok() {
  local message=$1
  printf ' %s\n' "${GREEN}✓${RESET} ${message}"
  : "$((PASS += 1))"
}
# Report a failing check with a red cross and count it.
# Globals:   RED, RESET (read); FAIL (incremented)
# Arguments: $1 - message to print
bad() {
  local message=$1
  printf ' %s\n' "${RED}✗${RESET} ${message}"
  : "$((FAIL += 1))"
}
# Print an informational line with a yellow ellipsis marker; it does not
# touch the pass/fail tallies.
# Globals:   YELLOW, RESET (read)
# Arguments: $1 - message to print
info() {
  local note=$1
  printf ' %s\n' "${YELLOW}…${RESET} ${note}"
}
# ----------------------------------------------------------------------------
# Issue a JSON request, capture body + status
# Globals:   BASE_URL, TEST_API_KEY (read); HTTP_STATUS, HTTP_BODY (written)
# Arguments: $1 - HTTP method
#            $2 - request path, appended to BASE_URL
#            $3 - optional JSON request body
# Sets:      HTTP_STATUS - the response status code, or "000" whenever curl
#                          itself fails (connection refused, timeout, ...)
#            HTTP_BODY   - whatever curl wrote to the response file
# ----------------------------------------------------------------------------
http_call() {
  local method=$1
  local path=$2
  local body=${3:-}
  local tmp
  tmp=$(mktemp)
  local args=(
    -sS
    -o "$tmp"
    -w '%{http_code}'
    -X "$method"
    -H "Authorization: Bearer ${TEST_API_KEY}"
    -H "Content-Type: application/json"
    --connect-timeout 10
    --max-time 120
  )
  if [ -n "$body" ]; then
    args+=(-d "$body")
  fi
  # curl emits the -w write-out even when the transfer fails, so appending a
  # fallback inside the substitution (`|| echo "000"`) would yield "000000"
  # or "200000". Overwrite the captured value instead, so callers can match
  # on exactly "000".
  HTTP_STATUS=$(curl "${args[@]}" "${BASE_URL}${path}") || HTTP_STATUS="000"
  HTTP_BODY=$(cat "$tmp")
  rm -f "$tmp"
}
# ----------------------------------------------------------------------------
# Test runner
# ----------------------------------------------------------------------------
# Show what is being tested. Only the first 8 characters of the key are
# printed.
section "Target"
info "BASE_URL = ${BASE_URL}"
info "TEST_API_KEY = ${TEST_API_KEY:0:8}…"
# 1. Reachability ------------------------------------------------------------
# Any of 200/404/401 at "/" counts as reachable. A status of 000 is treated as
# "no connection" and stops the run, since the remaining checks cannot work.
section "1. Reachability"
http_call GET "/"
if [ "$HTTP_STATUS" = "000" ]; then
  bad "Could not connect to ${BASE_URL} — is the stack running?"
  echo
  echo "Hint: run 'bin/up' first, or pass a different base URL."
  exit 1
elif [ "$HTTP_STATUS" = "200" ] || [ "$HTTP_STATUS" = "404" ] || [ "$HTTP_STATUS" = "401" ]; then
  ok "API is reachable (HTTP $HTTP_STATUS at /)"
else
  bad "Unexpected HTTP $HTTP_STATUS at /"
fi
# 2. Auth posture (informational) ------------------------------------------
# Self-hosted Firecrawl with USE_DB_AUTHENTICATION=false has no built-in
# bearer-token gate — any token (including a bogus one) is accepted by the
# API. We probe with a bogus token just to surface this fact in the log.
section "2. Auth posture (informational)"
# Probe /v1/scrape with a deliberately bogus bearer token. Only the status
# code matters, so the body goes to /dev/null rather than a temp file.
# curl emits the -w write-out even when it fails, so a failure replaces the
# captured value with "000" instead of appending to it.
status=$(curl -sS -o /dev/null -w '%{http_code}' \
  -X POST "${BASE_URL}/v1/scrape" \
  -H "Authorization: Bearer fc-definitely-not-a-real-key" \
  -H "Content-Type: application/json" \
  -d '{"url":"https://example.com"}' \
  --connect-timeout 10 --max-time 60) || status="000"
# This step never calls bad(): a rejected key counts as a pass, anything else
# is only logged.
if [ "$status" = "401" ] || [ "$status" = "403" ]; then
  ok "bogus key rejected with HTTP $status (USE_DB_AUTHENTICATION on?)"
else
  info "bogus key returned HTTP $status — self-hosted Firecrawl is open by design; restrict access at Traefik/firewall level"
fi
# 3. /v1/scrape --------------------------------------------------------------
# Scrape https://example.com as markdown and check that the response mentions
# "Example Domain" (case-insensitive). On failure, the first 400 characters of
# the body are printed for diagnosis.
section "3. Scrape — POST /v1/scrape https://example.com"
http_call POST "/v1/scrape" '{"url":"https://example.com","formats":["markdown"]}'
if [ "$HTTP_STATUS" = "200" ]; then
  # Feed grep from a here-string rather than `printf | grep -q`: with
  # `pipefail`, grep -q exiting on the first match can kill the writer with
  # SIGPIPE on a large body and make a successful match look like a failure.
  if grep -qi "example domain" <<<"$HTTP_BODY"; then
    ok "scrape returned 200 and markdown contains 'Example Domain'"
  else
    bad "scrape returned 200 but markdown did not contain 'Example Domain'"
    # Truncate with parameter expansion; `echo | head -c` can abort the
    # script under `set -e -o pipefail` once head closes the pipe early.
    printf '%s\n' "${HTTP_BODY:0:400}"
  fi
else
  bad "scrape failed with HTTP $HTTP_STATUS"
  printf '%s\n' "${HTTP_BODY:0:400}"
fi
# 4. /v1/search (covers SearXNG + Firecrawl scrape pipeline) -----------------
# Run a small search and check that the response reports success. On failure,
# the first 400 characters of the body are printed for diagnosis.
section "4. Search — POST /v1/search 'firecrawl github'"
http_call POST "/v1/search" '{"query":"firecrawl github","limit":3}'
if [ "$HTTP_STATUS" = "200" ]; then
  # Allow optional whitespace around the colon so a pretty-printed response
  # still matches. A here-string is used instead of `printf | grep -q`, which
  # can fail spuriously under `pipefail` when grep exits before the writer.
  if grep -Eq '"success"[[:space:]]*:[[:space:]]*true' <<<"$HTTP_BODY"; then
    ok "search returned 200 with success:true (SearXNG + scrape pipeline OK)"
  else
    bad "search returned 200 but body lacks success:true"
    # Truncate with parameter expansion; `echo | head -c` can abort the
    # script under `set -e -o pipefail` once head closes the pipe early.
    printf '%s\n' "${HTTP_BODY:0:400}"
  fi
else
  bad "search failed with HTTP $HTTP_STATUS"
  printf '%s\n' "${HTTP_BODY:0:400}"
fi
# 5. /v1/map -----------------------------------------------------------------
# Map https://example.com; only the status code is checked.
section "5. Map — POST /v1/map https://example.com"
http_call POST "/v1/map" '{"url":"https://example.com"}'
if [ "$HTTP_STATUS" = "200" ]; then
  ok "map returned 200"
else
  bad "map failed with HTTP $HTTP_STATUS"
  # Truncate with parameter expansion; `echo | head -c` can abort the script
  # under `set -e -o pipefail` once head closes the pipe early.
  printf '%s\n' "${HTTP_BODY:0:400}"
fi
# ----------------------------------------------------------------------------
# Summary
# ----------------------------------------------------------------------------
# Print the tallies and the log location, then exit non-zero if any check
# failed.
echo
section "Summary"
printf " %sPassed:%s %d\n" "$GREEN" "$RESET" "$PASS"
printf " %sFailed:%s %d\n" "$RED" "$RESET" "$FAIL"
echo
# The log path is shown on both outcomes, so print it once before deciding
# the exit status.
echo "Log: $LOG_FILE"
[ "$FAIL" -le 0 ] || exit 1
|