Fix write-check by disabling verification by default, since it isn't working yet. Also add tests

This commit is contained in:
pleb 2026-03-22 20:00:16 -07:00
parent dc5023d272
commit ef7463a29a
8 changed files with 292 additions and 25 deletions

View File

@ -9,6 +9,8 @@ export LOG_LEVEL=info
# Optional write confirmation probe settings:
export WRITE_CHECK_ENABLED=true
# false = consider publish OK sufficient; true = require read-after-write confirmation
export WRITE_CHECK_VERIFY_READ=false
export WRITE_CHECK_KIND=30078
# WRITE_CHECK_PRIVKEY accepts either:
# - nsec1... string

View File

@ -7,6 +7,7 @@ Production-oriented Prometheus exporter for monitoring a fixed set of Nostr rela
- Probes each configured relay either on an interval, or on-demand when `/metrics` is scraped.
- Uses `@nostrwatch/nocap` for `open` + `read` checks.
- Optionally performs a low-noise write confirmation check (kind `30078` by default) using deterministic `d` tags.
- By default, treats relay `OK` for publish as success; optional read-after-write verification can be enabled.
- Exposes:
- `/metrics` for Prometheus scraping
- `/healthz` for process and probe-loop health
@ -48,6 +49,7 @@ Environment variables:
- `PORT` (default: `9464`)
- `LOG_LEVEL` (default: `info`; one of `debug|info|warn|error`)
- `WRITE_CHECK_ENABLED` (default: `true`)
- `WRITE_CHECK_VERIFY_READ` (default: `false`; when `true`, require event read-back after publish)
- `WRITE_CHECK_KIND` (default: `30078`)
- `WRITE_CHECK_PRIVKEY` (optional; supports `nsec1...` or 64-char hex)
@ -79,6 +81,34 @@ Run tests:
pnpm test
```
Run the live write-confirm diagnostic for `offchain.pub` (opt-in):
```bash
LIVE_RELAY_TEST_OFFCHAIN=1 \
LIVE_RELAY_TEST_RELAYS="wss://offchain.pub" \
pnpm test test/live.offchain.test.ts
```
Full example with write verification enabled:
```bash
LIVE_RELAY_TEST_OFFCHAIN=1 \
LIVE_RELAY_TEST_RELAYS="wss://offchain.pub" \
LIVE_RELAY_TEST_WRITE_VERIFY_READ=1 \
LIVE_RELAY_TEST_TIMEOUT_SECONDS=8 \
LIVE_RELAY_TEST_EXPECT_NO_FAILURES=1 \
pnpm test test/live.offchain.test.ts
```
Optional knobs:
- `LIVE_RELAY_TEST_SAMPLES` (default `12`)
- `LIVE_RELAY_TEST_SCRAPE_EVERY_MS` (default `1500`)
- `LIVE_RELAY_TEST_TIMEOUT_SECONDS` (default `8`)
- `LIVE_RELAY_TEST_RELAYS` (default `"wss://offchain.pub"`; comma-separated relay list)
- `LIVE_RELAY_TEST_WRITE_VERIFY_READ=1` to enable read-after-write verification (disabled by default)
- `LIVE_RELAY_TEST_EXPECT_NO_FAILURES=1` to make the test fail on any write-confirm/probe failures
## Exposed metrics
Relay-level labels use `{relay}` unless stated:

View File

@ -8,7 +8,7 @@
"dev": "tsx watch src/index.ts",
"build": "tsc -p tsconfig.json",
"start": "node dist/index.js",
"test": "tsx --test test/config.test.ts test/smoke.test.ts"
"test": "tsx --test test/config.test.ts test/smoke.test.ts test/live.offchain.test.ts"
},
"keywords": [
"nostr",

View File

@ -10,6 +10,7 @@ export interface RelayTarget {
export interface WriteCheckConfig {
enabled: boolean;
verifyRead: boolean;
kind: number;
privkey?: string;
}
@ -34,6 +35,7 @@ const DEFAULT_LISTEN_ADDR = "0.0.0.0";
const DEFAULT_PORT = 9464;
const DEFAULT_LOG_LEVEL: LogLevel = "info";
const DEFAULT_WRITE_CHECK_ENABLED = true;
const DEFAULT_WRITE_CHECK_VERIFY_READ = false;
const DEFAULT_WRITE_CHECK_KIND = 30078;
const MIN_TIMEOUT_SECONDS = 1;
@ -170,6 +172,10 @@ export function loadConfig(env: NodeJS.ProcessEnv = process.env): AppConfig {
}
const writeEnabledFlag = parseBoolean(env.WRITE_CHECK_ENABLED, DEFAULT_WRITE_CHECK_ENABLED);
const writeVerifyReadFlag = parseBoolean(
env.WRITE_CHECK_VERIFY_READ,
DEFAULT_WRITE_CHECK_VERIFY_READ,
);
const writeCheckKind = parseInteger(
env.WRITE_CHECK_KIND,
DEFAULT_WRITE_CHECK_KIND,
@ -200,6 +206,7 @@ export function loadConfig(env: NodeJS.ProcessEnv = process.env): AppConfig {
const staleAfterMs = probeIntervalMs + probeTimeoutMs + 5_000;
const writeCheck: WriteCheckConfig = {
enabled: writeEnabledFlag,
verifyRead: writeVerifyReadFlag,
kind: writeCheckKind,
...(resolvedWritePrivkey ? { privkey: resolvedWritePrivkey } : {}),
};

View File

@ -23,6 +23,7 @@ interface RelayCheckNode {
interface NocapResult {
open?: RelayCheckNode;
read?: RelayCheckNode;
write?: RelayCheckNode;
}
interface RelayProbeState {
@ -192,8 +193,13 @@ export class RelayProber {
const adapters = Object.values(NocapEveryAdapterDefault);
await nocap.useAdapters(adapters);
const checks: string[] =
this.config.writeCheck.enabled && !this.config.writeCheck.verifyRead
? ["open", "read", "write"]
: ["open", "read"];
const result = (await withTimeout(
nocap.check(["open", "read"], true) as Promise<NocapResult>,
nocap.check(checks, true) as Promise<NocapResult>,
this.config.probeTimeoutMs * 2,
"nocap open/read",
)) as NocapResult;
@ -211,27 +217,49 @@ export class RelayProber {
}
if (this.config.writeCheck.enabled) {
const privkey = this.config.writeCheck.privkey;
if (!privkey) {
writeConfirmOk = false;
this.metrics.incProbeError(relayLabel, "write_confirm");
} else {
const writeResult = await runWriteConfirm({
relay: relay.relay,
host: relay.host,
kind: this.config.writeCheck.kind,
privkey,
timeoutMs: this.config.probeTimeoutMs,
});
writeConfirmOk = writeResult.ok;
writeDurationMs = durationOrDefault(writeResult.durationMs);
if (!writeResult.ok) {
if (!this.config.writeCheck.verifyRead) {
// Match nocap behavior: use websocket write check in the same nocap session.
const writeNode = result.write;
writeConfirmOk =
writeNode !== undefined &&
writeNode.status !== "error" &&
writeNode.data !== false;
writeDurationMs = durationOrDefault(result.write?.duration);
if (!writeConfirmOk) {
this.metrics.incProbeError(relayLabel, "write_confirm");
this.logger.warn(
{ relay: relayLabel, check: "write_confirm", reason: writeResult.reason },
{
relay: relayLabel,
check: "write_confirm",
reason: result.write?.message ?? "nocap write check failed",
},
"write confirmation failed",
);
}
} else {
const privkey = this.config.writeCheck.privkey;
if (!privkey) {
writeConfirmOk = false;
this.metrics.incProbeError(relayLabel, "write_confirm");
} else {
const writeResult = await runWriteConfirm({
relay: relay.relay,
host: relay.host,
kind: this.config.writeCheck.kind,
privkey,
verifyRead: this.config.writeCheck.verifyRead,
timeoutMs: this.config.probeTimeoutMs,
});
writeConfirmOk = writeResult.ok;
writeDurationMs = durationOrDefault(writeResult.durationMs);
if (!writeResult.ok) {
this.metrics.incProbeError(relayLabel, "write_confirm");
this.logger.warn(
{ relay: relayLabel, check: "write_confirm", reason: writeResult.reason },
"write confirmation failed",
);
}
}
}
}

View File

@ -6,6 +6,7 @@ interface WriteConfirmInput {
host: string;
kind: number;
privkey: string;
verifyRead: boolean;
timeoutMs: number;
}
@ -108,6 +109,13 @@ export async function runWriteConfirm(input: WriteConfirmInput): Promise<WriteCo
await withTimeout(relay.publish(event), input.timeoutMs, "relay publish");
if (!input.verifyRead) {
return {
ok: true,
durationMs: Math.round(performance.now() - startedAt),
};
}
const elapsedMs = performance.now() - startedAt;
const remainingMs = Math.max(1, input.timeoutMs - Math.floor(elapsedMs));

View File

@ -3,15 +3,14 @@ import assert from "node:assert/strict";
import { loadConfig } from "../src/config.js";
test("loadConfig parses valid relays and ignores invalid entries", () => {
test("loadConfig parses relay input and ignores invalid entries", () => {
const config = loadConfig({
RELAYS: "wss://relay.damus.io,not-a-url,wss://nos.lol",
RELAYS: "wss://offchain.pub,not-a-url,wss://offchain.pub",
WRITE_CHECK_ENABLED: "false",
});
assert.equal(config.relays.length, 2);
assert.equal(config.relays[0]?.relay, "wss://relay.damus.io/");
assert.equal(config.relays[1]?.relay, "wss://nos.lol/");
assert.equal(config.relays.length, 1);
assert.equal(config.relays[0]?.relay, "wss://offchain.pub/");
assert.equal(config.writeCheck.enabled, false);
assert.equal(config.probeIntervalMs, 0);
assert.ok(config.warnings.some((w) => w.includes("Ignoring invalid relay URL")));
@ -19,7 +18,7 @@ test("loadConfig parses valid relays and ignores invalid entries", () => {
test("loadConfig generates write-check private key when missing", () => {
const config = loadConfig({
RELAYS: "wss://relay.damus.io",
RELAYS: "wss://offchain.pub",
WRITE_CHECK_ENABLED: "true",
});
@ -33,7 +32,7 @@ test("loadConfig generates write-check private key when missing", () => {
test("loadConfig generates write-check private key when configured key is invalid", () => {
const config = loadConfig({
RELAYS: "wss://relay.damus.io",
RELAYS: "wss://offchain.pub",
WRITE_CHECK_ENABLED: "true",
WRITE_CHECK_PRIVKEY: "not-a-valid-key",
});
@ -46,3 +45,16 @@ test("loadConfig generates write-check private key when configured key is invali
"expected warning about invalid configured key",
);
});
test("loadConfig sets write-check read verification flag", () => {
const defaultConfig = loadConfig({
RELAYS: "wss://offchain.pub",
});
assert.equal(defaultConfig.writeCheck.verifyRead, false);
const verifyConfig = loadConfig({
RELAYS: "wss://offchain.pub",
WRITE_CHECK_VERIFY_READ: "true",
});
assert.equal(verifyConfig.writeCheck.verifyRead, true);
});

180
test/live.offchain.test.ts Normal file
View File

@ -0,0 +1,180 @@
import test from "node:test";
import assert from "node:assert/strict";
import { spawn, type ChildProcessByStdio } from "node:child_process";
import http from "node:http";
import type { Readable } from "node:stream";
import { setTimeout as sleep } from "node:timers/promises";
const ENABLED = process.env.LIVE_RELAY_TEST_OFFCHAIN === "1";
const SCRAPE_SAMPLES = Number.parseInt(process.env.LIVE_RELAY_TEST_SAMPLES ?? "12", 10);
const SCRAPE_EVERY_MS = Number.parseInt(process.env.LIVE_RELAY_TEST_SCRAPE_EVERY_MS ?? "1500", 10);
const PROBE_TIMEOUT_SECONDS = Number.parseInt(process.env.LIVE_RELAY_TEST_TIMEOUT_SECONDS ?? "8", 10);
const WRITE_VERIFY_READ = process.env.LIVE_RELAY_TEST_WRITE_VERIFY_READ === "1";
const ENFORCE_NO_WRITE_CONFIRM_FAILURES = process.env.LIVE_RELAY_TEST_EXPECT_NO_FAILURES === "1";
function randomPort(): number {
return 20_000 + Math.floor(Math.random() * 10_000);
}
function httpGet(
port: number,
path: string,
): Promise<{ statusCode: number; body: string; headers: http.IncomingHttpHeaders }> {
return new Promise((resolve, reject) => {
const req = http.request(
{
hostname: "127.0.0.1",
port,
path,
method: "GET",
timeout: 5_000,
},
(res) => {
const chunks: Buffer[] = [];
res.on("data", (chunk) => {
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
});
res.on("end", () => {
resolve({
statusCode: res.statusCode ?? 0,
body: Buffer.concat(chunks).toString("utf8"),
headers: res.headers,
});
});
},
);
req.on("timeout", () => {
req.destroy(new Error(`timeout GET ${path}`));
});
req.on("error", reject);
req.end();
});
}
type ExporterChild = ChildProcessByStdio<null, Readable, Readable>;
async function waitForStartup(child: ExporterChild, port: number, timeoutMs: number): Promise<void> {
const started = Date.now();
while (Date.now() - started < timeoutMs) {
if (child.exitCode !== null) {
throw new Error(`exporter exited early with code ${child.exitCode}`);
}
try {
const res = await httpGet(port, "/healthz");
if ([200, 503].includes(res.statusCode)) return;
} catch {
// server not up yet
}
await sleep(150);
}
throw new Error("timeout waiting for exporter startup");
}
function metricValue(metrics: string, name: string, labels: Record<string, string>): number | null {
const entries = Object.entries(labels);
const labelsPattern = entries
.map(([k, v]) => `${k}="${v.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}"`)
.join(",");
const linePattern = new RegExp(`^${name}\\{${labelsPattern}\\} (-?\\d+(?:\\.\\d+)?)$`, "m");
const match = metrics.match(linePattern);
if (!match?.[1]) return null;
const parsed = Number.parseFloat(match[1]);
return Number.isFinite(parsed) ? parsed : null;
}
test(
"live exporter probe diagnostic against offchain.pub",
{ skip: !ENABLED },
async (t) => {
const relayInput = process.env.LIVE_RELAY_TEST_RELAYS ?? "wss://offchain.pub";
const relays = relayInput
.split(",")
.map((v) => v.trim())
.filter((v) => v.length > 0);
assert.ok(relays.length > 0, "LIVE_RELAY_TEST_RELAYS must include at least one relay");
let relayLabel: string;
try {
relayLabel = new URL(relays[0]).toString();
} catch {
assert.fail(`invalid first relay URL in LIVE_RELAY_TEST_RELAYS: ${relays[0]}`);
}
const port = randomPort();
const child = spawn("pnpm", ["exec", "tsx", "src/index.ts"], {
cwd: process.cwd(),
env: {
...process.env,
RELAYS: relays.join(","),
PROBE_INTERVAL_SECONDS: "1",
PROBE_TIMEOUT_SECONDS: String(PROBE_TIMEOUT_SECONDS),
WRITE_CHECK_ENABLED: "true",
WRITE_CHECK_VERIFY_READ: WRITE_VERIFY_READ ? "true" : "false",
PORT: String(port),
LOG_LEVEL: "error",
},
stdio: ["ignore", "pipe", "pipe"],
});
let stderr = "";
child.stderr.on("data", (d: Buffer) => {
stderr += d.toString("utf8");
});
t.after(() => {
if (child.exitCode === null) {
child.kill("SIGTERM");
}
});
await waitForStartup(child, port, 10_000);
let lastMetrics = "";
for (let i = 0; i < SCRAPE_SAMPLES; i += 1) {
const metrics = await httpGet(port, "/metrics");
assert.equal(metrics.statusCode, 200);
lastMetrics = metrics.body;
await sleep(SCRAPE_EVERY_MS);
}
const writeConfirmOk = metricValue(lastMetrics, "nostr_relay_write_confirm_ok", { relay: relayLabel });
const writeConfirmErrors = metricValue(lastMetrics, "nostr_relay_probe_errors_total", {
relay: relayLabel,
check: "write_confirm",
});
const successRuns = metricValue(lastMetrics, "nostr_relay_probe_runs_total", {
relay: relayLabel,
result: "success",
});
const failureRuns = metricValue(lastMetrics, "nostr_relay_probe_runs_total", {
relay: relayLabel,
result: "failure",
});
const diagnostic = {
relay: relayLabel,
relays,
scrapeSamples: SCRAPE_SAMPLES,
scrapeEveryMs: SCRAPE_EVERY_MS,
writeVerifyRead: WRITE_VERIFY_READ,
writeConfirmOk,
writeConfirmErrors,
successRuns,
failureRuns,
crashed: child.exitCode !== null,
};
console.log("[live-offchain-exporter]", JSON.stringify(diagnostic));
assert.equal(child.exitCode, null, `exporter crashed unexpectedly: ${stderr}`);
assert.notEqual(writeConfirmOk, null, "missing nostr_relay_write_confirm_ok metric");
assert.ok(
successRuns !== null || failureRuns !== null,
"missing nostr_relay_probe_runs_total metrics",
);
if (ENFORCE_NO_WRITE_CONFIRM_FAILURES) {
assert.equal(writeConfirmErrors ?? 0, 0, `write_confirm failures seen: ${JSON.stringify(diagnostic)}`);
assert.equal(failureRuns ?? 0, 0, `probe failures seen: ${JSON.stringify(diagnostic)}`);
}
},
);