Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.autousers.ai/llms.txt

Use this file to discover all available pages before exploring further.

Four worked examples that customers actually run in production.

1. Slack notification on evaluation.completed

A Vercel function receiver that posts to Slack when an eval finishes.
// app/api/webhooks/autousers/route.ts
import crypto from "node:crypto";

// Route config: request the Node.js runtime (this file imports `node:crypto`).
export const runtime = "nodejs";

// Shared secret used as the HMAC key when checking the signature header.
// NOTE(review): `!` only silences the type checker; if the variable is unset
// the value is `undefined` at runtime — confirm it is configured in the deploy.
const SECRET = process.env.AUTOUSERS_WEBHOOK_SECRET!;
// URL the handler POSTs the Slack message payload to.
const SLACK = process.env.SLACK_WEBHOOK_URL!;

/**
 * Verifies the `autousers-signature` header against the raw request body.
 *
 * The header is parsed as a comma-separated list of `key=value` pairs: `t` is
 * a Unix timestamp in seconds and `v1` is the hex HMAC-SHA256 of
 * `${t}.${rawBody}` keyed with `SECRET`.
 *
 * @param rawBody - The unparsed request body, exactly as received.
 * @param header - The signature header value, or `null` when it is absent.
 * @returns `true` only when the timestamp is within 300 seconds of now and the
 *   signature matches; `false` for any missing, malformed or stale input.
 */
function verify(rawBody: string, header: string | null): boolean {
  if (!header) return false;

  // Split each pair on its first "=" only and tolerate whitespace after commas.
  const parts = new Map<string, string>();
  for (const pair of header.split(",")) {
    const eq = pair.indexOf("=");
    if (eq === -1) continue;
    parts.set(pair.slice(0, eq).trim(), pair.slice(eq + 1).trim());
  }

  // Reject missing/non-numeric timestamps and anything outside the replay window.
  const t = Number(parts.get("t"));
  if (!t || Math.abs(Date.now() / 1000 - t) > 300) return false;

  // A header without `v1` must be rejected, not crash: Buffer.from(undefined)
  // throws a TypeError.
  const signature = parts.get("v1");
  if (!signature) return false;

  const expected = crypto
    .createHmac("sha256", SECRET)
    .update(`${t}.${rawBody}`)
    .digest("hex");
  const a = Buffer.from(signature, "hex");
  const b = Buffer.from(expected, "hex");
  // timingSafeEqual throws on length mismatch, so compare lengths first.
  return a.length === b.length && crypto.timingSafeEqual(a, b);
}

/**
 * Webhook receiver: verifies the signature, ignores everything except
 * `evaluation.completed`, and posts a summary message to Slack.
 *
 * @param req - The incoming webhook request; the body is read as raw text so
 *   the signature is checked against the exact bytes that were sent.
 * @returns 400 when the signature is invalid, 204 for event types this handler
 *   does not process, 502 when Slack rejects the message, otherwise 200.
 */
export async function POST(req: Request): Promise<Response> {
  const raw = await req.text();
  if (!verify(raw, req.headers.get("autousers-signature"))) {
    return new Response("invalid signature", { status: 400 });
  }
  // Parsed only after the signature check, so the payload comes from a trusted sender.
  const event = JSON.parse(raw);
  if (event.type !== "evaluation.completed") {
    return new Response(null, { status: 204 });
  }
  const e = event.data.object;
  // `summary` is treated as optional for both fields so a missing summary
  // cannot throw while building the message.
  const alpha = e.summary?.krippendorffAlpha?.toFixed(2) ?? "n/a";
  const ratingCount = e.summary?.ratingCount ?? 0;
  const text = [
    `:white_check_mark: *${e.name}* finished`,
    `Krippendorff α: *${alpha}* (${ratingCount} ratings)`,
    `<https://app.autousers.ai/evaluations/${e.id}/results|Open results>`,
  ].join("\n");

  const slackRes = await fetch(SLACK, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ text }),
  });
  // Do not acknowledge the delivery when the notification was not accepted;
  // a non-2xx response reports the delivery as failed so it can be retried.
  if (!slackRes.ok) {
    return new Response("slack delivery failed", { status: 502 });
  }

  return new Response(null, { status: 200 });
}
Register the endpoint once:
# Create a webhook subscription for the receiver. Reads the API key from
# $AUTOUSERS_API_KEY; "url" is the public address of the route, and
# "enabled_events" limits deliveries to evaluation.completed.
curl -X POST https://app.autousers.ai/api/v1/webhooks \
  -H "Authorization: Bearer $AUTOUSERS_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://your.app/api/webhooks/autousers",
    "enabled_events": ["evaluation.completed"]
  }'

2. GitHub Actions gate on Krippendorff α

Block the merge when rater agreement (Krippendorff α) on the pull request’s preview deploy drops below a threshold.
# .github/workflows/eval-gate.yml
# Runs a small Autousers evaluation against the pull request's preview deploy
# and fails the job when Krippendorff α is below THRESHOLD.
name: UX gate
on:
  pull_request:
    paths: ["app/**"]
jobs:
  ux-gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-node@v5
        with: { node-version: 22 }
      - run: npm i -g @autousers/cli

      # NOTE: PREVIEW_URL is read from a step with `id: preview` that exposes a
      # `preview_url` output. That deploy step is not shown here and must run
      # before this one.
      - name: Smoke eval against preview deploy
        env:
          AUTOUSERS_API_KEY: ${{ secrets.AUTOUSERS_API_KEY }}
          PREVIEW_URL: ${{ steps.preview.outputs.preview_url }}
        run: |
          set -euo pipefail

          # Stop before spending an eval if the preview step produced no URL.
          : "${PREVIEW_URL:?PREVIEW_URL is empty; a step with id preview must output preview_url}"

          EVAL_ID=$(autousers eval create \
            --template accessibility \
            --url "$PREVIEW_URL" \
            --autousers first-time-buyer:3,accessibility-first:2 \
            --json | jq -r .id)

          autousers eval run "$EVAL_ID"
          autousers eval wait "$EVAL_ID" --timeout 900

          ALPHA=$(autousers eval agreement "$EVAL_ID" --json | jq -r .krippendorff.alpha)
          echo "alpha=$ALPHA"

          # Fail closed: anything that is not a plain number (empty, "null",
          # an error string) blocks the merge instead of slipping past the
          # comparison below.
          if ! [[ "$ALPHA" =~ ^-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$ ]]; then
            echo "::error ::Could not read a numeric α (got: $ALPHA)"
            echo "Open https://app.autousers.ai/evaluations/$EVAL_ID/results"
            exit 1
          fi

          THRESHOLD=0.6
          # awk does the floating-point comparison; it exits 0 when α < threshold.
          if awk -v a="$ALPHA" -v t="$THRESHOLD" 'BEGIN { exit !(a + 0 < t + 0) }'; then
            echo "::error ::UX α=$ALPHA below threshold $THRESHOLD"
            echo "Open https://app.autousers.ai/evaluations/$EVAL_ID/results"
            exit 1
          fi
The CLI’s eval wait blocks on the underlying evaluation.completed webhook (or polls if no webhook receiver is configured for the team). You don’t need to register a webhook to use this recipe — but it finishes faster if you do.

3. Linear ticket on autouser_run.failed

Auto-create a Linear ticket when a run errors. Useful for catching flaky stimuli, expired auth on staging, or outright worker bugs.
// app/api/webhooks/autousers/route.ts (continued from recipe 1)
// Open a Linear issue for every failed autouser run.
if (event.type === "autouser_run.failed") {
  const r = event.data.object;
  const linearRes = await fetch("https://api.linear.app/graphql", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: process.env.LINEAR_API_KEY!,
    },
    body: JSON.stringify({
      query: `
        mutation IssueCreate($input: IssueCreateInput!) {
          issueCreate(input: $input) { success issue { id identifier url } }
        }
      `,
      variables: {
        input: {
          teamId: process.env.LINEAR_TEAM_ID!,
          title: `Autouser run failed: ${r.id}`,
          description: [
            `**Evaluation:** ${r.evaluationId}`,
            `**Persona:** ${r.autouserId}`,
            `**Error:** ${r.error}`,
            `**Cost burned:** $${r.estimatedCostUsd}`,
            ``,
            `https://app.autousers.ai/evaluations/${r.evaluationId}/autouser-runs/${r.id}`,
          ].join("\n"),
          priority: 2,
          labelIds: [process.env.LINEAR_FAILURE_LABEL_ID!],
        },
      },
    }),
  });

  // A GraphQL call can fail at the HTTP level or inside the response body, so
  // check the status and the `success` flag the mutation selects. Throwing
  // keeps the receiver from acknowledging a delivery whose ticket was never
  // created.
  const result = linearRes.ok ? await linearRes.json() : null;
  if (!result?.data?.issueCreate?.success) {
    throw new Error(
      `Linear issueCreate failed (HTTP ${linearRes.status}) for run ${r.id}`
    );
  }
}
Subscribe with enabled_events: ["autouser_run.failed"] (or ["*"] if you have a single fan-out receiver).

4. Looker / BigQuery sync on rating.created

Stream every rating into a warehouse table for trend analysis. The schema mirrors the Rating shape.
// app/api/webhooks/autousers/route.ts (continued)
import { BigQuery } from "@google-cloud/bigquery";
const bq = new BigQuery();

// Stream each new rating into the warehouse table as a single row.
if (event.type === "rating.created") {
  const r = event.data.object;
  await bq
    .dataset(process.env.BQ_DATASET!)
    .table("autousers_ratings")
    .insert(
      [
        {
          // Raw row form: `insertId` travels as streaming metadata, not as a
          // column, so BigQuery can use the event id for best-effort dedup of
          // retried deliveries. The column values go under `json`.
          insertId: event.id,
          json: {
            rating_id: r.id,
            evaluation_id: r.evaluationId,
            comparison_id: r.comparisonId,
            rater_type: r.raterType,
            autouser_id: r.autouserId,
            autouser_run_id: r.autouserRunId,
            user_id: r.userId,
            public_rater_id: r.publicRaterId,
            rubric_version: r.rubricVersion,
            // JSON columns are sent as serialized strings.
            dimension_ratings: JSON.stringify(r.dimensionRatings),
            factors: r.factors ? JSON.stringify(r.factors) : null,
            justification: r.justification,
            time_spent_seconds: r.timeSpentSeconds,
            created_at: r.createdAt,
          },
        },
      ],
      // `raw: true` tells the client the rows are already in
      // { insertId, json } form. Unknown fields still fail the insert, so
      // schema drift is noticed rather than dropped.
      { raw: true, ignoreUnknownValues: false }
    );
}
Schema:
-- Destination table for the rating.created sync; one row per rating.
CREATE TABLE `your_project.autousers.autousers_ratings` (
  rating_id STRING,
  evaluation_id STRING,
  comparison_id STRING,
  rater_type STRING,
  autouser_id STRING,
  autouser_run_id STRING,
  user_id STRING,
  public_rater_id STRING,
  rubric_version STRING,
  -- The receiver writes these two as JSON.stringify(...) output; factors is
  -- NULL when the rating carries none.
  dimension_ratings JSON,
  factors JSON,
  justification STRING,
  time_spent_seconds INT64,
  created_at TIMESTAMP
)
-- Daily partitions on the rating timestamp, clustered by evaluation.
PARTITION BY DATE(created_at)
CLUSTER BY evaluation_id;
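Use BigQuery’s insertId deduplication or a manual MERGE keyed on rating_id to handle webhook retries. insertId deduplication is best-effort and only covers a short window, so prefer the MERGE when you need exactly one row per rating. For Looker, point a view at the partitioned table; α and dimension distributions update in near-real time as deliveries land.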