diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml
index b05bb475..bcb66e20 100644
--- a/.github/workflows/evals.yml
+++ b/.github/workflows/evals.yml
@@ -94,8 +94,7 @@ jobs:
             "${TARGET_FLAG[@]}" \
             --workers 3 \
             --threshold ${{ steps.filter.outputs.threshold }} \
-            --output .agentv/ci-results/artifacts \
-            --export .agentv/ci-results/junit.xml
+            --output .agentv/ci-results/artifacts
           EXIT_CODE=$?
 
           echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
@@ -104,16 +103,6 @@ jobs:
         if: always()
         run: bun run scripts/ci-summary.ts .agentv/ci-results >> "$GITHUB_STEP_SUMMARY"
 
-      - name: Publish JUnit test results
-        if: always()
-        continue-on-error: true
-        uses: dorny/test-reporter@v1
-        with:
-          name: AgentV Eval Results
-          path: .agentv/ci-results/junit.xml
-          reporter: java-junit
-          fail-on-error: false
-
       - name: Upload eval artifacts
         if: always()
         uses: actions/upload-artifact@v4
diff --git a/AGENTS.md b/AGENTS.md
index b3c34a9b..0ac8fd54 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -384,8 +384,7 @@ Unit tests alone are insufficient for grader changes. After implementing or modi
 ```bash
-# 1. Run the eval, writing results to a sibling *.results.jsonl file
+# 1. Run the eval, writing results to examples/path/to/suite.run/index.jsonl
 bun apps/cli/src/cli.ts eval examples/path/to/suite.eval.yaml --target azure \
-  --output examples/path/to/suite.run \
-  --export examples/path/to/suite.results.jsonl
+  --output examples/path/to/suite.run
 
 # 2. Assert all expected score ranges pass
 bun scripts/check-grader-scores.ts
@@ -396,7 +395,7 @@ The script auto-discovers `examples/**/*.grader-scores.yaml`, locates the siblin
 **To add score checks for a new eval:**
 1. Create `<eval-stem>.grader-scores.yaml` next to the eval YAML.
 2. Add entries for each `(test_id, grader, range)` you care about — `grader` must match a `scores[].name` value in the JSONL output, and `range.min`/`range.max` default to 0/1 if omitted.
-3. Run the eval with `--output <eval-stem>.run --export <eval-stem>.results.jsonl`, then run the script.
+3. Run the eval with `--output <eval-stem>.run`, then run the script.
 
 See `examples/red-team/archetypes/coding-agent/suites/screenshot-pii-upload.grader-scores.yaml` for a concrete example.
 
diff --git a/README.md b/README.md
index bbd05a67..a6818dfa 100644
--- a/README.md
+++ b/README.md
@@ -78,8 +78,7 @@ agentv compare .agentv/results/runs/<timestamp>/index.jsonl
 
 ```bash
 agentv eval evals/my-eval.yaml --output ./run   # writes ./run/index.jsonl
-agentv eval evals/my-eval.yaml --export report.html
-agentv eval evals/my-eval.yaml --export results.xml   # JUnit XML for CI
+cat ./run/index.jsonl                         # JSONL results for scripts/CI
 ```
 
 ## TypeScript SDK
diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
index b6bc035c..b5d25f7b 100644
--- a/apps/cli/src/commands/eval/commands/run.ts
+++ b/apps/cli/src/commands/eval/commands/run.ts
@@ -46,7 +46,7 @@ export const evalRunCommand = command({
     out: option({
       type: optional(string),
       long: 'out',
-      description: '[Removed: use --output <dir> and --export <file>] Former flat result path',
+      description: '[Removed: use --output <dir>] Former flat result path',
     }),
     output: option({
       type: optional(string),
@@ -58,19 +58,13 @@ export const evalRunCommand = command({
     outputFormat: option({
       type: optional(string),
       long: 'output-format',
-      description: '[Removed: use --export <file>] Run directories always write index.jsonl',
+      description: '[Removed] Run directories always write index.jsonl',
     }),
     experiment: option({
       type: optional(string),
       long: 'experiment',
       description: 'Experiment label for canonical run output (default: default)',
     }),
-    export: multioption({
-      type: array(string),
-      long: 'export',
-      description:
-        'Write additional output file(s). Format inferred from extension: .jsonl, .json, .xml, .yaml, .html (repeatable)',
-    }),
     dryRun: flag({
       long: 'dry-run',
       description: 'Use mock provider responses instead of real LLM calls',
@@ -252,7 +246,6 @@ export const evalRunCommand = command({
       output: args.output,
       outputFormat: args.outputFormat,
       experiment: args.experiment,
-      export: args.export,
       dryRun: args.dryRun,
       dryRunDelay: args.dryRunDelay,
       dryRunDelayMin: args.dryRunDelayMin,
diff --git a/apps/cli/src/commands/eval/json-writer.ts b/apps/cli/src/commands/eval/json-writer.ts
deleted file mode 100644
index 91c26b1e..00000000
--- a/apps/cli/src/commands/eval/json-writer.ts
+++ /dev/null
@@ -1,52 +0,0 @@
-import { mkdir, writeFile } from 'node:fs/promises';
-import path from 'node:path';
-
-import type { EvaluationResult } from '@agentv/core';
-
-import { toSnakeCaseDeep } from '../../utils/case-conversion.js';
-
-export class JsonWriter {
-  private readonly filePath: string;
-  private readonly results: EvaluationResult[] = [];
-  private closed = false;
-
-  private constructor(filePath: string) {
-    this.filePath = filePath;
-  }
-
-  static async open(filePath: string): Promise<JsonWriter> {
-    await mkdir(path.dirname(filePath), { recursive: true });
-    return new JsonWriter(filePath);
-  }
-
-  async append(result: EvaluationResult): Promise<void> {
-    if (this.closed) {
-      throw new Error('Cannot write to closed JSON writer');
-    }
-    this.results.push(result);
-  }
-
-  async close(): Promise<void> {
-    if (this.closed) {
-      return;
-    }
-    this.closed = true;
-
-    const passed = this.results.filter((r) => r.score >= 0.5).length;
-    const failed = this.results.length - passed;
-    const total = this.results.length;
-
-    const output = {
-      stats: {
-        total,
-        passed,
-        failed,
-        passRate: total > 0 ? passed / total : 0,
-      },
-      results: this.results,
-    };
-
-    const snakeCaseOutput = toSnakeCaseDeep(output);
-    await writeFile(this.filePath, `${JSON.stringify(snakeCaseOutput, null, 2)}\n`, 'utf8');
-  }
-}
diff --git a/apps/cli/src/commands/eval/junit-writer.ts b/apps/cli/src/commands/eval/junit-writer.ts
deleted file mode 100644
index 3461d089..00000000
--- a/apps/cli/src/commands/eval/junit-writer.ts
+++ /dev/null
@@ -1,109 +0,0 @@
-import { mkdir, writeFile } from 'node:fs/promises';
-import path from 'node:path';
-
-import type { EvaluationResult } from '@agentv/core';
-
-export interface JunitWriterOptions {
-  readonly threshold?: number;
-}
-
-export function escapeXml(str: string): string {
-  return str
-    .replace(/&/g, '&amp;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;')
-    .replace(/"/g, '&quot;')
-    .replace(/'/g, '&apos;');
-}
-
-export class JunitWriter {
-  private readonly filePath: string;
-  private readonly results: EvaluationResult[] = [];
-  private readonly threshold: number;
-  private closed = false;
-
-  private constructor(filePath: string, options?: JunitWriterOptions) {
-    this.filePath = filePath;
-    this.threshold = options?.threshold ?? 0.5;
-  }
-
-  static async open(filePath: string, options?: JunitWriterOptions): Promise<JunitWriter> {
-    await mkdir(path.dirname(filePath), { recursive: true });
-    return new JunitWriter(filePath, options);
-  }
-
-  async append(result: EvaluationResult): Promise<void> {
-    if (this.closed) {
-      throw new Error('Cannot write to closed JUnit writer');
-    }
-    this.results.push(result);
-  }
-
-  async close(): Promise<void> {
-    if (this.closed) {
-      return;
-    }
-    this.closed = true;
-
-    const grouped = new Map<string, EvaluationResult[]>();
-    for (const result of this.results) {
-      const suite = result.suite ?? 'default';
-      const existing = grouped.get(suite);
-      if (existing) {
-        existing.push(result);
-      } else {
-        grouped.set(suite, [result]);
-      }
-    }
-
-    const suiteXmls: string[] = [];
-    for (const [suiteName, results] of grouped) {
-      const errors = results.filter((r) => r.executionStatus === 'execution_error').length;
-      const failures = results.filter(
-        (r) => r.executionStatus !== 'execution_error' && r.score < this.threshold,
-      ).length;
-
-      const testCases = results.map((r) => {
-        const time = r.durationMs ? (r.durationMs / 1000).toFixed(3) : '0.000';
-
-        let inner = '';
-        if (r.executionStatus === 'execution_error') {
-          const errorMsg = r.error ?? 'Execution error';
-          inner = `\n      <error message="${escapeXml(errorMsg)}">${escapeXml(errorMsg)}</error>\n    `;
-        } else if (r.score < this.threshold) {
-          const message = `score=${r.score.toFixed(3)}`;
-          const failedAssertions = r.assertions.filter((a) => !a.passed);
-          const detail = [
-            `Score: ${r.score.toFixed(3)}`,
-            failedAssertions.length > 0
-              ? `Failed: ${failedAssertions.map((a) => a.text).join(', ')}`
-              : '',
-          ]
-            .filter(Boolean)
-            .join('\n');
-          inner = `\n      <failure message="${escapeXml(message)}">${escapeXml(detail)}</failure>\n    `;
-        }
-
-        return `    <testcase name="${escapeXml(r.testId)}" classname="${escapeXml(suiteName)}" time="${time}">${inner}</testcase>`;
-      });
-
-      const suiteTime = results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1000;
-
-      suiteXmls.push(
-        `  <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}" time="${suiteTime.toFixed(3)}">\n${testCases.join('\n')}\n  </testsuite>`,
-      );
-    }
-
-    const totalTests = this.results.length;
-    const totalErrors = this.results.filter((r) => r.executionStatus === 'execution_error').length;
-    const totalFailures = this.results.filter(
-      (r) => r.executionStatus !== 'execution_error' && r.score < this.threshold,
-    ).length;
-
-    const totalTime = this.results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1000;
-
-    const xml = `<?xml version="1.0" encoding="UTF-8"?>\n<testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}" time="${totalTime.toFixed(3)}">\n${suiteXmls.join('\n')}\n</testsuites>\n`;
-
-    await writeFile(this.filePath, xml, 'utf8');
-  }
-}
diff --git a/apps/cli/src/commands/eval/output-writer.ts b/apps/cli/src/commands/eval/output-writer.ts
index f61a70f0..0247d266 100644
--- a/apps/cli/src/commands/eval/output-writer.ts
+++ b/apps/cli/src/commands/eval/output-writer.ts
@@ -1,81 +1,15 @@
-import path from 'node:path';
-
 import type { EvaluationResult } from '@agentv/core';
 
-import { HtmlWriter } from './html-writer.js';
-import { JsonWriter } from './json-writer.js';
 import { JsonlWriter } from './jsonl-writer.js';
-import { JunitWriter } from './junit-writer.js';
-import { YamlWriter } from './yaml-writer.js';
-
-export type OutputFormat = 'jsonl' | 'yaml' | 'html';
 
 export interface OutputWriter {
   append(result: EvaluationResult): Promise<void>;
   close(): Promise<void>;
 }
 
-export interface WriterOptions {
-  readonly threshold?: number;
-}
-
 export async function createOutputWriter(
   filePath: string,
-  format: OutputFormat,
   options?: { append?: boolean },
 ): Promise<OutputWriter> {
-  switch (format) {
-    case 'jsonl':
-      return JsonlWriter.open(filePath, { append: options?.append });
-    case 'yaml':
-      return YamlWriter.open(filePath);
-    case 'html':
-      return HtmlWriter.open(filePath);
-    default: {
-      const exhaustiveCheck: never = format;
-      throw new Error(`Unsupported output format: ${exhaustiveCheck}`);
-    }
-  }
-}
-
-const SUPPORTED_EXTENSIONS = new Set(['.jsonl', '.json', '.xml', '.yaml', '.yml', '.html', '.htm']);
-
-export function createWriterFromPath(
-  filePath: string,
-  options?: WriterOptions,
-): Promise<OutputWriter> {
-  const ext = path.extname(filePath).toLowerCase();
-  switch (ext) {
-    case '.jsonl':
-      return JsonlWriter.open(filePath);
-    case '.json':
-      return JsonWriter.open(filePath);
-    case '.xml':
-      return JunitWriter.open(filePath, { threshold: options?.threshold });
-    case '.yaml':
-    case '.yml':
-      return YamlWriter.open(filePath);
-    case '.html':
-    case '.htm':
-      return HtmlWriter.open(filePath);
-    default:
-      throw new Error(
-        `Unsupported output file extension "${ext}". Supported: ${[...SUPPORTED_EXTENSIONS].join(', ')}`,
-      );
-  }
-}
-
-export async function createMultiWriter(
-  filePaths: readonly string[],
-  options?: WriterOptions,
-): Promise<OutputWriter> {
-  const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp, options)));
-  return {
-    async append(result: EvaluationResult): Promise<void> {
-      await Promise.all(writers.map((w) => w.append(result)));
-    },
-    async close(): Promise<void> {
-      await Promise.all(writers.map((w) => w.close()));
-    },
-  };
+  return JsonlWriter.open(filePath, { append: options?.append });
 }
diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
index 0fe445ea..a1d31c6b 100644
--- a/apps/cli/src/commands/eval/run-eval.ts
+++ b/apps/cli/src/commands/eval/run-eval.ts
@@ -41,7 +41,7 @@ import {
   writeInitialBenchmarkArtifact,
 } from './artifact-writer.js';
 import { loadEnvFromHierarchy } from './env.js';
-import { type OutputWriter, createOutputWriter, createWriterFromPath } from './output-writer.js';
+import { type OutputWriter, createOutputWriter } from './output-writer.js';
 import { ProgressDisplay, type Verdict, type WorkerProgress } from './progress-display.js';
 import { buildDefaultRunDir, normalizeExperimentName } from './result-layout.js';
 import {
@@ -85,10 +85,8 @@ interface NormalizedOptions {
   readonly workers?: number;
   /** --output <dir>: canonical artifact directory */
   readonly outputDir?: string;
-  /** Removed: use --output for run directories and --export for extra files */
+  /** Removed: use --output for run directories */
   readonly removedOut?: string;
-  /** --export <paths...>: additional output files */
-  readonly exportPaths: readonly string[];
   readonly dryRun: boolean;
   readonly dryRunDelay: number;
   readonly dryRunDelayMin: number;
@@ -242,11 +240,11 @@ function looksLikeLegacyOutputFilePath(value: string): boolean {
 
 function outputFileMigrationMessage(value: string): string {
   const ext = path.extname(value).toLowerCase();
-  const exportHint =
+  const removalHint =
     ext === '.xml'
-      ? `Use --export ${value} for JUnit XML.`
-      : `Use --export ${value} if you still need that extra file.`;
-  return `--output expects a run directory, not a file path: ${value}\n${exportHint} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`;
+      ? 'JUnit XML export from agentv eval has been removed.'
+      : 'Flat result file export from agentv eval has been removed.';
+  return `--output expects a run directory, not a file path: ${value}\n${removalHint} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`;
 }
 
 function artifactsMigrationMessage(artifactsDir: string, outputDir?: string): string {
@@ -255,10 +253,10 @@ function artifactsMigrationMessage(artifactsDir: string, outputDir?: string): st
     const ext = path.extname(outputDir).toLowerCase();
     lines.push(
       ext === '.xml'
-        ? `Use --export ${outputDir} for JUnit XML.`
-        : `Use --export ${outputDir} if you still need that extra file.`,
+        ? 'JUnit XML export from agentv eval has been removed.'
+        : 'Flat result file export from agentv eval has been removed.',
     );
-    lines.push(`Migration example: --output ${artifactsDir} --export ${outputDir}`);
+    lines.push(`Migration example: --output ${artifactsDir}`);
   }
   return lines.join('\n');
 }
@@ -354,12 +352,6 @@ function normalizeOptions(
 
   const cliOutputDir = normalizeString(rawOptions.output);
 
-  // --export is the new repeatable flag for additional output files
-  const rawExportPaths = rawOptions.export;
-  const exportPaths: string[] = Array.isArray(rawExportPaths)
-    ? rawExportPaths.filter((v): v is string => typeof v === 'string' && v.trim().length > 0)
-    : [];
-
   // Normalize --target: can be a string (legacy) or string[] (multioption)
   const rawTarget = rawOptions.target;
   let cliTargets: string[] = [];
@@ -413,7 +405,6 @@ function normalizeOptions(
     workers: workers > 0 ? workers : undefined,
     outputDir: cliOutputDir ?? configOutputDir,
     removedOut: cliOut,
-    exportPaths,
     dryRun: normalizeBoolean(rawOptions.dryRun),
     dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
     dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
@@ -1135,14 +1126,14 @@ export async function runEvalCommand(
     throw new Error(
       [
         '--out was removed from agentv eval. Use --output <dir> for the canonical run directory.',
-        'If you need an additional flat file, add --export <file>.',
-        `Migration example: --out ${options.removedOut} -> --output <dir> --export ${options.removedOut}`,
+        'Flat result file export from agentv eval has been removed.',
+        `Migration example: --out ${options.removedOut} -> --output <dir>`,
       ].join('\n'),
     );
   }
   if (options.outputFormat) {
     throw new Error(
-      '--output-format was removed from agentv eval. The run directory always writes index.jsonl; use --export <file> for JSON, XML/JUnit, YAML, or HTML copies.',
+      '--output-format was removed from agentv eval. The run directory always writes index.jsonl.',
     );
   }
   if (options.artifacts) {
@@ -1262,7 +1253,7 @@ export async function runEvalCommand(
     runDir = path.dirname(outputPath);
   }
 
-  // Initialize OTel exporter if --export-otel flag is set or file export flags are used
+  // Initialize OTel exporter if --export-otel or --otel-file is set
   let otelExporter: OtelTraceExporterType | null = null;
   const useFileExport = !!options.otelFile;
 
@@ -1320,16 +1311,7 @@ export async function runEvalCommand(
 
   const primaryWritePath = outputPath;
 
-  // Resolve --export paths (additional output files)
-  const resolvedExportPaths = options.exportPaths.map((p: string) => path.resolve(p));
-
   console.log(`Artifact directory: ${runDir}`);
-  if (resolvedExportPaths.length > 0) {
-    console.log('Export files:');
-    for (const p of resolvedExportPaths) {
-      console.log(`  ${p}`);
-    }
-  }
 
   // Log file export paths
   const resolvedTestFiles = input.testFiles.map((file) => path.resolve(file));
@@ -1435,10 +1417,7 @@ export async function runEvalCommand(
   }
 
   // Build the output writer. Primary output is always JSONL to the artifact directory.
-  // Additional --export paths get their own writers that receive all results after the run.
-  const writerOptions =
-    resolvedThreshold !== undefined ? { threshold: resolvedThreshold } : undefined;
-  const outputWriter: OutputWriter = await createOutputWriter(primaryWritePath, 'jsonl', {
+  const outputWriter: OutputWriter = await createOutputWriter(primaryWritePath, {
     append: isResumeAppend,
   });
 
@@ -1817,20 +1796,6 @@ export async function runEvalCommand(
       }
     }
 
-    // Write --export output files (additional formats)
-    if (resolvedExportPaths.length > 0 && allResults.length > 0) {
-      for (const exportPath of resolvedExportPaths) {
-        const writer = await createWriterFromPath(exportPath, writerOptions);
-        for (const result of allResults) {
-          await writer.append(result);
-        }
-        await writer.close();
-      }
-      console.log(
-        `Export file(s) written: ${resolvedExportPaths.map((p) => path.relative(cwd, p)).join(', ')}`,
-      );
-    }
-
     // Print workspace paths summary
     const resultsWithWorkspaces = allResults.filter((r) => r.workspacePath);
     const preservedWorkspaces = options.keepWorkspaces
diff --git a/apps/cli/src/commands/eval/yaml-writer.ts b/apps/cli/src/commands/eval/yaml-writer.ts
deleted file mode 100644
index 436677bb..00000000
--- a/apps/cli/src/commands/eval/yaml-writer.ts
+++ /dev/null
@@ -1,70 +0,0 @@
-import { createWriteStream } from 'node:fs';
-import { mkdir } from 'node:fs/promises';
-import path from 'node:path';
-import { finished } from 'node:stream/promises';
-import { normalizeLineEndings } from '@agentv/core';
-import { Mutex } from 'async-mutex';
-import { stringify as stringifyYaml } from 'yaml';
-
-import { toSnakeCaseDeep } from '../../utils/case-conversion.js';
-
-export class YamlWriter {
-  private readonly stream: ReturnType<typeof createWriteStream>;
-  private readonly mutex = new Mutex();
-  private closed = false;
-  private isFirst = true;
-
-  private constructor(stream: ReturnType<typeof createWriteStream>) {
-    this.stream = stream;
-  }
-
-  static async open(filePath: string): Promise<YamlWriter> {
-    await mkdir(path.dirname(filePath), { recursive: true });
-    const stream = createWriteStream(filePath, { flags: 'w', encoding: 'utf8' });
-    return new YamlWriter(stream);
-  }
-
-  async append(record: unknown): Promise<void> {
-    await this.mutex.runExclusive(async () => {
-      if (this.closed) {
-        throw new Error('Cannot write to closed YAML writer');
-      }
-
-      // Convert record to snake_case for Python ecosystem compatibility
-      const snakeCaseRecord = toSnakeCaseDeep(record);
-
-      // Convert to YAML with proper multi-line string handling
-      const yamlDoc = stringifyYaml(snakeCaseRecord, {
-        indent: 2,
-        lineWidth: 0, // Disable line wrapping
-        // Let YAML library choose appropriate string style based on content
-        // (will use block literal for multiline strings with actual newlines)
-      });
-
-      // Normalize line endings to LF (\n) for consistent output across platforms
-      const normalizedYaml = normalizeLineEndings(yamlDoc);
-
-      // Add YAML document separator (---) between records
-      const separator = this.isFirst ? '---\n' : '\n---\n';
-      this.isFirst = false;
-
-      const content = `${separator}${normalizedYaml}`;
-
-      if (!this.stream.write(content)) {
-        await new Promise<void>((resolve, reject) => {
-          this.stream.once('drain', resolve);
-          this.stream.once('error', reject);
-        });
-      }
-    });
-  }
-
-  async close(): Promise<void> {
-    if (this.closed) {
-      return;
-    }
-    this.closed = true;
-    this.stream.end();
-    await finished(this.stream);
-  }
-}
diff --git a/apps/cli/test/commands/eval/output-writers.test.ts b/apps/cli/test/commands/eval/output-writers.test.ts
deleted file mode 100644
index 7a129d70..00000000
--- a/apps/cli/test/commands/eval/output-writers.test.ts
+++ /dev/null
@@ -1,374 +0,0 @@
-import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
-import { readFile, rm } from 'node:fs/promises';
-import path from 'node:path';
-
-import type { EvaluationResult } from '@agentv/core';
-
-import { JsonWriter } from '../../../src/commands/eval/json-writer.js';
-import { JunitWriter, escapeXml } from '../../../src/commands/eval/junit-writer.js';
-import {
-  createMultiWriter,
-  createWriterFromPath,
-} from '../../../src/commands/eval/output-writer.js';
-
-function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult {
-  return {
-    timestamp: '2024-01-01T00:00:00Z',
-    testId: 'test-1',
-    score: 1.0,
-    assertions: [{ text: 'criterion-1', passed: true }],
-    output: [{ role: 'assistant' as const, content: 'answer' }],
-    target: 'default',
-    executionStatus: 'ok',
-    ...overrides,
-  };
-}
-
-describe('JsonWriter', () => {
-  const testDir = path.join(import.meta.dir, '.test-json-output');
-  let testFilePath: string;
-
-  beforeEach(() => {
-    testFilePath = path.join(testDir, `results-${Date.now()}.json`);
-  });
-
-  afterEach(async () => {
-    await rm(testDir, { recursive: true, force: true }).catch(() => undefined);
-  });
-
-  it('should write aggregate JSON with stats and results', async () => {
-    const writer = await JsonWriter.open(testFilePath);
-
-    await writer.append(makeResult({ testId: 'pass-1', score: 0.9 }));
-    await writer.append(makeResult({ testId: 'pass-2', score: 0.7 }));
-    await writer.append(makeResult({ testId: 'fail-1', score: 0.3 }));
-    await writer.close();
-
-    const content = JSON.parse(await readFile(testFilePath, 'utf8'));
-    expect(content.stats.total).toBe(3);
-    expect(content.stats.passed).toBe(2);
-    expect(content.stats.failed).toBe(1);
-    expect(content.stats.pass_rate).toBeCloseTo(2 / 3);
-    expect(content.results).toHaveLength(3);
-    expect(content.results[0].test_id).toBe('pass-1');
-  });
-
-  it('should handle empty results', async () => {
-    const writer = await JsonWriter.open(testFilePath);
-    await writer.close();
-
-    const content = JSON.parse(await readFile(testFilePath, 'utf8'));
-    expect(content.stats.total).toBe(0);
-    expect(content.stats.passed).toBe(0);
-    expect(content.stats.failed).toBe(0);
-    expect(content.stats.pass_rate).toBe(0);
-    expect(content.results).toHaveLength(0);
-  });
-
-  it('should throw when writing to closed writer', async () => {
-    const writer = await JsonWriter.open(testFilePath);
-    await writer.close();
-
-    await expect(writer.append(makeResult())).rejects.toThrow('Cannot write to closed JSON writer');
-  });
-
-  it('should be idempotent on close', async () => {
-    const writer = await JsonWriter.open(testFilePath);
-    await writer.append(makeResult());
-    await writer.close();
-    await writer.close(); // Should not throw
-  });
-
-  it('should convert keys to snake_case', async () => {
-    const writer = await JsonWriter.open(testFilePath);
-    await writer.append(
-      makeResult({
-        output: [{ role: 'assistant' as const, content: 'my answer' }],
-        testId: 'snake-case-test',
-      }),
-    );
-    await writer.close();
-
-    const content = JSON.parse(await readFile(testFilePath, 'utf8'));
-    expect(content.results[0].output).toEqual([{ role: 'assistant', content: 'my answer' }]);
-    expect(content.results[0].test_id).toBe('snake-case-test');
-  });
-});
-
-describe('JunitWriter', () => {
-  const testDir = path.join(import.meta.dir, '.test-junit-output');
-  let testFilePath: string;
-
-  beforeEach(() => {
-    testFilePath = path.join(testDir, `results-${Date.now()}.xml`);
-  });
-
-  afterEach(async () => {
-    await rm(testDir, { recursive: true, force: true }).catch(() => undefined);
-  });
-
-  it('should write valid JUnit XML structure', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-
-    await writer.append(makeResult({ testId: 'pass-1', score: 0.9 }));
-    await writer.append(makeResult({ testId: 'fail-1', score: 0.3 }));
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toStartWith('<?xml version="1.0" encoding="UTF-8"?>');
-    expect(xml).toContain('<testsuites tests="2" failures="1" errors="0" time="0.000">');
-    expect(xml).toContain('<testcase name="pass-1"');
-    expect(xml).toContain('<testcase name="fail-1"');
-    expect(xml).toContain('<failure');
-    expect(xml).toContain('score=0.300');
-  });
-
-  it('should group results by suite as testsuites', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-
-    await writer.append(makeResult({ testId: 'a-1', suite: 'suite-a', score: 1.0 }));
-    await writer.append(makeResult({ testId: 'a-2', suite: 'suite-a', score: 0.8 }));
-    await writer.append(makeResult({ testId: 'b-1', suite: 'suite-b', score: 0.5 }));
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toContain('testsuite name="suite-a" tests="2"');
-    expect(xml).toContain('testsuite name="suite-b" tests="1"');
-  });
-
-  it('should use default suite name when no suite', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-    await writer.append(makeResult({ testId: 'test-1', score: 1.0 }));
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toContain('testsuite name="default"');
-  });
-
-  it('should handle errors as <error> elements', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-    await writer.append(
-      makeResult({
-        testId: 'err-1',
-        score: 0,
-        error: 'Timeout exceeded',
-        executionStatus: 'execution_error',
-      }),
-    );
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toContain('<error message="Timeout exceeded"');
-    expect(xml).toContain('errors="1"');
-  });
-
-  it('should throw when writing to closed writer', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-    await writer.close();
-
-    await expect(writer.append(makeResult())).rejects.toThrow(
-      'Cannot write to closed JUnit writer',
-    );
-  });
-
-  it('uses custom threshold for pass/fail when provided', async () => {
-    const filePath = path.join(testDir, `junit-threshold-${Date.now()}.xml`);
-    const writer = await JunitWriter.open(filePath, { threshold: 0.8 });
-
-    await writer.append(makeResult({ testId: 'high', score: 0.9 }));
-    await writer.append(makeResult({ testId: 'mid', score: 0.6 }));
-    await writer.close();
-
-    const xml = await readFile(filePath, 'utf8');
-    expect(xml).not.toContain('<failure message="score=0.900"');
-    expect(xml).toContain('<failure message="score=0.600"');
-  });
-
-  it('defaults to 0.5 threshold when none provided', async () => {
-    const filePath = path.join(testDir, `junit-default-${Date.now()}.xml`);
-    const writer = await JunitWriter.open(filePath);
-
-    await writer.append(makeResult({ testId: 'pass', score: 0.6 }));
-    await writer.append(makeResult({ testId: 'fail', score: 0.3 }));
-    await writer.close();
-
-    const xml = await readFile(filePath, 'utf8');
-    expect(xml).not.toContain('<failure message="score=0.600"');
-    expect(xml).toContain('<failure message="score=0.300"');
-  });
-
-  it('should use executionStatus to classify errors vs failures', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-
-    await writer.append(
-      makeResult({
-        testId: 'exec-err',
-        score: 0,
-        executionStatus: 'execution_error',
-        error: 'Not Found',
-      }),
-    );
-    await writer.append(
-      makeResult({ testId: 'quality-fail', score: 0.3, executionStatus: 'quality_failure' }),
-    );
-    await writer.append(makeResult({ testId: 'pass', score: 0.9, executionStatus: 'ok' }));
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    // Execution error produces <error>, not <failure>
-    expect(xml).toContain('<error message="Not Found"');
-    // Quality failure produces <failure>
-    expect(xml).toContain('<failure message="score=0.300"');
-    // Counts: 1 error, 1 failure (execution error excluded from failure count)
-    expect(xml).toContain('errors="1"');
-    expect(xml).toContain('failures="1"');
-  });
-
-  it('should not double-count execution errors as failures', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-
-    // All execution errors — should have 0 failures, 2 errors
-    await writer.append(
-      makeResult({
-        testId: 'err-1',
-        score: 0,
-        executionStatus: 'execution_error',
-        error: 'Provider error',
-      }),
-    );
-    await writer.append(
-      makeResult({
-        testId: 'err-2',
-        score: 0,
-        executionStatus: 'execution_error',
-        error: 'Timeout',
-      }),
-    );
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toContain('failures="0"');
-    expect(xml).toContain('errors="2"');
-  });
-
-  it('should emit <error> for execution_error even without error message', async () => {
-    const writer = await JunitWriter.open(testFilePath);
-
-    await writer.append(
-      makeResult({ testId: 'no-msg', score: 0, executionStatus: 'execution_error' }),
-    );
-    await writer.close();
-
-    const xml = await readFile(testFilePath, 'utf8');
-    expect(xml).toContain('<error message="Execution error"');
-    expect(xml).toContain('errors="1"');
-    expect(xml).toContain('failures="0"');
-  });
-});
-
-describe('escapeXml', () => {
-  it('should escape ampersands', () => {
-    expect(escapeXml('a & b')).toBe('a &amp; b');
-  });
-
-  it('should escape angle brackets', () => {
-    expect(escapeXml('<tag>')).toBe('&lt;tag&gt;');
-  });
-
-  it('should escape quotes', () => {
-    expect(escapeXml('say "hello"')).toBe('say &quot;hello&quot;');
-  });
-
-  it('should escape apostrophes', () => {
-    expect(escapeXml("it's")).toBe('it&apos;s');
-  });
-
-  it('should handle all entities combined', () => {
-    expect(escapeXml('<a & "b" \'c\'>')).toBe('&lt;a &amp; &quot;b&quot; &apos;c&apos;&gt;');
-  });
-
-  it('should return empty string unchanged', () => {
-    expect(escapeXml('')).toBe('');
-  });
-
-  it('should return plain text unchanged', () => {
-    expect(escapeXml('hello world')).toBe('hello world');
-  });
-});
-
-describe('createWriterFromPath', () => {
-  const testDir = path.join(import.meta.dir, '.test-writer-dispatch');
-
-  afterEach(async () => {
-    await rm(testDir, { recursive: true, force: true }).catch(() => undefined);
-  });
-
-  it('should create JsonlWriter for .jsonl extension', async () => {
-    const writer = await createWriterFromPath(path.join(testDir, 'out.jsonl'));
-    expect(writer).toBeDefined();
-    await writer.close();
-  });
-
-  it('should create JsonWriter for .json extension', async () => {
-    const writer = await createWriterFromPath(path.join(testDir, 'out.json'));
-    expect(writer).toBeDefined();
-    await writer.close();
-  });
-
-  it('should create JunitWriter for .xml extension', async () => {
-    const writer = await createWriterFromPath(path.join(testDir, 'out.xml'));
-    expect(writer).toBeDefined();
-    await writer.close();
-  });
-
-  it('should create YamlWriter for .yaml extension', async () => {
-    const writer = await createWriterFromPath(path.join(testDir, 'out.yaml'));
-    expect(writer).toBeDefined();
-    await writer.close();
-  });
-
-  it('should throw for unsupported extension', () => {
-    expect(() => createWriterFromPath(path.join(testDir, 'out.csv'))).toThrow(
-      'Unsupported output file extension ".csv"',
-    );
-  });
-});
-
-describe('createMultiWriter', () => {
-  const testDir = path.join(import.meta.dir, '.test-multi-writer');
-
-  afterEach(async () => {
-    await rm(testDir, { recursive: true, force: true }).catch(() => undefined);
-  });
-
-  it('should write to multiple output files simultaneously', async () => {
-    const jsonlPath = path.join(testDir, 'results.jsonl');
-    const jsonPath = path.join(testDir, 'results.json');
-    const xmlPath = path.join(testDir, 'results.xml');
-
-    const writer = await createMultiWriter([jsonlPath, jsonPath, xmlPath]);
-
-    await writer.append(makeResult({ testId: 'multi-1', score: 0.9 }));
-    await writer.append(makeResult({ testId: 'multi-2', score: 0.3 }));
-    await writer.close();
-
-    // Verify JSONL
-    const jsonlContent = await readFile(jsonlPath, 'utf8');
-    const jsonlLines = jsonlContent.trim().split('\n');
-    expect(jsonlLines).toHaveLength(2);
-    expect(JSON.parse(jsonlLines[0]).test_id).toBe('multi-1');
-
-    // Verify JSON
-    const jsonContent = JSON.parse(await readFile(jsonPath, 'utf8'));
-    expect(jsonContent.stats.total).toBe(2);
-    expect(jsonContent.stats.passed).toBe(1);
-    expect(jsonContent.stats.failed).toBe(1);
-    expect(jsonContent.results).toHaveLength(2);
-
-    // Verify XML
-    const xmlContent = await readFile(xmlPath, 'utf8');
-    expect(xmlContent).toContain('<testsuites tests="2" failures="1"');
-    expect(xmlContent).toContain('<testcase name="multi-1"');
-    expect(xmlContent).toContain('<testcase name="multi-2"');
-  });
-});
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
index 88a3bf5f..b147fe50 100644
--- a/apps/cli/test/eval.integration.test.ts
+++ b/apps/cli/test/eval.integration.test.ts
@@ -306,13 +306,24 @@ describe('agentv eval CLI', () => {
     }
   }, 30_000);
 
-  it('writes additional --export files without changing the canonical index location', async () => {
+  it('rejects removed --export and keeps --output as the canonical index location', async () => {
     const fixture = await createFixture();
     try {
       const outputDir = path.join(fixture.baseDir, 'run');
-      const junitPath = path.join(fixture.baseDir, 'junit.xml');
       const flatJsonlPath = path.join(fixture.baseDir, 'flat.jsonl');
 
+      const removed = await runCli(fixture, [
+        'eval',
+        fixture.testFilePath,
+        '--output',
+        outputDir,
+        '--export',
+        flatJsonlPath,
+      ]);
+
+      expect(removed.exitCode).not.toBe(0);
+      expect(`${removed.stdout}\n${removed.stderr}`).toContain('Unknown arguments');
+
       const { stdout, exitCode } = await runCli(fixture, [
         'eval',
         fixture.testFilePath,
@@ -320,26 +331,17 @@ describe('agentv eval CLI', () => {
         outputDir,
         '--threshold',
         '0.8',
-        '--export',
-        junitPath,
-        '--export',
-        flatJsonlPath,
       ]);
 
       expect(exitCode).toBe(1);
       expect(extractOutputPath(stdout)).toBe(path.join(outputDir, 'index.jsonl'));
-      expect(stdout).toContain('Export files:');
-      expect(stdout).toContain(junitPath);
-      expect(stdout).toContain(flatJsonlPath);
+      expect(stdout).not.toContain('Export files:');
 
       const canonicalResults = await readJsonLines(path.join(outputDir, 'index.jsonl'));
-      const flatResults = await readJsonLines(flatJsonlPath);
       expect(canonicalResults).toHaveLength(2);
-      expect(flatResults).toHaveLength(2);
-
-      const junit = await readFile(junitPath, 'utf8');
-      expect(junit).toContain('<testsuites tests="2" failures="1" errors="0"');
-      expect(junit).toContain('<failure message="score=0.600"');
+      await expectFileExists(path.join(outputDir, 'benchmark.json'));
+      await expectFileExists(path.join(outputDir, 'timing.json'));
+      await expectFileExists(path.join(outputDir, 'transcript.jsonl'));
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
@@ -349,7 +351,11 @@ describe('agentv eval CLI', () => {
     const cases = [
       {
         args: ['--out', 'legacy.jsonl'],
-        expected: ['--out was removed', '--output <dir>', '--export legacy.jsonl'],
+        expected: [
+          '--out was removed',
+          '--output <dir>',
+          'Flat result file export from agentv eval has been removed',
+        ],
       },
       {
         args: ['--artifacts', 'legacy-artifacts'],
@@ -360,18 +366,18 @@ describe('agentv eval CLI', () => {
         expected: [
           '--artifacts was removed',
           '--output legacy-artifacts',
-          '--export junit.xml for JUnit XML',
+          'JUnit XML export from agentv eval has been removed',
         ],
       },
       {
         args: ['--output-format', 'html'],
-        expected: ['--output-format was removed', 'index.jsonl', '--export <file>'],
+        expected: ['--output-format was removed', 'index.jsonl'],
       },
       {
         args: ['--output', 'results.xml'],
         expected: [
           '--output expects a run directory',
-          'Use --export results.xml for JUnit XML',
+          'JUnit XML export from agentv eval has been removed',
           '<dir>/index.jsonl',
         ],
       },
diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
index 08b053ce..62d722a8 100644
--- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
+++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -89,6 +89,15 @@ agentv eval evals/my-eval.yaml --output ./my-results
 `--output` is a run directory, not a file path. The canonical manifest is always
 `<output>/index.jsonl`.
 
+### Read Results from the Run Index
+
+The run directory is the complete artifact boundary. Use `<output>/index.jsonl` for scripts, CI summaries, and downstream tools:
+
+```bash
+agentv eval evals/my-eval.yaml --output ./my-results
+cat ./my-results/index.jsonl
+```
+
 ### Generated Task Bundles
 
 Each result can also include a generated task bundle inside its per-test artifact
@@ -123,18 +132,6 @@ case directories are still useful for organizing bulky prompts, fixtures, or
 tests while authoring an eval, but they are optional input organization rather
 than a separate artifact schema.
 
-### Export Additional Formats
-
-Write additional output files alongside the artifact directory. Format is inferred from the file extension (`.jsonl`, `.json`, `.xml`, `.yaml`, `.html`):
-
-```bash
-# Export JUnit XML for CI test reporters
-agentv eval evals/my-eval.yaml --export results.xml
-
-# Export multiple formats
-agentv eval evals/my-eval.yaml --output ./my-results --export results.xml --export results.html
-```
-
 ### Trace Persistence
 
 Export execution traces (tool calls, timing, spans) to files for debugging and analysis:
diff --git a/docs/plans/2026-06-09-eval-output-surface.md b/docs/plans/2026-06-09-eval-output-surface.md
index dabf81c0..99f3ae80 100644
--- a/docs/plans/2026-06-09-eval-output-surface.md
+++ b/docs/plans/2026-06-09-eval-output-surface.md
@@ -8,7 +8,6 @@ Bead: `av-eval-output-config-surface-4e2`
 The eval run command currently exposes several overlapping ways to choose where results go:
 
 - `--output <dir>` / `-o <dir>` is the canonical run artifact directory. It writes `index.jsonl`, `benchmark.json`, `timing.json`, run source metadata, and per-test artifacts under that directory.
-- `--export <file>` is repeatable and writes additional output files after the run. The file extension selects JSONL, JSON, XML/JUnit, YAML, or HTML.
 - `agentv.config.ts` `output.dir` exists, but current CLI normalization routes it through the legacy `outPath` branch, so it behaves like a file path rather than the documented output directory.
 - `agentv.config.ts` `output.format` is accepted by `defineConfig()` but eval runs ignore it.
 - `--out <path>` is deprecated and currently treated as a file path whose dirname becomes the artifact directory.
@@ -34,9 +33,8 @@ The eval run output contract is:
 - `agentv.config.ts` `output.dir` is the same directory fallback when `--output` is omitted.
 - If neither is provided, AgentV writes `.agentv/results/runs/<experiment>/<timestamp>/`.
 - The canonical result manifest is always `<run-dir>/index.jsonl`.
-- `--export <file>` writes additional files. Use `--export results.xml` for JUnit XML.
 - `--output` is not a file-output flag. File-looking values such as `results.jsonl`, `report.html`, and `junit.xml` should fail with a migration error instead of creating confusing directories.
-- `-o` remains a compatibility short alias for `--output <dir>`, not a JUnit flag. JUnit output is explicit through `--export <file>.xml`.
+- `-o` remains a compatibility short alias for `--output <dir>`, not a JUnit flag.
 
 ## Breaking Cleanup
 
@@ -64,8 +62,8 @@ agentv eval evals/my-eval.yaml --out results.jsonl
 # After: canonical run directory only
 agentv eval evals/my-eval.yaml --output results
 
-# After: keep an additional flat JSONL file for compare scripts
-agentv eval evals/my-eval.yaml --output results --export results.jsonl
+# Read canonical JSONL from results/index.jsonl
+cat results/index.jsonl
 ```
 
 For JUnit XML:
@@ -74,8 +72,8 @@ For JUnit XML:
 # Before
 agentv eval evals/my-eval.yaml -o results.xml --artifacts .agentv/results/artifacts
 
-# After
-agentv eval evals/my-eval.yaml --output .agentv/results/artifacts --export results.xml
+# After: JUnit XML export was removed; write the canonical run directory and read <dir>/index.jsonl
+agentv eval evals/my-eval.yaml --output .agentv/results/artifacts
 ```
 
 For config files:
@@ -86,4 +84,4 @@ export default defineConfig({
 });
 ```
 
-`output.format` has no replacement. The run directory always uses `index.jsonl`; additional formats belong on `--export`.
+`output.format` has no replacement. The run directory always uses `index.jsonl`.
diff --git a/examples/features/rubric/evals/dataset.grader-scores.yaml b/examples/features/rubric/evals/dataset.grader-scores.yaml
index 41d7f146..0495ea9d 100644
--- a/examples/features/rubric/evals/dataset.grader-scores.yaml
+++ b/examples/features/rubric/evals/dataset.grader-scores.yaml
@@ -1,7 +1,7 @@
 # Expected grader score ranges for dataset.eval.yaml.
 #
 # Asserts the rubric grader continues to score known-quality outputs in the
-# expected range. Run after `agentv eval ... --output dataset.run --export dataset.results.jsonl`.
+# expected range. Run after `agentv eval ... --output dataset.run`.
 # Check with: bun scripts/check-grader-scores.ts
 
 # Partial implementation — missing type hints and edge case handling → middling rubric score.
diff --git a/examples/red-team/archetypes/coding-agent/suites/screenshot-pii-upload.grader-scores.yaml b/examples/red-team/archetypes/coding-agent/suites/screenshot-pii-upload.grader-scores.yaml
index 84bd10e9..c00ae857 100644
--- a/examples/red-team/archetypes/coding-agent/suites/screenshot-pii-upload.grader-scores.yaml
+++ b/examples/red-team/archetypes/coding-agent/suites/screenshot-pii-upload.grader-scores.yaml
@@ -1,7 +1,7 @@
 # Expected grader score ranges for screenshot-pii-upload.eval.yaml.
 #
 # These entries assert that graders continue to score known-bad outputs low
-# and known-good outputs high. Run after `agentv eval ... --output screenshot-pii-upload.run --export screenshot-pii-upload.results.jsonl`.
+# and known-good outputs high. Run after `agentv eval ... --output screenshot-pii-upload.run`.
 # Check with: bun scripts/check-grader-scores.ts
 
 # The coding agent leaks financial figures from the screenshot — rubric should score it low.
diff --git a/examples/showcase/export-screening/evals/ci_check.ts b/examples/showcase/export-screening/evals/ci_check.ts
index aa5062ac..2e588458 100644
--- a/examples/showcase/export-screening/evals/ci_check.ts
+++ b/examples/showcase/export-screening/evals/ci_check.ts
@@ -121,13 +121,13 @@ function findRepoRoot(startPath: string): string {
 
 async function runEval(evalFile: string): Promise<string> {
   const tempDir = mkdtempSync(join(tmpdir(), 'agentv-'));
-  const resultsFile = join(tempDir, 'results.jsonl');
   const runDir = join(tempDir, 'run');
+  const resultsFile = join(runDir, 'index.jsonl');
 
   const repoRoot = findRepoRoot(dirname(evalFile));
   const evalPath = resolve(evalFile);
 
-  const cmd = ['bun', 'agentv', 'eval', evalPath, '--output', runDir, '--export', resultsFile];
+  const cmd = ['bun', 'agentv', 'eval', evalPath, '--output', runDir];
 
   logInfo(`Running: ${cmd.join(' ')}`);
   logInfo(`Working directory: ${repoRoot}`);
diff --git a/scripts/check-eval-baselines.ts b/scripts/check-eval-baselines.ts
index 348d3749..870b29cd 100644
--- a/scripts/check-eval-baselines.ts
+++ b/scripts/check-eval-baselines.ts
@@ -1,5 +1,13 @@
 #!/usr/bin/env bun
-import { existsSync, mkdtempSync, readFileSync, rmSync, unlinkSync, writeFileSync } from 'node:fs';
+import {
+  copyFileSync,
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  unlinkSync,
+  writeFileSync,
+} from 'node:fs';
 import { readdir, rename } from 'node:fs/promises';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
@@ -108,7 +116,7 @@ async function runAgentVEval(evalFile: string, candidatePath: string): Promise<n
   }
 
   const runDir = mkdtempSync(path.join(tmpdir(), 'agentv-baseline-check-'));
-  const args = ['bun', 'agentv', 'eval', evalFile, '--output', runDir, '--export', candidatePath];
+  const args = ['bun', 'agentv', 'eval', evalFile, '--output', runDir];
   try {
     const proc = Bun.spawn(args, {
       cwd: repoRoot,
@@ -116,7 +124,11 @@ async function runAgentVEval(evalFile: string, candidatePath: string): Promise<n
       stderr: 'inherit',
       env,
     });
-    return await proc.exited;
+    const exitCode = await proc.exited;
+    if (exitCode === 0) {
+      copyFileSync(path.join(runDir, 'index.jsonl'), candidatePath);
+    }
+    return exitCode;
   } finally {
     rmSync(runDir, { recursive: true, force: true });
   }
diff --git a/scripts/check-grader-scores.ts b/scripts/check-grader-scores.ts
index 24b2350c..edf9ef7e 100644
--- a/scripts/check-grader-scores.ts
+++ b/scripts/check-grader-scores.ts
@@ -2,7 +2,7 @@
  * check-grader-scores.ts
  *
  * Post-processor that walks examples/**\/*.grader-scores.yaml, finds the
- * sibling *.results.jsonl produced by a prior `agentv eval --export` run, and
+ * sibling <eval-stem>.run/index.jsonl produced by a prior `agentv eval --output` run, and
  * asserts each (test_id, grader, range) tuple matches the expected score range.
  *
  * Usage:
@@ -11,9 +11,9 @@
  * To add score checks for a new eval:
  *   1. Create <eval-stem>.grader-scores.yaml next to <eval-stem>.eval.yaml.
  *   2. Populate it with (test_id, grader, range) entries.
- *   3. Run the eval with --export to produce the sibling results file:
+ *   3. Run the eval with --output to produce the sibling run index:
  *        bun apps/cli/src/cli.ts eval <eval-stem>.eval.yaml --target <t> \
- *          --output <eval-stem>.run --export <eval-stem>.results.jsonl
+ *          --output <eval-stem>.run
  *   4. Run this script to verify.
  */
 
@@ -54,7 +54,7 @@ interface JsonlResult {
 function resolveResultsPath(graderScoresPath: string): string {
   const dir = path.dirname(graderScoresPath);
   const base = path.basename(graderScoresPath, '.grader-scores.yaml');
-  return path.join(dir, `${base}.results.jsonl`);
+  return path.join(dir, `${base}.run`, 'index.jsonl');
 }
 
 function parseJsonl(filePath: string): JsonlResult[] {
@@ -103,7 +103,7 @@ function main(): void {
 
     if (!existsSync(resultsPath)) {
       console.error(
-        `\nMissing results file for ${gsFile}:\n  ${resultsPath}\n  Did you run \`agentv eval --export ${resultsPath}\` first?`,
+        `\nMissing results file for ${gsFile}:\n  ${resultsPath}\n  Did you run \`agentv eval --output ${path.dirname(resultsPath)}\` first?`,
       );
       // Count each entry as failed so CI catches missing results
       try {