Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ async function loadNormalizedResultsConfig(
: (getProjectForPath(repoRoot) ?? getProjectForPath(cwd));
const projectResults = project?.results
? {
mode: project.results.mode,
repo: project.results.repo,
path: project.results.path,
auto_push: project.results.autoPush,
mode: 'github' as const,
repo: project.results.repository,
path: project.results.localPath,
auto_push: project.results.sync?.autoPush,
branch_prefix: project.results.branchPrefix,
}
: undefined;
Expand Down
42 changes: 18 additions & 24 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1272,10 +1272,9 @@ describe('serve app', () => {
name: 'Project No Publish',
path: projectDir,
results: {
mode: 'github',
repo: `file://${remoteDir}`,
path: missingCloneDir,
autoPush: true,
repository: `file://${remoteDir}`,
localPath: missingCloneDir,
sync: { autoPush: true },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down Expand Up @@ -1413,10 +1412,9 @@ describe('serve app', () => {
name: 'AgentV',
path: projectDir,
results: {
mode: 'github',
repo: 'EntityProcess/agentv-examples-eval-results',
path: '/home/entity/projects/EntityProcess/agentv-examples-eval-results',
autoPush: true,
repository: 'EntityProcess/agentv-examples-eval-results',
localPath: '/home/entity/projects/EntityProcess/agentv-examples-eval-results',
sync: { autoPush: true },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down Expand Up @@ -1466,10 +1464,9 @@ describe('serve app', () => {
name: 'Project Sync Pull',
path: projectDir,
results: {
mode: 'github',
repo: `file://${remoteDir}`,
path: cloneDir,
autoPush: false,
repository: `file://${remoteDir}`,
localPath: cloneDir,
sync: { autoPush: false },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down Expand Up @@ -1546,10 +1543,9 @@ describe('serve app', () => {
name: 'Project Sync Push',
path: projectDir,
results: {
mode: 'github',
repo: `file://${remoteDir}`,
path: cloneDir,
autoPush: true,
repository: `file://${remoteDir}`,
localPath: cloneDir,
sync: { autoPush: true },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down Expand Up @@ -1614,10 +1610,9 @@ describe('serve app', () => {
name: 'Project Sync Offline',
path: projectDir,
results: {
mode: 'github',
repo: `file://${remoteDir}`,
path: cloneDir,
autoPush: true,
repository: `file://${remoteDir}`,
localPath: cloneDir,
sync: { autoPush: true },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down Expand Up @@ -1674,10 +1669,9 @@ describe('serve app', () => {
name: 'Project Sync Conflict',
path: projectDir,
results: {
mode: 'github',
repo: `file://${remoteDir}`,
path: cloneDir,
autoPush: true,
repository: `file://${remoteDir}`,
localPath: cloneDir,
sync: { autoPush: true },
},
addedAt: '2026-01-01T00:00:00.000Z',
lastOpenedAt: '2026-01-01T00:00:00.000Z',
Expand Down
2 changes: 1 addition & 1 deletion apps/dashboard/src/routes/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ function RunsTabContent({
<p className="mt-2 text-sm text-gray-500">
Sync remote results or run an eval with{' '}
<code className="rounded bg-gray-800 px-2 py-1 text-cyan-400">
auto_push: true
sync.auto_push: true
</code>{' '}
in your config.
</p>
Expand Down
66 changes: 52 additions & 14 deletions apps/web/src/content/docs/docs/tools/dashboard.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,15 @@ agentv dashboard --add /path/to/other-evals

Each path must contain a `.agentv/` directory. Registered projects are stored under `projects:` in `$AGENTV_HOME/config.yaml`, or `~/.agentv/config.yaml` when `AGENTV_HOME` is unset.

To register a remote repo and keep it synced automatically, add a `source` block to the entry in `$AGENTV_HOME/config.yaml`:
To register a remote repo and keep it synced automatically, add `repository` and `ref` to the entry in `$AGENTV_HOME/config.yaml`. `repository` uses GitHub's standard owner/name form and AgentV resolves it to `https://github.com/<owner>/<name>.git` for clone and pull operations:

```yaml
projects:
- id: my-evals
name: My Evals
repository: example/my-evals
path: /srv/agentv/my-evals
source:
url: https://github.com/example/my-evals
ref: main
ref: main
```

On each Dashboard startup, AgentV clones the repo if the path is empty (`git clone --depth 1`) or pulls the latest if a clone already exists (`git pull --ff-only`). You can also trigger a sync manually from the Dashboard UI's **Sync** button.
Expand Down Expand Up @@ -256,15 +255,17 @@ For a registered project, put results repo settings on that project's entry in `
projects:
- id: agentv
name: AgentV
repository: EntityProcess/agentv
path: /home/entity/projects/EntityProcess/agentv
ref: main
results:
mode: github
repo: EntityProcess/agentv-examples-eval-results
path: /home/entity/projects/EntityProcess/agentv-examples-eval-results
auto_push: true
repository: EntityProcess/agentv-examples-eval-results
local_path: /home/entity/projects/EntityProcess/agentv-examples-eval-results
sync:
auto_push: true
```

`results.path` is the filesystem location of the local clone AgentV manages for the results repo. It is **not** a subdirectory inside the remote repo.
`results.local_path` is the filesystem location of the local clone AgentV manages for the results repo. It is **not** a subdirectory inside the remote repo. `results.repository` uses GitHub owner/name form and resolves to `https://github.com/<owner>/<name>.git` for clone and push operations.

You can also set a top-level global fallback in the same file. This is used when the current project is not registered or its registry entry has no `results` block:

Expand All @@ -278,10 +279,47 @@ results:

Project-local `.agentv/config.yaml` is for portable eval defaults such as `execution`, `eval_patterns`, and `dashboard`. Do not put `projects` in project-local config; AgentV warns and ignores it there. `results_by_project` is deprecated; use `projects[].results` in `$AGENTV_HOME/config.yaml`.

The `source` block and the `results` block sync different repositories:
The project `repository` and the `results` block sync different repositories:

- `projects[].repository` is the eval source project. Dashboard startup clones or fast-forwards the project checkout so eval YAML, scripts, and project-local `.agentv/config.yaml` stay current.
- `projects[].results.repository` is the git-backed results store. **Sync Project** fetches, fast-forwards, and, when configured, pushes run artifacts and mutable metadata in that results repo clone.

#### Migration from the legacy project schema

Before:

```yaml
projects:
- id: agentv
name: AgentV
path: /home/entity/projects/EntityProcess/agentv
source:
url: https://github.com/EntityProcess/agentv
ref: main
results:
mode: github
repo: EntityProcess/agentv-eval-results
path: /home/entity/projects/EntityProcess/agentv-eval-results
auto_push: true
```

After:

```yaml
projects:
- id: agentv
name: AgentV
repository: EntityProcess/agentv
path: /home/entity/projects/EntityProcess/agentv
ref: main
results:
repository: EntityProcess/agentv-eval-results
local_path: /home/entity/projects/EntityProcess/agentv-eval-results
sync:
auto_push: true
```

- `projects[].source` is the eval source project. Dashboard startup clones or fast-forwards the project checkout so eval YAML, scripts, and project-local `.agentv/config.yaml` stay current.
- `projects[].results` is the git-backed results store. **Sync Project** fetches, fast-forwards, and, when configured, pushes run artifacts and mutable metadata in that results repo clone.
Config validation now rejects the legacy project fields (`source`, `results.mode`, `results.repo`, `results.path`, and `results.auto_push`); each validation error explains which new field to use instead.

Use project-level **Sync Project** as the results exchange workflow. It handles pulled remote runs, locally edited metadata, dirty state, and blocked conflict feedback in one project-scoped action.

Expand Down Expand Up @@ -327,8 +365,8 @@ After sync, newly fetched remote runs appear in the list with a **remote** sourc
**Sync Project** fetches the results repo and only changes the clone when Git says it is safe:

- A clean clone that is behind the remote is fast-forwarded.
- Safe uncommitted changes under `.agentv/results/**`, such as remote tag metadata overlays, are committed and pushed when `auto_push: true`.
- A local results repo that is ahead is pushed when `auto_push: true` and the committed paths are all under `.agentv/results/**`.
- Safe uncommitted changes under `.agentv/results/**`, such as remote tag metadata overlays, are committed and pushed when `sync.auto_push: true`.
- A local results repo that is ahead is pushed when `sync.auto_push: true` and the committed paths are all under `.agentv/results/**`.
- Dirty non-results files, dirty metadata plus remote changes, diverged history, unresolved conflicts, missing upstream branches, non-results commits ahead, and rejected pushes are blocked instead of reset.

When sync is blocked, Dashboard keeps the local clone intact and shows the `block_reason`, `dirty_paths` or `conflicted_paths`, `git_status`, and a compact `git_diff_summary` so you can resolve the results repo manually before syncing again.
154 changes: 153 additions & 1 deletion packages/core/src/evaluation/validation/config-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,30 @@ function validateProjects(errors: ValidationError[], filePath: string, projects:
validateRequiredString(errors, filePath, projectRecord.id, `${location}.id`);
validateRequiredString(errors, filePath, projectRecord.name, `${location}.name`);
validateRequiredString(errors, filePath, projectRecord.path, `${location}.path`);
validateResultsConfig(errors, filePath, projectRecord.results, `${location}.results`);

if (projectRecord.source !== undefined) {
errors.push({
severity: 'error',
filePath,
location: `${location}.source`,
message: `Field '${location}.source' was removed. Move 'source.url' to '${location}.repository' as a GitHub owner/name value (for example, 'example/repo') and move 'source.ref' to '${location}.ref'.`,
});
}

if (projectRecord.repository !== undefined) {
validateGitHubRepository(
errors,
filePath,
projectRecord.repository,
`${location}.repository`,
);
}

if (projectRecord.ref !== undefined) {
validateRequiredString(errors, filePath, projectRecord.ref, `${location}.ref`);
}

validateProjectResultsConfig(errors, filePath, projectRecord.results, `${location}.results`);
});
}

Expand All @@ -189,6 +212,135 @@ function validateRequiredString(
}
}

/**
 * Validates that a config value is a GitHub repository in `owner/name` form.
 *
 * Pushes exactly one error onto `errors` when the value is not a string, is
 * blank after trimming, or does not consist of two slash-separated segments
 * made of letters, digits, `_`, `.` or `-`. Nothing is pushed for a valid value.
 *
 * @param errors - Collector that receives any validation error (mutated in place).
 * @param filePath - Path of the config file being validated, copied into the error.
 * @param value - Raw value read from the config.
 * @param location - Dotted field location used in the error and its message.
 */
function validateGitHubRepository(
  errors: ValidationError[],
  filePath: string,
  value: unknown,
  location: string,
): void {
  // Single place that shapes the error entry so both failure paths stay aligned.
  const report = (message: string): void => {
    errors.push({ severity: 'error', filePath, location, message });
  };

  // Non-strings collapse to '' so they share the "missing value" branch with blank strings.
  const candidate = typeof value === 'string' ? value.trim() : '';
  if (candidate === '') {
    report(
      `Field '${location}' must be a non-empty GitHub owner/name repository (e.g., EntityProcess/agentv)`,
    );
    return;
  }

  // Exactly one '/' separating two non-empty segments; URLs fail because ':' is not allowed.
  const ownerSlashName = /^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/;
  if (ownerSlashName.test(candidate)) {
    return;
  }

  report(
    `Field '${location}' must use GitHub owner/name format (e.g., EntityProcess/agentv), not a URL. It resolves to https://github.com/<owner>/<name>.git for git operations.`,
  );
}

/**
 * Validates the `results` block of a registered project entry.
 *
 * An `undefined` block is accepted silently. A non-object block yields a single
 * error and stops further checks. Otherwise errors are appended in this order:
 * removed legacy fields (`mode`, `repo`, `path`, `auto_push`), `repository`,
 * `local_path`, `sync` / `sync.auto_push`, and `branch_prefix`.
 *
 * @param errors - Collector that receives validation errors (mutated in place).
 * @param filePath - Path of the config file being validated, copied into each error.
 * @param rawResults - Raw `results` value read from the project entry.
 * @param location - Dotted location of the `results` block, used as a prefix for field locations.
 */
function validateProjectResultsConfig(
  errors: ValidationError[],
  filePath: string,
  rawResults: unknown,
  location: string,
): void {
  if (rawResults === undefined) {
    return;
  }

  // Shapes every error entry identically; `at` is the field-specific location.
  const report = (at: string, message: string): void => {
    errors.push({ severity: 'error', filePath, location: at, message });
  };

  // Arrays and null are `typeof 'object'`, so they are excluded explicitly.
  const isPlainObject = (candidate: unknown): candidate is Record<string, unknown> =>
    typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);

  if (!isPlainObject(rawResults)) {
    report(location, `Field '${location}' must be an object`);
    return;
  }
  const results = rawResults;

  // Legacy keys, checked in this fixed order, each paired with its migration hint.
  const removedFields: ReadonlyArray<readonly [string, string]> = [
    [
      'mode',
      `Remove '${location}.mode'; project results are GitHub-backed by '${location}.repository'.`,
    ],
    [
      'repo',
      `Field '${location}.repo' was removed. Use '${location}.repository' with GitHub owner/name format instead.`,
    ],
    [
      'path',
      `Field '${location}.path' was removed. Use '${location}.local_path' for the local clone path instead.`,
    ],
    [
      'auto_push',
      `Field '${location}.auto_push' was removed. Use '${location}.sync.auto_push' instead.`,
    ],
  ];
  for (const [field, guidance] of removedFields) {
    if (results[field] !== undefined) {
      report(`${location}.${field}`, guidance);
    }
  }

  // Always invoked, so an absent `repository` is reported by the helper as well.
  validateGitHubRepository(errors, filePath, results.repository, `${location}.repository`);

  const localPath = results.local_path;
  if (localPath !== undefined) {
    const localPathLocation = `${location}.local_path`;
    if (typeof localPath !== 'string' || localPath.trim().length === 0) {
      report(localPathLocation, `Field '${location}.local_path' must be a non-empty string`);
    } else if (!isFilesystemPath(localPath.trim())) {
      report(
        localPathLocation,
        `'${location}.local_path' must be an absolute or home-relative filesystem path (e.g., ~/data/agentv-results).`,
      );
    }
  }

  const sync = results.sync;
  if (sync !== undefined) {
    if (!isPlainObject(sync)) {
      report(`${location}.sync`, `Field '${location}.sync' must be an object`);
    } else if (sync.auto_push !== undefined && typeof sync.auto_push !== 'boolean') {
      report(`${location}.sync.auto_push`, `Field '${location}.sync.auto_push' must be a boolean`);
    }
  }

  const branchPrefix = results.branch_prefix;
  if (branchPrefix === undefined) {
    return;
  }
  if (typeof branchPrefix !== 'string' || branchPrefix.trim().length === 0) {
    report(
      `${location}.branch_prefix`,
      `Field '${location}.branch_prefix' must be a non-empty string`,
    );
  }
}

function validateResultsConfig(
errors: ValidationError[],
filePath: string,
Expand Down
3 changes: 1 addition & 2 deletions packages/core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ export {
} from './paths.js';
export {
type ProjectEntry,
type ProjectSource,
type ProjectRegistry,
loadProjectRegistry,
saveProjectRegistry,
Expand All @@ -110,7 +109,7 @@ export {
deriveProjectId,
getProjectsRegistryPath,
} from './projects.js';
export { syncProject, syncProjects } from './project-sync.js';
export { syncProject, syncProjects, resolveGitHubRepositoryUrl } from './project-sync.js';
export { trimBaselineResult } from './evaluation/baseline.js';
export { DEFAULT_CATEGORY, deriveCategory } from './evaluation/category.js';
export * from './observability/index.js';
Expand Down
Loading
Loading