Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 51 additions & 36 deletions packages/core/src/evaluation/results-repo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,8 @@ async function inspectResultsRepoGit(repoDir: string): Promise<ResultsRepoGitIns
cwd: repoDir,
check: false,
});
const { dirtyPaths, conflictedPaths } = parseGitPorcelainPaths(porcelain);
const { dirtyPaths: allDirtyPaths, conflictedPaths } = parseGitPorcelainPaths(porcelain);
const dirtyPaths = allDirtyPaths.filter(isSafeResultsRepoPath);
const { ahead = 0, behind = 0 } = await getAheadBehind(repoDir, upstream);
const inProgressConflict = await hasInProgressGitConflict(repoDir);

Expand Down Expand Up @@ -518,9 +519,6 @@ function lastErrorForGitInspection(
if (status.auto_push === false) {
return 'Results repo has uncommitted changes and auto_push is disabled';
}
if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
return 'Results repo has non-results working tree changes';
}
}

return undefined;
Expand Down Expand Up @@ -562,13 +560,12 @@ function withActionFlags(
};
}

/**
 * Reports whether a path is the results directory itself or something nested
 * beneath it.
 *
 * @param p - Path to classify; compared textually against `RESULTS_REPO_RESULTS_DIR`.
 * @returns `true` when `p` equals the results directory or begins with it followed by `/`.
 */
function isSafeResultsRepoPath(p: string): boolean {
  if (p === RESULTS_REPO_RESULTS_DIR) {
    return true;
  }
  // The trailing slash keeps names that merely share the prefix from matching.
  const nestedPrefix = RESULTS_REPO_RESULTS_DIR + '/';
  return p.startsWith(nestedPrefix);
}

/**
 * Reports whether a list of paths is non-empty and lies entirely inside the
 * results directory.
 *
 * @param paths - Paths to check.
 * @returns `true` only when there is at least one path and every path passes
 *   `isSafeResultsRepoPath`; an empty list yields `false`.
 */
function areSafeResultsRepoPaths(paths: readonly string[]): boolean {
  // The length guard is deliberate: `every` alone is vacuously true for [].
  return paths.length > 0 && paths.every(isSafeResultsRepoPath);
}

async function getAheadPaths(
Expand Down Expand Up @@ -707,35 +704,53 @@ export async function syncResultsRepoForProject(config: ResultsConfig): Promise<
);
}

if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
updateStatusFile(normalized, {
last_error: 'Results repo has non-results working tree changes',
});
return withBlockedStatus(status, 'Results repo has non-results working tree changes', {
pullPerformed,
pushPerformed,
commitCreated,
});
}

if ((inspection.behind ?? 0) > 0) {
const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
const reason = 'Results repo has uncommitted result changes and remote changes';
updateStatusFile(normalized, { last_error: reason });
return withBlockedStatus(status, reason, {
pullPerformed,
pushPerformed,
commitCreated,
});
if (!inspection.upstream) {
const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
updateStatusFile(normalized, {
last_error: 'Results repo has no upstream branch to pull from',
});
return withBlockedStatus(status, 'Results repo has no upstream branch to pull from', {
pullPerformed,
pushPerformed,
commitCreated,
});
}

try {
await runGit(['merge', '--ff-only', inspection.upstream], { cwd: repoDir });
pullPerformed = true;
inspection = await inspectResultsRepoGit(repoDir);
} catch (error) {
inspection = await inspectResultsRepoGit(repoDir);
const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
const reason = `Results repo could not be fast-forwarded: ${getStatusMessage(error)}`;
updateStatusFile(normalized, { last_error: reason });
return withBlockedStatus(status, reason, {
pullPerformed,
pushPerformed,
commitCreated,
});
}
}

await runGit(['add', '--all', '--', RESULTS_REPO_RESULTS_DIR], { cwd: repoDir });
await runGit(['commit', '-m', 'chore(results): sync local result metadata'], {
cwd: repoDir,
});
commitCreated = true;
inspection = await inspectResultsRepoGit(repoDir);
if (inspection.syncStatus === 'dirty') {
await runGit(['add', '--all', '--', RESULTS_REPO_RESULTS_DIR], { cwd: repoDir });
await runGit(
[
'commit',
'-m',
'chore(results): sync local result metadata',
'--',
RESULTS_REPO_RESULTS_DIR,
],
{
cwd: repoDir,
},
);
commitCreated = true;
inspection = await inspectResultsRepoGit(repoDir);
}
}

if (inspection.syncStatus === 'diverged') {
Expand Down
158 changes: 153 additions & 5 deletions packages/core/test/evaluation/results-repo.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -519,24 +519,172 @@ describe('results repo write path', () => {
);
}, 20000);

it('blocks dirty non-results changes with git summaries instead of resetting', async () => {
it('ignores dirty non-results files when reporting project sync status', async () => {
const { remoteDir } = initializeRemoteRepo(rootDir);
const cloneDir = path.join(rootDir, 'results-clone');
const config = createResultsConfig(remoteDir, cloneDir);

await ensureResultsRepoClone(config);
writeFileSync(path.join(cloneDir, 'NOTES.md'), 'do not auto-push me\n');

await expect(getResultsRepoSyncStatus(config)).resolves.toMatchObject({
sync_status: 'clean',
dirty_paths: [],
last_error: undefined,
});

const status = await syncResultsRepoForProject(config);

expect(status.sync_status).toBe('dirty');
expect(status.blocked).toBe(true);
expect(status.block_reason).toContain('non-results');
expect(status.dirty_paths).toEqual(['NOTES.md']);
expect(status.sync_status).toBe('clean');
expect(status.blocked).toBe(false);
expect(status.dirty_paths).toEqual([]);
expect(status.git_status).toContain('NOTES.md');
expect(readFileSync(path.join(cloneDir, 'NOTES.md'), 'utf8')).toBe('do not auto-push me\n');
}, 20000);

// Run artifacts written under `.agentv/results` must be committed and pushed, while an
// unrelated file that was never `git add`-ed stays out of the remote and unchanged on disk.
it('commits and pushes dirty result artifacts while leaving unrelated files untracked', async () => {
  const { remoteDir } = initializeRemoteRepo(rootDir);
  const cloneDir = path.join(rootDir, 'results-clone');
  const config = createResultsConfig(remoteDir, cloneDir);

  await ensureResultsRepoClone(config);
  // Set a committer identity in the clone (presumably required for the sync commit).
  git('git config user.email "test@example.com"', cloneDir);
  git('git config user.name "Test User"', cloneDir);
  // Unrelated change outside the results directory; written but not staged.
  writeFileSync(path.join(cloneDir, 'package.json'), '{"dependencies":{"agentv":"next"}}\n');

  const runTimestamp = '2026-05-24T11-00-00-000Z';
  const runDir = path.join(cloneDir, '.agentv', 'results', 'runs', 'safe-run', runTimestamp);
  writeRunArtifacts(runDir, 'safe-run', '2026-05-24T11:00:00.000Z');

  const status = await syncResultsRepoForProject(config);

  expect(status).toMatchObject({
    sync_status: 'clean',
    commit_created: true,
    push_performed: true,
    blocked: false,
  });
  expect(status.dirty_paths).toEqual([]);

  // List the remote tree once and run both assertions against the same listing.
  const remoteFiles = git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir);
  expect(remoteFiles).toContain(`.agentv/results/runs/safe-run/${runTimestamp}/benchmark.json`);
  expect(remoteFiles).not.toContain('package.json');

  // The unrelated file must still hold exactly what was written before the sync.
  expect(readFileSync(path.join(cloneDir, 'package.json'), 'utf8')).toBe(
    '{"dependencies":{"agentv":"next"}}\n',
  );
}, 20000);

// A file staged in the index before the sync must not ride along in the results commit:
// it should be absent from the remote and still show as staged afterwards.
it('does not commit unrelated files that were already staged before sync', async () => {
  const { remoteDir } = initializeRemoteRepo(rootDir);
  const cloneDir = path.join(rootDir, 'results-clone');
  const config = createResultsConfig(remoteDir, cloneDir);
  await ensureResultsRepoClone(config);

  git('git config user.email "test@example.com"', cloneDir);
  git('git config user.name "Test User"', cloneDir);

  // Stage an unrelated file ahead of the sync.
  const unrelatedFile = path.join(cloneDir, 'package.json');
  writeFileSync(unrelatedFile, '{"dependencies":{"agentv":"next"}}\n');
  git('git add package.json', cloneDir);

  // Produce a fresh run under the results directory.
  const runName = 'staged-unrelated';
  const runTimestamp = '2026-05-24T11-30-00-000Z';
  const runDir = path.join(cloneDir, '.agentv', 'results', 'runs', runName, runTimestamp);
  writeRunArtifacts(runDir, runName, '2026-05-24T11:30:00.000Z');

  const syncResult = await syncResultsRepoForProject(config);

  expect(syncResult).toMatchObject({
    sync_status: 'clean',
    commit_created: true,
    push_performed: true,
    blocked: false,
  });

  const pushedFiles = git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir);
  const expectedArtifact = `.agentv/results/runs/${runName}/${runTimestamp}/benchmark.json`;
  expect(pushedFiles).toContain(expectedArtifact);
  expect(pushedFiles).not.toContain('package.json');

  // NOTE(review): porcelain v1 prints two status columns before the path ("A  package.json");
  // confirm this single-space literal matches what the `git` helper actually returns.
  const porcelain = git('git status --porcelain', cloneDir);
  expect(porcelain).toContain('A package.json');
}, 20000);

// With only an unrelated local file modified and a new commit on the remote, the sync is
// expected to pull without committing or pushing, and to leave the local file alone.
it('fast-forwards remote updates even when unrelated local files are dirty', async () => {
  const { remoteDir, seedDir } = initializeRemoteRepo(rootDir);
  const cloneDir = path.join(rootDir, 'results-clone');
  const config = createResultsConfig(remoteDir, cloneDir);
  await ensureResultsRepoClone(config);

  // Local, unrelated change outside the results directory.
  const localFile = path.join(cloneDir, 'package.json');
  const localContents = '{"dependencies":{"agentv":"next"}}\n';
  writeFileSync(localFile, localContents);

  // Publish a new commit to the remote from the seed checkout.
  const remoteContents = 'remote update\n';
  writeFileSync(path.join(seedDir, 'REMOTE.md'), remoteContents);
  git('git add REMOTE.md && git commit --quiet -m "remote update"', seedDir);
  git('git push --quiet origin main', seedDir);

  const syncResult = await syncResultsRepoForProject(config);

  expect(syncResult).toMatchObject({
    sync_status: 'clean',
    pull_performed: true,
    push_performed: false,
    commit_created: false,
    blocked: false,
  });

  // The remote file arrived in the clone and the local file is unchanged.
  const pulledContents = readFileSync(path.join(cloneDir, 'REMOTE.md'), 'utf8');
  expect(pulledContents).toBe(remoteContents);
  expect(readFileSync(localFile, 'utf8')).toBe(localContents);
}, 20000);

// Combined case: the remote has a new commit, the clone has new result artifacts, and an
// unrelated local file is modified. The sync is expected to pull, commit, and push, with
// the unrelated file kept out of the remote and unchanged on disk.
it('pulls remote updates before pushing local result artifacts with unrelated dirty files', async () => {
  const { remoteDir, seedDir } = initializeRemoteRepo(rootDir);
  const cloneDir = path.join(rootDir, 'results-clone');
  const config = createResultsConfig(remoteDir, cloneDir);
  await ensureResultsRepoClone(config);

  git('git config user.email "test@example.com"', cloneDir);
  git('git config user.name "Test User"', cloneDir);

  // Unrelated local change outside the results directory.
  const localFile = path.join(cloneDir, 'package.json');
  const localContents = '{"dependencies":{"agentv":"next"}}\n';
  writeFileSync(localFile, localContents);

  // New commit published to the remote from the seed checkout.
  writeFileSync(path.join(seedDir, 'REMOTE.md'), 'remote update\n');
  git('git add REMOTE.md && git commit --quiet -m "remote update"', seedDir);
  git('git push --quiet origin main', seedDir);

  // New local run under the results directory.
  const runName = 'pulled-then-pushed';
  const runTimestamp = '2026-05-24T12-00-00-000Z';
  const runDir = path.join(cloneDir, '.agentv', 'results', 'runs', runName, runTimestamp);
  writeRunArtifacts(runDir, runName, '2026-05-24T12:00:00.000Z');

  const syncResult = await syncResultsRepoForProject(config);

  expect(syncResult).toMatchObject({
    sync_status: 'clean',
    pull_performed: true,
    push_performed: true,
    commit_created: true,
    blocked: false,
  });

  const pushedFiles = git(`git --git-dir "${remoteDir}" ls-tree -r --name-only main`, rootDir);
  const expectedArtifact = `.agentv/results/runs/${runName}/${runTimestamp}/benchmark.json`;
  expect(pushedFiles).toContain('REMOTE.md');
  expect(pushedFiles).toContain(expectedArtifact);
  expect(pushedFiles).not.toContain('package.json');

  expect(readFileSync(localFile, 'utf8')).toBe(localContents);
}, 20000);

it('blocks diverged committed histories with diff summary', async () => {
const { remoteDir, seedDir } = initializeRemoteRepo(rootDir);
const cloneDir = path.join(rootDir, 'results-clone');
Expand Down
Loading