From 862012133b56e680d4188450fb455e4e6e471c5a Mon Sep 17 00:00:00 2001
From: Ash Wu <hsatac@gmail.com>
Date: Thu, 12 Mar 2026 16:37:57 +0800
Subject: [PATCH] [build-tools] Store per-attempt results for Maestro test
 retries

Signed-off-by: Ash Wu <hsatac@gmail.com>
---
 .../__tests__/internalMaestroTest.test.ts     |  28 ++
 .../__tests__/maestroResultParser.test.ts     | 211 +++++++++++++++
 .../reportMaestroTestResults.test.ts          |  39 +++
 .../steps/functions/internalMaestroTest.ts    |  30 +--
 .../steps/functions/maestroResultParser.ts    | 248 +++++++++++-------
 .../functions/reportMaestroTestResults.ts     |  21 +-
 6 files changed, 460 insertions(+), 117 deletions(-)
diff --git a/packages/build-tools/src/steps/functions/__tests__/internalMaestroTest.test.ts b/packages/build-tools/src/steps/functions/__tests__/internalMaestroTest.test.ts
index a283978f76..1413d97d36 100644
--- a/packages/build-tools/src/steps/functions/__tests__/internalMaestroTest.test.ts
+++ b/packages/build-tools/src/steps/functions/__tests__/internalMaestroTest.test.ts
@@ -177,6 +177,34 @@ describe(createInternalEasMaestroTestFunction, () => {
     );
   });
 
+  it('generates per-attempt report filenames when retrying', async () => {
+    // Mock sequence:
+    // 1. xcrun simctl shutdown → succeeds (beforeEach default is fine)
+    // 2. maestro test attempt 0 → fails (triggers retry)
+    // 3. maestro test attempt 1 → succeeds (beforeEach default)
+    mockedSpawnAsync
+      .mockResolvedValueOnce(undefined as any) // xcrun simctl shutdown
+      .mockRejectedValueOnce(new Error('Maestro test failed')); // maestro attempt 0
+    // Attempt 1 falls through to beforeEach's mockResolvedValue default
+
+    const step = createStep({
+      callInputs: { output_format: 'junit', retries: 2 },
+    });
+    await step.executeAsync();
+
+    // Filter to just the maestro test command calls (skip xcrun simctl shutdown)
+    const maestroCalls = mockedSpawnAsync.mock.calls.filter(([cmd]) => cmd === 'maestro');
+    expect(maestroCalls).toHaveLength(2);
+
+    // Extract --output paths from maestro calls
+    const outputArgs = maestroCalls.map(([, args]) => args[args.indexOf('--output') + 1]);
+
+    expect(outputArgs[0]).toContain('attempt-0');
+    expect(outputArgs[1]).toContain('attempt-1');
+    // Filenames must be unique (not overwriting)
+    expect(outputArgs[0]).not.toBe(outputArgs[1]);
+  });
+
   it('fails Android flow when clone startup exhausts all attempts', async () => {
     mockedAndroidUtils.startAsync
       .mockResolvedValueOnce({
diff --git a/packages/build-tools/src/steps/functions/__tests__/maestroResultParser.test.ts b/packages/build-tools/src/steps/functions/__tests__/maestroResultParser.test.ts
index d788307720..64b9e075ee 100644
--- a/packages/build-tools/src/steps/functions/__tests__/maestroResultParser.test.ts
+++ b/packages/build-tools/src/steps/functions/__tests__/maestroResultParser.test.ts
@@ -217,6 +217,217 @@ describe(parseMaestroResults, () => {
     ]);
   });
 
+  it('returns per-attempt results when multiple JUnit files exist for the same flow', async () => {
+    vol.fromJSON({
+      // Attempt 0: login FAILED
+      '/junit/junit-report-flow-1-attempt-0.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="1" failures="1">',
+        '    <testcase id="login" name="login" classname="login" time="5.0" status="ERROR">',
+        '      <failure>Timeout</failure>',
+        '    </testcase>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      // Attempt 1: login PASSED
+      '/junit/junit-report-flow-1-attempt-1.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="1" failures="0">',
+        '    <testcase id="login" name="login" classname="login" time="3.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      // ai-*.json metadata (2 timestamp dirs = 2 attempts)
+      '/tests/2026-01-28_055409/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+      '/tests/2026-01-28_055420/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+    });
+
+    const results = await parseMaestroResults('/junit', '/tests', '/root/project');
+
+    // Should return 2 results — one per attempt
+    expect(results).toHaveLength(2);
+    expect(results).toEqual([
+      expect.objectContaining({
+        name: 'login',
+        path: '.maestro/login.yml',
+        status: 'failed',
+        errorMessage: 'Timeout',
+        duration: 5000,
+        retryCount: 0,
+      }),
+      expect.objectContaining({
+        name: 'login',
+        path: '.maestro/login.yml',
+        status: 'passed',
+        errorMessage: null,
+        duration: 3000,
+        retryCount: 1,
+      }),
+    ]);
+  });
+
+  it('returns per-attempt results for reuse_devices=true (all flows in every attempt)', async () => {
+    vol.fromJSON({
+      // Attempt 0: home FAILED, login PASSED
+      '/junit-reports/android-maestro-junit-attempt-0.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="2" failures="1">',
+        '    <testcase id="home" name="home" classname="home" time="5.0" status="ERROR">',
+        '      <failure>Timeout</failure>',
+        '    </testcase>',
+        '    <testcase id="login" name="login" classname="login" time="3.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      // Attempt 1: both PASSED
+      '/junit-reports/android-maestro-junit-attempt-1.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="2" failures="0">',
+        '    <testcase id="home" name="home" classname="home" time="4.0" status="SUCCESS"/>',
+        '    <testcase id="login" name="login" classname="login" time="2.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      '/tests/2026-01-28_055409/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/tests/2026-01-28_055409/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+      '/tests/2026-01-28_055420/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/tests/2026-01-28_055420/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+    });
+
+    const results = await parseMaestroResults('/junit-reports', '/tests', '/root/project');
+
+    // 4 results: 2 flows × 2 attempts
+    expect(results).toHaveLength(4);
+    expect(results).toEqual([
+      expect.objectContaining({ name: 'home', status: 'failed', retryCount: 0 }),
+      expect.objectContaining({ name: 'home', status: 'passed', retryCount: 1 }),
+      expect.objectContaining({ name: 'login', status: 'passed', retryCount: 0 }),
+      expect.objectContaining({ name: 'login', status: 'passed', retryCount: 1 }),
+    ]);
+  });
+
+  it('returns per-attempt results with sharding (multiple testsuites per attempt file)', async () => {
+    vol.fromJSON({
+      // Attempt 0: shard 1 has home (FAILED), shard 2 has login (PASSED)
+      '/junit-reports/android-maestro-junit-attempt-0.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" device="emulator-5554" tests="1" failures="1">',
+        '    <testcase id="home" name="home" classname="home" time="5.0" status="ERROR">',
+        '      <failure>Timeout</failure>',
+        '    </testcase>',
+        '  </testsuite>',
+        '  <testsuite name="Test Suite" device="emulator-5556" tests="1" failures="0">',
+        '    <testcase id="login" name="login" classname="login" time="3.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      // Attempt 1: all passed across shards
+      '/junit-reports/android-maestro-junit-attempt-1.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" device="emulator-5554" tests="1" failures="0">',
+        '    <testcase id="home" name="home" classname="home" time="4.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '  <testsuite name="Test Suite" device="emulator-5556" tests="1" failures="0">',
+        '    <testcase id="login" name="login" classname="login" time="2.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      '/tests/2026-01-28_055409/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/tests/2026-01-28_055409/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+      '/tests/2026-01-28_055420/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/tests/2026-01-28_055420/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+    });
+
+    const results = await parseMaestroResults('/junit-reports', '/tests', '/root/project');
+
+    expect(results).toHaveLength(4);
+    expect(results).toEqual([
+      expect.objectContaining({ name: 'home', status: 'failed', retryCount: 0 }),
+      expect.objectContaining({ name: 'home', status: 'passed', retryCount: 1 }),
+      expect.objectContaining({ name: 'login', status: 'passed', retryCount: 0 }),
+      expect.objectContaining({ name: 'login', status: 'passed', retryCount: 1 }),
+    ]);
+  });
+
+  it('backward compat: reuse_devices=true with retries but old single JUnit file', async () => {
+    vol.fromJSON({
+      // Single overwritten JUnit (only has final attempt's results)
+      '/maestro-tests/android-maestro-junit.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="2" failures="0">',
+        '    <testcase id="home" name="home" classname="home" time="4.0" status="SUCCESS"/>',
+        '    <testcase id="login" name="login" classname="login" time="2.0" status="SUCCESS"/>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      // 2 timestamp dirs — both flows appear in both (entire suite retried)
+      '/maestro-tests/2026-01-28_055409/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/maestro-tests/2026-01-28_055409/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+      '/maestro-tests/2026-01-28_055420/ai-home.json': JSON.stringify({
+        flow_name: 'home',
+        flow_file_path: '/root/project/.maestro/home.yml',
+      }),
+      '/maestro-tests/2026-01-28_055420/ai-login.json': JSON.stringify({
+        flow_name: 'login',
+        flow_file_path: '/root/project/.maestro/login.yml',
+      }),
+    });
+
+    const results = await parseMaestroResults('/maestro-tests', '/maestro-tests', '/root/project');
+
+    // Both flows have 2 occurrences → retryCount = 1 for both
+    expect(results).toHaveLength(2);
+    expect(results).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({ name: 'home', status: 'passed', retryCount: 1 }),
+        expect.objectContaining({ name: 'login', status: 'passed', retryCount: 1 }),
+      ])
+    );
+  });
+
   it('handles reuse_devices=false (separate junit_report_directory)', async () => {
     vol.fromJSON({
       // JUnit in temp dir (per-flow files)
diff --git a/packages/build-tools/src/steps/functions/__tests__/reportMaestroTestResults.test.ts b/packages/build-tools/src/steps/functions/__tests__/reportMaestroTestResults.test.ts
index d4d74ab4dd..b46d05f199 100644
--- a/packages/build-tools/src/steps/functions/__tests__/reportMaestroTestResults.test.ts
+++ b/packages/build-tools/src/steps/functions/__tests__/reportMaestroTestResults.test.ts
@@ -238,6 +238,45 @@ describe(createReportMaestroTestResultsFunction, () => {
     expect(mockGraphqlClient.mutation).not.toHaveBeenCalled();
   });
 
+  it('sends per-attempt results with same name but different retryCount', async () => {
+    vol.fromJSON({
+      // Two JUnit files for same flow (per-attempt)
+      '/junit/junit-report-flow-1-attempt-0.xml': [
+        '<?xml version="1.0"?>',
+        '<testsuites>',
+        '  <testsuite name="Test Suite" tests="1" failures="1">',
+        '    <testcase id="home" name="home" classname="home" time="5.0" status="ERROR">',
+        '      <failure>Tap failed</failure>',
+        '    </testcase>',
+        '  </testsuite>',
+        '</testsuites>',
+      ].join('\n'),
+      '/junit/junit-report-flow-1-attempt-1.xml': JUNIT_PASS,
+      '/tests/2026-01-28_055409/ai-home.json': FLOW_AI,
+      '/tests/2026-01-28_055420/ai-home.json': FLOW_AI,
+    });
+
+    mockMutationFn.mockResolvedValue({
+      data: {
+        workflowDeviceTestCaseResult: {
+          createWorkflowDeviceTestCaseResults: [{ id: 'id-1' }, { id: 'id-2' }],
+        },
+      },
+    });
+
+    await createStep().executeAsync();
+
+    expect(mockGraphqlClient.mutation).toHaveBeenCalledTimes(1);
+    const [, variables] = (mockGraphqlClient.mutation as jest.Mock).mock.calls[0];
+    expect(variables.input.testCaseResults).toHaveLength(2);
+    expect(variables.input.testCaseResults[0]).toEqual(
+      expect.objectContaining({ name: 'home', status: 'FAILED', retryCount: 0 })
+    );
+    expect(variables.input.testCaseResults[1]).toEqual(
+      expect.objectContaining({ name: 'home', status: 'PASSED', retryCount: 1 })
+    );
+  });
+
   it('uses default directories when inputs are not provided', async () => {
     vol.fromJSON({
       '/home/expo/.maestro/tests/report.xml': JUNIT_PASS,
diff --git a/packages/build-tools/src/steps/functions/internalMaestroTest.ts b/packages/build-tools/src/steps/functions/internalMaestroTest.ts
index 6be4446bc3..3b04228752 100644
--- a/packages/build-tools/src/steps/functions/internalMaestroTest.ts
+++ b/packages/build-tools/src/steps/functions/internalMaestroTest.ts
@@ -214,18 +214,17 @@ export function createInternalEasMaestroTestFunction(ctx: CustomBuildContext): B
       for (const [flowIndex, flowPath] of flowPathsToExecute.entries()) {
         stepCtx.logger.info('');
 
-        // If output_format is empty or noop, we won't use this.
-        const outputPath = path.join(
-          maestroReportsDir,
-          [
-            `${output_format ? output_format + '-' : ''}report-flow-${flowIndex + 1}`,
-            MaestroOutputFormatToExtensionMap[output_format ?? 'noop'],
-          ]
-            .filter(Boolean)
-            .join('.')
-        );
-
         for (let attemptCount = 0; attemptCount < retries; attemptCount++) {
+          // Generate unique report path per attempt (not overwritten on retry)
+          const outputPath = path.join(
+            maestroReportsDir,
+            [
+              `${output_format ? output_format + '-' : ''}report-flow-${flowIndex + 1}-attempt-${attemptCount}`,
+              MaestroOutputFormatToExtensionMap[output_format ?? 'noop'],
+            ]
+              .filter(Boolean)
+              .join('.')
+          );
           const localDeviceName = `eas-simulator-${flowIndex}-${attemptCount}` as
             | IosSimulatorName
             | AndroidVirtualDeviceName;
@@ -276,12 +275,11 @@ export function createInternalEasMaestroTestFunction(ctx: CustomBuildContext): B
           if (logsResult?.ok) {
             try {
               const extension = path.extname(logsResult.value.outputPath);
-              const destinationPath = path.join(deviceLogsDir, `flow-${flowIndex}${extension}`);
+              const destinationPath = path.join(
+                deviceLogsDir,
+                `flow-${flowIndex}-attempt-${attemptCount}${extension}`
+              );
 
-              await fs.promises.rm(destinationPath, {
-                force: true,
-                recursive: true,
-              });
               await fs.promises.rename(logsResult.value.outputPath, destinationPath);
             } catch (err) {
               stepCtx.logger.warn({ err }, 'Failed to prepare device logs for upload.');
diff --git a/packages/build-tools/src/steps/functions/maestroResultParser.ts b/packages/build-tools/src/steps/functions/maestroResultParser.ts
index 21b66d9ed5..53c8da5c29 100644
--- a/packages/build-tools/src/steps/functions/maestroResultParser.ts
+++ b/packages/build-tools/src/steps/functions/maestroResultParser.ts
@@ -38,97 +38,96 @@ const xmlParser = new XMLParser({
   isArray: name => ['testsuite', 'testcase', 'property'].includes(name),
 });
 
-export async function parseJUnitTestCases(junitDirectory: string): Promise<JUnitTestCaseResult[]> {
-  let entries: string[];
-  try {
-    entries = await fs.readdir(junitDirectory);
-  } catch {
-    return [];
-  }
-
-  const xmlFiles = entries.filter(f => f.endsWith('.xml'));
-  if (xmlFiles.length === 0) {
-    return [];
-  }
-
+// Internal helper — not exported. Parses a single JUnit XML file.
+async function parseJUnitFile(filePath: string): Promise<JUnitTestCaseResult[]> {
   const results: JUnitTestCaseResult[] = [];
+  try {
+    const content = await fs.readFile(filePath, 'utf-8');
+    const parsed = xmlParser.parse(content);
 
-  for (const xmlFile of xmlFiles) {
-    try {
-      const content = await fs.readFile(path.join(junitDirectory, xmlFile), 'utf-8');
-      const parsed = xmlParser.parse(content);
+    const testsuites = parsed?.testsuites?.testsuite;
+    if (!Array.isArray(testsuites)) {
+      return results;
+    }
 
-      const testsuites = parsed?.testsuites?.testsuite;
-      if (!Array.isArray(testsuites)) {
+    for (const suite of testsuites) {
+      const testcases = suite?.testcase;
+      if (!Array.isArray(testcases)) {
         continue;
       }
 
-      for (const suite of testsuites) {
-        const testcases = suite?.testcase;
-        if (!Array.isArray(testcases)) {
+      for (const tc of testcases) {
+        const name = tc['@_name'];
+        if (!name) {
           continue;
         }
 
-        for (const tc of testcases) {
-          const name = tc['@_name'];
-          if (!name) {
+        const timeStr = tc['@_time'];
+        const timeSeconds = timeStr ? parseFloat(timeStr) : 0;
+        const duration = Number.isFinite(timeSeconds) ? Math.round(timeSeconds * 1000) : 0;
+
+        const status: 'passed' | 'failed' = tc['@_status'] === 'SUCCESS' ? 'passed' : 'failed';
+        const failureText =
+          tc.failure != null
+            ? typeof tc.failure === 'string'
+              ? tc.failure
+              : (tc.failure?.['#text'] ?? null)
+            : null;
+        const errorText =
+          tc.error != null
+            ? typeof tc.error === 'string'
+              ? tc.error
+              : (tc.error?.['#text'] ?? null)
+            : null;
+        const errorMessage: string | null = failureText ?? errorText ?? null;
+
+        const rawProperties: { '@_name': string; '@_value': string }[] =
+          tc.properties?.property ?? [];
+        const properties: Record<string, string> = {};
+        for (const prop of rawProperties) {
+          const propName = prop['@_name'];
+          const value = prop['@_value'];
+          if (typeof propName !== 'string' || typeof value !== 'string') {
             continue;
           }
+          properties[propName] = value;
+        }
 
-          const timeStr = tc['@_time'];
-          const timeSeconds = timeStr ? parseFloat(timeStr) : 0;
-          const duration = Number.isFinite(timeSeconds) ? Math.round(timeSeconds * 1000) : 0;
-
-          // Use @_status as primary indicator (more robust than checking <failure> presence)
-          const status: 'passed' | 'failed' = tc['@_status'] === 'SUCCESS' ? 'passed' : 'failed';
-          // Extract error message from <failure> or <error> elements
-          const failureText =
-            tc.failure != null
-              ? typeof tc.failure === 'string'
-                ? tc.failure
-                : (tc.failure?.['#text'] ?? null)
-              : null;
-          const errorText =
-            tc.error != null
-              ? typeof tc.error === 'string'
-                ? tc.error
-                : (tc.error?.['#text'] ?? null)
-              : null;
-          const errorMessage: string | null = failureText ?? errorText ?? null;
-
-          // Extract properties
-          const rawProperties: { '@_name': string; '@_value': string }[] =
-            tc.properties?.property ?? [];
-          const properties: Record<string, string> = {};
-
-          for (const prop of rawProperties) {
-            const propName = prop['@_name'];
-            const value = prop['@_value'];
-            if (typeof propName !== 'string' || typeof value !== 'string') {
-              continue;
-            }
-            properties[propName] = value;
-          }
+        const tagsValue = properties['tags'];
+        const tags: string[] = tagsValue
+          ? tagsValue
+              .split(',')
+              .map(t => t.trim())
+              .filter(Boolean)
+          : [];
+        delete properties['tags'];
 
-          // Extract tags from "tags" property (Maestro 2.2.0+, comma-separated)
-          const tagsValue = properties['tags'];
-          const tags: string[] = tagsValue
-            ? tagsValue
-                .split(',')
-                .map(t => t.trim())
-                .filter(Boolean)
-            : [];
-          delete properties['tags'];
-
-          results.push({ name, status, duration, errorMessage, tags, properties });
-        }
+        results.push({ name, status, duration, errorMessage, tags, properties });
       }
-    } catch {
-      // Skip malformed XML files
-      continue;
     }
+  } catch {
+    // Skip malformed XML files
   }
+  return results;
+}
 
+export async function parseJUnitTestCases(junitDirectory: string): Promise<JUnitTestCaseResult[]> {
+  let entries: string[];
+  try {
+    entries = await fs.readdir(junitDirectory);
+  } catch {
+    return [];
+  }
+
+  const xmlFiles = entries.filter(f => f.endsWith('.xml'));
+  if (xmlFiles.length === 0) {
+    return [];
+  }
+
+  const results: JUnitTestCaseResult[] = [];
+  for (const xmlFile of xmlFiles) {
+    results.push(...(await parseJUnitFile(path.join(junitDirectory, xmlFile))));
+  }
   return results;
 }
 
@@ -167,9 +166,32 @@ export async function parseMaestroResults(
   testsDirectory: string,
   projectRoot: string
 ): Promise<MaestroFlowResult[]> {
-  // 1. Parse JUnit XML files (primary source)
-  const junitResults = await parseJUnitTestCases(junitDirectory);
-  if (junitResults.length === 0) {
+  // 1. Parse JUnit XML files, tracking which file each result came from
+  let junitEntries: string[];
+  try {
+    junitEntries = await fs.readdir(junitDirectory);
+  } catch {
+    return [];
+  }
+  const xmlFiles = junitEntries.filter(f => f.endsWith('.xml')).sort();
+  if (xmlFiles.length === 0) {
+    return [];
+  }
+
+  interface JUnitResultWithSource {
+    result: JUnitTestCaseResult;
+    sourceFile: string;
+  }
+
+  const junitResultsWithSource: JUnitResultWithSource[] = [];
+  for (const xmlFile of xmlFiles) {
+    const fileResults = await parseJUnitFile(path.join(junitDirectory, xmlFile));
+    for (const result of fileResults) {
+      junitResultsWithSource.push({ result, sourceFile: xmlFile });
+    }
+  }
+
+  if (junitResultsWithSource.length === 0) {
     return [];
   }
 
@@ -217,25 +239,63 @@ export async function parseMaestroResults(
   // 3. Merge: JUnit results + ai-*.json metadata
   const results: MaestroFlowResult[] = [];
 
-  for (const junit of junitResults) {
-    const flowFilePath = flowPathMap.get(junit.name);
+  // Parse attempt index from filename pattern: *-attempt-N.*
+  const ATTEMPT_PATTERN = /attempt-(\d+)/;
+
+  // Group results by flow name
+  const resultsByName = new Map<string, JUnitResultWithSource[]>();
+  for (const entry of junitResultsWithSource) {
+    const group = resultsByName.get(entry.result.name) ?? [];
+    group.push(entry);
+    resultsByName.set(entry.result.name, group);
+  }
+
+  for (const [flowName, flowEntries] of resultsByName) {
+    const flowFilePath = flowPathMap.get(flowName);
     const relativePath = flowFilePath
       ? await relativizePathAsync(flowFilePath, projectRoot)
-      : junit.name; // fallback: use flow name if ai-*.json not found
-
-    const occurrences = flowOccurrences.get(junit.name) ?? 0;
-    const retryCount = Math.max(0, occurrences - 1);
-
-    results.push({
-      name: junit.name,
-      path: relativePath,
-      status: junit.status,
-      errorMessage: junit.errorMessage,
-      duration: junit.duration,
-      retryCount,
-      tags: junit.tags,
-      properties: junit.properties,
-    });
+      : flowName;
+
+    if (flowEntries.length === 1) {
+      // Single result for this flow — use ai-*.json occurrence count for retryCount
+      // (backward compat with old-style single JUnit file that gets overwritten)
+      const { result } = flowEntries[0];
+      const occurrences = flowOccurrences.get(flowName) ?? 0;
+      const retryCount = Math.max(0, occurrences - 1);
+
+      results.push({
+        name: flowName,
+        path: relativePath,
+        status: result.status,
+        errorMessage: result.errorMessage,
+        duration: result.duration,
+        retryCount,
+        tags: result.tags,
+        properties: result.properties,
+      });
+    } else {
+      // Multiple results — per-attempt JUnit files. Sort by attempt index from filename.
+      const sorted = flowEntries
+        .map(entry => {
+          const match = entry.sourceFile.match(ATTEMPT_PATTERN);
+          const attemptIndex = match ? parseInt(match[1], 10) : 0;
+          return { ...entry, attemptIndex };
+        })
+        .sort((a, b) => a.attemptIndex - b.attemptIndex);
+
+      for (const { result, attemptIndex } of sorted) {
+        results.push({
+          name: flowName,
+          path: relativePath,
+          status: result.status,
+          errorMessage: result.errorMessage,
+          duration: result.duration,
+          retryCount: attemptIndex,
+          tags: result.tags,
+          properties: result.properties,
+        });
+      }
+    }
   }
 
   return results;
diff --git a/packages/build-tools/src/steps/functions/reportMaestroTestResults.ts b/packages/build-tools/src/steps/functions/reportMaestroTestResults.ts
index e377454210..6d7ec783f3 100644
--- a/packages/build-tools/src/steps/functions/reportMaestroTestResults.ts
+++ b/packages/build-tools/src/steps/functions/reportMaestroTestResults.ts
@@ -64,14 +64,21 @@ export function createReportMaestroTestResultsFunction(ctx: CustomBuildContext):
           return;
         }
 
-        // Maestro allows overriding flow names via config, so different flow files can share
-        // the same name. JUnit XML only contains names (not file paths), making it impossible
-        // to map duplicates back to their original flow files. Skip and let the user fix it.
-        const names = flowResults.map(r => r.name);
-        const duplicates = names.filter((n, i) => names.indexOf(n) !== i);
-        if (duplicates.length > 0) {
+        // Detect truly conflicting results: same (name, retryCount) pair means different flow files
+        // share the same name (Maestro config override), which we can't disambiguate.
+        // Same name with different retryCount is expected (per-attempt results from retries).
+        const seen = new Set<string>();
+        const conflicting = new Set<string>();
+        for (const r of flowResults) {
+          const key = `${r.name}:${r.retryCount}`;
+          if (seen.has(key)) {
+            conflicting.add(r.name);
+          }
+          seen.add(key);
+        }
+        if (conflicting.size > 0) {
           logger.error(
-            `Duplicate test case names found in JUnit output: ${[...new Set(duplicates)].join(
+            `Duplicate test case names found in JUnit output: ${[...conflicting].join(
               ', '
             )}. Skipping report. Ensure each Maestro flow has a unique name.`
           );