8000 Align retry logic for all test framework instrumentations by daniel-mohedano · Pull Request #8803 · DataDog/dd-trace-java · GitHub
[go: up one dir, main page]

Skip to content

Align retry logic for all test framework instrumentations #8803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix auto test retries global counter increment
  • Loading branch information
daniel-mohedano committed May 12, 2025
commit bfbc0f182858cd5a98a6110e788bbab3145966eb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ public RetryUntilSuccessful(
public void registerExecution(TestStatus status, long durationMillis) {
++executions;
successfulExecutionSeen |= (status != TestStatus.fail);
totalExecutions.incrementAndGet();
if (executions > 1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we not count the first execution here?

8000

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my understanding of datadog.trace.civisibility.domain.buildsystem.ProxyTestModuleTest (and the equivalent test for headless modules), the global counter for ATR retries only takes into account actual retries, so we want to avoid increasing it after the first execution of a test. Should it take into account the first execution too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I'd say let's rename totalExecutions to totalRetries then and maybe leave a short comment

totalExecutions.incrementAndGet();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package datadog.trace.civisibility.domain.buildsystem
import datadog.trace.api.Config
import datadog.trace.api.DDTraceId
import datadog.trace.api.civisibility.config.TestSourceData
import datadog.trace.api.civisibility.execution.TestStatus
import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext
import datadog.trace.civisibility.config.EarlyFlakeDetectionSettings
import datadog.trace.api.civisibility.config.TestIdentifier
Expand Down Expand Up @@ -59,20 +60,25 @@ class ProxyTestModuleTest extends DDSpecification {
def retryPolicy1 = proxyTestModule.executionPolicy(new TestIdentifier("suite", "test-1", null), TestSourceData.UNKNOWN, [])

then:
retryPolicy1.retry(false, 1L) // 2nd test execution, 1st retry globally
!retryPolicy1.retry(false, 1L) // asking for 3rd test execution - local limit reached
retryPolicy1.registerExecution(TestStatus.fail, 1L) // 1st test execution
!retryPolicy1.wasLastExecution()
retryPolicy1.registerExecution(TestStatus.fail, 1L) // 2nd test execution, 1st retry globally
retryPolicy1.wasLastExecution() // asking for 3rd test execution - local limit reached

when:
def retryPolicy2 = proxyTestModule.executionPolicy(new TestIdentifier("suite", "test-2", null), TestSourceData.UNKNOWN, [])

then:
retryPolicy2.retry(false, 1L) // 2nd test execution, 2nd retry globally (since previous test was retried too)
!retryPolicy2.retry(false, 1L) // asking for 3rd test execution - local limit reached
retryPolicy2.registerExecution(TestStatus.fail, 1L) // 1st test execution
!retryPolicy2.wasLastExecution()
retryPolicy2.registerExecution(TestStatus.fail, 1L) // 2nd test execution, 1st retry globally
retryPolicy2.wasLastExecution() // asking for 3rd test execution - local limit reached

when:
def retryPolicy3 = proxyTestModule.executionPolicy(new TestIdentifier("suite", "test-3", null), TestSourceData.UNKNOWN, [])

then:
!retryPolicy3.retry(false, 1L) // asking for 3rd retry globally - global limit reached
retryPolicy3.registerExecution(TestStatus.fail, 1L) // 1st test execution
retryPolicy3.wasLastExecution() // asking for 3rd retry globally - global limit reached
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import datadog.trace.api.Config
import datadog.trace.api.civisibility.config.TestIdentifier
import datadog.trace.api.civisibility.config.TestSourceData
import datadog.trace.api.civisibility.coverage.CoverageStore
import datadog.trace.api.civisibility.execution.TestStatus
import datadog.trace.api.civisibility.telemetry.CiVisibilityMetricCollector
import datadog.trace.bootstrap.instrumentation.api.AgentSpanContext
import datadog.trace.civisibility.codeowners.Codeowners
Expand All @@ -24,21 +25,26 @@ class HeadlessTestModuleTest extends SpanWriterTest {
def retryPolicy1 = headlessTestModule.executionPolicy(new TestIdentifier("suite", "test-1", null), TestSourceData.UNKNOWN, [])

then:
retryPolicy1.retry(false, 1L) // 2nd test execution, 1st retry globally
!retryPolicy1.retry(false, 1L) // asking for 3rd test execution - local limit reached
retryPolicy1.registerExecution(TestStatus.fail, 1L) // 1st test execution
!retryPolicy1.wasLastExecution()
retryPolicy1.registerExecution(TestStatus.fail, 1L) // 2nd test execution, 1st retry globally
retryPolicy1.wasLastExecution() // asking for 3rd test execution - local limit reached

when:
def retryPolicy2 = headlessTestModule.executionPolicy(new TestIdentifier("suite", "test-2", null), TestSourceData.UNKNOWN, [])

then:
retryPolicy2.retry(false, 1L) // 2nd test execution, 2nd retry globally (since previous test was retried too)
!retryPolicy2.retry(false, 1L) // asking for 3rd test execution - local limit reached
retryPolicy2.registerExecution(TestStatus.fail, 1L) // 1st test execution
!retryPolicy2.wasLastExecution()
retryPolicy2.registerExecution(TestStatus.fail, 1L) // 2nd test execution, 1st retry globally
retryPolicy2.wasLastExecution() // asking for 3rd test execution - local limit reached

when:
def retryPolicy3 = headlessTestModule.executionPolicy(new TestIdentifier("suite", "test-3", null), TestSourceData.UNKNOWN, [])

then:
!retryPolicy3.retry(false, 1L) // asking for 3rd retry globally - global limit reached
retryPolicy3.registerExecution(TestStatus.fail, 1L) // 1st test execution
retryPolicy3.wasLastExecution() // asking for 3rd retry globally - global limit reached
}

private HeadlessTestModule givenAHeadlessTestModule() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import datadog.trace.api.civisibility.config.TestFQN
import datadog.trace.api.civisibility.config.TestIdentifier
import datadog.trace.api.civisibility.config.TestMetadata
import datadog.trace.api.civisibility.config.TestSourceData
import datadog.trace.api.civisibility.execution.TestStatus
import datadog.trace.api.civisibility.telemetry.tag.RetryReason
import datadog.trace.api.civisibility.telemetry.tag.SkipReason
import datadog.trace.civisibility.config.EarlyFlakeDetectionSettings
Expand Down Expand Up @@ -101,7 +102,7 @@ class ExecutionStrategyTest extends Specification {
def policy = strategy.executionPolicy(testID, TestSourceData.UNKNOWN, [])

// register one execution to get the retry reason
policy.registerExecution(true, 0,)
policy.registerExecution(TestStatus.pass, 0)

expect:
policy.class == RunNTimes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ public String[] helperClassNames() {
commonPackageName + ".TestEventsHandlerHolder",
commonPackageName + ".TestNGClassListener",
commonPackageName + ".execution.RetryAnalyzer",
commonPackageName + ".TracingListener",
};
}

Expand Down
1 change: 1 addition & 0 deletions internal-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ excludedClassesCoverage += [
"datadog.trace.api.civisibility.events.BuildEventsHandler.ModuleInfo",
"datadog.trace.api.civisibility.events.TestDescriptor",
"datadog.trace.api.civisibility.events.TestSuiteDescriptor",
"datadog.trace.api.civisibility.execution.TestStatus",
"datadog.trace.api.civisibility.telemetry.CiVisibilityCountMetric",
"datadog.trace.api.civisibility.telemetry.CiVisibilityCountMetric.IndexHolder",
"datadog.trace.api.civisibility.telemetry.CiVisibilityDistributionMetric",
Expand Down
Loading
0