8000 chore: improve CI reliability by dannykopping · Pull Request #16169 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

chore: improve CI reliability #16169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 20, 2025
Prev Previous commit
Next Next commit
Remove low-signal tests, add pg tests for windows/mac
Signed-off-by: Danny Kopping <danny@coder.com>
  • Loading branch information
dannykopping committed Jan 17, 2025
commit eb4419719e1f41fadfc07eb0ef99bbde8c495d0e
105 changes: 66 additions & 39 deletions .github/workflows/nightly-gauntlet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,27 @@ permissions:
contents: read

jobs:
go-race:
# While GitHub's toaster runners are likelier to flake, we want consistency
# between this environment and the regular test environment for DataDog
# statistics and to only show real workflow threats.
runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }}
# This runner costs 0.016 USD per minute,
# so 0.016 * 240 = 3.84 USD per run.
timeout-minutes: 240
test-go-pg:
runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }}


# TODO: re-enable main check!





# if: github.ref == 'refs/heads/main'
# This timeout must be greater than the timeout set by `go test` in
# `make test-postgres` to ensure we receive a trace of running
# goroutines. Setting this to the timeout +5m should work quite well
# even if some of the preceding steps are slow.
timeout-minutes: 25
strategy:
matrix:
os:
- macos-latest
- windows-2022
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
Expand All @@ -27,56 +40,70 @@ jobs:

- name: Checkout
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
with:
fetch-depth: 1

- name: Setup Go
uses: ./.github/actions/setup-go

- name: Setup Terraform
uses: ./.github/actions/setup-tf

- name: Run Tests
run: |
# -race is likeliest to catch flaky tests
# due to correctness detection and its performance
# impact.
gotestsum --junitfile="gotests.xml" -- -timeout=240m -count=3 -race ./...
# Sets up the ImDisk toolkit for Windows and creates a RAM disk on drive R:.
- name: Setup ImDisk
if: runner.os == 'Windows'
uses: ./.github/actions/setup-imdisk

- name: Upload test results to DataDog
uses: ./.github/actions/upload-datadog
if: always()
with:
api-key: ${{ secrets.DATADOG_API_KEY }}
- name: Test with PostgreSQL Database
env:
POSTGRES_VERSION: "13"
TS_DEBUG_DISCO: "true"
LC_CTYPE: "en_US.UTF-8"
LC_ALL: "en_US.UTF-8"
shell: bash
run: |
# if macOS, install google-chrome for scaletests
# As another concern, should we really have this kind of external dependency
# requirement on standard CI?
if [ "${{ matrix.os }}" == "macos-latest" ]; then
brew install google-chrome
fi

go-timing:
# We run these tests with p=1 so we don't need a lot of compute.
runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04' || 'ubuntu-latest' }}
timeout-minutes: 10
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
with:
egress-policy: audit
# By default Go will use the number of logical CPUs, which
# is a fine default.
PARALLEL_FLAG=""

- name: Checkout
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
# macOS will output "The default interactive shell is now zsh"
# intermittently in CI...
if [ "${{ matrix.os }}" == "macos-latest" ]; then
touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile
fi

- name: Setup Go
uses: ./.github/actions/setup-go
if [ "${{ runner.os }}" == "Windows" ]; then
# Create a temp dir on the R: ramdisk drive for Windows. The default
# C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755
mkdir -p "R:/temp/embedded-pg"
go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg"
# Reduce test parallelism, mirroring what we do for race tests.
# We'd been encountering issues with timing related flakes, and
# this seems to help.
else
go run scripts/embedded-pg/main.go
fi

- name: Run Tests
run: |
gotestsum --junitfile="gotests.xml" -- --tags="timing" -p=1 -run='_Timing/' ./...
DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./...

- name: Upload test results to DataDog
- name: Upload test stats to Datadog
timeout-minutes: 1
continue-on-error: true
uses: ./.github/actions/upload-datadog
if: always()
if: success() || failure()
with:
api-key: ${{ secrets.DATADOG_API_KEY }}

notify-slack-on-failure:
needs:
- go-race
- go-timing
- test-go-pg
runs-on: ubuntu-latest
if: failure() && github.ref == 'refs/heads/main'

Expand Down
0