feat: add template info tags to `coderd_agents_up` metric by goodspark · Pull Request #7942 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat: add template info tags to coderd_agents_up metric #7942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions coderd/database/dbfake/dbfake.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,12 +607,12 @@ func (q *fakeQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg database.
}
if arg.Limit > 0 {
if int(arg.Limit) > len(workspaces) {
return convertToWorkspaceRows(workspaces, int64(beforePageCount)), nil
return q.convertToWorkspaceRowsNoLock(ctx, workspaces, int64(beforePageCount)), nil
}
workspaces = workspaces[:arg.Limit]
}

return convertToWorkspaceRows(workspaces, int64(beforePageCount)), nil
return q.convertToWorkspaceRowsNoLock(ctx, workspaces, int64(beforePageCount)), nil
}

// mapAgentStatus determines the agent status based on different timestamps like created_at, last_connected_at, disconnected_at, etc.
Expand Down Expand Up @@ -649,10 +649,10 @@ func mapAgentStatus(dbAgent database.WorkspaceAgent, agentInactiveDisconnectTime
return status
}

func convertToWorkspaceRows(workspaces []database.Workspace, count int64) []database.GetWorkspacesRow {
rows := make([]database.GetWorkspacesRow, len(workspaces))
for i, w := range workspaces {
rows[i] = database.GetWorkspacesRow{
func (q *fakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspaces []database.Workspace, count int64) []database.GetWorkspacesRow {
rows := make([]database.GetWorkspacesRow, 0, len(workspaces))
for _, w := range workspaces {
wr := database.GetWorkspacesRow{
ID: w.ID,
CreatedAt: w.CreatedAt,
UpdatedAt: w.UpdatedAt,
Expand All @@ -666,6 +666,28 @@ func convertToWorkspaceRows(workspaces []database.Workspace, count int64) []data
LastUsedAt: w.LastUsedAt,
Count: count,
}

for _, t := range q.templates {
if t.ID == w.TemplateID {
wr.TemplateName = t.Name
break
}
}

if build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, w.ID); err == nil {
for _, tv := range q.templateVersions {
if tv.ID == build.TemplateVersionID {
wr.TemplateVersionID = tv.ID
wr.TemplateVersionName = sql.NullString{
Valid: true,
String: tv.Name,
}
break
}
}
}

rows = append(rows, wr)
}
return rows
}
Expand Down
3 changes: 3 additions & 0 deletions coderd/database/modelqueries.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa
&i.Ttl,
&i.LastUsedAt,
&i.LockedAt,
&i.TemplateName,
&i.TemplateVersionID,
&i.TemplateVersionName,
&i.Count,
); err != nil {
return nil, err
Expand Down
64 changes: 44 additions & 20 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 25 additions & 7 deletions coderd/database/queries/workspaces.sql
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ WHERE

-- name: GetWorkspaces :many
SELECT
workspaces.*, COUNT(*) OVER () as count
workspaces.*,
COALESCE(template_name.template_name, 'unknown') as template_name,
latest_build.template_version_id,
latest_build.template_version_name,
COUNT(*) OVER () as count
FROM
workspaces
JOIN
Expand All @@ -85,6 +89,8 @@ ON
LEFT JOIN LATERAL (
SELECT
workspace_builds.transition,
workspace_builds.template_version_id,
template_versions.name AS template_version_name,
provisioner_jobs.id AS provisioner_job_id,
provisioner_jobs.started_at,
provisioner_jobs.updated_at,
Expand All @@ -97,13 +103,25 @@ LEFT JOIN LATERAL (
provisioner_jobs
ON
provisioner_jobs.id = workspace_builds.job_id
LEFT JOIN
template_versions
ON
template_versions.id = workspace_builds.template_version_id
WHERE
workspace_builds.workspace_id = workspaces.id
ORDER BY
build_number DESC
LIMIT
1
) latest_build ON TRUE
LEFT JOIN LATERAL (
SELECT
templates.name AS template_name
FROM
templates
WHERE
templates.id = workspaces.template_id
) template_name ON true
WHERE
-- Optionally include deleted workspaces
workspaces.deleted = @deleted
Expand Down Expand Up @@ -175,33 +193,33 @@ WHERE
-- Filter by owner_id
AND CASE
WHEN @owner_id :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN
owner_id = @owner_id
workspaces.owner_id = @owner_id
ELSE true
END
-- Filter by owner_name
AND CASE
WHEN @owner_username :: text != '' THEN
owner_id = (SELECT id FROM users WHERE lower(username) = lower(@owner_username) AND deleted = false)
workspaces.owner_id = (SELECT id FROM users WHERE lower(username) = lower(@owner_username) AND deleted = false)
ELSE true
END
-- Filter by template_name
-- There can be more than 1 template with the same name across organizations.
-- Use the organization filter to restrict to 1 org if needed.
AND CASE
WHEN @template_name :: text != '' THEN
template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower(@template_name) AND deleted = false)
workspaces.template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower(@template_name) AND deleted = false)
ELSE true
END
-- Filter by template_ids
AND CASE
WHEN array_length(@template_ids :: uuid[], 1) > 0 THEN
template_id = ANY(@template_ids)
workspaces.template_id = ANY(@template_ids)
ELSE true
END
-- Filter by name, matching on substring
AND CASE
WHEN @name :: text != '' THEN
name ILIKE '%' || @name || '%'
workspaces.name ILIKE '%' || @name || '%'
ELSE true
END
-- Filter by agent status
Expand Down Expand Up @@ -249,7 +267,7 @@ ORDER BY
latest_build.error IS NULL AND
latest_build.transition = 'start'::workspace_transition) DESC,
LOWER(users.username) ASC,
LOWER(name) ASC
LOWER(workspaces.name) ASC
LIMIT
CASE
WHEN @limit_ :: integer > 0 THEN
Expand Down
17 changes: 11 additions & 6 deletions coderd/prometheusmetrics/prometheusmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"tailscale.com/tailcfg"

"cdr.dev/slog"

"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/database/db2sdk"
"github.com/coder/coder/coderd/database/dbauthz"
Expand Down Expand Up @@ -153,7 +152,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
Subsystem: "agents",
Name: "up",
Help: "The number of active agents per workspace.",
}, []string{usernameLabel, workspaceNameLabel}))
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
err := registerer.Register(agentsGauge)
if err != nil {
return nil, err
Expand Down Expand Up @@ -234,29 +233,35 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
}

for _, workspace := range workspaceRows {
templateName := workspace.TemplateName
templateVersionName := workspace.TemplateVersionName.String
if !workspace.TemplateVersionName.Valid {
templateVersionName = "unknown"
}

user, err := db.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
if err != nil {
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

if len(agents) == 0 {
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

for _, agent := range agents {
// Collect information about agents
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)

connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
node := (*coordinator.Load()).Node(agent.ID)
Expand Down
8 changes: 5 additions & 3 deletions coderd/prometheusmetrics/prometheusmetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ func TestAgents(t *testing.T) {
// when
closeFunc, err := prometheusmetrics.Agents(ctx, slogtest.Make(t, &slogtest.Options{
IgnoreErrors: true,
}), registry, db, &coordinatorPtr, derpMap, agentInactiveDisconnectTimeout, time.Millisecond)
}), registry, db, &coordinatorPtr, derpMap, agentInactiveDisconnectTimeout, 50*time.Millisecond)
require.NoError(t, err)
t.Cleanup(closeFunc)

Expand All @@ -332,8 +332,10 @@ func TestAgents(t *testing.T) {
for _, metric := range metrics {
switch metric.GetName() {
case "coderd_agents_up":
assert.Equal(t, "testuser", metric.Metric[0].Label[0].GetValue()) // Username
assert.Equal(t, workspace.Name, metric.Metric[0].Label[1].GetValue()) // Workspace name
assert.Equal(t, template.Name, metric.Metric[0].Label[0].GetValue()) // Template name
assert.Equal(t, version.Name, metric.Metric[0].Label[1].GetValue()) // Template version name
assert.Equal(t, "testuser", metric.Metric[0].Label[2].GetValue()) // Username
assert.Equal(t, workspace.Name, metric.Metric[0].Label[3].GetValue()) // Workspace name
assert.Equal(t, 1, int(metric.Metric[0].Gauge.GetValue())) // Metric value
agentsUp = true
case "coderd_agents_connections":
Expand Down