feat: add template info tags to `coderd_agents_up` metric by goodspark · Pull Request #7942 · coder/coder · GitHub
[go: up one dir, main page]

Skip to content

feat: add template info tags to coderd_agents_up metric #7942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: add template tags to agent up metric
So we can track template and version usage for all running workspaces.
Right now, we can only track it by workspace builds.
  • Loading branch information
goodspark committed Jun 9, 2023
commit f867dd7ba2c3ad9c766bf011fe4fef0bca1c38c3
5 changes: 4 additions & 1 deletion coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion coderd/database/queries/workspaces.sql
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ WHERE

-- name: GetWorkspaces :many
SELECT
workspaces.*, COUNT(*) OVER () as count
workspaces.*, latest_build.template_version_id as template_version_id, COUNT(*) OVER () as count
FROM
workspaces
JOIN
Expand All @@ -85,6 +85,7 @@ ON
LEFT JOIN LATERAL (
SELECT
workspace_builds.transition,
workspace_builds.template_version_id,
provisioner_jobs.id AS provisioner_job_id,
provisioner_jobs.started_at,
provisioner_jobs.updated_at,
Expand Down
68 changes: 63 additions & 5 deletions coderd/prometheusmetrics/prometheusmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
Subsystem: "agents",
Name: "up",
Help: "The number of active agents per workspace.",
}, []string{usernameLabel, workspaceNameLabel}))
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
err := registerer.Register(agentsGauge)
if err != nil {
return nil, err
Expand Down Expand Up @@ -225,6 +225,10 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
logger.Debug(ctx, "Agent metrics collection is starting")
timer := prometheus.NewTimer(metricsCollectorAgents)

// These must be declared before the gotos below: a goto may not jump over a variable declaration in the same block.
var templateNamesByID map[uuid.UUID]string
var templateVersionNamesByID map[uuid.UUID]string

workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
AgentInactiveDisconnectTimeoutSeconds: int64(agentInactiveDisconnectTimeout.Seconds()),
})
Expand All @@ -233,30 +237,44 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
goto done
}

templateNamesByID, templateVersionNamesByID, err = getTemplatesAndVersionNamesFromWorkspaces(ctx, db, workspaceRows)
if err != nil {
logger.Error(ctx, "can't get template info", slog.Error(err))
goto done
}

for _, workspace := range workspaceRows {
templateName, found := templateNamesByID[workspace.TemplateID]
if !found {
templateName = "unknown"
}
templateVersionName, found := templateVersionNamesByID[workspace.TemplateID]
if !found {
templateVersionName = "unknown"
}
user, err := db.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
logger.Error(ctx, "can't get user", slog.F("user_id", workspace.OwnerID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
if err != nil {
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

if len(agents) == 0 {
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}

for _, agent := range agents {
// Collect information about agents
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)

connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
node := (*coordinator.Load()).Node(agent.ID)
Expand Down Expand Up @@ -325,6 +343,46 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
}, nil
}

// getTemplatesAndVersionNamesFromWorkspaces looks up the names of the templates
// and template versions referenced by workspaceRows.
//
// It returns two maps: template ID -> template name, and template version
// ID -> template version name. Each map only contains entries for the rows the
// corresponding store query returned. If either store query fails, both maps
// are nil and the error is returned as-is.
func getTemplatesAndVersionNamesFromWorkspaces(ctx context.Context, db database.Store, workspaceRows []database.GetWorkspacesRow) (map[uuid.UUID]string, map[uuid.UUID]string, error) {
	// Deduplicate the referenced IDs so that each kind of name is fetched with
	// a single store call instead of one call per workspace.
	templateIDSet := make(map[uuid.UUID]struct{})
	versionIDSet := make(map[uuid.UUID]struct{})
	for _, row := range workspaceRows {
		templateIDSet[row.TemplateID] = struct{}{}
		versionIDSet[row.TemplateVersionID] = struct{}{}
	}

	// Resolve template names.
	templateIDs := make([]uuid.UUID, 0, len(templateIDSet))
	for templateID := range templateIDSet {
		templateIDs = append(templateIDs, templateID)
	}
	filter := database.GetTemplatesWithFilterParams{
		IDs: templateIDs,
	}
	templateRows, err := db.GetTemplatesWithFilter(ctx, filter)
	if err != nil {
		return nil, nil, err
	}
	namesByTemplateID := make(map[uuid.UUID]string, len(templateRows))
	for _, t := range templateRows {
		namesByTemplateID[t.ID] = t.Name
	}

	// Resolve template version names.
	versionIDs := make([]uuid.UUID, 0, len(versionIDSet))
	for versionID := range versionIDSet {
		versionIDs = append(versionIDs, versionID)
	}
	versionRows, err := db.GetTemplateVersionsByIDs(ctx, versionIDs)
	if err != nil {
		return nil, nil, err
	}
	namesByVersionID := make(map[uuid.UUID]string, len(versionRows))
	for _, v := range versionRows {
		namesByVersionID[v.ID] = v.Name
	}

	return namesByTemplateID, namesByVersionID, nil
}

func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration) (func(), error) {
if duration == 0 {
duration = 1 * time.Minute
Expand Down
0