feat: add template tags to agent up metric · coder/coder@f867dd7 · GitHub
[go: up one dir, main page]

Skip to content

Commit f867dd7

Browse files
committed
feat: add template tags to agent up metric
So we can track template and version usage for all running workspaces. Right now, we can only track it by workspace builds.
1 parent 16ebe10 commit f867dd7

File tree

3 files changed

+69
-7
lines changed
  • queries
  • prometheusmetrics
  • 3 files changed

    +69
    -7
    lines changed

    coderd/database/queries.sql.go

    Lines changed: 4 additions & 1 deletion
    Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

    coderd/database/queries/workspaces.sql

    Lines changed: 2 additions & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -75,7 +75,7 @@ WHERE
    7575

    7676
    -- name: GetWorkspaces :many
    7777
    SELECT
    78-
    workspaces.*, COUNT(*) OVER () as count
    78+
    workspaces.*, latest_build.template_version_id as template_version_id, COUNT(*) OVER () as count
    7979
    FROM
    8080
    workspaces
    8181
    JOIN
    @@ -85,6 +85,7 @@ ON
    8585
    LEFT JOIN LATERAL (
    8686
    SELECT
    8787
    workspace_builds.transition,
    88+
    workspace_builds.template_version_id,
    8889
    provisioner_jobs.id AS provisioner_job_id,
    8990
    provisioner_jobs.started_at,
    9091
    provisioner_jobs.updated_at,

    coderd/prometheusmetrics/prometheusmetrics.go

    Lines changed: 63 additions & 5 deletions
    Original file line numberDiff line numberDiff line change
    @@ -153,7 +153,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
    153153
    Subsystem: "agents",
    154154
    Name: "up",
    155155
    Help: "The number of active agents per workspace.",
    156-
    }, []string{usernameLabel, workspaceNameLabel}))
    156+
    }, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
    157157
    err := registerer.Register(agentsGauge)
    158158
    if err != nil {
    159159
    return nil, err
    @@ -225,6 +225,10 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
    225225
    logger.Debug(ctx, "Agent metrics collection is starting")
    226226
    timer := prometheus.NewTimer(metricsCollectorAgents)
    227227

    228+
    // Need to define these ahead of time bc of the use of gotos below
    229+
    var templateNamesByID map[uuid.UUID]string
    230+
    var templateVersionNamesByID map[uuid.UUID]string
    231+
    228232
    workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
    229233
    AgentInactiveDisconnectTimeoutSeconds: int64(agentInactiveDisconnectTimeout.Seconds()),
    230234
    })
    @@ -233,30 +237,44 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
    233237
    goto done
    234238
    }
    235239

    240+
    templateNamesByID, templateVersionNamesByID, err = getTemplatesAndVersionNamesFromWorkspaces(ctx, db, workspaceRows)
    241+
    if err != nil {
    242+
    logger.Error(ctx, "can't get template info", slog.Error(err))
    243+
    goto done
    244+
    }
    245+
    236246
    for _, workspace := range workspaceRows {
    247+
    templateName, found := templateNamesByID[workspace.TemplateID]
    248+
    if !found {
    249+
    templateName = "unknown"
    250+
    }
    251+
    templateVersionName, found := templateVersionNamesByID[workspace.TemplateID]
    252+
    if !found {
    253+
    templateVersionName = "unknown"
    254+
    }
    237255
    user, err := db.GetUserByID(ctx, workspace.OwnerID)
    238256
    if err != nil {
    239257
    logger.Error(ctx, "can't get user", slog.F("user_id", workspace.OwnerID), slog.Error(err))
    240-
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
    258+
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
    241259
    continue
    242260
    }
    243261

    244262
    agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
    245263
    if err != nil {
    246264
    logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
    247-
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
    265+
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
    248266
    continue
    249267
    }
    250268

    251269
    if len(agents) == 0 {
    252270
    logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
    253-
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
    271+
    agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
    254272
    continue
    255273
    }
    256274

    257275
    for _, agent := range agents {
    258276
    // Collect information about agents
    259-
    agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name)
    277+
    agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
    260278

    261279
    connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
    262280
    node := (*coordinator.Load()).Node(agent.ID)
    @@ -325,6 +343,46 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
    325343
    }, nil
    326344
    }
    327345

    346+
    // getTemplatesAndVersionNamesFromWorkspaces resolves the names of the
    // templates and template versions referenced by workspaceRows.
    //
    // It gathers the distinct TemplateID and TemplateVersionID values from the
    // rows, then issues one GetTemplatesWithFilter call and one
    // GetTemplateVersionsByIDs call rather than querying once per workspace.
    //
    // The first returned map is keyed by template ID and holds template names.
    // The second returned map is keyed by template version ID and holds version
    // names, so it must be indexed with a template version ID, not a template ID.
    // If either query fails, both maps are nil and the query error is returned
    // as-is.
    //
    // NOTE(review): when workspaceRows is empty both ID slices are empty; how the
    // two queries treat an empty ID list is not visible here - confirm.
    func getTemplatesAndVersionNamesFromWorkspaces(ctx context.Context, db database.Store, workspaceRows []database.GetWorkspacesRow) (map[uuid.UUID]string, map[uuid.UUID]string, error) {
    347+
    // Aggregate the used template and version IDs to minimize DB calls.
    // The maps are used as sets, so an ID shared by many workspaces is
    // recorded only once.
    348+
    usedTemplateIDs := map[uuid.UUID]struct{}{}
    349+
    usedTemplateVersionIDs := map[uuid.UUID]struct{}{}
    350+
    for _, workspace := range workspaceRows {
    351+
    usedTemplateIDs[workspace.TemplateID] = struct{}{}
    352+
    usedTemplateVersionIDs[workspace.TemplateVersionID] = struct{}{}
    353+
    }
    354+
    // Flatten each set into a slice, pre-sized to the number of distinct IDs,
    // because the queries below take their IDs as slices.
    templatesToGet := make([]uuid.UUID, 0, len(usedTemplateIDs))
    355+
    for id := range usedTemplateIDs {
    356+
    templatesToGet = append(templatesToGet, id)
    357+
    }
    358+
    templateVersionsToGet := make([]uuid.UUID, 0, len(usedTemplateVersionIDs))
    359+
    for id := range usedTemplateVersionIDs {
    360+
    templateVersionsToGet = append(templateVersionsToGet, id)
    361+
    }
    362+
    363+
    // Fetch the referenced templates in a single query and index their names
    // by template ID.
    templates, err := db.GetTemplatesWithFilter(ctx, database.GetTemplatesWithFilterParams{
    364+
    IDs: templatesToGet,
    365+
    })
    366+
    if err != nil {
    367+
    return nil, nil, err
    368+
    }
    369+
    templateNamesByID := make(map[uuid.UUID]string, len(templates))
    370+
    for _, template := range templates {
    371+
    templateNamesByID[template.ID] = template.Name
    372+
    }
    373+
    374+
    // Fetch the referenced template versions in a single query and index their
    // names by template version ID.
    versions, err := db.GetTemplateVersionsByIDs(ctx, templateVersionsToGet)
    375+
    if err != nil {
    376+
    return nil, nil, err
    377+
    }
    378+
    templateVersionNamesByID := make(map[uuid.UUID]string, len(versions))
    379+
    for _, version := range versions {
    380+
    templateVersionNamesByID[version.ID] = version.Name
    381+
    }
    382+
    383+
    return templateNamesByID, templateVersionNamesByID, nil
    384+
    }
    385+
    328386
    func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration) (func(), error) {
    329387
    if duration == 0 {
    330388
    duration = 1 * time.Minute

    0 commit comments

    Comments
     (0)
    0