From 66f9a084b420be79380c3907095cb5813398f726 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 14:41:36 -0500 Subject: [PATCH 01/15] chore: Implement workspace proxy health check cron At a given interval will check the reachability of workspace proxies. --- coderd/proxyhealth/proxyhealth.go | 201 ++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 coderd/proxyhealth/proxyhealth.go diff --git a/coderd/proxyhealth/proxyhealth.go b/coderd/proxyhealth/proxyhealth.go new file mode 100644 index 0000000000000..4040fcaea9d13 --- /dev/null +++ b/coderd/proxyhealth/proxyhealth.go @@ -0,0 +1,201 @@ +package proxyhealth + +import ( + "context" + "fmt" + "net/http" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/google/uuid" + "golang.org/x/sync/errgroup" + "golang.org/x/xerrors" + + "cdr.dev/slog" + "github.com/coder/coder/coderd/database" +) + +type ProxyHealthStatus string + +const ( + // Reachable means the proxy access url is reachable and returns a healthy + // status code. + Reachable ProxyHealthStatus = "reachable" + // Unreachable means the proxy access url is not responding. + Unreachable ProxyHealthStatus = "unreachable" + // Unregistered means the proxy has not registered a url yet. This means + // the proxy was created with the cli, but has not yet been started. + Unregistered ProxyHealthStatus = "unregistered" +) + +type Options struct { + // Interval is the interval at which the proxy health is checked. + Interval time.Duration + DB database.Store + Logger slog.Logger + client *http.Client +} + +// ProxyHealth runs a go routine that periodically checks the health of all +// workspace proxies. This information is stored in memory, so each coderd +// replica has its own view of the health of the proxies. These views should be +// consistent, and if they are not, it indicates a problem. +type ProxyHealth struct { + db database.Store + interval time.Duration + logger slog.Logger + client *http.Client + + cache *atomic.Pointer[map[uuid.UUID]ProxyStatus] +} + +func New(opts *Options) (*ProxyHealth, error) { + if opts.Interval <= 0 { + opts.Interval = time.Minute + } + if opts.DB == nil { + return nil, xerrors.Errorf("db is required") + } + + client := opts.client + if opts.client == nil { + client = http.DefaultClient + } + // Set a timeout on the client so we don't wait forever for a healthz response. + tmp := *client + tmp.Timeout = time.Second * 5 + client = &tmp + + return &ProxyHealth{ + db: opts.DB, + interval: opts.Interval, + logger: opts.Logger, + client: opts.client, + cache: &atomic.Pointer[map[uuid.UUID]ProxyStatus]{}, + }, nil +} + +func (p *ProxyHealth) Run(ctx context.Context) { + ticker := time.NewTicker(p.interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case now := <-ticker.C: + statuses, err := p.runOnce(ctx, now) + if err != nil { + p.logger.Error(ctx, "proxy health check failed", slog.Error(err)) + continue + } + // Store the statuses in the cache. + p.cache.Store(&statuses) + } + } +} + +// ForceUpdate runs a single health check and updates the cache. If the health +// check fails, the cache is not updated and an error is returned. This is useful +// to trigger an update when a proxy is created or deleted. +func (p *ProxyHealth) ForceUpdate(ctx context.Context) error { + statuses, err := p.runOnce(ctx, time.Now()) + if err != nil { + return err + } + + // Store the statuses in the cache. + p.cache.Store(&statuses) + return nil +} + +func (p *ProxyHealth) HealthStatus() map[uuid.UUID]ProxyStatus { + ptr := p.cache.Load() + if ptr == nil { + return nil + } + return *ptr +} + +type ProxyStatus struct { + Proxy database.WorkspaceProxy + Status ProxyHealthStatus + CheckedAt time.Time +} + +// runOnce runs the health check for all workspace proxies. If there is an +// unexpected error, an error is returned. Expected errors will mark a proxy as +// unreachable. +func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]ProxyStatus, error) { + proxies, err := p.db.GetWorkspaceProxies(ctx) + if err != nil { + return nil, xerrors.Errorf("get workspace proxies: %w", err) + } + + // Just use a mutex to protect map writes. + var statusMu sync.Mutex + proxyStatus := map[uuid.UUID]ProxyStatus{} + + grp, gctx := errgroup.WithContext(ctx) + // Arbitrary parallelism limit. + grp.SetLimit(5) + + for _, proxy := range proxies { + if proxy.Deleted { + // Ignore deleted proxies. + continue + } + // Each proxy needs to have a status set. Make a local copy for the + // call to be run async. + proxy := proxy + status := ProxyStatus{ + Proxy: proxy, + CheckedAt: t, + } + + grp.Go(func() error { + if proxy.Url == "" { + // Empty URL means the proxy has not registered yet. + // When the proxy is started, it will update the url. + statusMu.Lock() + defer statusMu.Unlock() + status.Status = Unregistered + proxyStatus[proxy.ID] = status + return nil + } + + // Try to hit the healthz endpoint. + reqURL := fmt.Sprintf("%s/healthz", strings.TrimSuffix(proxy.Url, "/")) + req, err := http.NewRequestWithContext(gctx, http.MethodGet, reqURL, nil) + if err != nil { + return xerrors.Errorf("new request: %w", err) + } + req = req.WithContext(gctx) + + resp, err := p.client.Do(req) + statusMu.Lock() + defer statusMu.Unlock() + if err != nil { + status.Status = Unreachable + return nil + } + + if resp.StatusCode != http.StatusOK { + status.Status = Unreachable + return nil + } + + status.Status = Reachable + return nil + }) + } + + err = grp.Wait() + if err != nil { + return nil, xerrors.Errorf("group run: %w", err) + } + + return nil, nil +} From 3d48d32b692576c561a5be33bacac88e57c8e623 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 14:43:23 -0500 Subject: [PATCH 02/15] Proxyhealth is an enterprise feature --- {coderd => enterprise}/proxyhealth/proxyhealth.go | 4 ++++ 1 file changed, 4 insertions(+) rename {coderd => enterprise}/proxyhealth/proxyhealth.go (95%) diff --git a/coderd/proxyhealth/proxyhealth.go b/enterprise/proxyhealth/proxyhealth.go similarity index 95% rename from coderd/proxyhealth/proxyhealth.go rename to enterprise/proxyhealth/proxyhealth.go index 4040fcaea9d13..eda06dafbc25a 100644 --- a/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/proxyhealth/proxyhealth.go @@ -77,6 +77,8 @@ func New(opts *Options) (*ProxyHealth, error) { }, nil } +// Run will block until the context is canceled. It will periodically check the +// health of all proxies and store the results in the cache. func (p *ProxyHealth) Run(ctx context.Context) { ticker := time.NewTicker(p.interval) defer ticker.Stop() @@ -111,6 +113,8 @@ func (p *ProxyHealth) ForceUpdate(ctx context.Context) error { return nil } +// HealthStatus returns the current health status of all proxies stored in the +// cache. func (p *ProxyHealth) HealthStatus() map[uuid.UUID]ProxyStatus { ptr := p.cache.Load() if ptr == nil { From f3abfe70ecccadff9f89e7262044e9d9a0d1d084 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 15:03:45 -0500 Subject: [PATCH 03/15] Start proxyhealth go routine on enterprise coder --- codersdk/workspaceproxy.go | 40 ++++++++-- enterprise/coderd/coderd.go | 40 ++++++++-- .../{ => coderd}/proxyhealth/proxyhealth.go | 9 ++- enterprise/coderd/workspaceproxy.go | 76 +++++++++++++------ 4 files changed, 125 insertions(+), 40 deletions(-) rename enterprise/{ => coderd}/proxyhealth/proxyhealth.go (91%) diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index 9b3521bcb4098..9e5913a79eca2 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -12,17 +12,41 @@ import ( "github.com/google/uuid" ) +type ProxyHealthStatus string + +const ( + // ProxyReachable means the proxy access url is reachable and returns a healthy + // status code. + ProxyReachable ProxyHealthStatus = "reachable" + // ProxyUnreachable means the proxy access url is not responding. + ProxyUnreachable ProxyHealthStatus = "unreachable" + // ProxyUnregistered means the proxy has not registered a url yet. This means + // the proxy was created with the cli, but has not yet been started. + ProxyUnregistered ProxyHealthStatus = "unregistered" +) + + +type WorkspaceProxyStatus struct { + Status ProxyHealthStatus `json:"status" table:"status"` + CheckedAt time.Time `json:"checked_at" table:"checked_at"` +} + type WorkspaceProxy struct { - ID uuid.UUID `db:"id" json:"id" format:"uuid" table:"id"` - Name string `db:"name" json:"name" table:"name,default_sort"` - Icon string `db:"icon" json:"icon" table:"icon"` + ID uuid.UUID `json:"id" format:"uuid" table:"id"` + Name string `json:"name" table:"name,default_sort"` + Icon string `json:"icon" table:"icon"` // Full url including scheme of the proxy api url: https://us.example.com - URL string `db:"url" json:"url" table:"url"` + URL string `json:"url" table:"url"` // WildcardHostname with the wildcard for subdomain based app hosting: *.us.example.com - WildcardHostname string `db:"wildcard_hostname" json:"wildcard_hostname" table:"wildcard_hostname"` - CreatedAt time.Time `db:"created_at" json:"created_at" format:"date-time" table:"created_at"` - UpdatedAt time.Time `db:"updated_at" json:"updated_at" format:"date-time" table:"updated_at"` - Deleted bool `db:"deleted" json:"deleted" table:"deleted"` + WildcardHostname string `json:"wildcard_hostname" table:"wildcard_hostname"` + CreatedAt time.Time `json:"created_at" format:"date-time" table:"created_at"` + UpdatedAt time.Time `djson:"updated_at" format:"date-time" table:"updated_at"` + Deleted bool `json:"deleted" table:"deleted"` + + // Status is the latest status check of the proxy. This will be empty for deleted + // proxies. This value can be used to determine if a workspace proxy is healthy + // and ready to use. + Status WorkspaceProxyStatus `json:"status,omitempty" table:"status"` } type CreateWorkspaceProxyRequest struct { diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 51231009832c7..313e0ba6a3baf 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -24,6 +24,7 @@ import ( "github.com/coder/coder/coderd/schedule" "github.com/coder/coder/codersdk" "github.com/coder/coder/enterprise/coderd/license" + "github.com/coder/coder/enterprise/coderd/proxyhealth" "github.com/coder/coder/enterprise/derpmesh" "github.com/coder/coder/enterprise/replicasync" "github.com/coder/coder/enterprise/tailnet" @@ -52,9 +53,11 @@ func New(ctx context.Context, options *Options) (*API, error) { } ctx, cancelFunc := context.WithCancel(ctx) api := &API{ - AGPL: coderd.New(options.Options), - Options: options, - cancelEntitlementsLoop: cancelFunc, + ctx: ctx, + cancel: cancelFunc, + + AGPL: coderd.New(options.Options), + Options: options, } api.AGPL.Options.SetUserGroups = api.setUserGroups @@ -222,6 +225,22 @@ func New(ctx context.Context, options *Options) (*API, error) { } api.derpMesh = derpmesh.New(options.Logger.Named("derpmesh"), api.DERPServer, meshTLSConfig) + if api.AGPL.Experiments.Enabled(codersdk.ExperimentMoons) { + // Proxy health is a moon feature. + api.proxyHealth, err = proxyhealth.New(&proxyhealth.Options{ + Interval: options.ProxyHealthInterval, + DB: api.Database, + Logger: options.Logger.Named("proxyhealth"), + }) + if err != nil { + return nil, xerrors.Errorf("initialize proxy health: %w", err) + } + go api.proxyHealth.Run(ctx) + // Force the initial loading of the cache. Do this in a go routine in case + // the calls to the workspace proxies hang and this takes some time. + go api.forceWorkspaceProxyHealthUpdate(ctx) + } + err = api.updateEntitlements(ctx) if err != nil { return nil, xerrors.Errorf("update entitlements: %w", err) @@ -245,6 +264,7 @@ type Options struct { DERPServerRegionID int EntitlementsUpdateInterval time.Duration + ProxyHealthInterval time.Duration Keys map[string]ed25519.PublicKey } @@ -252,18 +272,24 @@ type API struct { AGPL *coderd.API *Options + // ctx is canceled immediately on shutdown, it can be used to abort + // interruptible tasks. + ctx context.Context + cancel context.CancelFunc + // Detects multiple Coder replicas running at the same time. replicaManager *replicasync.Manager // Meshes DERP connections from multiple replicas. derpMesh *derpmesh.Mesh + // proxyHealth checks the reachability of all workspace proxies. + proxyHealth *proxyhealth.ProxyHealth - cancelEntitlementsLoop func() - entitlementsMu sync.RWMutex - entitlements codersdk.Entitlements + entitlementsMu sync.RWMutex + entitlements codersdk.Entitlements } func (api *API) Close() error { - api.cancelEntitlementsLoop() + api.cancel() _ = api.replicaManager.Close() _ = api.derpMesh.Close() return api.AGPL.Close() diff --git a/enterprise/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go similarity index 91% rename from enterprise/proxyhealth/proxyhealth.go rename to enterprise/coderd/proxyhealth/proxyhealth.go index eda06dafbc25a..491390c3e5cf5 100644 --- a/enterprise/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -15,6 +15,7 @@ import ( "cdr.dev/slog" "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/dbauthz" ) type ProxyHealthStatus string @@ -63,7 +64,7 @@ func New(opts *Options) (*ProxyHealth, error) { if opts.client == nil { client = http.DefaultClient } - // Set a timeout on the client so we don't wait forever for a healthz response. + // Set a timeout on the client, so we don't wait forever for a healthz response. tmp := *client tmp.Timeout = time.Second * 5 client = &tmp @@ -124,6 +125,10 @@ func (p *ProxyHealth) HealthStatus() map[uuid.UUID]ProxyStatus { } type ProxyStatus struct { + // ProxyStatus includes the value of the proxy at the time of checking. This is + // useful to know as it helps determine if the proxy checked has different values + // then the proxy in hand. AKA if the proxy was updated, and the status was for + // an older proxy. Proxy database.WorkspaceProxy Status ProxyHealthStatus CheckedAt time.Time @@ -133,7 +138,7 @@ type ProxyStatus struct { // unexpected error, an error is returned. Expected errors will mark a proxy as // unreachable. func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]ProxyStatus, error) { - proxies, err := p.db.GetWorkspaceProxies(ctx) + proxies, err := p.db.GetWorkspaceProxies(dbauthz.AsSystemRestricted(ctx)) if err != nil { return nil, xerrors.Errorf("get workspace proxies: %w", err) } diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 150c5b4f45fd8..24871d681840b 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -1,15 +1,18 @@ package coderd import ( + "context" "crypto/sha256" "database/sql" "fmt" "net/http" "net/url" + "time" "github.com/google/uuid" "golang.org/x/xerrors" + "cdr.dev/slog" "github.com/coder/coder/coderd/audit" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/httpapi" @@ -17,9 +20,18 @@ import ( "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/codersdk" "github.com/coder/coder/cryptorand" + "github.com/coder/coder/enterprise/coderd/proxyhealth" "github.com/coder/coder/enterprise/wsproxy/wsproxysdk" ) +// forceWorkspaceProxyHealthUpdate forces an update of the proxy health. +// This is useful when a proxy is created or deleted. Errors will be logged. +func (api *API) forceWorkspaceProxyHealthUpdate(ctx context.Context) { + if err := api.proxyHealth.ForceUpdate(ctx); err != nil { + api.Logger.Error(ctx, "force proxy health update", slog.Error(err)) + } +} + // @Summary Delete workspace proxy // @ID delete-workspace-proxy // @Security CoderSessionToken @@ -60,6 +72,9 @@ func (api *API) deleteWorkspaceProxy(rw http.ResponseWriter, r *http.Request) { httpapi.Write(ctx, rw, http.StatusOK, codersdk.Response{ Message: "Proxy has been deleted!", }) + + // Update the proxy health cache to remove this proxy. + go api.forceWorkspaceProxyHealthUpdate(api.ctx) } // @Summary Create workspace proxy @@ -140,9 +155,16 @@ func (api *API) postWorkspaceProxy(rw http.ResponseWriter, r *http.Request) { aReq.New = proxy httpapi.Write(ctx, rw, http.StatusCreated, codersdk.CreateWorkspaceProxyResponse{ - Proxy: convertProxy(proxy), + Proxy: convertProxy(proxy, proxyhealth.ProxyStatus{ + Proxy: proxy, + Status: proxyhealth.Unregistered, + CheckedAt: time.Now(), + }), ProxyToken: fullToken, }) + + // Update the proxy health cache to include this new proxy. + go api.forceWorkspaceProxyHealthUpdate(api.ctx) } // nolint:revive @@ -176,28 +198,8 @@ func (api *API) workspaceProxies(rw http.ResponseWriter, r *http.Request) { return } - httpapi.Write(ctx, rw, http.StatusOK, convertProxies(proxies)) -} - -func convertProxies(p []database.WorkspaceProxy) []codersdk.WorkspaceProxy { - resp := make([]codersdk.WorkspaceProxy, 0, len(p)) - for _, proxy := range p { - resp = append(resp, convertProxy(proxy)) - } - return resp -} - -func convertProxy(p database.WorkspaceProxy) codersdk.WorkspaceProxy { - return codersdk.WorkspaceProxy{ - ID: p.ID, - Name: p.Name, - Icon: p.Icon, - URL: p.Url, - WildcardHostname: p.WildcardHostname, - CreatedAt: p.CreatedAt, - UpdatedAt: p.UpdatedAt, - Deleted: p.Deleted, - } + statues := api.proxyHealth.HealthStatus() + httpapi.Write(ctx, rw, http.StatusOK, convertProxies(proxies, statues)) } // @Summary Issue signed workspace app token @@ -313,4 +315,32 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) httpapi.Write(ctx, rw, http.StatusCreated, wsproxysdk.RegisterWorkspaceProxyResponse{ AppSecurityKey: api.AppSecurityKey.String(), }) + + // Update the proxy health cache to update this proxy. + go api.forceWorkspaceProxyHealthUpdate(api.ctx) +} + +func convertProxies(p []database.WorkspaceProxy, statuses map[uuid.UUID]proxyhealth.ProxyStatus) []codersdk.WorkspaceProxy { + resp := make([]codersdk.WorkspaceProxy, 0, len(p)) + for _, proxy := range p { + resp = append(resp, convertProxy(proxy, statuses[proxy.ID])) + } + return resp +} + +func convertProxy(p database.WorkspaceProxy, status proxyhealth.ProxyStatus) codersdk.WorkspaceProxy { + return codersdk.WorkspaceProxy{ + ID: p.ID, + Name: p.Name, + Icon: p.Icon, + URL: p.Url, + WildcardHostname: p.WildcardHostname, + CreatedAt: p.CreatedAt, + UpdatedAt: p.UpdatedAt, + Deleted: p.Deleted, + Status: codersdk.WorkspaceProxyStatus{ + Status: codersdk.ProxyHealthStatus(status.Status), + CheckedAt: status.CheckedAt, + }, + } } From 58c55d06462396b043d844f97056d5f9fae64e22 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 16:04:30 -0500 Subject: [PATCH 04/15] Add unit tests --- enterprise/coderd/coderd.go | 8 +- enterprise/coderd/proxyhealth/proxyhealth.go | 79 +++++++---- .../coderd/proxyhealth/proxyhealth_test.go | 130 ++++++++++++++++++ site/src/api/typesGenerated.ts | 17 ++- 4 files changed, 203 insertions(+), 31 deletions(-) create mode 100644 enterprise/coderd/proxyhealth/proxyhealth_test.go diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 313e0ba6a3baf..6a03cfb4740a1 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -228,9 +228,11 @@ func New(ctx context.Context, options *Options) (*API, error) { if api.AGPL.Experiments.Enabled(codersdk.ExperimentMoons) { // Proxy health is a moon feature. api.proxyHealth, err = proxyhealth.New(&proxyhealth.Options{ - Interval: options.ProxyHealthInterval, - DB: api.Database, - Logger: options.Logger.Named("proxyhealth"), + Interval: time.Second * 5, + DB: api.Database, + Logger: options.Logger.Named("proxyhealth"), + Client: api.HTTPClient, + Prometheus: api.PrometheusRegistry, }) if err != nil { return nil, xerrors.Errorf("initialize proxy health: %w", err) diff --git a/enterprise/coderd/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go index 491390c3e5cf5..d65a5a262445b 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" @@ -33,10 +34,11 @@ const ( type Options struct { // Interval is the interval at which the proxy health is checked. - Interval time.Duration - DB database.Store - Logger slog.Logger - client *http.Client + Interval time.Duration + DB database.Store + Logger slog.Logger + Client *http.Client + Prometheus *prometheus.Registry } // ProxyHealth runs a go routine that periodically checks the health of all @@ -50,6 +52,9 @@ type ProxyHealth struct { client *http.Client cache *atomic.Pointer[map[uuid.UUID]ProxyStatus] + + // PromMetrics + healthCheckDuration prometheus.Histogram } func New(opts *Options) (*ProxyHealth, error) { @@ -59,9 +64,12 @@ func New(opts *Options) (*ProxyHealth, error) { if opts.DB == nil { return nil, xerrors.Errorf("db is required") } + if opts.Prometheus == nil { + opts.Prometheus = prometheus.NewRegistry() + } - client := opts.client - if opts.client == nil { + client := opts.Client + if client == nil { client = http.DefaultClient } // Set a timeout on the client, so we don't wait forever for a healthz response. @@ -69,12 +77,23 @@ func New(opts *Options) (*ProxyHealth, error) { tmp.Timeout = time.Second * 5 client = &tmp + // Prometheus metrics + healthCheckDuration := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "coderd", + Subsystem: "proxyhealth", + Name: "health_check_duration_seconds", + Help: "Histogram for duration of proxy health collection in seconds.", + Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30}, + }) + opts.Prometheus.MustRegister(healthCheckDuration) + return &ProxyHealth{ - db: opts.DB, - interval: opts.Interval, - logger: opts.Logger, - client: opts.client, - cache: &atomic.Pointer[map[uuid.UUID]ProxyStatus]{}, + db: opts.DB, + interval: opts.Interval, + logger: opts.Logger, + client: client, + cache: &atomic.Pointer[map[uuid.UUID]ProxyStatus]{}, + healthCheckDuration: healthCheckDuration, }, nil } @@ -119,7 +138,7 @@ func (p *ProxyHealth) ForceUpdate(ctx context.Context) error { func (p *ProxyHealth) HealthStatus() map[uuid.UUID]ProxyStatus { ptr := p.cache.Load() if ptr == nil { - return nil + return map[uuid.UUID]ProxyStatus{} } return *ptr } @@ -129,15 +148,20 @@ type ProxyStatus struct { // useful to know as it helps determine if the proxy checked has different values // then the proxy in hand. AKA if the proxy was updated, and the status was for // an older proxy. - Proxy database.WorkspaceProxy - Status ProxyHealthStatus - CheckedAt time.Time + Proxy database.WorkspaceProxy + Status ProxyHealthStatus + // StatusError is the error message returned when the proxy is unreachable. + StatusError string + CheckedAt time.Time } // runOnce runs the health check for all workspace proxies. If there is an // unexpected error, an error is returned. Expected errors will mark a proxy as // unreachable. -func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]ProxyStatus, error) { +func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID]ProxyStatus, error) { + // Record from the given time. + defer p.healthCheckDuration.Observe(time.Since(now).Seconds()) + proxies, err := p.db.GetWorkspaceProxies(dbauthz.AsSystemRestricted(ctx)) if err != nil { return nil, xerrors.Errorf("get workspace proxies: %w", err) @@ -161,7 +185,7 @@ func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]P proxy := proxy status := ProxyStatus{ Proxy: proxy, - CheckedAt: t, + CheckedAt: now, } grp.Go(func() error { @@ -184,19 +208,20 @@ func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]P req = req.WithContext(gctx) resp, err := p.client.Do(req) - statusMu.Lock() - defer statusMu.Unlock() - if err != nil { + if err == nil && resp.StatusCode != http.StatusOK { + // No error but the status code is incorrect. status.Status = Unreachable - return nil - } - - if resp.StatusCode != http.StatusOK { + } else if err == nil { + status.Status = Reachable + } else { + // Any form of error is considered unreachable. status.Status = Unreachable - return nil + status.StatusError = err.Error() } - status.Status = Reachable + statusMu.Lock() + defer statusMu.Unlock() + proxyStatus[proxy.ID] = status return nil }) } @@ -206,5 +231,5 @@ func (p *ProxyHealth) runOnce(ctx context.Context, t time.Time) (map[uuid.UUID]P return nil, xerrors.Errorf("group run: %w", err) } - return nil, nil + return proxyStatus, nil } diff --git a/enterprise/coderd/proxyhealth/proxyhealth_test.go b/enterprise/coderd/proxyhealth/proxyhealth_test.go new file mode 100644 index 0000000000000..d2e7cd38ab01a --- /dev/null +++ b/enterprise/coderd/proxyhealth/proxyhealth_test.go @@ -0,0 +1,130 @@ +package proxyhealth_test + +import ( + "context" + "net" + "net/http" + "net/http/httptest" + "testing" + + "golang.org/x/xerrors" + + "cdr.dev/slog/sloggers/slogtest" + "github.com/stretchr/testify/require" + + "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/dbfake" + "github.com/coder/coder/coderd/database/dbgen" + "github.com/coder/coder/enterprise/coderd/proxyhealth" + "github.com/coder/coder/testutil" +) + +func insertProxy(t *testing.T, db database.Store, url string) database.WorkspaceProxy { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + proxy, _ := dbgen.WorkspaceProxy(t, db, database.WorkspaceProxy{}) + _, err := db.RegisterWorkspaceProxy(ctx, database.RegisterWorkspaceProxyParams{ + Url: url, + WildcardHostname: "", + ID: proxy.ID, + }) + require.NoError(t, err, "failed to update proxy") + return proxy +} + +func TestProxyHealth_Unregistered(t *testing.T) { + t.Parallel() + db := dbfake.New() + + proxies := []database.WorkspaceProxy{ + insertProxy(t, db, ""), + insertProxy(t, db, ""), + } + + ph, err := proxyhealth.New(&proxyhealth.Options{ + Interval: 0, + DB: db, + Logger: slogtest.Make(t, nil), + }) + require.NoError(t, err, "failed to create proxy health") + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + err = ph.ForceUpdate(ctx) + require.NoError(t, err, "failed to force update") + for _, p := range proxies { + require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Unregistered, "expect unregistered proxy") + } +} + +func TestProxyHealth_Reachable(t *testing.T) { + t.Parallel() + db := dbfake.New() + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("OK")) + })) + + proxies := []database.WorkspaceProxy{ + // Same url for both, just checking multiple proxies are checked. + insertProxy(t, db, srv.URL), + insertProxy(t, db, srv.URL), + } + + ph, err := proxyhealth.New(&proxyhealth.Options{ + Interval: 0, + DB: db, + Logger: slogtest.Make(t, nil), + Client: srv.Client(), + }) + require.NoError(t, err, "failed to create proxy health") + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + err = ph.ForceUpdate(ctx) + require.NoError(t, err, "failed to force update") + for _, p := range proxies { + require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Reachable, "expect reachable proxy") + } +} + +func TestProxyHealth_Unreachable(t *testing.T) { + t.Parallel() + db := dbfake.New() + + cli := &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + return nil, xerrors.New("Always fail") + }, + }, + } + + proxies := []database.WorkspaceProxy{ + // example.com is a real domain, but the client should always fail. + insertProxy(t, db, "https://example.com"), + insertProxy(t, db, "https://random.example.com"), + } + + ph, err := proxyhealth.New(&proxyhealth.Options{ + Interval: 0, + DB: db, + Logger: slogtest.Make(t, nil), + Client: cli, + }) + require.NoError(t, err, "failed to create proxy health") + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + err = ph.ForceUpdate(ctx) + require.NoError(t, err, "failed to force update") + for _, p := range proxies { + require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Unreachable, "expect unreachable proxy") + } +} diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 6917b89456bab..7101b026a4640 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -1231,8 +1231,9 @@ export interface WorkspaceProxy { readonly url: string readonly wildcard_hostname: string readonly created_at: string - readonly updated_at: string + readonly UpdatedAt: string readonly deleted: boolean + readonly status?: WorkspaceProxyStatus } // From codersdk/deployment.go @@ -1241,6 +1242,12 @@ export interface WorkspaceProxyBuildInfo { readonly dashboard_url: string } +// From codersdk/workspaceproxy.go +export interface WorkspaceProxyStatus { + readonly status: ProxyHealthStatus + readonly checked_at: string +} + // From codersdk/workspaces.go export interface WorkspaceQuota { readonly credits_consumed: number @@ -1438,6 +1445,14 @@ export const ProvisionerStorageMethods: ProvisionerStorageMethod[] = ["file"] export type ProvisionerType = "echo" | "terraform" export const ProvisionerTypes: ProvisionerType[] = ["echo", "terraform"] +// From codersdk/workspaceproxy.go +export type ProxyHealthStatus = "reachable" | "unreachable" | "unregistered" +export const ProxyHealthStatuses: ProxyHealthStatus[] = [ + "reachable", + "unreachable", + "unregistered", +] + // From codersdk/rbacresources.go export type RBACResource = | "api_key" From 4c91b081073c3e4991588b42f3d8a2e68590bf1b Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 16:06:22 -0500 Subject: [PATCH 05/15] Make gen --- coderd/apidoc/docs.go | 34 +++++++++++++++++++++- coderd/apidoc/swagger.json | 30 ++++++++++++++++++- codersdk/workspaceproxy.go | 5 ++-- docs/api/enterprise.md | 45 ++++++++++++++++++++--------- docs/api/schemas.md | 59 +++++++++++++++++++++++++++++++------- 5 files changed, 144 insertions(+), 29 deletions(-) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index a934f4f6cb657..df7fb775ee008 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -8147,6 +8147,19 @@ const docTemplate = `{ "ProvisionerStorageMethodFile" ] }, + "codersdk.ProxyHealthStatus": { + "type": "string", + "enum": [ + "reachable", + "unreachable", + "unregistered" + ], + "x-enum-varnames": [ + "ProxyReachable", + "ProxyUnreachable", + "ProxyUnregistered" + ] + }, "codersdk.PutExtendWorkspaceRequest": { "type": "object", "required": [ @@ -9640,7 +9653,15 @@ const docTemplate = `{ "name": { "type": "string" }, - "updated_at": { + "status": { + "description": "Status is the latest status check of the proxy. This will be empty for deleted\nproxies. This value can be used to determine if a workspace proxy is healthy\nand ready to use.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceProxyStatus" + } + ] + }, + "updatedAt": { "type": "string", "format": "date-time" }, @@ -9654,6 +9675,17 @@ const docTemplate = `{ } } }, + "codersdk.WorkspaceProxyStatus": { + "type": "object", + "properties": { + "checked_at": { + "type": "string" + }, + "status": { + "$ref": "#/definitions/codersdk.ProxyHealthStatus" + } + } + }, "codersdk.WorkspaceQuota": { "type": "object", "properties": { diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index aea4a4111ad93..a6994108f6fed 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -7302,6 +7302,15 @@ "enum": ["file"], "x-enum-varnames": ["ProvisionerStorageMethodFile"] }, + "codersdk.ProxyHealthStatus": { + "type": "string", + "enum": ["reachable", "unreachable", "unregistered"], + "x-enum-varnames": [ + "ProxyReachable", + "ProxyUnreachable", + "ProxyUnregistered" + ] + }, "codersdk.PutExtendWorkspaceRequest": { "type": "object", "required": ["deadline"], @@ -8712,7 +8721,15 @@ "name": { "type": "string" }, - "updated_at": { + "status": { + "description": "Status is the latest status check of the proxy. This will be empty for deleted\nproxies. This value can be used to determine if a workspace proxy is healthy\nand ready to use.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.WorkspaceProxyStatus" + } + ] + }, + "updatedAt": { "type": "string", "format": "date-time" }, @@ -8726,6 +8743,17 @@ } } }, + "codersdk.WorkspaceProxyStatus": { + "type": "object", + "properties": { + "checked_at": { + "type": "string" + }, + "status": { + "$ref": "#/definitions/codersdk.ProxyHealthStatus" + } + } + }, "codersdk.WorkspaceQuota": { "type": "object", "properties": { diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index 9e5913a79eca2..e5d0942668c4f 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -25,10 +25,9 @@ const ( ProxyUnregistered ProxyHealthStatus = "unregistered" ) - type WorkspaceProxyStatus struct { - Status ProxyHealthStatus `json:"status" table:"status"` - CheckedAt time.Time `json:"checked_at" table:"checked_at"` + Status ProxyHealthStatus `json:"status" table:"status"` + CheckedAt time.Time `json:"checked_at" table:"checked_at"` } type WorkspaceProxy struct { diff --git a/docs/api/enterprise.md b/docs/api/enterprise.md index a6bc536bb27e6..754197e2d3178 100644 --- a/docs/api/enterprise.md +++ b/docs/api/enterprise.md @@ -1185,7 +1185,11 @@ curl -X GET http://coder-server:8080/api/v2/workspaceproxies \ "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", - "updated_at": "2019-08-24T14:15:22Z", + "status": { + "checked_at": "string", + "status": "reachable" + }, + "updatedAt": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } @@ -1202,17 +1206,28 @@ curl -X GET http://coder-server:8080/api/v2/workspaceproxies \ Status Code **200** -| Name | Type | Required | Restrictions | Description | -| --------------------- | ----------------- | -------- | ------------ | -------------------------------------------------------------------------------------- | -| `[array item]` | array | false | | | -| `» created_at` | string(date-time) | false | | | -| `» deleted` | boolean | false | | | -| `» icon` | string | false | | | -| `» id` | string(uuid) | false | | | -| `» name` | string | false | | | -| `» updated_at` | string(date-time) | false | | | -| `» url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | -| `» wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | +| Name | Type | Required | Restrictions | Description | +| --------------------- | ------------------------------------------------------------------------ | -------- | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `[array item]` | array | false | | | +| `» created_at` | string(date-time) | false | | | +| `» deleted` | boolean | false | | | +| `» icon` | string | false | | | +| `» id` | string(uuid) | false | | | +| `» name` | string | false | | | +| `» status` | [codersdk.WorkspaceProxyStatus](schemas.md#codersdkworkspaceproxystatus) | false | | Status is the latest status check of the proxy. This will be empty for deleted proxies. This value can be used to determine if a workspace proxy is healthy and ready to use. | +| `»» checked_at` | string | false | | | +| `»» status` | [codersdk.ProxyHealthStatus](schemas.md#codersdkproxyhealthstatus) | false | | | +| `» updatedAt` | string(date-time) | false | | | +| `» url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | +| `» wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | + +#### Enumerated Values + +| Property | Value | +| -------- | -------------- | +| `status` | `reachable` | +| `status` | `unreachable` | +| `status` | `unregistered` | To perform this operation, you must be authenticated. [Learn more](authentication.md). @@ -1259,7 +1274,11 @@ curl -X POST http://coder-server:8080/api/v2/workspaceproxies \ "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", - "updated_at": "2019-08-24T14:15:22Z", + "status": { + "checked_at": "string", + "status": "reachable" + }, + "updatedAt": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } diff --git a/docs/api/schemas.md b/docs/api/schemas.md index 5f55f961423a6..5e1671680ff9f 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -3356,6 +3356,22 @@ Parameter represents a set value for the scope. | ------ | | `file` | +## codersdk.ProxyHealthStatus + +```json +"reachable" +``` + +### Properties + +#### Enumerated Values + +| Value | +| -------------- | +| `reachable` | +| `unreachable` | +| `unregistered` | + ## codersdk.PutExtendWorkspaceRequest ```json @@ -5166,7 +5182,11 @@ Parameter represents a set value for the scope. "icon": "string", "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", - "updated_at": "2019-08-24T14:15:22Z", + "status": { + "checked_at": "string", + "status": "reachable" + }, + "updatedAt": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } @@ -5174,16 +5194,33 @@ Parameter represents a set value for the scope. ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------------- | ------- | -------- | ------------ | -------------------------------------------------------------------------------------- | -| `created_at` | string | false | | | -| `deleted` | boolean | false | | | -| `icon` | string | false | | | -| `id` | string | false | | | -| `name` | string | false | | | -| `updated_at` | string | false | | | -| `url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | -| `wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | +| Name | Type | Required | Restrictions | Description | +| ------------------- | -------------------------------------------------------------- | -------- | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `created_at` | string | false | | | +| `deleted` | boolean | false | | | +| `icon` | string | false | | | +| `id` | string | false | | | +| `name` | string | false | | | +| `status` | [codersdk.WorkspaceProxyStatus](#codersdkworkspaceproxystatus) | false | | Status is the latest status check of the proxy. This will be empty for deleted proxies. This value can be used to determine if a workspace proxy is healthy and ready to use. | +| `updatedAt` | string | false | | | +| `url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | +| `wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | + +## codersdk.WorkspaceProxyStatus + +```json +{ + "checked_at": "string", + "status": "reachable" +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| ------------ | -------------------------------------------------------- | -------- | ------------ | ----------- | +| `checked_at` | string | false | | | +| `status` | [codersdk.ProxyHealthStatus](#codersdkproxyhealthstatus) | false | | | ## codersdk.WorkspaceQuota From a47ee6c2c7e0c17821fc2bc1d18fc455e1af227e Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Thu, 20 Apr 2023 21:26:39 -0500 Subject: [PATCH 06/15] Fix swagger --- codersdk/workspaceproxy.go | 4 ++-- enterprise/coderd/workspaceproxy_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index e5d0942668c4f..b3db13222d678 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -27,7 +27,7 @@ const ( type WorkspaceProxyStatus struct { Status ProxyHealthStatus `json:"status" table:"status"` - CheckedAt time.Time `json:"checked_at" table:"checked_at"` + CheckedAt time.Time `json:"checked_at" table:"checked_at" format:"date-time"` } type WorkspaceProxy struct { @@ -39,7 +39,7 @@ type WorkspaceProxy struct { // WildcardHostname with the wildcard for subdomain based app hosting: *.us.example.com WildcardHostname string `json:"wildcard_hostname" table:"wildcard_hostname"` CreatedAt time.Time `json:"created_at" format:"date-time" table:"created_at"` - UpdatedAt time.Time `djson:"updated_at" format:"date-time" table:"updated_at"` + UpdatedAt time.Time `json:"updated_at" format:"date-time" table:"updated_at"` Deleted bool `json:"deleted" table:"deleted"` // Status is the latest status check of the proxy. This will be empty for deleted diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index 71b85ddda284b..11efa4d8fbbbc 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -59,7 +59,7 @@ func TestWorkspaceProxyCRUD(t *testing.T) { proxies, err := client.WorkspaceProxies(ctx) require.NoError(t, err) require.Len(t, proxies, 1) - require.Equal(t, proxyRes.Proxy, proxies[0]) + require.Equal(t, proxyRes.Proxy.ID, proxies[0].ID) require.NotEmpty(t, proxyRes.ProxyToken) }) From 5f8e127cfc2c1bcc83e4e89ab8d1963c0061d2c9 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 21 Apr 2023 07:53:52 -0500 Subject: [PATCH 07/15] Merge origin/main --- enterprise/coderd/workspaceproxy.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index e17335fc2f9d6..20cd72167f308 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -18,6 +18,7 @@ import ( "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/httpapi" "github.com/coder/coder/coderd/httpmw" + "github.com/coder/coder/coderd/rbac" "github.com/coder/coder/coderd/workspaceapps" "github.com/coder/coder/codersdk" "github.com/coder/coder/cryptorand" @@ -296,6 +297,8 @@ func (api *API) workspaceProxyRegister(rw http.ResponseWriter, r *http.Request) httpapi.Write(ctx, rw, http.StatusCreated, wsproxysdk.RegisterWorkspaceProxyResponse{ AppSecurityKey: api.AppSecurityKey.String(), }) + + go api.forceWorkspaceProxyHealthUpdate(api.ctx) } // reconnectingPTYSignedToken issues a signed app token for use when connecting From 0b9e2ef588f23944bcc99d96be8a89078e8d1c28 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 21 Apr 2023 09:17:07 -0500 Subject: [PATCH 08/15] Add prometheus metric for health check results --- enterprise/coderd/proxyhealth/proxyhealth.go | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/enterprise/coderd/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go index d65a5a262445b..adff9a4557770 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -9,6 +9,8 @@ import ( "sync/atomic" "time" + "github.com/coder/coder/coderd/prometheusmetrics" + "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sync/errgroup" @@ -27,6 +29,9 @@ const ( Reachable ProxyHealthStatus = "reachable" // Unreachable means the proxy access url is not responding. Unreachable ProxyHealthStatus = "unreachable" + // Unhealthy means the proxy access url is responding, but there is some + // problem with the proxy. This problem may or may not be preventing functionality. + Unhealthy ProxyHealthStatus = "unhealthy" // Unregistered means the proxy has not registered a url yet. This means // the proxy was created with the cli, but has not yet been started. Unregistered ProxyHealthStatus = "unregistered" @@ -55,6 +60,7 @@ type ProxyHealth struct { // PromMetrics healthCheckDuration prometheus.Histogram + healthCheckResults *prometheusmetrics.CachedGaugeVec } func New(opts *Options) (*ProxyHealth, error) { @@ -87,6 +93,16 @@ func New(opts *Options) (*ProxyHealth, error) { }) opts.Prometheus.MustRegister(healthCheckDuration) + healthCheckResults := prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: "proxyhealth", + Name: "health_check_results", + Help: "This endpoint returns a number to indicate the health status. " + + "-2 (Unreachable), -1 (Unhealthy), 0 (Unregistered), 1 (Reachable)", + }, []string{"proxy_id"})) + opts.Prometheus.MustRegister(healthCheckResults) + return &ProxyHealth{ db: opts.DB, interval: opts.Interval, @@ -94,6 +110,7 @@ func New(opts *Options) (*ProxyHealth, error) { client: client, cache: &atomic.Pointer[map[uuid.UUID]ProxyStatus]{}, healthCheckDuration: healthCheckDuration, + healthCheckResults: healthCheckResults, }, nil } @@ -194,6 +211,7 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID // When the proxy is started, it will update the url. statusMu.Lock() defer statusMu.Unlock() + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, 0, proxy.ID.String()) status.Status = Unregistered proxyStatus[proxy.ID] = status return nil @@ -211,12 +229,16 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID if err == nil && resp.StatusCode != http.StatusOK { // No error but the status code is incorrect. status.Status = Unreachable + status.StatusError = fmt.Sprintf("status code %d", resp.StatusCode) + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -2, proxy.ID.String()) } else if err == nil { status.Status = Reachable + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, 1, proxy.ID.String()) } else { // Any form of error is considered unreachable. status.Status = Unreachable status.StatusError = err.Error() + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -2, proxy.ID.String()) } statusMu.Lock() @@ -230,6 +252,7 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID if err != nil { return nil, xerrors.Errorf("group run: %w", err) } + p.healthCheckResults.Commit() return proxyStatus, nil } From 3c873fc82313191a44037ab5166d7304d38b114a Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 21 Apr 2023 09:19:03 -0500 Subject: [PATCH 09/15] Include error reason on response --- codersdk/workspaceproxy.go | 9 +++++++-- enterprise/coderd/workspaceproxy.go | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index a861ef0d237bc..76476f8437242 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -20,14 +20,19 @@ const ( ProxyReachable ProxyHealthStatus = "reachable" // ProxyUnreachable means the proxy access url is not responding. ProxyUnreachable ProxyHealthStatus = "unreachable" + // ProxyUnhealthy means the proxy access url is responding, but there is some + // problem with the proxy. This problem may or may not be preventing functionality. + ProxyUnhealthy ProxyHealthStatus = "unhealthy" // ProxyUnregistered means the proxy has not registered a url yet. This means // the proxy was created with the cli, but has not yet been started. ProxyUnregistered ProxyHealthStatus = "unregistered" ) type WorkspaceProxyStatus struct { - Status ProxyHealthStatus `json:"status" table:"status"` - CheckedAt time.Time `json:"checked_at" table:"checked_at" format:"date-time"` + Status ProxyHealthStatus `json:"status" table:"status"` + // Reason is optional and provides more context to the status response. + Reason string `json:"reason,omitempty" table:"reason"` + CheckedAt time.Time `json:"checked_at" table:"checked_at" format:"date-time"` } type WorkspaceProxy struct { diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 20cd72167f308..8519c4302dcc1 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -417,6 +417,7 @@ func convertProxy(p database.WorkspaceProxy, status proxyhealth.ProxyStatus) cod Deleted: p.Deleted, Status: codersdk.WorkspaceProxyStatus{ Status: codersdk.ProxyHealthStatus(status.Status), + Reason: status.StatusError, CheckedAt: status.CheckedAt, }, } From 298b29a357266f96ddd22db7703a0f88e3d4e3e3 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 21 Apr 2023 15:27:45 -0500 Subject: [PATCH 10/15] Start work on roundtrip back to primary --- codersdk/workspaceproxy.go | 16 +++- enterprise/cli/proxyserver.go | 1 + enterprise/coderd/proxyhealth/proxyhealth.go | 73 +++++++++++++------ .../coderd/proxyhealth/proxyhealth_test.go | 50 ++++++++++++- enterprise/coderd/workspaceproxy.go | 4 +- enterprise/wsproxy/wsproxy.go | 47 ++++++++++++ site/src/api/typesGenerated.ts | 16 +++- 7 files changed, 177 insertions(+), 30 deletions(-) diff --git a/codersdk/workspaceproxy.go b/codersdk/workspaceproxy.go index 76476f8437242..57f180b4e7aff 100644 --- a/codersdk/workspaceproxy.go +++ b/codersdk/workspaceproxy.go @@ -30,9 +30,19 @@ const ( type WorkspaceProxyStatus struct { Status ProxyHealthStatus `json:"status" table:"status"` - // Reason is optional and provides more context to the status response. - Reason string `json:"reason,omitempty" table:"reason"` - CheckedAt time.Time `json:"checked_at" table:"checked_at" format:"date-time"` + // Report provides more information about the health of the workspace proxy. + Report ProxyHealthReport `json:"report,omitempty" table:"report"` + CheckedAt time.Time `json:"checked_at" table:"checked_at" format:"date-time"` +} + +// ProxyHealthReport is a report of the health of the workspace proxy. +// A healthy report will have no errors. Warnings are not fatal. +type ProxyHealthReport struct { + // Errors are problems that prevent the workspace proxy from being healthy + Errors []string + // Warnings do not prevent the workspace proxy from being healthy, but + // should be addressed. + Warnings []string } type WorkspaceProxy struct { diff --git a/enterprise/cli/proxyserver.go b/enterprise/cli/proxyserver.go index 855eb98f26570..af5716424bc0e 100644 --- a/enterprise/cli/proxyserver.go +++ b/enterprise/cli/proxyserver.go @@ -227,6 +227,7 @@ func (*RootCmd) proxyServer() *clibase.Cmd { proxy, err := wsproxy.New(ctx, &wsproxy.Options{ Logger: logger, + HTTPClient: httpClient, DashboardURL: primaryAccessURL.Value(), AccessURL: cfg.AccessURL.Value(), AppHostname: appHostname, diff --git a/enterprise/coderd/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go index adff9a4557770..115a5f061aa68 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -2,6 +2,7 @@ package proxyhealth import ( "context" + "encoding/json" "fmt" "net/http" "strings" @@ -9,7 +10,7 @@ import ( "sync/atomic" "time" - "github.com/coder/coder/coderd/prometheusmetrics" + "github.com/coder/coder/codersdk" "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" @@ -19,14 +20,17 @@ import ( "cdr.dev/slog" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbauthz" + "github.com/coder/coder/coderd/prometheusmetrics" ) type ProxyHealthStatus string const ( - // Reachable means the proxy access url is reachable and returns a healthy + // Unknown should never be returned by the proxy health check. + Unknown ProxyHealthStatus = "unknown" + // Healthy means the proxy access url is reachable and returns a healthy // status code. - Reachable ProxyHealthStatus = "reachable" + Healthy ProxyHealthStatus = "ok" // Unreachable means the proxy access url is not responding. Unreachable ProxyHealthStatus = "unreachable" // Unhealthy means the proxy access url is responding, but there is some @@ -99,7 +103,7 @@ func New(opts *Options) (*ProxyHealth, error) { Subsystem: "proxyhealth", Name: "health_check_results", Help: "This endpoint returns a number to indicate the health status. " + - "-2 (Unreachable), -1 (Unhealthy), 0 (Unregistered), 1 (Reachable)", + "-3 (unknown), -2 (Unreachable), -1 (Unhealthy), 0 (Unregistered), 1 (Healthy)", }, []string{"proxy_id"})) opts.Prometheus.MustRegister(healthCheckResults) @@ -165,11 +169,10 @@ type ProxyStatus struct { // useful to know as it helps determine if the proxy checked has different values // then the proxy in hand. AKA if the proxy was updated, and the status was for // an older proxy. - Proxy database.WorkspaceProxy - Status ProxyHealthStatus - // StatusError is the error message returned when the proxy is unreachable. - StatusError string - CheckedAt time.Time + Proxy database.WorkspaceProxy + Status ProxyHealthStatus + Report codersdk.ProxyHealthReport + CheckedAt time.Time } // runOnce runs the health check for all workspace proxies. If there is an @@ -203,6 +206,7 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID status := ProxyStatus{ Proxy: proxy, CheckedAt: now, + Status: Unknown, } grp.Go(func() error { @@ -217,8 +221,8 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID return nil } - // Try to hit the healthz endpoint. - reqURL := fmt.Sprintf("%s/healthz", strings.TrimSuffix(proxy.Url, "/")) + // Try to hit the healthz-report endpoint for a comprehensive health check. + reqURL := fmt.Sprintf("%s/healthz-report", strings.TrimSuffix(proxy.Url, "/")) req, err := http.NewRequestWithContext(gctx, http.MethodGet, reqURL, nil) if err != nil { return xerrors.Errorf("new request: %w", err) @@ -226,19 +230,46 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID req = req.WithContext(gctx) resp, err := p.client.Do(req) - if err == nil && resp.StatusCode != http.StatusOK { - // No error but the status code is incorrect. + // A switch statement felt easier to categorize the different cases than + // if else statements or nested if statements. + switch { + case err == nil && resp.StatusCode == http.StatusOK: + err := json.NewDecoder(resp.Body).Decode(&status.Report) + if err != nil { + // If we cannot read the report, mark the proxy as unhealthy. + status.Report.Errors = []string{fmt.Sprintf("failed to decode health report: %s", err.Error())} + status.Status = Unhealthy + break + } + if len(status.Report.Errors) > 0 { + status.Status = Unhealthy + break + } + status.Status = Healthy + case err == nil && resp.StatusCode != http.StatusOK: + // Unhealthy as we did reach the proxy but it got an unexpected response. + status.Status = Unhealthy + status.Report.Errors = []string{fmt.Sprintf("unexpected status code %d", resp.StatusCode)} + case err != nil: + // Request failed, mark the proxy as unreachable. status.Status = Unreachable - status.StatusError = fmt.Sprintf("status code %d", resp.StatusCode) - p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -2, proxy.ID.String()) - } else if err == nil { - status.Status = Reachable + status.Report.Errors = []string{fmt.Sprintf("request to proxy failed: %s", err.Error())} + default: + // This should never happen + status.Status = Unknown + } + + // Set the prometheus metric correctly. + switch status.Status { + case Healthy: p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, 1, proxy.ID.String()) - } else { - // Any form of error is considered unreachable. - status.Status = Unreachable - status.StatusError = err.Error() + case Unhealthy: + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -1, proxy.ID.String()) + case Unreachable: p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -2, proxy.ID.String()) + default: + // Unknown + p.healthCheckResults.WithLabelValues(prometheusmetrics.VectorOperationSet, -3, proxy.ID.String()) } statusMu.Lock() diff --git a/enterprise/coderd/proxyhealth/proxyhealth_test.go b/enterprise/coderd/proxyhealth/proxyhealth_test.go index d2e7cd38ab01a..b2f8d42a31da1 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth_test.go +++ b/enterprise/coderd/proxyhealth/proxyhealth_test.go @@ -7,6 +7,9 @@ import ( "net/http/httptest" "testing" + "github.com/coder/coder/coderd/httpapi" + "github.com/coder/coder/codersdk" + "golang.org/x/xerrors" "cdr.dev/slog/sloggers/slogtest" @@ -61,13 +64,56 @@ func TestProxyHealth_Unregistered(t *testing.T) { } } +func TestProxyHealth_Unhealthy(t *testing.T) { + t.Parallel() + db := dbfake.New() + + srvBadReport := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + httpapi.Write(context.Background(), w, http.StatusOK, codersdk.ProxyHealthReport{ + Errors: []string{"We have a problem!"}, + }) + })) + defer srvBadReport.Close() + + srvBadCode := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + defer srvBadCode.Close() + + proxies := []database.WorkspaceProxy{ + // Same url for both, just checking multiple proxies are checked. + insertProxy(t, db, srvBadReport.URL), + insertProxy(t, db, srvBadCode.URL), + } + + ph, err := proxyhealth.New(&proxyhealth.Options{ + Interval: 0, + DB: db, + Logger: slogtest.Make(t, nil), + Client: srvBadReport.Client(), + }) + require.NoError(t, err, "failed to create proxy health") + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) + defer cancel() + + err = ph.ForceUpdate(ctx) + require.NoError(t, err, "failed to force update") + for _, p := range proxies { + require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Unhealthy, "expect reachable proxy") + } +} + func TestProxyHealth_Reachable(t *testing.T) { t.Parallel() db := dbfake.New() srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - _, _ = w.Write([]byte("OK")) + httpapi.Write(context.Background(), w, http.StatusOK, codersdk.ProxyHealthReport{ + Warnings: []string{"No problems, just a warning"}, + }) })) + defer srv.Close() proxies := []database.WorkspaceProxy{ // Same url for both, just checking multiple proxies are checked. @@ -89,7 +135,7 @@ func TestProxyHealth_Reachable(t *testing.T) { err = ph.ForceUpdate(ctx) require.NoError(t, err, "failed to force update") for _, p := range proxies { - require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Reachable, "expect reachable proxy") + require.Equal(t, ph.HealthStatus()[p.ID].Status, proxyhealth.Healthy, "expect reachable proxy") } } diff --git a/enterprise/coderd/workspaceproxy.go b/enterprise/coderd/workspaceproxy.go index 8519c4302dcc1..136a000e57289 100644 --- a/enterprise/coderd/workspaceproxy.go +++ b/enterprise/coderd/workspaceproxy.go @@ -139,8 +139,8 @@ func (api *API) postWorkspaceProxy(rw http.ResponseWriter, r *http.Request) { httpapi.Write(ctx, rw, http.StatusCreated, codersdk.CreateWorkspaceProxyResponse{ Proxy: convertProxy(proxy, proxyhealth.ProxyStatus{ Proxy: proxy, - Status: proxyhealth.Unregistered, CheckedAt: time.Now(), + Status: proxyhealth.Unregistered, }), ProxyToken: fullToken, }) @@ -417,7 +417,7 @@ func convertProxy(p database.WorkspaceProxy, status proxyhealth.ProxyStatus) cod Deleted: p.Deleted, Status: codersdk.WorkspaceProxyStatus{ Status: codersdk.ProxyHealthStatus(status.Status), - Reason: status.StatusError, + Report: status.Report, CheckedAt: status.CheckedAt, }, } diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index b30fea54ed4cd..14b12401725d6 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -2,6 +2,7 @@ package wsproxy import ( "context" + "fmt" "net/http" "net/url" "reflect" @@ -30,6 +31,7 @@ import ( type Options struct { Logger slog.Logger + HTTPClient *http.Client // DashboardURL is the URL of the primary coderd instance. DashboardURL *url.URL // AccessURL is the URL of the WorkspaceProxy. @@ -120,6 +122,11 @@ func New(ctx context.Context, opts *Options) (*Server, error) { return nil, xerrors.Errorf("set client token: %w", err) } + // Use the configured client if provided. + if opts.HTTPClient != nil { + client.SDKClient.HTTPClient = opts.HTTPClient + } + // TODO: Probably do some version checking here info, err := client.SDKClient.BuildInfo(ctx) if err != nil { @@ -224,6 +231,8 @@ func New(ctx context.Context, opts *Options) (*Server, error) { r.Get("/buildinfo", s.buildInfo) r.Get("/healthz", func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte("OK")) }) + // TODO: @emyrk should this be authenticated or debounced? + r.Get("/healthz-report", s.healthReport) return s, nil } @@ -246,6 +255,44 @@ func (s *Server) buildInfo(rw http.ResponseWriter, r *http.Request) { }) } +// healthReport is a more thorough health check than the '/healthz' endpoint. +// This endpoint not only responds if the server is running, but can do some +// internal diagnostics to ensure that the server is running correctly. The +// primary coderd will use this to determine if this workspace proxy can be used +// by the users. This endpoint will take longer to respond than the '/healthz'. +// Checks: +// - Can communicate with primary coderd +// +// TODO: Config checks to ensure consistent with primary +func (s *Server) healthReport(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + var report codersdk.ProxyHealthReport + + // Hit the build info to do basic version checking. + primaryBuild, err := s.SDKClient.SDKClient.BuildInfo(ctx) + if err != nil { + report.Errors = append(report.Errors, fmt.Sprintf("failed to get build info: %s", err.Error())) + } + + if primaryBuild.WorkspaceProxy { + // This could be a simple mistake of using a proxy url as the dashboard url. + report.Errors = append(report.Errors, + fmt.Sprintf("dashboard url (%s) is a workspace proxy, must be a primary coderd", s.DashboardURL.String())) + } + + // If we are in dev mode, never check versions. + if !buildinfo.IsDev() && !buildinfo.VersionsMatch(primaryBuild.Version, buildinfo.Version()) { + // Version mismatches are not fatal, but should be reported. + report.Warnings = append(report.Warnings, + fmt.Sprintf("version mismatch: primary coderd (%s) != workspace proxy (%s)", primaryBuild.Version, buildinfo.Version())) + } + + // TODO: We should hit the deployment config endpoint and do some config + // checks. We can check the version from the X-CODER-BUILD-VERSION header + + httpapi.Write(r.Context(), rw, http.StatusOK, report) +} + type optErrors []error func (e optErrors) Error() string { diff --git a/site/src/api/typesGenerated.ts b/site/src/api/typesGenerated.ts index 1c4b6ebf157cd..e2bb4a33f589d 100644 --- a/site/src/api/typesGenerated.ts +++ b/site/src/api/typesGenerated.ts @@ -690,6 +690,12 @@ export interface ProvisionerJobLog { readonly output: string } +// From codersdk/workspaceproxy.go +export interface ProxyHealthReport { + readonly Errors: string[] + readonly Warnings: string[] +} + // From codersdk/workspaces.go export interface PutExtendWorkspaceRequest { readonly deadline: string @@ -1240,7 +1246,7 @@ export interface WorkspaceProxy { readonly url: string readonly wildcard_hostname: string readonly created_at: string - readonly UpdatedAt: string + readonly updated_at: string readonly deleted: boolean readonly status?: WorkspaceProxyStatus } @@ -1254,6 +1260,7 @@ export interface WorkspaceProxyBuildInfo { // From codersdk/workspaceproxy.go export interface WorkspaceProxyStatus { readonly status: ProxyHealthStatus + readonly report?: ProxyHealthReport readonly checked_at: string } @@ -1455,9 +1462,14 @@ export type ProvisionerType = "echo" | "terraform" export const ProvisionerTypes: ProvisionerType[] = ["echo", "terraform"] // From codersdk/workspaceproxy.go -export type ProxyHealthStatus = "reachable" | "unreachable" | "unregistered" +export type ProxyHealthStatus = + | "reachable" + | "unhealthy" + | "unreachable" + | "unregistered" export const ProxyHealthStatuses: ProxyHealthStatus[] = [ "reachable", + "unhealthy", "unreachable", "unregistered", ] From 07a1e07d01ee87dfba37ebd53279ebb9b1c602b2 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 21 Apr 2023 15:27:59 -0500 Subject: [PATCH 11/15] Fix importS --- enterprise/coderd/proxyhealth/proxyhealth.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/enterprise/coderd/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go index 115a5f061aa68..c76c2d2246e8e 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -10,8 +10,6 @@ import ( "sync/atomic" "time" - "github.com/coder/coder/codersdk" - "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "golang.org/x/sync/errgroup" @@ -21,6 +19,7 @@ import ( "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbauthz" "github.com/coder/coder/coderd/prometheusmetrics" + "github.com/coder/coder/codersdk" ) type ProxyHealthStatus string From ff504c8796d6fb971fea202afa5f4dcbea38279b Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Mon, 24 Apr 2023 08:53:48 -0500 Subject: [PATCH 12/15] Make gen --- coderd/apidoc/docs.go | 34 ++++++++++++++++++++++++++++-- coderd/apidoc/swagger.json | 35 ++++++++++++++++++++++++++++--- docs/api/enterprise.md | 24 ++++++++++++++++------ docs/api/schemas.md | 42 ++++++++++++++++++++++++++++++-------- 4 files changed, 116 insertions(+), 19 deletions(-) diff --git a/coderd/apidoc/docs.go b/coderd/apidoc/docs.go index 338d0ff9874ea..4cb4940a03775 100644 --- a/coderd/apidoc/docs.go +++ b/coderd/apidoc/docs.go @@ -8208,16 +8208,37 @@ const docTemplate = `{ "ProvisionerStorageMethodFile" ] }, + "codersdk.ProxyHealthReport": { + "type": "object", + "properties": { + "errors": { + "description": "Errors are problems that prevent the workspace proxy from being healthy", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "description": "Warnings do not prevent the workspace proxy from being healthy, but\nshould be addressed.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, "codersdk.ProxyHealthStatus": { "type": "string", "enum": [ "reachable", "unreachable", + "unhealthy", "unregistered" ], "x-enum-varnames": [ "ProxyReachable", "ProxyUnreachable", + "ProxyUnhealthy", "ProxyUnregistered" ] }, @@ -9722,7 +9743,7 @@ const docTemplate = `{ } ] }, - "updatedAt": { + "updated_at": { "type": "string", "format": "date-time" }, @@ -9740,7 +9761,16 @@ const docTemplate = `{ "type": "object", "properties": { "checked_at": { - "type": "string" + "type": "string", + "format": "date-time" + }, + "report": { + "description": "Report provides more information about the health of the workspace proxy.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.ProxyHealthReport" + } + ] }, "status": { "$ref": "#/definitions/codersdk.ProxyHealthStatus" diff --git a/coderd/apidoc/swagger.json b/coderd/apidoc/swagger.json index a2e57fb84512a..558360d0c0cc6 100644 --- a/coderd/apidoc/swagger.json +++ b/coderd/apidoc/swagger.json @@ -7354,12 +7354,32 @@ "enum": ["file"], "x-enum-varnames": ["ProvisionerStorageMethodFile"] }, + "codersdk.ProxyHealthReport": { + "type": "object", + "properties": { + "errors": { + "description": "Errors are problems that prevent the workspace proxy from being healthy", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "description": "Warnings do not prevent the workspace proxy from being healthy, but\nshould be addressed.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, "codersdk.ProxyHealthStatus": { "type": "string", - "enum": ["reachable", "unreachable", "unregistered"], + "enum": ["reachable", "unreachable", "unhealthy", "unregistered"], "x-enum-varnames": [ "ProxyReachable", "ProxyUnreachable", + "ProxyUnhealthy", "ProxyUnregistered" ] }, @@ -8781,7 +8801,7 @@ } ] }, - "updatedAt": { + "updated_at": { "type": "string", "format": "date-time" }, @@ -8799,7 +8819,16 @@ "type": "object", "properties": { "checked_at": { - "type": "string" + "type": "string", + "format": "date-time" + }, + "report": { + "description": "Report provides more information about the health of the workspace proxy.", + "allOf": [ + { + "$ref": "#/definitions/codersdk.ProxyHealthReport" + } + ] }, "status": { "$ref": "#/definitions/codersdk.ProxyHealthStatus" diff --git a/docs/api/enterprise.md b/docs/api/enterprise.md index 75da3d077b78a..fbee85b9970f1 100644 --- a/docs/api/enterprise.md +++ b/docs/api/enterprise.md @@ -1186,10 +1186,14 @@ curl -X GET http://coder-server:8080/api/v2/workspaceproxies \ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", "status": { - "checked_at": "string", + "checked_at": "2019-08-24T14:15:22Z", + "report": { + "errors": ["string"], + "warnings": ["string"] + }, "status": "reachable" }, - "updatedAt": "2019-08-24T14:15:22Z", + "updated_at": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } @@ -1215,9 +1219,12 @@ Status Code **200** | `» id` | string(uuid) | false | | | | `» name` | string | false | | | | `» status` | [codersdk.WorkspaceProxyStatus](schemas.md#codersdkworkspaceproxystatus) | false | | Status is the latest status check of the proxy. This will be empty for deleted proxies. This value can be used to determine if a workspace proxy is healthy and ready to use. | -| `»» checked_at` | string | false | | | +| `»» checked_at` | string(date-time) | false | | | +| `»» report` | [codersdk.ProxyHealthReport](schemas.md#codersdkproxyhealthreport) | false | | Report provides more information about the health of the workspace proxy. | +| `»»» errors` | array | false | | Errors are problems that prevent the workspace proxy from being healthy | +| `»»» warnings` | array | false | | Warnings do not prevent the workspace proxy from being healthy, but should be addressed. | | `»» status` | [codersdk.ProxyHealthStatus](schemas.md#codersdkproxyhealthstatus) | false | | | -| `» updatedAt` | string(date-time) | false | | | +| `» updated_at` | string(date-time) | false | | | | `» url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | | `» wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | @@ -1227,6 +1234,7 @@ Status Code **200** | -------- | -------------- | | `status` | `reachable` | | `status` | `unreachable` | +| `status` | `unhealthy` | | `status` | `unregistered` | To perform this operation, you must be authenticated. [Learn more](authentication.md). @@ -1273,10 +1281,14 @@ curl -X POST http://coder-server:8080/api/v2/workspaceproxies \ "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", "status": { - "checked_at": "string", + "checked_at": "2019-08-24T14:15:22Z", + "report": { + "errors": ["string"], + "warnings": ["string"] + }, "status": "reachable" }, - "updatedAt": "2019-08-24T14:15:22Z", + "updated_at": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } diff --git a/docs/api/schemas.md b/docs/api/schemas.md index a5c02d298f190..d297b31169065 100644 --- a/docs/api/schemas.md +++ b/docs/api/schemas.md @@ -3382,6 +3382,22 @@ Parameter represents a set value for the scope. | ------ | | `file` | +## codersdk.ProxyHealthReport + +```json +{ + "errors": ["string"], + "warnings": ["string"] +} +``` + +### Properties + +| Name | Type | Required | Restrictions | Description | +| ---------- | --------------- | -------- | ------------ | ---------------------------------------------------------------------------------------- | +| `errors` | array of string | false | | Errors are problems that prevent the workspace proxy from being healthy | +| `warnings` | array of string | false | | Warnings do not prevent the workspace proxy from being healthy, but should be addressed. | + ## codersdk.ProxyHealthStatus ```json @@ -3396,6 +3412,7 @@ Parameter represents a set value for the scope. | -------------- | | `reachable` | | `unreachable` | +| `unhealthy` | | `unregistered` | ## codersdk.PutExtendWorkspaceRequest @@ -5209,10 +5226,14 @@ Parameter represents a set value for the scope. "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08", "name": "string", "status": { - "checked_at": "string", + "checked_at": "2019-08-24T14:15:22Z", + "report": { + "errors": ["string"], + "warnings": ["string"] + }, "status": "reachable" }, - "updatedAt": "2019-08-24T14:15:22Z", + "updated_at": "2019-08-24T14:15:22Z", "url": "string", "wildcard_hostname": "string" } @@ -5228,7 +5249,7 @@ Parameter represents a set value for the scope. | `id` | string | false | | | | `name` | string | false | | | | `status` | [codersdk.WorkspaceProxyStatus](#codersdkworkspaceproxystatus) | false | | Status is the latest status check of the proxy. This will be empty for deleted proxies. This value can be used to determine if a workspace proxy is healthy and ready to use. | -| `updatedAt` | string | false | | | +| `updated_at` | string | false | | | | `url` | string | false | | Full URL including scheme of the proxy api url: https://us.example.com | | `wildcard_hostname` | string | false | | Wildcard hostname with the wildcard for subdomain based app hosting: \*.us.example.com | @@ -5236,17 +5257,22 @@ Parameter represents a set value for the scope. ```json { - "checked_at": "string", + "checked_at": "2019-08-24T14:15:22Z", + "report": { + "errors": ["string"], + "warnings": ["string"] + }, "status": "reachable" } ``` ### Properties -| Name | Type | Required | Restrictions | Description | -| ------------ | -------------------------------------------------------- | -------- | ------------ | ----------- | -| `checked_at` | string | false | | | -| `status` | [codersdk.ProxyHealthStatus](#codersdkproxyhealthstatus) | false | | | +| Name | Type | Required | Restrictions | Description | +| ------------ | -------------------------------------------------------- | -------- | ------------ | ------------------------------------------------------------------------- | +| `checked_at` | string | false | | | +| `report` | [codersdk.ProxyHealthReport](#codersdkproxyhealthreport) | false | | Report provides more information about the health of the workspace proxy. | +| `status` | [codersdk.ProxyHealthStatus](#codersdkproxyhealthstatus) | false | | | ## codersdk.WorkspaceQuota From d7814ce4cebd611f7fedac8e502f545ff61014fa Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Mon, 24 Apr 2023 09:09:38 -0500 Subject: [PATCH 13/15] Linting --- enterprise/coderd/proxyhealth/proxyhealth.go | 18 +++++++++++------- .../coderd/proxyhealth/proxyhealth_test.go | 3 ++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/enterprise/coderd/proxyhealth/proxyhealth.go b/enterprise/coderd/proxyhealth/proxyhealth.go index c76c2d2246e8e..ab532f5892618 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth.go +++ b/enterprise/coderd/proxyhealth/proxyhealth.go @@ -22,22 +22,22 @@ import ( "github.com/coder/coder/codersdk" ) -type ProxyHealthStatus string +type Status string const ( // Unknown should never be returned by the proxy health check. - Unknown ProxyHealthStatus = "unknown" + Unknown Status = "unknown" // Healthy means the proxy access url is reachable and returns a healthy // status code. - Healthy ProxyHealthStatus = "ok" + Healthy Status = "ok" // Unreachable means the proxy access url is not responding. - Unreachable ProxyHealthStatus = "unreachable" + Unreachable Status = "unreachable" // Unhealthy means the proxy access url is responding, but there is some // problem with the proxy. This problem may or may not be preventing functionality. - Unhealthy ProxyHealthStatus = "unhealthy" + Unhealthy Status = "unhealthy" // Unregistered means the proxy has not registered a url yet. This means // the proxy was created with the cli, but has not yet been started. - Unregistered ProxyHealthStatus = "unregistered" + Unregistered Status = "unregistered" ) type Options struct { @@ -169,7 +169,7 @@ type ProxyStatus struct { // then the proxy in hand. AKA if the proxy was updated, and the status was for // an older proxy. Proxy database.WorkspaceProxy - Status ProxyHealthStatus + Status Status Report codersdk.ProxyHealthReport CheckedAt time.Time } @@ -181,6 +181,7 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID // Record from the given time. defer p.healthCheckDuration.Observe(time.Since(now).Seconds()) + //nolint:gocritic // Proxy health is a system service. proxies, err := p.db.GetWorkspaceProxies(dbauthz.AsSystemRestricted(ctx)) if err != nil { return nil, xerrors.Errorf("get workspace proxies: %w", err) @@ -229,6 +230,9 @@ func (p *ProxyHealth) runOnce(ctx context.Context, now time.Time) (map[uuid.UUID req = req.WithContext(gctx) resp, err := p.client.Do(req) + if err == nil { + defer resp.Body.Close() + } // A switch statement felt easier to categorize the different cases than // if else statements or nested if statements. switch { diff --git a/enterprise/coderd/proxyhealth/proxyhealth_test.go b/enterprise/coderd/proxyhealth/proxyhealth_test.go index b2f8d42a31da1..6d49cc345093b 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth_test.go +++ b/enterprise/coderd/proxyhealth/proxyhealth_test.go @@ -12,9 +12,10 @@ import ( "golang.org/x/xerrors" - "cdr.dev/slog/sloggers/slogtest" "github.com/stretchr/testify/require" + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbfake" "github.com/coder/coder/coderd/database/dbgen" From 79005e48804963d819a247d1cbd0c7ddf94a542f Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Mon, 24 Apr 2023 09:21:35 -0500 Subject: [PATCH 14/15] Import order --- enterprise/coderd/proxyhealth/proxyhealth_test.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/enterprise/coderd/proxyhealth/proxyhealth_test.go b/enterprise/coderd/proxyhealth/proxyhealth_test.go index 6d49cc345093b..5fb9614385c5f 100644 --- a/enterprise/coderd/proxyhealth/proxyhealth_test.go +++ b/enterprise/coderd/proxyhealth/proxyhealth_test.go @@ -7,18 +7,15 @@ import ( "net/http/httptest" "testing" - "github.com/coder/coder/coderd/httpapi" - "github.com/coder/coder/codersdk" - - "golang.org/x/xerrors" - "github.com/stretchr/testify/require" + "golang.org/x/xerrors" "cdr.dev/slog/sloggers/slogtest" - "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbfake" "github.com/coder/coder/coderd/database/dbgen" + "github.com/coder/coder/coderd/httpapi" + "github.com/coder/coder/codersdk" "github.com/coder/coder/enterprise/coderd/proxyhealth" "github.com/coder/coder/testutil" ) From 53851be9701c07e4f30145ea1a07db82e43fc797 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Mon, 24 Apr 2023 09:23:52 -0500 Subject: [PATCH 15/15] Return early if the buildinfo fails --- enterprise/wsproxy/wsproxy.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/enterprise/wsproxy/wsproxy.go b/enterprise/wsproxy/wsproxy.go index 14b12401725d6..3f03d486fe87c 100644 --- a/enterprise/wsproxy/wsproxy.go +++ b/enterprise/wsproxy/wsproxy.go @@ -272,6 +272,8 @@ func (s *Server) healthReport(rw http.ResponseWriter, r *http.Request) { primaryBuild, err := s.SDKClient.SDKClient.BuildInfo(ctx) if err != nil { report.Errors = append(report.Errors, fmt.Sprintf("failed to get build info: %s", err.Error())) + httpapi.Write(r.Context(), rw, http.StatusOK, report) + return } if primaryBuild.WorkspaceProxy {