-
Notifications
You must be signed in to change notification settings - Fork 943
feat(coderd): add prometheus metrics to servertailnet #11988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,18 +98,18 @@ func NewServerTailnet( | |
agentConnectionTimes: map[uuid.UUID]time.Time{}, | ||
agentTickets: map[uuid.UUID]map[uuid.UUID]struct{}{}, | ||
transport: tailnetTransport.Clone(), | ||
connsPerAgent: prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
connsPerAgent: prometheus.NewGauge(prometheus.GaugeOpts{ | ||
Namespace: "coder", | ||
Subsystem: "servertailnet", | ||
Name: "open_conns", | ||
Name: "open_tcp_connections", | ||
Help: "Total number of TCP connections currently open to workspace agents.", | ||
}, []string{"agent_id"}), | ||
totalConns: prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
}), | ||
totalConns: prometheus.NewCounter(prometheus.CounterOpts{ | ||
Namespace: "coder", | ||
Subsystem: "servertailnet", | ||
Name: "total_conns", | ||
Name: "tcp_connections_total", | ||
Help: "Total number of TCP connections made to workspace agents.", | ||
}, []string{"agent_id"}), | ||
}), | ||
} | ||
tn.transport.DialContext = tn.dialContext | ||
// These options are mostly just picked at random, and they can likely be | ||
|
@@ -328,8 +328,8 @@ type ServerTailnet struct { | |
|
||
transport *http.Transport | ||
|
||
connsPerAgent *prometheus.GaugeVec | ||
totalConns *prometheus.CounterVec | ||
connsPerAgent prometheus.Gauge | ||
totalConns prometheus.Counter | ||
} | ||
|
||
func (s *ServerTailnet) ReverseProxy(targetURL, dashboardURL *url.URL, agentID uuid.UUID) *httputil.ReverseProxy { | ||
|
@@ -380,8 +380,8 @@ func (s *ServerTailnet) dialContext(ctx context.Context, network, addr string) ( | |
return nil, err | ||
} | ||
|
||
s.connsPerAgent.With(prometheus.Labels{"agent_id": agentID.String()}).Inc() | ||
s.totalConns.With(prometheus.Labels{"agent_id": agentID.String()}).Inc() | ||
s.connsPerAgent.Inc() | ||
s.totalConns.Inc() | ||
return &instrumentedConn{ | ||
Conn: nc, | ||
agentID: agentID, | ||
|
@@ -498,12 +498,12 @@ type instrumentedConn struct { | |
|
||
agentID uuid.UUID | ||
closeOnce sync.Once | ||
connsPerAgent *prometheus.GaugeVec | ||
connsPerAgent prometheus.Gauge | ||
} | ||
|
||
func (c *instrumentedConn) Close() error { | ||
c.closeOnce.Do(func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are network connections always explicitly closed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This one I'm not 100% sure on. Admittedly, I found this idea in a Stack Overflow post, where it seemed to work for a couple of other people. I was planning to get this into dev and monitor it to make sure it works as intended with a lot more usage than I can reproduce myself. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess maybe they should be, and this gauge will tell us if there's a leak... |
||
c.connsPerAgent.With(prometheus.Labels{"agent_id": c.agentID.String()}).Dec() | ||
c.connsPerAgent.Dec() | ||
}) | ||
return c.Conn.Close() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package testutil | ||
|
||
import ( | ||
"testing" | ||
|
||
dto "github.com/prometheus/client_model/go" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func PromGaugeHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool { | ||
t.Helper() | ||
for _, family := range metrics { | ||
if family.GetName() != name { | ||
continue | ||
} | ||
ms := family.GetMetric() | ||
metricsLoop: | ||
for _, m := range ms { | ||
require.Equal(t, len(label), len(m.GetLabel())) | ||
for i, lv := range label { | ||
if lv != m.GetLabel()[i].GetValue() { | ||
continue metricsLoop | ||
} | ||
} | ||
return value == m.GetGauge().GetValue() | ||
} | ||
} | ||
return false | ||
} | ||
|
||
func PromCounterHasValue(t testing.TB, metrics []*dto.MetricFamily, value float64, name string, label ...string) bool { | ||
t.Helper() | ||
for _, family := range metrics { | ||
if family.GetName() != name { | ||
continue | ||
} | ||
ms := family.GetMetric() | ||
metricsLoop: | ||
for _, m := range ms { | ||
require.Equal(t, len(label), len(m.GetLabel())) | ||
for i, lv := range label { | ||
if lv != m.GetLabel()[i].GetValue() { | ||
continue metricsLoop | ||
} | ||
} | ||
return value == m.GetCounter().GetValue() | ||
} | ||
} | ||
return false | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I realize we only use servertailnet for HTTP proxying at present, but what do you think about changing this to `open_connections` and making the network a label (e.g. `network=tcp`)? Like, I dunno, what if some customer wants us to proxy QUIC or some shit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that's a good point. If we were to keep it like this, we would definitely preclude ourselves from expanding this without a breaking change. Adding a `network=tcp` label, with it really only ever being tcp for the foreseeable future, basically has the same implications and usability as not having it, so it's probably a good change.