E59A Add per-endpoint sysctls to DriverOpts · moby/moby@e29f2f0 · GitHub
[go: up one dir, main page]

Skip to content

Commit e29f2f0

Browse files
committed
Add per-endpoint sysctls to DriverOpts
Until now it's been possible to set per-interface sysctls using, for example, '--sysctl net.ipv6.conf.eth0.accept_ra=2'. But, the index in the interface name is allocated serially, and the numbering in a container with more than one interface may change when a container is restarted. The change to make it possible to connect a container to more than one network when it's created increased the ambiguity. This change adds label "com.docker.network.endpoint.sysctls" to the DriverOpts in EndpointSettings. This option is explicitly associated with the interface. Settings in "--sysctl" for "eth0" are migrated to DriverOpts. Because using "--sysctl" with any interface apart from "eth0" would have unpredictable results, it is now an error to use any other interface name in the top level "--sysctl" option. The error message includes a hint at how to use the new per-interface setting. The per-endpoint sysctl name is a shortened form of the sysctl name, intended to limit settings to 'net.*', and to eliminate the need to identify the interface by name. For example: net.ipv6.conf.eth0.accept_ra=2 becomes: ipv6.conf.accept_ra=2 The value of DriverOpts["com.docker.network.endpoint.sysctls"] is a comma separated list of these short-form sysctls. Settings from '--sysctl' are applied by the runtime lib during task creation. So, task creation fails if the endpoint does not exist. Applying per-endpoint settings during interface configuration means the endpoint can be created later, which paves the way for removal of the SetKey OCI prestart hook. Unlike other DriverOpts, the sysctl label itself is not driver-specific, but each driver has a chance to check settings/values and raise an error if a setting would cause it a problem - no such checks have been added in this initial version. As a future extension, if required, it would be possible for the driver to echo back valid/extended/modified settings to libnetwork for it to apply to the interface. (At that point, the syntax for the options could become driver specific to allow, for example, a driver to create more than one interface). Signed-off-by: Rob Murray <rob.murray@docker.com>
1 parent 1e29f9b commit e29f2f0

File tree

10 files changed

+353
-2
lines changed

10 files changed

+353
-2
lines changed

api/server/router/container/container_routes.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/docker/docker/api/types/versions"
2424
containerpkg "github.com/docker/docker/container"
2525
"github.com/docker/docker/errdefs"
26+
"github.com/docker/docker/libnetwork/netlabel"
2627
"github.com/docker/docker/pkg/ioutils"
2728
"github.com/docker/docker/runconfig"
2829
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
@@ -619,6 +620,12 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
619620
warnings = append(warnings, warn)
620621
}
621622

623+
if warn, err := handleSysctlBC(hostConfig, networkingConfig, version); err != nil {
624+
return err
625+
} else if warn != "" {
626+
warnings = append(warnings, warn)
627+
}
628+
622629
if hostConfig.PidsLimit != nil && *hostConfig.PidsLimit <= 0 {
623630
// Don't set a limit if either no limit was specified, or "unlimited" was
624631
// explicitly set.
@@ -694,6 +701,96 @@ func handleMACAddressBC(config *container.Config, hostConfig *container.HostConf
694701
return warning, nil
695702
}
696703

704+
// handleSysctlBC migrates top level network endpoint-specific '--sysctl'
705+
// settings to an DriverOpts for an endpoint. This is necessary because sysctls
706+
// are applied during container task creation, but sysctls that name an interface
707+
// (for example 'net.ipv6.conf.eth0.forwarding') cannot be applied until the
708+
// interface has been created. So, these settings are removed from hostConfig.Sysctls
709+
// and added to DriverOpts[netlabel.EndpointSysctls].
710+
//
711+
// Because interface names ('ethN') are allocated sequentially, and the order of
712+
// network connections is not deterministic on container restart, only 'eth0'
713+
// would work reliably in a top-level '--sysctl' option, and then only when
714+
// there's a single initial network connection. So, settings for 'eth0' are
715+
// migrated to the primary interface, identified by 'hostConfig.NetworkMode'.
716+
// Settings for other interfaces are treated as errors.
717+
//
718+
// In the DriverOpts, to restrict sysctl settings to those configuring network
719+
// interfaces and because the interface name cannot be determined in advance, a
720+
// short-form of the sysctl name is used. For example, 'net.ipv6.conf.eth0.forwarding'
721+
// becomes 'ipv6.conf.forwarding'. The value in DriverOpts is a comma-separated list
722+
// of these short-form settings.
723+
//
724+
// A warning is generated when settings are migrated.
725+
func handleSysctlBC(
726+
hostConfig *container.HostConfig,
727+
netConfig *network.NetworkingConfig,
728+
version string,
729+
) (string, error) {
730+
if !hostConfig.NetworkMode.IsPrivate() {
731+
return "", nil
732+
}
733+
734+
var ep *network.EndpointSettings
735+
var toDelete []string
736+
var netIfSysctls []string
737+
for k, v := range hostConfig.Sysctls {
738+
// If the sysctl name matches "net.*.*.eth0.*" ...
739+
if spl := strings.SplitN(k, ".", 5); len(spl) == 5 && spl[0] == "net" && strings.HasPrefix(spl[3], "eth") {
740+
// Transform the name to the endpoint-specific short form.
741+
netIfSysctl := fmt.Sprintf("%s.%s.%s=%s", spl[1], spl[2], spl[4], v)
742+
// Find the EndpointConfig to migrate settings to, if not already found.
743+
if ep == nil {
744+
// Per-endpoint sysctls were introduced in API version 1.46. Migration is
745+
// needed, but refuse to do it automatically for newer versions of the API.
746+
if versions.GreaterThan(version, "1.46") {
747+
return "", fmt.Errorf("interface specific sysctl setting %q must be supplied using driver option '%s'",
748+
k, netlabel.EndpointSysctls)
749+
}
750+
var err error
751+
ep, err = epConfigForNetMode(version, hostConfig.NetworkMode, netConfig)
752+
if err != nil {
753+
return "", fmt.Errorf("unable to find a network for sysctl %s: %w", k, err)
754+
}
755+
}
756+
// Only try to migrate settings for "eth0", anything else would always
757+
// have behaved unpredictably.
758+
if spl[3] != "eth0" {
759+
return "", fmt.Errorf(`unable to determine network endpoint for sysctl %s, use driver option '%s' to set per-interface sysctls`,
760+
k, netlabel.EndpointSysctls)
761+
}
762+
// Prepare the migration.
763+
toDelete = append(toDelete, k)
764+
netIfSysctls = append(netIfSysctls, netIfSysctl)
765+
}
766+
}
767+
if ep == nil {
768+
return "", nil
769+
}
770+
771+
newDriverOpt := strings.Join(netIfSysctls, ",")
772+
warning := fmt.Sprintf(`Migrated sysctl %q to DriverOpts{"%q":"%q"}.`,
773+
strings.Join(toDelete, ","),
774+
netlabel.EndpointSysctls, newDriverOpt)
775+
776+
// Append exiting per-endpoint sysctls to the migrated sysctls (give priority
777+
// to per-endpoint settings).
778+
if ep.DriverOpts == nil {
779+
ep.DriverOpts = map[string]string{}
780+
}
781+
if oldDriverOpt, ok := ep.DriverOpts[netlabel.EndpointSysctls]; ok {
782+
newDriverOpt += "," + oldDriverOpt
783+
}
784+
ep.DriverOpts[netlabel.EndpointSysctls] = newDriverOpt
785+
786+
// Delete migrated settings from the top-level sysctls.
787+
for _, k := range toDelete {
788+
delete(hostConfig.Sysctls, k)
789+
}
790+
791+
return warning, nil
792+
}
793+
697794
// epConfigForNetMode finds, or creates, an entry in netConfig.EndpointsConfig
698795
// corresponding to nwMode.
699796
//

api/server/router/container/container_routes_test.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package container
22

33
import (
4+
"strings"
45
"testing"
56

67
"github.com/docker/docker/api/types/container"
78
"github.com/docker/docker/api/types/network"
9+
"github.com/docker/docker/libnetwork/netlabel"
810
"gotest.tools/v3/assert"
911
is "gotest.tools/v3/assert/cmp"
1012
)
@@ -228,3 +230,121 @@ func TestEpConfigForNetMode(t *testing.T) {
228230
})
229231
}
230232
}
233+
234+
func TestHandleSysctlBC(t *testing.T) {
235+
testcases := []struct {
236+
name string
237+
apiVersion string
238+
networkMode string
239+
sysctls map[string]string
240+
epConfig map[string]*network.EndpointSettings
241+
expEpSysctls []string
242+
expSysctls map[string]string
243+
expWarningContains []string
244+
expError string
245+
}{
246+
{
247+
name: "migrate to new ep",
248+
apiVersion: "1.46",
249+
networkMode: "mynet",
250+
sysctls: map[string]string{
251+
"net.ipv6.conf.all.disable_ipv6": "0",
252+
"net.ipv6.conf.eth0.accept_ra": "2",
253+
"net.ipv6.conf.eth0.forwarding": "1",
254+
},
255+
expSysctls: map[string]string{
256+
"net.ipv6.conf.all.disable_ipv6": "0",
257+
},
258+
expEpSysctls: []string{"ipv6.conf.forwarding=1", "ipv6.conf.accept_ra=2"},
259+
expWarningContains: []string{
260+
"Migrated",
261+
"net.ipv6.conf.eth0.accept_ra", "ipv6.conf.accept_ra=2",
262+
"net.ipv6.conf.eth0.forwarding", "ipv6.conf.forwarding=1",
263+
},
264+
},
265+
{
266+
name: "migrate nothing",
267+
apiVersion: "1.46",
268+
networkMode: "mynet",
269+
sysctls: map[string]string{
270+
"net.ipv6.conf.all.disable_ipv6": "0",
271+
},
272+
expSysctls: map[string]string{
273+
"net.ipv6.conf.all.disable_ipv6": "0",
274+
},
275+
},
276+
{
277+
name: "migration disabled for newer api",
278+
apiVersion: "1.47",
279+
networkMode: "mynet",
280+
sysctls: map[string]string{
281+
"net.ipv6.conf.eth0.accept_ra": "2",
282+
},
283+
expError: "must be supplied using driver option 'com.docker.network.endpoint.sysctls'",
284+
},
285+
{
286+
name: "only migrate eth0",
287+
apiVersion: "1.46",
288+
networkMode: "mynet",
289+
sysctls: map[string]string{
290+
"net.ipv6.conf.eth1.accept_ra": "2",
291+
},
292+
expError: "unable to determine network endpoint",
293+
},
294+
{
295+
name: "net name mismatch",
296+
apiVersion: "1.46",
297+
networkMode: "mynet",
298+
epConfig: map[string]*network.EndpointSettings{
299+
"shortid": {EndpointID: "epone"},
300+
},
301+
sysctls: map[string]string{
302+
"net.ipv6.conf.eth1.accept_ra": "2",
303+
},
304+
expError: "unable to find a network for sysctl",
305+
},
306+
}
307+
308+
for _, tc := range testcases {
309+
t.Run(tc.name, func(t *testing.T) {
310+
hostCfg := &container.HostConfig{
311+
NetworkMode: container.NetworkMode(tc.networkMode),
312+
Sysctls: map[string]string{},
313+
}
314+
for k, v := range tc.sysctls {
315+
hostCfg.Sysctls[k] = v
316+
}
317+
netCfg := &network.NetworkingConfig{
318+
EndpointsConfig: tc.epConfig,
319+
}
320+
321+
warnings, err := handleSysctlBC(hostCfg, netCfg, tc.apiVersion)
322+
323+
for _, s := range tc.expWarningContains {
324+
assert.Check(t, is.Contains(warnings, s))
325+
}
326+
327+
if tc.expError != "" {
328+
assert.Check(t, is.ErrorContains(err, tc.expError))
329+
} else {
330+
assert.Check(t, err)
331+
332+
assert.Check(t, is.DeepEqual(hostCfg.Sysctls, tc.expSysctls))
333+
334+
ep := netCfg.EndpointsConfig[tc.networkMode]
335+
if ep == nil {
336+
assert.Check(t, is.Nil(tc.expEpSysctls))
337+
} else {
338+
got, ok := ep.DriverOpts[netlabel.EndpointSysctls]
339+
assert.Check(t, ok)
340+
// Check for expected ep-sysctls.
341+
for _, want := range tc.expEpSysctls {
342+
assert.Check(t, is.Contains(got, want))
343+
}
344+
// Check for unexpected ep-sysctls.
345+
assert.Check(t, is.Len(got, len(strings.Join(tc.expEpSysctls, ","))))
346+
}
347+
}
348+
})
349+
}
350+
}

daemon/container_operations.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,18 @@ func validateEndpointSettings(nw *libnetwork.Network, nwName string, epConfig *n
615615
}
616616
}
617617

618+
if sysctls, ok := epConfig.DriverOpts[netlabel.EndpointSysctls]; ok {
619+
for _, sysctl := range strings.Split(sysctls, ",") {
620+
scname := strings.SplitN(sysctl, ".", 3)
621+
if len(scname) != 3 || (scname[0] != "ipv4" && scname[0] != "ipv6") {
622+
errs = append(errs,
623+
fmt.Errorf(
624+
"unrecognised network interface sysctl '%s'; represent 'net.X.Y.ethN.Z=V' as 'X.Y.Z=V', 'X' must be 'ipv4' or 'ipv6'",
625+
sysctl))
626+
}
627+
}
628+
}
629+
618630
if err := multierror.Join(errs...); err != nil {
619631
return fmt.Errorf("invalid endpoint settings:\n%w", err)
620632
}

docs/api/version-history.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@ keywords: "API, Docker, rcli, REST, documentation"
1515

1616
## v1.46 API changes
1717

18+
[Docker Engine API v1.46](https://docs.docker.com/engine/api/v1.46/) documentation
19+
20+
* `POST /containers/create` field `NetworkingConfig.EndpointsConfig.DriverOpts`,
21+
and `POST /networks/{id}/connect` field `EndpointsConfig.DriverOpts`, now
22+
support label `com.docker.network.endpoint.sysctls` for setting per-interface
23+
sysctls. The value is a comma separated list of shortened sysctl assignments,
24+
the prefix `net.` and the interface name must be omitted. For example,
25+
`net.ipv4.config.eth0.log_martians=1` must be shortened to
26+
`ipv4.config.log_martians=1`. In API versions up-to 1.46, top level
27+
`--sysctl` settings for `eth0` will be migrated to `DriverOpts` when possible.
28+
This automatic migration will be removed for API versions 1.47 and greater.
29+
1830
## v1.45 API changes
1931

2032
[Docker Engine API v1.45](https://docs.docker.com/engine/api/v1.45/) documentation

integration/networking/bridge_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ import (
1010

1111
"github.com/docker/docker/api/types"
1212
containertypes "github.com/docker/docker/api/types/container"
13+
apinetwork "github.com/docker/docker/api/types/network"
1314
"github.com/docker/docker/integration/internal/container"
1415
"github.com/docker/docker/integration/internal/network"
16+
"github.com/docker/docker/libnetwork/netlabel"
1517
"github.com/docker/docker/testutil"
1618
"github.com/docker/docker/testutil/daemon"
1719
"github.com/google/go-cmp/cmp/cmpopts"
@@ -828,3 +830,36 @@ func TestReadOnlySlashProc(t *testing.T) {
828830
})
829831
}
830832
}
833+
834+
// Test that it's possible to set a sysctl on an interface in the container
835+
// using DriverOpts.
836+
func TestSetEndpointSysctl(t *testing.T) {
837+
skip.If(t, testEnv.DaemonInfo.OSType == "windows", "no sysctl on Windows")
838+
839+
ctx := setupTest(t)
840+
d := daemon.New(t)
841+
d.StartWithBusybox(ctx, t)
842+
defer d.Stop(t)
843+
844+
c := d.NewClientT(t)
845+
defer c.Close()
846+
847+
const scName = "net.ipv4.conf.eth0.forwarding"
848+
for _, val := range []string{"0", "1"} {
849+
t.Run("set to "+val, func(t *testing.T) {
850+
ctx := testutil.StartSpan(ctx, t)
851+
runRes := container.RunAttach(ctx, t, c,
852+
container.WithCmd("sysctl", "-qn", scName),
853+
container.WithEndpointSettings(apinetwork.NetworkBridge, &apinetwork.EndpointSettings{
854+
DriverOpts: map[string]string{
855+
netlabel.EndpointSysctls: "ipv4.conf.forwarding=" + val,
856+
},
857+
}),
858+
)
859+
defer c.ContainerRemove(ctx, runRes.ContainerID, containertypes.RemoveOptions{Force: true})
860+
861+
stdout := runRes.Stdout.String()
862+
assert.Check(t, is.Equal(strings.TrimSpace(stdout), val))
863+
})
864+
}
865+
}

libnetwork/endpoint.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"encoding/json"
99
"fmt"
1010
"net"
11+
"strings"
1112
"sync"
1213

1314
"github.com/containerd/log"
@@ -401,6 +402,18 @@ func (ep *Endpoint) Value() []byte {
401402
return b
402403
}
403404

405+
func (ep *Endpoint) getSysctls() []string {
406+
ep.mu.Lock()
407+
defer ep.mu.Unlock()
408+
409+
if s, ok := ep.generic[netlabel.EndpointSysctls]; ok {
410+
if ss, ok := s.(string); ok {
411+
return strings.Split(ss, ",")
412+
}
413+
}
414+
return nil
415+
}
416+
404417
func (ep *Endpoint) SetValue(value []byte) error {
405418
return json.Unmarshal(value, ep)
406419
}

libnetwork/netlabel/labels.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ const (
2626
// DNSServers A list of DNS servers associated with the endpoint
2727
DNSServers = Prefix + ".endpoint.dnsservers"
2828

29+
// EndpointSysctls is a comma separated list of shortened sysctls ('net.X.Y.ethN.Z=V' -> 'X.Y.Z=V').
30+
EndpointSysctls = Prefix + ".endpoint.sysctls"
31+
2932
// EnableIPv6 constant represents enabling IPV6 at network level
3033
EnableIPv6 = Prefix + ".enable_ipv6"
3134

0 commit comments

Comments
 (0)
0