8000 libnet/ipams/default: introduce a linear allocator · moby/moby@9c6196f · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c6196f

Browse files
committed
libnet/ipams/default: introduce a linear allocator
The previous allocator was subnetting address pools eagerly when the daemon started, and would then just iterate over that list whenever RequestPool was called. This was leading to high memory usage whenever IPv6 pools were configured with a target subnet size too different from the pool's prefix size. For instance: pool = fd00::/8, target size = /64 -- 2 ^ (64-8) subnets would be generated upfront. This would take approx. 9 * 10^18 bits -- way too much for any computer in 2024. Another noteworthy issue: the previous implementation was allocating a subnet, and then in another layer was checking whether the allocation was conflicting with some 'reserved networks'. If so, the allocation would be retried, and so on. To make it worse, 'reserved networks' would be recomputed on every iteration. This is totally ineffective as there could be 'reserved networks' that fully overlap a given address pool (or many!). To fix this issue, a new field `Exclude` is added to `PoolRequest` (the argument of `RequestPool`). It's up to each driver to take it into account. Since we don't know whether this retry loop is useful for some remote IPAM driver, it's reimplemented bug-for-bug directly in the remote driver. The new allocator uses a linear-search algorithm. It takes advantage of all lists (predefined pools, allocated subnets and reserved networks) being sorted and logically combines 'allocated' and 'reserved' through a 'double cursor' to iterate over both lists at the same time while preserving the total order. At the same time, it iterates over 'predefined' pools and looks for the first empty space that would be a good fit. Currently, the size of the allocated subnet is still dictated by each 'predefined' pool. We should consider hardcoding that size instead, and letting users specify what subnet size they want. This wasn't possible before as the subnets were generated upfront. This new allocator should be able to deal with this easily.
The method used for static allocation has been updated to make sure the ascending order of 'allocated' is preserved. It's bug-for-bug compatible with the previous implementation. One consequence of this new algorithm is that we don't keep track of where the last allocation happened; we just allocate the first free subnet we find. Before, the sequence "allocate 10.0.1.0/24 and 10.0.2.0/24; deallocate 10.0.1.0/24; allocate again" would yield 10.0.3.0/24 for the 3rd allocation. Now, the 3rd allocation would yield 10.0.1.0/24 once again. As it doesn't change the semantics of the allocator, there's no reason to worry about that. Finally, about 'reserved networks': the heuristics we use are now properly documented. It was discovered that we don't check routes for IPv6 allocations -- this can't be changed because there's no such thing as on-link routes for IPv6. (Kudos to Rob Murray for coming up with the linear-search idea.) Signed-off-by: Albin Kerouanton <albinker@gmail.com>
1 parent c5376e5 commit 9c6196f

29 files changed

+1173
-689
lines changed

daemon/config/config_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package config // import "github.com/docker/docker/daemon/config"
22

33
import (
44
"encoding/json"
5+
"net/netip"
56
"os"
67
"path/filepath"
78
"reflect"
@@ -157,7 +158,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
157158
emptyConfigFile := makeConfigFile(t, `{}`)
158159
configFile := makeConfigFile(t, `{"default-address-pools":[{"base": "10.123.0.0/16", "size": 24 }]}`)
159160

160-
expected := []*ipamutils.NetworkToSplit{{Base: "10.123.0.0/16", Size: 24}}
161+
expected := []*ipamutils.NetworkToSplit{{Base: netip.MustParsePrefix("10.123.0.0/16"), Size: 24}}
161162

162163
t.Run("empty config file", func(t *testing.T) {
163164
conf := Config{}
@@ -167,7 +168,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
167168

168169
config, err := MergeDaemonConfigurations(&conf, flags, emptyConfigFile)
169170
assert.NilError(t, err)
170-
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected)
171+
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected, cmpopts.EquateComparable(netip.Prefix{}))
171172
})
172173

173174
t.Run("config file", func(t *testing.T) {
@@ -177,7 +178,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
177178

178179
config, err := MergeDaemonConfigurations(&conf, flags, configFile)
179180
assert.NilError(t, err)
180-
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected)
181+
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected, cmpopts.EquateComparable(netip.Prefix{}))
181182
})
182183

183184
t.Run("with conflicting options", func(t *testing.T) {

daemon/info.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ func (daemon *Daemon) fillDefaultAddressPools(ctx context.Context, v *system.Inf
258258
defer span.End()
259259
for _, pool := range cfg.DefaultAddressPools.Value() {
260260
v.DefaultAddressPools = append(v.DefaultAddressPools, system.NetworkAddressPool{
261-
Base: pool.Base,
261+
Base: pool.Base.String(),
262262
Size: pool.Size,
263263
})
264264
}

libnetwork/cnmallocator/drivers_ipam.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package cnmallocator
22

33
import (
44
"context"
5+
"fmt"
6+
"net/netip"
57
"strconv"
68
"strings"
79

@@ -22,8 +24,12 @@ func initIPAMDrivers(r ipamapi.Registerer, netConfig *networkallocator.Config) e
2224
// happens with default address pool option
2325
if netConfig != nil {
2426
for _, p := range netConfig.DefaultAddrPool {
27+
base, err := netip.ParsePrefix(p)
28+
if err != nil {
29+
return fmt.Errorf("invalid prefix %q: %w", p, err)
30+
}
2531
addressPool = append(addressPool, &ipamutils.NetworkToSplit{
26-
Base: p,
32+
Base: base,
2733
Size: int(netConfig.SubnetSize),
2834
})
2935
str.WriteString(p + ",")

libnetwork/drivers/bridge/bridge_linux_test.go

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ import (
1212

1313
"github.com/docker/docker/internal/testutils/netnsutils"
1414
"github.com/docker/docker/libnetwork/driverapi"
15+
"github.com/docker/docker/libnetwork/internal/netiputil"
16+
"github.com/docker/docker/libnetwork/ipamapi"
17+
"github.com/docker/docker/libnetwork/ipams/defaultipam"
1518
"github.com/docker/docker/libnetwork/ipamutils"
1619
"github.com/docker/docker/libnetwork/iptables"
1720
"github.com/docker/docker/libnetwork/netlabel"
@@ -206,17 +209,21 @@ func compareBindings(a, b []types.PortBinding) bool {
206209
return true
207210
}
208211

212+
var a, _ = defaultipam.NewAllocator(ipamutils.GetLocalScopeDefaultNetworks(), []*ipamutils.NetworkToSplit(nil))
213+
209214
func getIPv4Data(t *testing.T) []driverapi.IPAMData {
210-
ipd := driverapi.IPAMData{AddressSpace: "full"}
211-
nw, err := netutils.FindAvailableNetwork(ipamutils.GetLocalScopeDefaultNetworks())
212-
if err != nil {
213-
t.Fatal(err)
214-
}
215-
ipd.Pool = nw
216-
// Set network gateway to X.X.X.1
217-
ipd.Gateway = types.GetIPNetCopy(nw)
218-
ipd.Gateway.IP[len(ipd.Gateway.IP)-1] = 1
219-
return []driverapi.IPAMData{ipd}
215+
t.Helper()
216+
217+
alloc, err := a.RequestPool(ipamapi.PoolRequest{
218+
AddressSpace: "LocalDefault",
219+
Exclude: netutils.InferReservedNetworks(false),
220+
})
221+
assert.NilError(t, err)
222+
223+
gw, _, err := a.RequestAddress(alloc.PoolID, nil, nil)
224+
assert.NilError(t, err)
225+
226+
return []driverapi.IPAMData{{AddressSpace: "LocalDefault", Pool: netiputil.ToIPNet(alloc.Pool), Gateway: gw}}
220227
}
221228

222229
func getIPv6Data(t *testing.T) []driverapi.IPAMData {

libnetwork/internal/netiputil/netiputil.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,32 @@ func AddrPortFromNet(addr net.Addr) netip.AddrPort {
5959
}
6060
return netip.AddrPort{}
6161
}
62+
63+
// LastAddr returns the last address of prefix 'p'.
64+
func LastAddr(p netip.Prefix) netip.Addr {
65+
return ipbits.Add(ipbits.Sub(p.Addr(), 1, 0), 1, uint(p.Addr().BitLen()-p.Bits()))
66+
}
67+
68+
// Compare two prefixes and return a negative, 0, or a positive integer as
69+
// required by [slices.SortFunc]. When two prefixes with the same address are
70+
// provided, the shortest one will be sorted first.
71+
func Compare(a, b netip.Prefix) int {
72+
cmp := a.Addr().Compare(b.Addr())
73+
if cmp != 0 {
74+
return cmp
75+
}
76+
return a.Bits() - b.Bits()
77+
}
78+
79+
// PrefixAfter returns the prefix of size 'sz' right after 'prev'.
80+
func PrefixAfter(prev netip.Prefix, sz int) netip.Prefix {
81+
s := sz
82+
if prev.Bits() < sz {
83+
s = prev.Bits()
84+
}
85+
addr := ipbits.Add(prev.Addr(), 1, uint(prev.Addr().BitLen()-s))
86+
if addr.IsUnspecified() {
87+
return netip.Prefix{}
88+
}
89+
return netip.PrefixFrom(addr, sz).Masked()
90+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package netiputil
2+
3+
import (
4+
"net/netip"
5+
"testing"
6+
7+
"gotest.tools/v3/assert"
8+
)
9+
10+
func TestLastAddr(t *testing.T) {
11+
testcases := []struct {
12+
p netip.Prefix
13+
want netip.Addr
14+
}{
15+
{netip.MustParsePrefix("10.0.0.0/24"), netip.MustParseAddr("10.0.0.255")},
16+
{netip.MustParsePrefix("10.0.0.0/8"), netip.MustParseAddr("10.255.255.255")},
17+
{netip.MustParsePrefix("fd00::/64"), netip.MustParseAddr("fd00::ffff:ffff:ffff:ffff")},
18+
{netip.MustParsePrefix("fd00::/16"), netip.MustParseAddr("fd00:ffff:ffff:ffff:ffff:ffff:ffff:ffff")},
19+
{netip.MustParsePrefix("ffff::/16"), netip.MustParseAddr("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")},
20+
}
21+
22+
for _, tc := range testcases {
23+
last := LastAddr(tc.p)
24+
assert.Check(t, last == tc.want, "LastAddr(%q) = %s; want: %s", tc.p, last, tc.want)
25+
}
26+
}
27+
28+
func TestPrefixAfter(t *testing.T) {
29+
testcases := []struct {
30+
prev netip.Prefix
31+
sz int
32+
want netip.Prefix
33+
}{
34+
{netip.MustParsePrefix("10.0.10.0/24"), 24, netip.MustParsePrefix("10.0.11.0/24")},
35+
{netip.MustParsePrefix("10.0.10.0/24"), 16, netip.MustParsePrefix("10.1.0.0/16")},
36+
{netip.MustParsePrefix("10.10.0.0/16"), 24, netip.MustParsePrefix("10.11.0.0/24")},
37+
{netip.MustParsePrefix("2001:db8:feed:cafe:b000:dead::/96"), 16, netip.MustParsePrefix("2002::/16")},
38+
{netip.MustParsePrefix("ffff::/16"), 16, netip.Prefix{}},
39+
{netip.MustParsePrefix("2001:db8:1::/48"), 64, netip.MustParsePrefix("2001:db8:2::/64")},
40+
}
41+
42+
for _, tc := range testcases {
43+
next := PrefixAfter(tc.prev, tc.sz)
44+
assert.Check(t, next == tc.want, "PrefixAfter(%q, %d) = %s; want: %s", tc.prev, tc.sz, next, tc.want)
45+
}
46+
}

libnetwork/ipamapi/contract.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ var (
3535
ErrIPOutOfRange = types.InvalidParameterErrorf("requested address is out of range")
3636
ErrPoolOverlap = types.ForbiddenErrorf("Pool overlaps with other one on this address space")
3737
ErrBadPool = types.InvalidParameterErrorf("address space does not contain specified address pool")
38+
ErrNoMoreSubnets = types.InvalidParameterErrorf("all predefined address pools have been fully subnetted")
3839
)
3940

4041
// Ipam represents the interface the IPAM service plugins must implement
@@ -73,6 +74,10 @@ type PoolRequest struct {
7374
// Options is a map of opaque k/v passed to the driver. It's non-mandatory.
7475
// Drivers are free to ignore it.
7576
Options map[string]string
77+
// Exclude is a list of prefixes the requester wishes not to be dynamically
78+
// allocated (ie. when Pool isn't specified). It's up to the IPAM driver to
79+
// take it into account, or totally ignore it.
80+
Exclude []netip.Prefix
7681
// V6 indicates which address family should be used to dynamically allocate
7782
// a prefix (ie. when Pool isn't specified).
7883
V6 bool

0 commit comments

Comments
 (0)
0