8000 Make container removal fail if platform-specific containers fail · kubernetes/kubernetes@4d4edcb · GitHub
[go: up one dir, main page]

Skip to content

Commit 4d4edcb

Browse files
committed
Make container removal fail if platform-specific containers fail
#74737 introduced a new in-memory map for the dockershim, that could potentially (in pathological cases) cause memory leaks - for containers that use GMSA cred specs, get created successfully, but then never get started nor removed. This patch addresses this issue by making container removal fail altogether when platform-specific clean ups fail: this allows clean ups to be retried later, when the kubelet attempts to remove the container again. Resolves issue #74843. Signed-off-by: Jean Rouge <rougej+github@gmail.com>
1 parent 22a8bcf commit 4d4edcb

File tree

4 files changed

+27
-38
lines changed

4 files changed

+27
-38
lines changed

pkg/kubelet/dockershim/docker_container.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,14 @@ func (ds *dockerService) CreateContainer(_ context.Context, r *runtimeapi.Create
182182
if cleanupInfo != nil {
183183
// we don't perform the clean up just yet at that could destroy information
184184
// needed for the container to start (e.g. Windows credentials stored in
185-
// registry keys); instead, we'll clean up after the container successfully
186-
// starts or gets removed
185+
// registry keys); instead, we'll clean up when the container gets removed
187186
ds.containerCleanupInfos[containerID] = cleanupInfo
188187
}
189188
return &runtimeapi.CreateContainerResponse{ContainerId: containerID}, nil
190189
}
191190

192-
// the creation failed, let's clean up right away
191+
// the creation failed, let's clean up right away - we ignore any errors though,
192+
// this is best effort
193193
ds.performPlatformSpecificContainerCleanupAndLogErrors(containerName, cleanupInfo)
194194

195195
return nil, createErr
@@ -278,8 +278,6 @@ func (ds *dockerService) StartContainer(_ context.Context, r *runtimeapi.StartCo
278278
return nil, fmt.Errorf("failed to start container %q: %v", r.ContainerId, err)
279279
}
280280

281-
ds.performPlatformSpecificContainerForContainer(r.ContainerId)
282-
283281
return &runtimeapi.StartContainerResponse{}, nil
284282
}
285283

@@ -294,19 +292,22 @@ func (ds *dockerService) StopContainer(_ context.Context, r *runtimeapi.StopCont
294292

295293
// RemoveContainer removes the container.
296294
func (ds *dockerService) RemoveContainer(_ context.Context, r *runtimeapi.RemoveContainerRequest) (*runtimeapi.RemoveContainerResponse, error) {
297-
ds.performPlatformSpecificContainerForContainer(r.ContainerId)
298-
299295
// Ideally, log lifecycle should be independent of container lifecycle.
300296
// However, docker will remove container log after container is removed,
301297
// we can't prevent that now, so we also clean up the symlink here.
302298
err := ds.removeContainerLogSymlink(r.ContainerId)
303299
if err != nil {
304300
return nil, err
305301
}
302+
errors := ds.performPlatformSpecificContainerForContainer(r.ContainerId)
303+
if len(errors) != 0 {
304+
return nil, fmt.Errorf("failed to run platform-specific clean ups for container %q: %v", r.ContainerId, errors)
305+
}
306306
err = ds.client.RemoveContainer(r.ContainerId, dockertypes.ContainerRemoveOptions{RemoveVolumes: true, Force: true})
307307
if err != nil {
308308
return nil, fmt.Errorf("failed to remove container %q: %v", r.ContainerId, err)
309309
}
310+
310311
return &runtimeapi.RemoveContainerResponse{}, nil
311312
}
312313

@@ -454,19 +455,27 @@ func (ds *dockerService) UpdateContainerResources(_ context.Context, r *runtimea
454455
return &runtimeapi.UpdateContainerResourcesResponse{}, nil
455456
}
456457

457-
func (ds *dockerService) performPlatformSpecificContainerForContainer(containerID string) {
458+
func (ds *dockerService) performPlatformSpecificContainerForContainer(containerID string) (errors []error) {
458459
if cleanupInfo, present := ds.containerCleanupInfos[containerID]; present {
459-
ds.performPlatformSpecificContainerCleanupAndLogErrors(containerID, cleanupInfo)
460-
delete(ds.containerCleanupInfos, containerID)
460+
errors = ds.performPlatformSpecificContainerCleanupAndLogErrors(containerID, cleanupInfo)
461+
462+
if len(errors) == 0 {
463+
delete(ds.containerCleanupInfos, containerID)
464+
}
461465
}
466+
467+
return
462468
}
463469

464-
func (ds *dockerService) performPlatformSpecificContainerCleanupAndLogErrors(containerNameOrID string, cleanupInfo *containerCleanupInfo) {
470+
func (ds *dockerService) performPlatformSpecificContainerCleanupAndLogErrors(containerNameOrID string, cleanupInfo *containerCleanupInfo) []error {
465471
if cleanupInfo == nil {
466-
return
472+
return nil
467473
}
468474

469-
for _, err := range ds.performPlatformSpecificContainerCleanup(cleanupInfo) {
475+
errors := ds.performPlatformSpecificContainerCleanup(cleanupInfo)
476+
for _, err := range errors {
470477
klog.Warningf("error when cleaning up after container %q: %v", containerNameOrID, err)
471478
}
479+
480+
return errors
472481
}

pkg/kubelet/dockershim/docker_container_unsupported.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,13 @@ type containerCleanupInfo struct{}
2727

2828
// applyPlatformSpecificDockerConfig applies platform-specific configurations to a dockertypes.ContainerCreateConfig struct.
2929
// The containerCleanupInfo struct it returns will be passed as is to performPlatformSpecificContainerCleanup
30-
// after either:
31-
// * the container creation has failed
32-
// * the container has been successfully started
33-
// * the container has been removed
34-
// whichever happens first.
30+
// after either the container creation has failed or the container has been removed.
3531
func (ds *dockerService) applyPlatformSpecificDockerConfig(*runtimeapi.CreateContainerRequest, *dockertypes.ContainerCreateConfig) (*containerCleanupInfo, error) {
3632
return nil, nil
3733
}
3834

3935
// performPlatformSpecificContainerCleanup is responsible for doing any platform-specific cleanup
40-
// after either:
41-
// * the container creation has failed
42-
// * the container has been successfully started
43-
// * the container has been removed
44-
// whichever happens first.
45-
// Any errors it returns are simply logged, but do not prevent the container from being started or
46-
// removed.
36+
// after either the container creation has failed or the container has been removed.
4737
func (ds *dockerService) performPlatformSpecificContainerCleanup(cleanupInfo *containerCleanupInfo) (errors []error) {
4838
return
4939
}

pkg/kubelet/dockershim/docker_container_windows.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,7 @@ type containerCleanupInfo struct {
3838

3939
// applyPlatformSpecificDockerConfig applies platform-specific configurations to a dockertypes.ContainerCreateConfig struct.
4040
// The containerCleanupInfo struct it returns will be passed as is to performPlatformSpecificContainerCleanup
41-
// after either:
42-
// * the container creation has failed
43-
// * the container has been successfully started
44-
// * the container has been removed
45-
// whichever happens first.
41+
// after either the container creation has failed or the container has been removed.
4642
func (ds *dockerService) applyPlatformSpecificDockerConfig(request *runtimeapi.CreateContainerRequest, createConfig *dockertypes.ContainerCreateConfig) (*containerCleanupInfo, error) {
4743
cleanupInfo := &containerCleanupInfo{}
4844

@@ -163,13 +159,7 @@ func randomString(length int) (string, error) {
163159
}
164160

165161
// performPlatformSpecificContainerCleanup is responsible for doing any platform-specific cleanup
166-
// after either:
167-
// * the container creation has failed
168-
// * the container has been successfully started
169-
// * the container has been removed
170-
// whichever happens first.
171-
// Any errors it returns are simply logged, but do not prevent the container from being started or
172-
// removed.
162+
// after either the container creation has failed or the container has been removed.
173163
func (ds *dockerService) performPlatformSpecificContainerCleanup(cleanupInfo *containerCleanupInfo) (errors []error) {
174164
if err := removeGMSARegistryValue(cleanupInfo); err != nil {
175165
errors = append(errors, err)

pkg/kubelet/dockershim/docker_service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ type dockerService struct {
311311
startLocalStreamingServer bool
312312

313313
// containerCleanupInfos maps container IDs to the `containerCleanupInfo` structs
314-
// needed to clean up after containers have been started or removed.
314+
// needed to clean up after containers have been removed.
315315
// (see `applyPlatformSpecificDockerConfig` and `performPlatformSpecificContainerCleanup`
316316
// methods for more info).
317317
containerCleanupInfos map[string]*containerCleanupInfo

0 commit comments

Comments
 (0)
0