8000 KEP-127: user namespace support for stateless pods by giuseppe · Pull Request #116377 · kubernetes/kubernetes · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -819,10 +819,6 @@ func (adc *attachDetachController) GetPodVolumeDir(podUID types.UID, pluginName,
return ""
}

func (adc *attachDetachController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
return nil, nil, nil
}

func (adc *attachDetachController) GetPodPluginDir(podUID types.UID, pluginName string) string {
return ""
}
Expand Down
4 changes: 0 additions & 4 deletions pkg/controller/volume/expand/expand_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,6 @@ func (expc *expandController) GetPodsDir() string {
return ""
}

func (expc *expandController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
return nil, nil, nil
}

func (expc *expandController) GetPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
return ""
}
Expand Down
4 changes: 0 additions & 4 deletions pkg/controller/volume/persistentvolume/volume_host.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,6 @@ func (ctrl *PersistentVolumeController) GetPodVolumeDir(podUID types.UID, plugin
return ""
}

func (ctrl *PersistentVolumeController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
return nil, nil, nil
}

func (ctrl *PersistentVolumeController) GetPodPluginDir(podUID types.UID, pluginName string) string {
return ""
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/kubelet/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/sysctl"
"k8s.io/kubernetes/pkg/kubelet/token"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/userns"
"k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/kubelet/util/manager"
"k8s.io/kubernetes/pkg/kubelet/util/queue"
Expand Down Expand Up @@ -900,7 +901,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
StateDirectory: rootDirectory,
})
klet.shutdownManager = shutdownManager
klet.usernsManager, err = MakeUserNsManager(klet)
klet.usernsManager, err = userns.MakeUserNsManager(klet)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1248,7 +1249,7 @@ type Kubelet struct {
shutdownManager nodeshutdown.Manager

// Manage user namespaces
usernsManager *usernsManager
usernsManager *userns.UsernsManager

// Mutex to serialize new pod admission and existing pod resizing
podResizeMutex sync.Mutex
Expand Down
5 changes: 5 additions & 0 deletions pkg/kubelet/kubelet_getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ func (kl *Kubelet) GetPodDir(podUID types.UID) string {
return kl.getPodDir(podUID)
}

// ListPodsFromDisk gets a list of pods that have data directories.
func (kl *Kubelet) ListPodsFromDisk() ([]types.UID, error) {
return kl.listPodsFromDisk()
}

// getPodDir returns the full path to the per-pod directory for the pod with
// the given UID.
func (kl *Kubelet) getPodDir(podUID types.UID) string {
Expand Down
4 changes: 0 additions & 4 deletions pkg/kubelet/kubelet_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,6 @@ func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.Us
return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod)
}

func (kl *Kubelet) getHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
return kl.usernsManager.getHostIDsForPod(pod, containerUID, containerGID)
}

// GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
// given that pod's spec and annotations or returns an error.
func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {
Expand Down
9 changes: 9 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
return err
}
config.Linux = cl

if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.UserNamespacesStatelessPodsSupport) {
if cl.SecurityContext.NamespaceOptions.UsernsOptions != nil {
for _, mount := range config.Mounts {
mount.UidMappings = cl.SecurityContext.NamespaceOptions.UsernsOptions.Uids
mount.GidMappings = cl.SecurityContext.NamespaceOptions.UsernsOptions.Gids
}
}
}
return nil
}

Expand Down
116 changes: 23 additions & 93 deletions pkg/kubelet/userns_manager.go → pkg/kubelet/userns/userns_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet
package userns

import (
"encoding/json"
Expand Down Expand Up @@ -49,11 +49,11 @@ const maxPods = 1024
const mapReInitializeThreshold = 1000

type userNsPodsManager interface {
getPodDir(podUID types.UID) string
listPodsFromDisk() ([]types.UID, error)
GetPodDir(podUID types.UID) string
ListPodsFromDisk() ([]types.UID, error)
}

type usernsManager struct {
type UsernsManager struct {
used *allocator.AllocationBitmap
usedBy map[types.UID]uint32 // Map pod.UID to range used
removed int
Expand Down Expand Up @@ -86,8 +86,8 @@ const mappingsFile = "userns"

// writeMappingsToFile writes the specified user namespace configuration to the pod
// directory.
func (m *usernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace) error {
dir := m.kl.getPodDir(pod)
func (m *UsernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace) error {
dir := m.kl.GetPodDir(pod)

data, err := json.Marshal(userNs)
if err != nil {
Expand Down Expand Up @@ -119,17 +119,17 @@ func (m *usernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace)
}

// readMappingsFromFile reads the user namespace configuration from the pod directory.
func (m *usernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
dir := m.kl.getPodDir(pod)
func (m *UsernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
dir := m.kl.GetPodDir(pod)
fstore, err := utilstore.NewFileStore(dir, &utilfs.DefaultFs{})
if err != nil {
return nil, err
}
return fstore.Read(mappingsFile)
}

func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {
m := usernsManager{
func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) {
m := UsernsManager{
// Create a bitArray for all the UID space (2^32).
// As a by product of that, no index param to bitArray can be out of bounds (index is uint32).
used: allocator.NewAllocationMap((math.MaxUint32+1)/userNsLength, "user namespaces"),
Expand All @@ -141,17 +141,12 @@ func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {
return nil, err
}

// Second block will be used for phase II. Don't assign that range for now.
if _, err := m.used.Allocate(1); err != nil {
return nil, err
}

// do not bother reading the list of pods if user namespaces are not enabled.
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return &m, nil
}

found, err := kl.listPodsFromDisk()
found, err := kl.ListPodsFromDisk()
if err != nil {
if os.IsNotExist(err) {
return &m, nil
Expand All @@ -171,7 +166,7 @@ func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {

// recordPodMappings registers the range used for the user namespace if the
// usernsConfFile exists in the pod directory.
func (m *usernsManager) recordPodMappings(pod types.UID) error {
func (m *UsernsManager) recordPodMappings(pod types.UID) error {
content, err := m.readMappingsFromFile(pod)
if err != nil && err != utilstore.ErrKeyNotFound {
return err
Expand All @@ -187,15 +182,15 @@ func (m *usernsManager) recordPodMappings(pod types.UID) error {
}

// isSet checks if the specified index is already set.
func (m *usernsManager) isSet(v uint32) bool {
func (m *UsernsManager) isSet(v uint32) bool {
index := int(v / userNsLength)
return m.used.Has(index)
}

// allocateOne finds a free user namespace and allocate it to the specified pod.
// The first return value is the first ID in the user namespace, the second returns
// the length for the user namespace range.
func (m *usernsManager) allocateOne(pod types.UID) (firstID uint32, length uint32, err error) {
func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint32, err error) {
if m.numAllocated >= maxPods {
return 0, 0, fmt.Errorf("limit on count of pods with user namespaces exceeded (limit is %v, current pods with userns: %v)", maxPods, m.numAllocated)
}
Expand All @@ -222,7 +217,7 @@ func (m *usernsManager) allocateOne(pod types.UID) (firstID uint32, length uint3
}

// record stores the user namespace [from; from+length] to the specified pod.
func (m *usernsManager) record(pod types.UID, from, length uint32) (err error) {
func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) {
if length != userNsLength {
return fmt.Errorf("wrong user namespace length %v", length)
}
Expand Down Expand Up @@ -262,7 +257,7 @@ func (m *usernsManager) record(pod types.UID, from, length uint32) (err error) {
}

// Release releases the user namespace allocated to the specified pod.
func (m *usernsManager) Release(podUID types.UID) {
func (m *UsernsManager) Release(podUID types.UID) {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return
}
Expand All @@ -273,7 +268,7 @@ func (m *usernsManager) Release(podUID types.UID) {
m.releaseWithLock(podUID)
}

func (m *usernsManager) releaseWithLock(pod types.UID) {
func (m *UsernsManager) releaseWithLock(pod types.UID) {
v, ok := m.usedBy[pod]
if !ok {
klog.V(5).InfoS("pod user namespace allocation not present", "podUID", pod)
Expand All @@ -285,7 +280,7 @@ func (m *usernsManager) releaseWithLock(pod types.UID) {
m.numAllocated--
m.removed++

_ = os.Remove(filepath.Join(m.kl.getPodDir(pod), mappingsFile))
_ = os.Remove(filepath.Join(m.kl.GetPodDir(pod), mappingsFile))

if m.removed%mapReInitializeThreshold == 0 {
n := make(map[types.UID]uint32)
Expand All @@ -298,7 +293,7 @@ func (m *usernsManager) releaseWithLock(pod types.UID) {
m.used.Release(int(v / userNsLength))
}

func (m *usernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) {
func (m *UsernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) {
if err = json.Unmarshal([]byte(content), &userNs); err != nil {
err = fmt.Errorf("can't parse file: %w", err)
return
Expand Down Expand Up @@ -338,7 +333,7 @@ func (m *usernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte)
return
}

func (m *usernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err error) {
func (m *UsernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err error) {
firstID, length, err := m.allocateOne(pod.UID)
if err != nil {
return
Expand Down Expand Up @@ -371,7 +366,7 @@ func (m *usernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err err
}

// GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
func (m *usernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
func (m *UsernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return nil, nil
}
Expand Down Expand Up @@ -431,7 +426,7 @@ func (m *usernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimea
// CleanupOrphanedPodUsernsAllocations reconciliates the state of user namespace
// allocations with the pods actually running. It frees any user namespace
// allocation for orphaned pods.
func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error {
func (m *UsernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return nil
}
Expand All @@ -448,7 +443,7 @@ func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runn
}

allFound := sets.NewString()
found, err := m.kl.listPodsFromDisk()
found, err := m.kl.ListPodsFromDisk()
if err != nil {
return err
}
Expand Down Expand Up @@ -479,68 +474,3 @@ func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runn

return nil
}

// getHostIDsForPod if the pod uses user namespaces, takes the uid and gid
// inside the container and returns the host UID and GID those are mapped to on
// the host. If containerUID/containerGID is nil, then it returns the host
// UID/GID for ID 0 inside the container.
// If the pod is not using user namespaces, as there is no mapping needed, the
// same containerUID and containerGID params are returned.
func (m *usernsManager) getHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return containerUID, containerGID, nil
}

if pod == nil || pod.Spec.HostUsers == nil || *pod.Spec.HostUsers == true {
return containerUID, containerGID, nil
}

mapping, err := m.GetOrCreateUserNamespaceMappings(pod)
if err != nil {
err = fmt.Errorf("Error getting pod user namespace mapping: %w", err)
return
}

uid, err := hostIDFromMapping(mapping.Uids, containerUID)
if err != nil {
err = fmt.Errorf("Error getting host UID: %w", err)
return
}

gid, err := hostIDFromMapping(mapping.Gids, containerGID)
if err != nil {
err = fmt.Errorf("Error getting host GID: %w", err)
return
}

return &uid, &gid, nil
}

func hostIDFromMapping(mapping []*runtimeapi.IDMapping, containerId *int64) (int64, error) {
if len(mapping) == 0 {
return 0, fmt.Errorf("can't use empty user namespace mapping")
}

// If none is requested, root inside the container is used
id := int64(0)
if containerId != nil {
id = *containerId
}

for _, m := range mapping {
if m == nil {
continue
}

firstId := int64(m.ContainerId)
lastId := firstId + int64(m.Length) - 1

// The id we are looking for is in the range
if id >= firstId && id <= lastId {
// Return the host id for this container id
return int64(m.HostId) + id - firstId, nil
}
}

return 0, fmt.Errorf("ID: %v not present in pod user namespace", id)
}
Loading
0