I was able to get a ManagedNodeGroup working with a custom LaunchTemplate that sets up swap, in Python. Below is what's working for me.
I was able to set up a swap file on each EC2 instance and start the kubelet so that it allows swap usage. However, I wasn't able to set the swapBehavior config option: the kubelet on EKS doesn't seem to recognize that key (nor the NodeSwap feature gate), despite documentation saying it should.
$ pulumi about
CLI
Version 3.46.1
Go Version go1.19.2
Go Compiler gc
Plugins
NAME VERSION
aws 5.7.2
eks 0.42.7
honeycomb 0.0.11
kubernetes 3.23.1
python 3.10.8
import base64
import json
from typing import Tuple

import pulumi
import pulumi_aws as aws
import pulumi_eks as eks
import pulumi_kubernetes as k8s

# EKS_CLUSTER_NAME, TEAM_MEMBERS, _CLUSTER_VPC, _CLUSTER_SUBNETS, and the
# _define_node_role helper are defined elsewhere in this stack.
_aws_account_id = aws.get_caller_identity().account_id
_K8S_VERSION = "1.23" # latest visible in above version of pulumi-eks
_NODE_ROOT_VOLUME_SIZE_GIB = 60
# Script to run on EKS nodes as root before EKS bootstrapping (which starts the kubelet)
#
# Make a 40GB swap file. This is a guess at allowing a few pods to overrun their
# requested RAM significantly.
# https://stackoverflow.com/questions/17173972/how-do-you-add-swap-to-an-ec2-instance
#
# Enable swap usage in the kubelet config, following the editing commands used in the
# bootstrap script.
# https://github.com/awslabs/amazon-eks-ami/blob/master/files/bootstrap.sh
# https://aws.amazon.com/premiumsupport/knowledge-center/eks-worker-nodes-image-cache/
# https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/
# This user data must be in MIME format when passed to a launch template.
# https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html
#
# From MNG launch template docs:
# "your user data is merged with Amazon EKS user data required for nodes to join the
# cluster. Don't specify any commands in your user data that starts or modifies kubelet."
# Inspecting the instance user data shows this script and the EKS-provided user data as
# separate MIME parts, with this one first.
#
# The swapBehavior key isn't recognized by the kubelet on EKS. Docs say it requires
# featureGates.NodeSwap=true, but the kubelet doesn't recognize that feature gate either.
# jq adds quotes around the "swapBehavior" key.
# It seems like the behavior defaults to limited swap: pods are killed at their
# resource limit, regardless of swap availability/usage.
# TODO set UnlimitedSwap if/when possible on AWS, using:
# echo "$(jq ".memorySwap={swapBehavior:\"UnlimitedSwap\"}" $KUBELET_CONFIG)" > $KUBELET_CONFIG
_NODE_USER_DATA_ADD_SWAP_AND_ENABLE_IN_KUBELET_CONFIG = r"""#!/bin/bash
set -e
# Use fallocate which is much faster than dd (essentially instant) since we do not
# care about the initial contents of the file.
fallocate -l 40G /swapfile
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile
echo "/swapfile swap swap defaults 0 0" >> /etc/fstab
KUBELET_CONFIG=/etc/kubernetes/kubelet/kubelet-config.json
cp $KUBELET_CONFIG $KUBELET_CONFIG.orig
echo "$(jq ".failSwapOn=false" $KUBELET_CONFIG)" > $KUBELET_CONFIG
"""
_USER_DATA_MIME_HEADER = """MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"

--//
Content-Type: text/x-shellscript; charset="us-ascii"

"""
_USER_DATA_MIME_FOOTER = """
--//--
"""
def _wrap_and_encode_user_data(script_text: str) -> str:
    mime_encapsulated = _USER_DATA_MIME_HEADER + script_text + _USER_DATA_MIME_FOOTER
    encoded_bytes = base64.b64encode(mime_encapsulated.encode())
    return encoded_bytes.decode("latin1")
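# Optional sanity check (safe to delete): decode the wrapped user data and confirm
# the MIME structure and the script survive the round trip.
_decoded_check = base64.b64decode(_wrap_and_encode_user_data("#!/bin/bash\necho hello")).decode()
assert _decoded_check.startswith("MIME-Version: 1.0")
assert "#!/bin/bash" in _decoded_check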
def _define_cluster_and_get_provider() -> Tuple[eks.Cluster, k8s.Provider]:
    # https://www.pulumi.com/docs/guides/crosswalk/aws/eks/
    # https://www.pulumi.com/registry/packages/eks/api-docs/cluster/#cluster

    # Map AWS IAM users to the Kubernetes internal RBAC admin group. Mapping individual
    # users avoids having to go from a group to a role with assume-role policies.
    # Kubernetes has its own permissions (RBAC) system, with predefined groups for
    # common permission levels. AWS EKS provides translation from IAM to that, but we
    # must explicitly map particular users or roles that should be granted permissions
    # within the cluster.
    #
    # AWS docs: https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
    # Detailed example: https://apperati.io/articles/managing_eks_access-bs/
    # IAM groups are not supported, only users or roles:
    # https://github.com/kubernetes-sigs/aws-iam-authenticator/issues/176
    user_mappings = []
    for username in TEAM_MEMBERS:
        user_mappings.append(
            eks.UserMappingArgs(
                # AWS IAM user to set permissions for
                user_arn=f"arn:aws:iam::{_aws_account_id}:user/{username}",
                # k8s RBAC group from which this IAM user will get permissions
                groups=["system:masters"],
                # k8s RBAC username to create for the user
                username=username,
            )
        )
    node_role = _define_node_role(EKS_CLUSTER_NAME)

    cluster = eks.Cluster(
        EKS_CLUSTER_NAME,
        name=EKS_CLUSTER_NAME,
        version=_K8S_VERSION,
        # Details of VPC usage for EKS:
        # https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html
        vpc_id=_CLUSTER_VPC,
        subnet_ids=_CLUSTER_SUBNETS,
        # The OpenID Connect provider maps from k8s to AWS identities.
        # Get the OIDC provider's ID with:
        #   aws eks describe-cluster --name <CLUSTER_NAME> --query "cluster.identity.oidc.issuer" --output text
        create_oidc_provider=True,
        user_mappings=user_mappings,
        skip_default_node_group=True,
        instance_role=node_role,
    )

    # Export the kubeconfig to allow kubectl to access the cluster. For example:
    #   pulumi stack output my-kubeconfig > kubeconfig.yml
    #   KUBECONFIG=./kubeconfig.yml kubectl get pods -A
    pulumi.export("my-kubeconfig", cluster.kubeconfig)
    # Work around cluster.provider being the wrong type for Namespace to use.
    # https://github.com/pulumi/pulumi-eks/issues/662
    provider = k8s.Provider(
        "my-cluster-provider",
        kubeconfig=cluster.kubeconfig.apply(lambda k: json.dumps(k)),
    )
    # Configure the startup script and root volume size to allow for swap.
    #
    # Changing the launch template (or the included user data script) will cause the
    # ManagedNodeGroup to replace nodes, which takes 10-15 minutes.
    launch_template = aws.ec2.LaunchTemplate(
        f"{EKS_CLUSTER_NAME}-launch-template",
        # Set the default device's size to allow for swap.
        block_device_mappings=[
            aws.ec2.LaunchTemplateBlockDeviceMappingArgs(
                device_name="/dev/xvda",
                ebs=aws.ec2.LaunchTemplateBlockDeviceMappingEbsArgs(
                    volume_size=_NODE_ROOT_VOLUME_SIZE_GIB,
                ),
            ),
        ],
        user_data=_wrap_and_encode_user_data(
            _NODE_USER_DATA_ADD_SWAP_AND_ENABLE_IN_KUBELET_CONFIG
        ),
        # The default version shows up first in the UI, so update it anyway even though
        # it isn't strictly needed since latest_version is used below.
        update_default_version=True,
        # Other settings, such as tags required for the node to join the group/cluster,
        # are filled in by default.
    )
    # The EC2 instances that the cluster will use to execute pods.
    # https://www.pulumi.com/registry/packages/eks/api-docs/managednodegroup/
    eks.ManagedNodeGroup(
        f"{EKS_CLUSTER_NAME}-managed-node-group",
        node_group_name=f"{EKS_CLUSTER_NAME}-managed-node-group",
        cluster=cluster.core,
        version=_K8S_VERSION,
        subnet_ids=_CLUSTER_SUBNETS,
        node_role=node_role,
        instance_types=["r6i.2xlarge"],
        scaling_config=aws.eks.NodeGroupScalingConfigArgs(
            min_size=1,
            desired_size=2,
            max_size=4,
        ),
        launch_template={
            "id": launch_template.id,
            "version": launch_template.latest_version,
        },
    )

    return cluster, provider
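For reference, a minimal sketch of how the returned provider can be used to create resources in the cluster (the namespace name below is just an example, not something from the stack above). This is the reason for creating the explicit k8s.Provider instead of using cluster.provider:

cluster, provider = _define_cluster_and_get_provider()
# Example resource created through the explicit provider (name is illustrative).
k8s.core.v1.Namespace(
    "example-namespace",
    metadata=k8s.meta.v1.ObjectMetaArgs(name="example-namespace"),
    opts=pulumi.ResourceOptions(provider=provider),
)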