forked from skypilot-org/skypilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile_k8s_gpu
59 lines (47 loc) · 2.72 KB
/
Dockerfile_k8s_gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# We use the cuda runtime image instead of devel image to reduce size (1.3GB vs 3.6GB)
FROM nvidia/cuda:12.1.1-runtime-ubuntu20.04

# Build-time only: keeps apt/dpkg from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

# Install ssh and other local dependencies.
# We remove cuda lists to avoid conflicts with the cuda version installed by ray.
# apt-get is used rather than apt because apt's command-line interface is not
# guaranteed to be stable for scripted use. Recommended packages are still
# installed, so the resulting package set is unchanged.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN rm -rf /etc/apt/sources.list.d/cuda* && \
    apt-get update -y && \
    apt-get install -y \
        curl \
        fuse \
        gcc \
        git \
        nano \
        netcat \
        openssh-server \
        patch \
        pciutils \
        rsync \
        socat \
        sudo \
        unzip && \
    rm -rf /var/lib/apt/lists/*
# Set up the SSH daemon and generate host keys.
# This step runs as root (the switch to the sky user happens further down), so
# the commands are invoked directly instead of through sudo.
#   - /var/run/sshd is created for sshd.
#   - The first sed rewrites "PermitRootLogin prohibit-password" to
#     "PermitRootLogin yes" in sshd_config.
#     NOTE(review): if the stock sshd_config line is commented out, the
#     rewritten line stays commented out as well - confirm the intended effect.
#   - The second sed changes pam_loginuid.so from "required" to "optional"
#     in the sshd PAM configuration.
#   - ssh-keygen -A creates any missing host keys in the default /etc/ssh
#     location, so no directory change is needed beforehand.
RUN mkdir -p /var/run/sshd && \
    sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd && \
    ssh-keygen -A
# Create a user named sky (home directory created, bash as login shell) and
# give it passwordless sudo by appending a NOPASSWD rule to /etc/sudoers.
# Also write /etc/sudoers.d/sky so that sudo's secure_path starts with
# /opt/conda/bin.
# This step still runs as root, so the files are written directly rather than
# through "sudo /bin/bash -c".
# NOTE(review): conda is installed under ~/miniconda3 later in this file, so
# /opt/conda/bin may not exist in this image - confirm the intended secure_path.
RUN useradd -m -s /bin/bash sky && \
    echo 'sky ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
    echo 'Defaults secure_path="/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"' > /etc/sudoers.d/sky
# Switch to sky user; every following instruction runs as sky
USER sky

# Set HOME environment variable for sky user
# (key=value form; the space-separated "ENV key value" form is legacy)
ENV HOME=/home/sky

# Set current working directory
WORKDIR /home/sky

# Run the remaining RUN instructions with bash instead of the default /bin/sh;
# the steps below rely on bash features such as `source`.
SHELL ["/bin/bash", "-c"]
# Install conda and other dependencies
# Keep the conda and Ray versions below in sync with the ones in skylet.constants
#
# Steps, all in one layer:
#   1. Download the pinned Miniconda installer and run it in batch mode (-b).
#      curl -f makes an HTTP error fail the build instead of saving an error page.
#   2. Load conda's bash hook into this shell, run `conda init`, enable
#      auto-activation of the base environment and activate it.
#   3. If ~/.bashrc does not contain the conda initialize marker, run
#      `conda init` again. The check and its fallback are grouped in braces so
#      the fallback only reacts to the grep result and cannot mask a failure
#      of an earlier step in the chain.
#   4. Remove the installer, then pip-install the Python dependencies.
#      --no-cache-dir keeps pip's download cache out of the layer, and
#      'ray[default]' is quoted so bash never treats the brackets as a glob.
#   5. Download the pinned kubectl binary, install it to /usr/local/bin as
#      root:root with mode 0755 (sudo is needed because this runs as sky), and
#      delete the downloaded copy so it is not kept twice in the image.
RUN curl -fL https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -o Miniconda3-Linux-x86_64.sh && \
    bash Miniconda3-Linux-x86_64.sh -b && \
    eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true && conda activate base && \
    { grep "# >>> conda initialize >>>" ~/.bashrc || { conda init && source ~/.bashrc; }; } && \
    rm Miniconda3-Linux-x86_64.sh && \
    pip install --no-cache-dir wheel Click colorama cryptography jinja2 jsonschema networkx \
        oauth2client pandas pendulum PrettyTable rich tabulate filelock packaging \
        'protobuf<4.0.0' pulp pycryptodome==3.12.0 docker kubernetes==28.1.0 \
        grpcio==1.51.3 python-dotenv==1.0.1 'ray[default]==2.9.3' && \
    curl -fLO "https://dl.k8s.io/release/v1.28.11/bin/linux/amd64/kubectl" && \
    sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \
    rm kubectl
# Append a PATH export to the sky user's .bashrc so that $HOME/.local/bin is
# on PATH. The single quotes keep $PATH and $HOME unexpanded at build time;
# they are evaluated when .bashrc is sourced.
RUN printf '%s\n' 'export PATH="$PATH:$HOME/.local/bin"' >> "$HOME/.bashrc"
# Place the build context (the SkyPilot code base) under /skypilot/sky/, owned
# by the sky user. The ssh jump pod needs it to locate the lifecycle
# management scripts.
COPY --chown=sky [".", "/skypilot/sky/"]

# PYTHONUNBUFFERED=1 makes Python write to stdout/stderr immediately instead
# of buffering the output.
ENV PYTHONUNBUFFERED=1