Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions dlc_developer_config.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[dev]
# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
partner_developer = ""
partner_developer = "huggingface"
# Please only set it to true if you are preparing an EI related PR
# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
ei_mode = false
Expand Down Expand Up @@ -37,7 +37,7 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
build_frameworks = []
build_frameworks = ["hf-vllm"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
Expand Down Expand Up @@ -182,4 +182,7 @@ dlc-pr-pytorch-eia-inference = ""
dlc-pr-tensorflow-2-eia-inference = ""

# vllm
dlc-pr-vllm = ""
dlc-pr-vllm = ""

# hf-vllm
dlc-pr-hf-vllm = ""
51 changes: 51 additions & 0 deletions huggingface/hf-vllm/buildspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Buildspec for the HuggingFace hf-vllm GPU inference DLC image (vLLM 0.12.0,
# CUDA 12.9, Python 3.12, Ubuntu 22.04).
# NOTE(review): indentation appears to have been lost in this capture;
# buildspec YAML normally nests keys under repository_info / context / images.

# Anchored identity values (&NAME), referenced below via *NAME and the
# build system's custom !join string-concatenation tag.
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK hf-vllm
# Quoted so the YAML parser keeps these as strings ("0.12" would otherwise
# load as the float 0.12).
version: &VERSION "0.12.0"
short_version: &SHORT_VERSION "0.12"
arch_type: &ARCH_TYPE x86_64
# Quoted string, not a YAML boolean — presumably the consuming tooling
# expects the literal text "False"; confirm against the build scripts.
autopatch_build: "False"

repository_info:
build_repository: &BUILD_REPOSITORY
image_type: &IMAGE_TYPE inference
root: huggingface/hf-vllm
# PR builds push to the dev-account ECR repo "pr-huggingface-hf-vllm";
# releases go to the prod account under "huggingface-hf-vllm".
repository_name: &REPOSITORY_NAME !join [ pr, "-", huggingface, "-", hf-vllm ]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ huggingface, "-", hf-vllm ]
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

# Files copied into the Docker build context (source is repo-relative,
# target is the name inside the context, referenced by the Dockerfile COPY).
context:
build_context: &BUILD_CONTEXT
deep_learning_container:
source: ../../src/deep_learning_container.py
target: deep_learning_container.py
cuda-compatibility-lib:
source: ../build_artifacts/inference/cuda-compatibility-lib.sh
target: cuda-compatibility-lib.sh


# One image definition; merges the repository and context anchors above.
images:
BuildHuggingFaceHfVllmGpuPy312Cu129InferenceDockerImage:
<<: *BUILD_REPOSITORY
context:
<<: *BUILD_CONTEXT
# Size budget in MB enforced by the image-size sanity test.
image_size_baseline: 25000
device_type: &DEVICE_TYPE gpu
cuda_version: &CUDA_VERSION cu129
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py312
os_version: &OS_VERSION ubuntu22.04
vllm_version: &VLLM_VERSION 0.12.0
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# Resolves to docker/0.12/cu129/Dockerfile.
docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
# Build only the "sagemaker" stage of the multi-stage Dockerfile.
target: sagemaker
build: true
enable_common_stage_build: false
test_configs:
test_platforms:
- sanity
- security
- sagemaker
42 changes: 42 additions & 0 deletions huggingface/hf-vllm/docker/0.12/cu129/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# HuggingFace hf-vllm inference image: thin layer on top of the AWS DLC
# vLLM 0.12.0 GPU base image, adding HuggingFace Hub tooling and the OSS
# compliance artifacts required of DLC releases.
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.12.0-gpu-py312-cu129-ubuntu22.04-sagemaker-v1.0
FROM ${FINAL_BASE_IMAGE} AS vllm-base

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Pinned versions, overridable at build time with --build-arg.
ARG HUGGINGFACE_HUB_VERSION=0.36.0
ARG HF_XET_VERSION=1.2.0

# curl/unzip are needed later by the OSS-compliance step; apt lists are
# removed in the same layer to keep the image small.
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends curl unzip \
&& rm -rf /var/lib/apt/lists/*


RUN pip install --upgrade pip && \
pip install --no-cache-dir \
huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
hf-xet==${HF_XET_VERSION} \
grpcio


# Final stage targeted by the buildspec (target: sagemaker).
FROM vllm-base AS sagemaker
# NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 makes huggingface_hub require the
# `hf_transfer` package, which is not installed above (only hf-xet is) —
# confirm the base image ships it, or downloads will fail at runtime.
ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"

# Supplied via the buildspec build_context (cuda-compatibility-lib entry).
COPY cuda-compatibility-lib.sh /usr/local/bin/cuda-compatibility-lib.sh
RUN chmod +x /usr/local/bin/cuda-compatibility-lib.sh

# Fetch and run the AWS DLC OSS-compliance generator, install its test
# binary, then remove the downloaded artifacts in the same layer.
RUN set -eux; \
HOME_DIR=/root; \
uv pip install --system --upgrade pip requests PTable; \
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip; \
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/; \
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance; \
chmod +x /usr/local/bin/testOSSCompliance; \
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh; \
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3; \
rm -rf ${HOME_DIR}/oss_compliance*


# NOTE(review): sagemaker_entrypoint.sh is not copied in this Dockerfile —
# presumably provided by the vLLM base image; confirm.
ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]

14 changes: 14 additions & 0 deletions huggingface/hf-vllm/out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import os

# Fire-and-forget telemetry ping. If the DLC telemetry script is present and
# the user has not opted out via OPT_OUT_TRACKING, run it in a daemon thread
# so importing this module never blocks or fails.
try:
    # Opt-out wins for any case-variant of "true". The "" default also covers
    # the unset case, so the previous separate `is None` check was redundant.
    if os.path.exists("/usr/local/bin/deep_learning_container.py") and (
        os.getenv("OPT_OUT_TRACKING", "").lower() != "true"
    ):
        import threading

        # NOTE(review): these args previously read "huggingface_pytorch /
        # 2.7.1 / training", contradicting the sibling telemetry.sh of this
        # hf-vllm inference image ("hf-vllm / 0.12.0 / inference") — aligned
        # here; confirm intended values.
        # os.system() runs under /bin/sh, where the bashism "&>/dev/null"
        # backgrounds the command instead of silencing it; use the POSIX
        # form ">/dev/null 2>&1".
        cmd = (
            "python /usr/local/bin/deep_learning_container.py "
            "--framework hf-vllm --framework-version 0.12.0 "
            "--container-type inference >/dev/null 2>&1"
        )
        x = threading.Thread(target=lambda: os.system(cmd))
        # Thread.setDaemon() is deprecated since Python 3.10; set the
        # attribute directly.
        x.daemon = True
        x.start()
except Exception:
    # Telemetry must never break container startup.
    pass
12 changes: 12 additions & 0 deletions huggingface/hf-vllm/telemetry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# telemetry.sh
#
# Fire-and-forget telemetry ping: if the DLC telemetry script exists and the
# user has not opted out (OPT_OUT_TRACKING == "true", case-insensitive),
# launch it in a backgrounded subshell so startup is never blocked.
#
# Fix: the shebang must be the very first line of the file to take effect;
# it previously sat below a comment and was inert, leaving the bash-only
# constructs below ([[ ]], ${VAR,,}) at the mercy of the invoking shell.
# The former `-z` guard was redundant: an empty/unset value already
# satisfies != "true".
if [ -f /usr/local/bin/deep_learning_container.py ] && [[ "${OPT_OUT_TRACKING,,}" != "true" ]]; then
  (
    python /usr/local/bin/deep_learning_container.py \
      --framework "hf-vllm" \
      --framework-version "0.12.0" \
      --container-type "inference" \
      &>/dev/null &
  )
fi

13 changes: 13 additions & 0 deletions test/sagemaker_tests/huggingface/hf-vllm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
Loading