Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions dlc_developer_config.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[dev]
# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
partner_developer = ""
partner_developer = "huggingface"
# Please only set it to true if you are preparing an EI related PR
# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
ei_mode = false
Expand Down Expand Up @@ -37,7 +37,7 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
build_frameworks = []
build_frameworks = ["hf-vllm"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
Expand Down Expand Up @@ -182,4 +182,7 @@ dlc-pr-pytorch-eia-inference = ""
dlc-pr-tensorflow-2-eia-inference = ""

# vllm
dlc-pr-vllm = ""
dlc-pr-vllm = ""

# hf-vllm
dlc-pr-hf-vllm = ""
51 changes: 51 additions & 0 deletions huggingface/hf-vllm/buildspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Buildspec for the HuggingFace hf-vllm GPU inference DLC image (vLLM 0.12.0,
# CUDA 12.9, Python 3.12, Ubuntu 22.04).
# NOTE(review): indentation appears to have been lost in this capture;
# buildspec YAML normally nests keys under repository_info / context / images.

# Anchored identity values (&NAME), referenced below via *NAME and the
# build system's custom !join string-concatenation tag.
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK hf-vllm
# Quoted so the YAML parser keeps these as strings ("0.12" would otherwise
# load as the float 0.12).
version: &VERSION "0.12.0"
short_version: &SHORT_VERSION "0.12"
arch_type: &ARCH_TYPE x86_64
# Quoted string, not a YAML boolean — presumably the consuming tooling
# expects the literal text "False"; confirm against the build scripts.
autopatch_build: "False"

repository_info:
build_repository: &BUILD_REPOSITORY
image_type: &IMAGE_TYPE inference
root: huggingface/hf-vllm
# PR builds push to the dev-account ECR repo "pr-huggingface-hf-vllm";
# releases go to the prod account under "huggingface-hf-vllm".
repository_name: &REPOSITORY_NAME !join [ pr, "-", huggingface, "-", hf-vllm ]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ huggingface, "-", hf-vllm ]
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

# Files copied into the Docker build context (source is repo-relative,
# target is the name inside the context, referenced by the Dockerfile COPY).
context:
build_context: &BUILD_CONTEXT
deep_learning_container:
source: ../../src/deep_learning_container.py
target: deep_learning_container.py
cuda-compatibility-lib:
source: ../build_artifacts/inference/cuda-compatibility-lib.sh
target: cuda-compatibility-lib.sh


# One image definition; merges the repository and context anchors above.
images:
BuildHuggingFaceHfVllmGpuPy312Cu129InferenceDockerImage:
<<: *BUILD_REPOSITORY
context:
<<: *BUILD_CONTEXT
# Size budget in MB enforced by the image-size sanity test.
image_size_baseline: 25000
device_type: &DEVICE_TYPE gpu
cuda_version: &CUDA_VERSION cu129
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py312
os_version: &OS_VERSION ubuntu22.04
vllm_version: &VLLM_VERSION 0.12.0
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ]
# Resolves to docker/0.12/cu129/Dockerfile.
docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
# Build only the "sagemaker" stage of the multi-stage Dockerfile.
target: sagemaker
build: true
enable_common_stage_build: false
test_configs:
test_platforms:
- sanity
- security
- sagemaker
42 changes: 42 additions & 0 deletions huggingface/hf-vllm/docker/0.12/cu129/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# HuggingFace hf-vllm inference image: thin layer on top of the AWS DLC
# vLLM 0.12.0 GPU base image, adding HuggingFace Hub tooling and the OSS
# compliance artifacts required of DLC releases.
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.12.0-gpu-py312-cu129-ubuntu22.04-sagemaker-v1.0
FROM ${FINAL_BASE_IMAGE} AS vllm-base

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Pinned versions, overridable at build time with --build-arg.
ARG HUGGINGFACE_HUB_VERSION=0.36.0
ARG HF_XET_VERSION=1.2.0

# curl/unzip are needed later by the OSS-compliance step; apt lists are
# removed in the same layer to keep the image small.
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends curl unzip \
&& rm -rf /var/lib/apt/lists/*


RUN pip install --upgrade pip && \
pip install --no-cache-dir \
huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
hf-xet==${HF_XET_VERSION} \
grpcio


# Final stage targeted by the buildspec (target: sagemaker).
FROM vllm-base AS sagemaker
# NOTE(review): HF_HUB_ENABLE_HF_TRANSFER=1 makes huggingface_hub require the
# `hf_transfer` package, which is not installed above (only hf-xet is) —
# confirm the base image ships it, or downloads will fail at runtime.
ENV HF_HUB_ENABLE_HF_TRANSFER="1" \
HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm"

# Supplied via the buildspec build_context (cuda-compatibility-lib entry).
COPY cuda-compatibility-lib.sh /usr/local/bin/cuda-compatibility-lib.sh
RUN chmod +x /usr/local/bin/cuda-compatibility-lib.sh

# Fetch and run the AWS DLC OSS-compliance generator, install its test
# binary, then remove the downloaded artifacts in the same layer.
RUN set -eux; \
HOME_DIR=/root; \
uv pip install --system --upgrade pip requests PTable; \
curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip; \
unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/; \
cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance; \
chmod +x /usr/local/bin/testOSSCompliance; \
chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh; \
${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3; \
rm -rf ${HOME_DIR}/oss_compliance*


# NOTE(review): sagemaker_entrypoint.sh is not copied in this Dockerfile —
# presumably provided by the vLLM base image; confirm.
ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]

14 changes: 14 additions & 0 deletions huggingface/hf-vllm/out.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import os

# Fire-and-forget telemetry ping. If the DLC telemetry script is present and
# the user has not opted out via OPT_OUT_TRACKING, run it in a daemon thread
# so importing this module never blocks or fails.
try:
    # Opt-out wins for any case-variant of "true". The "" default also covers
    # the unset case, so the previous separate `is None` check was redundant.
    if os.path.exists("/usr/local/bin/deep_learning_container.py") and (
        os.getenv("OPT_OUT_TRACKING", "").lower() != "true"
    ):
        import threading

        # NOTE(review): these args previously read "huggingface_pytorch /
        # 2.7.1 / training", contradicting the sibling telemetry.sh of this
        # hf-vllm inference image ("hf-vllm / 0.12.0 / inference") — aligned
        # here; confirm intended values.
        # os.system() runs under /bin/sh, where the bashism "&>/dev/null"
        # backgrounds the command instead of silencing it; use the POSIX
        # form ">/dev/null 2>&1".
        cmd = (
            "python /usr/local/bin/deep_learning_container.py "
            "--framework hf-vllm --framework-version 0.12.0 "
            "--container-type inference >/dev/null 2>&1"
        )
        x = threading.Thread(target=lambda: os.system(cmd))
        # Thread.setDaemon() is deprecated since Python 3.10; set the
        # attribute directly.
        x.daemon = True
        x.start()
except Exception:
    # Telemetry must never break container startup.
    pass
12 changes: 12 additions & 0 deletions huggingface/hf-vllm/telemetry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# telemetry.sh
#
# Fire-and-forget telemetry ping: if the DLC telemetry script exists and the
# user has not opted out (OPT_OUT_TRACKING == "true", case-insensitive),
# launch it in a backgrounded subshell so startup is never blocked.
#
# Fix: the shebang must be the very first line of the file to take effect;
# it previously sat below a comment and was inert, leaving the bash-only
# constructs below ([[ ]], ${VAR,,}) at the mercy of the invoking shell.
# The former `-z` guard was redundant: an empty/unset value already
# satisfies != "true".
if [ -f /usr/local/bin/deep_learning_container.py ] && [[ "${OPT_OUT_TRACKING,,}" != "true" ]]; then
  (
    python /usr/local/bin/deep_learning_container.py \
      --framework "hf-vllm" \
      --framework-version "0.12.0" \
      --container-type "inference" \
      &>/dev/null &
  )
fi

13 changes: 13 additions & 0 deletions test/sagemaker_tests/huggingface/hf-vllm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
Loading