From 70286a5d30fe8b248bd918bc23bbb317eceb757b Mon Sep 17 00:00:00 2001 From: Alvaro Moran Date: Fri, 14 Nov 2025 14:18:55 +0000 Subject: [PATCH 1/3] feat: updated huggingface neuronx inference and training containers --- .../image_uri_config/huggingface-neuronx.json | 92 ++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/image_uri_config/huggingface-neuronx.json b/src/sagemaker/image_uri_config/huggingface-neuronx.json index 732e397ce9..c13e0d4a89 100644 --- a/src/sagemaker/image_uri_config/huggingface-neuronx.json +++ b/src/sagemaker/image_uri_config/huggingface-neuronx.json @@ -9,7 +9,8 @@ "4.36": "4.36.2", "4.43": "4.43.2", "4.48": "4.48.1", - "4.51": "4.51.0" + "4.51": "4.51.0", + "4.55": "4.55.4" }, "versions": { "4.28.1": { @@ -272,6 +273,49 @@ "sdk2.24.1" ] } + }, + "4.55.4": { + "version_aliases": { + "pytorch2.7": "pytorch2.7.0" + }, + "pytorch2.7.0": { + "py_versions": [ + "py310" + ], + "repository": "huggingface-pytorch-training-neuronx", + "registries": { + "ap-northeast-1": "763104351884", + "ap-south-1": "763104351884", + "ap-south-2": "772153158452", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-4": "457447274322", + "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-central-2": "380420809688", + "eu-south-2": "503227376785", + "eu-west-1": "763104351884", + "eu-west-3": "763104351884", + "il-central-1": "780543022126", + "mx-central-1":"637423239942", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-west-2": "763104351884", + "ca-west-1": "204538143572" + }, + "container_version": { + "inf": "ubuntu22.04" + }, + "sdk_versions": [ + "sdk2.26.0" + ] + } } } }, @@ -284,7 +328,8 @@ "4.34": "4.34.1", "4.36": "4.36.2", "4.43": "4.43.2", - "4.51": "4.51.3" + "4.51": "4.51.3", + "4.55": "4.55.4" }, "versions": { "4.28.1": { @@ -592,6 +637,49 @@ "sdk2.24.1" ] } + }, + "4.55.4": { + "version_aliases": { + "pytorch2.7": "pytorch2.7.1" + }, + "pytorch2.7.1": { + "py_versions": [ + "py310" + ], + "repository": "huggingface-pytorch-inference-neuronx", + "registries": { + "ap-northeast-1": "763104351884", + "ap-south-1": "763104351884", + "ap-south-2": "772153158452", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-4": "457447274322", + "ap-southeast-5": "550225433462", + "ap-southeast-7": "590183813437", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-central-2": "380420809688", + "eu-south-2": "503227376785", + "eu-west-1": "763104351884", + "eu-west-3": "763104351884", + "il-central-1": "780543022126", + "mx-central-1":"637423239942", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-east-1": "446045086412", + "us-gov-west-1": "442386744353", + "us-west-2": "763104351884", + "ca-west-1": "204538143572" + }, + "container_version": { + "inf": "ubuntu22.04" + }, + "sdk_versions": [ + "sdk2.26.0" + ] + } } } } From 8aa1c92c05296abdb616b4762891ad91e8b5e1f5 Mon Sep 17 00:00:00 2001 From: Alvaro Moran Date: Fri, 14 Nov 2025 14:19:59 +0000 Subject: [PATCH 2/3] feat: add support for new huggingface vllm neuronx containers Also added reference to the first available container URI. --- src/sagemaker/huggingface/llm_utils.py | 8 ++++ .../huggingface-vllm-neuronx.json | 38 +++++++++++++++++++ src/sagemaker/image_uris.py | 7 +++- 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py index c7a1316760..a0e6de8afd 100644 --- a/src/sagemaker/huggingface/llm_utils.py +++ b/src/sagemaker/huggingface/llm_utils.py @@ -67,6 +67,14 @@ def get_huggingface_llm_image_uri( image_scope="inference", inference_tool="neuronx", ) + if backend == "huggingface-vllm-neuronx": + return image_uris.retrieve( + "huggingface-vllm-neuronx", + region=region, + version=version, + image_scope="inference", + inference_tool="neuronx", + ) if backend == "huggingface-tei": return image_uris.retrieve( "huggingface-tei", diff --git a/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json b/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json new file mode 100644 index 0000000000..c2592c915a --- /dev/null +++ b/src/sagemaker/image_uri_config/huggingface-vllm-neuronx.json @@ -0,0 +1,38 @@ +{ + "inference": { + "processors": [ + "inf2" + ], + "version_aliases": { + "0.4": "0.4.1" + }, + "versions": { + "0.4.1": { + "py_versions": [ + "py310" + ], + "registries": { + "ap-northeast-1": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "eu-central-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-3": "763104351884", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-west-2": "763104351884" + }, + "tag_prefix": "0.10.2", + "repository": "huggingface-vllm-inference-neuronx", + "container_version": { + "inf2": "ubuntu22.04" + }, + "sdk_versions": [ + "sdk2.26.0" + ] + } + } + } +} \ No newline at end of file diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py index de6d622f78..ffc543110b 100644 --- a/src/sagemaker/image_uris.py +++ b/src/sagemaker/image_uris.py @@ -41,6 +41,7 @@ HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei" HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu" HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx" +HUGGING_FACE_VLLM_NEURONX_FRAMEWORK = "huggingface-vllm-neuronx" XGBOOST_FRAMEWORK = "xgboost" SKLEARN_FRAMEWORK = "sklearn" TRAINIUM_ALLOWED_FRAMEWORKS = "pytorch" @@ -230,7 +231,11 @@ def retrieve( container_version = version_config["container_version"][processor] # Append sdk version in case of trainium instances - if repo in ["pytorch-training-neuron", "pytorch-training-neuronx"]: + if repo in [ + "pytorch-training-neuron", + "pytorch-training-neuronx", + "huggingface-vllm-inference-neuronx", + ]: if not sdk_version: sdk_version = _get_latest_versions(version_config["sdk_versions"]) container_version = sdk_version + "-" + container_version From 48718e3e2c59e067dbff430a36edaca6e3334a93 Mon Sep 17 00:00:00 2001 From: Alvaro Moran Date: Fri, 14 Nov 2025 14:20:46 +0000 Subject: [PATCH 3/3] test: add image uri test for huggingface vllm neuronx images --- .../image_uris/test_huggingface_llm.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index f8fd17eeef..d225306f6a 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -37,6 +37,11 @@ "1.8.2": "2.0.1-tei1.8.2-cpu-py310-ubuntu22.04", }, } +HF_VLLM_VERSIONS_MAPPING = { + "inf2": { + "0.4.1": "0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04", + }, +} HF_VERSIONS_MAPPING = { "gpu": { "0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04", @@ -124,6 +129,30 @@ def test_huggingface_uris(load_config): assert expected == uri +@pytest.mark.parametrize("load_config", ["huggingface-vllm-neuronx.json"], indirect=True) +def test_huggingface_vllm_neuronx_uris(load_config): + VERSIONS = load_config["inference"]["versions"] + device = load_config["inference"]["processors"][0] + assert device == "inf2" + backend = "huggingface-vllm-neuronx" + + # Fail if device is not in mapping + if device not in HF_VLLM_VERSIONS_MAPPING: + raise ValueError(f"Device {device} not found in HF_VLLM_VERSIONS_MAPPING") + for version in VERSIONS: + ACCOUNTS = load_config["inference"]["versions"][version]["registries"] + for region in ACCOUNTS.keys(): + uri = get_huggingface_llm_image_uri(backend, region=region, version=version) + expected = expected_uris.huggingface_llm_framework_uri( + "huggingface-vllm-inference-neuronx", + ACCOUNTS[region], + version, + HF_VLLM_VERSIONS_MAPPING[device][version], + region=region, + ) + assert expected == uri + + @pytest.mark.parametrize( "load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True )