Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ func main() {

log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)

// Auto-detect GPU type and set appropriate variant
// Check if we have supported AMD GPUs and set ROCm variant accordingly
if hasAMD, err := gpuInfo.HasSupportedAMDGPU(); err == nil && hasAMD {
log.Info("Supported AMD GPU detected, ROCm will be used automatically")
// This will be handled by the llama.cpp backend during server download
} else if err != nil {
log.Debugf("AMD GPU detection failed: %v", err)
}

// Create llama.cpp configuration from environment variables
llamaCppConfig := createLlamaCppConfigFromEnv()

Expand Down
116 changes: 116 additions & 0 deletions pkg/gpuinfo/amd_gpu_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
//go:build linux

package gpuinfo

import (
"bufio"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
)

// supportedAMDGPUs are the AMD GPU targets that should use ROCm
// supportedAMDGPUs are the AMD GPU gfx targets that should use ROCm.
var supportedAMDGPUs = map[string]bool{
	"gfx908":  true,
	"gfx90a":  true,
	"gfx942":  true,
	"gfx1010": true,
	"gfx1030": true,
	"gfx1100": true,
	"gfx1200": true,
	"gfx1201": true,
	"gfx1151": true,
}

// gfxTargetVersionRe matches "gfx_target_version <number>" lines in a KFD node
// properties file. Compiled once at package scope — regexp compilation is
// comparatively expensive and hasSupportedAMDGPU may be called repeatedly.
var gfxTargetVersionRe = regexp.MustCompile(`gfx_target_version[ \t]+([0-9]+)`)

// gfxName builds the canonical gfx target name from its components: major in
// decimal, minor and stepping in lowercase hex (e.g. 9, 0, 10 -> "gfx90a").
func gfxName(major, minor, stepping int) string {
	return "gfx" +
		strconv.Itoa(major) +
		strconv.FormatInt(int64(minor), 16) +
		strconv.FormatInt(int64(stepping), 16)
}

// parseGfxOverride parses an HSA_OVERRIDE_GFX_VERSION value of the form
// "major.minor.stepping". ok is false for malformed or out-of-range input
// (major is a 6-bit field, minor and stepping are 8-bit fields).
func parseGfxOverride(raw string) (major, minor, stepping int, ok bool) {
	parts := strings.Split(strings.TrimSpace(raw), ".")
	if len(parts) != 3 {
		return 0, 0, 0, false
	}
	mv, err1 := strconv.Atoi(parts[0])
	nv, err2 := strconv.Atoi(parts[1])
	sv, err3 := strconv.Atoi(parts[2])
	if err1 != nil || err2 != nil || err3 != nil {
		return 0, 0, 0, false
	}
	if mv > 63 || nv > 255 || sv > 255 {
		return 0, 0, 0, false
	}
	return mv, nv, sv, true
}

// nodeHasSupportedGPU reports whether a single KFD node properties file
// describes a gfx target present in supportedAMDGPUs. override, when
// non-empty, is the HSA_OVERRIDE_GFX_VERSION value that replaces the
// version decoded from the file. Unreadable files and malformed lines are
// silently skipped, mirroring the permissive behavior of the original
// detection logic.
func nodeHasSupportedGPU(propPath, override string) bool {
	f, err := os.Open(propPath)
	if err != nil {
		// Could be permission denied or file doesn't exist; just skip.
		return false
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		matches := gfxTargetVersionRe.FindStringSubmatch(sc.Text())
		if len(matches) < 2 {
			continue
		}

		// A zero target version means no usable GPU on this node.
		version, err := strconv.Atoi(matches[1])
		if err != nil || version == 0 {
			continue
		}

		var major, minor, stepping int
		if override != "" {
			var ok bool
			major, minor, stepping, ok = parseGfxOverride(override)
			if !ok {
				// Invalid override format/values, skip this line.
				continue
			}
		} else {
			// gfx_target_version encodes major*10000 + minor*100 + stepping.
			major = (version / 10000) % 100
			minor = (version / 100) % 100
			stepping = version % 100
		}

		if supportedAMDGPUs[gfxName(major, minor, stepping)] {
			return true
		}
	}
	// Scanner errors are intentionally ignored: a partially-read file is
	// treated the same as one with no matching target.
	return false
}

// hasSupportedAMDGPU reports whether any KFD topology node exposes an AMD
// GPU whose gfx target is in supportedAMDGPUs. Returns (false, nil) when
// the KFD sysfs tree is absent (no ROCm-capable driver loaded).
func hasSupportedAMDGPU() (bool, error) {
	topologyDir := "/sys/class/kfd/kfd/topology/nodes/"
	info, err := os.Stat(topologyDir)
	if err != nil || !info.IsDir() {
		return false, nil // KFD not available
	}

	entries, err := os.ReadDir(topologyDir)
	if err != nil {
		return false, err
	}

	// os.ReadDir already returns entries sorted by filename; this explicit
	// sort is kept for defensive clarity and costs nothing on sorted input.
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].Name() < entries[j].Name()
	})

	// Read the override once per call rather than once per properties line.
	override := os.Getenv("HSA_OVERRIDE_GFX_VERSION")

	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		propPath := filepath.Join(topologyDir, e.Name(), "properties")
		if nodeHasSupportedGPU(propPath, override) {
			return true, nil // Found a supported AMD GPU
		}
	}

	return false, nil // No supported AMD GPU found
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

4 changes: 4 additions & 0 deletions pkg/gpuinfo/gpuinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ func New(modelRuntimeInstallPath string) *GPUInfo {
// GetVRAMSize returns the size of the system's GPU memory (VRAM) in bytes,
// delegating to the platform-specific getVRAMSize implementation.
func (g *GPUInfo) GetVRAMSize() (uint64, error) {
	return getVRAMSize(g.modelRuntimeInstallPath)
}

// HasSupportedAMDGPU reports whether the system has an AMD GPU supported by
// ROCm. Detection is only implemented on Linux; other platforms' stubs
// always report false.
func (g *GPUInfo) HasSupportedAMDGPU() (bool, error) {
	return hasSupportedAMDGPU()
}
3 changes: 3 additions & 0 deletions pkg/gpuinfo/gpuinfo_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//go:build linux

package gpuinfo
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
package gpuinfo
package gpuinfo

8 changes: 8 additions & 0 deletions pkg/gpuinfo/gpuinfo_not_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//go:build !linux

package gpuinfo

// HasSupportedAMDGPU reports whether the system has a ROCm-supported AMD
// GPU. This non-Linux stub always reports false with no error.
func (g *GPUInfo) HasSupportedAMDGPU() (bool, error) {
	// AMD GPU detection is only supported on Linux
	return false, nil
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

6 changes: 6 additions & 0 deletions pkg/gpuinfo/memory_darwin_cgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ func getVRAMSize(_ string) (uint64, error) {
}
return uint64(vramSize), nil
}

// hasSupportedAMDGPU reports whether the system has a ROCm-supported AMD
// GPU. Detection is only implemented on Linux, so this darwin stub always
// reports false. (The previous comment claimed it "returns true", which
// contradicted the body.)
func hasSupportedAMDGPU() (bool, error) {
	// AMD GPU detection is only supported on Linux
	return false, nil
}
6 changes: 6 additions & 0 deletions pkg/gpuinfo/memory_darwin_nocgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ import "errors"
// getVRAMSize always fails in this build: the darwin VRAM query requires
// cgo, which is disabled here.
func getVRAMSize(_ string) (uint64, error) {
	return 0, errors.New("unimplemented without cgo")
}

// hasSupportedAMDGPU reports whether the system has a ROCm-supported AMD
// GPU. Detection is only implemented on Linux, so this darwin (no-cgo)
// stub always reports false.
func hasSupportedAMDGPU() (bool, error) {
	// AMD GPU detection is only supported on Linux
	return false, nil
}
5 changes: 5 additions & 0 deletions pkg/gpuinfo/memory_linux_nocgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ import "errors"
// getVRAMSize always fails in this build: the Linux VRAM query requires
// cgo, which is disabled here.
func getVRAMSize(_ string) (uint64, error) {
	return 0, errors.New("unimplemented without cgo")
}

// hasSupportedAMDGPU always fails in this no-cgo Linux build.
// NOTE(review): amd_gpu_linux.go also defines hasSupportedAMDGPU for the
// `linux` build tag and does not itself need cgo — verify the build tags of
// these two files cannot both apply, or linux no-cgo builds will fail with
// a duplicate symbol.
func hasSupportedAMDGPU() (bool, error) {
	return false, errors.New("unimplemented without cgo")
}
6 changes: 6 additions & 0 deletions pkg/gpuinfo/memory_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ func getVRAMSize(modelRuntimeInstallPath string) (uint64, error) {
}
return 0, errors.New("unexpected nv-gpu-info output format")
}

// hasSupportedAMDGPU reports whether the system has a ROCm-supported AMD
// GPU. Detection is only implemented on Linux, so this Windows stub always
// reports false.
func hasSupportedAMDGPU() (bool, error) {
	// AMD GPU detection is only supported on Linux
	return false, nil
}
44 changes: 38 additions & 6 deletions pkg/inference/backends/llamacpp/download_linux.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,50 @@
//go:build linux

package llamacpp

import (
"context"
"fmt"
"net/http"
"path/filepath"

"github.com/docker/model-runner/pkg/gpuinfo"
"github.com/docker/model-runner/pkg/logging"
)

func (l *llamaCpp) ensureLatestLlamaCpp(_ context.Context, log logging.Logger, _ *http.Client,
_, vendoredServerStoragePath string,
// init enables the GPU (ROCm) llama.cpp variant by default in Linux builds
// (this file carries the linux build tag).
// NOTE(review): a side-effecting init is hard to reason about; consider
// initializing ShouldUseGPUVariant at its declaration site (e.g. to
// runtime.GOOS == "linux") instead — this flag is also consumed by Docker
// Desktop to decide whether to show the GPU-backed engine checkbox.
func init() {
	// Enable GPU variant detection by default on Linux
	ShouldUseGPUVariantLock.Lock()
	defer ShouldUseGPUVariantLock.Unlock()
	ShouldUseGPUVariant = true
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather have ShouldUseGPUVariant = runtime.GOOS == "linux" in download.go instead of an init.
For more context, this variable is used by Docker Desktop to determine whether it should display the GPU backed engine checkbox in the UI.


func (l *llamaCpp) ensureLatestLlamaCpp(ctx context.Context, log logging.Logger, httpClient *http.Client,
llamaCppPath, vendoredServerStoragePath string,
) error {
l.status = fmt.Sprintf("running llama.cpp version: %s",
getLlamaCppVersion(log, filepath.Join(vendoredServerStoragePath, "com.docker.llama-server")))
return errLlamaCppUpdateDisabled
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it'd be better to stick to a consistent logic for picking whether we use acceleration or not. Currently on LInux we expect people to run the model-runner container (appropriate to their usecase and system), so the accelerate/not-accelerate question should be decided in the CLI rather than here.

var hasAMD bool
var err error

ShouldUseGPUVariantLock.Lock()
defer ShouldUseGPUVariantLock.Unlock()
if ShouldUseGPUVariant {
// Create GPU info to check for supported AMD GPUs
gpuInfo := gpuinfo.New(vendoredServerStoragePath)
hasAMD, err = gpuInfo.HasSupportedAMDGPU()
if err != nil {
log.Debugf("AMD GPU detection failed: %v", err)
}
}

desiredVersion := GetDesiredServerVersion()
desiredVariant := "cpu"

// Use ROCm if supported AMD GPU is detected
if hasAMD {
log.Info("Supported AMD GPU detected, using ROCm variant")
desiredVariant = "rocm"
}

l.status = fmt.Sprintf("looking for updates for %s variant", desiredVariant)
return l.downloadLatestLlamaCpp(ctx, log, httpClient, llamaCppPath, vendoredServerStoragePath, desiredVersion,
desiredVariant)
}
Loading