Skip to content

Bug: LLamaSharp Bug #13285

@williamlzw

Description

@williamlzw

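When running LLamaSharp model inference through Semantic Kernel, calling the GetStreamingChatMessageContentsAsync method without passing the PromptExecutionSettings parameter causes the reply to terminate automatically halfway through. The first snippet below reproduces the problem; the second snippet, which passes the settings, works as expected.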

using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using System.Text;


/// <summary>
/// Reproduction sample: streams a chat reply from a local GGUF model through the
/// Semantic Kernel <see cref="IChatCompletionService"/> abstraction WITHOUT passing
/// any execution settings. This is the variant reported to stop mid-reply.
/// </summary>
public class SemanticKernelLlamaSharp
{
    /// <summary>
    /// Loads the model, registers a <see cref="LLamaSharpChatCompletion"/> service in a
    /// kernel, and writes the streamed reply for a fixed prompt to the console.
    /// </summary>
    /// <returns>A task that completes once the response stream has ended.</returns>
    public async Task RunAsync()
    {
        Console.OutputEncoding = Encoding.UTF8;
        var modelPath = @"G:\model\Qwen3-4B-Instruct-2507-GGUF\Qwen3-4B-Instruct-2507-Q4_K_M1.gguf";
        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            GpuLayerCount = 65,
            Encoding = Encoding.UTF8,
        };

        // The weights and the context are disposable; `using` releases them when the
        // method exits (context first, then the weights it was created from).
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);
        var builder = Kernel.CreateBuilder();

        builder.Services.AddKeyedSingleton<IChatCompletionService>("local-llama", new LLamaSharpChatCompletion(executor));
        var kernel = builder.Build();
        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        // Built but deliberately NOT handed to the call below: omitting it is what
        // triggers the reported early termination.
        var settings = new LLamaSharpPromptExecutionSettings
        {
            FunctionChoiceBehavior = FunctionChoiceBehavior.Auto(),
        };

        string input = "介绍一下香港";

        // ERROR case: no execution settings are passed, and the reply stops halfway.
        await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input))
        {
            // Skip chunks that carry no text.
            if (!string.IsNullOrEmpty(response.Content))
            {
                Console.Write(response.Content);
            }
        }
    }
}

/// <summary>
/// Console entry point for the sample.
/// </summary>
class Program2
{
    /// <summary>
    /// Creates a <see cref="SemanticKernelLlamaSharp"/> instance and awaits its run.
    /// </summary>
    public async static Task Main()
    {
        await new SemanticKernelLlamaSharp().RunAsync();
    }
}
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using System.Text;


/// <summary>
/// Working variant of the sample: streams a chat reply from a local GGUF model through
/// the Semantic Kernel <see cref="IChatCompletionService"/> abstraction and passes
/// explicit execution settings to the streaming call.
/// </summary>
public class SemanticKernelLlamaSharp
{
    /// <summary>
    /// Loads the model, registers a <see cref="LLamaSharpChatCompletion"/> service in a
    /// kernel, and writes the streamed reply for a fixed prompt to the console.
    /// </summary>
    /// <returns>A task that completes once the response stream has ended.</returns>
    public async Task RunAsync()
    {
        Console.OutputEncoding = Encoding.UTF8;
        var modelPath = @"G:\model\Qwen3-4B-Instruct-2507-GGUF\Qwen3-4B-Instruct-2507-Q4_K_M1.gguf";
        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            GpuLayerCount = 65,
            Encoding = Encoding.UTF8,
        };

        // The weights and the context are disposable; `using` releases them when the
        // method exits (context first, then the weights it was created from).
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);
        var builder = Kernel.CreateBuilder();

        builder.Services.AddKeyedSingleton<IChatCompletionService>("local-llama", new LLamaSharpChatCompletion(executor));
        var kernel = builder.Build();
        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        var settings = new LLamaSharpPromptExecutionSettings
        {
            FunctionChoiceBehavior = FunctionChoiceBehavior.Auto(),
        };

        string input = "介绍一下香港";

        // Working case: the settings are passed explicitly, and the reply is not cut off.
        await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input, settings))
        {
            // Skip chunks that carry no text.
            if (!string.IsNullOrEmpty(response.Content))
            {
                Console.Write(response.Content);
            }
        }
    }
}

/// <summary>
/// Console entry point for the sample.
/// </summary>
class Program2
{
    /// <summary>
    /// Creates a <see cref="SemanticKernelLlamaSharp"/> instance and awaits its run.
    /// </summary>
    public async static Task Main()
    {
        await new SemanticKernelLlamaSharp().RunAsync();
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug (Something isn't working), triage

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions