LLamaSharp model inference: when the GetStreamingChatMessageContentsAsync method is called without the PromptExecutionSettings parameter, the reply terminates automatically partway through.
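The two repros below are identical except for whether the settings argument is passed to the streaming call (the identifiers are the ones defined in the full code that follows); only the call that receives it produces a complete reply:

    // Truncated reply: no execution settings passed.
    await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input)) { ... }

    // Complete reply: the same call with the settings argument supplied.
    await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input, settings)) { ... }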
using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using System.Text;

public class SemanticKernelLlamaSharp
{
    public async Task RunAsync()
    {
        Console.OutputEncoding = Encoding.UTF8;

        var modelPath = @"G:\model\Qwen3-4B-Instruct-2507-GGUF\Qwen3-4B-Instruct-2507-Q4_K_M1.gguf";
        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            GpuLayerCount = 65,
            Encoding = Encoding.UTF8,
        };

        var model = LLamaWeights.LoadFromFile(parameters);
        var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<IChatCompletionService>("local-llama", new LLamaSharpChatCompletion(executor));
        var kernel = builder.Build();
        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        var settings = new LLamaSharpPromptExecutionSettings
        {
            FunctionChoiceBehavior = FunctionChoiceBehavior.Auto(),
        };

        string input = "介绍一下香港"; // "Introduce Hong Kong"

        // Error: settings is not passed here, and the streamed reply terminates partway through.
        await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input))
        {
            if (string.IsNullOrEmpty(response.Content))
            {
                continue;
            }
            var ret = response.Content;
            Console.Write(ret);
        }
    }
}

class Program2
{
    public static async Task Main()
    {
        var tool = new SemanticKernelLlamaSharp();
        await tool.RunAsync();
    }
}
The identical code works correctly once the settings argument is passed to GetStreamingChatMessageContentsAsync:

using LLama;
using LLama.Common;
using LLamaSharp.SemanticKernel;
using LLamaSharp.SemanticKernel.ChatCompletion;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using System.Text;

public class SemanticKernelLlamaSharp
{
    public async Task RunAsync()
    {
        Console.OutputEncoding = Encoding.UTF8;

        var modelPath = @"G:\model\Qwen3-4B-Instruct-2507-GGUF\Qwen3-4B-Instruct-2507-Q4_K_M1.gguf";
        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            GpuLayerCount = 65,
            Encoding = Encoding.UTF8,
        };

        var model = LLamaWeights.LoadFromFile(parameters);
        var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        var builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<IChatCompletionService>("local-llama", new LLamaSharpChatCompletion(executor));
        var kernel = builder.Build();
        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        var settings = new LLamaSharpPromptExecutionSettings
        {
            FunctionChoiceBehavior = FunctionChoiceBehavior.Auto(),
        };

        string input = "介绍一下香港"; // "Introduce Hong Kong"

        // With settings passed explicitly, the full reply is streamed without issue.
        await foreach (var response in chatCompletionService.GetStreamingChatMessageContentsAsync(input, settings))
        {
            if (string.IsNullOrEmpty(response.Content))
            {
                continue;
            }
            var ret = response.Content;
            Console.Write(ret);
        }
    }
}

class Program2
{
    public static async Task Main()
    {
        var tool = new SemanticKernelLlamaSharp();
        await tool.RunAsync();
    }
}
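If not every call site can be updated to pass settings, one possible mitigation is to supply default execution settings when the service is registered. This is only a minimal sketch, assuming the LLamaSharpChatCompletion constructor in the installed LLamaSharp.SemanticKernel version accepts an optional defaultRequestSettings parameter (not verified here):

    // Sketch only: assumes the installed LLamaSharpChatCompletion constructor takes an optional
    // defaultRequestSettings argument, so these settings apply when a call omits them.
    var defaultSettings = new LLamaSharpPromptExecutionSettings
    {
        FunctionChoiceBehavior = FunctionChoiceBehavior.Auto(),
    };
    builder.Services.AddKeyedSingleton<IChatCompletionService>(
        "local-llama",
        new LLamaSharpChatCompletion(executor, defaultSettings));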