Manages a pool of Semantic Kernel instances with per-entry rate limiting.
$ dotnet add package Soenneker.SemanticKernel.Pool
Soenneker.SemanticKernel.Pool
A high-performance, thread-safe pool implementation for Microsoft Semantic Kernel instances with built-in rate limiting capabilities.
dotnet add package Soenneker.SemanticKernel.Pool
services.AddSemanticKernelPoolAsSingleton()
This library has several extension packages for different AI providers:
// In Program.cs or Startup.cs
/// <summary>
/// Example entry point: adds the Semantic Kernel pool to the service collection,
/// registers one kernel (with its rate limits) in the pool, and runs the web application.
/// </summary>
public class Program
{
    /// <summary>
    /// Builds the web application, registers a kernel under the key "my-kernel", and runs the app.
    /// </summary>
    /// <param name="args">Command-line arguments passed to <c>WebApplication.CreateBuilder</c>.</param>
    /// <returns>A task that completes when <c>app.RunAsync()</c> completes.</returns>
    public static async Task Main(string[] args)
    {
        var builder = WebApplication.CreateBuilder(args);

        // Add the kernel pool as a singleton
        builder.Services.AddSemanticKernelPoolAsSingleton();

        var app = builder.Build();

        // Register kernels during startup
        var kernelPool = app.Services.GetRequiredService<ISemanticKernelPool>();

        // Manually create options, or use one of the extensions mentioned above
        var options = new SemanticKernelOptions
        {
            // Placeholder value: supply the real key from configuration or a secret store, not source code.
            ApiKey = "your-api-key",
            Endpoint = "https://api.openai.com/v1",
            // Set through ModelId because the factory below reads opts.ModelId.
            ModelId = "gpt-4",
            // NOTE(review): as written the factory returns the builder chain; if KernelFactory is
            // expected to yield a built Kernel, append .Build() here — confirm against SemanticKernelOptions.
            KernelFactory = async (opts, _) =>
            {
                return Kernel.CreateBuilder()
                    .AddOpenAIChatCompletion(modelId: opts.ModelId!,
                        new OpenAIClient(new ApiKeyCredential(opts.ApiKey), new OpenAIClientOptions {Endpoint = new Uri(opts.Endpoint)}));
            },

            // Rate Limiting
            RequestsPerSecond = 10,
            RequestsPerMinute = 100,
            RequestsPerDay = 1000,
            TokensPerDay = 10000
        };

        await kernelPool.Register("my-kernel", options);

        // Add more registrations... order matters!

        await app.RunAsync();
    }
}

public class MyService
{
private readonly ISemanticKernelPool _kernelPool;
/// <summary>
/// Creates the service and stores the supplied kernel pool for later use.
/// </summary>
/// <param name="kernelPool">The kernel pool this service obtains kernels from.</param>
public MyService(ISemanticKernelPool kernelPool) => _kernelPool = kernelPool;
/// <summary>
/// Obtains a kernel from the pool, sends a single user message through its chat completion
/// service, and writes the response and the entry's remaining quota to the console.
/// </summary>
/// <returns>A task that completes once the response and quota have been written.</returns>
public async Task ProcessAsync()
{
    // Get an available kernel that's within its rate limits, preferring the first registered
    // NOTE(review): this assumes a kernel was available; check GetAvailableKernel's contract
    // for what it yields when every registered kernel is over its limits.
    var (kernel, entry) = await _kernelPool.GetAvailableKernel();

    // Get the chat completion service; GetRequiredService throws if none is registered
    // instead of returning a null that would fail later.
    var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

    // Create a chat history
    var chatHistory = new ChatHistory();
    chatHistory.AddMessage(AuthorRole.User, "What is the capital of France?");

    // Execute chat completion
    var response = await chatCompletionService.GetChatMessageContentAsync(chatHistory);
    Console.WriteLine($"Response: {response.Content}");

    // Access rate limit information through the entry
    var remainingQuota = await entry.RemainingQuota();
    Console.WriteLine($"Remaining requests - Second: {remainingQuota.Second}, Minute: {remainingQuota.Minute}, Day: {remainingQuota.Day}");
}
}