PolyPrompt is a lightweight, unified .NET library for chat completions, text generation, embeddings, and model management across Ollama, OpenAI, and Google Gemini APIs.
dotnet add package PolyPrompt
PolyPrompt is a lightweight, unified .NET library for chat completions, text generation, embeddings, and model management across Ollama, OpenAI, and Google Gemini APIs. Write your LLM integration code once and swap providers without changing your application logic.
PolyPrompt provides a single, consistent API surface for interacting with multiple LLM providers. Instead of learning three different SDKs with different conventions, response formats, and streaming patterns, you use one set of methods that work identically across all supported providers.
PolyPrompt is a good fit when you need to:
PolyPrompt may not be the right choice if you need:
dotnet add package PolyPrompt
PolyPrompt targets both .NET 8.0 and .NET 10.0.
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
ChatResponse response = await client.ChatAsync("What is the capital of France?");
Console.WriteLine(response.Text);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OpenAiClient client = new OpenAiClient("https://api.openai.com", "sk-your-api-key");
client.Model = "gpt-4o";
ChatResponse response = await client.ChatAsync("What is the capital of France?");
Console.WriteLine(response.Text);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using GeminiClient client = new GeminiClient(
"https://generativelanguage.googleapis.com",
"your-api-key");
client.Model = "gemini-2.5-flash";
ChatResponse response = await client.ChatAsync("What is the capital of France?");
Console.WriteLine(response.Text);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
client.SystemPrompt = "You are a helpful assistant that responds in haiku format.";
client.Temperature = 0.7;
client.MaxTokens = 256;
ChatResponse response = await client.ChatAsync("Tell me about the ocean.");
if (response.Success)
{
Console.WriteLine(response.Text);
Console.WriteLine("Runtime: " + response.OverallRuntimeMs + " ms");
}
else
{
Console.WriteLine("Error: " + response.Error);
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
using PolyPrompt.Options;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
OllamaChatCompletionOptions options = new OllamaChatCompletionOptions();
options.Temperature = 0.5;
options.TopP = 0.9;
options.MaxTokens = 512;
options.TopK = 40;
options.RepeatPenalty = 1.1;
options.Seed = 42;
options.SystemPrompt = "You are a concise technical writer.";
ChatResponse response = await client.ChatAsync("Explain dependency injection.", options);
Console.WriteLine(response.Text);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OpenAiClient client = new OpenAiClient("https://api.openai.com", "sk-your-api-key");
client.Model = "gpt-4o";
ChatStreamingResponse stream = await client.ChatStreamingAsync("Write a short story about a robot.");
await foreach (ChatStreamingChunk chunk in stream.Chunks)
{
if (!string.IsNullOrEmpty(chunk.Text))
{
Console.Write(chunk.Text);
}
}
Console.WriteLine();
Console.WriteLine("Time to first token: " + stream.TimeToFirstTokenMs + " ms");
Console.WriteLine("Tokens/sec: " + stream.OverallTokensPerSecond.ToString("F1"));
Console.WriteLine("Total chunks: " + stream.ChunkCount);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
OllamaEmbeddingOptions options = new OllamaEmbeddingOptions();
options.Model = "all-minilm";
EmbeddingResponse response = await client.EmbedAsync("The quick brown fox jumps over the lazy dog.", options);
if (response.Success && response.Embeddings.Count > 0)
{
float[] vector = response.Embeddings[0].Embedding;
Console.WriteLine("Dimensions: " + vector.Length);
Console.WriteLine("First 5 values: " + string.Join(", ", vector.Take(5)));
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OpenAiClient client = new OpenAiClient("https://api.openai.com", "sk-your-api-key");
OpenAiEmbeddingOptions options = new OpenAiEmbeddingOptions();
options.Model = "text-embedding-3-small";
options.Dimensions = 256;
List<string> documents = new List<string>
{
"Machine learning is a subset of artificial intelligence.",
"Neural networks are inspired by biological neurons.",
"Deep learning uses multiple layers of neural networks."
};
EmbeddingResponse response = await client.EmbedAsync(documents, options);
if (response.Success)
{
for (int i = 0; i < response.Embeddings.Count; i++)
{
Console.WriteLine("Document " + i + ": " + response.Embeddings[i].Embedding.Length + " dimensions");
}
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
GenerationResponse response = await client.GenerateAsync("Once upon a time, in a land far away,");
Console.WriteLine(response.Text);
Console.WriteLine("Runtime: " + response.OverallRuntimeMs + " ms");
using PolyPrompt.Clients;
using PolyPrompt.Models;
using GeminiClient client = new GeminiClient(
"https://generativelanguage.googleapis.com",
"your-api-key");
client.Model = "gemini-2.5-flash";
GenerationStreamingResponse stream = await client.GenerateStreamingAsync("Write a limerick about coding.");
await foreach (GenerationStreamingChunk chunk in stream.Chunks)
{
if (!string.IsNullOrEmpty(chunk.Text))
{
Console.Write(chunk.Text);
}
}
Console.WriteLine();
Console.WriteLine("Time to first token: " + stream.TimeToFirstTokenMs + " ms");
Console.WriteLine("Tokens/sec: " + stream.OverallTokensPerSecond.ToString("F1"));
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
await foreach (ModelInformation model in client.ListModelsAsync())
{
Console.WriteLine(model.Name
+ (model.DisplayName != null ? " (" + model.DisplayName + ")" : "")
+ (model.SizeBytes.HasValue ? " [" + (model.SizeBytes.Value / 1_000_000_000.0).ToString("F1") + " GB]" : ""));
}
using PolyPrompt.Clients;
using OllamaClient client = new OllamaClient("http://localhost:11434");
bool exists = await client.ModelExistsAsync("gemma3:4b");
Console.WriteLine("gemma3:4b exists: " + exists);
// Also matches without tags: "gemma3" matches "gemma3:latest"
bool existsNoTag = await client.ModelExistsAsync("gemma3");
Console.WriteLine("gemma3 exists: " + existsNoTag);
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
ModelInformation? info = await client.GetModelInformationAsync("gemma3:4b");
if (info != null)
{
Console.WriteLine("Name: " + info.Name);
Console.WriteLine("Modified: " + info.ModifiedUtc);
foreach (KeyValuePair<string, string?> kv in info.Metadata)
{
Console.WriteLine(" " + kv.Key + ": " + kv.Value);
}
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
bool success = await client.PullModelAsync("gemma3:4b", async (ModelPullProgress progress) =>
{
if (progress.PercentComplete.HasValue)
{
Console.Write("\r" + progress.Status + " " + progress.PercentComplete.Value.ToString("F1") + "%");
}
else
{
Console.WriteLine(progress.Status);
}
});
Console.WriteLine();
Console.WriteLine(success ? "Pull succeeded." : "Pull failed.");
using PolyPrompt.Clients;
using OllamaClient client = new OllamaClient("http://localhost:11434");
bool deleted = await client.DeleteModelAsync("gemma3:4b");
Console.WriteLine(deleted ? "Model deleted." : "Delete failed.");
using PolyPrompt.Clients;
using GeminiClient client = new GeminiClient(
"https://generativelanguage.googleapis.com",
"your-api-key");
bool reachable = await client.ValidateConnectivityAsync();
Console.WriteLine(reachable ? "Connected." : "Cannot reach provider.");
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
ChatResponse response = await client.ChatAsync("Hello!");
foreach (CompletionCallDetail detail in client.CallDetails)
{
Console.WriteLine(detail.Method + " " + detail.Url);
Console.WriteLine(" Status: " + detail.StatusCode);
Console.WriteLine(" Time: " + detail.ResponseTimeMs + " ms");
Console.WriteLine(" Success: " + detail.Success);
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
using OllamaClient client = new OllamaClient("http://localhost:11434");
client.Model = "gemma3:4b";
using CancellationTokenSource cts = new CancellationTokenSource(TimeSpan.FromSeconds(10));
try
{
ChatResponse response = await client.ChatAsync("Write a very long essay.", token: cts.Token);
Console.WriteLine(response.Text);
}
catch (OperationCanceledException)
{
Console.WriteLine("Request was cancelled.");
}
using PolyPrompt.Clients;
using PolyPrompt.Models;
CompletionClientBase CreateClient(string provider, string endpoint, string? apiKey)
{
switch (provider)
{
case "ollama":
return new OllamaClient(endpoint, apiKey);
case "openai":
return new OpenAiClient(endpoint, apiKey);
case "gemini":
return new GeminiClient(endpoint, apiKey);
default:
throw new ArgumentException("Unknown provider: " + provider);
}
}
// Same code works regardless of provider
using CompletionClientBase client = CreateClient("ollama", "http://localhost:11434", null);
client.Model = "gemma3:4b";
ChatResponse chat = await client.ChatAsync("Hello!");
Console.WriteLine(chat.Text);
await foreach (ModelInformation model in client.ListModelsAsync())
{
Console.WriteLine(" " + model.Name);
}
| Property | Type | Default | Description |
|---|---|---|---|
| Endpoint | string | varies | API endpoint URL (read-only) |
| ApiKey | string? | null | API key (read-only) |
| Model | string | varies | Model name for requests |
| MaxTokens | int | 4096 | Maximum tokens to generate (1 to 10,000,000) |
| TimeoutMs | int | 120000 | HTTP timeout in milliseconds (1,000 to 600,000) |
| Temperature | double? | null | Sampling temperature (0.0 to 2.0) |
| TopP | double? | null | Nucleus sampling threshold (0.0 to 1.0) |
| SystemPrompt | string? | null | System prompt for chat completions |
| CallDetails | List<CompletionCallDetail> | empty | Recorded HTTP call details |
| Method | Description |
|---|---|
| ChatAsync | Non-streaming chat completion |
| ChatStreamingAsync | Streaming chat completion with timing metrics |
| EmbedAsync(string) | Generate embedding for a single text |
| EmbedAsync(List<string>) | Generate embeddings for a batch of texts |
| GenerateAsync | Non-streaming text generation |
| GenerateStreamingAsync | Streaming text generation with timing metrics |
| ListModelsAsync | List available models (returns IAsyncEnumerable<ModelInformation>) |
| ModelExistsAsync | Check if a specific model exists |
| GetModelInformationAsync | Get detailed information about a model |
| PullModelAsync | Pull/download a model with progress callbacks (Ollama only) |
| DeleteModelAsync | Delete a model (Ollama only) |
| ValidateConnectivityAsync | Verify the provider is reachable |
Each provider exposes option classes that extend the base options with provider-specific parameters:
| Provider | Chat Options | Embedding Options | Generation Options |
|---|---|---|---|
| Ollama | OllamaChatCompletionOptions | OllamaEmbeddingOptions | OllamaGenerationOptions |
| OpenAI | OpenAiChatCompletionOptions | OpenAiEmbeddingOptions | OpenAiGenerationOptions |
| Gemini | GeminiChatCompletionOptions | GeminiEmbeddingOptions | GeminiGenerationOptions |
Ollama-specific parameters: ContextLength, TopK, RepeatPenalty, Seed, MinP, RepeatLastN
OpenAI-specific parameters: FrequencyPenalty, PresencePenalty, Seed, Dimensions, EncodingFormat, Echo, Suffix, Logprobs
Gemini-specific parameters: TopK, CandidateCount, PresencePenalty, FrequencyPenalty, TaskType, Title
| Provider | Default Inference Model | Suggested Embedding Model |
|---|---|---|
| Ollama | gemma3:4b | all-minilm |
| OpenAI | gpt-4o-mini | text-embedding-3-small |
| Gemini | gemini-2.5-flash | gemini-embedding-001 |
| Feature | Ollama | OpenAI | Gemini |
|---|---|---|---|
| Chat (non-streaming) | Yes | Yes | Yes |
| Chat (streaming) | Yes | Yes | Yes |
| Text Generation | Yes | Legacy only | Yes |
| Embeddings (single) | Yes | Yes | Yes |
| Embeddings (batch) | Yes | Yes | Yes |
| List Models | Yes | Yes | Yes |
| Model Exists | Yes | Yes | Yes |
| Get Model Info | Yes | Yes | Yes |
| Pull Model | Yes | No | No |
| Delete Model | Yes | No | No |
| Validate Connectivity | Yes | Yes | Yes |
PolyPrompt/
├── src/
│ ├── PolyPrompt/ # Core library (NuGet package)
│ │ ├── Clients/ # CompletionClientBase, OllamaClient, OpenAiClient, GeminiClient
│ │ ├── Models/ # Request/response data models
│ │ └── Options/ # Provider-specific option classes
│ ├── OllamaConsole/ # Interactive Ollama test harness
│ ├── OpenAIConsole/ # Interactive OpenAI test harness
│ ├── GeminiConsole/ # Interactive Gemini test harness
│ └── Test.Automated/ # Automated test suite
└── assets/
└── logo.png
dotnet restore src/PolyPrompt.sln
dotnet build src/PolyPrompt.sln
# Ollama
dotnet run --project src/Test.Automated -- ollama http://localhost:11434
# OpenAI
dotnet run --project src/Test.Automated -- openai https://api.openai.com sk-your-key gpt-4o text-embedding-3-small
# Gemini
dotnet run --project src/Test.Automated -- gemini https://generativelanguage.googleapis.com your-key gemini-2.5-flash gemini-embedding-001
Have a bug to report or a feature to request? Please open an issue on GitHub:
https://github.com/jchristn/PolyPrompt/issues
Want to ask a question or start a conversation? Use GitHub Discussions:
https://github.com/jchristn/PolyPrompt/discussions
PolyPrompt is available under the MIT License. See the LICENSE.md file for full details.