Chaos engineering support for DistributedLeasing library. Inject controlled failures to test resilience. FOR TESTING ONLY - NOT FOR PRODUCTION.
$ dotnet add package DistributedLeasing.ChaosEngineering
Chaos engineering toolkit for testing distributed leasing resilience
This package provides controlled failure injection for testing the resilience and fault tolerance of distributed leasing systems. Use it to validate your application's behavior under various failure scenarios.
⚠️ FOR TESTING ONLY - NOT FOR PRODUCTION USE
✅ Controlled Failure Injection - Simulate specific failure scenarios
✅ Configurable Probability - Set failure rates for chaos testing
✅ Latency Injection - Add artificial delays to test timeout handling
✅ Intermittent Failures - Simulate network hiccups and transient errors
✅ Integration Testing - Validate error handling and retry logic
✅ Decorator Pattern - Wraps any lease provider for easy testing
Use This Package When:
Do NOT Use This Package:
dotnet add package DistributedLeasing.ChaosEngineering
Install only in test projects, not in production code.
using DistributedLeasing.ChaosEngineering;
using DistributedLeasing.Azure.Blob;
// Create the real lease provider that the chaos provider will wrap
var actualProvider = new BlobLeaseProvider(new BlobLeaseProviderOptions
{
ContainerUri = testContainerUri,
Credential = credential
});
// Wrap with chaos provider; each probability is a value between 0.0 and 1.0
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
AcquireFailureProbability = 0.3, // 30% of acquisitions fail
RenewFailureProbability = 0.2, // 20% of renewals fail
ReleaseFailureProbability = 0.1 // 10% of releases fail
});
// Use in tests: create the lease manager from the chaos provider, not the real one
var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test-lock");
var lease = await leaseManager.TryAcquireAsync();
// Test your error handling for the case where no lease was obtained
if (lease == null)
{
// Your code should handle this gracefully
Assert.NotNull(fallbackMechanism);
}
// Inject a random delay between MinLatency and MaxLatency into about half of all operations
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
    MinLatency = TimeSpan.FromMilliseconds(100), // Minimum 100ms delay
    MaxLatency = TimeSpan.FromMilliseconds(500), // Maximum 500ms delay
    LatencyProbability = 0.5 // 50% of operations delayed
});
// Create the manager from THIS provider so the latency settings above are the ones exercised
var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test-lock");
// Test timeout handling; `using` disposes the source and releases its internal timer
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(1));
try
{
    var lease = await leaseManager.AcquireAsync(cancellationToken: cts.Token);
}
catch (OperationCanceledException)
{
    // Verify your code handles timeouts correctly
    Assert.True(true);
}
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
    // Fail 2 out of every 5 operations (true = fail). The failures come first so
    // the retry loop below is guaranteed to observe at least one failed attempt.
    FailurePattern = new[] { true, true, false, false, false }
});
// Create the manager from THIS provider so the pattern above is the one exercised
var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test-lock");
// Test retry logic
int attempts = 0;
ILease? lease = null;
while (lease == null && attempts < 5)
{
    lease = await leaseManager.TryAcquireAsync();
    attempts++;
}
Assert.NotNull(lease); // Should succeed after retries
Assert.True(attempts > 1); // Verify retries happened
/// <summary>
/// Options passed to <c>ChaosLeaseProvider</c> to configure injected failures and latency.
/// With the defaults shown here, every probability is 0.0 and both latencies are zero.
/// </summary>
public class ChaosOptions
{
/// <summary>Probability of an acquire operation failing (0.0 - 1.0). Defaults to 0.0.</summary>
public double AcquireFailureProbability { get; set; } = 0.0;
/// <summary>Probability of a renew operation failing (0.0 - 1.0). Defaults to 0.0.</summary>
public double RenewFailureProbability { get; set; } = 0.0;
/// <summary>Probability of a release operation failing (0.0 - 1.0). Defaults to 0.0.</summary>
public double ReleaseFailureProbability { get; set; } = 0.0;
/// <summary>Probability of any operation being delayed (0.0 - 1.0). Defaults to 0.0.</summary>
public double LatencyProbability { get; set; } = 0.0;
/// <summary>Minimum latency to inject. Defaults to <see cref="TimeSpan.Zero"/>.</summary>
public TimeSpan MinLatency { get; set; } = TimeSpan.Zero;
/// <summary>Maximum latency to inject. Defaults to <see cref="TimeSpan.Zero"/>.</summary>
public TimeSpan MaxLatency { get; set; } = TimeSpan.Zero;
/// <summary>
/// Custom failure pattern (overrides probabilities). Defaults to <c>null</c>.
/// In the usage samples a <c>true</c> entry marks an operation that fails and the
/// pattern repeats once exhausted - NOTE(review): confirm against the implementation.
/// </summary>
public bool[]? FailurePattern { get; set; } = null;
/// <summary>Exception type to throw on failure. Defaults to <c>LeaseException</c>.</summary>
public Type ExceptionType { get; set; } = typeof(LeaseException);
/// <summary>Custom exception message used for injected failures.</summary>
public string FailureMessage { get; set; } = "Chaos engineering failure";
}
[Fact]
public async Task Should_Retry_On_Acquisition_Failure()
{
    // Pattern: the first two attempts fail, the third succeeds
    var options = new ChaosOptions
    {
        FailurePattern = new[] { true, true, false }
    };
    var chaosProvider = new ChaosLeaseProvider(actualProvider, options);
    var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test");

    // Keep trying (up to five times) with a short pause after every miss
    ILease? lease = null;
    int attempt = 0;
    while (attempt < 5)
    {
        lease = await leaseManager.TryAcquireAsync();
        if (lease != null)
        {
            break;
        }
        await Task.Delay(100);
        attempt++;
    }

    Assert.NotNull(lease);
}
[Fact]
public async Task Should_Detect_Renewal_Failure()
{
    var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
    {
        RenewFailureProbability = 1.0 // Always fail renewal
    });
    var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test");
    var lease = await leaseManager.AcquireAsync(TimeSpan.FromSeconds(5));

    // A TaskCompletionSource gives a thread-safe signal from the event handler
    // (which may run on another thread) and lets the test finish as soon as it fires.
    var renewalFailed = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
    lease.LeaseRenewalFailed += (sender, e) => renewalFailed.TrySetResult(true);

    // Wait for the auto-renewal attempt, but never longer than 4 seconds
    await Task.WhenAny(renewalFailed.Task, Task.Delay(TimeSpan.FromSeconds(4)));

    Assert.True(renewalFailed.Task.IsCompleted);
}
[Fact]
public async Task Should_Handle_Lease_Loss()
{
    var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
    {
        RenewFailureProbability = 1.0 // Every renewal fails, so the lease cannot be kept
    });
    var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test");
    var lease = await leaseManager.AcquireAsync(TimeSpan.FromSeconds(5));

    // A TaskCompletionSource gives a thread-safe signal from the event handler
    // (which may run on another thread) and lets the test finish as soon as it fires.
    var leaseLost = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
    lease.LeaseLost += (sender, e) => leaseLost.TrySetResult(true);

    // Wait for expiration, but never longer than 6 seconds (lease duration + 1s margin)
    await Task.WhenAny(leaseLost.Task, Task.Delay(TimeSpan.FromSeconds(6)));

    Assert.True(leaseLost.Task.IsCompleted);
}
[Fact]
public async Task Should_Timeout_On_High_Latency()
{
    // Every operation is delayed by 5-10 seconds, far longer than the 2 second timeout below
    var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
    {
        LatencyProbability = 1.0,
        MinLatency = TimeSpan.FromSeconds(5),
        MaxLatency = TimeSpan.FromSeconds(10)
    });
    var leaseManager = await chaosProvider.CreateLeaseManagerAsync("test");

    // `using` disposes the source and releases its internal timer when the test ends
    using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2));

    // ThrowsAnyAsync also accepts derived types such as TaskCanceledException;
    // ThrowsAsync<T> would fail unless the exception is exactly OperationCanceledException.
    await Assert.ThrowsAnyAsync<OperationCanceledException>(async () =>
    {
        await leaseManager.AcquireAsync(cancellationToken: cts.Token);
    });
}
[Fact]
public async Task Should_Handle_Concurrent_Acquisition_With_Failures()
{
    // Half of all acquisitions are made to fail
    var options = new ChaosOptions
    {
        AcquireFailureProbability = 0.5
    };
    var chaosProvider = new ChaosLeaseProvider(actualProvider, options);

    // Ten contenders race for the same lock, each through its own lease manager
    var contenders = Enumerable.Range(0, 10).Select(async _ =>
    {
        var manager = await chaosProvider.CreateLeaseManagerAsync("shared-lock");
        var lease = await manager.TryAcquireAsync();
        return lease;
    });
    var leases = await Task.WhenAll(contenders);

    // At most one should succeed (due to exclusivity)
    var winners = leases.Where(lease => lease != null).Count();
    Assert.True(winners <= 1);
}
// Throw TimeoutException (instead of the default LeaseException) with a custom
// message for the acquisitions that are made to fail
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
AcquireFailureProbability = 0.5,
ExceptionType = typeof(TimeoutException),
FailureMessage = "Simulated timeout"
});
// Fail every other operation (false = succeed, true = fail)
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
FailurePattern = new[] { false, true, false, true, false, true }
});
// Pattern repeats: succeed, fail, succeed, fail, ...
// Combine failure injection and latency injection in a single configuration
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
AcquireFailureProbability = 0.2, // 20% acquisition failures
RenewFailureProbability = 0.1, // 10% renewal failures
LatencyProbability = 0.3, // 30% operations delayed
MinLatency = TimeSpan.FromMilliseconds(50),
MaxLatency = TimeSpan.FromMilliseconds(200)
});
// Simulates realistic production chaos
/// <summary>
/// xUnit integration-test skeleton that wraps a real blob lease provider in a
/// chaos provider before each test.
/// </summary>
public class LeasingIntegrationTests : IAsyncLifetime
{
    // Assigned in InitializeAsync, which xUnit runs before every test;
    // null! silences the non-nullable-field warning for that late initialization.
    private ILeaseProvider _chaosProvider = null!;
    private ILeaseProvider _actualProvider = null!;

    /// <summary>Builds the real provider and wraps it so 30% of acquisitions fail.</summary>
    public Task InitializeAsync()
    {
        _actualProvider = new BlobLeaseProvider(testOptions);
        _chaosProvider = new ChaosLeaseProvider(_actualProvider, new ChaosOptions
        {
            AcquireFailureProbability = 0.3
        });

        // Nothing here is awaited, so return a completed task instead of marking the method async
        return Task.CompletedTask;
    }

    [Fact]
    public async Task TestLeaseResilience()
    {
        var manager = await _chaosProvider.CreateLeaseManagerAsync("test");
        // Test logic here
    }

    /// <summary>Runs after every test; add cleanup here when the test creates resources.</summary>
    public Task DisposeAsync()
    {
        // Cleanup
        return Task.CompletedTask;
    }
}
/// <summary>
/// NUnit test skeleton that wraps a Cosmos lease provider in a chaos provider
/// configured to fail half of all renewals.
/// </summary>
[TestFixture]
public class LeasingChaosTests
{
    // Assigned in Setup, which NUnit runs before every test;
    // null! silences the non-nullable-field warning for that late initialization.
    private ILeaseProvider _chaosProvider = null!;

    [SetUp]
    public Task Setup()
    {
        var actualProvider = new CosmosLeaseProvider(testOptions);
        _chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
        {
            RenewFailureProbability = 0.5
        });

        // Nothing here is awaited, so return a completed task instead of marking the method async
        return Task.CompletedTask;
    }

    [Test]
    public async Task TestRenewalFailure()
    {
        // Test logic
    }
}
<!-- ✅ Good - Only in test project -->
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<PackageReference Include="DistributedLeasing.ChaosEngineering" Version="5.0.0" />
</ItemGroup>
</Project>
<!-- ❌ Bad - Don't reference in production projects -->
// ✅ Start conservative
var chaosOptions = new ChaosOptions
{
AcquireFailureProbability = 0.1 // 10%
};
// ❌ Avoid extreme probabilities initially
var chaosOptions = new ChaosOptions
{
AcquireFailureProbability = 0.9 // 90% - too high for initial testing
};
// Tries to acquire a lease up to maxAttempts times, backing off linearly
// (100ms, 200ms, ...) between attempts. Returns the lease, or null when every
// attempt came back empty (including when maxAttempts is zero or negative).
async Task<ILease?> AcquireWithRetry(ILeaseManager manager, int maxAttempts)
{
    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        var lease = await manager.TryAcquireAsync();
        if (lease != null)
        {
            return lease;
        }

        // Only back off when another attempt follows; waiting after the last
        // failure would just delay the null result.
        if (attempt < maxAttempts)
        {
            await Task.Delay(TimeSpan.FromMilliseconds(100 * attempt));
        }
    }

    return null;
}
// Test the retry logic
var lease = await AcquireWithRetry(leaseManager, 5);
Assert.NotNull(lease);
[Theory]
[InlineData(1.0, 0.0, 0.0)] // Acquire failures
[InlineData(0.0, 1.0, 0.0)] // Renew failures
[InlineData(0.0, 0.0, 1.0)] // Release failures
public async Task Should_Handle_All_Failure_Types(
    double acquireProb,
    double renewProb,
    double releaseProb)
{
    // Each data row sets exactly one failure probability to 1.0 and the others to 0.0
    var options = new ChaosOptions
    {
        AcquireFailureProbability = acquireProb,
        RenewFailureProbability = renewProb,
        ReleaseFailureProbability = releaseProb
    };
    var chaosProvider = new ChaosLeaseProvider(actualProvider, options);

    // Test logic for each failure type
}
// ✅ Good - Clear documentation
var chaosProvider = new ChaosLeaseProvider(actualProvider, new ChaosOptions
{
// Simulate 20% network failures during acquisition
AcquireFailureProbability = 0.2,
// Simulate occasional renewal delays (100-500ms)
LatencyProbability = 0.3,
MinLatency = TimeSpan.FromMilliseconds(100),
MaxLatency = TimeSpan.FromMilliseconds(500)
});
Problem: Probability set to 0.0 or failure pattern incorrect.
Solution: Verify chaos options are configured:
Assert.True(chaosOptions.AcquireFailureProbability > 0.0);
Problem: Probability too high for test stability.
Solution: Reduce failure probabilities:
var chaosOptions = new ChaosOptions
{
AcquireFailureProbability = 0.1 // Lower from 0.5
};
Problem: Random failures cause flaky tests.
Solution: Use deterministic patterns:
var chaosOptions = new ChaosOptions
{
FailurePattern = new[] { false, true, false } // Deterministic
};
┌─────────────────────────────────────────────────────┐
│ Test Code │
│ ┌──────────────────────────────────────────┐ │
│ │ ChaosLeaseProvider (Decorator) │ │
│ │ ┌────────────────────────────────────┐ │ │
│ │ │ Chaos Logic: │ │ │
│ │ │ • Failure injection │ │ │
│ │ │ • Latency simulation │ │ │
│ │ │ • Pattern-based failures │ │ │
│ │ └────────────────────────────────────┘ │ │
│ │ │ │ │
│ │ │ Delegates to │ │
│ │ ▼ │ │
│ │ ┌────────────────────────────────────┐ │ │
│ │ │ Actual Provider │ │ │
│ │ │ (Blob, Cosmos, Redis) │ │ │
│ │ └────────────────────────────────────┘ │ │
│ └──────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────┘
MIT License - see LICENSE for details.