Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[prometheus] Fixed a race condition happening in simultaneous OpenMetrics and PlainText requests #5517

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c1af823
Reset cached target info cursor when OpenMetricsRequested is false
hez2010 Apr 8, 2024
1c843d0
Add tests and fix race condition
hez2010 Apr 9, 2024
224d1bd
Add CHANGELOG
hez2010 Apr 9, 2024
677b121
Oops
hez2010 Apr 9, 2024
90c348c
Merge branch 'main' into fix/prometheus_cursor
vishweshbankwar Apr 9, 2024
6e5549f
Apply code suggestion
hez2010 Apr 9, 2024
286d23d
Fix the race condition and add parallel tests
hez2010 Apr 11, 2024
7df9d9c
Update CHANGELOG.md
hez2010 Apr 11, 2024
c2e10df
Merge branch 'main' into fix/prometheus_cursor
hez2010 Apr 11, 2024
0e2551a
Unsubscribe from OnCollect in dtor
hez2010 Apr 11, 2024
00149ef
more fixes
hez2010 Apr 11, 2024
3558e9f
Return two views to simplify the implementation
hez2010 Apr 11, 2024
cb95766
Revert changes to PrometheusExporter
hez2010 Apr 11, 2024
19ae570
Reuse HttpClient in tests
hez2010 Apr 11, 2024
63f32d6
Fix incorrect CompareExchange
hez2010 Apr 11, 2024
7ee3712
Make test faster and add timeout
hez2010 Apr 11, 2024
580257e
Fix linefeeds and add volatile
hez2010 Apr 11, 2024
fb54fc8
Fix markdown
hez2010 Apr 11, 2024
7b3b31e
Merge branch 'main' into fix/prometheus_cursor
CodeBlanch Apr 12, 2024
033148c
Merge branch 'main' into fix/prometheus_cursor
vishweshbankwar Apr 17, 2024
aaf7e43
Adding TODO and CHANGELOG
hez2010 Apr 18, 2024
017dd39
Merge branch 'main' into fix/prometheus_cursor
CodeBlanch Apr 18, 2024
2804a7d
fix lf
hez2010 Apr 19, 2024
fd01eb2
Merge branch 'fix/prometheus_cursor' of github.com:hez2010/openteleme…
hez2010 Apr 19, 2024
665e390
Merge branch 'main' into fix/prometheus_cursor
reyang Apr 19, 2024
45dadb2
Rework the metrics collection
hez2010 Apr 19, 2024
3ee6067
Use CompareExchange
hez2010 Apr 19, 2024
d930f97
Fix invalid assert
hez2010 Apr 20, 2024
0269b93
Fix fromCache
hez2010 Apr 20, 2024
3e003f8
Add an assert
hez2010 Apr 20, 2024
aed502c
typo
hez2010 Apr 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

* Fix a race condition in Prometheus exporter where a non-OpenMetrics request following by an OpenMetrics request would cause malformed response

Check failure on line 5 in src/OpenTelemetry.Exporter.Prometheus.AspNetCore/CHANGELOG.md

View workflow job for this annotation

GitHub Actions / lint-md / run-markdownlint

Line length

src/OpenTelemetry.Exporter.Prometheus.AspNetCore/CHANGELOG.md:5:81 MD013/line-length Line length [Expected: 80; Actual: 144] https://github.com/DavidAnson/markdownlint/blob/v0.34.0/doc/md013.md
([#5517](https://github.com/open-telemetry/opentelemetry-dotnet/pull/5517))

## 1.8.0-rc.1

Released 2024-Mar-27
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ internal sealed class PrometheusCollectionManager
private int globalLockState;
private ArraySegment<byte> previousDataView;
private DateTime? previousDataViewGeneratedAtUtc;
private bool previousIsOpenMetricsRequested;
private int readerCount;
private bool collectionRunning;
private TaskCompletionSource<CollectionResponse> collectionTcs;
Expand All @@ -46,7 +47,8 @@ public Task<CollectionResponse> EnterCollect(bool openMetricsRequested)
// last successful collect, return the previous view.
if (this.previousDataViewGeneratedAtUtc.HasValue
&& this.scrapeResponseCacheDurationMilliseconds > 0
&& this.previousDataViewGeneratedAtUtc.Value.AddMilliseconds(this.scrapeResponseCacheDurationMilliseconds) >= DateTime.UtcNow)
&& this.previousDataViewGeneratedAtUtc.Value.AddMilliseconds(this.scrapeResponseCacheDurationMilliseconds) >= DateTime.UtcNow
&& openMetricsRequested == this.previousIsOpenMetricsRequested)
{
Interlocked.Increment(ref this.readerCount);
this.ExitGlobalLock();
Expand All @@ -58,7 +60,7 @@ public Task<CollectionResponse> EnterCollect(bool openMetricsRequested)
}

// If a collection is already running, return a task to wait on the result.
if (this.collectionRunning)
if (this.collectionRunning && openMetricsRequested == this.previousIsOpenMetricsRequested)
{
if (this.collectionTcs == null)
{
Expand All @@ -79,6 +81,7 @@ public Task<CollectionResponse> EnterCollect(bool openMetricsRequested)
// Start a collection on the current thread.
this.collectionRunning = true;
this.previousDataViewGeneratedAtUtc = null;
this.previousIsOpenMetricsRequested = openMetricsRequested;
Interlocked.Increment(ref this.readerCount);
this.ExitGlobalLock();

Expand Down Expand Up @@ -213,6 +216,10 @@ private ExportResult OnCollect(Batch<Metric> metrics)
}
}
}
else
{
this.targetInfoBufferLength = -1;
hez2010 marked this conversation as resolved.
Show resolved Hide resolved
}

foreach (var metric in metrics)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,83 @@ public Task PrometheusExporterMiddlewareIntegration_UseOpenMetricsVersionHeader(
acceptHeader: "application/openmetrics-text; version=1.0.0");
}

[Fact]
public async Task PrometheusExporterMiddlewareIntegration_MultipleRequests()
hez2010 marked this conversation as resolved.
Show resolved Hide resolved
{
using var host = await StartTestHostAsync(
app => app.UseOpenTelemetryPrometheusScrapingEndpoint());

var tags = new KeyValuePair<string, object>[]
{
new KeyValuePair<string, object>("key1", "value1"),
new KeyValuePair<string, object>("key2", "value2"),
};

using var meter = new Meter(MeterName, MeterVersion);

var beginTimestamp = DateTimeOffset.Now.ToUnixTimeMilliseconds();

var counter = meter.CreateCounter<double>("counter_double");
counter.Add(100.18D, tags);
counter.Add(0.99D, tags);

bool[] requestOpenMetricsTestCases = [true, false, true, false];

foreach (var requestOpenMetrics in requestOpenMetricsTestCases)
{
using var client = host.GetTestClient();
client.DefaultRequestHeaders.Add("Accept", requestOpenMetrics ? "application/openmetrics-text" : "text/plain");

using var response = await client.GetAsync("/metrics");

var endTimestamp = DateTimeOffset.Now.ToUnixTimeMilliseconds();

Assert.Equal(HttpStatusCode.OK, response.StatusCode);
Assert.True(response.Content.Headers.Contains("Last-Modified"));

if (requestOpenMetrics)
{
Assert.Equal("application/openmetrics-text; version=1.0.0; charset=utf-8", response.Content.Headers.ContentType.ToString());
}
else
{
Assert.Equal("text/plain; charset=utf-8; version=0.0.4", response.Content.Headers.ContentType.ToString());
}

string content = (await response.Content.ReadAsStringAsync()).ReplaceLineEndings();

string expected = requestOpenMetrics
? $$"""
# TYPE target info
# HELP target Target metadata
target_info{service_name="my_service",service_instance_id="id1"} 1
# TYPE otel_scope_info info
# HELP otel_scope_info Scope metadata
otel_scope_info{otel_scope_name="{{MeterName}}"} 1
# TYPE counter_double_total counter
counter_double_total{otel_scope_name="{{MeterName}}",otel_scope_version="{{MeterVersion}}",key1="value1",key2="value2"} 101.17 (\d+\.\d{3})
# EOF

""".ReplaceLineEndings()
: $$"""
# TYPE counter_double_total counter
counter_double_total{otel_scope_name="{{MeterName}}",otel_scope_version="{{MeterVersion}}",key1="value1",key2="value2"} 101.17 (\d+)
# EOF

""".ReplaceLineEndings();

var matches = Regex.Matches(content, "^" + expected + "$");

Assert.Single(matches);

var timestamp = long.Parse(matches[0].Groups[1].Value.Replace(".", string.Empty));

Assert.True(beginTimestamp <= timestamp && timestamp <= endTimestamp);
}

await host.StopAsync();
}

private static async Task RunPrometheusExporterMiddlewareIntegrationTest(
string path,
Action<IApplicationBuilder> configure,
Expand All @@ -260,26 +337,7 @@ private static async Task RunPrometheusExporterMiddlewareIntegrationTest(
{
var requestOpenMetrics = acceptHeader.StartsWith("application/openmetrics-text");

using var host = await new HostBuilder()
.ConfigureWebHost(webBuilder => webBuilder
.UseTestServer()
.ConfigureServices(services =>
{
if (registerMeterProvider)
{
services.AddOpenTelemetry().WithMetrics(builder => builder
.ConfigureResource(x => x.Clear().AddService("my_service", serviceInstanceId: "id1"))
.AddMeter(MeterName)
.AddPrometheusExporter(o =>
{
configureOptions?.Invoke(o);
}));
}

configureServices?.Invoke(services);
})
.Configure(configure))
.StartAsync();
using var host = await StartTestHostAsync(configure, configureServices, registerMeterProvider, configureOptions);

var tags = new KeyValuePair<string, object>[]
{
Expand Down Expand Up @@ -323,21 +381,27 @@ private static async Task RunPrometheusExporterMiddlewareIntegrationTest(
Assert.Equal("text/plain; charset=utf-8; version=0.0.4", response.Content.Headers.ContentType.ToString());
}

string content = await response.Content.ReadAsStringAsync();

string expected = requestOpenMetrics
? "# TYPE target info\n"
+ "# HELP target Target metadata\n"
+ "target_info{service_name='my_service',service_instance_id='id1'} 1\n"
+ "# TYPE otel_scope_info info\n"
+ "# HELP otel_scope_info Scope metadata\n"
+ $"otel_scope_info{{otel_scope_name='{MeterName}'}} 1\n"
+ "# TYPE counter_double_total counter\n"
+ $"counter_double_total{{otel_scope_name='{MeterName}',otel_scope_version='{MeterVersion}',key1='value1',key2='value2'}} 101.17 (\\d+\\.\\d{{3}})\n"
+ "# EOF\n"
: "# TYPE counter_double_total counter\n"
+ $"counter_double_total{{otel_scope_name='{MeterName}',otel_scope_version='{MeterVersion}',key1='value1',key2='value2'}} 101.17 (\\d+)\n"
+ "# EOF\n";
string content = (await response.Content.ReadAsStringAsync()).ReplaceLineEndings();

string expected = requestOpenMetrics
? $$"""
# TYPE target info
# HELP target Target metadata
target_info{service_name="my_service",service_instance_id="id1"} 1
# TYPE otel_scope_info info
# HELP otel_scope_info Scope metadata
otel_scope_info{otel_scope_name="{{MeterName}}"} 1
# TYPE counter_double_total counter
counter_double_total{otel_scope_name="{{MeterName}}",otel_scope_version="{{MeterVersion}}",key1="value1",key2="value2"} 101.17 (\d+\.\d{3})
# EOF

""".ReplaceLineEndings()
: $$"""
# TYPE counter_double_total counter
counter_double_total{otel_scope_name="{{MeterName}}",otel_scope_version="{{MeterVersion}}",key1="value1",key2="value2"} 101.17 (\d+)
# EOF

""".ReplaceLineEndings();

var matches = Regex.Matches(content, ("^" + expected + "$").Replace('\'', '"'));

Expand All @@ -356,5 +420,33 @@ private static async Task RunPrometheusExporterMiddlewareIntegrationTest(

await host.StopAsync();
}

private static Task<IHost> StartTestHostAsync(
Action<IApplicationBuilder> configure,
Action<IServiceCollection> configureServices = null,
bool registerMeterProvider = true,
Action<PrometheusAspNetCoreOptions> configureOptions = null)
{
return new HostBuilder()
.ConfigureWebHost(webBuilder => webBuilder
.UseTestServer()
.ConfigureServices(services =>
{
if (registerMeterProvider)
{
services.AddOpenTelemetry().WithMetrics(builder => builder
.ConfigureResource(x => x.Clear().AddService("my_service", serviceInstanceId: "id1"))
.AddMeter(MeterName)
.AddPrometheusExporter(o =>
{
configureOptions?.Invoke(o);
}));
}

configureServices?.Invoke(services);
})
.Configure(configure))
.StartAsync();
}
}
#endif
Loading