feat: replace App.Metrics with prometheus-net

This commit is contained in:
sam 2024-08-20 20:19:24 +02:00
parent df8af75dd4
commit be01fb1d53
8 changed files with 113 additions and 137 deletions

View file

@ -1,8 +1,8 @@
using System.ComponentModel;
using System.Diagnostics;
using System.Runtime.InteropServices;
using App.Metrics;
using Catalogger.Backend.Cache;
using System.Text.Json;
using System.Web;
using Catalogger.Backend.Cache.InMemoryCache;
using Catalogger.Backend.Extensions;
using Humanizer;
@ -10,10 +10,7 @@ using Remora.Commands.Attributes;
using Remora.Commands.Groups;
using Remora.Discord.API.Abstractions.Objects;
using Remora.Discord.API.Abstractions.Rest;
using Remora.Discord.Commands.Contexts;
using Remora.Discord.Commands.Extensions;
using Remora.Discord.Commands.Feedback.Services;
using Remora.Discord.Commands.Services;
using Remora.Discord.Extensions.Embeds;
using Remora.Discord.Gateway;
using Remora.Results;
@ -24,16 +21,18 @@ namespace Catalogger.Backend.Bot.Commands;
[Group("catalogger")]
public class MetaCommands(
ILogger logger,
IClock clock,
IMetrics metrics,
Config config,
DiscordGatewayClient client,
IFeedbackService feedbackService,
ContextInjectionService contextInjection,
IInviteCache inviteCache,
GuildCache guildCache,
ChannelCache channelCache,
IDiscordRestChannelAPI channelApi) : CommandGroup
{
// Logger scoped to MetaCommands.
private readonly ILogger _logger = logger.ForContext<MetaCommands>();

// Shared by every instance of this command group. This class never disposes the client, so a
// per-instance HttpClient would leave its handler and sockets behind each time the group is
// constructed; a single static instance avoids that.
private static readonly HttpClient _client = new();
[Command("ping")]
[Description("Ping pong! See the bot's latency")]
public async Task<IResult> PingAsync()
@ -53,16 +52,15 @@ public class MetaCommands(
inline: true);
embed.AddField("Memory usage", memoryUsage.Bytes().Humanize(), inline: true);
var messagesReceived = metrics.Snapshot.GetForContext("Bot").Meters
.FirstOrDefault(m => m.MultidimensionalName == CataloggerMetrics.MessagesReceived.Name)?.Value;
if (messagesReceived != null)
embed.AddField("Messages received", $"{messagesReceived.OneMinuteRate * 60:F1}/m", true);
var messageCount = metrics.Snapshot.GetForContext("Bot").Gauges
.FirstOrDefault(m => m.MultidimensionalName == CataloggerMetrics.MessagesStored.Name)?.Value ?? 0;
var messageRate = await MessagesRate();
embed.AddField("Messages received",
messageRate != null
? $"{messageRate / 5:F1}/m\n({CataloggerMetrics.MessagesReceived.Value:N0} since last restart)"
: $"{CataloggerMetrics.MessagesReceived.Value:N0} since last restart",
true);
embed.AddField("Numbers",
$"{messageCount:N0} messages from {guildCache.Size:N0} servers\nCached {channelCache.Size:N0} channels",
$"{CataloggerMetrics.MessagesStored.Value:N0} messages from {guildCache.Size:N0} servers\nCached {channelCache.Size:N0} channels",
inline: false);
IEmbed[] embeds = [embed.Build().GetOrThrow()];
@ -70,16 +68,35 @@ public class MetaCommands(
return (Result)await channelApi.EditMessageAsync(msg.ChannelID, msg.ID, content: "", embeds: embeds);
}
// TODO: add more checks around response format, configurable prometheus endpoint
/// <summary>
/// Queries the Prometheus HTTP API on localhost:9090 for the change in
/// <c>catalogger_received_messages</c> over the last five minutes.
/// </summary>
/// <returns>
/// The five-minute delta, or <c>null</c> when metrics are disabled in the configuration, the request
/// fails, or the response does not contain a usable sample.
/// </returns>
private async Task<double?> MessagesRate()
{
    if (!config.Logging.EnableMetrics) return null;

    try
    {
        var query = HttpUtility.UrlEncode("delta(catalogger_received_messages[5m])");

        // Dispose the response once it has been read so the underlying connection is released.
        using var resp = await _client.GetAsync($"http://localhost:9090/api/v1/query?query={query}");
        resp.EnsureSuccessStatusCode();

        var data = await resp.Content.ReadFromJsonAsync<PrometheusResponse>();
        _logger.Debug("Raw json: {Data}", JsonSerializer.Serialize(data));

        // Treat a missing, empty or too-short result as "no rate available" instead of indexing
        // blindly and reporting the resulting exception as a warning.
        if (data?.data?.result is not { Length: > 0 } results
            || results[0]?.value is not { Length: >= 2 } sample
            || sample[1] is not JsonElement { ValueKind: JsonValueKind.String } element)
        {
            _logger.Debug("Prometheus returned no usable sample for the message rate");
            return null;
        }

        var rawNumber = element.GetString();
        _logger.Debug("Raw data: {Raw}", rawNumber);

        // The value is machine-formatted, so parse it with the invariant culture; the current
        // culture could otherwise misread the decimal separator.
        return double.TryParse(
            rawNumber,
            System.Globalization.NumberStyles.Float,
            System.Globalization.CultureInfo.InvariantCulture,
            out var rate)
            ? rate
            : null;
    }
    catch (Exception e)
    {
        _logger.Warning(e, "Failed querying Prometheus for message rate");
        return null;
    }
}
// Minimal model of the JSON body that MessagesRate deserializes: only the
// data -> result[] -> value[] path is mapped. The member names are lower-case, presumably to
// mirror the JSON keys, which is why the ReSharper naming inspections are suppressed here.
// ReSharper disable InconsistentNaming, ClassNeverInstantiated.Local
// Response envelope; only its "data" member is read.
private record PrometheusResponse(PrometheusData data);
// Holds the array of result entries; MessagesRate reads the first one.
private record PrometheusData(PrometheusResult[] result);
// A single result entry. Elements are kept as object; MessagesRate casts value[1] to JsonElement
// and reads it as a string.
private record PrometheusResult(object[] value);
// ReSharper restore InconsistentNaming, ClassNeverInstantiated.Local
}

View file

@ -1,5 +1,4 @@
using System.Text.RegularExpressions;
using App.Metrics;
using Catalogger.Backend.Cache.InMemoryCache;
using Catalogger.Backend.Database;
using Catalogger.Backend.Database.Models;
@ -19,8 +18,7 @@ public class MessageCreateResponder(
DatabaseContext db,
MessageRepository messageRepository,
UserCache userCache,
PkMessageHandler pkMessageHandler,
IMetrics metrics)
PkMessageHandler pkMessageHandler)
: IResponder<IMessageCreate>
{
private readonly ILogger _logger = logger.ForContext<MessageCreateResponder>();
@ -28,7 +26,7 @@ public class MessageCreateResponder(
public async Task<Result> RespondAsync(IMessageCreate msg, CancellationToken ct = default)
{
userCache.UpdateUser(msg.Author);
metrics.Measure.Meter.Mark(CataloggerMetrics.MessagesReceived);
CataloggerMetrics.MessagesReceived.Inc();
if (!msg.GuildID.IsDefined())
{

View file

@ -7,7 +7,6 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="App.Metrics" Version="4.3.0"/>
<PackageReference Include="EFCore.NamingConventions" Version="8.0.3"/>
<PackageReference Include="EntityFrameworkCore.Exceptions.PostgreSQL" Version="8.1.2"/>
<PackageReference Include="LazyCache" Version="2.4.0"/>
@ -24,6 +23,8 @@
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL.NodaTime" Version="8.0.4"/>
<PackageReference Include="Polly.Core" Version="8.4.1"/>
<PackageReference Include="Polly.RateLimiting" Version="8.4.1"/>
<PackageReference Include="prometheus-net" Version="8.2.1" />
<PackageReference Include="prometheus-net.AspNetCore" Version="8.2.1" />
<PackageReference Include="Remora.Discord" Version="2024.2.0"/>
<PackageReference Include="Serilog" Version="4.0.1"/>
<PackageReference Include="Serilog.AspNetCore" Version="8.0.1"/>

View file

@ -1,88 +1,39 @@
using App.Metrics;
using App.Metrics.Gauge;
using App.Metrics.Meter;
using App.Metrics.Timer;
using Prometheus;
namespace Catalogger.Backend;
/// <summary>
/// Prometheus metrics exposed by Catalogger. Every metric is created once, when this class is
/// first used, and shared for the lifetime of the process.
/// </summary>
public static class CataloggerMetrics
{
    /// <summary>Messages received by the bot; incremented by the message-create responder.</summary>
    // NOTE(review): the code visible here only ever increments this metric, so a Counter may be a
    // better fit. Switching would also affect the delta() query used by the ping command — confirm
    // before changing the type.
    public static readonly Gauge MessagesReceived =
        Metrics.CreateGauge("catalogger_received_messages", "Number of messages Catalogger has received");

    /// <summary>Scratch value updated by MetricsCollectionService on each collection pass.</summary>
    public static long MessageRateMinute { get; set; }

    /// <summary>Size of the guild cache at the last collection pass.</summary>
    public static readonly Gauge GuildsCached =
        Metrics.CreateGauge("catalogger_cache_guilds", "Number of guilds in the cache");

    /// <summary>Size of the channel cache at the last collection pass.</summary>
    public static readonly Gauge ChannelsCached =
        Metrics.CreateGauge("catalogger_cache_channels", "Number of channels in the cache");

    /// <summary>Size of the user cache at the last collection pass.</summary>
    public static readonly Gauge UsersCached =
        Metrics.CreateGauge("catalogger_cache_users", "Number of users in the cache");

    /// <summary>Row count of the messages table at the last collection pass.</summary>
    // The help text previously repeated the users-cache description.
    public static readonly Gauge MessagesStored =
        Metrics.CreateGauge("catalogger_stored_messages", "Number of messages stored in the database");

    /// <summary>Duration of each metrics collection pass.</summary>
    public static readonly Summary MetricsCollectionTime =
        Metrics.CreateSummary("catalogger_time_metrics", "Time it took to collect metrics");

    // The process gauges below are static readonly fields like the metrics above, rather than
    // properties that call Metrics.CreateGauge again on every access.

    /// <summary>Working set of the current process, in bytes.</summary>
    public static readonly Gauge ProcessPhysicalMemory =
        Metrics.CreateGauge("catalogger_process_physical_memory", "Process physical memory");

    /// <summary>Virtual memory size of the current process, in bytes.</summary>
    public static readonly Gauge ProcessVirtualMemory =
        Metrics.CreateGauge("catalogger_process_virtual_memory", "Process virtual memory");

    /// <summary>Private memory size of the current process, in bytes.</summary>
    public static readonly Gauge ProcessPrivateMemory =
        Metrics.CreateGauge("catalogger_process_private_memory", "Process private memory");

    /// <summary>Number of threads in the current process.</summary>
    public static readonly Gauge ProcessThreads =
        Metrics.CreateGauge("catalogger_process_threads", "Process thread count");

    /// <summary>Number of handles opened by the current process.</summary>
    public static readonly Gauge ProcessHandles =
        Metrics.CreateGauge("catalogger_process_handles", "Process handle count");
}

View file

@ -13,6 +13,9 @@ public class Config
{
public LogEventLevel LogEventLevel { get; init; } = LogEventLevel.Debug;
public bool LogQueries { get; init; } = false;
public int MetricsPort { get; init; } = 5001;
public bool EnableMetrics { get; init; } = true;
}
public class DatabaseConfig
@ -37,9 +40,7 @@ public class Config
{
public string Host { get; init; } = "localhost";
public int Port { get; init; } = 5000;
public int? MetricsPort { get; init; }
public string BaseUrl { get; init; } = null!;
public string Address => $"http://{Host}:{Port}";
public string MetricsAddress => $"http://{Host}:{MetricsPort ?? Port}";
}
}

View file

@ -73,11 +73,11 @@ public static class StartupExtensions
.AddSingleton<UserCache>()
.AddSingleton<PluralkitApiService>()
.AddScoped<IEncryptionService, EncryptionService>()
.AddSingleton<MetricsCollectionService>()
.AddScoped<MessageRepository>()
.AddSingleton<WebhookExecutorService>()
.AddSingleton<PkMessageHandler>()
.AddSingleton(InMemoryDataService<Snowflake, ChannelCommandData>.Instance)
.AddHostedService<MetricsCollectionService>()
.AddSingleton<GuildFetchService>()
.AddHostedService(serviceProvider => serviceProvider.GetRequiredService<GuildFetchService>());

View file

@ -1,8 +1,9 @@
using App.Metrics;
using Catalogger.Backend.Bot.Commands;
using Catalogger.Backend.Database;
using Catalogger.Backend.Extensions;
using Catalogger.Backend.Services;
using Newtonsoft.Json.Serialization;
using Prometheus;
using Remora.Commands.Extensions;
using Remora.Discord.API.Abstractions.Gateway.Commands;
using Remora.Discord.Commands.Extensions;
@ -12,6 +13,7 @@ using Remora.Discord.Hosting.Extensions;
using Remora.Discord.Interactivity.Extensions;
using Remora.Discord.Pagination.Extensions;
using Serilog;
using Metrics = Prometheus.Metrics;
var builder = WebApplication.CreateBuilder(args);
var config = builder.AddConfiguration();
@ -50,10 +52,12 @@ builder.Host
.AddInteractionGroup<ChannelCommandsComponents>()
);
// Add metrics
// TODO: add actual reporter
var metricsBuilder = AppMetrics.CreateDefaultBuilder();
builder.Services.AddSingleton<IMetrics>(metricsBuilder.Build());
// Add metric server
// If metrics are disabled (Logging.EnableMetrics = false), also add a background service that updates
// metrics every minute, as some commands rely on them.
builder.Services.AddMetricServer(o => o.Port = (ushort)config.Logging.MetricsPort);
if (!config.Logging.EnableMetrics)
builder.Services.AddHostedService<BackgroundMetricsCollectionService>();
builder.Services
.AddDbContext<DatabaseContext>()
@ -68,6 +72,7 @@ await app.Initialize();
app.UseSerilogRequestLogging();
app.UseRouting();
app.UseHttpMetrics();
app.UseSwagger();
app.UseSwaggerUI();
app.UseCors();
@ -76,5 +81,9 @@ app.MapControllers();
app.Urls.Clear();
app.Urls.Add(config.Web.Address);
// Make sure metrics are updated whenever Prometheus scrapes them
Metrics.DefaultRegistry.AddBeforeCollectCallback(async ct =>
await app.Services.GetRequiredService<MetricsCollectionService>().CollectMetricsAsync(ct));
app.Run();
Log.CloseAndFlush();

View file

@ -1,10 +1,9 @@
using System.Diagnostics;
using App.Metrics;
using Catalogger.Backend.Cache.InMemoryCache;
using Catalogger.Backend.Database;
using Humanizer;
using Microsoft.EntityFrameworkCore;
using NodaTime.Extensions;
using Prometheus;
namespace Catalogger.Backend.Services;
@ -13,49 +12,49 @@ public class MetricsCollectionService(
GuildCache guildCache,
ChannelCache channelCache,
UserCache userCache,
IMetrics metrics,
IServiceProvider services) : BackgroundService
IServiceProvider services)
{
private readonly ILogger _logger = logger.ForContext<MetricsCollectionService>();
// Message count seen by the previous collection pass; null until the first pass has completed.
private long? _previousMessageCount;

/// <summary>
/// Refreshes all Catalogger gauges: cache sizes, the number of stored messages (counted in the
/// database) and memory/thread/handle figures for the current process. The time taken is recorded
/// in <see cref="CataloggerMetrics.MetricsCollectionTime"/> and logged.
/// </summary>
/// <param name="ct">Cancels the database count query.</param>
public async Task CollectMetricsAsync(CancellationToken ct = default)
{
    var timer = CataloggerMetrics.MetricsCollectionTime.NewTimer();

    // DatabaseContext is resolved from a fresh scope rather than injected into this service.
    await using var scope = services.CreateAsyncScope();
    await using var db = scope.ServiceProvider.GetRequiredService<DatabaseContext>();

    var messageCount = await db.Messages.CountAsync(ct);

    CataloggerMetrics.GuildsCached.Set(guildCache.Size);
    CataloggerMetrics.ChannelsCached.Set(channelCache.Size);
    CataloggerMetrics.UsersCached.Set(userCache.Size);
    CataloggerMetrics.MessagesStored.Set(messageCount);

    // Messages stored since the previous pass. The old expression subtracted the previous *rate*
    // from the new count, which only produced a meaningful number on the second pass. The first
    // pass has no baseline and reports 0. The interval is whatever separates two passes.
    CataloggerMetrics.MessageRateMinute = messageCount - (_previousMessageCount ?? messageCount);
    _previousMessageCount = messageCount;

    // Process is IDisposable; release it as soon as the figures have been read.
    using var process = Process.GetCurrentProcess();
    CataloggerMetrics.ProcessPhysicalMemory.Set(process.WorkingSet64);
    CataloggerMetrics.ProcessVirtualMemory.Set(process.VirtualMemorySize64);
    CataloggerMetrics.ProcessPrivateMemory.Set(process.PrivateMemorySize64);
    CataloggerMetrics.ProcessThreads.Set(process.Threads.Count);
    CataloggerMetrics.ProcessHandles.Set(process.HandleCount);

    _logger.Information("Collected metrics in {Duration}", timer.ObserveDuration());
}
}
/// <summary>
/// Hosted service that calls <see cref="MetricsCollectionService.CollectMetricsAsync"/> once a
/// minute. The application registers it only when metrics are disabled in the configuration, so
/// that the collected values stay reasonably fresh.
/// </summary>
public class BackgroundMetricsCollectionService(ILogger logger, MetricsCollectionService innerService) : BackgroundService
{
    private readonly ILogger _logger = logger.ForContext<BackgroundMetricsCollectionService>();

    /// <summary>Runs the collection loop until <paramref name="ct"/> is cancelled.</summary>
    protected override async Task ExecuteAsync(CancellationToken ct)
    {
        _logger.Information("Metrics are disabled, periodically collecting metrics manually");

        using var timer = new PeriodicTimer(1.Minutes());
        while (await timer.WaitForNextTickAsync(ct))
        {
            _logger.Debug("Collecting metrics");
            try
            {
                await innerService.CollectMetricsAsync(ct);
            }
            catch (Exception e) when (e is not OperationCanceledException)
            {
                // A single failed pass (for example a failing database query) must not end the
                // loop; log it and try again on the next tick. Cancellation still propagates.
                _logger.Error(e, "Failed to collect metrics");
            }
        }
    }
}