diff --git a/central/detection/alertmanager/alert_manager_impl.go b/central/detection/alertmanager/alert_manager_impl.go
index 79aa95f442d34..2c067da5676fc 100644
--- a/central/detection/alertmanager/alert_manager_impl.go
+++ b/central/detection/alertmanager/alert_manager_impl.go
@@ -48,6 +48,9 @@ func getDeploymentIDsFromAlerts(alertSlices ...[]*storage.Alert) set.StringSet {
 
 // AlertAndNotify is the main function that implements the AlertManager interface
 func (d *alertManagerImpl) AlertAndNotify(ctx context.Context, currentAlerts []*storage.Alert, oldAlertFilters ...AlertFilterOption) (set.StringSet, error) {
+	defer observeDurationMs(alertAndNotifyDuration)()
+	alertAndNotifyIncomingCount.Observe(float64(len(currentAlerts)))
+
 	// Merge the old and the new alerts.
 	newAlerts, updatedAlerts, toBeResolvedAlerts, err := d.mergeManyAlerts(ctx, currentAlerts, oldAlertFilters...)
 	if err != nil {
@@ -377,6 +380,8 @@ func (d *alertManagerImpl) mergeManyAlerts(
 	incomingAlerts []*storage.Alert,
 	oldAlertFilters ...AlertFilterOption,
 ) (newAlerts, updatedAlerts, toBeResolvedAlerts []*storage.Alert, err error) {
+	defer observeDurationMs(mergeManyAlertsDuration)()
+
 	qb := search.NewQueryBuilder().AddExactMatches(
 		search.ViolationState,
 		storage.ViolationState_ACTIVE.String(),
@@ -389,6 +394,7 @@ func (d *alertManagerImpl) mergeManyAlerts(
 		err = errors.Wrapf(err, "couldn't load previous alerts (query was %s)", qb.Query())
 		return
 	}
+	mergeManyAlertsPreviousCount.Observe(float64(len(previousAlerts)))
 
 	// Merge any alerts that have new and old alerts.
 	for _, alert := range incomingAlerts {
@@ -445,6 +451,7 @@ func (d *alertManagerImpl) mergeManyAlerts(
 			}
 		}
 	}
+	recordAlertOutcomes(len(newAlerts), len(updatedAlerts), len(toBeResolvedAlerts))
 	return
 }
 
diff --git a/central/detection/alertmanager/metrics.go b/central/detection/alertmanager/metrics.go
new file mode 100644
index 0000000000000..eb58d2c23230f
--- /dev/null
+++ b/central/detection/alertmanager/metrics.go
@@ -0,0 +1,85 @@
+package alertmanager
+
+import (
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/stackrox/rox/pkg/metrics"
+)
+
+// observeDurationMs returns a function that, when called, observes the elapsed
+// time in milliseconds since observeDurationMs was invoked. The observed value
+// keeps its fractional part: rounding down to whole milliseconds would record
+// sub-millisecond calls as 0 and could place a duration just above a bucket
+// bound into the bucket below it. Intended for use with defer:
+//
+//	defer observeDurationMs(myHistogram)()
+func observeDurationMs(h prometheus.Histogram) func() {
+	start := time.Now()
+	return func() {
+		h.Observe(float64(time.Since(start)) / float64(time.Millisecond))
+	}
+}
+
+// Histograms and counters recorded by AlertAndNotify and mergeManyAlerts.
+// Duration histograms use bucket upper bounds of 4ms, doubling up to 8192ms;
+// count histograms use upper bounds of 1, doubling up to 8192.
+var (
+	alertAndNotifyDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: metrics.PrometheusNamespace,
+		Subsystem: metrics.CentralSubsystem.String(),
+		Name:      "alert_and_notify_duration_ms",
+		Help:      "End-to-end duration of AlertAndNotify in milliseconds",
+		Buckets:   prometheus.ExponentialBuckets(4, 2, 12),
+	})
+
+	alertAndNotifyIncomingCount = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: metrics.PrometheusNamespace,
+		Subsystem: metrics.CentralSubsystem.String(),
+		Name:      "alert_and_notify_incoming_count",
+		Help:      "Number of incoming alerts per AlertAndNotify call",
+		Buckets:   prometheus.ExponentialBuckets(1, 2, 14),
+	})
+
+	mergeManyAlertsDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: metrics.PrometheusNamespace,
+		Subsystem: metrics.CentralSubsystem.String(),
+		Name:      "merge_many_alerts_duration_ms",
+		Help:      "Duration of mergeManyAlerts in milliseconds",
+		Buckets:   prometheus.ExponentialBuckets(4, 2, 12),
+	})
+
+	mergeManyAlertsPreviousCount = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: metrics.PrometheusNamespace,
+		Subsystem: metrics.CentralSubsystem.String(),
+		Name:      "merge_many_alerts_previous_count",
+		Help:      "Number of previous alerts fetched from DB per mergeManyAlerts call",
+		Buckets:   prometheus.ExponentialBuckets(1, 2, 14),
+	})
+
+	alertOutcomeTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: metrics.PrometheusNamespace,
+		Subsystem: metrics.CentralSubsystem.String(),
+		Name:      "alert_outcome_total",
+		Help:      "Cumulative count of alert outcomes from mergeManyAlerts",
+	}, []string{"outcome"})
+)
+
+// recordAlertOutcomes adds the given counts to alertOutcomeTotal under the
+// "new", "updated" and "resolved" outcome labels respectively.
+func recordAlertOutcomes(newCount, updatedCount, resolvedCount int) {
+	alertOutcomeTotal.WithLabelValues("new").Add(float64(newCount))
+	alertOutcomeTotal.WithLabelValues("updated").Add(float64(updatedCount))
+	alertOutcomeTotal.WithLabelValues("resolved").Add(float64(resolvedCount))
+}
+
+// init passes the package's collectors to metrics.EmplaceCollector.
+func init() {
+	metrics.EmplaceCollector(
+		alertAndNotifyDuration,
+		alertAndNotifyIncomingCount,
+		mergeManyAlertsDuration,
+		mergeManyAlertsPreviousCount,
+		alertOutcomeTotal,
+	)
+}