Skip to content

Commit

Permalink
Change to use unchecked exporter
Browse files Browse the repository at this point in the history
  • Loading branch information
ying-jeanne committed Oct 5, 2024
1 parent 046046f commit 9e4a0b3
Show file tree
Hide file tree
Showing 16 changed files with 842 additions and 369 deletions.
47 changes: 24 additions & 23 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -3995,28 +3995,6 @@
"fieldType": "string",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "cost_attribution_label",
"required": false,
"desc": "Label used to define the cost attribution label. For each write request, the cost attribution is obtained from the first non-empty cost attribution label from the first timeseries in the incoming list of timeseries. Specific distributor and ingester metrics will be further separated adding a 'attrib' cost attribution's label's value. Applies to the following metrics: cortex_distributor_received_samples_total, cortex_ingester_active_series and cortex_discarded_samples_attribution_total.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "validation.cost-attribution-label",
"fieldType": "string",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_cost_attribution_per_user",
"required": false,
"desc": "The maximum number of cost attribution labels per user, across the cluster. 0 to disable cost attribution.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "validation.max-cost-attribution-per-user",
"fieldType": "int",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_fetched_chunks_per_query",
Expand Down Expand Up @@ -4325,6 +4303,28 @@
"fieldType": "int",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "cost_attribution_label",
"required": false,
"desc": "Label used to define the cost attribution label. For each write request, the cost attribution is obtained from the first non-empty cost attribution label from the first timeseries in the incoming list of timeseries. Specific distributor and ingester metrics will be further separated adding a 'attrib' cost attribution's label's value. Applies to the following metrics: cortex_distributor_received_samples_total, cortex_ingester_active_series and cortex_discarded_samples_attribution_total.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "validation.cost-attribution-label",
"fieldType": "string",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_cost_attribution_per_user",
"required": false,
"desc": "The maximum number of cost attribution labels per user, across the cluster. 0 to disable cost attribution.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "validation.max-cost-attribution-per-user",
"fieldType": "int",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "ruler_evaluation_delay_duration",
Expand Down Expand Up @@ -18165,9 +18165,10 @@
"kind": "field",
"name": "custom_registry_path",
"required": false,
"desc": "",
"desc": "Defines a custom path for the registry. When specified, Mimir will expose cost attribution metrics through this custom path instead of using the default Prometheus registry.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "custom-registry-path",
"fieldType": "string",
"fieldCategory": "advanced"
},
Expand Down
2 changes: 2 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,8 @@ Usage of ./cmd/mimir/mimir:
Configuration file to load.
-cost-attribution-eviction-interval duration
[experimental] Interval at which to evict inactive cost attributions. (default 10m0s)
-custom-registry-path string
Defines a custom path for the registry. When specified, Mimir will expose cost attribution metrics through this custom path instead of using the default Prometheus registry.
-debug.block-profile-rate int
Fraction of goroutine blocking events that are reported in the blocking profile. 1 to include every blocking event in the profile, 0 to disable.
-debug.mutex-profile-fraction int
Expand Down
141 changes: 141 additions & 0 deletions pkg/costattribution/caimpl/managerImpl.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package caimpl

import (
"context"
"fmt"
"time"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/services"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/model/labels"

"github.com/grafana/mimir/pkg/util/validation"
)

// ManagerImpl is the cost attribution manager. It embeds the service created in
// NewManager and keeps the per-user attribution trackers whose metrics it
// exposes through its Collect and Describe methods.
type ManagerImpl struct {
services.Service
// logger receives the message emitted when an attribution value is replaced by invalidValue.
logger log.Logger
// attributionTracker holds the per-user trackers, the limits and the mutex guarding them.
attributionTracker *AttributionTrackerGroup
// inactiveTimeout is handed to purgeInactiveAttributions on every timer iteration.
inactiveTimeout time.Duration
// invalidValue is substituted for an attribution value when attributionLimitExceeded reports true.
invalidValue string
}

// NewManager builds the cost attribution manager for the given limits.
// The returned manager wraps a timer service named "cost attribution manager"
// that invokes the manager's iteration callback once per cleanupInterval;
// inactiveTimeout is stored and forwarded to the purge performed by that callback.
// Attribution values rejected because of the per-user limit are reported as
// "__unaccounted__".
func NewManager(cleanupInterval, inactiveTimeout time.Duration, logger log.Logger, limits *validation.Overrides) *ManagerImpl {
	manager := new(ManagerImpl)
	manager.attributionTracker = newAttributionTrackerGroup(limits)
	manager.inactiveTimeout = inactiveTimeout
	manager.logger = logger
	manager.invalidValue = "__unaccounted__"

	// No start or stop hooks are needed: only the periodic iteration does work.
	timer := services.NewTimerService(cleanupInterval, nil, manager.iteration, nil)
	manager.Service = timer.WithName("cost attribution manager")
	return manager
}

// iteration is the periodic callback of the manager's timer service. It asks the
// tracker group to purge attributions using the configured inactive timeout and
// always returns nil, so a purge never stops the service.
func (m *ManagerImpl) iteration(_ context.Context) error {
	timeout := m.inactiveTimeout
	m.attributionTracker.purgeInactiveAttributions(timeout)
	return nil
}

// EnabledForUser reports whether cost attribution is enabled for userID, which is
// the case exactly when the limits define a non-empty cost attribution label for
// that user.
func (m *ManagerImpl) EnabledForUser(userID string) bool {
	label := m.attributionTracker.limits.CostAttributionLabel(userID)
	return len(label) > 0
}

// GetUserAttributionLabel returns the cost attribution label for userID as
// reported by getUserAttributionLabelFromCache. When cost attribution is not
// enabled for the user, it calls deleteUserTracerFromCache for that user and
// returns the empty string.
func (m *ManagerImpl) GetUserAttributionLabel(userID string) string {
	if !m.EnabledForUser(userID) {
		m.attributionTracker.deleteUserTracerFromCache(userID)
		return ""
	}
	return m.attributionTracker.getUserAttributionLabelFromCache(userID)
}

// GetUserAttributionLimit returns the cost attribution limit for userID as
// reported by getUserAttributionLimitFromCache. When cost attribution is not
// enabled for the user, it calls deleteUserTracerFromCache for that user and
// returns 0.
func (m *ManagerImpl) GetUserAttributionLimit(userID string) int {
	if !m.EnabledForUser(userID) {
		m.attributionTracker.deleteUserTracerFromCache(userID)
		return 0
	}
	return m.attributionTracker.getUserAttributionLimitFromCache(userID)
}

// UpdateAttributionTimestamp records, at time now, the attribution value that the
// series labels lbs carry for the user's cost attribution label, and returns the
// value that was recorded.
//
// It returns the empty string when cost attribution is not enabled for the user
// (after calling deleteUserTracerFromCache for that user) or when no attribution
// label is available for the user. When attributionLimitExceeded reports true
// for the value, the manager's invalid value is recorded and returned instead,
// and an error is logged.
func (m *ManagerImpl) UpdateAttributionTimestamp(user string, lbs labels.Labels, now time.Time) string {
	trackers := m.attributionTracker

	// Nothing to track for users without cost attribution.
	if !m.EnabledForUser(user) {
		trackers.deleteUserTracerFromCache(user)
		return ""
	}

	// An enabled user is expected to have a label; treat a missing one as "nothing to attribute".
	labelName := trackers.getUserAttributionLabelFromCache(user)
	if labelName == "" {
		return ""
	}

	attribution := lbs.Get(labelName)
	if trackers.attributionLimitExceeded(user, attribution, now) {
		attribution = m.invalidValue
		msg := fmt.Sprintf("set attribution label to \"%s\" since user has reached the limit of cost attribution labels", m.invalidValue)
		level.Error(m.logger).Log("msg", msg)
	}

	trackers.updateAttributionCacheForUser(user, labelName, attribution, now)
	return attribution
}

// SetActiveSeries sets the active series gauge for the given user and attribution
// to value. The attribution is first passed through adjustUserAttribution, so it
// may be replaced by the manager's invalid value. The call is a no-op when no
// tracker exists for userID.
func (m *ManagerImpl) SetActiveSeries(userID, attribution string, value float64) {
	attribution = m.adjustUserAttribution(userID, attribution)

	// Only the trackers map is read here, so a shared (read) lock is sufficient
	// and matches IncrementDiscardedSamples, IncrementReceivedSamples and Collect.
	m.attributionTracker.mu.RLock()
	defer m.attributionTracker.mu.RUnlock()
	if tracker, exists := m.attributionTracker.trackersByUserID[userID]; exists {
		tracker.activeSeriesPerUserAttribution.WithLabelValues(userID, attribution).Set(value)
	}
}

// IncrementDiscardedSamples adds value to the discarded samples counter of the
// given user and attribution. The attribution is first passed through
// adjustUserAttribution, so it may be replaced by the manager's invalid value.
// The call is a no-op when no tracker exists for userID.
func (m *ManagerImpl) IncrementDiscardedSamples(userID, attribution string, value float64) {
	adjusted := m.adjustUserAttribution(userID, attribution)

	group := m.attributionTracker
	group.mu.RLock()
	defer group.mu.RUnlock()

	userTracker, found := group.trackersByUserID[userID]
	if !found {
		return
	}
	userTracker.discardedSampleAttribution.WithLabelValues(userID, adjusted).Add(value)
}

// IncrementReceivedSamples adds value to the received samples counter of the
// given user and attribution. The attribution is first passed through
// adjustUserAttribution, so it may be replaced by the manager's invalid value.
// The call is a no-op when no tracker exists for userID.
func (m *ManagerImpl) IncrementReceivedSamples(userID, attribution string, value float64) {
	adjusted := m.adjustUserAttribution(userID, attribution)

	group := m.attributionTracker
	group.mu.RLock()
	defer group.mu.RUnlock()

	userTracker, found := group.trackersByUserID[userID]
	if !found {
		return
	}
	userTracker.receivedSamplesAttribution.WithLabelValues(userID, adjusted).Add(value)
}

// adjustUserAttribution returns attribution unchanged unless
// attributionLimitExceeded reports true for it at the current time, in which
// case the manager's invalid value is returned instead.
func (m *ManagerImpl) adjustUserAttribution(userID, attribution string) string {
	exceeded := m.attributionTracker.attributionLimitExceeded(userID, attribution, time.Now())
	if !exceeded {
		return attribution
	}
	return m.invalidValue
}

// Collect implements prometheus.Collector. While holding the tracker group's
// read lock, it forwards out to the Collect method of every per-user tracker.
func (m *ManagerImpl) Collect(out chan<- prometheus.Metric) {
	group := m.attributionTracker
	group.mu.RLock()
	defer group.mu.RUnlock()

	for _, userTracker := range group.trackersByUserID {
		userTracker.Collect(out)
	}
}

// Describe implements prometheus.Collector. It deliberately sends no
// descriptors, which makes the manager an unchecked collector.
func (m *ManagerImpl) Describe(_ chan<- *prometheus.Desc) {
	// Intentionally empty: this is an unchecked collector.
}
Loading

0 comments on commit 9e4a0b3

Please sign in to comment.