By default, return an error if metrics collide when escaped to underscores

Signed-off-by: Owen Williams <[email protected]>
prometheus · Oct 11, 2024 · 118a3c1 · 118a3c1
1 parent 93c851f
commit 118a3c1
Show file tree

Hide file tree

Showing 3 changed files with 269 additions and 10 deletions.
diff --git a/prometheus/desc.go b/prometheus/desc.go
@@ -57,6 +57,9 @@ type Desc struct {
 	// must be unique among all registered descriptors and can therefore be
 	// used as an identifier of the descriptor.
 	id uint64
+	// escapedID is similar to id, but is hashed with the metric name escaped
+	// to underscores.
+	escapedID uint64
 	// dimHash is a hash of the label names (preset and variable) and the
 	// Help string. Each Desc with the same fqName must have the same
 	// dimHash.
@@ -142,11 +145,18 @@ func (v2) NewDesc(fqName, help string, variableLabels ConstrainableLabels, const
 	}
 
 	xxh := xxhash.New()
-	for _, val := range labelValues {
+	escapedXXH := xxhash.New()
+	for i, val := range labelValues {
 		xxh.WriteString(val)
 		xxh.Write(separatorByteSlice)
+		if i == 0 {
+			val = model.EscapeName(val, model.UnderscoreEscaping)
+		}
+		escapedXXH.WriteString(val)
+		escapedXXH.Write(separatorByteSlice)
 	}
 	d.id = xxh.Sum64()
+	d.escapedID = escapedXXH.Sum64()
 	// Sort labelNames so that order doesn't matter for the hash.
 	sort.Strings(labelNames)
 	// Now hash together (in this order) the help string and the sorted

diff --git a/prometheus/registry.go b/prometheus/registry.go
@@ -66,12 +66,23 @@ func init() {
 // pre-registered.
 func NewRegistry() *Registry {
 	return &Registry{
-		collectorsByID:  map[uint64]Collector{},
-		descIDs:         map[uint64]struct{}{},
-		dimHashesByName: map[string]uint64{},
+		collectorsByID:        map[uint64]Collector{},
+		collectorsByEscapedID: map[uint64]Collector{},
+		descIDs:               map[uint64]struct{}{},
+		escapedDescIDs:        map[uint64]struct{}{},
+		dimHashesByName:       map[string]uint64{},
 	}
 }
 
+// AllowEscapedCollisions controls whether the Registry tolerates Collectors
+// whose descriptors would collide once names are escaped to underscores for
+// compatibility with older systems. Pass true only if you know your metrics
+// will never be scraped by such a system. It returns r to allow chaining.
+func (r *Registry) AllowEscapedCollisions(allow bool) *Registry {
+	r.allowEscapedCollision = allow
+	return r
+}
+
 // NewPedanticRegistry returns a registry that checks during collection if each
 // collected Metric is consistent with its reported Desc, and if the Desc has
 // actually been registered with the registry. Unchecked Collectors (those whose
@@ -258,21 +269,30 @@ func (errs MultiError) MaybeUnwrap() error {
 // Registry implements Collector to allow it to be used for creating groups of
 // metrics. See the Grouping example for how this can be done.
 type Registry struct {
-	mtx                   sync.RWMutex
-	collectorsByID        map[uint64]Collector // ID is a hash of the descIDs.
+	mtx            sync.RWMutex
+	collectorsByID map[uint64]Collector // ID is a hash of the descIDs.
+	// collectorsByEscapedID stores collectors by escapedID, only if the escaped
+	// ID is different (otherwise we can just do the lookup in the regular map).
+	collectorsByEscapedID map[uint64]Collector
 	descIDs               map[uint64]struct{}
+	// escapedDescIDs records desc ids of the escaped version of the metric, only
+	// if different from the regular name.
+	escapedDescIDs        map[uint64]struct{}
 	dimHashesByName       map[string]uint64
 	uncheckedCollectors   []Collector
 	pedanticChecksEnabled bool
+	allowEscapedCollision bool
 }
 
 // Register implements Registerer.
 func (r *Registry) Register(c Collector) error {
 	var (
 		descChan           = make(chan *Desc, capDescChan)
 		newDescIDs         = map[uint64]struct{}{}
+		newEscapedIDs      = map[uint64]struct{}{}
 		newDimHashesByName = map[string]uint64{}
 		collectorID        uint64 // All desc IDs XOR'd together.
+		escapedID          uint64
 		duplicateDescErr   error
 	)
 	go func() {
@@ -307,6 +327,24 @@ func (r *Registry) Register(c Collector) error {
 			collectorID ^= desc.id
 		}
 
+		// Unless we are in pure UTF-8 mode, also check to see if the descID is
+		// unique when all the names are escaped to underscores.
+		if !r.allowEscapedCollision {
+			// First check the primary map, then check the secondary map.
+			if _, exists := r.descIDs[desc.escapedID]; exists {
+				duplicateDescErr = fmt.Errorf("descriptor %s will collide with an existing descriptor when escaped for compatibility with non-UTF8 systems", desc)
+			}
+			if _, exists := r.escapedDescIDs[desc.escapedID]; exists {
+				duplicateDescErr = fmt.Errorf("descriptor %s will collide with an existing descriptor when escaped for compatibility with non-UTF8 systems", desc)
+			}
+		}
+		if _, exists := newEscapedIDs[desc.escapedID]; !exists {
+			if desc.escapedID != desc.id {
+				newEscapedIDs[desc.escapedID] = struct{}{}
+			}
+			escapedID ^= desc.escapedID
+		}
+
 		// Are all the label names and the help string consistent with
 		// previous descriptors of the same name?
 		// First check existing descriptors...
@@ -331,7 +369,18 @@ func (r *Registry) Register(c Collector) error {
 		r.uncheckedCollectors = append(r.uncheckedCollectors, c)
 		return nil
 	}
-	if existing, exists := r.collectorsByID[collectorID]; exists {
+
+	existing, collision := r.collectorsByID[collectorID]
+	// Unless we are in pure UTF-8 mode, we also need to check that the
+	// underscore-escaped versions of the IDs don't match.
+	if !collision && !r.allowEscapedCollision {
+		existing, collision = r.collectorsByID[escapedID]
+		if !collision {
+			existing, collision = r.collectorsByEscapedID[escapedID]
+		}
+	}
+
+	if collision {
 		switch e := existing.(type) {
 		case *wrappingCollector:
 			return AlreadyRegisteredError{
@@ -353,21 +402,30 @@ func (r *Registry) Register(c Collector) error {
 
 	// Only after all tests have passed, actually register.
 	r.collectorsByID[collectorID] = c
+	// We only need to store the escapedID if it doesn't match the unescaped one.
+	if escapedID != collectorID {
+		r.collectorsByEscapedID[escapedID] = c
+	}
 	for hash := range newDescIDs {
 		r.descIDs[hash] = struct{}{}
 	}
 	for name, dimHash := range newDimHashesByName {
 		r.dimHashesByName[name] = dimHash
 	}
+	for hash := range newEscapedIDs {
+		r.escapedDescIDs[hash] = struct{}{}
+	}
 	return nil
 }
 
 // Unregister implements Registerer.
 func (r *Registry) Unregister(c Collector) bool {
 	var (
-		descChan    = make(chan *Desc, capDescChan)
-		descIDs     = map[uint64]struct{}{}
-		collectorID uint64 // All desc IDs XOR'd together.
+		descChan           = make(chan *Desc, capDescChan)
+		descIDs            = map[uint64]struct{}{}
+		escpaedDescIDs     = map[uint64]struct{}{}
+		collectorID        uint64 // All desc IDs XOR'd together.
+		collectorEscapedID uint64
 	)
 	go func() {
 		c.Describe(descChan)
@@ -377,6 +435,8 @@ func (r *Registry) Unregister(c Collector) bool {
 		if _, exists := descIDs[desc.id]; !exists {
 			collectorID ^= desc.id
 			descIDs[desc.id] = struct{}{}
+			collectorEscapedID ^= desc.escapedID
+			escpaedDescIDs[desc.escapedID] = struct{}{}
 		}
 	}
 
@@ -391,9 +451,13 @@ func (r *Registry) Unregister(c Collector) bool {
 	defer r.mtx.Unlock()
 
 	delete(r.collectorsByID, collectorID)
+	delete(r.collectorsByEscapedID, collectorEscapedID)
 	for id := range descIDs {
 		delete(r.descIDs, id)
 	}
+	for id := range escpaedDescIDs {
+		delete(r.escapedDescIDs, id)
+	}
 	// dimHashesByName is left untouched as those must be consistent
 	// throughout the lifetime of a program.
 	return true

diff --git a/prometheus/registry_test.go b/prometheus/registry_test.go
@@ -36,6 +36,7 @@ import (
 
 	dto "github.com/prometheus/client_model/go"
 	"github.com/prometheus/common/expfmt"
+	"github.com/prometheus/common/model"
 	"google.golang.org/protobuf/proto"
 	"google.golang.org/protobuf/types/known/timestamppb"
 )
@@ -1181,6 +1182,190 @@ func TestAlreadyRegisteredCollision(t *testing.T) {
 	}
 }
 
+func TestAlreadyRegisteredEscapingCollision(t *testing.T) {
+	oldValidation := model.NameValidationScheme
+	model.NameValidationScheme = model.UTF8Validation
+	defer func() {
+		model.NameValidationScheme = oldValidation
+	}()
+
+	tests := []struct {
+		name string
+		// These are functions because hashes that determine collision are created
+		// at metric creation time.
+		counterA      func() prometheus.Counter
+		counterB      func() prometheus.Counter
+		utf8Collision bool
+		expectErr     bool
+		// postInitFlagFlip tests for the case where metrics are created in an
+		// init() function, when the mode will be to disallow legacy collisions, and
+		// then the user later selects to allow them.
+		postInitFlagFlip bool
+	}{
+		{
+			name: "no metric name collision",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "myAcounterAa",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+		},
+		{
+			name: "compatibility metric name collision",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my.counter.a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			expectErr: true,
+		},
+		{
+			// This is a regression test to make sure we are not accidentally
+			// reporting collisions when label values are different.
+			name: "no label value collision",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"name": "label.value",
+						"type": "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"name": "label_value",
+						"type": "test",
+					},
+				})
+			},
+		},
+		{
+			name: "compatibility label name collision",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"label.name": "name",
+						"type":       "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"label_name": "name",
+						"type":       "test",
+					},
+				})
+			},
+			expectErr: true,
+		},
+		{
+			name: "no utf8 metric name collision",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my_counter_a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my.counter.a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			utf8Collision: true,
+		},
+		{
+			name: "post init flag flip, should collide",
+			counterA: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my.counter.a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			counterB: func() prometheus.Counter {
+				return prometheus.NewCounter(prometheus.CounterOpts{
+					Name: "my.counter.a",
+					ConstLabels: prometheus.Labels{
+						"name": "label",
+						"type": "test",
+					},
+				})
+			},
+			postInitFlagFlip: true,
+			expectErr:        true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			reg := prometheus.NewRegistry()
+			if tc.postInitFlagFlip {
+				reg.AllowEscapedCollisions(false)
+			} else {
+				reg.AllowEscapedCollisions(tc.utf8Collision)
+			}
+			err := reg.Register(tc.counterA())
+			if err != nil {
+				t.Errorf("expected no error, got: %v", err)
+			}
+			if tc.postInitFlagFlip {
+				reg.AllowEscapedCollisions(true) // the flip: allow only after counterA is registered
+			}
+			err = reg.Register(tc.counterB())
+			if !tc.expectErr {
+				if err != nil {
+					t.Errorf("expected no error, got %T", err)
+				}
+			} else {
+				if err == nil {
+					t.Error("expected AlreadyRegisteredError, got none")
+				}
+			}
+		})
+	}
+}
+
 type tGatherer struct {
 	done bool
 	err  error