Skip to content

Commit

Permalink
Merge pull request #645 from google/limit-histo-size
Browse files Browse the repository at this point in the history
Add metric size limits to the language and metric store.
  • Loading branch information
jaqx0r committed May 15, 2022
2 parents bcc2b99 + 291d865 commit 4895acf
Show file tree
Hide file tree
Showing 13 changed files with 791 additions and 610 deletions.
20 changes: 19 additions & 1 deletion docs/Language.md
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ For example, parsing syslog timestamps is something you may only wish to do
once, as it's expensive to match (and difficult to read!)

```
counter foo counter bar
counter foo
counter bar
/^(?P<date>\w+\s+\d+\s+\d+:\d+:\d+)/ {
strptime($date, "Jan 02 15:04:05")
Expand Down Expand Up @@ -539,6 +540,8 @@ that log line.

#### Variable Storage Management

##### `del`

`mtail` performs no implicit garbage collection in the metric storage. The
program can hint to the virtual machine that a specific datum in a dimensioned
metric is no longer going to be used with the `del` keyword.
Expand Down Expand Up @@ -575,6 +578,21 @@ The del-after form takes any time period supported by the go

Expiry is only processed once every hour, so durations shorter than 1h won't take effect until the next hour has passed.

This command only makes sense for dimensioned metrics.

##### `limit`

A size limit can be specified on a metric with the modifier `limit`.

```
counter bytes_total by operation limit 500
```

When a garbage collection run encounters a metric that has grown beyond its size limit, it removes the oldest values until the metric no longer exceeds the limit. The oldest values are chosen by the timestamp of the datum.

This modifier only makes sense for dimensioned metrics.


### Stopping the program

The program runs from start to finish once per line, but sometimes you may want to stop the program early. For example, if the log filename does not match a pattern, or some stateful metric indicates work shouldn't be done.
Expand Down
17 changes: 17 additions & 0 deletions internal/metrics/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"sync"
"time"

"github.com/golang/glog"
"github.com/google/mtail/internal/metrics/datum"
"github.com/pkg/errors"
)
Expand Down Expand Up @@ -92,6 +93,7 @@ type Metric struct {
labelValuesMap map[string]*LabelValue
Source string `json:",omitempty"`
Buckets []datum.Range `json:",omitempty"`
Limit int `json:",omitempty"`
}

// NewMetric returns a new empty metric of dimension len(keys).
Expand Down Expand Up @@ -155,6 +157,7 @@ func (m *Metric) GetDatum(labelvalues ...string) (d datum.Datum, err error) {
if lv := m.FindLabelValueOrNil(labelvalues); lv != nil {
d = lv.Value
} else {
// TODO Check m.Limit and expire old data
switch m.Type {
case Int:
d = datum.NewInt()
Expand All @@ -177,6 +180,20 @@ func (m *Metric) GetDatum(labelvalues ...string) (d datum.Datum, err error) {
return d, nil
}

// RemoveOldestDatum scans the Metric's LabelValues for the Datum with the
// oldest timestamp, and removes it.
//
// Age is compared using each LabelValue's Value.TimeUTC().  The comparison is
// strict, so when several data share the oldest timestamp the first one
// encountered in m.LabelValues is the one removed.  If the Metric has no
// LabelValues this is a no-op.
//
// A failure reported by RemoveDatum is logged as a warning rather than
// dropped, as this method has no return value with which to report it.
func (m *Metric) RemoveOldestDatum() {
	var oldestLV *LabelValue
	for _, lv := range m.LabelValues {
		if oldestLV == nil || lv.Value.TimeUTC().Before(oldestLV.Value.TimeUTC()) {
			oldestLV = lv
		}
	}
	if oldestLV == nil {
		// Nothing stored, so nothing to remove.
		return
	}
	glog.V(1).Infof("removeOldest: removing oldest LV: %v", oldestLV)
	if err := m.RemoveDatum(oldestLV.Labels...); err != nil {
		glog.Warningf("removeOldest: failed to remove oldest LV %v: %v", oldestLV, err)
	}
}

// RemoveDatum removes the Datum described by labelvalues from the Metric m.
func (m *Metric) RemoveDatum(labelvalues ...string) error {
if len(labelvalues) != len(m.Keys) {
Expand Down
20 changes: 20 additions & 0 deletions internal/metrics/metric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,23 @@ func TestRemoveMetricLabelValue(t *testing.T) {
t.Errorf("label value still exists")
}
}

// TestMetricLabelValueRemovePastLimit creates a datum for each of three label
// values in turn, calling RemoveOldestDatum after every insertion, then checks
// that at most two label values remain and that the first one, "a", is gone.
func TestMetricLabelValueRemovePastLimit(t *testing.T) {
	metric := NewMetric("test", "prog", Counter, Int, "foo")
	metric.Limit = 1

	for _, label := range []string{"a", "b", "c"} {
		_, err := metric.GetDatum(label)
		testutil.FatalIfErr(t, err)
		metric.RemoveOldestDatum()
	}

	if len(metric.LabelValues) > 2 {
		t.Errorf("Expected 2 labelvalues got %#v", metric.LabelValues)
	}
	if lv := metric.FindLabelValueOrNil([]string{"a"}); lv != nil {
		t.Errorf("found label a which is unexpected: %#v", lv)
	}
}
9 changes: 7 additions & 2 deletions internal/metrics/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,17 @@ func (s *Store) Range(f func(*Metric) error) error {
return nil
}

// Gc iterates through the Store looking for metrics that have been marked
// for expiry, and removing them if their expiration time has passed.
// Gc iterates through the Store looking for metrics that can be tidied up,
// if they are past their expiry or larger than their limit.
func (s *Store) Gc() error {
glog.Info("Running Store.Expire()")
now := time.Now()
return s.Range(func(m *Metric) error {
if m.Limit > 0 && len(m.LabelValues) >= m.Limit {
for i := len(m.LabelValues); i > m.Limit; i-- {
m.RemoveOldestDatum()
}
}
for _, lv := range m.LabelValues {
if lv.Expiry <= 0 {
continue
Expand Down
29 changes: 28 additions & 1 deletion internal/metrics/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func TestAddMetricDifferentType(t *testing.T) {
}
}

func TestExpireMetric(t *testing.T) {
func TestExpireOldDatum(t *testing.T) {
s := NewStore()
m := NewMetric("foo", "prog", Counter, Int, "a", "b", "c")
testutil.FatalIfErr(t, s.Add(m))
Expand Down Expand Up @@ -124,3 +124,30 @@ func TestExpireMetric(t *testing.T) {
t.Logf("Store: %#v", s)
}
}

// TestExpireOversizeDatum adds a metric with a Limit of 1 to a Store, creates
// a datum for each of three label values with a Gc run after every insertion,
// then checks that at most two label values remain and that the first one,
// "a", has been removed.
func TestExpireOversizeDatum(t *testing.T) {
	store := NewStore()
	metric := NewMetric("foo", "prog", Counter, Int, "foo")
	metric.Limit = 1
	testutil.FatalIfErr(t, store.Add(metric))

	for _, label := range []string{"a", "b", "c"} {
		_, err := metric.GetDatum(label)
		testutil.FatalIfErr(t, err)
		testutil.FatalIfErr(t, store.Gc())
	}

	if len(metric.LabelValues) > 2 {
		t.Errorf("Expected 2 labelvalues got %#v", metric.LabelValues)
	}
	if lv := metric.FindLabelValueOrNil([]string{"a"}); lv != nil {
		t.Errorf("found label a which is unexpected: %#v", lv)
	}
}
1 change: 1 addition & 0 deletions internal/runtime/compiler/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ type VarDecl struct {
Name string
Hidden bool
Keys []string
Limit int64
Buckets []float64
Kind metrics.Kind
ExportedName string
Expand Down
9 changes: 9 additions & 0 deletions internal/runtime/compiler/codegen/codegen.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ func (c *codegen) VisitBefore(node ast.Node) (ast.Visitor, ast.Node) {
}

m.Hidden = n.Hidden
// int is int64 only on 64bit platforms. To be fair MaxInt is a
// ridiculously excessive size for this anyway, you're going to use 2GiB
// x sizeof(datum) in a single metric.
if n.Limit > math.MaxInt {
c.errorf(n.Pos(), "limit %d too large; max %d", n.Limit, math.MaxInt)
return nil, n
}
m.Limit = int(n.Limit)

n.Symbol.Binding = m
n.Symbol.Addr = len(c.obj.Metrics)
c.obj.Metrics = append(c.obj.Metrics, m)
Expand Down
1 change: 1 addition & 0 deletions internal/runtime/compiler/parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var keywords = map[string]Kind{
"gauge": GAUGE,
"hidden": HIDDEN,
"histogram": HISTOGRAM,
"limit": LIMIT,
"next": NEXT,
"otherwise": OTHERWISE,
"stop": STOP,
Expand Down

0 comments on commit 4895acf

Please sign in to comment.