Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(core): clean up define metric and add missing functionality #7447

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a69238c
summary={} for finished run; fix runstopped status bug; uncomment win…
dmitryduev Apr 19, 2024
79e3618
summary={} for finished run; fix runstopped status bug; uncomment win…
dmitryduev Apr 19, 2024
a633487
factor out the PathTree data structure into separate package
dmitryduev Apr 19, 2024
98b1b88
factor out the PathTree data structure into separate package
dmitryduev Apr 19, 2024
b3b0286
Merge branch 'main' into define-metric-summary
kptkin Apr 19, 2024
46e0d3a
remove uv for now to run windows as well
kptkin Apr 19, 2024
9912007
see if this fixes the windows tests
kptkin Apr 19, 2024
a2d51ca
Merge branch 'main' into define-metric-summary
dmitryduev Apr 19, 2024
3469330
save progress
kptkin Apr 20, 2024
25992a8
support nested dict
kptkin Apr 22, 2024
a99e172
replace opts with params
kptkin Apr 22, 2024
cedd1b9
Merge branch 'main' into kpt/major-refactor
kptkin Apr 22, 2024
5920818
fix typos
kptkin Apr 22, 2024
24c61d4
Merge branch 'main' into kpt/major-refactor
kptkin Apr 23, 2024
fcc0b65
Merge branch 'main' into kpt/major-refactor
kptkin Apr 24, 2024
ef02338
merge unchanged files
kptkin Apr 24, 2024
782cfee
fix up
kptkin Apr 24, 2024
c85e223
more fixes
kptkin Apr 24, 2024
16be274
Merge branch 'main' into kpt/major-refactor
kptkin Apr 25, 2024
cc42493
merge all relevant changes
kptkin Apr 25, 2024
a1d7fdb
Merge branch 'main' into kpt/major-refactor
kptkin Apr 26, 2024
5951617
merge with main
kptkin Apr 26, 2024
efe7c60
for this one
kptkin Apr 26, 2024
93bdcd5
clean up
kptkin Apr 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 7 additions & 7 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ jobs:
name: "Run nox tests: <<parameters.session_base>>"
no_output_timeout: 10m
command: "\
nox -db uv -s '<<parameters.session_base>>-\
nox -s '<<parameters.session_base>>-\
<<parameters.python_version_major>>.\
<<parameters.python_version_minor>>\
(core=<<parameters.core>>)'"
Expand Down Expand Up @@ -1581,12 +1581,12 @@ workflows:
session_base: unit_tests
codecov_flags: unit

filters:
branches:
only:
# - main
- /^release-.*/
- /^.*-ci-win$/
# filters:
# branches:
# only:
# # - main
# - /^release-.*/
# - /^.*-ci-win$/

matrix:
parameters:
Expand Down
24 changes: 24 additions & 0 deletions core/internal/runmetric/runmetric.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package runmetric

import (
"github.com/wandb/wandb/core/pkg/service"
"google.golang.org/protobuf/proto"
)

type RunMetricDict = map[string]*service.MetricRecord

func addMetric(
metric *service.MetricRecord,
key string,
target *RunMetricDict,
) {
if metric.GetXControl().GetOverwrite() {
(*target)[key] = metric
return
}

Check warning on line 18 in core/internal/runmetric/runmetric.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric.go#L16-L18

Added lines #L16 - L18 were not covered by tests
if existingMetric, ok := (*target)[key]; ok {
proto.Merge(existingMetric, metric)
} else {
(*target)[key] = metric
}

Check warning on line 23 in core/internal/runmetric/runmetric.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric.go#L23

Added line #L23 was not covered by tests
}
103 changes: 103 additions & 0 deletions core/internal/runmetric/runmetric_handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package runmetric

import (
"fmt"
"path/filepath"
"strings"

"github.com/wandb/wandb/core/pkg/service"
"google.golang.org/protobuf/proto"
)

// RunMetricHanlder is used to track the run metrics
type RunMetricHanlder struct {
definedMetrics RunMetricDict
globMetrics RunMetricDict
}

// NewMetricHandler creates a new RunMetric
func NewMetricHandler() *RunMetricHanlder {
return &RunMetricHanlder{
definedMetrics: make(RunMetricDict),
globMetrics: make(RunMetricDict),
}
}

func (runMetric *RunMetricHanlder) MatchMetric(key string) (*service.MetricRecord, bool) {
if key == "" {
return nil, false
}

Check warning on line 29 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L29

Added line #L29 was not covered by tests

// Skip internal metrics
if strings.HasPrefix(key, "_") {
return nil, false
}

Check warning on line 34 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L34

Added line #L34 was not covered by tests

if metric, ok := runMetric.definedMetrics[key]; ok {
return metric, true
}

Check warning on line 38 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L38

Added line #L38 was not covered by tests

if metric, ok := runMetric.findGlobMatch(key); ok {
metric.Name = key
metric.GlobName = ""
metric.Options.Defined = false
return metric, false
}

Check warning on line 45 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L45

Added line #L45 was not covered by tests

return nil, false
}

func (runMetric *RunMetricHanlder) findGlobMatch(key string) (*service.MetricRecord, bool) {
if key == "" {
return nil, false
}

Check warning on line 53 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L52-L53

Added lines #L52 - L53 were not covered by tests

for pattern, metric := range runMetric.globMetrics {
if match, err := filepath.Match(pattern, key); err != nil {
// h.logger.CaptureError("error matching metric", err)
continue

Check warning on line 58 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L57-L58

Added lines #L57 - L58 were not covered by tests
} else if match {
return proto.Clone(metric).(*service.MetricRecord), true
}

Check warning on line 61 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L61

Added line #L61 was not covered by tests
}

return nil, false
}

func (runMetric *RunMetricHanlder) AddStepMetric(metric *service.MetricRecord) *service.MetricRecord {
if metric.GetName() == "" {
return nil
}

Check warning on line 70 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L70

Added line #L70 was not covered by tests

if metric.GetStepMetric() == "" {
return nil
}

Check warning on line 74 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L74

Added line #L74 was not covered by tests

if _, ok := runMetric.definedMetrics[metric.GetStepMetric()]; ok {
return nil
}

Check warning on line 78 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L78

Added line #L78 was not covered by tests

stepMetric := &service.MetricRecord{
Name: metric.GetStepMetric(),
}

Check warning on line 83 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L82-L83

Added lines #L82 - L83 were not covered by tests
addMetric(stepMetric, stepMetric.GetName(), &runMetric.definedMetrics)

Check warning on line 85 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L85

Added line #L85 was not covered by tests
return stepMetric
}

func (runMetric *RunMetricHanlder) AddMetric(metric *service.MetricRecord) error {
// metric can have a glob name or a name
// TODO: replace glob-name/name with one-of field

Check warning on line 91 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L90-L91

Added lines #L90 - L91 were not covered by tests
switch {
case metric.GetGlobName() != "":
addMetric(metric, metric.GetGlobName(), &runMetric.globMetrics)
return nil
case metric.GetName() != "":
addMetric(metric, metric.GetName(), &runMetric.definedMetrics)
return nil
default:
err := fmt.Errorf("metric must have a name or glob name")
return err

Check warning on line 101 in core/internal/runmetric/runmetric_handler.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_handler.go#L99-L101

Added lines #L99 - L101 were not covered by tests
}
}
59 changes: 59 additions & 0 deletions core/internal/runmetric/runmetric_sender.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package runmetric

import (
"fmt"

"github.com/wandb/wandb/core/internal/corelib"
"github.com/wandb/wandb/core/pkg/service"
"google.golang.org/protobuf/proto"
)

type RunMetricSender struct {
metrics RunMetricDict
indeces map[string]int32
Config []map[int]interface{}
}

func NewRunMetricSender() *RunMetricSender {
return &RunMetricSender{
metrics: make(RunMetricDict),
indeces: make(map[string]int32),
Config: make([]map[int]interface{}, 0),
}

Check warning on line 22 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L22

Added line #L22 was not covered by tests
}

func (runMetric *RunMetricSender) EncodeConfigHints(metric *service.MetricRecord) error {
if metric.GetName() == "" {
err := fmt.Errorf("metric name is required")
return err
}

Check warning on line 29 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L29

Added line #L29 was not covered by tests

if metric.GetGlobName() != "" {
err := fmt.Errorf("glob name is not allowed")
return err
}

Check warning on line 34 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L32-L34

Added lines #L32 - L34 were not covered by tests

addMetric(metric, metric.GetName(), &runMetric.metrics)

// Check if the metric has a step metric
// If it does, we need to increment the step metric index
// and remove the step metric from the metric

Check warning on line 40 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L37-L40

Added lines #L37 - L40 were not covered by tests
if metric.GetStepMetric() != "" {
index, ok := runMetric.indeces[metric.GetStepMetric()]
if ok {
metric = proto.Clone(metric).(*service.MetricRecord)
metric.StepMetric = ""
metric.StepMetricIndex = index + 1
}

Check warning on line 47 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L47

Added line #L47 was not covered by tests
}

encoded := corelib.ProtoEncodeToDict(metric)
if index, ok := runMetric.indeces[metric.GetName()]; ok {
runMetric.Config[index] = encoded
} else {
next := int32(len(runMetric.Config))
runMetric.Config = append(runMetric.Config, encoded)
runMetric.indeces[metric.GetName()] = next
}

Check warning on line 57 in core/internal/runmetric/runmetric_sender.go

View check run for this annotation

Codecov / codecov/patch

core/internal/runmetric/runmetric_sender.go#L57

Added line #L57 was not covered by tests
return nil
}