Skip to content

Commit

Permalink
Removing the stat method in tracking (#1169)
Browse files Browse the repository at this point in the history
  • Loading branch information
Malikbadmus committed Jun 20, 2024
1 parent 9c56d9b commit 9bca63e
Showing 1 changed file with 35 additions and 33 deletions.
68 changes: 35 additions & 33 deletions ersilia/core/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,45 +349,49 @@ def start_tracking(self):
tracemalloc.start()
self.memory_usage_start = tracemalloc.get_traced_memory()[0]


def stats(self, result):
"""
Stats function: calculates the basic statistics of the output file from a model. This includes the
mean, mode (if applicable), minimum, maximum, and standard deviation.


# TODO: the stats method below is commented out for now; see the following issue comment for details
# https://github.com/ersilia-os/ersilia/issues/1165#issuecomment-2178596998

# def stats(self, result):
# """
# Stats function: calculates the basic statistics of the output file from a model. This includes the
# mean, mode (if applicable), minimum, maximum, and standard deviation.

:param result: The path to the model's output file.
:return: A dictionary containing the stats for each column of the result.
"""
# :param result: The path to the model's output file.
# :return: A dictionary containing the stats for each column of the result.
# """

data = read_csv(result)
# data = read_csv(result)

# drop first two columns (key, input)
for row in data:
row.pop('key', None)
row.pop('input', None)
# for row in data:
# row.pop('key', None)
# row.pop('input', None)

# Convert data to a column-oriented format
columns = defaultdict(list)
for row in data:
for key, value in row.items():
columns[key].append(float(value))
# columns = defaultdict(list)
# for row in data:
# for key, value in row.items():
# columns[key].append(float(value))

# Calculate statistics
stats = {}
for column, values in columns.items():
column_stats = {}
column_stats["mean"] = statistics.mean(values)
try:
column_stats["mode"] = statistics.mode(values)
except statistics.StatisticsError:
column_stats["mode"] = None
column_stats["min"] = min(values)
column_stats["max"] = max(values)
column_stats["std"] = statistics.stdev(values) if len(values) > 1 else 0

stats[column] = column_stats

return stats
# stats = {}
# for column, values in columns.items():
# column_stats = {}
# column_stats["mean"] = statistics.mean(values)
# try:
# column_stats["mode"] = statistics.mode(values)
# except statistics.StatisticsError:
# column_stats["mode"] = None
# column_stats["min"] = min(values)
# column_stats["max"] = max(values)
# column_stats["std"] = statistics.stdev(values) if len(values) > 1 else 0
#
# stats[column] = column_stats

# return stats


def get_file_sizes(self, input_file, output_file):
Expand Down Expand Up @@ -514,8 +518,6 @@ def track(self, input, result, meta):
json_dict["nan_count"] = nan_count

json_dict["check_types"] = self.check_types(result_data, meta["metadata"])

json_dict["stats"] = self.stats(result)

json_dict["file_sizes"] = self.get_file_sizes(input_data, result_data)

Expand Down

0 comments on commit 9bca63e

Please sign in to comment.