Skip to content

Commit

Permalink
Merge pull request #2308 from midichef/aggr_misc
Browse files Browse the repository at this point in the history
[aggr-] fix handling of various special cases
  • Loading branch information
anjakefala committed Mar 11, 2024
2 parents 1dd009a + 8a61e09 commit 1987007
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 21 deletions.
4 changes: 4 additions & 0 deletions tests/golden/pr2308.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
{"Total": "Total", "count": 2, "A_keymax": [2.0]},
{"Total": "Selected", "count": 0}
]
5 changes: 5 additions & 0 deletions tests/pr2308.vdj
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!vd -p
{"sheet": null, "col": null, "row": null, "longname": "open-file", "input": "tests/small.json", "keystrokes": "o", "comment": null}
{"sheet": "small", "col": "id", "row": "", "longname": "key-col", "input": "", "keystrokes": "!", "comment": "toggle current column as a key column"}
{"sheet": "small", "col": "A", "row": "", "longname": "aggregate-col", "input": "keymax", "keystrokes": "+", "comment": "Add aggregator to current column"}
{"sheet": "small", "col": "", "row": "", "longname": "freq-summary", "input": "", "keystrokes": "zShift+F", "comment": "open one-line summary for all rows and selected rows"}
4 changes: 4 additions & 0 deletions tests/small.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
{"id": 1.0, "A": 1},
{"id": 2.0, "A": 1}
]
63 changes: 47 additions & 16 deletions visidata/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import statistics

from visidata import Progress, Sheet, Column, ColumnsSheet, VisiData
from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict
from visidata import vd, anytype, vlen, asyncthread, wrapply, AttrDict, date

vd.help_aggregators = '''# Choose Aggregators
Start typing an aggregator name or description.
Expand Down Expand Up @@ -51,7 +51,11 @@ def getValues(self, rows):

def aggregators_get(col):
    '''Return the list of aggregators named in *col.aggstr*.

    *col.aggstr* is a space-separated string of aggregator names; None or
    an empty string yields an empty list.  A name whose entry in
    vd.aggregators is itself a list of aggregators contributes each member
    of that list, so the result is always a flat list.
    A name missing from vd.aggregators propagates the lookup error.'''
    aggs = []
    for name in (col.aggstr or '').split():
        agg = vd.aggregators[name]
        if isinstance(agg, list):
            aggs.extend(agg)
        else:
            aggs.append(agg)
    return aggs

def aggregators_set(col, aggs):
if isinstance(aggs, str):
Expand Down Expand Up @@ -94,7 +98,7 @@ def _funcRows(col, rows): # wrap builtins so they can have a .type
except Exception as e:
if len(vals) == 0:
return None
return e
raise e

vd.aggregators[name] = _defaggr(name, type, _funcRows, funcValues=funcValues, helpstr=helpstr) # accepts a srccol + list of rows

Expand Down Expand Up @@ -135,7 +139,9 @@ def _percentile(N, percent, key=lambda x:x):

@functools.lru_cache(100)
def percentile(pct, helpstr=''):
    '''Return an aggregator named "p<pct>" for the *pct*-th percentile.

    The aggregator takes (col, rows), sorts the column's values for those
    rows, and passes them to _percentile() with the fraction pct/100.
    Results are cached per (pct, helpstr) by lru_cache.'''
    def _pct_of_rows(col, rows):
        # date values are mapped to their timestamp() before _percentile
        # combines them; every other type is passed through unchanged.
        if col.type is date:
            key = lambda d: d.timestamp()
        else:
            key = lambda x: x
        return _percentile(sorted(col.getValues(rows)), pct/100, key=key)

    return _defaggr('p%s' % pct, None, _pct_of_rows, helpstr=helpstr)

def quantiles(q, helpstr):
    '''Return the list of q-1 percentile aggregators at the cut points i/q for i in 1..q-1.

    Each cut point is rounded to a whole percent, and all share *helpstr*.'''
    cut_points = (round(100 * step / q) for step in range(1, q))
    return [percentile(cut, helpstr) for cut in cut_points]
Expand All @@ -149,7 +155,7 @@ def quantiles(q, helpstr):
vd.aggregator('sum', vsum, 'sum of values')
vd.aggregator('distinct', set, 'distinct values', type=vlen)
vd.aggregator('count', lambda values: sum(1 for v in values), 'number of values', type=int)
vd.aggregator('list', list, 'list of values')
vd.aggregator('list', list, 'list of values', type=anytype)
vd.aggregator('stdev', statistics.stdev, 'standard deviation of values', type=float)

vd.aggregators['q3'] = quantiles(3, 'tertiles (33/66th pctile)')
Expand All @@ -162,8 +168,24 @@ def quantiles(q, helpstr):
for pct in (10, 20, 25, 30, 33, 40, 50, 60, 67, 70, 75, 80, 90, 95, 99):
vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')

# returns keys of the row with the max value
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')
def keyfunc(aggr_func):
    '''Return the key of the row that results from applying *aggr_func* to *rows*.
    Return None if *rows* is an empty list.
    *aggr_func* takes a list of (value, row) tuples, one for each row in the column,
    excluding rows where the column holds null and error values.
    *aggr_func* must also take the parameters *default* and *key*, as max() does:
    https://docs.python.org/3/library/functions.html#max

    The returned function has the aggregator signature (col, rows).'''
    def _sortkey(valrow):
        # convert dicts to lists because functions like max() can't compare dicts
        if isinstance(valrow[1], dict):
            return (valrow[0], sorted(valrow[1].items()))
        return valrow

    def key_aggr_func(col, rows):
        if not col.sheet.keyCols:
            vd.error('key aggregator function requires one or more key columns')
            return None  # only reached if vd.error() returns instead of raising
        # default=(None, None) makes an empty input yield row=None instead of raising
        row = aggr_func(col.getValueRows(rows), default=(None, None), key=_sortkey)[1]
        # test against None explicitly: a valid row object may itself be
        # falsy (e.g. 0, an empty dict or an empty list)
        return col.sheet.rowkey(row) if row is not None else None

    return key_aggr_func
vd.aggregators['keymax'] = _defaggr('keymax', anytype, keyfunc(max), helpstr='key of the maximum value')
vd.aggregators['keymin'] = _defaggr('keymin', anytype, keyfunc(min), helpstr='key of the minimum value')


ColumnsSheet.columns += [
Expand All @@ -175,7 +197,7 @@ def quantiles(q, helpstr):

@Sheet.api
def addAggregators(sheet, cols, aggrnames):
'Add each aggregator in list of *aggrnames* to each of *cols*.'
'Add each aggregator in list of *aggrnames* to each of *cols*. Ignores names that are not valid.'
for aggrname in aggrnames:
aggrs = vd.aggregators.get(aggrname)
aggrs = aggrs if isinstance(aggrs, list) else [aggrs]
Expand All @@ -194,14 +216,19 @@ def aggname(col, agg):

@Column.api
@asyncthread
def memo_aggregate(col, agg_choices, rows):
    '''Show aggregated value in status, and add to memory.

    *agg_choices* is a list of aggregator names.  Names with no entry in
    vd.aggregators are skipped.  A name whose entry is a list of
    aggregators is expanded, and each member is computed separately.
    Each result is stored in vd.memory under "<colname>_<aggname>".'''
    for agg_choice in agg_choices:
        agg_or_list = vd.aggregators.get(agg_choice)
        if not agg_or_list:
            continue
        aggs = agg_or_list if isinstance(agg_or_list, list) else [agg_or_list]
        for agg in aggs:
            aggval = agg(col, rows)
            # an aggregator without its own type uses the column's type
            typedval = wrapply(agg.type or col.type, aggval)
            dispval = col.format(typedval)
            k = col.name+'_'+agg.name
            vd.status(f'{k}={dispval}')
            vd.memory[k] = typedval


@VisiData.property
Expand All @@ -215,6 +242,7 @@ def aggregator_choices(vd):

@VisiData.api
def chooseAggregators(vd):
'''Return a list of aggregator name strings chosen or entered by the user. User-entered names may be invalid.'''
prompt = 'choose aggregators: '
def _fmt_aggr_summary(match, row, trigger_key):
formatted_aggrname = match.formatted.get('key', row.key) if match else row.key
Expand All @@ -235,12 +263,15 @@ def _fmt_aggr_summary(match, row, trigger_key):
multiple=True)

aggrs = r.split()
valid_choices = vd.aggregators.keys()
for aggr in aggrs:
vd.usedInputs[aggr] += 1
if aggr not in valid_choices:
vd.warning(f'aggregator does not exist: {aggr}')
return aggrs

Sheet.addCommand('+', 'aggregate-col', 'addAggregators([cursorCol], chooseAggregators())', 'Add aggregator to current column')
Sheet.addCommand('z+', 'memo-aggregate', 'for agg in chooseAggregators(): cursorCol.memo_aggregate(aggregators[agg], selectedRows or rows)', 'memo result of aggregator over values in selected rows for current column')
Sheet.addCommand('z+', 'memo-aggregate', 'cursorCol.memo_aggregate(chooseAggregators(), selectedRows or rows)', 'memo result of aggregator over values in selected rows for current column')
ColumnsSheet.addCommand('g+', 'aggregate-cols', 'addAggregators(selectedRows or source[0].nonKeyVisibleCols, chooseAggregators())', 'add aggregators to selected source columns')

vd.addMenuItems('''
Expand Down
2 changes: 1 addition & 1 deletion visidata/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def getCell(self, row):
notecolor='color_warning')
else:
return DisplayWrapper(typedval.val, text=str(typedval.val),
error='unknown',
error=['unknown'],
note=options.note_type_exc,
notecolor='color_warning')

Expand Down
2 changes: 1 addition & 1 deletion visidata/freqtbl.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def rows(self):
FreqTableSheet.addCommand('', 'open-preview', 'vd.push(FreqTablePreviewSheet(sheet.name, "preview", source=sheet, columns=source.columns), pane=2); vd.options.disp_splitwin_pct=50', 'open split preview of source rows at cursor')

Sheet.addCommand('F', 'freq-col', 'vd.push(makeFreqTable(sheet, cursorCol))', 'open Frequency Table grouped on current column, with aggregations of other columns')
Sheet.addCommand('gF', 'freq-keys', 'vd.push(makeFreqTable(sheet, *keyCols))', 'open Frequency Table grouped by all key columns on source sheet, with aggregations of other columns')
Sheet.addCommand('gF', 'freq-keys', 'vd.push(makeFreqTable(sheet, *keyCols)) if keyCols else vd.fail("there are no key columns to group by")', 'open Frequency Table grouped by all key columns on source sheet, with aggregations of other columns')
Sheet.addCommand('zF', 'freq-summary', 'vd.push(makeFreqTableSheetSummary(sheet, Column("Total", sheet=sheet, getter=lambda col, row: "Total")))', 'open one-line summary for all rows and selected rows')

ColumnsSheet.addCommand(ENTER, 'freq-row', 'vd.push(makeFreqTable(source[0], cursorRow))', 'open a Frequency Table sheet grouped on column referenced in current row')
Expand Down
7 changes: 5 additions & 2 deletions visidata/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,11 @@ def afterLoad(self):
@PivotSheet.api
def addcol_aggr(sheet, col):
    '''Prompt for aggregators and add an aggregate column at the cursor for each one.

    The new columns aggregate *col.origCol*; fails if *col* has no origCol.
    Chosen names with no entry in vd.aggregators are skipped.  A name whose
    entry is a list of aggregators adds one column per member.'''
    hasattr(col, 'origCol') or vd.fail('not an aggregation column')
    for agg_choice in vd.chooseAggregators():
        agg_or_list = vd.aggregators.get(agg_choice)
        if not agg_or_list:
            continue
        aggs = agg_or_list if isinstance(agg_or_list, list) else [agg_or_list]
        for agg in aggs:
            # agg is already an aggregator, not a name: pass it directly
            # instead of looking it up in vd.aggregators again
            sheet.addColumnAtCursor(makeAggrColumn(col.origCol, agg))


Sheet.addCommand('W', 'pivot', 'vd.push(makePivot(sheet, keyCols, [cursorCol]))', 'open Pivot Table: group rows by key column and summarize current column')
Expand Down
2 changes: 1 addition & 1 deletion visidata/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def isTestableCommand(longname, cmdlist):
'expand-cols-depth': '0',
'save-cmdlog': 'test_commands.vdj',
'aggregate-col': 'mean',
'memo-aggregate': 'mean',
'memo-aggregate': 'count',
'addcol-shell': '',
'theme-input': 'light',
'add-rows': '1',
Expand Down

0 comments on commit 1987007

Please sign in to comment.