Skip to content

Commit

Permalink
[path-] fix undercounted progress for multibyte chars #2323
Browse files Browse the repository at this point in the history
Co-authored-by: @midichef
  • Loading branch information
saulpw authored and anjakefala committed May 18, 2024
1 parent e8db0ae commit 5cefaa9
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 12 deletions.
8 changes: 6 additions & 2 deletions visidata/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from functools import wraps, lru_cache

from visidata import vd
from visidata import VisiData, Progress
from visidata import VisiData, Progress, TextProgress

vd.help_encoding = '''Common Encodings:
Expand Down Expand Up @@ -97,12 +97,16 @@ def peek(self, size=-1):
class FileProgress:
'Open file in binary mode and track read() progress.'
def __init__(self, path, fp, mode='r', **kwargs):
'kwargs has all open() kwargs'
self.path = path
self.fp = fp
self.prog = None
if 'r' in mode:
gerund = 'reading'
self.prog = Progress(gerund=gerund, total=filesize(path))
if 'b' in mode:
self.prog = Progress(gerund=gerund, total=filesize(path))
else:
self.prog = TextProgress(gerund=gerund, total=filesize(path), encoding=kwargs.get('encoding'))
elif 'w' in mode:
gerund = 'writing'
self.prog = Progress(gerund=gerund)
Expand Down
40 changes: 30 additions & 10 deletions visidata/threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,14 @@ def _execAsync(*args, **kwargs):
return _decorator


class _Progress:
class Progress:
'''Maintain progress count as either an iterable wrapper, or a context manager.
- *iterable*: wrapped iterable if used as an iterator.
- *gerund*: status text shown while this Progress is active.
- *total*: total count expected.
- *sheet*: specific sheet to associate this progress with. Default is sheet from current thread.
'''
def __init__(self, iterable=None, gerund="", total=None, sheet=None):
self.iterable = iterable
if total is None:
Expand Down Expand Up @@ -70,16 +77,28 @@ def __iter__(self):
yield item
self.made += 1

@VisiData.global_api
def Progress(vd, iterable=None, gerund="", total=None, sheet=None):
'''Maintain progress count as either an iterable wrapper, or a context manager.

- *iterable*: wrapped iterable if used as an iterator.
- *gerund*: status text shown while this Progress is active.
- *total*: total count expected.
- *sheet*: specific sheet to associate this progress with. Default is sheet from current thread.
'''
return _Progress(iterable=iterable, gerund=gerund, total=total, sheet=sheet)
class TextProgress(Progress):
    '''Progress whose updates are counted in characters and converted to an estimated byte count.

    Each addProgress(n) is scaled by *est_charbytes*, an estimate of encoded
    bytes per character derived from text passed to addSample().

    - *encoding*: encoding used to measure sample text. None or '' falls back to 'utf-8'.
    - *kwargs*: passed through unchanged to Progress.

    NOTE(review): presumably *total* is a size in bytes (the visible caller passes filesize(path)) -- confirm.
    '''
    def __init__(self, encoding='utf-8', **kwargs):
        super().__init__(**kwargs)
        # The encoding must be stored: addSample() reads self.encoding.
        # Callers may pass encoding=None (e.g. kwargs.get('encoding') with no
        # encoding given), which str.encode() rejects, so fall back to the default.
        self.encoding = encoding or 'utf-8'
        self.est_sample = ''    # accumulated sample text used for the estimate
        self.est_charbytes = 1  # estimated encoded bytes per character

    def addProgress(self, n:int):
        'Add *n* characters of progress, scaled by est_charbytes.  Does nothing once made has reached total.'
        if self.made < self.total:
            return super().addProgress(n * self.est_charbytes)

    def addSample(self, s:str):
        'Fold up to the first 100 characters of *s* into the sample and update est_charbytes.'
        # A short string can cause charbytes to be overestimated by 30%,
        # due to the Byte Order Marker in encodings like utf-8-sig.
        # Combining short strings into one big one lowers that error to < 1%.
        if len(self.est_sample) < self.made/1000:
            self.est_sample += s[:100]
            # The sample is still empty if *s* was empty; keep the previous
            # estimate rather than dividing by zero.
            if self.est_sample:
                # errors='replace': an unencodable character must not abort
                # the load just to compute a progress estimate.
                nbytes = len(self.est_sample.encode(self.encoding, errors='replace'))
                self.est_charbytes = nbytes / len(self.est_sample)


vd.Progress = Progress
vd.TextProgress = TextProgress


@VisiData.api
Expand Down Expand Up @@ -452,6 +471,7 @@ def codestr(code):
vd.addGlobals({
'ThreadsSheet': ThreadsSheet,
'Progress': Progress,
'TextProgress': TextProgress,
'asynccache': asynccache,
'asyncsingle': asyncsingle,
'asyncignore': asyncignore,
Expand Down

0 comments on commit 5cefaa9

Please sign in to comment.