Add benchmarks.

These benchmarks help us make sure the optimisations are going in the right direction. The benchmark added to time Segment.divide also shows that two proposed changes to 'Segment.divide' using binary search ended up being slower than the current linear search. They're included here because: 1. they _may_ come in handy in the future (unlikely); and 2. I'm sad about throwing this code away. The first implementation replaces the (very clever) linear search with a binary search similar to what we did for `set_cell_size`. My benchmarks showed that this was only 10% faster so I thought maybe I could replace the list slicing with iterator slicing and it would be better, which resulted in my second implementation. They're both equally fast, and the rich benchmarks showed both were actually slower. ```py @classmethod def divide( cls, segments: Iterable["Segment"], cuts: Iterable[int] ) -> Iterable[List["Segment"]]: """Divides an iterable of segments into portions. Args: segments (Iterable[Segment]): The segments to divide. cuts (Iterable[int]): Cell positions where to divide. Yields: Iterable[List[Segment]]: An iterable of Segments in List. 
""" _cell_len = cached_cell_len segments = list(segments) cuts = list(cuts) widths = [0 if s.control else _cell_len(s.text) for s in segments] lengths = list(accumulate(widths)) offset = 0 for cut in cuts: if cut == offset: yield [] continue segment_idx = bisect.bisect_left(lengths, cut) if segment_idx >= len(lengths): yield segments return if lengths[segment_idx] == cut: yield segments[: segment_idx + 1] segments = segments[segment_idx + 1 :] lengths = lengths[segment_idx + 1 :] else: start_width = lengths[segment_idx - 1] if segment_idx > 0 else offset before, after = segments[segment_idx].split_cells(cut - start_width) yield segments[:segment_idx] + [before] segments = segments[segment_idx:] segments[0] = after lengths = lengths[segment_idx:] offset = cut @classmethod def divide( cls, segments: Iterable["Segment"], cuts: Iterable[int] ) -> Iterable[List["Segment"]]: """Divides an iterable of segments into portions. Args: segments (Iterable[Segment]): The segments to divide. cuts (Iterable[int]): Cell positions where to divide. Yields: Iterable[List[Segment]]: An iterable of Segments in List. """ _cell_len = cached_cell_len segments = list(segments) cuts = list(cuts) widths = [0 if s.control else _cell_len(s.text) for s in segments] lengths = list(accumulate(widths)) segments_iter = iter(segments) idx_offset = 0 offset = 0 for cut in cuts: if cut == offset: yield [] continue length_idx = bisect.bisect_left(lengths, cut) if length_idx >= len(lengths): yield list(segments_iter) return if lengths[length_idx] == cut: segments = list(islice(segments_iter, length_idx - idx_offset + 1)) yield segments idx_offset += len(segments) else: start_width = lengths[length_idx - 1] if length_idx > idx_offset else offset segments = list(islice(segments_iter, length_idx - idx_offset + 1)) before, after = segments[-1].split_cells(cut - start_width) segments_iter = chain([after], segments_iter) segments[-1] = before yield segments idx_offset += len(segments) - 1 offset = cut ```
Textualize · Mar 7, 2024 · 7912306 · 7912306
1 parent 67fb892
commit 7912306
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 3 deletions.
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
@@ -1,6 +1,8 @@
 from io import StringIO
+import sys
 
 from benchmarks import snippets
+from rich.cells import cell_len, set_cell_size
 from rich.color import Color, ColorSystem
 from rich.console import Console
 from rich.pretty import Pretty
@@ -50,7 +52,7 @@ def time_split_unicode_heavy(self):
  Text(snippets.UNICODE_HEAVY_TEXT).split()
 
  def time_divide_unicode_heavy(self):
- self.text.divide(range(20, 100, 4))
+ Text(snippets.UNICODE_HEAVY_TEXT).divide(range(20, 100, 4))
 
  def time_align_center_unicode_heavy(self):
  Text(snippets.UNICODE_HEAVY_TEXT).align(
@@ -214,5 +216,22 @@ def setup(self):
  Segment("There is a way"),
  ] * 2
 
- def test_divide_complex(self):
+ self.unicode_segments = [
+ Segment(line) for line in snippets.UNICODE_HEAVY_TEXT.splitlines()
+ ]
+
+ def time_divide_complex(self):
  list(Segment.divide(self.line, [5, 10, 20, 50, 108, 110, 118]))
+
+ def time_divide_unicode_heavy(self):
+ list(Segment.divide(self.unicode_segments, range(0, 2400, 5)))
+
+
+class CellSuite:
+ def time_codepoint_widths(self):
+ for cp in range(sys.maxunicode + 1):
+ cell_len(chr(cp))
+
+ def time_set_cell_size(self):
+ for total in range(len(snippets.UNICODE_HEAVY_TEXT) // 2 + 2):
+ set_cell_size(snippets.UNICODE_HEAVY_TEXT, total)
diff --git a/rich/segment.py b/rich/segment.py
@@ -594,7 +594,7 @@ def divide(
  cuts (Iterable[int]): Cell positions where to divide.
 
  Yields:
- [Iterable[List[Segment]]]: An iterable of Segments in List.
+ Iterable[List[Segment]]: An iterable of Segments in List.
  """
  split_segments: List["Segment"] = []
  add_segment = split_segments.append