release: 0.2.1

Blobfolio · Oct 5, 2023 · 1f6bf20 · 1f6bf20
2 parents 9abd882 + 1959a33
commit 1f6bf20
Show file tree

Hide file tree

Showing 4 changed files with 102 additions and 66 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,14 @@
 # Changelog
 
 
+## [0.2.1](https://github.com/Blobfolio/trimothy/releases/tag/v0.2.1) - 2023-10-04
+
+### Changed
+
+* Refactor/extend `NormalizeWhitespace` to work for all `u8`/`char` `Iterator`s
+
+
+
 ## [0.2.0](https://github.com/Blobfolio/trimothy/releases/tag/v0.2.0) - 2023-10-03
 
 ### New

diff --git a/CREDITS.md b/CREDITS.md
@@ -1,6 +1,6 @@
 # Project Dependencies
  Package: trimothy
- Version: 0.2.0
- Generated: 2023-10-04 04:34:44 UTC
+ Version: 0.2.1
+ Generated: 2023-10-05 01:56:28 UTC
 
 This package has no dependencies.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "trimothy"
-version = "0.2.0"
+version = "0.2.1"
 authors = ["Blobfolio, LLC. <[email protected]>"]
 edition = "2021"
 rust-version = "1.56"

diff --git a/src/iter.rs b/src/iter.rs
@@ -3,29 +3,34 @@
 */
 
 use core::{
- iter::Iterator,
+ iter::{
+ Copied,
+ Iterator,
+ },
  slice::Iter,
  str::Chars,
 };
-use crate::TrimSlice;
 
 
 
 /// # Normalized Whitespace Iterator.
 ///
-/// This trait adds a `normalized_whitespace` method to byte and string slices
-/// for iterating over their contents with the edges trimmed, and all
-/// contiguous inner whitespace converted to a single horizontal space.
-pub trait NormalizeWhitespace<T> {
+/// This trait exposes a `normalized_whitespace` method that returns an
+/// iterator over a byte or string slice that normalizes the whitespace, both
+/// trimming the edges and compacting any inner whitespace spans, converting
+/// them to single horizontal spaces (one per span).
+///
+/// This can be called on an `&[u8]` or `&str` directly, or any iterator
+/// yielding owned `u8` or `char` items.
+pub trait NormalizeWhitespace<T: Copy + Sized, I: Iterator<Item=T>> {
  /// # Normalized Whitespace Iterator.
  ///
- /// Return an iterator over the byte/char contents with the edges trimmed,
- /// and all contiguous inner whitespace converted to a single horizontal
- /// space.
- fn normalized_whitespace(&self) -> NormalizedWhitespace<T>;
+ /// Modify a byte or char iterator to trim the ends, and convert each
+ /// contiguous run of inner whitespace to a single horizontal space.
+ fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter<T, I>;
 }
 
-impl<'a> NormalizeWhitespace<Iter<'a, u8>> for &'a [u8] {
+impl<'a> NormalizeWhitespace<u8, Copied<Iter<'a, u8>>> for &'a [u8] {
  /// # Normalized Whitespace Iterator.
  ///
  /// Return an iterator over the byte/char contents with the edges trimmed,
@@ -41,15 +46,27 @@ impl<'a> NormalizeWhitespace<Iter<'a, u8>> for &'a [u8] {
  /// let normal: Vec<u8> = abnormal.normalized_whitespace().collect();
  /// assert_eq!(normal, b"Hello World!");
  /// ```
- fn normalized_whitespace(&self) -> NormalizedWhitespace<Iter<'a, u8>> {
- NormalizedWhitespace {
- iter: self.trim().iter(),
- ws: false,
- }
+ ///
+ /// This'll work on `u8` iterators too. For example, if you wanted to
+ /// remove `b'-'` before normalization, you could do something like:
+ ///
+ /// ```
+ /// use trimothy::NormalizeWhitespace;
+ ///
+ /// let abnormal: &[u8] = b" Hello - World!\n";
+ /// let normal: Vec<u8> = abnormal.iter()
+ /// .filter(|b| b'-'.ne(b))
+ /// .copied()
+ /// .normalized_whitespace()
+ /// .collect();
+ /// assert_eq!(normal, b"Hello World!");
+ /// ```
+ fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter<u8, Copied<Iter<'a, u8>>> {
+ self.iter().copied().normalized_whitespace()
  }
 }
 
-impl<'a> NormalizeWhitespace<Chars<'a>> for &'a str {
+impl<'a> NormalizeWhitespace<char, Chars<'a>> for &'a str {
  /// # Normalized Whitespace Iterator.
  ///
  /// Return an iterator over the byte/char contents with the edges trimmed,
@@ -65,11 +82,22 @@ impl<'a> NormalizeWhitespace<Chars<'a>> for &'a str {
  /// let normal: String = abnormal.normalized_whitespace().collect();
  /// assert_eq!(normal, "Hello World!");
  /// ```
- fn normalized_whitespace(&self) -> NormalizedWhitespace<Chars<'a>> {
- NormalizedWhitespace {
- iter: self.trim().chars(),
- ws: false,
- }
+ ///
+ /// This'll work on `char` iterators too. For example, if you wanted to
+ /// reverse and normalize a string, you could do something like:
+ ///
+ /// ```
+ /// use trimothy::NormalizeWhitespace;
+ ///
+ /// let abnormal: &str = " Hello World!\n";
+ /// let normal: String = abnormal.chars()
+ /// .rev()
+ /// .normalized_whitespace()
+ /// .collect();
+ /// assert_eq!(normal, "!dlroW olleH");
+ /// ```
+ fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter<char, Chars<'a>> {
+ self.chars().normalized_whitespace()
  }
 }
 
@@ -80,57 +108,57 @@ impl<'a> NormalizeWhitespace<Chars<'a>> for &'a str {
 ///
 /// This is the actual iterator returned by a
 /// `NormalizeWhitespace::normalized_whitespace` implementation.
-pub struct NormalizedWhitespace<T> {
- iter: T,
- ws: bool
+pub struct NormalizeWhiteSpaceIter<T: Copy + Sized, I: Iterator<Item=T>> {
+ iter: I,
+ next: Option<T>,
 }
 
-impl<'a> Iterator for NormalizedWhitespace<Iter<'a, u8>> {
- type Item = u8;
-
- fn next(&mut self) -> Option<Self::Item> {
- loop {
- let next = self.iter.next()?;
- if next.is_ascii_whitespace() {
- if ! self.ws {
- self.ws = true;
- return Some(b' ');
+/// # Helper: Implementations
+///
+/// Implement our custom `NormalizeWhitespace` trait for existing iterators,
+/// and implement `Iterator` for the corresponding `NormalizeWhiteSpaceIter`
+/// struct. Args: item type, its whitespace-test method, and the space literal.
+macro_rules! iter {
+ ($ty:ty, $is:ident, $ws:literal) => (
+ impl<I: Iterator<Item=$ty>> NormalizeWhitespace<$ty, I> for I {
+ fn normalized_whitespace(mut self) -> NormalizeWhiteSpaceIter<$ty, I> {
+ // Return the iterator, starting with the first non-whitespace
+ // character.
+ let next = self.by_ref().find(|n| ! n.$is());
+ NormalizeWhiteSpaceIter {
+ iter: self,
+ next,
  }
  }
- else {
- self.ws = false;
- return Some(*next);
- }
  }
- }
 
- fn size_hint(&self) -> (usize, Option<usize>) {
- let upper = self.iter.len();
- (0, Some(upper))
- }
-}
+ impl<I: Iterator<Item=$ty>> Iterator for NormalizeWhiteSpaceIter<$ty, I> {
+ type Item = $ty;
 
-impl<'a> Iterator for NormalizedWhitespace<Chars<'a>> {
- type Item = char;
+ fn next(&mut self) -> Option<Self::Item> {
+ // Anything in the buffer?
+ if let Some(next) = self.next.take() { return Some(next); }
 
- fn next(&mut self) -> Option<Self::Item> {
- loop {
- let next = self.iter.next()?;
- if next.is_whitespace() {
- if ! self.ws {
- self.ws = true;
- return Some(' ');
+ // Pull the next thing.
+ let next = self.iter.next()?;
+ if next.$is() {
+ // Skip the rest of the run via the inner iterator (not `self`, which
+ // would recurse); emit one space only if a non-whitespace item follows.
+ self.next = self.iter.find(|n| ! n.$is());
+ if self.next.is_some() { Some($ws) }
+ else { None }
  }
+ // Passthrough any non-whitespace bits.
+ else { Some(next) }
  }
- else {
- self.ws = false;
- return Some(next);
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let buffered = usize::from(self.next.is_some()); // The lookahead item is still to be yielded.
+ (buffered, self.iter.size_hint().1.and_then(|n| n.checked_add(buffered)))
  }
  }
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- let (_, upper) = self.iter.size_hint();
- (0, upper)
- }
+ );
 }
+
+iter!(char, is_whitespace, ' ');
+iter!(u8, is_ascii_whitespace, b' ');