diff --git a/CHANGELOG.md b/CHANGELOG.md index 2776c1a..0fb3666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # Changelog +## [0.2.1](https://github.com/Blobfolio/trimothy/releases/tag/v0.2.1) - 2023-10-04 + +### Changed + +* Refactor/extend `NormalizeWhitespace` to work for all `u8`/`char` `Iterator`s + + + ## [0.2.0](https://github.com/Blobfolio/trimothy/releases/tag/v0.2.0) - 2023-10-03 ### New diff --git a/CREDITS.md b/CREDITS.md index 8676f5a..1c8827d 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -1,6 +1,6 @@ # Project Dependencies Package: trimothy - Version: 0.2.0 - Generated: 2023-10-04 04:34:44 UTC + Version: 0.2.1 + Generated: 2023-10-05 01:56:28 UTC This package has no dependencies. diff --git a/Cargo.toml b/Cargo.toml index fc8e556..8371a94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "trimothy" -version = "0.2.0" +version = "0.2.1" authors = ["Blobfolio, LLC. "] edition = "2021" rust-version = "1.56" diff --git a/src/iter.rs b/src/iter.rs index b0cf936..46ed86b 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -3,29 +3,34 @@ */ use core::{ - iter::Iterator, + iter::{ + Copied, + Iterator, + }, slice::Iter, str::Chars, }; -use crate::TrimSlice; /// # Normalized Whitespace Iterator. /// -/// This trait adds a `normalized_whitespace` method to byte and string slices -/// for iterating over their contents with the edges trimmed, and all -/// contiguous inner whitespace converted to a single horizontal space. -pub trait NormalizeWhitespace { +/// This trait exposes a `normalized_whitespace` method that returns an +/// iterator over a byte or string slice that normalizes the whitespace, both +/// trimming the edges and compacting any inner whitespace spans, converting +/// them to single horizontal spaces (one per span). +/// +/// This can be called on an `&[u8]` or `&str` directly, or any iterator +/// yielding owned `u8` or `char` items. +pub trait NormalizeWhitespace> { /// # Normalized Whitespace Iterator. 
/// - /// Return an iterator over the byte/char contents with the edges trimmed, - /// and all contiguous inner whitespace converted to a single horizontal - /// space. - fn normalized_whitespace(&self) -> NormalizedWhitespace; + /// Modify a byte or char iterator to trim the ends, and convert all + /// contiguous inner whitespace to a single horizontal space. + fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter; } -impl<'a> NormalizeWhitespace> for &'a [u8] { +impl<'a> NormalizeWhitespace>> for &'a [u8] { /// # Normalized Whitespace Iterator. /// /// Return an iterator over the byte/char contents with the edges trimmed, @@ -41,15 +46,27 @@ impl<'a> NormalizeWhitespace> for &'a [u8] { /// let normal: Vec = abnormal.normalized_whitespace().collect(); /// assert_eq!(normal, b"Hello World!"); /// ``` - fn normalized_whitespace(&self) -> NormalizedWhitespace> { - NormalizedWhitespace { - iter: self.trim().iter(), - ws: false, - } + /// + /// This'll work on `u8` iterators too. For example, if you wanted to + /// remove `b'-'` before normalization, you could do something like: + /// + /// ``` + /// use trimothy::NormalizeWhitespace; + /// + /// let abnormal: &[u8] = b" Hello - World!\n"; + /// let normal: Vec = abnormal.iter() + /// .filter(|b| b'-'.ne(b)) + /// .copied() + /// .normalized_whitespace() + /// .collect(); + /// assert_eq!(normal, b"Hello World!"); + /// ``` + fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter>> { + self.iter().copied().normalized_whitespace() } } -impl<'a> NormalizeWhitespace> for &'a str { +impl<'a> NormalizeWhitespace> for &'a str { /// # Normalized Whitespace Iterator. 
/// /// Return an iterator over the byte/char contents with the edges trimmed, @@ -65,11 +82,22 @@ impl<'a> NormalizeWhitespace> for &'a str { /// let normal: String = abnormal.normalized_whitespace().collect(); /// assert_eq!(normal, "Hello World!"); /// ``` - fn normalized_whitespace(&self) -> NormalizedWhitespace> { - NormalizedWhitespace { - iter: self.trim().chars(), - ws: false, - } + /// + /// This'll work on `char` iterators too. For example, if you wanted to + /// reverse and normalize a string, you could do something like: + /// + /// ``` + /// use trimothy::NormalizeWhitespace; + /// + /// let abnormal: &str = " Hello World!\n"; + /// let normal: String = abnormal.chars() + /// .rev() + /// .normalized_whitespace() + /// .collect(); + /// assert_eq!(normal, "!dlroW olleH"); + /// ``` + fn normalized_whitespace(self) -> NormalizeWhiteSpaceIter> { + self.chars().normalized_whitespace() } } @@ -80,57 +108,57 @@ impl<'a> NormalizeWhitespace> for &'a str { /// /// This is the actual iterator returned by a /// `NormalizeWhitespace::normalized_whitespace` implementation. -pub struct NormalizedWhitespace { - iter: T, - ws: bool +pub struct NormalizeWhiteSpaceIter> { + iter: I, + next: Option, } -impl<'a> Iterator for NormalizedWhitespace> { - type Item = u8; - - fn next(&mut self) -> Option { - loop { - let next = self.iter.next()?; - if next.is_ascii_whitespace() { - if ! self.ws { - self.ws = true; - return Some(b' '); +/// # Helper: Implementations +/// +/// Implement our custom `NormalizeWhitespace` trait for existing iterators, +/// and implement `Iterator` for the corresponding `NormalizeWhiteSpaceIter` +/// struct. +macro_rules! iter { + ($ty:ty, $is:ident, $ws:literal) => ( + impl> NormalizeWhitespace<$ty, I> for I { + fn normalized_whitespace(mut self) -> NormalizeWhiteSpaceIter<$ty, I> { + // Return the iterator, starting with the first non-whitespace + // character. + let next = self.by_ref().find(|n| ! 
n.$is()); + NormalizeWhiteSpaceIter { + iter: self, + next, } } - else { - self.ws = false; - return Some(*next); - } } - } - fn size_hint(&self) -> (usize, Option) { - let upper = self.iter.len(); - (0, Some(upper)) - } -} + impl> Iterator for NormalizeWhiteSpaceIter<$ty, I> { + type Item = $ty; -impl<'a> Iterator for NormalizedWhitespace> { - type Item = char; + fn next(&mut self) -> Option { + // Anything in the buffer? + if let Some(next) = self.next.take() { return Some(next); } - fn next(&mut self) -> Option { - loop { - let next = self.iter.next()?; - if next.is_whitespace() { - if ! self.ws { - self.ws = true; - return Some(' '); + // Pull the next thing. + let next = self.iter.next()?; + if next.$is() { + // Skip the rest of this whitespace span on the inner iterator (no + // re-entry into this method); buffer what follows, or finish. + self.next = self.iter.by_ref().find(|n| ! n.$is()); + if self.next.is_some() { Some($ws) } + else { None } } + // Passthrough any non-whitespace bits. + else { Some(next) } } - else { - self.ws = false; - return Some(next); + + fn size_hint(&self) -> (usize, Option) { + let extra = usize::from(self.next.is_some()); // Buffered item, if any, is still owed. + (extra, self.iter.size_hint().1.and_then(|n| n.checked_add(extra))) } } - } - - fn size_hint(&self) -> (usize, Option) { - let (_, upper) = self.iter.size_hint(); - (0, upper) - } + ); } + +iter!(char, is_whitespace, ' '); +iter!(u8, is_ascii_whitespace, b' ');