diff --git a/CHANGELOG.md b/CHANGELOG.md index 79d78c6..2776c1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # Changelog +## [0.2.0](https://github.com/Blobfolio/trimothy/releases/tag/v0.2.0) - 2023-10-03 + +### New + +* `NormalizeWhitespace` trait + + + ## [0.1.8](https://github.com/Blobfolio/trimothy/releases/tag/v0.1.8) - 2023-06-01 ### Changed diff --git a/CREDITS.md b/CREDITS.md index 321498b..8676f5a 100644 --- a/CREDITS.md +++ b/CREDITS.md @@ -1,6 +1,6 @@ # Project Dependencies Package: trimothy - Version: 0.1.7 - Generated: 2023-06-01 20:20:18 UTC + Version: 0.2.0 + Generated: 2023-10-04 04:34:44 UTC This package has no dependencies. diff --git a/Cargo.toml b/Cargo.toml index e22c5fa..fc8e556 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "trimothy" -version = "0.1.8" +version = "0.2.0" authors = ["Blobfolio, LLC. "] edition = "2021" rust-version = "1.56" diff --git a/README.md b/README.md index a89d129..9d80d40 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,15 @@ This trait brings _mutable_ match-based trimming `String`, `Vec`, and `Box<[ | `trim_end_matches_mut` | Trim arbitrary trailing bytes via callback (mutably). | +### [`NormalizeWhitespace`] + +This trait exposes an iterator over byte/string slice contents with the edges trimmed, and all contiguous inner whitespace converted to a single horizontal space. + +| Method | Description | +| ------ | ----------- | +| `normalized_whitespace` | Return said iterator. | + + ## Installation @@ -67,7 +76,7 @@ The dependency can be added the normal way: ```toml [dependencies] -trimothy = "0.1" +trimothy = "0.2" ``` diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..b0cf936 --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,136 @@ +/*! +# Trimothy - Normalized Whitespace Iterator +*/ + +use core::{ + iter::Iterator, + slice::Iter, + str::Chars, +}; +use crate::TrimSlice; + + + +/// # Normalized Whitespace Iterator. 
+/// +/// This trait adds a `normalized_whitespace` method to byte and string slices +/// for iterating over their contents with the edges trimmed, and all +/// contiguous inner whitespace converted to a single horizontal space. +pub trait NormalizeWhitespace { + /// # Normalized Whitespace Iterator. + /// + /// Return an iterator over the byte/char contents with the edges trimmed, + /// and all contiguous inner whitespace converted to a single horizontal + /// space. + fn normalized_whitespace(&self) -> NormalizedWhitespace; +} + +impl<'a> NormalizeWhitespace> for &'a [u8] { + /// # Normalized Whitespace Iterator. + /// + /// Return an iterator over the bytes with the edges trimmed, and all + /// contiguous inner ASCII whitespace converted to a single horizontal + /// space. + /// + /// ## Examples + /// + /// ``` + /// use trimothy::NormalizeWhitespace; + /// + /// let abnormal: &[u8] = b" Hello \t World!\n"; + /// let normal: Vec<u8> = abnormal.normalized_whitespace().collect(); + /// assert_eq!(normal, b"Hello World!"); + /// ``` + fn normalized_whitespace(&self) -> NormalizedWhitespace> { + NormalizedWhitespace { + iter: self.trim().iter(), + ws: false, + } + } +} + +impl<'a> NormalizeWhitespace> for &'a str { + /// # Normalized Whitespace Iterator. + /// + /// Return an iterator over the chars with the edges trimmed, and all + /// contiguous inner whitespace converted to a single horizontal + /// space. + /// + /// ## Examples + /// + /// ``` + /// use trimothy::NormalizeWhitespace; + /// + /// let abnormal: &str = " Hello \t World!\n"; + /// let normal: String = abnormal.normalized_whitespace().collect(); + /// assert_eq!(normal, "Hello World!"); + /// ``` + fn normalized_whitespace(&self) -> NormalizedWhitespace> { + NormalizedWhitespace { + iter: self.trim().chars(), + ws: false, + } + } +} + + + +#[derive(Debug)] +/// # (Actual) Normalized Whitespace Iterator. 
+/// +/// This is the actual iterator returned by a +/// `NormalizeWhitespace::normalized_whitespace` implementation. +pub struct NormalizedWhitespace { + iter: T, + ws: bool +} + +impl<'a> Iterator for NormalizedWhitespace> { + type Item = u8; + + fn next(&mut self) -> Option { + loop { + let next = self.iter.next()?; + if next.is_ascii_whitespace() { + if ! self.ws { + self.ws = true; + return Some(b' '); + } + } + else { + self.ws = false; + return Some(*next); + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let upper = self.iter.len(); + (0, Some(upper)) + } +} + +impl<'a> Iterator for NormalizedWhitespace> { + type Item = char; + + fn next(&mut self) -> Option { + loop { + let next = self.iter.next()?; + if next.is_whitespace() { + if ! self.ws { + self.ws = true; + return Some(' '); + } + } + else { + self.ws = false; + return Some(next); + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, upper) = self.iter.size_hint(); + (0, upper) + } +} diff --git a/src/lib.rs b/src/lib.rs index 4fa4bcb..13a6e50 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,14 +62,13 @@ This trait brings _mutable_ match-based trimming `String`, `Vec`, and `Box<[ -## Installation +### [`NormalizeWhitespace`] -The dependency can be added the normal way: +This trait exposes an iterator over byte/string slice contents with the edges trimmed, and all contiguous inner whitespace converted to a single horizontal space. -```ignore,toml -[dependencies] -trimothy = "0.1" -``` +| Method | Description | +| ------ | ----------- | +| `normalized_whitespace` | Return said iterator. 
| */ #![forbid(unsafe_code)] @@ -101,9 +100,11 @@ trimothy = "0.1" extern crate alloc; +mod iter; mod trim_mut; mod trim_slice; +pub use iter::NormalizeWhitespace; pub use trim_mut::{ TrimMut, TrimMatchesMut, diff --git a/src/trim_mut.rs b/src/trim_mut.rs index 958a61d..974425c 100644 --- a/src/trim_mut.rs +++ b/src/trim_mut.rs @@ -500,3 +500,45 @@ impl TrimMatchesMut for Vec { else { self.truncate(0); } } } + + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn trim_str() { + use alloc::borrow::ToOwned; + + for v in [ + "ĤéĹlo the WŎrld\u{0300}", + " ĤéĹlo the WŎrld\u{0300}", + " \tĤéĹlo the WŎrld\u{0300}", + "\r \nĤéĹlo\nthe WŎrld\u{0300}", + " ĤéĹlo the WŎrld\u{0300}\u{2003} ", + " \tĤéĹlo the WŎrld\u{0300} ", + "\r \nĤéĹlo\nthe WŎrld\u{0300} \t\t", + "ĤéĹlo the WŎrld\u{0300}\0 ", + "ĤéĹlo the WŎrld\u{0300}\r\r", + "ĤéĹlo the WŎrld\u{0300} \r\t", + "\nHello\nWorld\n!\n", + ] { + let mut v2 = v.to_owned(); + v2.trim_start_mut(); + assert_eq!(v2, v.trim_start()); + + v2 = v.to_owned(); + v2.trim_end_mut(); + assert_eq!(v2, v.trim_end()); + + v2 = v.to_owned(); + v2.trim_mut(); + assert_eq!(v2, v.trim()); + + v2 = v.to_owned(); + v2.trim_matches_mut(|c| c == '\t'); + assert_eq!(v2, v.trim_matches(|c| c == '\t')); + } + } +}