diff --git a/CHANGELOG.md b/CHANGELOG.md index 205830f074f..038d7e488f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ - New `DateTime::local_unix_epoch()` convenience constructor (https://github.com/unicode-org/icu4x/pull/4479) - `icu_datetime` - `FormattedDateTime` and `FormattedZonedDateTime` now implement `Clone` and `Copy` (https://github.com/unicode-org/icu4x/pull/4476) + - `icu_locid` + - Added `total_cmp` functions to `Locale` and other types to make them easier to use in `BTreeSet` (https://github.com/unicode-org/icu4x/pull/4608) - `icu_properties` - Add `Aran` script code (https://github.com/unicode-org/icu4x/pull/4426) - Mark additional constructors as `const` (https://github.com/unicode-org/icu4x/pull/4584, https://github.com/unicode-org/icu4x/pull/4574) diff --git a/components/locid/src/extensions/mod.rs b/components/locid/src/extensions/mod.rs index a37bf8b9fcd..dfe3ccd476a 100644 --- a/components/locid/src/extensions/mod.rs +++ b/components/locid/src/extensions/mod.rs @@ -49,6 +49,8 @@ pub mod private; pub mod transform; pub mod unicode; +use core::cmp::Ordering; + use other::Other; use private::Private; use transform::Transform; @@ -58,6 +60,7 @@ use alloc::vec::Vec; use crate::parser::ParserError; use crate::parser::SubtagIterator; +use crate::subtags; /// Defines the type of extension. #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] @@ -164,6 +167,41 @@ impl Extensions { && self.other.is_empty() } + #[allow(clippy::type_complexity)] + pub(crate) fn as_tuple( + &self, + ) -> ( + (&unicode::Attributes, &unicode::Keywords), + ( + Option<( + subtags::Language, + Option, + Option, + &subtags::Variants, + )>, + &transform::Fields, + ), + &private::Private, + &[other::Other], + ) { + ( + self.unicode.as_tuple(), + self.transform.as_tuple(), + &self.private, + &self.other, + ) + } + + /// Returns an ordering suitable for use in [`BTreeSet`]. + /// + /// The ordering may or may not be equivalent to string ordering, and it + /// may or may not be stable across ICU4X releases. + /// + /// [`BTreeSet`]: alloc::collections::BTreeSet + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.as_tuple().cmp(&other.as_tuple()) + } + /// Retains the specified extension types, clearing all others. /// /// # Examples diff --git a/components/locid/src/extensions/transform/mod.rs b/components/locid/src/extensions/transform/mod.rs index b4ae471f30f..0e0212eb2e6 100644 --- a/components/locid/src/extensions/transform/mod.rs +++ b/components/locid/src/extensions/transform/mod.rs @@ -34,6 +34,8 @@ mod fields; mod key; mod value; +use core::cmp::Ordering; + pub use fields::Fields; #[doc(inline)] pub use key::{key, Key}; @@ -42,7 +44,7 @@ pub use value::Value; use crate::parser::SubtagIterator; use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode}; use crate::shortvec::ShortBoxSlice; -use crate::subtags::Language; +use crate::subtags::{self, Language}; use crate::LanguageIdentifier; use litemap::LiteMap; @@ -131,6 +133,31 @@ impl Transform { self.fields.clear(); } + #[allow(clippy::type_complexity)] + pub(crate) fn as_tuple( + &self, + ) -> ( + Option<( + subtags::Language, + Option, + Option, + &subtags::Variants, + )>, + &Fields, + ) { + (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields) + } + + /// Returns an ordering suitable for use in [`BTreeSet`]. + /// + /// The ordering may or may not be equivalent to string ordering, and it + /// may or may not be stable across ICU4X releases. + /// + /// [`BTreeSet`]: alloc::collections::BTreeSet + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.as_tuple().cmp(&other.as_tuple()) + } + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { let mut tlang = None; let mut tfields = LiteMap::new(); diff --git a/components/locid/src/extensions/unicode/mod.rs b/components/locid/src/extensions/unicode/mod.rs index 36573ee926a..3d251a65676 100644 --- a/components/locid/src/extensions/unicode/mod.rs +++ b/components/locid/src/extensions/unicode/mod.rs @@ -32,6 +32,8 @@ mod key; mod keywords; mod value; +use core::cmp::Ordering; + #[doc(inline)] pub use attribute::{attribute, Attribute}; pub use attributes::Attributes; @@ -134,6 +136,20 @@ impl Unicode { self.attributes.clear(); } + pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) { + (&self.attributes, &self.keywords) + } + + /// Returns an ordering suitable for use in [`BTreeSet`]. + /// + /// The ordering may or may not be equivalent to string ordering, and it + /// may or may not be stable across ICU4X releases. + /// + /// [`BTreeSet`]: alloc::collections::BTreeSet + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.as_tuple().cmp(&other.as_tuple()) + } + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { let mut attributes = ShortBoxSlice::new(); diff --git a/components/locid/src/langid.rs b/components/locid/src/langid.rs index a5e9b9cb106..bfb0e6c20d8 100644 --- a/components/locid/src/langid.rs +++ b/components/locid/src/langid.rs @@ -203,6 +203,27 @@ impl LanguageIdentifier { self.write_cmp_bytes(other) } + pub(crate) fn as_tuple( + &self, + ) -> ( + subtags::Language, + Option, + Option, + &subtags::Variants, + ) { + (self.language, self.script, self.region, &self.variants) + } + + /// Compare this [`LanguageIdentifier`] with another [`LanguageIdentifier`] field-by-field. + /// The result is a total ordering sufficient for use in a [`BTreeMap`]. + /// + /// Unlike [`Self::strict_cmp`], this function's ordering may not equal string ordering. + /// + /// [`BTreeMap`]: alloc::collections::BTreeMap + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.as_tuple().cmp(&other.as_tuple()) + } + /// Compare this [`LanguageIdentifier`] with an iterator of BCP-47 subtags. /// /// This function has the same equality semantics as [`LanguageIdentifier::strict_cmp`]. It is intended as diff --git a/components/locid/src/locale.rs b/components/locid/src/locale.rs index 8d8a7c15b64..d3d3e21dba7 100644 --- a/components/locid/src/locale.rs +++ b/components/locid/src/locale.rs @@ -196,6 +196,47 @@ impl Locale { self.write_cmp_bytes(other) } + #[allow(clippy::type_complexity)] + pub(crate) fn as_tuple( + &self, + ) -> ( + ( + subtags::Language, + Option, + Option, + &subtags::Variants, + ), + ( + ( + &extensions::unicode::Attributes, + &extensions::unicode::Keywords, + ), + ( + Option<( + subtags::Language, + Option, + Option, + &subtags::Variants, + )>, + &extensions::transform::Fields, + ), + &extensions::private::Private, + &[extensions::other::Other], + ), + ) { + (self.id.as_tuple(), self.extensions.as_tuple()) + } + + /// Returns an ordering suitable for use in [`BTreeSet`]. + /// + /// The ordering may or may not be equivalent to string ordering, and it + /// may or may not be stable across ICU4X releases. + /// + /// [`BTreeSet`]: alloc::collections::BTreeSet + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.as_tuple().cmp(&other.as_tuple()) + } + /// Compare this [`Locale`] with an iterator of BCP-47 subtags. /// /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as diff --git a/ffi/capi/bindings/c/ICU4XLocale.h b/ffi/capi/bindings/c/ICU4XLocale.h index dea8d196b5c..42f80075ec2 100644 --- a/ffi/capi/bindings/c/ICU4XLocale.h +++ b/ffi/capi/bindings/c/ICU4XLocale.h @@ -52,6 +52,8 @@ bool ICU4XLocale_normalizing_eq(const ICU4XLocale* self, const char* other_data, ICU4XOrdering ICU4XLocale_strict_cmp(const ICU4XLocale* self, const char* other_data, size_t other_len); +ICU4XOrdering ICU4XLocale_total_cmp(const ICU4XLocale* self, const ICU4XLocale* other); + ICU4XLocale* ICU4XLocale_create_en(); ICU4XLocale* ICU4XLocale_create_bn(); diff --git a/ffi/capi/bindings/cpp/ICU4XLocale.h b/ffi/capi/bindings/cpp/ICU4XLocale.h index dea8d196b5c..42f80075ec2 100644 --- a/ffi/capi/bindings/cpp/ICU4XLocale.h +++ b/ffi/capi/bindings/cpp/ICU4XLocale.h @@ -52,6 +52,8 @@ bool ICU4XLocale_normalizing_eq(const ICU4XLocale* self, const char* other_data, ICU4XOrdering ICU4XLocale_strict_cmp(const ICU4XLocale* self, const char* other_data, size_t other_len); +ICU4XOrdering ICU4XLocale_total_cmp(const ICU4XLocale* self, const ICU4XLocale* other); + ICU4XLocale* ICU4XLocale_create_en(); ICU4XLocale* ICU4XLocale_create_bn(); diff --git a/ffi/capi/bindings/cpp/ICU4XLocale.hpp b/ffi/capi/bindings/cpp/ICU4XLocale.hpp index 67607d9a20d..ac531b2370a 100644 --- a/ffi/capi/bindings/cpp/ICU4XLocale.hpp +++ b/ffi/capi/bindings/cpp/ICU4XLocale.hpp @@ -192,6 +192,11 @@ class ICU4XLocale { */ ICU4XOrdering strict_cmp(const std::string_view other) const; + /** + * See the [Rust documentation for `total_cmp`](https://docs.rs/icu/latest/icu/locid/struct.Locale.html#method.total_cmp) for more information. + */ + ICU4XOrdering total_cmp(const ICU4XLocale& other) const; + /** * Deprecated * @@ -429,6 +434,9 @@ inline bool ICU4XLocale::normalizing_eq(const std::string_view other) const { inline ICU4XOrdering ICU4XLocale::strict_cmp(const std::string_view other) const { return static_cast(capi::ICU4XLocale_strict_cmp(this->inner.get(), other.data(), other.size())); } +inline ICU4XOrdering ICU4XLocale::total_cmp(const ICU4XLocale& other) const { + return static_cast(capi::ICU4XLocale_total_cmp(this->inner.get(), other.AsFFI())); +} inline ICU4XLocale ICU4XLocale::create_en() { return ICU4XLocale(capi::ICU4XLocale_create_en()); } diff --git a/ffi/capi/bindings/dart/Locale.g.dart b/ffi/capi/bindings/dart/Locale.g.dart index 028dab1d131..376fbd844a3 100644 --- a/ffi/capi/bindings/dart/Locale.g.dart +++ b/ffi/capi/bindings/dart/Locale.g.dart @@ -236,6 +236,12 @@ final class Locale implements ffi.Finalizable { temp.releaseAll(); return Ordering.values.firstWhere((v) => v._underlying == result); } + + /// See the [Rust documentation for `total_cmp`](https://docs.rs/icu/latest/icu/locid/struct.Locale.html#method.total_cmp) for more information. + Ordering totalCmp(Locale other) { + final result = _ICU4XLocale_total_cmp(_underlying, other._underlying); + return Ordering.values.firstWhere((v) => v._underlying == result); + } } @ffi.Native)>(isLeaf: true, symbol: 'ICU4XLocale_destroy') @@ -301,3 +307,7 @@ external bool _ICU4XLocale_normalizing_eq(ffi.Pointer self, ffi.Poin @ffi.Native, ffi.Pointer, ffi.Size)>(isLeaf: true, symbol: 'ICU4XLocale_strict_cmp') // ignore: non_constant_identifier_names external int _ICU4XLocale_strict_cmp(ffi.Pointer self, ffi.Pointer otherData, int otherLength); + +@ffi.Native, ffi.Pointer)>(isLeaf: true, symbol: 'ICU4XLocale_total_cmp') +// ignore: non_constant_identifier_names +external int _ICU4XLocale_total_cmp(ffi.Pointer self, ffi.Pointer other); diff --git a/ffi/capi/bindings/js/ICU4XLocale.d.ts b/ffi/capi/bindings/js/ICU4XLocale.d.ts index b18b577302d..6729ffa2722 100644 --- a/ffi/capi/bindings/js/ICU4XLocale.d.ts +++ b/ffi/capi/bindings/js/ICU4XLocale.d.ts @@ -141,6 +141,12 @@ export class ICU4XLocale { */ strict_cmp(other: string): ICU4XOrdering; + /** + + * See the {@link https://docs.rs/icu/latest/icu/locid/struct.Locale.html#method.total_cmp Rust documentation for `total_cmp`} for more information. + */ + total_cmp(other: ICU4XLocale): ICU4XOrdering; + /** * Deprecated diff --git a/ffi/capi/bindings/js/ICU4XLocale.mjs b/ffi/capi/bindings/js/ICU4XLocale.mjs index b282b273d7a..425cf4905ba 100644 --- a/ffi/capi/bindings/js/ICU4XLocale.mjs +++ b/ffi/capi/bindings/js/ICU4XLocale.mjs @@ -258,6 +258,10 @@ export class ICU4XLocale { return diplomat_out; } + total_cmp(arg_other) { + return ICU4XOrdering_rust_to_js[wasm.ICU4XLocale_total_cmp(this.underlying, arg_other.underlying)]; + } + static create_en() { return new ICU4XLocale(wasm.ICU4XLocale_create_en(), true, []); } diff --git a/ffi/capi/src/locale.rs b/ffi/capi/src/locale.rs index 49659813ef8..9c08b31de97 100644 --- a/ffi/capi/src/locale.rs +++ b/ffi/capi/src/locale.rs @@ -178,6 +178,11 @@ pub mod ffi { self.0.strict_cmp(other).into() } + #[diplomat::rust_link(icu::locid::Locale::total_cmp, FnInStruct)] + pub fn total_cmp(&self, other: &Self) -> ICU4XOrdering { + self.0.total_cmp(&other.0).into() + } + /// Deprecated /// /// Use `create_from_string("en"). diff --git a/provider/core/src/request.rs b/provider/core/src/request.rs index 1fb17602b8e..d7f4c5e021d 100644 --- a/provider/core/src/request.rs +++ b/provider/core/src/request.rs @@ -370,6 +370,24 @@ impl DataLocale { self == <&DataLocale>::default() } + /// Returns an ordering suitable for use in [`BTreeSet`]. + /// + /// The ordering may or may not be equivalent to string ordering, and it + /// may or may not be stable across ICU4X releases. + /// + /// [`BTreeSet`]: alloc::collections::BTreeSet + pub fn total_cmp(&self, other: &Self) -> Ordering { + self.langid + .total_cmp(&other.langid) + .then_with(|| self.keywords.cmp(&other.keywords)) + .then_with(|| { + #[cfg(feature = "experimental")] + return self.aux.cmp(&other.aux); + #[cfg(not(feature = "experimental"))] + return Ordering::Equal; + }) + } + /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. /// /// This ignores auxiliary keys. @@ -752,7 +770,7 @@ impl DataLocale { /// ``` /// /// [`Keywords`]: unicode_ext::Keywords -#[derive(Debug, PartialEq, Clone, Eq, Hash)] +#[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)] #[cfg(feature = "experimental")] pub struct AuxiliaryKeys { value: AuxiliaryKeysInner, @@ -790,6 +808,20 @@ impl PartialEq for AuxiliaryKeysInner { #[cfg(feature = "experimental")] impl Eq for AuxiliaryKeysInner {} +#[cfg(feature = "experimental")] +impl PartialOrd for AuxiliaryKeysInner { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[cfg(feature = "experimental")] +impl Ord for AuxiliaryKeysInner { + fn cmp(&self, other: &Self) -> Ordering { + self.deref().cmp(other.deref()) + } +} + #[cfg(feature = "experimental")] impl Debug for AuxiliaryKeysInner { #[inline]