From 5aef0c488fb93c3a5384ed17129544ce998e7c8a Mon Sep 17 00:00:00 2001 From: Simon Ochsenreither Date: Tue, 8 Aug 2023 01:57:38 +0200 Subject: [PATCH] stdlib: better `Hash` for `Float64`/`Float32` fixes #90 --- dora/stdlib/primitives.dora | 12 ++++++++++-- tests/float/float32-hash.dora | 12 ++++++++++++ tests/float/float64-hash.dora | 12 ++++++++++++ tests/num-hash.dora | 3 --- tests/stdlib/hashmap-contains-zero.dora | 8 ++++---- tests/stdlib/hashset-contains-zero.dora | 4 ++-- 6 files changed, 40 insertions(+), 11 deletions(-) create mode 100644 tests/float/float32-hash.dora create mode 100644 tests/float/float64-hash.dora diff --git a/dora/stdlib/primitives.dora b/dora/stdlib/primitives.dora index 5cfb600d..07ffb822 100644 --- a/dora/stdlib/primitives.dora +++ b/dora/stdlib/primitives.dora @@ -350,7 +350,10 @@ impl Float32 { @pub @internal fun isNan(): Bool; @pub @internal fun sqrt(): Float32; - @pub fun hash(): Int32 = self.asInt32(); + @pub fun hash(): Int32 = + // discard the sign, such that 0.0 and -0.0 hash the same, at the cost of all positive numbers + // and their negative counterparts hashing the same, but keeping the computation branch-free. + self.asInt32() & 0x7FFFFFFFi32; // should be lets, not funs @pub @static fun bits(): Int32 = 32i32; @@ -405,7 +408,12 @@ impl Float64 { @pub @internal fun isNan(): Bool; @pub @internal fun sqrt(): Float64; - @pub fun hash(): Int32 = self.asInt64().toInt32(); + @pub fun hash(): Int32 { + // discard the sign, such that 0.0 and -0.0 hash the same, at the cost of all positive numbers + // and their negative counterparts hashing the same, but keeping the computation branch-free. + let hash = self.asInt64() & 0x7FFFFFFFFFFFFFFF; + (hash ^ hash.shiftRight(31i32)).toInt32() + } // should be lets, not funs @pub @static fun bits(): Int32 = 64i32; diff --git a/tests/float/float32-hash.dora b/tests/float/float32-hash.dora new file mode 100644 index 00000000..e9d8a29a --- /dev/null +++ b/tests/float/float32-hash.dora @@ -0,0 +1,12 @@ +fun main(): Unit { + assert( 0.0f32 .hash() == 0i32); + assert((-0.0f32).hash() == 0i32); + + assert( 1.0f32 .hash() == 1065353216i32); + assert((-1.0f32).hash() == 1065353216i32); + + assert(Float32::infinityPositive().hash() == 2139095040i32); + assert(Float32::infinityNegative().hash() == 2139095040i32); + + assert(Float32::notANumber().hash() == 2143289344i32); +} diff --git a/tests/float/float64-hash.dora b/tests/float/float64-hash.dora new file mode 100644 index 00000000..3ecdd9cb --- /dev/null +++ b/tests/float/float64-hash.dora @@ -0,0 +1,12 @@ +fun main(): Unit { + assert( 0.0 .hash() == 0i32); + assert((-0.0).hash() == 0i32); + + assert( 1.0 .hash() == 2145386496i32); + assert((-1.0).hash() == 2145386496i32); + + assert(Float64::infinityPositive().hash() == -2097152i32); + assert(Float64::infinityNegative().hash() == -2097152i32); + + assert(Float64::notANumber().hash() == -1048576i32); +} diff --git a/tests/num-hash.dora b/tests/num-hash.dora index dfe009af..b332e7c6 100644 --- a/tests/num-hash.dora +++ b/tests/num-hash.dora @@ -4,7 +4,4 @@ fun main(): Unit { assert(1.toUInt8().hash() == 1i32); assert(1.hash() == 1i32); assert(1i64.hash() == 1i32); - assert(1.0f32.hash() == 1065353216i32); - // double needs a better hash implementation - assert(1.0.hash() == 0i32); } diff --git a/tests/stdlib/hashmap-contains-zero.dora b/tests/stdlib/hashmap-contains-zero.dora index b7ed8724..eb648bbc 100644 --- a/tests/stdlib/hashmap-contains-zero.dora +++ b/tests/stdlib/hashmap-contains-zero.dora @@ -9,8 +9,8 @@ fun float64(): Unit { assert(map.size() == 1); assert(map.contains(0.0)); assert(map.get(0.0).getOrPanic() == "a"); - assert(map.contains(-0.0)); // FIXME: this only works by coincidence, because the hash function sucks - assert(map.get(-0.0).getOrPanic() == "a"); // FIXME: this only works by coincidence, because the hash function sucks + assert(map.contains(-0.0)); + assert(map.get(-0.0).getOrPanic() == "a"); } fun float32(): Unit { @@ -19,6 +19,6 @@ fun float32(): Unit { assert(map.size() == 1); assert(map.contains(0.0f32)); assert(map.get(0.0f32).getOrPanic() == "a"); - assert(map.contains(-0.0f32).not()); // FIXME: should be true instead - // assert(map.get(-0.0f32).getOrPanic() == "a"); // FIXME: should be true instead + assert(map.contains(-0.0f32)); + assert(map.get(-0.0f32).getOrPanic() == "a"); } diff --git a/tests/stdlib/hashset-contains-zero.dora b/tests/stdlib/hashset-contains-zero.dora index 581ef099..09cf48de 100644 --- a/tests/stdlib/hashset-contains-zero.dora +++ b/tests/stdlib/hashset-contains-zero.dora @@ -8,7 +8,7 @@ fun float64(): Unit { assert(set.size() == 1); assert(set.contains(0.0)); - assert(set.contains(-0.0)); // FIXME: this only works by coincidence, because the hash function sucks + assert(set.contains(-0.0)); } fun float32(): Unit { @@ -16,5 +16,5 @@ fun float32(): Unit { assert(set.size() == 1); assert(set.contains(0.0f32)); - assert(set.contains(-0.0f32).not()); // FIXME: should be true instead + assert(set.contains(-0.0f32)); }