Skip to content

Commit

Permalink
[ENH] Comparison (LT, GT, LTE, GTE) and prefix operations on arrow bl…
Browse files Browse the repository at this point in the history
…ockfile (#2223)

## Description of changes

*Summarize the changes made by this PR.*
 - New functionality
	 - Adds getters for the LT, GT, LTE and GTE comparisons, plus get-by-prefix, to the arrow blockfile

## Test plan
Unit tests for the arrow blockfile
- [x] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
None
  • Loading branch information
sanketkedia committed May 23, 2024
1 parent c19b229 commit c81130d
Show file tree
Hide file tree
Showing 8 changed files with 671 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 2080f026e9401a99d3066b4469ec3c40d5756edc7cf222cac08820ff3fb99264 # shrinks to num_key = 3, query_key = 1000
cc edb389ffc15848ee1e63e29713eca3d5c2fc462f7c2dbf200d44b48c53dee47a # shrinks to num_key = 4030, query_key = 4030
112 changes: 112 additions & 0 deletions rust/worker/src/blockstore/arrow/block/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,118 @@ impl Block {
None
}

pub fn get_prefix<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
prefix: &str,
) -> Option<Vec<(&str, K, V)>> {
let prefix_array = self
.data
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut res: Vec<(&str, K, V)> = vec![];
for i in 0..self.data.num_rows() {
let curr_prefix = prefix_array.value(i);
if curr_prefix == prefix {
res.push((
curr_prefix,
K::get(self.data.column(1), i),
V::get(self.data.column(2), i),
));
}
}
return Some(res);
}

pub fn get_gt<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
prefix: &str,
key: K,
) -> Option<Vec<(&str, K, V)>> {
let prefix_array = self
.data
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut res: Vec<(&str, K, V)> = vec![];
for i in 0..self.data.num_rows() {
let curr_prefix = prefix_array.value(i);
let curr_key = K::get(self.data.column(1), i);
if curr_prefix == prefix && curr_key > key {
res.push((curr_prefix, curr_key, V::get(self.data.column(2), i)));
}
}
return Some(res);
}

pub fn get_lt<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
prefix: &str,
key: K,
) -> Option<Vec<(&str, K, V)>> {
let prefix_array = self
.data
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut res: Vec<(&str, K, V)> = vec![];
for i in 0..self.data.num_rows() {
let curr_prefix = prefix_array.value(i);
let curr_key = K::get(self.data.column(1), i);
if curr_prefix == prefix && curr_key < key {
res.push((curr_prefix, curr_key, V::get(self.data.column(2), i)));
}
}
return Some(res);
}

pub fn get_lte<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
prefix: &str,
key: K,
) -> Option<Vec<(&str, K, V)>> {
let prefix_array = self
.data
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut res: Vec<(&str, K, V)> = vec![];
for i in 0..self.data.num_rows() {
let curr_prefix = prefix_array.value(i);
let curr_key = K::get(self.data.column(1), i);
if curr_prefix == prefix && curr_key <= key {
res.push((curr_prefix, curr_key, V::get(self.data.column(2), i)));
}
}
return Some(res);
}

pub fn get_gte<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
prefix: &str,
key: K,
) -> Option<Vec<(&str, K, V)>> {
let prefix_array = self
.data
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut res: Vec<(&str, K, V)> = vec![];
for i in 0..self.data.num_rows() {
let curr_prefix = prefix_array.value(i);
let curr_key = K::get(self.data.column(1), i);
if curr_prefix == prefix && curr_key >= key {
res.push((curr_prefix, curr_key, V::get(self.data.column(2), i)));
}
}
return Some(res);
}

pub fn get_at_index<'me, K: ArrowReadableKey<'me>, V: ArrowReadableValue<'me>>(
&'me self,
index: usize,
Expand Down

0 comments on commit c81130d

Please sign in to comment.