Skip to content

Commit

Permalink
feat: iterator over rust range
Browse files Browse the repository at this point in the history
  • Loading branch information
dimastbk committed Dec 1, 2023
1 parent 0488f30 commit 54fec3f
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 4 deletions.
6 changes: 4 additions & 2 deletions python/python_calamine/_python_calamine.pyi
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from __future__ import annotations

import enum
import typing
from datetime import date, datetime, time, timedelta
from os import PathLike
from typing import Protocol

ValueT = int | float | str | bool | time | date | datetime | timedelta

class ReadBuffer(Protocol):
# Structural interface: any object exposing `seek` and `read` with these
# signatures satisfies it. Presumably describes the file-like inputs the
# workbook readers accept -- confirm against the callers.
class ReadBuffer(typing.Protocol):
def seek(self) -> int: ...
def read(self) -> bytes: ...

Expand Down Expand Up @@ -57,6 +57,8 @@ class CalamineSheet:
By default, calamine skips empty rows/cols before data.
For suppress this behaviour, set `skip_empty_area` to `False`.
"""
def iter_rows(self) -> typing.Iterator[list[ValueT]]:
"""Return the data from the sheet as an iterator of lists (one list per row)."""

class CalamineWorkbook:
path: str | None
Expand Down
2 changes: 1 addition & 1 deletion src/types/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use pyo3::prelude::*;
/// https://learn.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system
static EXCEL_1900_1904_DIFF: f64 = 1462.0;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum CellValue {
Int(i64),
Float(f64),
Expand Down
53 changes: 52 additions & 1 deletion src/types/sheet.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fmt::Display;
use std::sync::Arc;

use calamine::{DataType, Range, SheetType, SheetVisible};
use calamine::{DataType, Range, Rows, SheetType, SheetVisible};
use pyo3::class::basic::CompareOp;
use pyo3::prelude::*;
use pyo3::types::PyList;
Expand Down Expand Up @@ -199,4 +199,55 @@ impl CalamineSheet {
}),
))
}

/// Returns an iterator over the rows of this sheet.
///
/// The iterator receives its own `Arc` handle to the sheet's range, so it
/// shares the underlying data with the sheet rather than copying it.
fn iter_rows(&self) -> CalamineCellIterator {
    let shared_range = Arc::clone(&self.range);
    CalamineCellIterator::from_range(shared_range)
}
}

// Row iterator over a sheet range, exposed to Python through `#[pyclass]`.
//
// `iter` borrows from the data held by `range`; `from_range` erases that
// borrow's lifetime to `'static` with a transmute, so the `Arc` is stored next
// to the iterator to keep the borrowed data alive.
#[pyclass]
pub struct CalamineCellIterator {
// Number of `__next__` calls made so far.
position: u32,
// Value of `Range::start()` for the range; `__next__` only reads `start.0`.
start: (u32, u32),
// One empty-string cell per column of the range; emitted by `__next__`
// while `position <= start.0`.
empty_row: Vec<CellValue>,
// Row iterator over `range` whose lifetime was transmuted to `'static`.
iter: Rows<'static, DataType>,
// Never read directly: held only so the data `iter` points into stays alive.
#[allow(dead_code)]
range: Arc<Range<DataType>>,
}

impl CalamineCellIterator {
fn from_range(range: Arc<Range<DataType>>) -> CalamineCellIterator {
let mut empty_row = Vec::with_capacity(range.width());
for _ in 0..range.width() {
empty_row.push(CellValue::String("".to_string()))
}
CalamineCellIterator {
empty_row,
position: 0,
start: range.start().unwrap(),
iter: unsafe { std::mem::transmute(range.rows()) },
range,
}
}
}

#[pymethods]
impl CalamineCellIterator {
    // Python iterator protocol: the object is its own iterator.
    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
        slf
    }

    // Produces the next row as a Python list, or `None` once the underlying
    // row iterator is exhausted (which ends the Python iteration).
    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<&PyList> {
        slf.position += 1;

        // Rows above the first row of the range are emitted as copies of the
        // pre-built row of empty strings.
        if slf.position <= slf.start.0 {
            let padding = slf.empty_row.clone();
            return Some(PyList::new(slf.py(), padding));
        }

        // Past the padding: convert the next stored row cell by cell.
        let cells = slf.iter.next()?;
        let values = cells.iter().map(<&DataType as Into<CellValue>>::into);
        Some(PyList::new(slf.py(), values))
    }
}
24 changes: 24 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,30 @@ def test_xlsx_read():
assert [] == reader.get_sheet_by_index(1).to_python(skip_empty_area=False)


def test_xlsx_iter_rows():
    """`iter_rows` on the first sheet of base.xlsx yields the expected rows."""
    expected_sheet_names = ["Sheet1", "Sheet2", "Sheet3"]

    # First expected row: ten empty-string cells.
    blank_row = [""] * 10
    # Second expected row: one cell per supported value type.
    typed_row = [
        "String",
        1,
        1.1,
        True,
        False,
        date(2010, 10, 10),
        datetime(2010, 10, 10, 10, 10, 10),
        time(10, 10, 10),
        timedelta(hours=10, minutes=10, seconds=10, microseconds=100000),
        timedelta(hours=255, minutes=10, seconds=10),
    ]
    expected_rows = [blank_row, typed_row]

    workbook = CalamineWorkbook.from_object(PATH / "base.xlsx")

    assert expected_sheet_names == workbook.sheet_names

    first_sheet = workbook.get_sheet_by_index(0)
    assert expected_rows == list(first_sheet.iter_rows())


def test_nrows():
reader = CalamineWorkbook.from_object(PATH / "base.xlsx")
sheet = reader.get_sheet_by_name("Sheet3")
Expand Down

0 comments on commit 54fec3f

Please sign in to comment.