diff --git a/python/python_calamine/_python_calamine.pyi b/python/python_calamine/_python_calamine.pyi
index 62caaf2..21bca9e 100644
--- a/python/python_calamine/_python_calamine.pyi
+++ b/python/python_calamine/_python_calamine.pyi
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
 import enum
+import typing
 from datetime import date, datetime, time, timedelta
 from os import PathLike
-from typing import Protocol
 
 ValueT = int | float | str | bool | time | date | datetime | timedelta
 
-class ReadBuffer(Protocol):
+class ReadBuffer(typing.Protocol):
     def seek(self) -> int: ...
     def read(self) -> bytes: ...
 
@@ -57,6 +57,8 @@ class CalamineSheet:
         By default, calamine skips empty rows/cols before data.
         For suppress this behaviour, set `skip_empty_area` to `False`.
         """
+    def iter_rows(self) -> typing.Iterator[list[ValueT]]:
+        """Returning data from sheet as iterator of lists."""
 
 class CalamineWorkbook:
     path: str | None
diff --git a/src/types/cell.rs b/src/types/cell.rs
index c95ebdb..c00c5d3 100644
--- a/src/types/cell.rs
+++ b/src/types/cell.rs
@@ -6,7 +6,7 @@ use pyo3::prelude::*;
 /// https://learn.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system
 static EXCEL_1900_1904_DIFF: f64 = 1462.0;
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum CellValue {
     Int(i64),
     Float(f64),
diff --git a/src/types/sheet.rs b/src/types/sheet.rs
index 3be234d..45209c1 100644
--- a/src/types/sheet.rs
+++ b/src/types/sheet.rs
@@ -1,7 +1,7 @@
 use std::fmt::Display;
 use std::sync::Arc;
 
-use calamine::{DataType, Range, SheetType, SheetVisible};
+use calamine::{DataType, Range, Rows, SheetType, SheetVisible};
 use pyo3::class::basic::CompareOp;
 use pyo3::prelude::*;
 use pyo3::types::PyList;
@@ -199,4 +199,71 @@ impl CalamineSheet {
             }),
         ))
     }
+
+    /// Return an iterator that yields each row of the sheet as a list.
+    fn iter_rows(&self) -> CalamineCellIterator {
+        CalamineCellIterator::from_range(Arc::clone(&self.range))
+    }
+}
+
+/// Row iterator returned by `CalamineSheet::iter_rows`.
+///
+/// The first `start.0` items are rows of empty strings; every later item is
+/// the next row of the underlying range, until the range is exhausted.
+#[pyclass]
+pub struct CalamineCellIterator {
+    /// Number of `__next__` calls made so far.
+    position: u32,
+    /// Start position of the range, as returned by `Range::start`.
+    start: (u32, u32),
+    /// Row of empty strings, as wide as the range.
+    empty_row: Vec<CellValue>,
+    /// Row iterator borrowing from `range`; declared before it so that it is
+    /// dropped first.
+    iter: Rows<'static, DataType>,
+    /// Keeps the data borrowed by `iter` alive.
+    #[allow(dead_code)]
+    range: Arc<Range<DataType>>,
+}
+
+impl CalamineCellIterator {
+    /// Build an iterator over `range`, sharing ownership of it.
+    fn from_range(range: Arc<Range<DataType>>) -> CalamineCellIterator {
+        let empty_row = vec![CellValue::String(String::new()); range.width()];
+        CalamineCellIterator {
+            empty_row,
+            position: 0,
+            // `start()` is `None` for an empty range; fall back to the origin
+            // instead of panicking so that iteration just yields nothing.
+            start: range.start().unwrap_or((0, 0)),
+            // SAFETY: the borrow is extended to 'static, but the `Arc` stored in
+            // `range` keeps the data alive for the whole life of this struct, and
+            // `iter` is declared before `range`, so it is dropped first.
+            iter: unsafe { std::mem::transmute(range.rows()) },
+            range,
+        }
+    }
+}
+
+#[pymethods]
+impl CalamineCellIterator {
+    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<&PyList> {
+        slf.position += 1;
+        // The range begins at sheet row `start.0`; emit a row of empty strings
+        // for each call before that, then hand out the rows of the range.
+        if slf.position > slf.start.0 {
+            slf.iter.next().map(|row| {
+                PyList::new(
+                    slf.py(),
+                    row.iter().map(<&DataType as Into<CellValue>>::into),
+                )
+            })
+        } else {
+            Some(PyList::new(slf.py(), slf.empty_row.clone()))
+        }
+    }
 }
diff --git a/tests/test_base.py b/tests/test_base.py
index d8e6b25..a805b89 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -180,6 +180,30 @@ def test_xlsx_read():
     assert [] == reader.get_sheet_by_index(1).to_python(skip_empty_area=False)
 
 
+def test_xlsx_iter_rows():
+    names = ["Sheet1", "Sheet2", "Sheet3"]
+    data = [
+        ["", "", "", "", "", "", "", "", "", ""],
+        [
+            "String",
+            1,
+            1.1,
+            True,
+            False,
+            date(2010, 10, 10),
+            datetime(2010, 10, 10, 10, 10, 10),
+            time(10, 10, 10),
+            timedelta(hours=10, minutes=10, seconds=10, microseconds=100000),
+            timedelta(hours=255, minutes=10, seconds=10),
+        ],
+    ]
+
+    reader = CalamineWorkbook.from_object(PATH / "base.xlsx")
+
+    assert names == reader.sheet_names
+    assert data == list(reader.get_sheet_by_index(0).iter_rows())
+
+
 def test_nrows():
     reader = CalamineWorkbook.from_object(PATH / "base.xlsx")
     sheet = reader.get_sheet_by_name("Sheet3")