Skip to content

Commit

Permalink
Merge branch 'main' into hf-kklein-patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
hf-kklein committed May 17, 2024
2 parents 25adea0 + 3dbc4e7 commit 92332ee
Show file tree
Hide file tree
Showing 44 changed files with 157 additions and 133 deletions.
4 changes: 2 additions & 2 deletions dev_requirements/requirements-linting.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# pip-compile-multi
#
astroid==3.1.0
astroid==3.2.1
# via pylint
dill==0.3.8
# via pylint
Expand All @@ -15,7 +15,7 @@ mccabe==0.7.0
# via pylint
platformdirs==4.2.0
# via pylint
pylint==3.1.1
pylint==3.2.0
# via -r dev_requirements/requirements-linting.in
tomlkit==0.12.5
# via pylint
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pydantic-core==2.18.2
# via pydantic
python-dateutil==2.9.0.post0
# via pandas
python-docx==1.1.0
python-docx==1.1.2
# via kohlrahbi (pyproject.toml)
pytz==2024.1
# via
Expand Down
2 changes: 1 addition & 1 deletion src/kohlrahbi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

@click.group()
@click.version_option(version=version)
def cli():
def cli() -> None:
"""Kohlrahbi CLI tool"""


Expand Down
12 changes: 6 additions & 6 deletions src/kohlrahbi/ahb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from typing import Any, Dict, Optional

import click
import docx # type: ignore
import docx
import tomlkit
from docx.document import Document # type:ignore[import]
from docx.table import Table # type:ignore[import]
from docx.document import Document
from docx.table import Table
from maus.edifact import EdifactFormatVersion

from kohlrahbi.ahbtable.ahbtable import AhbTable
Expand Down Expand Up @@ -48,7 +48,7 @@ def process_ahb_table(
pruefi: str,
output_path: Path,
file_type: str,
):
) -> None:
"""
Process the ahb table.
"""
Expand Down Expand Up @@ -98,7 +98,7 @@ def process_pruefi(
path_to_ahb_docx_file: Path,
output_path: Path,
file_type: str,
):
) -> None:
"""
Process one pruefi.
If the input path ends with .docx, we assume that the file containing the pruefi is given.
Expand Down Expand Up @@ -183,7 +183,7 @@ def extract_pruefis_from_table(table: Table) -> list[str]:

def table_header_contains_text_pruefidentifikator(table: Table) -> bool:
"""Checks if the table header contains the text 'Prüfidentifikator'."""
return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator")
return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") # type:ignore[no-any-return]


def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]:
Expand Down
6 changes: 3 additions & 3 deletions src/kohlrahbi/ahb/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from kohlrahbi.enums.ahbexportfileformat import AhbExportFileFormat


def check_python_version():
def check_python_version() -> None:
"""
Check if the Python interpreter version is greater than or equal to 3.11
"""
Expand All @@ -24,7 +24,7 @@ def check_python_version():


# pylint: disable=unused-argument
def validate_path(ctx, param, value):
def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def]
"""
Ensure the path exists or offer to create it.
"""
Expand Down Expand Up @@ -97,7 +97,7 @@ def ahb(
format_version: EdifactFormatVersion | str,
assume_yes: bool, # pylint: disable=unused-argument
# it is used by the callback function of the output-path
):
) -> None:
"""
Scrape AHB documents for pruefidentifikatoren.
This is a command line interface for the pruefis module.
Expand Down
8 changes: 4 additions & 4 deletions src/kohlrahbi/ahbtable/ahbcondtions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re
from pathlib import Path

from docx.table import Table as DocxTable # type: ignore[import-untyped]
from docx.table import Table as DocxTable
from maus.edifact import EdifactFormat
from pydantic import BaseModel, ConfigDict

Expand Down Expand Up @@ -53,10 +53,11 @@ def collect_conditions(
logger.info("The package conditions for %s were collected.", edifact_format)
return conditions_dict

def include_condition_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
def include_condition_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
"""Include a dict of conditions in the conditions_dict"""
if to_add is None:
logger.info("Conditions dict to be added is empty.")
return
for edifact_format, edi_cond_dict in to_add.items():
for condition_key, condition_text in edi_cond_dict.items():
if edifact_format in self.conditions_dict:
Expand Down Expand Up @@ -117,8 +118,7 @@ def parse_conditions_from_string(
# check whether condition was already collected:
existing_text = conditions_dict[edifact_format].get(match[0])
is_condition_key_collected_yet = existing_text is not None
if is_condition_key_collected_yet and existing_text is not None:
key_exits_but_shorter_text = len(text) > len(existing_text)
key_exits_but_shorter_text = existing_text is not None and len(text) > len(existing_text)
if not is_condition_key_collected_yet or key_exits_but_shorter_text:
conditions_dict[edifact_format][match[0]] = text
return conditions_dict
14 changes: 7 additions & 7 deletions src/kohlrahbi/ahbtable/ahbpackagetable.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class which contains AHB package condition table
from pathlib import Path

import pandas as pd
from docx.table import Table as DocxTable # type: ignore[import-untyped]
from docx.table import Table as DocxTable
from maus.edifact import EdifactFormat
from pydantic import BaseModel, ConfigDict

Expand Down Expand Up @@ -52,7 +52,7 @@ def provide_conditions(self, edifact_format: EdifactFormat) -> dict[EdifactForma
logger.info("The package conditions for %s were collected.", edifact_format)
return conditions_dict

def provide_packages(self, edifact_format: EdifactFormat):
def provide_packages(self, edifact_format: EdifactFormat) -> None:
"""Collect packages from the package table and store them in package_dict."""
package_dict: dict[EdifactFormat, dict[str, str]] = {edifact_format: {}}

Expand All @@ -71,20 +71,20 @@ def provide_packages(self, edifact_format: EdifactFormat):
# check whether package was already collected:
existing_text = package_dict[edifact_format].get(package)
is_package_key_collected_yet = existing_text is not None
if is_package_key_collected_yet:
key_exits_but_shorter_text = len(package_conditions) > len(
existing_text # type: ignore[arg-type]
) # type: ignore[arg-type]
key_exits_but_shorter_text = existing_text is not None and len(package_conditions) > len(
existing_text
)
if not is_package_key_collected_yet or key_exits_but_shorter_text:
package_dict[edifact_format][package] = package_conditions

logger.info("Packages for %s were collected.", edifact_format)
self.package_dict = package_dict

def include_package_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
def include_package_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
"""Include a dict of packages in the package_dict"""
if to_add is None:
logger.info("Packages dict to be added is empty.")
return
for edifact_format, edi_cond_dict in to_add.items():
for package_key, package_conditions in edi_cond_dict.items():
if edifact_format in self.package_dict:
Expand Down
9 changes: 5 additions & 4 deletions src/kohlrahbi/ahbtable/ahbsubtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from typing import Generator

import pandas as pd
from docx.table import Table as DocxTable # type:ignore[import]
from docx.table import _Cell # type:ignore[import]
from docx.table import Table as DocxTable
from docx.table import _Cell, _Row
from pydantic import BaseModel, ConfigDict

from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow
from kohlrahbi.row_type_checker import RowType, get_row_type
from kohlrahbi.enums import RowType
from kohlrahbi.row_type_checker import get_row_type
from kohlrahbi.seed import Seed


Expand Down Expand Up @@ -122,7 +123,7 @@ def from_headless_table(cls, tmd: Seed, docx_table: DocxTable) -> "AhbSubTable":
return cls(table_meta_data=tmd, table=ahb_table_dataframe)

@staticmethod
def iter_visible_cells(row) -> Generator[_Cell, None, None]:
def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]:
"""
This function makes sure that you will iterate over the cells you see in the word document.
For more information go to https://github.com/python-openxml/python-docx/issues/970#issuecomment-877386927
Expand Down
3 changes: 2 additions & 1 deletion src/kohlrahbi/ahbtable/ahbtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def append_ahb_sub_table(self, ahb_sub_table: AhbSubTable) -> None:
self.table = pd.concat([self.table, ahb_sub_table.table], ignore_index=True)

@staticmethod
def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool:
def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool: # type:ignore[type-arg]
"""
Returns true if the given raw line only contains some meaningful data in the "Segment Gruppe" key
"""
Expand All @@ -112,6 +112,7 @@ def sanitize(self) -> None:
iterable_ahb_table = peekable(self.table.iterrows())
self.table.reset_index(drop=True, inplace=True)
for _, row in iterable_ahb_table:
# pylint: disable=unpacking-non-sequence # it is a tuple indeed
index_of_next_row, next_row = iterable_ahb_table.peek(
(
0,
Expand Down
8 changes: 5 additions & 3 deletions src/kohlrahbi/ahbtable/ahbtablerow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from typing import Optional

import pandas as pd
from docx.table import _Cell # type:ignore[import]
from docx.table import _Cell
from pydantic import BaseModel, ConfigDict

from kohlrahbi.docxtablecells import BedingungCell, BodyCell, EdifactStrukturCell
from kohlrahbi.row_type_checker import RowType
from kohlrahbi.enums import RowType
from kohlrahbi.seed import Seed


Expand Down Expand Up @@ -46,7 +46,9 @@ def parse(
dtype="str",
)
# pylint: disable=unsubscriptable-object, no-member
empty_row: pd.Series[str] = pd.Series(len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers)
empty_row: pd.Series = pd.Series( # type:ignore[type-arg]
len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers
)

ahb_row_dataframe = pd.concat([ahb_row_dataframe, empty_row.to_frame().T], ignore_index=True)

Expand Down
6 changes: 3 additions & 3 deletions src/kohlrahbi/changehistory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from pathlib import Path
from typing import Optional

import docx # type: ignore
import docx
import pandas as pd
from docx.document import Document # type:ignore[import]
from docx.table import Table # type: ignore
from docx.document import Document
from docx.table import Table

from kohlrahbi.changehistory.changehistorytable import ChangeHistoryTable
from kohlrahbi.docxfilefinder import DocxFileFinder
Expand Down
4 changes: 2 additions & 2 deletions src/kohlrahbi/changehistory/changehistorytable.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import pandas as pd
from docx.table import Table # type:ignore[import]
from docx.table import Table
from pydantic import BaseModel, ConfigDict

from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable
Expand Down Expand Up @@ -54,7 +54,7 @@ def is_empty(val: str) -> bool:
return pd.isna(val) or val == ""

# Define a function to check if a value is considered empty for our case
def is_the_first_column_empty(row: pd.Series) -> bool:
def is_the_first_column_empty(row: pd.Series) -> bool: # type:ignore[type-arg]
"""
Checks if the first column of the given row is empty.
This is our indicator if the current row is a continuation of the upper row.
Expand Down
2 changes: 1 addition & 1 deletion src/kohlrahbi/changehistory/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def changehistory(
format_version: EdifactFormatVersion | str,
assume_yes: bool, # pylint: disable=unused-argument
# it is used by the callback function of the output-path
):
) -> None:
"""
Scrape change histories from the input path and save them to the output path.
Expand Down
6 changes: 3 additions & 3 deletions src/kohlrahbi/conditions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pathlib import Path

import docx # type: ignore[import-untyped]
import docx
from maus.edifact import EdifactFormat, EdifactFormatVersion, get_format_of_pruefidentifikator

from kohlrahbi.ahb import get_pruefi_to_file_mapping
Expand Down Expand Up @@ -45,8 +45,8 @@ def scrape_conditions(
for edifact_format, files in all_format_files.items():
for file in files:
# pylint: disable=too-many-function-args
# type: ignore[call-arg, arg-type]
doc = docx.Document(basic_input_path / path_to_file / Path(file))
path: Path = basic_input_path / path_to_file / Path(file)
doc = docx.Document(str(path.absolute()))
logger.info("Start scraping conditions for %s in %s", edifact_format, file)
if not doc:
logger.error("Could not open file %s as docx", Path(file))
Expand Down
6 changes: 3 additions & 3 deletions src/kohlrahbi/conditions/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from kohlrahbi.conditions import scrape_conditions


def check_python_version():
def check_python_version() -> None:
"""
Check if the Python interpreter version is greater than or equal to 3.11
"""
Expand All @@ -40,7 +40,7 @@ def check_python_version():


# pylint: disable=unused-argument
def validate_path(ctx, param, value):
def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def]
"""
Ensure the path exists or offer to create it.
"""
Expand Down Expand Up @@ -91,7 +91,7 @@ def validate_path(ctx, param, value):
)
def conditions(
edi_energy_mirror_path: Path, output_path: Path, format_version: EdifactFormatVersion | str, assume_yes: bool
):
) -> None:
"""
Scrape AHB documents for conditions.
"""
Expand Down
2 changes: 1 addition & 1 deletion src/kohlrahbi/docxfilefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def filter_docx_files_for_edifact_format(self, edifact_format: EdifactFormat) ->

self.paths_to_docx_files = [path for path in self.paths_to_docx_files if str(edifact_format) in path.name]

def remove_temporary_files(self):
def remove_temporary_files(self) -> None:
"""
This method removes all temporary files from paths_to_docx_files.
Temporary files lead to the exception `BadZipFile: File is not a zip file`.
Expand Down
2 changes: 2 additions & 0 deletions src/kohlrahbi/docxtablecells/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
from .bedinungscell import BedingungCell
from .bodycell import BodyCell
from .edifactstrukturcell import EdifactStrukturCell

__all__ = ["BedingungCell", "BodyCell", "EdifactStrukturCell"]
2 changes: 1 addition & 1 deletion src/kohlrahbi/docxtablecells/bedinungscell.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re

import pandas as pd
from docx.table import _Cell # type:ignore[import]
from docx.table import _Cell
from pydantic import BaseModel, ConfigDict


Expand Down
7 changes: 4 additions & 3 deletions src/kohlrahbi/docxtablecells/bodycell.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""

import pandas as pd
from docx.table import _Cell # type:ignore[import]
from docx.table import _Cell
from docx.text.paragraph import Paragraph
from maus.reader.flat_ahb_reader import FlatAhbCsvReader
from pydantic import BaseModel, ConfigDict

Expand Down Expand Up @@ -103,9 +104,9 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame:

return ahb_row_dataframe

def has_paragraph_tabstops(self, paragraph) -> bool:
def has_paragraph_tabstops(self, paragraph: Paragraph) -> bool:
"""
Checks if the given paragraph contains tabstops
"""
tab_stops = list(paragraph.paragraph_format.tab_stops)
return len(tab_stops) > 0
return any(tab_stops)
2 changes: 1 addition & 1 deletion src/kohlrahbi/docxtablecells/edifactstrukturcell.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re

import pandas as pd
from docx.table import _Cell # type:ignore[import]
from docx.table import _Cell
from pydantic import BaseModel, ConfigDict

_segment_group_pattern = re.compile(r"^SG\d+$")
Expand Down
2 changes: 2 additions & 0 deletions src/kohlrahbi/enums/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@

from .row_type import RowType
from .row_type_color import RowTypeColor

__all__ = ["RowType", "RowTypeColor"]
Loading

0 comments on commit 92332ee

Please sign in to comment.