Merge branch 'main' into hf-kklein-patch-1

Hochfrequenz · May 17, 2024 · 92332ee
2 parents 25adea0 + 3dbc4e7
commit 92332ee

Showing 44 changed files with 157 additions and 133 deletions.
diff --git a/dev_requirements/requirements-linting.txt b/dev_requirements/requirements-linting.txt
@@ -5,7 +5,7 @@
 #
 # pip-compile-multi
 #
-astroid==3.1.0
+astroid==3.2.1
  # via pylint
 dill==0.3.8
  # via pylint
@@ -15,7 +15,7 @@ mccabe==0.7.0
  # via pylint
 platformdirs==4.2.0
  # via pylint
-pylint==3.1.1
+pylint==3.2.0
  # via -r dev_requirements/requirements-linting.in
 tomlkit==0.12.5
  # via pylint
diff --git a/requirements.txt b/requirements.txt
@@ -40,7 +40,7 @@ pydantic-core==2.18.2
  # via pydantic
 python-dateutil==2.9.0.post0
  # via pandas
-python-docx==1.1.0
+python-docx==1.1.2
  # via kohlrahbi (pyproject.toml)
 pytz==2024.1
  # via

diff --git a/src/kohlrahbi/__init__.py b/src/kohlrahbi/__init__.py
@@ -12,7 +12,7 @@
 
 @click.group()
 @click.version_option(version=version)
-def cli():
+def cli() -> None:
  """Kohlrahbi CLI tool"""
 
 

diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py
@@ -9,10 +9,10 @@
 from typing import Any, Dict, Optional
 
 import click
-import docx # type: ignore
+import docx
 import tomlkit
-from docx.document import Document # type:ignore[import]
-from docx.table import Table # type:ignore[import]
+from docx.document import Document
+from docx.table import Table
 from maus.edifact import EdifactFormatVersion
 
 from kohlrahbi.ahbtable.ahbtable import AhbTable
@@ -48,7 +48,7 @@ def process_ahb_table(
  pruefi: str,
  output_path: Path,
  file_type: str,
-):
+) -> None:
  """
  Process the ahb table.
  """
@@ -98,7 +98,7 @@ def process_pruefi(
  path_to_ahb_docx_file: Path,
  output_path: Path,
  file_type: str,
-):
+) -> None:
  """
  Process one pruefi.
  If the input path ends with .docx, we assume that the file containing the pruefi is given.
@@ -183,7 +183,7 @@ def extract_pruefis_from_table(table: Table) -> list[str]:
 
 def table_header_contains_text_pruefidentifikator(table: Table) -> bool:
  """Checks if the table header contains the text 'Prüfidentifikator'."""
- return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator")
+ return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") # type:ignore[no-any-return]
 
 
 def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]:

diff --git a/src/kohlrahbi/ahb/command.py b/src/kohlrahbi/ahb/command.py
@@ -12,7 +12,7 @@
 from kohlrahbi.enums.ahbexportfileformat import AhbExportFileFormat
 
 
-def check_python_version():
+def check_python_version() -> None:
  """
  Check if the Python interpreter is greater or equal to 3.11
  """
@@ -24,7 +24,7 @@ def check_python_version():
 
 
 # pylint: disable=unused-argument
-def validate_path(ctx, param, value):
+def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def]
  """
  Ensure the path exists or offer to create it.
  """
@@ -97,7 +97,7 @@ def ahb(
  format_version: EdifactFormatVersion | str,
  assume_yes: bool, # pylint: disable=unused-argument
  # it is used by the callback function of the output-path
-):
+) -> None:
  """
  Scrape AHB documents for pruefidentifikatoren.
  This is a command line interface for the pruefis module.

diff --git a/src/kohlrahbi/ahbtable/ahbcondtions.py b/src/kohlrahbi/ahbtable/ahbcondtions.py
@@ -4,7 +4,7 @@
 import re
 from pathlib import Path
 
-from docx.table import Table as DocxTable # type: ignore[import-untyped]
+from docx.table import Table as DocxTable
 from maus.edifact import EdifactFormat
 from pydantic import BaseModel, ConfigDict
 
@@ -53,10 +53,11 @@ def collect_conditions(
  logger.info("The package conditions for %s were collected.", edifact_format)
  return conditions_dict
 
- def include_condition_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
+ def include_condition_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
  """ " Include a dict of conditions to the conditions_dict"""
  if to_add is None:
  logger.info("Conditions dict to be added is empty.")
+ return
  for edifact_format, edi_cond_dict in to_add.items():
  for condition_key, condition_text in edi_cond_dict.items():
  if edifact_format in self.conditions_dict:
@@ -117,8 +118,7 @@ def parse_conditions_from_string(
  # check whether condition was already collected:
  existing_text = conditions_dict[edifact_format].get(match[0])
  is_condition_key_collected_yet = existing_text is not None
- if is_condition_key_collected_yet and existing_text is not None:
- key_exits_but_shorter_text = len(text) > len(existing_text)
+ key_exits_but_shorter_text = existing_text is not None and len(text) > len(existing_text)
  if not is_condition_key_collected_yet or key_exits_but_shorter_text:
  conditions_dict[edifact_format][match[0]] = text
  return conditions_dict
diff --git a/src/kohlrahbi/ahbtable/ahbpackagetable.py b/src/kohlrahbi/ahbtable/ahbpackagetable.py
@@ -7,7 +7,7 @@ class which contains AHB package condition table
 from pathlib import Path
 
 import pandas as pd
-from docx.table import Table as DocxTable # type: ignore[import-untyped]
+from docx.table import Table as DocxTable
 from maus.edifact import EdifactFormat
 from pydantic import BaseModel, ConfigDict
 
@@ -52,7 +52,7 @@ def provide_conditions(self, edifact_format: EdifactFormat) -> dict[EdifactForma
  logger.info("The package conditions for %s were collected.", edifact_format)
  return conditions_dict
 
- def provide_packages(self, edifact_format: EdifactFormat):
+ def provide_packages(self, edifact_format: EdifactFormat) -> None:
  """collect conditions from package table and store them in conditions dict."""
  package_dict: dict[EdifactFormat, dict[str, str]] = {edifact_format: {}}
 
@@ -71,20 +71,20 @@ def provide_packages(self, edifact_format: EdifactFormat):
  # check whether package was already collected:
  existing_text = package_dict[edifact_format].get(package)
  is_package_key_collected_yet = existing_text is not None
- if is_package_key_collected_yet:
- key_exits_but_shorter_text = len(package_conditions) > len(
- existing_text # type: ignore[arg-type]
- ) # type: ignore[arg-type]
+ key_exits_but_shorter_text = existing_text is not None and len(package_conditions) > len(
+ existing_text
+ )
  if not is_package_key_collected_yet or key_exits_but_shorter_text:
  package_dict[edifact_format][package] = package_conditions
 
  logger.info("Packages for %s were collected.", edifact_format)
  self.package_dict = package_dict
 
- def include_package_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
+ def include_package_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
  """Include a dict of conditions to the conditions_dict"""
  if to_add is None:
  logger.info("Packages dict to be added is empty.")
+ return
  for edifact_format, edi_cond_dict in to_add.items():
  for package_key, package_conditions in edi_cond_dict.items():
  if edifact_format in self.package_dict:

diff --git a/src/kohlrahbi/ahbtable/ahbsubtable.py b/src/kohlrahbi/ahbtable/ahbsubtable.py
@@ -5,12 +5,13 @@
 from typing import Generator
 
 import pandas as pd
-from docx.table import Table as DocxTable # type:ignore[import]
-from docx.table import _Cell # type:ignore[import]
+from docx.table import Table as DocxTable
+from docx.table import _Cell, _Row
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow
-from kohlrahbi.row_type_checker import RowType, get_row_type
+from kohlrahbi.enums import RowType
+from kohlrahbi.row_type_checker import get_row_type
 from kohlrahbi.seed import Seed
 
 
@@ -122,7 +123,7 @@ def from_headless_table(cls, tmd: Seed, docx_table: DocxTable) -> "AhbSubTable":
  return cls(table_meta_data=tmd, table=ahb_table_dataframe)
 
  @staticmethod
- def iter_visible_cells(row) -> Generator[_Cell, None, None]:
+ def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]:
  """
  This function makes sure that you will iterate over the cells you see in the word document.
  For more information go to https://github.com/python-openxml/python-docx/issues/970#issuecomment-877386927

diff --git a/src/kohlrahbi/ahbtable/ahbtable.py b/src/kohlrahbi/ahbtable/ahbtable.py
@@ -91,7 +91,7 @@ def append_ahb_sub_table(self, ahb_sub_table: AhbSubTable) -> None:
  self.table = pd.concat([self.table, ahb_sub_table.table], ignore_index=True)
 
  @staticmethod
- def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool:
+ def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool: # type:ignore[type-arg]
  """
  Returns true if the given raw line only contains some meaningful data in the "Segment Gruppe" key
  """
@@ -112,6 +112,7 @@ def sanitize(self) -> None:
  iterable_ahb_table = peekable(self.table.iterrows())
  self.table.reset_index(drop=True, inplace=True)
  for _, row in iterable_ahb_table:
+ # pylint: disable=unpacking-non-sequence # it is a tuple indeed
  index_of_next_row, next_row = iterable_ahb_table.peek(
  (
  0,

diff --git a/src/kohlrahbi/ahbtable/ahbtablerow.py b/src/kohlrahbi/ahbtable/ahbtablerow.py
@@ -5,11 +5,11 @@
 from typing import Optional
 
 import pandas as pd
-from docx.table import _Cell # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.docxtablecells import BedingungCell, BodyCell, EdifactStrukturCell
-from kohlrahbi.row_type_checker import RowType
+from kohlrahbi.enums import RowType
 from kohlrahbi.seed import Seed
 
 
@@ -46,7 +46,9 @@ def parse(
  dtype="str",
  )
  # pylint: disable=unsubscriptable-object, no-member
- empty_row: pd.Series[str] = pd.Series(len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers)
+ empty_row: pd.Series = pd.Series( # type:ignore[type-arg]
+ len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers
+ )
 
  ahb_row_dataframe = pd.concat([ahb_row_dataframe, empty_row.to_frame().T], ignore_index=True)
 

diff --git a/src/kohlrahbi/changehistory/__init__.py b/src/kohlrahbi/changehistory/__init__.py
@@ -13,10 +13,10 @@
 from pathlib import Path
 from typing import Optional
 
-import docx # type: ignore
+import docx
 import pandas as pd
-from docx.document import Document # type:ignore[import]
-from docx.table import Table # type: ignore
+from docx.document import Document
+from docx.table import Table
 
 from kohlrahbi.changehistory.changehistorytable import ChangeHistoryTable
 from kohlrahbi.docxfilefinder import DocxFileFinder

diff --git a/src/kohlrahbi/changehistory/changehistorytable.py b/src/kohlrahbi/changehistory/changehistorytable.py
@@ -3,7 +3,7 @@
 """
 
 import pandas as pd
-from docx.table import Table # type:ignore[import]
+from docx.table import Table
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable
@@ -54,7 +54,7 @@ def is_empty(val: str) -> bool:
  return pd.isna(val) or val == ""
 
  # Define a function to check if a value is considered empty for our case
- def is_the_first_column_empty(row: pd.Series) -> bool:
+ def is_the_first_column_empty(row: pd.Series) -> bool: # type:ignore[type-arg]
  """
  Checks if the first column of the given row is empty.
  This is our indicator if the current row is a continuation of the upper row.

diff --git a/src/kohlrahbi/changehistory/command.py b/src/kohlrahbi/changehistory/command.py
@@ -47,7 +47,7 @@ def changehistory(
  format_version: EdifactFormatVersion | str,
  assume_yes: bool, # pylint: disable=unused-argument
  # it is used by the callback function of the output-path
-):
+) -> None:
  """
  Scrape change histories from the input path and save them to the output path.
 

diff --git a/src/kohlrahbi/conditions/__init__.py b/src/kohlrahbi/conditions/__init__.py
@@ -4,7 +4,7 @@
 
 from pathlib import Path
 
-import docx # type: ignore[import-untyped]
+import docx
 from maus.edifact import EdifactFormat, EdifactFormatVersion, get_format_of_pruefidentifikator
 
 from kohlrahbi.ahb import get_pruefi_to_file_mapping
@@ -45,8 +45,8 @@ def scrape_conditions(
  for edifact_format, files in all_format_files.items():
  for file in files:
  # pylint: disable=too-many-function-args
- # type: ignore[call-arg, arg-type]
- doc = docx.Document(basic_input_path / path_to_file / Path(file))
+ path: Path = basic_input_path / path_to_file / Path(file)
+ doc = docx.Document(str(path.absolute()))
  logger.info("Start scraping conditions for %s in %s", edifact_format, file)
  if not doc:
  logger.error("Could not open file %s as docx", Path(file))

diff --git a/src/kohlrahbi/conditions/command.py b/src/kohlrahbi/conditions/command.py
@@ -28,7 +28,7 @@
 from kohlrahbi.conditions import scrape_conditions
 
 
-def check_python_version():
+def check_python_version() -> None:
  """
  Check if the Python interpreter is greater or equal to 3.11
  """
@@ -40,7 +40,7 @@ def check_python_version():
 
 
 # pylint: disable=unused-argument
-def validate_path(ctx, param, value):
+def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def]
  """
  Ensure the path exists or offer to create it.
  """
@@ -91,7 +91,7 @@ def validate_path(ctx, param, value):
 )
 def conditions(
  edi_energy_mirror_path: Path, output_path: Path, format_version: EdifactFormatVersion | str, assume_yes: bool
-):
+) -> None:
  """
  Scrape AHB documents for conditions.
  """

diff --git a/src/kohlrahbi/docxfilefinder.py b/src/kohlrahbi/docxfilefinder.py
@@ -138,7 +138,7 @@ def filter_docx_files_for_edifact_format(self, edifact_format: EdifactFormat) ->
 
  self.paths_to_docx_files = [path for path in self.paths_to_docx_files if str(edifact_format) in path.name]
 
- def remove_temporary_files(self):
+ def remove_temporary_files(self) -> None:
  """
  This method removes all temporary files from paths_to_docx_files.
  Temporary files lead to the exception `BadZipFile: File is not a zip file`.

diff --git a/src/kohlrahbi/docxtablecells/__init__.py b/src/kohlrahbi/docxtablecells/__init__.py
@@ -5,3 +5,5 @@
 from .bedinungscell import BedingungCell
 from .bodycell import BodyCell
 from .edifactstrukturcell import EdifactStrukturCell
+
+__all__ = ["BedingungCell", "BodyCell", "EdifactStrukturCell"]
diff --git a/src/kohlrahbi/docxtablecells/bedinungscell.py b/src/kohlrahbi/docxtablecells/bedinungscell.py
@@ -5,7 +5,7 @@
 import re
 
 import pandas as pd
-from docx.table import _Cell # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 

diff --git a/src/kohlrahbi/docxtablecells/bodycell.py b/src/kohlrahbi/docxtablecells/bodycell.py
@@ -3,7 +3,8 @@
 """
 
 import pandas as pd
-from docx.table import _Cell # type:ignore[import]
+from docx.table import _Cell
+from docx.text.paragraph import Paragraph
 from maus.reader.flat_ahb_reader import FlatAhbCsvReader
 from pydantic import BaseModel, ConfigDict
 
@@ -103,9 +104,9 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame:
 
  return ahb_row_dataframe
 
- def has_paragraph_tabstops(self, paragraph) -> bool:
+ def has_paragraph_tabstops(self, paragraph: Paragraph) -> bool:
  """
  Checks if the given paragraph contains tabstops
  """
  tab_stops = list(paragraph.paragraph_format.tab_stops)
- return len(tab_stops) > 0
+ return any(tab_stops)
diff --git a/src/kohlrahbi/docxtablecells/edifactstrukturcell.py b/src/kohlrahbi/docxtablecells/edifactstrukturcell.py
@@ -5,7 +5,7 @@
 import re
 
 import pandas as pd
-from docx.table import _Cell # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 _segment_group_pattern = re.compile(r"^SG\d+$")

diff --git a/src/kohlrahbi/enums/__init__.py b/src/kohlrahbi/enums/__init__.py
@@ -4,3 +4,5 @@
 
 from .row_type import RowType
 from .row_type_color import RowTypeColor
+
+__all__ = ["RowType", "RowTypeColor"]