Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove double entries in flatahb output #354

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions src/kohlrahbi/unfoldedahb/unfoldedahbtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pathlib import Path
from uuid import uuid4

import attrs
import pandas as pd
from maus.edifact import get_format_of_pruefidentifikator
from maus.models.anwendungshandbuch import (
Expand Down Expand Up @@ -356,6 +357,7 @@ def convert_to_flat_ahb(self) -> FlatAnwendungshandbuch:
index=unfolded_ahb_line.index,
)
)
lines = _remove_irrelevant_lines(lines)
try:
return FlatAnwendungshandbuch(meta=meta, lines=lines)
except ValueError:
Expand Down Expand Up @@ -480,3 +482,38 @@ def dump_xlsx(self, path_to_output_directory: Path) -> None:
self.meta_data.pruefidentifikator,
xlsx_output_directory_path / f"{self.meta_data.pruefidentifikator}.json",
)


def _remove_irrelevant_lines(lines: list[AhbLine]) -> list[AhbLine]:
"""
Removes lines that are irrelevant for the AHB.
"""
reduced_lines: list[AhbLine] = []
for line, next_line in zip(lines, lines[1:] + [None]):
line_dict = attrs.asdict(line)
next_line_dict: dict[str, str | None]
if next_line:
next_line_dict = attrs.asdict(next_line)
is_next_ahb_line_empty = next_line is None or next_line_dict["ahb_expression"] is None
is_ahb_line_only_segment_group_header = (
line_dict["segment_group_key"] is not None
and line_dict["segment_code"] is None
and line_dict["ahb_expression"] is None
)
is_empty_ahb_line = (
line_dict["segment_code"] is not None
and line_dict["section_name"] is not None
and line_dict["ahb_expression"] is None
) or (is_ahb_line_only_segment_group_header and is_next_ahb_line_empty)

is_double_line = (
line_dict["data_element"] is None
and line_dict["name"] == ""
and line_dict["segment_code"] is None
and line_dict["value_pool_entry"] is None
and line_dict["segment_group_key"] is not None
and line_dict["section_name"] is not None
)
if not is_double_line and not is_empty_ahb_line:
reduced_lines.append(line)
return reduced_lines