Skip to content

Commit

Permalink
Extract 5 digit Segment ID from (some) ≥FV2410 AHBs (#309)
Browse files Browse the repository at this point in the history
  • Loading branch information
hf-kklein committed May 20, 2024
1 parent e7b7f00 commit be97b58
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/kohlrahbi/unfoldedahb/unfoldedahbline.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class UnfoldedAhbLine(BaseModel):
segment_name: str # Ansprechpartner
segment_gruppe: str | None # SG3
segment: str | None # CTA
segment_id: str | None = None # 00009
datenelement: str | None # 3055
code: str | None # IC
qualifier: str | None # Name vom Ansprechpartner
Expand Down
26 changes: 25 additions & 1 deletion src/kohlrahbi/unfoldedahb/unfoldedahbtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import copy
import json
import re
from functools import lru_cache
from pathlib import Path
from uuid import uuid4

Expand All @@ -26,6 +27,7 @@
from kohlrahbi.unfoldedahb.unfoldedahbtablemetadata import UnfoldedAhbTableMetaData

_segment_group_pattern = re.compile(r"^SG\d+$")
_segment_id_pattern = re.compile(r"^\d{5}$")


def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool:
Expand All @@ -39,6 +41,24 @@ def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool:
return line1_copy == line2_copy


@lru_cache
def _split_data_element_and_segment_id(value: str | None) -> tuple[str | None, str | None]:
    """
    Splits the raw content of a "Datenelement" cell into a data element id and a segment id.

    The same cell may carry either a data element id (e.g. "3055") or a 5 digit segment id (e.g. "00009").
    A value is treated as segment id if (and only if) it matches ``_segment_id_pattern``.

    :param value: the raw cell content; may be None or an empty string if the cell carries no id
    :return: a tuple ``(data element id, segment id)``. At most one of the two entries is not None.
        Both are None if ``value`` is None or empty.
    """
    if not value:
        # An empty cell carries no id at all. Report it as None (never as "") so that callers can rely on
        # "is None" checks for both entries of the returned tuple.
        return None, None
    if _segment_id_pattern.match(value):
        return None, value
    return value, None


def _keep_guids_of_unchanged_lines_stable(
updated_ahb: FlatAnwendungshandbuch, existing_ahb: FlatAnwendungshandbuch
) -> None:
Expand Down Expand Up @@ -159,7 +179,8 @@ def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str) -> "UnfoldedAhb":
segment_name=current_section_name,
segment_gruppe=row["Segment Gruppe"] or None,
segment=row["Segment"] or None,
datenelement=row["Datenelement"] or None,
datenelement=_split_data_element_and_segment_id(row["Datenelement"])[0],
segment_id=_split_data_element_and_segment_id(row["Datenelement"])[1],
code=value_pool_entry,
qualifier="",
beschreibung=description,
Expand Down Expand Up @@ -284,6 +305,8 @@ def _is_just_segment(ahb_row: pd.Series) -> bool: # type:ignore[type-arg]
and not ahb_row["Datenelement"]
):
return True
if ahb_row["Datenelement"] is not None and _segment_id_pattern.match(ahb_row["Datenelement"]):
return True
return False

@staticmethod
Expand Down Expand Up @@ -325,6 +348,7 @@ def convert_to_flat_ahb(self) -> FlatAnwendungshandbuch:
segment_group_key=unfolded_ahb_line.segment_gruppe,
segment_code=unfolded_ahb_line.segment,
data_element=unfolded_ahb_line.datenelement,
segment_id=unfolded_ahb_line.segment_id,
value_pool_entry=unfolded_ahb_line.code,
name=unfolded_ahb_line.beschreibung or unfolded_ahb_line.qualifier,
ahb_expression=unfolded_ahb_line.bedingung_ausdruck,
Expand Down
Binary file not shown.
51 changes: 50 additions & 1 deletion unittests/test_ahb_sub_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from pathlib import Path

import docx
import pytest
from docx.table import Table

from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable
from kohlrahbi.read_functions import get_all_paragraphs_and_tables
from kohlrahbi.read_functions import get_ahb_table, get_all_paragraphs_and_tables
from kohlrahbi.unfoldedahb import UnfoldedAhb


class TestAhbSubTable:
Expand Down Expand Up @@ -41,3 +43,50 @@ def test_from_table(self) -> None:
assert isinstance(ahb_sub_table, AhbSubTable)
else:
raise TypeError("You did not pass a docx table instance.")

@pytest.mark.parametrize(
    "docx_path, segment_id, segment_code",
    [
        # All cases read the same document; only the expected (segment id, segment code) pair differs.
        pytest.param(
            Path(__file__).parent
            / Path(
                # pylint: disable=line-too-long
                "test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx"
            ),
            expected_segment_id,
            expected_segment_code,
        )
        for expected_segment_id, expected_segment_code in (
            ("00003", "UNH"),
            ("00004", "BGM"),
            ("00540", "UNT"),
        )
    ],
)
def test_segment_id_parsing(self, docx_path: Path, segment_id: str, segment_code: str) -> None:
    """
    Checks that the 5 digit segment id is extracted and ends up in the flat AHB next to the matching segment code.

    https://github.com/Hochfrequenz/kohlrahbi/issues/304

    :param docx_path: path of the AHB docx file to read
    :param segment_id: the segment id that is expected on at least one line of the flat AHB
    :param segment_code: the segment code that is expected on the same line as ``segment_id``
    """
    pruefi = "55109"  # the Pruefidentifikator whose AHB table is read from the document
    assert docx_path.exists()
    doc = docx.Document(str(docx_path))  # Creating word reader object.
    ahb_table = get_ahb_table(document=doc, pruefi=pruefi)
    assert ahb_table is not None
    unfolded_ahb = UnfoldedAhb.from_ahb_table(ahb_table=ahb_table, pruefi=pruefi)
    assert unfolded_ahb is not None
    flat_ahb = unfolded_ahb.convert_to_flat_ahb()
    assert flat_ahb is not None
    # Evaluate the predicate itself instead of relying on the truthiness of the line objects.
    assert any(line.segment_id is not None for line in flat_ahb.lines)
    assert any(line.segment_id == segment_id and line.segment_code == segment_code for line in flat_ahb.lines)

0 comments on commit be97b58

Please sign in to comment.