Skip to content

Commit

Permalink
chore: Test kTGHZ2013 field
Browse files Browse the repository at this point in the history
  • Loading branch information
tony committed Jun 22, 2024
1 parent ce630ae commit 62c22a4
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
71 changes: 71 additions & 0 deletions src/unihan_etl/validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Experimental pydantic models for unihan data."""
import typing as t

import pydantic

from unihan_etl.expansion import expand_kTGHZ2013


class UCNBaseModel(pydantic.BaseModel):
    """Base pydantic model for records keyed by a UCN string."""

    # Required identifier of the character, e.g. "U+3447".
    ucn: str


class kTGHZ2013Location(pydantic.BaseModel):
    """One location (page, position, entry type) attached to a reading."""

    # All three fields are required integers; pydantic validates them on
    # construction.
    page: int
    position: int
    entry_type: int = pydantic.Field(
        # Adjacent literals concatenate into a single description string.
        description=(
            "0 for a main entry and greater than 0 for a parenthesized or bracketed "
            "variant of the main entry"
        ),
    )


class kTGHZ2013Reading(pydantic.BaseModel):
    """A single reading together with every location listed for it."""

    # The reading text, e.g. "zhòu".
    reading: str
    # Locations associated with this reading, in the order they were supplied.
    locations: t.List[kTGHZ2013Location]


class kTGHZ2013(UCNBaseModel):
    """UCN record holding the expanded readings of a kTGHZ2013 field."""

    readings: t.List[kTGHZ2013Reading]

    model_config = pydantic.ConfigDict(
        validate_assignment=True,
        arbitrary_types_allowed=True,
    )

    @classmethod
    def from_string(
        cls,
        value: t.Union[str, t.Dict[str, t.Any]],
    ) -> "kTGHZ2013":
        """Build a model from a raw UNIHAN line or from a plain mapping.

        Parameters
        ----------
        value : str or dict
            A string must consist of exactly three tab-separated columns
            (UCN, field name, field value). The field value is split on
            single spaces and handed to ``expand_kTGHZ2013``. A dict is
            validated directly against this model.

        Returns
        -------
        kTGHZ2013
            The validated model instance.

        Raises
        ------
        ValueError
            If a string does not contain exactly three tab-separated columns.
        TypeError
            If ``value`` is neither a ``str`` nor a ``dict``.
        """
        if isinstance(value, str):
            # The field-name column is not needed to build the model.
            ucn, _field, raw = value.split("\t")
            expanded = expand_kTGHZ2013(raw.split(" "))

            return cls(
                ucn=ucn,
                readings=[
                    kTGHZ2013Reading(
                        reading=item["reading"],
                        locations=[
                            kTGHZ2013Location(
                                page=loc["page"],
                                position=loc["position"],
                                entry_type=loc["entry_type"],
                            )
                            for loc in item["locations"]
                        ],
                    )
                    for item in expanded
                ],
            )

        if isinstance(value, dict):
            # pydantic v2 replacement for the deprecated ``parse_obj_as``.
            return cls.model_validate(value)

        # pydantic v2's ValidationError has no public constructor, so calling
        # it with a message fails with an opaque "No constructor defined"
        # TypeError. Raise an explicit TypeError with a useful message instead.
        msg = (
            "Invalid input for kTGHZ2013 model: expected str or dict, "
            f"got {type(value).__name__}"
        )
        raise TypeError(msg)
27 changes: 27 additions & 0 deletions tests/test_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Test expansion of multi-value fields in UNIHAN."""
import typing as t

from unihan_etl import validator

if t.TYPE_CHECKING:
pass


def test_kTGHZ2013() -> None:
    """Parse tab-separated kTGHZ2013 lines into pydantic models."""
    # from_string splits the line on "\t" into exactly three columns, so the
    # separators are written as explicit escapes. A space-separated literal
    # would fail to unpack. Only the readings inside the value column are
    # separated by single spaces.
    model = validator.kTGHZ2013.from_string("U+3447\tkTGHZ2013\t482.140:zhòu")
    assert model.ucn == "U+3447"

    model = validator.kTGHZ2013.from_string(
        "U+4E07\tkTGHZ2013\t256.090:mò 379.160:wàn",
    )
    assert model.ucn == "U+4E07"
    assert model.readings[0].reading == "mò"
    assert model.readings[1].reading == "wàn"
    assert model.readings[1].locations[0] == validator.kTGHZ2013Location(
        page=379,
        position=16,
        entry_type=0,
    )

    # Shown only when pytest runs with output capture disabled (-s).
    print(f"\n{model}\n")

0 comments on commit 62c22a4

Please sign in to comment.