Extract entities from PDF and text documents and transform them to schema.org resources in RDF.
pip install doc2sdo
doc2sdo path/to/your.pdf >output.ttl
from pathlib import Path
import sys

from doc2sdo import doc2sdo

# Iterate over the things extracted from the document and write the RDF graph
# behind each one to standard output. The binary stream (sys.stdout.buffer)
# is passed so that serialized bytes are written as-is.
for thing in doc2sdo(Path("/path/to/your.pdf")):
    thing.resource.graph.serialize(sys.stdout.buffer)
script/bootstrap
script/test