diff --git a/constants.py b/constants.py index 629230e2..380c650a 100644 --- a/constants.py +++ b/constants.py @@ -6,6 +6,7 @@ # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader +from langchain.document_loaders import UnstructuredHTMLLoader # load_dotenv() @@ -43,6 +44,7 @@ # https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader DOCUMENT_MAP = { + ".html": UnstructuredHTMLLoader, ".txt": TextLoader, ".md": UnstructuredMarkdownLoader, ".py": TextLoader,