Skip to content

Commit

Permalink
Merge pull request #369 from ChrisAylen/issue-368-Enhancement-Enable-…
Browse files: browse the repository at this point in the history
…folder-recursion-in-ingest.py

Added folder recursion to ingest.py.
  • Loading branch information
PromtEngineer committed Aug 29, 2023
2 parents 9743221 + 8795021 commit afcf323
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ def load_document_batch(filepaths):


def load_documents(source_dir: str) -> list[Document]:
# Loads all documents from the source documents directory
all_files = os.listdir(source_dir)
# Loads all documents from the source documents directory, including nested folders
paths = []
for file_path in all_files:
file_extension = os.path.splitext(file_path)[1]
source_file_path = os.path.join(source_dir, file_path)
if file_extension in DOCUMENT_MAP.keys():
paths.append(source_file_path)
for root, _, files in os.walk(source_dir):
for file_name in files:
file_extension = os.path.splitext(file_name)[1]
source_file_path = os.path.join(root, file_name)
if file_extension in DOCUMENT_MAP.keys():
paths.append(source_file_path)

# Have at least one worker and at most INGEST_THREADS workers
n_workers = min(INGEST_THREADS, max(len(paths), 1))
Expand Down

0 comments on commit afcf323

Please sign in to comment.