diff --git a/autollm/__init__.py b/autollm/__init__.py index e6fd8a9f..7c32b9d8 100644 --- a/autollm/__init__.py +++ b/autollm/__init__.py @@ -4,7 +4,7 @@ and vector databases, along with various utility functions. """ -__version__ = '0.1.2' +__version__ = '0.1.3' __author__ = 'safevideo' __license__ = 'AGPL-3.0' diff --git a/autollm/utils/document_reading.py b/autollm/utils/document_reading.py index 7dee190d..f6685cb3 100644 --- a/autollm/utils/document_reading.py +++ b/autollm/utils/document_reading.py @@ -22,6 +22,7 @@ def read_files_as_documents( filename_as_id: bool = True, recursive: bool = True, required_exts: Optional[List[str]] = None, + show_progress: bool = True, **kwargs) -> Sequence[Document]: """ Process markdown files to extract documents using SimpleDirectoryReader. @@ -58,7 +59,7 @@ def read_files_as_documents( f"Reading files {input_files}..") # Read and process the documents - documents = reader.load_data() + documents = reader.load_data(show_progress=show_progress) logger.info(f"Found {len(documents)} 'document(s)'.") return documents diff --git a/autollm/utils/pdf_reader.py b/autollm/utils/pdf_reader.py index b8c84b53..4100c25e 100644 --- a/autollm/utils/pdf_reader.py +++ b/autollm/utils/pdf_reader.py @@ -20,7 +20,6 @@ def load_data(self, file_path: str, extra_info: dict = None) -> List[Document]: # Convert the PosixPath object to a string before passing it to PDFMinerLoader loader = PDFMinerLoader(str(file_path), extract_images=self.extract_images) - logger.info(f"Parsing pages of the PDF file: {file_path}..") langchain_documents = loader.load() # This returns a list of langchain Document objects # Convert langchain documents into llama-index documents diff --git a/requirements.txt b/requirements.txt index f19c39b7..c57b1ddf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -llama-index==0.9.4 +llama-index==0.9.10 litellm==1.1.1 uvicorn fastapi