
Commit

Merge pull request #7 from jonfairbanks/develop
Bug Fix
jonfairbanks authored Feb 23, 2024
2 parents 63d62ac + cf86a87 commit fa1572a
Showing 5 changed files with 9 additions and 16 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -1,6 +1,6 @@
# 📚 Local RAG

![local-rag-logo](logo.png)
![local-rag-demo](demo.gif)

![GitHub commit activity](https://img.shields.io/github/commit-activity/t/jonfairbanks/local-rag)
![GitHub last commit](https://img.shields.io/github/last-commit/jonfairbanks/local-rag)
@@ -46,10 +46,9 @@ Docker:
- [ ] Chat Mode
- [x] top_k
- [x] chunk_size
- [x] chunk_overlap
- [ ] Allow Switching of Embedding Model & Settings
- [ ] chunk_overlap
- [x] Allow Switching of Embedding Model & Settings
- [x] Delete Files after Index Created/Failed
- [ ] Ability to Remove Files from Index
- [ ] Function to Handle GitHub Repo Ingestion
- [ ] Support for JSON Files
- [x] Show Loaders in UI (File Uploads, Conversions, ...)
@@ -64,12 +63,13 @@ Docker:
- [ ] Refreshing the page loses all state (expected Streamlit behavior; need to implement local-storage)
- [x] Files can be uploaded before Ollama config is set, leading to embedding errors
- [ ] Assuming Ollama is hosted on localhost, Models are automatically loaded and selected, but the dropdown does not render the selected option
- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself
- [ ] Upon sending a Chat message, the File Processing expander appears to re-run itself (seems something is not using state correctly)

### Resources
- [Ollama](https://ollama.com/)
- [Llama-Index](https://docs.llamaindex.ai/en/stable/index.html)
- [Streamlit](https://docs.streamlit.io/library/api-reference)
- [Ollama w/ Llama-Index](https://docs.llamaindex.ai/en/stable/examples/llm/ollama.html)
- [RAG w/ Llama-Index](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)
- [Llama-Index Chat Engine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)
- [Llama-Index Chat Engine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)
- [PoC Notebook](https://github.com/fairbanksio/notebooks/blob/main/llm/local/github-rag-prep.ipynb)
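The known issue above about the File Processing expander re-running on every chat message comes down to Streamlit's execution model: the whole script re-executes on each interaction, so any processing code that is not gated behind session state runs again. A minimal sketch of one possible guard (names and widgets here are illustrative, not the repo's actual code):

```python
import streamlit as st

# Illustrative only: gate expensive file processing behind a session_state
# flag so it does not re-run when the script re-executes on each chat message.
uploaded_files = st.file_uploader("Upload documents", accept_multiple_files=True)

if uploaded_files and not st.session_state.get("files_processed", False):
    with st.expander("File Processing", expanded=True):
        for f in uploaded_files:
            st.write(f"Indexing {f.name}...")
            # ... embed and index the file here ...
    st.session_state["files_processed"] = True
```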
2 changes: 1 addition & 1 deletion components/tabs/file_upload.py
@@ -73,7 +73,7 @@ def file_upload():
print("Using default embedding model...")
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Best (Salesforce/SFR-Embedding-Mistral)":
if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
print("Using the Salesforce embedding model; RIP yer VRAM...")
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

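The "Large (Salesforce/SFR-Embedding-Mistral)" label string now appears in both file_upload.py (the comparison above) and settings.py (the selectbox option below), and this commit updates it in both places. A hedged sketch of one way to keep the two in sync, using a single mapping that both modules could import (names are hypothetical, not from the repo):

```python
# Hypothetical shared mapping: the selectbox can build its options from the
# keys, and the upload handler resolves the HF model name from the same dict,
# so the label string only lives in one place.
EMBEDDING_MODELS = {
    "Default (bge-large-en-v1.5)": "BAAI/bge-large-en-v1.5",
    "Large (Salesforce/SFR-Embedding-Mistral)": "Salesforce/SFR-Embedding-Mistral",
}

def resolve_embedding_model(label: str) -> str:
    # "Other" (or any unknown label) falls back to the default model.
    return EMBEDDING_MODELS.get(label, "BAAI/bge-large-en-v1.5")
```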
9 changes: 1 addition & 8 deletions components/tabs/settings.py
@@ -57,7 +57,7 @@ def settings():
"Model",
[
"Default (bge-large-en-v1.5)",
"Best (Salesforce/SFR-Embedding-Mistral)",
"Large (Salesforce/SFR-Embedding-Mistral)",
"Other",
],
key="embedding_model",
@@ -79,13 +79,6 @@ def settings():
placeholder="1024",
value=st.session_state["chunk_size"],
)
st.text_input(
"Chunk Overlap",
help="`chunk_overlap` sets the overlap between consecutive document chunks. It prevents loss of information at chunk boundaries. For instance, a value of 20 means a 20-token overlap. Adjusting this parameter affects the precision and generality of the calculated embeddings.",
key="chunk_overlap",
placeholder="20",
value=st.session_state["chunk_overlap"],
)

st.subheader("Export Data")
export_data_settings = st.container(border=True)
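This hunk removes the Chunk Overlap input (and the README checklist entry for chunk_overlap appears to move back to unchecked in the same commit). The deleted help text is a good summary of what the setting does: it controls how many tokens consecutive chunks share, which protects information at chunk boundaries. If the setting returns, a minimal sketch of how it could be threaded through to llama-index, assuming the legacy ServiceContext API used elsewhere in this commit (import path and default value are assumptions):

```python
from llama_index import ServiceContext  # newer releases: from llama_index.core import ServiceContext

def create_service_context(llm, system_prompt, embed_model, chunk_size, chunk_overlap=20):
    # ServiceContext.from_defaults accepts both chunk_size and chunk_overlap;
    # values coming from st.text_input are strings, hence the int() casts.
    return ServiceContext.from_defaults(
        llm=llm,
        system_prompt=system_prompt,
        embed_model=embed_model,
        chunk_size=int(chunk_size),
        chunk_overlap=int(chunk_overlap),
    )
```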
Binary file added demo.gif
2 changes: 1 addition & 1 deletion utils/llama_index.py
@@ -43,7 +43,7 @@ def create_service_context(
llm=llm,
system_prompt=system_prompt,
embed_model=formatted_embed_model,
chunk_size=chunk_size,
chunk_size=int(chunk_size),
)

# Note: this may be redundant since service_context is returned
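The actual bug fix is the int() cast: st.text_input stores its value as a string, so chunk_size arrives from session state as e.g. "1024", which is presumably what broke the ServiceContext call. A small hedged helper sketch (hypothetical name, not part of the repo) for parsing numeric settings with a fallback:

```python
import streamlit as st

# Hypothetical helper: st.text_input returns strings, so numeric settings
# pulled from session_state should be cast, with a sane default if the
# field is empty or not a valid integer.
def get_int_setting(key: str, default: int) -> int:
    try:
        return int(st.session_state.get(key))
    except (TypeError, ValueError):
        return default

# Example: chunk_size = get_int_setting("chunk_size", 1024)
```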
