Merge pull request #41 from jonfairbanks/develop
Migrate to Llama-Index settings
jonfairbanks authored Mar 22, 2024
2 parents 8ddfe75 + f06ee40 commit 5bbb2a2
Showing 13 changed files with 167 additions and 302 deletions.
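
The headline change: llama-index 0.10 replaced the per-call ServiceContext with a global Settings object, and this commit migrates the app accordingly. A minimal sketch of the new pattern, assuming llama-index >= 0.10 (the model name and endpoint below are placeholders, not values from this diff; the embedding model name is the default from the deleted code):

    # Sketch of the Settings-based setup that replaces ServiceContext.
    from llama_index.core import Settings
    from llama_index.llms.ollama import Ollama
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    Settings.llm = Ollama(model="llama2", base_url="http://localhost:11434")
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")

Once these globals are set at startup, no context object needs to be threaded through index and query-engine construction, which is what enables the large deletion in components/tabs/github_repo.py below.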
41 changes: 41 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,41 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, attach screenshots to help explain your problem. **A screenshot of your Settings > Advanced > Application State section can greatly aid in troubleshooting.**

**Logs**
If applicable, attach log files to help explain your problem. **Reproducing your issue and sharing a copy of your `local-rag.log` can greatly aid in troubleshooting.**

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]

**Additional context**
Add any other context about the problem here.
20 changes: 20 additions & 0 deletions .github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
104 changes: 49 additions & 55 deletions Pipfile.lock

Some generated files are not rendered by default.

5 changes: 1 addition & 4 deletions components/page_state.py
@@ -76,9 +76,6 @@ def set_initial_state():
if "query_engine" not in st.session_state:
st.session_state["query_engine"] = None

if "service_context" not in st.session_state:
st.session_state["service_context"] = None

if "chat_mode" not in st.session_state:
st.session_state["chat_mode"] = "compact"

@@ -107,4 +104,4 @@ def set_initial_state():
st.session_state["chunk_size"] = 1024

if "chunk_overlap" not in st.session_state:
st.session_state["chunk_overlap"] = 20
st.session_state["chunk_overlap"] = 200
117 changes: 5 additions & 112 deletions components/tabs/github_repo.py
@@ -1,11 +1,7 @@
import os
import shutil

import streamlit as st

import utils.helpers as func
import utils.ollama as ollama
import utils.llama_index as llama_index
import utils.rag_pipeline as rag
import utils.logs as logs


@@ -29,116 +25,13 @@ def github_repo():

with st.spinner("Processing..."):
if repo_processed is True:
error = None

######################################
# Create Llama-Index service-context #
# to use local LLMs and embeddings #
######################################

try:
llm = ollama.create_ollama_llm(
st.session_state["selected_model"],
st.session_state["ollama_endpoint"],
)
st.session_state["llm"] = llm
st.caption("✔️ LLM Initialized")

# resp = llm.complete("Hello!")
# print(resp)
except Exception as err:
logs.log.error(f"Failed to setup LLM: {err}")
error = err
st.exception(error)
st.stop()

####################################
# Determine embedding model to use #
####################################

embedding_model = st.session_state["embedding_model"]
hf_embedding_model = None

if embedding_model == None:
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Default (bge-large-en-v1.5)":
hf_embedding_model = "BAAI/bge-large-en-v1.5"

if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)":
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral"

if embedding_model == "Other":
hf_embedding_model = st.session_state["other_embedding_model"]

try:
llama_index.create_service_context(
st.session_state["llm"],
st.session_state["system_prompt"],
hf_embedding_model,
st.session_state["chunk_size"],
# st.session_state["chunk_overlap"],
)
st.caption("✔️ Context Created")
except Exception as err:
logs.log.error(f"Setting up Service Context failed: {err}")
error = err
st.exception(error)
st.stop()

#######################################
# Load files from the data/ directory #
#######################################

try:
save_dir = os.getcwd() + "/data"
documents = llama_index.load_documents(save_dir)
st.session_state["documents"] = documents
st.caption("✔️ Processed File Data")
except Exception as err:
logs.log.error(f"Document Load Error: {err}")
error = err
st.exception(error)
st.stop()

###########################################
# Create an index from ingested documents #
###########################################

try:
llama_index.create_query_engine(
st.session_state["documents"],
st.session_state["service_context"],
)
st.caption("✔️ Created File Index")
except Exception as err:
logs.log.error(f"Index Creation Error: {err}")
error = err
st.exception(error)
st.stop()

#####################
# Remove data files #
#####################

try:
save_dir = os.getcwd() + "/data"
shutil.rmtree(save_dir)
st.caption("✔️ Removed Temp Files")
except Exception as err:
logs.log.error(f"Failed to delete data files: {err}")
error = err
st.exception(error)
st.stop()

#####################
# Show Final Status #
#####################

# Initiate the RAG pipeline, providing documents to be saved on disk if necessary
error = rag.rag_pipeline()

if error is not None:
st.exception(error)
else:
st.write("Your files are ready. Let's chat! 😎")
st.write("Your files are ready. Let's chat! 😎") # TODO: This should be a button.

else:
st.text_input(
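
Roughly a hundred lines of inline setup (LLM creation, embedding-model selection, document loading, index construction, temp-file cleanup) collapse into the single rag.rag_pipeline() call above. utils/rag_pipeline.py itself is not rendered in this view; the following is a hypothetical reconstruction based on the removed steps, with helper names taken from the deleted code:

    # Sketch only -- mirrors the steps deleted from github_repo.py;
    # the real utils/rag_pipeline.py may differ.
    import os
    import shutil

    import streamlit as st

    import utils.llama_index as llama_index
    import utils.logs as logs
    import utils.ollama as ollama


    def rag_pipeline():
        """Run LLM setup, ingestion, and indexing; return an exception or None."""
        try:
            # 1. Initialize the Ollama LLM; post-migration this would also set
            #    the global llama-index Settings rather than a ServiceContext.
            llm = ollama.create_ollama_llm(
                st.session_state["selected_model"],
                st.session_state["ollama_endpoint"],
            )
            st.session_state["llm"] = llm

            # 2. Load files staged in data/
            save_dir = os.getcwd() + "/data"
            documents = llama_index.load_documents(save_dir)
            st.session_state["documents"] = documents

            # 3. Build the index / query engine (no service_context argument
            #    anymore -- an assumption consistent with this commit's theme)
            llama_index.create_query_engine(documents)

            # 4. Remove temp files
            shutil.rmtree(save_dir)
        except Exception as err:
            logs.log.error(f"RAG pipeline failed: {err}")
            return err
        return None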
2 changes: 1 addition & 1 deletion components/tabs/local_files.py
@@ -53,4 +53,4 @@ def local_files():
if error is not None:
st.exception(error)
else:
st.write("Your files are ready. Let's chat! 😎")
st.write("Your files are ready. Let's chat! 😎") # TODO: This should be a button.
2 changes: 1 addition & 1 deletion components/tabs/settings.py
@@ -94,7 +94,7 @@ def settings():
"Chunk Overlap",
help="The amount of overlap between two consecutive chunks. A higher overlap value helps maintain continuity and context across chunks.",
key="chunk_overlap",
placeholder="20",
placeholder="200",
value=st.session_state["chunk_overlap"],
)
