-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from jonfairbanks/develop
GitHub RAG Support
- Loading branch information
Showing
17 changed files
with
525 additions
and
307 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
*.pyc | ||
data/* | ||
data/* | ||
*.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
FROM python:3.9-slim as base | ||
FROM python:3.10-slim as base | ||
|
||
# Setup env | ||
ENV LANG C.UTF-8 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,4 +15,4 @@ nbconvert = "*" | |
[dev-packages] | ||
|
||
[requires] | ||
python_version = "3.9" | ||
python_version = "3.10" |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,136 @@ | ||
import os | ||
import shutil | ||
|
||
import streamlit as st | ||
|
||
import utils.helpers as func | ||
import utils.ollama as ollama | ||
import utils.llama_index as llama_index | ||
import utils.logs as logs | ||
|
||
|
||
def github_repo(): | ||
st.header("Import files from a GitHub repo") | ||
st.caption("Convert a GitHub repo to embeddings for utilization during chat") | ||
|
||
github_container = st.container(border=True) | ||
with github_container: | ||
# st.header("Import files from a GitHub repo") | ||
# st.caption("Convert a GitHub repo to embeddings for utilization during chat") | ||
if st.session_state["selected_model"] is not None: | ||
st.text_input( | ||
"GitHub repo", | ||
"Select a GitHub.com repo", | ||
placeholder="jonfairbanks/local-rag", | ||
key="github_repo", | ||
value=st.session_state.github_repo, | ||
on_change=func.process_github_repo, | ||
args=(st.session_state.github_repo,), | ||
) | ||
|
||
repo_processed = None | ||
repo_processed = st.button( | ||
"Process Repo", | ||
on_click=func.clone_github_repo, | ||
args=(st.session_state["github_repo"],), | ||
) # TODO: Should this be with st.button? | ||
|
||
with st.spinner("Processing..."): | ||
if repo_processed is True: | ||
error = None | ||
|
||
###################################### | ||
# Create Llama-Index service-context # | ||
# to use local LLMs and embeddings # | ||
###################################### | ||
|
||
try: | ||
llm = ollama.create_ollama_llm( | ||
st.session_state["selected_model"], | ||
st.session_state["ollama_endpoint"], | ||
) | ||
st.caption("✔️ LLM Initialized") | ||
|
||
# resp = llm.complete("Hello!") | ||
# print(resp) | ||
|
||
# Determine embedding model to use | ||
|
||
embedding_model = st.session_state["embedding_model"] | ||
hf_embedding_model = None | ||
|
||
if embedding_model == None: | ||
logs.log.info("No embedding model set; using defaults...") | ||
hf_embedding_model = "BAAI/bge-large-en-v1.5" | ||
|
||
if embedding_model == "Default (bge-large-en-v1.5)": | ||
logs.log.info("Using default embedding model...") | ||
hf_embedding_model = "BAAI/bge-large-en-v1.5" | ||
|
||
if embedding_model == "Large (Salesforce/SFR-Embedding-Mistral)": | ||
logs.log.info( | ||
"Using the Salesforce embedding model; RIP yer VRAM..." | ||
) | ||
hf_embedding_model = "Salesforce/SFR-Embedding-Mistral" | ||
|
||
if embedding_model == "Other": | ||
logs.log.info("Using a user-provided embedding model...") | ||
hf_embedding_model = st.session_state["other_embedding_model"] | ||
|
||
service_context = llama_index.create_service_context( | ||
llm, | ||
st.session_state["system_prompt"], | ||
hf_embedding_model, | ||
st.session_state["chunk_size"], | ||
) | ||
st.caption("✔️ Context Created") | ||
except Exception as err: | ||
logs.log.error(f"Setting up Service Context failed: {err}") | ||
error = err | ||
|
||
####################################### | ||
# Load files from the data/ directory # | ||
####################################### | ||
|
||
try: | ||
save_dir = os.getcwd() + "/data" | ||
documents = llama_index.load_documents(save_dir) | ||
st.session_state["documents"] = documents | ||
st.caption("✔️ Processed File Data") | ||
except Exception as err: | ||
logs.log.error(f"Document Load Error: {err}") | ||
error = err | ||
|
||
########################################### | ||
# Create an index from ingested documents # | ||
########################################### | ||
|
||
try: | ||
llama_index.create_query_engine(documents, service_context) | ||
st.caption("✔️ Created File Index") | ||
except Exception as err: | ||
logs.log.error(f"Index Creation Error: {err}") | ||
error = err | ||
|
||
##################### | ||
# Remove data files # | ||
##################### | ||
|
||
try: | ||
save_dir = os.getcwd() + "/data" | ||
shutil.rmtree(save_dir) | ||
st.caption("✔️ Removed Temp Files") | ||
except Exception as err: | ||
logs.log.error(f"Failed to delete data files: {err}") | ||
error = err | ||
|
||
##################### | ||
# Show Final Status # | ||
##################### | ||
|
||
if error is not None: | ||
st.exception(error) | ||
else: | ||
st.write("Your files are ready. Let's chat! 😎") | ||
|
||
else: | ||
st.text_input( | ||
"Select a GitHub.com repo", | ||
placeholder="jonfairbanks/local-rag", | ||
disabled=True, | ||
) | ||
st.button( | ||
"Process Repo", | ||
on_click=func.process_github_repo, | ||
args=(st.session_state.github_repo,), | ||
disabled=True, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.