Skip to content

Commit

Permalink
Rewriting local example in llm-app to use xpack (#5541)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 9b44e727a2d28a56ee49be2a0cda9862af7cb66e
  • Loading branch information
szymondudycz authored and Manul from Pathway committed Feb 1, 2024
1 parent 0dc944d commit 589cc3e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
43 changes: 29 additions & 14 deletions examples/pipelines/local/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
for documents in the corpus. A prompt is built from the relevant documentation pages
and run through a local LLM downloaded from the HuggingFace repository.
Because of the model's restrictions, you need to be careful about the length of the prompt
with the embedded documents. In this example this is solved by cropping the prompt to a set
length - the query is at the beginning of the prompt, so it won't be removed, but some
parts of the documents may be omitted from the prompt.
Depending on the length of your documents and the model you use, this may not be necessary,
or you may want to use a more refined method of shortening your prompts.
Usage:
In the root of this repository run:
`poetry run ./run_examples.py local`
Expand All @@ -28,8 +35,8 @@

import pathway as pw
from pathway.stdlib.ml.index import KNNIndex

from llm_app.model_wrappers import HFTextGenerationTask, SentenceTransformerTask
from pathway.xpacks.llm.embedders import SentenceTransformerEmbedder
from pathway.xpacks.llm.llms import HFPipelineChat, prompt_chat_single_qa


class DocumentInputSchema(pw.Schema):
Expand All @@ -50,13 +57,12 @@ def run(
port: int = 8080,
model_locator: str = os.environ.get("MODEL", "gpt2"),
embedder_locator: str = os.environ.get("EMBEDDER", "intfloat/e5-large-v2"),
embedding_dimension: int = 1024,
max_tokens: int = 60,
device: str = "cpu",
**kwargs,
):
embedder = SentenceTransformerTask(model=embedder_locator, device=device)
embedding_dimension = len(embedder(""))
embedder = SentenceTransformerEmbedder(model=embedder_locator, device=device)
embedding_dimension = len(embedder.__wrapped__(""))

documents = pw.io.jsonlines.read(
data_dir,
Expand All @@ -65,9 +71,7 @@ def run(
autocommit_duration_ms=50,
)

enriched_documents = documents + documents.select(
vector=embedder.apply(text=pw.this.doc)
)
enriched_documents = documents + documents.select(vector=embedder(text=pw.this.doc))

index = KNNIndex(
enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension
Expand All @@ -82,7 +86,7 @@ def run(
)

query += query.select(
vector=embedder.apply(text=pw.this.query),
vector=embedder(text=pw.this.query),
)

query_context = query + index.get_nearest_items(
Expand All @@ -92,20 +96,31 @@ def run(
@pw.udf
def build_prompt(documents, query):
docs_str = "\n".join(documents)
prompt = f"Given the following documents : \n {docs_str} \nanswer this query: {query}"
prompt = f"You are given a query: {query}\n Answer this query based on the following documents: \n {docs_str}"
return prompt

prompt = query_context.select(
prompt=build_prompt(pw.this.documents_list, pw.this.query)
)

model = HFTextGenerationTask(model=model_locator, device=device)
model = HFPipelineChat(
model=model_locator,
device=device,
return_full_text=False,
max_new_tokens=max_tokens,
)

# Cropping the prompt so that it is short enough for the model. Depending on input documents
# and chosen model this may not be necessary.
prompt = prompt.select(
prompt=model.crop_to_max_length(
input_string=pw.this.prompt, max_prompt_length=500
)
)

responses = prompt.select(
query_id=pw.this.id,
result=model.apply(
pw.this.prompt, return_full_text=False, max_new_tokens=max_tokens
),
result=model(prompt_chat_single_qa(pw.this.prompt)),
)

response_writer(responses)
Expand Down
29 changes: 22 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
pathway = "=0.7.10"
pathway = "=0.8.0"
openai = ">=1.2.4"
requests = "^2.31.0"
diskcache = "^5.6.1"
Expand Down

0 comments on commit 589cc3e

Please sign in to comment.