Skip to content

Commit

Permalink
Rewriting local example in llm-app to use xpack (#5541)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 9b44e727a2d28a56ee49be2a0cda9862af7cb66e
  • Loading branch information
szymondudycz authored and Manul from Pathway committed Feb 1, 2024
1 parent 0dc944d commit 589cc3e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 22 deletions.
43 changes: 29 additions & 14 deletions examples/pipelines/local/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
for documents in the corpus. A prompt is built from the relevant documentation pages
and run through a local LLM downloaded from the HuggingFace repository.
Because of the model's restrictions, you need to be careful about the length of the prompt
with the embedded documents. In this example this is solved by cropping the prompt to a set
length - the query is at the beginning of the prompt, so it won't be removed, but some
parts of the documents may be omitted from the prompt.
Depending on the length of your documents and the model you use, this may not be necessary,
or you may want to use a more refined method of shortening your prompts.
Usage:
In the root of this repository run:
`poetry run ./run_examples.py local`
Expand All @@ -28,8 +35,8 @@

import pathway as pw
from pathway.stdlib.ml.index import KNNIndex

from llm_app.model_wrappers import HFTextGenerationTask, SentenceTransformerTask
from pathway.xpacks.llm.embedders import SentenceTransformerEmbedder
from pathway.xpacks.llm.llms import HFPipelineChat, prompt_chat_single_qa


class DocumentInputSchema(pw.Schema):
Expand All @@ -50,13 +57,12 @@ def run(
port: int = 8080,
model_locator: str = os.environ.get("MODEL", "gpt2"),
embedder_locator: str = os.environ.get("EMBEDDER", "intfloat/e5-large-v2"),
embedding_dimension: int = 1024,
max_tokens: int = 60,
device: str = "cpu",
**kwargs,
):
embedder = SentenceTransformerTask(model=embedder_locator, device=device)
embedding_dimension = len(embedder(""))
embedder = SentenceTransformerEmbedder(model=embedder_locator, device=device)
embedding_dimension = len(embedder.__wrapped__(""))

documents = pw.io.jsonlines.read(
data_dir,
Expand All @@ -65,9 +71,7 @@ def run(
autocommit_duration_ms=50,
)

enriched_documents = documents + documents.select(
vector=embedder.apply(text=pw.this.doc)
)
enriched_documents = documents + documents.select(vector=embedder(text=pw.this.doc))

index = KNNIndex(
enriched_documents.vector, enriched_documents, n_dimensions=embedding_dimension
Expand All @@ -82,7 +86,7 @@ def run(
)

query += query.select(
vector=embedder.apply(text=pw.this.query),
vector=embedder(text=pw.this.query),
)

query_context = query + index.get_nearest_items(
Expand All @@ -92,20 +96,31 @@ def run(
@pw.udf
def build_prompt(documents, query):
docs_str = "\n".join(documents)
prompt = f"Given the following documents : \n {docs_str} \nanswer this query: {query}"
prompt = f"You are given a query: {query}\n Answer this query based on the following documents: \n {docs_str}"
return prompt

prompt = query_context.select(
prompt=build_prompt(pw.this.documents_list, pw.this.query)
)

model = HFTextGenerationTask(model=model_locator, device=device)
model = HFPipelineChat(
model=model_locator,
device=device,
return_full_text=False,
max_new_tokens=max_tokens,
)

# Cropping the prompt so that it is short enough for the model. Depending on input documents
# and chosen model this may not be necessary.
prompt = prompt.select(
prompt=model.crop_to_max_length(
input_string=pw.this.prompt, max_prompt_length=500
)
)

responses = prompt.select(
query_id=pw.this.id,
result=model.apply(
pw.this.prompt, return_full_text=False, max_new_tokens=max_tokens
),
result=model(prompt_chat_single_qa(pw.this.prompt)),
)

response_writer(responses)
Expand Down
29 changes: 22 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
pathway = "=0.7.10"
pathway = "=0.8.0"
openai = ">=1.2.4"
requests = "^2.31.0"
diskcache = "^5.6.1"
Expand Down

0 comments on commit 589cc3e

Please sign in to comment.