From cf9af20e6a6bc6c5c76b83c2edeee7279f1ef5c0 Mon Sep 17 00:00:00 2001
From: ogzhanolguncu
Date: Mon, 6 May 2024 12:17:02 +0300
Subject: [PATCH] feat: add jsdocs and allow using retrieval standalone

---
 index.ts                  |  2 +-
 package.json              |  2 +-
 src/error/ratelimit.ts    |  2 +-
 src/rag-chat.ts           | 45 +++++++++++++++++-------
 src/services/index.ts     |  2 ++
 src/services/retrieval.ts | 37 +++++++++++++++++---
 src/types.ts              | 72 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 141 insertions(+), 21 deletions(-)
 create mode 100644 src/services/index.ts

diff --git a/index.ts b/index.ts
index 9e09b8a..e96ba83 100644
--- a/index.ts
+++ b/index.ts
@@ -1,3 +1,3 @@
 export * from "./src/rag-chat";
-export * from "./src/services/history";
+export * from "./src/services";
 export * from "./src/error";
diff --git a/package.json b/package.json
index 6f2d9db..bb9434b 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@upstash/rag-chat",
-  "version": "0.0.14-alpha",
+  "version": "0.0.15-alpha",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",
   "types": "./dist/index.d.ts",
diff --git a/src/error/ratelimit.ts b/src/error/ratelimit.ts
index 567f7e7..886b3e6 100644
--- a/src/error/ratelimit.ts
+++ b/src/error/ratelimit.ts
@@ -1,6 +1,6 @@
 import type { RATELIMIT_ERROR_MESSAGE } from "../constants";
 
-type RatelimitResponse = {
+export type RatelimitResponse = {
   error: typeof RATELIMIT_ERROR_MESSAGE;
   resetTime?: number;
 };
diff --git a/src/rag-chat.ts b/src/rag-chat.ts
index 6efcd62..dd5eb76 100644
--- a/src/rag-chat.ts
+++ b/src/rag-chat.ts
@@ -2,21 +2,24 @@ import type { Callbacks } from "@langchain/core/callbacks/manager";
 import type { BaseMessage } from "@langchain/core/messages";
 import { RunnableSequence, RunnableWithMessageHistory } from "@langchain/core/runnables";
 import { LangChainStream, StreamingTextResponse } from "ai";
-
-import { appendDefaultsIfNeeded, formatChatHistory, sanitizeQuestion } from "./utils";
-
 import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
 import type { PromptTemplate } from "@langchain/core/prompts";
-import { ClientFactory } from "./client-factory";
-import { Config } from "./config";
+
 import { HistoryService } from "./services/history";
 import { RetrievalService } from "./services/retrieval";
+import { RateLimitService } from "./services/ratelimit";
+import type { RetrievePayload } from "./services/retrieval";
+
 import { QA_TEMPLATE } from "./prompts";
+
 import { UpstashModelError } from "./error/model";
-import { RateLimitService } from "./services/ratelimit";
-import type { ChatOptions, PrepareChatResult, RAGChatConfig } from "./types";
 import { RatelimitUpstashError } from "./error/ratelimit";
+import type { ChatOptions, PrepareChatResult, RAGChatConfig } from "./types";
+import { ClientFactory } from "./client-factory";
+import { Config } from "./config";
+import { appendDefaultsIfNeeded, formatChatHistory, sanitizeQuestion } from "./utils";
+
 type CustomInputValues = { chat_history?: BaseMessage[]; question: string; context: string };
 
 export class RAGChat {
@@ -41,12 +44,17 @@ export class RAGChat {
     this.template = config.template;
   }
 
-  private async prepareChat(
-    input: string,
-    similarityThreshold?: number
-  ): Promise<PrepareChatResult> {
+  private async prepareChat({
+    question: input,
+    similarityThreshold,
+    topK,
+  }: RetrievePayload): Promise<PrepareChatResult> {
     const question = sanitizeQuestion(input);
-    const facts = await this.retrievalService.retrieveFromVectorDb(question, similarityThreshold);
+    const facts = await this.retrievalService.retrieveFromVectorDb({
+      question,
+      similarityThreshold,
+      topK,
+    });
     return { question, facts };
   }
 
@@ -54,7 +62,10 @@ export class RAGChat {
     input: string,
     options: ChatOptions
   ): Promise<StreamingTextResponse | Record<string, unknown>> {
+    // Adds the chat session ID and rate limit session ID if not provided.
     const options_ = appendDefaultsIfNeeded(options);
+
+    // Checks the user's rate limit. If rate limiting is not enabled, `success` is always true.
     const { success, resetTime } = await this.ratelimitService.checkLimit(
       options_.ratelimitSessionId
     );
@@ -66,7 +77,12 @@ export class RAGChat {
       });
     }
 
-    const { question, facts } = await this.prepareChat(input, options.similarityThreshold);
+    // Sanitizes the given input by stripping all newline characters, then queries the vector DB with the sanitized question.
+    const { question, facts } = await this.prepareChat({
+      question: input,
+      similarityThreshold: options.similarityThreshold,
+      topK: options.topK,
+    });
 
     return options.stream
       ? this.streamingChainCall(options_, question, facts)
@@ -122,6 +138,9 @@ export class RAGChat {
     );
   }
 
+  /**
+   * Prepares RAG Chat by creating or getting the Redis, Vector, and Ratelimit instances.
+   */
   static async initialize(
     config: RAGChatConfig & { email: string; token: string }
   ): Promise<RAGChat> {
diff --git a/src/services/index.ts b/src/services/index.ts
new file mode 100644
index 0000000..7357a41
--- /dev/null
+++ b/src/services/index.ts
@@ -0,0 +1,2 @@
+export * from "./history";
+export * from "./retrieval";
diff --git a/src/services/retrieval.ts b/src/services/retrieval.ts
index 0bbe040..063daee 100644
--- a/src/services/retrieval.ts
+++ b/src/services/retrieval.ts
@@ -1,7 +1,22 @@
 import type { Index } from "@upstash/sdk";
 import { formatFacts } from "../utils";
+import type { RAGChatConfig } from "../types";
+import { ClientFactory } from "../client-factory";
+import { Config } from "../config";
 
 const SIMILARITY_THRESHOLD = 0.5;
+const TOP_K = 5;
+
+type RetrievalInit = Omit<RAGChatConfig, "vector"> & {
+  email: string;
+  token: string;
+};
+
+export type RetrievePayload = {
+  question: string;
+  similarityThreshold?: number;
+  topK?: number;
+};
 
 export class RetrievalService {
   private index: Index;
@@ -9,14 +24,15 @@ export class RetrievalService {
     this.index = index;
   }
 
-  async retrieveFromVectorDb(
-    question: string,
-    similarityThreshold = SIMILARITY_THRESHOLD
-  ): Promise<string> {
+  async retrieveFromVectorDb({
+    question,
+    similarityThreshold = SIMILARITY_THRESHOLD,
+    topK = TOP_K,
+  }: RetrievePayload): Promise<string> {
     const index = this.index;
     const result = await index.query<{ value: string }>({
       data: question,
-      topK: 5,
+      topK,
       includeMetadata: true,
       includeVectors: false,
     });
@@ -34,4 +50,15 @@ export class RetrievalService {
       .map((embedding, index) => `- Context Item ${index}: ${embedding.metadata?.value ?? ""}`);
     return formatFacts(facts);
   }
+
+  public static async init(config: RetrievalInit) {
+    const clientFactory = new ClientFactory(
+      new Config(config.email, config.token, {
+        redis: config.redis,
+        region: config.region,
+      })
+    );
+    const { vector } = await clientFactory.init({ vector: true });
+    return new RetrievalService(vector);
+  }
 }
diff --git a/src/types.ts b/src/types.ts
index bc4a451..58823b1 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -5,11 +5,34 @@ import type { Index, Ratelimit, Redis } from "@upstash/sdk";
 export type PreferredRegions = "eu-west-1" | "us-east-1";
 
 export type ChatOptions = {
+  /** Set to `true` to stream the response, which keeps web apps interactive instead of stalling users while the full answer is generated.
+   */
   stream: boolean;
+
+  /** Chat session ID of the user interacting with the application.
+   * @default "upstash-rag-chat-session"
+   */
   sessionId?: string;
+
+  /** Number of most recent conversation-history messages to include in your LLM query. Increasing this may lead to hallucinations.
+   * @default 5
+   */
   includeHistory?: number;
+
+  /** Minimum similarity score a vector query result must reach to be included in the context. Higher values return fewer but more relevant results.
+   * @default 0.5
+   */
   similarityThreshold?: number;
+
+  /** Rate limit session ID of the user interacting with the application.
+   * @default "upstash-rag-chat-ratelimit-session"
+   */
   ratelimitSessionId?: string;
+
+  /** Number of context items retrieved from the vector database to include in your LLM query.
+   * @default 5
+   */
+  topK?: number;
 };
 
 export type PrepareChatResult = {
@@ -18,13 +41,62 @@ export type PrepareChatResult = {
 };
 
 type RAGChatConfigCommon = {
+  /** Any LangChain-compatible LLM will work.
+   * @example new ChatOpenAI({
+        modelName: "gpt-3.5-turbo",
+        streaming: true,
+        verbose,
+        temperature: 0,
+        apiKey,
+      })
+   */
   model?: BaseLanguageModelInterface;
+  /**
+   * If no prompt template is provided, falls back to the default template.
+   * @default
+     PromptTemplate.fromTemplate(`You are a friendly AI assistant augmented with an Upstash Vector Store.
+     To help you answer the questions, a context will be provided. This context is generated by querying the vector store with the user question.
+     Answer the question at the end using only the information available in the context and chat history.
+     If the answer is not available in the chat history or context, do not answer the question and politely let the user know that you can only answer if the answer is available in context or the chat history.
+
+     -------------
+     Chat history:
+     {chat_history}
+     -------------
+     Context:
+     {context}
+     -------------
+
+     Question: {question}
+     Helpful answer:`)
+   */
   template?: PromptTemplate;
+  /**
+   * Region that will be used to create or get the Vector and Redis instances.
+   * @default "us-east-1"
+   */
   region?: PreferredRegions;
+  /**
+   * Ratelimit instance.
+   * @example new Ratelimit({
+         redis,
+         limiter: Ratelimit.tokenBucket(10, "1d", 10),
+         prefix: "@upstash/rag-chat-ratelimit",
+     })
+   */
   ratelimit?: Ratelimit;
 };
 
+/** Config needed to initialize the RAG Chat SDK. */
 export type RAGChatConfig = {
+  /**
+   * If no Index name or instance is provided, falls back to the default.
+   * @default "upstash-rag-chat-redis"
+   */
   vector?: string | Index;
+  /**
+   * If no Redis database name or instance is provided, falls back to the default.
+   * @default "upstash-rag-chat-redis"
+   */
   redis?: string | Redis;
 } & RAGChatConfigCommon;
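
A minimal sketch of the standalone retrieval flow this commit enables, built only from the APIs added above (`RetrievalService.init`, `retrieveFromVectorDb`, and the new `src/services` re-export). The email, token, and question values are placeholders, and top-level `await` assumes an ESM context:

```ts
import { RetrievalService } from "@upstash/rag-chat";

// Placeholder Upstash account credentials; substitute real values.
const retrieval = await RetrievalService.init({
  email: "you@example.com",
  token: "YOUR_UPSTASH_TOKEN",
  region: "us-east-1",
});

// Query the vector store directly, without constructing a RAGChat instance.
// Omitted options fall back to the defaults in retrieval.ts
// (SIMILARITY_THRESHOLD = 0.5, TOP_K = 5).
const facts = await retrieval.retrieveFromVectorDb({
  question: "What is Upstash Vector?", // placeholder question
  similarityThreshold: 0.7,
  topK: 3,
});

console.log(facts); // "- Context Item 0: ..." lines produced by formatFacts()
```

The new `topK` and `similarityThreshold` options flow through `RAGChat.chat` the same way, e.g. `await ragChat.chat(input, { stream: false, topK: 3, similarityThreshold: 0.7 })`.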