feat: add jsdocs and allow using retrieval standalone
ogzhanolguncu committed May 6, 2024
1 parent 8532294 commit cf9af20
Showing 7 changed files with 141 additions and 21 deletions.
2 changes: 1 addition & 1 deletion index.ts
@@ -1,3 +1,3 @@
export * from "./src/rag-chat";
export * from "./src/services/history";
export * from "./src/services";
export * from "./src/error";
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@upstash/rag-chat",
"version": "0.0.14-alpha",
"version": "0.0.15-alpha",
"main": "./dist/index.js",
"module": "./dist/index.mjs",
"types": "./dist/index.d.ts",
2 changes: 1 addition & 1 deletion src/error/ratelimit.ts
@@ -1,6 +1,6 @@
import type { RATELIMIT_ERROR_MESSAGE } from "../constants";

type RatelimitResponse = {
export type RatelimitResponse = {
error: typeof RATELIMIT_ERROR_MESSAGE;
resetTime?: number;
};
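
Exporting RatelimitResponse lets consumers type their own rate-limit handling instead of redeclaring the shape. A minimal sketch; the describeRatelimit helper below is hypothetical, not part of the SDK:

import type { RatelimitResponse } from "@upstash/rag-chat";

// Hypothetical helper: turn a rate-limit response into a user-facing message.
// resetTime is optional, so fall back to the bare error message when absent.
const describeRatelimit = (response: RatelimitResponse): string =>
  response.resetTime
    ? `${response.error} Try again after ${new Date(response.resetTime).toISOString()}.`
    : response.error;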
45 changes: 32 additions & 13 deletions src/rag-chat.ts
@@ -2,21 +2,24 @@ import type { Callbacks } from "@langchain/core/callbacks/manager";
import type { BaseMessage } from "@langchain/core/messages";
import { RunnableSequence, RunnableWithMessageHistory } from "@langchain/core/runnables";
import { LangChainStream, StreamingTextResponse } from "ai";

import { appendDefaultsIfNeeded, formatChatHistory, sanitizeQuestion } from "./utils";

import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
import type { PromptTemplate } from "@langchain/core/prompts";
import { ClientFactory } from "./client-factory";
import { Config } from "./config";

import { HistoryService } from "./services/history";
import { RetrievalService } from "./services/retrieval";
import { RateLimitService } from "./services/ratelimit";
import type { RetrievePayload } from "./services/retrieval";

import { QA_TEMPLATE } from "./prompts";

import { UpstashModelError } from "./error/model";
import { RateLimitService } from "./services/ratelimit";
import type { ChatOptions, PrepareChatResult, RAGChatConfig } from "./types";
import { RatelimitUpstashError } from "./error/ratelimit";

import type { ChatOptions, PrepareChatResult, RAGChatConfig } from "./types";
import { ClientFactory } from "./client-factory";
import { Config } from "./config";
import { appendDefaultsIfNeeded, formatChatHistory, sanitizeQuestion } from "./utils";

type CustomInputValues = { chat_history?: BaseMessage[]; question: string; context: string };

export class RAGChat {
@@ -41,20 +44,28 @@ export class RAGChat {
this.template = config.template;
}

private async prepareChat(
input: string,
similarityThreshold?: number
): Promise<PrepareChatResult> {
private async prepareChat({
question: input,
similarityThreshold,
topK,
}: RetrievePayload): Promise<PrepareChatResult> {
const question = sanitizeQuestion(input);
const facts = await this.retrievalService.retrieveFromVectorDb(question, similarityThreshold);
const facts = await this.retrievalService.retrieveFromVectorDb({
question,
similarityThreshold,
topK,
});
return { question, facts };
}

async chat(
input: string,
options: ChatOptions
): Promise<StreamingTextResponse | Record<string, unknown>> {
// Adds chat session id and ratelimit session id if not provided.
const options_ = appendDefaultsIfNeeded(options);

// Checks the user's rate limit. If rate limiting is not enabled, `success` is always true.
const { success, resetTime } = await this.ratelimitService.checkLimit(
options_.ratelimitSessionId
);
@@ -66,7 +77,12 @@
});
}

const { question, facts } = await this.prepareChat(input, options.similarityThreshold);
// Sanitizes the given input by stripping the newline chars, then queries the vector db with the sanitized question.
const { question, facts } = await this.prepareChat({
question: input,
similarityThreshold: options.similarityThreshold,
topK: options.topK,
});

return options.stream
? this.streamingChainCall(options_, question, facts)
@@ -122,6 +138,9 @@
);
}

/**
* Prepares RAG Chat by creating or getting Redis, Vector and Ratelimit instances.
*/
static async initialize(
config: RAGChatConfig & { email: string; token: string }
): Promise<RAGChat> {
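
With prepareChat now taking a payload object, chat forwards topK and similarityThreshold straight through to the vector query. A usage sketch; credentials and option values are placeholders:

import { RAGChat } from "@upstash/rag-chat";

const ragChat = await RAGChat.initialize({
  email: "you@example.com", // placeholder Upstash credentials
  token: "upstash-token",
});

// Both retrieval knobs are optional and fall back to the defaults
// (TOP_K = 5, SIMILARITY_THRESHOLD = 0.5) inside RetrievalService.
const answer = await ragChat.chat("What is Upstash Vector?", {
  stream: false,
  topK: 3,
  similarityThreshold: 0.7,
});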
2 changes: 2 additions & 0 deletions src/services/index.ts
@@ -0,0 +1,2 @@
export * from "./history";
export * from "./retrieval";
37 changes: 32 additions & 5 deletions src/services/retrieval.ts
@@ -1,22 +1,38 @@
import type { Index } from "@upstash/sdk";
import { formatFacts } from "../utils";
import type { RAGChatConfig } from "../types";
import { ClientFactory } from "../client-factory";
import { Config } from "../config";

const SIMILARITY_THRESHOLD = 0.5;
const TOP_K = 5;

type RetrievalInit = Omit<RAGChatConfig, "model" | "template" | "vector"> & {
email: string;
token: string;
};

export type RetrievePayload = {
question: string;
similarityThreshold?: number;
topK?: number;
};

export class RetrievalService {
private index: Index;
constructor(index: Index) {
this.index = index;
}

async retrieveFromVectorDb(
question: string,
similarityThreshold = SIMILARITY_THRESHOLD
): Promise<string> {
async retrieveFromVectorDb({
question,
similarityThreshold = SIMILARITY_THRESHOLD,
topK = TOP_K,
}: RetrievePayload): Promise<string> {
const index = this.index;
const result = await index.query<{ value: string }>({
data: question,
topK: 5,
topK,
includeMetadata: true,
includeVectors: false,
});
@@ -34,4 +50,15 @@ export class RetrievalService {
.map((embedding, index) => `- Context Item ${index}: ${embedding.metadata?.value ?? ""}`);
return formatFacts(facts);
}

public static async init(config: RetrievalInit) {
const clientFactory = new ClientFactory(
new Config(config.email, config.token, {
redis: config.redis,
region: config.region,
})
);
const { vector } = await clientFactory.init({ vector: true });
return new RetrievalService(vector);
}
}
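
The new static init is what makes retrieval usable standalone, per the commit title: it builds just the Vector client through the same ClientFactory/Config path as RAGChat.initialize, with no chat model or history attached. A minimal sketch; credentials are placeholders:

import { RetrievalService } from "@upstash/rag-chat";

const retrieval = await RetrievalService.init({
  email: "you@example.com", // placeholder Upstash credentials
  token: "upstash-token",
  region: "us-east-1",
});

// Query the vector store directly; matches come back as a single
// formatted facts string, ready to drop into a prompt.
const facts = await retrieval.retrieveFromVectorDb({
  question: "How does Upstash Vector index data?",
  topK: 3, // defaults to TOP_K = 5
});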
72 changes: 72 additions & 0 deletions src/types.ts
@@ -5,11 +5,34 @@ import type { Index, Ratelimit, Redis } from "@upstash/sdk";
export type PreferredRegions = "eu-west-1" | "us-east-1";

export type ChatOptions = {
/** Set to `true` to stream responses as they are generated, useful for web apps where you want to stay interactive without stalling users.
*/
stream: boolean;

/** Chat session ID of the user interacting with the application.
* @default "upstash-rag-chat-session"
*/
sessionId?: string;

/** Number of messages from the conversation history to include in your LLM query (the last N messages are retrieved). Increasing this may lead to hallucinations.
* @default 5
*/
includeHistory?: number;

/** Minimum similarity score a vector query result must reach to be included in the context. Higher values yield stricter, more accurate matches.
* @default 0.5
*/
similarityThreshold?: number;

/** Rate limit session ID of the user interacting with the application.
* @default "upstash-rag-chat-ratelimit-session"
*/
ratelimitSessionId?: string;

/** Number of context items retrieved from the vector store to include in your LLM query.
* @default 5
*/
topK?: number;
};

export type PrepareChatResult = {
@@ -18,13 +41,62 @@ export type PrepareChatResult = {
};

type RAGChatConfigCommon = {
/** Any valid LangChain-compatible LLM will work
* @example new ChatOpenAI({
modelName: "gpt-3.5-turbo",
streaming: true,
verbose,
temperature: 0,
apiKey,
})
*/
model?: BaseLanguageModelInterface;
/**
* If no prompt template is provided, falls back to the default template below.
* @default
PromptTemplate.fromTemplate(`You are a friendly AI assistant augmented with an Upstash Vector Store.
To help you answer the questions, a context will be provided. This context is generated by querying the vector store with the user question.
Answer the question at the end using only the information available in the context and chat history.
If the answer is not available in the chat history or context, do not answer the question and politely let the user know that you can only answer if the answer is available in context or the chat history.
-------------
Chat history:
{chat_history}
-------------
Context:
{context}
-------------
Question: {question}
Helpful answer:`)
*/
template?: PromptTemplate;
/**
* Region that will be used to create or get the Vector and Redis instances.
* @default "us-east-1"
*/
region?: PreferredRegions;
/**
* Ratelimit instance
* @example new Ratelimit({
redis,
limiter: Ratelimit.tokenBucket(10, "1d", 10),
prefix: "@upstash/rag-chat-ratelimit",
})
*/
ratelimit?: Ratelimit;
};

/** Config needed to initialize the RAG Chat SDK. */
export type RAGChatConfig = {
/**
* If no Index name or instance is provided, falls back to the default.
* @default "upstash-rag-chat-redis"
*/
vector?: string | Index;
/**
* If no Redis database name or instance is provided, falls back to the default.
* @default "upstash-rag-chat-redis"
*/
redis?: string | Redis;
} & RAGChatConfigCommon;
