Skip to content

Commit

Permalink
Merge pull request #19 from upstash/embedding-support
Browse files Browse the repository at this point in the history
feat: add embedding support
  • Loading branch information
ogzhanolguncu authored Apr 3, 2024
2 parents 35c1993 + bbac7ae commit 0f9f881
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 29 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,18 @@ bun run test
```sh
bun run build
```

### Contributing

Make sure you have Bun installed and that the following environment variables are set. Each pair must point to an Upstash Vector index with the vector dimension noted above it:

```bash

## Vector dimension should be 2
UPSTASH_VECTOR_REST_URL="XXXXX"
UPSTASH_VECTOR_REST_TOKEN="XXXXX"

## Vector dimension should be 384
EMBEDDING_UPSTASH_VECTOR_REST_URL="XXXXX"
EMBEDDING_UPSTASH_VECTOR_REST_TOKEN="XXXXX"
```
86 changes: 76 additions & 10 deletions src/commands/client/query/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@ const client = newHttpClient();

describe("QUERY", () => {
afterAll(async () => await resetIndexes());

test("should query records successfully", async () => {
const initialVector = [6.6, 7.7];
const initialData = { id: 33, vector: initialVector };

await new UpsertCommand(initialData).exec(client);
//This is needed for vector index insertion to happen. When run with other tests in parallel this tends to fail without sleep. But, standalone it should work without an issue.
await sleep(2000);
Expand All @@ -28,7 +26,6 @@ describe("QUERY", () => {
},
]);
});

test("should query records filtered with metadata filter", async () => {
const initialVector = [6.6, 7.7];
const initialData = {
Expand All @@ -42,7 +39,6 @@ describe("QUERY", () => {
},
},
};

await new UpsertCommand(initialData).exec(client);
//This is needed for vector index insertion to happen. When run with other tests in parallel this tends to fail without sleep. But, standalone it should work without an issue.
await sleep(2000);
Expand All @@ -57,17 +53,19 @@ describe("QUERY", () => {
includeVectors: true,
includeMetadata: true,
}).exec(client);

expect(res).toEqual([
{
id: "34",
score: 1,
vector: [6.6, 7.7],
metadata: { city: "Istanbul", population: 1546000, geography: { continent: "Asia" } },
metadata: {
city: "Istanbul",
population: 1546000,
geography: { continent: "Asia" },
},
},
]);
});

test("should narrow down the query results with filter", async () => {
const exampleVector = [6.6, 7.7];
const initialData = [
Expand All @@ -90,18 +88,20 @@ describe("QUERY", () => {
},
},
];

await new UpsertCommand(initialData).exec(client);
//This is needed for vector index insertion to happen. When run with other tests in parallel this tends to fail without sleep. But, standalone it should work without an issue.
await sleep(2000);
const res = await new QueryCommand<{ animal: string; tags: string[]; diet: string }>({
const res = await new QueryCommand<{
animal: string;
tags: string[];
diet: string;
}>({
vector: exampleVector,
topK: 1,
filter: "tags[0] = 'mammal' AND diet = 'carnivore'",
includeVectors: true,
includeMetadata: true,
}).exec(client);

expect(res).toEqual([
{
id: "2",
Expand All @@ -111,4 +111,70 @@ describe("QUERY", () => {
},
]);
});

// Round trip for text records: upsert one record by `data`, then query with the same text
// and check that the stored metadata comes back on the top hit.
test(
  "should query with plain text successfully",
  async () => {
    const text = "Test1-2-3-4-5";

    // Text records go to the index configured through the EMBEDDING_* variables.
    const embeddingClient = newHttpClient(undefined, {
      token: process.env.EMBEDDING_UPSTASH_VECTOR_REST_TOKEN!,
      url: process.env.EMBEDDING_UPSTASH_VECTOR_REST_URL!,
    });

    const upsert = new UpsertCommand([
      { id: "hello-world", data: text, metadata: { upstash: "test" } },
    ]);
    await upsert.exec(embeddingClient);

    // Wait for the index to ingest the record. Run in parallel with other tests, the query
    // tends to fail without this pause; standalone it should work without it.
    await sleep(5000);

    const query = new QueryCommand({
      data: text,
      topK: 1,
      includeVectors: true,
      includeMetadata: true,
    });
    const matches = await query.exec(embeddingClient);

    expect(matches[0].metadata).toEqual({ upstash: "test" });
  },
  { timeout: 20000 }
);

// Upserts several text records, queries with the text of one of them, and checks the metadata
// of the top hit. The title is distinct from the single-record test in this suite so that
// reports and name filters can tell the two apart.
test(
  "should return the exact text match among several records",
  async () => {
    // Text records go to the index configured through the EMBEDDING_* variables.
    const embeddingClient = newHttpClient(undefined, {
      token: process.env.EMBEDDING_UPSTASH_VECTOR_REST_TOKEN!,
      url: process.env.EMBEDDING_UPSTASH_VECTOR_REST_URL!,
    });
    await new UpsertCommand([
      {
        id: "hello-world",
        data: "Test1-2-3-4-5",
        metadata: { upstash: "Cookie" },
      },
      {
        id: "hello-world1",
        data: "Test1-2-3-4-5-6",
        metadata: { upstash: "Monster" },
      },
      // NOTE(review): this record carries the same text as "hello-world", so the two presumably
      // score identically against the query below and `topK: 1` could return either one.
      // Confirm the index breaks the tie deterministically, or give this record distinct text.
      {
        id: "hello-world2",
        data: "Test1-2-3-4-5",
        metadata: { upstash: "Jar" },
      },
    ]).exec(embeddingClient);
    // Wait for the index to ingest the records. Run in parallel with other tests, the query
    // tends to fail without this pause; standalone it should work without it.
    await sleep(5000);
    const res = await new QueryCommand({
      data: "Test1-2-3-4-5",
      topK: 1,
      includeVectors: true,
      includeMetadata: true,
    }).exec(embeddingClient);

    expect(res[0].metadata).toEqual({ upstash: "Cookie" });
  },
  { timeout: 20000 }
);
});
10 changes: 7 additions & 3 deletions src/commands/client/query/index.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import { Command } from "@commands/command";

type QueryCommandPayload = {
vector: number[];
topK: number;
filter?: string;
includeVectors?: boolean;
includeMetadata?: boolean;
};
} & ({ vector: number[]; data?: never } | { data: string; vector?: never });

export type QueryResult<TMetadata = Record<string, unknown>> = {
id: number | string;
Expand All @@ -17,6 +16,11 @@ export type QueryResult<TMetadata = Record<string, unknown>> = {

export class QueryCommand<TMetadata> extends Command<QueryResult<TMetadata>[]> {
constructor(payload: QueryCommandPayload) {
super(payload, "query");
// Plain-text (`data`) queries use the "query-data" endpoint; vector queries use "query".
// Test the value rather than key presence: the payload type declares `data?: never` next to
// `vector`, so `{ vector, data: undefined }` type-checks, and `"data" in payload` would send
// that vector query to "query-data".
const endpoint: "query" | "query-data" = payload.data === undefined ? "query" : "query-data";
super(payload, endpoint);
}
}
42 changes: 41 additions & 1 deletion src/commands/client/upsert/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ describe("UPSERT", () => {
});

test("should add bulk data with string id", async () => {
//@ts-ignore
const res = await new UpsertCommand([
{
id: "hello-world",
Expand All @@ -47,4 +46,45 @@ describe("UPSERT", () => {
]).exec(client);
expect(res).toEqual("Success");
});

// Upserts a single record given as plain text (`data`) and expects the "Success" reply.
test("should add plain text as data successfully", async () => {
  // Text records go to the index configured through the EMBEDDING_* variables.
  const embeddingClient = newHttpClient(undefined, {
    token: process.env.EMBEDDING_UPSTASH_VECTOR_REST_TOKEN!,
    url: process.env.EMBEDDING_UPSTASH_VECTOR_REST_URL!,
  });

  const upsert = new UpsertCommand([
    { id: "hello-world", data: "Test1-2-3-4-5", metadata: { upstash: "test" } },
  ]);
  const reply = await upsert.exec(embeddingClient);

  expect(reply).toEqual("Success");
});

// A batch that mixes a text (`data`) record with a `vector` record is expected to fail.
test("should fail to upsert due to mixed usage of vector and plain text", async () => {
  const throwable = async () => {
    const embeddingClient = newHttpClient(undefined, {
      token: process.env.EMBEDDING_UPSTASH_VECTOR_REST_TOKEN!,
      url: process.env.EMBEDDING_UPSTASH_VECTOR_REST_URL!,
    });

    await new UpsertCommand([
      {
        id: "hello-world",
        data: "Test1-2-3-4-5",
        metadata: { upstash: "test" },
      },
      {
        id: "hello-world",
        //@ts-ignore
        vector: [1, 2, 3, 4],
        metadata: { upstash: "test" },
      },
    ]).exec(embeddingClient);
  };

  // `throwable` is async, so a failure surfaces as a rejected promise rather than a synchronous
  // throw. Assert on the rejection and await it so the test cannot finish before it settles.
  await expect(throwable()).rejects.toThrow();
});
});
28 changes: 25 additions & 3 deletions src/commands/client/upsert/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,36 @@ import { Command } from "@commands/command";

type NoInfer<T> = T extends infer U ? U : never;

type UpsertCommandPayload<TMetadata> = {
type VectorPayload<TMetadata> = {
id: number | string;
vector: number[];
metadata?: NoInfer<TMetadata>;
};
/** A record supplied as plain text in `data` rather than as a `vector`. */
type DataPayload<TMetadata> = {
id: number | string;
data: string;
metadata?: NoInfer<TMetadata>;
};

/**
 * A batch of records. This is a union of two array types, not an array of a union, so a batch
 * is typed as either all vector records or all text records.
 */
type PayloadArray<TMetadata> = VectorPayload<TMetadata>[] | DataPayload<TMetadata>[];

export class UpsertCommand<TMetadata> extends Command<string> {
constructor(payload: UpsertCommandPayload<TMetadata> | UpsertCommandPayload<TMetadata>[]) {
super(payload, "upsert");
/**
 * Builds an upsert request and picks the endpoint from the payload shape: records that carry
 * text in `data` go to "upsert-data", records that carry a `vector` go to "upsert".
 *
 * @param payload - A single record, or an array of records.
 */
constructor(
  payload: VectorPayload<TMetadata> | DataPayload<TMetadata> | PayloadArray<TMetadata>
) {
  // One predicate serves both the single-record and the batch path, so the two cannot disagree
  // on edge cases such as `data: ""` or an explicitly passed `data: undefined`.
  const carriesText = (record: VectorPayload<TMetadata> | DataPayload<TMetadata>): boolean =>
    "data" in record && record.data !== undefined;

  const records: (VectorPayload<TMetadata> | DataPayload<TMetadata>)[] = Array.isArray(payload)
    ? payload
    : [payload];

  // A batch is routed to "upsert-data" as soon as any one of its records carries text.
  const endpoint: "upsert" | "upsert-data" = records.some(carriesText) ? "upsert-data" : "upsert";

  super(payload, endpoint);
}
}
12 changes: 11 additions & 1 deletion src/commands/command.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import { UpstashError } from "@error/index";
import { Requester } from "@http";

const ENDPOINTS = ["upsert", "query", "delete", "fetch", "reset", "range", "info"] as const;
// Every endpoint name a command may target. The "-data" variants are the ones UpsertCommand and
// QueryCommand select when a payload carries plain text (`data`) instead of a vector.
const ENDPOINTS = [
"upsert",
"query",
"delete",
"fetch",
"reset",
"range",
"info",
"upsert-data",
"query-data",
] as const;

export type EndpointVariants = (typeof ENDPOINTS)[number];
/**
Expand Down
20 changes: 12 additions & 8 deletions src/http/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@ test("remove trailing slash from urls", () => {

describe(new URL("", import.meta.url).pathname, () => {
describe("when the request is invalid", () => {
test("throws", async () => {
const client = newHttpClient();
let hasThrown = false;
await client.request({ body: ["get", "1", "2"] }).catch(() => {
hasThrown = true;
});
expect(hasThrown).toBeTrue();
});
// An invalid request body must make the client's request promise reject.
test(
  "throws",
  async () => {
    const client = newHttpClient();
    // Start the request outside the try block so that only a rejection is recorded; a
    // synchronous throw from `request` itself still fails the test.
    const pending = client.request({ body: ["get", "1", "2"] });

    let rejected = false;
    try {
      await pending;
    } catch {
      rejected = true;
    }

    expect(rejected).toBeTrue();
  },
  { timeout: 20000 }
);
});

describe("whithout authorization", () => {
Expand Down
9 changes: 6 additions & 3 deletions src/utils/test-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import { HttpClient, RetryConfig } from "../http";

export type NonArrayType<T> = T extends Array<infer U> ? U : T;

export const newHttpClient = (retry?: RetryConfig | undefined) => {
export const newHttpClient = (
retry?: RetryConfig | undefined,
altToken?: { url: string; token: string }
) => {
const url = process.env.UPSTASH_VECTOR_REST_URL;
if (!url) {
throw new Error("Could not find url");
Expand All @@ -14,8 +17,8 @@ export const newHttpClient = (retry?: RetryConfig | undefined) => {
}

return new HttpClient({
baseUrl: url,
headers: { authorization: `Bearer ${token}` },
baseUrl: altToken?.url ?? url,
headers: { authorization: `Bearer ${altToken?.token ?? token}` },
retry,
});
};
Expand Down

0 comments on commit 0f9f881

Please sign in to comment.