diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/AzureAISearchFactory.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/AzureAISearchFactory.cs
new file mode 100644
index 000000000000..0f437422fb32
--- /dev/null
+++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/AzureAISearchFactory.cs
@@ -0,0 +1,137 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Text.Json;
+using System.Text.Json.Nodes;
+using System.Text.Json.Serialization;
+using Azure.Search.Documents.Indexes;
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.AzureAISearch;
+
+namespace Memory.VectorStoreLangchainInterop;
+
+/// <summary>
+/// Contains a factory method that can be used to create an Azure AI Search vector store that is compatible with datasets ingested using Langchain.
+/// </summary>
+/// <remarks>
+/// This class is used with the <see cref="VectorStore_Langchain_Interop"/> sample.
+/// </remarks>
+public static class AzureAISearchFactory
+{
+    /// <summary>
+    /// Record definition that matches the storage format used by Langchain for Azure AI Search.
+    /// </summary>
+    private static readonly VectorStoreRecordDefinition s_recordDefinition = new()
+    {
+        Properties = new List<VectorStoreRecordProperty>
+        {
+            new VectorStoreRecordKeyProperty("id", typeof(string)),
+            new VectorStoreRecordDataProperty("content", typeof(string)),
+            new VectorStoreRecordDataProperty("metadata", typeof(string)),
+            new VectorStoreRecordVectorProperty("content_vector", typeof(ReadOnlyMemory<float>)) { Dimensions = 1536 }
+        }
+    };
+
+    /// <summary>
+    /// Create a new Azure AI Search-backed <see cref="IVectorStore"/> that can be used to read data that was ingested using Langchain.
+    /// </summary>
+    /// <param name="searchIndexClient">Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service.</param>
+    /// <returns>The <see cref="IVectorStore"/>.</returns>
+    public static IVectorStore CreateQdrantLangchainInteropVectorStore(SearchIndexClient searchIndexClient)
+    {
+        // Create a vector store that uses our custom factory for creating collections
+        // so that the collection can be configured to be compatible with Langchain.
+        return new AzureAISearchVectorStore(
+            searchIndexClient,
+            new()
+            {
+                VectorStoreCollectionFactory = new AzureAISearchVectorStoreRecordCollectionFactory()
+            });
+    }
+
+    /// <summary>
+    /// Factory that is used to inject the appropriate <see cref="VectorStoreRecordDefinition"/> and mapper for Langchain interoperability.
+    /// </summary>
+    private sealed class AzureAISearchVectorStoreRecordCollectionFactory : IAzureAISearchVectorStoreRecordCollectionFactory
+    {
+        public IVectorStoreRecordCollection<TKey, TRecord> CreateVectorStoreRecordCollection<TKey, TRecord>(SearchIndexClient searchIndexClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TKey : notnull
+        {
+            if (typeof(TKey) != typeof(string) || typeof(TRecord) != typeof(LangchainDocument<string>))
+            {
+                throw new NotSupportedException("This VectorStore is only usable with string keys and LangchainDocument record types");
+            }
+
+            // Create an Azure AI Search collection. To be compatible with Langchain
+            // we need to use a custom record definition that matches the
+            // schema used by Langchain. We also need to use a custom mapper
+            // since the Langchain schema includes a metadata field that is
+            // a JSON string containing the source property. Parsing this
+            // string and extracting the source is not supported by the default mapper.
+            return (new AzureAISearchVectorStoreRecordCollection<TRecord>(
+                searchIndexClient,
+                name,
+                new()
+                {
+                    VectorStoreRecordDefinition = s_recordDefinition,
+                    JsonObjectCustomMapper = new LangchainInteropMapper() as IVectorStoreRecordMapper<TRecord, JsonObject>
+                }) as IVectorStoreRecordCollection<TKey, TRecord>)!;
+        }
+    }
+
+    /// <summary>
+    /// Custom mapper to map the metadata string field, since it contains JSON as a string and this is not supported
+    /// automatically by the built-in mapper.
+ /// + private sealed class LangchainInteropMapper : IVectorStoreRecordMapper, JsonObject> + { + public JsonObject MapFromDataToStorageModel(LangchainDocument dataModel) + { + var storageDocument = new AzureAISearchLangchainDocument() + { + Key = dataModel.Key, + Content = dataModel.Content, + Metadata = $"{{\"source\": \"{dataModel.Source}\"}}", + Embedding = dataModel.Embedding + }; + + return JsonSerializer.SerializeToNode(storageDocument)!.AsObject(); + } + + public LangchainDocument MapFromStorageToDataModel(JsonObject storageModel, StorageToDataModelMapperOptions options) + { + var storageDocument = JsonSerializer.Deserialize(storageModel)!; + var metadataDocument = JsonSerializer.Deserialize(storageDocument.Metadata); + var source = metadataDocument?["source"]?.AsValue()?.ToString(); + + return new LangchainDocument() + { + Key = storageDocument.Key, + Content = storageDocument.Content, + Source = source!, + Embedding = storageDocument.Embedding + }; + } + } + + /// + /// Model class that matches the storage format used by Langchain for Azure AI Search. + /// + private sealed class AzureAISearchLangchainDocument + { + [JsonPropertyName("id")] + public string Key { get; set; } + + [JsonPropertyName("content")] + public string Content { get; set; } + + /// + /// The storage format used by Langchain stores the source information + /// in the metadata field as a JSON string. + /// E.g. {"source": "my-doc"} + /// + [JsonPropertyName("metadata")] + public string Metadata { get; set; } + + [JsonPropertyName("content_vector")] + public ReadOnlyMemory Embedding { get; set; } + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/LangchainDocument.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/LangchainDocument.cs new file mode 100644 index 000000000000..fd7cf0a3f991 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/LangchainDocument.cs @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft. 
All rights reserved. + +namespace Memory.VectorStoreLangchainInterop; + +/// +/// Data model class that matches the data model used by Langchain. +/// This data model is not decorated with vector store attributes since instead +/// a different record definition is used with each vector store implementation. +/// +/// +/// This class is used with the sample. +/// +public class LangchainDocument +{ + /// + /// The unique identifier of the record. + /// + public TKey Key { get; set; } + + /// + /// The text content for which embeddings have been generated. + /// + public string Content { get; set; } + + /// + /// The source of the content. E.g. where to find the original content. + /// + public string Source { get; set; } + + /// + /// The embedding for the . + /// + public ReadOnlyMemory Embedding { get; set; } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/MappingVectorStoreRecordCollection.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/MappingVectorStoreRecordCollection.cs new file mode 100644 index 000000000000..581ed6bf2565 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/MappingVectorStoreRecordCollection.cs @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Runtime.CompilerServices; +using Microsoft.Extensions.VectorData; + +namespace Memory.VectorStoreLangchainInterop; + +/// +/// Decorator class that allows conversion of keys and records between public and internal representations. +/// +/// +/// This class is useful if a vector store implementation exposes keys or records in a way that is not +/// suitable for the user of the vector store. E.g. let's say that the vector store supports Guid keys +/// but you want to work with string keys that contain Guids. This class allows you to map between the +/// public string Guids and the internal Guids. +/// +/// The type of the key that the user of this class will use. 
+/// The type of the key that the internal collection exposes. +/// The type of the record that the user of this class will use. +/// The type of the record that the internal collection exposes. +internal sealed class MappingVectorStoreRecordCollection : IVectorStoreRecordCollection + where TPublicKey : notnull + where TInternalKey : notnull +{ + private readonly IVectorStoreRecordCollection _collection; + private readonly Func _publicToInternalKeyMapper; + private readonly Func _internalToPublicKeyMapper; + private readonly Func _publicToInternalRecordMapper; + private readonly Func _internalToPublicRecordMapper; + + public MappingVectorStoreRecordCollection( + IVectorStoreRecordCollection collection, + Func publicToInternalKeyMapper, + Func internalToPublicKeyMapper, + Func publicToInternalRecordMapper, + Func internalToPublicRecordMapper) + { + this._collection = collection; + this._publicToInternalKeyMapper = publicToInternalKeyMapper; + this._internalToPublicKeyMapper = internalToPublicKeyMapper; + this._publicToInternalRecordMapper = publicToInternalRecordMapper; + this._internalToPublicRecordMapper = internalToPublicRecordMapper; + } + + /// + public string CollectionName => this._collection.CollectionName; + + /// + public Task CollectionExistsAsync(CancellationToken cancellationToken = default) + { + return this._collection.CollectionExistsAsync(cancellationToken); + } + + /// + public Task CreateCollectionAsync(CancellationToken cancellationToken = default) + { + return this._collection.CreateCollectionAsync(cancellationToken); + } + + /// + public Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default) + { + return this._collection.CreateCollectionIfNotExistsAsync(cancellationToken); + } + + /// + public Task DeleteAsync(TPublicKey key, DeleteRecordOptions? 
options = null, CancellationToken cancellationToken = default) + { + return this._collection.DeleteAsync(this._publicToInternalKeyMapper(key), options, cancellationToken); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + return this._collection.DeleteBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken); + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) + { + return this._collection.DeleteCollectionAsync(cancellationToken); + } + + /// + public async Task GetAsync(TPublicKey key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var internalRecord = await this._collection.GetAsync(this._publicToInternalKeyMapper(key), options, cancellationToken).ConfigureAwait(false); + if (internalRecord == null) + { + return default; + } + + return this._internalToPublicRecordMapper(internalRecord); + } + + /// + public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var internalRecords = this._collection.GetBatchAsync(keys.Select(this._publicToInternalKeyMapper), options, cancellationToken); + return internalRecords.Select(this._internalToPublicRecordMapper); + } + + /// + public async Task UpsertAsync(TPublicRecord record, UpsertRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var internalRecord = this._publicToInternalRecordMapper(record); + var internalKey = await this._collection.UpsertAsync(internalRecord, options, cancellationToken).ConfigureAwait(false); + return this._internalToPublicKeyMapper(internalKey); + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var internalRecords = records.Select(this._publicToInternalRecordMapper); + var internalKeys = this._collection.UpsertBatchAsync(internalRecords, options, cancellationToken); + await foreach (var internalKey in internalKeys.ConfigureAwait(false)) + { + yield return this._internalToPublicKeyMapper(internalKey); + } + } + + /// + public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) + { + var searchResults = await this._collection.VectorizedSearchAsync(vector, options, cancellationToken).ConfigureAwait(false); + var publicResultRecords = searchResults.Results.Select(result => new VectorSearchResult(this._internalToPublicRecordMapper(result.Record), result.Score)); + + return new VectorSearchResults(publicResultRecords) + { + TotalCount = searchResults.TotalCount, + Metadata = searchResults.Metadata, + }; + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/PineconeFactory.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/PineconeFactory.cs new file mode 100644 index 000000000000..e1a30d75a4ff --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/PineconeFactory.cs @@ -0,0 +1,72 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.VectorData; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Sdk = Pinecone; + +namespace Memory.VectorStoreLangchainInterop; + +/// +/// Contains a factory method that can be used to create a Pinecone vector store that is compatible with datasets ingested using Langchain. +/// +/// +/// This class is used with the sample. +/// +public static class PineconeFactory +{ + /// + /// Record definition that matches the storage format used by Langchain for Pinecone. 
+ /// + private static readonly VectorStoreRecordDefinition s_recordDefinition = new() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Content", typeof(string)) { StoragePropertyName = "text" }, + new VectorStoreRecordDataProperty("Source", typeof(string)) { StoragePropertyName = "source" }, + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { StoragePropertyName = "embedding", Dimensions = 1536 } + } + }; + + /// + /// Create a new Pinecone-backed that can be used to read data that was ingested using Langchain. + /// + /// Pinecone client that can be used to manage the collections and points in a Pinecone store. + /// The . + public static IVectorStore CreatePineconeLangchainInteropVectorStore(Sdk.PineconeClient pineconeClient) + { + // Create a vector store that uses our custom factory for creating collections + // so that the collection can be configured to be compatible with Langchain. + return new PineconeVectorStore( + pineconeClient, + new() + { + VectorStoreCollectionFactory = new PineconeVectorStoreRecordCollectionFactory() + }); + } + + /// + /// Factory that is used to inject the appropriate for Langchain interoperability. + /// + private sealed class PineconeVectorStoreRecordCollectionFactory : IPineconeVectorStoreRecordCollectionFactory + { + public IVectorStoreRecordCollection CreateVectorStoreRecordCollection(Sdk.PineconeClient pineconeClient, string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition) where TKey : notnull + { + if (typeof(TKey) != typeof(string) || typeof(TRecord) != typeof(LangchainDocument)) + { + throw new NotSupportedException("This VectorStore is only usable with string keys and LangchainDocument record types"); + } + + // Create a Pinecone collection and pass in our custom record definition that matches + // the schema used by Langchain so that the default mapper can use the storage names + // in it, to map to the storage scheme. + return (new PineconeVectorStoreRecordCollection( + pineconeClient, + name, + new() + { + VectorStoreRecordDefinition = s_recordDefinition + }) as IVectorStoreRecordCollection)!; + } + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/QdrantFactory.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/QdrantFactory.cs new file mode 100644 index 000000000000..a21a2245a1c4 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/QdrantFactory.cs @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.VectorData; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Qdrant.Client; +using Qdrant.Client.Grpc; + +namespace Memory.VectorStoreLangchainInterop; + +/// +/// Contains a factory method that can be used to create a Qdrant vector store that is compatible with datasets ingested using Langchain. +/// +/// +/// This class is used with the sample. +/// +public static class QdrantFactory +{ + /// + /// Record definition that matches the storage format used by Langchain for Qdrant. + /// There is no need to list the data fields, since they have no indexing requirements and Qdrant + /// doesn't require individual fields to be defined on index creation. 
+    /// </summary>
+    private static readonly VectorStoreRecordDefinition s_recordDefinition = new()
+    {
+        Properties = new List<VectorStoreRecordProperty>
+        {
+            new VectorStoreRecordKeyProperty("Key", typeof(Guid)),
+            new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory<float>)) { StoragePropertyName = "embedding", Dimensions = 1536 }
+        }
+    };
+
+    /// <summary>
+    /// Create a new Qdrant-backed <see cref="IVectorStore"/> that can be used to read data that was ingested using Langchain.
+    /// </summary>
+    /// <param name="qdrantClient">Qdrant client that can be used to manage the collections and points in a Qdrant store.</param>
+    /// <returns>The <see cref="IVectorStore"/>.</returns>
+    public static IVectorStore CreateQdrantLangchainInteropVectorStore(QdrantClient qdrantClient)
+    {
+        // Create a vector store that uses our custom factory for creating collections
+        // so that the collection can be configured to be compatible with Langchain.
+        return new QdrantVectorStore(
+            qdrantClient,
+            new()
+            {
+                VectorStoreCollectionFactory = new QdrantVectorStoreRecordCollectionFactory()
+            });
+    }
+
+    /// <summary>
+    /// Factory that is used to inject the appropriate <see cref="VectorStoreRecordDefinition"/> and mapper for Langchain interoperability.
+    /// </summary>
+    private sealed class QdrantVectorStoreRecordCollectionFactory : IQdrantVectorStoreRecordCollectionFactory
+    {
+        public IVectorStoreRecordCollection<TKey, TRecord> CreateVectorStoreRecordCollection<TKey, TRecord>(QdrantClient qdrantClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TKey : notnull
+        {
+            // Create a Qdrant collection. To be compatible with Langchain
+            // we need to use a custom record definition that matches the
+            // schema used by Langchain. We also need to use a custom mapper
+            // since the Langchain schema includes a metadata field that is
+            // a struct and this isn't supported by the default mapper.
+            // Since langchain creates collections without named vector support
+            // we should set HasNamedVectors to false.
+ var collection = new QdrantVectorStoreRecordCollection>( + qdrantClient, + name, + new() + { + HasNamedVectors = false, + VectorStoreRecordDefinition = s_recordDefinition, + PointStructCustomMapper = new LangchainInteropMapper() + }); + + // If the user asked for a guid key, we can return the collection as is. + if (typeof(TKey) == typeof(Guid) && typeof(TRecord) == typeof(LangchainDocument)) + { + return (collection as IVectorStoreRecordCollection)!; + } + + // If the user asked for a string key, we can add a decorator which converts back and forth between string and guid. + // The string that the user provides will still need to contain a valid guid, since the Langchain created collection + // uses guid keys. + // Supporting string keys like this is useful since it means you can work with the collection in the same way as with + // collections from other vector stores that support string keys. + if (typeof(TKey) == typeof(string) && typeof(TRecord) == typeof(LangchainDocument)) + { + var stringKeyCollection = new MappingVectorStoreRecordCollection, LangchainDocument>( + collection, + p => Guid.Parse(p), + i => i.ToString("D"), + p => new LangchainDocument { Key = Guid.Parse(p.Key), Content = p.Content, Source = p.Source, Embedding = p.Embedding }, + i => new LangchainDocument { Key = i.Key.ToString("D"), Content = i.Content, Source = i.Source, Embedding = i.Embedding }); + + return (stringKeyCollection as IVectorStoreRecordCollection)!; + } + + throw new NotSupportedException("This VectorStore is only usable with Guid keys and LangchainDocument record types or string keys and LangchainDocument record types"); + } + } + + /// + /// A custom mapper that is required to map the metadata struct. While the other + /// fields in the record can be mapped by the default Qdrant mapper, the default + /// mapper doesn't support complex types like metadata, which is a Qdrant struct + /// containing a source field. 
+ /// + private sealed class LangchainInteropMapper : IVectorStoreRecordMapper, PointStruct> + { + public PointStruct MapFromDataToStorageModel(LangchainDocument dataModel) + { + var metadataStruct = new Struct() + { + Fields = { ["source"] = dataModel.Source } + }; + + var pointStruct = new PointStruct() + { + Id = new PointId() { Uuid = dataModel.Key.ToString("D") }, + Vectors = new Vectors() { Vector = dataModel.Embedding.ToArray() }, + Payload = + { + ["page_content"] = dataModel.Content, + ["metadata"] = new Value() { StructValue = metadataStruct } + }, + }; + + return pointStruct; + } + + public LangchainDocument MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) + { + return new LangchainDocument() + { + Key = new Guid(storageModel.Id.Uuid), + Content = storageModel.Payload["page_content"].StringValue, + Source = storageModel.Payload["metadata"].StructValue.Fields["source"].StringValue, + Embedding = options.IncludeVectors ? storageModel.Vectors.Vector.Data.ToArray() : null + }; + } + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/RedisFactory.cs b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/RedisFactory.cs new file mode 100644 index 000000000000..16c269491d91 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreLangchainInterop/RedisFactory.cs @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.VectorData; +using Microsoft.SemanticKernel.Connectors.Redis; +using StackExchange.Redis; + +namespace Memory.VectorStoreLangchainInterop; + +/// +/// Contains a factory method that can be used to create a Redis vector store that is compatible with datasets ingested using Langchain. +/// +/// +/// This class is used with the sample. +/// +public static class RedisFactory +{ + /// + /// Record definition that matches the storage format used by Langchain for Redis. 
+ /// + private static readonly VectorStoreRecordDefinition s_recordDefinition = new() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Content", typeof(string)) { StoragePropertyName = "text" }, + new VectorStoreRecordDataProperty("Source", typeof(string)) { StoragePropertyName = "source" }, + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { StoragePropertyName = "embedding", Dimensions = 1536 } + } + }; + + /// + /// Create a new Redis-backed that can be used to read data that was ingested using Langchain. + /// + /// The redis database to read/write from. + /// The . + public static IVectorStore CreateRedisLangchainInteropVectorStore(IDatabase database) + { + // Create a vector store that uses our custom factory for creating collections + // so that the collection can be configured to be compatible with Langchain. + return new RedisVectorStore( + database, + new() + { + VectorStoreCollectionFactory = new RedisVectorStoreRecordCollectionFactory() + }); + } + + /// + /// Factory that is used to inject the appropriate for Langchain interoperability. + /// + private sealed class RedisVectorStoreRecordCollectionFactory : IRedisVectorStoreRecordCollectionFactory + { + public IVectorStoreRecordCollection CreateVectorStoreRecordCollection(IDatabase database, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TKey : notnull + { + if (typeof(TKey) != typeof(string) || typeof(TRecord) != typeof(LangchainDocument)) + { + throw new NotSupportedException("This VectorStore is only usable with string keys and LangchainDocument record types"); + } + + // Create a hash set collection, since Langchain uses redis hashes for storing records. + // Also pass in our custom record definition that matches the schema used by Langchain + // so that the default mapper can use the storage names in it, to map to the storage + // scheme. 
+            return (new RedisHashSetVectorStoreRecordCollection<TRecord>(
+                database,
+                name,
+                new()
+                {
+                    VectorStoreRecordDefinition = s_recordDefinition
+                }) as IVectorStoreRecordCollection<TKey, TRecord>)!;
+        }
+    }
+}
diff --git a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop.cs b/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop.cs
new file mode 100644
index 000000000000..5466e7fd30af
--- /dev/null
+++ b/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop.cs
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Azure;
+using Azure.Identity;
+using Azure.Search.Documents.Indexes;
+using Memory.VectorStoreLangchainInterop;
+using Microsoft.Extensions.VectorData;
+using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
+using Microsoft.SemanticKernel.Embeddings;
+using Qdrant.Client;
+using StackExchange.Redis;
+using Sdk = Pinecone;
+
+namespace Memory;
+
+/// <summary>
+/// Example showing how to consume data that had previously been ingested into a database using Langchain.
+/// The example also demonstrates how to get all vector stores to share the same data model, so where necessary
+/// a conversion is done, specifically for ids, where the database requires GUIDs, but we want to use strings
+/// containing GUIDs in the common data model.
+/// </summary>
+/// <remarks>
+/// To run these samples, you need to first create collection instances using Langchain.
+/// This sample assumes that you used the pets sample data set from this article:
+/// https://python.langchain.com/docs/tutorials/retrievers/#documents
+/// And the from_documents method to create the collection as shown here:
+/// https://python.langchain.com/docs/tutorials/retrievers/#vector-stores
+/// </remarks>
+public class VectorStore_Langchain_Interop(ITestOutputHelper output) : BaseTest(output)
+{
+    /// <summary>
+    /// Shows how to read data from an Azure AI Search collection that was created and ingested using Langchain.
+ /// + [Fact] + public async Task ReadDataFromLangchainAzureAISearchAsync() + { + var searchIndexClient = new SearchIndexClient( + new Uri(TestConfiguration.AzureAISearch.Endpoint), + new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey)); + var vectorStore = AzureAISearchFactory.CreateQdrantLangchainInteropVectorStore(searchIndexClient); + await this.ReadDataFromCollectionAsync(vectorStore, "pets"); + } + + /// + /// Shows how to read data from a Qdrant collection that was created and ingested using Langchain. + /// Also adds a converter to expose keys as strings containing GUIDs instead of objects, + /// to match the document schema of the other vector stores. + /// + [Fact] + public async Task ReadDataFromLangchainQdrantAsync() + { + var qdrantClient = new QdrantClient("localhost"); + var vectorStore = QdrantFactory.CreateQdrantLangchainInteropVectorStore(qdrantClient); + await this.ReadDataFromCollectionAsync(vectorStore, "pets"); + } + + /// + /// Shows how to read data from a Pinecone collection that was created and ingested using Langchain. + /// + [Fact] + public async Task ReadDataFromLangchainPineconeAsync() + { + var pineconeClient = new Sdk.PineconeClient(TestConfiguration.Pinecone.ApiKey); + var vectorStore = PineconeFactory.CreatePineconeLangchainInteropVectorStore(pineconeClient); + await this.ReadDataFromCollectionAsync(vectorStore, "pets"); + } + + /// + /// Shows how to read data from a Redis collection that was created and ingested using Langchain. + /// + [Fact] + public async Task ReadDataFromLangchainRedisAsync() + { + var database = ConnectionMultiplexer.Connect("localhost:6379").GetDatabase(); + var vectorStore = RedisFactory.CreateRedisLangchainInteropVectorStore(database); + await this.ReadDataFromCollectionAsync(vectorStore, "pets"); + } + + /// + /// Method to do a vector search on a collection in the provided vector store. + /// + /// The vector store to search. + /// The name of the collection. + /// An async task. 
+ private async Task ReadDataFromCollectionAsync(IVectorStore vectorStore, string collectionName) + { + // Create an embedding generation service. + var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + new AzureCliCredential()); + + // Get the collection. + var collection = vectorStore.GetCollection>(collectionName); + + // Search the data set. + var searchString = "I'm looking for an animal that is loyal and will make a great companion"; + var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString); + var searchResult = await collection.VectorizedSearchAsync(searchVector, new() { Top = 1 }); + var resultRecords = await searchResult.Results.ToListAsync(); + + this.Output.WriteLine("Search string: " + searchString); + this.Output.WriteLine("Source: " + resultRecords.First().Record.Source); + this.Output.WriteLine("Text: " + resultRecords.First().Record.Content); + this.Output.WriteLine(); + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_AzureAISearch.cs b/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_AzureAISearch.cs deleted file mode 100644 index 989de37009e2..000000000000 --- a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_AzureAISearch.cs +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Text.Json.Serialization; -using Azure; -using Azure.Identity; -using Azure.Search.Documents.Indexes; -using Microsoft.Extensions.VectorData; -using Microsoft.SemanticKernel.Connectors.AzureAISearch; -using Microsoft.SemanticKernel.Connectors.AzureOpenAI; -using Microsoft.SemanticKernel.Embeddings; - -namespace Memory; - -/// -/// Example showing how to consume data that had previously been -/// ingested into an Azure AI Search instance using Langchain. 
-/// -/// -/// To run this sample, you need to first create an instance of an -/// Azure AI Search collection using Langhain. -/// This sample assumes that you used the pets sample data set from this article: -/// https://python.langchain.com/docs/tutorials/retrievers/#documents -/// And the from_documents method to create the collection as shown here: -/// https://python.langchain.com/docs/tutorials/retrievers/#vector-stores -/// -public class VectorStore_Langchain_Interop_AzureAISearch(ITestOutputHelper output) : BaseTest(output) -{ - [Fact] - public async Task ReadDataFromLangchainAzureAISearchAsync() - { - // Create an embedding generation service. - var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( - TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, - TestConfiguration.AzureOpenAIEmbeddings.Endpoint, - new AzureCliCredential()); - - // Create a vector store. - var searchIndexClient = new SearchIndexClient( - new Uri(TestConfiguration.AzureAISearch.Endpoint), - new AzureKeyCredential(TestConfiguration.AzureAISearch.ApiKey)); - var vectorStore = new AzureAISearchVectorStore(searchIndexClient); - - // Get the collection. - var collection = vectorStore.GetCollection("pets"); - - // Search the data set. - var searchString = "I'm looking for an animal that is loyal and will make a great companion"; - var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString); - var searchResult = await collection.VectorizedSearchAsync(searchVector, new() { Top = 1 }); - var resultRecords = await searchResult.Results.ToListAsync(); - - this.Output.WriteLine("Search string: " + searchString); - this.Output.WriteLine("Source: " + resultRecords.First().Record.Metadata); - this.Output.WriteLine("Text: " + resultRecords.First().Record.Content); - this.Output.WriteLine(); - } - - /// - /// Model class that matches the storage format used by Langchain for Azure AI Search. 
- /// - private sealed class AzureAISearchLangchainDocument - { - [JsonPropertyName("id")] - [VectorStoreRecordKey] - public string Key { get; set; } - - [JsonPropertyName("content")] - [VectorStoreRecordData] - public string Content { get; set; } - - /// - /// The storage format used by Lanchain stores the source information - /// in the metadata field as a JSON string. - /// E.g. {"source": "my-doc"} - /// - [JsonPropertyName("metadata")] - [VectorStoreRecordData] - public string Metadata { get; set; } - - [JsonPropertyName("content_vector")] - [VectorStoreRecordVector(1536)] - public ReadOnlyMemory Embedding { get; set; } - } -} diff --git a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Qdrant.cs b/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Qdrant.cs deleted file mode 100644 index 1671164e13c7..000000000000 --- a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Qdrant.cs +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Azure.Identity; -using Microsoft.Extensions.VectorData; -using Microsoft.SemanticKernel.Connectors.AzureOpenAI; -using Microsoft.SemanticKernel.Connectors.Qdrant; -using Microsoft.SemanticKernel.Embeddings; -using Qdrant.Client; -using Qdrant.Client.Grpc; - -namespace Memory; - -/// -/// Example showing how to consume data that had previously been -/// ingested into a Qdrant instance using Langchain. -/// -/// -/// To run this sample, you need to first create an instance of a -/// Qdrant collection using Langhain. 
-/// This sample assumes that you used the pets sample data set from this article: -/// https://python.langchain.com/docs/tutorials/retrievers/#documents -/// And the from_documents method to create the collection as shown here: -/// https://python.langchain.com/docs/tutorials/retrievers/#vector-stores -/// -/// Since the source field is stored as a subfield on the metadata field, and -/// the default Qdrant mapper doesn't support complex types, we need to create a custom mapper. -/// -public class VectorStore_Langchain_Interop_Qdrant(ITestOutputHelper output) : BaseTest(output) -{ - [Fact] - public async Task ReadDataFromLangchainQdrantAsync() - { - // Create an embedding generation service. - var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( - TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, - TestConfiguration.AzureOpenAIEmbeddings.Endpoint, - new AzureCliCredential()); - - // Get the collection. - var qdrantClient = new QdrantClient("localhost"); - var collection = new QdrantVectorStoreRecordCollection( - qdrantClient, - "pets", - new() { PointStructCustomMapper = new LangchainInteropMapper() }); - - // Search the data set. - var searchString = "I'm looking for an animal that is loyal and will make a great companion"; - var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString); - var searchResult = await collection.VectorizedSearchAsync(searchVector, new() { Top = 1 }); - var resultRecords = await searchResult.Results.ToListAsync(); - - this.Output.WriteLine("Search string: " + searchString); - this.Output.WriteLine("Source: " + resultRecords.First().Record.Source); - this.Output.WriteLine("Text: " + resultRecords.First().Record.Content); - this.Output.WriteLine(); - } - - /// - /// Model class containing the fields we want to map from the Qdrant storage format. 
- /// - /// - /// Note that since we won't be using this data model to infer a schema from - /// for creating a new collection or to do data model to storage model mapping with, - /// we can just specify the most minimal of attributes. - /// - private sealed class QdrantLangchainDocument - { - [VectorStoreRecordKey] - public Guid Key { get; set; } - - public string Content { get; set; } - - public string Source { get; set; } - - [VectorStoreRecordVector(1536)] - public ReadOnlyMemory Embedding { get; set; } - } - - /// - /// Custom mapper to map the metadata struct, since the default - /// Qdrant mapper doesn't support complex types. - /// - private sealed class LangchainInteropMapper : IVectorStoreRecordMapper - { - public PointStruct MapFromDataToStorageModel(QdrantLangchainDocument dataModel) - { - var metadataStruct = new Struct() - { - Fields = { ["source"] = dataModel.Source } - }; - - var pointStruct = new PointStruct() - { - Id = new PointId() { Uuid = dataModel.Key.ToString("D") }, - Vectors = new Vectors() { Vector = dataModel.Embedding.ToArray() }, - Payload = - { - ["page_content"] = dataModel.Content, - ["metadata"] = new Value() { StructValue = metadataStruct } - }, - }; - - return pointStruct; - } - - public QdrantLangchainDocument MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) - { - return new QdrantLangchainDocument() - { - Key = new Guid(storageModel.Id.Uuid), - Content = storageModel.Payload["page_content"].StringValue, - Source = storageModel.Payload["metadata"].StructValue.Fields["source"].StringValue, - Embedding = options.IncludeVectors ? 
storageModel.Vectors.Vector.Data.ToArray() : null - }; - } - } -} diff --git a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Redis.cs b/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Redis.cs deleted file mode 100644 index 6ca668a1230f..000000000000 --- a/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Redis.cs +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Azure.Identity; -using Microsoft.Extensions.VectorData; -using Microsoft.SemanticKernel.Connectors.AzureOpenAI; -using Microsoft.SemanticKernel.Connectors.Redis; -using Microsoft.SemanticKernel.Embeddings; -using StackExchange.Redis; - -namespace Memory; - -/// -/// Example showing how to consume data that had previously been -/// ingested into a Redis instance using Langchain. -/// -/// -/// To run this sample, you need to first create an instance of a -/// Redis collection using Langhain. -/// This sample assumes that you used the pets sample data set from this article: -/// https://python.langchain.com/docs/tutorials/retrievers/#documents -/// And the from_documents method to create the collection as shown here: -/// https://python.langchain.com/docs/tutorials/retrievers/#vector-stores -/// -public class VectorStore_Langchain_Interop_Redis(ITestOutputHelper output) : BaseTest(output) -{ - [Fact] - public async Task ReadDataFromLangchainRedisAsync() - { - // Create an embedding generation service. - var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( - TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, - TestConfiguration.AzureOpenAIEmbeddings.Endpoint, - new AzureCliCredential()); - - // Create a vector store. - var database = ConnectionMultiplexer.Connect("localhost:6379").GetDatabase(); - var vectorStore = new RedisVectorStore(database, new() { StorageType = RedisStorageType.HashSet }); - - // Get the collection. 
- var collection = vectorStore.GetCollection("pets"); - - // Search the data set. - var searchString = "I'm looking for an animal that is loyal and will make a great companion"; - var searchVector = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString); - var searchResult = await collection.VectorizedSearchAsync(searchVector, new() { Top = 1 }); - var resultRecords = await searchResult.Results.ToListAsync(); - - this.Output.WriteLine("Search string: " + searchString); - this.Output.WriteLine("Source: " + resultRecords.First().Record.Source); - this.Output.WriteLine("Text: " + resultRecords.First().Record.Content); - this.Output.WriteLine(); - } - - /// - /// Model class that matches the storage format used by Langchain for Redis. - /// - private sealed class RedisLangchainDocument - { - [VectorStoreRecordKey] - public string Key { get; set; } - - [VectorStoreRecordData(StoragePropertyName = "text")] - public string Content { get; set; } - - [VectorStoreRecordData(StoragePropertyName = "source")] - public string Source { get; set; } - - [VectorStoreRecordVector(1536, StoragePropertyName = "embedding")] - public ReadOnlyMemory Embedding { get; set; } - } -} diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md index 15584b88685c..0bd3dba591c0 100644 --- a/dotnet/samples/Concepts/README.md +++ b/dotnet/samples/Concepts/README.md @@ -138,9 +138,7 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom - [VectorStore_ConsumeFromMemoryStore_Qdrant: An example that shows how you can use the QdrantVectorStore to consume data that was ingested using the QdrantMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_Qdrant.cs) - [VectorStore_ConsumeFromMemoryStore_Redis: An example that shows how you can use the RedisVectorStore to consume data that was ingested using the 
RedisMemoryStore.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_ConsumeFromMemoryStore_Redis.cs) - [VectorStore_MigrateFromMemoryStore_Redis: An example that shows how you can use the RedisMemoryStore and RedisVectorStore to migrate data to a new schema.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_MigrateFromMemoryStore_Redis.cs) -- [VectorStore_Langchain_Interop_AzureAISearch: An example that shows how you can use the AzureAISearch Vector Store to consume data that was ingested using Langchain.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_AzureAISearch.cs) -- [VectorStore_Langchain_Interop_Qdrant: An example that shows how you can use the Qdrant Vector Store to consume data that was ingested using Langchain.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Qdrant.cs) -- [VectorStore_Langchain_Interop_Redis: An example that shows how you can use the Redis Vector Store to consume data that was ingested using Langchain.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop_Redis.cs) +- [VectorStore_Langchain_Interop: An example that shows how you can use various Vector Stores to consume data that was ingested using Langchain.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_Langchain_Interop.cs) ### Optimization - Examples of different cost and performance optimization techniques