From ba92d4212d4bb221b252fa599f344fe6f2223063 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:57:47 +0100 Subject: [PATCH] Add vectorless search to record collection interface. --- ...VectorStoreCollectionSearchMappingTests.cs | 10 +-- ...nMemoryVectorStoreRecordCollectionTests.cs | 34 ++++++++ ...earchVectorStoreCollectionSearchMapping.cs | 10 +-- ...zureAISearchVectorStoreRecordCollection.cs | 34 +++++++- ...mosDBMongoDBVectorStoreRecordCollection.cs | 12 +++ ...osmosDBNoSQLVectorStoreRecordCollection.cs | 12 +++ ...emoryVectorStoreCollectionSearchMapping.cs | 8 +- .../InMemoryVectorStoreRecordCollection.cs | 22 +++++- .../PineconeVectorStoreRecordCollection.cs | 11 +++ .../QdrantVectorStoreRecordCollection.cs | 11 +++ ...RedisHashSetVectorStoreRecordCollection.cs | 12 +++ .../RedisJsonVectorStoreRecordCollection.cs | 12 +++ .../WeaviateVectorStoreRecordCollection.cs | 11 +++ .../VectorSearch/VectorSearchOptions.cs | 23 ++++++ .../IVectorStoreRecordCollection.cs | 14 ++++ .../VectorStorage/VectorlessSearchFilter.cs | 79 +++++++++++++++++++ .../VectorStorage/VectorlessSearchOptions.cs | 39 +++++++++ .../VectorStorage/VectorlessSearchResults.cs | 32 ++++++++ ...ISearchVectorStoreRecordCollectionTests.cs | 40 ++++++++++ .../QdrantVectorStoreRecordCollectionTests.cs | 42 ++++++++++ ...HashSetVectorStoreRecordCollectionTests.cs | 38 +++++++++ ...disJsonVectorStoreRecordCollectionTests.cs | 36 ++++++++- ...eaviateVectorStoreRecordCollectionTests.cs | 33 ++++++++ .../Data/VectorStoreSearchResultMapping.cs | 53 +++++++++++++ ...atileVectorStoreCollectionSearchMapping.cs | 8 +- .../VolatileVectorStoreRecordCollection.cs | 22 +++++- 26 files changed, 635 insertions(+), 23 deletions(-) create mode 100644 dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchFilter.cs create mode 100644 dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchOptions.cs create mode 100644 
dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchResults.cs create mode 100644 dotnet/src/InternalUtilities/src/Data/VectorStoreSearchResultMapping.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionSearchMappingTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionSearchMappingTests.cs index ae121f93bd0e..f8ee66b06f96 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionSearchMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionSearchMappingTests.cs @@ -21,7 +21,7 @@ public void BuildFilterStringBuildsCorrectEqualityStringForEachFilterType(string var filter = new VectorSearchFilter().EqualTo(fieldName, fieldValue!); // Act. - var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter, new Dictionary { { fieldName, "storage_" + fieldName } }); + var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter.FilterClauses, new Dictionary { { fieldName, "storage_" + fieldName } }); // Assert. Assert.Equal(expected, actual); @@ -34,7 +34,7 @@ public void BuildFilterStringBuildsCorrectTagContainsString() var filter = new VectorSearchFilter().AnyTagEqualTo("Tags", "mytag"); // Act. - var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter, new Dictionary { { "Tags", "storage_tags" } }); + var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter.FilterClauses, new Dictionary { { "Tags", "storage_tags" } }); // Assert. Assert.Equal("storage_tags/any(t: t eq 'mytag')", actual); @@ -47,7 +47,7 @@ public void BuildFilterStringCombinesFilterOptions() var filter = new VectorSearchFilter().EqualTo("intField", 5).AnyTagEqualTo("Tags", "mytag"); // Act. 
- var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter, new Dictionary { { "Tags", "storage_tags" }, { "intField", "storage_intField" } }); + var actual = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(filter.FilterClauses, new Dictionary { { "Tags", "storage_tags" }, { "intField", "storage_intField" } }); // Assert. Assert.Equal("storage_intField eq 5 and storage_tags/any(t: t eq 'mytag')", actual); @@ -57,8 +57,8 @@ public void BuildFilterStringCombinesFilterOptions() public void BuildFilterStringThrowsForUnknownPropertyName() { // Act and assert. - Assert.Throws(() => AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(new VectorSearchFilter().EqualTo("unknown", "value"), new Dictionary())); - Assert.Throws(() => AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(new VectorSearchFilter().AnyTagEqualTo("unknown", "value"), new Dictionary())); + Assert.Throws(() => AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(new VectorSearchFilter().EqualTo("unknown", "value").FilterClauses, new Dictionary())); + Assert.Throws(() => AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(new VectorSearchFilter().AnyTagEqualTo("unknown", "value").FilterClauses, new Dictionary())); } public static IEnumerable DataTypeMappingOptions() diff --git a/dotnet/src/Connectors/Connectors.InMemory.UnitTests/InMemoryVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.InMemory.UnitTests/InMemoryVectorStoreRecordCollectionTests.cs index 1cf974a77c84..ab0bc2028107 100644 --- a/dotnet/src/Connectors/Connectors.InMemory.UnitTests/InMemoryVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.InMemory.UnitTests/InMemoryVectorStoreRecordCollectionTests.cs @@ -270,6 +270,40 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition, TKey testK Assert.Equal($"data {testKey1}", (collection[testKey1] as SinglePropsModel)!.Data); } + [Theory] + 
[InlineData(true, TestRecordKey1, TestRecordKey2)] + [InlineData(true, TestRecordIntKey1, TestRecordIntKey2)] + [InlineData(false, TestRecordKey1, TestRecordKey2)] + [InlineData(false, TestRecordIntKey1, TestRecordIntKey2)] + public async Task CanSearchWithoutVectorAsync(bool useDefinition, TKey testKey1, TKey testKey2) + where TKey : notnull + { + // Arrange + var record1 = CreateModel(testKey1, withVectors: true, new float[] { 1, 1, 1, 1 }); + var record2 = CreateModel(testKey2, withVectors: true, new float[] { -1, -1, -1, -1 }); + + var collection = new ConcurrentDictionary(); + collection.TryAdd(testKey1, record1); + collection.TryAdd(testKey2, record2); + + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateRecordCollection(useDefinition); + + // Act + var filter = new VectorlessSearchFilter().EqualTo("Data", $"data {testKey2}"); + var actual = await sut.VectorlessSearchAsync( + new VectorlessSearchOptions { IncludeVectors = true, Filter = filter }, + this._testCancellationToken); + + // Assert + Assert.NotNull(actual); + Assert.Null(actual.TotalCount); + var actualResults = await actual.Results.ToListAsync(); + Assert.Single(actualResults); + Assert.Contains(actualResults, x => x.Key!.Equals(testKey2)); + } + [Theory] [InlineData(true, TestRecordKey1, TestRecordKey2)] [InlineData(true, TestRecordIntKey1, TestRecordIntKey2)] diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionSearchMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionSearchMapping.cs index ced35f244c5e..e6745f69d9a3 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionSearchMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionSearchMapping.cs @@ -15,17 +15,17 @@ internal static class AzureAISearchVectorStoreCollectionSearchMapping /// /// Build an OData filter 
string from the provided . /// - /// The to build an OData filter string from. + /// The objects to build an OData filter string from. /// A mapping of data model property names to the names under which they are stored. /// The OData filter string. /// Thrown when a provided filter value is not supported. - public static string BuildFilterString(VectorSearchFilter? basicVectorSearchFilter, IReadOnlyDictionary storagePropertyNames) + public static string BuildFilterString(IEnumerable? filterClausees, IReadOnlyDictionary storagePropertyNames) { var filterString = string.Empty; - if (basicVectorSearchFilter?.FilterClauses is not null) + if (filterClausees is not null) { // Map Equality clauses. - var filterStrings = basicVectorSearchFilter?.FilterClauses.OfType().Select(x => + var filterStrings = filterClausees.OfType().Select(x => { string storageFieldName = GetStoragePropertyName(storagePropertyNames, x.FieldName); @@ -46,7 +46,7 @@ public static string BuildFilterString(VectorSearchFilter? basicVectorSearchFilt }); // Map tag contains clauses. - var tagListContainsStrings = basicVectorSearchFilter?.FilterClauses + var tagListContainsStrings = filterClausees .OfType() .Select(x => { diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 7658b52fc702..234eeab5da50 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -313,6 +313,36 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco foreach (var resultKey in resultKeys) { yield return resultKey; } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + // Resolve options. 
+ var internalOptions = options ?? new VectorlessSearchOptions(); + + // Configure search settings. + var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter?.FilterClauses, this._propertyReader.JsonPropertyNamesMap); + + // Build search options. + var searchOptions = new SearchOptions + { + VectorSearch = new(), + Size = internalOptions.Top, + Skip = internalOptions.Skip, + Filter = filterString, + IncludeTotalCount = internalOptions.IncludeTotalCount, + }; + + // Filter out vector fields if requested. + if (!internalOptions.IncludeVectors) + { + searchOptions.Select.Add(this._propertyReader.KeyPropertyJsonName); + searchOptions.Select.AddRange(this._propertyReader.DataPropertyJsonNames); + } + + var vectorSearchResults = await this.SearchAndMapToDataModelAsync(null, searchOptions, internalOptions.IncludeVectors, cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public Task> VectorizedSearchAsync(TVector vector, VectorData.VectorSearchOptions? options = null, CancellationToken cancellationToken = default) { @@ -335,7 +365,7 @@ public Task> VectorizedSearchAsync(TVector // Configure search settings. var vectorQueries = new List(); vectorQueries.Add(new VectorizedQuery(floatVector) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorFieldName } }); - var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter, this._propertyReader.JsonPropertyNamesMap); + var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter?.FilterClauses, this._propertyReader.JsonPropertyNamesMap); // Build search options. var searchOptions = new SearchOptions @@ -375,7 +405,7 @@ public Task> VectorizableTextSearchAsync(string sea // Configure search settings. 
var vectorQueries = new List(); vectorQueries.Add(new VectorizableTextQuery(searchText) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorFieldName } }); - var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter, this._propertyReader.JsonPropertyNamesMap); + var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter?.FilterClauses, this._propertyReader.JsonPropertyNamesMap); // Build search options. var searchOptions = new SearchOptions diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBMongoDB/AzureCosmosDBMongoDBVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBMongoDB/AzureCosmosDBMongoDBVectorStoreRecordCollection.cs index 2f683c73ef92..6cd5dd0a0664 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBMongoDB/AzureCosmosDBMongoDBVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBMongoDB/AzureCosmosDBMongoDBVectorStoreRecordCollection.cs @@ -243,6 +243,18 @@ public async IAsyncEnumerable UpsertBatchAsync( } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + // TODO: Switch to non-vector search to improve performance. + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? 
throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public async Task> VectorizedSearchAsync( TVector vector, diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBNoSQL/AzureCosmosDBNoSQLVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBNoSQL/AzureCosmosDBNoSQLVectorStoreRecordCollection.cs index 3c7fc5052473..3f6b1e4e2b6b 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBNoSQL/AzureCosmosDBNoSQLVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureCosmosDBNoSQL/AzureCosmosDBNoSQLVectorStoreRecordCollection.cs @@ -361,6 +361,18 @@ async IAsyncEnumerable IVectorStoreRecordCollect } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + // TODO: Switch to non-vector search to improve performance. + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? 
throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public Task> VectorizedSearchAsync( TVector vector, diff --git a/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreCollectionSearchMapping.cs b/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreCollectionSearchMapping.cs index 7ecea345cb85..e8954b3a054f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreCollectionSearchMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreCollectionSearchMapping.cs @@ -91,13 +91,13 @@ public static float ConvertScore(float score, string? distanceFunction) /// /// Filter the provided records using the provided filter definition. /// - /// The filter definition to filter the with. + /// The filter clauses to filter the with. /// The records to filter. /// The filtered records. /// Thrown when an unsupported filter clause is encountered. - public static IEnumerable FilterRecords(VectorSearchFilter? filter, IEnumerable records) + public static IEnumerable FilterRecords(IEnumerable? filterClauses, IEnumerable records) { - if (filter == null) + if (filterClauses == null) { return records; } @@ -109,7 +109,7 @@ public static IEnumerable FilterRecords(VectorSearchFilter? filter, IEnu // Run each filter clause against the record, and AND the results together. // Break if any clause returns false, since we are doing an AND and no need // to check any further clauses. 
- foreach (var clause in filter.FilterClauses) + foreach (var clause in filterClauses) { if (clause is EqualToFilterClause equalToFilter) { diff --git a/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreRecordCollection.cs index 3526990f18b0..35b42752293f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStoreRecordCollection.cs @@ -208,6 +208,26 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable record } } + /// +#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously - Need to satisfy the interface which returns IAsyncEnumerable + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) +#pragma warning restore CS1998 + { + var internalOptions = options ?? new VectorlessSearchOptions(); + + var filteredRecords = InMemoryVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter?.FilterClauses, this.GetCollectionDictionary().Values); + + long? count = null; + if (internalOptions.IncludeTotalCount) + { + count = filteredRecords.Count(); + } + + var resultsPage = filteredRecords.Skip(internalOptions.Skip).Take(internalOptions.Top); + + return new VectorlessSearchResults(resultsPage.Cast().ToAsyncEnumerable()) { TotalCount = count }; + } + /// #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously - Need to satisfy the interface which returns IAsyncEnumerable public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) @@ -235,7 +255,7 @@ public async Task> VectorizedSearchAsync(T } // Filter records using the provided filter before doing the vector comparison. 
- var filteredRecords = InMemoryVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter, this.GetCollectionDictionary().Values); + var filteredRecords = InMemoryVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter?.FilterClauses, this.GetCollectionDictionary().Values); // Compare each vector in the filtered results with the provided vector. var results = filteredRecords.Select((record) => diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs index fcffda0eedd7..9e2b813d08b4 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs @@ -234,6 +234,17 @@ await this.RunOperationAsync( } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? 
options = null, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 1706448d7df3..385c58312a9f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -443,6 +443,17 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs index 25236402cbdf..ff12de78e727 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs @@ -329,6 +329,18 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? 
options = null, CancellationToken cancellationToken = default) + { + // TODO: Switch to non-vector search to improve performance. + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index b3467b12abb6..aed4e3dc88ec 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -373,6 +373,18 @@ await this.RunOperationAsync( } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + // TODO: Switch to non-vector search to improve performance. + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? 
throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreRecordCollection.cs index 99a800eb12d5..1830f3a26638 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreRecordCollection.cs @@ -335,6 +335,17 @@ public async IAsyncEnumerable UpsertBatchAsync( } } + /// + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) + { + var dimensions = this._propertyReader.VectorProperty?.Dimensions ?? 
throw new InvalidOperationException("The collection does not have any vector properties, so simulated vectorless search is not possible."); + var vectorSearchResults = await this.VectorizedSearchAsync( + new ReadOnlyMemory(new float[dimensions]), + VectorSearchOptions.FromVectorlessSearchOptions(options), + cancellationToken).ConfigureAwait(false); + return VectorStoreSearchResultMapping.ConvertToVectorlessSearchResults(vectorSearchResults, cancellationToken); + } + /// public async Task> VectorizedSearchAsync( TVector vector, diff --git a/dotnet/src/Connectors/VectorData.Abstractions/VectorSearch/VectorSearchOptions.cs b/dotnet/src/Connectors/VectorData.Abstractions/VectorSearch/VectorSearchOptions.cs index a5773b0cc606..3ac778cf436b 100644 --- a/dotnet/src/Connectors/VectorData.Abstractions/VectorSearch/VectorSearchOptions.cs +++ b/dotnet/src/Connectors/VectorData.Abstractions/VectorSearch/VectorSearchOptions.cs @@ -43,4 +43,27 @@ public class VectorSearchOptions /// count will be null even if requested via this option. /// public bool IncludeTotalCount { get; init; } = false; + + /// + /// Create a new instance from a instance + /// by copying all matching properties. + /// + /// The instance to create the instance from. + /// The new instance. + public static VectorSearchOptions FromVectorlessSearchOptions(VectorlessSearchOptions? options) + { + if (options is null) + { + return new VectorSearchOptions(); + } + + return new VectorSearchOptions() + { + Filter = options.Filter is not null ? 
new VectorSearchFilter(options.Filter.FilterClauses) : null, + Top = options.Top, + Skip = options.Skip, + IncludeVectors = options.IncludeVectors, + IncludeTotalCount = options.IncludeTotalCount + }; + } } diff --git a/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStoreRecordCollection.cs b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStoreRecordCollection.cs index 1aacdff332fa..ac81349ba25b 100644 --- a/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStoreRecordCollection.cs @@ -124,4 +124,18 @@ public interface IVectorStoreRecordCollection : IVectorizedSearch /// Throw when the command fails to execute for any reason. /// Throw when mapping between the storage model and record data model fails. IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Search the vector store for records that match the given options. + /// + /// + /// This is a regular search, without using a search vector, however for some vector stores, it may internally use + /// an artificial vector to perform the search, since not all vector stores support searching without a vector. + /// + /// The options that control the behavior of the search. + /// The to monitor for cancellation requests. The default is . + /// The records found by the vector search, including their result scores. + Task> VectorlessSearchAsync( + VectorlessSearchOptions? 
options = default, + CancellationToken cancellationToken = default); } diff --git a/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchFilter.cs b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchFilter.cs new file mode 100644 index 000000000000..8597aa48376a --- /dev/null +++ b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchFilter.cs @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; + +namespace Microsoft.Extensions.VectorData; + +/// +/// Used to provide filtering when doing searches without vectors. +/// +/// +/// A filter has a collection of s that can be used +/// to request that the underlying service filter the search results. +/// All clauses are combined with and. +/// +public sealed class VectorlessSearchFilter +{ + /// The filter clauses to and together. + private readonly List _filterClauses = []; + + /// Gets the default search filter. + public static VectorlessSearchFilter Default { get; } = new VectorlessSearchFilter(); + + /// + /// The filter clauses to and together. + /// + public IEnumerable FilterClauses => this._filterClauses; + + /// + /// Create an instance of + /// + public VectorlessSearchFilter() + { + } + + /// + /// Create an instance of with the provided s. + /// The instances to use + /// + public VectorlessSearchFilter(IEnumerable filterClauses) + { + if (filterClauses == null) + { + throw new ArgumentNullException(nameof(filterClauses)); + } + + this._filterClauses.AddRange(filterClauses); + } + + /// + /// Add an equal to clause to the filter options. + /// + /// Name of the property to check against. Use the name of the property from your data model or as provided in the record definition. + /// Value that the property should match. + /// instance to allow fluent configuration. + /// + /// This clause will check if a property is equal to a specific value. 
+ /// + public VectorlessSearchFilter EqualTo(string propertyName, object value) + { + this._filterClauses.Add(new EqualToFilterClause(propertyName, value)); + return this; + } + + /// + /// Add an any tag equal to clause to the filter options. + /// + /// Name of the property consisting of a list of values to check against. Use the name of the property from your data model or as provided in the record definition. + /// Value that the list should contain. + /// instance to allow fluent configuration. + /// + /// This clause will check if a property consisting of a list of values contains a specific value. + /// + public VectorlessSearchFilter AnyTagEqualTo(string propertyName, string value) + { + this._filterClauses.Add(new AnyTagEqualToFilterClause(propertyName, value)); + return this; + } +} diff --git a/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchOptions.cs b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchOptions.cs new file mode 100644 index 000000000000..80b9cc2f51ec --- /dev/null +++ b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchOptions.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.Extensions.VectorData; + +/// +/// Options for searching the vector store without a search vector as input. +/// +public class VectorlessSearchOptions +{ + /// + /// Gets or sets a search filter to filter the records in the store with during the search. + /// + public VectorlessSearchFilter? Filter { get; init; } + + /// + /// Gets or sets the maximum number of results to return. + /// + public int Top { get; init; } = 3; + + /// + /// Gets or sets the number of results to skip before returning results, i.e. the index of the first result to return. + /// + public int Skip { get; init; } = 0; + + /// + /// Gets or sets a value indicating whether to include vectors in the retrieval result. 
+ /// + public bool IncludeVectors { get; init; } = false; + + /// + /// Gets or sets a value indicating whether the total count should be included in the results. + /// + /// + /// Default value is false. + /// Not all vector stores will support this option in which case the total + /// count will be null even if requested via this option. + /// + public bool IncludeTotalCount { get; init; } = false; +} diff --git a/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchResults.cs b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchResults.cs new file mode 100644 index 000000000000..f8103bdb594d --- /dev/null +++ b/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/VectorlessSearchResults.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; + +namespace Microsoft.Extensions.VectorData; + +/// +/// Contains the full list of search results for a vectorless search operation with metadata. +/// +/// The record data model to use for retrieving data from the store. +/// The list of records returned by the search operation. +public class VectorlessSearchResults(IAsyncEnumerable results) +{ + /// + /// The total count of results found by the search operation, or null + /// if the count was not requested or cannot be computed. + /// + /// + /// This value represents the total number of results that are available for the current query and not the number of results being returned. + /// + public long? TotalCount { get; init; } + + /// + /// The metadata associated with the content. + /// + public IReadOnlyDictionary? Metadata { get; init; } + + /// + /// The search results. 
+ /// + public IAsyncEnumerable Results { get; } = results; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index 9265efa90f02..6b87a4e82fa4 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -334,6 +334,46 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); } + [Theory(Skip = SkipReason)] + [InlineData("equality", true)] + [InlineData("tagContains", false)] + public async Task ItCanSearchWithoutVectorAndWithFiltersAsync(string option, bool includeVectors) + { + // Arrange. + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); + + // Act. + var filter = option == "equality" ? new VectorlessSearchFilter().EqualTo("HotelName", "Hotel 3") : new VectorlessSearchFilter().AnyTagEqualTo("Tags", "bar"); + var actual = await sut.VectorlessSearchAsync( + new() + { + IncludeVectors = includeVectors, + Filter = filter, + }); + + // Assert. 
+ var searchResults = await actual.Results.ToListAsync(); + Assert.Single(searchResults); + var searchResult = searchResults.First(); + Assert.Equal("BaseSet-3", searchResult.HotelId); + Assert.Equal("Hotel 3", searchResult.HotelName); + Assert.Equal("This is a great hotel", searchResult.Description); + Assert.Equal(new[] { "air conditioning", "bar", "continental breakfast" }, searchResult.Tags); + Assert.True(searchResult.ParkingIncluded); + Assert.Equal(new DateTimeOffset(2015, 9, 20, 0, 0, 0, TimeSpan.Zero), searchResult.LastRenovationDate); + Assert.Equal(4.8, searchResult.Rating); + if (includeVectors) + { + Assert.NotNull(searchResult.DescriptionEmbedding); + var embedding = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel"); + Assert.Equal(embedding, searchResult.DescriptionEmbedding!.Value.ToArray()); + } + else + { + Assert.Null(searchResult.DescriptionEmbedding); + } + } + [Theory(Skip = SkipReason)] [InlineData("equality", true)] [InlineData("tagContains", false)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index 0b8a4bb0cd22..6f88af880c20 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -368,6 +368,48 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); } + [Theory] + [InlineData(true, "singleVectorHotels", false, "equality")] + [InlineData(false, "singleVectorHotels", false, "equality")] + [InlineData(true, "namedVectorsHotels", true, "equality")] + [InlineData(false, "namedVectorsHotels", true, "equality")] + [InlineData(true, "singleVectorHotels", false, "tagContains")] + 
[InlineData(false, "singleVectorHotels", false, "tagContains")] + [InlineData(true, "namedVectorsHotels", true, "tagContains")] + [InlineData(false, "namedVectorsHotels", true, "tagContains")] + public async Task ItCanSearchWithoutVectorAndWithFilterAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors, string filterType) + { + // Arrange. + var options = new QdrantVectorStoreRecordCollectionOptions + { + HasNamedVectors = hasNamedVectors, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); + + // Act. + var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel"); + var filter = filterType == "equality" ? new VectorlessSearchFilter().EqualTo("HotelName", "My Hotel 11") : new VectorlessSearchFilter().AnyTagEqualTo("Tags", "t1"); + var actual = await sut.VectorlessSearchAsync( + new() + { + Filter = filter + }); + + // Assert. 
+ var searchResults = await actual.Results.ToListAsync(); + Assert.Single(searchResults); + + var searchResultRecord = searchResults.First(); + Assert.Equal(11ul, searchResultRecord?.HotelId); + Assert.Equal("My Hotel 11", searchResultRecord?.HotelName); + Assert.Equal(11, searchResultRecord?.HotelCode); + Assert.Equal(4.5f, searchResultRecord?.HotelRating); + Assert.Equal(true, searchResultRecord?.ParkingIncluded); + Assert.Equal(new string[] { "t1", "t2" }, searchResultRecord?.Tags.ToArray()); + Assert.Equal("This is a great hotel.", searchResultRecord?.Description); + } + [Theory] [InlineData(true, "singleVectorHotels", false, "equality")] [InlineData(false, "singleVectorHotels", false, "equality")] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs index d5d807781807..66b3408d7196 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs @@ -302,6 +302,44 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() Assert.Null(await sut.GetAsync("HRemoveMany-3", new GetRecordOptions { IncludeVectors = true })); } + [Theory(Skip = SkipReason)] + [InlineData("hotelCode", true)] + [InlineData("hotelName", false)] + public async Task ItCanSearchWithoutVectorAndWithFilterAsync(string filterType, bool includeVectors) + { + // Arrange: the filter is selected by the InlineData value ("hotelCode" or "hotelName") so that both branches are exercised. + var options = new RedisHashSetVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + var filter = filterType == "hotelCode" ?
new VectorlessSearchFilter().EqualTo("HotelCode", 1) : new VectorlessSearchFilter().EqualTo("HotelName", "My Hotel 1"); + + // Act + var actual = await sut.VectorlessSearchAsync( + new() + { + IncludeVectors = includeVectors, + Filter = filter + }); + + // Assert + var searchResults = await actual.Results.ToListAsync(); + Assert.Single(searchResults); + var searchResult = searchResults.First(); + Assert.Equal("HBaseSet-1", searchResult?.HotelId); + Assert.Equal("My Hotel 1", searchResult?.HotelName); + Assert.Equal(1, searchResult?.HotelCode); + Assert.True(searchResult?.ParkingIncluded); + Assert.Equal(3.6, searchResult?.Rating); + Assert.Equal("This is a great hotel.", searchResult?.Description); + if (includeVectors) + { + Assert.Equal(new[] { 30f, 31f, 32f, 33f }, searchResult?.DescriptionEmbedding?.ToArray()); + } + else + { + Assert.Null(searchResult?.DescriptionEmbedding); + } + } + [Theory(Skip = SkipReason)] [InlineData("hotelCode", true)] [InlineData("hotelName", false)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs index 2ed69bc63055..8ed6cb401a2f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs @@ -332,6 +332,41 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); } + [Theory(Skip = SkipReason)] + [InlineData("equality")] + [InlineData("tagContains")] + public async Task ItCanSearchWithoutVectorAndWithFilterAsync(string filterType) + { + // Arrange + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, 
TestCollectionName, options); + var filter = filterType == "equality" ? new VectorlessSearchFilter().EqualTo("HotelCode", 1) : new VectorlessSearchFilter().AnyTagEqualTo("Tags", "pool"); + + // Act + var actual = await sut.VectorlessSearchAsync( + new() + { + IncludeVectors = true, + Filter = filter + }); + + // Assert + var searchResults = await actual.Results.ToListAsync(); + Assert.Single(searchResults); + var searchResult = searchResults.First(); + Assert.Equal("BaseSet-1", searchResult?.HotelId); + Assert.Equal("My Hotel 1", searchResult?.HotelName); + Assert.Equal(1, searchResult?.HotelCode); + Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, searchResult?.Tags); + Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, searchResult?.FTSTags); + Assert.True(searchResult?.ParkingIncluded); + Assert.Equal(new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), searchResult?.LastRenovationDate); + Assert.Equal(3.6, searchResult?.Rating); + Assert.Equal("Seattle", searchResult?.Address.City); + Assert.Equal("This is a great hotel.", searchResult?.Description); + Assert.Equal(new[] { 30f, 31f, 32f, 33f }, searchResult?.DescriptionEmbedding?.ToArray()); + } + [Theory(Skip = SkipReason)] [InlineData("equality")] [InlineData("tagContains")] @@ -352,7 +387,6 @@ public async Task ItCanSearchWithFloat32VectorAndFilterAsync(string filterType) var searchResults = await actual.Results.ToListAsync(); Assert.Single(searchResults); var searchResult = searchResults.First().Record; - Assert.Equal("My Hotel 1", searchResults.First().Record.HotelName); Assert.Equal("BaseSet-1", searchResult?.HotelId); Assert.Equal("My Hotel 1", searchResult?.HotelName); Assert.Equal(1, searchResult?.HotelCode); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Weaviate/WeaviateVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Weaviate/WeaviateVectorStoreRecordCollectionTests.cs index 9ffaf3172eec..bdc6534953e6 100644 --- 
a/dotnet/src/IntegrationTests/Connectors/Memory/Weaviate/WeaviateVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Weaviate/WeaviateVectorStoreRecordCollectionTests.cs @@ -206,6 +206,39 @@ public async Task ItCanUpsertRecordAsync() Assert.Equal(10, getResult.HotelRating); } + [Theory] + [MemberData(nameof(VectorizedSearchWithFilterData))] + public async Task VectorlessSearchReturnsValidResultsWithFilterAsync(VectorSearchFilter filter, List expectedIds) + { + // Arrange + var hotel1 = this.CreateTestHotel(hotelId: new Guid("11111111-1111-1111-1111-111111111111"), embedding: new[] { 30f, 31f, 32f, 33f }); + var hotel2 = this.CreateTestHotel(hotelId: new Guid("22222222-2222-2222-2222-222222222222"), embedding: new[] { 31f, 32f, 33f, 34f }); + var hotel3 = this.CreateTestHotel(hotelId: new Guid("33333333-3333-3333-3333-333333333333"), embedding: new[] { 20f, 20f, 20f, 20f }); + var hotel4 = this.CreateTestHotel(hotelId: new Guid("44444444-4444-4444-4444-444444444444"), embedding: new[] { -1000f, -1000f, -1000f, -1000f }); + + var sut = new WeaviateVectorStoreRecordCollection(fixture.HttpClient!, "VectorlessSearchWithFilter"); + + await sut.CreateCollectionIfNotExistsAsync(); + + await sut.UpsertBatchAsync([hotel4, hotel2, hotel3, hotel1]).ToListAsync(); + + // Act + var actual = await sut.VectorlessSearchAsync(new() + { + Filter = new VectorlessSearchFilter(filter.FilterClauses), + Top = 4, + }); + + // Assert + var searchResults = await actual.Results.ToListAsync(); + var actualIds = searchResults.Select(l => l.HotelId.ToString()).ToList(); + + expectedIds.Sort(); + actualIds.Sort(); + + Assert.Equal(expectedIds, actualIds); + } + [Theory] [InlineData(true)] [InlineData(false)] diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreSearchResultMapping.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreSearchResultMapping.cs new file mode 100644 index 000000000000..564f37def601 --- /dev/null +++ 
b/dotnet/src/InternalUtilities/src/Data/VectorStoreSearchResultMapping.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.VectorData; + +/// +/// Contains helpers for converting vector search results into vectorless (record-only) search results. +/// +[ExcludeFromCodeCoverage] +internal static class VectorStoreSearchResultMapping +{ + /// + /// Convert the given VectorSearchResults to a VectorlessSearchResults instance, carrying over TotalCount and Metadata. + /// + /// The type of the returned data model. + /// The VectorSearchResults to convert. + /// The CancellationToken to monitor for cancellation requests. + /// The converted VectorlessSearchResults instance. + public static VectorlessSearchResults ConvertToVectorlessSearchResults( + VectorSearchResults vectorSearchResults, + CancellationToken cancellationToken) + { + var convertedItems = ConvertToRecordOnlyEnumerableAsync(vectorSearchResults.Results, cancellationToken); + + return new VectorlessSearchResults(convertedItems) + { + TotalCount = vectorSearchResults.TotalCount, + Metadata = vectorSearchResults.Metadata, + }; + } + + /// + /// Convert the given list of vector search results to a list containing only the records. + /// + /// The type of the returned data model. + /// The vector search results to convert. + /// The CancellationToken to monitor for cancellation requests. + /// The converted records.
+ private static async IAsyncEnumerable ConvertToRecordOnlyEnumerableAsync( + IAsyncEnumerable> vectorSearchResults, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + await foreach (var result in vectorSearchResults.WithCancellation(cancellationToken).ConfigureAwait(false)) // Forward the token so cancellation reaches the source enumeration. + { + yield return result.Record; + } + } +} diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreCollectionSearchMapping.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreCollectionSearchMapping.cs index 555483ea8e6a..10da765f997a 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreCollectionSearchMapping.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreCollectionSearchMapping.cs @@ -92,13 +92,13 @@ public static float ConvertScore(float score, string? distanceFunction) /// /// Filter the provided records using the provided filter definition. /// - /// The filter definition to filter the with. + /// The filter clauses to filter the with. /// The records to filter. /// The filtered records. /// Thrown when an unsupported filter clause is encountered. - public static IEnumerable FilterRecords(VectorSearchFilter? filter, IEnumerable records) + public static IEnumerable FilterRecords(IEnumerable? filterClauses, IEnumerable records) { - if (filter == null) + if (filterClauses == null) { return records; } @@ -110,7 +110,7 @@ public static IEnumerable FilterRecords(VectorSearchFilter? filter, IEnu // Run each filter clause against the record, and AND the results together. // Break if any clause returns false, since we are doing an AND and no need // to check any further clauses.
- foreach (var clause in filter.FilterClauses) + foreach (var clause in filterClauses) { if (clause is EqualToFilterClause equalToFilter) { diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index 016ff01e1541..f04abb75a31d 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -209,6 +209,26 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable record } } + /// +#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously - Need to satisfy the interface which returns IAsyncEnumerable + public async Task> VectorlessSearchAsync(VectorlessSearchOptions? options = null, CancellationToken cancellationToken = default) +#pragma warning restore CS1998 + { + var internalOptions = options ?? new VectorlessSearchOptions(); + + var filteredRecords = VolatileVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter?.FilterClauses, this.GetCollectionDictionary().Values); + + long? count = null; + if (internalOptions.IncludeTotalCount) + { + count = filteredRecords.Count(); + } + + var resultsPage = filteredRecords.Skip(internalOptions.Skip).Take(internalOptions.Top); + + return new VectorlessSearchResults(resultsPage.Cast().ToAsyncEnumerable()) { TotalCount = count }; + } + /// #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously - Need to satisfy the interface which returns IAsyncEnumerable public async Task> VectorizedSearchAsync(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default) @@ -236,7 +256,7 @@ public async Task> VectorizedSearchAsync(T } // Filter records using the provided filter before doing the vector comparison. 
- var filteredRecords = VolatileVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter, this.GetCollectionDictionary().Values); + var filteredRecords = VolatileVectorStoreCollectionSearchMapping.FilterRecords(internalOptions.Filter?.FilterClauses, this.GetCollectionDictionary().Values); // Compare each vector in the filtered results with the provided vector. var results = filteredRecords.Select((record) =>