diff --git a/Directory.Packages.props b/Directory.Packages.props
index c07a747b3..7c560380f 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -37,7 +37,7 @@
-
+
diff --git a/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs b/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs
index ddee300e9..fa34454f3 100644
--- a/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs
+++ b/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs
@@ -81,108 +81,80 @@ public static async Task Main()
SearchResult relevant = await memory.SearchAsync(query: Query, minRelevance: MinRelevance, limit: Limit);
Console.WriteLine($"Relevant documents: {relevant.Results.Count}");
-#if KernelMemoryDev
- var relevantDocuments = new Dictionary>();
foreach (Citation result in relevant.Results)
{
// Store the document IDs so we can load all their records later
- relevantDocuments.Add(result.DocumentId, new List());
Console.WriteLine($"Document ID: {result.DocumentId}");
Console.WriteLine($"Relevant partitions: {result.Partitions.Count}");
foreach (Citation.Partition partition in result.Partitions)
{
- Console.WriteLine("--------------------------");
- Console.WriteLine($"Partition number: {partition.PartitionNumber}");
- Console.WriteLine($"Relevance: {partition.Relevance}\n");
- Console.WriteLine(partition.Text);
-
- relevantDocuments[result.DocumentId].Add(partition.PartitionNumber);
+ Console.WriteLine($" * Partition {partition.PartitionNumber}, relevance: {partition.Relevance}");
}
- Console.WriteLine();
- }
-
- // For each relevant document
- // Note: loops can be optimized for better perf, this code is only a demo
- const int HowManyToAdd = 1;
- Console.WriteLine("Fetching all document partitions...");
- foreach (KeyValuePair> relevantPartitionNumbers in relevantDocuments)
- {
- var docId = relevantPartitionNumbers.Key;
- Console.WriteLine($"\nDocument ID: {docId}");
-
- // Load all partitions. Note: the list might be out of order.
- SearchResult all = await memory.SearchAsync("", filters: new[] { MemoryFilters.ByDocument(docId) }, limit: int.MaxValue);
- List allPartitionsContent = all.Results.FirstOrDefault()?.Partitions ?? new();
+ Console.WriteLine("--------------------------");
- // Loop through the relevant partitions
- foreach (int relevantPartitionNumber in relevantPartitionNumbers.Value)
+ // For each relevant partition, fetch the partition before and the one after it
+ foreach (Citation.Partition partition in result.Partitions)
{
- Console.WriteLine("--------------------------");
+ // Collect the partitions in a collection sorted by partition number, seeded with the relevant one
+ var partitions = new SortedDictionary { [partition.PartitionNumber] = partition };
- // Use a data structure to order partitions by number
- var result = new SortedDictionary();
+ // One filter per neighbor: same document, partition number tag equal to N-1 or N+1
+ var filters = new List
+ {
+ MemoryFilters.ByDocument(result.DocumentId).ByTag(Constants.ReservedFilePartitionNumberTag, $"{partition.PartitionNumber - 1}"),
+ MemoryFilters.ByDocument(result.DocumentId).ByTag(Constants.ReservedFilePartitionNumberTag, $"{partition.PartitionNumber + 1}")
+ };
- // Loop all partitions, include before and after the relevant ones
- foreach (Citation.Partition p in allPartitionsContent)
+ // Fetch adjacent partitions and add them to the sorted collection. Results may be empty (e.g. a single-partition document), so do not assume a first citation exists.
+ SearchResult adjacentList = await memory.SearchAsync("", filters: filters, limit: 2);
+ foreach (Citation.Partition adjacent in adjacentList.Results.SelectMany(c => c.Partitions))
{
- if (Math.Abs(p.PartitionNumber - relevantPartitionNumber) <= HowManyToAdd)
- {
- result.Add(p.PartitionNumber, p.Text);
- }
+ partitions[adjacent.PartitionNumber] = adjacent;
}
- // Show partition and adjacent ones in order
- foreach (var p in result)
+ // Print partitions in ascending partition-number order
+ foreach (var p in partitions)
{
- Console.WriteLine($"Partition: {p.Key}");
- Console.WriteLine(p.Value);
+ Console.WriteLine($"# Partition {p.Value.PartitionNumber}");
+ Console.WriteLine(p.Value.Text);
+ Console.WriteLine();
}
- Console.WriteLine();
+ Console.WriteLine("--------------------------");
}
+
+ Console.WriteLine();
}
-#endif
}
}
/* Result:
-Token count: 2510
Importing memories...
Searching memories...
Relevant documents: 1
Document ID: example207
Relevant partitions: 2
+* Partition 27, relevance: 0.8557962
+* Partition 13, relevance: 0.85513425
--------------------------
-Partition number: 27
-Relevance: 0.8557962
-
-As scientific interest in [...] or ancient microbial life.
---------------------------
-Partition number: 13
-Relevance: 0.85513425
+# Partition 26
+Dr. Mei Lin, a renowned ...
-Gerald Marshall, the Chief [...] in astrobiological research."
+# Partition 27
+As scientific interest in ...
-Fetching all document partitions...
-
-Document ID: example207
+# Partition 28
+Meanwhile, back on Earth, the ...
--------------------------
-Partition: 26
-Dr. Mei Lin, a renowned [...] of life in the universe."
-Partition: 27
-As scientific interest [...] ancient microbial life.
-Partition: 28
-Meanwhile, back on Earth, [...] meaning in the universe.
+# Partition 12
+Appearing as a glowing, translucent ...
---------------------------
-Partition: 12
-Appearing as a glowing, [...] including its high CO2 levels.
-Partition: 13
-Gerald Marshall, the [...] in astrobiological research."
-Partition: 14
-While further studies [...] alien at the same time.
+# Partition 13
+Gerald Marshall, the Chief ...
+# Partition 14
+While further studies are ...
+--------------------------
*/
-
diff --git a/extensions/Redis/Redis/RedisConfig.cs b/extensions/Redis/Redis/RedisConfig.cs
index 4719a8588..52e142d22 100644
--- a/extensions/Redis/Redis/RedisConfig.cs
+++ b/extensions/Redis/Redis/RedisConfig.cs
@@ -43,9 +43,7 @@ public class RedisConfig
{ Constants.ReservedDocumentIdTag, '|' },
{ Constants.ReservedFileIdTag, '|' },
{ Constants.ReservedFilePartitionTag, '|' },
-#if KernelMemoryDev
{ Constants.ReservedFileSectionNumberTag, '|' },
-#endif
{ Constants.ReservedFileTypeTag, '|' },
};
diff --git a/service/Core/DataFormats/FileSection.cs b/service/Core/DataFormats/FileSection.cs
deleted file mode 100644
index 0e83b922e..000000000
--- a/service/Core/DataFormats/FileSection.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright (c) Microsoft. All rights reserved.
-
-#if KernelMemoryDev
-// See Abstractions
-#else
-using System.Collections.Generic;
-using System.Text.Json.Serialization;
-
-namespace Microsoft.KernelMemory.DataFormats;
-
-public class FileContent
-{
- [JsonPropertyOrder(0)]
- [JsonPropertyName("sections")]
- public List Sections { get; set; } = new();
-}
-
-public class FileSection
-{
- ///
- /// Text page number/Audio segment number/Video scene number
- ///
- [JsonPropertyOrder(0)]
- [JsonPropertyName("number")]
- public int Number { get; }
-
- ///
- /// Whether the first/last sentence may continue from the previous/into
- /// the next section (e.g. like PDF docs).
- /// true: the first/last sentence do not cross over, the first doesn't
- /// continue from the previous section, and the last sentence ends
- /// where the section ends (e.g. Powerpoint, Excel).
- /// false: the first sentence may be a continuation from the previous section,
- /// and the last sentence may continue into the next section.
- ///
- [JsonPropertyOrder(1)]
- [JsonPropertyName("complete")]
- public bool SentencesAreComplete { get; }
-
- ///
- /// Page text content
- ///
- [JsonPropertyOrder(2)]
- [JsonPropertyName("content")]
- public string Content { get; }
-
- public FileSection(int number, string? content, bool sentencesAreComplete)
- {
- this.Number = number;
- this.SentencesAreComplete = sentencesAreComplete;
- this.Content = content ?? string.Empty;
- }
-}
-#endif
diff --git a/service/Core/Handlers/GenerateEmbeddingsHandler.cs b/service/Core/Handlers/GenerateEmbeddingsHandler.cs
index 0351c8b08..0f9dddf76 100644
--- a/service/Core/Handlers/GenerateEmbeddingsHandler.cs
+++ b/service/Core/Handlers/GenerateEmbeddingsHandler.cs
@@ -157,10 +157,8 @@ public GenerateEmbeddingsHandler(
Size = text.Length,
MimeType = MimeTypes.TextEmbeddingVector,
ArtifactType = DataPipeline.ArtifactTypes.TextEmbeddingVector,
-#if KernelMemoryDev
PartitionNumber = partitionFile.PartitionNumber,
SectionNumber = partitionFile.SectionNumber,
-#endif
Tags = partitionFile.Tags,
};
embeddingFileNameDetails.MarkProcessedBy(this);
diff --git a/service/Core/Handlers/SaveRecordsHandler.cs b/service/Core/Handlers/SaveRecordsHandler.cs
index 6c3874636..77c934f4a 100644
--- a/service/Core/Handlers/SaveRecordsHandler.cs
+++ b/service/Core/Handlers/SaveRecordsHandler.cs
@@ -133,13 +133,8 @@ public SaveRecordsHandler(
fileId: embeddingFile.File.ParentId,
partitionFileId: embeddingFile.File.SourcePartitionId,
partitionContent: partitionContent,
-#if KernelMemoryDev
partitionNumber: embeddingFile.File.PartitionNumber,
sectionNumber: embeddingFile.File.SectionNumber,
-#else
- partitionNumber: 0,
- sectionNumber: 0,
-#endif
partitionEmbedding: embeddingData.Vector,
embeddingGeneratorProvider: embeddingData.GeneratorProvider,
embeddingGeneratorName: embeddingData.GeneratorName,
@@ -202,13 +197,8 @@ public SaveRecordsHandler(
fileId: file.File.ParentId,
partitionFileId: file.File.Id,
partitionContent: partitionContent,
-#if KernelMemoryDev
partitionNumber: partitionFileDetails.PartitionNumber,
sectionNumber: partitionFileDetails.SectionNumber,
-#else
- partitionNumber: 0,
- sectionNumber: 0,
-#endif
partitionEmbedding: new Embedding(),
embeddingGeneratorProvider: "",
embeddingGeneratorName: "",
@@ -368,11 +358,9 @@ private static MemoryRecord PrepareRecord(
// Partition ID. Filtering used for purge.
record.Tags.Add(Constants.ReservedFilePartitionTag, partitionFileId);
-#if KernelMemoryDev
// Partition number (starting from 0) and Page number (provided by text extractor)
record.Tags.Add(Constants.ReservedFilePartitionNumberTag, $"{partitionNumber}");
record.Tags.Add(Constants.ReservedFileSectionNumberTag, $"{sectionNumber}");
-#endif
/*
* TIMESTAMP and USER TAGS
diff --git a/service/Core/Handlers/TextExtractionHandler.cs b/service/Core/Handlers/TextExtractionHandler.cs
index c0cf6aecb..13ecbc1a4 100644
--- a/service/Core/Handlers/TextExtractionHandler.cs
+++ b/service/Core/Handlers/TextExtractionHandler.cs
@@ -67,9 +67,7 @@ public TextExtractionHandler(
var sourceFile = uploadedFile.Name;
var destFile = $"{uploadedFile.Name}.extract.txt";
-#if KernelMemoryDev
var destFile2 = $"{uploadedFile.Name}.extract.json";
-#endif
BinaryData fileContent = await this._orchestrator.ReadFileAsync(pipeline, sourceFile, cancellationToken).ConfigureAwait(false);
string text = string.Empty;
@@ -104,7 +102,6 @@ public TextExtractionHandler(
destFileDetails.MarkProcessedBy(this);
uploadedFile.GeneratedFiles.Add(destFile, destFileDetails);
-#if KernelMemoryDev
// Structured content (pages)
this._log.LogDebug("Saving extracted content {0}", destFile2);
await this._orchestrator.WriteFileAsync(pipeline, destFile2, new BinaryData(content), cancellationToken).ConfigureAwait(false);
@@ -120,7 +117,6 @@ public TextExtractionHandler(
};
destFile2Details.MarkProcessedBy(this);
uploadedFile.GeneratedFiles.Add(destFile2, destFile2Details);
-#endif
}
uploadedFile.MarkProcessedBy(this);
diff --git a/service/Core/Handlers/TextPartitioningHandler.cs b/service/Core/Handlers/TextPartitioningHandler.cs
index 77133bfb9..ad67726a1 100644
--- a/service/Core/Handlers/TextPartitioningHandler.cs
+++ b/service/Core/Handlers/TextPartitioningHandler.cs
@@ -150,9 +150,7 @@ public TextPartitioningHandler(
{
// TODO: turn partitions in objects with more details, e.g. page number
string text = partitions[partitionNumber];
-#if KernelMemoryDev
int sectionNumber = 0; // TODO: use this to store the page number (if any)
-#endif
BinaryData textData = new(text);
int tokenCount = this._tokenCounter(text);
@@ -169,10 +167,8 @@ public TextPartitioningHandler(
Size = text.Length,
MimeType = MimeTypes.PlainText,
ArtifactType = DataPipeline.ArtifactTypes.TextPartition,
-#if KernelMemoryDev
PartitionNumber = partitionNumber,
SectionNumber = sectionNumber,
-#endif
Tags = pipeline.Tags,
ContentSHA256 = textData.CalculateSHA256(),
};
diff --git a/service/Core/MemoryStorage/MemoryRecordExtensions.cs b/service/Core/MemoryStorage/MemoryRecordExtensions.cs
index d229800b4..b561837ab 100644
--- a/service/Core/MemoryStorage/MemoryRecordExtensions.cs
+++ b/service/Core/MemoryStorage/MemoryRecordExtensions.cs
@@ -34,15 +34,11 @@ public static string GetFileId(this MemoryRecord record, ILogger? log = null)
///
public static int GetPartitionNumber(this MemoryRecord record, ILogger? log = null)
{
-#if KernelMemoryDev
var value = record.GetTagValue(Constants.ReservedFilePartitionNumberTag, log);
if (string.IsNullOrEmpty(value))
{
return 0;
}
-#else
- var value = "0";
-#endif
return int.TryParse(value, out int number) ? number : 0;
}
@@ -52,15 +48,11 @@ public static int GetPartitionNumber(this MemoryRecord record, ILogger? log = nu
///
public static int GetSectionNumber(this MemoryRecord record, ILogger? log = null)
{
-#if KernelMemoryDev
var value = record.GetTagValue(Constants.ReservedFileSectionNumberTag, log);
if (string.IsNullOrEmpty(value))
{
return 0;
}
-#else
- var value = "0";
-#endif
return int.TryParse(value, out int number) ? number : 0;
}
diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs
index 56b53e0b4..d22c25c93 100644
--- a/service/Core/Search/SearchClient.cs
+++ b/service/Core/Search/SearchClient.cs
@@ -157,10 +157,8 @@ public async Task SearchAsync(
{
Text = partitionText,
Relevance = (float)relevance,
-#if KernelMemoryDev
PartitionNumber = memory.GetPartitionNumber(this._log),
SectionNumber = memory.GetSectionNumber(),
-#endif
LastUpdate = memory.GetLastUpdate(),
Tags = memory.Tags,
});
diff --git a/tools/InteractiveSetup/InteractiveSetup.csproj b/tools/InteractiveSetup/InteractiveSetup.csproj
index ed8eaafde..bd9e04fb8 100644
--- a/tools/InteractiveSetup/InteractiveSetup.csproj
+++ b/tools/InteractiveSetup/InteractiveSetup.csproj
@@ -9,7 +9,7 @@
-
+