Skip to content

Commit

Permalink
Upgrade to KM Abstractions 0.28
Browse files Browse the repository at this point in the history
  • Loading branch information
dluc committed Feb 13, 2024
1 parent 98e1c1f commit f7c2bd7
Show file tree
Hide file tree
Showing 11 changed files with 41 additions and 157 deletions.
2 changes: 1 addition & 1 deletion Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
</ItemGroup>
<!-- Kernel Memory -->
<ItemGroup>
<PackageVersion Include="Microsoft.KernelMemory.Abstractions" Version="0.27.240205.2" />
<PackageVersion Include="Microsoft.KernelMemory.Abstractions" Version="0.28.240212.1" />
<PackageVersion Include="KernelMemory.MemoryStorage.SqlServer" Version="1.3.1" />
<PackageVersion Include="FreeMindLabs.KernelMemory.Elasticsearch" Version="0.9.5" />
</ItemGroup>
Expand Down
106 changes: 39 additions & 67 deletions examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,108 +81,80 @@ public static async Task Main()
SearchResult relevant = await memory.SearchAsync(query: Query, minRelevance: MinRelevance, limit: Limit);
Console.WriteLine($"Relevant documents: {relevant.Results.Count}");

#if KernelMemoryDev
var relevantDocuments = new Dictionary<string, List<int>>();
foreach (Citation result in relevant.Results)
{
// Store the document IDs so we can load all their records later
relevantDocuments.Add(result.DocumentId, new List<int>());
Console.WriteLine($"Document ID: {result.DocumentId}");
Console.WriteLine($"Relevant partitions: {result.Partitions.Count}");
foreach (Citation.Partition partition in result.Partitions)
{
Console.WriteLine("--------------------------");
Console.WriteLine($"Partition number: {partition.PartitionNumber}");
Console.WriteLine($"Relevance: {partition.Relevance}\n");
Console.WriteLine(partition.Text);

relevantDocuments[result.DocumentId].Add(partition.PartitionNumber);
Console.WriteLine($" * Partition {partition.PartitionNumber}, relevance: {partition.Relevance}");
}

Console.WriteLine();
}

// For each relevant document
// Note: loops can be optimized for better perf, this code is only a demo
const int HowManyToAdd = 1;
Console.WriteLine("Fetching all document partitions...");
foreach (KeyValuePair<string, List<int>> relevantPartitionNumbers in relevantDocuments)
{
var docId = relevantPartitionNumbers.Key;
Console.WriteLine($"\nDocument ID: {docId}");

// Load all partitions. Note: the list might be out of order.
SearchResult all = await memory.SearchAsync("", filters: new[] { MemoryFilters.ByDocument(docId) }, limit: int.MaxValue);
List<Citation.Partition> allPartitionsContent = all.Results.FirstOrDefault()?.Partitions ?? new();
Console.WriteLine("--------------------------");

// Loop through the relevant partitions
foreach (int relevantPartitionNumber in relevantPartitionNumbers.Value)
// For each relevant partition, fetch the partition before it and the one after it
foreach (Citation.Partition partition in result.Partitions)
{
Console.WriteLine("--------------------------");
// Collect partitions in a sorted collection
var partitions = new SortedDictionary<int, Citation.Partition> { [partition.PartitionNumber] = partition };

// Use a data structure to order partitions by number
var result = new SortedDictionary<int, string>();
// Filters to fetch adjacent partitions
var filters = new List<MemoryFilter>
{
MemoryFilters.ByDocument(result.DocumentId).ByTag(Constants.ReservedFilePartitionNumberTag, $"{partition.PartitionNumber - 1}"),
MemoryFilters.ByDocument(result.DocumentId).ByTag(Constants.ReservedFilePartitionNumberTag, $"{partition.PartitionNumber + 1}")
};

// Loop all partitions, include <HowManyToAdd> before and <HowManyToAdd> after the relevant ones
foreach (Citation.Partition p in allPartitionsContent)
// Fetch adjacent partitions and add them to the sorted collection
SearchResult adjacentList = await memory.SearchAsync("", filters: filters, limit: 2);
foreach (Citation.Partition adjacent in adjacentList.Results.First().Partitions)
{
if (Math.Abs(p.PartitionNumber - relevantPartitionNumber) <= HowManyToAdd)
{
result.Add(p.PartitionNumber, p.Text);
}
partitions[adjacent.PartitionNumber] = adjacent;
}

// Show partition and adjacent ones in order
foreach (var p in result)
// Print partitions in order
foreach (var p in partitions)
{
Console.WriteLine($"Partition: {p.Key}");
Console.WriteLine(p.Value);
Console.WriteLine($"# Partition {p.Value.PartitionNumber}");
Console.WriteLine(p.Value.Text);
Console.WriteLine();
}

Console.WriteLine();
Console.WriteLine("--------------------------");
}

Console.WriteLine();
}
#endif
}
}

/* Result:
Token count: 2510
Importing memories...
Searching memories...
Relevant documents: 1
Document ID: example207
Relevant partitions: 2
* Partition 27, relevance: 0.8557962
* Partition 13, relevance: 0.85513425
--------------------------
Partition number: 27
Relevance: 0.8557962
As scientific interest in [...] or ancient microbial life.
--------------------------
Partition number: 13
Relevance: 0.85513425
# Partition 26
Dr. Mei Lin, a renowned ...
Gerald Marshall, the Chief [...] in astrobiological research."
# Partition 27
As scientific interest in ...
Fetching all document partitions...
Document ID: example207
# Partition 28
Meanwhile, back on Earth, the ...
--------------------------
Partition: 26
Dr. Mei Lin, a renowned [...] of life in the universe."
Partition: 27
As scientific interest [...] ancient microbial life.
Partition: 28
Meanwhile, back on Earth, [...] meaning in the universe.
# Partition 12
Appearing as a glowing, translucent ...
--------------------------
Partition: 12
Appearing as a glowing, [...] including its high CO2 levels.
Partition: 13
Gerald Marshall, the [...] in astrobiological research."
Partition: 14
While further studies [...] alien at the same time.
# Partition 13
Gerald Marshall, the Chief ...
# Partition 14
While further studies are ...
--------------------------
*/

2 changes: 0 additions & 2 deletions extensions/Redis/Redis/RedisConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ public class RedisConfig
{ Constants.ReservedDocumentIdTag, '|' },
{ Constants.ReservedFileIdTag, '|' },
{ Constants.ReservedFilePartitionTag, '|' },
#if KernelMemoryDev
{ Constants.ReservedFileSectionNumberTag, '|' },
#endif
{ Constants.ReservedFileTypeTag, '|' },
};

Expand Down
54 changes: 0 additions & 54 deletions service/Core/DataFormats/FileSection.cs

This file was deleted.

2 changes: 0 additions & 2 deletions service/Core/Handlers/GenerateEmbeddingsHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,8 @@ public GenerateEmbeddingsHandler(
Size = text.Length,
MimeType = MimeTypes.TextEmbeddingVector,
ArtifactType = DataPipeline.ArtifactTypes.TextEmbeddingVector,
#if KernelMemoryDev
PartitionNumber = partitionFile.PartitionNumber,
SectionNumber = partitionFile.SectionNumber,
#endif
Tags = partitionFile.Tags,
};
embeddingFileNameDetails.MarkProcessedBy(this);
Expand Down
12 changes: 0 additions & 12 deletions service/Core/Handlers/SaveRecordsHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,8 @@ public SaveRecordsHandler(
fileId: embeddingFile.File.ParentId,
partitionFileId: embeddingFile.File.SourcePartitionId,
partitionContent: partitionContent,
#if KernelMemoryDev
partitionNumber: embeddingFile.File.PartitionNumber,
sectionNumber: embeddingFile.File.SectionNumber,
#else
partitionNumber: 0,
sectionNumber: 0,
#endif
partitionEmbedding: embeddingData.Vector,
embeddingGeneratorProvider: embeddingData.GeneratorProvider,
embeddingGeneratorName: embeddingData.GeneratorName,
Expand Down Expand Up @@ -202,13 +197,8 @@ public SaveRecordsHandler(
fileId: file.File.ParentId,
partitionFileId: file.File.Id,
partitionContent: partitionContent,
#if KernelMemoryDev
partitionNumber: partitionFileDetails.PartitionNumber,
sectionNumber: partitionFileDetails.SectionNumber,
#else
partitionNumber: 0,
sectionNumber: 0,
#endif
partitionEmbedding: new Embedding(),
embeddingGeneratorProvider: "",
embeddingGeneratorName: "",
Expand Down Expand Up @@ -368,11 +358,9 @@ private static MemoryRecord PrepareRecord(
// Partition ID. Filtering used for purge.
record.Tags.Add(Constants.ReservedFilePartitionTag, partitionFileId);

#if KernelMemoryDev
// Partition number (starting from 0) and Page number (provided by text extractor)
record.Tags.Add(Constants.ReservedFilePartitionNumberTag, $"{partitionNumber}");
record.Tags.Add(Constants.ReservedFileSectionNumberTag, $"{sectionNumber}");
#endif

/*
* TIMESTAMP and USER TAGS
Expand Down
4 changes: 0 additions & 4 deletions service/Core/Handlers/TextExtractionHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ public TextExtractionHandler(

var sourceFile = uploadedFile.Name;
var destFile = $"{uploadedFile.Name}.extract.txt";
#if KernelMemoryDev
var destFile2 = $"{uploadedFile.Name}.extract.json";
#endif
BinaryData fileContent = await this._orchestrator.ReadFileAsync(pipeline, sourceFile, cancellationToken).ConfigureAwait(false);

string text = string.Empty;
Expand Down Expand Up @@ -104,7 +102,6 @@ public TextExtractionHandler(
destFileDetails.MarkProcessedBy(this);
uploadedFile.GeneratedFiles.Add(destFile, destFileDetails);

#if KernelMemoryDev
// Structured content (pages)
this._log.LogDebug("Saving extracted content {0}", destFile2);
await this._orchestrator.WriteFileAsync(pipeline, destFile2, new BinaryData(content), cancellationToken).ConfigureAwait(false);
Expand All @@ -120,7 +117,6 @@ public TextExtractionHandler(
};
destFile2Details.MarkProcessedBy(this);
uploadedFile.GeneratedFiles.Add(destFile2, destFile2Details);
#endif
}

uploadedFile.MarkProcessedBy(this);
Expand Down
4 changes: 0 additions & 4 deletions service/Core/Handlers/TextPartitioningHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,7 @@ public TextPartitioningHandler(
{
// TODO: turn partitions into objects with more details, e.g. page number
string text = partitions[partitionNumber];
#if KernelMemoryDev
int sectionNumber = 0; // TODO: use this to store the page number (if any)
#endif
BinaryData textData = new(text);

int tokenCount = this._tokenCounter(text);
Expand All @@ -169,10 +167,8 @@ public TextPartitioningHandler(
Size = text.Length,
MimeType = MimeTypes.PlainText,
ArtifactType = DataPipeline.ArtifactTypes.TextPartition,
#if KernelMemoryDev
PartitionNumber = partitionNumber,
SectionNumber = sectionNumber,
#endif
Tags = pipeline.Tags,
ContentSHA256 = textData.CalculateSHA256(),
};
Expand Down
8 changes: 0 additions & 8 deletions service/Core/MemoryStorage/MemoryRecordExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,11 @@ public static string GetFileId(this MemoryRecord record, ILogger? log = null)
/// </summary>
public static int GetPartitionNumber(this MemoryRecord record, ILogger? log = null)
{
#if KernelMemoryDev
var value = record.GetTagValue(Constants.ReservedFilePartitionNumberTag, log);
if (string.IsNullOrEmpty(value))
{
return 0;
}
#else
var value = "0";
#endif

return int.TryParse(value, out int number) ? number : 0;
}
Expand All @@ -52,15 +48,11 @@ public static int GetPartitionNumber(this MemoryRecord record, ILogger? log = nu
/// </summary>
public static int GetSectionNumber(this MemoryRecord record, ILogger? log = null)
{
#if KernelMemoryDev
var value = record.GetTagValue(Constants.ReservedFileSectionNumberTag, log);
if (string.IsNullOrEmpty(value))
{
return 0;
}
#else
var value = "0";
#endif

return int.TryParse(value, out int number) ? number : 0;
}
Expand Down
2 changes: 0 additions & 2 deletions service/Core/Search/SearchClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,8 @@ public async Task<SearchResult> SearchAsync(
{
Text = partitionText,
Relevance = (float)relevance,
#if KernelMemoryDev
PartitionNumber = memory.GetPartitionNumber(this._log),
SectionNumber = memory.GetSectionNumber(),
#endif
LastUpdate = memory.GetLastUpdate(),
Tags = memory.Tags,
});
Expand Down
2 changes: 1 addition & 1 deletion tools/InteractiveSetup/InteractiveSetup.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.27.240205.2" Condition="'$(SolutionName)' != 'KernelMemoryDev'" />
<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.28.240212.1" Condition="'$(SolutionName)' != 'KernelMemoryDev'" />
<ProjectReference Include="..\..\service\Abstractions\Abstractions.csproj" Condition="'$(SolutionName)' == 'KernelMemoryDev'"/>
</ItemGroup>

Expand Down

0 comments on commit f7c2bd7

Please sign in to comment.