Skip to content

Commit

Permalink
Merge pull request #39 from koculu/garbage-collection-on-startup
Browse files Browse the repository at this point in the history
Garbage collection on startup
  • Loading branch information
koculu authored Jun 17, 2023
2 parents f8b01ad + ea5e49e commit 17e1730
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 21 deletions.
36 changes: 36 additions & 0 deletions src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,42 @@ public void IntNullableIntDeleteTest()
Assert.That(data.ContainsKey(3), Is.True);
}

/// <summary>
/// Verifies that reopening a tree with
/// <c>EnableSingleSegmentGarbageCollection</c> turned on drops the deleted
/// record from the mutable segment, and that repeated reopen cycles keep
/// producing the same result.
/// </summary>
[Test]
public void IntStringGarbageCollectionTest()
{
    var directory = "data/IntStringGarbageCollectionTest";

    // Start from an empty directory so leftovers of earlier runs cannot
    // influence the segment lengths asserted below.
    if (Directory.Exists(directory))
    {
        Directory.Delete(directory, true);
    }

    // Session 1: create the tree, add three records and delete one.
    // The length assertion expects 3 here, i.e. the deleted record is
    // still counted by the mutable segment.
    using (var tree = new ZoneTreeFactory<int, string>()
        .SetDataDirectory(directory)
        .OpenOrCreate())
    {
        tree.TryAtomicAdd(1, "1");
        tree.TryAtomicAdd(2, "2");
        tree.TryAtomicAdd(3, "3");
        tree.TryDelete(2);
        Assert.That(tree.ContainsKey(1), Is.True);
        Assert.That(tree.ContainsKey(2), Is.False);
        Assert.That(tree.ContainsKey(3), Is.True);
        Assert.That(tree.Maintenance.MutableSegment.Length, Is.EqualTo(3));
    }

    // Sessions 2-4: reopen with garbage collection enabled. Every reopen
    // must see only the two live records in the mutable segment.
    var remainingReloads = 3;
    while (remainingReloads-- > 0)
    {
        using var tree = new ZoneTreeFactory<int, string>()
            .Configure(options => options.EnableSingleSegmentGarbageCollection = true)
            .SetDataDirectory(directory)
            .Open();
        Assert.That(tree.ContainsKey(1), Is.True);
        Assert.That(tree.ContainsKey(2), Is.False);
        Assert.That(tree.ContainsKey(3), Is.True);
        Assert.That(tree.Maintenance.MutableSegment.Length, Is.EqualTo(2));
    }
}

[TestCase(true)]
[TestCase(false)]
public void StringIntTreeTest(bool useSparseArray)
Expand Down
29 changes: 26 additions & 3 deletions src/ZoneTree/Core/ZoneTreeLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Tenray.ZoneTree.Segments.InMemory;
using Tenray.ZoneTree.Segments.MultiPart;
using Tenray.ZoneTree.Segments.NullDisk;
using Tenray.ZoneTree.WAL;

namespace Tenray.ZoneTree.Core;

Expand Down Expand Up @@ -145,11 +146,17 @@ void ValidateSegmentOrder()
}
}

void LoadMutableSegment(long maximumOpIndex)
IWriteAheadLog<TKey, TValue> LoadMutableSegment(long maximumOpIndex,
bool collectGarbage)
{
var loader = new MutableSegmentLoader<TKey, TValue>(Options);
MutableSegment = loader
.LoadMutableSegment(ZoneTreeMeta.MutableSegment, maximumOpIndex);
.LoadMutableSegment(
ZoneTreeMeta.MutableSegment,
maximumOpIndex,
collectGarbage,
out var wal);
return wal;
}

long LoadReadOnlySegments()
Expand Down Expand Up @@ -225,13 +232,29 @@ void SetMaximumId()
maximumId = bs.Count > 0 ? bs.Max() : 0;
SetMaximumSegmentId(maximumId);
}

public ZoneTree<TKey, TValue> LoadZoneTree()
{
LoadZoneTreeMeta();
LoadZoneTreeMetaWAL();
SetMaximumId();
var maximumOpIndex = LoadReadOnlySegments();
LoadMutableSegment(maximumOpIndex);
bool collectGarbage = Options.EnableSingleSegmentGarbageCollection && !ZoneTreeMeta.HasDiskSegment && ReadOnlySegments.Count == 0;
var mutableSegmentWal = LoadMutableSegment(maximumOpIndex, collectGarbage);
if (collectGarbage)
{
var len = MutableSegment.Length;
var keys = new TKey[len];
var values = new TValue[len];
var iterator = MutableSegment.GetSeekableIterator();
var i = 0;
while (iterator.Next())
{
keys[i] = iterator.CurrentKey;
values[i++] = iterator.CurrentValue;
}
mutableSegmentWal.ReplaceWriteAheadLog(keys, values, true);
}
LoadDiskSegment();
LoadBottomSegments();
var zoneTree = new ZoneTree<TKey, TValue>(Options, ZoneTreeMeta,
Expand Down
4 changes: 3 additions & 1 deletion src/ZoneTree/Core/ZoneTreeMeta.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public sealed class ZoneTreeMeta
public int DiskSegmentMaxItemCount { get; set; } = 20_000_000;

public WriteAheadLogOptions WriteAheadLogOptions { get; set; }

public DiskSegmentOptions DiskSegmentOptions { get; set; }

public long MutableSegment { get; set; }
Expand All @@ -31,4 +31,6 @@ public sealed class ZoneTreeMeta
public long DiskSegment { get; set; }

public IReadOnlyList<long> BottomSegments { get; set; }

/// <summary>
/// Indicates whether the tree references any disk-resident data: a disk
/// segment (non-zero <see cref="DiskSegment"/> id) or at least one entry
/// in <see cref="BottomSegments"/>.
/// </summary>
/// <remarks>
/// The loader only performs single-segment garbage collection when this is
/// <c>false</c>. Hard-deleting the soft-deleted records of the mutable
/// segment is safe only if no other segment can still hold an older value
/// for the same key, so the presence of either kind of segment must make
/// this property <c>true</c>. Requiring both (logical AND) would report
/// <c>false</c> for a tree that has a disk segment but no bottom segments.
/// A <c>null</c> <see cref="BottomSegments"/> list counts as empty.
/// </remarks>
public bool HasDiskSegment => DiskSegment != 0 || BottomSegments?.Count > 0;
}
4 changes: 2 additions & 2 deletions src/ZoneTree/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
<Authors>Ahmed Yasin Koculu</Authors>
<PackageId>ZoneTree</PackageId>
<Title>ZoneTree</Title>
<ProductVersion>1.6.4.0</ProductVersion>
<Version>1.6.4.0</Version>
<ProductVersion>1.6.5.0</ProductVersion>
<Version>1.6.5.0</Version>
<Authors>Ahmed Yasin Koculu</Authors>
<AssemblyTitle>ZoneTree</AssemblyTitle>
<Description>ZoneTree is a persistent, high-performance, transactional, ACID-compliant ordered key-value database for .NET. It can operate in memory or on local/cloud storage.</Description>
Expand Down
7 changes: 7 additions & 0 deletions src/ZoneTree/Options/ZoneTreeOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,13 @@ public void Validate()
/// </summary>
public DeleteValueConfigurationValidation DeleteValueConfigurationValidation { get; set; }

/// <summary>
/// If the ZoneTree contains only a single segment (which is the mutable segment),
/// there is an opportunity to perform a hard delete of the soft deleted values.
/// If enabled, the tree performs garbage collection on load if it is applicable.
/// The property has no initializer, so it is <c>false</c> unless set explicitly.
/// </summary>
/// <remarks>
/// "Applicable" is decided by the loader at load time: the option must be
/// enabled, the tree metadata must report no disk segment, and there must be
/// no read-only segments. When the collection runs, the loader also rewrites
/// the mutable segment's write-ahead log from the surviving records.
/// </remarks>
public bool EnableSingleSegmentGarbageCollection { get; set; }

/// <summary>
/// Creates default delete delegates for nullable types.
/// </summary>
Expand Down
37 changes: 35 additions & 2 deletions src/ZoneTree/Segments/InMemory/MutableSegment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ public MutableSegment(
ZoneTreeOptions<TKey, TValue> options,
IReadOnlyList<TKey> keys,
IReadOnlyList<TValue> values,
long nextOpIndex)
long nextOpIndex,
bool collectGarbage)
{
SegmentId = segmentId;
WriteAheadLog = wal;
Expand All @@ -85,7 +86,16 @@ public MutableSegment(

MarkValueDeleted = options.MarkValueDeleted;
MutableSegmentMaxItemCount = options.MutableSegmentMaxItemCount;
LoadLogEntries(keys, values);
if (collectGarbage)
{
// If there isn't any disk segment and readonly segment,
// it is safe to hard delete the soft deleted values.
LoadLogEntriesWithGarbageCollection(keys, values);
}
else
{
LoadLogEntries(keys, values);
}
}

void LoadLogEntries(IReadOnlyList<TKey> keys, IReadOnlyList<TValue> values)
Expand All @@ -101,6 +111,29 @@ void LoadLogEntries(IReadOnlyList<TKey> keys, IReadOnlyList<TValue> values)
}
}

/// <summary>
/// Rebuilds the in-memory tree from the write-ahead log entries while
/// dropping garbage: for every key only its most recent log entry is
/// considered, and that entry is inserted only when its value is not
/// marked as deleted.
/// </summary>
/// <param name="keys">Keys of the log entries, oldest first.</param>
/// <param name="values">Values of the log entries; <c>values[i]</c> belongs to <c>keys[i]</c>.</param>
void LoadLogEntriesWithGarbageCollection(
    IReadOnlyList<TKey> keys,
    IReadOnlyList<TValue> values)
{
    // Tracks the keys already decided. The byte payload is a dummy; the
    // tree is used purely as an ordered set with the configured comparer.
    var seenKeys =
        new BTree<TKey, byte>(Options.Comparer, Collections.BTree.Lock.BTreeLockMode.NoLock);
    var isDeleted = Options.IsValueDeleted;

    // Walk from the newest entry to the oldest so that the first occurrence
    // of a key is its latest state; all older occurrences are skipped.
    var index = keys.Count;
    while (--index >= 0)
    {
        var currentKey = keys[index];
        if (seenKeys.ContainsKey(in currentKey))
        {
            continue;
        }

        var currentValue = values[index];

        // Remember the key even when its latest value is a deletion,
        // otherwise an older live value would be picked up later.
        seenKeys.Upsert(in currentKey, 1, out _);

        if (!isDeleted(in currentValue))
        {
            BTree.Upsert(in currentKey, in currentValue, out _);
        }
    }
}

public bool ContainsKey(in TKey key)
{
return BTree.ContainsKey(key);
Expand Down
20 changes: 15 additions & 5 deletions src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Tenray.ZoneTree.Exceptions;
using Tenray.ZoneTree.Core;
using Tenray.ZoneTree.Options;
using Tenray.ZoneTree.WAL;

namespace Tenray.ZoneTree.Segments.InMemory;

Expand All @@ -14,9 +15,13 @@ public MutableSegmentLoader(
Options = options;
}

public IMutableSegment<TKey, TValue> LoadMutableSegment(long segmentId, long maximumOpIndex)
public IMutableSegment<TKey, TValue> LoadMutableSegment(
long segmentId,
long maximumOpIndex,
bool collectGarbage,
out IWriteAheadLog<TKey, TValue> wal)
{
var wal = Options.WriteAheadLogProvider
wal = Options.WriteAheadLogProvider
.GetOrCreateWAL(
segmentId,
ZoneTree<TKey, TValue>.SegmentWalCategory,
Expand All @@ -40,8 +45,13 @@ public IMutableSegment<TKey, TValue> LoadMutableSegment(long segmentId, long max
}
}
maximumOpIndex = Math.Max(result.MaximumOpIndex, maximumOpIndex);
return new MutableSegment<TKey, TValue>
(segmentId, wal, Options, result.Keys,
result.Values, maximumOpIndex + 1);
return new MutableSegment<TKey, TValue>(
segmentId,
wal,
Options,
result.Keys,
result.Values,
maximumOpIndex + 1,
collectGarbage);
}
}
26 changes: 18 additions & 8 deletions src/ZoneTree/ZoneTreeFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,20 @@ void FillValueSerializer()
new ByteArraySerializer() as ISerializer<TValue>;
}

/// <summary>
/// Initializes the sparse arrays of the disk segment and of every bottom
/// segment of a freshly loaded tree, using
/// <c>InitialSparseArrayLength</c> as the requested length. Does nothing
/// when that length is 1 or less.
/// </summary>
/// <param name="zoneTree">The loaded tree whose segments are initialized.</param>
void LoadInitialSparseArrays(ZoneTree<TKey, TValue> zoneTree)
{
    if (InitialSparseArrayLength > 1)
    {
        // The disk segment is handled on a separate task so that it
        // overlaps with the parallel pass over the bottom segments.
        var diskSegmentTask = Task.Run(
            () => zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));

        Parallel.ForEach(
            zoneTree.Maintenance.BottomSegments,
            (bottomSegment) =>
            {
                bottomSegment.InitSparseArray(InitialSparseArrayLength);
            });

        // Block until the disk segment is ready as well before returning.
        diskSegmentTask.Wait();
    }
}

/// <summary>
/// Opens or creates a ZoneTree.
/// </summary>
Expand All @@ -487,13 +501,7 @@ public IZoneTree<TKey, TValue> OpenOrCreate()
if (loader.ZoneTreeMetaExists)
{
var zoneTree = loader.LoadZoneTree();
var t1 = Task.Run(() =>
zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));
Parallel.ForEach(zoneTree.Maintenance.BottomSegments, (bs) =>
{
bs.InitSparseArray(InitialSparseArrayLength);
});
t1.Wait();
LoadInitialSparseArrays(zoneTree);
return zoneTree;
}
return new ZoneTree<TKey, TValue>(Options);
Expand Down Expand Up @@ -526,7 +534,9 @@ public IZoneTree<TKey, TValue> Open()
var loader = new ZoneTreeLoader<TKey, TValue>(Options);
if (!loader.ZoneTreeMetaExists)
throw new DatabaseNotFoundException();
return loader.LoadZoneTree();
var zoneTree = loader.LoadZoneTree();
LoadInitialSparseArrays(zoneTree);
return zoneTree;
}

/// <summary>
Expand Down

0 comments on commit 17e1730

Please sign in to comment.