diff --git a/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs b/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs index 72454cb..751da03 100644 --- a/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs +++ b/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs @@ -120,10 +120,16 @@ public void IntStringGarbageCollectionTest() data.TryAtomicAdd(2, "2"); data.TryAtomicAdd(3, "3"); data.TryDelete(2); + data.TryAtomicAdd(4, "4"); + data.TryAtomicUpdate(3, "33"); + data.TryDelete(2); Assert.That(data.ContainsKey(1), Is.True); Assert.That(data.ContainsKey(2), Is.False); Assert.That(data.ContainsKey(3), Is.True); - Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(3)); + Assert.That(data.ContainsKey(4), Is.True); + data.TryGet(3, out var value3); + Assert.That(value3, Is.EqualTo("33")); + Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(4)); } // reload tree and check the length @@ -136,7 +142,95 @@ public void IntStringGarbageCollectionTest() Assert.That(data.ContainsKey(1), Is.True); Assert.That(data.ContainsKey(2), Is.False); Assert.That(data.ContainsKey(3), Is.True); - Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(2)); + Assert.That(data.ContainsKey(4), Is.True); + data.TryGet(3, out var value3); + Assert.That(value3, Is.EqualTo("33")); + Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(3)); + } + } + + [Test] + public void IntStringReadOnlySegmentLoadingTest() + { + var dataPath = "data/IntStringReadOnlySegmentLoadingTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + + // load and populate tree + { + using var data = new ZoneTreeFactory() + .SetDataDirectory(dataPath) + .OpenOrCreate(); + data.TryAtomicAdd(1, "1"); + data.TryAtomicAdd(2, "2"); + data.TryAtomicAdd(3, "3"); + data.TryDelete(2); + data.TryAtomicAdd(4, "4"); + data.TryAtomicUpdate(3, "33"); + data.TryDelete(2); + Assert.That(data.ContainsKey(1), Is.True); + Assert.That(data.ContainsKey(2), Is.False); + 
Assert.That(data.ContainsKey(3), Is.True); + Assert.That(data.ContainsKey(4), Is.True); + data.TryGet(3, out var value3); + Assert.That(value3, Is.EqualTo("33")); + Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(4)); + data.Maintenance.MoveMutableSegmentForward(); + Assert.That(data.Maintenance.ReadOnlySegments[0].Length, Is.EqualTo(4)); + } + + // reload tree and check the length + for (var i = 0; i < 3; ++i) + { + using var data = new ZoneTreeFactory() + .SetDataDirectory(dataPath) + .Open(); + Assert.That(data.ContainsKey(1), Is.True); + Assert.That(data.ContainsKey(2), Is.False); + Assert.That(data.ContainsKey(3), Is.True); + Assert.That(data.ContainsKey(4), Is.True); + data.TryGet(3, out var value3); + Assert.That(value3, Is.EqualTo("33")); + Assert.That(data.Maintenance.ReadOnlySegments[0].Length, Is.EqualTo(4)); + } + } + + [Test] + public void IntStringDiskSegmentLoadingTest() + { + var dataPath = "data/IntStringDiskSegmentLoadingTest"; + if (Directory.Exists(dataPath)) + Directory.Delete(dataPath, true); + + // load and populate tree + { + using var data = new ZoneTreeFactory() + .SetDataDirectory(dataPath) + .OpenOrCreate(); + data.TryAtomicAdd(1, "1"); + data.TryAtomicAdd(2, "2"); + data.TryAtomicAdd(3, "3"); + data.TryDelete(2); + Assert.That(data.ContainsKey(1), Is.True); + Assert.That(data.ContainsKey(2), Is.False); + Assert.That(data.ContainsKey(3), Is.True); + Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(3)); + data.Maintenance.MoveMutableSegmentForward(); + Assert.That(data.Maintenance.ReadOnlySegments[0].Length, Is.EqualTo(3)); + data.Maintenance.StartMergeOperation().Join(); + Assert.That(data.Maintenance.DiskSegment.Length, Is.EqualTo(2)); + } + + // reload tree and check the length + for (var i = 0; i < 3; ++i) + { + using var data = new ZoneTreeFactory() + .SetDataDirectory(dataPath) + .Open(); + Assert.That(data.ContainsKey(1), Is.True); + Assert.That(data.ContainsKey(2), Is.False); + 
Assert.That(data.ContainsKey(3), Is.True); + Assert.That(data.Maintenance.DiskSegment.Length, Is.EqualTo(2)); } } diff --git a/src/ZoneTree/Collections/DictionaryOfDictionaryWithWAL.cs b/src/ZoneTree/Collections/DictionaryOfDictionaryWithWAL.cs index aea2c5b..670b3fe 100644 --- a/src/ZoneTree/Collections/DictionaryOfDictionaryWithWAL.cs +++ b/src/ZoneTree/Collections/DictionaryOfDictionaryWithWAL.cs @@ -73,7 +73,7 @@ void LoadFromWriteAheadLog() var len = keys.Count; for (var i = 0; i < len; ++i) { - Upsert(keys[i], values[i].Value1, values[i].Value2); + UpsertWithoutWal(keys[i], values[i].Value1, values[i].Value2); } } @@ -113,6 +113,22 @@ public bool Upsert(in TKey1 key1, in TKey2 key2, in TValue value) return false; } + bool UpsertWithoutWal(in TKey1 key1, in TKey2 key2, in TValue value) + { + if (Dictionary.TryGetValue(key1, out var dic)) + { + dic.Remove(key2); + dic.Add(key2, value); + return true; + } + dic = new Dictionary + { + { key2, value } + }; + Dictionary[key1] = dic; + return false; + } + long NextOpIndex() { return IdProvider.NextId(); diff --git a/src/ZoneTree/Collections/DictionaryWithWal.cs b/src/ZoneTree/Collections/DictionaryWithWal.cs index a7edd08..d486676 100644 --- a/src/ZoneTree/Collections/DictionaryWithWal.cs +++ b/src/ZoneTree/Collections/DictionaryWithWal.cs @@ -70,7 +70,7 @@ public DictionaryWithWAL( void LoadFromWriteAheadLog() { - var result = WriteAheadLog.ReadLogEntries(false, false, false); + var result = WriteAheadLog.ReadLogEntries(false, false, true); if (!result.Success) { if (result.HasFoundIncompleteTailRecord) @@ -87,7 +87,7 @@ void LoadFromWriteAheadLog() } (var newKeys, var newValues) = WriteAheadLogUtility - .StableSortAndCleanUpDeletedKeys( + .StableSortAndCleanUpDeletedAndDuplicatedKeys( result.Keys, result.Values, Comparer, diff --git a/src/ZoneTree/Core/ZoneTreeLoader.cs b/src/ZoneTree/Core/ZoneTreeLoader.cs index f85fbbf..d05188e 100644 --- a/src/ZoneTree/Core/ZoneTreeLoader.cs +++ 
b/src/ZoneTree/Core/ZoneTreeLoader.cs @@ -244,16 +244,19 @@ public ZoneTree LoadZoneTree() if (collectGarbage) { var len = MutableSegment.Length; - var keys = new TKey[len]; - var values = new TValue[len]; - var iterator = MutableSegment.GetSeekableIterator(); - var i = 0; - while (iterator.Next()) + if (mutableSegmentWal.InitialLength != MutableSegment.Length) { - keys[i] = iterator.CurrentKey; - values[i++] = iterator.CurrentValue; + var keys = new TKey[len]; + var values = new TValue[len]; + var iterator = MutableSegment.GetSeekableIterator(); + var i = 0; + while (iterator.Next()) + { + keys[i] = iterator.CurrentKey; + values[i++] = iterator.CurrentValue; + } + mutableSegmentWal.ReplaceWriteAheadLog(keys, values, true); } - mutableSegmentWal.ReplaceWriteAheadLog(keys, values, true); } LoadDiskSegment(); LoadBottomSegments(); diff --git a/src/ZoneTree/Segments/InMemory/ReadOnlySegmentLoader.cs b/src/ZoneTree/Segments/InMemory/ReadOnlySegmentLoader.cs index 558bbea..515b210 100644 --- a/src/ZoneTree/Segments/InMemory/ReadOnlySegmentLoader.cs +++ b/src/ZoneTree/Segments/InMemory/ReadOnlySegmentLoader.cs @@ -46,11 +46,10 @@ public IReadOnlySegment LoadReadOnlySegment(long segmentId) ZoneTree.SegmentWalCategory); (var newKeys, var newValues) = WriteAheadLogUtility - .StableSortAndCleanUpDeletedKeys( + .StableSortAndCleanUpDuplicatedKeys( result.Keys, result.Values, - Options.Comparer, - Options.IsValueDeleted); + Options.Comparer); var ros = new ReadOnlySegment( segmentId, diff --git a/src/ZoneTree/WAL/Async/AsyncCompressedFileSystemWriteAheadLog.cs b/src/ZoneTree/WAL/Async/AsyncCompressedFileSystemWriteAheadLog.cs index 49e9aa6..8c3357c 100644 --- a/src/ZoneTree/WAL/Async/AsyncCompressedFileSystemWriteAheadLog.cs +++ b/src/ZoneTree/WAL/Async/AsyncCompressedFileSystemWriteAheadLog.cs @@ -83,6 +83,8 @@ public override int GetHashCode() public bool EnableIncrementalBackup { get; set; } + public int InitialLength { get; private set; } + public 
AsyncCompressedFileSystemWriteAheadLog( ILogger logger, IFileStreamProvider fileStreamProvider, @@ -222,7 +224,7 @@ public WriteAheadLogReadLogEntriesResult ReadLogEntries( bool stopReadOnChecksumFailure, bool sortByOpIndexes) { - return WriteAheadLogEntryReader.ReadLogEntries( + var result = WriteAheadLogEntryReader.ReadLogEntries( Logger, FileStream, stopReadOnException, @@ -230,6 +232,8 @@ public WriteAheadLogReadLogEntriesResult ReadLogEntries( LogEntry.ReadLogEntry, DeserializeLogEntry, sortByOpIndexes); + InitialLength = result.Keys.Count; + return result; } (bool isValid, TKey key, TValue value, long opIndex) DeserializeLogEntry(in LogEntry logEntry) diff --git a/src/ZoneTree/WAL/IWriteAheadLog.cs b/src/ZoneTree/WAL/IWriteAheadLog.cs index 66e7bdd..003b0bc 100644 --- a/src/ZoneTree/WAL/IWriteAheadLog.cs +++ b/src/ZoneTree/WAL/IWriteAheadLog.cs @@ -8,6 +8,12 @@ public interface IWriteAheadLog : IDisposable bool EnableIncrementalBackup { get; set; } + /// + /// The initial record count of the log. + /// It is available after the ReadLogEntries call. + /// + int InitialLength { get; } + void Append(in TKey key, in TValue value, long opIndex); void Drop(); @@ -27,13 +33,13 @@ WriteAheadLogReadLogEntriesResult ReadLogEntries( /// disable backup regardless of wal flag. /// the difference: old file length - new file length. long ReplaceWriteAheadLog(TKey[] keys, TValue[] values, bool disableBackup); - + /// /// Informs the write ahead log that no further writes will be sent. /// to let WAL optimize itself. /// void MarkFrozen(); - + /// /// Truncates incomplete tail record. 
/// diff --git a/src/ZoneTree/WAL/Null/NullWriteAheadLog.cs b/src/ZoneTree/WAL/Null/NullWriteAheadLog.cs index f0cfac4..535fc76 100644 --- a/src/ZoneTree/WAL/Null/NullWriteAheadLog.cs +++ b/src/ZoneTree/WAL/Null/NullWriteAheadLog.cs @@ -8,6 +8,8 @@ public sealed class NullWriteAheadLog : IWriteAheadLog : IWriteAheadLog ReadLogEntries( bool stopReadOnChecksumFailure, bool sortByOpIndexes) { - return WriteAheadLogEntryReader.ReadLogEntries( + var result = WriteAheadLogEntryReader.ReadLogEntries( Logger, FileStream.ToStream(), stopReadOnException, @@ -96,6 +98,8 @@ public WriteAheadLogReadLogEntriesResult ReadLogEntries( LogEntry.ReadLogEntry, DeserializeLogEntry, sortByOpIndexes); + InitialLength = result.Keys.Count; + return result; } (bool isValid, TKey key, TValue value, long opIndex) DeserializeLogEntry(in LogEntry logEntry) diff --git a/src/ZoneTree/WAL/SyncCompressed/SyncCompressedFileSystemWriteAheadLog.cs b/src/ZoneTree/WAL/SyncCompressed/SyncCompressedFileSystemWriteAheadLog.cs index 12d1137..ae0c012 100644 --- a/src/ZoneTree/WAL/SyncCompressed/SyncCompressedFileSystemWriteAheadLog.cs +++ b/src/ZoneTree/WAL/SyncCompressed/SyncCompressedFileSystemWriteAheadLog.cs @@ -38,6 +38,8 @@ public sealed class SyncCompressedFileSystemWriteAheadLog : IWrite public bool EnableIncrementalBackup { get; set; } + public int InitialLength { get; private set; } + public SyncCompressedFileSystemWriteAheadLog( ILogger logger, IFileStreamProvider fileStreamProvider, @@ -98,7 +100,7 @@ public WriteAheadLogReadLogEntriesResult ReadLogEntries( bool stopReadOnChecksumFailure, bool sortByOpIndexes) { - return WriteAheadLogEntryReader.ReadLogEntries( + var result = WriteAheadLogEntryReader.ReadLogEntries( Logger, FileStream, stopReadOnException, @@ -106,6 +108,8 @@ public WriteAheadLogReadLogEntriesResult ReadLogEntries( LogEntry.ReadLogEntry, DeserializeLogEntry, sortByOpIndexes); + InitialLength = result.Keys.Count; + return result; } (bool isValid, TKey key, TValue value, long 
opIndex) DeserializeLogEntry(in LogEntry logEntry) diff --git a/src/ZoneTree/WAL/WriteAheadLogUtility.cs b/src/ZoneTree/WAL/WriteAheadLogUtility.cs index 4490f42..5d2a944 100644 --- a/src/ZoneTree/WAL/WriteAheadLogUtility.cs +++ b/src/ZoneTree/WAL/WriteAheadLogUtility.cs @@ -7,7 +7,7 @@ namespace Tenray.ZoneTree.WAL; public static class WriteAheadLogUtility { public static (IReadOnlyList keys, IReadOnlyList values) - StableSortAndCleanUpDeletedKeys( + StableSortAndCleanUpDeletedAndDuplicatedKeys( IReadOnlyList keys, IReadOnlyList values, IRefComparer comparer, @@ -15,14 +15,14 @@ public static (IReadOnlyList keys, IReadOnlyList values) { // WAL has unsorted data. Need to do following. // 1. stable sort keys and values based on keys - // 2. discard deleted items + // 2. discard deleted and duplicated items // 3. create new keys and values arrays. int len = keys.Count; var list = new KeyValuePocket[len]; var pocketComparer = new KeyValuePocketRefComparer(comparer); for (var i = 0; i < len; ++i) - list[i] = new KeyValuePocket(keys[i], values[i]); + list[len - i - 1] = new KeyValuePocket(keys[i], values[i]); TimSort>.Sort(list, pocketComparer); var newKeys = new List(len); @@ -47,6 +47,57 @@ public static (IReadOnlyList keys, IReadOnlyList values) } newKeys.Add(key); newValues.Add(value); + + // discard duplicated keys found in log entries. + while (++i < len) + { + if (comparer.Compare(key, list[i].Key) != 0) + { + --i; + break; + } + } + } + return (newKeys, newValues); + } + + public static (IReadOnlyList keys, IReadOnlyList values) + StableSortAndCleanUpDuplicatedKeys( + IReadOnlyList keys, + IReadOnlyList values, + IRefComparer comparer) + { + // WAL has unsorted data. Need to do following. + // 1. stable sort keys and values based on keys + // 2. discard duplicated keys + // 3. create new keys and values arrays. 
+ + int len = keys.Count; + var list = new KeyValuePocket[len]; + var pocketComparer = new KeyValuePocketRefComparer(comparer); + for (var i = 0; i < len; ++i) + list[len - i - 1] = new KeyValuePocket(keys[i], values[i]); + TimSort>.Sort(list, pocketComparer); + + var newKeys = new List(len); + var newValues = new List(len); + + for (var i = 0; i < len; ++i) + { + var value = list[i].Value; + var key = list[i].Key; + newKeys.Add(key); + newValues.Add(value); + + // discard duplicated keys found in log entries. + while (++i < len) + { + if (comparer.Compare(key, list[i].Key) != 0) + { + --i; + break; + } + } } return (newKeys, newValues); }