From e33cfa9fc9ebb88db11cff9514c2605aeb0b07d0 Mon Sep 17 00:00:00 2001 From: Stanislav Zeman Date: Fri, 27 Oct 2023 19:43:25 +0200 Subject: [PATCH] feat: add Timsort sorting algorithm (#692) * feat: add timsort sorting algorithm implementation * test: add timsort sorting algorithm to tests * chore: remove left-over print statement * refactor: change insertionSortRun temp variable name * docs: add concise documentation to timsort algorithm * refactor: reuse insertion sort algorithm * refactor: reuse merge sort algorithm helper function * refactor: remove slice copying in merge run --------- Co-authored-by: Taj --- sort/sorts_test.go | 10 ++++++- sort/timsort.go | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 sort/timsort.go diff --git a/sort/sorts_test.go b/sort/sorts_test.go index 0fd0a93fe..63ca010e0 100644 --- a/sort/sorts_test.go +++ b/sort/sorts_test.go @@ -186,7 +186,11 @@ func TestCycle(t *testing.T) { testFramework(t, sort.Cycle[int]) } -//END TESTS +func TestTimsort(t *testing.T) { + testFramework(t, sort.Timsort[int]) +} + +// END TESTS func benchmarkFramework(b *testing.B, f func(arr []int) []int) { var sortTests = []struct { @@ -320,3 +324,7 @@ func BenchmarkPatience(b *testing.B) { func BenchmarkCycle(b *testing.B) { benchmarkFramework(b, sort.Cycle[int]) } + +func BenchmarkTimsort(b *testing.B) { + benchmarkFramework(b, sort.Timsort[int]) +} diff --git a/sort/timsort.go b/sort/timsort.go new file mode 100644 index 000000000..95520219a --- /dev/null +++ b/sort/timsort.go @@ -0,0 +1,71 @@ +// Implementation of Timsort algorithm +// Reference: https://en.wikipedia.org/wiki/Timsort + +package sort + +import ( + "github.com/TheAlgorithms/Go/constraints" +) + +const runSizeThreshold = 8 + +// Timsort is a simple generic implementation of Timsort algorithm. +func Timsort[T constraints.Ordered](data []T) []T { + runSize := calculateRunSize(len(data)) + insertionSortRuns(data, runSize) + mergeRuns(data, runSize) + return data +} + +// calculateRunSize returns a run size parameter that is further used +// to slice the data slice. +func calculateRunSize(dataLength int) int { + remainder := 0 + for dataLength >= runSizeThreshold { + if dataLength%2 == 1 { + remainder = 1 + } + + dataLength = dataLength / 2 + } + + return dataLength + remainder +} + +// insertionSortRuns runs insertion sort on all the data runs one by one. +func insertionSortRuns[T constraints.Ordered](data []T, runSize int) { + for lower := 0; lower < len(data); lower += runSize { + upper := lower + runSize + if upper >= len(data) { + upper = len(data) + } + + Insertion(data[lower:upper]) + } +} + +// mergeRuns merge sorts all the data runs into a single sorted data slice. +func mergeRuns[T constraints.Ordered](data []T, runSize int) { + for size := runSize; size < len(data); size *= 2 { + for lowerBound := 0; lowerBound < len(data); lowerBound += size * 2 { + middleBound := lowerBound + size - 1 + upperBound := lowerBound + 2*size - 1 + if len(data)-1 < upperBound { + upperBound = len(data) - 1 + } + + mergeRun(data, lowerBound, middleBound, upperBound) + } + } +} + +// mergeRun uses merge sort to sort adjacent data runs. +func mergeRun[T constraints.Ordered](data []T, lower, mid, upper int) { + left := data[lower : mid+1] + right := data[mid+1 : upper+1] + merged := merge(left, right) + // rewrite original data slice values with sorted values from merged slice + for i, value := range merged { + data[lower+i] = value + } +}