From 2e343376cf2598e1b0adc76a9fa1ab31918385d4 Mon Sep 17 00:00:00 2001 From: Matthew Rothenberg Date: Fri, 29 Dec 2023 13:01:49 -0500 Subject: [PATCH 1/4] refactor: uint64 backing, math/rand/v2, and remove PickSource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This a first step in refactoring towards a v3 release. A major semantic version release is required in order to let us fully remove the deprecated PickSource from our API. Switches to using the new math/rand/v2 module. Paving the way for the future. Removes PickSource method: As planned, this will remove the previously deprecated PickSource method that uses v1 of math/rand sources. Custom randomness source should be reintroduced in a future version using a different methodology (e.g. setting on the Chooser instead of with each function call). Switches the Chooser backing from being a system int to a uint64. This should result in defined behavior across 32 and 64 bit systems (with the potential for some performance regressions on 32 bit systems, which I consider an acceptable tradeoff). Internal implementation: removes the custom searchInts binary sort that was present for performance (ala github.com/mroth/xsort)in favor of slices.BinarySearch which is available as of go1.21 and hits acceptable performance as of go1.22. goos: darwin goarch: arm64 pkg: github.com/mroth/weightedrand/v2 │ v2-main.txt │ v3-dev1.txt │ │ sec/op │ sec/op vs base │ NewChooser/size=1e1-8 132.7n ± 0% 132.9n ± 0% ~ (p=0.314 n=6) NewChooser/size=1e2-8 476.1n ± 1% 472.8n ± 0% -0.70% (p=0.002 n=6) NewChooser/size=1e3-8 3.406µ ± 0% 3.412µ ± 0% ~ (p=0.379 n=6) NewChooser/size=1e4-8 31.19µ ± 1% 31.03µ ± 0% -0.51% (p=0.002 n=6) NewChooser/size=1e5-8 296.6µ ± 0% 295.9µ ± 0% ~ (p=0.394 n=6) NewChooser/size=1e6-8 2.843m ± 1% 2.843m ± 1% ~ (p=0.485 n=6) NewChooser/size=1e7-8 35.83m ± 1% 35.92m ± 1% ~ (p=0.485 n=6) Pick/size=1e1-8 22.49n ± 8% 20.28n ± 9% -9.80% (p=0.015 n=6) Pick/size=1e2-8 35.26n ± 2% 32.82n ± 2% -6.92% (p=0.002 n=6) Pick/size=1e3-8 48.41n ± 1% 45.38n ± 1% -6.26% (p=0.002 n=6) Pick/size=1e4-8 63.30n ± 1% 60.23n ± 2% -4.85% (p=0.002 n=6) Pick/size=1e5-8 85.92n ± 1% 82.53n ± 1% -3.95% (p=0.002 n=6) Pick/size=1e6-8 111.5n ± 1% 107.3n ± 4% -3.72% (p=0.013 n=6) Pick/size=1e7-8 240.7n ± 2% 233.2n ± 1% -3.10% (p=0.002 n=6) PickParallel/size=1e1-8 2.982n ± 6% 2.760n ± 5% -7.43% (p=0.009 n=6) PickParallel/size=1e2-8 4.679n ± 1% 4.360n ± 2% -6.81% (p=0.002 n=6) PickParallel/size=1e3-8 6.422n ± 2% 6.059n ± 1% -5.66% (p=0.002 n=6) PickParallel/size=1e4-8 8.463n ± 0% 8.114n ± 58% ~ (p=0.058 n=6) PickParallel/size=1e5-8 11.55n ± 3% 11.06n ± 0% -4.24% (p=0.002 n=6) PickParallel/size=1e6-8 14.98n ± 0% 14.40n ± 0% -3.87% (p=0.002 n=6) PickParallel/size=1e7-8 34.70n ± 0% 33.71n ± 0% -2.82% (p=0.002 n=6) PickSourceParallel/size=1e1-8 2.752n ± 10% PickSourceParallel/size=1e2-8 4.369n ± 2% PickSourceParallel/size=1e3-8 5.989n ± 1% PickSourceParallel/size=1e4-8 7.991n ± 2% PickSourceParallel/size=1e5-8 11.28n ± 0% PickSourceParallel/size=1e6-8 14.59n ± 0% PickSourceParallel/size=1e7-8 33.86n ± 0% geomean 120.0n 279.7n -3.59% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable --- weightedrand.go | 74 +++++---------------------------- weightedrand_test.go | 98 +++++++++++++++----------------------------- 2 files changed, 44 insertions(+), 128 deletions(-) diff --git a/weightedrand.go b/weightedrand.go index ec9a73b..09b9042 100644 --- a/weightedrand.go +++ b/weightedrand.go @@ -10,7 +10,9 @@ package weightedrand import ( "errors" - "math/rand" + "math" + "math/rand/v2" + "slices" "sort" ) @@ -33,8 +35,8 @@ func NewChoice[T any, W integer](item T, weight W) Choice[T, W] { // performance on repeated calls for weighted random selection. type Chooser[T any, W integer] struct { data []Choice[T, W] - totals []int - max int + totals []uint64 + max uint64 } // NewChooser initializes a new Chooser for picking from the provided choices. @@ -43,20 +45,15 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return choices[i].Weight < choices[j].Weight }) - totals := make([]int, len(choices)) - runningTotal := 0 + totals := make([]uint64, len(choices)) + var runningTotal uint64 for i, c := range choices { if c.Weight < 0 { continue // ignore negative weights, can never be picked } - // case of single ~uint64 or similar value that exceeds maxInt on its own - if uint64(c.Weight) >= maxInt { - return nil, errWeightOverflow - } - - weight := int(c.Weight) // convert weight to int for internal counter usage - if (maxInt - runningTotal) <= weight { + weight := uint64(c.Weight) // convert weight to uint64 for internal counter usage + if (math.MaxUint64 - runningTotal) <= weight { return nil, errWeightOverflow } runningTotal += weight @@ -70,11 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil } -const ( - intSize = 32 << (^uint(0) >> 63) // cf. strconv.IntSize - maxInt = 1<<(intSize-1) - 1 - maxUint64 = 1<<64 - 1 -) // Possible errors returned by NewChooser, preventing the creation of a Chooser // with unsafe runtime states. @@ -93,51 +85,7 @@ var ( // // Utilizes global rand as the source of randomness. Safe for concurrent usage. func (c Chooser[T, W]) Pick() T { - r := rand.Intn(c.max) + 1 - i := searchInts(c.totals, r) - return c.data[i].Item -} - -// PickSource returns a single weighted random Choice.Item from the Chooser, -// utilizing the provided *rand.Rand source rs for randomness. -// -// The primary use-case for this is avoid lock contention from the global random -// source if utilizing Chooser(s) from multiple goroutines in extremely -// high-throughput situations. -// -// It is the responsibility of the caller to ensure the provided rand.Source is -// free from thread safety issues. -// -// Deprecated: Since go1.21 global rand no longer suffers from lock contention -// when used in multiple high throughput goroutines, as long as you don't -// manually seed it. Use [Chooser.Pick] instead. -func (c Chooser[T, W]) PickSource(rs *rand.Rand) T { - r := rs.Intn(c.max) + 1 - i := searchInts(c.totals, r) + r := rand.Uint64N(c.max) + 1 + i, _ := slices.BinarySearch(c.totals, r) return c.data[i].Item } - -// The standard library sort.SearchInts() just wraps the generic sort.Search() -// function, which takes a function closure to determine truthfulness. However, -// since this function is utilized within a for loop, it cannot currently be -// properly inlined by the compiler, resulting in non-trivial performance -// overhead. -// -// Thus, this is essentially manually inlined version. In our use case here, it -// results in a significant throughput increase for Pick. -// -// See also github.com/mroth/xsort. -func searchInts(a []int, x int) int { - // Possible further future optimization for searchInts via SIMD if we want - // to write some Go assembly code: http://0x80.pl/articles/simd-search.html - i, j := 0, len(a) - for i < j { - h := int(uint(i+j) >> 1) // avoid overflow when computing h - if a[h] < x { - i = h + 1 - } else { - j = h - } - } - return i -} diff --git a/weightedrand_test.go b/weightedrand_test.go index 6dc642b..cda03ff 100644 --- a/weightedrand_test.go +++ b/weightedrand_test.go @@ -3,10 +3,8 @@ package weightedrand import ( "fmt" "math" - "math/rand" - "sync" + "math/rand/v2" "testing" - "time" ) /****************************************************************************** @@ -41,37 +39,42 @@ const ( func TestNewChooser(t *testing.T) { tests := []struct { name string - cs []Choice[rune, int] + cs []Choice[rune, int64] wantErr error }{ { name: "zero choices", - cs: []Choice[rune, int]{}, + cs: []Choice[rune, int64]{}, wantErr: errNoValidChoices, }, { name: "no choices with positive weight", - cs: []Choice[rune, int]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}}, wantErr: errNoValidChoices, }, { name: "choice with weight equals 1", - cs: []Choice[rune, int]{{Item: 'a', Weight: 1}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}}, wantErr: nil, }, { - name: "weight overflow", - cs: []Choice[rune, int]{{Item: 'a', Weight: maxInt/2 + 1}, {Item: 'b', Weight: maxInt/2 + 1}}, + name: "weight overflow", + cs: []Choice[rune, int64]{ + {Item: 'a', Weight: math.MaxInt64/2 + 1}, + {Item: 'b', Weight: math.MaxInt64/2 + 1}, + {Item: 'c', Weight: math.MaxInt64/2 + 1}, + {Item: 'd', Weight: math.MaxInt64/2 + 1}, + }, wantErr: errWeightOverflow, }, { name: "nominal case", - cs: []Choice[rune, int]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}}, wantErr: nil, }, { name: "negative weight case", - cs: []Choice[rune, int]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}}, + cs: []Choice[rune, int64]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}}, wantErr: nil, }, } @@ -96,8 +99,24 @@ func TestNewChooser(t *testing.T) { wantErr error }{ { - name: "weight overflow from single uint64 exceeding system maxInt", - cs: []Choice[rune, uint64]{{Item: 'a', Weight: maxInt + 1}}, + name: "single uint64 equalling MaxUint64", + cs: []Choice[rune, uint64]{{Item: 'a', Weight: math.MaxUint64}}, + wantErr: errWeightOverflow, + }, + { + name: "single uint64 equalling MaxUint64 and a zero weight", + cs: []Choice[rune, uint64]{ + {Item: 'a', Weight: math.MaxUint64}, + {Item: 'b', Weight: 0}, + }, + wantErr: errWeightOverflow, + }, + { + name: "multiple uint64s with sum MaxUint64", + cs: []Choice[rune, uint64]{ + {Item: 'a', Weight: math.MaxUint64/2 + 1}, + {Item: 'b', Weight: math.MaxUint64/2 + 1}, + }, wantErr: errWeightOverflow, }, } @@ -139,38 +158,6 @@ func TestChooser_Pick(t *testing.T) { verifyFrequencyCounts(t, counts, choices) } -// TestChooser_PickSource is the same test methodology as TestChooser_Pick, but -// here we use the PickSource method and access the same chooser concurrently -// from multiple different goroutines, each providing its own source of -// randomness. -func TestChooser_PickSource(t *testing.T) { - choices := mockFrequencyChoices(t, testChoices) - chooser, err := NewChooser(choices...) - if err != nil { - t.Fatal(err) - } - t.Log("totals in chooser", chooser.totals) - - counts1 := make(map[int]int) - counts2 := make(map[int]int) - var wg sync.WaitGroup - wg.Add(2) - checker := func(counts map[int]int) { - defer wg.Done() - rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) - for i := 0; i < testIterations/2; i++ { - c := chooser.PickSource(rs) - counts[c]++ - } - } - go checker(counts1) - go checker(counts2) - wg.Wait() - - verifyFrequencyCounts(t, counts1, choices) - verifyFrequencyCounts(t, counts2, choices) -} - // Similar to what is used in randutil test, but in randomized order to avoid // any issues with algorithms that are accidentally dependant on presorted data. func mockFrequencyChoices(t *testing.T, n int) []Choice[int, int] { @@ -259,30 +246,11 @@ func BenchmarkPickParallel(b *testing.B) { } } -func BenchmarkPickSourceParallel(b *testing.B) { - for n := BMMinChoices; n <= BMMaxChoices; n *= 10 { - b.Run(fmt.Sprintf("size=%s", fmt1eN(n)), func(b *testing.B) { - choices := mockChoices(n) - chooser, err := NewChooser(choices...) - if err != nil { - b.Fatal(err) - } - b.ResetTimer() - b.RunParallel(func(pb *testing.PB) { - rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) - for pb.Next() { - _ = chooser.PickSource(rs) - } - }) - }) - } -} - func mockChoices(n int) []Choice[rune, int] { choices := make([]Choice[rune, int], 0, n) for i := 0; i < n; i++ { s := '🥑' - w := rand.Intn(10) + w := rand.IntN(10) c := NewChoice(s, w) choices = append(choices, c) } From 378f6fb1fe777e8bd65652bfd124e7e6337fbec2 Mon Sep 17 00:00:00 2001 From: Matthew Rothenberg Date: Sat, 30 Dec 2023 12:35:13 -0500 Subject: [PATCH 2/4] perf: replace sort.Slice with slices.SortFunc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we're already requiring the slices package in stdlib with this refactor, we can utilize this newer function which should be slightly more efficient (and has nicer ergonomics imo). Performs roughly ~11% faster during NewChooser initialization. goos: darwin goarch: arm64 pkg: github.com/mroth/weightedrand/v2 │ v3-dev1.txt │ v3-dev2.txt │ │ sec/op │ sec/op vs base │ NewChooser/size=1e1-8 132.90n ± 0% 70.73n ± 1% -46.78% (p=0.002 n=6) NewChooser/size=1e2-8 472.8n ± 0% 444.1n ± 2% -6.05% (p=0.002 n=6) NewChooser/size=1e3-8 3.412µ ± 0% 3.333µ ± 0% -2.30% (p=0.002 n=6) NewChooser/size=1e4-8 31.03µ ± 0% 30.30µ ± 0% -2.33% (p=0.002 n=6) NewChooser/size=1e5-8 295.9µ ± 0% 291.9µ ± 1% -1.36% (p=0.002 n=6) NewChooser/size=1e6-8 2.843m ± 1% 2.775m ± 1% -2.37% (p=0.002 n=6) NewChooser/size=1e7-8 35.92m ± 1% 32.99m ± 2% -8.16% (p=0.002 n=6) geomean 279.7n 36.41µ -11.60% ¹ ¹ benchmark set differs from baseline; geomeans may not be comparable --- weightedrand.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/weightedrand.go b/weightedrand.go index 09b9042..03648a7 100644 --- a/weightedrand.go +++ b/weightedrand.go @@ -9,11 +9,11 @@ package weightedrand import ( + "cmp" "errors" "math" "math/rand/v2" "slices" - "sort" ) // Choice is a generic wrapper that can be used to add weights for any item. @@ -41,8 +41,8 @@ type Chooser[T any, W integer] struct { // NewChooser initializes a new Chooser for picking from the provided choices. func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], error) { - sort.Slice(choices, func(i, j int) bool { - return choices[i].Weight < choices[j].Weight + slices.SortFunc(choices, func(a, b Choice[T, W]) int { + return cmp.Compare(a.Weight, b.Weight) }) totals := make([]uint64, len(choices)) @@ -67,7 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil } - // Possible errors returned by NewChooser, preventing the creation of a Chooser // with unsafe runtime states. var ( From a5fa29dcdb5bfed3893f000a0362e5b90229b7d2 Mon Sep 17 00:00:00 2001 From: Matthew Rothenberg Date: Wed, 7 Feb 2024 15:28:11 -0500 Subject: [PATCH 3/4] chore(ci): set compatibility to go1.22+ --- .github/workflows/test.yml | 7 ++----- go.mod | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e50beb9..907a6c7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,14 +7,11 @@ jobs: strategy: matrix: go: - - "1.18" - - "1.19" - - "1.20" - - "1.21" + - "1.22" name: Go ${{ matrix.go }} test steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - - run: go test -race + - run: go test -race . diff --git a/go.mod b/go.mod index 1ed6cf1..dc2b8d2 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ -module github.com/mroth/weightedrand/v2 +module github.com/mroth/weightedrand/v3 -go 1.18 +go 1.22 From cd42f24b22afa40fdcaa3511ea06ec45f87e87f8 Mon Sep 17 00:00:00 2001 From: Matthew Rothenberg Date: Wed, 17 Apr 2024 15:46:02 -0400 Subject: [PATCH 4/4] feat: method to set randomness source The internal mechanics of this are a bit inelegant, since unfortunately the global randomness source is not exported, necessitating these nil check methods instead. The API here needs some user feedback. I believe the majority case will want to set this once and not on a per-call basis (cf. the deprecated PickSource method in the previous version), but that needs be validated. --- weightedrand.go | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/weightedrand.go b/weightedrand.go index 03648a7..b8fd40d 100644 --- a/weightedrand.go +++ b/weightedrand.go @@ -37,6 +37,8 @@ type Chooser[T any, W integer] struct { data []Choice[T, W] totals []uint64 max uint64 + + customRand *rand.Rand } // NewChooser initializes a new Chooser for picking from the provided choices. @@ -64,7 +66,13 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro return nil, errNoValidChoices } - return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil + return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal, customRand: nil}, nil +} + +// SetRand applies an optional custom randomness source r for the Chooser. If +// set to nil nil, global rand will be used. +func (c *Chooser[T, W]) SetRand(r *rand.Rand) { + c.customRand = r } // Possible errors returned by NewChooser, preventing the creation of a Chooser @@ -82,9 +90,17 @@ var ( // Pick returns a single weighted random Choice.Item from the Chooser. // -// Utilizes global rand as the source of randomness. Safe for concurrent usage. +// Utilizes global rand as the source of randomness by default, which is safe +// for concurrent usage. If a custom rand source was set with SetRand, that +// source will be used instead. func (c Chooser[T, W]) Pick() T { - r := rand.Uint64N(c.max) + 1 + var r uint64 + if c.customRand == nil { + r = rand.Uint64N(c.max) + 1 + } else { + r = c.customRand.Uint64N(c.max) + 1 + } + i, _ := slices.BinarySearch(c.totals, r) return c.data[i].Item }