From 2e343376cf2598e1b0adc76a9fa1ab31918385d4 Mon Sep 17 00:00:00 2001
From: Matthew Rothenberg <mrothenberg@gmail.com>
Date: Fri, 29 Dec 2023 13:01:49 -0500
Subject: [PATCH 1/4] refactor: uint64 backing, math/rand/v2, and remove
 PickSource
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This a first step in refactoring towards a v3 release.  A major semantic
version release is required in order to let us fully remove the
deprecated PickSource from our API.

Switches to using the new math/rand/v2 module.  Paving the way for the
future.

Removes PickSource method: As planned, this will remove the previously
deprecated PickSource method that uses v1 of math/rand sources. Custom
randomness source should be reintroduced in a future version using a
different methodology (e.g. setting on the Chooser instead of with each
function call).

Switches the Chooser backing from being a system int to a uint64. This
should result in defined behavior across 32 and 64 bit systems (with
the potential for some performance regressions on 32 bit systems, which
I consider an acceptable tradeoff).

Internal implementation: removes the custom searchInts binary sort that
was present for performance (ala github.com/mroth/xsort)in favor of
slices.BinarySearch which is available as of go1.21 and hits acceptable
performance as of go1.22.

goos: darwin
goarch: arm64
pkg: github.com/mroth/weightedrand/v2
                              │ v2-main.txt  │             v3-dev1.txt              │
                              │    sec/op    │    sec/op     vs base                │
NewChooser/size=1e1-8           132.7n ±  0%   132.9n ±  0%       ~ (p=0.314 n=6)
NewChooser/size=1e2-8           476.1n ±  1%   472.8n ±  0%  -0.70% (p=0.002 n=6)
NewChooser/size=1e3-8           3.406µ ±  0%   3.412µ ±  0%       ~ (p=0.379 n=6)
NewChooser/size=1e4-8           31.19µ ±  1%   31.03µ ±  0%  -0.51% (p=0.002 n=6)
NewChooser/size=1e5-8           296.6µ ±  0%   295.9µ ±  0%       ~ (p=0.394 n=6)
NewChooser/size=1e6-8           2.843m ±  1%   2.843m ±  1%       ~ (p=0.485 n=6)
NewChooser/size=1e7-8           35.83m ±  1%   35.92m ±  1%       ~ (p=0.485 n=6)
Pick/size=1e1-8                 22.49n ±  8%   20.28n ±  9%  -9.80% (p=0.015 n=6)
Pick/size=1e2-8                 35.26n ±  2%   32.82n ±  2%  -6.92% (p=0.002 n=6)
Pick/size=1e3-8                 48.41n ±  1%   45.38n ±  1%  -6.26% (p=0.002 n=6)
Pick/size=1e4-8                 63.30n ±  1%   60.23n ±  2%  -4.85% (p=0.002 n=6)
Pick/size=1e5-8                 85.92n ±  1%   82.53n ±  1%  -3.95% (p=0.002 n=6)
Pick/size=1e6-8                 111.5n ±  1%   107.3n ±  4%  -3.72% (p=0.013 n=6)
Pick/size=1e7-8                 240.7n ±  2%   233.2n ±  1%  -3.10% (p=0.002 n=6)
PickParallel/size=1e1-8         2.982n ±  6%   2.760n ±  5%  -7.43% (p=0.009 n=6)
PickParallel/size=1e2-8         4.679n ±  1%   4.360n ±  2%  -6.81% (p=0.002 n=6)
PickParallel/size=1e3-8         6.422n ±  2%   6.059n ±  1%  -5.66% (p=0.002 n=6)
PickParallel/size=1e4-8         8.463n ±  0%   8.114n ± 58%       ~ (p=0.058 n=6)
PickParallel/size=1e5-8         11.55n ±  3%   11.06n ±  0%  -4.24% (p=0.002 n=6)
PickParallel/size=1e6-8         14.98n ±  0%   14.40n ±  0%  -3.87% (p=0.002 n=6)
PickParallel/size=1e7-8         34.70n ±  0%   33.71n ±  0%  -2.82% (p=0.002 n=6)
PickSourceParallel/size=1e1-8   2.752n ± 10%
PickSourceParallel/size=1e2-8   4.369n ±  2%
PickSourceParallel/size=1e3-8   5.989n ±  1%
PickSourceParallel/size=1e4-8   7.991n ±  2%
PickSourceParallel/size=1e5-8   11.28n ±  0%
PickSourceParallel/size=1e6-8   14.59n ±  0%
PickSourceParallel/size=1e7-8   33.86n ±  0%
geomean                         120.0n         279.7n        -3.59%               ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
---
 weightedrand.go      | 74 +++++----------------------------
 weightedrand_test.go | 98 +++++++++++++++-----------------------------
 2 files changed, 44 insertions(+), 128 deletions(-)

diff --git a/weightedrand.go b/weightedrand.go
index ec9a73b..09b9042 100644
--- a/weightedrand.go
+++ b/weightedrand.go
@@ -10,7 +10,9 @@ package weightedrand
 
 import (
 	"errors"
-	"math/rand"
+	"math"
+	"math/rand/v2"
+	"slices"
 	"sort"
 )
 
@@ -33,8 +35,8 @@ func NewChoice[T any, W integer](item T, weight W) Choice[T, W] {
 // performance on repeated calls for weighted random selection.
 type Chooser[T any, W integer] struct {
 	data   []Choice[T, W]
-	totals []int
-	max    int
+	totals []uint64
+	max    uint64
 }
 
 // NewChooser initializes a new Chooser for picking from the provided choices.
@@ -43,20 +45,15 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 		return choices[i].Weight < choices[j].Weight
 	})
 
-	totals := make([]int, len(choices))
-	runningTotal := 0
+	totals := make([]uint64, len(choices))
+	var runningTotal uint64
 	for i, c := range choices {
 		if c.Weight < 0 {
 			continue // ignore negative weights, can never be picked
 		}
 
-		// case of single ~uint64 or similar value that exceeds maxInt on its own
-		if uint64(c.Weight) >= maxInt {
-			return nil, errWeightOverflow
-		}
-
-		weight := int(c.Weight) // convert weight to int for internal counter usage
-		if (maxInt - runningTotal) <= weight {
+		weight := uint64(c.Weight) // convert weight to uint64 for internal counter usage
+		if (math.MaxUint64 - runningTotal) <= weight {
 			return nil, errWeightOverflow
 		}
 		runningTotal += weight
@@ -70,11 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 	return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil
 }
 
-const (
-	intSize   = 32 << (^uint(0) >> 63) // cf. strconv.IntSize
-	maxInt    = 1<<(intSize-1) - 1
-	maxUint64 = 1<<64 - 1
-)
 
 // Possible errors returned by NewChooser, preventing the creation of a Chooser
 // with unsafe runtime states.
@@ -93,51 +85,7 @@ var (
 //
 // Utilizes global rand as the source of randomness. Safe for concurrent usage.
 func (c Chooser[T, W]) Pick() T {
-	r := rand.Intn(c.max) + 1
-	i := searchInts(c.totals, r)
-	return c.data[i].Item
-}
-
-// PickSource returns a single weighted random Choice.Item from the Chooser,
-// utilizing the provided *rand.Rand source rs for randomness.
-//
-// The primary use-case for this is avoid lock contention from the global random
-// source if utilizing Chooser(s) from multiple goroutines in extremely
-// high-throughput situations.
-//
-// It is the responsibility of the caller to ensure the provided rand.Source is
-// free from thread safety issues.
-//
-// Deprecated: Since go1.21 global rand no longer suffers from lock contention
-// when used in multiple high throughput goroutines, as long as you don't
-// manually seed it. Use [Chooser.Pick] instead.
-func (c Chooser[T, W]) PickSource(rs *rand.Rand) T {
-	r := rs.Intn(c.max) + 1
-	i := searchInts(c.totals, r)
+	r := rand.Uint64N(c.max) + 1
+	i, _ := slices.BinarySearch(c.totals, r)
 	return c.data[i].Item
 }
-
-// The standard library sort.SearchInts() just wraps the generic sort.Search()
-// function, which takes a function closure to determine truthfulness. However,
-// since this function is utilized within a for loop, it cannot currently be
-// properly inlined by the compiler, resulting in non-trivial performance
-// overhead.
-//
-// Thus, this is essentially manually inlined version.  In our use case here, it
-// results in a significant throughput increase for Pick.
-//
-// See also github.com/mroth/xsort.
-func searchInts(a []int, x int) int {
-	// Possible further future optimization for searchInts via SIMD if we want
-	// to write some Go assembly code: http://0x80.pl/articles/simd-search.html
-	i, j := 0, len(a)
-	for i < j {
-		h := int(uint(i+j) >> 1) // avoid overflow when computing h
-		if a[h] < x {
-			i = h + 1
-		} else {
-			j = h
-		}
-	}
-	return i
-}
diff --git a/weightedrand_test.go b/weightedrand_test.go
index 6dc642b..cda03ff 100644
--- a/weightedrand_test.go
+++ b/weightedrand_test.go
@@ -3,10 +3,8 @@ package weightedrand
 import (
 	"fmt"
 	"math"
-	"math/rand"
-	"sync"
+	"math/rand/v2"
 	"testing"
-	"time"
 )
 
 /******************************************************************************
@@ -41,37 +39,42 @@ const (
 func TestNewChooser(t *testing.T) {
 	tests := []struct {
 		name    string
-		cs      []Choice[rune, int]
+		cs      []Choice[rune, int64]
 		wantErr error
 	}{
 		{
 			name:    "zero choices",
-			cs:      []Choice[rune, int]{},
+			cs:      []Choice[rune, int64]{},
 			wantErr: errNoValidChoices,
 		},
 		{
 			name:    "no choices with positive weight",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
 			wantErr: errNoValidChoices,
 		},
 		{
 			name:    "choice with weight equals 1",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 1}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 1}},
 			wantErr: nil,
 		},
 		{
-			name:    "weight overflow",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: maxInt/2 + 1}, {Item: 'b', Weight: maxInt/2 + 1}},
+			name: "weight overflow",
+			cs: []Choice[rune, int64]{
+				{Item: 'a', Weight: math.MaxInt64/2 + 1},
+				{Item: 'b', Weight: math.MaxInt64/2 + 1},
+				{Item: 'c', Weight: math.MaxInt64/2 + 1},
+				{Item: 'd', Weight: math.MaxInt64/2 + 1},
+			},
 			wantErr: errWeightOverflow,
 		},
 		{
 			name:    "nominal case",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
 			wantErr: nil,
 		},
 		{
 			name:    "negative weight case",
-			cs:      []Choice[rune, int]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
+			cs:      []Choice[rune, int64]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
 			wantErr: nil,
 		},
 	}
@@ -96,8 +99,24 @@ func TestNewChooser(t *testing.T) {
 		wantErr error
 	}{
 		{
-			name:    "weight overflow from single uint64 exceeding system maxInt",
-			cs:      []Choice[rune, uint64]{{Item: 'a', Weight: maxInt + 1}},
+			name:    "single uint64 equalling MaxUint64",
+			cs:      []Choice[rune, uint64]{{Item: 'a', Weight: math.MaxUint64}},
+			wantErr: errWeightOverflow,
+		},
+		{
+			name: "single uint64 equalling MaxUint64 and a zero weight",
+			cs: []Choice[rune, uint64]{
+				{Item: 'a', Weight: math.MaxUint64},
+				{Item: 'b', Weight: 0},
+			},
+			wantErr: errWeightOverflow,
+		},
+		{
+			name: "multiple uint64s with sum MaxUint64",
+			cs: []Choice[rune, uint64]{
+				{Item: 'a', Weight: math.MaxUint64/2 + 1},
+				{Item: 'b', Weight: math.MaxUint64/2 + 1},
+			},
 			wantErr: errWeightOverflow,
 		},
 	}
@@ -139,38 +158,6 @@ func TestChooser_Pick(t *testing.T) {
 	verifyFrequencyCounts(t, counts, choices)
 }
 
-// TestChooser_PickSource is the same test methodology as TestChooser_Pick, but
-// here we use the PickSource method and access the same chooser concurrently
-// from multiple different goroutines, each providing its own source of
-// randomness.
-func TestChooser_PickSource(t *testing.T) {
-	choices := mockFrequencyChoices(t, testChoices)
-	chooser, err := NewChooser(choices...)
-	if err != nil {
-		t.Fatal(err)
-	}
-	t.Log("totals in chooser", chooser.totals)
-
-	counts1 := make(map[int]int)
-	counts2 := make(map[int]int)
-	var wg sync.WaitGroup
-	wg.Add(2)
-	checker := func(counts map[int]int) {
-		defer wg.Done()
-		rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
-		for i := 0; i < testIterations/2; i++ {
-			c := chooser.PickSource(rs)
-			counts[c]++
-		}
-	}
-	go checker(counts1)
-	go checker(counts2)
-	wg.Wait()
-
-	verifyFrequencyCounts(t, counts1, choices)
-	verifyFrequencyCounts(t, counts2, choices)
-}
-
 // Similar to what is used in randutil test, but in randomized order to avoid
 // any issues with algorithms that are accidentally dependant on presorted data.
 func mockFrequencyChoices(t *testing.T, n int) []Choice[int, int] {
@@ -259,30 +246,11 @@ func BenchmarkPickParallel(b *testing.B) {
 	}
 }
 
-func BenchmarkPickSourceParallel(b *testing.B) {
-	for n := BMMinChoices; n <= BMMaxChoices; n *= 10 {
-		b.Run(fmt.Sprintf("size=%s", fmt1eN(n)), func(b *testing.B) {
-			choices := mockChoices(n)
-			chooser, err := NewChooser(choices...)
-			if err != nil {
-				b.Fatal(err)
-			}
-			b.ResetTimer()
-			b.RunParallel(func(pb *testing.PB) {
-				rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
-				for pb.Next() {
-					_ = chooser.PickSource(rs)
-				}
-			})
-		})
-	}
-}
-
 func mockChoices(n int) []Choice[rune, int] {
 	choices := make([]Choice[rune, int], 0, n)
 	for i := 0; i < n; i++ {
 		s := '🥑'
-		w := rand.Intn(10)
+		w := rand.IntN(10)
 		c := NewChoice(s, w)
 		choices = append(choices, c)
 	}

From 378f6fb1fe777e8bd65652bfd124e7e6337fbec2 Mon Sep 17 00:00:00 2001
From: Matthew Rothenberg <mrothenberg@gmail.com>
Date: Sat, 30 Dec 2023 12:35:13 -0500
Subject: [PATCH 2/4] perf: replace sort.Slice with slices.SortFunc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we're already requiring the slices package in stdlib with this
refactor, we can utilize this newer function which should be slightly
more efficient (and has nicer ergonomics imo).

Performs roughly ~11% faster during NewChooser initialization.

goos: darwin
goarch: arm64
pkg: github.com/mroth/weightedrand/v2
                        │  v3-dev1.txt  │             v3-dev2.txt              │
                        │    sec/op     │   sec/op     vs base                 │
NewChooser/size=1e1-8     132.90n ±  0%   70.73n ± 1%  -46.78% (p=0.002 n=6)
NewChooser/size=1e2-8      472.8n ±  0%   444.1n ± 2%   -6.05% (p=0.002 n=6)
NewChooser/size=1e3-8      3.412µ ±  0%   3.333µ ± 0%   -2.30% (p=0.002 n=6)
NewChooser/size=1e4-8      31.03µ ±  0%   30.30µ ± 0%   -2.33% (p=0.002 n=6)
NewChooser/size=1e5-8      295.9µ ±  0%   291.9µ ± 1%   -1.36% (p=0.002 n=6)
NewChooser/size=1e6-8      2.843m ±  1%   2.775m ± 1%   -2.37% (p=0.002 n=6)
NewChooser/size=1e7-8      35.92m ±  1%   32.99m ± 2%   -8.16% (p=0.002 n=6)
geomean                    279.7n         36.41µ       -11.60%               ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
---
 weightedrand.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/weightedrand.go b/weightedrand.go
index 09b9042..03648a7 100644
--- a/weightedrand.go
+++ b/weightedrand.go
@@ -9,11 +9,11 @@
 package weightedrand
 
 import (
+	"cmp"
 	"errors"
 	"math"
 	"math/rand/v2"
 	"slices"
-	"sort"
 )
 
 // Choice is a generic wrapper that can be used to add weights for any item.
@@ -41,8 +41,8 @@ type Chooser[T any, W integer] struct {
 
 // NewChooser initializes a new Chooser for picking from the provided choices.
 func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], error) {
-	sort.Slice(choices, func(i, j int) bool {
-		return choices[i].Weight < choices[j].Weight
+	slices.SortFunc(choices, func(a, b Choice[T, W]) int {
+		return cmp.Compare(a.Weight, b.Weight)
 	})
 
 	totals := make([]uint64, len(choices))
@@ -67,7 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 	return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil
 }
 
-
 // Possible errors returned by NewChooser, preventing the creation of a Chooser
 // with unsafe runtime states.
 var (

From a5fa29dcdb5bfed3893f000a0362e5b90229b7d2 Mon Sep 17 00:00:00 2001
From: Matthew Rothenberg <mrothenberg@gmail.com>
Date: Wed, 7 Feb 2024 15:28:11 -0500
Subject: [PATCH 3/4] chore(ci): set compatibility to go1.22+

---
 .github/workflows/test.yml | 7 ++-----
 go.mod                     | 4 ++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e50beb9..907a6c7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,14 +7,11 @@ jobs:
     strategy:
       matrix:
         go:
-          - "1.18"
-          - "1.19"
-          - "1.20"
-          - "1.21"
+          - "1.22"
     name: Go ${{ matrix.go }} test
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
           go-version: ${{ matrix.go }}
-      - run: go test -race
+      - run: go test -race .
diff --git a/go.mod b/go.mod
index 1ed6cf1..dc2b8d2 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
-module github.com/mroth/weightedrand/v2
+module github.com/mroth/weightedrand/v3
 
-go 1.18
+go 1.22

From cd42f24b22afa40fdcaa3511ea06ec45f87e87f8 Mon Sep 17 00:00:00 2001
From: Matthew Rothenberg <mrothenberg@gmail.com>
Date: Wed, 17 Apr 2024 15:46:02 -0400
Subject: [PATCH 4/4] feat: method to set randomness source

The internal mechanics of this are a bit inelegant, since unfortunately
the global randomness source is not exported, necessitating these nil
check methods instead.

The API here needs some user feedback. I believe the majority case will
want to set this once and not on a per-call basis (cf. the deprecated
PickSource method in the previous version), but that needs be validated.
---
 weightedrand.go | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/weightedrand.go b/weightedrand.go
index 03648a7..b8fd40d 100644
--- a/weightedrand.go
+++ b/weightedrand.go
@@ -37,6 +37,8 @@ type Chooser[T any, W integer] struct {
 	data   []Choice[T, W]
 	totals []uint64
 	max    uint64
+
+	customRand *rand.Rand
 }
 
 // NewChooser initializes a new Chooser for picking from the provided choices.
@@ -64,7 +66,13 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
 		return nil, errNoValidChoices
 	}
 
-	return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil
+	return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal, customRand: nil}, nil
+}
+
+// SetRand applies an optional custom randomness source r for the Chooser.  If
+// set to nil nil, global rand will be used.
+func (c *Chooser[T, W]) SetRand(r *rand.Rand) {
+	c.customRand = r
 }
 
 // Possible errors returned by NewChooser, preventing the creation of a Chooser
@@ -82,9 +90,17 @@ var (
 
 // Pick returns a single weighted random Choice.Item from the Chooser.
 //
-// Utilizes global rand as the source of randomness. Safe for concurrent usage.
+// Utilizes global rand as the source of randomness by default, which is safe
+// for concurrent usage. If a custom rand source was set with SetRand, that
+// source will be used instead.
 func (c Chooser[T, W]) Pick() T {
-	r := rand.Uint64N(c.max) + 1
+	var r uint64
+	if c.customRand == nil {
+		r = rand.Uint64N(c.max) + 1
+	} else {
+		r = c.customRand.Uint64N(c.max) + 1
+	}
+
 	i, _ := slices.BinarySearch(c.totals, r)
 	return c.data[i].Item
 }