Skip to content

Commit

Permalink
refactor: uint64 backing, math/rand/v2, and remove PickSource
Browse files Browse the repository at this point in the history
This a first step in refactoring towards a v3 release.  A major semantic
version release is required in order to let us fully remove the
deprecated PickSource from our API.

Switches to using the new math/rand/v2 module.  Paving the way for the
future.

Removes PickSource method: As planned, this will remove the previously
deprecated PickSource method that uses v1 of math/rand sources. Custom
randomness source should be reintroduced in a future version using a
different methodology (e.g. setting on the Chooser instead of with each
function call).

Switches the Chooser backing from being a system int to a uint64. This
should result in defined behavior across 32 and 64 bit systems (with
the potential for some performance regressions on 32 bit systems, which
I consider an acceptable tradeoff).

Internal implementation: removes the custom searchInts binary sort that
was present for performance (ala github.com/mroth/xsort)in favor of
slices.BinarySearch which is available as of go1.21 and hits acceptable
performance as of go1.22.

goos: darwin
goarch: arm64
pkg: github.com/mroth/weightedrand/v2
                              │ v2-main.txt  │             v3-dev1.txt              │
                              │    sec/op    │    sec/op     vs base                │
NewChooser/size=1e1-8           132.7n ±  0%   132.9n ±  0%       ~ (p=0.314 n=6)
NewChooser/size=1e2-8           476.1n ±  1%   472.8n ±  0%  -0.70% (p=0.002 n=6)
NewChooser/size=1e3-8           3.406µ ±  0%   3.412µ ±  0%       ~ (p=0.379 n=6)
NewChooser/size=1e4-8           31.19µ ±  1%   31.03µ ±  0%  -0.51% (p=0.002 n=6)
NewChooser/size=1e5-8           296.6µ ±  0%   295.9µ ±  0%       ~ (p=0.394 n=6)
NewChooser/size=1e6-8           2.843m ±  1%   2.843m ±  1%       ~ (p=0.485 n=6)
NewChooser/size=1e7-8           35.83m ±  1%   35.92m ±  1%       ~ (p=0.485 n=6)
Pick/size=1e1-8                 22.49n ±  8%   20.28n ±  9%  -9.80% (p=0.015 n=6)
Pick/size=1e2-8                 35.26n ±  2%   32.82n ±  2%  -6.92% (p=0.002 n=6)
Pick/size=1e3-8                 48.41n ±  1%   45.38n ±  1%  -6.26% (p=0.002 n=6)
Pick/size=1e4-8                 63.30n ±  1%   60.23n ±  2%  -4.85% (p=0.002 n=6)
Pick/size=1e5-8                 85.92n ±  1%   82.53n ±  1%  -3.95% (p=0.002 n=6)
Pick/size=1e6-8                 111.5n ±  1%   107.3n ±  4%  -3.72% (p=0.013 n=6)
Pick/size=1e7-8                 240.7n ±  2%   233.2n ±  1%  -3.10% (p=0.002 n=6)
PickParallel/size=1e1-8         2.982n ±  6%   2.760n ±  5%  -7.43% (p=0.009 n=6)
PickParallel/size=1e2-8         4.679n ±  1%   4.360n ±  2%  -6.81% (p=0.002 n=6)
PickParallel/size=1e3-8         6.422n ±  2%   6.059n ±  1%  -5.66% (p=0.002 n=6)
PickParallel/size=1e4-8         8.463n ±  0%   8.114n ± 58%       ~ (p=0.058 n=6)
PickParallel/size=1e5-8         11.55n ±  3%   11.06n ±  0%  -4.24% (p=0.002 n=6)
PickParallel/size=1e6-8         14.98n ±  0%   14.40n ±  0%  -3.87% (p=0.002 n=6)
PickParallel/size=1e7-8         34.70n ±  0%   33.71n ±  0%  -2.82% (p=0.002 n=6)
PickSourceParallel/size=1e1-8   2.752n ± 10%
PickSourceParallel/size=1e2-8   4.369n ±  2%
PickSourceParallel/size=1e3-8   5.989n ±  1%
PickSourceParallel/size=1e4-8   7.991n ±  2%
PickSourceParallel/size=1e5-8   11.28n ±  0%
PickSourceParallel/size=1e6-8   14.59n ±  0%
PickSourceParallel/size=1e7-8   33.86n ±  0%
geomean                         120.0n         279.7n        -3.59%               ¹
¹ benchmark set differs from baseline; geomeans may not be comparable
  • Loading branch information
mroth committed Dec 30, 2023
1 parent b73a2f7 commit afeb21f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 128 deletions.
74 changes: 11 additions & 63 deletions weightedrand.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ package weightedrand

import (
"errors"
"math/rand"
"math"
"math/rand/v2"
"slices"
"sort"
)

Expand All @@ -33,8 +35,8 @@ func NewChoice[T any, W integer](item T, weight W) Choice[T, W] {
// performance on repeated calls for weighted random selection.
type Chooser[T any, W integer] struct {
data []Choice[T, W]
totals []int
max int
totals []uint64
max uint64
}

// NewChooser initializes a new Chooser for picking from the provided choices.
Expand All @@ -43,20 +45,15 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
return choices[i].Weight < choices[j].Weight
})

totals := make([]int, len(choices))
runningTotal := 0
totals := make([]uint64, len(choices))
var runningTotal uint64
for i, c := range choices {
if c.Weight < 0 {
continue // ignore negative weights, can never be picked
}

// case of single ~uint64 or similar value that exceeds maxInt on its own
if uint64(c.Weight) >= maxInt {
return nil, errWeightOverflow
}

weight := int(c.Weight) // convert weight to int for internal counter usage
if (maxInt - runningTotal) <= weight {
weight := uint64(c.Weight) // convert weight to uint64 for internal counter usage
if (math.MaxUint64 - runningTotal) <= weight {
return nil, errWeightOverflow
}
runningTotal += weight
Expand All @@ -70,11 +67,6 @@ func NewChooser[T any, W integer](choices ...Choice[T, W]) (*Chooser[T, W], erro
return &Chooser[T, W]{data: choices, totals: totals, max: runningTotal}, nil
}

const (
intSize = 32 << (^uint(0) >> 63) // cf. strconv.IntSize
maxInt = 1<<(intSize-1) - 1
maxUint64 = 1<<64 - 1
)

// Possible errors returned by NewChooser, preventing the creation of a Chooser
// with unsafe runtime states.
Expand All @@ -93,51 +85,7 @@ var (
//
// Utilizes global rand as the source of randomness. Safe for concurrent usage.
func (c Chooser[T, W]) Pick() T {
r := rand.Intn(c.max) + 1
i := searchInts(c.totals, r)
return c.data[i].Item
}

// PickSource returns a single weighted random Choice.Item from the Chooser,
// utilizing the provided *rand.Rand source rs for randomness.
//
// The primary use-case for this is avoid lock contention from the global random
// source if utilizing Chooser(s) from multiple goroutines in extremely
// high-throughput situations.
//
// It is the responsibility of the caller to ensure the provided rand.Source is
// free from thread safety issues.
//
// Deprecated: Since go1.21 global rand no longer suffers from lock contention
// when used in multiple high throughput goroutines, as long as you don't
// manually seed it. Use [Chooser.Pick] instead.
func (c Chooser[T, W]) PickSource(rs *rand.Rand) T {
r := rs.Intn(c.max) + 1
i := searchInts(c.totals, r)
r := rand.Uint64N(c.max) + 1
i, _ := slices.BinarySearch(c.totals, r)
return c.data[i].Item
}

// The standard library sort.SearchInts() just wraps the generic sort.Search()
// function, which takes a function closure to determine truthfulness. However,
// since this function is utilized within a for loop, it cannot currently be
// properly inlined by the compiler, resulting in non-trivial performance
// overhead.
//
// Thus, this is essentially manually inlined version. In our use case here, it
// results in a significant throughput increase for Pick.
//
// See also github.com/mroth/xsort.
func searchInts(a []int, x int) int {
// Possible further future optimization for searchInts via SIMD if we want
// to write some Go assembly code: http://0x80.pl/articles/simd-search.html
i, j := 0, len(a)
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
98 changes: 33 additions & 65 deletions weightedrand_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ package weightedrand
import (
"fmt"
"math"
"math/rand"
"sync"
"math/rand/v2"
"testing"
"time"
)

/******************************************************************************
Expand Down Expand Up @@ -41,37 +39,42 @@ const (
func TestNewChooser(t *testing.T) {
tests := []struct {
name string
cs []Choice[rune, int]
cs []Choice[rune, int64]
wantErr error
}{
{
name: "zero choices",
cs: []Choice[rune, int]{},
cs: []Choice[rune, int64]{},
wantErr: errNoValidChoices,
},
{
name: "no choices with positive weight",
cs: []Choice[rune, int]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
cs: []Choice[rune, int64]{{Item: 'a', Weight: 0}, {Item: 'b', Weight: 0}},
wantErr: errNoValidChoices,
},
{
name: "choice with weight equals 1",
cs: []Choice[rune, int]{{Item: 'a', Weight: 1}},
cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}},
wantErr: nil,
},
{
name: "weight overflow",
cs: []Choice[rune, int]{{Item: 'a', Weight: maxInt/2 + 1}, {Item: 'b', Weight: maxInt/2 + 1}},
name: "weight overflow",
cs: []Choice[rune, int64]{
{Item: 'a', Weight: math.MaxInt64/2 + 1},
{Item: 'b', Weight: math.MaxInt64/2 + 1},
{Item: 'c', Weight: math.MaxInt64/2 + 1},
{Item: 'd', Weight: math.MaxInt64/2 + 1},
},
wantErr: errWeightOverflow,
},
{
name: "nominal case",
cs: []Choice[rune, int]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
cs: []Choice[rune, int64]{{Item: 'a', Weight: 1}, {Item: 'b', Weight: 2}},
wantErr: nil,
},
{
name: "negative weight case",
cs: []Choice[rune, int]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
cs: []Choice[rune, int64]{{Item: 'a', Weight: 3}, {Item: 'b', Weight: -2}},
wantErr: nil,
},
}
Expand All @@ -96,8 +99,24 @@ func TestNewChooser(t *testing.T) {
wantErr error
}{
{
name: "weight overflow from single uint64 exceeding system maxInt",
cs: []Choice[rune, uint64]{{Item: 'a', Weight: maxInt + 1}},
name: "single uint64 equalling MaxUint64",
cs: []Choice[rune, uint64]{{Item: 'a', Weight: math.MaxUint64}},
wantErr: errWeightOverflow,
},
{
name: "single uint64 equalling MaxUint64 and a zero weight",
cs: []Choice[rune, uint64]{
{Item: 'a', Weight: math.MaxUint64},
{Item: 'b', Weight: 0},
},
wantErr: errWeightOverflow,
},
{
name: "multiple uint64s with sum MaxUint64",
cs: []Choice[rune, uint64]{
{Item: 'a', Weight: math.MaxUint64/2 + 1},
{Item: 'b', Weight: math.MaxUint64/2 + 1},
},
wantErr: errWeightOverflow,
},
}
Expand Down Expand Up @@ -139,38 +158,6 @@ func TestChooser_Pick(t *testing.T) {
verifyFrequencyCounts(t, counts, choices)
}

// TestChooser_PickSource is the same test methodology as TestChooser_Pick, but
// here we use the PickSource method and access the same chooser concurrently
// from multiple different goroutines, each providing its own source of
// randomness.
func TestChooser_PickSource(t *testing.T) {
choices := mockFrequencyChoices(t, testChoices)
chooser, err := NewChooser(choices...)
if err != nil {
t.Fatal(err)
}
t.Log("totals in chooser", chooser.totals)

counts1 := make(map[int]int)
counts2 := make(map[int]int)
var wg sync.WaitGroup
wg.Add(2)
checker := func(counts map[int]int) {
defer wg.Done()
rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
for i := 0; i < testIterations/2; i++ {
c := chooser.PickSource(rs)
counts[c]++
}
}
go checker(counts1)
go checker(counts2)
wg.Wait()

verifyFrequencyCounts(t, counts1, choices)
verifyFrequencyCounts(t, counts2, choices)
}

// Similar to what is used in randutil test, but in randomized order to avoid
// any issues with algorithms that are accidentally dependant on presorted data.
func mockFrequencyChoices(t *testing.T, n int) []Choice[int, int] {
Expand Down Expand Up @@ -259,30 +246,11 @@ func BenchmarkPickParallel(b *testing.B) {
}
}

func BenchmarkPickSourceParallel(b *testing.B) {
for n := BMMinChoices; n <= BMMaxChoices; n *= 10 {
b.Run(fmt.Sprintf("size=%s", fmt1eN(n)), func(b *testing.B) {
choices := mockChoices(n)
chooser, err := NewChooser(choices...)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
rs := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
for pb.Next() {
_ = chooser.PickSource(rs)
}
})
})
}
}

func mockChoices(n int) []Choice[rune, int] {
choices := make([]Choice[rune, int], 0, n)
for i := 0; i < n; i++ {
s := '🥑'
w := rand.Intn(10)
w := rand.IntN(10)
c := NewChoice(s, w)
choices = append(choices, c)
}
Expand Down

0 comments on commit afeb21f

Please sign in to comment.