Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added prefix match for consistent hash #103

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 98 additions & 6 deletions consistenthash/consistenthash.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,78 @@ package consistenthash

import (
"hash/crc32"
"math/bits"
"sort"
"strconv"
)

type Hash func(data []byte) uint32

const defaultHashExpansion = 6

type Map struct {
hash Hash
// Inputs

// hash is the hash function that will be applied to both added
// keys and fetched keys
hash Hash

// replicas is the number of virtual nodes that will be inserted
// into the consistent hash ring for each key added
replicas int
keys []int // Sorted
hashMap map[int]string

// prefixTableExpansion is the multiple of virtual nodes that
// will be inserted into the internal hash table for O(1) lookups.
prefixTableExpansion int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

document these fields now that they're non-obvious?


// Internal data

// keys is the hash of the virtual nodes, sorted by hash value
keys []int // Sorted

// hashMap maps the hashed keys back to the input strings.
// Note that all virtual nodes will map back to the same input
// string
hashMap map[int]string

// prefixShift is the number of bits an input hash should
// be right-shifted to act as a lookup in the prefixTable
prefixShift uint32

// prefixTable is a map of the most significant bits of
// a hash value to output all hashes with that prefix
// map to. If the result is ambiguous (i.e. there is a
// hash range split within this prefix) the value will
// be blank and we should fall back to a binary search
// through keys to find the exact output
prefixTable []string
}

// New returns a blank consistent hash ring that will return
// the key whose hash comes next after the hash of the input to
// Map.Get.
// Increasing the number of replicas will improve the smoothness
// of the hash ring and reduce the data moved when adding/removing
// nodes, at the cost of more memory.
func New(replicas int, fn Hash) *Map {
return NewConsistentHash(replicas, defaultHashExpansion, fn)
}

// NewConsistentHash returns a blank consistent hash ring that will return
// the key whose hash comes next after the hash of the input to
// Map.Get.
// Increasing the number of replicas will improve the smoothness
// of the hash ring and reduce the data moved when adding/removing
// nodes.
// Increasing the tableExpansion will allocate more entries in the
// internal hash table, reducing the frequency of lg(n) binary
// searches during calls to the Map.Get method.
func NewConsistentHash(replicas int, tableExpansion int, fn Hash) *Map {
m := &Map{
replicas: replicas,
hash: fn,
hashMap: make(map[int]string),
replicas: replicas,
hash: fn,
hashMap: make(map[int]string),
prefixTableExpansion: tableExpansion,
}
if m.hash == nil {
m.hash = crc32.ChecksumIEEE
Expand All @@ -59,6 +113,37 @@ func (m *Map) Add(keys ...string) {
}
}
sort.Ints(m.keys)

// Find minimum number of bits to hold |keys| * prefixTableExpansion
prefixBits := uint32(bits.Len32(uint32(len(m.keys) * m.prefixTableExpansion)))
m.prefixShift = 32 - prefixBits

prefixTableSize := 1 << prefixBits
m.prefixTable = make([]string, prefixTableSize)

previousKeyPrefix := -1 // Effectively -Inf
currentKeyIdx := 0
currentKeyPrefix := m.keys[currentKeyIdx] >> m.prefixShift

for i := range m.prefixTable {
if previousKeyPrefix < i && currentKeyPrefix > i {
// All keys with this prefix will map to a single value
m.prefixTable[i] = m.hashMap[m.keys[currentKeyIdx]]
} else {
// Several keys might have the same prefix. Walk
// over them until it changes
previousKeyPrefix = currentKeyPrefix
for currentKeyPrefix == previousKeyPrefix {
currentKeyIdx++
if currentKeyIdx < len(m.keys) {
currentKeyPrefix = m.keys[currentKeyIdx] >> m.prefixShift
} else {
currentKeyIdx = 0
currentKeyPrefix = prefixTableSize + 1 // Effectively +Inf
}
}
}
}
}

// Gets the closest item in the hash to the provided key.
Expand All @@ -69,6 +154,13 @@ func (m *Map) Get(key string) string {

hash := int(m.hash([]byte(key)))

// Look for the hash prefix in the prefix table
prefixSlot := hash >> m.prefixShift
tableResult := m.prefixTable[prefixSlot]
if len(tableResult) > 0 {
return tableResult
}

// Binary search for appropriate replica.
idx := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= hash })

Expand Down
26 changes: 16 additions & 10 deletions consistenthash/consistenthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestHashing(t *testing.T) {

// Override the hash function to return easier to reason about values. Assumes
// the keys can be converted to an integer.
hash := New(3, func(key []byte) uint32 {
hash := NewConsistentHash(3, 6, func(key []byte) uint32 {
i, err := strconv.Atoi(string(key))
if err != nil {
panic(err)
Expand Down Expand Up @@ -66,8 +66,8 @@ func TestHashing(t *testing.T) {
}

func TestConsistency(t *testing.T) {
hash1 := New(1, nil)
hash2 := New(1, nil)
hash1 := NewConsistentHash(1, 6, nil)
hash2 := NewConsistentHash(1, 6, nil)

hash1.Add("Bill", "Bob", "Bonny")
hash2.Add("Bob", "Bonny", "Bill")
Expand All @@ -86,25 +86,31 @@ func TestConsistency(t *testing.T) {

}

func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) }
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32) }
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128) }
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512) }
func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8, 6) }
func BenchmarkGet32(b *testing.B) { benchmarkGet(b, 32, 6) }
func BenchmarkGet128(b *testing.B) { benchmarkGet(b, 128, 6) }
func BenchmarkGet512(b *testing.B) { benchmarkGet(b, 512, 6) }

func benchmarkGet(b *testing.B, shards int) {
func benchmarkGet(b *testing.B, shards int, expansion int) {

hash := New(50, nil)
hash := NewConsistentHash(50, expansion, nil)

var buckets []string
for i := 0; i < shards; i++ {
buckets = append(buckets, fmt.Sprintf("shard-%d", i))
}

testStringCount := shards
var testStrings []string
for i := 0; i < testStringCount; i++ {
testStrings = append(testStrings, fmt.Sprintf("%d", i))
}

hash.Add(buckets...)

b.ResetTimer()

for i := 0; i < b.N; i++ {
hash.Get(buckets[i&(shards-1)])
hash.Get(testStrings[i&(testStringCount-1)])
}
}