pkg/fuzzer: use MAB to distinguish between exec fuzz and exec gen

Let's try to use a plain epsilon-greedy MAB (multi-armed bandit) for this purpose. To better track its effect, also calculate moving averages of the "new max signal" / "execution time" ratios for exec fuzz and exec gen.
google · Apr 2, 2024 · 2de14ee · 2de14ee
1 parent b5ad152
commit 2de14ee
Show file tree

Hide file tree

Showing 13 changed files with 329 additions and 32 deletions.
diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go
@@ -14,6 +14,7 @@ import (
 
  "github.com/google/syzkaller/pkg/corpus"
  "github.com/google/syzkaller/pkg/ipc"
+ "github.com/google/syzkaller/pkg/learning"
  "github.com/google/syzkaller/pkg/rpctype"
  "github.com/google/syzkaller/pkg/signal"
  "github.com/google/syzkaller/prog"
@@ -34,6 +35,12 @@ type Fuzzer struct {
  ctMu sync.Mutex // TODO: use RWLock.
  ctRegenerate chan struct{}
 
+ // Use a MAB to determine the right distribution of
+ // exec fuzz and exec gen.
+ genFuzzMAB *learning.PlainMAB[string]
+ genSignalSpeed *learning.RunningRatioAverage[float64]
+ fuzzSignalSpeed *learning.RunningRatioAverage[float64]
+
  nextExec *priorityQueue[*Request]
  nextJobID atomic.Int64
 
@@ -43,6 +50,12 @@ type Fuzzer struct {
 
 func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
  target *prog.Target) *Fuzzer {
+ genFuzzMAB := &learning.PlainMAB[string]{
+ ExplorationRate: 0.02,
+ MinLearningRate: 0.001,
+ }
+ genFuzzMAB.AddArms(statFuzz, statGenerate)
+
  f := &Fuzzer{
  Config: cfg,
  Cover: &Cover{},
@@ -54,7 +67,10 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
 
  // We're okay to lose some of the messages -- if we are already
  // regenerating the table, we don't want to repeat it right away.
- ctRegenerate: make(chan struct{}),
+ ctRegenerate: make(chan struct{}),
+ genFuzzMAB: genFuzzMAB,
+ genSignalSpeed: learning.NewRunningRatioAverage[float64](20000),
+ fuzzSignalSpeed: learning.NewRunningRatioAverage[float64](20000),
 
  nextExec: makePriorityQueue[*Request](),
  }
@@ -91,22 +107,26 @@ type Request struct {
  flags ProgTypes
  stat string
  resultC chan *Result
+
+ genFuzzAction *learning.Action[string]
 }
 
 type Result struct {
- Info *ipc.ProgInfo
- Stop bool
+ Info *ipc.ProgInfo
+ Stop bool
+ ElapsedSec float64
 }
 
 func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
  // Triage individual calls.
  // We do it before unblocking the waiting threads because
  // it may result in concurrent modification of req.Prog.
+ var newSignal int
  if req.NeedSignal != rpctype.NoSignal && res.Info != nil {
  for call, info := range res.Info.Calls {
- fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
+ newSignal += fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
  }
- fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
+ newSignal += fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
  }
  // Unblock threads that wait for the result.
  if req.resultC != nil {
@@ -116,20 +136,38 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
  fuzzer.mu.Lock()
  fuzzer.stats[req.stat]++
  fuzzer.mu.Unlock()
+ // Update the MAB(s).
+ reward := 0.0
+ if res.ElapsedSec > 0 {
+ // Similarly to the "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning"
+ // paper, let's use the ratio of "new max signal" to "execution time".
+ // Unlike the paper, let's take the raw value of it instead of its ratio to the average one.
+ reward = float64(newSignal) / res.ElapsedSec
+ if req.stat == statGenerate {
+ fuzzer.genSignalSpeed.Save(float64(newSignal), res.ElapsedSec)
+ } else if req.stat == statFuzz {
+ fuzzer.fuzzSignalSpeed.Save(float64(newSignal), res.ElapsedSec)
+ }
+ }
+ if req.genFuzzAction != nil {
+ fuzzer.mu.Lock()
+ fuzzer.genFuzzMAB.SaveReward(*req.genFuzzAction, reward)
+ fuzzer.mu.Unlock()
+ }
 }
 
 func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
- flags ProgTypes) {
+ flags ProgTypes) int {
  prio := signalPrio(p, info, call)
  newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
  if newMaxSignal.Empty() {
- return
+ return 0
  }
  if flags&progInTriage > 0 {
  // We are already triaging this exact prog.
  // All newly found coverage is flaky.
  fuzzer.Logf(2, "found new flaky signal in call %d in %s", call, p)
- return
+ return newMaxSignal.Len()
  }
  fuzzer.Logf(2, "found new signal in call %d in %s", call, p)
  fuzzer.startJob(&triageJob{
@@ -140,6 +178,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
  flags: flags,
  jobPriority: triageJobPrio(flags),
  })
+ return newMaxSignal.Len()
 }
 
 func signalPrio(p *prog.Prog, info *ipc.CallInfo, call int) (prio uint8) {
@@ -184,21 +223,20 @@ func (fuzzer *Fuzzer) nextInput() *Request {
  }
  }
 
- // Either generate a new input or mutate an existing one.
- mutateRate := 0.95
- if !fuzzer.Config.Coverage {
- // If we don't have real coverage signal, generate programs
- // more frequently because fallback signal is weak.
- mutateRate = 0.5
- }
  rnd := fuzzer.rand()
- if rnd.Float64() < mutateRate {
- req := mutateProgRequest(fuzzer, rnd)
- if req != nil {
- return req
- }
+ fuzzer.mu.Lock()
+ action := fuzzer.genFuzzMAB.Action(rnd)
+ fuzzer.mu.Unlock()
+
+ var req *Request
+ if action.Arm == statFuzz {
+ req = mutateProgRequest(fuzzer, rnd)
  }
- return genProgRequest(fuzzer, rnd)
+ if req == nil {
+ req = genProgRequest(fuzzer, rnd)
+ }
+ req.genFuzzAction = &action
+ return req
 }
 
 func (fuzzer *Fuzzer) startJob(newJob job) {

diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go
@@ -85,6 +85,8 @@ func TestFuzz(t *testing.T) {
  t.Logf("%s", p.Serialize())
  }
 
+ t.Logf("stats: %+v", fuzzer.Stats().Named)
+
  assert.Equal(t, len(tf.expectedCrashes), len(tf.crashes),
  "not all expected crashes were found")
 }

diff --git a/pkg/fuzzer/stats.go b/pkg/fuzzer/stats.go
@@ -42,5 +42,7 @@ func (fuzzer *Fuzzer) Stats() Stats {
  for k, v := range fuzzer.stats {
  ret.Named[k] = v
  }
+ ret.Named["exec gen, sig/sec*1000"] = uint64(fuzzer.genSignalSpeed.Load() * 1000)
+ ret.Named["exec fuzz, sig/sec*1000"] = uint64(fuzzer.fuzzSignalSpeed.Load() * 1000)
  return ret
 }
diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go
@@ -253,6 +253,12 @@ var rateLimit = time.NewTicker(1 * time.Second)
 // hanged: program hanged and was killed
 // err0: failed to start the process or bug in executor itself.
 func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInfo, hanged bool, err0 error) {
+ output, info, hanged, _, err0 = env.ExecWithElapsed(opts, p)
+ return
+}
+
+func (env *Env) ExecWithElapsed(opts *ExecOpts, p *prog.Prog) (output []byte,
+ info *ProgInfo, hanged bool, elapsed time.Duration, err0 error) {
  // Copy-in serialized program.
  progSize, err := p.SerializeForExec(env.in)
  if err != nil {
@@ -275,7 +281,9 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
  return
  }
 
+ start := time.Now()
  output, hanged, err0 = env.cmd.exec(opts, progData)
+ elapsed = time.Since(start)
  if err0 != nil {
  env.cmd.close()
  env.cmd = nil

diff --git a/pkg/learning/mab.go b/pkg/learning/mab.go
@@ -0,0 +1,65 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+ "math/rand"
+)
+
+type Action[T comparable] struct { // Action is one arm choice returned by PlainMAB.Action.
+ Arm T // the selected arm, as passed to AddArms
+ index int // position of Arm in PlainMAB.arms/weights; used by SaveReward
+}
+
+type countedValue struct { // countedValue is a running estimate plus the number of updates applied to it.
+ value float64 // current estimate
+ count int64 // number of update() calls so far
+}
+
+func (cv *countedValue) update(value, minStep float64) { // moves cv.value towards value
+ // The step is 1/count, i.e. a plain running mean while count is small;
+ // larger early steps let the estimate converge faster to the actual value.
+ // Once 1/count drops below minStep, the step is clamped to minStep so that
+ // recent samples keep their weight and non-stationary problems stay trackable.
+ cv.count++
+ step := 1.0 / float64(cv.count)
+ if step < minStep {
+ step = minStep
+ }
+ cv.value += (value - cv.value) * step
+}
+
+// PlainMAB is a very simple epsilon-greedy multi-armed bandit (MAB) implementation.
+// It's not thread-safe.
+type PlainMAB[T comparable] struct {
+ MinLearningRate float64 // lower bound on the per-update step size (passed to countedValue.update)
+ ExplorationRate float64 // probability that Action picks a uniformly random arm
+ arms []T // arm identifiers, in AddArms order
+ weights []countedValue // per-arm reward estimates, parallel to arms
+}
+
+func (p *PlainMAB[T]) AddArms(arms ...T) { // AddArms registers new arms, each with a zeroed estimate.
+ for _, arm := range arms {
+ p.arms = append(p.arms, arm)
+ p.weights = append(p.weights, countedValue{0, 0}) // keep weights parallel to arms
+ }
+}
+
+func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] { // Action picks the next arm to pull.
+ var pos int
+ if r.Float64() < p.ExplorationRate { // explore: uniformly random arm
+ pos = r.Intn(len(p.arms)) // NOTE: Intn panics if no arms were added
+ } else { // exploit: arm with the highest estimate
+ for i := 1; i < len(p.arms); i++ {
+ if p.weights[i].value > p.weights[pos].value { // strict >, so ties go to the earliest arm
+ pos = i
+ }
+ }
+ }
+ return Action[T]{Arm: p.arms[pos], index: pos}
+}
+
+func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) { // SaveReward folds reward into the estimate of the arm chosen by action.
+ p.weights[action.index].update(reward, p.MinLearningRate)
+}
diff --git a/pkg/learning/mab_test.go b/pkg/learning/mab_test.go
@@ -0,0 +1,66 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+ "math/rand"
+ "testing"
+
+ "github.com/google/syzkaller/pkg/testutil"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMABSmallDiff(t *testing.T) { // checks that the bandit prefers the better of two close arms
+ r := rand.New(testutil.RandSource(t))
+ bandit := &PlainMAB[int]{
+ MinLearningRate: 0.0001,
+ ExplorationRate: 0.1,
+ }
+ arms := []float64{0.65, 0.7} // per-arm reward scale; runMAB draws rewards uniformly below it
+ for i := range arms {
+ bandit.AddArms(i)
+ }
+ const steps = 40000
+ counts := runMAB(r, bandit, arms, steps)
+ t.Logf("counts: %v", counts)
+ assert.Greater(t, counts[1], steps/4*3) // arm 1 must get more than 3/4 of all pulls
+}
+
+func TestNonStationaryMAB(t *testing.T) { // checks that the bandit re-adapts after the reward distribution changes
+ r := rand.New(testutil.RandSource(t))
+ bandit := &PlainMAB[int]{
+ MinLearningRate: 0.02,
+ ExplorationRate: 0.04,
+ }
+
+ arms := []float64{0.2, 0.7, 0.5, 0.1}
+ for i := range arms {
+ bandit.AddArms(i)
+ }
+
+ const steps = 25000
+ counts := runMAB(r, bandit, arms, steps)
+ t.Logf("initially: %v", counts)
+
+ // Ensure that we've found the best arm (index 1).
+ assert.Greater(t, counts[1], steps/2)
+
+ // Now raise the reward of the previously worst arm (index 3) so that it becomes the best one.
+ arms[3] = 0.9
+ counts = runMAB(r, bandit, arms, steps)
+ t.Logf("after reward change: %v", counts)
+ assert.Greater(t, counts[3], steps/2)
+}
+
+func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int { // returns how many times each arm was chosen
+ counts := make([]int, len(arms))
+ for i := 0; i < steps; i++ {
+ action := bandit.Action(r)
+ // TODO: use normal distribution?
+ reward := r.Float64() * arms[action.Arm] // uniform in [0, arms[arm]), so arms[arm] scales the mean reward
+ counts[action.Arm]++
+ bandit.SaveReward(action, reward)
+ }
+ return counts
+}