Skip to content

Commit

Permalink
Add weights butler script. (#3941)
Browse files Browse the repository at this point in the history
Simple script to allow querying fuzzer/job weights and computing some
simple stats on them.

To do in followup PRs:
 
- support editing weights once
#3937 is fixed
- support `FuzzTargetJob` weights
  • Loading branch information
letitz committed Apr 16, 2024
1 parent f65983a commit d3854a4
Show file tree
Hide file tree
Showing 2 changed files with 296 additions and 0 deletions.
36 changes: 36 additions & 0 deletions butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,42 @@ def main():
subparsers.add_parser(
'integration_tests', help='Run end-to-end integration tests.')

parser_weights = subparsers.add_parser(
'weights', help='Interact with fuzzer/job weights.')
parser_weights.add_argument(
'-c', '--config-dir', required=True, help='Path to application config.')

weights_subparsers = parser_weights.add_subparsers(dest='weights_command')
weights_subparsers.add_parser('platforms', help='List platforms.')

weights_dump_parser = weights_subparsers.add_parser(
'dump', help='Dump database entries.')
weights_dump_parser.add_argument(
'type',
help='The type of entries to dump from the database.',
choices=['fuzzer_job', 'fuzzer_jobs'])

weights_list_parser = weights_subparsers.add_parser(
'list', help='List FuzzerJob entries.')
weights_list_parser.add_argument(
'-p',
'--platforms',
help='Which platforms to list entries for.',
nargs='+')
weights_list_parser.add_argument(
'-f', '--fuzzers', help='Which fuzzers to list entries for.', nargs='+')
weights_list_parser.add_argument(
'-j', '--jobs', help='Which jobs to list entries for.', nargs='+')

weights_aggregate_parser = weights_subparsers.add_parser(
'aggregate', help='Aggregate matching FuzzerJob entries.')
weights_aggregate_parser.add_argument(
'-p', '--platform', help='Which platform to query.', required=True)
weights_aggregate_parser.add_argument(
'-f', '--fuzzers', help='Which fuzzers to aggregate.', nargs='+')
weights_aggregate_parser.add_argument(
'-j', '--jobs', help='Which jobs to aggregate.', nargs='+')

args = parser.parse_args()
if not args.command:
parser.print_help()
Expand Down
260 changes: 260 additions & 0 deletions src/local/butler/weights.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to interact with fuzzer weights in the database.
Usage:
python butler.py weights --help
"""

import csv
import enum
import os
import statistics
import sys
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
from typing import Union

from src.clusterfuzz._internal.config import local_config
from src.clusterfuzz._internal.datastore import data_types
from src.clusterfuzz._internal.datastore import ndb_init


class EntryType(enum.Enum):
FUZZER_JOB = 'fuzzer_job'
FUZZER_JOBS = 'fuzzer_jobs'


def _iter_weights(
fuzzer_jobs: Sequence[data_types.FuzzerJob]) -> Sequence[float]:
for fj in fuzzer_jobs:
yield fj.actual_weight


def _sum_weights(fuzzer_jobs: Sequence[data_types.FuzzerJob]) -> float:
return sum(_iter_weights(fuzzer_jobs))


def _display_prob(probability: float) -> str:
return f'{probability:0.04f} = {probability * 100:0.02f}%'


def list_platforms() -> None:
# Query only distinct platform values from the database.
fuzzer_jobs = data_types.FuzzerJob.query(
projection=[data_types.FuzzerJob.platform], distinct=True)
for fuzzer_job in fuzzer_jobs:
print(fuzzer_job.platform)


def _query_fuzzer_jobs_batches(platforms: Optional[Sequence[str]] = None
) -> Sequence[data_types.FuzzerJobs]:
query = data_types.FuzzerJobs.query()

if platforms:
query = query.filter(
data_types.FuzzerJobs.platform.IN([p.upper() for p in platforms]))

return query


def _query_fuzzer_jobs(
platforms: Optional[Sequence[str]] = None,
fuzzers: Optional[Sequence[str]] = None,
jobs: Optional[Sequence[str]] = None,
) -> Sequence[data_types.FuzzerJob]:
"""Queries Datastore for matching FuzzerJob entries."""
query = data_types.FuzzerJob.query()

if platforms:
query = query.filter(
data_types.FuzzerJob.platform.IN([p.upper() for p in platforms]))
if fuzzers:
query = query.filter(data_types.FuzzerJob.fuzzer.IN(fuzzers))
if jobs:
query = query.filter(data_types.FuzzerJob.job.IN(jobs))

return query


def _list_fuzzer_jobs(fuzzer_jobs: Sequence[data_types.FuzzerJob]) -> None:
"""Lists the given FuzzerJob entries on stdout."""
fuzzer_jobs = list(fuzzer_jobs)
fuzzer_jobs.sort(key=lambda fj: fj.actual_weight, reverse=True)

total_weight = _sum_weights(fuzzer_jobs)

for fuzzer_job in fuzzer_jobs:
probability = fuzzer_job.actual_weight / total_weight

print("FuzzerJob:")
print(f' Fuzzer: {fuzzer_job.fuzzer}')
print(f' Job: {fuzzer_job.job}')
print(f' Platform: {fuzzer_job.platform}')
print(f' Weight: {fuzzer_job.actual_weight} = ' +
f'{fuzzer_job.weight} * {fuzzer_job.multiplier}')
print(f' Probability: {_display_prob(probability)}')

print(f'Count: {len(fuzzer_jobs)}')
print(f'Total weight (for this query): {total_weight}')


_FUZZER_JOB_FIELDS = [
'fuzzer',
'job',
'platform',
'weight',
'multiplier',
'actual_weight',
]


def _fuzzer_job_to_dict(
fuzzer_job: data_types.FuzzerJob) -> Dict[str, Union[str, float]]:
"""Converts the given FuzzerJob to a dictionary of CSV column values."""
return {
'fuzzer': fuzzer_job.fuzzer,
'job': fuzzer_job.job,
'platform': fuzzer_job.platform,
'weight': fuzzer_job.weight,
'multiplier': fuzzer_job.multiplier,
'actual_weight': fuzzer_job.actual_weight,
}


def _dump_fuzzer_jobs() -> None:
"""Dumps FuzzerJob entries from the database to stdout in CSV format."""
fuzzer_jobs = _query_fuzzer_jobs()

writer = csv.DictWriter(sys.stdout, fieldnames=_FUZZER_JOB_FIELDS)
writer.writeheader()

for fuzzer_job in fuzzer_jobs:
writer.writerow(_fuzzer_job_to_dict(fuzzer_job))


def _dump_fuzzer_jobs_batches() -> None:
"""Dumps FuzzerJobs entries from the database to stdout in CSV format."""
batches = _query_fuzzer_jobs_batches()

writer = csv.DictWriter(sys.stdout, fieldnames=['batch'] + _FUZZER_JOB_FIELDS)
writer.writeheader()

for batch in batches:
for fuzzer_job in batch.fuzzer_jobs:
fields = _fuzzer_job_to_dict(fuzzer_job)
fields['batch'] = batch.key.id()
writer.writerow(fields)


def _dump_entries(entry_type: EntryType) -> None:
"""Dumps entries of the given type from the database to stdout."""
if entry_type == EntryType.FUZZER_JOB:
_dump_fuzzer_jobs()
elif entry_type == EntryType.FUZZER_JOBS:
_dump_fuzzer_jobs_batches()


def _fuzzer_job_matches(
fuzzer_job: data_types.FuzzerJob,
fuzzers: Optional[Sequence[str]],
jobs: Optional[Sequence[str]],
) -> bool:
"""Returns whether the given FuzzerJob matches the given optional filters."""
if fuzzers and fuzzer_job.fuzzer not in fuzzers:
return False

if jobs and fuzzer_job.job not in jobs:
return False

return True


def _print_stats(fuzzer_jobs: List[data_types.FuzzerJob],
total_weight: float) -> None:
"""Helper for `_aggregate_fuzzer_jobs()`."""
weight = _sum_weights(fuzzer_jobs)
probability = weight / total_weight

print(f' Count: {len(fuzzer_jobs)}')
print(f' Total weight: {weight}')
print(f' Total probability: {_display_prob(probability)}')

# New in Python 3.8. We appease the linter by disabling `no-member` below.
if len(fuzzer_jobs) < 2 or not hasattr(statistics, 'quantiles'):
return

# `quantiles()` returns n-1 cut points between n quantiles.
# `weight_deciles[0]` separates the first from the second decile, i.e. it is
# the 10% percentile value. `weight_deciles[i]` is the (i+1)*10-th.
weight_deciles = statistics.quantiles(_iter_weights(fuzzer_jobs), n=10) # pylint: disable=no-member
weight_median = weight_deciles[4]
weight_90p = weight_deciles[8]

prob_median = weight_median / total_weight
prob_90p = weight_90p / total_weight

print(f' Median weight: {weight_median}')
print(f' Median probability: {_display_prob(prob_median)}')
print(f' 90th percentile weight: {weight_90p}')
print(f' 90th percentile probability: {_display_prob(prob_90p)}')


def _aggregate_fuzzer_jobs(
platform: str,
fuzzers: Optional[Sequence[str]] = None,
jobs: Optional[Sequence[str]] = None,
) -> None:
"""Aggregates statistics for matching and non-matching FuzzerJob entries."""
fuzzer_jobs = list(_query_fuzzer_jobs(platforms=[platform.upper()]))
total_weight = _sum_weights(fuzzer_jobs)

matches = []
others = []
for fuzzer_job in fuzzer_jobs:
if _fuzzer_job_matches(fuzzer_job, fuzzers, jobs):
matches.append(fuzzer_job)
else:
others.append(fuzzer_job)

print('Matching FuzzerJob entries:')
_print_stats(matches, total_weight)
print('Other FuzzerJob entries:')
_print_stats(others, total_weight)


def execute(args) -> None:
"""Entrypoint from butler.py."""
os.environ['CONFIG_DIR_OVERRIDE'] = args.config_dir
local_config.ProjectConfig().set_environment()

with ndb_init.context():
cmd = args.weights_command
if cmd == 'platforms':
list_platforms()
elif cmd == 'dump':
_dump_entries(EntryType(args.type))
elif cmd == 'list':
_list_fuzzer_jobs(
_query_fuzzer_jobs(
platforms=args.platforms, fuzzers=args.fuzzers, jobs=args.jobs))
elif cmd == 'aggregate':
_aggregate_fuzzer_jobs(
args.platform, fuzzers=args.fuzzers, jobs=args.jobs)
else:
raise TypeError(f'weights command {repr(cmd)} unrecognized')

0 comments on commit d3854a4

Please sign in to comment.