forked from swarm64/s64da-benchmark-toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_benchmark
executable file
·109 lines (84 loc) · 3.57 KB
/
prepare_benchmark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
import argparse
import logging
import os
import sys
import importlib.util
from checks import check_program_exists
from dateutil import parser
from logging.config import fileConfig
from s64da_benchmark_toolkit.streams import Streams, Benchmark
# Configure the logging subsystem from 'logging.ini'. The path is relative, so
# it is resolved against the current working directory of the process.
fileConfig('logging.ini')
# Module-wide logger; getLogger() without a name returns the root logger.
logger = logging.getLogger()
# Directory (relative to the working directory) holding one sub-directory per benchmark.
BENCHMARKS_DIR = 'benchmarks'

# Benchmarks whose name contains one of these markers need --scale-factor.
SCALE_FACTOR_BENCHMARKS = ('tpcds', 'tpch', 'ssb', 'htap')

# Oldest interpreter version this script accepts.
MIN_PYTHON_VERSION = (3, 10)


def list_benchmark_dirs(benchmarks_dir: str) -> list[tuple[str, str]]:
    """Return ``(name, base_dir)`` pairs for every sub-directory of *benchmarks_dir*.

    Plain files are skipped so that they are never offered as a benchmark
    choice. The result is sorted by name.

    Args:
        benchmarks_dir: Directory to scan.

    Returns:
        A list of ``(entry_name, joined_path)`` tuples; empty when
        *benchmarks_dir* does not exist or is not a directory.
    """
    try:
        entries = sorted(os.listdir(benchmarks_dir))
    except (FileNotFoundError, NotADirectoryError):
        return []

    candidates = ((entry, os.path.join(benchmarks_dir, entry)) for entry in entries)
    return [(name, path) for name, path in candidates if os.path.isdir(path)]


def needs_scale_factor(benchmark_name: str) -> bool:
    """Return True when *benchmark_name* belongs to a benchmark that needs --scale-factor.

    Only the selected benchmark name is inspected, so unrelated arguments
    (a DSN, a schema name, a path) containing e.g. ``tpch`` have no effect.
    """
    return any(marker in benchmark_name for marker in SCALE_FACTOR_BENCHMARKS)


def build_argument_parser(benchmark_names):
    """Create the command line parser.

    Args:
        benchmark_names: Names accepted by ``--benchmark``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    args_to_parse = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    args_to_parse.add_argument('--dsn', required=True, help=(
        'The PostgreSQL DSN to connect to. Supply with DB, e.g. '
        'postgresql://postgres@localhost/dbname'
    ))

    args_to_parse.add_argument('--benchmark', required=True, choices=benchmark_names, help=(
        'Which benchmark to run.'
    ))

    # Whether this option is mandatory depends on the chosen benchmark, which
    # is only known after parsing; main() enforces it afterwards.
    args_to_parse.add_argument('--scale-factor', type=int, default=None, help=(
        'Scale factor for specific benchmarks (e.g. TPC-DS/H, SSB).'
    ))

    args_to_parse.add_argument('--chunks', type=int, default=10, help=(
        'How many chunks to generate if a data generator is involved.'
    ))

    args_to_parse.add_argument('--schema', required=True, help=(
        'Which schema of the benchmark to use.'
    ))

    args_to_parse.add_argument('--max-jobs', type=int, default=8, help=(
        'Limit the overall parallelism to this amount of jobs.'
    ))

    args_to_parse.add_argument('--check-diskspace-of-directory', default=None, help=(
        'If flag is present, a disk space check on the passed storage directory will be '
        'performed prior to ingestion.'
    ))

    args_to_parse.add_argument('--data-dir', default=None, help=(
        'The directory holding the data files to ingest from.'
    ))

    # NOTE(review): parsed as a string (no type=int) -- confirm what the
    # per-benchmark prepare modules expect before tightening this.
    args_to_parse.add_argument('--num-partitions', default=None, help=(
        'The number of partitions for partitioned schemas.'
    ))

    args_to_parse.add_argument(
        '--start-date', default='1992-01-01', type=parser.isoparse, help=(
            'The start date for TPC-C.'
        ))

    args_to_parse.add_argument('--umbra', default=False, action='store_true',
                               required=False, help=('Run in Umbra compatibility mode'))

    return args_to_parse


def main():
    """Parse the command line and run the selected benchmark's preparation step.

    Exits with a non-zero status when the interpreter is too old or when the
    command line is invalid.
    """
    if sys.version_info < MIN_PYTHON_VERSION:
        logger.error('Your python version does not match the requirements.')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    check_program_exists('psql')

    benchmarks_by_name = {
        name: Benchmark(name=name, base_dir=base_dir)
        for name, base_dir in list_benchmark_dirs(BENCHMARKS_DIR)
    }
    if not benchmarks_by_name:
        # Keep going so that --help still works, but say why no choice is valid.
        logger.warning('No benchmarks found in directory "%s".', BENCHMARKS_DIR)

    args_to_parse = build_argument_parser(sorted(benchmarks_by_name))
    args = args_to_parse.parse_args()

    if args.scale_factor is None and needs_scale_factor(args.benchmark):
        # Same wording and exit status (2) argparse uses for required options.
        args_to_parse.error('the following arguments are required: --scale-factor')

    # --benchmark is restricted to the discovered names, so the lookup succeeds.
    benchmark_to_run = benchmarks_by_name[args.benchmark]

    if args.benchmark == 'ssb':
        # Parameter --check-diskspace-of-directory not supported for ssb yet
        args.check_diskspace_of_directory = ''

    if args.benchmark == 'tpcds':
        check_program_exists('recode')

    # Load the benchmark's own prepare.py as a module and hand control to it.
    spec = importlib.util.spec_from_file_location(
        's64da_benchmark_toolkit.prepare',
        os.path.join(benchmark_to_run.base_dir, 'prepare.py'))
    prepare = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(prepare)

    prepare.PrepareBenchmark(args, benchmark_to_run).run()


if __name__ == '__main__':
    main()