-
-
Notifications
You must be signed in to change notification settings - Fork 134
/
upload_results.py
211 lines (175 loc) · 6.84 KB
/
upload_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""Script to upload results from the benchmark to OpenML.
The benchmark run needs to be executed on OpenML datasets to be eligible for upload.
"""
import argparse
import contextlib
from contextlib import contextmanager
from datetime import datetime
import logging
import os
import pathlib
from typing import Optional
import openml
from openml import OpenMLRun
from amlb.defaults import default_dirs
from amlb.resources import config_load
from amlb.uploads import process_task_folder, missing_folds, _load_task_data
# Module-level logger. The logger itself lets everything through (DEBUG);
# the handler attached in `configure_logging` decides what is actually shown.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
def parse_args():
    """Parse and validate the command line arguments of this script.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes
        ``input_directory``, ``apikey``, ``mode``, ``fail_fast``, ``verbose``
        and ``task``.

    Raises:
        ValueError: If ``--mode`` is neither ``"check"`` nor ``"upload"``.
    """
    description = "Script to upload results from the benchmark to OpenML."
    # Must be passed by keyword: the first positional parameter of
    # ArgumentParser is `prog`, not `description`.
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "-i",
        "--input-directory",
        type=pathlib.Path,
        default=None,
        dest="input_directory",
        help="Directory that stores results from the runbenchmark.py invocation. "
        "By default use the most recent folder in the results "
        "folder as specified in the configuration.",
    )
    parser.add_argument(
        "-a",
        "--api-key",
        dest="apikey",
        default=None,
        # Adjacent literals are concatenated verbatim, so every fragment needs
        # its own trailing space.
        help="By default, the api key configured in your OpenML configuration file is "
        "used. Specify this parameter if you want to overwrite this value or "
        "you do not have an OpenML configuration file. A valid key is "
        "*required* to upload to the OpenML server. "
        "This argument is ignored when uploading to the test server, as "
        "the default openml-python test server api key will be used instead.",
    )
    parser.add_argument(
        "-m",
        "--mode",
        dest="mode",
        default="check",
        help="Run mode (default=%(default)s). "
        "• check: only report whether results can be uploaded. "
        "• upload: upload all complete results.",
    )
    parser.add_argument(
        "-x",
        "--fail-fast",
        dest="fail_fast",
        action="store_true",
        help="Stop as soon as a task fails to upload due to an error during uploading.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        help="Output progress to console.",
    )
    parser.add_argument(
        "-t",
        "--task",
        type=str,
        dest="task",
        default=None,
        help="Only upload results for this specific task.",
    )
    args = parser.parse_args()
    # Validated by hand (rather than with `choices=`) so that an invalid mode
    # keeps raising ValueError instead of exiting through argparse.
    if args.mode not in ["check", "upload"]:
        raise ValueError(f"Invalid value for argument 'mode': '{args.mode}'.")
    return args
def find_most_recent_result_folder() -> pathlib.Path:
    """Return the most recent run directory inside the configured output directory.

    The output directory is read from ``resources/config.yaml`` next to this
    script, falling back to ``default_dirs.output_dir``. Candidates are the
    directories matching ``*.*.*.*`` whose text after the last dot parses as a
    ``%Y%m%dT%H%M%S`` timestamp; anything else is ignored.

    Returns:
        The candidate directory with the latest timestamp.

    Raises:
        ValueError: If the output directory contains no such run directory.
    """
    root_dir = pathlib.Path(__file__).parent
    config = config_load(root_dir / "resources" / "config.yaml")
    output_dir = pathlib.Path(config.output_dir or default_dirs.output_dir)

    def dirname_to_datetime(dirname: str) -> Optional[datetime]:
        """Parse the timestamp suffix of `dirname`, or return None if absent."""
        _, timestamp = dirname.rsplit(".", 1)
        try:
            return datetime.strptime(timestamp, "%Y%m%dT%H%M%S")
        except ValueError:
            return None

    candidates = []
    for run_directory in output_dir.glob("*.*.*.*"):
        # The glob also matches plain files and unrelated dotted names.
        if not run_directory.is_dir():
            continue
        timestamp = dirname_to_datetime(run_directory.name)
        if timestamp is not None:
            candidates.append((timestamp, run_directory))

    if not candidates:
        raise ValueError(
            f"No result directories found in '{output_dir}', "
            "use '--input-directory' to specify one."
        )
    _, most_recent = max(candidates)
    return most_recent
def resolve_input_directory(path: Optional[pathlib.Path]) -> pathlib.Path:
    """Turn the optional user-supplied path into an absolute results directory.

    When no path is given, the most recent result folder is used instead.

    Raises:
        ValueError: If the resulting path is not an existing directory.
    """
    if not path:
        path = find_most_recent_result_folder()
    resolved = path.expanduser().absolute()
    if resolved.is_dir():
        return resolved
    raise ValueError(f"{resolved} is not a directory.")
def configure_logging(verbose: bool):
    """Attach a console handler to the module logger.

    The handler emits DEBUG and above when `verbose` is set, otherwise INFO
    and above, formatted as ``[HH:MM:SS] LEVEL: message``.
    """
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            fmt="[%(asctime)s] %(levelname)s: %(message)s",
            datefmt="%H:%M:%S",
        )
    )
    if verbose:
        handler.setLevel(logging.DEBUG)
    else:
        handler.setLevel(logging.INFO)
    log.addHandler(handler)
@contextmanager
def _connect_to_test_server():
    """Context manager that switches openml to its example (test server) configuration.

    The previous configuration is restored on exit, also when the body of the
    ``with`` statement raises. Without that, a failed upload would leave every
    subsequent task talking to the test server.
    """
    openml.config.start_using_configuration_for_example()
    try:
        yield
    finally:
        openml.config.stop_using_configuration_for_example()
def server_for_task(task: pathlib.Path):
    """Return the context manager under which results of `task` are uploaded.

    If the metadata loaded for the task directory has `test_server` set, this
    is a test-server connection; otherwise it is a no-op context.
    """
    if _load_task_data(task).test_server:
        return _connect_to_test_server()
    return contextlib.nullcontext()
def upload_task(task_directory: pathlib.Path) -> Optional[OpenMLRun]:
    """Upload the results stored in `task_directory` to OpenML.

    Reads the module-level `args` (set in the ``__main__`` section) to decide
    whether a failure aborts the whole script.

    Args:
        task_directory: Directory with the predictions of a single task.

    Returns:
        The run returned by `process_task_folder`, or None if the upload
        failed and ``args.fail_fast`` is not set.

    Raises:
        Exception: Whatever the upload raised, if ``args.fail_fast`` is set.
    """
    task_name = os.path.basename(task_directory)
    try:
        with server_for_task(task_directory):
            log.debug("Starting upload for '%s'." % task_name)
            run = process_task_folder(task_directory)
            # NOTE(review): the [:-11] presumably strips a trailing
            # "/api/v1/xml" from the server URL - confirm.
            log.info(
                "%s result stored at %s/r/%d"
                % (task_name, openml.config.server[:-11], run.id)
            )
            return run
    except Exception as e:
        if hasattr(e, "message"):
            message = e.message
        elif e.args:
            message = e.args[0]
        else:
            # Exceptions raised without arguments have an empty `args`;
            # indexing it would hide the real error behind an IndexError.
            message = repr(e)
        log.warning("Task %s failed to upload: %s" % (task_name, message))
        if args.fail_fast:
            raise
    return None
def process_results(result_dir: pathlib.Path, mode: str = "check"):
    """Check, and optionally upload, the task results found in `result_dir`.

    Reads the module-level `args` (set in the ``__main__`` section): when
    ``args.task`` is set only that task is processed, otherwise every
    directory in ``<result_dir>/predictions`` is.

    Tasks with missing folds or without an OpenML task id in their metadata
    are reported and skipped.

    Args:
        result_dir: Directory containing a ``predictions`` sub-directory.
        mode: "check" only reports which tasks are ready for upload,
            "upload" uploads them.

    Raises:
        ValueError: If `result_dir` has no ``predictions`` directory, or if
            ``args.task`` is not a directory inside it.
    """
    prediction_directory = result_dir / "predictions"
    if not prediction_directory.exists():
        raise ValueError(f"result_dir '{result_dir!s}' has no predictions.")

    # Always work with bare task names, so that both branches log the same
    # thing and joining with `prediction_directory` below is always correct.
    if args.task is None:
        tasks_to_process = [d.name for d in prediction_directory.iterdir() if d.is_dir()]
    elif (prediction_directory / args.task).is_dir():
        tasks_to_process = [args.task]
    else:
        raise ValueError(f"Task '{args.task}' not in '{prediction_directory}'.")

    for task_name in tasks_to_process:
        full_task_directory = prediction_directory / task_name

        folds = missing_folds(full_task_directory)
        if len(folds) > 0:
            log.info("%s has missing folds: %s" % (task_name, ", ".join(sorted(folds))))
            continue

        metadata = _load_task_data(full_task_directory)
        if "openml_task_id" not in metadata:
            log.info("%s has no openml task metadata" % task_name)
            continue

        if mode == "check":
            log.info("%s is ready for upload." % task_name)
        elif mode == "upload":
            upload_task(full_task_directory)
def configure_apikey(key: Optional[str]) -> bool:
    """Set the OpenML API key, if one is given, and report whether a key is configured.

    Args:
        key: API key to use. When empty or None, the key already present in
            the openml configuration is left untouched.

    Returns:
        True if a non-empty API key is configured afterwards. The key is not
        verified against the server, since the API does not support easy
        checking of validity.
    """
    if key:
        openml.config.apikey = key
    # NOTE(review): depending on the openml-python version an unset key may be
    # None or an empty string (confirm); truthiness treats both as missing.
    return bool(openml.config.apikey)
if __name__ == "__main__":
    # `args` has to stay a module-level name: upload_task and process_results
    # read it directly.
    args = parse_args()
    configure_logging(args.verbose)

    has_usable_key = configure_apikey(args.apikey)
    uploading = args.mode == "upload"
    if uploading and not has_usable_key:
        raise ValueError(
            "No valid OpenML API key configured, use the '--api-key' argument "
            "or follow instructions: https://openml.github.io/openml-python/master/usage.html#configuration"
        )

    results_root = resolve_input_directory(args.input_directory)
    if uploading:
        action = "Uploading"
    else:
        action = "Checking"
    log.info("%s results from '%s'." % (action, results_root))
    process_results(results_root, args.mode)