-
Notifications
You must be signed in to change notification settings - Fork 14
/
run_mmlu_evaluation.py
95 lines (78 loc) · 5.9 KB
/
run_mmlu_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import click
import torch
import run_clm
from experiments.mmlu_utils import run_evaluation
# Lookup table: run output-directory name -> base directory string.
# The script consults it (keyed by `trainer.args.output_dir`) when the
# `CHECKPOINT_BASE_DIR` environment variable is not set.
CHECKPOINT_BASE_DIR_DICT = {
    # 7B
    "output_c4_lora_20230909_ranks64_7b_nf3": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_nf4": "/export/share/experiments/20230912/795acd350b74/",
    "output_c4_lora_20230909_ranks64_7b_gptq-3bit": "/export/share/experiments/20230916/5290946275a0/",
    "output_c4_lora_20230909_ranks64_7b_gptq-4bit": "/export/share/experiments/20230916/5290946275a0/",
    "output_c4_lora_20230909_ranks64_7b_None_2.5": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_2.75": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_3": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_3.25": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_3.5": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_3.75": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_7b_None_4": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_None_2.5": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_None_2.75": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_None_3": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_None_3.25": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_None_3.5": "/export/share/experiments/20230914/795acd350b74/",
    "output_c4_lpq_20230909_ranks64_7b_None_3.75": "/export/share/experiments/20230912/795acd350b74/",
    "output_c4_lpq_20230909_ranks64_7b_None_4": "/export/share/experiments/20230912/795acd350b74/",
    "output_c4_lpq_20230909_ranks64_7b_c4_2.5": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_c4_2.75": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_c4_3": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_c4_3.25": "/export/share/experiments/20230912/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_7b_c4_3.5": "/export/share/experiments/20230914/795acd350b74/",
    "output_c4_lpq_20230909_ranks64_7b_c4_3.75": "/export/share/experiments/20230912/795acd350b74/",
    "output_c4_lpq_20230909_ranks64_7b_c4_4": "/export/share/experiments/20230912/795acd350b74/",
    # 70B
    "output_c4_lora_20230909_ranks64_70b_nf3": "/export/share/experiments/20230922/b7a0cbd20ed7/",
    "output_c4_lora_20230909_ranks64_70b_nf4": "/export/share3/experiments/20230927/ab23a4f35ea4/",
    "output_c4_lora_20230909_ranks64_70b_None_2.5": "/export/share3/experiments/20230927/24053ff618b3/",
    "output_c4_lora_20230909_ranks64_70b_None_2.75": "/export/share3/experiments/20230927/24053ff618b3/",
    "output_c4_lora_20230909_ranks64_70b_None_3": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lora_20230909_ranks64_70b_None_3.25": "/export/share3/experiments/20230922/21f9d4382283/",
    "output_c4_lora_20230909_ranks64_70b_None_3.5": "/export/share3/experiments/20230922/21f9d4382283/",
    "output_c4_lora_20230909_ranks64_70b_None_3.75": "/export/share3/experiments/20230922/21f9d4382283/",
    "output_c4_lora_20230909_ranks64_70b_None_4": "/export/share3/experiments/20230922/21f9d4382283/",
    "output_c4_lpq_20230909_ranks64_70b_None_2.5": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_2.75": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_3": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_3.25": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_3.5": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_3.75": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_None_4": "/export/share3/experiments/20230925/bc9575f2d323/",
    "output_c4_lpq_20230909_ranks64_70b_c4_2.5": "/export/share/experiments/20230922/b7a0cbd20ed7/",
    "output_c4_lpq_20230909_ranks64_70b_c4_2.75": "/export/share/experiments/20230922/b7a0cbd20ed7/",
    "output_c4_lpq_20230909_ranks64_70b_c4_3": "/export/share/experiments/20230922/b7a0cbd20ed7/",
    "output_c4_lpq_20230909_ranks64_70b_c4_3.25": "/export/share3/experiments/20230927/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_70b_c4_3.5": "/export/share3/experiments/20230927/ab23a4f35ea4/",
    "output_c4_lpq_20230909_ranks64_70b_c4_3.75": "/export/share/experiments/20230922/b7a0cbd20ed7/",
    "output_c4_lpq_20230909_ranks64_70b_c4_4": "/export/share3/experiments/20230927/ab23a4f35ea4/",
}
def _resolve_checkpoint_base_dir(output_dir):
    """Return the base directory to join with `output_dir` for the checkpoint.

    The `CHECKPOINT_BASE_DIR` environment variable takes precedence when it
    is set; otherwise the directory is looked up in
    `CHECKPOINT_BASE_DIR_DICT` using `output_dir` as the key.

    Args:
        output_dir: The run's output directory name
            (`trainer.args.output_dir` at the call site).

    Returns:
        The base directory as a string.

    Raises:
        KeyError: If the environment variable is unset and `output_dir`
            has no entry in `CHECKPOINT_BASE_DIR_DICT`.
    """
    base_dir = os.getenv("CHECKPOINT_BASE_DIR")
    if base_dir is not None:
        return base_dir
    try:
        return CHECKPOINT_BASE_DIR_DICT[output_dir]
    except KeyError:
        # Same exception type as the plain dict lookup, but the message
        # now tells the user how to recover.
        raise KeyError(
            f"No checkpoint base directory registered for output_dir "
            f"{output_dir!r}; add it to CHECKPOINT_BASE_DIR_DICT or set "
            f"the CHECKPOINT_BASE_DIR environment variable."
        ) from None


if __name__ == "__main__":
    # Setting up the model, tokenizer
    trainer = run_clm.main(return_trainer=True)

    # Load the model checkpoint from
    # <base dir>/<output_dir>/full_model.pth
    checkpoint_base_dir = _resolve_checkpoint_base_dir(
        trainer.args.output_dir)
    checkpoint_path = os.path.join(
        checkpoint_base_dir,
        trainer.args.output_dir,
        "full_model.pth")

    # NOTE(review): torch.load may unpickle the file (depends on the
    # installed torch version's defaults) -- only load checkpoints from
    # trusted locations.
    state_dict = torch.load(
        checkpoint_path,
        map_location=torch.device("cpu"))
    trainer.model.load_state_dict(state_dict)
    click.secho(f"Loaded model from {checkpoint_path}", fg="green")

    # Run the evaluation
    run_evaluation(trainer=trainer)