
8 fine tuning #14

Merged: 101 commits, May 28, 2024
Changes from 85 commits

Commits (101)
bcb9b7b
model and tokenizer loading
philswatton Apr 18, 2024
2b6ba63
additional dependencies
philswatton Apr 18, 2024
3f69c81
configs, training script, generation
philswatton Apr 18, 2024
2cfed79
removed generated configs accidentally committed
philswatton Apr 18, 2024
a9e5028
train args
philswatton Apr 18, 2024
38562f3
early bask script generation
philswatton Apr 18, 2024
dac3fe5
removed deprecated property
philswatton Apr 30, 2024
a331b61
added wandb, broke toplevel config in process
philswatton Apr 30, 2024
c994e07
placeholder preprocessing
philswatton Apr 30, 2024
6180a99
dependencies
philswatton Apr 30, 2024
7e11b51
example tofu configs
philswatton Apr 30, 2024
0b8fb48
added placeholders for 1st debug run, almost ready to run
philswatton Apr 30, 2024
88c8735
placeholder data config loading
philswatton Apr 30, 2024
f824f8c
added remaining required placeholders
philswatton Apr 30, 2024
662799d
fixed mistake in copying wandb config
philswatton Apr 30, 2024
05b9d0f
to dict method started
philswatton Apr 30, 2024
290ccfa
fixed import error
philswatton May 1, 2024
d3aecc2
fixed name of from_yaml
philswatton May 1, 2024
3a719d7
fixed path typo
philswatton May 1, 2024
dc84fda
fixed kwarg error
philswatton May 1, 2024
4525606
fixed misaligned class and config names
philswatton May 1, 2024
b35ad39
added seed kwarg
philswatton May 1, 2024
2338796
added vscode ignore
philswatton May 1, 2024
ba55154
bugs for days
philswatton May 1, 2024
20fe114
kill me now
philswatton May 1, 2024
6cbfc6c
last fix
philswatton May 1, 2024
0a76af6
fixed str learning rate
philswatton May 3, 2024
7684f31
:construction: tmp - quick way to ensure unique save dir
jack89roberts May 7, 2024
8d51b6a
:wrench: 100 epoch, default LR, gpt2 job for comparison
jack89roberts May 7, 2024
527c90d
:wrench: add missing slurm script from previous commit
jack89roberts May 7, 2024
4150b78
:wrench: reduce walltime
jack89roberts May 7, 2024
d3d5896
:wrench: trainingargs doesn't like null logging steps
jack89roberts May 7, 2024
d1eea19
:wrench: save strategy to epoch too...
jack89roberts May 7, 2024
97b13ed
remove colon from placeholder preprocessing
jack89roberts May 7, 2024
cd69dbf
:safety_vest: only save max 2 checkpoints per experiment
jack89roberts May 7, 2024
ae1d12d
wip train on all data
jack89roberts May 7, 2024
5286c55
train on retain or all quick add
jack89roberts May 7, 2024
f37cfd2
update submit scripts
jack89roberts May 7, 2024
6ca1943
walltime
jack89roberts May 7, 2024
3c42751
trainer kwargs kept as dict
philswatton May 9, 2024
0ade9e2
wandb arg to trainer
philswatton May 9, 2024
5771b35
updated kwarg processing
philswatton May 9, 2024
f95422a
full arg names in configs
philswatton May 10, 2024
d5b40fe
removed wandb arg in model configs
philswatton May 10, 2024
9f026b5
more config updates
philswatton May 10, 2024
fab38b2
added optional second dataset to training, removed wandb from model c…
philswatton May 10, 2024
6e32a29
report to none as intended
philswatton May 10, 2024
73253a8
seed in trainer, trainer class now determined by string
philswatton May 10, 2024
cd91403
early stopping now optional
philswatton May 10, 2024
73eb67d
steps can already be floats
philswatton May 10, 2024
8bf73c8
absolute project path added
philswatton May 10, 2024
1351110
now using absolute paths (except for output, which is still TODO)
philswatton May 10, 2024
e0292c4
file renames
philswatton May 10, 2024
fb44636
renamed imports
philswatton May 10, 2024
5e71095
removed easier imports
philswatton May 10, 2024
8c6c13c
experiment config to_dict
philswatton May 10, 2024
f4159c9
moved constants
philswatton May 10, 2024
aa13b32
adding padding token now optional
philswatton May 10, 2024
90b6590
separate hyperparameters config
philswatton May 10, 2024
8863c1b
enabled LoRA (in theory)
philswatton May 10, 2024
17e50fc
data config
philswatton May 16, 2024
e80f19d
resolved tofu.py merge conflict
philswatton May 16, 2024
cd59b3e
data config added to experiment config
philswatton May 16, 2024
45662a7
slightly changing configs
philswatton May 16, 2024
fd2b1ef
rename
philswatton May 16, 2024
5a4ce10
renamed yaml file to be used
philswatton May 16, 2024
25d6e6d
paths
philswatton May 16, 2024
d37aa8e
docstrings on base and experiment configs
philswatton May 16, 2024
c783e21
docstrings
philswatton May 16, 2024
c78d3c3
removed placeholder comments
philswatton May 16, 2024
194ee62
top config generation
philswatton May 20, 2024
f9bfec2
some extra ignores
philswatton May 20, 2024
b6e88ae
tweak to config cls
philswatton May 21, 2024
2a62e2b
config tests
philswatton May 21, 2024
f5aa067
more useful gitignore
philswatton May 21, 2024
146cd0d
finished up config init tests
philswatton May 21, 2024
95f0943
not testing these - just calls to other fns
philswatton May 22, 2024
2730763
integrated preprocessing PR
philswatton May 22, 2024
4fbf9cf
top config now uses array job submission
philswatton May 22, 2024
7427f13
debug top config:
philswatton May 22, 2024
1cd3c23
more debugging
philswatton May 22, 2024
8bd22f4
fixed data configs
philswatton May 22, 2024
a9d477c
updated data cfg test
philswatton May 23, 2024
d429300
bug fixing
philswatton May 23, 2024
458f5c1
finished README sections for PR
philswatton May 23, 2024
6c86740
Update src/arcsf/models/model.py
philswatton May 28, 2024
068e47f
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
f775085
removed cpu per gpu
philswatton May 28, 2024
0c3878c
fixed error from treating str as path
philswatton May 28, 2024
c4b6df3
updated argparse docstring
philswatton May 28, 2024
8c6cf85
short fn names + args to single line
philswatton May 28, 2024
455f13b
docstrings for top config fns
philswatton May 28, 2024
033d434
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
dbb7184
copy -> deepcopy
philswatton May 28, 2024
dafea7e
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
e6c9bd3
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
76dd37d
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
bba0633
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
512eadc
Update src/arcsf/config/experiment.py
philswatton May 28, 2024
d774c9b
rename fn
philswatton May 28, 2024
a576e96
refactored top config generation
philswatton May 28, 2024
Files changed
8 changes: 8 additions & 0 deletions .gitignore
@@ -1,5 +1,6 @@
# Mac OS
.DS_Store
wandb/

# Byte-compiled / optimized / DLL files
__pycache__/
@@ -130,3 +131,10 @@ dmypy.json

# Pyre type checker
.pyre/

# Vscode
.vscode/

# Project related
configs/experiment/*/
train_scripts/
7 changes: 7 additions & 0 deletions configs/data/example_tofu_1.yaml
@@ -0,0 +1,7 @@
dataset_name: tofu
data_kwargs:
granularity: question
stratified: true
forget_random: true
forgotten_author_fraction: 0.2
forgotten_fact_fraction: 0.2
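
For context, a minimal sketch of how a data config like this might be consumed. PyYAML is assumed, and `get_data` is a hypothetical stand-in for whatever loader the repo actually uses to build the TOFU forget/retain splits:

```python
import yaml

def get_data(dataset_name, granularity, stratified, forget_random,
             forgotten_author_fraction, forgotten_fact_fraction):
    """Hypothetical loader: the real repo presumably builds the TOFU
    forget/retain splits from these kwargs; this stub just echoes them."""
    print(f"{dataset_name}: {granularity}-level split, "
          f"forgetting {forgotten_author_fraction:.0%} of authors and "
          f"{forgotten_fact_fraction:.0%} of facts")

# Load the config shown above and unpack its kwargs into the loader.
with open("configs/data/example_tofu_1.yaml") as f:
    cfg = yaml.safe_load(f)

get_data(cfg["dataset_name"], **cfg["data_kwargs"])
```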
7 changes: 7 additions & 0 deletions configs/data/example_tofu_2.yaml
@@ -0,0 +1,7 @@
dataset_name: tofu
data_kwargs:
granularity: question
stratified: true
forget_random: true
forgotten_author_fraction: 0.1
forgotten_fact_fraction: 0.4
9 changes: 9 additions & 0 deletions configs/data/example_tofu_full.yaml
@@ -0,0 +1,9 @@
# The only fields that matter here are the fractions: with both set to 0.0, the
# full TOFU dataset is returned as retain and nothing is returned as forget
dataset_name: tofu
data_kwargs:
granularity: question
stratified: false
forget_random: false
forgotten_author_fraction: 0.0
forgotten_fact_fraction: 0.0
33 changes: 33 additions & 0 deletions configs/experiment/example_top_experiment_config.yaml
@@ -0,0 +1,33 @@
# Combinations to build runs over
combinations:
data_config:
- example_tofu_1
- example_tofu_2
model_config:
- [gpt2, shorter]
- [gpt2, longer]
seed:
- 42
- 43
- 44
# TODO: add other configs that define a group of experiments end to end

# Full data config: which dataset to use to build full model to do forgetting on
full_data_config: example_tofu_full

# Baskerville kwargs
use_bask: true
bask:
walltime: '0-5:0:0'
gpu_number: 1
node_number: 1
cpu_per_gpu: 36

# Wandb kwargs
wandb_kwargs:
use_wandb: true
wandb_config:
entity: turing-arc
project: selective-forgetting
log_model: "false"
group: debug-runs
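
To make the `combinations` semantics concrete, here is a sketch of how such a block could expand into individual runs via a Cartesian product. The names mirror the YAML above, but this is not the repository's actual generator (see the `top config generation` commits):

```python
import itertools

# Mirrors the `combinations` block: each run picks one data config,
# one (model, hyperparameter) pair, and one seed.
combinations = {
    "data_config": ["example_tofu_1", "example_tofu_2"],
    "model_config": [["gpt2", "shorter"], ["gpt2", "longer"]],
    "seed": [42, 43, 44],
}

runs = [
    {"data_config": data, "model_config": model,
     "hyperparameter_config": hp, "seed": seed}
    for data, (model, hp), seed in itertools.product(
        combinations["data_config"],
        combinations["model_config"],
        combinations["seed"],
    )
]
print(len(runs))  # 2 data configs x 2 model/hyperparameter pairs x 3 seeds = 12
```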
19 changes: 19 additions & 0 deletions configs/experiment/gpt2_longer_experiment_full.yaml
@@ -0,0 +1,19 @@
# Type: retain or full model
train_type: full

# All of these should be contained under configs/<type>/
data_config: example_tofu_1
model_config: gpt2
hyperparameter_config: longer
# TODO: add other configs that define a single experiment end to end

# Seed for random components
seed: 42

# Wandb kwargs
use_wandb: true
wandb_config:
entity: turing-arc
project: selective-forgetting
log_model: "false"
group: debug-runs
22 changes: 22 additions & 0 deletions configs/experiment/gpt2_longer_experiment_retain.yaml
@@ -0,0 +1,22 @@
# Type: retain or full model
train_type: retain

# Path to full model to compare this model against.
full_model_name: gpt2_longer_experiment_full

# All of these should be contained under configs/<type>/
data_config: example_tofu_1
model_config: gpt2
hyperparameter_config: longer
# TODO: add other configs that define a single experiment end to end

# Seed for random components
seed: 42

# wandb kwargs
use_wandb: true
wandb_config:
entity: turing-arc
project: selective-forgetting
log_model: "false"
group: debug-runs
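
A sketch of how an experiment config like this might be resolved against the `configs/<type>/` layout the comments describe. The real logic lives in `src/arcsf/config/experiment.py`; the standalone function below is an illustration built on assumed paths, not the repo's API:

```python
from pathlib import Path

import yaml

def resolve_experiment(path):
    """Hypothetical resolver: read an experiment config and pull in the
    data, model, and hyperparameter configs it names by file stem."""
    exp = yaml.safe_load(Path(path).read_text())
    root = Path("configs")
    # e.g. data_config: example_tofu_1 -> configs/data/example_tofu_1.yaml
    exp["data"] = yaml.safe_load(
        (root / "data" / f"{exp['data_config']}.yaml").read_text()
    )
    # e.g. model_config: gpt2 -> configs/model/gpt2/gpt2.yaml
    model_dir = root / "model" / exp["model_config"]
    exp["model"] = yaml.safe_load(
        (model_dir / f"{exp['model_config']}.yaml").read_text()
    )
    # e.g. hyperparameter_config: longer -> .../hyperparameters/longer.yaml
    exp["hyperparameters"] = yaml.safe_load(
        (model_dir / "hyperparameters" / f"{exp['hyperparameter_config']}.yaml").read_text()
    )
    return exp
```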
4 changes: 4 additions & 0 deletions configs/model/gpt2/gpt2.yaml
@@ -0,0 +1,4 @@
model_id: gpt2 # The model's name on HuggingFace, used to load it
model_kwargs: # passed to AutoModelForCausalLM.from_pretrained
device_map: auto
add_padding_token: True
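
The comments in this file state that `model_kwargs` are passed to `AutoModelForCausalLM.from_pretrained`; a sketch of that flow might look as follows. The pad-token string and the embedding resize are assumptions about what `add_padding_token` triggers, since GPT-2 ships without a pad token:

```python
import yaml
from transformers import AutoModelForCausalLM, AutoTokenizer

with open("configs/model/gpt2/gpt2.yaml") as f:
    cfg = yaml.safe_load(f)

# model_kwargs (here just device_map: auto) go straight to from_pretrained.
model = AutoModelForCausalLM.from_pretrained(cfg["model_id"], **cfg["model_kwargs"])
tokenizer = AutoTokenizer.from_pretrained(cfg["model_id"])

# Assumed behaviour of add_padding_token: register a pad token and
# grow the embedding matrix to match the enlarged vocabulary.
if cfg["add_padding_token"] and tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
    model.resize_token_embeddings(len(tokenizer))
```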
27 changes: 27 additions & 0 deletions configs/model/gpt2/hyperparameters/longer.yaml
@@ -0,0 +1,27 @@
trainer_kwargs: # passed to TrainingArguments
# Batch size
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
gradient_accumulation_steps: 1

# Core hyperparameters
learning_rate: 5.e-5
num_train_epochs: 50

# Evaluation
evaluation_strategy: epoch

# Logging
logging_strategy: epoch

# Early stopping
load_best_model_at_end: true
metric_for_best_model: eval_loss
save_strategy: epoch
save_total_limit: 1

# Outputs
output_dir: output
# Early stopping kwargs (only needed if save strategy is not null)
early_stopping_kwargs:
early_stopping_patience: 2
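
Per the comments in this file, `trainer_kwargs` map onto `TrainingArguments` and the early stopping block onto `EarlyStoppingCallback`; a minimal sketch of that wiring, with the `Trainer` construction itself left as an assumption:

```python
import yaml
from transformers import EarlyStoppingCallback, TrainingArguments

with open("configs/model/gpt2/hyperparameters/longer.yaml") as f:
    cfg = yaml.safe_load(f)

# Every key under trainer_kwargs is a TrainingArguments parameter,
# so the dict can be splatted directly.
args = TrainingArguments(**cfg["trainer_kwargs"])

# early_stopping_patience: 2 stops training after two evaluations
# without improvement in eval_loss.
callbacks = [EarlyStoppingCallback(**cfg["early_stopping_kwargs"])]

# Trainer(model=..., args=args, callbacks=callbacks, ...) would consume these.
```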
28 changes: 28 additions & 0 deletions configs/model/gpt2/hyperparameters/shorter.yaml
@@ -0,0 +1,28 @@
trainer_kwargs: # passed to TrainingArguments
# Batch size
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
gradient_accumulation_steps: 1

# Core hyperparameters
learning_rate: 5.e-5
num_train_epochs: 10

# Evaluation
evaluation_strategy: steps

# Logging
logging_strategy: steps
logging_steps: 0.5 # can be float or int

# Early stopping
load_best_model_at_end: true
metric_for_best_model: eval_loss
save_strategy: steps
save_total_limit: 1

# Outputs
output_dir: output
# Early stopping kwargs (only needed if save strategy is not null)
early_stopping_kwargs:
early_stopping_patience: 2