You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Using GPU in script?: Yes, NVIDIA GeForce GTX 1660 SUPER
Using distributed or parallel set-up in script?: No
Information
Model I am using (Bert, XLNet ...): openai/whisper-small
Language I am using the model on (English, Chinese ...): Hindi
Adapter setup I am using (if any): LoRAConfig(r=8, alpha=16)
To reproduce
Execute this example script:
from datasets import load_dataset, DatasetDict
from transformers import WhisperProcessor, Seq2SeqTrainingArguments
from datasets import Audio
from adapters import WhisperAdapterModel, LoRAConfig, Seq2SeqAdapterTrainer
import torch
from dataclasses import dataclass
from typing import Any, Dict, List, Union
import evaluate


def prepare_dataset(batch):
    """Turn one raw dataset example into model inputs and label ids.

    Reads ``batch["audio"]`` (using its ``"array"`` and ``"sampling_rate"``
    entries) and ``batch["sentence"]``, then stores the extracted features
    under ``"input_features"`` and the tokenized sentence under ``"labels"``.

    Uses the module-level ``processor``, which is looked up at call time and
    therefore only has to exist before the dataset is mapped.

    Args:
        batch: A single example with ``"audio"`` and ``"sentence"`` entries.

    Returns:
        The same ``batch`` object with ``"input_features"`` and ``"labels"``
        added.
    """
    # load and resample audio data from 48 to 16kHz
    audio = batch["audio"]

    # compute log-Mel input features from input audio array
    batch["input_features"] = \
        processor.feature_extractor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]

    # encode target text to label ids
    batch["labels"] = processor.tokenizer(batch["sentence"]).input_ids
    return batch


# Preprocessing
processor = WhisperProcessor.from_pretrained("openai/whisper-small", language="Hindi", task="transcribe")
# Load the Hindi train/test splits of Common Voice 11 into one DatasetDict.
common_voice = DatasetDict()

common_voice["train"] = load_dataset("mozilla-foundation/common_voice_11_0", "hi", split="train",
                                     trust_remote_code=True)
common_voice["test"] = load_dataset("mozilla-foundation/common_voice_11_0", "hi", split="test", trust_remote_code=True)

# Drop the metadata columns that are not needed for training.
common_voice = common_voice.remove_columns(
    ["accent", "age", "client_id", "down_votes", "gender", "locale", "path", "segment", "up_votes"])
# Have the audio column decoded at 16 kHz.
common_voice = common_voice.cast_column("audio", Audio(sampling_rate=16000))
common_voice = common_voice.map(prepare_dataset, remove_columns=common_voice.column_names[
    "train"])  # no multiprocessing used here as this results in errors

# Model setup
model = WhisperAdapterModel.from_pretrained("openai/whisper-small")
model.generation_config.language = "hindi"
model.generation_config.task = "transcribe"
model.generation_config.forced_decoder_ids = None

# Add a LoRA adapter plus a seq2seq LM head under one task name and
# activate that adapter for training.
task_name = "whisper_hindi_lora"
config = LoRAConfig(r=8, alpha=16)
model.add_adapter(task_name, config=config)
model.add_seq2seq_lm_head(task_name)
model.train_adapter(task_name)
@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """Collate per-example features into a padded batch with a ``"labels"`` entry.

    Audio features and label ids are padded separately: the former through
    ``processor.feature_extractor.pad`` and the latter through
    ``processor.tokenizer.pad``.
    """

    # Object exposing ``feature_extractor`` and ``tokenizer``, each with a ``pad`` method.
    processor: Any
    # Token id that is stripped from the front of the labels when every row starts with it.
    decoder_start_token_id: int

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        """Pad ``features`` into a single batch.

        Args:
            features: Examples, each carrying ``"input_features"`` and ``"labels"``.

        Returns:
            The padded feature batch with the label tensor stored under
            ``"labels"``; padded label positions are set to ``-100``.
        """
        # split inputs and labels since they have to be of different lengths and need different padding methods
        # first treat the audio inputs by simply returning torch tensors
        input_features = [{"input_features": feature["input_features"]} for feature in features]
        batch = self.processor.feature_extractor.pad(input_features, return_tensors="pt")

        # get the tokenized label sequences
        label_features = [{"input_ids": feature["labels"]} for feature in features]
        # pad the labels to max length
        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt")

        # replace padding with -100 to ignore loss correctly
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)

        # if bos token is appended in previous tokenization step,
        # cut bos token here as it's append later anyways
        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels

        return batch


data_collator = DataCollatorSpeechSeq2SeqWithPadding(
    processor=processor,
    decoder_start_token_id=model.config.decoder_start_token_id,
)
metric = evaluate.load("wer")


def compute_metrics(pred):
    """Compute the word error rate, as a percentage, for one evaluation pass.

    Uses the module-level ``processor`` for decoding and ``metric`` for
    scoring. ``pred.label_ids`` is modified in place: every ``-100`` entry is
    overwritten with the tokenizer's pad token id before decoding.

    Args:
        pred: Object exposing ``predictions`` and ``label_ids``.

    Returns:
        A dict ``{"wer": value}`` where ``value`` is 100 times the metric result.
    """
    pred_ids = pred.predictions
    label_ids = pred.label_ids

    # replace -100 with the pad_token_id
    label_ids[label_ids == -100] = processor.tokenizer.pad_token_id

    # we do not want to group tokens when computing the metrics
    pred_str = processor.tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = processor.tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    wer = 100 * metric.compute(predictions=pred_str, references=label_str)

    return {"wer": wer}
# Training configuration. Evaluation runs every 2 steps so the failure in the
# generation-based evaluation loop shows up almost immediately.
training_args = Seq2SeqTrainingArguments(
    output_dir="./" + task_name,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    learning_rate=1e-5,
    fp16=True,
    evaluation_strategy="steps",
    predict_with_generate=True,  # This is the parameter causing the error
    generation_max_length=225,
    eval_steps=2,
    metric_for_best_model="wer",
    push_to_hub=False,
    overwrite_output_dir=True,
)

trainer = Seq2SeqAdapterTrainer(
    args=training_args,
    model=model,
    train_dataset=common_voice["train"],
    eval_dataset=common_voice["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    tokenizer=processor.feature_extractor,
)

trainer.train()
Resulting error:
File transformers\trainer.py", line 1932, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File transformers\trainer.py", line 2345, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
File transformers\trainer.py", line 2793, in _maybe_log_save_evaluate
metrics = self._evaluate(trial, ignore_keys_for_eval)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File transformers\trainer.py", line 2750, in _evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File transformers\trainer_seq2seq.py", line 180, in evaluate
return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File transformers\trainer.py", line 3641, in evaluate
output = eval_loop(
^^^^^^^^^^
File transformers\trainer.py", line 3826, in evaluation_loop
losses, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File transformers\trainer_seq2seq.py", line 310, in prediction_step
generated_tokens = self.model.generate(**generation_inputs, **gen_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File transformers\generation\utils.py", line 1640, in generate
self._validate_model_kwargs(model_kwargs.copy())
File transformers\generation\utils.py", line 1238, in _validate_model_kwargs
raise ValueError(
ValueError: The following `model_kwargs` are not used by the model: ['labels'] (note: typos in the generate arguments will also show up in this list)
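Explanation: In `generate()`, the method `_validate_model_kwargs()` checks for any unused kwargs. Because we do not specify `labels` as a parameter in the forward method of the `AdapterModels` and only pass it via kwargs, this line in `_validate_model_kwargs()` will identify `labels` as unused, resulting in the thrown ValueError.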
Environment info
adapters
version: 1.0.0.dev0 (latest main)
Information
LoRAConfig(r=8, alpha=16)
To reproduce
Execute this example script:
Resulting error:
Explanation: In `generate()`, the method `_validate_model_kwargs()` checks for any unused kwargs. Because we do not specify `labels` as a parameter in the forward method of the `AdapterModels` and only pass it via kwargs, this line in `_validate_model_kwargs()` will identify `labels` as unused, resulting in the thrown ValueError.
Expected behavior
Execution of the evaluation loop using the `generate()` method without the error.
Current workaround: set `predict_with_generate=False`.
The text was updated successfully, but these errors were encountered: