Skip to content

Commit

Permalink
Improve docs related to dynamic T5 sequence length selection.
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanJDick committed Nov 29, 2024
1 parent 4581a37 commit 8d04ec3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
4 changes: 3 additions & 1 deletion invokeai/app/invocations/flux_text_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ def _t5_encode(self, context: InvocationContext) -> torch.Tensor:

valid_seq_lens = [self.t5_max_seq_len]
if self.use_short_t5_seq_len:
valid_seq_lens = [128, 256, 512]
            # We allow a minimum sequence length of 128. Going too short results in more significant image changes.
valid_seq_lens = list(range(128, self.t5_max_seq_len, 128))
valid_seq_lens.append(self.t5_max_seq_len)

with (
t5_text_encoder_info as t5_text_encoder,
Expand Down
12 changes: 11 additions & 1 deletion invokeai/backend/flux/modules/conditioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,17 @@ def __init__(self, encoder: PreTrainedModel, tokenizer: PreTrainedTokenizer, is_
self.hf_module = self.hf_module.eval().requires_grad_(False)

def forward(self, text: list[str], valid_seq_lens: list[int]) -> Tensor:
"""Encode text into a tensor.
Args:
text: A list of text prompts to encode.
valid_seq_lens: A list of valid sequence lengths. The shortest valid sequence length that can contain the
text will be used. If the largest valid sequence length cannot contain the text, the encoding will be
truncated.
"""
valid_seq_lens = sorted(valid_seq_lens)

# Perform initial encoding with the maximum valid sequence length.
batch_encoding = self.tokenizer(
text,
truncation=True,
Expand All @@ -26,8 +36,8 @@ def forward(self, text: list[str], valid_seq_lens: list[int]) -> Tensor:
return_tensors="pt",
)

seq_len: int = batch_encoding["length"][0].item()
# Find selected_seq_len, the minimum valid sequence length that can contain all of the input tokens.
seq_len: int = batch_encoding["length"][0].item()
selected_seq_len = valid_seq_lens[-1]
for len in valid_seq_lens:
if len >= seq_len:
Expand Down

0 comments on commit 8d04ec3

Please sign in to comment.