Question about TBPTT with two separate LSTM layers #15643
Unanswered
YooSungHyun asked this question in code help: NLP / ASR / TTS
Replies: 2 comments, 1 reply
-
I think this is right:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size * 2, 1)
        # NOTE: self.truncated_bptt_steps is intentionally NOT set in this variant,
        # so the constructor argument is ignored and training runs on the full
        # sequence without TBPTT.
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx):
        x, y = batch
        pred1, _ = self.lstm(x)
        pred2, _ = self.lstm2(x)
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        return {"loss": loss}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

And the same model with a time sequence of 2000, this time with TBPTT enabled:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size * 2, 1)
        # Setting this attribute enables Lightning's truncated BPTT: each batch is
        # split along the time dimension into chunks of this many steps.
        self.truncated_bptt_steps = truncated_bptt_steps
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx, hiddens):
        x, y = batch
        if hiddens is not None:
            hiddens1, hiddens2 = hiddens
        else:
            hiddens1 = None
            hiddens2 = None
        pred1, hiddens1 = self.lstm(x, hiddens1)
        pred2, hiddens2 = self.lstm2(x, hiddens2)
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        # The returned hiddens are detached by Lightning and passed back in on the next split.
        return {"loss": loss, "hiddens": (hiddens1, hiddens2)}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

and here is the result (look at the loss).
Also, I read down through the optimizer-step code. I think `hiddens` is only used to carry the past state forward (the gradient connection to the past is cut), its type hint is `Any`, and it is not itself used in any calculation. The flow is just: check `hiddens` -> detach (and move to CPU) -> pass it back in as a kwarg -> go to the next `training_step`. Am I right?
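For what it's worth, here is a minimal, self-contained sketch (my own illustration, not Lightning's actual source) of what that TBPTT loop amounts to: split the sequence along the time axis, run one optimizer step per chunk, and detach the returned hiddens before feeding them into the next chunk so gradients stop at the truncation boundary. The helper name `tbptt_sketch` is hypothetical.

```python
import torch
import torch.nn.functional as F

def tbptt_sketch(lstm, linear, x, y, steps, optimizer):
    # Hypothetical helper illustrating the TBPTT flow described above.
    hiddens = None
    for t in range(0, x.size(1), steps):
        x_chunk, y_chunk = x[:, t:t + steps], y[:, t:t + steps]
        out, hiddens = lstm(x_chunk, hiddens)
        loss = F.mse_loss(linear(out), y_chunk)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Detach keeps the state values but cuts the autograd graph,
        # so the next chunk's backward pass stops at this boundary.
        hiddens = tuple(h.detach() for h in hiddens)

lstm = torch.nn.LSTM(1, 8, batch_first=True)
linear = torch.nn.Linear(8, 1)
opt = torch.optim.SGD([*lstm.parameters(), *linear.parameters()], lr=0.01)
tbptt_sketch(lstm, linear, torch.rand(4, 2000, 1), torch.rand(4, 2000, 1), steps=100, optimizer=opt)
```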
1 reply
-
Returning hiddens from just one of the LSTMs gives the same TBPTT loss as returning hiddens from both LSTMs:

```python
import math
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class LSTMModel(LightningModule):
    """LSTM sequence-to-sequence model for testing TBPTT with automatic optimization."""

    def __init__(self, truncated_bptt_steps=2, input_size=1, hidden_size=8):
        super().__init__()
        torch.manual_seed(42)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.lstm2 = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        # NOTE: out_features=50 does not match y's last dimension of 1, so
        # F.mse_loss will broadcast here (the other snippets use out_features=1).
        self.linear = torch.nn.Linear(hidden_size * 2, 50)
        self.truncated_bptt_steps = truncated_bptt_steps
        self.automatic_optimization = True

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx, hiddens):
        x, y = batch
        hiddens1 = hiddens  # None on the first split of each batch
        pred1, hiddens1 = self.lstm(x, hiddens1)
        pred2, _ = self.lstm2(x)  # lstm2 restarts from a zero state on every split
        logits = torch.concat([pred1, pred2], dim=-1)
        linear = self.linear(logits)
        loss = F.mse_loss(linear, y)
        return {"loss": loss, "hiddens": hiddens1}

    def train_dataloader(self):
        dataset = TensorDataset(torch.rand(50, 2000, self.input_size), torch.rand(50, 2000, self.input_size))
        return DataLoader(dataset=dataset, batch_size=4)


model = LSTMModel(truncated_bptt_steps=100)
trainer = Trainer(
    default_root_dir="./",
    max_epochs=2,
    log_every_n_steps=2,
    enable_model_summary=False,
    enable_checkpointing=False,
)
trainer.fit(model)
```

loss = 0.0831 ~ 0.0832
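A side note of my own (not from the thread): with uniform-random targets, even the best input-independent prediction bottoms out at MSE = Var(U(0, 1)) = 1/12 ≈ 0.0833, which matches the reported loss, so this random-data setup may simply be unable to distinguish the two ways of handling hiddens. A quick check:

```python
import torch

# Empirical variance of U(0, 1): the MSE floor for any constant predictor.
y = torch.rand(1_000_000)
print(((y - y.mean()) ** 2).mean())  # ~0.0833, i.e. about 1/12
```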
-
I am building an RNN Transducer. An RNN Transducer consists of:
1. two separate LSTM parts (a transcription network and a prediction network),
2. a concatenation of the two LSTM-based parts' outputs,
3. a joint network.
It looks like this. So I am confused about how to use BPTT in `training_step`. I set `self.truncated_bptt_steps = 2`, the model's forward looks like this, each transnet and prednet input is of the form `lstm(inputs, prev_hidden_states)`, and `training_step` looks like this. Does this code look right? (A minimal sketch of the structure is shown below.)
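For illustration, here is a minimal sketch of the wiring described above; the class name and all sizes are hypothetical, not from the original code. A real RNN-T joint combines every (time, label) pair, while this sketch only shows the two-LSTM-plus-concat-plus-joint structure in question, with both hidden states threaded through so they could be returned for TBPTT.

```python
import torch

class TransducerSketch(torch.nn.Module):
    # Hypothetical sizes; the point is the structure, not the numbers.
    def __init__(self, feat_size=80, embed_size=16, hidden_size=32, vocab_size=10):
        super().__init__()
        self.transnet = torch.nn.LSTM(feat_size, hidden_size, batch_first=True)  # transcription network
        self.prednet = torch.nn.LSTM(embed_size, hidden_size, batch_first=True)  # prediction network
        self.joint = torch.nn.Linear(hidden_size * 2, vocab_size)                # joint network

    def forward(self, feats, labels, hiddens=None):
        # hiddens carries (transnet_state, prednet_state) across TBPTT splits.
        trans_h, pred_h = hiddens if hiddens is not None else (None, None)
        t_out, trans_h = self.transnet(feats, trans_h)
        p_out, pred_h = self.prednet(labels, pred_h)
        # Concatenate the two outputs and feed the joint network
        # (assumes equal sequence lengths, as in the snippets above).
        joined = self.joint(torch.cat([t_out, p_out], dim=-1))
        return joined, (trans_h, pred_h)

# Usage: inputs are (batch, time, features) since batch_first=True.
model = TransducerSketch()
out, hiddens = model(torch.rand(4, 100, 80), torch.rand(4, 100, 16))
print(out.shape)  # torch.Size([4, 100, 10])
```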