Commit 326289cb authored by marvnsch
Add all required models (Attention model TBD)

parent 86860abe
import os
import random
from pathlib import Path
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
import utils.pytorch
import utils.training
import data.preprocessing
project_root = Path(__file__).parent.parent.parent.absolute()
work_dir = Path(__file__).parent.absolute()
class Encoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int,
                 hidden_size: int, num_layers: int, torch_device: torch.device,
                 bidirectional: bool = False):
        super(Encoder, self).__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)

    def forward(self, x):
        # shape x : (sequence_len, batch_size)
        embedding = self.embedding(x)
        # shape embedding : (sequence_len, batch_size, embedding_size)
        output, (hidden, cell) = self.rnn(embedding)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int,
                 hidden_size: int, num_layers: int, output_size: int,
                 torch_device: torch.device, bidirectional: bool = False):
        super(Decoder, self).__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size if not bidirectional else 2 * hidden_size, output_size, device=torch_device)

    def forward(self, x, hidden, cell):
        x = x.reshape(1, -1)
        # shape x : (1, batch_size)
        embedding = self.embedding(x)
        # shape embedding : (1, batch_size, embedding_size)
        dec_output, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        # shape dec_output : (1, batch_size, num_directions * hidden_size)
        predictions = self.fc(dec_output)
        # shape predictions : (1, batch_size, vocab_len)
        predictions = predictions.squeeze(0)
        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder: Encoder, decoder: Decoder,
                 torch_device: torch.device, target_vocab_size: int,
                 teacher_forcing_ratio: float = 0.5):
        super(Seq2Seq, self).__init__()
        self.target_vocab_size = target_vocab_size
        self.torch_device = torch_device
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target=None):
        # target is required here: it provides the output length and the first
        # (<sos>) token that starts the decoder loop
        dec_batch_size = source.shape[1]
        target_len = target.shape[0]
        outputs = torch.zeros(target_len, dec_batch_size, self.target_vocab_size, device=self.torch_device)
        hidden, cell = self.encoder(source)
        x = target[0]
        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[t] = output
            best_guess = output.argmax(1)
            if target is not None and self.training:
                # teacher forcing: sometimes feed the ground-truth token instead
                # of the model's own prediction
                x = target[t] if random.random() < self.teacher_forcing_ratio else best_guess
            else:
                x = best_guess
        return outputs
# setup environment
source_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.en")
target_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.de")
device = utils.pytorch.get_available_device()
date_time_now = datetime.now().strftime("%m%d%Y_%H%M")
model_output_path = str(work_dir / f"./checkpoints/{date_time_now}_RNN_no_attention_unidirectional.pt")
# define hyperparameters
vocab_size = 10000
input_size_encoder = vocab_size
input_size_decoder = vocab_size
output_size_decoder = vocab_size
encoder_embedding_size = 300
decoder_embedding_size = 300
model_hidden_size = 1024
model_num_layers = 2
num_epochs = 50
learning_rate = 0.001
batch_size = 64
dataset_size = 100000
train_dev_val_split = (.8, .1, .1)
train_batches_count = int(train_dev_val_split[0] * dataset_size // batch_size)
# create model
encoder_net = Encoder(input_size=input_size_encoder,
                      embedding_size=encoder_embedding_size,
                      hidden_size=model_hidden_size,
                      num_layers=model_num_layers,
                      torch_device=device,
                      bidirectional=True)
decoder_net = Decoder(input_size=input_size_decoder,
                      embedding_size=decoder_embedding_size,
                      hidden_size=model_hidden_size,
                      num_layers=model_num_layers,
                      output_size=output_size_decoder,
                      torch_device=device,
                      bidirectional=True)
model = Seq2Seq(encoder=encoder_net, decoder=decoder_net, torch_device=device, target_vocab_size=vocab_size)
model.train()
# prepare training run
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# get training data
source_data, target_data = data.preprocessing.get_prepared_data(source_data_path=source_data_path,
                                                                 target_data_path=target_data_path)
source_tokenizer, target_tokenizer = data.preprocessing.create_tokenizers(source_data_path=source_data_path,
                                                                           target_data_path=target_data_path,
                                                                           vocab_size=vocab_size)
data_loader = data.preprocessing.data_loader(source=source_data,
                                             target=target_data,
                                             batch_size=batch_size,
                                             source_tokenizer=source_tokenizer,
                                             target_tokenizer=target_tokenizer,
                                             dataset_size=dataset_size,
                                             torch_device=device,
                                             data_split=train_dev_val_split)
source_data = None
target_data = None
# create checkpoints directory
try:
    os.mkdir(work_dir / "./checkpoints")
except FileExistsError:
    pass
# train the model
utils.training.train(model=model,
                     data_loader=data_loader,
                     criterion=criterion,
                     optimizer=optimizer,
                     num_epochs=num_epochs,
                     num_of_batches_per_epoch=train_batches_count,
                     saving_interval=500,
                     model_output_path=model_output_path)
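The Seq2Seq.forward above fills the output tensor step by step, feeding the decoder either the gold token (teacher forcing) or its own best guess. A minimal shape check of that loop, run on CPU with small dummy sizes (the dimensions and tensors below are illustrative only, not the training configuration of this script):

# illustrative shape check, not part of the committed file
_cpu = torch.device("cpu")
_enc = Encoder(input_size=100, embedding_size=16, hidden_size=32, num_layers=1, torch_device=_cpu)
_dec = Decoder(input_size=100, embedding_size=16, hidden_size=32, num_layers=1, output_size=100, torch_device=_cpu)
_model = Seq2Seq(encoder=_enc, decoder=_dec, torch_device=_cpu, target_vocab_size=100)
_src = torch.randint(0, 100, (7, 4))   # (sequence_len, batch_size)
_tgt = torch.randint(0, 100, (9, 4))   # (target_len, batch_size)
_out = _model(_src, _tgt)
assert _out.shape == (9, 4, 100)       # (target_len, batch_size, target_vocab_size)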
@@ -17,13 +17,14 @@ work_dir = Path(__file__).parent.absolute()
 class Encoder(nn.Module):
     def __init__(self, input_size: int, embedding_size: int,
-                 hidden_size: int, num_layers: int, torch_device: torch.device):
+                 hidden_size: int, num_layers: int, torch_device: torch.device,
+                 bidirectional: bool = False):
         super(Encoder, self).__init__()
         self.num_layers = num_layers
         self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
         self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
-                           num_layers=num_layers, device=torch_device)
+                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)
     def forward(self, x):
         # shape x : (sequence_len, batch_size)
@@ -36,13 +37,13 @@ class Encoder(nn.Module):
 class Decoder(nn.Module):
     def __init__(self, input_size: int, embedding_size: int,
                  hidden_size: int, num_layers: int, output_size: int,
-                 torch_device: torch.device):
+                 torch_device: torch.device, bidirectional: bool = False):
         super(Decoder, self).__init__()
         self.num_layers = num_layers
         self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
         self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
-                           num_layers=num_layers, device=torch_device)
+                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)
         self.fc = nn.Linear(hidden_size, output_size, device=torch_device)
     def forward(self, x, hidden, cell):
@@ -98,22 +99,22 @@ source_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.e
 target_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.de")
 device = utils.pytorch.get_available_device()
 date_time_now = datetime.now().strftime("%m%d%Y_%H%M")
-model_output_path = str(work_dir / f"./checkpoints/{date_time_now}RNN_no_attention_unidirectional")
+model_output_path = str(work_dir / f"./checkpoints/{date_time_now}_RNN_no_attention_unidirectional.pt")
 # define hyperparameters
 vocab_size = 10000
 input_size_encoder = vocab_size
 input_size_decoder = vocab_size
 output_size_decoder = vocab_size
-encoder_embedding_size = 200
-decoder_embedding_size = 200
+encoder_embedding_size = 300
+decoder_embedding_size = 300
 model_hidden_size = 1024
 model_num_layers = 2
-num_epochs = 10
+num_epochs = 50
 learning_rate = 0.001
 batch_size = 64
-dataset_size = 10000
+dataset_size = 100000
 train_dev_val_split = (.8, .1, .1)
 train_batches_count = int(train_dev_val_split[0] * dataset_size // batch_size)
@@ -146,6 +147,7 @@ source_data, target_data = data.preprocessing.get_prepared_data(source_data_path
 source_tokenizer, target_tokenizer = data.preprocessing.create_tokenizers(source_data_path=source_data_path,
                                                                            target_data_path=target_data_path,
                                                                            vocab_size=vocab_size)
+
 data_loader = data.preprocessing.data_loader(source=source_data,
                                              target=target_data,
                                              batch_size=batch_size,
@@ -172,5 +174,5 @@ utils.training.train(model=model,
                      optimizer=optimizer,
                      num_epochs=num_epochs,
                      num_of_batches_per_epoch=train_batches_count,
-                     saving_interval=1000,
+                     saving_interval=500,
                      model_output_path=model_output_path)
import os
import random
from pathlib import Path
from datetime import datetime
import torch
import torch.nn as nn
import torch.optim as optim
import utils.pytorch
import utils.training
import data.preprocessing
project_root = Path(__file__).parent.parent.parent.absolute()
work_dir = Path(__file__).parent.absolute()
class Encoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int,
                 hidden_size: int, num_layers: int, torch_device: torch.device,
                 bidirectional: bool = False):
        super(Encoder, self).__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)

    def forward(self, x):
        # shape x : (sequence_len, batch_size)
        embedding = self.embedding(x)
        # shape embedding : (sequence_len, batch_size, embedding_size)
        output, (hidden, cell) = self.rnn(embedding)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int,
                 hidden_size: int, num_layers: int, output_size: int,
                 torch_device: torch.device, bidirectional: bool = False):
        super(Decoder, self).__init__()
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, embedding_size, device=torch_device)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, device=torch_device, bidirectional=bidirectional)
        # note: sized for a unidirectional decoder; a bidirectional LSTM would
        # produce 2 * hidden_size features per step
        self.fc = nn.Linear(hidden_size, output_size, device=torch_device)

    def forward(self, x, hidden, cell):
        x = x.reshape(1, -1)
        # shape x : (1, batch_size)
        embedding = self.embedding(x)
        # shape embedding : (1, batch_size, embedding_size)
        dec_output, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        # shape dec_output : (1, batch_size, hidden_size)
        predictions = self.fc(dec_output)
        # shape predictions : (1, batch_size, vocab_len)
        predictions = predictions.squeeze(0)
        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder: Encoder, decoder: Decoder,
                 torch_device: torch.device, target_vocab_size: int,
                 teacher_forcing_ratio: float = 0.5):
        super(Seq2Seq, self).__init__()
        self.target_vocab_size = target_vocab_size
        self.torch_device = torch_device
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target=None):
        # target is required here: it provides the output length and the first
        # (<sos>) token that starts the decoder loop
        dec_batch_size = source.shape[1]
        target_len = target.shape[0]
        outputs = torch.zeros(target_len, dec_batch_size, self.target_vocab_size, device=self.torch_device)
        hidden, cell = self.encoder(source)
        x = target[0]
        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)
            outputs[t] = output
            best_guess = output.argmax(1)
            if target is not None and self.training:
                # teacher forcing: sometimes feed the ground-truth token instead
                # of the model's own prediction
                x = target[t] if random.random() < self.teacher_forcing_ratio else best_guess
            else:
                x = best_guess
        return outputs
# setup environment
source_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.en")
target_data_path = str(project_root / "data/tokenizer-data/news-commentary-v11.de")
device = utils.pytorch.get_available_device()
date_time_now = datetime.now().strftime("%m%d%Y_%H%M")
model_output_path = str(work_dir / f"./checkpoints/{date_time_now}_RNN_no_attention_unidirectional.pt")
# define hyperparameters
vocab_size = 10000
input_size_encoder = vocab_size
input_size_decoder = vocab_size
output_size_decoder = vocab_size
encoder_embedding_size = 300
decoder_embedding_size = 300
model_hidden_size = 1024
model_num_layers = 2
num_epochs = 50
learning_rate = 0.001
batch_size = 64
dataset_size = 100000
train_dev_val_split = (.8, .1, .1)
train_batches_count = int(train_dev_val_split[0] * dataset_size // batch_size)
# create model
encoder_net = Encoder(input_size=input_size_encoder,
                      embedding_size=encoder_embedding_size,
                      hidden_size=model_hidden_size,
                      num_layers=model_num_layers,
                      torch_device=device)
decoder_net = Decoder(input_size=input_size_decoder,
                      embedding_size=decoder_embedding_size,
                      hidden_size=model_hidden_size,
                      num_layers=model_num_layers,
                      output_size=output_size_decoder,
                      torch_device=device)
model = Seq2Seq(encoder=encoder_net, decoder=decoder_net, torch_device=device, target_vocab_size=vocab_size)
model.train()
# prepare training run
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# get training data
source_data, target_data = data.preprocessing.get_prepared_data(source_data_path=source_data_path,
                                                                 target_data_path=target_data_path)
source_tokenizer, target_tokenizer = data.preprocessing.create_tokenizers(source_data_path=source_data_path,
                                                                           target_data_path=target_data_path,
                                                                           vocab_size=vocab_size)
data_loader = data.preprocessing.data_loader(source=source_data,
                                             target=target_data,
                                             batch_size=batch_size,
                                             source_tokenizer=source_tokenizer,
                                             target_tokenizer=target_tokenizer,
                                             dataset_size=dataset_size,
                                             torch_device=device,
                                             data_split=train_dev_val_split)
source_data = None
target_data = None
# create checkpoints directory
try:
    os.mkdir(work_dir / "./checkpoints")
except FileExistsError:
    pass
# train the model
utils.training.train(model=model,
                     data_loader=data_loader,
                     criterion=criterion,
                     optimizer=optimizer,
                     num_epochs=num_epochs,
                     num_of_batches_per_epoch=train_batches_count,
                     saving_interval=500,
                     model_output_path=model_output_path)
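Note that forward() needs a target tensor even outside training (it reads the output length and the first token from it), so translating a single sentence requires a separate decoding loop. A minimal greedy-decoding sketch follows; sos_id, eos_id and max_len are assumed placeholder values, not taken from the tokenizers above, and the helper is not part of the commit:

def greedy_decode(model, source, sos_id=1, eos_id=2, max_len=50):
    # source : (sequence_len, 1) tensor of token ids for a single sentence
    model.eval()
    with torch.no_grad():
        hidden, cell = model.encoder(source)
        x = torch.tensor([sos_id], device=source.device)
        tokens = []
        for _ in range(max_len):
            output, hidden, cell = model.decoder(x, hidden, cell)
            x = output.argmax(1)              # (1,) id of the most likely next token
            if int(x.item()) == eos_id:
                break
            tokens.append(int(x.item()))
    return tokens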
@@ -5,13 +5,13 @@ from prettytable import PrettyTable
 def get_available_device() -> torch.device:
     if torch.cuda.is_available():
         device = torch.device("cuda")
-        print("device: cuda")
+        print("Using device: cuda")
     elif torch.backends.mps.is_available():
         device = torch.device("mps")
-        print("device: mps")
+        print("Using device: mps")
     else:
         device = torch.device("cpu")
-        print("device: cpu")
+        print("Using device: cpu")
     return device

import itertools
import torch
import progressbar
from utils.pytorch import print_model_parameters

def train(model, data_loader, num_of_batches_per_epoch: int,
          criterion: torch.nn.modules.loss, optimizer: torch.optim,
@@ -21,11 +20,15 @@ def train(model, data_loader, num_of_batches_per_epoch: int,
     :param saving_interval: the number of steps after which the model is saved
     """
+    print('Starting training for the following model')
+    print_model_parameters(model)
+    print()
     save_counter = 0
     model.train()
     for epoch in range(num_epochs):
-        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
+        print('----- Epoch {}/{} -----'.format(epoch + 1, num_epochs))
         # get data generators
         train_loader, _, val_loader = data_loader()
@@ -74,4 +77,4 @@ def train(model, data_loader, num_of_batches_per_epoch: int,
             loss_value += loss.item()
             val_batch_count += 1
-    print("--> loss : " + str(loss_value / val_batch_count))
\ No newline at end of file
+    print("loss : " + str(loss_value / val_batch_count))
\ No newline at end of file
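The diff only shows fragments of utils.training.train, so its loop body is not visible here. For orientation, a typical single optimisation step for a model with this output shape looks roughly like the sketch below (an assumed structure, not the committed implementation); the first time step is dropped because the decoder loop never writes outputs[0], and time and batch are flattened so nn.CrossEntropyLoss sees plain (N, vocab) logits:

def train_step(model, source, target, criterion, optimizer):
    # source : (sequence_len, batch_size), target : (target_len, batch_size)
    optimizer.zero_grad()
    output = model(source, target)                   # (target_len, batch_size, vocab)
    loss = criterion(output[1:].reshape(-1, output.shape[2]),
                     target[1:].reshape(-1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # optional safeguard
    optimizer.step()
    return loss.item()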