Commit f37b839d
authored Jan 17, 2024 by marvnsch

Add copy cat notebook
parent f6b60e06
Showing 1 changed file: copycat.ipynb (+93 additions, −25 deletions)
...
...
@@ -14,6 +14,7 @@
"import torch.optim as optim\n",
"import random\n",
"\n",
"from tinycss2 import tokenizer\n",
"from tokenizers import Tokenizer\n",
"from tokenizers.models import BPE\n",
"from tokenizers.trainers import BpeTrainer\n",
...
...
@@ -130,14 +131,18 @@
"source": [
"def training_data(source: list[str],\n",
" target: list[str],\n",
" dataset_size: int,\n",
" batch_size: int = 64,\n",
" sort: bool = True) -> tuple[torch.tensor, torch.tensor]:\n",
" tokenizer_de.no_padding()\n",
" tokenizer_en.no_padding()\n",
" \n",
" if dataset_size > len(source):\n",
" raise IndexError(\"Dataset size is larger than the source data\")\n",
" \n",
" # sort the training data if true\n",
" if sort:\n",
" temp = ([list(a) for a in zip(source
, target
)])\n",
" temp = ([list(a) for a in zip(source
[:dataset_size], target[:dataset_size]
)])\n",
" temp.sort(key=lambda s: len(s[0]) + len(s[1]))\n",
" source, target = list(zip(*temp))\n",
"\n",
...
...
@@ -176,9 +181,7 @@
"outputs": [],
"source": [
"# data test cell\n",
"print(len(de)/64)\n",
"\n",
"for idx, _ in enumerate(training_data(source=de, target=en, batch_size=64)):\n",
"for idx, _ in enumerate(training_data(source=de, target=en, dataset_size=10000, batch_size=64)):\n",
" print(idx)"
],
"metadata": {
...
...
@@ -236,7 +239,7 @@
" self.fc = nn.Linear(hidden_size, output_size)\n",
" \n",
" def forward(self, x, hidden, cell):\n",
" x = x.
view
(1, -1)\n",
" x = x.
reshape
(1, -1)\n",
" # shape x : (1, batch_size)\n",
" embedding = self.dropout(self.embedding(x))\n",
" # embedding shape : (1, batch_size, embedding_size)\n",
...
...
@@ -244,7 +247,7 @@
" # shape output : (1, batch_size, hidden_size)\n",
" predictions = self.fc(output)\n",
" # shape predictions : (1, batch_size, vocab_len)\n",
" predictions = predictions.squeeze(
1
)\n",
" predictions = predictions.squeeze(
0
)\n",
" \n",
" return predictions, hidden, cell\n",
" \n",
...
...
@@ -254,14 +257,13 @@
" self.encoder = encoder\n",
" self.decoder = decoder\n",
" \n",
" def forward(self, source, target, teacher_forcing_ratio: float = 0.5):\n",
" def forward(self, source, target
= None
, teacher_forcing_ratio: float = 0.5):\n",
" batch_size = source.shape[1]\n",
" target_len = target.shape[0]\n",
" \n",
" outputs = torch.zeros(target_len, batch_size, target_vocab_size)\n",
" \n",
" hidden, cell = self.encoder(source)\n",
" \n",
" x = target[0]\n",
" \n",
" for t in range(1, target_len):\n",
...
...
@@ -269,20 +271,22 @@
" \n",
" outputs[t] = output\n",
" \n",
" best_guess = output.argmax(
2
)\n",
" \n",
" best_guess = output.argmax(
1
)\n",
"
if target is not None:
\n",
" x = target[t] if random.random() < teacher_forcing_ratio else best_guess\n",
" else:\n",
" x = best_guess\n",
" return outputs"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-1
4T22:07:32.406911
Z",
"start_time": "2024-01-1
4T22:07:32.401944
Z"
"end_time": "2024-01-1
7T15:15:48.836634
Z",
"start_time": "2024-01-1
7T15:15:48.834028
Z"
}
},
"id": "3b2c4dbc74a1f144",
"execution_count":
129
"execution_count":
76
},
{
"cell_type": "markdown",
...
...
@@ -296,12 +300,21 @@
},
{
"cell_type": "code",
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/3\n"
]
}
],
"source": [
"# training hyperparameters\n",
"num_epochs =
20
\n",
"num_epochs =
3
\n",
"learning_rate = 0.001\n",
"batch_size = 64\n",
"batch_size = 128\n",
"dataset_size = 5000\n",
"\n",
"# model hyperparameters\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
...
...
@@ -312,8 +325,8 @@
"decoder_embedding_size = 300\n",
"hidden_size = 1024\n",
"num_layers = 2\n",
"encoder_dropout = 0.
5
\n",
"decoder_dropout = 0.
5
\n",
"encoder_dropout = 0.
1
\n",
"decoder_dropout = 0.
1
\n",
"\n",
"encoder_net = Encoder(input_size=input_size_encoder, \n",
" embedding_size=encoder_embedding_size, \n",
...
...
@@ -335,28 +348,83 @@
"\n",
"for epoch in range(num_epochs):\n",
" print('Epoch {}/{}'.format(epoch + 1, num_epochs))\n",
" loss_value = 0\n",
" \n",
" for batch_idx, (x_train, y_train) in enumerate(training_data(source=en, \n",
" target=en)):\n",
" target=en,\n",
" dataset_size=dataset_size,\n",
" batch_size=batch_size)):\n",
" optimizer.zero_grad()\n",
" \n",
" output = model(x_train, y_train)\n",
" output_debug = output\n",
" output = output[1:].reshape(-1, output.shape[2])\n",
" y_train = y_train[1:].reshape(-1)\n",
" \n",
" optimizer.zero_grad()\n",
" loss = criterion(output, y_train)\n",
" \n",
" loss.backward()\n",
" \n",
" torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)\n",
" print(batch_idx)\n",
" loss_value += loss.item()\n",
" \n",
" #torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)\n",
" \n",
" print(\"loss: \" + str(loss
.item(
)))
\n
"
" print(\"loss: \" + str(loss
_value / (dataset_size / batch_size
)))"
],
"metadata": {
"collapsed": false
"collapsed": false,
"is_executing": true,
"ExecuteTime": {
"start_time": "2024-01-17T15:18:52.707157Z"
}
},
"id": "ee166d65b3b975d",
"execution_count": null
},
{
"cell_type": "code",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([8, 1])\n",
"mischung Tragödie ergänzen ergänzen Mitbürger daß daß\n"
]
}
],
"source": [
"# test the model\n",
"\n",
"test_sentence_en = \"This is the first point.\"\n",
"test_sentence_de = \"Das ist der erste Punkt.\"\n",
"\n",
"test_sentence_en_encoded = tokenizer_en.encode(test_sentence_en)\n",
"test_sentence_de_encoded = tokenizer_de.encode(test_sentence_de)\n",
"\n",
"target_vector = torch.zeros(len(test_sentence_de_encoded.ids), 1)\n",
"\n",
"model.eval()\n",
"x_test = torch.transpose(torch.tensor([test_sentence_en_encoded.ids]), 0, 1)\n",
"y_test = torch.transpose(torch.tensor([test_sentence_de_encoded.ids]), 0, 1)\n",
"print(y_test.shape)\n",
"prediction = model(x_test, y_test, teacher_forcing_ratio=0.0)\n",
"\n",
"logits = torch.nn.functional.softmax(prediction, dim=2)\n",
"\n",
"result_ids = logits.argmax(dim=2)\n",
"\n",
"print(tokenizer_de.decode(list(result_ids)))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-17T15:18:46.448865Z",
"start_time": "2024-01-17T15:18:46.419022Z"
}
},
"id": "fa8a86342abe0a97",
"execution_count": 79
}
],
"metadata": {
...
...
%% Cell type:code id:initial_id tags:
``` python
import torch
import torch.nn as nn
import torch.optim as optim
import random

from tinycss2 import tokenizer  # unused; appears to be an accidental auto-import
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing
```
%% Cell type:markdown id:2b9477923b668978 tags:
# Data Preparation
%% Cell type:code id:dbc5f26f27746098 tags:
``` python
def load_data() -> tuple[list[str], list[str]]:
    with open("data/training-data/eup/europarl-v7.de-en.de", "r", encoding="utf8") as f:
        data_de = [line.rstrip("\n") for line in f]
    with open("data/training-data/eup/europarl-v7.de-en.en", "r", encoding="utf8") as f:
        data_en = [line.rstrip("\n") for line in f]

    ltd = set()  # save lines to delete later

    for i in range(max(len(data_de), len(data_en))):
        # If a line is empty in one file, merge the other file's sentence into the next line
        if data_de[i] == "":
            data_en[i + 1] = data_en[i] + " " + data_en[i + 1]
            ltd.add(i)
        if data_en[i] == "":
            data_de[i + 1] = data_de[i] + " " + data_de[i + 1]
            ltd.add(i)

        # Remove lines where the difference in word count is > 40%
        if abs(count_words(data_de[i]) - count_words(data_en[i])) / (max(count_words(data_de[i]), count_words(data_en[i])) + 1) > 0.4:
            ltd.add(i)

        # Remove lines < 3 words or > 10 words
        if max(count_words(data_de[i]), count_words(data_en[i])) < 3 or max(count_words(data_de[i]), count_words(data_en[i])) > 10:
            ltd.add(i)

    temp_de = [l for i, l in enumerate(data_de) if i not in ltd]
    data_de = temp_de
    temp_en = [l for i, l in enumerate(data_en) if i not in ltd]
    data_en = temp_en

    print(len(data_de), len(data_en))

    # Print 3 random sentence pairs
    ix = torch.randint(low=0, high=max(len(data_de), len(data_en)), size=(3,))
    for i in ix:
        print(f"Zeile: {i}\nDeutsch: {data_de[i]}\nEnglish: {data_en[i]}\n")

    print(f"\nNumber of lines: {len(data_de), len(data_en)}")

    return data_de, data_en


def count_words(string: str) -> int:
    return len(string.split())


de, en = load_data()

# setting the unknown token (e.g. for emojis)
tokenizer_en = Tokenizer(BPE(unk_token="[UNK]"))
tokenizer_de = Tokenizer(BPE(unk_token="[UNK]"))

# adding special tokens
# [UNK] : unknown word/token
# [SOS] : start-of-sequence token
# [EOS] : end-of-sequence token
# [PAD] : padding needed for encoder input
trainer = BpeTrainer(vocab_size=10000, special_tokens=["[UNK]", "[SOS]", "[EOS]", "[PAD]"])

tokenizer_en.pre_tokenizer = Whitespace()
tokenizer_de.pre_tokenizer = Whitespace()

tokenizer_en.train(["data/training-data/eup/europarl-v7.de-en.en"], trainer)
tokenizer_de.train(["data/training-data/eup/europarl-v7.de-en.de"], trainer)

# configure post processing
tokenizer_en.post_processor = TemplateProcessing(
    single="[SOS] $A [EOS]",
    special_tokens=[
        ("[SOS]", tokenizer_en.token_to_id("[SOS]")),
        ("[EOS]", tokenizer_en.token_to_id("[EOS]")),
    ],
)
tokenizer_de.post_processor = TemplateProcessing(
    single="[SOS] $A [EOS]",
    special_tokens=[
        ("[SOS]", tokenizer_de.token_to_id("[SOS]")),
        ("[EOS]", tokenizer_de.token_to_id("[EOS]")),
    ],
)

target_vocab_size = tokenizer_de.get_vocab_size()
source_vocab_size = tokenizer_en.get_vocab_size()
```
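As a quick illustration of what the cell above sets up (this check is not part of the commit; the sample sentence is made up), the trained tokenizers wrap every input in [SOS] … [EOS] via the TemplateProcessing post-processor and map it to the ids that training_data() later feeds the model:

``` python
# Sanity check (not in the commit): encode and decode one made-up sentence
# with the English tokenizer trained above.
sample = tokenizer_en.encode("This is a test sentence.")
print(sample.tokens)                    # subword tokens, wrapped in [SOS] ... [EOS]
print(sample.ids)                       # corresponding vocabulary ids
print(tokenizer_en.decode(sample.ids))  # round-trip back to text, special tokens stripped
```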
%% Cell type:code id:8edfacb67dc8c527 tags:
``` python
def training_data(source: list[str],
                  target: list[str],
                  dataset_size: int,
                  batch_size: int = 64,
                  sort: bool = True) -> tuple[torch.tensor, torch.tensor]:
    tokenizer_de.no_padding()
    tokenizer_en.no_padding()

    if dataset_size > len(source):
        raise IndexError("Dataset size is larger than the source data")

    # sort the training data if true
    if sort:
        temp = ([list(a) for a in zip(source[:dataset_size], target[:dataset_size])])
        temp.sort(key=lambda s: len(s[0]) + len(s[1]))
        source, target = list(zip(*temp))

    # iterate over the data in consecutive batches
    for i in range(0, len(source) - batch_size, batch_size):
        x_training_data = source[i:i + batch_size]
        y_training_data = target[i:i + batch_size]

        # tokenize data
        tokenizer_en.enable_padding(pad_id=3)
        x_training_data = tokenizer_en.encode_batch(x_training_data)
        tokenizer_de.enable_padding(pad_id=3)
        y_training_data = tokenizer_de.encode_batch(y_training_data)

        # extract ids for every sequence
        for j in range(batch_size):
            x_training_data[j] = x_training_data[j].ids
            y_training_data[j] = y_training_data[j].ids

        # put data into tensor
        x_training_data = torch.tensor(x_training_data)
        y_training_data = torch.tensor(y_training_data)

        # transpose tensors to match input requirements for lstm
        x_training_data = torch.transpose(x_training_data, 0, 1)
        y_training_data = torch.transpose(y_training_data, 0, 1)

        yield x_training_data, y_training_data
```
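To make the generator's output concrete, here is a minimal usage sketch (not part of the commit, and assuming the filtered corpus from load_data() contains at least 1000 sentence pairs): after the final transpose each batch is laid out as (sequence_len, batch_size), the batch_first=False convention that the nn.LSTM layers below expect.

``` python
# Usage sketch (not in the commit): fetch a single batch and inspect its layout.
x_batch, y_batch = next(training_data(source=de, target=en, dataset_size=1000, batch_size=4))
print(x_batch.shape)  # (padded source length, 4)
print(y_batch.shape)  # (padded target length, 4)
print(x_batch[:, 0])  # ids of the first source sentence, padded with id 3 ([PAD]) where needed
```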
%% Cell type:code id:524195fe40653308 tags:
``` python
# data test cell
for idx, _ in enumerate(training_data(source=de, target=en, dataset_size=10000, batch_size=64)):
    print(idx)
```
%% Cell type:markdown id:ca6d3d436fd31e33 tags:
### Model Definition
%% Cell type:code id:3b2c4dbc74a1f144 tags:
``` python
# Prepare model
class Encoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int, hidden_size: int,
                 num_layers: int, dropout_prob: float):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(dropout_prob)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, dropout=dropout_prob)

    def forward(self, x):
        # shape x : (sequence_len, batch_size)
        embedding = self.dropout(self.embedding(x))
        # shape embedding : (sequence_len, batch_size, embedding_size)
        output, (hidden, cell) = self.rnn(embedding)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self, input_size: int, embedding_size: int, hidden_size: int,
                 num_layers: int, output_size: int, dropout_prob: float):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(dropout_prob)
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                           num_layers=num_layers, dropout=dropout_prob)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        x = x.reshape(1, -1)
        # shape x : (1, batch_size)
        embedding = self.dropout(self.embedding(x))
        # embedding shape : (1, batch_size, embedding_size)
        output, (hidden, cell) = self.rnn(embedding, (hidden, cell))
        # shape output : (1, batch_size, hidden_size)
        predictions = self.fc(output)
        # shape predictions : (1, batch_size, vocab_len)
        predictions = predictions.squeeze(0)

        return predictions, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder: Encoder, decoder: Decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target=None, teacher_forcing_ratio: float = 0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]

        outputs = torch.zeros(target_len, batch_size, target_vocab_size)

        hidden, cell = self.encoder(source)

        x = target[0]

        for t in range(1, target_len):
            output, hidden, cell = self.decoder(x, hidden, cell)

            outputs[t] = output

            best_guess = output.argmax(1)

            if target is not None:
                x = target[t] if random.random() < teacher_forcing_ratio else best_guess
            else:
                x = best_guess

        return outputs
```
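Before training, a throwaway shape check can confirm the wiring (not part of the commit; the small layer sizes are arbitrary, and it assumes source_vocab_size and target_vocab_size from the tokenizer cell exist): Seq2Seq should return one row of scores per target position, i.e. a tensor of shape (target_len, batch_size, target_vocab_size), with position 0 left at zero because the loop starts at t = 1.

``` python
# Shape check (not in the commit): random token ids through an untrained model.
_enc = Encoder(input_size=source_vocab_size, embedding_size=32,
               hidden_size=64, num_layers=2, dropout_prob=0.1)
_dec = Decoder(input_size=target_vocab_size, embedding_size=32,
               hidden_size=64, num_layers=2, output_size=target_vocab_size,
               dropout_prob=0.1)
_model = Seq2Seq(encoder=_enc, decoder=_dec)

src = torch.randint(0, source_vocab_size, (12, 4))  # (source_len, batch_size)
tgt = torch.randint(0, target_vocab_size, (9, 4))   # (target_len, batch_size)
out = _model(src, tgt)
print(out.shape)  # torch.Size([9, 4, target_vocab_size])
```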
%% Cell type:markdown id:9854eaee8392caa1 tags:
### Model Training
%% Cell type:code id:ee166d65b3b975d tags:
``` python
# training hyperparameters
num_epochs = 3
learning_rate = 0.001
batch_size = 128
dataset_size = 5000

# model hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size_encoder = source_vocab_size
input_size_decoder = target_vocab_size
output_size_decoder = target_vocab_size
encoder_embedding_size = 300
decoder_embedding_size = 300
hidden_size = 1024
num_layers = 2
encoder_dropout = 0.1
decoder_dropout = 0.1

encoder_net = Encoder(input_size=input_size_encoder,
                      embedding_size=encoder_embedding_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      dropout_prob=encoder_dropout)

decoder_net = Decoder(input_size=input_size_decoder,
                      embedding_size=decoder_embedding_size,
                      hidden_size=hidden_size,
                      num_layers=num_layers,
                      dropout_prob=decoder_dropout,
                      output_size=output_size_decoder)

model = Seq2Seq(encoder=encoder_net, decoder=decoder_net)
criterion = nn.CrossEntropyLoss(ignore_index=3)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))
    loss_value = 0

    for batch_idx, (x_train, y_train) in enumerate(training_data(source=en,
                                                                  target=en,
                                                                  dataset_size=dataset_size,
                                                                  batch_size=batch_size)):
        optimizer.zero_grad()

        output = model(x_train, y_train)
        output_debug = output
        output = output[1:].reshape(-1, output.shape[2])
        y_train = y_train[1:].reshape(-1)

        loss = criterion(output, y_train)

        loss.backward()

        print(batch_idx)
        loss_value += loss.item()

        #torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

    print("loss: " + str(loss_value / (dataset_size / batch_size)))
```
%% Output
Epoch 1/3
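Note that, at least in the code shown, the loop backpropagates but never calls optimizer.step(), so the weights would stay at their random initialization; this is consistent with the noise the test cell below produces. For comparison, here is a minimal sketch of one complete update step reusing the objects defined above (not part of the commit):

``` python
# One full optimization step (sketch, not in the commit), reusing model,
# criterion, optimizer and the hyperparameters from the training cell.
x_train, y_train = next(training_data(source=en, target=en,
                                      dataset_size=dataset_size,
                                      batch_size=batch_size))
optimizer.zero_grad()
output = model(x_train, y_train)                           # (target_len, batch, vocab)
loss = criterion(output[1:].reshape(-1, output.shape[2]),  # position 0 is never predicted
                 y_train[1:].reshape(-1))
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
optimizer.step()                                           # the call the loop above omits
print(loss.item())
```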
%% Cell type:code id:fa8a86342abe0a97 tags:
``` python
# test the model

test_sentence_en = "This is the first point."
test_sentence_de = "Das ist der erste Punkt."

test_sentence_en_encoded = tokenizer_en.encode(test_sentence_en)
test_sentence_de_encoded = tokenizer_de.encode(test_sentence_de)

target_vector = torch.zeros(len(test_sentence_de_encoded.ids), 1)

model.eval()
x_test = torch.transpose(torch.tensor([test_sentence_en_encoded.ids]), 0, 1)
y_test = torch.transpose(torch.tensor([test_sentence_de_encoded.ids]), 0, 1)
print(y_test.shape)
prediction = model(x_test, y_test, teacher_forcing_ratio=0.0)

logits = torch.nn.functional.softmax(prediction, dim=2)

result_ids = logits.argmax(dim=2)

print(tokenizer_de.decode(list(result_ids)))
```
%% Output
torch.Size([8, 1])
mischung Tragödie ergänzen ergänzen Mitbürger daß daß
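Two small notes on the inference cell, with a hedged cleanup sketch (not part of the commit): the variable named logits actually holds softmax probabilities, and since softmax is monotonic the argmax can be taken directly on the raw scores; tokenizer_de.decode() is documented to take a plain list of ids, so flattening the result tensor to ints first is a bit more robust than passing list(result_ids), which yields one-element tensors.

``` python
# Tidier decoding (sketch, not in the commit): argmax on the raw scores and
# flatten to plain ints before decoding.
result_ids = prediction.argmax(dim=2).squeeze(1).tolist()  # (target_len, 1) -> list[int]
print(tokenizer_de.decode(result_ids))
```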