Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Nandeesh Patel Gowdru Prabushanker
master-thesis
Commits
1e831d89
Commit
1e831d89
authored
Aug 27, 2021
by
Nandeesh Patel Gowdru Prabushanker
Browse files
Changing the folder structure for python module
parent
8ce36d44
Changes
12
Hide whitespace changes
Inline
Side-by-side
NEL/.gitignore
0 → 100644
View file @
1e831d89
*.props
data/*
saved_model/*
\ No newline at end of file
embeddings_index
/app.ini
→
NEL
/app.ini
View file @
1e831d89
File moved
embeddings_index
/app.py
→
NEL
/app.py
View file @
1e831d89
File moved
embeddings_index
/index_manager.py
→
NEL
/index_manager.py
View file @
1e831d89
File moved
nel/model
/model.py
→
NEL
/model.py
View file @
1e831d89
import
pandas
as
pd
import
numpy
as
np
import
tensorflow.keras.models
from
tensorflow.keras.layers
import
Input
,
ReLU
,
Dense
,
LSTM
,
Bidirectional
from
tensorflow.keras.models
import
Model
from
tensorflow.keras.losses
import
CosineSimilarity
from
sklearn.metrics.pairwise
import
cosine_similarity
from
sklearn.metrics
import
f1_score
from
utils
import
prepare_training_data
from
os.path
import
exists
def prepare_data(path="./data/embeddings.tsv"):
    """Load the entity-embedding table and index it by entity.

    Args:
        path: Tab-separated file with a header row that contains at least
            the columns ``entity`` and ``embeddings``. Defaults to the
            location that was previously hard-coded.

    Returns:
        A tuple ``(embeddings_df, embeddings_dict)``: the DataFrame exactly
        as read by pandas, and a dict mapping every ``entity`` value to the
        ``embeddings`` cell of the same row. When an entity occurs more
        than once, the last row wins.

    Raises:
        KeyError: If the file lacks an ``entity`` or ``embeddings`` column.
    """
    embeddings_df = pd.read_csv(path, sep="\t")
    # Pair the two columns directly instead of materialising one Series per
    # row with iterrows(); a missing column now fails loudly rather than
    # silently producing a {None: None} mapping.
    embeddings_dict = dict(
        zip(embeddings_df["entity"], embeddings_df["embeddings"])
    )
    return embeddings_df, embeddings_dict
def
train_model
(
train_x
,
train_y
):
...
...
@@ -56,18 +66,21 @@ def evaluate(x_test, y_test):
if __name__ == "__main__":
    # Script entry point: reuse a previously saved linker model when one
    # exists, otherwise train and save one, then evaluate on the test split.
    #
    # NOTE(review): evaluate() only receives the test arrays, so it
    # presumably reads the module-level `entity_linker`; the statements
    # below therefore stay at module scope instead of moving into a
    # function. Confirm against evaluate()'s body.
    model_path = "./saved_model/linker"

    # The embeddings are needed on both paths (the test set is always built
    # from them), so load them before branching; binding them only in the
    # training branch leaves the name undefined when a saved model exists.
    data, embeddings = prepare_data()

    if exists(model_path):
        entity_linker = tensorflow.keras.models.load_model(model_path)
        print("trained model already exists")
    else:
        # Train on the training split. Training on "./data/test_data.csv"
        # would fit the model on the same file it is evaluated on below.
        # NOTE(review): assumes ./data/train_data.csv exists in the new
        # folder layout - confirm.
        x_train, y_train = prepare_training_data(
            embeddings, "./data/train_data.csv"
        )
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        entity_linker = train_model(x_train, y_train)
        # Drop the references to the training arrays before the test
        # arrays are built.
        x_train = None
        y_train = None
        entity_linker.save(model_path)

    X_test, Y_test = prepare_training_data(embeddings, "./data/test_data.csv")
    # The embedding dict is no longer needed once the test set is built.
    embeddings = None
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)
    evaluate(X_test, Y_test)
    print("Done")
embeddings_index
/readme.md
→
NEL
/readme.md
View file @
1e831d89
File moved
embeddings_index
/requirements.txt
→
NEL
/requirements.txt
View file @
1e831d89
...
...
@@ -4,4 +4,9 @@ Flask-Cors==3.0.10
jproperties==2.1.1
requests==2.26.0
uWSGI==2.0.19.1
dask==2021.8.0
\ No newline at end of file
dask==2021.8.0
numpy==1.19.5
pandas==1.3.1
scikit-learn==0.24.2
scipy==1.7.1
tensorflow==2.6.0
embeddings_index
/run.py
→
NEL
/run.py
View file @
1e831d89
File moved
nel/model
/utils.py
→
NEL
/utils.py
View file @
1e831d89
import
numpy
as
np
def _lookup_embedding(entity, entity_embeddings, embedding_dim, fill_value):
    """Return the float vector stored for ``entity``, or a constant vector
    of length ``embedding_dim`` filled with ``fill_value`` when the entity
    has no entry."""
    try:
        return np.array(entity_embeddings[entity].split(","), dtype="float")
    except KeyError:
        return np.full(embedding_dim, fill_value, dtype="float")


def process_line(text, entity_embeddings, embedding_dim=100, max_candidates=30):
    """Turn one tab-separated record into candidate vectors and a gold vector.

    The record is split on tabs. Field 2 names the gold entity; starting at
    field 3, every second field (3, 5, 7, ...) names a candidate entity and
    the fields in between are ignored.

    Args:
        text: One record, without a trailing newline.
        entity_embeddings: Mapping from entity name to a comma-separated
            string of numbers.
        embedding_dim: Length of the placeholder vectors used for unknown
            entities and for padding. Defaults to 100, the value that was
            previously hard-coded.
        max_candidates: Exact number of candidate vectors returned.
            Defaults to 30, the value that was previously hard-coded.

    Returns:
        A tuple ``(candidates, gold)``. ``candidates`` is a list of exactly
        ``max_candidates`` float arrays: extra candidates are dropped, and
        unknown candidates and padding slots are vectors of -1. ``gold`` is
        the gold entity's float array, or a zero vector when the gold
        entity is unknown.

    Raises:
        IndexError: If the record has fewer than three fields.
    """
    fields = text.split("\t")
    gold = _lookup_embedding(fields[2], entity_embeddings, embedding_dim, 0.0)

    # Candidate names sit at the odd indices from 3 onward. Anything beyond
    # max_candidates would be discarded anyway, so it is never looked up.
    candidate_names = fields[3::2][:max_candidates]
    candidates = [
        _lookup_embedding(name, entity_embeddings, embedding_dim, -1.0)
        for name in candidate_names
    ]

    # Pad with distinct float placeholder vectors so every record yields the
    # same number of candidates and a uniform dtype.
    missing = max_candidates - len(candidates)
    candidates.extend(
        np.full(embedding_dim, -1.0, dtype="float") for _ in range(missing)
    )
    return candidates, gold
def prepare_training_data(entity_embeddings, path):
    """Read a tab-separated data file and build model inputs and targets.

    Every non-blank line is handed to ``process_line``, which returns the
    candidate vectors and the gold vector for that record.

    Args:
        entity_embeddings: Mapping from entity name to a comma-separated
            string of numbers, passed through to ``process_line``.
        path: Path of the tab-separated data file to read.

    Returns:
        A tuple ``(x_train, y_train)`` of equally long lists: ``x_train[i]``
        is the candidate list and ``y_train[i]`` the gold vector of the
        i-th non-blank line.
    """
    x_train = []
    y_train = []
    with open(path, "r") as train_file:
        for raw_line in train_file:
            line = raw_line.strip()
            # A blank line is skipped rather than treated as end of file, so
            # a stray empty line cannot silently truncate the data set.
            if not line:
                continue
            # process_line does the splitting and the embedding lookups;
            # it expects the raw record string, not a pre-split list.
            candidates, gold = process_line(line, entity_embeddings)
            x_train.append(candidates)
            y_train.append(gold)
    return x_train, y_train
embeddings_index/.gitignore
deleted
100644 → 0
View file @
8ce36d44
*.props
\ No newline at end of file
nel/model/__init__.py
deleted
100644 → 0
View file @
8ce36d44
nel/requirements.txt
deleted
100644 → 0
View file @
8ce36d44
numpy==1.19.5
pandas==1.3.1
scikit-learn==0.24.2
scipy==1.7.1
tensorflow==2.6.0
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment