Commit 1e831d89 authored by Nandeesh Patel Gowdru Prabushanker
Browse files

Changing the folder structure for python module

parent 8ce36d44
*.props
data/*
saved_model/*
\ No newline at end of file
import pandas as pd
import numpy as np
import tensorflow.keras.models
from tensorflow.keras.layers import Input, ReLU, Dense, LSTM, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.losses import CosineSimilarity
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import f1_score
from utils import prepare_training_data
from os.path import exists
def prepare_data(path="./data/embeddings.tsv"):
    """Load the entity-embedding table and index it by entity.

    Args:
        path: Tab-separated file containing an ``entity`` column and an
            ``embeddings`` column. Defaults to the location this script has
            always read from, so zero-argument calls behave as before.

    Returns:
        A ``(embeddings_df, embeddings_dict)`` tuple: the DataFrame exactly
        as pandas parsed it, and a dict mapping each ``entity`` value to the
        ``embeddings`` cell of the same row. If an entity appears on several
        rows, the last row wins.

    Raises:
        KeyError: If the file has no ``entity`` or ``embeddings`` column.
    """
    embeddings_df = pd.read_csv(path, sep="\t")
    # Pair the two columns directly rather than building a Series per row
    # with iterrows(); later duplicates still overwrite earlier ones.
    embeddings_dict = dict(
        zip(embeddings_df["entity"], embeddings_df["embeddings"])
    )
    return embeddings_df, embeddings_dict
def train_model(train_x, train_y):
......@@ -56,18 +66,21 @@ def evaluate(x_test, y_test):
if __name__ == "__main__":
    # Script entry point: obtain a trained linker (load it if a saved copy
    # exists, otherwise train and save one), then evaluate it on the test set.
    #
    # The embeddings are needed on both paths because the test set is always
    # built below, so they are loaded before branching.
    data, embeddings = prepare_data()
    if exists("./saved_model/linker"):
        entity_linker = tensorflow.keras.models.load_model("./saved_model/linker")
        print("trained model already exists")
    else:
        # Train on the training split. Reading the test split here would make
        # the evaluation below measure performance on data the model has seen.
        x_train, y_train = prepare_training_data(embeddings, "./data/train_data.csv")
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        entity_linker = train_model(x_train, y_train)
        # Release the training arrays before the test set is materialised.
        x_train = None
        y_train = None
        entity_linker.save("./saved_model/linker")
    X_test, Y_test = prepare_training_data(embeddings, "./data/test_data.csv")
    # The raw embedding table is no longer needed once the test set is built.
    embeddings = None
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)
    # NOTE(review): evaluate() is defined outside this view and is not passed
    # the model; presumably it reads the module-level `entity_linker` - confirm.
    evaluate(X_test, Y_test)
    print("Done")
......@@ -4,4 +4,9 @@ Flask-Cors==3.0.10
jproperties==2.1.1
requests==2.26.0
uWSGI==2.0.19.1
dask==2021.8.0
\ No newline at end of file
dask==2021.8.0
numpy==1.19.5
pandas==1.3.1
scikit-learn==0.24.2
scipy==1.7.1
tensorflow==2.6.0
import numpy as np
def process_line(text, entity_embeddings, *, embedding_dim=100, max_candidates=30):
    """Turn one tab-separated record into candidate vectors and a gold vector.

    Field 2 of the record names the gold entity. Starting at field 3, every
    second field (3, 5, 7, ...) names a candidate entity; the fields in
    between are ignored.

    Args:
        text: One record, fields separated by tabs.
        entity_embeddings: Mapping from entity name to a comma-separated
            string of numbers.
        embedding_dim: Length of the placeholder vectors used for entities
            that are missing from ``entity_embeddings`` and for padding.
        max_candidates: Exact number of candidate vectors returned; shorter
            lists are padded and longer ones are truncated.

    Returns:
        A ``(candidates, gold)`` tuple. ``candidates`` is a list of exactly
        ``max_candidates`` float arrays, where unknown and padding entries
        are filled with -1. ``gold`` is a float array, all zeros when the
        gold entity is unknown.

    Raises:
        IndexError: If ``text`` has fewer than three tab-separated fields.
    """
    fields = text.split("\t")

    def to_vector(entity, fill):
        # Unknown entities get a constant float placeholder so every vector
        # in the result shares one dtype.
        try:
            return np.array(entity_embeddings[entity].split(","), dtype="float")
        except KeyError:
            return np.full(embedding_dim, fill, dtype="float")

    gold = to_vector(fields[2], 0.0)

    # Candidate names sit at indices 3, 5, 7, ...; only the first
    # `max_candidates` of them are ever kept, so only those are converted.
    candidates = [to_vector(name, -1.0) for name in fields[3::2][:max_candidates]]

    # Pad with independent placeholder arrays up to the fixed length.
    missing = max_candidates - len(candidates)
    candidates.extend(
        np.full(embedding_dim, -1.0, dtype="float") for _ in range(missing)
    )
    return candidates, gold
def prepare_training_data(entity_embeddings, path):
    """Read a tab-separated data file into candidate and gold vector lists.

    Each record is converted by ``process_line``; see that function for the
    record layout and the padding rules.

    Args:
        entity_embeddings: Mapping from entity name to a comma-separated
            string of numbers, passed through to ``process_line``.
        path: File to read, one record per line.

    Returns:
        A ``(x_train, y_train)`` tuple of parallel lists: ``x_train[i]`` is
        the candidate-vector list for record ``i`` and ``y_train[i]`` is its
        gold vector.
    """
    x_train = []
    y_train = []
    with open(path, "r") as train_file:
        # Reading stops at the first line that is empty after stripping,
        # which is also how end-of-file is detected.
        line = train_file.readline().strip()
        while line != "":
            # Pass the raw string: process_line does its own tab splitting,
            # so splitting here first would hand it a list and fail.
            candidates, gold = process_line(line, entity_embeddings)
            x_train.append(candidates)
            y_train.append(gold)
            line = train_file.readline().strip()
    return x_train, y_train
*.props
\ No newline at end of file
numpy==1.19.5
pandas==1.3.1
scikit-learn==0.24.2
scipy==1.7.1
tensorflow==2.6.0
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment