Skip to content
Snippets Groups Projects
Commit d473aabc authored by AjUm-HEIDI
Browse files

Set the bag of words

parent 8a1dddc6
No related branches found
No related tags found
No related merge requests found
......@@ -11,7 +11,7 @@ from CustomDataset.Text.Base import Base
class DBLP(Base):
def __init__(self, path='rawData/dblp', bag_of_words_size=100, remove_all_false_values=False):
def __init__(self, path='rawData/dblp', bag_of_words_size=500, remove_all_false_values=False):
"""
Loads the DBLP dataset and constructs a HeteroData object for PyTorch Geometric.
......
......@@ -6,7 +6,7 @@ from Utils.Utils import group_themes
from CustomDataset.Text.Base import Base
class IMDB(Base):
def __init__(self, path='rawData/imdb/movie_metadata.csv', bag_of_words_size=100, remove_all_false_values=False):
def __init__(self, path='rawData/imdb/movie_metadata.csv', bag_of_words_size=300, remove_all_false_values=False):
"""
Initialize the IMDB dataset.
......@@ -32,7 +32,7 @@ class IMDB(Base):
)
# Filter out movies without the desired genres or zero bag-of-words features
valid_movie_indices = torch.where(movie_tensor.sum(dim=1) != 0)[0]
valid_movie_indices = torch.where(movie_tensor.sum(dim=1) >=2)[0]
filtered_index_map = {i.item(): idx for idx, i in enumerate(valid_movie_indices)}
# Apply the filtering logic
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment