Skip to content
Snippets Groups Projects
Commit 27296f36 authored by AjUm-HEIDI's avatar AjUm-HEIDI
Browse files

Add minimum features

parent d473aabc
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@ from Utils.Utils import group_themes
from CustomDataset.Text.Base import Base
class IMDB(Base):
def __init__(self, path='rawData/imdb/movie_metadata.csv', bag_of_words_size=300, remove_all_false_values=False):
def __init__(self, path='rawData/imdb/movie_metadata.csv', bag_of_words_size=300, min_features=1):
"""
Initialize the IMDB dataset.
......@@ -14,7 +14,8 @@ class IMDB(Base):
path (str): Path to the IMDB directory.
"""
super().__init__(path=path, bag_of_words_size=bag_of_words_size, remove_all_false_values=remove_all_false_values)
super().__init__(path=path, bag_of_words_size=bag_of_words_size)
self.min_features=min_features
self._initialize()
def _initialize(self):
......@@ -32,7 +33,7 @@ class IMDB(Base):
)
# Filter out movies without the desired genres or zero bag-of-words features
valid_movie_indices = torch.where(movie_tensor.sum(dim=1) >=2)[0]
valid_movie_indices = torch.where(movie_tensor.sum(dim=1) >= self.min_features)[0]
filtered_index_map = {i.item(): idx for idx, i in enumerate(valid_movie_indices)}
# Apply the filtering logic
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment