Commit 75174efd authored by AjUm-HEIDI

Add number of iterations

parent 8cbf91f4
@@ -87,7 +87,7 @@ class DiscriminativeExplainer:
return OWLObjectMinCardinality(ce.get_cardinality(), ce.get_property(), ce2)
return ce
-def __init__(self, gnn, data: Union[HeteroData, Data], namespace = "http://example.org/", owl_graph_path = "./owlGraphs/example.owl", generate_new_owl_file: bool = False, create_nominals: bool = False, add_edge_counts: bool = False, create_data_properties_as_object: bool = False, full_edge_name: bool = False, ignore_nodes: bool = False, high_level_concepts: dict = None) -> None:
+def __init__(self, gnn, data: Union[HeteroData, Data], namespace = "http://example.org/", owl_graph_path = "./owlGraphs/example.owl", generate_new_owl_file: bool = False, create_nominals: bool = False, add_edge_counts: bool = False, create_data_properties_as_boolean: bool = False, full_edge_name: bool = False, ignore_nodes: bool = False, high_level_concepts: dict = None, create_high_level_concepts_as_boolean: bool = False,) -> None:
"""Initializes the explainer based on the given GNN and the Dataset. After the initialization the object should
be able to produce explanations of single labels.
@@ -99,7 +99,8 @@ class DiscriminativeExplainer:
generate_new_owl_file: Whether to generate a new OWL file
create_nominals: Create separate classes for each individual
add_edge_counts: Add edge count properties
-create_data_properties_as_object: Convert data properties to object properties
+create_data_properties_as_boolean: Write data properties as boolean values
+create_high_level_concepts_as_boolean: Write high-level concepts as boolean values
full_edge_name: Use full names for edges
"""
self.gnn = gnn
@@ -108,13 +109,14 @@ class DiscriminativeExplainer:
self.owl_graph_path = owl_graph_path
self.create_nominals = create_nominals
self.ignore_nodes = ignore_nodes
+self.create_high_level_concepts_as_boolean = create_high_level_concepts_as_boolean
self.is_multi_graph = not isinstance(self.data, HeteroData)
self.classNames = find_classes_with_y_labels(self.data, first_only=False) if not self.is_multi_graph else []
self.high_level_concepts = high_level_concepts
if generate_new_owl_file and os.path.isfile(self.owl_graph_path):
os.remove(self.owl_graph_path)
if not os.path.isfile(self.owl_graph_path):
-self.owlGraph = convert_to_owl(data=self.data, namespace=self.namespace, owlGraphPath=self.owl_graph_path, high_level_concepts=self.high_level_concepts, create_nominals=create_nominals, add_edge_counts=add_edge_counts, create_data_properties_as_object = create_data_properties_as_object, full_edge_name=full_edge_name, ignore_nodes=self.ignore_nodes)
+self.owlGraph = convert_to_owl(data=self.data, namespace=self.namespace, owlGraphPath=self.owl_graph_path, high_level_concepts=self.high_level_concepts, create_nominals=create_nominals, add_edge_counts=add_edge_counts, create_data_properties_as_boolean = create_data_properties_as_boolean, full_edge_name=full_edge_name, ignore_nodes=self.ignore_nodes, create_high_level_concepts_as_boolean=create_high_level_concepts_as_boolean)
self.owlGraph.buildGraph()
self.knowledge_base = KnowledgeBase(path=self.owl_graph_path)
@@ -249,6 +251,6 @@ if __name__ == "__main__":
data,
"http://example.org/",
owl_graph_path = "/Users/ajay/Documents/gitlabThesis/Thesis/owlGraphs/dblp_5_protege.rdf",
-create_data_properties_as_object=True,
+create_data_properties_as_boolean=True,
generate_new_owl_file=False)
hypotheses, model = explainer.explain(0, 5, debug=True, max_runtime=30, num_generations=400, quality_func=F1())
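The returned hypotheses can be rendered to DL syntax for inspection. A minimal sketch, assuming each hypothesis exposes its learned class expression through a .concept attribute, as ontolearn's search-tree nodes do:

from ontolearn.owlapy.render import DLSyntaxObjectRenderer

renderer = DLSyntaxObjectRenderer()
for h in hypotheses:
    # render each learned OWL class expression in description-logic syntax
    print(renderer.render(h.concept))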
File suppressed by a .gitattributes entry, the file's encoding is unsupported, or the file size exceeds the limit.
@@ -35,21 +35,23 @@ class SingleGraphOWLConverter(BaseOWLConverter):
def __init__(self, data: HeteroData, namespace: str, owlGraphPath: str,
high_level_concepts: dict = None,
create_data_properties: bool = True,
-create_data_properties_as_object: bool = False,
+create_data_properties_as_boolean: bool = False,
add_edge_counts: bool = False,
create_nominals: bool = False,
full_edge_name: bool = True,
add_false_values: bool = False,
+create_high_level_concepts_as_boolean: bool = True,
**kwargs):
super().__init__(namespace, owlGraphPath)
self.dataset = data
self.high_level_concepts = high_level_concepts or {}
self.create_data_properties = create_data_properties
-self.create_data_properties_as_object = create_data_properties_as_object
+self.create_data_properties_as_boolean = create_data_properties_as_boolean
self.add_edge_counts = add_edge_counts
self.create_nominals = create_nominals
self.full_edge_name = full_edge_name
self.add_false_values = add_false_values
+self.create_high_level_concepts_as_boolean = create_high_level_concepts_as_boolean
self.kwargs = kwargs
def _build(self):
@@ -99,7 +101,7 @@ class SingleGraphOWLConverter(BaseOWLConverter):
# Builds OWL datatype properties (attributes) for each node type in the heterodata.
def _buildDataProperties(self):
classNamespace = Namespace(self.namespace)
-xsdRange = XSD.boolean if self.create_data_properties_as_object else XSD.double
+xsdRange = XSD.boolean if self.create_data_properties_as_boolean else XSD.double
for node in self.dataset.node_types:
if "x" in self.dataset[node]:
n = self.dataset[node].x.size(1)
@@ -107,7 +109,7 @@ class SingleGraphOWLConverter(BaseOWLConverter):
propertyObjectPropertyName = f'{node}_property_{i+1}'
if "xKeys" in self.dataset[node] and len(self.dataset[node].xKeys) > i:
propertyObjectPropertyName = self.dataset[node].xKeys[i]
-if self.create_data_properties_as_object:
+if self.create_data_properties_as_boolean:
propertyObjectPropertyName = "has_" + propertyObjectPropertyName
propertyObjectProperty = classNamespace[propertyObjectPropertyName]
self.graph.add((propertyObjectProperty, RDF.type, OWL.DatatypeProperty))
@@ -118,12 +120,12 @@ class SingleGraphOWLConverter(BaseOWLConverter):
if node in self.high_level_concepts:
for theme in self.high_level_concepts[node].get('themes', []):
propertyObjectPropertyName = f'has_theme_{theme}'
-if self.create_data_properties_as_object:
-propertyObjectPropertyName = propertyObjectPropertyName
+if not self.create_high_level_concepts_as_boolean:
+propertyObjectPropertyName = propertyObjectPropertyName + "_count"
theme_namespace = classNamespace[propertyObjectPropertyName]
self.graph.add((theme_namespace, RDF.type, OWL.DatatypeProperty))
self.graph.add((theme_namespace, RDFS.domain, classNamespace[node]))
-self.graph.add((theme_namespace, RDFS.range, XSD.double))
+self.graph.add((theme_namespace, RDFS.range, XSD.boolean if self.create_high_level_concepts_as_boolean else XSD.double))
def _buildObjectProperties(self):
classNamespace = Namespace(self.namespace)
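The theme branch above declares one OWL datatype property per high-level concept: boolean mode keeps the plain has_theme_<theme> name with an xsd:boolean range, while count mode appends a _count suffix and keeps xsd:double. A standalone sketch of that naming and range logic using rdflib (the helper and its example arguments are illustrative, not part of the codebase):

from rdflib import Graph, Namespace
from rdflib.namespace import OWL, RDF, RDFS, XSD

def declare_theme_property(graph, ns, node, theme, as_boolean):
    # boolean mode: has_theme_<theme> ranged over xsd:boolean;
    # count mode: has_theme_<theme>_count ranged over xsd:double
    name = f"has_theme_{theme}" if as_boolean else f"has_theme_{theme}_count"
    prop = ns[name]
    graph.add((prop, RDF.type, OWL.DatatypeProperty))
    graph.add((prop, RDFS.domain, ns[node]))
    graph.add((prop, RDFS.range, XSD.boolean if as_boolean else XSD.double))
    return prop

g = Graph()
declare_theme_property(g, Namespace("http://example.org/"), "paper", "databases", as_boolean=True)

Note that the theme assertions added later in the diff use the unsuffixed has_theme_<theme> name, so in count mode the declared property and the asserted property appear to diverge.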
@@ -158,10 +160,10 @@ class SingleGraphOWLConverter(BaseOWLConverter):
propertyObjectPropertyName = f'{node_type}_property_{col_idx+1}'
if "xKeys" in self.dataset[node_type] and len(self.dataset[node_type].xKeys) > col_idx:
propertyObjectPropertyName = self.dataset[node_type].xKeys[col_idx]
-if self.create_data_properties_as_object:
+if self.create_data_properties_as_boolean:
propertyObjectPropertyName = "has_" + propertyObjectPropertyName
propertyObjectProperty = classNamespace[propertyObjectPropertyName]
-if self.create_data_properties_as_object:
+if self.create_data_properties_as_boolean:
val = True if val != 0 else False
if self.add_false_values or val or val != 0:
self.graph.add((newNode, propertyObjectProperty, Literal(val)))
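This guard keeps the ABox sparse: zero-valued features are skipped unless add_false_values is set, and in boolean mode any non-zero value collapses to True. The same coercion and filtering in isolation, as a hypothetical helper:

from rdflib import Graph, Literal, Namespace

def assert_feature(graph, ns, individual, prop_name, val, as_boolean, add_false_values=False):
    if as_boolean:
        # any non-zero feature value becomes the boolean True
        val = val != 0
    # zero/False values are written only when explicitly requested
    if add_false_values or val != 0:
        graph.add((individual, ns[prop_name], Literal(val)))

Since val and val != 0 have the same truth value for numeric and boolean inputs, the original condition add_false_values or val or val != 0 reduces to the form used here.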
@@ -174,6 +176,8 @@ class SingleGraphOWLConverter(BaseOWLConverter):
for theme_idx, theme in enumerate(themes): # Iterate over columns (n columns)
val = presence_matrix[row_idx, theme_idx].item()
if val != 0:
+if self.create_high_level_concepts_as_boolean:
+val = True if val != 0 else False
self.graph.add((newNode, classNamespace[f'has_theme_{theme}'], Literal(val)))
# if node_type in self.high_level_concepts:
@@ -184,7 +188,7 @@ class SingleGraphOWLConverter(BaseOWLConverter):
# for row_idx, presence_matrix_row in enumerate(presence_matrix):
# for theme_idx, theme_value in enumerate(presence_matrix_row): # Iterate over columns (n columns)
# val = theme_value.item()
-# if self.create_data_properties_as_object:
+# if self.create_data_properties_as_boolean:
# val = True if val != 0 else False
# if self.add_false_values or val or val != 0:
# self.graph.add((newNode, classNamespace[f'has_theme_{themes[theme_idx]}'], Literal(val)))
@@ -427,7 +431,7 @@ def convert_to_owl(data: Union[HeteroData, Data], namespace: str, owlGraphPath:
if isinstance(data, HeteroData):
return SingleGraphOWLConverter(data, namespace, owlGraphPath, high_level_concepts, **kwargs)
else:
-return MultiGraphOWLConverter(data, namespace, owlGraphPath, high_level_concepts=high_level_concepts, **kwargs)
+return MultiGraphOWLConverter(data, namespace, owlGraphPath, high_level_concepts, **kwargs)
if __name__ == "__main__":
# Example usage with MUTAG dataset
......
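For the HeteroData path of the dispatch above, a minimal usage sketch mirroring how DiscriminativeExplainer.__init__ calls it (the elided example targets MUTAG; the dataset and flag values here are illustrative):

from torch_geometric.datasets import DBLP

data = DBLP(root="./data/dblp")[0]  # a HeteroData object; dataset choice is illustrative
converter = convert_to_owl(
    data=data,
    namespace="http://example.org/",
    owlGraphPath="./owlGraphs/example.owl",
    high_level_concepts=None,
    create_data_properties_as_boolean=True,
    create_high_level_concepts_as_boolean=True,
)
converter.buildGraph()  # builds the OWL graph at owlGraphPath, as the explainer does before loading it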
from datetime import datetime
import json
import sys
import os
import csv
from sklearn.metrics import confusion_matrix
@@ -14,7 +13,7 @@ from ontolearn.owlapy.render import DLSyntaxObjectRenderer
from GNN.HeterogenousGNN import GNN
renderer = DLSyntaxObjectRenderer()
+generate_new_owl_file = True
def run_gnn(structuredDataset: Base, entity_name, datasetName, timeStamp):
evaluations = {
@@ -68,17 +67,14 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
"""
os.makedirs(os.path.dirname(filename), exist_ok=True)
# Open the CSV file in append mode
with open(filename, mode='a', newline='', encoding='utf-8') as csvfile:
fieldnames = ['Dataset', 'Number of Groups', 'Length', 'Label Name',
'Hypothesis', 'Accuracy', 'Recall', 'Precision', 'F1 Score']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# Write the header only once
if write_header:
writer.writeheader()
# Write the new results
for label, data in results.items():
writer.writerow({
'Dataset': dataset_key,
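The row mapping continues in the elided lines. For reference, a hypothetical call showing the results shape this function consumes (keys follow explain_and_evaluate below; all values are illustrative):

results = {
    0: {
        "label_name": "Database",
        "hypothesis": "∃ has_theme_databases.{True}",
        "length": 3,
        "evaluation": {"Accuracy": 0.91, "Recall": 0.88, "Precision": 0.84, "F1": 0.86},
    }
}
append_to_csv_file(results, "./evaluation_results/example.csv", dataset_key="dblp", num_groups=0, write_header=True)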
@@ -97,13 +93,12 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level_concepts):
"""
Explains and evaluates each label in the dataset.
Continuously writes results to a CSV file.
"""
explainer = DiscriminativeExplainer(
None, dataset, "http://example.org/",
owl_graph_path=owl_graph_path,
-generate_new_owl_file=True,
-create_data_properties_as_object=True,
+generate_new_owl_file=generate_new_owl_file,
+create_data_properties_as_boolean=True,
high_level_concepts=high_level_concepts
)
@@ -133,7 +128,6 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
print("Evaluation results:")
print(evaluation)
# Store results for the current label
all_results[label] = {
'label_name': label_name,
'hypothesis': concept_string,
@@ -143,7 +137,50 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
return all_results
-def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size=1000):
+def summarize_aggregated_results(aggregated_results, summary_filename):
+"""
+Summarizes the aggregated results, calculating best, average, max, and min scores for all metrics.
+"""
+os.makedirs(os.path.dirname(summary_filename), exist_ok=True)
+with open(summary_filename, mode="w", newline="", encoding="utf-8") as csvfile:
+fieldnames = [
+"Label Name", "Best Hypothesis", "Best F1 Score", "Average F1 Score",
+"Max F1 Score", "Min F1 Score",
+"Average Accuracy", "Max Accuracy", "Min Accuracy",
+"Average Recall", "Max Recall", "Min Recall",
+"Average Precision", "Max Precision", "Min Precision",
+"Average Length", "Max Length", "Min Length"
+]
+writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+writer.writeheader()
+for label, data in aggregated_results.items():
+scores = data["all_scores"]
+writer.writerow({
+"Label Name": data["label_name"],
+"Best Hypothesis": data["best_hypothesis"],
+"Best F1 Score": data["best_F1"],
+"Average F1 Score": sum(scores["F1"]) / len(scores["F1"]),
+"Max F1 Score": max(scores["F1"]),
+"Min F1 Score": min(scores["F1"]),
+"Average Accuracy": sum(scores["Accuracy"]) / len(scores["Accuracy"]),
+"Max Accuracy": max(scores["Accuracy"]),
+"Min Accuracy": min(scores["Accuracy"]),
+"Average Recall": sum(scores["Recall"]) / len(scores["Recall"]),
+"Max Recall": max(scores["Recall"]),
+"Min Recall": min(scores["Recall"]),
+"Average Precision": sum(scores["Precision"]) / len(scores["Precision"]),
+"Max Precision": max(scores["Precision"]),
+"Min Precision": min(scores["Precision"]),
+"Average Length": sum(scores["Length"]) / len(scores["Length"]),
+"Max Length": max(scores["Length"]),
+"Min Length": min(scores["Length"]),
+})
+print(f"Summary results saved to {summary_filename}")
+def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size=1000, iterations=5):
"""
Handles dataset loading and evaluation for experiments.
@@ -163,54 +200,65 @@ def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size
# Set up the CSV file for continuous writing
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"./evaluation_results/{dataset_name}_evaluation_{timestamp}.csv"
summary_filename = f"./evaluation_results/{dataset_name}_summary_{timestamp}.csv"
aggregated_results = {}
# Load dataset
dataset = load_datasets(dataset_name=dataset_name, bag_of_words_size=bag_of_words_size)
print(dataset.dataset)
+for run in range(1, iterations + 1):
+print(f"\nStarting Experiment Iteration {run}/{iterations}")
+run_timestamp = f"{timestamp}_run_{run}"
+run_csv_filename = f"./evaluation_results/{dataset_name}_evaluation_{run_timestamp}.csv"
# Run the GNN
-model = run_gnn(dataset, entity_name, dataset_name, timestamp)
+dataset = load_datasets(dataset_name=dataset_name, bag_of_words_size=bag_of_words_size)
+model = run_gnn(dataset, entity_name, dataset_name, run_timestamp)
# Load grouped keyword files or use an empty string for no groups
grouped_keyword_files = [
os.path.join(grouped_keyword_dir, f)
for f in os.listdir(grouped_keyword_dir)
if f.startswith('groupedKeywords_') and f.endswith('.json')
]
grouped_keyword_files.insert(0, "") # Add empty string to handle no grouped keywords case
write_header = True
+for boolean_flag in [True, False]: # Loop for boolean configurations
for group_keyword_file in sorted(grouped_keyword_files):
num_groups = 0 if group_keyword_file == "" else int(group_keyword_file.split('_')[1].split('.')[0])
-owl_graph_path = f'./owlGraphs/{dataset_name}_{timestamp}_{num_groups}_groups.owl'
+owl_graph_path = f'./owlGraphs/{dataset_name}_{run_timestamp}_{num_groups}_groups_boolean_{boolean_flag}.owl'
print("\n" + "=" * 50)
if group_keyword_file == "":
print(f"Running experiment without groupedKeywords on {dataset_name.upper()} dataset")
else:
print(f"Running experiment with groupedKeywords from {group_keyword_file} on {dataset_name.upper()} dataset")
print(f"Running experiment with boolean={boolean_flag}")
print("=" * 50)
high_level_concept = None
-high_level_concept = fetch_high_level_concepts(dataset, num_groups, group_keyword_file) if num_groups != 0 else None
+if num_groups != 0:
+high_level_concept = fetch_high_level_concepts(dataset, num_groups, group_keyword_file)
-# Evaluate and log results
-results = explain_and_evaluate(model, dataset.dataset, entity_name, owl_graph_path, high_level_concept)
-# Continuously write results to CSV file after evaluating each label
-append_to_csv_file(results, csv_filename, dataset_name, num_groups, write_header=write_header)
+results = explain_and_evaluate(
+model, dataset.dataset, entity_name, owl_graph_path, high_level_concept
+)
+append_to_csv_file(results, run_csv_filename, dataset_name, num_groups, write_header=write_header)
+for label, data in results.items():
+if label not in aggregated_results:
+aggregated_results[label] = {
+"label_name": data["label_name"],
+"best_hypothesis": data["hypothesis"],
+"best_F1": data["evaluation"]["F1"],
+"all_scores": {"F1": [], "Accuracy": [], "Recall": [], "Precision": [], "Length": []}
+}
+else:
+if data["evaluation"]["F1"] > aggregated_results[label]["best_F1"]:
+aggregated_results[label]["best_F1"] = data["evaluation"]["F1"]
+aggregated_results[label]["best_hypothesis"] = data["hypothesis"]
+write_header = False
+for metric, value in data["evaluation"].items():
+aggregated_results[label]["all_scores"][metric].append(value)
+aggregated_results[label]["all_scores"]["Length"].append(data["length"])
print("\n" + "=" * 50)
print(f"Experiments completed and results written to CSV file: {csv_filename}")
print("=" * 50)
write_header = False
+summarize_aggregated_results(aggregated_results, summary_filename)
+print(f"\nExperiments completed. Summary saved to {summary_filename}")
def main():
datasets = [
@@ -228,7 +276,12 @@ def main():
]
for dataset in datasets:
experiment(dataset["grouped_keyword_dir"], dataset["dataset_name"], dataset["entity_name"])
experiment(
dataset["grouped_keyword_dir"],
dataset["dataset_name"],
dataset["entity_name"],
iterations=5 # Repeat experiment 5 times
)
if __name__ == "__main__":
main()
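Each entry in datasets is assumed to carry the three keys unpacked above; an equivalent direct call, with illustrative argument values:

experiment(
    grouped_keyword_dir="./groupedKeywords/dblp",  # hypothetical directory of groupedKeywords_<n>.json files
    dataset_name="dblp",
    entity_name="author",
    iterations=5,  # run the full pipeline five times and aggregate the results
)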