Commit 8e77ffe4 authored by AjUm-HEIDI

Fix issue in the experiment

parent 009d7aba
@@ -69,7 +69,7 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
with open(filename, mode='a', newline='', encoding='utf-8') as csvfile:
fieldnames = ['Dataset', 'Number of Groups', 'Length', 'Label Name',
'Hypothesis', 'Accuracy', 'Recall', 'Precision', 'F1 Score']
'Hypothesis', 'Accuracy', 'Recall', 'Precision', 'F1 Score', 'High Level Concepts As Boolean']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
if write_header:
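
For reference, a minimal sketch of the results mapping this writer consumes, mirroring the per-label dictionary built in explain_and_evaluate further down; the label key and all metric values below are hypothetical placeholders:

results = {
    0: {  # label index (hypothetical)
        "label_name": "example_label",             # hypothetical label
        "hypothesis": "example rendered concept",  # hypothetical concept string
        "evaluation": {"Accuracy": 0.90, "Recall": 0.80, "Precision": 0.85, "F1": 0.82},  # placeholder scores
        "length": 3,                               # placeholder hypothesis length
        "high_level_concepts_as_boolean": True,    # new field recorded by this commit
    },
}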
@@ -85,20 +85,22 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
'Accuracy': data['evaluation'].get('Accuracy', 'N/A'),
'Recall': data['evaluation'].get('Recall', 'N/A'),
'Precision': data['evaluation'].get('Precision', 'N/A'),
'F1 Score': data['evaluation'].get('F1', 'N/A')
'F1 Score': data['evaluation'].get('F1', 'N/A'),
"High Level Concepts As Boolean": data['evaluation'].get('high_level_concepts_as_boolean', 'N/A')
})
print(f"Results appended to {filename}")
def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level_concepts):
def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level_concepts, create_high_level_concepts_as_boolean):
"""
Explains and evaluates each label in the dataset.
"""
explainer = DiscriminativeExplainer(
None, dataset, "http://example.org/",
model, dataset, "http://example.org/",
owl_graph_path=owl_graph_path,
generate_new_owl_file=generate_new_owl_file,
create_data_properties_as_boolean=True,
create_high_level_concepts_as_boolean=create_high_level_concepts_as_boolean,
high_level_concepts=high_level_concepts
)
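
A minimal usage sketch of the updated explain_and_evaluate signature, assuming a trained model, a loaded dataset, and a set of concept groups are already available; the entity name, OWL graph path, and concept groups below are hypothetical placeholders:

results = explain_and_evaluate(
    model,                      # trained model, now forwarded to the explainer instead of None
    dataset,                    # dataset indexable by entity name, e.g. dataset["Paper"].yLabel
    "Paper",                    # hypothetical entity name
    "./owlGraphs/example.owl",  # hypothetical OWL graph path
    high_level_concepts,        # grouped keywords, or None when no grouping is used
    create_high_level_concepts_as_boolean=True,  # flag introduced by this commit
)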
@@ -107,10 +109,11 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
for label, label_name in enumerate(dataset[entity_name].yLabel):
print(f"\nExplaining {entity_name} label {label}: {label_name}")
hypotheses, model = explainer.explain(
label, 5, debug=True, max_runtime=90,
label, 5, debug=False, max_runtime=90,
num_generations=750, use_data_properties=True
)
[print(renderer.render(hypothesis.concept), hypothesis.quality) for hypothesis in hypotheses]
for hypothesis in hypotheses:
print(renderer.render(hypothesis.concept), hypothesis.quality)
best_hypothesis = hypotheses[0].concept
concept_string = renderer.render(best_hypothesis)
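
Taking hypotheses[0] as the best candidate assumes the explainer returns hypotheses ranked best-first; if that ordering were not guaranteed, a defensive sort on the same quality attribute printed above would make the assumption explicit (an illustrative sketch):

hypotheses = sorted(hypotheses, key=lambda h: h.quality, reverse=True)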
@@ -132,50 +135,44 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
'label_name': label_name,
'hypothesis': concept_string,
'evaluation': evaluation,
"length": hypotheses[0].len
"length": hypotheses[0].len,
"high_level_concepts_as_boolean": create_high_level_concepts_as_boolean
}
return all_results
def summarize_aggregated_results(aggregated_results, summary_filename):
"""
Summarizes the aggregated results, calculating best, average, max, and min scores for all metrics.
Summarizes the aggregated results, including for each label:
- Best Hypothesis
- Best F1 Score
- Least F1 Score
- Average F1 Score
- Length at Best F1
- Number of Groups used at best F1
And writes the summary to a CSV file.
"""
os.makedirs(os.path.dirname(summary_filename), exist_ok=True)
with open(summary_filename, mode="w", newline="", encoding="utf-8") as csvfile:
fieldnames = [
"Label Name", "Best Hypothesis", "Best F1 Score", "Average F1 Score",
"Max F1 Score", "Min F1 Score",
"Average Accuracy", "Max Accuracy", "Min Accuracy",
"Average Recall", "Max Recall", "Min Recall",
"Average Precision", "Max Precision", "Min Precision",
"Average Length", "Max Length", "Min Length"
"Label Name", "Best Hypothesis", "Best F1 Score", "Least F1 Score",
"Average F1 Score", "Length at Best F1", "Number of Groups"
]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for label, data in aggregated_results.items():
scores = data["all_scores"]
f1_scores = data["all_scores"]["F1"]
avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
writer.writerow({
"Label Name": data["label_name"],
"Best Hypothesis": data["best_hypothesis"],
"Best F1 Score": data["best_F1"],
"Average F1 Score": sum(scores["F1"]) / len(scores["F1"]),
"Max F1 Score": max(scores["F1"]),
"Min F1 Score": min(scores["F1"]),
"Average Accuracy": sum(scores["Accuracy"]) / len(scores["Accuracy"]),
"Max Accuracy": max(scores["Accuracy"]),
"Min Accuracy": min(scores["Accuracy"]),
"Average Recall": sum(scores["Recall"]) / len(scores["Recall"]),
"Max Recall": max(scores["Recall"]),
"Min Recall": min(scores["Recall"]),
"Average Precision": sum(scores["Precision"]) / len(scores["Precision"]),
"Max Precision": max(scores["Precision"]),
"Min Precision": min(scores["Precision"]),
"Average Length": sum(scores["Length"]) / len(scores["Length"]),
"Max Length": max(scores["Length"]),
"Min Length": min(scores["Length"]),
"Least F1 Score": min(f1_scores) if f1_scores else "N/A",
"Average F1 Score": avg_f1,
"Length at Best F1": data.get("length_at_best_f1", "N/A"),
"Number of Groups": data.get("num_groups_at_best_f1", 0)
})
print(f"Summary results saved to {summary_filename}")
@@ -183,27 +180,14 @@ def summarize_aggregated_results(aggregated_results, summary_filename):
def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size=1000, iterations=5):
"""
Handles dataset loading and evaluation for experiments.
Args:
grouped_keyword_dir (str): Directory containing grouped keyword files.
dataset_name (str): Name of the dataset.
label_name (str): Label name for the dataset.
csv_filename (str): Path to the CSV file for storing results.
bag_of_words_size (int): Size of the bag of words.
Returns:
None
"""
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"CUDA is {'available. Using GPU.' if device.type == 'cuda' else 'not available. Using CPU.'}")
# Set up the CSV file for continuous writing
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
summary_filename = f"./evaluation_results/{dataset_name}_summary_{timestamp}.csv"
aggregated_results = {}
for run in range(1, iterations + 1):
print(f"\nStarting Experiment Iteration {run}/{iterations}")
run_timestamp = f"{timestamp}_run_{run}"
@@ -217,38 +201,44 @@ def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size
for f in os.listdir(grouped_keyword_dir)
if f.startswith('groupedKeywords_') and f.endswith('.json')
]
grouped_keyword_files.insert(0, "") # Add empty string to handle no grouped keywords case
grouped_keyword_files.insert(0, "") # Allow the possibility of no grouped keywords
write_header = True
for boolean_flag in [True, False]: # Loop for boolean configurations
for create_high_level_concepts_as_boolean in [True, False]:
for group_keyword_file in sorted(grouped_keyword_files):
num_groups = 0 if group_keyword_file == "" else int(group_keyword_file.split('_')[1].split('.')[0])
owl_graph_path = f'./owlGraphs/{dataset_name}_{run_timestamp}_{num_groups}_groups_boolean_{boolean_flag}.owl'
owl_graph_path = f'./owlGraphs/{dataset_name}_{run_timestamp}_{num_groups}_groups_{"bool" if create_high_level_concepts_as_boolean else "data" }.owl'
print("\n" + "=" * 50)
print(f"Running experiment with boolean={boolean_flag}")
print(f"Running experiment {run} with create_high_level_concepts_as_boolean={create_high_level_concepts_as_boolean} and num_groups={num_groups}")
print("=" * 50)
high_level_concept = fetch_high_level_concepts(dataset, num_groups, group_keyword_file) if num_groups != 0 else None
high_level_concepts = fetch_high_level_concepts(dataset, num_groups, group_keyword_file) if num_groups != 0 else None
results = explain_and_evaluate(
model, dataset.dataset, entity_name, owl_graph_path, high_level_concept
model, dataset.dataset, entity_name, owl_graph_path, high_level_concepts, create_high_level_concepts_as_boolean
)
append_to_csv_file(results, run_csv_filename, dataset_name, num_groups, write_header=write_header)
for label, data in results.items():
# Initialize aggregation for this label if not yet present
if label not in aggregated_results:
aggregated_results[label] = {
"label_name": data["label_name"],
"best_hypothesis": data["hypothesis"],
"best_F1": data["evaluation"]["F1"],
"length_at_best_f1": data["length"],
"num_groups_at_best_f1": num_groups,
"all_scores": {"F1": [], "Accuracy": [], "Recall": [], "Precision": [], "Length": []}
}
else:
# If the current F1 is better than the stored best, update best values
if data["evaluation"]["F1"] > aggregated_results[label]["best_F1"]:
aggregated_results[label]["best_F1"] = data["evaluation"]["F1"]
aggregated_results[label]["best_hypothesis"] = data["hypothesis"]
aggregated_results[label]["length_at_best_f1"] = data["length"]
aggregated_results[label]["num_groups_at_best_f1"] = num_groups
for metric, value in data["evaluation"].items():
aggregated_results[label]["all_scores"][metric].append(value)
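
A minimal sketch of how the experiment entry point might be invoked, assuming a directory of groupedKeywords_<n>.json files; the directory, dataset name, and entity name below are hypothetical placeholders:

if __name__ == "__main__":
    experiment(
        grouped_keyword_dir="./groupedKeywords",  # hypothetical directory containing groupedKeywords_<n>.json files
        dataset_name="ExampleDataset",            # hypothetical dataset name
        entity_name="Paper",                      # hypothetical entity name
        bag_of_words_size=1000,
        iterations=5,
    )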