Commit 8e77ffe4 authored by AjUm-HEIDI

Fix issue in the experiment

parent 009d7aba
@@ -69,7 +69,7 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
with open(filename, mode='a', newline='', encoding='utf-8') as csvfile:
fieldnames = ['Dataset', 'Number of Groups', 'Length', 'Label Name',
'Hypothesis', 'Accuracy', 'Recall', 'Precision', 'F1 Score']
'Hypothesis', 'Accuracy', 'Recall', 'Precision', 'F1 Score', 'High Level Concepts As Boolean']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
if write_header:
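
For reference, a minimal sketch of the results mapping this writer consumes, mirroring the per-label dictionary built in explain_and_evaluate further down; the label key and all metric values below are hypothetical placeholders:

results = {
    0: {  # label index (hypothetical)
        "label_name": "example_label",             # hypothetical label
        "hypothesis": "example rendered concept",  # hypothetical concept string
        "evaluation": {"Accuracy": 0.90, "Recall": 0.80, "Precision": 0.85, "F1": 0.82},  # placeholder scores
        "length": 3,                               # placeholder hypothesis length
        "high_level_concepts_as_boolean": True,    # new field recorded by this commit
    },
}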
@@ -85,20 +85,22 @@ def append_to_csv_file(results, filename, dataset_key, num_groups, write_header=
'Accuracy': data['evaluation'].get('Accuracy', 'N/A'),
'Recall': data['evaluation'].get('Recall', 'N/A'),
'Precision': data['evaluation'].get('Precision', 'N/A'),
'F1 Score': data['evaluation'].get('F1', 'N/A')
'F1 Score': data['evaluation'].get('F1', 'N/A'),
"High Level Concepts As Boolean": data['evaluation'].get('high_level_concepts_as_boolean', 'N/A')
})
print(f"Results appended to {filename}")
def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level_concepts):
def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level_concepts, create_high_level_concepts_as_boolean):
"""
Explains and evaluates each label in the dataset.
"""
explainer = DiscriminativeExplainer(
None, dataset, "http://example.org/",
model, dataset, "http://example.org/",
owl_graph_path=owl_graph_path,
generate_new_owl_file=generate_new_owl_file,
create_data_properties_as_boolean=True,
create_high_level_concepts_as_boolean=create_high_level_concepts_as_boolean,
high_level_concepts=high_level_concepts
)
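
A minimal usage sketch of the updated explain_and_evaluate signature, assuming a trained model, a loaded dataset, and a set of concept groups are already available; the entity name, OWL graph path, and concept groups below are hypothetical placeholders:

results = explain_and_evaluate(
    model,                      # trained model, now forwarded to the explainer instead of None
    dataset,                    # dataset indexable by entity name, e.g. dataset["Paper"].yLabel
    "Paper",                    # hypothetical entity name
    "./owlGraphs/example.owl",  # hypothetical OWL graph path
    high_level_concepts,        # grouped keywords, or None when no grouping is used
    create_high_level_concepts_as_boolean=True,  # flag introduced by this commit
)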
@@ -107,10 +109,11 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
for label, label_name in enumerate(dataset[entity_name].yLabel):
print(f"\nExplaining {entity_name} label {label}: {label_name}")
hypotheses, model = explainer.explain(
label, 5, debug=True, max_runtime=90,
label, 5, debug=False, max_runtime=90,
num_generations=750, use_data_properties=True
)
[print(renderer.render(hypothesis.concept), hypothesis.quality) for hypothesis in hypotheses]
for hypothesis in hypotheses:
print(renderer.render(hypothesis.concept), hypothesis.quality)
best_hypothesis = hypotheses[0].concept
concept_string = renderer.render(best_hypothesis)
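
Taking hypotheses[0] as the best candidate assumes the explainer returns hypotheses ranked best-first; if that ordering were not guaranteed, a defensive sort on the same quality attribute printed above would make the assumption explicit (an illustrative sketch):

hypotheses = sorted(hypotheses, key=lambda h: h.quality, reverse=True)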
@@ -132,50 +135,44 @@ def explain_and_evaluate(model, dataset, entity_name, owl_graph_path, high_level
'label_name': label_name,
'hypothesis': concept_string,
'evaluation': evaluation,
"length": hypotheses[0].len
"length": hypotheses[0].len,
"high_level_concepts_as_boolean": create_high_level_concepts_as_boolean
}
return all_results
def summarize_aggregated_results(aggregated_results, summary_filename):
"""
Summarizes the aggregated results, calculating best, average, max, and min scores for all metrics.
Summarizes the aggregated results, including for each label:
- Best Hypothesis
- Best F1 Score
- Least F1 Score
- Average F1 Score
- Length at Best F1
- Number of Groups used at best F1
And writes the summary to a CSV file.
"""
os.makedirs(os.path.dirname(summary_filename), exist_ok=True)
with open(summary_filename, mode="w", newline="", encoding="utf-8") as csvfile:
fieldnames = [
"Label Name", "Best Hypothesis", "Best F1 Score", "Average F1 Score",
"Max F1 Score", "Min F1 Score",
"Average Accuracy", "Max Accuracy", "Min Accuracy",
"Average Recall", "Max Recall", "Min Recall",
"Average Precision", "Max Precision", "Min Precision",
"Average Length", "Max Length", "Min Length"
"Label Name", "Best Hypothesis", "Best F1 Score", "Least F1 Score",
"Average F1 Score", "Length at Best F1", "Number of Groups"
]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for label, data in aggregated_results.items():
scores = data["all_scores"]
f1_scores = data["all_scores"]["F1"]
avg_f1 = sum(f1_scores) / len(f1_scores) if f1_scores else 0
writer.writerow({
"Label Name": data["label_name"],
"Best Hypothesis": data["best_hypothesis"],
"Best F1 Score": data["best_F1"],
"Average F1 Score": sum(scores["F1"]) / len(scores["F1"]),
"Max F1 Score": max(scores["F1"]),
"Min F1 Score": min(scores["F1"]),
"Average Accuracy": sum(scores["Accuracy"]) / len(scores["Accuracy"]),
"Max Accuracy": max(scores["Accuracy"]),
"Min Accuracy": min(scores["Accuracy"]),
"Average Recall": sum(scores["Recall"]) / len(scores["Recall"]),
"Max Recall": max(scores["Recall"]),
"Min Recall": min(scores["Recall"]),
"Average Precision": sum(scores["Precision"]) / len(scores["Precision"]),
"Max Precision": max(scores["Precision"]),
"Min Precision": min(scores["Precision"]),
"Average Length": sum(scores["Length"]) / len(scores["Length"]),
"Max Length": max(scores["Length"]),
"Min Length": min(scores["Length"]),
"Least F1 Score": min(f1_scores) if f1_scores else "N/A",
"Average F1 Score": avg_f1,
"Length at Best F1": data.get("length_at_best_f1", "N/A"),
"Number of Groups": data.get("num_groups_at_best_f1", 0)
})
print(f"Summary results saved to {summary_filename}")
@@ -183,27 +180,14 @@ def summarize_aggregated_results(aggregated_results, summary_filename):
def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size=1000, iterations=5):
"""
Handles dataset loading and evaluation for experiments.
Args:
grouped_keyword_dir (str): Directory containing grouped keyword files.
dataset_name (str): Name of the dataset.
label_name (str): Label name for the dataset.
csv_filename (str): Path to the CSV file for storing results.
bag_of_words_size (int): Size of the bag of words.
Returns:
None
"""
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"CUDA is {'available. Using GPU.' if device.type == 'cuda' else 'not available. Using CPU.'}")
# Set up the CSV file for continuous writing
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
summary_filename = f"./evaluation_results/{dataset_name}_summary_{timestamp}.csv"
aggregated_results = {}
for run in range(1, iterations + 1):
print(f"\nStarting Experiment Iteration {run}/{iterations}")
run_timestamp = f"{timestamp}_run_{run}"
@@ -217,38 +201,44 @@ def experiment(grouped_keyword_dir, dataset_name, entity_name, bag_of_words_size
for f in os.listdir(grouped_keyword_dir)
if f.startswith('groupedKeywords_') and f.endswith('.json')
]
grouped_keyword_files.insert(0, "") # Add empty string to handle no grouped keywords case
grouped_keyword_files.insert(0, "") # Allow the possibility of no grouped keywords
write_header = True
for boolean_flag in [True, False]: # Loop for boolean configurations
for create_high_level_concepts_as_boolean in [True, False]:
for group_keyword_file in sorted(grouped_keyword_files):
num_groups = 0 if group_keyword_file == "" else int(group_keyword_file.split('_')[1].split('.')[0])
owl_graph_path = f'./owlGraphs/{dataset_name}_{run_timestamp}_{num_groups}_groups_boolean_{boolean_flag}.owl'
owl_graph_path = f'./owlGraphs/{dataset_name}_{run_timestamp}_{num_groups}_groups_{"bool" if create_high_level_concepts_as_boolean else "data" }.owl'
print("\n" + "=" * 50)
print(f"Running experiment with boolean={boolean_flag}")
print(f"Running experiment {run} with create_high_level_concepts_as_boolean={create_high_level_concepts_as_boolean} and num_groups={num_groups}")
print("=" * 50)
high_level_concept = fetch_high_level_concepts(dataset, num_groups, group_keyword_file) if num_groups != 0 else None
high_level_concepts = fetch_high_level_concepts(dataset, num_groups, group_keyword_file) if num_groups != 0 else None
results = explain_and_evaluate(
model, dataset.dataset, entity_name, owl_graph_path, high_level_concept
model, dataset.dataset, entity_name, owl_graph_path, high_level_concepts, create_high_level_concepts_as_boolean
)
append_to_csv_file(results, run_csv_filename, dataset_name, num_groups, write_header=write_header)
for label, data in results.items():
# Initialize aggregation for this label if not yet present
if label not in aggregated_results:
aggregated_results[label] = {
"label_name": data["label_name"],
"best_hypothesis": data["hypothesis"],
"best_F1": data["evaluation"]["F1"],
"length_at_best_f1": data["length"],
"num_groups_at_best_f1": num_groups,
"all_scores": {"F1": [], "Accuracy": [], "Recall": [], "Precision": [], "Length": []}
}
else:
# If the current F1 is better than the stored best, update best values
if data["evaluation"]["F1"] > aggregated_results[label]["best_F1"]:
aggregated_results[label]["best_F1"] = data["evaluation"]["F1"]
aggregated_results[label]["best_hypothesis"] = data["hypothesis"]
aggregated_results[label]["length_at_best_f1"] = data["length"]
aggregated_results[label]["num_groups_at_best_f1"] = num_groups
for metric, value in data["evaluation"].items():
aggregated_results[label]["all_scores"][metric].append(value)
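
A minimal sketch of how the experiment entry point might be invoked, assuming a directory of groupedKeywords_<n>.json files; the directory, dataset name, and entity name below are hypothetical placeholders:

if __name__ == "__main__":
    experiment(
        grouped_keyword_dir="./groupedKeywords",  # hypothetical directory containing groupedKeywords_<n>.json files
        dataset_name="ExampleDataset",            # hypothetical dataset name
        entity_name="Paper",                      # hypothetical entity name
        bag_of_words_size=1000,
        iterations=5,
    )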