Update the summary file

b3333a2b · AjUm-HEIDI · af9df659 · b3333a2b
Commit b3333a2b authored 6 months ago by AjUm-HEIDI
--- a/structured_datasets_experiment.py
+++ b/structured_datasets_experiment.py
@@ -25,7 +25,7 @@ def explain_gnn(model, dataset, datasetName, run_dir, add_node_type, high_level_
        generate_new_owl_file=True,
        ignore_nodes=False,
        high_level_concepts=high_level_concepts,
-        create_high_level_concepts_as_boolean=True,
+        create_high_level_concepts_as_boolean=False,
        add_node_type=add_node_type
    )

@@ -134,9 +134,9 @@ def experiment(datasetName: str, add_node_type = True, iterations: int = 1):
            for row in cm:
                writer.writerow(row)

-        ## Explain GNN before finding motifs
-        # print("\nBefore finding motifs:")
-        # explain_gnn(model, structuredDataset.dataset, datasetName, run_dir, add_node_type)
+        # Explain GNN before finding motifs
+        print("\nBefore finding motifs:")
+        explain_gnn(model, structuredDataset.dataset, datasetName, run_dir, add_node_type)

        print("\nDetecting motifs...")
        patterns, presence_matrix = structuredDataset.detect_motifs(visualizationPath=run_dir)
@@ -145,8 +145,7 @@ def experiment(datasetName: str, add_node_type = True, iterations: int = 1):
        print("\nDetected motifs...")
        structuredDataset.visualize_patterns()

-        graph_indices_to_visualize = [1, 2, 5, 7, 9,    # Label 0
-                    10, 11, 12, 14, 15]   # Label 1    
+        graph_indices_to_visualize = [1, 2, 5, 7, 9, 10, 11, 12, 14, 15]
        print(f"Visualizing graphs: {graph_indices_to_visualize}")
        graph_visualization_dir = structuredDataset.visualize_graphs(graph_indices_to_visualize)
        if graph_visualization_dir:
@@ -178,23 +177,61 @@ def experiment(datasetName: str, add_node_type = True, iterations: int = 1):
                        "Label": label,
                        "MotifsAdded": motif_added,
                        "BestHypothesis": row["BestHypothesis"],
-                        "Quality": float(row["Quality"]),
                        "Length": int(row["Length"]),
+                        "F1": float(row["F1"]),
+                        "Accuracy": float(row["Accuracy"]),
+                        "Recall": float(row["Recall"]),
+                        "Precision": float(row["Precision"])
                    }
                    # Update if current result is better
                    key = (label, motif_added)
-                    if key not in best_results or current_result["Quality"] > best_results[key]["Quality"]:
+                    if key not in best_results or current_result["F1"] > best_results[key]["F1"]:
                        best_results[key] = current_result

        print(f"Results for iteration {iteration + 1} saved in {run_dir}")

    # Write the best results for each label and motif combination to the parent directory
    with open(results / f"best_results_by_label_and_motif.csv", "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=["Label", "MotifsAdded", "Iteration", "BestHypothesis", "Quality", "Length"])
+        fieldnames = [
+            "Label", "MotifsAdded", "Iteration", 
+            "Best F1 Score", "Length at Best F1", "Average F1 Score", 
+            "Best Accuracy", "Average Accuracy", "Average Length", 
+            "Best Hypothesis"
+        ]
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
+
+        # Compute aggregated metrics for each label and motif combination
        for (label, motif_added), result in best_results.items():
            if result is not None:
-                writer.writerow(result)
+                # Collect results for this label/motif pair (NOTE(review): best_results is keyed by (label, motif_added), so this list always holds exactly one entry)
+                all_results = [
+                    r for (lbl, mot), r in best_results.items()
+                    if lbl == label and mot == motif_added
+                ]
+
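+                # Compute averages (NOTE(review): all_results has a single entry here, so each "average" equals the best value; true averages need per-iteration results)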
+                avg_f1 = sum(r["F1"] for r in all_results) / len(all_results)
+                avg_accuracy = sum(r["Accuracy"] for r in all_results) / len(all_results)
+                avg_length = sum(r["Length"] for r in all_results) / len(all_results)
+
+                # Determine the result with the highest F1 score
+                best_result = max(all_results, key=lambda r: r["F1"])
+
+                # Write the best result to the CSV
+                writer.writerow({
+                    "Label": label,
+                    "MotifsAdded": motif_added,
+                    "Iteration": best_result["Iteration"],
+                    "Best F1 Score": best_result["F1"],
+                    "Length at Best F1": best_result["Length"],
+                    "Average F1 Score": avg_f1,
+                    "Best Accuracy": best_result["Accuracy"],
+                    "Average Accuracy": avg_accuracy,
+                    "Average Length": avg_length,
+                    "Best Hypothesis": best_result["BestHypothesis"],
+                })
+

    print(f"All iterations for {datasetName} completed. Best results saved in {results / 'best_results_by_label_and_motif.csv'}")