Skip to content
Snippets Groups Projects
Commit 4b66cd6c authored by Michael Youkeim
Browse files

Added parent commit JARs & refactored clustering

parent 8ae8d01d
No related branches found
No related tags found
No related merge requests found
......@@ -60,9 +60,39 @@ def process_commit_hashes(commit_hashes, repo_path, output_dir):
json.dump(java_versions, file, indent=2)
def find_orphan_parents(json_file):
    """Return parent hashes that never appear as a commit hash in the file.

    The file is read as a JSON object whose values are iterables of commit
    records. Each record supplies a ``'Hash'`` entry and, optionally, a
    ``'Parents'`` collection of hashes.

    Args:
        json_file: Path of the JSON file to inspect.

    Returns:
        set: Every hash found under ``'Parents'`` that is not also present
        as a ``'Hash'``. An empty set is returned (after printing a message)
        when the file is missing, is not valid JSON, or does not have the
        structure described above. This function does not raise.
    """
    # Keep the I/O guard separate from the traversal so each failure mode
    # is reported against the step that actually failed.
    try:
        with open(json_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: The file '{json_file}' does not exist.")
        return set()
    except json.JSONDecodeError:
        print(f"Error: The file '{json_file}' contains invalid JSON.")
        return set()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return set()

    hashes = set()
    parents = set()
    try:
        for commits in data.values():
            for commit in commits:
                hashes.add(commit['Hash'])
                # A record without parents (missing key, null, or empty
                # list) simply contributes nothing.
                commit_parents = commit.get('Parents') or ()
                # Guard against a bare string: set.update() would otherwise
                # add its individual characters as separate "hashes".
                if isinstance(commit_parents, str):
                    commit_parents = (commit_parents,)
                parents.update(commit_parents)
    except Exception as e:
        # Preserve the best-effort contract: report and return nothing
        # rather than propagate a structural problem to the caller.
        print(f"An unexpected error occurred: {e}")
        return set()

    return parents - hashes
def create_packages(repo_path, commit_info_file_path, output_dir):
    """Feed every commit hash listed in the commit-info file to the processor.

    Args:
        repo_path: Forwarded unchanged to ``process_commit_hashes``.
        commit_info_file_path: Path of a JSON file containing a sequence of
            records, each with a ``"commit_hash"`` entry.
        output_dir: Forwarded unchanged to ``process_commit_hashes``.
    """
    # Read the commit records from disk.
    with open(commit_info_file_path, "r") as info_file:
        commit_records = json.load(info_file)

    # Collect the hashes in file order.
    hashes_to_process = []
    for record in commit_records:
        hashes_to_process.append(record["commit_hash"])

    process_commit_hashes(hashes_to_process, repo_path, output_dir)
def create_packages_for_parents(repo_path, issue_commit_relation_file, output_dir):
    """Process the parent commits that the relation file references but omits.

    Args:
        repo_path: Forwarded unchanged to ``process_commit_hashes``.
        issue_commit_relation_file: Path of the JSON file handed to
            ``find_orphan_parents``.
        output_dir: Forwarded unchanged to ``process_commit_hashes``.
    """
    orphan_parents = find_orphan_parents(issue_commit_relation_file)
    # Hand over a sorted list rather than the raw set: iteration order of a
    # set of strings can differ between interpreter runs, and
    # create_packages also passes a list to process_commit_hashes.
    process_commit_hashes(sorted(orphan_parents), repo_path, output_dir)
......@@ -61,7 +61,7 @@ class RepoClusterer:
print(
f"ACDC run completed for {rsf_file_path}. Output generated: {log_path}")
def run_clusterer(self):
def run_clusterer(self, algorithm):
jar_path = os.path.join(self.lib_dir, "arcade_core_clusterer.jar")
for subdir, dirs, files in os.walk(self.output_dir):
for dir_name in dirs:
......@@ -76,9 +76,9 @@ class RepoClusterer:
f"projname={self.project_name}",
f"projversion={dir_name}",
f"language={self.language}",
f"algo=Limbo"
f"algo={algorithm}"
]
log_path = self.run_jar_with_logging(
jar_path, args, dir_path, "clusterer_output_limbo.log")
jar_path, args, dir_path, f"clusterer_output_{algorithm.lower()}.log")
print(
f"Limbo clustering run completed for {dir_name}. Results saved to {log_path}")
f"{algorithm} clustering run completed for {dir_name}. Results saved to {log_path}")
......@@ -4,12 +4,12 @@ import argparse
import os
from utils import setup_logging
from build_manager import create_packages
from build_manager import create_packages, create_packages_for_parents
from issue_commit_linkage import link_issues_to_commits
from visualization import create_charts
from java_version_manager import switch_java_version
from dependency_analyzer import analyze_dependencies, get_rsf_file_paths
from dependency_analyzer import analyze_dependencies
from clustering import RepoClusterer
from clustering_result_analyzer import analyze_clustering_results
......@@ -48,6 +48,7 @@ def main():
switch_java_version("1.8")
link_issues_to_commits(repo_path, issues_file_path, output_dir)
create_packages(repo_path, commit_info_file_path, output_dir)
create_packages_for_parents(repo_path, os.path.join(output_dir, "issue_commit_relationships.json"), output_dir)
# Visualization
create_charts(commit_info_file_path, output_dir)
......@@ -60,7 +61,8 @@ def main():
clusterer = RepoClusterer(output_dir)
clusterer.run_pkg()
clusterer.run_acdc()
clusterer.run_clusterer()
clusterer.run_clusterer("Limbo")
clusterer.run_clusterer("WCA")
# Analyzing the clustering results
analyze_clustering_results(output_dir)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment