Skip to content
Snippets Groups Projects
Commit 0ee389fd authored by Michael Youkeim
Browse files

Final version week 2

parent 9c72724c
No related branches found
No related tags found
No related merge requests found
Showing
with 24221 additions and 5627 deletions
#!/bin/bash
# Run the ARCADE ACDC clustering jar on a single RSF dependencies file and
# store the result as <OUTPUT_DIR>/<input name>_acdc_output.rsf.
#
# Exits non-zero when the input is missing, the output directory cannot be
# created, or the java process fails.

# Path to the .rsf file
DEPS_FILE="/Users/mukulsachdeva/DSSE-Project/out.rsf"
# Define the output directory for ACDC results
OUTPUT_DIR="/Users/mukulsachdeva/DSSE-Project/acdc_output"
# Path to the ACDC jar file
ACDC_JAR="/Users/mukulsachdeva/DSSE-Project/arcade_core-ACDC.jar"

# Fail early with a clear message rather than letting java report the problem.
if [ ! -f "$DEPS_FILE" ]; then
    echo "Dependencies file not found: $DEPS_FILE" >&2
    exit 1
fi

# Ensure the output directory exists (quoted so paths with spaces survive).
mkdir -p "$OUTPUT_DIR" || exit 1

# Extract filename without extension for output naming
filename=$(basename -- "$DEPS_FILE")
filename="${filename%.*}"

# Define output file path
output_file="$OUTPUT_DIR/${filename}_acdc_output.rsf"

echo "Processing file: $DEPS_FILE"

# Run the ACDC jar on the dependencies file. Every expansion is quoted so the
# jar receives exactly two arguments even if a path contains whitespace.
if ! java -jar "$ACDC_JAR" "$DEPS_FILE" "$output_file"; then
    echo "ACDC failed for $DEPS_FILE" >&2
    exit 1
fi

# Only reached when java exited successfully.
echo "Output generated: $output_file"
echo "All files processed."
#!/bin/bash
# Run the ARCADE Pkg clustering jar on one dependencies file and capture the
# jar's stdout and stderr in <PROJECT_PATH>/pkg_output.log.
#
# Exits with the jar's exit status when the run fails.

# path to the Pkg jar file
PKG_JAR_PATH="/Users/nehapokharel/Documents/DSSE/arcade_core-Pkg.jar"
# path to dependencies file
DEPS_PATH="/Users/nehapokharel/Documents/DSSE/out.rsf"
# output directory for the result
PROJECT_PATH="/Users/nehapokharel/Documents/DSSE/pkg"
# project name
PROJECT_NAME="Hadoop common"
# commit ID
PROJECT_VERSION="bd1a08b2cfba7dcab89791ddba97e15bb2d2c0de"
# Specify the programming language
LANGUAGE="java"
# Specify file level clustering (true for file level, false for package level)
FILE_LEVEL="true"

LOG_FILE="$PROJECT_PATH/pkg_output.log"

# The log redirect below fails if the output directory does not exist yet.
mkdir -p "$PROJECT_PATH" || exit 1

# Run the Pkg command; report completion only when it actually succeeded.
if java -jar "$PKG_JAR_PATH" \
    depspath="$DEPS_PATH" \
    projectpath="$PROJECT_PATH" \
    projectname="$PROJECT_NAME" \
    projectversion="$PROJECT_VERSION" \
    language="$LANGUAGE" \
    filelevel="$FILE_LEVEL" > "$LOG_FILE" 2>&1; then
    echo "Clustering completed for commit $PROJECT_VERSION" | tee -a "$LOG_FILE"
else
    # In the else branch $? still holds the exit status of the java command.
    status=$?
    echo "Pkg clustering failed for commit $PROJECT_VERSION (exit code $status); see $LOG_FILE" >&2
    exit "$status"
fi
\ No newline at end of file
#!/bin/bash
# Run the ARCADE clusterer with the WCA algorithm twice on one dependencies
# file: once with the UEM measure and once with the UEMNM measure. Each run's
# stdout and stderr go to its own log file inside PROJECT_PATH.
#
# Both runs are always attempted; the script exits 1 if either one failed.

# Define the path to the Clusterer jar file
CLUSTERER_JAR_PATH="D:/MScComputerScience/Semester5/DSSE/Assignments/FirstAssignment/Week2/arcade_core_clusterer.jar"
# Define the path to your dependencies file
DEPS_PATH="D:/MScComputerScience/Semester5/DSSE/Assignments/FirstAssignment/Week2/out.rsf"
# Define the output directory for the results
PROJECT_PATH="D:/MScComputerScience/Semester5/DSSE/Assignments/FirstAssignment/Week2/output"
# Define the project name
PROJECT_NAME="Hadoop common"
# Define the commit ID
PROJECT_VERSION="bd1a08b2cfba7dcab89791ddba97e15bb2d2c0de"
# Specify the language (C or Java)
LANGUAGE="java"
# Clustering algorithm used for both runs
ALGO="WCA"

# The log redirects below fail if the output directory does not exist yet.
mkdir -p "$PROJECT_PATH" || exit 1

# run_wca MEASURE LOG_FILE
# Run the clusterer once with the given measure, sending stdout and stderr to
# LOG_FILE. Returns the exit status of the java process.
run_wca() {
    local measure="$1"
    local log_file="$2"
    # -Xmx must precede -jar so the JVM treats it as a heap option.
    java -Xmx14024m -jar "$CLUSTERER_JAR_PATH" \
        deps="$DEPS_PATH" \
        projpath="$PROJECT_PATH" \
        measure="$measure" \
        projname="$PROJECT_NAME" \
        projversion="$PROJECT_VERSION" \
        language="$LANGUAGE" \
        algo="$ALGO" > "$log_file" 2>&1
}

failed=0

if ! run_wca "UEM" "$PROJECT_PATH/clusterer_output1.log"; then
    echo "WCA/UEM run failed; see $PROJECT_PATH/clusterer_output1.log" >&2
    failed=1
fi

if ! run_wca "UEMNM" "$PROJECT_PATH/clusterer_output2.log"; then
    echo "WCA/UEMNM run failed; see $PROJECT_PATH/clusterer_output2.log" >&2
    failed=1
fi

if [ "$failed" -ne 0 ]; then
    exit 1
fi

echo "Clustering completed for commit $PROJECT_VERSION" | tee -a "$PROJECT_PATH/clusterer_output.log"
\ No newline at end of file
from jar_runner import run_jar
import os
def run_pkg(output_dir):
    """Run the ARCADE Pkg jar on every directory found beneath *output_dir*.

    Each directory (at any depth) that contains a file whose name ends with
    ``dependencies.rsf`` is processed once. The directory name is passed to the
    jar as the project version (commit ID), and the jar's captured output is
    written to ``pkg_output.log`` inside that directory.

    Args:
        output_dir: Root directory to walk.

    Returns:
        None. Progress and captured output are printed to stdout.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    lib_dir = os.path.join(base_dir, "../../lib")
    pkg_jar_path = os.path.join(lib_dir, "arcade_core-Pkg.jar")

    project_name = "Hadoop common"
    language = "java"
    file_level = "true"

    for subdir, dirs, _files in os.walk(output_dir):
        for dir_name in dirs:
            # The name of the directory is used as the project_version (commit ID)
            project_version = dir_name
            full_dir_path = os.path.join(subdir, dir_name)

            # Pick the RSF file; sorting makes the choice deterministic when a
            # directory holds more than one match.
            rsf_file = next(
                (
                    f
                    for f in sorted(os.listdir(full_dir_path))
                    if f.endswith("dependencies.rsf")
                ),
                None,
            )
            if not rsf_file:
                continue

            full_rsf_path = os.path.join(full_dir_path, rsf_file)
            output_log = os.path.join(full_dir_path, "pkg_output.log")

            # Construct arguments required by PKG
            args = [
                f"depspath={full_rsf_path}",
                f"projectpath={full_dir_path}",
                f"projectname={project_name}",
                f"projectversion={project_version}",
                f"language={language}",
                f"filelevel={file_level}",
            ]

            print(
                f"Running PKG on {full_rsf_path} with commit ID {project_version}..."
            )

            # Run PKG
            stdout, stderr = run_jar(pkg_jar_path, args=args, cwd=full_dir_path)

            # Persist the captured output so the "saved to" message below is
            # true. `or ""` guards against run_jar handing back None.
            with open(output_log, "w", encoding="utf-8") as log_file:
                log_file.write(stdout or "")
                log_file.write(stderr or "")

            print(f"PKG Clustering completed for commit {project_version}\n")
            if stdout:
                print(stdout + "\n")
            if stderr:
                print(f"Error processing {rsf_file}: {stderr}\n")
            print(f"Clustering results saved to {output_log}")
def run_acdc(output_dir):
    """Run the ARCADE ACDC jar on every dependencies RSF file under *output_dir*.

    For each directory (at any depth) beneath *output_dir*, every file whose
    name ends with ``dependencies.rsf`` is passed to the jar together with an
    output path of the form ``<input stem>_acdc_output.rsf`` in the same
    directory.

    Args:
        output_dir: Root directory to walk.

    Returns:
        None. Progress and captured output are printed to stdout.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    lib_dir = os.path.join(base_dir, "../../lib")
    acdc_jar_path = os.path.join(lib_dir, "arcade_core-ACDC.jar")

    for subdir, dirs, _files in os.walk(output_dir):
        for dir_name in dirs:
            full_dir_path = os.path.join(subdir, dir_name)
            rsf_files = sorted(
                f for f in os.listdir(full_dir_path) if f.endswith("dependencies.rsf")
            )

            for rsf_file in rsf_files:
                full_rsf_path = os.path.join(full_dir_path, rsf_file)

                # Extract filename without extension for output naming. The
                # suffix matches the standalone ACDC shell script; the previous
                # "_acdc_ .rsf" name carried a stray space.
                filename_without_ext = os.path.splitext(rsf_file)[0]
                output_file_path = os.path.join(
                    full_dir_path, f"{filename_without_ext}_acdc_output.rsf"
                )

                # Construct arguments for the ACDC JAR
                args = [full_rsf_path, output_file_path]

                print(f"Running ACDC on {full_rsf_path}...")

                # Run ACDC
                stdout, stderr = run_jar(acdc_jar_path, args=args)

                if stdout:
                    print(stdout)
                if stderr:
                    print(f"Error processing {rsf_file}: {stderr}")

                # Only claim success when the jar actually produced the file.
                if os.path.exists(output_file_path):
                    print(f"Output generated: {output_file_path}")
                else:
                    print(f"No ACDC output was produced for {full_rsf_path}")
def _run_clusterer_jar(jar_path, jvm_options, args, cwd):
    """Launch *jar_path* with *jvm_options* placed before ``-jar``.

    NOTE(review): run_jar builds ``["java", "-jar", jar_path] + args``, so
    anything passed through it lands after the jar path and is handed to the
    program rather than the JVM. Until run_jar grows a JVM-options parameter,
    the clusterer is launched directly so ``-Xmx`` takes effect.

    Returns:
        A ``(stdout, stderr)`` tuple of the captured text output.
    """
    import subprocess  # local import: the module header does not import it

    command = ["java", *jvm_options, "-jar", jar_path, *args]
    print("Executing command:", command)
    result = subprocess.run(command, capture_output=True, text=True, cwd=cwd)
    return result.stdout, result.stderr


def run_clusterer(output_dir):
    """Run the ARCADE clusterer (WCA) with UEM and UEMNM on each RSF file.

    For each directory (at any depth) beneath *output_dir*, every file whose
    name ends with ``dependencies.rsf`` is clustered twice: once with the UEM
    measure and once with UEMNM. The directory name is passed as the project
    version (commit ID). Captured stdout and stderr of each run are written to
    ``clusterer_output_uem.log`` and ``clusterer_output_uemm.log`` in that
    directory.

    Args:
        output_dir: Root directory to walk.

    Returns:
        None. Progress is printed to stdout.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    lib_dir = os.path.join(base_dir, "../../lib")
    clusterer_jar_path = os.path.join(lib_dir, "arcade_core_clusterer.jar")

    project_name = "Hadoop common"
    language = "java"
    jvm_options = ["-Xmx14024m"]
    # (measure, log file name) for each WCA execution, in run order.
    runs = (
        ("UEM", "clusterer_output_uem.log"),
        ("UEMNM", "clusterer_output_uemm.log"),
    )

    for subdir, dirs, _files in os.walk(output_dir):
        for dir_name in dirs:
            full_dir_path = os.path.join(subdir, dir_name)
            rsf_files = sorted(
                f for f in os.listdir(full_dir_path) if f.endswith("dependencies.rsf")
            )

            for rsf_file in rsf_files:
                full_rsf_path = os.path.join(full_dir_path, rsf_file)
                output_path = full_dir_path
                project_version = dir_name

                print(
                    f"Running WCA on {full_rsf_path} with commit ID {project_version}..."
                )
                print("Jar path:", clusterer_jar_path)
                print("Project path:", full_dir_path)
                print("RSF path:", full_rsf_path)

                for measure, log_name in runs:
                    args = [
                        f"deps={full_rsf_path}",
                        f"projpath={output_path}",
                        f"measure={measure}",
                        f"projname={project_name}",
                        f"projversion={project_version}",
                        f"language={language}",
                        "algo=WCA",
                    ]
                    stdout, stderr = _run_clusterer_jar(
                        clusterer_jar_path, jvm_options, args, cwd=output_path
                    )

                    # Write the log right after its run so it survives a
                    # failure of the next one, and keep stderr even when
                    # stdout is non-empty.
                    log_path = os.path.join(output_path, log_name)
                    with open(log_path, "w", encoding="utf-8") as log_file:
                        log_file.write(stdout or "")
                        log_file.write(stderr or "")

                    print(f"Output for {measure} saved to {log_path}")
\ No newline at end of file
......@@ -9,7 +9,7 @@ def analyze_dependencies(output_dir):
for subdir, dirs, files in os.walk(output_dir):
for file_name in files:
if file_name.endswith("SNAPSHOT.jar"):
if not file_name.endswith("SNAPSHOT.jar"):
pass
full_jar_path = os.path.join(subdir, file_name)
......@@ -27,3 +27,14 @@ def analyze_dependencies(output_dir):
print(stdout)
if stderr:
print(f"Error processing {file_name}: {stderr}")
def get_rsf_file_paths(output_dir):
    """Return the paths of all files ending in ``.rsf`` beneath *output_dir*.

    The tree is traversed with ``os.walk``; each returned path is the walked
    directory joined with the matching file name, in traversal order.
    """
    return [
        os.path.join(folder, entry)
        for folder, _subfolders, entries in os.walk(output_dir)
        for entry in entries
        if entry.endswith(".rsf")
    ]
\ No newline at end of file
......@@ -17,6 +17,7 @@ def run_jar(jar_path, args=None, cwd=None):
args = []
command = ["java", "-jar", jar_path] + args
print("Executing command:", command)
result = subprocess.run(command, capture_output=True, text=True, cwd=cwd)
if result.stderr:
......
......@@ -8,7 +8,8 @@ from build_manager import create_packages
from issue_commit_linkage import link_issues_to_commits
from visualization import create_charts
from dependency_analyzer import analyze_dependencies
from dependency_analyzer import analyze_dependencies, get_rsf_file_paths
from clustering import run_pkg, run_acdc, run_clusterer
# Define default paths for input and output directories
DEFAULT_INPUT_DIR = "data/input"
......@@ -52,6 +53,11 @@ def main():
# Dependency Analysis
analyze_dependencies(output_dir)
# Clustering
run_pkg(output_dir)
run_acdc(output_dir)
run_clusterer(output_dir)
if __name__ == "__main__":
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment