Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
DSSE_Group1
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Neha Pokharel
DSSE_Group1
Commits
4b66cd6c
There was an error fetching the commit references. Please try again later.
Commit
4b66cd6c
authored
May 20, 2024
by
Michael Youkeim
Browse files
Options
Downloads
Patches
Plain Diff
Added parent commit JARs & refactored clustering
parent
8ae8d01d
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/hadoop_analysis/build_manager.py
+30
-0
30 additions, 0 deletions
src/hadoop_analysis/build_manager.py
src/hadoop_analysis/clustering.py
+4
-4
4 additions, 4 deletions
src/hadoop_analysis/clustering.py
src/hadoop_analysis/main.py
+5
-3
5 additions, 3 deletions
src/hadoop_analysis/main.py
with
39 additions
and
7 deletions
src/hadoop_analysis/build_manager.py
+
30
−
0
View file @
4b66cd6c
...
...
@@ -60,9 +60,39 @@ def process_commit_hashes(commit_hashes, repo_path, output_dir):
json
.
dump
(
java_versions
,
file
,
indent
=
2
)
def find_orphan_parents(json_file):
    """Return parent hashes that never appear as a commit hash in the file.

    The file is read as JSON whose top-level value is a mapping; each of its
    values is iterated as a collection of commit records, and every record
    must provide a ``'Hash'`` entry and a ``'Parents'`` entry.

    ``'Parents'`` may be a list of hashes, a single string (split on
    whitespace, so both one hash and a space-separated list work), or
    ``None``/``null`` for a commit without parents.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A set with every parent hash that is not also a ``'Hash'`` value.
        An empty set is returned (after printing a message) when the file is
        missing, is not valid JSON, or any other error occurs; this function
        never raises.
    """
    try:
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with open(json_file, 'r', encoding='utf-8') as file:
            data = json.load(file)

        hashes = set()
        parents = set()

        for commits in data.values():
            for commit in commits:
                hashes.add(commit['Hash'])

                commit_parents = commit['Parents']
                if commit_parents is None:
                    # A commit without parents contributes nothing.
                    continue
                if isinstance(commit_parents, str):
                    # set.update() on a str would add single characters, so
                    # turn the string into a list of whole hashes first.
                    commit_parents = commit_parents.split()
                parents.update(commit_parents)

        return parents - hashes

    except FileNotFoundError:
        print(f"Error: The file '{json_file}' does not exist.")
        return set()
    except json.JSONDecodeError:
        print(f"Error: The file '{json_file}' contains invalid JSON.")
        return set()
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back
        # to "no orphan parents" instead of aborting the caller.
        print(f"An unexpected error occurred: {e}")
        return set()
def create_packages(repo_path, commit_info_file_path, output_dir):
    """Process every commit listed in the commit-info JSON file.

    The file is parsed as JSON and iterated; the ``"commit_hash"`` entry of
    each record is collected, in file order, into a list that is then passed
    to ``process_commit_hashes`` together with ``repo_path`` and
    ``output_dir``.
    """
    # Parse the commit-info file up front so the handle is closed early.
    with open(commit_info_file_path, "r") as commit_info_file:
        commit_records = json.load(commit_info_file)

    commit_hashes = []
    for record in commit_records:
        commit_hashes.append(record["commit_hash"])

    process_commit_hashes(commit_hashes, repo_path, output_dir)
def create_packages_for_parents(repo_path, issue_commit_relation_file, output_dir):
    """Process the parent commits that are missing from the relation file.

    ``find_orphan_parents`` yields the parent hashes that do not appear as
    commits in ``issue_commit_relation_file``; those hashes are handed to
    ``process_commit_hashes`` together with ``repo_path`` and ``output_dir``.

    Args:
        repo_path: Repository location forwarded to ``process_commit_hashes``.
        issue_commit_relation_file: Path of the issue/commit relation JSON.
        output_dir: Output location forwarded to ``process_commit_hashes``.
    """
    orphan_parents = find_orphan_parents(issue_commit_relation_file)

    # Iteration order of a set of strings changes between interpreter runs.
    # Sorting gives a reproducible processing order and hands over a list,
    # the same container type create_packages passes.
    process_commit_hashes(sorted(orphan_parents), repo_path, output_dir)
This diff is collapsed.
Click to expand it.
src/hadoop_analysis/clustering.py
+
4
−
4
View file @
4b66cd6c
...
...
@@ -61,7 +61,7 @@ class RepoClusterer:
print
(
f
"
ACDC run completed for
{
rsf_file_path
}
. Output generated:
{
log_path
}
"
)
def
run_clusterer
(
self
):
def
run_clusterer
(
self
,
algorithm
):
jar_path
=
os
.
path
.
join
(
self
.
lib_dir
,
"
arcade_core_clusterer.jar
"
)
for
subdir
,
dirs
,
files
in
os
.
walk
(
self
.
output_dir
):
for
dir_name
in
dirs
:
...
...
@@ -76,9 +76,9 @@ class RepoClusterer:
f
"
projname=
{
self
.
project_name
}
"
,
f
"
projversion=
{
dir_name
}
"
,
f
"
language=
{
self
.
language
}
"
,
f
"
algo=
Limbo
"
f
"
algo=
{
algorithm
}
"
]
log_path
=
self
.
run_jar_with_logging
(
jar_path
,
args
,
dir_path
,
"
clusterer_output_
limbo
.log
"
)
jar_path
,
args
,
dir_path
,
f
"
clusterer_output_
{
algorithm
.
lower
()
}
.log
"
)
print
(
f
"
Limbo
clustering run completed for
{
dir_name
}
. Results saved to
{
log_path
}
"
)
f
"
{
algorithm
}
clustering run completed for
{
dir_name
}
. Results saved to
{
log_path
}
"
)
This diff is collapsed.
Click to expand it.
src/hadoop_analysis/main.py
+
5
−
3
View file @
4b66cd6c
...
...
@@ -4,12 +4,12 @@ import argparse
import
os
from
utils
import
setup_logging
from
build_manager
import
create_packages
from
build_manager
import
create_packages
,
create_packages_for_parents
from
issue_commit_linkage
import
link_issues_to_commits
from
visualization
import
create_charts
from
java_version_manager
import
switch_java_version
from
dependency_analyzer
import
analyze_dependencies
,
get_rsf_file_paths
from
dependency_analyzer
import
analyze_dependencies
from
clustering
import
RepoClusterer
from
clustering_result_analyzer
import
analyze_clustering_results
...
...
@@ -48,6 +48,7 @@ def main():
switch_java_version
(
"
1.8
"
)
link_issues_to_commits
(
repo_path
,
issues_file_path
,
output_dir
)
create_packages
(
repo_path
,
commit_info_file_path
,
output_dir
)
create_packages_for_parents
(
repo_path
,
os
.
path
.
join
(
output_dir
,
"
issue_commit_relationships.json
"
),
output_dir
)
# Visualization
create_charts
(
commit_info_file_path
,
output_dir
)
...
...
@@ -60,7 +61,8 @@ def main():
clusterer
=
RepoClusterer
(
output_dir
)
clusterer
.
run_pkg
()
clusterer
.
run_acdc
()
clusterer
.
run_clusterer
()
clusterer
.
run_clusterer
(
"
Limbo
"
)
clusterer
.
run_clusterer
(
"
WCA
"
)
# Analyzing the clustering results
analyze_clustering_results
(
output_dir
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment