Skip to content
Snippets Groups Projects
Commit 83e2078f authored by Michael Youkeim's avatar Michael Youkeim
Browse files

Removed the temp. visualization script

parent 22927cb1
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import matplotlib.pyplot as plt
import json
# Location of the commit-metrics export to analyse.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
input_file = 'C:/Users/91991/Desktop/detailed_commits_info.json'

# Load the whole export into a DataFrame in a single call.
# If the file ever becomes newline-delimited JSON (one object per line)
# rather than one JSON document, pass lines=True to pd.read_json instead of
# hand-parsing each line.
df = pd.read_json(input_file)
print(df)

# Check for missing data: count of null cells in each column.
print("Missing Data Summary:")
print(df.isnull().sum())
# Perform analysis and visualization
# For example, assume each record contains the 'Key', 'Number_of_added_files', 'Number_of_deleted_files' and 'Number_of_modified_files' fields.
# Top 10 decision keys with most number of added, deleted, or modified files
#grp = df.groupby('Key')[['Number_of_added_files', 'Number_of_deleted_files', 'Number_of_modified_files']].sum()
#grp['Total'] = grp.sum(axis=1)
#top_10 = grp.nlargest(10, 'Total')
# top_10[['Number_of_added_files', 'Number_of_deleted_files', 'Number_of_modified_files']].plot(kind='bar', stacked=True)
# plt.xlabel('K')
# plt.ylabel('Total added, deleted, and modified files')
# plt.title('Stacked Bar Chart for Top 10 decision keys')
# plt.show()
# Decision keys whose average unit-size score is extreme (>= 0.8 or <= 0.2).
# Relies on the 'Key' and 'DMM_unit_size' columns of df.
# NOTE(review): if 'DMM_unit_size' is a delta-maintainability score, values
# near 1 usually mean mostly low-risk changes - confirm which of the two
# groups below is really the "high risk" one before trusting the title.
grp_data = df.groupby('Key')['DMM_unit_size'].mean()
filtered_k_high = grp_data[grp_data >= 0.8]
filtered_k_low = grp_data[grp_data <= 0.2]

plt.figure(figsize=(10, 6))
# Both groups are drawn on the same axes; the two ranges are disjoint, so a
# key appears in at most one of them.
plt.bar(filtered_k_high.index, filtered_k_high.values, color='blue', label='Mean X >= 0.8')
# The legend text matches the inclusive `<= 0.2` filter above.
plt.bar(filtered_k_low.index, filtered_k_low.values, color='red', label='Mean X <= 0.2')
plt.xlabel('Decision Keys', fontsize=8)
plt.ylabel('Mean Unit Size')
plt.title('Decision Keys with High risk and Low Risk Unit Size')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# One box-plot figure per decision key, comparing the three DMM columns.
# NOTE(review): the loop body had lost its indentation in the reviewed copy;
# all five statements are assumed to belong to the loop (the title uses the
# loop's key and each figure is shown before the next one is drawn) - confirm.
# The per-key frame gets its own name so the loop does not overwrite the
# `grp_data` Series of per-key means computed earlier in the script.
for key, key_rows in df.groupby("Key"):
    key_rows.boxplot(column=["DMM_unit_size", "DMM_unit_complexity", "DMM_unit_interfacing"])
    plt.title(f"Boxplots for Key '{key}'")
    plt.ylabel("Values")
    plt.xlabel("Variables")
    plt.show()
# Average 'complexity' per decision key, flattened back into a two-column
# frame ('Key', 'complexity').
grp_complexity = df.groupby('Key')['complexity'].mean().reset_index()

# Keep only the keys whose mean complexity reaches the 200 cut-off.
is_complex = grp_complexity['complexity'] >= 200
filtered_grouped = grp_complexity[is_complex]

# Bar chart of the retained keys.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(filtered_grouped['Key'], filtered_grouped['complexity'])
ax.set_xlabel('Decision Key')
ax.set_ylabel('Average Complexity')
ax.set_title('Decision Keys with Average Complexity greater than 200')
# Slant the tick labels and anchor them on the right.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment