Skip to content
Snippets Groups Projects
Commit bc1c6769 authored by Abhay Kishorbhai Vaghasiya's avatar Abhay Kishorbhai Vaghasiya
Browse files

updated RQ3 code and results

parent 8b023436
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import shapiro, f_oneway, kruskal
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison
def load_data(filepath):
    """Load the dataset from a specified file path.

    Args:
        filepath: Location of the CSV file. It is forwarded unchanged to
            ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with an extra ``Type`` column whose values cycle
        through 'Concept', 'Technology', 'Methodology', 'Pattern' in row
        order (row ``i`` gets entry ``i % 4``).
    """
    # Read from the caller-supplied location. The argument used to be ignored
    # in favour of a machine-specific absolute path baked into this function.
    df = pd.read_csv(filepath)
    types = ['Concept', 'Technology', 'Methodology', 'Pattern']
    # A plain list is assigned positionally, so the labelling does not depend
    # on what index the frame happens to carry.
    df['Type'] = [types[i % len(types)] for i in range(len(df))]
    return df
def normality_tests(df):
    """Print a Shapiro-Wilk normality test result for every ``Type`` group.

    Args:
        df: DataFrame with a ``Type`` column to group by and an ``ans_count``
            column holding the values to test.

    Returns:
        None. One line per group is written to standard output.
    """
    for name, group in df.groupby('Type'):
        sample = group['ans_count']
        # scipy's shapiro raises ValueError for fewer than 3 observations;
        # report and skip such groups instead of aborting the whole run.
        if len(sample) < 3:
            print(
                f'Normality test for {name}: skipped, '
                f'only {len(sample)} observation(s) (Shapiro-Wilk needs at least 3)'
            )
            continue
        stat, p = shapiro(sample)
        print(f'Normality test for {name}: Statistics={stat}, p-value={p}')
def perform_statistical_tests(df):
    """Compare ``ans_count`` across ``Type`` groups and print the results.

    Each group is checked with Shapiro-Wilk. If every group could be tested
    and none rejects normality at the 0.05 level, a one-way ANOVA is run;
    otherwise the Kruskal-Wallis test is used. When the chosen test is
    significant (p < 0.05), Tukey's HSD post-hoc comparison is printed.

    Args:
        df: DataFrame with a ``Type`` column to group by and an ``ans_count``
            column holding the values to compare.

    Returns:
        None. All results are written to standard output.
    """
    groups = list(df.groupby('Type')['ans_count'])
    data = [group for _, group in groups]

    # Both kruskal and f_oneway need at least two samples to compare.
    if len(data) < 2:
        print("Fewer than two groups available, skipping statistical tests.")
        return

    # Run Shapiro-Wilk once per group and keep the p-values, rather than
    # recomputing the test when choosing between ANOVA and Kruskal-Wallis.
    normality_p_values = []
    untestable = False
    for name, group in groups:
        # shapiro raises ValueError for fewer than 3 observations.
        if len(group) < 3:
            print(
                f'Normality test for {name}: skipped, '
                f'only {len(group)} observation(s) (Shapiro-Wilk needs at least 3)'
            )
            untestable = True
            continue
        stat, p = shapiro(group)
        print(f'Normality test for {name}: Statistics={stat}, p-value={p}')
        normality_p_values.append(p)

    if any(p_value < 0.05 for p_value in normality_p_values):
        print("Data does not follow a normal distribution, performing Kruskal-Wallis test.")
        stat, p = kruskal(*data)
        print(f'Kruskal-Wallis test: Statistics={stat}, p-value={p}')
    elif untestable:
        # Normality cannot be assumed for a group that was too small to test,
        # so fall back to the non-parametric test.
        print("Normality could not be verified for every group, performing Kruskal-Wallis test.")
        stat, p = kruskal(*data)
        print(f'Kruskal-Wallis test: Statistics={stat}, p-value={p}')
    else:
        print("Data follows a normal distribution, performing ANOVA test.")
        stat, p = f_oneway(*data)
        print(f'ANOVA test: Statistics={stat}, p-value={p}')

    # NOTE(review): the post-hoc step runs after either omnibus test. Tukey's
    # HSD is the usual follow-up to ANOVA; confirm it is also what is wanted
    # after the Kruskal-Wallis branch.
    if p < 0.05:
        mc = MultiComparison(df['ans_count'], df['Type'])
        result = mc.tukeyhsd()
        print("Tukey's post-hoc test results:")
        print(result)
def main():
    """Run the analysis: load the results CSV, then print both test reports."""
    # Location of the model results file analysed by this notebook cell.
    results_csv = '/Users/abhayvaghasiya/Desktop/DS4SE_2/DSSE-Group-7/Assignment-3/Bonus/WEEK3/model_results2.csv'
    frame = load_data(results_csv)

    # Per-group normality report first, then the omnibus comparison.
    normality_tests(frame)
    perform_statistical_tests(frame)


# Only run the analysis when executed directly, not when imported.
if __name__ == "__main__":
    main()
```
%% Output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[11], line 51
48 perform_statistical_tests(df)
50 if __name__ == "__main__":
---> 51 main()
Cell In[11], line 47, in main()
45 filepath = '/Users/abhayvaghasiya/Downloads/processed_data.csv'
46 df = load_data(filepath)
---> 47 normality_tests(df)
48 perform_statistical_tests(df)
Cell In[11], line 18, in normality_tests(df)
16 grouped = df.groupby('Type')
17 for name, group in grouped:
---> 18 stat, p = shapiro(group['ans_count'])
19 print(f'Normality test for {name}: Statistics={stat}, p-value={p}')
File ~/.pyenv/versions/3.10.4/lib/python3.10/site-packages/scipy/stats/_axis_nan_policy.py:531, in _axis_nan_policy_factory.<locals>.axis_nan_policy_decorator.<locals>.axis_nan_policy_wrapper(***failed resolving arguments***)
529 if sentinel:
530 samples = _remove_sentinel(samples, paired, sentinel)
--> 531 res = hypotest_fun_out(*samples, **kwds)
532 res = result_to_tuple(res)
533 res = _add_reduced_axes(res, reduced_axes, keepdims)
File ~/.pyenv/versions/3.10.4/lib/python3.10/site-packages/scipy/stats/_morestats.py:1994, in shapiro(x)
1992 N = len(x)
1993 if N < 3:
-> 1994 raise ValueError("Data must be at least length 3.")
1996 a = zeros(N//2, dtype=np.float64)
1997 init = 0
ValueError: Data must be at least length 3.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment