Skip to content
Snippets Groups Projects
Commit bcedfa3c authored by Jakob Mathias Greuel's avatar Jakob Mathias Greuel
Browse files
parents f63178b2 962a32fa
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:7722a591 tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
# Load the zipped CSV and parse the "timestamp" column into datetimes.
df_dataset = pd.read_csv(
    "data/dataset.csv.zip",
    parse_dates=["timestamp"],
)
# Preview the first five rows (five is the default for head()).
df_dataset.head()
```
%% Output
sensor_1 sensor_2 timestamp product_id machine_id quality \
0 -0.168 -0.164 2021-05-17 08:12:48 P3.2.500 Printer F0815 OK
1 -0.171 -0.227 2021-05-17 08:12:48 P3.2.500 Printer F0815 OK
2 -0.242 -0.154 2021-05-17 08:12:48 P3.2.500 Printer F0815 OK
3 -0.139 -0.176 2021-05-17 08:12:48 P3.2.500 Printer F0815 OK
4 -0.081 -0.110 2021-05-17 08:12:48 P3.2.500 Printer F0815 OK
label
0 1
1 1
2 1
3 1
4 1
%% Cell type:code id:cce9267e tags:
``` python
# Model inputs: the two sensor columns.
X = df_dataset.loc[:, ['sensor_1', 'sensor_2']]
# Target: selected with a list so it stays a one-column DataFrame, not a Series.
Y = df_dataset.loc[:, ['label']]

# Show both selections, features first.
print(X)
print(Y)
```
%% Output
sensor_1 sensor_2
0 -0.168 -0.164
1 -0.171 -0.227
2 -0.242 -0.154
3 -0.139 -0.176
4 -0.081 -0.110
... ... ...
33914875 -0.085 -0.227
33914876 0.005 -0.115
33914877 -0.120 -0.017
33914878 -0.117 0.083
33914879 -0.200 0.000
[33914880 rows x 2 columns]
[[1]
[1]
[1]
...
[0]
[0]
[0]]
%% Cell type:code id:86f14508 tags:
``` python
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
)
```
%% Cell type:code id:aba023fe tags:
``` python
# Report how many rows of each label ended up in each split.
for heading, labels in (('Training set:\n', Y_train), ('Test set:\n', Y_test)):
    print(heading, labels.value_counts())
```
%% Output
Training set:
label
1 22421730
0 1318686
dtype: int64
Test set:
label
1 9608990
0 565474
dtype: int64
%% Cell type:code id:3166c922 tags:
``` python
# Fit the scaler on the training features only, then apply that same fitted
# transform to both splits so the test data does not influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
```
%% Output
label
1 32030720
0 1884160
dtype: int64
%% Cell type:code id:3a2ebfe8 tags:
``` python
# Flatten the one-column label frame once, then count each class.
train_labels = Y_train.values.ravel()
print('0: ', (train_labels == 0).sum(), '1: ', (train_labels == 1).sum())
```
%% Output
0: 1318686 1: 22421730
array([0.52940643, 9.00154244, 0.52940643, ..., 0.52940643, 0.52940643,
0.52940643])
%% Cell type:code id:93ecc889 tags:
``` python
# Unfitted AdaBoost ensemble: 50 estimators, learning rate 1.
ada = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1,
)
# Bare expression so the notebook displays the estimator's repr.
ada
```
%% Output
AdaBoostClassifier(learning_rate=1)
%% Cell type:code id:c7b12639 tags:
``` python
# Train the estimator configured in the previous cell (`ada`) rather than
# constructing a second classifier with the same hyperparameters copied in;
# this keeps a single place where the model configuration is defined.
# fit() returns the estimator itself, so `adaboost_fit` is the fitted `ada`.
# ravel() flattens the one-column label frame into the 1-D array fit() expects.
adaboost_fit = ada.fit(X_train, Y_train.values.ravel())
# Bare expression so the notebook displays the fitted estimator's repr.
adaboost_fit
```
%% Output
AdaBoostClassifier(learning_rate=1)
%% Cell type:code id:491ce85e tags:
``` python
# Mean accuracy on the held-out split.
# NOTE(review): the test set printed above is ~94.4% label 1
# (9608990 of 10174464 rows), so an accuracy near 0.944 is about what always
# predicting the majority class would give; check per-class metrics as well.
score = adaboost_fit.score(X=X_test, y=Y_test)
score
```
%% Output
0.9443985452206622
%% Cell type:code id:f5d2bc89 tags:
``` python
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict labels for the held-out features. `Y_test_Pred` was referenced here
# without ever being assigned, which raises NameError on a clean run.
Y_test_Pred = adaboost_fit.predict(X_test)

# normalize='true' divides each row by the number of samples of that true
# class, so every row of the matrix sums to 1.
cm = confusion_matrix(Y_test, Y_test_Pred, normalize='true')
ConfusionMatrixDisplay(cm).plot()
```
%% Output
<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x243102db850>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment