Commit cc848bd1 authored by markus
Browse files

Notebook for clustering

parent 327afca9
%% Cell type:code id: tags:
```
import numpy as np
from sklearn.cluster import DBSCAN, KMeans
from pykeen_embeddings import generate_and_save_embeddings,load_embeddings_from_file
from run import extract_resources, read
from lib import ml_metrics
```
%% Cell type:code id: tags:
```
# Load the learning problems (Turtle file) and the precomputed embeddings.
learning_problems = read(
    path='data/kg-mini-project-train.ttl',
    format='turtle',
)
embeddings = load_embeddings_from_file(
    "data/embeddings/embeddings_carcinogenesis_transr_16dim.tsv"
)

# NOTE(review): the second argument presumably selects a learning problem
# by number -- confirm against run.extract_resources.
pos, neg = extract_resources(learning_problems, 1)

# Positives come first, negatives after; later cells slice the cluster
# labels by position and rely on this ordering.
all_res = pos + neg
all_res_embeddings = np.array([embeddings[resource] for resource in all_res])

# Two clusters with a fixed seed so that re-running the cell is reproducible.
clustering = KMeans(n_clusters=2, random_state=0)
clustering.fit(all_res_embeddings)
```
%% Cell type:code id: tags:
```
# Majority vote over the positives: the first len(pos) rows of the clustered
# matrix are the positive resources, so the cluster id that more than half of
# them received is taken as the "positive" cluster (ties fall back to 0).
n_pos = len(pos)
pos_cluster_ids = clustering.labels_[:n_pos]
if pos_cluster_ids.sum() > n_pos / 2:
    positives_labels = 1
else:
    positives_labels = 0

# NOTE(review): 41 is presumably len(pos) -- confirm before relying on it.
print(clustering.labels_[:41])
```
%%%% Output: error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_8748/4221489849.py in <module>
----> 1 positives_labels = 0 if clustering.labels_[:len(pos)].count(0) > clustering.labels_[:len(pos)].count(1) else 1
2
3 print(clustering.labels_[:41])
4
AttributeError: 'numpy.ndarray' object has no attribute 'count'
%% Cell type:code id: tags:
```
# Sanity check: show the largest and smallest cluster id that was assigned.
cluster_ids = clustering.labels_
print(cluster_ids.max(), cluster_ids.min())
```
%% Cell type:code id: tags:
```
# A notebook cell only echoes its *last* expression, so a bare `len(pos)` on
# the first line is evaluated and silently discarded. Print it explicitly so
# the number of positive resources is actually shown.
print(len(pos))

# Cluster ids of the first 41 rows.
# NOTE(review): 41 is presumably len(pos) -- confirm against the value above.
print(clustering.labels_[:41])
```
%% Cell type:code id: tags:
```
# Sum of the cluster ids from row 41 onwards; with ids in {0, 1} this is the
# number of those rows assigned to cluster 1.
# NOTE(review): rows from 41 onwards are presumably the negatives -- confirm
# that 41 == len(pos).
remaining_cluster_ids = clustering.labels_[41:]
sum(remaining_cluster_ids)
```
%%%% Output: execute_result
235
%% Cell type:code id: tags:
```
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment