Commit b44c638e authored by markus
Browse files

cleanup

parent 39d5146c
......@@ -26,7 +26,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -129,50 +129,71 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "error",
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-5-3445bfdcc8e9>, line 3)",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-5-3445bfdcc8e9>\"\u001b[0;36m, line \u001b[0;32m3\u001b[0m\n\u001b[0;31m pos, neg = extract_resources(lps 2)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"learning_problems = read(path='../data/kg-mini-project-train_v2.ttl', format='turtle')\n",
"embeddings = load_embeddings_from_file(\"../data/embeddings/embeddings_carcinogenesis_transr_16dim.tsv\")\n",
"\n",
"# Combine positive and negative results\n",
"pos, neg = extract_resources(learning_problems, 2)\n",
"all_res = pos + neg\n",
"all_res_embeddings = np.array([embeddings[x] for x in all_res])\n",
"\n",
"clustering = KMeans(n_clusters=2, random_state=0).fit(all_res_embeddings)"
"def execute_on_lp(lps : Graph, embeddings : list, lp : int):\n",
"    \"\"\"Cluster one learning problem's resources into two groups and print the metrics.\n",
"\n",
"    The positive and negative resources returned by `extract_resources(lps, lp)`\n",
"    are looked up in `embeddings`, clustered with 2-means, and the cluster that\n",
"    holds more than half of the positives is treated as the positive class.\n",
"    The resulting confusion counts are printed and passed to `print_metrics`.\n",
"\n",
"    Args:\n",
"        lps: Graph holding the learning problems; passed through to `extract_resources`.\n",
"        embeddings: Lookup from resource to embedding vector (accessed as `embeddings[x]`).\n",
"            NOTE(review): annotated as `list` but indexed by resource - presumably a dict; confirm.\n",
"        lp: Identifier of the learning problem to evaluate.\n",
"    \"\"\"\n",
"    positives, negatives = extract_resources(lps, lp)\n",
"    n_pos = len(positives)\n",
"\n",
"    # Positives come first, negatives after; the label slices below rely on this order.\n",
"    vectors = np.array([embeddings[res] for res in positives + negatives])\n",
"    model = KMeans(n_clusters=2, random_state=0).fit(vectors)\n",
"    pos_labels = list(model.labels_[:n_pos])\n",
"    neg_labels = list(model.labels_[n_pos:])\n",
"\n",
"    # Label 1 is the positive cluster only if strictly more than half of the positives carry it.\n",
"    if sum(pos_labels) > n_pos / 2:\n",
"        positives_label, negatives_label = 1, 0\n",
"    else:\n",
"        positives_label, negatives_label = 0, 1\n",
"    print(\"Positive cluster label: \" + str(positives_label))\n",
"    print(\"Negative cluster label: \" + str(negatives_label))\n",
"\n",
"    # Confusion counts: positives are judged on pos_labels, negatives on neg_labels.\n",
"    TP = pos_labels.count(positives_label)\n",
"    print(\"True positives: \" + str(TP))\n",
"    TN = neg_labels.count(negatives_label)\n",
"    print(\"True negatives: \" + str(TN))\n",
"    FP = neg_labels.count(positives_label)\n",
"    print(\"False positives: \" + str(FP))\n",
"    FN = pos_labels.count(negatives_label)\n",
"    print(\"False negatives: \" + str(FN))\n",
"    print_metrics(TP, TN, FP, FN)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Positive cluster label: 0\nNegative cluster label: 1\nTrue positives: 78\nTrue negatives: 10979\nFalse positives: 11246\nFalse negatives: 69\nAccuracy: 0.4942338637582693\nPrecision: 0.00688802543270929\nRecall: 0.5306122448979592\nF1-Score: 0.013599511812396478\n"
"Positive cluster label: 0\n",
"Negative cluster label: 1\n",
"True positives: 82\n",
"True negatives: 10992\n",
"False positives: 11233\n",
"False negatives: 65\n",
"Accuracy: 0.49499374217772213\n",
"Precision: 0.007247017233760495\n",
"Recall: 0.5578231292517006\n",
"F1-Score: 0.014308148665154423\n"
]
}
],
"source": [
"\n",
"# Identify cluster with majority positive examples\n",
"positives_label = 1 if sum(clustering.labels_[:len(pos)]) > len(pos)/2 else 0\n",
"negatives_label = 1 - 1 * positives_label\n",
"print(\"Positive cluster label: \" + str(positives_label))\n",
"print(\"Negative cluster label: \" + str(negatives_label))\n",
"TP = list(clustering.labels_[:len(pos)]).count(positives_label)\n",
"print(\"True positives: \" + str(TP))\n",
"TN = list(clustering.labels_[len(pos):]).count(negatives_label)\n",
"print(\"True negatives: \" + str(TN))\n",
"FP = list(clustering.labels_[len(pos):]).count(positives_label)\n",
"print(\"False positives: \" + str(FP))\n",
"FN = list(clustering.labels_[:len(pos)]).count(negatives_label)\n",
"print(\"False negatives: \" + str(FN))\n",
"print_metrics(TP, TN, FP, FN)"
"# Load the learning problems (Turtle file) and the embedding table once for all runs.\n",
"learning_problems = read(\n",
"    path='../data/kg-mini-project-train_v2.ttl',\n",
"    format='turtle',\n",
")\n",
"embeddings = load_embeddings_from_file(\n",
"    \"../data/embeddings/embeddings_carcinogenesis_transr_64dim.tsv\"\n",
")\n",
"\n",
"# Evaluate learning problems 1 through 25, printing a blank-line separator after each.\n",
"for i in range(1, 25 + 1):\n",
"    print(\"Execution for LP \" + str(i) + \": \")\n",
"    execute_on_lp(learning_problems, embeddings, i)\n",
"    print(\"\\n\\n\")"
]
}
]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment