Commit 344f99fc authored by David Buderus

Clean up

parent 0721953a
@@ -5,7 +5,6 @@ import time
from algorithm.IRTAlgorithm import IRTAlgorithm
from algorithm.IterativeMergingAlgorithm import IterativeMergingAlgorithm
from algorithm.TreeMergingAlgorithm import TreeMergingAlgorithm
-from bwt.SimpleBWT import SimpleBWT
from bwt.closing_strategy.LongestNotUsedClosing import LongestNotUsedClosing
from bwt.closing_strategy.RandomClosing import RandomClosing
from framework.Framework import Framework
@@ -91,4 +90,3 @@ def evaluate(args):
if __name__ == "__main__":
evaluate(sys.argv[1:])
# Master Thesis

Python Version: 3.9.2

Install pipenv and create the environment:
`pip install pipenv`
`pipenv install`

Open the help page:
`pipenv run python Main.py --help`

## Idea
- Do merging with the IRT: use the rank array together with the bitvector and add everything at the same time (see the sketch below)
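A minimal sketch of the merging idea above, assuming the interleave bitvector has already been computed (deriving that bitvector from the rank array is the actual work of the merging algorithms); `merge_bwts` and its signature are illustrative, not this repository's API:

```python
def merge_bwts(bwt_a: str, bwt_b: str, interleave: list) -> str:
    # interleave[i] == 0 means position i of the merged BWT is taken from
    # bwt_a, 1 means it is taken from bwt_b.  The index into each source
    # string is the rank of its bit up to i, kept here as two running
    # counters instead of a precomputed rank array.
    rank = [0, 0]
    merged = []
    for bit in interleave:
        source = bwt_a if bit == 0 else bwt_b
        merged.append(source[rank[bit]])
        rank[bit] += 1
    return "".join(merged)


# Example: merge_bwts("ab", "cd", [0, 1, 1, 0]) returns "acdb"
```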
#!/bin/bash
# Sweep the IRT over closing strategies, memory limits, file size limits
# (powers of two) and generated inputs of n strings of length m.
for strategy in random longestNotUsed
do
    for memory in $(seq .05 .05 .2)
    do
        for file in {10..15}
        do
            for n in {10..100..10}
            do
                for m in {1000..10000..1000}
                do
                    echo "Using strategy $strategy with input size $((n * m)) with limits file $((2 ** file)) and memory $memory"
                    pipenv run python Main.py irt_test irt -gi "$n" "$m" -cs "$strategy" -ml "$memory" -fs $((2 ** file)) --noEvaluate --clearOutput
                done
            done
        done
    done
done
# Evaluate all collected reports once the sweep is done.
pipenv run python Main.py irt_test irt -i "" --evaluate
import os
import sys
from multiprocessing import Process

import Main
from framework.Framework import TestReport


def run_with_different_input_size(algorithm, folder, strategy, memory, file):
    n = 50
    for m in range(7000, 11000, 1000):
        if not run_test(algorithm, folder, strategy, memory, file, n, m):
            return  # Terminated, bigger sizes will terminate too


def run_test(algorithm, folder, strategy, memory, file, n, m):
    report = TestReport(Main.algorithms[algorithm](False).name,
                        Main.closing_strategies[strategy]().name,
                        n, m, m, n * m, memory, 2 ** file, 4)
    report_path = os.path.join(
        folder,
        f"{report.algorithm_name}_{report.total}_{hash(report)}",
        "report.json"
    )
    print(f"Checking {report_path}")
    if os.path.exists(report_path):
        print(f"Already calculated strategy {strategy} with input size {n * m} "
              f"with limits file {2 ** file} and memory {memory}")
        return True
    print(f"Using {algorithm} strategy {strategy} with input size {n * m} "
          f"with limits file {2 ** file} and memory {memory}")
    # Run the evaluation in a separate process so a hanging run can be killed.
    process = Process(target=Main.evaluate, args=([
        folder, algorithm, "-gi", str(n), str(m), "-cs",
        strategy, "-ml", str(memory), "-fs",
        str(2 ** file), "--noEvaluate", "--clearOutput"
    ],))
    process.daemon = True
    process.start()
    process.join(timeout=14400)  # 4 hours
    if process.is_alive():
        process.terminate()
        print("Terminated run")
        # Clean up the partial output of the aborted run.
        os.system(f'rm -R {folder}/output*')
        os.system(f'rm -R {folder}/bwt_output*')
        return False
    return True


def full_test(folder):
    algorithm = "iterativeMerging"
    print(f"Starting test for {algorithm} in folder {folder}")
    run_with_different_input_size(algorithm, folder, "random", 0.1, 10)
    Main.evaluate([folder, algorithm, "-i", "", "--evaluate"])


if __name__ == "__main__":
    full_test(sys.argv[1])
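Assuming the harness above is saved as `full_test.py` (its file name is not shown in this view) and that a results folder such as `irt_test` is used, a run would look like `pipenv run python full_test.py irt_test`; the final `Main.evaluate` call with `--evaluate` then aggregates all collected reports.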
@@ -12,7 +12,7 @@ from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-from matplotlib import cm, colors
+from matplotlib import cm
from bwt.closing_strategy.RandomClosing import RandomClosing
@@ -225,8 +225,6 @@ class Framework:
"Input size": map(lambda test_report: test_report.total, results)
}
)
-# memory_accesses_data.plot.box().get_figure().savefig(path.join(self._folder_path, "memory_accesses.pdf"))
-# hard_disc_accesses_data.plot.box().get_figure().savefig(path.join(self._folder_path, "hard_disc_accesses.pdf"))
algorithms = list(set(map(lambda test_report: test_report.algorithm_name, results)))
fig, ax = plt.subplots()
@@ -272,10 +270,6 @@ class Framework:
"n": map(lambda report: report.n, results),
}
)
-pd.set_option('display.float_format', lambda x: '%.5f' % x)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.expand_frame_repr', False)
-pd.set_option("display.precision", 2)
filter_df = filter_df[filter_df.strategy == "Random"]
@@ -290,30 +284,11 @@ class Framework:
ax.scatter(df["total"], df["runtime"], label=algorithm)
-df = df[df.memory_limit == 0.1]
-df = df[df.n == 50]
-df = df[df.total < 500000]
-print(
-df[["name", "total", "memory_limit", "runtime", "hard_disc_access", "hard_disc_access_pc",
-"memory_access", "memory_access_pc"]].sort_values(by="total")
-)
ax.set_xlabel('Input size')
ax.set_ylabel('Runtime in minutes')
ax.legend(loc="upper right")
fig.savefig(path.join(self._folder_path, "overall_comparison_with_opt_file.pdf"))
"""try:
n_m_to_runtime_plot = plt.figure().gca(projection='3d')
n_m_to_runtime_plot.plot_trisurf(n_m_to_runtime["n"], n_m_to_runtime["m"], n_m_to_runtime['runtime'],
cmap=cm.coolwarm)
n_m_to_runtime_plot.set_xlabel('n')
n_m_to_runtime_plot.set_ylabel('m')
n_m_to_runtime_plot.set_zlabel('Runtime in minutes')
plt.savefig(path.join(self._folder_path, "n_n_runtime.pdf"))
except (ValueError, RuntimeError):
print("n_n_runtime.pdf can not be created")"""
for alg in algorithms:
alg_path = path.join(self._folder_path, alg.lower())
if not path.exists(alg_path):
......