Commit 344f99fc authored by David Buderus

Clean up

parent 0721953a
@@ -5,7 +5,6 @@ import time
from algorithm.IRTAlgorithm import IRTAlgorithm
from algorithm.IterativeMergingAlgorithm import IterativeMergingAlgorithm
from algorithm.TreeMergingAlgorithm import TreeMergingAlgorithm
-from bwt.SimpleBWT import SimpleBWT
from bwt.closing_strategy.LongestNotUsedClosing import LongestNotUsedClosing
from bwt.closing_strategy.RandomClosing import RandomClosing
from framework.Framework import Framework
@@ -91,4 +90,3 @@ def evaluate(args):
if __name__ == "__main__":
evaluate(sys.argv[1:])
# Master Thesis

Python Version: 3.9.2

Install pipenv and create the environment:
`pip install pipenv`
`pipenv install`

Open the help page:
`pipenv run python Main.py --help`

## Idea
- Do merging with the IRT: use the rank array together with the bitvector and add everything at the same time (see the sketch below)
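A minimal sketch of the merging idea above, assuming the interleave bitvector has already been computed (deriving that bitvector from the rank array is the actual work of the merging algorithms); `merge_bwts` and its signature are illustrative, not this repository's API:

```python
def merge_bwts(bwt_a: str, bwt_b: str, interleave: list) -> str:
    # interleave[i] == 0 means position i of the merged BWT is taken from
    # bwt_a, 1 means it is taken from bwt_b.  The index into each source
    # string is the rank of its bit up to i, kept here as two running
    # counters instead of a precomputed rank array.
    rank = [0, 0]
    merged = []
    for bit in interleave:
        source = bwt_a if bit == 0 else bwt_b
        merged.append(source[rank[bit]])
        rank[bit] += 1
    return "".join(merged)


# Example: merge_bwts("ab", "cd", [0, 1, 1, 0]) returns "acdb"
```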
#!/bin/bash
# Sweep the IRT over closing strategies, memory limits, file size limits
# (powers of two) and generated inputs of n strings of length m.
for strategy in random longestNotUsed
do
    for memory in $(seq .05 .05 .2)
    do
        for file in {10..15}
        do
            for n in {10..100..10}
            do
                for m in {1000..10000..1000}
                do
                    echo "Using strategy $strategy with input size $((n * m)) with limits file $((2 ** file)) and memory $memory"
                    pipenv run python Main.py irt_test irt -gi "$n" "$m" -cs "$strategy" -ml "$memory" -fs $((2 ** file)) --noEvaluate --clearOutput
                done
            done
        done
    done
done
# Evaluate all collected reports once the sweep is done.
pipenv run python Main.py irt_test irt -i "" --evaluate
import os
import sys
from multiprocessing import Process

import Main
from framework.Framework import TestReport


def run_with_different_input_size(algorithm, folder, strategy, memory, file):
    n = 50
    for m in range(7000, 11000, 1000):
        if not run_test(algorithm, folder, strategy, memory, file, n, m):
            return  # Terminated, bigger sizes will terminate too


def run_test(algorithm, folder, strategy, memory, file, n, m):
    report = TestReport(Main.algorithms[algorithm](False).name,
                        Main.closing_strategies[strategy]().name,
                        n, m, m, n * m, memory, 2 ** file, 4)
    report_path = os.path.join(
        folder,
        f"{report.algorithm_name}_{report.total}_{hash(report)}",
        "report.json"
    )
    print(f"Checking {report_path}")
    if os.path.exists(report_path):
        print(f"Already calculated strategy {strategy} with input size {n * m} "
              f"with limits file {2 ** file} and memory {memory}")
        return True
    print(f"Using {algorithm} strategy {strategy} with input size {n * m} "
          f"with limits file {2 ** file} and memory {memory}")
    # Run the evaluation in a separate process so a hanging run can be killed.
    process = Process(target=Main.evaluate, args=([
        folder, algorithm, "-gi", str(n), str(m), "-cs",
        strategy, "-ml", str(memory), "-fs",
        str(2 ** file), "--noEvaluate", "--clearOutput"
    ],))
    process.daemon = True
    process.start()
    process.join(timeout=14400)  # 4 hours
    if process.is_alive():
        process.terminate()
        print("Terminated run")
        # Clean up the partial output of the aborted run.
        os.system(f'rm -R {folder}/output*')
        os.system(f'rm -R {folder}/bwt_output*')
        return False
    return True


def full_test(folder):
    algorithm = "iterativeMerging"
    print(f"Starting test for {algorithm} in folder {folder}")
    run_with_different_input_size(algorithm, folder, "random", 0.1, 10)
    Main.evaluate([folder, algorithm, "-i", "", "--evaluate"])


if __name__ == "__main__":
    full_test(sys.argv[1])
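Assuming the harness above is saved as `full_test.py` (its file name is not shown in this view) and that a results folder such as `irt_test` is used, a run would look like `pipenv run python full_test.py irt_test`; the final `Main.evaluate` call with `--evaluate` then aggregates all collected reports.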
@@ -12,7 +12,7 @@ from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-from matplotlib import cm, colors
+from matplotlib import cm
from bwt.closing_strategy.RandomClosing import RandomClosing
@@ -225,8 +225,6 @@ class Framework:
"Input size": map(lambda test_report: test_report.total, results)
}
)
-# memory_accesses_data.plot.box().get_figure().savefig(path.join(self._folder_path, "memory_accesses.pdf"))
-# hard_disc_accesses_data.plot.box().get_figure().savefig(path.join(self._folder_path, "hard_disc_accesses.pdf"))
algorithms = list(set(map(lambda test_report: test_report.algorithm_name, results)))
fig, ax = plt.subplots()
@@ -272,10 +270,6 @@ class Framework:
"n": map(lambda report: report.n, results),
}
)
-pd.set_option('display.float_format', lambda x: '%.5f' % x)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.expand_frame_repr', False)
-pd.set_option("display.precision", 2)
filter_df = filter_df[filter_df.strategy == "Random"]
@@ -290,30 +284,11 @@ class Framework:
ax.scatter(df["total"], df["runtime"], label=algorithm)
-df = df[df.memory_limit == 0.1]
-df = df[df.n == 50]
-df = df[df.total < 500000]
-print(
-df[["name", "total", "memory_limit", "runtime", "hard_disc_access", "hard_disc_access_pc",
-"memory_access", "memory_access_pc"]].sort_values(by="total")
-)
ax.set_xlabel('Input size')
ax.set_ylabel('Runtime in minutes')
ax.legend(loc="upper right")
fig.savefig(path.join(self._folder_path, "overall_comparison_with_opt_file.pdf"))
"""try:
n_m_to_runtime_plot = plt.figure().gca(projection='3d')
n_m_to_runtime_plot.plot_trisurf(n_m_to_runtime["n"], n_m_to_runtime["m"], n_m_to_runtime['runtime'],
cmap=cm.coolwarm)
n_m_to_runtime_plot.set_xlabel('n')
n_m_to_runtime_plot.set_ylabel('m')
n_m_to_runtime_plot.set_zlabel('Runtime in minutes')
plt.savefig(path.join(self._folder_path, "n_n_runtime.pdf"))
except (ValueError, RuntimeError):
print("n_n_runtime.pdf can not be created")"""
for alg in algorithms:
alg_path = path.join(self._folder_path, alg.lower())
if not path.exists(alg_path):
......