From 0638d5e6662ff3bf4e0ccdd63080ce6e18f6f25b Mon Sep 17 00:00:00 2001 From: zomseffen Date: Mon, 7 Feb 2022 21:08:45 +0100 Subject: [PATCH] adds labyrinth and subjects as well as performance increases --- Client/Client.py | 146 +--- Objects/Structure.py | 126 ++-- Objects/World.py | 176 +++-- WorldProvider/WorldProvider.py | 4 +- labirinth_ai/LabyrinthClient.py | 43 ++ labirinth_ai/LabyrinthProvider.py | 6 + labirinth_ai/LabyrinthWorld.py | 232 +++++++ labirinth_ai/Subject.py | 1055 +++++++++++++++++++++++++++++ labirinth_ai/__init__.py | 0 labirinth_ai/loss.py | 37 + 10 files changed, 1591 insertions(+), 234 deletions(-) create mode 100644 labirinth_ai/LabyrinthClient.py create mode 100644 labirinth_ai/LabyrinthProvider.py create mode 100644 labirinth_ai/LabyrinthWorld.py create mode 100644 labirinth_ai/Subject.py create mode 100644 labirinth_ai/__init__.py create mode 100644 labirinth_ai/loss.py diff --git a/Client/Client.py b/Client/Client.py index 2f6699d..e64554a 100644 --- a/Client/Client.py +++ b/Client/Client.py @@ -41,10 +41,30 @@ def value_to_color(v, min_value, max_value): class Client: - def __init__(self, test=False, pos=[0, 0, 0]): + def __init__(self, test=False, pos=[0, 0, 0], world_class=WorldProvider): self.state = 0 with open('./config.json', 'r') as f: self.config = json.load(f) + self.init_shaders() + + self.world_provider = world_class(self.normal_program) + self.draw_world() + + self.pos = pos + self.time = time.time() + + glutReshapeFunc(self.resize) + glutDisplayFunc(self.display) + glutKeyboardFunc(self.keyboardHandler) + glutSpecialFunc(self.funcKeydHandler) + + if not test: + glutMainLoop() + else: + self.display() + self.resize(100, 100) + + def init_shaders(self): glutInit(sys.argv) self.width = 1920 self.height = 1080 @@ -96,7 +116,7 @@ class Client: self.depth_program[self.normal_program[key]] = Spotlight.getDepthProgram(self.vertex_shader_id, key.GeometryShaderId) - self.world_provider = WorldProvider(self.normal_program) + def draw_world(self): for x_pos in range(0, 100): for y_pos in range(0, 100): for z_pos in range(0, 1): @@ -118,97 +138,11 @@ class Client: r, g, b = colors[int(self.world_provider.world.plates[x_pos, y_pos])] self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - # total_x = self.world_provider.world.chunk_n_x * self.world_provider.world.chunk_size_x - # total_y = self.world_provider.world.chunk_n_y * self.world_provider.world.chunk_size_y - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # if self.world_provider.world.faults[x_pos, y_pos] == -2: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 0, 0, 0) - # - # for line_index, line in enumerate(self.world_provider.world.fault_lines): - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # if self.world_provider.world.faults[x_pos, y_pos] == line_index: - # if line_index != 9: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 0, 0, 1) - # else: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 1, 1, 1) - # - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # if [x_pos, y_pos] in self.world_provider.world.fault_nodes: - # r, g, b = 1, 0, 0 - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - - # # visualize direction lengths - # lengths = np.sqrt(np.sum(np.square(self.world_provider.world.directions), axis=2)) - # lengths = lengths / np.max(lengths) - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # r, g, b = 
lengths[x_pos, y_pos], lengths[x_pos, y_pos], lengths[x_pos, y_pos] - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) self.rx = self.cx = self.cy = 0 self.opening = 45 - glutReshapeFunc(self.resize) - glutDisplayFunc(self.display) - glutKeyboardFunc(self.keyboardHandler) - glutSpecialFunc(self.funcKeydHandler) - - self.pos = pos - - self.time = time.time() - - self.field = (100, 100, 1) - self.e_a = np.array([ - [0, 0, 0], - [1, 0, 0], - [1, 1, 0], - [0, 1, 0], - [-1, 1, 0], - [-1, 0, 0], - [-1, -1, 0], - [0, -1, 0], - [1, -1, 0], - ]) - - self.relaxation_time = 0.55 # 0.55 - self.w_a = [ - 4.0 / 9.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0 - ] - - self.n_a = np.zeros((len(self.e_a),) + self.field) - self.n_a_eq = np.zeros(self.n_a.shape) - self.n = np.zeros(self.field) - self.n[:, :, :] += 1.0 - self.gravity_applies = np.zeros(self.field) - # self.n /= np.sum(self.n) - self.n_a[0] = np.array(self.n) - self.u = np.zeros(self.field + (self.e_a.shape[1],)) - - self.compressible = True - self.max_n = self.w_a[0] - - self.test_pixel = [40, 50, 0] - - if not test: - glutMainLoop() - else: - self.display() - self.resize(100, 100) def display(self): glClearColor(0, 0, 0, 0) @@ -261,41 +195,7 @@ class Client: glutSwapBuffers() - min_value = 0 - max_value_n = np.max(self.n) - # max_value_n = 1.0 - - vel = np.sqrt(np.sum(np.square(self.u), axis=3)) *self.n - max_value_vel = np.max(vel) - # max_value_vel = np.sqrt(3) - - # print('round') - # print('sum n: %f' % np.sum(self.n)) - # print('max n: %f' % np.max(self.n)) - # print('min n: %f' % np.min(self.n)) - # print('sum vel: %f' % np.sum(vel)) - # print('max vel: %f' % np.max(vel)) - # print('min vel: %f' % np.min(vel)) - - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # # if self.state == 2: - # # r, g, b = value_to_color(int(self.gravity_applies[x_pos, y_pos, z_pos]), 0, 1) - # # if self.state == 1: - # # r, g, b = value_to_color(vel[x_pos, y_pos, z_pos], min_value, max_value_vel) - # # if self.state == 0: - # # r, g, b = value_to_color(self.n[x_pos, y_pos, z_pos], min_value, max_value_n) - # r, g, b, = 128, 128, 128 - # if [x_pos, y_pos] in self.world_provider.world.fault_nodes: - # r, g, b = 128, 0, 0 - # - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - # self.world_provider.world.set_color(int(round(self.test_pixel[0])), - # int(round(self.test_pixel[1])), - # int(round(self.test_pixel[2])), 1.0, 1.0, 1.0) - - print(1.0 / (time.time() - self.time)) + print('fps', 1.0 / (time.time() - self.time)) self.time = time.time() glutPostRedisplay() diff --git a/Objects/Structure.py b/Objects/Structure.py index 5c49893..02cd5a4 100644 --- a/Objects/Structure.py +++ b/Objects/Structure.py @@ -19,6 +19,9 @@ class Structure(Renderable): self.Objects = {} self.vais = {} self.dirty = True + self.dirty_pos = True + self.dirty_color = True + self.dirty_size = True self.x_offset = x_offset self.y_offset = y_offset @@ -31,6 +34,7 @@ class Structure(Renderable): @x_offset.setter def x_offset(self, value): self.dirty = True + self.dirty_pos = True self._x_offset = value @property @@ -40,6 +44,7 @@ class Structure(Renderable): @y_offset.setter def y_offset(self, value): self.dirty = True + self.dirty_pos = True self._y_offset = value @property @@ -49,6 +54,7 @@ class Structure(Renderable): @z_offset.setter def z_offset(self, value): 
self.dirty = True + self.dirty_pos = True self._z_offset = value def addShape(self, program, shape): @@ -56,6 +62,9 @@ class Structure(Renderable): self.Objects[program] = [] self.Objects[program].append(shape) self.dirty = True + self.dirty_color = True + self.dirty_pos = True + self.dirty_size = True def removeShape(self, program, shape): if program in self.Objects.keys(): @@ -63,72 +72,89 @@ class Structure(Renderable): if len(self.Objects[program]) == 0: self.Objects.pop(program) self.dirty = True + self.dirty_color = True + self.dirty_pos = True + self.dirty_size = True def buildvertexArrays(self): if self.dirty: - self.clearVertexArrays() + # self.clearVertexArrays() glEnableClientState(GL_VERTEX_ARRAY) glEnableClientState(GL_TEXTURE_COORD_ARRAY) glEnableClientState(GL_NORMAL_ARRAY) glEnableClientState(GL_COLOR_ARRAY) - self.vais = {} for key, objects in self.Objects.items(): - tvai = GLuint(0) - tpbi = GLuint(0) - tcbi = GLuint(0) - tsbi = GLuint(0) - num = len(objects) - - glGenVertexArrays(1, tvai) + needs_new_buffers = key not in self.vais.keys() + if needs_new_buffers: + tvai = GLuint(0) + tpbi = GLuint(0) + tcbi = GLuint(0) + tsbi = GLuint(0) + num = len(objects) + else: + tvai, tpbi, tcbi, tsbi, num = self.vais[key] + if needs_new_buffers: + glGenVertexArrays(1, tvai) glBindVertexArray(tvai) + if self.dirty_pos: + if needs_new_buffers: + vid = glGetAttribLocation(key, "in_position") + glEnableVertexAttribArray(vid) - vid = glGetAttribLocation(key, "in_position") - glEnableVertexAttribArray(vid) - - tpbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tpbi) - positions = [] - for o in objects: - positions.append(o.pos[0] + self.x_offset) - positions.append(o.pos[1] + self.y_offset) - positions.append(o.pos[2] + self.z_offset) - glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create position buffer") - - colors = [] - for o in objects: - colors.append(o.color[0]) - colors.append(o.color[1]) - colors.append(o.color[2]) - tcbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tcbi) - glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) - vc = glGetAttribLocation(key, "MyInColor") - if vc != -1: - glEnableVertexAttribArray(vc) - glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create color buffer") - - if hasattr(objects[0], 'size'): - sizes = [] + tpbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tpbi) + positions = [] for o in objects: - sizes.append(o.size[0]) - sizes.append(o.size[1]) - sizes.append(o.size[2]) - tsbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tsbi) - glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) - vs = glGetAttribLocation(key, "MyInSize") - if vs != -1: - glEnableVertexAttribArray(vs) - glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create size buffer") + positions.append(o.pos[0] + self.x_offset) + positions.append(o.pos[1] + self.y_offset) + positions.append(o.pos[2] + self.z_offset) + glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create position buffer") + + if self.dirty_color: + colors = [] + for o in objects: + colors.append(o.color[0]) + colors.append(o.color[1]) + colors.append(o.color[2]) + if needs_new_buffers: + tcbi = 
glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tcbi) + glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vc = glGetAttribLocation(key, "MyInColor") + if vc != -1: + glEnableVertexAttribArray(vc) + glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create color buffer") + + if self.dirty_size: + if hasattr(objects[0], 'size'): + sizes = [] + for o in objects: + sizes.append(o.size[0]) + sizes.append(o.size[1]) + sizes.append(o.size[2]) + if needs_new_buffers: + tsbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tsbi) + glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vs = glGetAttribLocation(key, "MyInSize") + if vs != -1: + glEnableVertexAttribArray(vs) + glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create size buffer") glBindVertexArray(0) self.vais[key] = (tvai, tpbi, tcbi, tsbi, num) self.dirty = False + self.dirty_pos = False + self.dirty_color = False + self.dirty_size = False def clearVertexArrays(self): temp = dict(self.vais) diff --git a/Objects/World.py b/Objects/World.py index 473ccf2..1f0d4a3 100644 --- a/Objects/World.py +++ b/Objects/World.py @@ -1,3 +1,5 @@ +import time + from Lights.Lights import Light from Objects.Objects import Object from Objects.Renderable import Renderable @@ -9,7 +11,8 @@ import math import numpy as np import random import sys - +import ctypes +float_pointer = ctypes.POINTER(ctypes.c_float) # Plate Types SEA_PLATE = 0 CONTINENTAL_PLATE = 1 @@ -22,6 +25,7 @@ METAMORPH_STONE = 3 SEDIMENTAL_STONE = 4 SEDIMENT = 5 + class WorldChunk(Structure): def __init__(self, width: int, length: int, height: int, programs: dict): assert width > 0, 'Width must be greater than 0' @@ -38,6 +42,8 @@ class WorldChunk(Structure): self.height = height self.programs = programs + self.objects = {} + for x in range(width): self.content.append([]) self.visible.append([]) @@ -54,6 +60,7 @@ class WorldChunk(Structure): assert 0 <= z < self.height, 'Put out of bounds for z coordinate! 
Must be between 0 and %i' % self.height no_visibility_changes = (self.content[x][y][z] is None) == (new_object is None) + old_object = self.content[x][y][z] self.content[x][y][z] = new_object new_object.translate(translate(x, y, z)) @@ -87,6 +94,32 @@ class WorldChunk(Structure): else: self.visible[x][y][z - 1] += change + # todo: add visibility check for object listing + added = False + if old_object is not None: + if new_object is not None and type(old_object) == type(new_object): + new_object.buffer_id = old_object.buffer_id + self.objects[self.programs[type(old_object)]][old_object.buffer_id] = new_object + added = True + else: + # todo: maybe replace the element with a placeholder that is skipped when rendering/ saving and have a + # cleanup task, since this could be exploited to lower update rates + leading = self.objects[self.programs[type(old_object)]][:old_object.buffer_id] + following = self.objects[self.programs[type(old_object)]][old_object.buffer_id + 1:] + for element in following: + element.buffer_id -= 1 + self.objects[self.programs[type(old_object)]] = leading + following + + if not added and new_object is not None: + if self.programs[type(new_object)] not in self.objects.keys(): + self.objects[self.programs[type(new_object)]] = [] + new_object.buffer_id = len(self.objects[self.programs[type(new_object)]]) + self.objects[self.programs[type(new_object)]].append(new_object) + + self.dirty = True + self.dirty_pos = True + self.dirty_color = True + self.dirty_size = True return visible_carry_over def get_object(self, x: int, y: int, z: int): @@ -112,80 +145,92 @@ class WorldChunk(Structure): def buildvertexArrays(self): if self.dirty: - self.clearVertexArrays() + # self.clearVertexArrays() glEnableClientState(GL_VERTEX_ARRAY) glEnableClientState(GL_TEXTURE_COORD_ARRAY) glEnableClientState(GL_NORMAL_ARRAY) glEnableClientState(GL_COLOR_ARRAY) - self.vais = {} - objects = {} - counts = {} - for x in range(self.width): - for y in range(self.length): - for z in range(self.height): - if self.content[x][y][z] is not None: # and self.visible[x][y][z] > 0: TODO: check visibility... 
- if self.programs[type(self.content[x][y][z])] not in objects.keys(): - objects[self.programs[type(self.content[x][y][z])]] = [] - counts[self.programs[type(self.content[x][y][z])]] = 0 - objects[self.programs[type(self.content[x][y][z])]].append(self.content[x][y][z]) - counts[self.programs[type(self.content[x][y][z])]] += 1 + for key, object_list in self.objects.items(): + needs_new_buffers = key not in self.vais.keys() + if needs_new_buffers: + tvai = GLuint(0) + tpbi = GLuint(0) + tcbi = GLuint(0) + tsbi = GLuint(0) - for key, object_list in objects.items(): - tvai = GLuint(0) - tpbi = GLuint(0) - tcbi = GLuint(0) - tsbi = GLuint(0) - - glGenVertexArrays(1, tvai) + glGenVertexArrays(1, tvai) + else: + tvai, tpbi, tcbi, tsbi, old_len = self.vais[key] glBindVertexArray(tvai) - vid = glGetAttribLocation(key, "in_position") - glEnableVertexAttribArray(vid) + if self.dirty_pos: + if needs_new_buffers: + vid = glGetAttribLocation(key, "in_position") + glEnableVertexAttribArray(vid) + tpbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tpbi) + positions = [] + for index, o in enumerate(object_list): + o.buffer_id = index + positions.append(o.pos[0] + self.x_offset) + positions.append(o.pos[1] + self.y_offset) + positions.append(o.pos[2] + self.z_offset) - tpbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tpbi) - positions = [] - for o in object_list: - positions.append(o.pos[0] + self.x_offset) - positions.append(o.pos[1] + self.y_offset) - positions.append(o.pos[2] + self.z_offset) - glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create position buffer") + glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - colors = [] - for o in object_list: - colors.append(o.color[0]) - colors.append(o.color[1]) - colors.append(o.color[2]) - tcbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tcbi) - glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) - vc = glGetAttribLocation(key, "MyInColor") - if vc != -1: - glEnableVertexAttribArray(vc) - glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) + if needs_new_buffers: + glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create position buffer") + + if self.dirty_color: + colors = [] + for o in object_list: + colors.append(o.color[0]) + colors.append(o.color[1]) + colors.append(o.color[2]) + if needs_new_buffers: + tcbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tcbi) + if needs_new_buffers or old_len != len(object_list): + glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) + else: + # todo: check if this improves anything. 
Timewise it seems to be the same + ptr = ctypes.cast(glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE), float_pointer) + for index, value in enumerate(colors): + ptr[index] = value + glUnmapBuffer(GL_ARRAY_BUFFER) + if needs_new_buffers: + vc = glGetAttribLocation(key, "MyInColor") + if vc != -1: + glEnableVertexAttribArray(vc) + glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) self.check_error("Could not create color buffer") - if hasattr(object_list[0], 'size'): - sizes = [] - for o in object_list: - sizes.append(o.size[0]) - sizes.append(o.size[1]) - sizes.append(o.size[2]) - tsbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tsbi) - glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) - vs = glGetAttribLocation(key, "MyInSize") - if vs != -1: - glEnableVertexAttribArray(vs) - glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) + if self.dirty_size: + if hasattr(object_list[0], 'size'): + sizes = [] + for o in object_list: + sizes.append(o.size[0]) + sizes.append(o.size[1]) + sizes.append(o.size[2]) + if needs_new_buffers: + tsbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tsbi) + glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vs = glGetAttribLocation(key, "MyInSize") + if vs != -1: + glEnableVertexAttribArray(vs) + glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) self.check_error("Could not create size buffer") glBindVertexArray(0) - self.vais[key] = (tvai, tpbi, tcbi, tsbi, counts[key]) + self.vais[key] = (tvai, tpbi, tcbi, tsbi, len(object_list)) self.dirty = False + self.dirty_pos = False + self.dirty_color = False + self.dirty_size = False def render(self, proj_matrix, geometry_rot_matrix, alternate_programs=None, preselected_program=None, projection_pos=None, rot_pos=None): @@ -204,6 +249,17 @@ class WorldChunk(Structure): if self.content[x][y][z] is not None: self.content[x][y][z].setColor(r, g, b) self.dirty = True + self.dirty_color = True + + def load(self): + for x in range(self.width): + for y in range(self.length): + for z in range(self.height): + if self.content[x][y][z] is not None: # and self.visible[x][y][z] > 0: TODO: check visibility... 
+ if self.programs[type(self.content[x][y][z])] not in self.objects.keys(): + self.objects[self.programs[type(self.content[x][y][z])]] = [] + self.objects[self.programs[type(self.content[x][y][z])]].append(self.content[x][y][z]) + class World(Renderable): def __init__(self, chunk_size_x: int, chunk_size_y: int, chunk_size_z: int, @@ -488,6 +544,8 @@ class World(Renderable): y % self.chunk_size_y, z % self.chunk_size_z, r, g, b) + else: + print('Changing color of nonexistant element!') def put_object(self, x: int, y: int, z: int, new_object: Object): x = x % (self.chunk_size_x * self.chunk_n_x) diff --git a/WorldProvider/WorldProvider.py b/WorldProvider/WorldProvider.py index a9629b7..1e6367c 100644 --- a/WorldProvider/WorldProvider.py +++ b/WorldProvider/WorldProvider.py @@ -2,8 +2,8 @@ from Objects.World import World class WorldProvider: - def __init__(self, programs): - self.world: World = World(10, 10, 10, 10, 10, 10, programs) + def __init__(self, programs, world_class=World): + self.world: World = world_class(10, 10, 10, 10, 10, 10, programs) self.world.generate() def update(self): diff --git a/labirinth_ai/LabyrinthClient.py b/labirinth_ai/LabyrinthClient.py new file mode 100644 index 0000000..fdcb22e --- /dev/null +++ b/labirinth_ai/LabyrinthClient.py @@ -0,0 +1,43 @@ +import time + +from Client.Client import Client, MAX_DISTANCE +from MatrixStuff.Transformations import perspectiveMatrix +from labirinth_ai.LabyrinthProvider import LabyrinthProvider + +import numpy as np + +class LabyrinthClient(Client): + def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider): + super(LabyrinthClient, self).__init__(test, pos, world_class) + + def draw_world(self): + start_time = time.time() + for x in range(self.world_provider.world.chunk_size_x * self.world_provider.world.chunk_n_x): + for y in range(self.world_provider.world.chunk_size_y * self.world_provider.world.chunk_n_y): + if self.world_provider.world.board[x, y] in [1, 2]: + r, g, b = 57, 92, 152 + if 1.5 >= self.world_provider.world.hunter_grass[x, y] > 0.5: + r, g, b = 25, 149, 156 + if 3 >= self.world_provider.world.hunter_grass[x, y] > 1.5: + r, g, b = 112, 198, 169 + self.world_provider.world.set_color(x, y, 0, r / 255.0, g / 255.0, b / 255.0) + if self.world_provider.world.board[x, y] == 3: + self.world_provider.world.set_color(x, y, 0, 139 / 255.0, 72 / 255.0, 82 / 255.0) + + for sub in self.world_provider.world.subjects: + if not sub.random: + # pyxel.rectb(sub.x * 4 + 1, sub.y * 4 + 1, 2, 2, sub.col) + self.world_provider.world.set_color(sub.x, sub.y, 0, sub.r / 255.0, sub.g / 255.0, sub.b / 255.0) + else: + self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0) + + self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) + print('redraw', time.time() - start_time) + + def display(self): + super(LabyrinthClient, self).display() + self.draw_world() + self.world_provider.world.update() + +if __name__ == '__main__': + client = LabyrinthClient(pos=[-50, -50, -200]) diff --git a/labirinth_ai/LabyrinthProvider.py b/labirinth_ai/LabyrinthProvider.py new file mode 100644 index 0000000..4af8345 --- /dev/null +++ b/labirinth_ai/LabyrinthProvider.py @@ -0,0 +1,6 @@ +from WorldProvider.WorldProvider import WorldProvider +from labirinth_ai.LabyrinthWorld import LabyrinthWorld + +class LabyrinthProvider(WorldProvider): + def __init__(self, programs): + super(LabyrinthProvider, self).__init__(programs, LabyrinthWorld) diff --git a/labirinth_ai/LabyrinthWorld.py 
b/labirinth_ai/LabyrinthWorld.py new file mode 100644 index 0000000..2a2e3e7 --- /dev/null +++ b/labirinth_ai/LabyrinthWorld.py @@ -0,0 +1,232 @@ +import time + +from Objects.Cube.Cube import Cube +from Objects.World import World +import numpy as np +import random + + +class LabyrinthWorld(World): + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(4 * batchsize, randomBuffer) + + def __init__(self, chunk_size_x: int, chunk_size_y: int, chunk_size_z: int, + chunk_n_x: int, chunk_n_y: int, chunk_n_z: int, programs: dict): + self.board_shape = (chunk_size_x * chunk_n_x, chunk_size_y * chunk_n_y) + self.board = np.zeros(self.board_shape) + super(LabyrinthWorld, self).__init__(chunk_size_x, chunk_size_y, chunk_size_z, + chunk_n_x, chunk_n_y, chunk_n_z, programs) + self.max_room_dim = 20 + + self.min_room_dim = 6 + + self.max_room_num = 32 + self.max_corridors = 4 * self.max_room_num + + self.max_crates = self.max_room_num + + self.subjects = [] + self.ins = [] + self.actions = [] + self.targets = [] + + self.model = None + self.lastUpdate = time.time() + self.nextTrain = self.randomBuffer + self.round = 0 + + self.trailMix = np.zeros(self.board_shape) + self.grass = np.zeros(self.board_shape) + self.hunter_grass = np.zeros(self.board_shape) + self.subjectDict = {} + + def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200): + board = np.zeros(self.board_shape) + random.seed(seed) + np.random.seed(seed) + + # find random starting point + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) + + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + + # place rooms + room_num = 0 + corridor_num = 0 + while room_num < self.max_room_num and corridor_num < self.max_corridors: + # try to place Room + w = random.randint(self.min_room_dim, self.max_room_dim) + h = random.randint(self.min_room_dim, self.max_room_dim) + can_place_room = np.sum( + board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] == 1) == 0 and px - int( + w / 2.0) >= 0 and px + int(w / 2.0) < self.board_shape[0] and \ + py - int(h / 2.0) >= 0 and py + int(h / 2.0) < self.board_shape[1] + + if can_place_room: + # place Room + board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] = 1 + room_num += 1 + else: + # move && place Corridor + directions = [] + while len(directions) == 0: + movable = [] + corridor_length = random.randint(self.min_room_dim, self.max_room_dim) + if px - corridor_length >= 0: + movable.append(left) + if board[px - 1, py] != 2: + directions.append(left) + + if px + corridor_length < self.board_shape[0]: + movable.append(right) + if board[px + 1, py] != 2: + directions.append(right) + + if py - corridor_length >= 0: + movable.append(up) + if board[px, py - 1] != 2: + directions.append(up) + + if py + corridor_length < self.board_shape[1]: + movable.append(down) + if board[px, py + 1] != 2: + directions.append(down) + + if len(directions) != 0: + if len(directions) > 1: + d = directions[random.randint(0, len(directions) - 1)] + else: + d = directions[0] + changed = False + for _ in range(corridor_length): + if board[px, py] != 1 and board[px, py] != 2: + board[px, py] = 2 + if (-d[0], -d[1]) not in movable or board[px - d[0], py - d[1]] != 2: + changed = True + px += d[0] + py += d[1] + if changed: + corridor_num += 1 + else: + if len(movable) != 0: + if 
len(movable) > 1: + d = movable[random.randint(0, len(movable) - 1)] + else: + d = movable[0] + for _ in range(corridor_length): + px += d[0] + py += d[1] + + crates = 0 + while crates < self.max_crates: + px = random.randint(0, (self.board_shape[0] - 1)) + py = random.randint(0, (self.board_shape[1] - 1)) + + if board[px, py] == 1: + board[px, py] = 3 + crates += 1 + + board[board == 2] = 1 + + print((room_num, self.max_room_num)) + print((corridor_num, self.max_corridors)) + self.board = board + + # setting up the board + for x_pos in range(0, self.board_shape[0]): + for y_pos in range(0, self.board_shape[1]): + for z_pos in range(0, 1): + self.put_object(x_pos, y_pos, z_pos, Cube().setColor(1, 1, 1)) + + # adding subjects + from labirinth_ai.Subject import Hunter, Herbivore + while len(self.subjects) < 2: + px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) + py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) + if self.board[px, py] == 1: + self.subjects.append(Hunter(px, py)) + self.ins += self.subjects[-1].x_in + self.actions += self.subjects[-1].actions + self.targets += self.subjects[-1].target + + while len(self.subjects) < 10: + px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) + py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) + if self.board[px, py] == 1: + self.subjects.append(Herbivore(px, py)) + self.ins += self.subjects[-1].x_in + self.actions += self.subjects[-1].actions + self.targets += self.subjects[-1].target + + for x in range(self.board_shape[0]): + for y in range(self.board_shape[1]): + self.subjectDict[(x, y)] = [] + + for sub in self.subjects: + self.subjectDict[(sub.x, sub.y)].append(sub) + + def update(self): + # start = time.time() + if self.model is None: + for sub in self.subjects: + sub.calculateAction(self) + else: + states = list(map(lambda e: e.createState(self), self.subjects)) + states = sum(list(map(lambda e: [e, e, e, e], states)), []) + vals = self.model.predict(states) + vals = np.reshape(np.transpose(np.reshape(vals, (len(self.subjects), 4, 2)), (0, 2, 1)), + (len(self.subjects), 1, 8)) + list(map(lambda e: e[1].calculateAction(self, vals[e[0]], states[e[0]]), enumerate(self.subjects))) + + for sub in self.subjects: + if sub.alive: + sub.update(self, doTrain=self.model is None) + sub.tick += 1 + + if self.model is not None: + if self.round >= self.nextTrain: + samples = list(map(lambda e: e.generateSamples(), self.subjects)) + states = sum(list(map(lambda e: e[0], samples)), []) + targets = sum(list(map(lambda e: e[1], samples)), []) + self.model.fit(states, targets) + self.nextTrain = self.batchsize / 5 + self.round = 0 + for sub in self.subjects: + if len(sub.samples) > 20*self.batchsize: + sub.samples = sub.samples[:-20*self.batchsize] + else: + self.round += 1 + + new_subjects = [] + kill_table = {} + live_table = {} + for sub in self.subjects: + if sub.name not in kill_table.keys(): + kill_table[sub.name] = 0 + live_table[sub.name] = 0 + kill_table[sub.name] += sub.kills + live_table[sub.name] += sub.lives + if sub.alive: + new_subjects.append(sub) + else: + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) + while self.board[px, py] == 0: + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - 
self.max_room_dim) + sub.respawnUpdate(px, py, self) + new_subjects.append(sub) + + self.subjects = new_subjects + self.trailMix *= 0.99 + + self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3) + self.hunter_grass = np.minimum(self.hunter_grass + 0.01 * (self.board != 0), 3) + + self.trailMix *= (self.trailMix > 0.01) diff --git a/labirinth_ai/Subject.py b/labirinth_ai/Subject.py new file mode 100644 index 0000000..ec0593c --- /dev/null +++ b/labirinth_ai/Subject.py @@ -0,0 +1,1055 @@ +import random +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from labirinth_ai.LabyrinthWorld import LabyrinthWorld +from labirinth_ai.loss import loss2, loss3 + +# import torch +# dtype = torch.float +# device = torch.device("cpu") + + +class Subject: + name = 'random' + col = 8 + num = 0 + random = True + r = 255 + g = 255 + b = 255 + + def __init__(self, x, y): + self.alive = True + self.x = x + self.y = y + self.kills = 0 + self.lives = 1 + self.tick = 0 + + self.id = self.num + Subject.num += 1 + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + if len(directions) > 1: + d = directions[random.randint(0, len(directions) - 1)] + else: + d = directions[0] + + if len(world.subjectDict[(self.x + d[0], self.y + d[1])]) > 0: + for sub in world.subjectDict[(self.x + d[0], self.y + d[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + + world.subjectDict[(self.x, self.y)].remove(self) + world.trailMix[self.x, self.y] += 1 + self.x += d[0] + self.y += d[1] + world.subjectDict[(self.x, self.y)].append(self) + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + world.subjectDict[(self.x, self.y)].remove(self) + self.x = x + self.y = y + world.subjectDict[(self.x, self.y)].append(self) + self.alive = True + self.lives += 1 + + +class QLearner(Subject): + name = 'QLearner' + col = 14 + learningRate = 0.25 + discountFactor = 0.5 + random = False + + Q = {} + def __init__(self, x, y): + super(QLearner, self).__init__(x, y) + # self.Q = {} + self.viewD = 3 + self.lastAction = None + self.lastState = None + self.lastReward = 0 + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(QLearner, self).respawnUpdate(x, y, world) + self.lastReward -= 20 + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.int) # - 1 + + # # floodfill state + # queued = [(0, 0)] + # todo = [(0, 0, 0)] + # while todo != []: + # doing = todo.pop(0) + # + # if self.x + doing[0] >= 0 and self.x + doing[0] < 64 and self.y + doing[1] >= 0 and self.y + doing[1] < 64: + # value = world.board[self.x + doing[0], self.y + doing[1]] + # state[self.viewD + doing[0], self.viewD + doing[1]] = value + # + # # if value == 3: + # # state[self.viewD + doing[0], self.viewD + doing[1]] = value + # + # if value != 0 and doing[2] < self.viewD: + # for i in range(-1, 2, 1): + # for j in range(-1, 2, 1): + # # 4-neighbour. 
without it it is 8-neighbour + # if abs(i) + abs(j) == 1: + # if (doing[0] + i, doing[1] + j) not in queued: + # queued.append((doing[0] + i, doing[1] + j)) + # todo.append((doing[0] + i, doing[1] + j, doing[2] + 1)) + # + # for sub in world.subjects: + # if (sub.x - self.x, sub.y - self.y) in queued and state[ + # self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = state[ + # self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] * 100 + sub.col + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - (self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + # state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + for sub in world.subjects: + if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] * 100 + 1# sub.col + + return state + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + state = self.createState(world) + + if str(state) not in self.Q.keys(): + self.Q[str(state)] = {} + for dir in directions: + if dir not in self.Q[str(state)].keys(): + self.Q[str(state)][dir] = random.randint(0, 5) + + allowedActions = dict(filter(lambda elem: elem[0] in directions,self.Q[str(state)].items())) + action = max(allowedActions, key=allowedActions.get) + + if self.learningRate != 0: + self.Q[str(state)][action] = (1 - self.learningRate) * self.Q[str(state)][action] + self.learningRate * (self.lastReward + self.discountFactor * self.Q[str(state)][action]) + + self.lastAction = action + self.lastState = state + self.lastReward = 0 + + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + self.lastReward += 10 + + world.subjectDict[(self.x, self.y)].remove(self) + self.x += action[0] + self.y += action[1] + world.subjectDict[(self.x, self.y)].append(self) + pass + + +class DoubleQLearner(QLearner): + name = 'DoubleQLearner' + col = 11 + learningRate = 0.5 + discountFactor = 0.5 + random = False + + QA = {} + QB = {} + def __init__(self, x, y): + super(DoubleQLearner, self).__init__(x, y) + self.viewD = 3 + self.lastAction = None + self.lastState = None + self.lastReward = 0 + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(DoubleQLearner, self).respawnUpdate(x, y, world) + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + 
left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + state = self.createState(world) + + if str(state) not in self.QA.keys(): + self.QA[str(state)] = {} + self.QB[str(state)] = {} + for dir in directions: + if dir not in self.QA[str(state)].keys(): + self.QA[str(state)][dir] = random.randint(0, 5) + self.QB[str(state)][dir] = random.randint(0, 5) + + allowedActionsA = dict(filter(lambda elem: elem[0] in directions, self.QA[str(state)].items())) + allowedActionsB = dict(filter(lambda elem: elem[0] in directions, self.QB[str(state)].items())) + allowedActions = {} + for key in allowedActionsA.keys(): + allowedActions[key] = allowedActionsA[key] + allowedActionsB[key] + + actionA = max(allowedActionsA, key=allowedActionsA.get) + actionB = max(allowedActionsB, key=allowedActionsB.get) + action = max(allowedActions, key=allowedActions.get) + + if self.learningRate != 0: + if random.randint(0, 1) == 0: + valA = self.QA[str(state)][action] + self.QA[str(state)][action] = valA + self.learningRate * (self.lastReward + self.discountFactor * self.QB[str(state)][actionA] - valA) + else: + valB = self.QB[str(state)][action] + self.QB[str(state)][action] = valB + self.learningRate * (self.lastReward + self.discountFactor * self.QA[str(state)][actionB] - valB) + + self.lastAction = action + self.lastState = state + self.lastReward = 0 + + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + self.lastReward += 10 + + world.subjectDict[(self.x, self.y)].remove(self) + self.x += action[0] + self.y += action[1] + world.subjectDict[(self.x, self.y)].append(self) + pass + + +class NetLearner(Subject): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + act2IDict = {right: 0, left: 1, up: 2, down: 3} + + name = 'NetLearner' + col = 15 + viewD = 3 + historyLength = 2 + channels = 4 + + learningRate = 0.001 + discountFactor = 0.5 + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(4*batchsize, randomBuffer) + randomChance = 9 + + historySizeMul = 20 + + # samples = [] + + # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2)) + # target = keras.Input(shape=(10, 1)) + # inVec = keras.layers.Flatten()(x_in) + # # kernel_regularizer=keras.regularizers.l2(0.01) + # actions = keras.layers.Dense((3 * (2 * viewD + 1) * (2 * viewD + 1)), activation='relu')(inVec) + # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='relu')(actions) + # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions) + # + # model = keras.Model(inputs=x_in, outputs=actions) + # + # # model.compile(optimizer='adam', loss=loss, target_tensors=[target]) + # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss, target_tensors=[target]) + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(NetLearner, self).respawnUpdate(x, y, world) + # self.lastReward -= 20 + + if len(self.samples) < 
self.randomBuffer or random.randint(0, 10) > self.randomChance: + self.random = True + # print('Rando ' + self.name) + pass + else: + self.random = False + # print('Slau ' + self.name) + + self.strikes = 0 + + def __init__(self, x, y): + super(NetLearner, self).__init__(x, y) + + self.action = None + self.state = None + self.actDict = {} + + self.history = [] + self.lastAction = None + self.lastState = None + self.lastReward = 0 + self.lastVal = 0 + self.random = False + self.nextTrain = self.randomBuffer + + self.samples = [] + + self.x_in = [] + self.actions = [] + self.target = [] + for i in range(4): + x_in = keras.Input(shape=(self.channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + self.x_in.append(x_in) + inVec = keras.layers.Flatten()(x_in) + actions = keras.layers.Dense(((2 * self.viewD + 1) * (2 * self.viewD + 1)), activation='elu', + kernel_regularizer=keras.regularizers.l2(0.001), + name=self.name + str(self.id) + 'Dense' + str(i) + 'l1')(inVec) + actions = keras.layers.Dense(((self.viewD + 1) * (self.viewD + 1)), activation='elu', + kernel_regularizer=keras.regularizers.l2(0.001))(actions) + self.target.append(keras.Input(shape=(2, 1))) + self.actions.append(keras.layers.Dense(2, activation='linear', use_bias=False, kernel_regularizer=keras.regularizers.l2(0.001))(actions)) + + self.model = keras.Model(inputs=self.x_in, outputs=self.actions) + + self.model.compile(optimizer=tf.keras.optimizers.RMSprop(self.learningRate), loss=loss3, + target_tensors=self.target) + + if len(self.samples) < self.randomBuffer: + self.random = True + else: + self.random = False + + self.strikes = 0 + + self.lastRewards = [] + + def visualize(self): + print(self.name) + layers = self.model.get_weights() + # layers.reverse() + layersN = [[0, 1, 8, 9, 16], [2, 3, 10, 11, 17], [4, 5, 12, 13, 18], [6, 7, 14, 15, 19]] + for action in range(8): + v = np.zeros((1, 2)) + v[0][0 if action < 4 else 1] = 1.0 + layerN = list(layersN[action % 4]) + layerN.reverse() + for n in layerN: + l = layers[n] + if len(l.shape) == 2: + layer = np.transpose(l) + v = np.dot(v, layer) + else: + layer = np.array([l]) + v = v + layer + lastAction = v[0, -2:] + v = np.reshape(v[0, :-2], (4, (2 * self.viewD + 1), (2 * self.viewD + 1))) + + # right, left, up, down + dir = {0: 'right', 1: 'left', 2: 'up', 3: 'down'} + dir = dir[action % 4] + #0-3 current + #4-8 future + if action < 4: + time = 'current ' + else: + time = 'future ' + import matplotlib + import matplotlib.pyplot as plt + fig, axs = plt.subplots(2, 2, figsize=(5, 5)) + + fig.suptitle(time + dir) + im = axs[0, 0].pcolor(np.rot90(v[0])) + fig.colorbar(im, ax=axs[0, 0]) + axs[0, 0].set_title('board') + + axs[0, 1].pcolor(np.rot90(v[1])) + fig.colorbar(im, ax=axs[0, 1]) + axs[0, 1].set_title('subjects') + + axs[1, 0].pcolor(np.rot90(v[2])) + fig.colorbar(im, ax=axs[1, 0]) + axs[1, 0].set_title('trail') + + axs[1, 1].pcolor(np.rot90(v[3])) + fig.colorbar(im, ax=axs[1, 1]) + axs[1, 1].set_title('grass') + plt.show(block=True) + + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state3 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state4 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - 
(self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + # for sub in world.subjects: + # if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + # if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state2[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = sub.col + for x in range(-maxdirleft, maxdirright, 1): + for y in range(-maxdirup, maxdirdown, 1): + if world.subjectDict[(self.x + x, self.y + y)] != []: + state2[x + maxdirleft, y + maxdirup] = 1#world.subjectDict[(self.x + x, self.y + y)][0].col + + state3[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.trailMix[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + state4[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.hunter_grass[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + + if not self.random: + test=1 + + area = np.reshape(np.stack((state, state2, state3, state4)), (4 * (2 * self.viewD + 1) * (2 * self.viewD + 1))) + action = [0, 0] + if self.lastAction is not None: + action = self.lastAction + return np.reshape(np.concatenate((area, action)), (1, 4 * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + + def calculateAction(self, world: LabyrinthWorld, vals=None, state=None): + # 0, 0 is top left + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(self.left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(self.right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(self.up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(self.down) + + if directions == []: + print('Wut?') + + if directions != [] and self.alive: + if state is None: + state = self.createState(world) + if vals is None: + vals = self.model.predict([state, state, state, state]) + vals = np.reshape(np.transpose(np.reshape(vals, (4, 2)), (1, 0)), + (1, 8)) + + self.actDict = {self.right: vals[0][0] + vals[0][4], self.left: vals[0][1] + vals[0][5], self.up: vals[0][2] + vals[0][6], self.down: vals[0][3] + vals[0][7]} + + allowedActions = dict(filter(lambda elem: elem[0] in directions, self.actDict.items())) + + # if self.name == 'Herbivore' and self.id == 11 and not self.random: + # print(allowedActions) + # print(self.lastReward) + if self.strikes <= 0: + self.random = False + + if not self.random: + self.action = max(allowedActions, key=allowedActions.get) + else: + self.action = self.randomAct(world) + + self.state = state + + def update(self, world: LabyrinthWorld, doTrain=True): + if self.lastAction is not None: + if not self.random: + if self.lastAction[0] + self.action[0] == 0 and self.lastAction[1] + self.action[1] == 0: + self.strikes += 1 + else: + self.strikes -= 1 + if self.strikes > 100: + self.random = True + else: + self.strikes -= 1 + + if len(self.history) >= self.historyLength: + self.history.pop(0) + self.history.append((self.lastState.copy(), int(self.act2IDict[self.lastAction]), int(self.lastVal), float(self.lastReward), np.array(self.lastRewards))) + + # if 
self.lastReward != 0 or random.randint(0, 9) == 0: + if len(self.history) == self.historyLength: + self.samples.append(self.history.copy()) + + # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer: + if len(self.samples) > self.nextTrain and doTrain: + print('train') + self.train() + self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize) + + self.lastAction = self.action + self.lastState = self.state + self.lastReward = 0 + self.lastVal = self.actDict[self.action] + + maxVal = 0 + + self.executeAction(world, self.action) + + def randomAct(self, world: LabyrinthWorld): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + d = random.randint(0, len(directions) - 1) + action = directions[d] + + return action + + def executeAction(self, world: LabyrinthWorld, action): + pass + + def generateSamples(self): + # history element: (self.lastState.copy(), self.act2IDict[self.lastAction], self.lastVal, self.lastReward, np.array(self.lastRewards)) + # history: [t-2, t-1] + states = [] + targets = [] + for i in range(4): + true_batch = int(self.batchsize/4) + target = np.zeros((true_batch, 2, 1)) + samples = np.array(self.samples[:-self.batchsize]) + # print('Samples for ' + str(i)) + # print(len(samples)) + samples = np.array(list(filter(lambda e: e[0, 1] == i, list(samples)))) + # print(len(samples)) + partTwo = True + if len(samples) == 0: + print('No samples for:' + str(i)) + partTwo = False + samples = np.array(self.samples[:-self.batchsize]) + buffer_size = len(samples) + index = np.random.choice(np.arange(buffer_size), + size=true_batch, + replace=True) + samples = samples[index] + # self.samples = [] + if partTwo: + target[:, 1, 0] = samples[:, 1, 3] #reward t-2 got + + nextState = np.concatenate(samples[:, 1, 0]) #states of t-1 + nextVals = self.model.predict([nextState, nextState, nextState, nextState]) + + nextVals2 = nextVals[i][:, 0] + nextVals[i][:, 1] + target[:, 0, 0] = nextVals2 #best q t-1 + else: + target[:, 1, 0] = np.array(list(map(lambda elem: list(elem), list(np.array(samples[:, 1, 4])))))[:, i] # reward t-2 got + + targets.append(target) + + states.append(np.concatenate(samples[:, 0, 0])) #states of t-2 + + return states, targets + + def train(self): + print(self.name) + states, target = self.generateSamples() + self.model.fit(states, target, epochs=1) + + self.samples = self.samples[-self.historySizeMul*self.batchsize:] + + # print(self.model.get_weights()) + + pass + + +class Herbivore(NetLearner): + name = 'Herbivore' + col = 9 + r = 255 + g = 255 + b = 0 + viewD = 3 + historyLength = 2 + + learningRate = 0.001 + discountFactor = 0.5 + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(2 * batchsize, randomBuffer) + randomChance = 9 + + samples = [] + + # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2)) + # target = keras.Input(shape=(10, 1)) + # inVec = keras.layers.Flatten()(x_in) + # # kernel_regularizer=keras.regularizers.l2(0.01) + # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec) + # 
actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions) + # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions) + # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec) + # + # model = keras.Model(inputs=x_in, outputs=actions) + # + # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target]) + # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target]) + + # def __init__(self, x, y): + # super(Herbivore, self).__init__(x, y) + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state3 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state4 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - (self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + # for sub in world.subjects: + # if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + # if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state2[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = sub.col + for x in range(-maxdirleft, maxdirright, 1): + for y in range(-maxdirup, maxdirdown, 1): + if world.subjectDict[(self.x + x, self.y + y)] != []: + state2[x + maxdirleft, y + maxdirup] = 1#world.subjectDict[(self.x + x, self.y + y)][0].col + + state3[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.trailMix[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + state4[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.grass[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + + if not self.random: + test=1 + + area = np.reshape(np.stack((state, state2, state3, state4)), (4 * (2 * self.viewD + 1) * (2 * self.viewD + 1))) + action = [0, 0] + if self.lastAction is not None: + action = self.lastAction + return np.reshape(np.concatenate((area, action)), (1, 4 * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + + def executeAction(self, world: LabyrinthWorld, action): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + + self.lastRewards = [] + if right in directions: + 
+            self.lastRewards.append(world.grass[self.x + 1, self.y])
+        else:
+            self.lastRewards.append(0)
+        if left in directions:
+            self.lastRewards.append(world.grass[self.x - 1, self.y])
+        else:
+            self.lastRewards.append(0)
+        if up in directions:
+            self.lastRewards.append(world.grass[self.x, self.y - 1])
+        else:
+            self.lastRewards.append(0)
+        if down in directions:
+            self.lastRewards.append(world.grass[self.x, self.y + 1])
+        else:
+            self.lastRewards.append(0)
+        assert len(self.lastRewards) == 4, 'Last Rewards not filled correctly!'
+
+        world.subjectDict[(self.x, self.y)].remove(self)
+        self.lastReward += world.trailMix[self.x, self.y]
+        self.x += action[0]
+        self.y += action[1]
+        world.subjectDict[(self.x, self.y)].append(self)
+        world.trailMix[self.x, self.y] = max(1.0, world.trailMix[self.x, self.y])
+        self.lastReward += (world.grass[self.x, self.y] - 0.0)
+        world.grass[self.x, self.y] = 0
+        world.hunter_grass[self.x, self.y] = 0
+
+    def randomAct(self, world: LabyrinthWorld):
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+        actDict = {}
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] != 0:
+                directions.append(left)
+                actDict[left] = world.grass[self.x - 1, self.y]
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] != 0:
+                directions.append(right)
+                actDict[right] = world.grass[self.x + 1, self.y]
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] != 0:
+                directions.append(up)
+                actDict[up] = world.grass[self.x, self.y - 1]
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] != 0:
+                directions.append(down)
+                actDict[down] = world.grass[self.x, self.y + 1]
+
+        allowedActions = dict(filter(lambda elem: elem[0] in directions, actDict.items()))
+        action = max(allowedActions, key=allowedActions.get)
+
+        return action
+
+
+class Hunter(NetLearner):
+    name = 'Hunter'
+    hunterGrassScale = 0.5
+    r = 0
+    g = 255
+    b = 255
+    def randomAct(self, world: LabyrinthWorld):
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+        actDict = {}
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] > 0.01:
+                directions.append(left)
+
+                sub = self.getClosestSubject(world, self.x - 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[left] = world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + left[0], self.y + left[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + left[0], self.y + left[1])]:
+                        if sub.col != self.col:
+                            actDict[left] += 10
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] > 0.01:
+                directions.append(right)
+
+                sub = self.getClosestSubject(world, self.x + 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x + 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[right] = world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + right[0], self.y + right[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + right[0], self.y + right[1])]:
+                        if sub.col != self.col:
+                            actDict[right] += 10
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] > 0.01:
+                directions.append(up)
+
+                sub = self.getClosestSubject(world, self.x, self.y - 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - 1 - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[up] = world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + up[0], self.y + up[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + up[0], self.y + up[1])]:
+                        if sub.col != self.col:
+                            actDict[up] += 10
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] > 0.01:
+                directions.append(down)
+
+                sub = self.getClosestSubject(world, self.x, self.y + 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y + 1 - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[down] = world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + down[0], self.y + down[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + down[0], self.y + down[1])]:
+                        if sub.col != self.col:
+                            actDict[down] += 10
+
+        if len(actDict) > 0:
+            allowedActions = dict(filter(lambda elem: elem[0] in directions, actDict.items()))
+        else:
+            return super(Hunter, self).randomAct(world)
+        action = max(allowedActions, key=allowedActions.get)
+
+        return action
+
+    def respawnUpdate(self, x, y, world: LabyrinthWorld):
+        super(Hunter, self).respawnUpdate(x, y, world)
+        self.lastReward -= 1
+
+    def getClosestSubject(self, world, x, y):
+        for dist in range(1, self.viewD):
+            dy = dist
+            for dx in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dy = -dist
+            for dx in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dx = dist
+            for dy in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dx = -dist
+            for dy in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+        return None
+
+    def executeAction(self, world: LabyrinthWorld, action):
+        grass_factor = 0.5
+
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] != 0:
+                directions.append(left)
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] != 0:
+                directions.append(right)
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] != 0:
+                directions.append(up)
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] != 0:
+                directions.append(down)
+
+        if len(action) == 2:
+            right_kill = left_kill = up_kill = down_kill = False
+            if right in directions:
+                for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            right_kill = True
+            if left in directions:
+                for sub in world.subjectDict[(self.x + left[0], self.y + left[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            left_kill = True
+            if up in directions:
+                for sub in world.subjectDict[(self.x + up[0], self.y + up[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            up_kill = True
+            if down in directions:
+                for sub in world.subjectDict[(self.x + down[0], self.y + down[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            down_kill = True
+
+            if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0:
+                for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
+                    if sub.alive:
+                        self.kills += 1
+                        if sub.col != self.col:
+                            self.lastReward += 10
+                        sub.alive = False
+                        self.alive = True
+
+            self.lastRewards = []
+            if right in directions:
+                sub = self.getClosestSubject(world, self.x + 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x + 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+                if right_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if left in directions:
+                sub = self.getClosestSubject(world, self.x - 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+                if left_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if up in directions:
+                sub = self.getClosestSubject(world, self.x, self.y - 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - sub.y - 1))
+                distReward = self.viewD - dist
+                if up_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if down in directions:
+                sub = self.getClosestSubject(world, self.x, self.y + 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y + 1 - sub.y))
+                distReward = self.viewD - dist
+                if down_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            assert len(self.lastRewards) == 4, 'Last Rewards not filled correctly!'
+
+        world.subjectDict[(self.x, self.y)].remove(self)
+        self.x += action[0]
+        self.y += action[1]
+        self.lastReward += world.trailMix[self.x, self.y]
+        world.subjectDict[(self.x, self.y)].append(self)
+        self.lastReward += (world.hunter_grass[self.x, self.y] * 0.1)
+        world.hunter_grass[self.x, self.y] = 0
+
+        sub = self.getClosestSubject(world, self.x, self.y)
+        dist = self.viewD
+        if sub is not None:
+            dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - sub.y))
+        distReward = self.viewD - dist
+
+        self.lastReward += distReward
diff --git a/labirinth_ai/__init__.py b/labirinth_ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/labirinth_ai/loss.py b/labirinth_ai/loss.py
new file mode 100644
index 0000000..333a9a4
--- /dev/null
+++ b/labirinth_ai/loss.py
@@ -0,0 +1,37 @@
+import tensorflow as tf
+
+
+def loss(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+    return tf.reduce_mean(tf.square(nextState[:, 0] + 0.25 * nextState[:, 1] - tf.reduce_sum(
+        nextState[:, 2:6, 0] * (actions[:, :4] + actions[:, 4:]), axis=1))) + tf.reduce_mean(
+        tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss2(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+
+    # return 0.1 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, 4:] + actions[:, :4]),axis=1))) + 0.9 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    # return 0.0 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, :4]),axis=1))) + 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    return tf.reduce_mean(
+        tf.reduce_max(nextState[:, 2:6, 0] * tf.square((nextState[:, 6:, 0] - (actions[:, :4] + actions[:, 4:]))),
+                      axis=1), axis=0)
+
+    # action = nextState[:, 3] * 1 + nextState[:, 4] * 2 + nextState[:, 5] * 3
+    # action = tf.cast(action, tf.int32)
+    # action = tf.reshape(action, (-1,))
+    #
+    # # test = actions[:, action[:]]
+    #
+    # test1 = tf.slice(actions[:, :4], action, (-1, 1))
+    # test2 = tf.slice(actions[:, 4:], action, (-1, 1))
+    #
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (test1 + test2)), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (actions[:, :4] + actions[:, 4:])), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss3(target, pred):
+    return tf.reduce_mean(0.5 * tf.square(0.1 * target[:, 0, 0] + target[:, 1, 0] - (pred[:, 0] + pred[:, 1])) +
+                          0.5 * tf.square(target[:, 1, 0] - pred[:, 0]), axis=0)
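
A quick way to exercise the new loss functions is to evaluate them on small dummy tensors. The snippet below is a minimal sketch, not part of the patch: the shapes and the reading of the two target channels (roughly a future-value term and an immediate-reward term) are assumptions inferred from how loss3 combines them, not something the diff itself states.

# Hypothetical smoke test for labirinth_ai/loss.py -- shapes are assumed for illustration only.
import tensorflow as tf

from labirinth_ai.loss import loss3

# Assumed layout: target[:, 0, 0] ~ discounted future value, target[:, 1, 0] ~ immediate reward;
# pred[:, 0] and pred[:, 1] are the two predicted value heads that loss3 sums in its first term.
target = tf.constant([[[1.0], [0.5]],
                      [[0.2], [0.0]]])   # shape (batch=2, 2, 1)
pred = tf.constant([[0.4, 0.1],
                    [0.1, 0.0]])         # shape (batch=2, 2)

print(float(loss3(target, pred)))        # single scalar, averaged over the batch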