From 0638d5e6662ff3bf4e0ccdd63080ce6e18f6f25b Mon Sep 17 00:00:00 2001 From: zomseffen Date: Mon, 7 Feb 2022 21:08:45 +0100 Subject: [PATCH] adds labyrinth and subjects as well as performance increases --- Client/Client.py | 146 +--- Objects/Structure.py | 126 ++-- Objects/World.py | 176 +++-- WorldProvider/WorldProvider.py | 4 +- labirinth_ai/LabyrinthClient.py | 43 ++ labirinth_ai/LabyrinthProvider.py | 6 + labirinth_ai/LabyrinthWorld.py | 232 +++++++ labirinth_ai/Subject.py | 1055 +++++++++++++++++++++++++++++ labirinth_ai/__init__.py | 0 labirinth_ai/loss.py | 37 + 10 files changed, 1591 insertions(+), 234 deletions(-) create mode 100644 labirinth_ai/LabyrinthClient.py create mode 100644 labirinth_ai/LabyrinthProvider.py create mode 100644 labirinth_ai/LabyrinthWorld.py create mode 100644 labirinth_ai/Subject.py create mode 100644 labirinth_ai/__init__.py create mode 100644 labirinth_ai/loss.py diff --git a/Client/Client.py b/Client/Client.py index 2f6699d..e64554a 100644 --- a/Client/Client.py +++ b/Client/Client.py @@ -41,10 +41,30 @@ def value_to_color(v, min_value, max_value): class Client: - def __init__(self, test=False, pos=[0, 0, 0]): + def __init__(self, test=False, pos=[0, 0, 0], world_class=WorldProvider): self.state = 0 with open('./config.json', 'r') as f: self.config = json.load(f) + self.init_shaders() + + self.world_provider = world_class(self.normal_program) + self.draw_world() + + self.pos = pos + self.time = time.time() + + glutReshapeFunc(self.resize) + glutDisplayFunc(self.display) + glutKeyboardFunc(self.keyboardHandler) + glutSpecialFunc(self.funcKeydHandler) + + if not test: + glutMainLoop() + else: + self.display() + self.resize(100, 100) + + def init_shaders(self): glutInit(sys.argv) self.width = 1920 self.height = 1080 @@ -96,7 +116,7 @@ class Client: self.depth_program[self.normal_program[key]] = Spotlight.getDepthProgram(self.vertex_shader_id, key.GeometryShaderId) - self.world_provider = WorldProvider(self.normal_program) + def draw_world(self): for x_pos in range(0, 100): for y_pos in range(0, 100): for z_pos in range(0, 1): @@ -118,97 +138,11 @@ class Client: r, g, b = colors[int(self.world_provider.world.plates[x_pos, y_pos])] self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - # total_x = self.world_provider.world.chunk_n_x * self.world_provider.world.chunk_size_x - # total_y = self.world_provider.world.chunk_n_y * self.world_provider.world.chunk_size_y - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # if self.world_provider.world.faults[x_pos, y_pos] == -2: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 0, 0, 0) - # - # for line_index, line in enumerate(self.world_provider.world.fault_lines): - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # if self.world_provider.world.faults[x_pos, y_pos] == line_index: - # if line_index != 9: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 0, 0, 1) - # else: - # self.world_provider.world.set_color(x_pos, y_pos, 0, 1, 1, 1) - # - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # if [x_pos, y_pos] in self.world_provider.world.fault_nodes: - # r, g, b = 1, 0, 0 - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - - # # visualize direction lengths - # lengths = np.sqrt(np.sum(np.square(self.world_provider.world.directions), axis=2)) - # lengths = lengths / np.max(lengths) - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # r, g, b = 
lengths[x_pos, y_pos], lengths[x_pos, y_pos], lengths[x_pos, y_pos] - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) self.rx = self.cx = self.cy = 0 self.opening = 45 - glutReshapeFunc(self.resize) - glutDisplayFunc(self.display) - glutKeyboardFunc(self.keyboardHandler) - glutSpecialFunc(self.funcKeydHandler) - - self.pos = pos - - self.time = time.time() - - self.field = (100, 100, 1) - self.e_a = np.array([ - [0, 0, 0], - [1, 0, 0], - [1, 1, 0], - [0, 1, 0], - [-1, 1, 0], - [-1, 0, 0], - [-1, -1, 0], - [0, -1, 0], - [1, -1, 0], - ]) - - self.relaxation_time = 0.55 # 0.55 - self.w_a = [ - 4.0 / 9.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0, - 1.0 / 9.0, - 1.0 / 36.0 - ] - - self.n_a = np.zeros((len(self.e_a),) + self.field) - self.n_a_eq = np.zeros(self.n_a.shape) - self.n = np.zeros(self.field) - self.n[:, :, :] += 1.0 - self.gravity_applies = np.zeros(self.field) - # self.n /= np.sum(self.n) - self.n_a[0] = np.array(self.n) - self.u = np.zeros(self.field + (self.e_a.shape[1],)) - - self.compressible = True - self.max_n = self.w_a[0] - - self.test_pixel = [40, 50, 0] - - if not test: - glutMainLoop() - else: - self.display() - self.resize(100, 100) def display(self): glClearColor(0, 0, 0, 0) @@ -261,41 +195,7 @@ class Client: glutSwapBuffers() - min_value = 0 - max_value_n = np.max(self.n) - # max_value_n = 1.0 - - vel = np.sqrt(np.sum(np.square(self.u), axis=3)) *self.n - max_value_vel = np.max(vel) - # max_value_vel = np.sqrt(3) - - # print('round') - # print('sum n: %f' % np.sum(self.n)) - # print('max n: %f' % np.max(self.n)) - # print('min n: %f' % np.min(self.n)) - # print('sum vel: %f' % np.sum(vel)) - # print('max vel: %f' % np.max(vel)) - # print('min vel: %f' % np.min(vel)) - - # for x_pos in range(0, 100): - # for y_pos in range(0, 100): - # for z_pos in range(0, 1): - # # if self.state == 2: - # # r, g, b = value_to_color(int(self.gravity_applies[x_pos, y_pos, z_pos]), 0, 1) - # # if self.state == 1: - # # r, g, b = value_to_color(vel[x_pos, y_pos, z_pos], min_value, max_value_vel) - # # if self.state == 0: - # # r, g, b = value_to_color(self.n[x_pos, y_pos, z_pos], min_value, max_value_n) - # r, g, b, = 128, 128, 128 - # if [x_pos, y_pos] in self.world_provider.world.fault_nodes: - # r, g, b = 128, 0, 0 - # - # self.world_provider.world.set_color(x_pos, y_pos, z_pos, r, g, b) - # self.world_provider.world.set_color(int(round(self.test_pixel[0])), - # int(round(self.test_pixel[1])), - # int(round(self.test_pixel[2])), 1.0, 1.0, 1.0) - - print(1.0 / (time.time() - self.time)) + print('fps', 1.0 / (time.time() - self.time)) self.time = time.time() glutPostRedisplay() diff --git a/Objects/Structure.py b/Objects/Structure.py index 5c49893..02cd5a4 100644 --- a/Objects/Structure.py +++ b/Objects/Structure.py @@ -19,6 +19,9 @@ class Structure(Renderable): self.Objects = {} self.vais = {} self.dirty = True + self.dirty_pos = True + self.dirty_color = True + self.dirty_size = True self.x_offset = x_offset self.y_offset = y_offset @@ -31,6 +34,7 @@ class Structure(Renderable): @x_offset.setter def x_offset(self, value): self.dirty = True + self.dirty_pos = True self._x_offset = value @property @@ -40,6 +44,7 @@ class Structure(Renderable): @y_offset.setter def y_offset(self, value): self.dirty = True + self.dirty_pos = True self._y_offset = value @property @@ -49,6 +54,7 @@ class Structure(Renderable): @z_offset.setter def z_offset(self, value): 
self.dirty = True + self.dirty_pos = True self._z_offset = value def addShape(self, program, shape): @@ -56,6 +62,9 @@ class Structure(Renderable): self.Objects[program] = [] self.Objects[program].append(shape) self.dirty = True + self.dirty_color = True + self.dirty_pos = True + self.dirty_size = True def removeShape(self, program, shape): if program in self.Objects.keys(): @@ -63,72 +72,89 @@ class Structure(Renderable): if len(self.Objects[program]) == 0: self.Objects.pop(program) self.dirty = True + self.dirty_color = True + self.dirty_pos = True + self.dirty_size = True def buildvertexArrays(self): if self.dirty: - self.clearVertexArrays() + # self.clearVertexArrays() glEnableClientState(GL_VERTEX_ARRAY) glEnableClientState(GL_TEXTURE_COORD_ARRAY) glEnableClientState(GL_NORMAL_ARRAY) glEnableClientState(GL_COLOR_ARRAY) - self.vais = {} for key, objects in self.Objects.items(): - tvai = GLuint(0) - tpbi = GLuint(0) - tcbi = GLuint(0) - tsbi = GLuint(0) - num = len(objects) - - glGenVertexArrays(1, tvai) + needs_new_buffers = key not in self.vais.keys() + if needs_new_buffers: + tvai = GLuint(0) + tpbi = GLuint(0) + tcbi = GLuint(0) + tsbi = GLuint(0) + num = len(objects) + else: + tvai, tpbi, tcbi, tsbi, num = self.vais[key] + if needs_new_buffers: + glGenVertexArrays(1, tvai) glBindVertexArray(tvai) + if self.dirty_pos: + if needs_new_buffers: + vid = glGetAttribLocation(key, "in_position") + glEnableVertexAttribArray(vid) - vid = glGetAttribLocation(key, "in_position") - glEnableVertexAttribArray(vid) - - tpbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tpbi) - positions = [] - for o in objects: - positions.append(o.pos[0] + self.x_offset) - positions.append(o.pos[1] + self.y_offset) - positions.append(o.pos[2] + self.z_offset) - glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create position buffer") - - colors = [] - for o in objects: - colors.append(o.color[0]) - colors.append(o.color[1]) - colors.append(o.color[2]) - tcbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tcbi) - glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) - vc = glGetAttribLocation(key, "MyInColor") - if vc != -1: - glEnableVertexAttribArray(vc) - glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create color buffer") - - if hasattr(objects[0], 'size'): - sizes = [] + tpbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tpbi) + positions = [] for o in objects: - sizes.append(o.size[0]) - sizes.append(o.size[1]) - sizes.append(o.size[2]) - tsbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tsbi) - glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) - vs = glGetAttribLocation(key, "MyInSize") - if vs != -1: - glEnableVertexAttribArray(vs) - glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create size buffer") + positions.append(o.pos[0] + self.x_offset) + positions.append(o.pos[1] + self.y_offset) + positions.append(o.pos[2] + self.z_offset) + glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create position buffer") + + if self.dirty_color: + colors = [] + for o in objects: + colors.append(o.color[0]) + colors.append(o.color[1]) + colors.append(o.color[2]) + if needs_new_buffers: + tcbi = 
glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tcbi) + glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vc = glGetAttribLocation(key, "MyInColor") + if vc != -1: + glEnableVertexAttribArray(vc) + glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create color buffer") + + if self.dirty_size: + if hasattr(objects[0], 'size'): + sizes = [] + for o in objects: + sizes.append(o.size[0]) + sizes.append(o.size[1]) + sizes.append(o.size[2]) + if needs_new_buffers: + tsbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tsbi) + glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vs = glGetAttribLocation(key, "MyInSize") + if vs != -1: + glEnableVertexAttribArray(vs) + glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create size buffer") glBindVertexArray(0) self.vais[key] = (tvai, tpbi, tcbi, tsbi, num) self.dirty = False + self.dirty_pos = False + self.dirty_color = False + self.dirty_size = False def clearVertexArrays(self): temp = dict(self.vais) diff --git a/Objects/World.py b/Objects/World.py index 473ccf2..1f0d4a3 100644 --- a/Objects/World.py +++ b/Objects/World.py @@ -1,3 +1,5 @@ +import time + from Lights.Lights import Light from Objects.Objects import Object from Objects.Renderable import Renderable @@ -9,7 +11,8 @@ import math import numpy as np import random import sys - +import ctypes +float_pointer = ctypes.POINTER(ctypes.c_float) # Plate Types SEA_PLATE = 0 CONTINENTAL_PLATE = 1 @@ -22,6 +25,7 @@ METAMORPH_STONE = 3 SEDIMENTAL_STONE = 4 SEDIMENT = 5 + class WorldChunk(Structure): def __init__(self, width: int, length: int, height: int, programs: dict): assert width > 0, 'Width must be greater than 0' @@ -38,6 +42,8 @@ class WorldChunk(Structure): self.height = height self.programs = programs + self.objects = {} + for x in range(width): self.content.append([]) self.visible.append([]) @@ -54,6 +60,7 @@ class WorldChunk(Structure): assert 0 <= z < self.height, 'Put out of bounds for z coordinate! 
Must be between 0 and %i' % self.height no_visibility_changes = (self.content[x][y][z] is None) == (new_object is None) + old_object = self.content[x][y][z] self.content[x][y][z] = new_object new_object.translate(translate(x, y, z)) @@ -87,6 +94,32 @@ class WorldChunk(Structure): else: self.visible[x][y][z - 1] += change + # todo: add visibility check for object listing + added = False + if old_object is not None: + if new_object is not None and type(old_object) == type(new_object): + new_object.buffer_id = old_object.buffer_id + self.objects[self.programs[type(old_object)]][old_object.buffer_id] = new_object + added = True + else: + # todo: maybe replace the element with a placeholder that is skipped when rendering/ saving and have a + # cleanup task, since this could be exploited to lower update rates + leading = self.objects[self.programs[type(old_object)]][:old_object.buffer_id] + following = self.objects[self.programs[type(old_object)]][old_object.buffer_id + 1:] + for element in following: + element.buffer_id -= 1 + self.objects[self.programs[type(old_object)]] = leading + following + + if not added and new_object is not None: + if self.programs[type(new_object)] not in self.objects.keys(): + self.objects[self.programs[type(new_object)]] = [] + new_object.buffer_id = len(self.objects[self.programs[type(new_object)]]) + self.objects[self.programs[type(new_object)]].append(new_object) + + self.dirty = True + self.dirty_pos = True + self.dirty_color = True + self.dirty_size = True return visible_carry_over def get_object(self, x: int, y: int, z: int): @@ -112,80 +145,92 @@ class WorldChunk(Structure): def buildvertexArrays(self): if self.dirty: - self.clearVertexArrays() + # self.clearVertexArrays() glEnableClientState(GL_VERTEX_ARRAY) glEnableClientState(GL_TEXTURE_COORD_ARRAY) glEnableClientState(GL_NORMAL_ARRAY) glEnableClientState(GL_COLOR_ARRAY) - self.vais = {} - objects = {} - counts = {} - for x in range(self.width): - for y in range(self.length): - for z in range(self.height): - if self.content[x][y][z] is not None: # and self.visible[x][y][z] > 0: TODO: check visibility... 
- if self.programs[type(self.content[x][y][z])] not in objects.keys(): - objects[self.programs[type(self.content[x][y][z])]] = [] - counts[self.programs[type(self.content[x][y][z])]] = 0 - objects[self.programs[type(self.content[x][y][z])]].append(self.content[x][y][z]) - counts[self.programs[type(self.content[x][y][z])]] += 1 + for key, object_list in self.objects.items(): + needs_new_buffers = key not in self.vais.keys() + if needs_new_buffers: + tvai = GLuint(0) + tpbi = GLuint(0) + tcbi = GLuint(0) + tsbi = GLuint(0) - for key, object_list in objects.items(): - tvai = GLuint(0) - tpbi = GLuint(0) - tcbi = GLuint(0) - tsbi = GLuint(0) - - glGenVertexArrays(1, tvai) + glGenVertexArrays(1, tvai) + else: + tvai, tpbi, tcbi, tsbi, old_len = self.vais[key] glBindVertexArray(tvai) - vid = glGetAttribLocation(key, "in_position") - glEnableVertexAttribArray(vid) + if self.dirty_pos: + if needs_new_buffers: + vid = glGetAttribLocation(key, "in_position") + glEnableVertexAttribArray(vid) + tpbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tpbi) + positions = [] + for index, o in enumerate(object_list): + o.buffer_id = index + positions.append(o.pos[0] + self.x_offset) + positions.append(o.pos[1] + self.y_offset) + positions.append(o.pos[2] + self.z_offset) - tpbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tpbi) - positions = [] - for o in object_list: - positions.append(o.pos[0] + self.x_offset) - positions.append(o.pos[1] + self.y_offset) - positions.append(o.pos[2] + self.z_offset) - glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) - self.check_error("Could not create position buffer") + glBufferData(GL_ARRAY_BUFFER, np.array(positions, dtype=np.float32), GL_STATIC_DRAW) - colors = [] - for o in object_list: - colors.append(o.color[0]) - colors.append(o.color[1]) - colors.append(o.color[2]) - tcbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tcbi) - glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) - vc = glGetAttribLocation(key, "MyInColor") - if vc != -1: - glEnableVertexAttribArray(vc) - glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) + if needs_new_buffers: + glVertexAttribPointer(vid, 3, GL_FLOAT, GL_FALSE, 0, None) + self.check_error("Could not create position buffer") + + if self.dirty_color: + colors = [] + for o in object_list: + colors.append(o.color[0]) + colors.append(o.color[1]) + colors.append(o.color[2]) + if needs_new_buffers: + tcbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tcbi) + if needs_new_buffers or old_len != len(object_list): + glBufferData(GL_ARRAY_BUFFER, np.array(colors, dtype=np.float32), GL_STATIC_DRAW) + else: + # todo: check if this improves anything. 
Timewise it seems to be the same + ptr = ctypes.cast(glMapBuffer(GL_ARRAY_BUFFER, GL_READ_WRITE), float_pointer) + for index, value in enumerate(colors): + ptr[index] = value + glUnmapBuffer(GL_ARRAY_BUFFER) + if needs_new_buffers: + vc = glGetAttribLocation(key, "MyInColor") + if vc != -1: + glEnableVertexAttribArray(vc) + glVertexAttribPointer(vc, 3, GL_FLOAT, GL_FALSE, 0, None) self.check_error("Could not create color buffer") - if hasattr(object_list[0], 'size'): - sizes = [] - for o in object_list: - sizes.append(o.size[0]) - sizes.append(o.size[1]) - sizes.append(o.size[2]) - tsbi = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, tsbi) - glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) - vs = glGetAttribLocation(key, "MyInSize") - if vs != -1: - glEnableVertexAttribArray(vs) - glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) + if self.dirty_size: + if hasattr(object_list[0], 'size'): + sizes = [] + for o in object_list: + sizes.append(o.size[0]) + sizes.append(o.size[1]) + sizes.append(o.size[2]) + if needs_new_buffers: + tsbi = glGenBuffers(1) + glBindBuffer(GL_ARRAY_BUFFER, tsbi) + glBufferData(GL_ARRAY_BUFFER, np.array(sizes, dtype=np.float32), GL_STATIC_DRAW) + if needs_new_buffers: + vs = glGetAttribLocation(key, "MyInSize") + if vs != -1: + glEnableVertexAttribArray(vs) + glVertexAttribPointer(vs, 3, GL_FLOAT, GL_FALSE, 0, None) self.check_error("Could not create size buffer") glBindVertexArray(0) - self.vais[key] = (tvai, tpbi, tcbi, tsbi, counts[key]) + self.vais[key] = (tvai, tpbi, tcbi, tsbi, len(object_list)) self.dirty = False + self.dirty_pos = False + self.dirty_color = False + self.dirty_size = False def render(self, proj_matrix, geometry_rot_matrix, alternate_programs=None, preselected_program=None, projection_pos=None, rot_pos=None): @@ -204,6 +249,17 @@ class WorldChunk(Structure): if self.content[x][y][z] is not None: self.content[x][y][z].setColor(r, g, b) self.dirty = True + self.dirty_color = True + + def load(self): + for x in range(self.width): + for y in range(self.length): + for z in range(self.height): + if self.content[x][y][z] is not None: # and self.visible[x][y][z] > 0: TODO: check visibility... 
+ if self.programs[type(self.content[x][y][z])] not in self.objects.keys(): + self.objects[self.programs[type(self.content[x][y][z])]] = [] + self.objects[self.programs[type(self.content[x][y][z])]].append(self.content[x][y][z]) + class World(Renderable): def __init__(self, chunk_size_x: int, chunk_size_y: int, chunk_size_z: int, @@ -488,6 +544,8 @@ class World(Renderable): y % self.chunk_size_y, z % self.chunk_size_z, r, g, b) + else: + print('Changing color of nonexistant element!') def put_object(self, x: int, y: int, z: int, new_object: Object): x = x % (self.chunk_size_x * self.chunk_n_x) diff --git a/WorldProvider/WorldProvider.py b/WorldProvider/WorldProvider.py index a9629b7..1e6367c 100644 --- a/WorldProvider/WorldProvider.py +++ b/WorldProvider/WorldProvider.py @@ -2,8 +2,8 @@ from Objects.World import World class WorldProvider: - def __init__(self, programs): - self.world: World = World(10, 10, 10, 10, 10, 10, programs) + def __init__(self, programs, world_class=World): + self.world: World = world_class(10, 10, 10, 10, 10, 10, programs) self.world.generate() def update(self): diff --git a/labirinth_ai/LabyrinthClient.py b/labirinth_ai/LabyrinthClient.py new file mode 100644 index 0000000..fdcb22e --- /dev/null +++ b/labirinth_ai/LabyrinthClient.py @@ -0,0 +1,43 @@ +import time + +from Client.Client import Client, MAX_DISTANCE +from MatrixStuff.Transformations import perspectiveMatrix +from labirinth_ai.LabyrinthProvider import LabyrinthProvider + +import numpy as np + +class LabyrinthClient(Client): + def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider): + super(LabyrinthClient, self).__init__(test, pos, world_class) + + def draw_world(self): + start_time = time.time() + for x in range(self.world_provider.world.chunk_size_x * self.world_provider.world.chunk_n_x): + for y in range(self.world_provider.world.chunk_size_y * self.world_provider.world.chunk_n_y): + if self.world_provider.world.board[x, y] in [1, 2]: + r, g, b = 57, 92, 152 + if 1.5 >= self.world_provider.world.hunter_grass[x, y] > 0.5: + r, g, b = 25, 149, 156 + if 3 >= self.world_provider.world.hunter_grass[x, y] > 1.5: + r, g, b = 112, 198, 169 + self.world_provider.world.set_color(x, y, 0, r / 255.0, g / 255.0, b / 255.0) + if self.world_provider.world.board[x, y] == 3: + self.world_provider.world.set_color(x, y, 0, 139 / 255.0, 72 / 255.0, 82 / 255.0) + + for sub in self.world_provider.world.subjects: + if not sub.random: + # pyxel.rectb(sub.x * 4 + 1, sub.y * 4 + 1, 2, 2, sub.col) + self.world_provider.world.set_color(sub.x, sub.y, 0, sub.r / 255.0, sub.g / 255.0, sub.b / 255.0) + else: + self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0) + + self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) + print('redraw', time.time() - start_time) + + def display(self): + super(LabyrinthClient, self).display() + self.draw_world() + self.world_provider.world.update() + +if __name__ == '__main__': + client = LabyrinthClient(pos=[-50, -50, -200]) diff --git a/labirinth_ai/LabyrinthProvider.py b/labirinth_ai/LabyrinthProvider.py new file mode 100644 index 0000000..4af8345 --- /dev/null +++ b/labirinth_ai/LabyrinthProvider.py @@ -0,0 +1,6 @@ +from WorldProvider.WorldProvider import WorldProvider +from labirinth_ai.LabyrinthWorld import LabyrinthWorld + +class LabyrinthProvider(WorldProvider): + def __init__(self, programs): + super(LabyrinthProvider, self).__init__(programs, LabyrinthWorld) diff --git a/labirinth_ai/LabyrinthWorld.py 
b/labirinth_ai/LabyrinthWorld.py new file mode 100644 index 0000000..2a2e3e7 --- /dev/null +++ b/labirinth_ai/LabyrinthWorld.py @@ -0,0 +1,232 @@ +import time + +from Objects.Cube.Cube import Cube +from Objects.World import World +import numpy as np +import random + + +class LabyrinthWorld(World): + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(4 * batchsize, randomBuffer) + + def __init__(self, chunk_size_x: int, chunk_size_y: int, chunk_size_z: int, + chunk_n_x: int, chunk_n_y: int, chunk_n_z: int, programs: dict): + self.board_shape = (chunk_size_x * chunk_n_x, chunk_size_y * chunk_n_y) + self.board = np.zeros(self.board_shape) + super(LabyrinthWorld, self).__init__(chunk_size_x, chunk_size_y, chunk_size_z, + chunk_n_x, chunk_n_y, chunk_n_z, programs) + self.max_room_dim = 20 + + self.min_room_dim = 6 + + self.max_room_num = 32 + self.max_corridors = 4 * self.max_room_num + + self.max_crates = self.max_room_num + + self.subjects = [] + self.ins = [] + self.actions = [] + self.targets = [] + + self.model = None + self.lastUpdate = time.time() + self.nextTrain = self.randomBuffer + self.round = 0 + + self.trailMix = np.zeros(self.board_shape) + self.grass = np.zeros(self.board_shape) + self.hunter_grass = np.zeros(self.board_shape) + self.subjectDict = {} + + def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200): + board = np.zeros(self.board_shape) + random.seed(seed) + np.random.seed(seed) + + # find random starting point + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) + + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + + # place rooms + room_num = 0 + corridor_num = 0 + while room_num < self.max_room_num and corridor_num < self.max_corridors: + # try to place Room + w = random.randint(self.min_room_dim, self.max_room_dim) + h = random.randint(self.min_room_dim, self.max_room_dim) + can_place_room = np.sum( + board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] == 1) == 0 and px - int( + w / 2.0) >= 0 and px + int(w / 2.0) < self.board_shape[0] and \ + py - int(h / 2.0) >= 0 and py + int(h / 2.0) < self.board_shape[1] + + if can_place_room: + # place Room + board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] = 1 + room_num += 1 + else: + # move && place Corridor + directions = [] + while len(directions) == 0: + movable = [] + corridor_length = random.randint(self.min_room_dim, self.max_room_dim) + if px - corridor_length >= 0: + movable.append(left) + if board[px - 1, py] != 2: + directions.append(left) + + if px + corridor_length < self.board_shape[0]: + movable.append(right) + if board[px + 1, py] != 2: + directions.append(right) + + if py - corridor_length >= 0: + movable.append(up) + if board[px, py - 1] != 2: + directions.append(up) + + if py + corridor_length < self.board_shape[1]: + movable.append(down) + if board[px, py + 1] != 2: + directions.append(down) + + if len(directions) != 0: + if len(directions) > 1: + d = directions[random.randint(0, len(directions) - 1)] + else: + d = directions[0] + changed = False + for _ in range(corridor_length): + if board[px, py] != 1 and board[px, py] != 2: + board[px, py] = 2 + if (-d[0], -d[1]) not in movable or board[px - d[0], py - d[1]] != 2: + changed = True + px += d[0] + py += d[1] + if changed: + corridor_num += 1 + else: + if len(movable) != 0: + if 
len(movable) > 1: + d = movable[random.randint(0, len(movable) - 1)] + else: + d = movable[0] + for _ in range(corridor_length): + px += d[0] + py += d[1] + + crates = 0 + while crates < self.max_crates: + px = random.randint(0, (self.board_shape[0] - 1)) + py = random.randint(0, (self.board_shape[1] - 1)) + + if board[px, py] == 1: + board[px, py] = 3 + crates += 1 + + board[board == 2] = 1 + + print((room_num, self.max_room_num)) + print((corridor_num, self.max_corridors)) + self.board = board + + # setting up the board + for x_pos in range(0, self.board_shape[0]): + for y_pos in range(0, self.board_shape[1]): + for z_pos in range(0, 1): + self.put_object(x_pos, y_pos, z_pos, Cube().setColor(1, 1, 1)) + + # adding subjects + from labirinth_ai.Subject import Hunter, Herbivore + while len(self.subjects) < 2: + px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) + py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) + if self.board[px, py] == 1: + self.subjects.append(Hunter(px, py)) + self.ins += self.subjects[-1].x_in + self.actions += self.subjects[-1].actions + self.targets += self.subjects[-1].target + + while len(self.subjects) < 10: + px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) + py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) + if self.board[px, py] == 1: + self.subjects.append(Herbivore(px, py)) + self.ins += self.subjects[-1].x_in + self.actions += self.subjects[-1].actions + self.targets += self.subjects[-1].target + + for x in range(self.board_shape[0]): + for y in range(self.board_shape[1]): + self.subjectDict[(x, y)] = [] + + for sub in self.subjects: + self.subjectDict[(sub.x, sub.y)].append(sub) + + def update(self): + # start = time.time() + if self.model is None: + for sub in self.subjects: + sub.calculateAction(self) + else: + states = list(map(lambda e: e.createState(self), self.subjects)) + states = sum(list(map(lambda e: [e, e, e, e], states)), []) + vals = self.model.predict(states) + vals = np.reshape(np.transpose(np.reshape(vals, (len(self.subjects), 4, 2)), (0, 2, 1)), + (len(self.subjects), 1, 8)) + list(map(lambda e: e[1].calculateAction(self, vals[e[0]], states[e[0]]), enumerate(self.subjects))) + + for sub in self.subjects: + if sub.alive: + sub.update(self, doTrain=self.model is None) + sub.tick += 1 + + if self.model is not None: + if self.round >= self.nextTrain: + samples = list(map(lambda e: e.generateSamples(), self.subjects)) + states = sum(list(map(lambda e: e[0], samples)), []) + targets = sum(list(map(lambda e: e[1], samples)), []) + self.model.fit(states, targets) + self.nextTrain = self.batchsize / 5 + self.round = 0 + for sub in self.subjects: + if len(sub.samples) > 20*self.batchsize: + sub.samples = sub.samples[:-20*self.batchsize] + else: + self.round += 1 + + new_subjects = [] + kill_table = {} + live_table = {} + for sub in self.subjects: + if sub.name not in kill_table.keys(): + kill_table[sub.name] = 0 + live_table[sub.name] = 0 + kill_table[sub.name] += sub.kills + live_table[sub.name] += sub.lives + if sub.alive: + new_subjects.append(sub) + else: + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) + while self.board[px, py] == 0: + px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) + py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - 
self.max_room_dim) + sub.respawnUpdate(px, py, self) + new_subjects.append(sub) + + self.subjects = new_subjects + self.trailMix *= 0.99 + + self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3) + self.hunter_grass = np.minimum(self.hunter_grass + 0.01 * (self.board != 0), 3) + + self.trailMix *= (self.trailMix > 0.01) diff --git a/labirinth_ai/Subject.py b/labirinth_ai/Subject.py new file mode 100644 index 0000000..ec0593c --- /dev/null +++ b/labirinth_ai/Subject.py @@ -0,0 +1,1055 @@ +import random +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from labirinth_ai.LabyrinthWorld import LabyrinthWorld +from labirinth_ai.loss import loss2, loss3 + +# import torch +# dtype = torch.float +# device = torch.device("cpu") + + +class Subject: + name = 'random' + col = 8 + num = 0 + random = True + r = 255 + g = 255 + b = 255 + + def __init__(self, x, y): + self.alive = True + self.x = x + self.y = y + self.kills = 0 + self.lives = 1 + self.tick = 0 + + self.id = self.num + Subject.num += 1 + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + if len(directions) > 1: + d = directions[random.randint(0, len(directions) - 1)] + else: + d = directions[0] + + if len(world.subjectDict[(self.x + d[0], self.y + d[1])]) > 0: + for sub in world.subjectDict[(self.x + d[0], self.y + d[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + + world.subjectDict[(self.x, self.y)].remove(self) + world.trailMix[self.x, self.y] += 1 + self.x += d[0] + self.y += d[1] + world.subjectDict[(self.x, self.y)].append(self) + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + world.subjectDict[(self.x, self.y)].remove(self) + self.x = x + self.y = y + world.subjectDict[(self.x, self.y)].append(self) + self.alive = True + self.lives += 1 + + +class QLearner(Subject): + name = 'QLearner' + col = 14 + learningRate = 0.25 + discountFactor = 0.5 + random = False + + Q = {} + def __init__(self, x, y): + super(QLearner, self).__init__(x, y) + # self.Q = {} + self.viewD = 3 + self.lastAction = None + self.lastState = None + self.lastReward = 0 + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(QLearner, self).respawnUpdate(x, y, world) + self.lastReward -= 20 + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.int) # - 1 + + # # floodfill state + # queued = [(0, 0)] + # todo = [(0, 0, 0)] + # while todo != []: + # doing = todo.pop(0) + # + # if self.x + doing[0] >= 0 and self.x + doing[0] < 64 and self.y + doing[1] >= 0 and self.y + doing[1] < 64: + # value = world.board[self.x + doing[0], self.y + doing[1]] + # state[self.viewD + doing[0], self.viewD + doing[1]] = value + # + # # if value == 3: + # # state[self.viewD + doing[0], self.viewD + doing[1]] = value + # + # if value != 0 and doing[2] < self.viewD: + # for i in range(-1, 2, 1): + # for j in range(-1, 2, 1): + # # 4-neighbour. 
without it it is 8-neighbour + # if abs(i) + abs(j) == 1: + # if (doing[0] + i, doing[1] + j) not in queued: + # queued.append((doing[0] + i, doing[1] + j)) + # todo.append((doing[0] + i, doing[1] + j, doing[2] + 1)) + # + # for sub in world.subjects: + # if (sub.x - self.x, sub.y - self.y) in queued and state[ + # self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = state[ + # self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] * 100 + sub.col + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - (self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + # state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + for sub in world.subjects: + if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] * 100 + 1# sub.col + + return state + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + state = self.createState(world) + + if str(state) not in self.Q.keys(): + self.Q[str(state)] = {} + for dir in directions: + if dir not in self.Q[str(state)].keys(): + self.Q[str(state)][dir] = random.randint(0, 5) + + allowedActions = dict(filter(lambda elem: elem[0] in directions,self.Q[str(state)].items())) + action = max(allowedActions, key=allowedActions.get) + + if self.learningRate != 0: + self.Q[str(state)][action] = (1 - self.learningRate) * self.Q[str(state)][action] + self.learningRate * (self.lastReward + self.discountFactor * self.Q[str(state)][action]) + + self.lastAction = action + self.lastState = state + self.lastReward = 0 + + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + self.lastReward += 10 + + world.subjectDict[(self.x, self.y)].remove(self) + self.x += action[0] + self.y += action[1] + world.subjectDict[(self.x, self.y)].append(self) + pass + + +class DoubleQLearner(QLearner): + name = 'DoubleQLearner' + col = 11 + learningRate = 0.5 + discountFactor = 0.5 + random = False + + QA = {} + QB = {} + def __init__(self, x, y): + super(DoubleQLearner, self).__init__(x, y) + self.viewD = 3 + self.lastAction = None + self.lastState = None + self.lastReward = 0 + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(DoubleQLearner, self).respawnUpdate(x, y, world) + + def update(self, world: LabyrinthWorld): + # 0, 0 is top left + right = (1, 0) + 
left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + if directions != [] and self.alive: + state = self.createState(world) + + if str(state) not in self.QA.keys(): + self.QA[str(state)] = {} + self.QB[str(state)] = {} + for dir in directions: + if dir not in self.QA[str(state)].keys(): + self.QA[str(state)][dir] = random.randint(0, 5) + self.QB[str(state)][dir] = random.randint(0, 5) + + allowedActionsA = dict(filter(lambda elem: elem[0] in directions, self.QA[str(state)].items())) + allowedActionsB = dict(filter(lambda elem: elem[0] in directions, self.QB[str(state)].items())) + allowedActions = {} + for key in allowedActionsA.keys(): + allowedActions[key] = allowedActionsA[key] + allowedActionsB[key] + + actionA = max(allowedActionsA, key=allowedActionsA.get) + actionB = max(allowedActionsB, key=allowedActionsB.get) + action = max(allowedActions, key=allowedActions.get) + + if self.learningRate != 0: + if random.randint(0, 1) == 0: + valA = self.QA[str(state)][action] + self.QA[str(state)][action] = valA + self.learningRate * (self.lastReward + self.discountFactor * self.QB[str(state)][actionA] - valA) + else: + valB = self.QB[str(state)][action] + self.QB[str(state)][action] = valB + self.learningRate * (self.lastReward + self.discountFactor * self.QA[str(state)][actionB] - valB) + + self.lastAction = action + self.lastState = state + self.lastReward = 0 + + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + self.lastReward += 10 + + world.subjectDict[(self.x, self.y)].remove(self) + self.x += action[0] + self.y += action[1] + world.subjectDict[(self.x, self.y)].append(self) + pass + + +class NetLearner(Subject): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + act2IDict = {right: 0, left: 1, up: 2, down: 3} + + name = 'NetLearner' + col = 15 + viewD = 3 + historyLength = 2 + channels = 4 + + learningRate = 0.001 + discountFactor = 0.5 + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(4*batchsize, randomBuffer) + randomChance = 9 + + historySizeMul = 20 + + # samples = [] + + # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2)) + # target = keras.Input(shape=(10, 1)) + # inVec = keras.layers.Flatten()(x_in) + # # kernel_regularizer=keras.regularizers.l2(0.01) + # actions = keras.layers.Dense((3 * (2 * viewD + 1) * (2 * viewD + 1)), activation='relu')(inVec) + # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='relu')(actions) + # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions) + # + # model = keras.Model(inputs=x_in, outputs=actions) + # + # # model.compile(optimizer='adam', loss=loss, target_tensors=[target]) + # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss, target_tensors=[target]) + + def respawnUpdate(self, x, y, world: LabyrinthWorld): + super(NetLearner, self).respawnUpdate(x, y, world) + # self.lastReward -= 20 + + if len(self.samples) < 
self.randomBuffer or random.randint(0, 10) > self.randomChance: + self.random = True + # print('Rando ' + self.name) + pass + else: + self.random = False + # print('Slau ' + self.name) + + self.strikes = 0 + + def __init__(self, x, y): + super(NetLearner, self).__init__(x, y) + + self.action = None + self.state = None + self.actDict = {} + + self.history = [] + self.lastAction = None + self.lastState = None + self.lastReward = 0 + self.lastVal = 0 + self.random = False + self.nextTrain = self.randomBuffer + + self.samples = [] + + self.x_in = [] + self.actions = [] + self.target = [] + for i in range(4): + x_in = keras.Input(shape=(self.channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + self.x_in.append(x_in) + inVec = keras.layers.Flatten()(x_in) + actions = keras.layers.Dense(((2 * self.viewD + 1) * (2 * self.viewD + 1)), activation='elu', + kernel_regularizer=keras.regularizers.l2(0.001), + name=self.name + str(self.id) + 'Dense' + str(i) + 'l1')(inVec) + actions = keras.layers.Dense(((self.viewD + 1) * (self.viewD + 1)), activation='elu', + kernel_regularizer=keras.regularizers.l2(0.001))(actions) + self.target.append(keras.Input(shape=(2, 1))) + self.actions.append(keras.layers.Dense(2, activation='linear', use_bias=False, kernel_regularizer=keras.regularizers.l2(0.001))(actions)) + + self.model = keras.Model(inputs=self.x_in, outputs=self.actions) + + self.model.compile(optimizer=tf.keras.optimizers.RMSprop(self.learningRate), loss=loss3, + target_tensors=self.target) + + if len(self.samples) < self.randomBuffer: + self.random = True + else: + self.random = False + + self.strikes = 0 + + self.lastRewards = [] + + def visualize(self): + print(self.name) + layers = self.model.get_weights() + # layers.reverse() + layersN = [[0, 1, 8, 9, 16], [2, 3, 10, 11, 17], [4, 5, 12, 13, 18], [6, 7, 14, 15, 19]] + for action in range(8): + v = np.zeros((1, 2)) + v[0][0 if action < 4 else 1] = 1.0 + layerN = list(layersN[action % 4]) + layerN.reverse() + for n in layerN: + l = layers[n] + if len(l.shape) == 2: + layer = np.transpose(l) + v = np.dot(v, layer) + else: + layer = np.array([l]) + v = v + layer + lastAction = v[0, -2:] + v = np.reshape(v[0, :-2], (4, (2 * self.viewD + 1), (2 * self.viewD + 1))) + + # right, left, up, down + dir = {0: 'right', 1: 'left', 2: 'up', 3: 'down'} + dir = dir[action % 4] + #0-3 current + #4-8 future + if action < 4: + time = 'current ' + else: + time = 'future ' + import matplotlib + import matplotlib.pyplot as plt + fig, axs = plt.subplots(2, 2, figsize=(5, 5)) + + fig.suptitle(time + dir) + im = axs[0, 0].pcolor(np.rot90(v[0])) + fig.colorbar(im, ax=axs[0, 0]) + axs[0, 0].set_title('board') + + axs[0, 1].pcolor(np.rot90(v[1])) + fig.colorbar(im, ax=axs[0, 1]) + axs[0, 1].set_title('subjects') + + axs[1, 0].pcolor(np.rot90(v[2])) + fig.colorbar(im, ax=axs[1, 0]) + axs[1, 0].set_title('trail') + + axs[1, 1].pcolor(np.rot90(v[3])) + fig.colorbar(im, ax=axs[1, 1]) + axs[1, 1].set_title('grass') + plt.show(block=True) + + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state3 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state4 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - 
(self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + # for sub in world.subjects: + # if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + # if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state2[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = sub.col + for x in range(-maxdirleft, maxdirright, 1): + for y in range(-maxdirup, maxdirdown, 1): + if world.subjectDict[(self.x + x, self.y + y)] != []: + state2[x + maxdirleft, y + maxdirup] = 1#world.subjectDict[(self.x + x, self.y + y)][0].col + + state3[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.trailMix[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + state4[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.hunter_grass[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + + if not self.random: + test=1 + + area = np.reshape(np.stack((state, state2, state3, state4)), (4 * (2 * self.viewD + 1) * (2 * self.viewD + 1))) + action = [0, 0] + if self.lastAction is not None: + action = self.lastAction + return np.reshape(np.concatenate((area, action)), (1, 4 * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + + def calculateAction(self, world: LabyrinthWorld, vals=None, state=None): + # 0, 0 is top left + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(self.left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(self.right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(self.up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(self.down) + + if directions == []: + print('Wut?') + + if directions != [] and self.alive: + if state is None: + state = self.createState(world) + if vals is None: + vals = self.model.predict([state, state, state, state]) + vals = np.reshape(np.transpose(np.reshape(vals, (4, 2)), (1, 0)), + (1, 8)) + + self.actDict = {self.right: vals[0][0] + vals[0][4], self.left: vals[0][1] + vals[0][5], self.up: vals[0][2] + vals[0][6], self.down: vals[0][3] + vals[0][7]} + + allowedActions = dict(filter(lambda elem: elem[0] in directions, self.actDict.items())) + + # if self.name == 'Herbivore' and self.id == 11 and not self.random: + # print(allowedActions) + # print(self.lastReward) + if self.strikes <= 0: + self.random = False + + if not self.random: + self.action = max(allowedActions, key=allowedActions.get) + else: + self.action = self.randomAct(world) + + self.state = state + + def update(self, world: LabyrinthWorld, doTrain=True): + if self.lastAction is not None: + if not self.random: + if self.lastAction[0] + self.action[0] == 0 and self.lastAction[1] + self.action[1] == 0: + self.strikes += 1 + else: + self.strikes -= 1 + if self.strikes > 100: + self.random = True + else: + self.strikes -= 1 + + if len(self.history) >= self.historyLength: + self.history.pop(0) + self.history.append((self.lastState.copy(), int(self.act2IDict[self.lastAction]), int(self.lastVal), float(self.lastReward), np.array(self.lastRewards))) + + # if 
self.lastReward != 0 or random.randint(0, 9) == 0: + if len(self.history) == self.historyLength: + self.samples.append(self.history.copy()) + + # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer: + if len(self.samples) > self.nextTrain and doTrain: + print('train') + self.train() + self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize) + + self.lastAction = self.action + self.lastState = self.state + self.lastReward = 0 + self.lastVal = self.actDict[self.action] + + maxVal = 0 + + self.executeAction(world, self.action) + + def randomAct(self, world: LabyrinthWorld): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + + d = random.randint(0, len(directions) - 1) + action = directions[d] + + return action + + def executeAction(self, world: LabyrinthWorld, action): + pass + + def generateSamples(self): + # history element: (self.lastState.copy(), self.act2IDict[self.lastAction], self.lastVal, self.lastReward, np.array(self.lastRewards)) + # history: [t-2, t-1] + states = [] + targets = [] + for i in range(4): + true_batch = int(self.batchsize/4) + target = np.zeros((true_batch, 2, 1)) + samples = np.array(self.samples[:-self.batchsize]) + # print('Samples for ' + str(i)) + # print(len(samples)) + samples = np.array(list(filter(lambda e: e[0, 1] == i, list(samples)))) + # print(len(samples)) + partTwo = True + if len(samples) == 0: + print('No samples for:' + str(i)) + partTwo = False + samples = np.array(self.samples[:-self.batchsize]) + buffer_size = len(samples) + index = np.random.choice(np.arange(buffer_size), + size=true_batch, + replace=True) + samples = samples[index] + # self.samples = [] + if partTwo: + target[:, 1, 0] = samples[:, 1, 3] #reward t-2 got + + nextState = np.concatenate(samples[:, 1, 0]) #states of t-1 + nextVals = self.model.predict([nextState, nextState, nextState, nextState]) + + nextVals2 = nextVals[i][:, 0] + nextVals[i][:, 1] + target[:, 0, 0] = nextVals2 #best q t-1 + else: + target[:, 1, 0] = np.array(list(map(lambda elem: list(elem), list(np.array(samples[:, 1, 4])))))[:, i] # reward t-2 got + + targets.append(target) + + states.append(np.concatenate(samples[:, 0, 0])) #states of t-2 + + return states, targets + + def train(self): + print(self.name) + states, target = self.generateSamples() + self.model.fit(states, target, epochs=1) + + self.samples = self.samples[-self.historySizeMul*self.batchsize:] + + # print(self.model.get_weights()) + + pass + + +class Herbivore(NetLearner): + name = 'Herbivore' + col = 9 + r = 255 + g = 255 + b = 0 + viewD = 3 + historyLength = 2 + + learningRate = 0.001 + discountFactor = 0.5 + randomBuffer = 0 + batchsize = 1000 + randomBuffer = max(2 * batchsize, randomBuffer) + randomChance = 9 + + samples = [] + + # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2)) + # target = keras.Input(shape=(10, 1)) + # inVec = keras.layers.Flatten()(x_in) + # # kernel_regularizer=keras.regularizers.l2(0.01) + # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec) + # 
actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions) + # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions) + # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec) + # + # model = keras.Model(inputs=x_in, outputs=actions) + # + # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target]) + # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target]) + + # def __init__(self, x, y): + # super(Herbivore, self).__init__(x, y) + + def createState(self, world: LabyrinthWorld): + state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state3 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + state4 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 + + maxdirleft = self.x - max(self.x - (self.viewD), 0) + maxdirright = min(self.x + (self.viewD), (world.board_shape[0] - 1)) - self.x + maxdirup = self.y - max(self.y - (self.viewD), 0) + maxdirdown = min(self.y + (self.viewD), (world.board_shape[1] - 1)) - self.y + + state[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.board[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + # for sub in world.subjects: + # if abs(sub.x - self.x) < self.viewD and abs(sub.y - self.y) < self.viewD: + # if state[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] != 3: + # state2[self.viewD + sub.x - self.x, self.viewD + sub.y - self.y] = sub.col + for x in range(-maxdirleft, maxdirright, 1): + for y in range(-maxdirup, maxdirdown, 1): + if world.subjectDict[(self.x + x, self.y + y)] != []: + state2[x + maxdirleft, y + maxdirup] = 1#world.subjectDict[(self.x + x, self.y + y)][0].col + + state3[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.trailMix[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + state4[self.viewD - maxdirleft: self.viewD + maxdirright, self.viewD - maxdirup: self.viewD + maxdirdown] = world.grass[self.x - maxdirleft: self.x + maxdirright, self.y - maxdirup: self.y + maxdirdown] + + if not self.random: + test=1 + + area = np.reshape(np.stack((state, state2, state3, state4)), (4 * (2 * self.viewD + 1) * (2 * self.viewD + 1))) + action = [0, 0] + if self.lastAction is not None: + action = self.lastAction + return np.reshape(np.concatenate((area, action)), (1, 4 * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2)) + + def executeAction(self, world: LabyrinthWorld, action): + right = (1, 0) + left = (-1, 0) + up = (0, -1) + down = (0, 1) + directions = [] + + if self.x - 1 >= 0: + if world.board[self.x - 1, self.y] != 0: + directions.append(left) + + if self.x + 1 < world.board_shape[0]: + if world.board[self.x + 1, self.y] != 0: + directions.append(right) + + if self.y - 1 >= 0: + if world.board[self.x, self.y - 1] != 0: + directions.append(up) + + if self.y + 1 < world.board_shape[1]: + if world.board[self.x, self.y + 1] != 0: + directions.append(down) + if len(action) == 2: + if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0: + for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]: + if sub.alive: + self.kills += 1 + sub.alive = False + self.alive = True + + self.lastRewards = [] + if right in directions: + 
+            self.lastRewards.append(world.grass[self.x + 1, self.y])
+        else:
+            self.lastRewards.append(0)
+        if left in directions:
+            self.lastRewards.append(world.grass[self.x - 1, self.y])
+        else:
+            self.lastRewards.append(0)
+        if up in directions:
+            self.lastRewards.append(world.grass[self.x, self.y - 1])
+        else:
+            self.lastRewards.append(0)
+        if down in directions:
+            self.lastRewards.append(world.grass[self.x, self.y + 1])
+        else:
+            self.lastRewards.append(0)
+        assert len(self.lastRewards) == 4, 'Last Rewards not filled correctly!'
+
+        world.subjectDict[(self.x, self.y)].remove(self)
+        self.lastReward += world.trailMix[self.x, self.y]
+        self.x += action[0]
+        self.y += action[1]
+        world.subjectDict[(self.x, self.y)].append(self)
+        world.trailMix[self.x, self.y] = max(1.0, world.trailMix[self.x, self.y])
+        self.lastReward += (world.grass[self.x, self.y] - 0.0)
+        world.grass[self.x, self.y] = 0
+        world.hunter_grass[self.x, self.y] = 0
+
+    def randomAct(self, world: LabyrinthWorld):
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+        actDict = {}
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] != 0:
+                directions.append(left)
+                actDict[left] = world.grass[self.x - 1, self.y]
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] != 0:
+                directions.append(right)
+                actDict[right] = world.grass[self.x + 1, self.y]
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] != 0:
+                directions.append(up)
+                actDict[up] = world.grass[self.x, self.y - 1]
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] != 0:
+                directions.append(down)
+                actDict[down] = world.grass[self.x, self.y + 1]
+
+        allowedActions = dict(filter(lambda elem: elem[0] in directions, actDict.items()))
+        action = max(allowedActions, key=allowedActions.get)
+
+        return action
+
+
+class Hunter(NetLearner):
+    name = 'Hunter'
+    hunterGrassScale = 0.5
+    r = 0
+    g = 255
+    b = 255
+    def randomAct(self, world: LabyrinthWorld):
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+        actDict = {}
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] > 0.01:
+                directions.append(left)
+
+                sub = self.getClosestSubject(world, self.x - 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[left] = world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + left[0], self.y + left[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + left[0], self.y + left[1])]:
+                        if sub.col != self.col:
+                            actDict[left] += 10
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] > 0.01:
+                directions.append(right)
+
+                sub = self.getClosestSubject(world, self.x + 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x + 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[right] = world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + right[0], self.y + right[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + right[0], self.y + right[1])]:
+                        if sub.col != self.col:
+                            actDict[right] += 10
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] > 0.01:
+                directions.append(up)
+
+                sub = self.getClosestSubject(world, self.x, self.y - 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - 1 - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[up] = world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + up[0], self.y + up[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + up[0], self.y + up[1])]:
+                        if sub.col != self.col:
+                            actDict[up] += 10
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] > 0.01:
+                directions.append(down)
+
+                sub = self.getClosestSubject(world, self.x, self.y + 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y + 1 - sub.y))
+                distReward = self.viewD - dist
+
+                actDict[down] = world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * self.hunterGrassScale + distReward
+                if len(world.subjectDict[(self.x + down[0], self.y + down[1])]) > 0:
+                    for sub in world.subjectDict[(self.x + down[0], self.y + down[1])]:
+                        if sub.col != self.col:
+                            actDict[down] += 10
+
+        if len(actDict) > 0:
+            allowedActions = dict(filter(lambda elem: elem[0] in directions, actDict.items()))
+        else:
+            return super(Hunter, self).randomAct(world)
+        action = max(allowedActions, key=allowedActions.get)
+
+        return action
+
+    def respawnUpdate(self, x, y, world: LabyrinthWorld):
+        super(Hunter, self).respawnUpdate(x, y, world)
+        self.lastReward -= 1
+
+    def getClosestSubject(self, world, x, y):
+        for dist in range(1, self.viewD):
+            dy = dist
+            for dx in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dy = -dist
+            for dx in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dx = dist
+            for dy in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+
+            dx = -dist
+            for dy in range(-dist, dist):
+                if world.board_shape[0] > x + dx >= 0 and world.board_shape[1] > y + dy >= 0:
+                    for sub in world.subjectDict[(x + dx, y + dy)]:
+                        if sub.alive and sub.col != self.col:
+                            return sub
+        return None
+
+    def executeAction(self, world: LabyrinthWorld, action):
+        grass_factor = 0.5
+
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+        directions = []
+
+        if self.x - 1 >= 0:
+            if world.board[self.x - 1, self.y] != 0:
+                directions.append(left)
+
+        if self.x + 1 < world.board_shape[0]:
+            if world.board[self.x + 1, self.y] != 0:
+                directions.append(right)
+
+        if self.y - 1 >= 0:
+            if world.board[self.x, self.y - 1] != 0:
+                directions.append(up)
+
+        if self.y + 1 < world.board_shape[1]:
+            if world.board[self.x, self.y + 1] != 0:
+                directions.append(down)
+
+        if len(action) == 2:
+            right_kill = left_kill = up_kill = down_kill = False
+            if right in directions:
+                for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            right_kill = True
+            if left in directions:
+                for sub in world.subjectDict[(self.x + left[0], self.y + left[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            left_kill = True
+            if up in directions:
+                for sub in world.subjectDict[(self.x + up[0], self.y + up[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            up_kill = True
+            if down in directions:
+                for sub in world.subjectDict[(self.x + down[0], self.y + down[1])]:
+                    if sub.alive:
+                        if sub.col != self.col:
+                            down_kill = True
+
+            if len(world.subjectDict[(self.x + action[0], self.y + action[1])]) > 0:
+                for sub in world.subjectDict[(self.x + action[0], self.y + action[1])]:
+                    if sub.alive:
+                        self.kills += 1
+                        if sub.col != self.col:
+                            self.lastReward += 10
+                        sub.alive = False
+                        self.alive = True
+
+            self.lastRewards = []
+            if right in directions:
+                sub = self.getClosestSubject(world, self.x + 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x + 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+                if right_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x + 1, self.y] + world.hunter_grass[self.x + 1, self.y] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if left in directions:
+                sub = self.getClosestSubject(world, self.x - 1, self.y)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - 1 - sub.x) + np.square(self.y - sub.y))
+                distReward = self.viewD - dist
+                if left_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x - 1, self.y] + world.hunter_grass[self.x - 1, self.y] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if up in directions:
+                sub = self.getClosestSubject(world, self.x, self.y - 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - sub.y - 1))
+                distReward = self.viewD - dist
+                if up_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x, self.y - 1] + world.hunter_grass[self.x, self.y - 1] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            if down in directions:
+                sub = self.getClosestSubject(world, self.x, self.y + 1)
+                dist = self.viewD
+                if sub is not None:
+                    dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y + 1 - sub.y))
+                distReward = self.viewD - dist
+                if down_kill:
+                    self.lastRewards.append(10 + world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * grass_factor + distReward)
+                else:
+                    self.lastRewards.append(world.trailMix[self.x, self.y + 1] + world.hunter_grass[self.x, self.y + 1] * grass_factor + distReward)
+            else:
+                self.lastRewards.append(0)
+            assert len(self.lastRewards) == 4, 'Last Rewards not filled correctly!'
+
+        world.subjectDict[(self.x, self.y)].remove(self)
+        self.x += action[0]
+        self.y += action[1]
+        self.lastReward += world.trailMix[self.x, self.y]
+        world.subjectDict[(self.x, self.y)].append(self)
+        self.lastReward += (world.hunter_grass[self.x, self.y] * 0.1)
+        world.hunter_grass[self.x, self.y] = 0
+
+        sub = self.getClosestSubject(world, self.x, self.y)
+        dist = self.viewD
+        if sub is not None:
+            dist = np.sqrt(np.square(self.x - sub.x) + np.square(self.y - sub.y))
+        distReward = self.viewD - dist
+
+        self.lastReward += distReward
diff --git a/labirinth_ai/__init__.py b/labirinth_ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/labirinth_ai/loss.py b/labirinth_ai/loss.py
new file mode 100644
index 0000000..333a9a4
--- /dev/null
+++ b/labirinth_ai/loss.py
@@ -0,0 +1,37 @@
+import tensorflow as tf
+
+
+def loss(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+    return tf.reduce_mean(tf.square(nextState[:, 0] + 0.25 * nextState[:, 1] - tf.reduce_sum(
+        nextState[:, 2:6, 0] * (actions[:, :4] + actions[:, 4:]), axis=1))) + tf.reduce_mean(
+        tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss2(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+
+    # return 0.1 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, 4:] + actions[:, :4]),axis=1))) + 0.9 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    # return 0.0 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, :4]),axis=1))) + 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    return tf.reduce_mean(
+        tf.reduce_max(nextState[:, 2:6, 0] * tf.square((nextState[:, 6:, 0] - (actions[:, :4] + actions[:, 4:]))),
+                      axis=1), axis=0)
+
+    # action = nextState[:, 3] * 1 + nextState[:, 4] * 2 + nextState[:, 5] * 3
+    # action = tf.cast(action, tf.int32)
+    # action = tf.reshape(action, (-1,))
+    #
+    # # test = actions[:, action[:]]
+    #
+    # test1 = tf.slice(actions[:, :4], action, (-1, 1))
+    # test2 = tf.slice(actions[:, 4:], action, (-1, 1))
+    #
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (test1 + test2)), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (actions[:, :4] + actions[:, 4:])), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss3(target, pred):
+    return tf.reduce_mean(0.5 * tf.square(0.1 * target[:, 0, 0] + target[:, 1, 0] - (pred[:, 0] + pred[:, 1])) +
+                          0.5 * tf.square(target[:, 1, 0] - pred[:, 0]), axis=0)
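
A quick way to exercise the new loss functions is to evaluate them on small dummy tensors. The snippet below is a minimal sketch, not part of the patch: the shapes and the reading of the two target channels (roughly a future-value term and an immediate-reward term) are assumptions inferred from how loss3 combines them, not something the diff itself states.

# Hypothetical smoke test for labirinth_ai/loss.py -- shapes are assumed for illustration only.
import tensorflow as tf

from labirinth_ai.loss import loss3

# Assumed layout: target[:, 0, 0] ~ discounted future value, target[:, 1, 0] ~ immediate reward;
# pred[:, 0] and pred[:, 1] are the two predicted value heads that loss3 sums in its first term.
target = tf.constant([[[1.0], [0.5]],
                      [[0.2], [0.0]]])   # shape (batch=2, 2, 1)
pred = tf.constant([[0.4, 0.1],
                    [0.1, 0.0]])         # shape (batch=2, 2)

print(float(loss3(target, pred)))        # single scalar, averaged over the batch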