adds labyrinth and subjects as well as performance increases

2022-02-07 21:08:45 +01:00 · 2022-02-07 21:08:45 +01:00 · 0638d5e666
commit 0638d5e666
parent 6c5cae958b
10 changed files with 1591 additions and 234 deletions
--- a/labirinth_ai/LabyrinthClient.py
+++ b/labirinth_ai/LabyrinthClient.py
@ -0,0 +1,43 @@
+import time
+
+from Client.Client import Client, MAX_DISTANCE
+from MatrixStuff.Transformations import perspectiveMatrix
+from labirinth_ai.LabyrinthProvider import LabyrinthProvider
+
+import numpy as np
+
+class LabyrinthClient(Client):
+    def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider):
+        super(LabyrinthClient, self).__init__(test, pos, world_class)
+
+    def draw_world(self):
+        start_time = time.time()
+        for x in range(self.world_provider.world.chunk_size_x * self.world_provider.world.chunk_n_x):
+            for y in range(self.world_provider.world.chunk_size_y * self.world_provider.world.chunk_n_y):
+                if self.world_provider.world.board[x, y] in [1, 2]:
+                    r, g, b = 57, 92, 152
+                    if 1.5 >= self.world_provider.world.hunter_grass[x, y] > 0.5:
+                        r, g, b = 25, 149, 156
+                    if 3 >= self.world_provider.world.hunter_grass[x, y] > 1.5:
+                        r, g, b = 112, 198, 169
+                    self.world_provider.world.set_color(x, y, 0, r / 255.0, g / 255.0, b / 255.0)
+                if self.world_provider.world.board[x, y] == 3:
+                    self.world_provider.world.set_color(x, y, 0, 139 / 255.0, 72 / 255.0, 82 / 255.0)
+
+        for sub in self.world_provider.world.subjects:
+            if not sub.random:
+                # pyxel.rectb(sub.x * 4 + 1, sub.y * 4 + 1, 2, 2, sub.col)
+                self.world_provider.world.set_color(sub.x, sub.y, 0, sub.r / 255.0, sub.g / 255.0, sub.b / 255.0)
+            else:
+                self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0)
+
+        self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)
+        print('redraw', time.time() - start_time)
+
+    def display(self):
+        super(LabyrinthClient, self).display()
+        self.draw_world()
+        self.world_provider.world.update()
+
+if __name__ == '__main__':
+    client = LabyrinthClient(pos=[-50, -50, -200])
--- a/labirinth_ai/LabyrinthProvider.py
+++ b/labirinth_ai/LabyrinthProvider.py
@ -0,0 +1,6 @@
+from WorldProvider.WorldProvider import WorldProvider
+from labirinth_ai.LabyrinthWorld import LabyrinthWorld
+
+class LabyrinthProvider(WorldProvider):
+    def __init__(self, programs):
+        super(LabyrinthProvider, self).__init__(programs, LabyrinthWorld)
--- a/labirinth_ai/LabyrinthWorld.py
+++ b/labirinth_ai/LabyrinthWorld.py
@ -0,0 +1,232 @@
+import time
+
+from Objects.Cube.Cube import Cube
+from Objects.World import World
+import numpy as np
+import random
+
+
+class LabyrinthWorld(World):
+    randomBuffer = 0
+    batchsize = 1000
+    randomBuffer = max(4 * batchsize, randomBuffer)
+
+    def __init__(self, chunk_size_x: int, chunk_size_y: int, chunk_size_z: int,
+                 chunk_n_x: int, chunk_n_y: int, chunk_n_z: int, programs: dict):
+        self.board_shape = (chunk_size_x * chunk_n_x, chunk_size_y * chunk_n_y)
+        self.board = np.zeros(self.board_shape)
+        super(LabyrinthWorld, self).__init__(chunk_size_x, chunk_size_y, chunk_size_z,
+                                             chunk_n_x, chunk_n_y, chunk_n_z, programs)
+        self.max_room_dim = 20
+
+        self.min_room_dim = 6
+
+        self.max_room_num = 32
+        self.max_corridors = 4 * self.max_room_num
+
+        self.max_crates = self.max_room_num
+
+        self.subjects = []
+        self.ins = []
+        self.actions = []
+        self.targets = []
+
+        self.model = None
+        self.lastUpdate = time.time()
+        self.nextTrain = self.randomBuffer
+        self.round = 0
+
+        self.trailMix = np.zeros(self.board_shape)
+        self.grass = np.zeros(self.board_shape)
+        self.hunter_grass = np.zeros(self.board_shape)
+        self.subjectDict = {}
+
+    def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200):
+        board = np.zeros(self.board_shape)
+        random.seed(seed)
+        np.random.seed(seed)
+
+        # find random starting point
+        px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
+        py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
+
+        # 0, 0 is top left
+        right = (1, 0)
+        left = (-1, 0)
+        up = (0, -1)
+        down = (0, 1)
+
+        # place rooms
+        room_num = 0
+        corridor_num = 0
+        while room_num < self.max_room_num and corridor_num < self.max_corridors:
+            # try to place Room
+            w = random.randint(self.min_room_dim, self.max_room_dim)
+            h = random.randint(self.min_room_dim, self.max_room_dim)
+            can_place_room = np.sum(
+                board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] == 1) == 0 and px - int(
+                w / 2.0) >= 0 and px + int(w / 2.0) < self.board_shape[0] and \
+                             py - int(h / 2.0) >= 0 and py + int(h / 2.0) < self.board_shape[1]
+
+            if can_place_room:
+                # place Room
+                board[px - int(w / 2.0):px + int(w / 2.0), py - int(h / 2.0):py + int(h / 2.0)] = 1
+                room_num += 1
+            else:
+                # move && place Corridor
+                directions = []
+                while len(directions) == 0:
+                    movable = []
+                    corridor_length = random.randint(self.min_room_dim, self.max_room_dim)
+                    if px - corridor_length >= 0:
+                        movable.append(left)
+                        if board[px - 1, py] != 2:
+                            directions.append(left)
+
+                    if px + corridor_length < self.board_shape[0]:
+                        movable.append(right)
+                        if board[px + 1, py] != 2:
+                            directions.append(right)
+
+                    if py - corridor_length >= 0:
+                        movable.append(up)
+                        if board[px, py - 1] != 2:
+                            directions.append(up)
+
+                    if py + corridor_length < self.board_shape[1]:
+                        movable.append(down)
+                        if board[px, py + 1] != 2:
+                            directions.append(down)
+
+                    if len(directions) != 0:
+                        if len(directions) > 1:
+                            d = directions[random.randint(0, len(directions) - 1)]
+                        else:
+                            d = directions[0]
+                        changed = False
+                        for _ in range(corridor_length):
+                            if board[px, py] != 1 and board[px, py] != 2:
+                                board[px, py] = 2
+                                if (-d[0], -d[1]) not in movable or board[px - d[0], py - d[1]] != 2:
+                                    changed = True
+                            px += d[0]
+                            py += d[1]
+                        if changed:
+                            corridor_num += 1
+                    else:
+                        if len(movable) != 0:
+                            if len(movable) > 1:
+                                d = movable[random.randint(0, len(movable) - 1)]
+                            else:
+                                d = movable[0]
+                            for _ in range(corridor_length):
+                                px += d[0]
+                                py += d[1]
+
+        crates = 0
+        while crates < self.max_crates:
+            px = random.randint(0, (self.board_shape[0] - 1))
+            py = random.randint(0, (self.board_shape[1] - 1))
+
+            if board[px, py] == 1:
+                board[px, py] = 3
+                crates += 1
+
+        board[board == 2] = 1
+
+        print((room_num, self.max_room_num))
+        print((corridor_num, self.max_corridors))
+        self.board = board
+
+        # setting up the board
+        for x_pos in range(0, self.board_shape[0]):
+            for y_pos in range(0, self.board_shape[1]):
+                for z_pos in range(0, 1):
+                    self.put_object(x_pos, y_pos, z_pos, Cube().setColor(1, 1, 1))
+
+        # adding subjects
+        from labirinth_ai.Subject import Hunter, Herbivore
+        while len(self.subjects) < 2:
+            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+            if self.board[px, py] == 1:
+                self.subjects.append(Hunter(px, py))
+                self.ins += self.subjects[-1].x_in
+                self.actions += self.subjects[-1].actions
+                self.targets += self.subjects[-1].target
+
+        while len(self.subjects) < 10:
+            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+            if self.board[px, py] == 1:
+                self.subjects.append(Herbivore(px, py))
+                self.ins += self.subjects[-1].x_in
+                self.actions += self.subjects[-1].actions
+                self.targets += self.subjects[-1].target
+
+        for x in range(self.board_shape[0]):
+            for y in range(self.board_shape[1]):
+                self.subjectDict[(x, y)] = []
+
+        for sub in self.subjects:
+            self.subjectDict[(sub.x, sub.y)].append(sub)
+
+    def update(self):
+        # start = time.time()
+        if self.model is None:
+            for sub in self.subjects:
+                sub.calculateAction(self)
+        else:
+            states = list(map(lambda e: e.createState(self), self.subjects))
+            states = sum(list(map(lambda e: [e, e, e, e], states)), [])
+            vals = self.model.predict(states)
+            vals = np.reshape(np.transpose(np.reshape(vals, (len(self.subjects), 4, 2)), (0, 2, 1)),
+                              (len(self.subjects), 1, 8))
+            list(map(lambda e: e[1].calculateAction(self, vals[e[0]], states[e[0]]), enumerate(self.subjects)))
+
+        for sub in self.subjects:
+            if sub.alive:
+                sub.update(self, doTrain=self.model is None)
+            sub.tick += 1
+
+        if self.model is not None:
+            if self.round >= self.nextTrain:
+                samples = list(map(lambda e: e.generateSamples(), self.subjects))
+                states = sum(list(map(lambda e: e[0], samples)), [])
+                targets = sum(list(map(lambda e: e[1], samples)), [])
+                self.model.fit(states, targets)
+                self.nextTrain = self.batchsize / 5
+                self.round = 0
+                for sub in self.subjects:
+                    if len(sub.samples) > 20*self.batchsize:
+                        sub.samples = sub.samples[:-20*self.batchsize]
+            else:
+                self.round += 1
+
+        new_subjects = []
+        kill_table = {}
+        live_table = {}
+        for sub in self.subjects:
+            if sub.name not in kill_table.keys():
+                kill_table[sub.name] = 0
+                live_table[sub.name] = 0
+            kill_table[sub.name] += sub.kills
+            live_table[sub.name] += sub.lives
+            if sub.alive:
+                new_subjects.append(sub)
+            else:
+                px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
+                py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
+                while self.board[px, py] == 0:
+                    px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
+                    py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
+                sub.respawnUpdate(px, py, self)
+                new_subjects.append(sub)
+
+        self.subjects = new_subjects
+        self.trailMix *= 0.99
+
+        self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3)
+        self.hunter_grass = np.minimum(self.hunter_grass + 0.01 * (self.board != 0), 3)
+
+        self.trailMix *= (self.trailMix > 0.01)
--- a/labirinth_ai/Subject.py
+++ b/labirinth_ai/Subject.py
--- a/labirinth_ai/init.py
+++ b/labirinth_ai/init.py
--- a/labirinth_ai/loss.py
+++ b/labirinth_ai/loss.py
@ -0,0 +1,37 @@
+import tensorflow as tf
+
+
+def loss(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+    return tf.reduce_mean(tf.square(nextState[:, 0] + 0.25 * nextState[:, 1] - tf.reduce_sum(
+        nextState[:, 2:6, 0] * (actions[:, :4] + actions[:, 4:]), axis=1))) + tf.reduce_mean(
+        tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss2(nextState, actions):
+    # return tf.reduce_sum(tf.square(nextState[:, 2:, 0] * (0.5 * (nextState[:, 0] + 0.25 * nextState[:, 1] - actions))), axis=1)
+
+    # return 0.1 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, 4:] + actions[:, :4]),axis=1))) + 0.9 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    # return 0.0 * tf.reduce_mean(tf.square(0.75 * nextState[:, 1] - tf.reduce_sum(nextState[:, 2:6, 0] * (actions[:, :4]),axis=1))) + 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+    return tf.reduce_mean(
+        tf.reduce_max(nextState[:, 2:6, 0] * tf.square((nextState[:, 6:, 0] - (actions[:, :4] + actions[:, 4:]))),
+                      axis=1), axis=0)
+
+    # action = nextState[:, 3] * 1 + nextState[:, 4] * 2 + nextState[:, 5] * 3
+    # action = tf.cast(action, tf.int32)
+    # action = tf.reshape(action, (-1,))
+    #
+    # # test = actions[:, action[:]]
+    #
+    # test1 = tf.slice(actions[:, :4], action, (-1, 1))
+    # test2 = tf.slice(actions[:, 4:], action, (-1, 1))
+    #
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (test1 + test2)), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+    # return 1.0 * tf.reduce_mean(tf.reduce_sum(tf.square((0.1 * nextState[:, 1] + nextState[:, 6:, 0]) - (actions[:, :4] + actions[:, 4:])), axis=1)) + 0.0 * tf.reduce_mean(tf.reduce_sum(tf.square(nextState[:, 6:, 0] - actions[:, :4]), axis=1), axis=0)
+
+
+def loss3(target, pred):
+    return tf.reduce_mean(0.5 * tf.square(0.1 * target[:, 0, 0] + target[:, 1, 0] - (pred[:, 0] + pred[:, 1]))
+                          + 0.5 * tf.square(target[:, 1, 0] - pred[:, 0]), axis=0)