neat implementation up to mutate

2022-08-12 15:48:30 +02:00 · 2022-08-12 15:48:30 +02:00 · cf4d773c10
commit cf4d773c10
parent 4a05baa103
8 changed files with 468 additions and 144 deletions
--- a/Client/Client.py
+++ b/Client/Client.py
@ -52,6 +52,7 @@ class Client:

        self.pos = pos
        self.time = time.time()
+        self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)

        glutReshapeFunc(self.resize)
        glutDisplayFunc(self.display)
@ -195,7 +196,7 @@ class Client:

        glutSwapBuffers()

-        print('fps', 1.0 / (time.time() - self.time))
+        # print('fps', 1.0 / (time.time() - self.time))
        self.time = time.time()
        glutPostRedisplay()

--- a/labirinth_ai/LabyrinthClient.py
+++ b/labirinth_ai/LabyrinthClient.py
@ -1,13 +1,16 @@
 import time

-from Client.Client import Client, MAX_DISTANCE
+from Client.Client import Client, MAX_DISTANCE, glutPostRedisplay
 from MatrixStuff.Transformations import perspectiveMatrix
 from labirinth_ai.LabyrinthProvider import LabyrinthProvider

 import numpy as np

+
 class LabyrinthClient(Client):
    def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider):
+        self.render = True
+        self.round_timer = time.time()
        super(LabyrinthClient, self).__init__(test, pos, world_class)

    def draw_world(self):
@ -32,12 +35,25 @@ class LabyrinthClient(Client):
                self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0)

        self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)
-        print('redraw', time.time() - start_time)
+        # print('redraw', time.time() - start_time)

    def display(self):
+        if self.render:
            super(LabyrinthClient, self).display()
            self.draw_world()
+        else:
+            glutPostRedisplay()
        self.world_provider.world.update()
+        # round_end = time.time()
+        # print('round time', round_end - self.round_timer)
+        # self.round_timer = round_end
+
+    def keyboardHandler(self, key: int, x: int, y: int):
+        """Forward the key event to the base client, then toggle rendering on space.
+
+        :param key: Key value handed to the handler; it is compared against ``b' '``.
+        :param x: Passed through unchanged to the base handler.
+        :param y: Passed through unchanged to the base handler.
+        """
+        # NOTE(review): `key` is annotated as int but compared with a bytes literal;
+        # confirm which type the callback really delivers.
+        super().keyboardHandler(key, x, y)
+
+        # Any key other than the space bar leaves the render flag untouched.
+        if key != b' ':
+            return
+        self.render = not self.render
+

 if __name__ == '__main__':
    client = LabyrinthClient(pos=[-50, -50, -200])
--- a/labirinth_ai/LabyrinthWorld.py
+++ b/labirinth_ai/LabyrinthWorld.py
@ -1,11 +1,11 @@
 import time
+from typing import Tuple

 from Objects.Cube.Cube import Cube
 from Objects.World import World
 import numpy as np
 import random

-
 class LabyrinthWorld(World):
    randomBuffer = 0
    batchsize = 1000
@ -26,21 +26,37 @@ class LabyrinthWorld(World):

        self.max_crates = self.max_room_num

-        self.subjects = []
-        self.ins = []
-        self.actions = []
-        self.targets = []
-
        self.model = None
        self.lastUpdate = time.time()
        self.nextTrain = self.randomBuffer
-        self.round = 0
+        self.round = 1
+        self.evolve_timer = 10
+        # self.evolve_timer = 1500

        self.trailMix = np.zeros(self.board_shape)
        self.grass = np.zeros(self.board_shape)
        self.hunter_grass = np.zeros(self.board_shape)
        self.subjectDict = {}

+        self._hunters = None
+        self._herbivores = None
+
+    @property
+    def hunters(self):
+        """Subjects of the hunter population, or an empty list while none is set."""
+        population = self._hunters
+        return [] if population is None else population.subjects
+
+    @property
+    def herbivores(self):
+        """Subjects of the herbivore population, or an empty list while none is set."""
+        population = self._herbivores
+        return [] if population is None else population.subjects
+
+    @property
+    def subjects(self):
+        """All subjects: hunters first, then herbivores, joined into one new list."""
+        predators = self.hunters
+        prey = self.herbivores
+        return predators + prey
+
    def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200):
        board = np.zeros(self.board_shape)
        random.seed(seed)
@ -146,36 +162,40 @@ class LabyrinthWorld(World):

        # adding subjects
        from labirinth_ai.Subject import Hunter, Herbivore
-        for _ in range(10):
+        from labirinth_ai.Population import Population
+        self._hunters = Population(Hunter, self, 10)
+
+        self._herbivores = Population(Herbivore, self, 40)
+
+        self.subjectDict = self.build_subject_dict()
+
+    def generate_free_coordinates(self) -> Tuple[int, int]:
        while True:
            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
            if self.board[px, py] == 1:
-                    self.subjects.append(Hunter(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
-
-        for _ in range(40):
-            while True:
-                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-                if self.board[px, py] == 1:
-                    self.subjects.append(Herbivore(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
+                return px, py

+    def build_subject_dict(self):
+        subject_dict = {}
        for x in range(self.board_shape[0]):
            for y in range(self.board_shape[1]):
-                self.subjectDict[(x, y)] = []
+                subject_dict[(x, y)] = []

        for sub in self.subjects:
-            self.subjectDict[(sub.x, sub.y)].append(sub)
+            subject_dict[(sub.x, sub.y)].append(sub)
+        return subject_dict

    def update(self):
+
+        if self.round % self.evolve_timer == 0:
+            print('Evolve population')
+            self.round = 0
+            self._hunters.evolve()
+            self._herbivores.evolve()
+            self.subjectDict = self.build_subject_dict()
+        self.round += 1
+
        # start = time.time()
        for sub in self.subjects:
            sub.calculateAction(self)
@ -185,7 +205,6 @@ class LabyrinthWorld(World):
                sub.update(self)
            sub.tick += 1

-        new_subjects = []
        kill_table = {}
        live_table = {}
        for sub in self.subjects:
@ -194,18 +213,14 @@ class LabyrinthWorld(World):
                live_table[sub.name] = 0
            kill_table[sub.name] += sub.kills
            live_table[sub.name] += sub.lives
-            if sub.alive:
-                new_subjects.append(sub)
-            else:
+            if not sub.alive:
                px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                while self.board[px, py] == 0:
                    px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                    py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                sub.respawnUpdate(px, py, self)
-                new_subjects.append(sub)

-        self.subjects = new_subjects
        self.trailMix *= 0.99

        self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3)
--- a/labirinth_ai/Models/BaseModel.py
+++ b/labirinth_ai/Models/BaseModel.py
@ -44,8 +44,8 @@ class BaseModel(nn.Module):
 class BaseDataSet(Dataset):
    def __init__(self, states, targets):
        assert len(states) == len(targets), "Needs to have as many states as targets!"
-        self.states = torch.tensor(states, dtype=torch.float32)
-        self.targets = torch.tensor(targets, dtype=torch.float32)
+        self.states = torch.tensor(np.array(states), dtype=torch.float32)
+        self.targets = torch.tensor(np.array(targets), dtype=torch.float32)

    def __len__(self):
        return len(self.states)
@ -69,7 +69,7 @@ def create_loss_function(action):


 def from_numpy(x):
-    return torch.tensor(x, dtype=torch.float32)
+    return torch.tensor(np.array(x), dtype=torch.float32)


 def train(states, targets, model, optimizer):
--- a/labirinth_ai/Models/EvolutionModel.py
+++ b/labirinth_ai/Models/EvolutionModel.py
@ -3,40 +3,16 @@ from torch import nn
 import numpy as np
 import tqdm
 from torch.utils.data import Dataset, DataLoader
-from labirinth_ai.Models.BaseModel import device
-
-
-class NodeGene:
-    valid_types = ['sensor', 'hidden', 'output']
-
-    def __init__(self, node_id, node_type, bias=None):
-        assert node_type in self.valid_types, 'Unknown node type!'
-        self.node_id = node_id
-        self.node_type = node_type
-        if node_type == 'hidden':
-            assert bias is not None, 'Expected a bias for hidden node types!'
-            self.bias = bias
-        else:
-            self.bias = None
-
-
-class ConnectionGene:
-    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
-        self.start = start
-        self.end = end
-        self.enabled = enabled
-        self.innvovation_num = innovation_num
-        self.recurrent = recurrent
-        if weight is None:
-            self.weight = np.random.random(1)[0] * 2 - 1.0
-        else:
-            self.weight = weight
+from labirinth_ai.Models.BaseModel import device, BaseDataSet, create_loss_function, create_optimizer
+from labirinth_ai.Models.Genotype import Genotype


 class EvolutionModel(nn.Module):
    evolutionary = True

-    def __init__(self, view_dimension, action_num, channels, genes=None):
+    def __init__(self, view_dimension, action_num, channels, genes: Genotype = None, genotype_class=None):
+        if genotype_class is None:
+            genotype_class = Genotype
        super(EvolutionModel, self).__init__()
        self.flatten = nn.Flatten()

@ -46,25 +22,29 @@ class EvolutionModel(nn.Module):

        if genes is None:
            self.num_input_nodes = channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2
+            self.genes = genotype_class(action_num, self.num_input_nodes)
+        else:
+            self.num_input_nodes = len(list(filter(lambda element: element[1].node_type == 'sensor', genes.nodes.items())))
+            assert self.num_input_nodes > 0, 'Network needs to have sensor nodes!'
+            is_input_over = False
+            is_output_over = False
+            for key, node in genes.nodes.items():
+                if node.node_type == 'sensor':
+                    if is_input_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')

-            self.genes = {'nodes': {}, 'connections': []}
-            node_id = 0
-            for _ in range(self.num_input_nodes):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'sensor')
-                node_id += 1
-            first_action = node_id
-            for _ in range(action_num * 2):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'output')
-                node_id += 1
+                if node.node_type == 'output':
+                    is_input_over = True
+                    if is_output_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')

-            for index in range(self.num_input_nodes):
-                for action in range(action_num * 2):
-                    self.genes['connections'].append(
-                        ConnectionGene(index, first_action + action, True, index*(action_num * 2) + action)
-                    )
+                if node.node_type == 'hidden':
+                    is_output_over = True
+
+            self.genes = genes

        self.incoming_connections = {}
-        for connection in self.genes['connections']:
+        for connection in self.genes.connections:
            if connection.end not in self.incoming_connections.keys():
                self.incoming_connections[connection.end] = []
            self.incoming_connections[connection.end].append(connection)
@ -73,16 +53,17 @@ class EvolutionModel(nn.Module):
        self.indices = {}

        self.has_recurrent = False
-        non_recurrent_indices = {}
+        self.non_recurrent_indices = {}
+        self.recurrent_indices = {}
        with torch.no_grad():
            for key, value in self.incoming_connections.items():
                value.sort(key=lambda element: element.start)

-                lin = nn.Linear(len(value), 1, bias=self.genes['nodes'][key].bias is not None)
+                lin = nn.Linear(len(value), 1, bias=self.genes.nodes[key].bias is not None)
                for index, connection in enumerate(value):
                    lin.weight[0, index] = value[index].weight
-                if self.genes['nodes'][key].bias is not None:
-                    lin.bias[0] = self.genes['nodes'][key].bias
+                if self.genes.nodes[key].bias is not None:
+                    lin.bias[0] = self.genes.nodes[key].bias

                non_lin = nn.ELU()
                sequence = nn.Sequential(
@ -93,15 +74,17 @@ class EvolutionModel(nn.Module):
                self.layers[key] = sequence
                self.indices[key] = list(map(lambda element: element.start, value))

-                non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
-                if not self.has_recurrent and len(non_recurrent_indices[key]) != len(self.indices[key]):
+                self.non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
+                self.recurrent_indices[key] = list(filter(lambda element: element.recurrent, value))
+                if not self.has_recurrent and len(self.non_recurrent_indices[key]) != len(self.indices[key]):
                    self.has_recurrent = True
-                non_recurrent_indices[key] = list(map(lambda element: element.start, non_recurrent_indices[key]))
+                self.non_recurrent_indices[key] = list(map(lambda element: element.start, self.non_recurrent_indices[key]))
+                self.recurrent_indices[key] = list(map(lambda element: element.start, self.recurrent_indices[key]))
        rank_of_node = {}
        for i in range(self.num_input_nodes):
            rank_of_node[i] = 0

-        layers_to_add = list(non_recurrent_indices.items())
+        layers_to_add = list(self.non_recurrent_indices.items())
        while len(layers_to_add) > 0:
            for index, (key, incoming_nodes) in enumerate(list(layers_to_add)):
                max_rank = -1
@ -120,44 +103,123 @@ class EvolutionModel(nn.Module):
        ranked_layers = list(rank_of_node.items())
        ranked_layers.sort(key=lambda element: element[1])
        ranked_layers = list(filter(lambda element: element[1] > 0, ranked_layers))
-        self.layer_order = list(map(lambda element: element[0], ranked_layers))
-        self.memory = torch.Tensor((max(map(lambda element: element[1].node_id, self.genes['nodes'].items())) + 1))

-    def forward(self, x, memory=None):
+        ranked_layers = list(map(lambda element: (element, 0),
+                                 filter(lambda recurrent_element:
+                                        recurrent_element not in list(
+                                            map(lambda ranked_layer: ranked_layer[0], ranked_layers)
+                                        ),
+                                        list(filter(lambda recurrent_keys:
+                                                    len(self.recurrent_indices[recurrent_keys]) > 0,
+                                                    self.recurrent_indices.keys()))))) + ranked_layers
+
+        self.layer_order = list(map(lambda element: element[0], ranked_layers))
+        self.memory_size = (max(map(lambda element: element[1].node_id, self.genes.nodes.items())) + 1)
+        self.memory = torch.Tensor(self.memory_size)
+        self.output_range = range(self.num_input_nodes, self.num_input_nodes + self.action_num * 2)
+
+    def forward(self, x, last_memory=None):
        x_flat = self.flatten(x)
-        if memory is None:
-            memory = torch.Tensor(self.memory)
+        if last_memory is not None:
+            last_memory_flat = self.flatten(last_memory)
+        elif self.has_recurrent:
+            raise ValueError('Recurrent networks need to be passed their previous memory!')
+
+        memory = torch.Tensor(self.memory_size)
        outs = []
-            for batch_element in x_flat:
+        for batch_index, batch_element in enumerate(x_flat):
            memory[0:self.num_input_nodes] = batch_element
            for layer_index in self.layer_order:
-                    memory[layer_index] = self.layers[layer_index](memory[self.indices[layer_index]])
+                non_recurrent_in = memory[self.non_recurrent_indices[layer_index]]
+                non_recurrent_in = torch.stack([non_recurrent_in])
+                if self.has_recurrent and len(self.recurrent_indices[layer_index]) > 0:
+                    recurrent_in = last_memory_flat[batch_index, self.recurrent_indices[layer_index]]
+                    recurrent_in = torch.stack([recurrent_in])
+
+                    combined_in = torch.concat([non_recurrent_in, recurrent_in], dim=1)
+                else:
+                    combined_in = non_recurrent_in
+
+                memory[layer_index] = self.layers[layer_index](combined_in)
            outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2])
        outs = torch.stack(outs)
        self.memory = torch.Tensor(memory)
-            return torch.reshape(outs, (x.shape[0], 4, 2))
-        else:
-            memory[:, 0:self.num_input_nodes] = x
-            for layer_index in self.layer_order:
-                memory[:, layer_index] = self.layers[layer_index](memory[:, self.indices[layer_index]])
-            return torch.reshape(
-                memory[:, self.num_input_nodes: self.num_input_nodes + self.action_num * 2],
-                (x.shape[0], 4, 2))
+        return torch.reshape(outs, (x.shape[0], outs.shape[1]//2, 2))
+
+    def update_genes_with_weights(self):
+        """Write the current layer weights and biases back into the genes.
+
+        For every node that has incoming connections, the first module of its layer
+        sequence supplies one weight per connection (connections ordered by start
+        node) and, for nodes that carry a bias, the bias value.
+        """
+        for node_id, connections in self.incoming_connections.items():
+            # Weights are addressed by position, so restore the start-node ordering
+            # before reading them out.
+            connections.sort(key=lambda connection: connection.start)
+
+            linear = self.layers[node_id][0]
+            for column, connection in enumerate(connections):
+                connection.weight = float(linear.weight[0, column])
+
+            node = self.genes.nodes[node_id]
+            if node.bias is not None:
+                node.bias = float(linear.bias[0])
+
+
+
+class RecurrentDataSet(BaseDataSet):
+    """Data set that yields a memory tensor next to every state/target pair."""
+
+    def __init__(self, states, targets, memory):
+        """Store the memories as a float32 tensor in addition to the base tensors.
+
+        :param states: Forwarded to the base data set.
+        :param targets: Forwarded to the base data set.
+        :param memory: One memory entry per state.
+        """
+        super().__init__(states, targets)
+        assert len(states) == len(memory), "Needs to have as many states as memories!"
+        memory_array = np.array(memory)
+        self.memory = torch.tensor(memory_array, dtype=torch.float32)
+
+    def __getitem__(self, idx):
+        """Return ``(state, memory, target)`` for the sample at ``idx``."""
+        sample = (self.states[idx], self.memory[idx], self.targets[idx])
+        return sample
+
+
+def train_recurrent(states, memory, targets, model, optimizer):
+    """Train ``model`` once per action on that action's states, memories and targets.
+
+    :param states: Indexable by action; ``states[action]`` holds that action's states.
+    :param memory: Indexable by action; ``memory[action]`` holds the matching memories.
+    :param targets: Indexable by action; ``targets[action]`` holds the matching targets.
+    :param model: Model called as ``model(X, M)``; must expose ``action_num``.
+    :param optimizer: Optimizer stepped once per batch.
+    """
+    for action in range(model.action_num):
+        data_set = RecurrentDataSet(states[action], targets[action], memory[action])
+        dataloader = DataLoader(data_set, batch_size=64, shuffle=True)
+        loss_fn = create_loss_function(action)
+
+        # Progress is printed in samples, so the total has to be the number of
+        # samples and not the number of batches.
+        size = len(data_set)
+        model.train()
+        for batch, (X, M, y) in enumerate(dataloader):
+            X, y, M = X.to(device), y.to(device), M.to(device)
+
+            # Compute prediction error
+            pred = model(X, M)
+            loss = loss_fn(pred, y)
+
+            # Backpropagation
+            optimizer.zero_grad()
+            # NOTE(review): retain_graph=True is kept from the original; confirm it
+            # is still required.
+            loss.backward(retain_graph=True)
+            optimizer.step()
+
+            if batch % 100 == 0:
+                current = batch * len(X)
+                print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")
+        model.eval()


 if __name__ == '__main__':
-    sample = np.random.random((1, 486))
+    sample = np.random.random((1, 1))
+    last_memory = np.zeros((1, 3))

-    model = EvolutionModel(5, 4, 4).to(device)
-    print(model)
+    from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype
+    genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)},
+                     connections=[ConnectionGene(0, 2, True, 0, recurrent=True), ConnectionGene(2, 1, True, 1, 1)])
+
+    model = EvolutionModel(1, 1, 1, genes)
+
+    model = model.to(device)
+    # print(model)
    print(model.has_recurrent)

-    test = model(torch.tensor(sample, dtype=torch.float32))
+    test = model(torch.tensor(sample, dtype=torch.float32), torch.tensor(last_memory, dtype=torch.float32))
    # test = test.cpu().detach().numpy()
-    print(test)
+    # print(test)

-    state = np.random.random((1, 486))
-    target = np.random.random((4, 2))
+    state = np.random.random((1, 1))
+    memory = np.random.random((1, 1))
+
+    target = np.random.random((2, 1))
    states = [
        [state],
        [state],
@ -170,7 +232,12 @@ if __name__ == '__main__':
        [target],
        [target],
    ]
+    memories = [
+        [memory],
+        [memory],
+        [memory],
+        [memory],
+    ]

    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
-    from labirinth_ai.Models.BaseModel import train
-    train(states, targets, model, optimizer)
+    train_recurrent(states, memories, targets, model, optimizer)
--- a/labirinth_ai/Models/Genotype.py
+++ b/labirinth_ai/Models/Genotype.py
@ -0,0 +1,139 @@
+from abc import abstractmethod
+from typing import List, Dict
+
+import numpy as np
+
+
+class NodeGene:
+    """Gene for a single network node: its id, its type and an optional bias."""
+    valid_types = ['sensor', 'hidden', 'output']
+
+    def __init__(self, node_id, node_type, bias=None):
+        """Create a node gene.
+
+        :param node_id: Identifier of the node.
+        :param node_type: One of ``valid_types``.
+        :param bias: Required for hidden nodes; discarded for every other type.
+        """
+        assert node_type in self.valid_types, 'Unknown node type!'
+        is_hidden = node_type == 'hidden'
+        if is_hidden:
+            assert bias is not None, 'Expected a bias for hidden node types!'
+
+        self.node_id = node_id
+        self.node_type = node_type
+        # Only hidden nodes keep their bias; sensors and outputs always store None.
+        self.bias = bias if is_hidden else None
+
+
+class ConnectionGene:
+    """Gene for one weighted connection between two nodes."""
+
+    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
+        """Create a connection gene.
+
+        :param start: Id of the node the connection starts at.
+        :param end: Id of the node the connection ends at.
+        :param enabled: Enabled flag stored on the gene.
+        :param innovation_num: Innovation number of this connection.
+        :param weight: Connection weight; drawn uniformly from [-1, 1) when omitted.
+        :param recurrent: Whether the connection is recurrent.
+        """
+        self.start = start
+        self.end = end
+        self.enabled = enabled
+        # The misspelled attribute is kept as the storage slot because existing code
+        # reads it; `innovation_num` below is the correctly spelled alias.
+        self.innvovation_num = innovation_num
+        self.recurrent = recurrent
+        if weight is None:
+            self.weight = np.random.random(1)[0] * 2 - 1.0
+        else:
+            self.weight = weight
+
+    @property
+    def innovation_num(self):
+        """Correctly spelled alias of ``innvovation_num``."""
+        return self.innvovation_num
+
+    @innovation_num.setter
+    def innovation_num(self, value):
+        self.innvovation_num = value
+
+
+class Genotype:
+    """Node and connection genes that describe one network topology.
+
+    ``mutate`` and ``cross`` are marked abstract, but the class has no ABC
+    metaclass, so it can still be instantiated; the base methods simply raise.
+    """
+
+    def __init__(self, action_num: int = None, num_input_nodes: int = None,
+                 nodes: Dict[int, NodeGene] = None, connections: List[ConnectionGene] = None):
+        """Build a fully connected sensor-to-output genome and/or adopt given genes.
+
+        :param action_num: Number of actions; two output nodes are created per action.
+        :param num_input_nodes: Number of sensor nodes to create.
+        :param nodes: Ready-made node genes keyed by node id.
+        :param connections: Ready-made connection genes.
+
+        When both forms are supplied, the explicit ``nodes``/``connections`` win.
+        """
+        self.nodes = {}
+        self.connections = []
+        if action_num is not None and num_input_nodes is not None:
+            output_count = action_num * 2
+            first_action = num_input_nodes
+            for node_id in range(num_input_nodes):
+                self.nodes[node_id] = NodeGene(node_id, 'sensor')
+            for node_id in range(first_action, first_action + output_count):
+                self.nodes[node_id] = NodeGene(node_id, 'output')
+
+            # Every sensor feeds every output; innovation numbers follow that order.
+            for index in range(num_input_nodes):
+                for action in range(output_count):
+                    self.connections.append(
+                        ConnectionGene(index, first_action + action, True, index * output_count + action)
+                    )
+        if nodes is not None and connections is not None:
+            self.nodes = nodes
+            self.connections = connections
+
+    def calculate_rank_of_nodes(self):
+        """Rank every node by its longest chain of non-recurrent connections.
+
+        Nodes without incoming non-recurrent connections get rank 0; any other node
+        gets one more than the highest rank among its predecessors.
+
+        :return: Dict mapping node id to rank.
+        :raises ValueError: If the non-recurrent connections contain a cycle or start
+            at a node that is not part of this genotype, so no ranking exists.
+        """
+        # Distinct predecessor ids per node; a set keeps duplicate connections from
+        # blocking the ranking.
+        predecessors = {node_id: set() for node_id in self.nodes}
+        for connection in self.connections:
+            if not connection.recurrent and connection.end in predecessors:
+                predecessors[connection.end].add(connection.start)
+
+        rank_of_node = {}
+        pending = list(self.nodes)
+        while pending:
+            unresolved = []
+            for node_id in pending:
+                starts = predecessors[node_id]
+                if any(start not in rank_of_node for start in starts):
+                    unresolved.append(node_id)
+                    continue
+                rank_of_node[node_id] = max(rank_of_node[start] for start in starts) + 1 if starts else 0
+            if len(unresolved) == len(pending):
+                # A full pass without progress would otherwise loop forever.
+                raise ValueError('Non-recurrent connections do not allow ranking the nodes '
+                                 f'{unresolved}!')
+            pending = unresolved
+        return rank_of_node
+
+    @abstractmethod
+    def mutate(self, innovation_num) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+
+        # return innovation_num
+        raise NotImplementedError()
+
+    @abstractmethod
+    def cross(self, other):
+        """Combine this genotype with ``other``; subclasses provide the behaviour."""
+        raise NotImplementedError()
+        # return self
+
+
+class NeatLike(Genotype):
+    """NEAT-style genotype; so far only the add-connection mutation is implemented."""
+    connection_add_thr = 0.3
+    node_add_thr = 0.3
+    # Upper bound on random (start, end) draws per add-connection attempt.
+    max_connection_tries = 50
+
+    def mutate(self, innovation_num, allow_recurrent=False) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+
+        With probability ``connection_add_thr`` a new connection between two so far
+        unconnected nodes is added; if no valid pair is found within
+        ``max_connection_tries`` draws the genotype stays unchanged.
+
+        :param allow_recurrent: Optional parameter allowing or disallowing recurrent connections to form
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+        # add connection
+        if np.random.random(1)[0] < self.connection_add_thr:
+            nodes = list(self.nodes.keys())
+            rank_of_node = self.calculate_rank_of_nodes()
+            # Only nodes above rank 0 may receive a new connection.
+            end_nodes = [node for node in nodes if rank_of_node[node] > 0]
+            existing = {(connection.start, connection.end) for connection in self.connections}
+
+            # Without any eligible end node there is nothing to connect to.
+            if end_nodes:
+                for _ in range(self.max_connection_tries):
+                    start = nodes[np.random.randint(0, len(nodes))]
+                    end = end_nodes[np.random.randint(0, len(end_nodes))]
+                    # A connection pointing from a higher to a lower rank is recurrent.
+                    is_recurrent = rank_of_node[start] > rank_of_node[end]
+                    if start == end or (start, end) in existing:
+                        continue
+                    if is_recurrent and not allow_recurrent:
+                        continue
+
+                    innovation_num += 1
+                    self.connections.append(
+                        ConnectionGene(start, end, True, innovation_num, recurrent=is_recurrent))
+                    break
+        # todo add node
+
+        return innovation_num
+
+    def cross(self, other):
+        """Placeholder crossover: returns this genotype unchanged and ignores ``other``."""
+        return self
--- a/labirinth_ai/Population.py
+++ b/labirinth_ai/Population.py
@ -0,0 +1,97 @@
+import random
+import numpy as np
+
+from labirinth_ai.Models.Genotype import NeatLike
+
+
+def fib(n):
+    """Return the Fibonacci numbers up to index ``n`` in descending order.
+
+    ``fib(0) == [1]``, ``fib(1) == [1, 1]``, ``fib(3) == [3, 2, 1, 1]``; a negative
+    ``n`` yields ``[0]``. The list is built iteratively, so the cost is linear in
+    ``n`` instead of exponential.
+
+    :param n: Index of the largest Fibonacci number to include.
+    :return: List with ``n + 1`` entries, largest first (``[0]`` for negative ``n``).
+    """
+    if n < 0:
+        return [0]
+
+    ascending = [1]
+    previous = 0
+    for _ in range(n):
+        previous, current = ascending[-1], ascending[-1] + previous
+        ascending.append(current)
+    ascending.reverse()
+    return ascending
+
+
+class Population:
+    """A fixed-size group of subjects of one class that is evolved generation-wise."""
+
+    def __init__(self, subject_class, world, subject_number):
+        """Create ``subject_number`` subjects at free coordinates of ``world``.
+
+        :param subject_class: Class used to instantiate subjects.
+        :param world: Must provide ``generate_free_coordinates()``.
+        :param subject_number: Size of the population, kept constant by ``evolve``.
+        """
+        self.subjects = []
+        self.world = world
+        for _ in range(subject_number):
+            px, py = self.world.generate_free_coordinates()
+            self.subjects.append(subject_class(px, py, genotype_class=NeatLike))
+        self.subject_number = subject_number
+        self.subject_class = subject_class
+
+    def select(self):
+        """Return the better half of the subjects, highest accumulated reward first."""
+        ranked = sorted(self.subjects, key=lambda subject: subject.accumulated_rewards, reverse=True)
+        return ranked[:self.subject_number // 2]
+
+    @classmethod
+    def scatter(cls, n, buckets):
+        """Distribute ``n`` items over ``buckets`` slots, favouring the first slots.
+
+        Repeatedly takes the longest descending Fibonacci run (1, 1, 2, 3, ...) whose
+        sum still fits into the remainder and whose length fits into ``buckets``,
+        adds it to the leading slots and continues with what is left.
+
+        :param n: Number of items to distribute.
+        :param buckets: Number of slots.
+        :return: Float array of length ``buckets``; sums to ``n`` when ``buckets > 0``.
+        """
+        out = np.zeros(buckets)
+        remaining = n
+        # With no buckets nothing can be placed, so stop instead of looping forever.
+        while remaining >= 1 and buckets > 0:
+            run = [1]
+            total = 1
+            previous = 0
+            while True:
+                following = run[-1] + previous
+                if total + following > remaining or len(run) + 1 > buckets:
+                    break
+                previous = run[-1]
+                run.append(following)
+                total += following
+
+            # Largest share goes to the first bucket.
+            out[:len(run)] += run[::-1]
+            remaining -= total
+        return out
+
+    @staticmethod
+    def _pick_partner(candidates, index):
+        """Pick a second parent for ``candidates[index]``, preferring lower-ranked ones."""
+        last = len(candidates) - 1
+        if index < last:
+            return candidates[random.randint(index + 1, last)]
+        if last == 0:
+            # A single candidate can only be paired with itself.
+            return candidates[0]
+        # The last-ranked candidate has no lower-ranked partner; use a better one.
+        return candidates[random.randint(0, last - 1)]
+
+    def evolve(self):
+        """Replace the population by the mutated best half plus their offspring."""
+        # get updated weights from the models
+        for subject in self.subjects:
+            subject.model.update_genes_with_weights()
+
+        # crossbreed the current pop
+        best_subjects = self.select()
+        survivor_count = self.subject_number // 2
+        distribution = list(self.scatter(self.subject_number - survivor_count, survivor_count))
+
+        new_subjects = list(best_subjects)
+        for index, offspring_num in enumerate(distribution):
+            for _ in range(int(offspring_num)):
+                parent_1 = best_subjects[index]
+                parent_2 = self._pick_partner(best_subjects, index)
+
+                new_genes = parent_1.model.genes.cross(parent_2.model.genes)
+
+                # position doesn't matter, since mutation will set it
+                new_subject = self.subject_class(0, 0, new_genes)
+                new_subject.history = parent_1.history
+                new_subject.samples = parent_1.samples + parent_2.samples
+                new_subjects.append(new_subject)
+
+        assert len(new_subjects) == self.subject_number, 'All generations should have constant size!'
+
+        # mutate the pop
+        mutated_subjects = []
+        # Continue numbering after the highest innovation number in use; genotypes
+        # without connections contribute nothing.
+        innovation_num = max((connection.innvovation_num
+                              for subject in new_subjects
+                              for connection in subject.model.genes.connections),
+                             default=0)
+        for subject in new_subjects:
+            subject.accumulated_rewards = 0
+
+            innovation_num = subject.model.genes.mutate(innovation_num)
+
+            # The model is derived from the genes, so a fresh subject is built from
+            # the mutated genes at a new free position.
+            px, py = self.world.generate_free_coordinates()
+            new_subject = self.subject_class(px, py, subject.model.genes)
+            new_subject.history = subject.history
+            new_subject.samples = subject.samples
+            mutated_subjects.append(new_subject)
+
+        self.subjects = mutated_subjects
--- a/labirinth_ai/Subject.py
+++ b/labirinth_ai/Subject.py
@ -4,6 +4,7 @@ import tensorflow as tf
 from tensorflow import keras

 from labirinth_ai.LabyrinthWorld import LabyrinthWorld
+from labirinth_ai.Models.EvolutionModel import EvolutionModel
 from labirinth_ai.loss import loss2, loss3
 from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy

@ -350,7 +351,7 @@ class NetLearner(Subject):

        self.strikes = 0

-    def __init__(self, x, y):
+    def __init__(self, x, y, genes=None, genotype_class=None):
        super(NetLearner, self).__init__(x, y)

        self.action = None
@ -370,7 +371,10 @@ class NetLearner(Subject):
        self.x_in = []
        self.actions = []
        self.target = []
-        self.model = BaseModel(self.viewD, 4, 4).to(device)
+
+        # self.model = BaseModel(self.viewD, 4, 4).to(device)
+        self.model = EvolutionModel(self.viewD, 4, 4, genes=genes, genotype_class=genotype_class).to(device)
+
        self.optimizer = create_optimizer(self.model)

        if len(self.samples) < self.randomBuffer:
@ -540,9 +544,11 @@ class NetLearner(Subject):

            # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer:
            if len(self.samples) > self.nextTrain and doTrain:
-                print('train')
+                print('train', len(self.samples))
                self.train()
+                self.nextTrain = len(self.samples)
                self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize)
+                print(len(self.samples), self.nextTrain)

        self.accumulated_rewards += self.lastReward

@ -657,23 +663,6 @@ class Herbivore(NetLearner):

    samples = []

-    # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2))
-    # target = keras.Input(shape=(10, 1))
-    # inVec = keras.layers.Flatten()(x_in)
-    # # kernel_regularizer=keras.regularizers.l2(0.01)
-    # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec)
-    # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions)
-    # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions)
-    # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec)
-    #
-    # model = keras.Model(inputs=x_in, outputs=actions)
-    #
-    # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target])
-    # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target])
-
-    # def __init__(self, x, y):
-    #     super(Herbivore, self).__init__(x, y)
-
    def createState(self, world: LabyrinthWorld):
        state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1
        state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1