diff --git a/Client/Client.py b/Client/Client.py index e64554a..80f146e 100644 --- a/Client/Client.py +++ b/Client/Client.py @@ -52,6 +52,7 @@ class Client: self.pos = pos self.time = time.time() + self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) glutReshapeFunc(self.resize) glutDisplayFunc(self.display) @@ -195,7 +196,7 @@ class Client: glutSwapBuffers() - print('fps', 1.0 / (time.time() - self.time)) + # print('fps', 1.0 / (time.time() - self.time)) self.time = time.time() glutPostRedisplay() diff --git a/labirinth_ai/LabyrinthClient.py b/labirinth_ai/LabyrinthClient.py index daa8e93..227c2a0 100644 --- a/labirinth_ai/LabyrinthClient.py +++ b/labirinth_ai/LabyrinthClient.py @@ -1,13 +1,16 @@ import time -from Client.Client import Client, MAX_DISTANCE +from Client.Client import Client, MAX_DISTANCE, glutPostRedisplay from MatrixStuff.Transformations import perspectiveMatrix from labirinth_ai.LabyrinthProvider import LabyrinthProvider import numpy as np + class LabyrinthClient(Client): def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider): + self.render = True + self.round_timer = time.time() super(LabyrinthClient, self).__init__(test, pos, world_class) def draw_world(self): @@ -32,12 +35,25 @@ class LabyrinthClient(Client): self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0) self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE) - print('redraw', time.time() - start_time) + # print('redraw', time.time() - start_time) def display(self): - super(LabyrinthClient, self).display() - self.draw_world() + if self.render: + super(LabyrinthClient, self).display() + self.draw_world() + else: + glutPostRedisplay() self.world_provider.world.update() + # round_end = time.time() + # print('round time', round_end - self.round_timer) + # self.round_timer = round_end + + def keyboardHandler(self, key: int, x: int, y: int): + super().keyboardHandler(key, x, y) + + if key == b' ': + self.render = not self.render + if __name__ == '__main__': client = LabyrinthClient(pos=[-50, -50, -200]) diff --git a/labirinth_ai/LabyrinthWorld.py b/labirinth_ai/LabyrinthWorld.py index b22a0ea..f2adaf9 100644 --- a/labirinth_ai/LabyrinthWorld.py +++ b/labirinth_ai/LabyrinthWorld.py @@ -1,11 +1,11 @@ import time +from typing import Tuple from Objects.Cube.Cube import Cube from Objects.World import World import numpy as np import random - class LabyrinthWorld(World): randomBuffer = 0 batchsize = 1000 @@ -26,21 +26,37 @@ class LabyrinthWorld(World): self.max_crates = self.max_room_num - self.subjects = [] - self.ins = [] - self.actions = [] - self.targets = [] - self.model = None self.lastUpdate = time.time() self.nextTrain = self.randomBuffer - self.round = 0 + self.round = 1 + self.evolve_timer = 10 + # self.evolve_timer = 1500 self.trailMix = np.zeros(self.board_shape) self.grass = np.zeros(self.board_shape) self.hunter_grass = np.zeros(self.board_shape) self.subjectDict = {} + self._hunters = None + self._herbivores = None + + @property + def hunters(self): + if self._hunters is None: + return [] + return self._hunters.subjects + + @property + def herbivores(self): + if self._herbivores is None: + return [] + return self._herbivores.subjects + + @property + def subjects(self): + return self.hunters + self.herbivores + def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200): board = np.zeros(self.board_shape) random.seed(seed) @@ -146,36 +162,40 @@ class LabyrinthWorld(World): # adding subjects from labirinth_ai.Subject import Hunter, Herbivore - for _ in range(10): - while True: - px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) - py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) - if self.board[px, py] == 1: - self.subjects.append(Hunter(px, py)) - self.ins += self.subjects[-1].x_in - self.actions += self.subjects[-1].actions - self.targets += self.subjects[-1].target - break + from labirinth_ai.Population import Population + self._hunters = Population(Hunter, self, 10) - for _ in range(40): - while True: - px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) - py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) - if self.board[px, py] == 1: - self.subjects.append(Herbivore(px, py)) - self.ins += self.subjects[-1].x_in - self.actions += self.subjects[-1].actions - self.targets += self.subjects[-1].target - break + self._herbivores = Population(Herbivore, self, 40) + self.subjectDict = self.build_subject_dict() + + def generate_free_coordinates(self) -> Tuple[int, int]: + while True: + px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim) + py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim) + if self.board[px, py] == 1: + return px, py + + def build_subject_dict(self): + subject_dict = {} for x in range(self.board_shape[0]): for y in range(self.board_shape[1]): - self.subjectDict[(x, y)] = [] + subject_dict[(x, y)] = [] for sub in self.subjects: - self.subjectDict[(sub.x, sub.y)].append(sub) + subject_dict[(sub.x, sub.y)].append(sub) + return subject_dict def update(self): + + if self.round % self.evolve_timer == 0: + print('Evolve population') + self.round = 0 + self._hunters.evolve() + self._herbivores.evolve() + self.subjectDict = self.build_subject_dict() + self.round += 1 + # start = time.time() for sub in self.subjects: sub.calculateAction(self) @@ -185,7 +205,6 @@ class LabyrinthWorld(World): sub.update(self) sub.tick += 1 - new_subjects = [] kill_table = {} live_table = {} for sub in self.subjects: @@ -194,18 +213,14 @@ class LabyrinthWorld(World): live_table[sub.name] = 0 kill_table[sub.name] += sub.kills live_table[sub.name] += sub.lives - if sub.alive: - new_subjects.append(sub) - else: + if not sub.alive: px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) while self.board[px, py] == 0: px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim) py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim) sub.respawnUpdate(px, py, self) - new_subjects.append(sub) - self.subjects = new_subjects self.trailMix *= 0.99 self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3) diff --git a/labirinth_ai/Models/BaseModel.py b/labirinth_ai/Models/BaseModel.py index 9678f50..2434b61 100644 --- a/labirinth_ai/Models/BaseModel.py +++ b/labirinth_ai/Models/BaseModel.py @@ -44,8 +44,8 @@ class BaseModel(nn.Module): class BaseDataSet(Dataset): def __init__(self, states, targets): assert len(states) == len(targets), "Needs to have as many states as targets!" - self.states = torch.tensor(states, dtype=torch.float32) - self.targets = torch.tensor(targets, dtype=torch.float32) + self.states = torch.tensor(np.array(states), dtype=torch.float32) + self.targets = torch.tensor(np.array(targets), dtype=torch.float32) def __len__(self): return len(self.states) @@ -69,7 +69,7 @@ def create_loss_function(action): def from_numpy(x): - return torch.tensor(x, dtype=torch.float32) + return torch.tensor(np.array(x), dtype=torch.float32) def train(states, targets, model, optimizer): diff --git a/labirinth_ai/Models/EvolutionModel.py b/labirinth_ai/Models/EvolutionModel.py index 38276f6..8a180d5 100644 --- a/labirinth_ai/Models/EvolutionModel.py +++ b/labirinth_ai/Models/EvolutionModel.py @@ -3,40 +3,16 @@ from torch import nn import numpy as np import tqdm from torch.utils.data import Dataset, DataLoader -from labirinth_ai.Models.BaseModel import device - - -class NodeGene: - valid_types = ['sensor', 'hidden', 'output'] - - def __init__(self, node_id, node_type, bias=None): - assert node_type in self.valid_types, 'Unknown node type!' - self.node_id = node_id - self.node_type = node_type - if node_type == 'hidden': - assert bias is not None, 'Expected a bias for hidden node types!' - self.bias = bias - else: - self.bias = None - - -class ConnectionGene: - def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False): - self.start = start - self.end = end - self.enabled = enabled - self.innvovation_num = innovation_num - self.recurrent = recurrent - if weight is None: - self.weight = np.random.random(1)[0] * 2 - 1.0 - else: - self.weight = weight +from labirinth_ai.Models.BaseModel import device, BaseDataSet, create_loss_function, create_optimizer +from labirinth_ai.Models.Genotype import Genotype class EvolutionModel(nn.Module): evolutionary = True - def __init__(self, view_dimension, action_num, channels, genes=None): + def __init__(self, view_dimension, action_num, channels, genes: Genotype = None, genotype_class=None): + if genotype_class is None: + genotype_class = Genotype super(EvolutionModel, self).__init__() self.flatten = nn.Flatten() @@ -46,25 +22,29 @@ class EvolutionModel(nn.Module): if genes is None: self.num_input_nodes = channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2 + self.genes = genotype_class(action_num, self.num_input_nodes) + else: + self.num_input_nodes = len(list(filter(lambda element: element[1].node_type == 'sensor', genes.nodes.items()))) + assert self.num_input_nodes > 0, 'Network needs to have sensor nodes!' + is_input_over = False + is_output_over = False + for key, node in genes.nodes.items(): + if node.node_type == 'sensor': + if is_input_over: + raise ValueError('Node genes need to follow the order sensor, output, hidden!') - self.genes = {'nodes': {}, 'connections': []} - node_id = 0 - for _ in range(self.num_input_nodes): - self.genes['nodes'][node_id] = NodeGene(node_id, 'sensor') - node_id += 1 - first_action = node_id - for _ in range(action_num * 2): - self.genes['nodes'][node_id] = NodeGene(node_id, 'output') - node_id += 1 + if node.node_type == 'output': + is_input_over = True + if is_output_over: + raise ValueError('Node genes need to follow the order sensor, output, hidden!') - for index in range(self.num_input_nodes): - for action in range(action_num * 2): - self.genes['connections'].append( - ConnectionGene(index, first_action + action, True, index*(action_num * 2) + action) - ) + if node.node_type == 'hidden': + is_output_over = True + + self.genes = genes self.incoming_connections = {} - for connection in self.genes['connections']: + for connection in self.genes.connections: if connection.end not in self.incoming_connections.keys(): self.incoming_connections[connection.end] = [] self.incoming_connections[connection.end].append(connection) @@ -73,16 +53,17 @@ class EvolutionModel(nn.Module): self.indices = {} self.has_recurrent = False - non_recurrent_indices = {} + self.non_recurrent_indices = {} + self.recurrent_indices = {} with torch.no_grad(): for key, value in self.incoming_connections.items(): value.sort(key=lambda element: element.start) - lin = nn.Linear(len(value), 1, bias=self.genes['nodes'][key].bias is not None) + lin = nn.Linear(len(value), 1, bias=self.genes.nodes[key].bias is not None) for index, connection in enumerate(value): lin.weight[0, index] = value[index].weight - if self.genes['nodes'][key].bias is not None: - lin.bias[0] = self.genes['nodes'][key].bias + if self.genes.nodes[key].bias is not None: + lin.bias[0] = self.genes.nodes[key].bias non_lin = nn.ELU() sequence = nn.Sequential( @@ -93,15 +74,17 @@ class EvolutionModel(nn.Module): self.layers[key] = sequence self.indices[key] = list(map(lambda element: element.start, value)) - non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value)) - if not self.has_recurrent and len(non_recurrent_indices[key]) != len(self.indices[key]): + self.non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value)) + self.recurrent_indices[key] = list(filter(lambda element: element.recurrent, value)) + if not self.has_recurrent and len(self.non_recurrent_indices[key]) != len(self.indices[key]): self.has_recurrent = True - non_recurrent_indices[key] = list(map(lambda element: element.start, non_recurrent_indices[key])) + self.non_recurrent_indices[key] = list(map(lambda element: element.start, self.non_recurrent_indices[key])) + self.recurrent_indices[key] = list(map(lambda element: element.start, self.recurrent_indices[key])) rank_of_node = {} for i in range(self.num_input_nodes): rank_of_node[i] = 0 - layers_to_add = list(non_recurrent_indices.items()) + layers_to_add = list(self.non_recurrent_indices.items()) while len(layers_to_add) > 0: for index, (key, incoming_nodes) in enumerate(list(layers_to_add)): max_rank = -1 @@ -120,44 +103,123 @@ class EvolutionModel(nn.Module): ranked_layers = list(rank_of_node.items()) ranked_layers.sort(key=lambda element: element[1]) ranked_layers = list(filter(lambda element: element[1] > 0, ranked_layers)) - self.layer_order = list(map(lambda element: element[0], ranked_layers)) - self.memory = torch.Tensor((max(map(lambda element: element[1].node_id, self.genes['nodes'].items())) + 1)) - def forward(self, x, memory=None): + ranked_layers = list(map(lambda element: (element, 0), + filter(lambda recurrent_element: + recurrent_element not in list( + map(lambda ranked_layer: ranked_layer[0], ranked_layers) + ), + list(filter(lambda recurrent_keys: + len(self.recurrent_indices[recurrent_keys]) > 0, + self.recurrent_indices.keys()))))) + ranked_layers + + self.layer_order = list(map(lambda element: element[0], ranked_layers)) + self.memory_size = (max(map(lambda element: element[1].node_id, self.genes.nodes.items())) + 1) + self.memory = torch.Tensor(self.memory_size) + self.output_range = range(self.num_input_nodes, self.num_input_nodes + self.action_num * 2) + + def forward(self, x, last_memory=None): x_flat = self.flatten(x) - if memory is None: - memory = torch.Tensor(self.memory) - outs = [] - for batch_element in x_flat: - memory[0:self.num_input_nodes] = batch_element - for layer_index in self.layer_order: - memory[layer_index] = self.layers[layer_index](memory[self.indices[layer_index]]) - outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2]) - outs = torch.stack(outs) - self.memory = torch.Tensor(memory) - return torch.reshape(outs, (x.shape[0], 4, 2)) - else: - memory[:, 0:self.num_input_nodes] = x + if last_memory is not None: + last_memory_flat = self.flatten(last_memory) + elif self.has_recurrent: + raise ValueError('Recurrent networks need to be passed their previous memory!') + + memory = torch.Tensor(self.memory_size) + outs = [] + for batch_index, batch_element in enumerate(x_flat): + memory[0:self.num_input_nodes] = batch_element for layer_index in self.layer_order: - memory[:, layer_index] = self.layers[layer_index](memory[:, self.indices[layer_index]]) - return torch.reshape( - memory[:, self.num_input_nodes: self.num_input_nodes + self.action_num * 2], - (x.shape[0], 4, 2)) + non_recurrent_in = memory[self.non_recurrent_indices[layer_index]] + non_recurrent_in = torch.stack([non_recurrent_in]) + if self.has_recurrent and len(self.recurrent_indices[layer_index]) > 0: + recurrent_in = last_memory_flat[batch_index, self.recurrent_indices[layer_index]] + recurrent_in = torch.stack([recurrent_in]) + + combined_in = torch.concat([non_recurrent_in, recurrent_in], dim=1) + else: + combined_in = non_recurrent_in + + memory[layer_index] = self.layers[layer_index](combined_in) + outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2]) + outs = torch.stack(outs) + self.memory = torch.Tensor(memory) + return torch.reshape(outs, (x.shape[0], outs.shape[1]//2, 2)) + + def update_genes_with_weights(self): + for key, value in self.incoming_connections.items(): + value.sort(key=lambda element: element.start) + + sequence = self.layers[key] + lin = sequence[0] + for index, connection in enumerate(value): + value[index].weight = float(lin.weight[0, index]) + if self.genes.nodes[key].bias is not None: + self.genes.nodes[key].bias = float(lin.bias[0]) + + + +class RecurrentDataSet(BaseDataSet): + def __init__(self, states, targets, memory): + super().__init__(states, targets) + assert len(states) == len(memory), "Needs to have as many states as memories!" + self.memory = torch.tensor(np.array(memory), dtype=torch.float32) + + def __getitem__(self, idx): + return self.states[idx], self.memory[idx], self.targets[idx] + + +def train_recurrent(states, memory, targets, model, optimizer): + for action in range(model.action_num): + data_set = RecurrentDataSet(states[action], targets[action], memory[action]) + dataloader = DataLoader(data_set, batch_size=64, shuffle=True) + loss_fn = create_loss_function(action) + + size = len(dataloader) + model.train() + for batch, (X, M, y) in enumerate(dataloader): + X, y, M = X.to(device), y.to(device), M.to(device) + + # Compute prediction error + pred = model(X, M) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward(retain_graph=True) + optimizer.step() + + if batch % 100 == 0: + loss, current = loss.item(), batch * len(X) + print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") + model.eval() + + del data_set + del dataloader if __name__ == '__main__': - sample = np.random.random((1, 486)) + sample = np.random.random((1, 1)) + last_memory = np.zeros((1, 3)) - model = EvolutionModel(5, 4, 4).to(device) - print(model) + from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype + genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)}, + connections=[ConnectionGene(0, 2, True, 0, recurrent=True), ConnectionGene(2, 1, True, 1, 1)]) + + model = EvolutionModel(1, 1, 1, genes) + + model = model.to(device) + # print(model) print(model.has_recurrent) - test = model(torch.tensor(sample, dtype=torch.float32)) + test = model(torch.tensor(sample, dtype=torch.float32), torch.tensor(last_memory, dtype=torch.float32)) # test = test.cpu().detach().numpy() - print(test) + # print(test) - state = np.random.random((1, 486)) - target = np.random.random((4, 2)) + state = np.random.random((1, 1)) + memory = np.random.random((1, 1)) + + target = np.random.random((2, 1)) states = [ [state], [state], @@ -170,7 +232,12 @@ if __name__ == '__main__': [target], [target], ] + memories = [ + [memory], + [memory], + [memory], + [memory], + ] optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3) - from labirinth_ai.Models.BaseModel import train - train(states, targets, model, optimizer) + train_recurrent(states, memories, targets, model, optimizer) diff --git a/labirinth_ai/Models/Genotype.py b/labirinth_ai/Models/Genotype.py new file mode 100644 index 0000000..4bea59f --- /dev/null +++ b/labirinth_ai/Models/Genotype.py @@ -0,0 +1,139 @@ +from abc import abstractmethod +from typing import List, Dict + +import numpy as np + + +class NodeGene: + valid_types = ['sensor', 'hidden', 'output'] + + def __init__(self, node_id, node_type, bias=None): + assert node_type in self.valid_types, 'Unknown node type!' + self.node_id = node_id + self.node_type = node_type + if node_type == 'hidden': + assert bias is not None, 'Expected a bias for hidden node types!' + self.bias = bias + else: + self.bias = None + + +class ConnectionGene: + def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False): + self.start = start + self.end = end + self.enabled = enabled + self.innvovation_num = innovation_num + self.recurrent = recurrent + if weight is None: + self.weight = np.random.random(1)[0] * 2 - 1.0 + else: + self.weight = weight + + +class Genotype: + def __init__(self, action_num: int = None, num_input_nodes: int = None, + nodes: Dict[int, NodeGene] = None, connections: List[ConnectionGene] = None): + self.nodes = {} + self.connections = [] + if action_num is not None and num_input_nodes is not None: + node_id = 0 + for _ in range(num_input_nodes): + self.nodes[node_id] = NodeGene(node_id, 'sensor') + node_id += 1 + first_action = node_id + for _ in range(action_num * 2): + self.nodes[node_id] = NodeGene(node_id, 'output') + node_id += 1 + + for index in range(num_input_nodes): + for action in range(action_num * 2): + self.connections.append( + ConnectionGene(index, first_action + action, True, index * (action_num * 2) + action) + ) + if nodes is not None and connections is not None: + self.nodes = nodes + self.connections = connections + + def calculate_rank_of_nodes(self): + rank_of_node = {} + nodes_to_rank = list(self.nodes.items()) + while len(nodes_to_rank) > 0: + for list_index, (id, node) in enumerate(nodes_to_rank): + incoming_connections = list(filter(lambda connection: connection.end == id and + not connection.recurrent, self.connections)) + if len(incoming_connections) == 0: + rank_of_node[id] = 0 + nodes_to_rank.pop(list_index) + break + + incoming_connections_starts = list(map(lambda connection: connection.start, incoming_connections)) + start_ranks = list(map(lambda element: rank_of_node[element[0]], + filter(lambda start_node: start_node[0] in incoming_connections_starts and + start_node[0] in rank_of_node.keys(), + self.nodes.items()))) + if len(start_ranks) == len(incoming_connections): + rank_of_node[id] = max(start_ranks) + 1 + nodes_to_rank.pop(list_index) + break + return rank_of_node + + @abstractmethod + def mutate(self, innovation_num) -> int: + """ + Decides whether or not to mutate this network. Then returns the new innovation number. + :param innovation_num: Current innovation number + :return: Updated innovation number + """ + + # return innovation_num + raise NotImplementedError() + + @abstractmethod + def cross(self, other): + raise NotImplementedError() + # return self + + +class NeatLike(Genotype): + connection_add_thr = 0.3 + node_add_thr = 0.3 + + def mutate(self, innovation_num, allow_recurrent=False) -> int: + """ + Decides whether or not to mutate this network. Then returns the new innovation number. + :param allow_recurrent: Optional parameter allowing or disallowing recurrent connections to form + :param innovation_num: Current innovation number + :return: Updated innovation number + """ + # add connection + if np.random.random(1)[0] < self.connection_add_thr or True: + nodes = list(self.nodes.keys()) + rank_of_node = self.calculate_rank_of_nodes() + end_nodes = list(filter(lambda node: rank_of_node[node] > 0, nodes)) + + connection_tuple = list(map(lambda connection: (connection.start, connection.end), self.connections)) + + start = np.random.randint(0, len(nodes)) + end = np.random.randint(0, len(end_nodes)) + + tries = 50 + while (rank_of_node[end_nodes[end]] == 0 or + ((not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]) + or nodes[start] == end_nodes[end] or (nodes[start], end_nodes[end]) in connection_tuple) and\ + tries > 0: + end = np.random.randint(0, len(end_nodes)) + if (not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]: + start = np.random.randint(0, len(nodes)) + tries -= 1 + if tries > 0: + innovation_num += 1 + self.connections.append( + ConnectionGene(nodes[start], end_nodes[end], True, innovation_num, + recurrent=rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]])) + #todo add node + + return innovation_num + + def cross(self, other): + return self diff --git a/labirinth_ai/Population.py b/labirinth_ai/Population.py new file mode 100644 index 0000000..70eef4f --- /dev/null +++ b/labirinth_ai/Population.py @@ -0,0 +1,97 @@ +import random +import numpy as np + +from labirinth_ai.Models.Genotype import NeatLike + + +def fib(n): + if n == 0: + return [1] + elif n < 0: + return [0] + else: + return [fib(n - 1)[0] + fib(n - 2)[0]] + fib(n - 1) + + +class Population: + def __init__(self, subject_class, world, subject_number): + self.subjects = [] + self.world = world + for _ in range(subject_number): + px, py = self.world.generate_free_coordinates() + self.subjects.append(subject_class(px, py, genotype_class=NeatLike)) + self.subject_number = subject_number + self.subject_class = subject_class + + def select(self): + ranked = list(self.subjects) + ranked.sort(key=lambda subject: subject.accumulated_rewards, reverse=True) + + return ranked[:int(self.subject_number / 2)] + + @classmethod + def scatter(cls, n, buckets): + out = np.zeros(buckets) + if n == 0: + return out + + fib_number = 0 + fibs = fib(fib_number) + while np.sum(fibs) <= n and len(fibs) <= buckets: + fib_number += 1 + fibs = fib(fib_number) + fib_number -= 1 + fibs = fib(fib_number) + + for bucket in range(buckets): + if bucket < len(fibs): + out[bucket] += fibs[bucket] + else: + break + + return out + cls.scatter(n - np.sum(fibs), buckets) + + def evolve(self): + # get updated weights from the models + for subject in self.subjects: + subject.model.update_genes_with_weights() + + # crossbreed the current pop + best_subjects = self.select() + distribution = list(self.scatter(self.subject_number - int(self.subject_number / 2), int(self.subject_number / 2))) + + new_subjects = list(best_subjects) + for index, offspring_num in enumerate(distribution): + for _ in range(int(offspring_num)): + parent_1 = best_subjects[index] + parent_2 = best_subjects[random.randint(index + 1, len(best_subjects) - 1)] + + new_genes = parent_1.model.genes.cross(parent_2.model.genes) + + # position doesn't matter, since mutation will set it + new_subject = self.subject_class(0, 0, new_genes) + new_subject.history = parent_1.history + new_subject.samples = parent_1.samples + parent_2.samples + new_subjects.append(new_subject) + + assert len(new_subjects) == self.subject_number, 'All generations should have constant size!' + + # mutate the pop + mutated_subjects = [] + innovation_num = max(map(lambda subject: max(map(lambda connection: connection.innvovation_num, + subject.model.genes.connections + ) + ) + , new_subjects)) + for subject in new_subjects: + subject.accumulated_rewards = 0 + + innovation_num = subject.model.genes.mutate(innovation_num) + + px, py = self.world.generate_free_coordinates() + new_subject = self.subject_class(px, py, subject.model.genes) + new_subject.history = subject.history + new_subject.samples = subject.samples + mutated_subjects.append(new_subject) + + self.subjects = mutated_subjects diff --git a/labirinth_ai/Subject.py b/labirinth_ai/Subject.py index f9426eb..dc8e886 100644 --- a/labirinth_ai/Subject.py +++ b/labirinth_ai/Subject.py @@ -4,6 +4,7 @@ import tensorflow as tf from tensorflow import keras from labirinth_ai.LabyrinthWorld import LabyrinthWorld +from labirinth_ai.Models.EvolutionModel import EvolutionModel from labirinth_ai.loss import loss2, loss3 from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy @@ -350,7 +351,7 @@ class NetLearner(Subject): self.strikes = 0 - def __init__(self, x, y): + def __init__(self, x, y, genes=None, genotype_class=None): super(NetLearner, self).__init__(x, y) self.action = None @@ -370,7 +371,10 @@ class NetLearner(Subject): self.x_in = [] self.actions = [] self.target = [] - self.model = BaseModel(self.viewD, 4, 4).to(device) + + # self.model = BaseModel(self.viewD, 4, 4).to(device) + self.model = EvolutionModel(self.viewD, 4, 4, genes=genes, genotype_class=genotype_class).to(device) + self.optimizer = create_optimizer(self.model) if len(self.samples) < self.randomBuffer: @@ -540,9 +544,11 @@ class NetLearner(Subject): # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer: if len(self.samples) > self.nextTrain and doTrain: - print('train') + print('train', len(self.samples)) self.train() + self.nextTrain = len(self.samples) self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize) + print(len(self.samples), self.nextTrain) self.accumulated_rewards += self.lastReward @@ -657,23 +663,6 @@ class Herbivore(NetLearner): samples = [] - # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2)) - # target = keras.Input(shape=(10, 1)) - # inVec = keras.layers.Flatten()(x_in) - # # kernel_regularizer=keras.regularizers.l2(0.01) - # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec) - # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions) - # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions) - # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec) - # - # model = keras.Model(inputs=x_in, outputs=actions) - # - # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target]) - # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target]) - - # def __init__(self, x, y): - # super(Herbivore, self).__init__(x, y) - def createState(self, world: LabyrinthWorld): state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1 state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float) # - 1