NEAT implementation up to mutate

zomseffen 2022-08-12 15:48:30 +02:00
parent 4a05baa103
commit cf4d773c10
8 changed files with 468 additions and 144 deletions

View file

@@ -52,6 +52,7 @@ class Client:
         self.pos = pos
         self.time = time.time()
+        self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)
 
         glutReshapeFunc(self.resize)
         glutDisplayFunc(self.display)
@@ -195,7 +196,7 @@ class Client:
         glutSwapBuffers()
-        print('fps', 1.0 / (time.time() - self.time))
+        # print('fps', 1.0 / (time.time() - self.time))
         self.time = time.time()
         glutPostRedisplay()

View file

@@ -1,13 +1,16 @@
 import time
-from Client.Client import Client, MAX_DISTANCE
+from Client.Client import Client, MAX_DISTANCE, glutPostRedisplay
 from MatrixStuff.Transformations import perspectiveMatrix
 from labirinth_ai.LabyrinthProvider import LabyrinthProvider
 
 import numpy as np
 
 class LabyrinthClient(Client):
     def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider):
+        self.render = True
+        self.round_timer = time.time()
         super(LabyrinthClient, self).__init__(test, pos, world_class)
 
     def draw_world(self):
@@ -32,12 +35,25 @@ class LabyrinthClient(Client):
                     self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0)
         self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)
-        print('redraw', time.time() - start_time)
+        # print('redraw', time.time() - start_time)
 
     def display(self):
-        super(LabyrinthClient, self).display()
-        self.draw_world()
+        if self.render:
+            super(LabyrinthClient, self).display()
+            self.draw_world()
+        else:
+            glutPostRedisplay()
         self.world_provider.world.update()
+        # round_end = time.time()
+        # print('round time', round_end - self.round_timer)
+        # self.round_timer = round_end
+
+    def keyboardHandler(self, key: int, x: int, y: int):
+        super().keyboardHandler(key, x, y)
+
+        if key == b' ':
+            self.render = not self.render
 
 if __name__ == '__main__':
     client = LabyrinthClient(pos=[-50, -50, -200])

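Note: the render flag decouples the simulation rate from drawing. When rendering is off, display() still calls glutPostRedisplay(), so GLUT keeps invoking the callback and world.update() keeps running at full speed. A minimal sketch of that pattern outside GLUT (DummyWorld, keyboard_handler, and the loop are illustrative stand-ins, not repo code):

class DummyWorld:
    def __init__(self):
        self.tick = 0

    def update(self):
        self.tick += 1

render = True

def keyboard_handler(key: bytes):
    # mirrors LabyrinthClient.keyboardHandler: space flips the render flag
    global render
    if key == b' ':
        render = not render

world = DummyWorld()
for frame in range(4):
    if render:
        print('drawing tick', world.tick)  # stand-in for display() + draw_world()
    world.update()                         # the simulation advances either way
    if frame == 1:
        keyboard_handler(b' ')             # simulate pressing space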
View file

@@ -1,11 +1,11 @@
 import time
+from typing import Tuple
 
 from Objects.Cube.Cube import Cube
 from Objects.World import World
 import numpy as np
 import random
 
 class LabyrinthWorld(World):
     randomBuffer = 0
     batchsize = 1000
@@ -26,21 +26,37 @@ class LabyrinthWorld(World):
         self.max_crates = self.max_room_num
 
-        self.subjects = []
-        self.ins = []
-        self.actions = []
-        self.targets = []
         self.model = None
         self.lastUpdate = time.time()
         self.nextTrain = self.randomBuffer
-        self.round = 0
+        self.round = 1
+        self.evolve_timer = 10
+        # self.evolve_timer = 1500
 
         self.trailMix = np.zeros(self.board_shape)
         self.grass = np.zeros(self.board_shape)
         self.hunter_grass = np.zeros(self.board_shape)
         self.subjectDict = {}
 
+        self._hunters = None
+        self._herbivores = None
+
+    @property
+    def hunters(self):
+        if self._hunters is None:
+            return []
+        return self._hunters.subjects
+
+    @property
+    def herbivores(self):
+        if self._herbivores is None:
+            return []
+        return self._herbivores.subjects
+
+    @property
+    def subjects(self):
+        return self.hunters + self.herbivores
+
     def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200):
         board = np.zeros(self.board_shape)
         random.seed(seed)
@@ -146,36 +162,40 @@ class LabyrinthWorld(World):
         # adding subjects
         from labirinth_ai.Subject import Hunter, Herbivore
-        for _ in range(10):
-            while True:
-                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-                if self.board[px, py] == 1:
-                    self.subjects.append(Hunter(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
-        for _ in range(40):
-            while True:
-                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-                if self.board[px, py] == 1:
-                    self.subjects.append(Herbivore(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
+        from labirinth_ai.Population import Population
+        self._hunters = Population(Hunter, self, 10)
+        self._herbivores = Population(Herbivore, self, 40)
+
+        self.subjectDict = self.build_subject_dict()
+
+    def generate_free_coordinates(self) -> Tuple[int, int]:
+        while True:
+            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+            if self.board[px, py] == 1:
+                return px, py
+
+    def build_subject_dict(self):
+        subject_dict = {}
         for x in range(self.board_shape[0]):
             for y in range(self.board_shape[1]):
-                self.subjectDict[(x, y)] = []
+                subject_dict[(x, y)] = []
 
         for sub in self.subjects:
-            self.subjectDict[(sub.x, sub.y)].append(sub)
+            subject_dict[(sub.x, sub.y)].append(sub)
+        return subject_dict
 
     def update(self):
+        if self.round % self.evolve_timer == 0:
+            print('Evolve population')
+            self.round = 0
+            self._hunters.evolve()
+            self._herbivores.evolve()
+            self.subjectDict = self.build_subject_dict()
+        self.round += 1
+
         # start = time.time()
         for sub in self.subjects:
             sub.calculateAction(self)
@@ -185,7 +205,6 @@ class LabyrinthWorld(World):
                 sub.update(self)
                 sub.tick += 1
 
-        new_subjects = []
         kill_table = {}
         live_table = {}
         for sub in self.subjects:
@@ -194,18 +213,14 @@ class LabyrinthWorld(World):
                 live_table[sub.name] = 0
             kill_table[sub.name] += sub.kills
             live_table[sub.name] += sub.lives
-            if sub.alive:
-                new_subjects.append(sub)
-            else:
+            if not sub.alive:
                 px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                 py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                 while self.board[px, py] == 0:
                     px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                     py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                 sub.respawnUpdate(px, py, self)
-                new_subjects.append(sub)
-        self.subjects = new_subjects
 
         self.trailMix *= 0.99
         self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3)

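Note: update() now drives evolution on a fixed cadence: every evolve_timer rounds both populations evolve and the subject lookup is rebuilt, and self.round starts at 1 so a fresh world does not evolve on its very first update. A runnable sketch of just that cadence (DummyPopulation is a stand-in for the repo's Population):

class DummyPopulation:
    def evolve(self):
        print('evolve')

hunters, herbivores = DummyPopulation(), DummyPopulation()
round_counter = 1   # like self.round = 1: no evolution on the very first update
evolve_timer = 10

for _ in range(25):
    if round_counter % evolve_timer == 0:
        round_counter = 0
        hunters.evolve()
        herbivores.evolve()
        # LabyrinthWorld also rebuilds subjectDict here, because evolve()
        # replaces every subject instance with a mutated copy
    round_counter += 1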
View file

@@ -44,8 +44,8 @@ class BaseModel(nn.Module):
 class BaseDataSet(Dataset):
     def __init__(self, states, targets):
         assert len(states) == len(targets), "Needs to have as many states as targets!"
-        self.states = torch.tensor(states, dtype=torch.float32)
-        self.targets = torch.tensor(targets, dtype=torch.float32)
+        self.states = torch.tensor(np.array(states), dtype=torch.float32)
+        self.targets = torch.tensor(np.array(targets), dtype=torch.float32)
 
     def __len__(self):
         return len(self.states)
@@ -69,7 +69,7 @@ def create_loss_function(action):
 def from_numpy(x):
-    return torch.tensor(x, dtype=torch.float32)
+    return torch.tensor(np.array(x), dtype=torch.float32)
 
 def train(states, targets, model, optimizer):

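Note: the np.array(...) wrappers address a real PyTorch cost: torch.tensor on a Python list of numpy arrays takes a slow element-wise path (recent PyTorch versions warn that it is extremely slow), while converting to one contiguous ndarray first makes it a single bulk copy. A small self-contained comparison:

import time

import numpy as np
import torch

states = [np.random.random((4, 4)) for _ in range(10000)]

t0 = time.time()
a = torch.tensor(states, dtype=torch.float32)            # list of ndarrays: slow path
t1 = time.time()
b = torch.tensor(np.array(states), dtype=torch.float32)  # one contiguous copy
t2 = time.time()

assert torch.equal(a, b)
print(f'from list: {t1 - t0:.3f}s, via np.array: {t2 - t1:.3f}s')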
View file

@@ -3,40 +3,16 @@ from torch import nn
 import numpy as np
 import tqdm
 from torch.utils.data import Dataset, DataLoader
-from labirinth_ai.Models.BaseModel import device
+from labirinth_ai.Models.BaseModel import device, BaseDataSet, create_loss_function, create_optimizer
+from labirinth_ai.Models.Genotype import Genotype
 
-class NodeGene:
-    valid_types = ['sensor', 'hidden', 'output']
-
-    def __init__(self, node_id, node_type, bias=None):
-        assert node_type in self.valid_types, 'Unknown node type!'
-        self.node_id = node_id
-        self.node_type = node_type
-        if node_type == 'hidden':
-            assert bias is not None, 'Expected a bias for hidden node types!'
-            self.bias = bias
-        else:
-            self.bias = None
-
-class ConnectionGene:
-    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
-        self.start = start
-        self.end = end
-        self.enabled = enabled
-        self.innvovation_num = innovation_num
-        self.recurrent = recurrent
-        if weight is None:
-            self.weight = np.random.random(1)[0] * 2 - 1.0
-        else:
-            self.weight = weight
-
 class EvolutionModel(nn.Module):
     evolutionary = True
 
-    def __init__(self, view_dimension, action_num, channels, genes=None):
+    def __init__(self, view_dimension, action_num, channels, genes: Genotype = None, genotype_class=None):
+        if genotype_class is None:
+            genotype_class = Genotype
         super(EvolutionModel, self).__init__()
         self.flatten = nn.Flatten()
@@ -46,25 +22,29 @@ class EvolutionModel(nn.Module):
         if genes is None:
             self.num_input_nodes = channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2
-            self.genes = {'nodes': {}, 'connections': []}
-            node_id = 0
-            for _ in range(self.num_input_nodes):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'sensor')
-                node_id += 1
-            first_action = node_id
-            for _ in range(action_num * 2):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'output')
-                node_id += 1
-            for index in range(self.num_input_nodes):
-                for action in range(action_num * 2):
-                    self.genes['connections'].append(
-                        ConnectionGene(index, first_action + action, True, index*(action_num * 2) + action)
-                    )
+            self.genes = genotype_class(action_num, self.num_input_nodes)
+        else:
+            self.num_input_nodes = len(list(filter(lambda element: element[1].node_type == 'sensor', genes.nodes.items())))
+            assert self.num_input_nodes > 0, 'Network needs to have sensor nodes!'
+            is_input_over = False
+            is_output_over = False
+            for key, node in genes.nodes.items():
+                if node.node_type == 'sensor':
+                    if is_input_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')
+                if node.node_type == 'output':
+                    is_input_over = True
+                    if is_output_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')
+                if node.node_type == 'hidden':
+                    is_output_over = True
+            self.genes = genes
 
         self.incoming_connections = {}
-        for connection in self.genes['connections']:
+        for connection in self.genes.connections:
             if connection.end not in self.incoming_connections.keys():
                 self.incoming_connections[connection.end] = []
             self.incoming_connections[connection.end].append(connection)
@@ -73,16 +53,17 @@ class EvolutionModel(nn.Module):
         self.indices = {}
 
         self.has_recurrent = False
-        non_recurrent_indices = {}
+        self.non_recurrent_indices = {}
+        self.recurrent_indices = {}
         with torch.no_grad():
             for key, value in self.incoming_connections.items():
                 value.sort(key=lambda element: element.start)
 
-                lin = nn.Linear(len(value), 1, bias=self.genes['nodes'][key].bias is not None)
+                lin = nn.Linear(len(value), 1, bias=self.genes.nodes[key].bias is not None)
                 for index, connection in enumerate(value):
                     lin.weight[0, index] = value[index].weight
-                if self.genes['nodes'][key].bias is not None:
-                    lin.bias[0] = self.genes['nodes'][key].bias
+                if self.genes.nodes[key].bias is not None:
+                    lin.bias[0] = self.genes.nodes[key].bias
 
                 non_lin = nn.ELU()
                 sequence = nn.Sequential(
@@ -93,15 +74,17 @@ class EvolutionModel(nn.Module):
                 self.layers[key] = sequence
                 self.indices[key] = list(map(lambda element: element.start, value))
 
-                non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
-                if not self.has_recurrent and len(non_recurrent_indices[key]) != len(self.indices[key]):
+                self.non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
+                self.recurrent_indices[key] = list(filter(lambda element: element.recurrent, value))
+                if not self.has_recurrent and len(self.non_recurrent_indices[key]) != len(self.indices[key]):
                     self.has_recurrent = True
-                non_recurrent_indices[key] = list(map(lambda element: element.start, non_recurrent_indices[key]))
+                self.non_recurrent_indices[key] = list(map(lambda element: element.start, self.non_recurrent_indices[key]))
+                self.recurrent_indices[key] = list(map(lambda element: element.start, self.recurrent_indices[key]))
 
         rank_of_node = {}
         for i in range(self.num_input_nodes):
             rank_of_node[i] = 0
 
-        layers_to_add = list(non_recurrent_indices.items())
+        layers_to_add = list(self.non_recurrent_indices.items())
         while len(layers_to_add) > 0:
             for index, (key, incoming_nodes) in enumerate(list(layers_to_add)):
                 max_rank = -1
@@ -120,44 +103,123 @@ class EvolutionModel(nn.Module):
         ranked_layers = list(rank_of_node.items())
         ranked_layers.sort(key=lambda element: element[1])
         ranked_layers = list(filter(lambda element: element[1] > 0, ranked_layers))
-        self.layer_order = list(map(lambda element: element[0], ranked_layers))
-        self.memory = torch.Tensor((max(map(lambda element: element[1].node_id, self.genes['nodes'].items())) + 1))
-
-    def forward(self, x, memory=None):
+
+        ranked_layers = list(map(lambda element: (element, 0),
+                                 filter(lambda recurrent_element:
+                                        recurrent_element not in list(
+                                            map(lambda ranked_layer: ranked_layer[0], ranked_layers)
+                                        ),
+                                        list(filter(lambda recurrent_keys:
+                                                    len(self.recurrent_indices[recurrent_keys]) > 0,
+                                                    self.recurrent_indices.keys()))))) + ranked_layers
+
+        self.layer_order = list(map(lambda element: element[0], ranked_layers))
+        self.memory_size = (max(map(lambda element: element[1].node_id, self.genes.nodes.items())) + 1)
+        self.memory = torch.Tensor(self.memory_size)
+        self.output_range = range(self.num_input_nodes, self.num_input_nodes + self.action_num * 2)
+
+    def forward(self, x, last_memory=None):
         x_flat = self.flatten(x)
-        if memory is None:
-            memory = torch.Tensor(self.memory)
-            outs = []
-            for batch_element in x_flat:
-                memory[0:self.num_input_nodes] = batch_element
-                for layer_index in self.layer_order:
-                    memory[layer_index] = self.layers[layer_index](memory[self.indices[layer_index]])
-                outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2])
-            outs = torch.stack(outs)
-            self.memory = torch.Tensor(memory)
-            return torch.reshape(outs, (x.shape[0], 4, 2))
-        else:
-            memory[:, 0:self.num_input_nodes] = x
-            for layer_index in self.layer_order:
-                memory[:, layer_index] = self.layers[layer_index](memory[:, self.indices[layer_index]])
-            return torch.reshape(
-                memory[:, self.num_input_nodes: self.num_input_nodes + self.action_num * 2],
-                (x.shape[0], 4, 2))
+        if last_memory is not None:
+            last_memory_flat = self.flatten(last_memory)
+        elif self.has_recurrent:
+            raise ValueError('Recurrent networks need to be passed their previous memory!')
+
+        memory = torch.Tensor(self.memory_size)
+        outs = []
+        for batch_index, batch_element in enumerate(x_flat):
+            memory[0:self.num_input_nodes] = batch_element
+            for layer_index in self.layer_order:
+                non_recurrent_in = memory[self.non_recurrent_indices[layer_index]]
+                non_recurrent_in = torch.stack([non_recurrent_in])
+                if self.has_recurrent and len(self.recurrent_indices[layer_index]) > 0:
+                    recurrent_in = last_memory_flat[batch_index, self.recurrent_indices[layer_index]]
+                    recurrent_in = torch.stack([recurrent_in])
+                    combined_in = torch.concat([non_recurrent_in, recurrent_in], dim=1)
+                else:
+                    combined_in = non_recurrent_in
+                memory[layer_index] = self.layers[layer_index](combined_in)
+            outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2])
+        outs = torch.stack(outs)
+        self.memory = torch.Tensor(memory)
+        return torch.reshape(outs, (x.shape[0], outs.shape[1]//2, 2))
+
+    def update_genes_with_weights(self):
+        for key, value in self.incoming_connections.items():
+            value.sort(key=lambda element: element.start)
+
+            sequence = self.layers[key]
+            lin = sequence[0]
+            for index, connection in enumerate(value):
+                value[index].weight = float(lin.weight[0, index])
+            if self.genes.nodes[key].bias is not None:
+                self.genes.nodes[key].bias = float(lin.bias[0])
+
+class RecurrentDataSet(BaseDataSet):
+    def __init__(self, states, targets, memory):
+        super().__init__(states, targets)
+        assert len(states) == len(memory), "Needs to have as many states as memories!"
+        self.memory = torch.tensor(np.array(memory), dtype=torch.float32)
+
+    def __getitem__(self, idx):
+        return self.states[idx], self.memory[idx], self.targets[idx]
+
+def train_recurrent(states, memory, targets, model, optimizer):
+    for action in range(model.action_num):
+        data_set = RecurrentDataSet(states[action], targets[action], memory[action])
+        dataloader = DataLoader(data_set, batch_size=64, shuffle=True)
+        loss_fn = create_loss_function(action)
+
+        size = len(dataloader)
+        model.train()
+        for batch, (X, M, y) in enumerate(dataloader):
+            X, y, M = X.to(device), y.to(device), M.to(device)
+
+            # Compute prediction error
+            pred = model(X, M)
+            loss = loss_fn(pred, y)
+
+            # Backpropagation
+            optimizer.zero_grad()
+            loss.backward(retain_graph=True)
+            optimizer.step()
+
+            if batch % 100 == 0:
+                loss, current = loss.item(), batch * len(X)
+                print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
        model.eval()
+        del data_set
+        del dataloader
+
 if __name__ == '__main__':
-    sample = np.random.random((1, 486))
-    model = EvolutionModel(5, 4, 4).to(device)
-    print(model)
+    sample = np.random.random((1, 1))
+    last_memory = np.zeros((1, 3))
+
+    from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype
+    genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)},
+                     connections=[ConnectionGene(0, 2, True, 0, recurrent=True), ConnectionGene(2, 1, True, 1, 1)])
+    model = EvolutionModel(1, 1, 1, genes)
+    model = model.to(device)
+    # print(model)
     print(model.has_recurrent)
-    test = model(torch.tensor(sample, dtype=torch.float32))
+
+    test = model(torch.tensor(sample, dtype=torch.float32), torch.tensor(last_memory, dtype=torch.float32))
     # test = test.cpu().detach().numpy()
-    print(test)
-    state = np.random.random((1, 486))
-    target = np.random.random((4, 2))
+    # print(test)
+
+    state = np.random.random((1, 1))
+    memory = np.random.random((1, 1))
+    target = np.random.random((2, 1))
     states = [
         [state],
         [state],
@@ -170,7 +232,12 @@ if __name__ == '__main__':
         [target],
         [target],
     ]
+    memories = [
+        [memory],
+        [memory],
+        [memory],
+        [memory],
+    ]
     optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
-    from labirinth_ai.Models.BaseModel import train
-    train(states, targets, model, optimizer)
+    train_recurrent(states, memories, targets, model, optimizer)

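Note: the __main__ block above exercises a single forward pass; how memory is meant to be threaded through time is implied rather than shown. A rollout sketch under that assumption (CPU only, same tiny genotype as in __main__): each step feeds the model the node activations (model.memory) produced by the previous step.

import numpy as np
import torch

from labirinth_ai.Models.EvolutionModel import EvolutionModel
from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype

# sensor 0 -> hidden 2 -> output 1, with the sensor->hidden connection recurrent
genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)},
                 connections=[ConnectionGene(0, 2, True, 0, recurrent=True),
                              ConnectionGene(2, 1, True, 1, 1)])
model = EvolutionModel(1, 1, 1, genes)

last_memory = torch.zeros((1, model.memory_size))  # step 0 has no history yet
for step in range(3):
    x = torch.tensor(np.random.random((1, 1)), dtype=torch.float32)
    out = model(x, last_memory)
    # this step's activations become the next step's recurrent input
    last_memory = torch.stack([model.memory])
    print(step, out.detach().numpy())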
View file

@@ -0,0 +1,139 @@
+from abc import abstractmethod
+from typing import List, Dict
+
+import numpy as np
+
+class NodeGene:
+    valid_types = ['sensor', 'hidden', 'output']
+
+    def __init__(self, node_id, node_type, bias=None):
+        assert node_type in self.valid_types, 'Unknown node type!'
+        self.node_id = node_id
+        self.node_type = node_type
+        if node_type == 'hidden':
+            assert bias is not None, 'Expected a bias for hidden node types!'
+            self.bias = bias
+        else:
+            self.bias = None
+
+class ConnectionGene:
+    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
+        self.start = start
+        self.end = end
+        self.enabled = enabled
+        self.innvovation_num = innovation_num
+        self.recurrent = recurrent
+        if weight is None:
+            self.weight = np.random.random(1)[0] * 2 - 1.0
+        else:
+            self.weight = weight
+
+class Genotype:
+    def __init__(self, action_num: int = None, num_input_nodes: int = None,
+                 nodes: Dict[int, NodeGene] = None, connections: List[ConnectionGene] = None):
+        self.nodes = {}
+        self.connections = []
+        if action_num is not None and num_input_nodes is not None:
+            node_id = 0
+            for _ in range(num_input_nodes):
+                self.nodes[node_id] = NodeGene(node_id, 'sensor')
+                node_id += 1
+            first_action = node_id
+            for _ in range(action_num * 2):
+                self.nodes[node_id] = NodeGene(node_id, 'output')
+                node_id += 1
+            for index in range(num_input_nodes):
+                for action in range(action_num * 2):
+                    self.connections.append(
+                        ConnectionGene(index, first_action + action, True, index * (action_num * 2) + action)
+                    )
+        if nodes is not None and connections is not None:
+            self.nodes = nodes
+            self.connections = connections
+
+    def calculate_rank_of_nodes(self):
+        rank_of_node = {}
+        nodes_to_rank = list(self.nodes.items())
+        while len(nodes_to_rank) > 0:
+            for list_index, (id, node) in enumerate(nodes_to_rank):
+                incoming_connections = list(filter(lambda connection: connection.end == id and
+                                                   not connection.recurrent, self.connections))
+                if len(incoming_connections) == 0:
+                    rank_of_node[id] = 0
+                    nodes_to_rank.pop(list_index)
+                    break
+
+                incoming_connections_starts = list(map(lambda connection: connection.start, incoming_connections))
+                start_ranks = list(map(lambda element: rank_of_node[element[0]],
+                                       filter(lambda start_node: start_node[0] in incoming_connections_starts and
+                                              start_node[0] in rank_of_node.keys(),
+                                              self.nodes.items())))
+                if len(start_ranks) == len(incoming_connections):
+                    rank_of_node[id] = max(start_ranks) + 1
+                    nodes_to_rank.pop(list_index)
+                    break
+        return rank_of_node
+
+    @abstractmethod
+    def mutate(self, innovation_num) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+        # return innovation_num
+        raise NotImplementedError()
+
+    @abstractmethod
+    def cross(self, other):
+        raise NotImplementedError()
+        # return self
+
+class NeatLike(Genotype):
+    connection_add_thr = 0.3
+    node_add_thr = 0.3
+
+    def mutate(self, innovation_num, allow_recurrent=False) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+        :param allow_recurrent: Optional parameter allowing or disallowing recurrent connections to form
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+        # add connection
+        if np.random.random(1)[0] < self.connection_add_thr or True:
+            nodes = list(self.nodes.keys())
+            rank_of_node = self.calculate_rank_of_nodes()
+            end_nodes = list(filter(lambda node: rank_of_node[node] > 0, nodes))
+
+            connection_tuple = list(map(lambda connection: (connection.start, connection.end), self.connections))
+
+            start = np.random.randint(0, len(nodes))
+            end = np.random.randint(0, len(end_nodes))
+
+            tries = 50
+            while (rank_of_node[end_nodes[end]] == 0 or
+                   ((not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]])
+                   or nodes[start] == end_nodes[end] or (nodes[start], end_nodes[end]) in connection_tuple) and\
+                    tries > 0:
+                end = np.random.randint(0, len(end_nodes))
+                if (not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]:
+                    start = np.random.randint(0, len(nodes))
+                tries -= 1
+
+            if tries > 0:
+                innovation_num += 1
+                self.connections.append(
+                    ConnectionGene(nodes[start], end_nodes[end], True, innovation_num,
+                                   recurrent=rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]))
+        # todo add node
+        return innovation_num
+
+    def cross(self, other):
+        return self

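Note: a small usage sketch of the new classes, assuming the repo layout. NeatLike inherits the dense sensor-to-output constructor from Genotype, and mutate() tries (up to 50 times) to add one new connection gene and returns the updated innovation number. While the debug `or True` is in place a mutation is attempted on every call, but it can still add nothing if no unconnected pair is found within the tries.

from labirinth_ai.Models.Genotype import NeatLike

genotype = NeatLike(action_num=2, num_input_nodes=3)  # 3 sensor nodes, 4 output nodes, dense connections
innovation = max(connection.innvovation_num for connection in genotype.connections)

before = len(genotype.connections)
innovation = genotype.mutate(innovation, allow_recurrent=False)
added = len(genotype.connections) - before  # 1, or 0 if all 50 tries hit existing pairs
print(f'{added} connection(s) added, innovation number is now {innovation}')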
View file

@@ -0,0 +1,97 @@
+import random
+
+import numpy as np
+
+from labirinth_ai.Models.Genotype import NeatLike
+
+def fib(n):
+    if n == 0:
+        return [1]
+    elif n < 0:
+        return [0]
+    else:
+        return [fib(n - 1)[0] + fib(n - 2)[0]] + fib(n - 1)
+
+class Population:
+    def __init__(self, subject_class, world, subject_number):
+        self.subjects = []
+        self.world = world
+        for _ in range(subject_number):
+            px, py = self.world.generate_free_coordinates()
+            self.subjects.append(subject_class(px, py, genotype_class=NeatLike))
+        self.subject_number = subject_number
+        self.subject_class = subject_class
+
+    def select(self):
+        ranked = list(self.subjects)
+        ranked.sort(key=lambda subject: subject.accumulated_rewards, reverse=True)
+        return ranked[:int(self.subject_number / 2)]
+
+    @classmethod
+    def scatter(cls, n, buckets):
+        out = np.zeros(buckets)
+        if n == 0:
+            return out
+
+        fib_number = 0
+        fibs = fib(fib_number)
+        while np.sum(fibs) <= n and len(fibs) <= buckets:
+            fib_number += 1
+            fibs = fib(fib_number)
+        fib_number -= 1
+        fibs = fib(fib_number)
+
+        for bucket in range(buckets):
+            if bucket < len(fibs):
+                out[bucket] += fibs[bucket]
+            else:
+                break
+
+        return out + cls.scatter(n - np.sum(fibs), buckets)
+
+    def evolve(self):
+        # get updated weights from the models
+        for subject in self.subjects:
+            subject.model.update_genes_with_weights()
+
+        # crossbreed the current pop
+        best_subjects = self.select()
+        distribution = list(self.scatter(self.subject_number - int(self.subject_number / 2), int(self.subject_number / 2)))
+        new_subjects = list(best_subjects)
+        for index, offspring_num in enumerate(distribution):
+            for _ in range(int(offspring_num)):
+                parent_1 = best_subjects[index]
+                parent_2 = best_subjects[random.randint(index + 1, len(best_subjects) - 1)]
+
+                new_genes = parent_1.model.genes.cross(parent_2.model.genes)
+                # position doesn't matter, since mutation will set it
+                new_subject = self.subject_class(0, 0, new_genes)
+                new_subject.history = parent_1.history
+                new_subject.samples = parent_1.samples + parent_2.samples
+                new_subjects.append(new_subject)
+        assert len(new_subjects) == self.subject_number, 'All generations should have constant size!'
+
+        # mutate the pop
+        mutated_subjects = []
+        innovation_num = max(map(lambda subject: max(map(lambda connection: connection.innvovation_num,
+                                                         subject.model.genes.connections)),
+                                 new_subjects))
+        for subject in new_subjects:
+            subject.accumulated_rewards = 0
+
+            innovation_num = subject.model.genes.mutate(innovation_num)
+            px, py = self.world.generate_free_coordinates()
+            new_subject = self.subject_class(px, py, subject.model.genes)
+            new_subject.history = subject.history
+            new_subject.samples = subject.samples
+            mutated_subjects.append(new_subject)
+
+        self.subjects = mutated_subjects

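Note: scatter() allocates offspring across the ranked parents using descending Fibonacci runs, so better-ranked parents receive more children and the bucket counts always sum to n. A worked example, assuming the repo is importable:

from labirinth_ai.Population import Population, fib

print(fib(3))                     # [3, 2, 1, 1] -- a descending Fibonacci run
print(Population.scatter(20, 5))  # [9. 5. 3. 2. 1.] -- sums to 20, front-loaded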
View file

@@ -4,6 +4,7 @@ import tensorflow as tf
 from tensorflow import keras
 
 from labirinth_ai.LabyrinthWorld import LabyrinthWorld
+from labirinth_ai.Models.EvolutionModel import EvolutionModel
 from labirinth_ai.loss import loss2, loss3
 from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy
@@ -350,7 +351,7 @@ class NetLearner(Subject):
         self.strikes = 0
 
-    def __init__(self, x, y):
+    def __init__(self, x, y, genes=None, genotype_class=None):
         super(NetLearner, self).__init__(x, y)
 
         self.action = None
@@ -370,7 +371,10 @@ class NetLearner(Subject):
         self.x_in = []
         self.actions = []
         self.target = []
-        self.model = BaseModel(self.viewD, 4, 4).to(device)
+
+        # self.model = BaseModel(self.viewD, 4, 4).to(device)
+        self.model = EvolutionModel(self.viewD, 4, 4, genes=genes, genotype_class=genotype_class).to(device)
+
         self.optimizer = create_optimizer(self.model)
 
         if len(self.samples) < self.randomBuffer:
@@ -540,9 +544,11 @@ class NetLearner(Subject):
         # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer:
         if len(self.samples) > self.nextTrain and doTrain:
-            print('train')
+            print('train', len(self.samples))
             self.train()
+            self.nextTrain = len(self.samples)
             self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize)
+            print(len(self.samples), self.nextTrain)
 
         self.accumulated_rewards += self.lastReward
@@ -657,23 +663,6 @@ class Herbivore(NetLearner):
     samples = []
 
-    # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2))
-    # target = keras.Input(shape=(10, 1))
-    # inVec = keras.layers.Flatten()(x_in)
-    # # kernel_regularizer=keras.regularizers.l2(0.01)
-    # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec)
-    # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions)
-    # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions)
-    # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec)
-    #
-    # model = keras.Model(inputs=x_in, outputs=actions)
-    #
-    # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target])
-    # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target])
-
-    # def __init__(self, x, y):
-    #     super(Herbivore, self).__init__(x, y)
-
     def createState(self, world: LabyrinthWorld):
         state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1
         state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1