NEAT implementation up to mutate
This commit is contained in:
parent 4a05baa103
commit cf4d773c10

8 changed files with 468 additions and 144 deletions
@@ -52,6 +52,7 @@ class Client:

         self.pos = pos
         self.time = time.time()
+        self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)

         glutReshapeFunc(self.resize)
         glutDisplayFunc(self.display)
@@ -195,7 +196,7 @@ class Client:

         glutSwapBuffers()

-        print('fps', 1.0 / (time.time() - self.time))
+        # print('fps', 1.0 / (time.time() - self.time))
         self.time = time.time()
         glutPostRedisplay()

@@ -1,13 +1,16 @@
 import time

-from Client.Client import Client, MAX_DISTANCE
+from Client.Client import Client, MAX_DISTANCE, glutPostRedisplay
 from MatrixStuff.Transformations import perspectiveMatrix
 from labirinth_ai.LabyrinthProvider import LabyrinthProvider

 import numpy as np


 class LabyrinthClient(Client):
     def __init__(self, test=False, pos=[0, 0, 0], world_class=LabyrinthProvider):
+        self.render = True
+        self.round_timer = time.time()
         super(LabyrinthClient, self).__init__(test, pos, world_class)

     def draw_world(self):
@@ -32,12 +35,25 @@ class LabyrinthClient(Client):
             self.world_provider.world.set_color(sub.x, sub.y, 0, 212 / 255.0, 150 / 255.0, 222 / 255.0)

         self.projMatrix = perspectiveMatrix(45.0, 400 / 400, 0.01, MAX_DISTANCE)
-        print('redraw', time.time() - start_time)
+        # print('redraw', time.time() - start_time)

     def display(self):
+        if self.render:
             super(LabyrinthClient, self).display()
             self.draw_world()
+        else:
+            glutPostRedisplay()
         self.world_provider.world.update()
+        # round_end = time.time()
+        # print('round time', round_end - self.round_timer)
+        # self.round_timer = round_end

+    def keyboardHandler(self, key: int, x: int, y: int):
+        super().keyboardHandler(key, x, y)
+
+        if key == b' ':
+            self.render = not self.render
+

 if __name__ == '__main__':
     client = LabyrinthClient(pos=[-50, -50, -200])
@@ -1,11 +1,11 @@
 import time
+from typing import Tuple

 from Objects.Cube.Cube import Cube
 from Objects.World import World
 import numpy as np
 import random


 class LabyrinthWorld(World):
     randomBuffer = 0
     batchsize = 1000
@@ -26,21 +26,37 @@ class LabyrinthWorld(World):

         self.max_crates = self.max_room_num

-        self.subjects = []
-        self.ins = []
-        self.actions = []
-        self.targets = []

         self.model = None
         self.lastUpdate = time.time()
         self.nextTrain = self.randomBuffer
-        self.round = 0
+        self.round = 1
+        self.evolve_timer = 10
+        # self.evolve_timer = 1500

         self.trailMix = np.zeros(self.board_shape)
         self.grass = np.zeros(self.board_shape)
         self.hunter_grass = np.zeros(self.board_shape)
         self.subjectDict = {}

+        self._hunters = None
+        self._herbivores = None
+
+    @property
+    def hunters(self):
+        if self._hunters is None:
+            return []
+        return self._hunters.subjects
+
+    @property
+    def herbivores(self):
+        if self._herbivores is None:
+            return []
+        return self._herbivores.subjects
+
+    @property
+    def subjects(self):
+        return self.hunters + self.herbivores
+
     def generate(self, seed: int = None, sea_plate_height: int = 50, continental_plate_height: int = 200):
         board = np.zeros(self.board_shape)
         random.seed(seed)
@@ -146,36 +162,40 @@ class LabyrinthWorld(World):

         # adding subjects
         from labirinth_ai.Subject import Hunter, Herbivore
-        for _ in range(10):
-            while True:
-                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-                if self.board[px, py] == 1:
-                    self.subjects.append(Hunter(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
-
-        for _ in range(40):
-            while True:
-                px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
-                py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
-                if self.board[px, py] == 1:
-                    self.subjects.append(Herbivore(px, py))
-                    self.ins += self.subjects[-1].x_in
-                    self.actions += self.subjects[-1].actions
-                    self.targets += self.subjects[-1].target
-                    break
+        from labirinth_ai.Population import Population
+        self._hunters = Population(Hunter, self, 10)
+
+        self._herbivores = Population(Herbivore, self, 40)
+
+        self.subjectDict = self.build_subject_dict()
+
+    def generate_free_coordinates(self) -> Tuple[int, int]:
+        while True:
+            px = random.randint(self.max_room_dim, self.board_shape[0] - self.max_room_dim)
+            py = random.randint(self.max_room_dim, self.board_shape[1] - self.max_room_dim)
+            if self.board[px, py] == 1:
+                return px, py

+    def build_subject_dict(self):
+        subject_dict = {}
         for x in range(self.board_shape[0]):
             for y in range(self.board_shape[1]):
-                self.subjectDict[(x, y)] = []
+                subject_dict[(x, y)] = []

         for sub in self.subjects:
-            self.subjectDict[(sub.x, sub.y)].append(sub)
+            subject_dict[(sub.x, sub.y)].append(sub)
+        return subject_dict

     def update(self):

+        if self.round % self.evolve_timer == 0:
+            print('Evolve population')
+            self.round = 0
+            self._hunters.evolve()
+            self._herbivores.evolve()
+            self.subjectDict = self.build_subject_dict()
+        self.round += 1

         # start = time.time()
         for sub in self.subjects:
             sub.calculateAction(self)
@@ -185,7 +205,6 @@ class LabyrinthWorld(World):
             sub.update(self)
             sub.tick += 1

-        new_subjects = []
         kill_table = {}
         live_table = {}
         for sub in self.subjects:
@@ -194,18 +213,14 @@ class LabyrinthWorld(World):
                 live_table[sub.name] = 0
             kill_table[sub.name] += sub.kills
             live_table[sub.name] += sub.lives
-            if sub.alive:
-                new_subjects.append(sub)
-            else:
+            if not sub.alive:
                 px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                 py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                 while self.board[px, py] == 0:
                     px = random.randint(self.max_room_dim, (self.board_shape[0] - 1) - self.max_room_dim)
                     py = random.randint(self.max_room_dim, (self.board_shape[1] - 1) - self.max_room_dim)
                 sub.respawnUpdate(px, py, self)
-                new_subjects.append(sub)

-        self.subjects = new_subjects
         self.trailMix *= 0.99

         self.grass = np.minimum(self.grass + 0.01 * (self.board != 0), 3)
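Note on the `self.round = 0` to `self.round = 1` change: starting the counter at 1 presumably keeps update() from hitting the evolve branch on its very first call, since 0 % evolve_timer == 0 would fire immediately. A standalone sketch of the resulting cadence (not part of the commit):

# Sketch: with evolve_timer = 10 and the counter starting at 1, the evolve
# branch fires on the 10th, 20th, 30th, ... call, never on the first one.
round_counter, evolve_timer = 1, 10
for tick in range(1, 31):
    if round_counter % evolve_timer == 0:
        print('evolve at tick', tick)   # prints at ticks 10, 20, 30
        round_counter = 0
    round_counter += 1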
@@ -44,8 +44,8 @@ class BaseModel(nn.Module):
 class BaseDataSet(Dataset):
     def __init__(self, states, targets):
         assert len(states) == len(targets), "Needs to have as many states as targets!"
-        self.states = torch.tensor(states, dtype=torch.float32)
-        self.targets = torch.tensor(targets, dtype=torch.float32)
+        self.states = torch.tensor(np.array(states), dtype=torch.float32)
+        self.targets = torch.tensor(np.array(targets), dtype=torch.float32)

     def __len__(self):
         return len(self.states)
@@ -69,7 +69,7 @@ def create_loss_function(action):


 def from_numpy(x):
-    return torch.tensor(x, dtype=torch.float32)
+    return torch.tensor(np.array(x), dtype=torch.float32)


 def train(states, targets, model, optimizer):
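The `np.array(...)` wrappers are the usual fix for building tensors from lists of arrays: torch.tensor on a plain Python list of ndarrays converts element by element, which is slow and warns on recent PyTorch versions, while stacking into one contiguous ndarray first takes the fast path. A minimal illustration (not from the commit):

import numpy as np
import torch

batch = [np.zeros((4, 4)) for _ in range(1000)]
slow = torch.tensor(batch, dtype=torch.float32)            # per-element copy, warns on newer PyTorch
fast = torch.tensor(np.array(batch), dtype=torch.float32)  # one contiguous copy
assert torch.equal(slow, fast)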
@@ -3,40 +3,16 @@ from torch import nn
 import numpy as np
 import tqdm
 from torch.utils.data import Dataset, DataLoader
-from labirinth_ai.Models.BaseModel import device
+from labirinth_ai.Models.BaseModel import device, BaseDataSet, create_loss_function, create_optimizer
+from labirinth_ai.Models.Genotype import Genotype

-
-class NodeGene:
-    valid_types = ['sensor', 'hidden', 'output']
-
-    def __init__(self, node_id, node_type, bias=None):
-        assert node_type in self.valid_types, 'Unknown node type!'
-        self.node_id = node_id
-        self.node_type = node_type
-        if node_type == 'hidden':
-            assert bias is not None, 'Expected a bias for hidden node types!'
-            self.bias = bias
-        else:
-            self.bias = None
-
-
-class ConnectionGene:
-    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
-        self.start = start
-        self.end = end
-        self.enabled = enabled
-        self.innvovation_num = innovation_num
-        self.recurrent = recurrent
-        if weight is None:
-            self.weight = np.random.random(1)[0] * 2 - 1.0
-        else:
-            self.weight = weight
-

 class EvolutionModel(nn.Module):
     evolutionary = True

-    def __init__(self, view_dimension, action_num, channels, genes=None):
+    def __init__(self, view_dimension, action_num, channels, genes: Genotype = None, genotype_class=None):
+        if genotype_class is None:
+            genotype_class = Genotype
         super(EvolutionModel, self).__init__()
         self.flatten = nn.Flatten()

@@ -46,25 +22,29 @@ class EvolutionModel(nn.Module):

         if genes is None:
             self.num_input_nodes = channels * (2 * self.viewD + 1) * (2 * self.viewD + 1) + 2
-
-            self.genes = {'nodes': {}, 'connections': []}
-            node_id = 0
-            for _ in range(self.num_input_nodes):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'sensor')
-                node_id += 1
-            first_action = node_id
-            for _ in range(action_num * 2):
-                self.genes['nodes'][node_id] = NodeGene(node_id, 'output')
-                node_id += 1
-
-            for index in range(self.num_input_nodes):
-                for action in range(action_num * 2):
-                    self.genes['connections'].append(
-                        ConnectionGene(index, first_action + action, True, index*(action_num * 2) + action)
-                    )
+            self.genes = genotype_class(action_num, self.num_input_nodes)
+        else:
+            self.num_input_nodes = len(list(filter(lambda element: element[1].node_type == 'sensor', genes.nodes.items())))
+            assert self.num_input_nodes > 0, 'Network needs to have sensor nodes!'
+            is_input_over = False
+            is_output_over = False
+            for key, node in genes.nodes.items():
+                if node.node_type == 'sensor':
+                    if is_input_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')
+
+                if node.node_type == 'output':
+                    is_input_over = True
+                    if is_output_over:
+                        raise ValueError('Node genes need to follow the order sensor, output, hidden!')
+
+                if node.node_type == 'hidden':
+                    is_output_over = True
+
+            self.genes = genes

         self.incoming_connections = {}
-        for connection in self.genes['connections']:
+        for connection in self.genes.connections:
             if connection.end not in self.incoming_connections.keys():
                 self.incoming_connections[connection.end] = []
             self.incoming_connections[connection.end].append(connection)
@@ -73,16 +53,17 @@ class EvolutionModel(nn.Module):
         self.indices = {}

         self.has_recurrent = False
-        non_recurrent_indices = {}
+        self.non_recurrent_indices = {}
+        self.recurrent_indices = {}
         with torch.no_grad():
             for key, value in self.incoming_connections.items():
                 value.sort(key=lambda element: element.start)

-                lin = nn.Linear(len(value), 1, bias=self.genes['nodes'][key].bias is not None)
+                lin = nn.Linear(len(value), 1, bias=self.genes.nodes[key].bias is not None)
                 for index, connection in enumerate(value):
                     lin.weight[0, index] = value[index].weight
-                if self.genes['nodes'][key].bias is not None:
-                    lin.bias[0] = self.genes['nodes'][key].bias
+                if self.genes.nodes[key].bias is not None:
+                    lin.bias[0] = self.genes.nodes[key].bias

                 non_lin = nn.ELU()
                 sequence = nn.Sequential(
@@ -93,15 +74,17 @@ class EvolutionModel(nn.Module):
                 self.layers[key] = sequence
                 self.indices[key] = list(map(lambda element: element.start, value))

-                non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
-                if not self.has_recurrent and len(non_recurrent_indices[key]) != len(self.indices[key]):
+                self.non_recurrent_indices[key] = list(filter(lambda element: not element.recurrent, value))
+                self.recurrent_indices[key] = list(filter(lambda element: element.recurrent, value))
+                if not self.has_recurrent and len(self.non_recurrent_indices[key]) != len(self.indices[key]):
                     self.has_recurrent = True
-                non_recurrent_indices[key] = list(map(lambda element: element.start, non_recurrent_indices[key]))
+                self.non_recurrent_indices[key] = list(map(lambda element: element.start, self.non_recurrent_indices[key]))
+                self.recurrent_indices[key] = list(map(lambda element: element.start, self.recurrent_indices[key]))
         rank_of_node = {}
         for i in range(self.num_input_nodes):
             rank_of_node[i] = 0

-        layers_to_add = list(non_recurrent_indices.items())
+        layers_to_add = list(self.non_recurrent_indices.items())
         while len(layers_to_add) > 0:
             for index, (key, incoming_nodes) in enumerate(list(layers_to_add)):
                 max_rank = -1
@@ -120,44 +103,123 @@ class EvolutionModel(nn.Module):
         ranked_layers = list(rank_of_node.items())
         ranked_layers.sort(key=lambda element: element[1])
         ranked_layers = list(filter(lambda element: element[1] > 0, ranked_layers))
-        self.layer_order = list(map(lambda element: element[0], ranked_layers))
-        self.memory = torch.Tensor((max(map(lambda element: element[1].node_id, self.genes['nodes'].items())) + 1))
-
-    def forward(self, x, memory=None):
+        ranked_layers = list(map(lambda element: (element, 0),
+                                 filter(lambda recurrent_element:
+                                        recurrent_element not in list(
+                                            map(lambda ranked_layer: ranked_layer[0], ranked_layers)
+                                        ),
+                                        list(filter(lambda recurrent_keys:
+                                                    len(self.recurrent_indices[recurrent_keys]) > 0,
+                                                    self.recurrent_indices.keys()))))) + ranked_layers
+
+        self.layer_order = list(map(lambda element: element[0], ranked_layers))
+        self.memory_size = (max(map(lambda element: element[1].node_id, self.genes.nodes.items())) + 1)
+        self.memory = torch.Tensor(self.memory_size)
+        self.output_range = range(self.num_input_nodes, self.num_input_nodes + self.action_num * 2)
+
+    def forward(self, x, last_memory=None):
         x_flat = self.flatten(x)
-        if memory is None:
-            memory = torch.Tensor(self.memory)
+        if last_memory is not None:
+            last_memory_flat = self.flatten(last_memory)
+        elif self.has_recurrent:
+            raise ValueError('Recurrent networks need to be passed their previous memory!')
+
+        memory = torch.Tensor(self.memory_size)
         outs = []
-        for batch_element in x_flat:
+        for batch_index, batch_element in enumerate(x_flat):
             memory[0:self.num_input_nodes] = batch_element
             for layer_index in self.layer_order:
-                memory[layer_index] = self.layers[layer_index](memory[self.indices[layer_index]])
+                non_recurrent_in = memory[self.non_recurrent_indices[layer_index]]
+                non_recurrent_in = torch.stack([non_recurrent_in])
+                if self.has_recurrent and len(self.recurrent_indices[layer_index]) > 0:
+                    recurrent_in = last_memory_flat[batch_index, self.recurrent_indices[layer_index]]
+                    recurrent_in = torch.stack([recurrent_in])
+
+                    combined_in = torch.concat([non_recurrent_in, recurrent_in], dim=1)
+                else:
+                    combined_in = non_recurrent_in
+
+                memory[layer_index] = self.layers[layer_index](combined_in)
             outs.append(memory[self.num_input_nodes: self.num_input_nodes + self.action_num * 2])
         outs = torch.stack(outs)
         self.memory = torch.Tensor(memory)
-        return torch.reshape(outs, (x.shape[0], 4, 2))
-        else:
-            memory[:, 0:self.num_input_nodes] = x
-            for layer_index in self.layer_order:
-                memory[:, layer_index] = self.layers[layer_index](memory[:, self.indices[layer_index]])
-            return torch.reshape(
-                memory[:, self.num_input_nodes: self.num_input_nodes + self.action_num * 2],
-                (x.shape[0], 4, 2))
+        return torch.reshape(outs, (x.shape[0], outs.shape[1]//2, 2))
+
+    def update_genes_with_weights(self):
+        for key, value in self.incoming_connections.items():
+            value.sort(key=lambda element: element.start)
+
+            sequence = self.layers[key]
+            lin = sequence[0]
+            for index, connection in enumerate(value):
+                value[index].weight = float(lin.weight[0, index])
+            if self.genes.nodes[key].bias is not None:
+                self.genes.nodes[key].bias = float(lin.bias[0])
+
+
+class RecurrentDataSet(BaseDataSet):
+    def __init__(self, states, targets, memory):
+        super().__init__(states, targets)
+        assert len(states) == len(memory), "Needs to have as many states as memories!"
+        self.memory = torch.tensor(np.array(memory), dtype=torch.float32)
+
+    def __getitem__(self, idx):
+        return self.states[idx], self.memory[idx], self.targets[idx]
+
+
+def train_recurrent(states, memory, targets, model, optimizer):
+    for action in range(model.action_num):
+        data_set = RecurrentDataSet(states[action], targets[action], memory[action])
+        dataloader = DataLoader(data_set, batch_size=64, shuffle=True)
+        loss_fn = create_loss_function(action)
+
+        size = len(dataloader)
+        model.train()
+        for batch, (X, M, y) in enumerate(dataloader):
+            X, y, M = X.to(device), y.to(device), M.to(device)
+
+            # Compute prediction error
+            pred = model(X, M)
+            loss = loss_fn(pred, y)
+
+            # Backpropagation
+            optimizer.zero_grad()
+            loss.backward(retain_graph=True)
+            optimizer.step()
+
+            if batch % 100 == 0:
+                loss, current = loss.item(), batch * len(X)
+                print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
+        model.eval()
+
+        del data_set
+        del dataloader
+
+
 if __name__ == '__main__':
-    sample = np.random.random((1, 486))
+    sample = np.random.random((1, 1))
+    last_memory = np.zeros((1, 3))

-    model = EvolutionModel(5, 4, 4).to(device)
-    print(model)
+    from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype
+    genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)},
+                     connections=[ConnectionGene(0, 2, True, 0, recurrent=True), ConnectionGene(2, 1, True, 1, 1)])
+
+    model = EvolutionModel(1, 1, 1, genes)
+
+    model = model.to(device)
+    # print(model)
     print(model.has_recurrent)

-    test = model(torch.tensor(sample, dtype=torch.float32))
+    test = model(torch.tensor(sample, dtype=torch.float32), torch.tensor(last_memory, dtype=torch.float32))
     # test = test.cpu().detach().numpy()
-    print(test)
+    # print(test)

-    state = np.random.random((1, 486))
-    target = np.random.random((4, 2))
+    state = np.random.random((1, 1))
+    memory = np.random.random((1, 1))
+
+    target = np.random.random((2, 1))
     states = [
         [state],
         [state],
@@ -170,7 +232,12 @@ if __name__ == '__main__':
         [target],
         [target],
     ]
+    memories = [
+        [memory],
+        [memory],
+        [memory],
+        [memory],
+    ]

     optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
-    from labirinth_ai.Models.BaseModel import train
-    train(states, targets, model, optimizer)
+    train_recurrent(states, memories, targets, model, optimizer)
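For context, a hedged sketch of how a recurrent EvolutionModel is driven across steps, mirroring the __main__ block above: forward() stores the final activations in model.memory, and the caller passes them back in as last_memory on the next call.

import numpy as np
import torch

from labirinth_ai.Models.EvolutionModel import EvolutionModel
from labirinth_ai.Models.Genotype import NodeGene, ConnectionGene, Genotype

# same toy genotype as in the __main__ block: one recurrent connection 0 -> 2
genes = Genotype(nodes={0: NodeGene(0, 'sensor'), 1: NodeGene(1, 'output'), 2: NodeGene(2, 'hidden', 1)},
                 connections=[ConnectionGene(0, 2, True, 0, recurrent=True), ConnectionGene(2, 1, True, 1, 1)])
model = EvolutionModel(1, 1, 1, genes)

last_memory = torch.zeros((1, model.memory_size))
for _ in range(2):
    x = torch.tensor(np.random.random((1, 1)), dtype=torch.float32)
    out = model(x, last_memory)               # shape (1, action_num, 2)
    last_memory = model.memory.unsqueeze(0)   # previous activations feed the recurrent inputs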
labirinth_ai/Models/Genotype.py (new file, 139 lines)
@@ -0,0 +1,139 @@
+from abc import abstractmethod
+from typing import List, Dict
+
+import numpy as np
+
+
+class NodeGene:
+    valid_types = ['sensor', 'hidden', 'output']
+
+    def __init__(self, node_id, node_type, bias=None):
+        assert node_type in self.valid_types, 'Unknown node type!'
+        self.node_id = node_id
+        self.node_type = node_type
+        if node_type == 'hidden':
+            assert bias is not None, 'Expected a bias for hidden node types!'
+            self.bias = bias
+        else:
+            self.bias = None
+
+
+class ConnectionGene:
+    def __init__(self, start, end, enabled, innovation_num, weight=None, recurrent=False):
+        self.start = start
+        self.end = end
+        self.enabled = enabled
+        self.innovation_num = innovation_num
+        self.recurrent = recurrent
+        if weight is None:
+            self.weight = np.random.random(1)[0] * 2 - 1.0
+        else:
+            self.weight = weight
+
+
+class Genotype:
+    def __init__(self, action_num: int = None, num_input_nodes: int = None,
+                 nodes: Dict[int, NodeGene] = None, connections: List[ConnectionGene] = None):
+        self.nodes = {}
+        self.connections = []
+        if action_num is not None and num_input_nodes is not None:
+            node_id = 0
+            for _ in range(num_input_nodes):
+                self.nodes[node_id] = NodeGene(node_id, 'sensor')
+                node_id += 1
+            first_action = node_id
+            for _ in range(action_num * 2):
+                self.nodes[node_id] = NodeGene(node_id, 'output')
+                node_id += 1
+
+            for index in range(num_input_nodes):
+                for action in range(action_num * 2):
+                    self.connections.append(
+                        ConnectionGene(index, first_action + action, True, index * (action_num * 2) + action)
+                    )
+        if nodes is not None and connections is not None:
+            self.nodes = nodes
+            self.connections = connections
+
+    def calculate_rank_of_nodes(self):
+        rank_of_node = {}
+        nodes_to_rank = list(self.nodes.items())
+        while len(nodes_to_rank) > 0:
+            for list_index, (id, node) in enumerate(nodes_to_rank):
+                incoming_connections = list(filter(lambda connection: connection.end == id and
+                                                   not connection.recurrent, self.connections))
+                if len(incoming_connections) == 0:
+                    rank_of_node[id] = 0
+                    nodes_to_rank.pop(list_index)
+                    break
+
+                incoming_connections_starts = list(map(lambda connection: connection.start, incoming_connections))
+                start_ranks = list(map(lambda element: rank_of_node[element[0]],
+                                       filter(lambda start_node: start_node[0] in incoming_connections_starts and
+                                              start_node[0] in rank_of_node.keys(),
+                                              self.nodes.items())))
+                if len(start_ranks) == len(incoming_connections):
+                    rank_of_node[id] = max(start_ranks) + 1
+                    nodes_to_rank.pop(list_index)
+                    break
+        return rank_of_node
+
+    @abstractmethod
+    def mutate(self, innovation_num) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+        # return innovation_num
+        raise NotImplementedError()
+
+    @abstractmethod
+    def cross(self, other):
+        raise NotImplementedError()
+        # return self
+
+
+class NeatLike(Genotype):
+    connection_add_thr = 0.3
+    node_add_thr = 0.3
+
+    def mutate(self, innovation_num, allow_recurrent=False) -> int:
+        """
+        Decides whether or not to mutate this network. Then returns the new innovation number.
+        :param allow_recurrent: Optional parameter allowing or disallowing recurrent connections to form
+        :param innovation_num: Current innovation number
+        :return: Updated innovation number
+        """
+        # add connection
+        if np.random.random(1)[0] < self.connection_add_thr or True:
+            nodes = list(self.nodes.keys())
+            rank_of_node = self.calculate_rank_of_nodes()
+            end_nodes = list(filter(lambda node: rank_of_node[node] > 0, nodes))
+
+            connection_tuple = list(map(lambda connection: (connection.start, connection.end), self.connections))
+
+            start = np.random.randint(0, len(nodes))
+            end = np.random.randint(0, len(end_nodes))
+
+            tries = 50
+            while (rank_of_node[end_nodes[end]] == 0 or
+                   ((not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]])
+                   or nodes[start] == end_nodes[end] or (nodes[start], end_nodes[end]) in connection_tuple) and \
+                    tries > 0:
+                end = np.random.randint(0, len(end_nodes))
+                if (not allow_recurrent) and rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]:
+                    start = np.random.randint(0, len(nodes))
+                tries -= 1
+            if tries > 0:
+                innovation_num += 1
+                self.connections.append(
+                    ConnectionGene(nodes[start], end_nodes[end], True, innovation_num,
+                                   recurrent=rank_of_node[nodes[start]] > rank_of_node[end_nodes[end]]))
+        # TODO: add node
+
+        return innovation_num
+
+    def cross(self, other):
+        return self
labirinth_ai/Population.py (new file, 97 lines)
@@ -0,0 +1,97 @@
+import random
+import numpy as np
+
+from labirinth_ai.Models.Genotype import NeatLike
+
+
+def fib(n):
+    if n == 0:
+        return [1]
+    elif n < 0:
+        return [0]
+    else:
+        return [fib(n - 1)[0] + fib(n - 2)[0]] + fib(n - 1)
+
+
+class Population:
+    def __init__(self, subject_class, world, subject_number):
+        self.subjects = []
+        self.world = world
+        for _ in range(subject_number):
+            px, py = self.world.generate_free_coordinates()
+            self.subjects.append(subject_class(px, py, genotype_class=NeatLike))
+        self.subject_number = subject_number
+        self.subject_class = subject_class
+
+    def select(self):
+        ranked = list(self.subjects)
+        ranked.sort(key=lambda subject: subject.accumulated_rewards, reverse=True)
+
+        return ranked[:int(self.subject_number / 2)]
+
+    @classmethod
+    def scatter(cls, n, buckets):
+        out = np.zeros(buckets)
+        if n == 0:
+            return out
+
+        fib_number = 0
+        fibs = fib(fib_number)
+        while np.sum(fibs) <= n and len(fibs) <= buckets:
+            fib_number += 1
+            fibs = fib(fib_number)
+        fib_number -= 1
+        fibs = fib(fib_number)
+
+        for bucket in range(buckets):
+            if bucket < len(fibs):
+                out[bucket] += fibs[bucket]
+            else:
+                break
+
+        return out + cls.scatter(n - np.sum(fibs), buckets)
+
+    def evolve(self):
+        # get updated weights from the models
+        for subject in self.subjects:
+            subject.model.update_genes_with_weights()
+
+        # crossbreed the current pop
+        best_subjects = self.select()
+        distribution = list(self.scatter(self.subject_number - int(self.subject_number / 2), int(self.subject_number / 2)))
+
+        new_subjects = list(best_subjects)
+        for index, offspring_num in enumerate(distribution):
+            for _ in range(int(offspring_num)):
+                parent_1 = best_subjects[index]
+                parent_2 = best_subjects[random.randint(index + 1, len(best_subjects) - 1)]
+
+                new_genes = parent_1.model.genes.cross(parent_2.model.genes)
+
+                # position doesn't matter, since mutation will set it
+                new_subject = self.subject_class(0, 0, new_genes)
+                new_subject.history = parent_1.history
+                new_subject.samples = parent_1.samples + parent_2.samples
+                new_subjects.append(new_subject)
+
+        assert len(new_subjects) == self.subject_number, 'All generations should have constant size!'
+
+        # mutate the pop
+        mutated_subjects = []
+        innovation_num = max(map(lambda subject: max(map(lambda connection: connection.innovation_num,
+                                                         subject.model.genes.connections)),
+                                 new_subjects))
+        for subject in new_subjects:
+            subject.accumulated_rewards = 0
+
+            innovation_num = subject.model.genes.mutate(innovation_num)
+
+            px, py = self.world.generate_free_coordinates()
+            new_subject = self.subject_class(px, py, subject.model.genes)
+            new_subject.history = subject.history
+            new_subject.samples = subject.samples
+            mutated_subjects.append(new_subject)
+
+        self.subjects = mutated_subjects
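A worked example of the Fibonacci-based offspring split (illustration, not from the commit): fib(3) evaluates to [3, 2, 1, 1], and scatter stacks such descending runs until n offspring are distributed, so better-ranked parents receive more children.

from labirinth_ai.Population import Population, fib

print(fib(3))                      # [3, 2, 1, 1]
print(Population.scatter(10, 5))   # [5. 3. 1. 1. 0.]  ->  3+2+1+1, then 1+1, then 1 onto bucket 0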
@@ -4,6 +4,7 @@ import tensorflow as tf
 from tensorflow import keras

 from labirinth_ai.LabyrinthWorld import LabyrinthWorld
+from labirinth_ai.Models.EvolutionModel import EvolutionModel
 from labirinth_ai.loss import loss2, loss3
 from labirinth_ai.Models.BaseModel import BaseModel, train, create_optimizer, device, from_numpy

@@ -350,7 +351,7 @@ class NetLearner(Subject):

         self.strikes = 0

-    def __init__(self, x, y):
+    def __init__(self, x, y, genes=None, genotype_class=None):
         super(NetLearner, self).__init__(x, y)

         self.action = None
@@ -370,7 +371,10 @@ class NetLearner(Subject):
         self.x_in = []
         self.actions = []
         self.target = []
-        self.model = BaseModel(self.viewD, 4, 4).to(device)
+        # self.model = BaseModel(self.viewD, 4, 4).to(device)
+        self.model = EvolutionModel(self.viewD, 4, 4, genes=genes, genotype_class=genotype_class).to(device)
+
         self.optimizer = create_optimizer(self.model)

         if len(self.samples) < self.randomBuffer:
@@ -540,9 +544,11 @@ class NetLearner(Subject):

         # if len(self.samples) % self.batchsize == 0 and len(self.samples) >= self.randomBuffer:
         if len(self.samples) > self.nextTrain and doTrain:
-            print('train')
+            print('train', len(self.samples))
             self.train()
+            self.nextTrain = len(self.samples)
             self.nextTrain = min(self.batchsize + self.nextTrain, (self.historySizeMul + 1) * self.batchsize)
+            print(len(self.samples), self.nextTrain)

         self.accumulated_rewards += self.lastReward

@@ -657,23 +663,6 @@ class Herbivore(NetLearner):

     samples = []

-    # x_in = keras.Input(shape=(4 * (2 * viewD + 1) * (2 * viewD + 1) + 2))
-    # target = keras.Input(shape=(10, 1))
-    # inVec = keras.layers.Flatten()(x_in)
-    # # kernel_regularizer=keras.regularizers.l2(0.01)
-    # actions = keras.layers.Dense((4 * (2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(inVec)
-    # actions = keras.layers.Dense(((2 * viewD + 1) * (2 * viewD + 1)), activation='elu')(actions)
-    # actions = keras.layers.Dense(8, activation='linear', use_bias=False)(actions)
-    # # actions = keras.layers.Dense(4, activation='linear', use_bias=False)(inVec)
-    #
-    # model = keras.Model(inputs=x_in, outputs=actions)
-    #
-    # # model.compile(optimizer='adam', loss=loss2, target_tensors=[target])
-    # model.compile(optimizer=tf.keras.optimizers.RMSprop(learningRate), loss=loss2, target_tensors=[target])
-
-    # def __init__(self, x, y):
-    #     super(Herbivore, self).__init__(x, y)
-
     def createState(self, world: LabyrinthWorld):
         state = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1
         state2 = np.zeros((2 * self.viewD + 1, 2 * self.viewD + 1), np.float)  # - 1
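With this commit every NetLearner builds its policy from a genotype. A hedged sketch of the two construction paths Population relies on (fresh genome vs. inherited genes); Hunter is assumed to accept these keyword arguments via NetLearner.__init__, as Population.__init__ does:

from labirinth_ai.Models.Genotype import NeatLike
from labirinth_ai.Subject import Hunter  # NetLearner subclass

fresh = Hunter(10, 12, genotype_class=NeatLike)   # no genes: a fresh NeatLike genome is created
child = Hunter(0, 0, genes=fresh.model.genes)     # inherited genes, e.g. after cross() and mutate()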