Source code for gaggle.problem.environment.rl_problem

from gaggle.problem import Problem
from gaggle.arguments import ProblemArgs, SysArgs
from gaggle.population import Individual
from gaggle.problem.environment.environment_factory import EnvironmentFactory

import torch
import numpy as np


class RLProblem(Problem):
    """Problem that uses a reinforcement learning environment as a fitness
    evaluation process.

    Used mainly for OpenAI Gym / Gymnasium environments, but it can be used with
    any environment that supports the Gymnasium environment API (a reset() that
    returns (observation, info) and a five-tuple step()).
    """

    def __init__(self, problem_args: ProblemArgs = None, sys_args: SysArgs = None):
        super(RLProblem, self).__init__(problem_args, sys_args)
        # Build the concrete environment from the problem arguments.
        self.environment = EnvironmentFactory.from_problem_args(problem_args)

    @torch.no_grad()
    def evaluate(self, individual: Individual, *args, **kwargs) -> float:
        steps = self.problem_args.steps
        runs = self.problem_args.runs
        gui = self.problem_args.gui
        stop_on_done = self.problem_args.stop_on_done

        observations = []
        rewards = []
        for r in range(runs):
            # Gymnasium-style reset() returns (observation, info).
            observation, _ = self.environment.reset()
            observation = torch.Tensor(observation).to(self.sys_args.device)
            run_observations = [observation]
            run_rewards = []
            for t in range(steps):
                if gui:
                    self.environment.render()
                # The individual acts as a policy: observation in, discrete action out.
                action = individual(observation, *args, **kwargs).cpu().item()
                # Gymnasium-style step() returns (obs, reward, terminated, truncated, info).
                observation, reward, terminated, truncated, _ = self.environment.step(action)
                observation = torch.Tensor(observation).to(self.sys_args.device)
                run_observations.append(observation)
                run_rewards.append(reward)
                # Either termination signal means the episode is over.
                if stop_on_done and (terminated or truncated):
                    break
            observations.append(run_observations)
            rewards.append(run_rewards)

        # Fitness is the mean total (undiscounted) reward across all runs.
        sums = [sum(run) for run in rewards]
        return np.mean(sums).item()
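
A minimal usage sketch follows. It is hypothetical and for orientation only:
the ArgmaxPolicy stand-in, the bare ProblemArgs()/SysArgs() construction, and
the CartPole-sized dimensions (4 observations, 2 actions) are assumptions, not
part of gaggle; only the attribute names that evaluate() reads above (steps,
runs, gui, stop_on_done, device) come from the source.

import torch

from gaggle.arguments import ProblemArgs, SysArgs
from gaggle.problem.environment.rl_problem import RLProblem


class ArgmaxPolicy(torch.nn.Module):
    """Hypothetical stand-in for a gaggle Individual: a linear policy sized
    for CartPole-like observations (4 inputs, 2 discrete actions)."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, observation: torch.Tensor) -> torch.Tensor:
        # evaluate() calls .cpu().item() on the result, so return a 0-d tensor
        # holding the index of the chosen discrete action.
        return self.linear(observation).argmax()


# Assumed: ProblemArgs/SysArgs construct with usable defaults, and the choice
# of environment is resolved by EnvironmentFactory.from_problem_args().
problem_args = ProblemArgs()
problem_args.steps = 200
problem_args.runs = 3
problem_args.gui = False
problem_args.stop_on_done = True
sys_args = SysArgs()

problem = RLProblem(problem_args=problem_args, sys_args=sys_args)
fitness = problem.evaluate(ArgmaxPolicy())  # mean total reward over 3 runs
print(fitness)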