# Source code for gaggle.supervisor.ga_supervisor

from gaggle.arguments import ConfigArgs, ProblemArgs, SysArgs, GAArgs, IndividualArgs, OutdirArgs
from gaggle.population import PopulationManager
from gaggle.ga import GAFactory, GA
from gaggle.utils.special_print import print_warning
from typing import Callable
from gaggle.problem import FunctionalProblem


class GASupervisor:
    """Single-call entry point to the framework.

    At minimum, a user needs to specify which problem they want to solve. Every hyper-parameter given to
    ``__init__`` is copied onto one of the framework's argument objects (``GAArgs``, ``ProblemArgs``,
    ``SysArgs``, ``IndividualArgs``, ``OutdirArgs``), which are later handed to ``PopulationManager`` and
    ``GAFactory`` when :meth:`run` is called. Customization is possible by passing functions
    (see :meth:`set_custom_fitness`) or new operators.
    """

    def __init__(self, ga_name: str = "simple", population_size: int = 100, num_parents: int = 100,
                 generations: int = 100, problem_name: str = "MNIST", individual_name: str = "nn",
                 individual_size: int = 100, device: str = "cpu", elitism: float = 0.1, crossover: str = "uniform",
                 k_point: int = 1, tournament_size: int = 3, selection_pressure: float = 0.5, mutation: str = "normal",
                 mutation_chance: float = 0.01, mutation_std: float = 0.05, use_freshness: bool = True,
                 mutate_protected: bool = False, uniform_mutation_min_val: float = -1.,
                 uniform_mutation_max_val: float = 1., selection: str = "weighted", parent_survival_rate: float = 0.5,
                 batch_size: int = -1, eval_batch_size: int = -1, save_best_every: int = None,
                 eval_every_generation: int = 50, model_name: str = "lenet", root: str = "./runs", name: str = "run",
                 seed: int = 1337, steps: int = 1, runs: int = 1, dataset_root: str = "None", gui: bool = False,
                 display_train_metrics: bool = True, display_test_metrics: bool = True):
        """Initialize the GASupervisor.

        Args:
            ga_name: name of the overall GA to use.
            population_size: number of individuals in the population to evolve.
            num_parents: number of parents selected during the selection process (recommended value is
                equal to population_size).
            generations: number of generations.
            problem_name: name of the problem. If "custom", set_custom_fitness needs to be called to set up
                the custom fitness formula.
            individual_name: the type of individual to use to represent the solutions to evolve.
            individual_size: length of the parameter tensor for the basic NumpyIndividual and
                PytorchIndividual. This argument is irrelevant for other individuals (unless it has been
                customized).
            device: device to run algorithms on. Can be a torch.device object or a str ("cpu" or "cuda").
            elitism: % of top models (rounded down) that always gets to survive to the next generation.
            crossover: type of crossover to use.
            k_point: number of points for k-point-crossover.
            tournament_size: number of participants per tournament in tournament_selection.
            selection_pressure: probability used when performing tournament selection, represents the
                likelihood of selecting the best performer.
            mutation: type of mutation to use.
            mutation_chance: per gene probability that a gene will be mutated.
            mutation_std: standard deviation when using normal-based random mutation.
            use_freshness: whether to use freshness to not recompute the fitness of surviving members that
                have not been modified from a generation to the next.
            mutate_protected: whether to mutate the protected individuals that are selected to survive
                (elitism).
            uniform_mutation_min_val: minimum value when sampling mutation values in uniform mutation.
            uniform_mutation_max_val: maximum value when sampling mutation values in uniform mutation.
            selection: type of selection to use.
            parent_survival_rate: (aka probability of crossover) probability to keep the parents rather than
                the children for crossover.
            batch_size: batch size for training. Only relevant for classification and other dataset-based
                problems.
            eval_batch_size: batch size for inference. Only relevant for classification and other
                dataset-based problems.
            save_best_every: save best performer in the population every this many generations.
            eval_every_generation: evaluate the population pool on the test set after this many generations.
            model_name: name of the model architecture. Only relevant for neural network individuals.
            root: root folder where to put the experiments (good choice can be
                f'{os.getcwd()}/experiments').
            name: name of each experiment folder.
            seed: seed to fix randomness.
            steps: number of steps to take in the environment for a single run. Only relevant for rl
                problems.
            runs: number of runs per evaluation. Only relevant for rl problems.
            dataset_root: path to the data on the local storage. Only relevant for classification and other
                dataset-based problems.
            gui: if the environment has a gui, display it if True. Only relevant for rl problems with a gui
                (OpenAI Gym problems).
            display_train_metrics: whether to draw a graph with the train metrics at the end of training,
                needs at least 11 generations of training as the default window size is 10.
            display_test_metrics: whether to draw a graph with the test metrics at the end of training,
                needs number of generations / eval_every_generation > 10 to draw anything since the default
                window size is 10.
        """
        # Start from the framework defaults; only the fields listed below are overridden.
        self.config_args = ConfigArgs()
        self.problem_args = ProblemArgs()
        self.sys_args = SysArgs()
        self.ga_args = GAArgs()
        self.individual_args = IndividualArgs()
        self.outdir_args = OutdirArgs()

        # Genetic-algorithm settings.
        self.ga_args.ga_name = ga_name
        self.ga_args.population_size = population_size
        self.ga_args.num_parents = num_parents
        self.ga_args.generations = generations
        self.ga_args.crossover = crossover
        self.ga_args.k_point = k_point
        self.ga_args.tournament_size = tournament_size
        self.ga_args.selection_pressure = selection_pressure
        self.ga_args.elitism = elitism
        self.ga_args.mutation = mutation
        self.ga_args.mutate_protected = mutate_protected
        self.ga_args.mutation_chance = mutation_chance
        self.ga_args.mutation_std = mutation_std
        self.ga_args.uniform_mutation_min_val = uniform_mutation_min_val
        self.ga_args.uniform_mutation_max_val = uniform_mutation_max_val
        self.ga_args.selection = selection
        self.ga_args.parent_survival_rate = parent_survival_rate
        self.ga_args.save_best_every = save_best_every
        self.ga_args.eval_every_generation = eval_every_generation
        self.ga_args.use_freshness = use_freshness

        # Problem settings.
        self.problem_args.problem_name = problem_name
        self.problem_args.dataset_root = dataset_root
        self.problem_args.seed = seed
        self.problem_args.steps = steps
        self.problem_args.runs = runs
        self.problem_args.gui = gui
        self.problem_args.batch_size = batch_size
        self.problem_args.eval_batch_size = eval_batch_size

        # System settings.
        self.sys_args.device = device

        # Individual settings.
        self.individual_args.model_name = model_name
        self.individual_args.individual_name = individual_name
        self.individual_args.individual_size = individual_size

        # Output directory settings.
        self.outdir_args.root = root
        self.outdir_args.name = name

        # Supervisor-only state: plotting switches and the optional custom fitness function with the extra
        # arguments registered through set_custom_fitness.
        self.display_train_metrics = display_train_metrics
        self.display_test_metrics = display_test_metrics
        self.custom_fitness_function = None
        self.args = []
        self.kwargs = {}

    def set_custom_fitness(self, fitness_function: Callable, *args, **kwargs) -> None:
        """Register the fitness function used when problem_name is "custom".

        If during initialization problem_name is set to "custom", this method needs to be called before
        self.run() to set up the fitness function to evaluate the population on. It takes any callable that
        returns a float value for an individual. If custom arguments are necessary, they can be passed as
        *args and **kwargs.

        Args:
            fitness_function: fitness function to optimize.
            *args: extra positional arguments, stored on the supervisor and forwarded to FunctionalProblem
                when the custom run starts.
            **kwargs: extra keyword arguments, stored on the supervisor and forwarded to FunctionalProblem
                when the custom run starts.
        """
        self.custom_fitness_function = fitness_function
        self.args = args
        self.kwargs = kwargs

    def _run_default(self, *args, **kwargs) -> None:
        """Run script for default parameters and default fitness function.

        Args:
            *args: extra positional arguments forwarded to the PopulationManager constructor.
            **kwargs: extra keyword arguments forwarded to the PopulationManager constructor.
        """
        # Star-args are spelled before the keyword arguments because Python binds them first regardless of
        # where they are written in the call.
        population_manager: PopulationManager = PopulationManager(self.ga_args, self.individual_args, *args,
                                                                  sys_args=self.sys_args, **kwargs)
        trainer: GA = GAFactory.from_ga_args(population_manager=population_manager, ga_args=self.ga_args,
                                             problem_args=self.problem_args, sys_args=self.sys_args,
                                             outdir_args=self.outdir_args, individual_args=self.individual_args)
        trainer.train(display_train_metrics=self.display_train_metrics,
                      display_test_metrics=self.display_test_metrics)

    def _run_custom(self, *args, **kwargs) -> None:
        """Run script for a custom fitness function.

        Prints a warning and returns without training when no fitness function has been registered through
        set_custom_fitness.

        Args:
            *args: extra positional arguments forwarded to the PopulationManager constructor.
            **kwargs: extra keyword arguments forwarded to the PopulationManager constructor.
        """
        if self.custom_fitness_function is None:
            print_warning("Attempted to run a custom fitness function but the fitness function was not set")
            return

        # We first initialize the population. Star-args come first in the call because Python binds them
        # before keyword arguments regardless of where they are written.
        population_manager: PopulationManager = PopulationManager(*args, ga_args=self.ga_args,
                                                                  individual_args=self.individual_args,
                                                                  sys_args=self.sys_args, **kwargs)
        # We then define the problem around the user's fitness function and the extra arguments that were
        # registered together with it.
        problem = FunctionalProblem(*self.args, fitness_function=self.custom_fitness_function,
                                    problem_args=self.problem_args, sys_args=self.sys_args, **self.kwargs)
        trainer: GA = GAFactory.from_ga_args(population_manager=population_manager, problem=problem,
                                             ga_args=self.ga_args,
                                             problem_args=self.problem_args, sys_args=self.sys_args,
                                             outdir_args=self.outdir_args, individual_args=self.individual_args)
        trainer.train(display_train_metrics=self.display_train_metrics,
                      display_test_metrics=self.display_test_metrics)

    def run(self, *args, **kwargs) -> None:
        """Run the genetic algorithm described during __init__.

        Dispatches to the custom-fitness path when problem_name is "custom" and to the default path
        otherwise. If additional parameters need to be passed to the initialization of the
        PopulationManager, they can be given in *args and **kwargs.

        Args:
            *args: extra positional arguments forwarded to the PopulationManager constructor.
            **kwargs: extra keyword arguments forwarded to the PopulationManager constructor.
        """
        if self.problem_args.problem_name != "custom":
            self._run_default(*args, **kwargs)
        else:
            self._run_custom(*args, **kwargs)