8 Genetic Algorithm Overview

Objective functions can optionally take in step, budget, and generations.

step - The same objective function will be run for #evaluation_early_stop_steps steps, and the current step is passed into the function as an integer. (This is useful, for example, for evaluating a single fold of cross-validation per step.)

budget - A parameter that varies over the course of the generations. It is passed into the objective function as a float between 0 and 1. If an individual was previously evaluated with a budget smaller than the current budget, that individual is re-evaluated. This is useful for using smaller datasets earlier in training.

generations - an int corresponding to the current generation number.

In [1]:

Copied!





#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster

class SubsetSelector(tpot2.individual_representations.BaseIndividual):
    """Individual that holds a subset of a fixed pool of candidate items.

    Attributes:
        values: set of every candidate item that may be selected.
        subsets: set of the items currently selected by this individual.
        k: number of random draws used to build the initial subset, and the
            maximum number of items added or removed by a single mutation.
    """

    def __init__(self, values, initial_set=None, k=1):
        """Build an individual over the candidate pool ``values``.

        Args:
            values: either an int ``n`` (shorthand for the pool ``range(n)``)
                or an iterable of candidate items.
            initial_set: items to start with. When ``None``, ``k`` items are
                drawn at random (with replacement, so the starting subset may
                hold fewer than ``k`` distinct items).
            k: see the class docstring.
        """
        # Materialise the pool once so the int shorthand, one-shot iterables
        # and plain sequences can all be sampled from below.
        if isinstance(values, int):
            population = list(range(values))
        else:
            population = list(values)
        self.values = set(population)

        if initial_set is None:
            # Sample from the materialised pool, not the raw argument:
            # random.choices() cannot index into an int.
            self.subsets = set(random.choices(population, k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng_=None):
        """Apply the first mutation operator that succeeds, in random order.

        ``rng_`` is accepted but not used; randomness comes from the
        module-level ``random`` functions.

        Returns:
            True if an operator changed this individual, False otherwise.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng_=None):
        """Apply the first crossover operator that succeeds, in random order.

        ``rng_`` is accepted but not used; randomness comes from the
        module-level ``random`` functions.

        Returns:
            True if an operator reported success, False otherwise.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` currently unselected items; return True on success."""
        not_included = list(self.values.difference(self.subsets))
        # A single remaining candidate can still be added.
        if not not_included:
            return False
        self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` selected items, always keeping at least one.

        Returns:
            True if items were removed, False if the subset has one item or fewer.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        removed = set(random.sample(list(self.subsets), k=n_remove))
        self.subsets = self.subsets - removed
        # Report success so mutate() stops here instead of applying a second
        # operator (or reporting that nothing changed).
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items held by exactly one parent.

        Every item in the symmetric difference of the two subsets is removed
        from both individuals and then given to one of them at random. Both
        ``self`` and ``ss2`` are modified in place.

        Returns:
            False if both subsets are identical (nothing to swap), else True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return the sorted selected items, rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))

def individual_generator():
    """Endlessly yield new SubsetSelector individuals.

    Each individual is built over the index pool ``0 .. len(values) - 1`` of the
    module-level ``values`` array, which is read again on every iteration.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)


# Knapsack problem data: item i has value values[i] and weight weights[i].
values = np.random.randint(200,size=100)
# One weight per item; sized from `values` so the two arrays stay aligned.
weights = np.random.random(len(values))*10
# Subsets whose total weight exceeds this limit are scored with a value of 0.
max_weight = 50

def simple_objective(ind, **kwargs):
    """Score a knapsack individual as ``(total value, total weight)``.

    Args:
        ind: object whose ``subsets`` attribute holds the selected item indices.
        **kwargs: accepted and ignored.

    Returns:
        ``(0, 0)`` for an empty selection. Otherwise the summed value and
        summed weight of the selected items, with the value replaced by 0
        when the weight exceeds ``max_weight``.
    """
    chosen = np.array(list(ind.subsets))
    if chosen.size == 0:
        return 0, 0

    weight_sum = np.sum(weights[chosen])
    value_sum = np.sum(values[chosen])

    # Over-weight selections keep their weight but forfeit all value.
    score = 0 if weight_sum > max_weight else value_sum
    return score, weight_sum

# Labels for the two numbers returned by simple_objective, in return order.
objective_names = ["Value", "Weight"]
# One weight per objective: +1 for "Value", -1 for "Weight".
# NOTE(review): presumably the sign selects maximise vs. minimise - confirm against the tpot2 docs.
objective_function_weights = [1, -1]

evolver = tpot2.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=1,
    verbose=1,
)

evolver.optimize()
#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster

class SubsetSelector(tpot2.individual_representations.BaseIndividual):
    """Individual that holds a subset of a fixed pool of candidate items.

    Attributes:
        values: set of every candidate item that may be selected.
        subsets: set of the items currently selected by this individual.
        k: number of random draws used to build the initial subset, and the
            maximum number of items added or removed by a single mutation.
    """

    def __init__(self, values, initial_set=None, k=1):
        """Build an individual over the candidate pool ``values``.

        Args:
            values: either an int ``n`` (shorthand for the pool ``range(n)``)
                or an iterable of candidate items.
            initial_set: items to start with. When ``None``, ``k`` items are
                drawn at random (with replacement, so the starting subset may
                hold fewer than ``k`` distinct items).
            k: see the class docstring.
        """
        # Materialise the pool once so the int shorthand, one-shot iterables
        # and plain sequences can all be sampled from below.
        if isinstance(values, int):
            population = list(range(values))
        else:
            population = list(values)
        self.values = set(population)

        if initial_set is None:
            # Sample from the materialised pool, not the raw argument:
            # random.choices() cannot index into an int.
            self.subsets = set(random.choices(population, k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng_=None):
        """Apply the first mutation operator that succeeds, in random order.

        ``rng_`` is accepted but not used; randomness comes from the
        module-level ``random`` functions.

        Returns:
            True if an operator changed this individual, False otherwise.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng_=None):
        """Apply the first crossover operator that succeeds, in random order.

        ``rng_`` is accepted but not used; randomness comes from the
        module-level ``random`` functions.

        Returns:
            True if an operator reported success, False otherwise.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` currently unselected items; return True on success."""
        not_included = list(self.values.difference(self.subsets))
        # A single remaining candidate can still be added.
        if not not_included:
            return False
        self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` selected items, always keeping at least one.

        Returns:
            True if items were removed, False if the subset has one item or fewer.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        removed = set(random.sample(list(self.subsets), k=n_remove))
        self.subsets = self.subsets - removed
        # Report success so mutate() stops here instead of applying a second
        # operator (or reporting that nothing changed).
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items held by exactly one parent.

        Every item in the symmetric difference of the two subsets is removed
        from both individuals and then given to one of them at random. Both
        ``self`` and ``ss2`` are modified in place.

        Returns:
            False if both subsets are identical (nothing to swap), else True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return the sorted selected items, rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))

def individual_generator():
    """Endlessly yield new SubsetSelector individuals.

    Each individual is built over the index pool ``0 .. len(values) - 1`` of the
    module-level ``values`` array, which is read again on every iteration.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)


# Knapsack problem data: item i has value values[i] and weight weights[i].
values = np.random.randint(200,size=100)
# One weight per item; sized from `values` so the two arrays stay aligned.
weights = np.random.random(len(values))*10
# Subsets whose total weight exceeds this limit are scored with a value of 0.
max_weight = 50

def simple_objective(ind, **kwargs):
    """Score a knapsack individual as ``(total value, total weight)``.

    Args:
        ind: object whose ``subsets`` attribute holds the selected item indices.
        **kwargs: accepted and ignored.

    Returns:
        ``(0, 0)`` for an empty selection. Otherwise the summed value and
        summed weight of the selected items, with the value replaced by 0
        when the weight exceeds ``max_weight``.
    """
    chosen = np.array(list(ind.subsets))
    if chosen.size == 0:
        return 0, 0

    weight_sum = np.sum(weights[chosen])
    value_sum = np.sum(values[chosen])

    # Over-weight selections keep their weight but forfeit all value.
    score = 0 if weight_sum > max_weight else value_sum
    return score, weight_sum

# Labels for the two numbers returned by simple_objective, in return order.
objective_names = ["Value", "Weight"]
# One weight per objective: +1 for "Value", -1 for "Weight".
# NOTE(review): presumably the sign selects maximise vs. minimise - confirm against the tpot2 docs.
objective_function_weights = [1, -1]

evolver = tpot2.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=1,
    verbose=1,
)

evolver.optimize()

Generation: 100%|██████████| 100/100 [04:05<00:00,  2.46s/it]

In [2]:

Copied!





# Build a fresh table rather than calling reset_index(inplace=True): the
# in-place form would modify the evolver's own results DataFrame, so re-running
# this cell would keep adding index columns to it.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row label of the individual with the highest "Value".
best_idx = final_population_results["Value"].idxmax()
# Single .loc[row, column] lookup instead of chained indexing.
best_individual = final_population_results.loc[best_idx, 'Individual']
print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(final_population_results.loc[best_idx, "Value"],final_population_results.loc[best_idx, "Weight"]))
print()

print("All results")
# Last expression of the cell: rendered as the cell's rich output.
final_population_results
# Build a fresh table rather than calling reset_index(inplace=True): the
# in-place form would modify the evolver's own results DataFrame, so re-running
# this cell would keep adding index columns to it.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row label of the individual with the highest "Value".
best_idx = final_population_results["Value"].idxmax()
# Single .loc[row, column] lookup instead of chained indexing.
best_individual = final_population_results.loc[best_idx, 'Individual']
print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(final_population_results.loc[best_idx, "Value"],final_population_results.loc[best_idx, "Weight"]))
print()

print("All results")
# Last expression of the cell: rendered as the cell's rich output.
final_population_results

best subset {0, 65, 2, 1, 38, 71, 40, 75, 44, 15, 48, 16, 85, 59, 60, 62}
Best value 2056.0, weight 48.6142482308331

All results

Out[2]:

	Selected Index	Value	Weight	Parents	Variation_Function	Individual	Generation	Submitted Timestamp	Completed Timestamp	Pareto_Front
0	(16,)	75.0	2.054788	NaN	NaN	<__main__.SubsetSelector object at 0x7faf86cfc...	0.0	1.708121e+09	1.708121e+09	NaN
1	(13,)	11.0	4.466691	NaN	NaN	<__main__.SubsetSelector object at 0x7faf86635...	0.0	1.708121e+09	1.708121e+09	NaN
2	(41,)	50.0	6.249590	NaN	NaN	<__main__.SubsetSelector object at 0x7faf84e87...	0.0	1.708121e+09	1.708121e+09	NaN
3	(40,)	35.0	0.992726	NaN	NaN	<__main__.SubsetSelector object at 0x7faf83fdf...	0.0	1.708121e+09	1.708121e+09	NaN
4	(77,)	0.0	1.475988	NaN	NaN	<__main__.SubsetSelector object at 0x7faf83ff1...	0.0	1.708121e+09	1.708121e+09	NaN
...	...	...	...	...	...	...	...	...	...	...
9995	(0, 1, 5, 15, 60, 62, 65, 75, 83, 85)	1323.0	17.180098	((0, 5, 15, 60, 62, 65, 75, 83, 85), (0, 5, 15...	ind_mutate	<__main__.SubsetSelector object at 0x7faf695e5...	99.0	1.708121e+09	1.708121e+09	1.0
9996	(0, 8, 15, 60, 62, 65, 75, 96)	916.0	18.695221	((0, 15, 39, 40, 60, 62, 65, 75, 85), (0, 15, ...	ind_mutate , ind_mutate , ind_crossover	<__main__.SubsetSelector object at 0x7faf69fbf...	99.0	1.708121e+09	1.708121e+09	NaN
9997	(0, 15, 57, 62, 65, 75, 85, 86, 92)	967.0	15.581100	((0, 15, 60, 62, 65, 75, 85, 86), (0, 15, 60, ...	ind_mutate	<__main__.SubsetSelector object at 0x7faf6b05a...	99.0	1.708121e+09	1.708121e+09	NaN
9998	(0, 15, 21, 65, 75, 76)	878.0	18.495023	((0, 15, 60, 65, 75), (0, 15, 60, 65, 75))	ind_mutate	<__main__.SubsetSelector object at 0x7faf5eec0...	99.0	1.708121e+09	1.708121e+09	NaN
9999	(0, 15, 39, 65, 75, 83, 85, 92)	1054.0	14.423653	((0, 2, 15, 39, 60, 65, 75, 83, 85), (0, 15, 3...	ind_mutate , ind_mutate , ind_crossover	<__main__.SubsetSelector object at 0x7faf6b36b...	99.0	1.708121e+09	1.708121e+09	NaN

10000 rows × 10 columns

In [3]:

Copied!





from scipy.stats import binned_statistic_2d

# Weight goes on the x axis, value on the y axis; generation is the binned quantity.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 evenly spaced edges per axis, i.e. 99 bins per axis.
x_bins = np.linspace(0, 100, 100)
y_bins = np.linspace(0, 3000, 100)

# Mean generation of the individuals falling into each (weight, value) bin.
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))

# The statistic is indexed [x_bin, y_bin]; transpose so y runs along the image rows.
im = ax1.imshow(
    ret.statistic.T,
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d

# Weight goes on the x axis, value on the y axis; generation is the binned quantity.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 evenly spaced edges per axis, i.e. 99 bins per axis.
x_bins = np.linspace(0, 100, 100)
y_bins = np.linspace(0, 3000, 100)

# Mean generation of the individuals falling into each (weight, value) bin.
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))

# The statistic is indexed [x_bin, y_bin]; transpose so y runs along the image rows.
im = ax1.imshow(
    ret.statistic.T,
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()

No description has been provided for this image