8 Genetic Algorithm Overview
Objective functions can optionally take in step, budget, and generations.
step - The same objective function will be run for evaluation_early_stop_steps steps, and the current step will be passed into the function as an integer. (This is useful for evaluating a single fold of cross-validation per step, for example.)
budget - A parameter that varies over the course of the generations. It is passed into the objective function as a float between 0 and 1. If the budget of an individual's previous evaluation is less than the current budget, the individual is re-evaluated. This is useful for using smaller datasets earlier in training.
generations - an int corresponding to the current generation number.
In [1]:
Copied!
#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot2.individual_representations.BaseIndividual):
    """Individual that encodes a subset of a fixed universe of items.

    Attributes:
        values: set containing every selectable item.
        subsets: set containing the currently selected items.
        k: upper bound on how many items a single mutation adds or removes.
        mutation_list: mutation operators tried by ``mutate``.
        crossover_list: crossover operators tried by ``crossover``.
    """

    def __init__(self,
                 values,
                 initial_set=None,
                 k=1,  # step size for shuffling
                 ):
        """Build an individual.

        Args:
            values: either an int ``n`` (the universe is ``0..n-1``) or an
                iterable of hashable items.
            initial_set: items selected at start. When ``None``, ``k`` items
                are drawn at random (with replacement, so fewer than ``k``
                distinct items may result).
            k: number of items drawn initially and the per-mutation step size.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Sample from the materialised universe rather than the raw
            # argument: `values` may be a plain int, which random.choices
            # cannot index into.
            pool = list(self.values)
            self.subsets = set(random.choices(pool, k=k)) if pool else set()
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng_=None):
        """Try the mutation operators in random order until one succeeds.

        ``rng_`` is accepted but not used; randomness comes from the global
        ``random`` module.

        Returns:
            True if an operator changed this individual, False otherwise.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng_=None):
        """Try the crossover operators in random order until one succeeds.

        ``rng_`` is accepted but not used; randomness comes from the global
        ``random`` module.

        Args:
            ind2: the other individual; it may be modified as well.

        Returns:
            True if an operator reported a change, False otherwise.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` currently unselected items; return True on success."""
        not_included = list(self.values.difference(self.subsets))
        # Any non-empty remainder can be sampled from; the min() below already
        # caps the sample size, so a single leftover item is fine.
        if len(not_included) > 0:
            self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
            return True
        return False

    def _mutate_remove(self):
        """Remove up to ``k`` selected items, always keeping at least one.

        Returns:
            True if the removal was performed, False if the subset has one or
            zero items and was left alone.
        """
        if len(self.subsets) > 1:
            n_drop = min(self.k, len(self.subsets) - 1)
            self.subsets = self.subsets - set(random.sample(list(self.subsets), k=n_drop))
            # Report success so mutate() stops here instead of falling through
            # to the next operator.
            return True
        return False

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items on which the two individuals differ.

        Every item present in exactly one of the two subsets is removed from
        both and then given to one of them chosen at random.

        Returns:
            False when both subsets are identical (nothing to swap), else True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return a string key built from the sorted selected items."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's universe is the index range ``0 .. len(values) - 1`` of
    the module-level ``values`` array, looked up anew for every individual.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)
# Knapsack instance: item i has value values[i] and weight weights[i].
values = np.random.randint(200, size=100)     # integer values drawn from [0, 200)
# One weight per item; sizing from `values` keeps the two arrays aligned.
weights = np.random.random(len(values)) * 10  # float weights drawn from [0, 10)
max_weight = 50                               # total weight allowed before the value is zeroed
def simple_objective(ind, **kwargs):
    """Score an individual as a knapsack selection.

    Args:
        ind: object whose ``subsets`` attribute holds the selected item indices.
        **kwargs: accepted and ignored.

    Returns:
        ``(total_value, total_weight)`` of the selected items. An empty
        selection scores ``(0, 0)``; a selection heavier than ``max_weight``
        keeps its weight but has its value replaced by 0.
    """
    chosen = list(ind.subsets)
    if not chosen:
        return 0, 0

    picked = np.array(chosen)
    total_weight = np.sum(weights[picked])

    # Over-capacity selections are penalised by zeroing their value.
    if total_weight > max_weight:
        return 0, total_weight

    return np.sum(values[picked]), total_weight
# simple_objective returns two numbers per individual; these lists name them
# and give each a weight (presumably +1 rewards Value and -1 penalises Weight
# -- confirm against the tpot2 BaseEvolver documentation).
objective_function_weights = [1, -1]
objective_names = ["Value", "Weight"]

evolver = tpot2.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_names=objective_names,
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    generations=100,
    n_jobs=1,
    verbose=1,
)

evolver.optimize()
#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot2.individual_representations.BaseIndividual):
    """Individual that encodes a subset of a fixed universe of items.

    Attributes:
        values: set containing every selectable item.
        subsets: set containing the currently selected items.
        k: upper bound on how many items a single mutation adds or removes.
        mutation_list: mutation operators tried by ``mutate``.
        crossover_list: crossover operators tried by ``crossover``.
    """

    def __init__(self,
                 values,
                 initial_set=None,
                 k=1,  # step size for shuffling
                 ):
        """Build an individual.

        Args:
            values: either an int ``n`` (the universe is ``0..n-1``) or an
                iterable of hashable items.
            initial_set: items selected at start. When ``None``, ``k`` items
                are drawn at random (with replacement, so fewer than ``k``
                distinct items may result).
            k: number of items drawn initially and the per-mutation step size.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Sample from the materialised universe rather than the raw
            # argument: `values` may be a plain int, which random.choices
            # cannot index into.
            pool = list(self.values)
            self.subsets = set(random.choices(pool, k=k)) if pool else set()
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng_=None):
        """Try the mutation operators in random order until one succeeds.

        ``rng_`` is accepted but not used; randomness comes from the global
        ``random`` module.

        Returns:
            True if an operator changed this individual, False otherwise.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng_=None):
        """Try the crossover operators in random order until one succeeds.

        ``rng_`` is accepted but not used; randomness comes from the global
        ``random`` module.

        Args:
            ind2: the other individual; it may be modified as well.

        Returns:
            True if an operator reported a change, False otherwise.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self):
        """Add up to ``k`` currently unselected items; return True on success."""
        not_included = list(self.values.difference(self.subsets))
        # Any non-empty remainder can be sampled from; the min() below already
        # caps the sample size, so a single leftover item is fine.
        if len(not_included) > 0:
            self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
            return True
        return False

    def _mutate_remove(self):
        """Remove up to ``k`` selected items, always keeping at least one.

        Returns:
            True if the removal was performed, False if the subset has one or
            zero items and was left alone.
        """
        if len(self.subsets) > 1:
            n_drop = min(self.k, len(self.subsets) - 1)
            self.subsets = self.subsets - set(random.sample(list(self.subsets), k=n_drop))
            # Report success so mutate() stops here instead of falling through
            # to the next operator.
            return True
        return False

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items on which the two individuals differ.

        Every item present in exactly one of the two subsets is removed from
        both and then given to one of them chosen at random.

        Returns:
            False when both subsets are identical (nothing to swap), else True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return a string key built from the sorted selected items."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's universe is the index range ``0 .. len(values) - 1`` of
    the module-level ``values`` array, looked up anew for every individual.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)
# Knapsack instance: item i has value values[i] and weight weights[i].
values = np.random.randint(200, size=100)     # integer values drawn from [0, 200)
# One weight per item; sizing from `values` keeps the two arrays aligned.
weights = np.random.random(len(values)) * 10  # float weights drawn from [0, 10)
max_weight = 50                               # total weight allowed before the value is zeroed
def simple_objective(ind, **kwargs):
    """Score an individual as a knapsack selection.

    Args:
        ind: object whose ``subsets`` attribute holds the selected item indices.
        **kwargs: accepted and ignored.

    Returns:
        ``(total_value, total_weight)`` of the selected items. An empty
        selection scores ``(0, 0)``; a selection heavier than ``max_weight``
        keeps its weight but has its value replaced by 0.
    """
    chosen = list(ind.subsets)
    if not chosen:
        return 0, 0

    picked = np.array(chosen)
    total_weight = np.sum(weights[picked])

    # Over-capacity selections are penalised by zeroing their value.
    if total_weight > max_weight:
        return 0, total_weight

    return np.sum(values[picked]), total_weight
# simple_objective returns two numbers per individual; these lists name them
# and give each a weight (presumably +1 rewards Value and -1 penalises Weight
# -- confirm against the tpot2 BaseEvolver documentation).
objective_function_weights = [1, -1]
objective_names = ["Value", "Weight"]

evolver = tpot2.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_names=objective_names,
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    generations=100,
    n_jobs=1,
    verbose=1,
)

evolver.optimize()
Generation: 100%|██████████| 100/100 [04:05<00:00, 2.46s/it]
In [2]:
Copied!
# Derive a new frame instead of calling reset_index(inplace=True) on the
# evolver's own table: the in-place form modifies evolver state, so re-running
# this cell would keep stacking extra index columns onto it.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']
print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(final_population_results.loc[best_idx, "Value"], final_population_results.loc[best_idx, "Weight"]))
print()
print("All results")
final_population_results
# Derive a new frame instead of calling reset_index(inplace=True) on the
# evolver's own table: the in-place form modifies evolver state, so re-running
# this cell would keep stacking extra index columns onto it.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']
print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(final_population_results.loc[best_idx, "Value"], final_population_results.loc[best_idx, "Weight"]))
print()
print("All results")
final_population_results
best subset {0, 65, 2, 1, 38, 71, 40, 75, 44, 15, 48, 16, 85, 59, 60, 62} Best value 2056.0, weight 48.6142482308331 All results
Out[2]:
Selected Index | Value | Weight | Parents | Variation_Function | Individual | Generation | Submitted Timestamp | Completed Timestamp | Pareto_Front | |
---|---|---|---|---|---|---|---|---|---|---|
0 | (16,) | 75.0 | 2.054788 | NaN | NaN | <__main__.SubsetSelector object at 0x7faf86cfc... | 0.0 | 1.708121e+09 | 1.708121e+09 | NaN |
1 | (13,) | 11.0 | 4.466691 | NaN | NaN | <__main__.SubsetSelector object at 0x7faf86635... | 0.0 | 1.708121e+09 | 1.708121e+09 | NaN |
2 | (41,) | 50.0 | 6.249590 | NaN | NaN | <__main__.SubsetSelector object at 0x7faf84e87... | 0.0 | 1.708121e+09 | 1.708121e+09 | NaN |
3 | (40,) | 35.0 | 0.992726 | NaN | NaN | <__main__.SubsetSelector object at 0x7faf83fdf... | 0.0 | 1.708121e+09 | 1.708121e+09 | NaN |
4 | (77,) | 0.0 | 1.475988 | NaN | NaN | <__main__.SubsetSelector object at 0x7faf83ff1... | 0.0 | 1.708121e+09 | 1.708121e+09 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | (0, 1, 5, 15, 60, 62, 65, 75, 83, 85) | 1323.0 | 17.180098 | ((0, 5, 15, 60, 62, 65, 75, 83, 85), (0, 5, 15... | ind_mutate | <__main__.SubsetSelector object at 0x7faf695e5... | 99.0 | 1.708121e+09 | 1.708121e+09 | 1.0 |
9996 | (0, 8, 15, 60, 62, 65, 75, 96) | 916.0 | 18.695221 | ((0, 15, 39, 40, 60, 62, 65, 75, 85), (0, 15, ... | ind_mutate , ind_mutate , ind_crossover | <__main__.SubsetSelector object at 0x7faf69fbf... | 99.0 | 1.708121e+09 | 1.708121e+09 | NaN |
9997 | (0, 15, 57, 62, 65, 75, 85, 86, 92) | 967.0 | 15.581100 | ((0, 15, 60, 62, 65, 75, 85, 86), (0, 15, 60, ... | ind_mutate | <__main__.SubsetSelector object at 0x7faf6b05a... | 99.0 | 1.708121e+09 | 1.708121e+09 | NaN |
9998 | (0, 15, 21, 65, 75, 76) | 878.0 | 18.495023 | ((0, 15, 60, 65, 75), (0, 15, 60, 65, 75)) | ind_mutate | <__main__.SubsetSelector object at 0x7faf5eec0... | 99.0 | 1.708121e+09 | 1.708121e+09 | NaN |
9999 | (0, 15, 39, 65, 75, 83, 85, 92) | 1054.0 | 14.423653 | ((0, 2, 15, 39, 60, 65, 75, 83, 85), (0, 15, 3... | ind_mutate , ind_mutate , ind_crossover | <__main__.SubsetSelector object at 0x7faf6b36b... | 99.0 | 1.708121e+09 | 1.708121e+09 | NaN |
10000 rows × 10 columns
In [3]:
Copied!
from scipy.stats import binned_statistic_2d

# Every evaluated individual is a (Weight, Value) point tagged with the
# generation in which it was produced.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 edges per axis, i.e. a 99 x 99 grid of bins.
x_bins = np.linspace(0, 100, num=100)
y_bins = np.linspace(0, 3000, num=100)

# Mean generation of the individuals that fall into each (Weight, Value) bin.
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transposed so Weight bins run along the horizontal axis
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d

# Every evaluated individual is a (Weight, Value) point tagged with the
# generation in which it was produced.
x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

# 100 edges per axis, i.e. a 99 x 99 grid of bins.
x_bins = np.linspace(0, 100, num=100)
y_bins = np.linspace(0, 3000, num=100)

# Mean generation of the individuals that fall into each (Weight, Value) bin.
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transposed so Weight bins run along the horizontal axis
    origin='lower',
    extent=(0, 100, 0, 3000),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()