9 Genetic Algorithm Overview
Objective functions can optionally take in step, budget, and generations.
step - The same objective function is run once for each of the `evaluation_early_stop_steps` steps, and the current step is passed into the function as an integer. (This is useful, for example, for evaluating a single fold of cross-validation per step.)
budget - A parameter that varies over the course of the generations. Gets passed into the objective function as a float between 0 and 1. If the budget of the previous evaluation is less than the current budget, it will get re-evaluated. Useful for using smaller datasets earlier in training.
generations - an int corresponding to the current generation number.
In [1]:
Copied!
#knapsack problem
import numpy as np
import tpot
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot.individual.BaseIndividual):
    """Individual that represents a subset of a fixed pool of candidate values.

    ``self.values`` is the full pool and ``self.subsets`` is the currently
    selected subset. Mutation adds or removes up to ``k`` members; crossover
    redistributes the members on which two individuals disagree.

    All randomness comes from the module-level ``random`` generator.
    """

    def __init__(
        self,
        values,
        initial_set=None,
        k=1,  # step size for shuffling
    ):
        """Build the candidate pool and the starting subset.

        Parameters
        ----------
        values : int or iterable
            An int ``n`` is expanded to the pool ``{0, ..., n - 1}``; any
            other iterable is used as the pool directly.
        initial_set : iterable, optional
            Starting subset. When omitted, ``k`` members are drawn from the
            pool with replacement, so the start may hold fewer than ``k``
            distinct members.
        k : int
            Maximum number of members added or removed by one mutation.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Draw from the materialised pool, not from the raw ``values``
            # argument: a bare int is not a sequence and cannot be sampled.
            self.subsets = set(random.choices(list(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k
        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation operator (in random order) that succeeds.

        ``rng`` is accepted but not used by this implementation.
        Returns True when an operator reported success, otherwise False.
        """
        operators = self.mutation_list.copy()
        random.shuffle(operators)
        # any() stops at the first operator that returns a truthy value.
        return any(operator() for operator in operators)

    def crossover(self, ind2, rng=None):
        """Apply the first crossover operator (in random order) that succeeds.

        ``rng`` is accepted but not used by this implementation.
        Both ``self`` and ``ind2`` may be modified in place.
        Returns True when an operator reported success, otherwise False.
        """
        operators = self.crossover_list.copy()
        random.shuffle(operators)
        return any(operator(ind2) for operator in operators)

    def _mutate_add(self):
        """Add up to ``k`` pool members that are not yet selected.

        Returns False when every pool member is already selected.
        """
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        n_add = min(self.k, len(not_included))
        self.subsets.update(random.sample(not_included, k=n_add))
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` selected members, always keeping at least one.

        Returns False when the subset has one member or fewer.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        removed = set(random.sample(list(self.subsets), k=n_remove))
        self.subsets = self.subsets - removed
        # Report success so mutate() does not also apply another operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly reassign every member on which the two subsets disagree.

        Each member present in exactly one of the two subsets is removed from
        both and then given to one of them chosen at random.
        Returns False when the two subsets are already identical.
        """
        # symmetric_difference builds a new set, so it is safe to modify
        # both subsets while iterating over it.
        diffs = self.subsets.symmetric_difference(ss2.subsets)
        if not diffs:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)
        return True

    def unique_id(self):
        """Return the sorted subset rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Yield an endless stream of new ``SubsetSelector`` individuals.

    Each individual's pool is the index range of the module-level ``values``
    array, so a subset is a set of item indices. The length of ``values`` is
    read again for every individual.
    """
    while True:
        item_count = len(values)
        item_indices = np.arange(item_count)
        yield SubsetSelector(values=item_indices)
# Knapsack instance: item i has value values[i] and weight weights[i].
values = np.random.randint(200, size=100)
# One weight per item, so the two arrays stay parallel.
weights = np.random.random(len(values)) * 10
# Subsets whose total weight exceeds this limit score a value of 0.
max_weight = 50
def simple_objective(ind, **kwargs):
    """Score a knapsack individual as ``(total value, total weight)``.

    ``ind.subsets`` holds the indices of the selected items. An empty
    selection scores ``(0, 0)``. A selection heavier than ``max_weight``
    keeps its weight but has its value set to 0. Extra keyword arguments are
    accepted and ignored.
    """
    chosen = np.array(list(ind.subsets))
    # Handle the empty selection before using ``chosen`` as an index array.
    if not len(chosen):
        return 0, 0

    load = np.sum(weights[chosen])
    worth = np.sum(values[chosen])
    # Overweight selections are infeasible and earn no value.
    worth = 0 if load > max_weight else worth
    return worth, load
# simple_objective returns two scores per individual, so two names and two
# weights are supplied for the single objective function.
# NOTE(review): presumably a positive weight means "maximize" and a negative
# weight means "minimize" — confirm against the TPOT documentation.
objective_names = ["Value", "Weight"]
objective_function_weights = [1, -1]

evolver = tpot.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_names=objective_names,
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    generations=100,
    n_jobs=32,
    verbose=1,
)

# Run the evolutionary search.
evolver.optimize()
#knapsack problem
import numpy as np
import tpot
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot.individual.BaseIndividual):
    """Individual that represents a subset of a fixed pool of candidate values.

    ``self.values`` is the full pool and ``self.subsets`` is the currently
    selected subset. Mutation adds or removes up to ``k`` members; crossover
    redistributes the members on which two individuals disagree.

    All randomness comes from the module-level ``random`` generator.
    """

    def __init__(
        self,
        values,
        initial_set=None,
        k=1,  # step size for shuffling
    ):
        """Build the candidate pool and the starting subset.

        Parameters
        ----------
        values : int or iterable
            An int ``n`` is expanded to the pool ``{0, ..., n - 1}``; any
            other iterable is used as the pool directly.
        initial_set : iterable, optional
            Starting subset. When omitted, ``k`` members are drawn from the
            pool with replacement, so the start may hold fewer than ``k``
            distinct members.
        k : int
            Maximum number of members added or removed by one mutation.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Draw from the materialised pool, not from the raw ``values``
            # argument: a bare int is not a sequence and cannot be sampled.
            self.subsets = set(random.choices(list(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k
        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation operator (in random order) that succeeds.

        ``rng`` is accepted but not used by this implementation.
        Returns True when an operator reported success, otherwise False.
        """
        operators = self.mutation_list.copy()
        random.shuffle(operators)
        # any() stops at the first operator that returns a truthy value.
        return any(operator() for operator in operators)

    def crossover(self, ind2, rng=None):
        """Apply the first crossover operator (in random order) that succeeds.

        ``rng`` is accepted but not used by this implementation.
        Both ``self`` and ``ind2`` may be modified in place.
        Returns True when an operator reported success, otherwise False.
        """
        operators = self.crossover_list.copy()
        random.shuffle(operators)
        return any(operator(ind2) for operator in operators)

    def _mutate_add(self):
        """Add up to ``k`` pool members that are not yet selected.

        Returns False when every pool member is already selected.
        """
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        n_add = min(self.k, len(not_included))
        self.subsets.update(random.sample(not_included, k=n_add))
        return True

    def _mutate_remove(self):
        """Remove up to ``k`` selected members, always keeping at least one.

        Returns False when the subset has one member or fewer.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        removed = set(random.sample(list(self.subsets), k=n_remove))
        self.subsets = self.subsets - removed
        # Report success so mutate() does not also apply another operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly reassign every member on which the two subsets disagree.

        Each member present in exactly one of the two subsets is removed from
        both and then given to one of them chosen at random.
        Returns False when the two subsets are already identical.
        """
        # symmetric_difference builds a new set, so it is safe to modify
        # both subsets while iterating over it.
        diffs = self.subsets.symmetric_difference(ss2.subsets)
        if not diffs:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)
        return True

    def unique_id(self):
        """Return the sorted subset rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Yield an endless stream of new ``SubsetSelector`` individuals.

    Each individual's pool is the index range of the module-level ``values``
    array, so a subset is a set of item indices. The length of ``values`` is
    read again for every individual.
    """
    while True:
        item_count = len(values)
        item_indices = np.arange(item_count)
        yield SubsetSelector(values=item_indices)
# Knapsack instance: item i has value values[i] and weight weights[i].
values = np.random.randint(200, size=100)
# One weight per item, so the two arrays stay parallel.
weights = np.random.random(len(values)) * 10
# Subsets whose total weight exceeds this limit score a value of 0.
max_weight = 50
def simple_objective(ind, **kwargs):
    """Score a knapsack individual as ``(total value, total weight)``.

    ``ind.subsets`` holds the indices of the selected items. An empty
    selection scores ``(0, 0)``. A selection heavier than ``max_weight``
    keeps its weight but has its value set to 0. Extra keyword arguments are
    accepted and ignored.
    """
    chosen = np.array(list(ind.subsets))
    # Handle the empty selection before using ``chosen`` as an index array.
    if not len(chosen):
        return 0, 0

    load = np.sum(weights[chosen])
    worth = np.sum(values[chosen])
    # Overweight selections are infeasible and earn no value.
    worth = 0 if load > max_weight else worth
    return worth, load
# simple_objective returns two scores per individual, so two names and two
# weights are supplied for the single objective function.
# NOTE(review): presumably a positive weight means "maximize" and a negative
# weight means "minimize" — confirm against the TPOT documentation.
objective_names = ["Value", "Weight"]
objective_function_weights = [1, -1]

evolver = tpot.evolvers.BaseEvolver(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_names=objective_names,
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    generations=100,
    n_jobs=32,
    verbose=1,
)

# Run the evolutionary search.
evolver.optimize()
/opt/anaconda3/envs/tpotenv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm Generation: 100%|██████████| 100/100 [01:43<00:00, 1.03s/it]
In [2]:
Copied!
# Build a tidy copy of the evaluation log. reset_index() without inplace=True
# leaves the evolver's own DataFrame untouched, so this cell can be re-run
# without piling up extra index columns.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" score.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']
best_value = final_population_results.loc[best_idx, "Value"]
best_weight = final_population_results.loc[best_idx, "Weight"]

print("best subset", best_individual.subsets)
print(f"Best value {best_value}, weight {best_weight}")
print()
print("All results")
# Bare expression so the notebook displays the full table.
final_population_results
# Build a tidy copy of the evaluation log. reset_index() without inplace=True
# leaves the evolver's own DataFrame untouched, so this cell can be re-run
# without piling up extra index columns.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" score.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']
best_value = final_population_results.loc[best_idx, "Value"]
best_weight = final_population_results.loc[best_idx, "Weight"]

print("best subset", best_individual.subsets)
print(f"Best value {best_value}, weight {best_weight}")
print()
print("All results")
# Bare expression so the notebook displays the full table.
final_population_results
best subset {1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 42, 43, 48, 50, 61, 62, 68, 80, 89, 91, 97, 98} Best value 3070.0, weight 49.01985602703945 All results
Out[2]:
Selected Index | Value | Weight | Parents | Variation_Function | Individual | Generation | Submitted Timestamp | Completed Timestamp | Eval Error | Pareto_Front | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | (40,) | 89.0 | 9.883465 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa80eb0> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
1 | (45,) | 116.0 | 6.643557 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa83b50> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
2 | (52,) | 172.0 | 9.273163 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa81210> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
3 | (33,) | 112.0 | 1.594347 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa838e0> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
4 | (37,) | 90.0 | 3.273826 | NaN | NaN | <__main__.SubsetSelector object at 0x32aa83e50> | 0.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | (1, 9, 16, 23, 24, 31, 77, 79) | 998.0 | 11.622582 | ((1, 9, 16, 17, 23, 24, 31, 77), (1, 9, 16, 17... | ind_mutate | <__main__.SubsetSelector object at 0x3a739b010> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9996 | (1, 8, 9, 16, 22, 23, 24, 28, 29, 31, 48, 49, ... | 0.0 | 51.400433 | ((1, 8, 9, 16, 17, 22, 23, 24, 28, 29, 31, 48,... | ind_mutate | <__main__.SubsetSelector object at 0x3af9a4460> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9997 | (1, 4, 8, 9, 16, 17, 23, 24, 31, 49, 68, 77, 8... | 1728.0 | 15.997430 | ((1, 4, 8, 9, 16, 17, 23, 24, 31, 68, 77, 88, ... | ind_mutate | <__main__.SubsetSelector object at 0x3aa303430> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | 1.0 |
9998 | (8, 9, 17, 23, 24, 25, 31, 51, 77) | 972.0 | 11.991547 | ((8, 9, 17, 23, 24, 31, 77, 88), (8, 9, 17, 23... | ind_mutate | <__main__.SubsetSelector object at 0x3a7399600> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
9999 | (8, 23, 24, 73, 79) | 648.0 | 12.109013 | ((8, 16, 17, 23, 24), (8, 16, 17, 23, 24)) | ind_mutate | <__main__.SubsetSelector object at 0x3a88d4430> | 99.0 | 1.740209e+09 | 1.740209e+09 | None | NaN |
10000 rows × 11 columns
In [3]:
Copied!
from scipy.stats import binned_statistic_2d

# Heat map of the mean generation in which individuals with a given
# (weight, value) score were evaluated.
y = final_population_results["Value"]
x = final_population_results["Weight"]
c = final_population_results["Generation"]

# Start from the original 0..100 x 0..3000 window and widen it when the data
# go beyond it; samples outside the bin edges would otherwise be left out of
# the statistic (e.g. a best value above 3000).
x_max = max(100, x.max())
y_max = max(3000, y.max())
x_bins = np.linspace(0, x_max, 100)
y_bins = np.linspace(0, y_max, 100)
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transpose so weight runs along the horizontal axis
    origin='lower',
    extent=(0, x_max, 0, y_max),
    vmin=0,
    vmax=100,
    # Equals the original 0.03 for the default window and keeps the same
    # plot proportions when the window is widened.
    aspect=0.9 * x_max / y_max,
)
ax1.set_xlabel("Weight")
ax1.set_ylabel("Value")
ax1.set_title("Binned Average Generation")
cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d

# Heat map of the mean generation in which individuals with a given
# (weight, value) score were evaluated.
y = final_population_results["Value"]
x = final_population_results["Weight"]
c = final_population_results["Generation"]

# Start from the original 0..100 x 0..3000 window and widen it when the data
# go beyond it; samples outside the bin edges would otherwise be left out of
# the statistic (e.g. a best value above 3000).
x_max = max(100, x.max())
y_max = max(3000, y.max())
x_bins = np.linspace(0, x_max, 100)
y_bins = np.linspace(0, y_max, 100)
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transpose so weight runs along the horizontal axis
    origin='lower',
    extent=(0, x_max, 0, y_max),
    vmin=0,
    vmax=100,
    # Equals the original 0.03 for the default window and keeps the same
    # plot proportions when the window is widened.
    aspect=0.9 * x_max / y_max,
)
ax1.set_xlabel("Weight")
ax1.set_ylabel("Value")
ax1.set_title("Binned Average Generation")
cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()